From a7e11604913099fc508da99cc648f991b02e20b9 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sun, 29 Sep 2013 16:27:06 -0400 Subject: [PATCH 1/9] CLN/TST: Mark areas that need to be improved throughout. * Add TODOs to places that are clearly not tested (b/c they had undefined names) and fix the undefined names * Add TODOs to unused names. --- pandas/computation/align.py | 5 +++-- pandas/computation/eval.py | 5 ----- pandas/core/algorithms.py | 6 ++++-- pandas/core/groupby.py | 3 +-- pandas/core/index.py | 2 ++ pandas/core/indexing.py | 13 ++++--------- pandas/core/internals.py | 6 +++--- pandas/core/panel.py | 9 +++------ pandas/io/ga.py | 1 - pandas/io/parsers.py | 3 --- pandas/io/pytables.py | 6 ++++-- pandas/stats/misc.py | 5 +++++ pandas/tools/rplot.py | 2 ++ pandas/tseries/converter.py | 4 ++++ pandas/tseries/plotting.py | 1 - 15 files changed, 35 insertions(+), 36 deletions(-) diff --git a/pandas/computation/align.py b/pandas/computation/align.py index f420d0dacf34c..2f776f2db053f 100644 --- a/pandas/computation/align.py +++ b/pandas/computation/align.py @@ -10,6 +10,7 @@ import pandas as pd from pandas import compat import pandas.core.common as com +import pandas.computation.ops as ops def _align_core_single_unary_op(term): @@ -170,10 +171,10 @@ def _align_core(terms): return typ, _zip_axes_from_type(typ, axes) - +# TODO: Add tests that cover this function! def _filter_terms(flat): # numeric literals - literals = frozenset(filter(lambda x: isinstance(x, Constant), flat)) + literals = frozenset(filter(lambda x: isinstance(x, ops.Constant), flat)) # these are strings which are variable names names = frozenset(flat) - literals diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index 36b1e2bc96090..62869b8773ba0 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -2,12 +2,7 @@ """Top level ``eval`` module. """ - -import numbers -import numpy as np - from pandas.core import common as com -from pandas.compat import string_types from pandas.computation.expr import Expr, _parsers, _ensure_scope from pandas.computation.engines import _engines diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 5778a524a584a..db743356f1adf 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -78,14 +78,16 @@ def _hashtable_algo(f, dtype): return f(htable.PyObjectHashTable, com._ensure_object) +# TODO: Test this function! def _count_generic(values, table_type, type_caster): from pandas.core.series import Series values = type_caster(values) table = table_type(min(len(values), 1000000)) uniques, labels = table.factorize(values) - - return Series(counts, index=uniques) + # What should this be?? + # return Series(counts, index=uniques) + return Series(labels, index=uniques) def _match_generic(values, index, table_type, type_caster): diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index e5447e5f8f58f..58657f881c8fb 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1,3 +1,4 @@ +import sys import types from functools import wraps import numpy as np @@ -2123,8 +2124,6 @@ def filter(self, func, dropna=True, *args, **kwargs): >>> grouped = df.groupby(lambda x: mapping[x]) >>> grouped.filter(lambda x: x['A'].sum() + x['B'].sum() > 0) """ - from pandas.tools.merge import concat - indexers = [] obj = self._obj_with_exclusions diff --git a/pandas/core/index.py b/pandas/core/index.py index 1f2e823833810..8f3b133e14f9e 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -501,6 +501,8 @@ def is_int(v): # if we are mixed and have integers try: if is_positional and self.is_mixed(): + # TODO: Figure out what these *ought* to be assigning to. + # (currently unused) if start is not None: i = self.get_loc(start) if stop is not None: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0bc0afaf255f2..6f30cb7b4b911 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,14 +1,12 @@ # pylint: disable=W0223 -from datetime import datetime from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.compat import range, zip import pandas.compat as compat import pandas.core.common as com -from pandas.core.common import (_is_bool_indexer, is_integer_dtype, +from pandas.core.common import (is_integer_dtype, _asarray_tuplesafe, is_list_like, isnull, ABCSeries, ABCDataFrame, ABCPanel) -import pandas.lib as lib import numpy as np @@ -152,9 +150,6 @@ def _setitem_with_indexer(self, indexer, value): self._has_valid_setitem_indexer(indexer) - # also has the side effect of consolidating in-place - from pandas import Panel, DataFrame, Series - # maybe partial set take_split_path = self.obj._is_mixed_type if isinstance(indexer,tuple): @@ -540,8 +535,6 @@ def _align_frame(self, indexer, df): raise ValueError('Incompatible indexer with DataFrame') def _align_panel(self, indexer, df): - is_frame = self.obj.ndim == 2 - is_panel = self.obj.ndim >= 3 raise NotImplementedError("cannot set using an indexer with a Panel yet!") def _getitem_tuple(self, tup): @@ -637,6 +630,7 @@ def _getitem_lowerdim(self, tup): if not ax0.is_lexsorted_for_tuple(tup): raise e1 try: + # TODO: Figure out why this is not used here. loc = ax0.get_loc(tup[0]) except KeyError: raise e1 @@ -933,6 +927,7 @@ class _IXIndexer(_NDFrameIndexer): """ A primarily location based indexer, with integer fallback """ def _has_valid_type(self, key, axis): + # TODO: Figure out why this is unused ax = self.obj._get_axis(axis) if isinstance(key, slice): @@ -945,7 +940,7 @@ def _has_valid_type(self, key, axis): return True else: - + # TODO: Figure out why this is unused, should it be returned? self._convert_scalar_indexer(key, axis) return True diff --git a/pandas/core/internals.py b/pandas/core/internals.py index b4d5c1814a6bc..1fd7ae8a7f6fb 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1,7 +1,6 @@ import itertools import re from datetime import datetime, timedelta -import copy from collections import defaultdict import numpy as np @@ -589,9 +588,9 @@ def setitem(self, indexer, value): values = self._try_coerce_result(values) values = self._try_cast_result(values, dtype) return [make_block(transf(values), self.items, self.ref_items, ndim=self.ndim, fastpath=True)] - except (ValueError, TypeError) as detail: + except (ValueError, TypeError): raise - except (Exception) as detail: + except Exception: pass return [ self ] @@ -3681,6 +3680,7 @@ def _lcd_dtype(l): have_complex = len(counts[ComplexBlock]) > 0 have_dt64 = len(counts[DatetimeBlock]) > 0 have_td64 = len(counts[TimeDeltaBlock]) > 0 + # TODO: Use this. have_sparse = len(counts[SparseBlock]) > 0 have_numeric = have_float or have_complex or have_int diff --git a/pandas/core/panel.py b/pandas/core/panel.py index f35070c634aa1..cffd21b54ab55 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -3,13 +3,10 @@ """ # pylint: disable=E1103,W0231,W0212,W0621 -from pandas.compat import map, zip, range, lrange, lmap, u, OrderedDict, OrderedDefaultdict -from pandas import compat import sys import numpy as np -from pandas.core.common import (PandasError, - _try_sort, _default_index, _infer_dtype_from_scalar, - notnull) +from pandas.core.common import (PandasError, _try_sort, _default_index, + _infer_dtype_from_scalar) from pandas.core.categorical import Categorical from pandas.core.index import (Index, MultiIndex, _ensure_index, _get_combined_index) @@ -20,10 +17,10 @@ from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame, _shared_docs from pandas import compat +from pandas.compat import zip, range, lrange, u, OrderedDict, OrderedDefaultdict from pandas.util.decorators import deprecate, Appender, Substitution import pandas.core.common as com import pandas.core.ops as ops -import pandas.core.nanops as nanops import pandas.computation.expressions as expressions diff --git a/pandas/io/ga.py b/pandas/io/ga.py index dcbecd74886ac..f13f1dd5b73a2 100644 --- a/pandas/io/ga.py +++ b/pandas/io/ga.py @@ -394,7 +394,6 @@ def _get_match(obj_store, name, id, **kwargs): id_ok = lambda item: id is not None and item.get('id') == id key_ok = lambda item: key is not None and item.get(key) == val - match = None if obj_store.get('items'): # TODO look up gapi for faster lookup for item in obj_store.get('items'): diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index e9e82824326a7..8b6b57ec9bf67 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -183,7 +183,6 @@ def _read(filepath_or_buffer, kwds): "Generic reader of line files." - encoding = kwds.get('encoding', None) skipfooter = kwds.pop('skipfooter', None) if skipfooter is not None: kwds['skip_footer'] = skipfooter @@ -2033,8 +2032,6 @@ def _stringify_na_values(na_values): def _get_na_values(col, na_values, na_fvalues): if isinstance(na_values, dict): if col in na_values: - values = na_values[col] - fvalues = na_fvalues[col] return na_values[col], na_fvalues[col] else: return _NA_VALUES, set() diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 999f0751abe99..f385cce202fa2 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -29,10 +29,10 @@ import pandas.core.common as com from pandas.tools.merge import concat from pandas import compat -from pandas.compat import u_safe as u, PY3, range, lrange +from pandas.compat import u_safe as u, PY3, range, lrange, lmap from pandas.io.common import PerformanceWarning from pandas.core.config import get_option -from pandas.computation.pytables import Expr, maybe_expression +from pandas.computation.pytables import Expr, maybe_expression, TermValue import pandas.lib as lib import pandas.algos as algos @@ -1798,6 +1798,7 @@ def convert(self, values, nan_rep, encoding): elif dtype == u('timedelta64'): self.data = np.asarray(self.data, dtype='m8[ns]') + # TODO: Add test cases for this branch!! elif dtype == u('date'): try: self.data = np.array( @@ -3888,6 +3889,7 @@ def _unconvert_index(data, kind, encoding=None): index = np.array( [date.fromordinal(v) for v in data], dtype=object) except (ValueError): + # TODO: Add a test that reaches this part of the code! index = np.array( [date.fromtimestamp(v) for v in data], dtype=object) elif kind in (u('integer'), u('float')): diff --git a/pandas/stats/misc.py b/pandas/stats/misc.py index c79bae34f20c4..05918d78a8332 100644 --- a/pandas/stats/misc.py +++ b/pandas/stats/misc.py @@ -5,6 +5,7 @@ from pandas.core.api import Series, DataFrame, isnull, notnull from pandas.core.series import remove_na from pandas.compat import zip +import pandas.core.common as com def zscore(series): @@ -157,6 +158,7 @@ def bucketcat(series, cats): cats = np.asarray(cats) unique_labels = np.unique(cats) + # TODO: Add test case that reaches this code. unique_labels = unique_labels[com.notnull(unique_labels)] # group by @@ -217,6 +219,7 @@ def _bucketpanel_by(series, xby, yby, xbins, ybins): labels = _uniquify(xlabels, ylabels, xbins, ybins) + # TODO: Add a test that reaches this part of the code. mask = com.isnull(labels) labels[mask] = -1 @@ -232,6 +235,7 @@ def relabel(key): xlab = xlabels[pos] ylab = ylabels[pos] + # TODO: Add a test that reaches this part of the code. return '%sx%s' % (int(xlab) if com.notnull(xlab) else 'NULL', int(ylab) if com.notnull(ylab) else 'NULL') @@ -251,6 +255,7 @@ def _bucketpanel_cat(series, xcat, ycat): sorted_ylabels = ylabels.take(sorter) unique_labels = np.unique(labels) + # TODO: Add a test that reaches this part of the code. unique_labels = unique_labels[com.notnull(unique_labels)] locs = sorted_labels.searchsorted(unique_labels) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 1c3d17ee908cb..f61c59b891c97 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -553,6 +553,8 @@ def work(self, fig=None, ax=None): ax = fig.gca() x = self.data[self.aes['x']] y = self.data[self.aes['y']] + # TODO: Figure out what this was supposed to be used for + # (currently unused) rvs = np.array([x, y]) x_min = x.min() x_max = x.max() diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index d059d229ef22e..ddc6aa75ef95d 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -296,12 +296,14 @@ def __call__(self): try: start = dmin - delta except ValueError: + # TODO: Never used. start = _from_ordinal(1.0) try: stop = dmax + delta except ValueError: # The magic number! + # TODO: Never used. stop = _from_ordinal(3652059.9999999) nmax, nmin = dates.date2num((dmax, dmin)) @@ -357,12 +359,14 @@ def autoscale(self): try: start = dmin - delta except ValueError: + # TODO: Never used. start = _from_ordinal(1.0) try: stop = dmax + delta except ValueError: # The magic number! + # TODO: Never used. stop = _from_ordinal(3652059.9999999) dmin, dmax = self.datalim_to_dt() diff --git a/pandas/tseries/plotting.py b/pandas/tseries/plotting.py index ae32367a57cd3..f36194eeb44de 100644 --- a/pandas/tseries/plotting.py +++ b/pandas/tseries/plotting.py @@ -75,7 +75,6 @@ def tsplot(series, plotf, **kwargs): args.append(style) lines = plotf(ax, *args, **kwargs) - label = kwargs.get('label', None) # set date formatter, locators and rescale limits format_dateaxis(ax, ax.freq) From b6ac129ef9f9199d65c7d45e87ce8a31ec7c855d Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sun, 29 Sep 2013 16:50:54 -0400 Subject: [PATCH 2/9] restore orig_header for now --- pandas/io/parsers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 8b6b57ec9bf67..6eaf45547f7cd 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -719,6 +719,9 @@ def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_ ic = [ ic ] sic = set(ic) + # TODO: Decide if this is necessary... + orig_header = list(header) + # clean the index_names index_names = header.pop(-1) index_names, names, index_col = _clean_index_names(index_names, From 9f966a4b920b689d2bc1f6f748d40e155e6d53cc Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Thu, 3 Oct 2013 18:17:24 -0400 Subject: [PATCH 3/9] CLN: Cleanup unused assignments in core/index --- pandas/core/index.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 8f3b133e14f9e..ed8028fc33132 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -501,12 +501,11 @@ def is_int(v): # if we are mixed and have integers try: if is_positional and self.is_mixed(): - # TODO: Figure out what these *ought* to be assigning to. - # (currently unused) + # check that start and stop are valid if start is not None: - i = self.get_loc(start) + self.get_loc(start) if stop is not None: - j = self.get_loc(stop) + self.get_loc(stop) is_positional = False except KeyError: if self.inferred_type == 'mixed-integer-float': From 490b6a94fa4c5ef3c41912169010713df0c970c9 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Thu, 3 Oct 2013 18:30:55 -0400 Subject: [PATCH 4/9] CLN: Mark code paths that are used to check input to functions (and leave in) --- pandas/core/indexing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6f30cb7b4b911..131a202da5430 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -927,8 +927,8 @@ class _IXIndexer(_NDFrameIndexer): """ A primarily location based indexer, with integer fallback """ def _has_valid_type(self, key, axis): - # TODO: Figure out why this is unused - ax = self.obj._get_axis(axis) + # check for valid axis (raises if invalid) + self.obj._get_axis(axis) if isinstance(key, slice): return True @@ -940,7 +940,7 @@ def _has_valid_type(self, key, axis): return True else: - # TODO: Figure out why this is unused, should it be returned? + # check for valid key/axis combo (raises if invalid) self._convert_scalar_indexer(key, axis) return True From 611c67949ac3e2969b0a5b34ddb5bac67de43eb7 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Thu, 3 Oct 2013 18:31:10 -0400 Subject: [PATCH 5/9] Revert encoding removal (fixup me!) --- pandas/io/parsers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 6eaf45547f7cd..7235bf87e37ca 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -183,6 +183,7 @@ def _read(filepath_or_buffer, kwds): "Generic reader of line files." + encoding = kwds.get('encoding', None) skipfooter = kwds.pop('skipfooter', None) if skipfooter is not None: kwds['skip_footer'] = skipfooter From 2920895547e81eb8de2d4f125a194b491898f1aa Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Thu, 3 Oct 2013 18:34:46 -0400 Subject: [PATCH 6/9] CLN: Remove unused `rvs` assignment. Does not appear to ever have been used, even when class was first created. Not sure what it was meant to be for. --- pandas/tools/rplot.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index f61c59b891c97..768929b655b69 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -553,9 +553,6 @@ def work(self, fig=None, ax=None): ax = fig.gca() x = self.data[self.aes['x']] y = self.data[self.aes['y']] - # TODO: Figure out what this was supposed to be used for - # (currently unused) - rvs = np.array([x, y]) x_min = x.min() x_max = x.max() y_min = y.min() From 966727944e8d54581a24b7384f2c58cac70d8d23 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Thu, 17 Oct 2013 21:01:12 -0400 Subject: [PATCH 7/9] Remove apparently unused code from core/algorithms --- pandas/core/algorithms.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index db743356f1adf..cc5074a1fe381 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -57,15 +57,6 @@ def unique(values): return _hashtable_algo(f, values.dtype) -# def count(values, uniques=None): -# f = lambda htype, caster: _count_generic(values, htype, caster) - -# if uniques is not None: -# raise NotImplementedError -# else: -# return _hashtable_algo(f, values.dtype) - - def _hashtable_algo(f, dtype): """ f(HashTable, type_caster) -> result @@ -78,18 +69,6 @@ def _hashtable_algo(f, dtype): return f(htable.PyObjectHashTable, com._ensure_object) -# TODO: Test this function! -def _count_generic(values, table_type, type_caster): - from pandas.core.series import Series - - values = type_caster(values) - table = table_type(min(len(values), 1000000)) - uniques, labels = table.factorize(values) - # What should this be?? - # return Series(counts, index=uniques) - return Series(labels, index=uniques) - - def _match_generic(values, index, table_type, type_caster): values = type_caster(values) index = type_caster(index) From e7cf945a16582dfb51af2fa3afd67ae941a64b53 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Thu, 17 Oct 2013 21:08:08 -0400 Subject: [PATCH 8/9] remove comments --- pandas/io/pytables.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f385cce202fa2..511d75ba6451f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1798,7 +1798,6 @@ def convert(self, values, nan_rep, encoding): elif dtype == u('timedelta64'): self.data = np.asarray(self.data, dtype='m8[ns]') - # TODO: Add test cases for this branch!! elif dtype == u('date'): try: self.data = np.array( @@ -3889,7 +3888,6 @@ def _unconvert_index(data, kind, encoding=None): index = np.array( [date.fromordinal(v) for v in data], dtype=object) except (ValueError): - # TODO: Add a test that reaches this part of the code! index = np.array( [date.fromtimestamp(v) for v in data], dtype=object) elif kind in (u('integer'), u('float')): From 6a3619f7d676e9c7af4f380b85065f1cde0982d2 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Thu, 17 Oct 2013 21:40:59 -0400 Subject: [PATCH 9/9] fixps --- pandas/core/indexing.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 131a202da5430..88d2b6e8e4411 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,12 +1,14 @@ # pylint: disable=W0223 +from datetime import datetime from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.compat import range, zip import pandas.compat as compat import pandas.core.common as com -from pandas.core.common import (is_integer_dtype, +from pandas.core.common import (_is_bool_indexer, is_integer_dtype, _asarray_tuplesafe, is_list_like, isnull, ABCSeries, ABCDataFrame, ABCPanel) +import pandas.lib as lib import numpy as np @@ -150,6 +152,9 @@ def _setitem_with_indexer(self, indexer, value): self._has_valid_setitem_indexer(indexer) + # also has the side effect of consolidating in-place + from pandas import Panel, DataFrame, Series + # maybe partial set take_split_path = self.obj._is_mixed_type if isinstance(indexer,tuple): @@ -574,11 +579,9 @@ def _multi_take_opportunity(self, tup): return False # just too complicated - for indexer, ax in zip(tup,self.obj._data.axes): + for ax in self.obj._data.axes: if isinstance(ax, MultiIndex): return False - elif com._is_bool_indexer(indexer): - return False return True @@ -630,7 +633,7 @@ def _getitem_lowerdim(self, tup): if not ax0.is_lexsorted_for_tuple(tup): raise e1 try: - # TODO: Figure out why this is not used here. + # Check for valid axis loc = ax0.get_loc(tup[0]) except KeyError: raise e1 @@ -928,7 +931,7 @@ class _IXIndexer(_NDFrameIndexer): def _has_valid_type(self, key, axis): # check for valid axis (raises if invalid) - self.obj._get_axis(axis) + ax = self.obj._get_axis(axis) if isinstance(key, slice): return True