From a4a71f89bf473a84c2a45260acac675dfac83cb3 Mon Sep 17 00:00:00 2001
From: Viktor Kerkez
Date: Sat, 16 Nov 2013 03:05:51 +0100
Subject: [PATCH] CLN: PEP8 cleanup

---
 pandas/compat/__init__.py         | 123 +++---
 pandas/compat/pickle_compat.py    |  16 +-
 pandas/compat/scipy.py            |  10 +-
 pandas/computation/align.py       |  14 +-
 pandas/computation/expr.py        |  36 +-
 pandas/computation/expressions.py |  17 +-
 pandas/computation/ops.py         |   1 -
 pandas/computation/pytables.py    |  33 +-
 pandas/core/algorithms.py         |  11 +-
 pandas/core/api.py                |   6 +-
 pandas/core/array.py              |   2 +-
 pandas/core/base.py               |  22 +-
 pandas/core/categorical.py        |  14 +-
 pandas/core/common.py             | 204 +++++++-----
 pandas/core/config.py             |  47 ++-
 pandas/core/config_init.py        |  48 +--
 pandas/core/datetools.py          |   2 +-
 pandas/core/format.py             | 158 +++++----
 pandas/core/frame.py              | 208 +++++++-----
 pandas/core/generic.py            | 373 +++++++++++++--------
 pandas/core/groupby.py            | 205 +++++++-----
 pandas/core/index.py              | 131 +++++---
 pandas/core/indexing.py           | 342 +++++++++++--------
 pandas/core/internals.py          | 525 ++++++++++++++++++------------
 pandas/core/nanops.py             |  58 ++--
 pandas/core/ops.py                |  14 +-
 pandas/core/panel.py              |  82 ++---
 pandas/core/panel4d.py            |  18 +-
 pandas/core/panelnd.py            |  39 +--
 pandas/core/reshape.py            |  45 ++-
 pandas/core/series.py             | 110 ++++---
 pandas/core/sparse.py             |   4 +-
 pandas/core/strings.py            |  10 +-
 33 files changed, 1743 insertions(+), 1185 deletions(-)

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index a2531ebd43c82..982b5de49e6fa 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -120,7 +120,8 @@ def iteritems(obj, **kwargs):
     """replacement for six's iteritems for Python2/3 compat
     uses 'iteritems' if available and otherwise uses 'items'.
 
-    Passes kwargs to method."""
+    Passes kwargs to method.
+    """
     func = getattr(obj, "iteritems", None)
     if not func:
         func = obj.items
@@ -180,6 +181,7 @@ class to receive bound method
         def u(s):
             return s
 
+
         def u_safe(s):
             return s
 else:
@@ -243,8 +245,7 @@ def wrapper(cls):
 
 
 class _OrderedDict(dict):
-
-    'Dictionary that remembers insertion order'
+    """Dictionary that remembers insertion order"""
     # An inherited dict maps keys to values.
     # The inherited dict provides __getitem__, __len__, __contains__, and get.
     # The remaining methods are order-aware.
     # Big-O running times for all methods are the same as for regular
     # dictionaries.
     # The internal self.__map dictionary maps keys to links in a doubly
     # linked list.
     # The circular doubly linked list starts and ends with a sentinel
     # element.
     # The sentinel element never gets deleted (this simplifies the
     # algorithm).
     # Each link is stored as a list of length three: [PREV, NEXT, KEY].
 
     def __init__(self, *args, **kwds):
-        '''Initialize an ordered dictionary. Signature is the same as for
+        """Initialize an ordered dictionary. Signature is the same as for
         regular dictionaries, but keyword arguments are not recommended
         because their insertion order is arbitrary.
-
-        '''
+        """
         if len(args) > 1:
             raise TypeError('expected at most 1 arguments, got %d' % len(args))
         try:
@@ -274,7 +274,7 @@ def __init__(self, *args, **kwds):
         self.__update(*args, **kwds)
 
     def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
-        'od.__setitem__(i, y) <==> od[i]=y'
+        """od.__setitem__(i, y) <==> od[i]=y"""
         # Setting a new item creates a new link which goes at the end of the
         # linked list, and the inherited dictionary is updated with the new
         # key/value pair.
         dict_setitem(self, key, value)
@@ -285,7 +285,7 @@ def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
         dict_setitem(self, key, value)
 
     def __delitem__(self, key, dict_delitem=dict.__delitem__):
-        'od.__delitem__(y) <==> del od[y]'
+        """od.__delitem__(y) <==> del od[y]"""
         # Deleting an existing item uses self.__map to find the link which is
         # then removed by updating the links in the predecessor and successor
         # nodes.
@@ -295,7 +295,7 @@ def __delitem__(self, key, dict_delitem=dict.__delitem__):
         link_next[0] = link_prev
 
     def __iter__(self):
-        'od.__iter__() <==> iter(od)'
+        """od.__iter__() <==> iter(od)"""
         root = self.__root
         curr = root[1]
         while curr is not root:
@@ -303,7 +303,7 @@ def __iter__(self):
             curr = curr[1]
 
     def __reversed__(self):
-        'od.__reversed__() <==> reversed(od)'
+        """od.__reversed__() <==> reversed(od)"""
         root = self.__root
         curr = root[0]
         while curr is not root:
@@ -311,7 +311,7 @@ def __reversed__(self):
             curr = curr[0]
 
     def clear(self):
-        'od.clear() -> None. Remove all items from od.'
+        """od.clear() -> None. Remove all items from od."""
         try:
             for node in itervalues(self.__map):
                 del node[:]
@@ -323,10 +323,11 @@ def clear(self):
         dict.clear(self)
 
     def popitem(self, last=True):
-        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
+        """od.popitem() -> (k, v), return and remove a (key, value) pair.
+
         Pairs are returned in LIFO order if last is true or FIFO order if
         false.
-        '''
+        """
         if not self:
             raise KeyError('dictionary is empty')
         root = self.__root
@@ -348,39 +349,39 @@ def popitem(self, last=True):
     # -- the following methods do not depend on the internal structure --
 
     def keys(self):
-        'od.keys() -> list of keys in od'
+        """od.keys() -> list of keys in od"""
         return list(self)
 
     def values(self):
-        'od.values() -> list of values in od'
+        """od.values() -> list of values in od"""
         return [self[key] for key in self]
 
     def items(self):
-        'od.items() -> list of (key, value) pairs in od'
+        """od.items() -> list of (key, value) pairs in od"""
        return [(key, self[key]) for key in self]
 
     def iterkeys(self):
-        'od.iterkeys() -> an iterator over the keys in od'
+        """od.iterkeys() -> an iterator over the keys in od"""
         return iter(self)
 
     def itervalues(self):
-        'od.itervalues -> an iterator over the values in od'
+        """od.itervalues -> an iterator over the values in od"""
         for k in self:
             yield self[k]
 
     def iteritems(self):
-        'od.iteritems -> an iterator over the (key, value) items in od'
+        """od.iteritems -> an iterator over the (key, value) items in od"""
         for k in self:
             yield (k, self[k])
 
     def update(*args, **kwds):
-        '''od.update(E, **F) -> None. Update od from dict/iterable E and F.
+        """od.update(E, **F) -> None. Update od from dict/iterable E and F.
 
         If E is a dict instance, does: for k in E: od[k] = E[k]
         If E has a .keys() method, does: for k in E.keys(): od[k] = E[k]
         Or if E is an iterable of items, does:for k, v in E: od[k] = v
         In either case, this is followed by: for k, v in F.items(): od[k] = v
-        '''
+        """
         if len(args) > 2:
             raise TypeError('update() takes at most 2 positional '
                             'arguments (%d given)' % (len(args),))
@@ -408,10 +409,10 @@ def update(*args, **kwds):
     __marker = object()
 
     def pop(self, key, default=__marker):
-        '''od.pop(k[,d]) -> v, remove specified key and return the\
+        """od.pop(k[,d]) -> v, remove specified key and return the
         corresponding value. If key is not found, d is returned if given,
         otherwise KeyError is raised.
-        '''
+        """
         if key in self:
             result = self[key]
             del self[key]
@@ -421,14 +422,15 @@ def pop(self, key, default=__marker):
         return default
 
     def setdefault(self, key, default=None):
-        'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
+        """od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od
+        """
         if key in self:
             return self[key]
         self[key] = default
         return default
 
     def __repr__(self, _repr_running={}):
-        'od.__repr__() <==> repr(od)'
+        """od.__repr__() <==> repr(od)"""
         call_key = id(self), _get_ident()
         if call_key in _repr_running:
             return '...'
@@ -441,7 +443,7 @@ def __repr__(self, _repr_running={}):
             del _repr_running[call_key]
 
     def __reduce__(self):
-        'Return state information for pickling'
+        """Return state information for pickling"""
         items = [[k, self[k]] for k in self]
         inst_dict = vars(self).copy()
         for k in vars(OrderedDict()):
@@ -451,24 +453,24 @@ def __reduce__(self):
         return self.__class__, (items,)
 
     def copy(self):
-        'od.copy() -> a shallow copy of od'
+        """od.copy() -> a shallow copy of od"""
         return self.__class__(self)
 
     @classmethod
     def fromkeys(cls, iterable, value=None):
-        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S and
+        """OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S and
         values equal to v (which defaults to None).
-        '''
+        """
         d = cls()
         for key in iterable:
             d[key] = value
         return d
 
     def __eq__(self, other):
-        '''od.__eq__(y) <==> od==y. Comparison to another OD is
+        """od.__eq__(y) <==> od==y. Comparison to another OD is
         order-sensitive while comparison to a regular mapping is
         order-insensitive.
-        '''
+        """
         if isinstance(other, OrderedDict):
             return (len(self) == len(other) and
                     list(self.items()) == list(other.items()))
@@ -480,15 +482,16 @@ def __ne__(self, other):
     # -- the following methods are only used in Python 2.7 --
 
     def viewkeys(self):
-        "od.viewkeys() -> a set-like object providing a view on od's keys"
+        """od.viewkeys() -> a set-like object providing a view on od's keys"""
         return KeysView(self)
 
     def viewvalues(self):
-        "od.viewvalues() -> an object providing a view on od's values"
+        """od.viewvalues() -> an object providing a view on od's values"""
         return ValuesView(self)
 
     def viewitems(self):
-        "od.viewitems() -> a set-like object providing a view on od's items"
+        """od.viewitems() -> a set-like object providing a view on od's items
+        """
         return ItemsView(self)
 
 
@@ -502,18 +505,17 @@ def viewitems(self):
 
 
 class _Counter(dict):
-
-    '''Dict subclass for counting hashable objects. Sometimes called a bag
+    """Dict subclass for counting hashable objects. Sometimes called a bag
     or multiset. Elements are stored as dictionary keys and their counts
     are stored as dictionary values.
 
     >>> Counter('zyzygy')
     Counter({'y': 3, 'z': 2, 'g': 1})
-    '''
+    """
 
     def __init__(self, iterable=None, **kwds):
-        '''Create a new, empty Counter object. And if given, count elements
+        """Create a new, empty Counter object. And if given, count elements
         from an input iterable. Or, initialize the count from another mapping
         of elements to their counts.
 
        >>> c = Counter({'a': 4, 'b': 2})  # a new counter from a mapping
        >>> c = Counter(a=4, b=2)          # a new counter from keyword args
-        '''
+        """
         self.update(iterable, **kwds)
 
     def __missing__(self, key):
         return 0
 
     def most_common(self, n=None):
-        '''List the n most common elements and their counts from the most
+        """List the n most common elements and their counts from the most
         common to the least. If n is None, then list all element counts.
 
         >>> Counter('abracadabra').most_common(3)
         [('a', 5), ('r', 2), ('b', 2)]
-        '''
+        """
         if n is None:
             return sorted(iteritems(self), key=itemgetter(1), reverse=True)
         return nlargest(n, iteritems(self), key=itemgetter(1))
 
     def elements(self):
-        '''Iterator over elements repeating each as many times as its count.
+        """Iterator over elements repeating each as many times as its count.
 
         >>> c = Counter('ABCABC')
         >>> sorted(c.elements())
         ['A', 'A', 'B', 'B', 'C', 'C']
 
         If an element's count has been set to zero or is a negative number,
         elements() will ignore it.
-        '''
+        """
         for elem, count in iteritems(self):
             for _ in range(count):
                 yield elem
@@ -563,7 +565,7 @@ def fromkeys(cls, iterable, v=None):
             'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
 
     def update(self, iterable=None, **kwds):
-        '''Like dict.update() but add counts instead of replacing them.
+        """Like dict.update() but add counts instead of replacing them.
 
         Source can be an iterable, a dictionary, or another Counter instance.
 
@@ -574,7 +576,7 @@ def update(self, iterable=None, **kwds):
         >>> c['h']          # four 'h' in which, witch, and watch
         4
 
-        '''
+        """
         if iterable is not None:
             if hasattr(iterable, 'iteritems'):
                 if self:
@@ -592,12 +594,14 @@ def update(self, iterable=None, **kwds):
             self.update(kwds)
 
     def copy(self):
-        'Like dict.copy() but returns a Counter instance instead of a dict.'
+        """Like dict.copy() but returns a Counter instance instead of a dict.
+        """
         return Counter(self)
 
     def __delitem__(self, elem):
-        '''Like dict.__delitem__() but does not raise KeyError for missing
-        values.'''
+        """Like dict.__delitem__() but does not raise KeyError for missing
+        values.
+        """
         if elem in self:
             dict.__delitem__(self, elem)
 
@@ -617,13 +621,12 @@ def __repr__(self):
     #       c += Counter()
 
     def __add__(self, other):
-        '''Add counts from two counters.
+        """Add counts from two counters.
 
         >>> Counter('abbb') + Counter('bcc')
         Counter({'b': 4, 'c': 2, 'a': 1})
-
-        '''
+        """
         if not isinstance(other, Counter):
             return NotImplemented
         result = Counter()
@@ -634,12 +637,12 @@ def __add__(self, other):
         return result
 
     def __sub__(self, other):
-        ''' Subtract count, but keep only results with positive counts.
+        """Subtract count, but keep only results with positive counts.
 
         >>> Counter('abbbc') - Counter('bccd')
         Counter({'b': 2, 'a': 1})
 
-        '''
+        """
         if not isinstance(other, Counter):
             return NotImplemented
         result = Counter()
@@ -650,12 +653,12 @@ def __sub__(self, other):
         return result
 
     def __or__(self, other):
-        '''Union is the maximum of value in either of the input counters.
+        """Union is the maximum of value in either of the input counters.
 
         >>> Counter('abbb') | Counter('bcc')
         Counter({'b': 3, 'c': 2, 'a': 1})
 
-        '''
+        """
         if not isinstance(other, Counter):
             return NotImplemented
         _max = max
@@ -667,12 +670,12 @@ def __or__(self, other):
         return result
 
     def __and__(self, other):
-        ''' Intersection is the minimum of corresponding counts.
+        """Intersection is the minimum of corresponding counts.
 
         >>> Counter('abbb') & Counter('bcc')
         Counter({'b': 1})
 
-        '''
+        """
         if not isinstance(other, Counter):
             return NotImplemented
         _min = min
@@ -705,10 +708,9 @@ def raise_with_traceback(exc, traceback=Ellipsis):
         raise exc, None, traceback
 """)
 
-raise_with_traceback.__doc__ = (
-"""Raise exception with existing traceback.
+raise_with_traceback.__doc__ = """Raise exception with existing traceback.
 If traceback is not passed, uses sys.exc_info() to get traceback."""
-)
+
 
 # http://stackoverflow.com/questions/4126348
 # Thanks to @martineau at SO
@@ -723,6 +725,7 @@ def parse_date(timestr, *args, **kwargs):
 else:
     parse_date = _date_parser.parse
 
+
 class OrderedDefaultdict(OrderedDict):
 
     def __init__(self, *args, **kwargs):
diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
index bf52fc30a9ea3..3365f1bb630b9 100644
--- a/pandas/compat/pickle_compat.py
+++ b/pandas/compat/pickle_compat.py
@@ -9,6 +9,7 @@
 from pandas.core.series import Series, TimeSeries
 from pandas.sparse.series import SparseSeries, SparseTimeSeries
 
+
 def load_reduce(self):
     stack = self.stack
     args = stack.pop()
@@ -18,7 +19,8 @@ def load_reduce(self):
         if n == u('DeprecatedSeries') or n == u('DeprecatedTimeSeries'):
             stack[-1] = object.__new__(Series)
             return
-        elif n == u('DeprecatedSparseSeries') or n == u('DeprecatedSparseTimeSeries'):
+        elif (n == u('DeprecatedSparseSeries') or
+                n == u('DeprecatedSparseTimeSeries')):
             stack[-1] = object.__new__(SparseSeries)
             return
 
@@ -28,7 +30,9 @@ def load_reduce(self):
 
         # try to reencode the arguments
         if self.encoding is not None:
-            args = tuple([ arg.encode(self.encoding) if isinstance(arg, string_types) else arg for arg in args ])
+            args = tuple([arg.encode(self.encoding)
+                          if isinstance(arg, string_types)
+                          else arg for arg in args])
             try:
                 stack[-1] = func(*args)
                 return
@@ -51,9 +55,9 @@ class Unpickler(pkl.Unpickler):
 
 Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce
 
+
 def load(fh, encoding=None, compat=False, is_verbose=False):
-    """
-    load a pickle, with a provided encoding
+    """load a pickle, with a provided encoding
 
     if compat is True:
        fake the old class hierarchy
@@ -90,14 +94,18 @@ def load(fh, encoding=None, compat=False, is_verbose=False):
         pandas.sparse.series.SparseSeries = SparseSeries
         pandas.sparse.series.SparseTimeSeries = SparseTimeSeries
 
+
 class DeprecatedSeries(np.ndarray, Series):
     pass
 
+
 class DeprecatedTimeSeries(DeprecatedSeries):
     pass
 
+
 class DeprecatedSparseSeries(DeprecatedSeries):
     pass
 
+
 class DeprecatedSparseTimeSeries(DeprecatedSparseSeries):
     pass
diff --git a/pandas/compat/scipy.py b/pandas/compat/scipy.py
index 3dab5b1f0451e..81601ffe25609 100644
--- a/pandas/compat/scipy.py
+++ b/pandas/compat/scipy.py
@@ -7,8 +7,7 @@
 
 
 def scoreatpercentile(a, per, limit=(), interpolation_method='fraction'):
-    """
-    Calculate the score at the given `per` percentile of the sequence `a`.
+    """Calculate the score at the given `per` percentile of the sequence `a`.
 
     For example, the score at `per=50` is the median. If the desired quantile
     lies between two data points, we interpolate between them, according to
@@ -65,7 +64,7 @@ def scoreatpercentile(a, per, limit=(), interpolation_method='fraction'):
         values = values[(limit[0] <= values) & (values <= limit[1])]
 
     idx = per / 100. * (values.shape[0] - 1)
-    if (idx % 1 == 0):
+    if idx % 1 == 0:
         score = values[idx]
     else:
         if interpolation_method == 'fraction':
@@ -153,8 +152,7 @@ def fastsort(a):
 
 
 def percentileofscore(a, score, kind='rank'):
-    '''
-    The percentile rank of a score relative to a list of scores.
+    """The percentile rank of a score relative to a list of scores.
 
     A `percentileofscore` of, for example, 80% means that 80% of the
    scores in `a` are below the given score. In the case of gaps or
@@ -217,7 +215,7 @@
     >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='mean')
     60.0
 
-    '''
+    """
     a = np.array(a)
     n = len(a)
 
diff --git a/pandas/computation/align.py b/pandas/computation/align.py
index f420d0dacf34c..233f2b61dc463 100644
--- a/pandas/computation/align.py
+++ b/pandas/computation/align.py
@@ -101,8 +101,8 @@ def wrapper(terms):
 
 @_filter_special_cases
 def _align_core(terms):
-    term_index = [i for i, term in enumerate(terms) if hasattr(term.value,
-                                                               'axes')]
+    term_index = [i for i, term in enumerate(terms)
+                  if hasattr(term.value, 'axes')]
     term_dims = [terms[i].value.ndim for i in term_index]
     ndims = pd.Series(dict(zip(term_index, term_dims)))
 
@@ -139,10 +139,10 @@ def _align_core(terms):
                 ordm = np.log10(abs(reindexer_size - term_axis_size))
 
                 if ordm >= 1 and reindexer_size >= 10000:
-                    warnings.warn("Alignment difference on axis {0} is larger"
-                                  " than an order of magnitude on term {1!r}, "
-                                  "by more than {2:.4g}; performance may suffer"
-                                  "".format(axis, terms[i].name, ordm),
+                    warnings.warn('Alignment difference on axis {0} is larger '
+                                  'than an order of magnitude on term {1!r}, '
+                                  'by more than {2:.4g}; performance may '
+                                  'suffer'.format(axis, terms[i].name, ordm),
                                   category=pd.io.common.PerformanceWarning)
 
             if transpose:
@@ -237,7 +237,7 @@ def _reconstruct_object(typ, obj, axes, dtype):
         res_t = dtype
 
     if (not isinstance(typ, partial) and
-        issubclass(typ, pd.core.generic.PandasObject)):
+            issubclass(typ, pd.core.generic.PandasObject)):
         return typ(obj, dtype=res_t, **axes)
 
     # special case for pathological things like ~True/~False
diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py
index 64bceee118fd1..1af41acd34ede 100644
--- a/pandas/computation/expr.py
+++ b/pandas/computation/expr.py
@@ -91,7 +91,8 @@ class Scope(StringMixin):
     __slots__ = ('globals', 'locals', 'resolvers', '_global_resolvers',
                  'resolver_keys', '_resolver', 'level', 'ntemps', 'target')
 
-    def __init__(self, gbls=None, lcls=None, level=1, resolvers=None, target=None):
+    def __init__(self, gbls=None, lcls=None, level=1, resolvers=None,
+                 target=None):
         self.level = level
         self.resolvers = tuple(resolvers or [])
         self.globals = dict()
@@ -133,11 +134,12 @@ def __init__(self, gbls=None, lcls=None, level=1, resolvers=None, target=None):
             self.resolver_dict.update(dict(o))
 
     def __unicode__(self):
-        return com.pprint_thing("locals: {0}\nglobals: {0}\nresolvers: "
-                                "{0}\ntarget: {0}".format(list(self.locals.keys()),
-                                                          list(self.globals.keys()),
-                                                          list(self.resolver_keys),
-                                                          self.target))
+        return com.pprint_thing(
+            'locals: {0}\nglobals: {0}\nresolvers: '
+            '{0}\ntarget: {0}'.format(list(self.locals.keys()),
+                                      list(self.globals.keys()),
+                                      list(self.resolver_keys),
+                                      self.target))
 
     def __getitem__(self, key):
         return self.resolve(key, globally=False)
@@ -499,9 +501,8 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs,
                                  maybe_eval_in_python=('==', '!=')):
         res = op(lhs, rhs)
 
-        if (res.op in _cmp_ops_syms and
-            lhs.is_datetime or rhs.is_datetime and
-            self.engine != 'pytables'):
+        if (res.op in _cmp_ops_syms and lhs.is_datetime or rhs.is_datetime and
+                self.engine != 'pytables'):
             # all date ops must be done in python bc numexpr doesn't work well
             # with NaT
             return self._possibly_eval(res, self.binary_ops)
@@ -594,18 +595,20 @@ def visit_Assign(self, node, **kwargs):
         if len(node.targets) != 1:
             raise SyntaxError('can only assign a single expression')
         if not isinstance(node.targets[0], ast.Name):
-            raise SyntaxError('left hand side of an assignment must be a single name')
+            raise SyntaxError('left hand side of an assignment must be a '
+                              'single name')
         if self.env.target is None:
             raise ValueError('cannot assign without a target object')
 
         try:
             assigner = self.visit(node.targets[0], **kwargs)
-        except (UndefinedVariableError):
+        except UndefinedVariableError:
             assigner = node.targets[0].id
 
-        self.assigner = getattr(assigner,'name',assigner)
+        self.assigner = getattr(assigner, 'name', assigner)
         if self.assigner is None:
-            raise SyntaxError('left hand side of an assignment must be a single resolvable name')
+            raise SyntaxError('left hand side of an assignment must be a '
+                              'single resolvable name')
 
         return self.visit(node.value, **kwargs)
 
@@ -622,7 +625,7 @@ def visit_Attribute(self, node, **kwargs):
                 name = self.env.add_tmp(v)
                 return self.term_type(name, self.env)
             except AttributeError:
-                # something like datetime.datetime where scope is overriden
+                # something like datetime.datetime where scope is overridden
                 if isinstance(value, ast.Name) and value.id == attr:
                     return resolved
 
@@ -699,8 +702,7 @@ def visitor(x, y):
         return reduce(visitor, operands)
 
 
-_python_not_supported = frozenset(['Dict', 'Call', 'BoolOp',
-                                   'In', 'NotIn'])
+_python_not_supported = frozenset(['Dict', 'Call', 'BoolOp', 'In', 'NotIn'])
 _numexpr_supported_calls = frozenset(_reductions + _mathops)
 
 
@@ -744,7 +746,7 @@ def __init__(self, expr, engine='numexpr', parser='pandas', env=None,
 
     @property
     def assigner(self):
-        return getattr(self._visitor,'assigner',None)
+        return getattr(self._visitor, 'assigner', None)
 
     def __call__(self):
         self.env.locals['truediv'] = self.truediv
diff --git a/pandas/computation/expressions.py b/pandas/computation/expressions.py
index f1007cbc81eb7..035878e20c645 100644
--- a/pandas/computation/expressions.py
+++ b/pandas/computation/expressions.py
@@ -2,7 +2,7 @@
 Expressions
 -----------
 
-Offer fast expression evaluation thru numexpr
+Offer fast expression evaluation through numexpr
 
 """
 
@@ -22,9 +22,10 @@
 _where = None
 
 # the set of dtypes that we will allow pass to numexpr
-_ALLOWED_DTYPES = dict(
-    evaluate=set(['int64', 'int32', 'float64', 'float32', 'bool']),
-    where=set(['int64', 'float64', 'bool']))
+_ALLOWED_DTYPES = {
+    'evaluate': set(['int64', 'int32', 'float64', 'float32', 'bool']),
+    'where': set(['int64', 'float64', 'bool'])
+}
 
 # the minimum prod shape that we will use numexpr
 _MIN_ELEMENTS = 10000
@@ -100,10 +101,10 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, truediv=True,
                                    'b_value': b_value},
                              casting='safe', truediv=truediv,
                              **eval_kwargs)
-        except (ValueError) as detail:
+        except ValueError as detail:
             if 'unknown type object' in str(detail):
                 pass
-        except (Exception) as detail:
+        except Exception as detail:
             if raise_on_error:
                 raise
 
@@ -135,10 +136,10 @@ def _where_numexpr(cond, a, b, raise_on_error=False):
                                    'a_value': a_value,
                                    'b_value': b_value},
                              casting='safe')
-        except (ValueError) as detail:
+        except ValueError as detail:
             if 'unknown type object' in str(detail):
                 pass
-        except (Exception) as detail:
+        except Exception as detail:
             if raise_on_error:
                 raise TypeError(str(detail))
 
diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py
index fd5ee159fe2b4..0510ee86760a3 100644
--- a/pandas/computation/ops.py
+++ b/pandas/computation/ops.py
@@ -207,7 +207,6 @@ def name(self):
         return self.value
 
 
-
 _bool_op_map = {'not': '~', 'and': '&', 'or': '|'}
 
 
diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py
index eb675d6230c8c..8afe8e909a434 100644
--- a/pandas/computation/pytables.py
+++ b/pandas/computation/pytables.py
@@ -16,6 +16,7 @@
 from pandas.computation.common import _ensure_decoded
 from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type
 
+
 class Scope(expr.Scope):
     __slots__ = 'globals', 'locals', 'queryables'
 
@@ -85,7 +86,7 @@ def _disallow_scalar_only_bool_ops(self):
     def prune(self, klass):
 
         def pr(left, right):
-            """ create and return a new specilized BinOp from myself """
+            """ create and return a new specialized BinOp from myself """
 
             if left is None:
                 return right
@@ -95,7 +96,7 @@ def pr(left, right):
             k = klass
             if isinstance(left, ConditionBinOp):
                 if (isinstance(left, ConditionBinOp) and
-                    isinstance(right, ConditionBinOp)):
+                        isinstance(right, ConditionBinOp)):
                     k = JointConditionBinOp
                 elif isinstance(left, k):
                     return left
@@ -104,7 +105,7 @@ def pr(left, right):
 
             elif isinstance(left, FilterBinOp):
                 if (isinstance(left, FilterBinOp) and
-                    isinstance(right, FilterBinOp)):
+                        isinstance(right, FilterBinOp)):
                     k = JointFilterBinOp
                 elif isinstance(left, k):
                     return left
@@ -177,11 +178,12 @@ def stringify(value):
             if v.tz is not None:
                 v = v.tz_convert('UTC')
             return TermValue(v, v.value, kind)
-        elif isinstance(v, datetime) or hasattr(v, 'timetuple') or kind == u('date'):
+        elif (isinstance(v, datetime) or hasattr(v, 'timetuple') or
+                kind == u('date')):
             v = time.mktime(v.timetuple())
             return TermValue(v, pd.Timestamp(v), kind)
         elif kind == u('timedelta64') or kind == u('timedelta'):
-            v = _coerce_scalar_to_timedelta_type(v,unit='s').item()
+            v = _coerce_scalar_to_timedelta_type(v, unit='s').item()
             return TermValue(int(v), v, kind)
         elif kind == u('integer'):
             v = int(float(v))
@@ -293,7 +295,8 @@ def invert(self):
         #if self.condition is not None:
         #    self.condition = "~(%s)" % self.condition
         #return self
-        raise NotImplementedError("cannot use an invert condition when passing to numexpr")
+        raise NotImplementedError("cannot use an invert condition when "
+                                  "passing to numexpr")
 
     def format(self):
         """ return the actual ne format """
@@ -352,10 +355,10 @@ def prune(self, klass):
             operand = operand.prune(klass)
 
             if operand is not None:
-                if issubclass(klass,ConditionBinOp):
+                if issubclass(klass, ConditionBinOp):
                     if operand.condition is not None:
                         return operand.invert()
-                elif issubclass(klass,FilterBinOp):
+                elif issubclass(klass, FilterBinOp):
                     if operand.filter is not None:
                         return operand.invert()
 
@@ -364,6 +367,7 @@ def prune(self, klass):
 
 _op_classes = {'unary': UnaryOp}
 
+
 class ExprVisitor(BaseExprVisitor):
     const_type = Constant
     term_type = Term
@@ -401,7 +405,7 @@ def visit_Subscript(self, node, **kwargs):
             return self.const_type(value[slobj], self.env)
         except TypeError:
             raise ValueError("cannot subscript {0!r} with "
-                            "{1!r}".format(value, slobj))
+                             "{1!r}".format(value, slobj))
 
     def visit_Attribute(self, node, **kwargs):
         attr = node.attr
@@ -435,7 +439,8 @@ class Expr(expr.Expr):
     Parameters
     ----------
     where : string term expression, Expr, or list-like of Exprs
-    queryables : a "kinds" map (dict of column name -> kind), or None if column is non-indexable
+    queryables : a "kinds" map (dict of column name -> kind), or None if column
+        is non-indexable
     encoding : an encoding that will encode the query terms
 
     Returns
@@ -538,13 +543,13 @@ def evaluate(self):
         try:
             self.condition = self.terms.prune(ConditionBinOp)
         except AttributeError:
-            raise ValueError(
-                "cannot process expression [{0}], [{1}] is not a valid condition".format(self.expr,self))
+            raise ValueError("cannot process expression [{0}], [{1}] is not a "
+                             "valid condition".format(self.expr, self))
         try:
             self.filter = self.terms.prune(FilterBinOp)
         except AttributeError:
-            raise ValueError(
-                "cannot process expression [{0}], [{1}] is not a valid filter".format(self.expr,self))
+            raise ValueError("cannot process expression [{0}], [{1}] is not a "
+                             "valid filter".format(self.expr, self))
 
         return self.condition, self.filter
 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 2699dd0a25a2b..24c14a5d7f215 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -153,7 +153,8 @@ def factorize(values, sort=False, order=None, na_sentinel=-1):
     return labels, uniques
 
 
-def value_counts(values, sort=True, ascending=False, normalize=False, bins=None):
+def value_counts(values, sort=True, ascending=False, normalize=False,
+                 bins=None):
     """
     Compute a histogram of the counts of non-null values
 
@@ -191,7 +192,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
         values = com._ensure_int64(values)
         keys, counts = htable.value_count_int64(values)
 
-    elif issubclass(values.dtype.type, (np.datetime64,np.timedelta64)):
+    elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
         dtype = values.dtype
         values = values.view(np.int64)
         keys, counts = htable.value_count_int64(values)
@@ -223,7 +224,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
 
 
 def mode(values):
-    "Returns the mode or mode(s) of the passed Series or ndarray (sorted)"
+    """Returns the mode or mode(s) of the passed Series or ndarray (sorted)"""
     # must sort because hash order isn't necessarily defined.
     from pandas.core.series import Series
 
@@ -239,7 +240,7 @@ def mode(values):
         values = com._ensure_int64(values)
         result = constructor(sorted(htable.mode_int64(values)), dtype=dtype)
 
-    elif issubclass(values.dtype.type, (np.datetime64,np.timedelta64)):
+    elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
         dtype = values.dtype
         values = values.view(np.int64)
         result = constructor(sorted(htable.mode_int64(values)), dtype=dtype)
@@ -324,7 +325,7 @@ def _get_score(at):
             return np.nan
 
         idx = at * (len(values) - 1)
-        if (idx % 1 == 0):
+        if idx % 1 == 0:
             score = values[idx]
         else:
             if interpolation_method == 'fraction':
diff --git a/pandas/core/api.py b/pandas/core/api.py
index 36081cc34cc3a..28118c60776ce 100644
--- a/pandas/core/api.py
+++ b/pandas/core/api.py
@@ -27,8 +27,8 @@
 
 # legacy
 from pandas.core.daterange import DateRange  # deprecated
-from pandas.core.common import save, load # deprecated, remove in 0.13
+from pandas.core.common import save, load  # deprecated, remove in 0.13
 import pandas.core.datetools as datetools
 
-from pandas.core.config import get_option, set_option, reset_option,\
-     describe_option, options
+from pandas.core.config import (get_option, set_option, reset_option,
+                                describe_option, options)
diff --git a/pandas/core/array.py b/pandas/core/array.py
index 6847ba073b92a..209b00cf8bb3c 100644
--- a/pandas/core/array.py
+++ b/pandas/core/array.py
@@ -37,6 +37,7 @@
 
 #### a series-like ndarray ####
 
+
 class SNDArray(Array):
 
     def __new__(cls, data, index=None, name=None):
@@ -49,4 +50,3 @@ def __new__(cls, data, index=None, name=None):
     @property
     def values(self):
         return self.view(Array)
-
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 6b9fa78d45406..a702e7c87c0a9 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -5,10 +5,15 @@
 import numpy as np
 from pandas.core import common as com
 
+
 class StringMixin(object):
-    """implements string methods so long as object defines a `__unicode__` method.
-    Handles Python2/3 compatibility transparently."""
+    """implements string methods so long as object defines a `__unicode__`
+    method.
+
+    Handles Python2/3 compatibility transparently.
+    """
-    # side note - this could be made into a metaclass if more than one object nees
+    # side note - this could be made into a metaclass if more than one
+    # object needs
 
     #----------------------------------------------------------------------
     # Formatting
@@ -96,7 +101,8 @@ class FrozenList(PandasObject, list):
     because it's technically non-hashable, will be used
     for lookups, appropriately, etc.
     """
-    # Sidenote: This has to be of type list, otherwise it messes up PyTables typechecks
+    # Sidenote: This has to be of type list, otherwise it messes up PyTables
+    # typechecks
 
     def __add__(self, other):
         if isinstance(other, tuple):
@@ -146,7 +152,7 @@ def _disabled(self, *args, **kwargs):
     def __unicode__(self):
         from pandas.core.common import pprint_thing
         return pprint_thing(self, quote_strings=True,
-                             escape_chars=('\t', '\r', '\n'))
+                            escape_chars=('\t', '\r', '\n'))
 
     def __repr__(self):
         return "%s(%s)" % (self.__class__.__name__,
@@ -185,7 +191,9 @@ def __unicode__(self):
         """
         Return a string representation for this object.
 
-        Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3.
+        Invoked by unicode(df) in py2 only. Yields a Unicode String in both
+        py2/py3.
         """
-        prepr = com.pprint_thing(self, escape_chars=('\t', '\r', '\n'),quote_strings=True)
+        prepr = com.pprint_thing(self, escape_chars=('\t', '\r', '\n'),
+                                 quote_strings=True)
         return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype)
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index f412947f92255..fec9cd4ff4274 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -31,6 +31,7 @@ def f(self, other):
 
     return f
 
+
 class Categorical(PandasObject):
     """
     Represents a categorical variable in classic R / S-plus fashion
@@ -167,8 +168,8 @@ def _repr_footer(self):
 
     def _get_repr(self, name=False, length=True, na_rep='NaN', footer=True):
         formatter = fmt.CategoricalFormatter(self, name=name,
-                                         length=length, na_rep=na_rep,
-                                         footer=footer)
+                                             length=length, na_rep=na_rep,
+                                             footer=footer)
         result = formatter.to_string()
         return compat.text_type(result)
 
@@ -226,7 +227,8 @@ def describe(self):
         grouped = DataFrame(self.labels).groupby(0)
         counts = grouped.count().values.squeeze()
         freqs = counts/float(counts.sum())
-        return DataFrame.from_dict(dict(
-            counts=counts,
-            freqs=freqs,
-            levels=self.levels)).set_index('levels')
+        return DataFrame.from_dict({
+            'counts': counts,
+            'freqs': freqs,
+            'levels': self.levels
+        }).set_index('levels')
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 42964c9d48537..6fc015d2cb575 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -26,20 +26,23 @@
 class PandasError(Exception):
     pass
 
+
 class SettingWithCopyError(ValueError):
     pass
 
+
 class SettingWithCopyWarning(Warning):
     pass
 
+
 class AmbiguousIndexError(PandasError, KeyError):
     pass
 
 
 _POSSIBLY_CAST_DTYPES = set([np.dtype(t)
-                             for t in ['M8[ns]', 'm8[ns]', 'O', 'int8',
-                                       'uint8', 'int16', 'uint16', 'int32',
-                                       'uint32', 'int64', 'uint64']])
+                            for t in ['M8[ns]', 'm8[ns]', 'O', 'int8',
+                                      'uint8', 'int16', 'uint16', 'int32',
+                                      'uint32', 'int64', 'uint64']])
 
 _NS_DTYPE = np.dtype('M8[ns]')
 _TD_DTYPE = np.dtype('m8[ns]')
@@ -136,8 +139,7 @@ def _isnull_new(obj):
 
 
 def _isnull_old(obj):
-    '''
-    Detect missing values. Treat None, NaN, INF, -INF as null.
+    """Detect missing values. Treat None, NaN, INF, -INF as null.
 
     Parameters
     ----------
    arr: ndarray or object value
 
     Returns
     -------
     boolean ndarray or boolean
-    '''
+    """
     if lib.isscalar(obj):
         return lib.checknull_old(obj)
     # hack (for now) because MI registers as ndarray
     elif isinstance(obj, pd.MultiIndex):
         raise NotImplementedError("isnull is not defined for MultiIndex")
     elif isinstance(obj, (ABCSeries, np.ndarray)):
         return _isnull_ndarraylike_old(obj)
     elif isinstance(obj, ABCGeneric):
-        return obj._constructor(obj._data.apply(lambda x: _isnull_old(x.values)))
+        return obj._constructor(obj._data.apply(
+            lambda x: _isnull_old(x.values)))
     elif isinstance(obj, list) or hasattr(obj, '__array__'):
         return _isnull_ndarraylike_old(np.asarray(obj))
     else:
@@ -165,7 +168,7 @@ def _isnull_old(obj):
 
 def _use_inf_as_null(key):
-    '''Option change callback for null/inf behaviour
+    """Option change callback for null/inf behaviour
     Choose which replacement for numpy.isnan / -numpy.isfinite is used.
 
     Parameters
@@ -182,7 +185,7 @@ def _use_inf_as_null(key):
     * http://stackoverflow.com/questions/4859217/
     programmatically-creating-variables-in-python/4859312#4859312
-    '''
+    """
     flag = get_option(key)
     if flag:
         globals()['_isnull'] = _isnull_old
@@ -192,7 +195,7 @@ def _use_inf_as_null(key):
 
 def _isnull_ndarraylike(obj):
-    values = getattr(obj,'values',obj)
+    values = getattr(obj, 'values', obj)
     dtype = values.dtype
 
     if dtype.kind in ('O', 'S', 'U'):
@@ -221,7 +224,7 @@ def _isnull_ndarraylike(obj):
 
 def _isnull_ndarraylike_old(obj):
-    values = getattr(obj,'values',obj)
+    values = getattr(obj, 'values', obj)
     dtype = values.dtype
 
     if dtype.kind in ('O', 'S', 'U'):
@@ -775,13 +778,15 @@ def diff(arr, n, axis=0):
 
 
 def _coerce_to_dtypes(result, dtypes):
-    """ given a dtypes and a result set, coerce the result elements to the dtypes """
+    """ given a dtypes and a result set, coerce the result elements to the
+    dtypes
+    """
     if len(result) != len(dtypes):
         raise AssertionError("_coerce_to_dtypes requires equal len arrays")
 
     from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type
 
-    def conv(r,dtype):
+    def conv(r, dtype):
         try:
             if isnull(r):
                 pass
@@ -800,7 +805,7 @@ def conv(r,dtype):
 
         return r
 
-    return np.array([ conv(r,dtype) for r, dtype in zip(result,dtypes) ])
+    return np.array([conv(r, dtype) for r, dtype in zip(result, dtypes)])
 
 
 def _infer_dtype_from_scalar(val):
@@ -850,7 +855,9 @@ def _infer_dtype_from_scalar(val):
 
 
 def _maybe_cast_scalar(dtype, value):
-    """ if we a scalar value and are casting to a dtype that needs nan -> NaT conversion """
+    """ if we have a scalar value and are casting to a dtype that needs
+    nan -> NaT conversion
+    """
     if np.isscalar(value) and dtype in _DATELIKE_DTYPES and isnull(value):
         return tslib.iNaT
     return value
@@ -882,8 +889,8 @@ def _maybe_promote(dtype, fill_value=np.nan):
             try:
                 fill_value = lib.Timestamp(fill_value).value
             except:
-                # the proper thing to do here would probably be to upcast to
-                # object (but numpy 1.6.1 doesn't do this properly)
+                # the proper thing to do here would probably be to upcast
+                # to object (but numpy 1.6.1 doesn't do this properly)
                 fill_value = tslib.iNaT
         else:
             fill_value = tslib.iNaT
@@ -920,10 +927,10 @@ def _maybe_promote(dtype, fill_value=np.nan):
 
 def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None):
     """ a safe version of put mask that (potentially upcasts the result
-        return the result
-        if change is not None, then MUTATE the change (and change the dtype)
-        return a changed flag
-        """
+    return the result
+    if change is not None, then MUTATE the change (and change the dtype)
+    return a changed flag
+    """
 
     if mask.any():
 
@@ -964,15 +971,17 @@ def changeit():
         return r, True
 
     # we want to decide whether putmask will work
-    # if we have nans in the False portion of our mask then we need to upcast (possibily)
-    # otherwise we DON't want to upcast (e.g. if we are have values, say integers in
-    # the success portion then its ok to not upcast)
+    # if we have nans in the False portion of our mask then we need to
+    # upcast (possibly) otherwise we DON'T want to upcast (e.g. if we
+    # have values, say integers in the success portion then it's ok to not
+    # upcast)
     new_dtype, fill_value = _maybe_promote(result.dtype, other)
     if new_dtype != result.dtype:
 
         # we have a scalar or len 0 ndarray
         # and its nan and we are changing some values
-        if np.isscalar(other) or (isinstance(other, np.ndarray) and other.ndim < 1):
+        if (np.isscalar(other) or
+                (isinstance(other, np.ndarray) and other.ndim < 1)):
             if isnull(other):
                 return changeit()
 
@@ -991,14 +1000,15 @@ def changeit():
 
 
 def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
-    """ provide explicty type promotion and coercion
+    """ provide explicit type promotion and coercion
 
-        Parameters
-        ----------
-        values : the ndarray that we want to maybe upcast
-        fill_value : what we want to fill with
-        dtype : if None, then use the dtype of the values, else coerce to this type
-        copy : if True always make a copy even if no upcast is required """
+    Parameters
+    ----------
+    values : the ndarray that we want to maybe upcast
+    fill_value : what we want to fill with
+    dtype : if None, then use the dtype of the values, else coerce to this type
+    copy : if True always make a copy even if no upcast is required
+    """
 
     if dtype is None:
         dtype = values.dtype
@@ -1022,7 +1032,8 @@ def _possibly_cast_item(obj, item, dtype):
 
 def _possibly_downcast_to_dtype(result, dtype):
     """ try to cast to the specified dtype (e.g. convert back to bool/int
-    or could be an astype of float64->float32 """
+    or could be an astype of float64->float32
+    """
 
     if np.isscalar(result) or not len(result):
         return result
@@ -1065,22 +1076,25 @@ def _possibly_downcast_to_dtype(result, dtype):
 
             # do a test on the first element, if it fails then we are done
             r = result.ravel()
-            arr = np.array([ r[0] ])
-            if not np.allclose(arr,trans(arr).astype(dtype)):
+            arr = np.array([r[0]])
+            if not np.allclose(arr, trans(arr).astype(dtype)):
                 return result
 
             # a comparable, e.g. a Decimal may slip in here
-            elif not isinstance(r[0], (np.integer,np.floating,np.bool,int,float,bool)):
+            elif not isinstance(r[0], (np.integer, np.floating, np.bool, int,
+                                       float, bool)):
                 return result
 
-            if issubclass(result.dtype.type, (np.object_,np.number)) and notnull(result).all():
+            if (issubclass(result.dtype.type, (np.object_, np.number)) and
+                    notnull(result).all()):
                 new_result = trans(result).astype(dtype)
                 try:
-                    if np.allclose(new_result,result):
+                    if np.allclose(new_result, result):
                         return new_result
                 except:
-                    # comparison of an object dtype with a number type could hit here
+                    # comparison of an object dtype with a number type could
+                    # hit here
                     if (new_result == result).all():
                         return new_result
         except:
@@ -1119,8 +1133,9 @@ def _lcd_dtypes(a_dtype, b_dtype):
 
 def _fill_zeros(result, y, fill):
     """ if we have an integer value (or array in y)
-    and we have 0's, fill them with the fill,
-    return the result """
+    and we have 0's, fill them with the fill,
+    return the result
+    """
 
     if fill is not None:
         if not isinstance(y, np.ndarray):
@@ -1155,7 +1170,6 @@ def wrapper(arr, mask, limit=None):
                            np.int64)
 
 
-
 def pad_1d(values, limit=None, mask=None):
 
     dtype = values.dtype.name
@@ -1357,8 +1371,8 @@ def _interp_limit(invalid, limit):
         new_x = new_x[firstIndex:]
         xvalues = xvalues[firstIndex:]
 
-        result[firstIndex:][invalid] = _interpolate_scipy_wrapper(valid_x,
-            valid_y, new_x, method=method, fill_value=fill_value,
+        result[firstIndex:][invalid] = _interpolate_scipy_wrapper(
+            valid_x, valid_y, new_x, method=method, fill_value=fill_value,
             bounds_error=bounds_error, **kwargs)
         if limit:
             result[violate_limit] = np.nan
@@ -1384,7 +1398,7 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None,
         'barycentric': interpolate.barycentric_interpolate,
         'krogh': interpolate.krogh_interpolate,
         'piecewise_polynomial': interpolate.piecewise_polynomial_interpolate,
-        }
+    }
 
     try:
         alt_methods['pchip'] = interpolate.pchip_interpolate
@@ -1411,16 +1425,18 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None,
 
 
 def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None):
-    """ perform an actual interpolation of values, values will be make 2-d if needed
-        fills inplace, returns the result """
+    """ perform an actual interpolation of values; values will be made 2-d if
+    needed, fills inplace, returns the result
+    """
 
     transf = (lambda x: x) if axis == 0 else (lambda x: x.T)
 
     # reshape a 1 dim if needed
     ndim = values.ndim
     if values.ndim == 1:
-        if axis != 0: # pragma: no cover
-            raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
+        if axis != 0:  # pragma: no cover
+            raise AssertionError("cannot interpolate on a ndim == 1 with "
+                                 "axis != 0")
         values = values.reshape(tuple((1,) + values.shape))
 
     if fill_value is None:
@@ -1451,6 +1467,7 @@ def _consensus_name_attr(objs):
 
 _fill_methods = {'pad': pad_1d, 'backfill': backfill_1d}
 
+
 def _get_fill_func(method):
     method = _clean_fill_method(method)
     return _fill_methods[method]
@@ -1478,8 +1495,9 @@ def _values_from_object(o):
     return o
 
 
-def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True):
-    """ if we have an object dtype, try to coerce dates and/or numers """
+def _possibly_convert_objects(values, convert_dates=True,
+                              convert_numeric=True):
+    """ if we have an object dtype, try to coerce dates and/or numbers """
 
     # if we have passed in a list or scalar
     if isinstance(values, (list, tuple)):
@@ -1537,7 +1555,9 @@ def _possibly_convert_platform(values):
 
 
 def _possibly_cast_to_datetime(value, dtype, coerce=False):
-    """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """
+    """ try to cast the array/value to a datetimelike dtype, converting float
+    nan to iNaT
+    """
 
     if dtype is not None:
         if isinstance(dtype, compat.string_types):
@@ -1573,21 +1593,26 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):
                         from pandas.tseries.tools import to_datetime
                         value = to_datetime(value, coerce=coerce).values
                     elif is_timedelta64:
-                        from pandas.tseries.timedeltas import _possibly_cast_to_timedelta
+                        from pandas.tseries.timedeltas import \
+                            _possibly_cast_to_timedelta
                         value = _possibly_cast_to_timedelta(value)
                 except:
                     pass
 
     else:
-        # only do this if we have an array and the dtype of the array is not setup already
-        # we are not an integer/object, so don't bother with this conversion
-        if isinstance(value, np.ndarray) and not (issubclass(value.dtype.type, np.integer) or value.dtype == np.object_):
+        # only do this if we have an array and the dtype of the array is not
+        # set up already; we are not an integer/object, so don't bother with
+        # this conversion
+        if (isinstance(value, np.ndarray) and not
+                (issubclass(value.dtype.type, np.integer) or
+                 value.dtype == np.object_)):
             pass
 
         else:
-            # we might have a array (or single object) that is datetime like, and no dtype is passed
-            # don't change the value unless we find a datetime set
+            # we might have an array (or single object) that is datetime
+            # like, and no dtype is passed; don't change the value unless we
+            # find a datetime set
             v = value
             if not is_list_like(v):
                 v = [v]
@@ -1599,7 +1624,8 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):
                     except:
                         pass
                 elif inferred_type in ['timedelta', 'timedelta64']:
-                    from pandas.tseries.timedeltas import _possibly_cast_to_timedelta
+                    from pandas.tseries.timedeltas import \
+                        _possibly_cast_to_timedelta
                     value = _possibly_cast_to_timedelta(value)
 
     return value
@@ -1874,9 +1900,9 @@ def _asarray_tuplesafe(values, dtype=None):
         try:
             result = np.empty(len(values), dtype=object)
             result[:] = values
-        except (ValueError):
+        except ValueError:
             # we have a list-of-list
-            result[:] = [ tuple(x) for x in values ]
+            result[:] = [tuple(x) for x in values]
 
     return result
 
@@ -1977,7 +2003,8 @@ def is_timedelta64_dtype(arr_or_dtype):
 
 
 def needs_i8_conversion(arr_or_dtype):
-    return is_datetime64_dtype(arr_or_dtype) or is_timedelta64_dtype(arr_or_dtype)
+    return (is_datetime64_dtype(arr_or_dtype) or
+            is_timedelta64_dtype(arr_or_dtype))
 
 
 def is_float_dtype(arr_or_dtype):
@@ -2010,7 +2037,8 @@ def is_re_compilable(obj):
 
 
 def is_list_like(arg):
-    return hasattr(arg, '__iter__') and not isinstance(arg, compat.string_and_binary_types)
+    return (hasattr(arg, '__iter__') and
+            not isinstance(arg, compat.string_and_binary_types))
 
 
 def _is_sequence(x):
@@ -2044,8 +2072,8 @@ def _astype_nansafe(arr, dtype, copy=True):
         elif dtype == np.int64:
             return arr.view(dtype)
         elif dtype != _NS_DTYPE:
-            raise TypeError(
-                "cannot astype a datetimelike from [%s] to [%s]" % (arr.dtype, dtype))
+            raise TypeError("cannot astype a datetimelike from [%s] to [%s]" %
+                            (arr.dtype, dtype))
         return arr.astype(_NS_DTYPE)
     elif is_timedelta64_dtype(arr):
         if dtype == np.int64:
@@ -2054,7 +2082,8 @@ def _astype_nansafe(arr, dtype, copy=True):
             return arr.astype(object)
 
         # in py3, timedelta64[ns] are int64
-        elif (compat.PY3 and dtype not in [_INT64_DTYPE,_TD_DTYPE]) or (not compat.PY3 and dtype != _TD_DTYPE):
+        elif ((compat.PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
+                (not compat.PY3 and dtype != _TD_DTYPE)):
 
             # allow frequency conversions
             if dtype.kind == 'm':
@@ -2063,7 +2092,8 @@ def _astype_nansafe(arr, dtype, copy=True):
                 result[mask] = np.nan
                 return result
 
-            raise TypeError("cannot astype a timedelta from [%s] to [%s]" % (arr.dtype,dtype))
+            raise TypeError("cannot astype a timedelta from [%s] to [%s]" %
+                            (arr.dtype, dtype))
 
         return arr.astype(_TD_DTYPE)
     elif (np.issubdtype(arr.dtype, np.floating) and
@@ -2083,7 +2113,8 @@ def _astype_nansafe(arr, dtype, copy=True):
 
 
 def _clean_fill_method(method):
-    if method is None: return None
+    if method is None:
+        return None
     method = method.lower()
     if method == 'ffill':
         method = 'pad'
@@ -2130,8 +2161,9 @@ def next(self):
 def _get_handle(path, mode, encoding=None, compression=None):
     """Gets file handle for given path and mode.
 
-    NOTE: Under Python 3.2, getting a compressed file handle means reading in the entire file,
-    decompressing it and decoding it to ``str`` all at once and then wrapping it in a StringIO.
+    NOTE: Under Python 3.2, getting a compressed file handle means reading in
+    the entire file, decompressing it and decoding it to ``str`` all at once
+    and then wrapping it in a StringIO.
     """
     if compression is not None:
         if encoding is not None and not compat.PY3:
@@ -2327,8 +2359,10 @@ def in_qtconsole():
     """
     try:
         ip = get_ipython()
-        front_end = (ip.config.get('KernelApp', {}).get('parent_appname', "") or
-                     ip.config.get('IPKernelApp', {}).get('parent_appname', ""))
+        front_end = (
+            ip.config.get('KernelApp', {}).get('parent_appname', "") or
+            ip.config.get('IPKernelApp', {}).get('parent_appname', "")
+        )
         if 'qtconsole' in front_end.lower():
             return True
     except:
@@ -2342,8 +2376,10 @@ def in_ipnb():
     """
     try:
         ip = get_ipython()
-        front_end = (ip.config.get('KernelApp', {}).get('parent_appname', "") or
-                     ip.config.get('IPKernelApp', {}).get('parent_appname', ""))
+        front_end = (
+            ip.config.get('KernelApp', {}).get('parent_appname', "") or
+            ip.config.get('IPKernelApp', {}).get('parent_appname', "")
+        )
         if 'notebook' in front_end.lower():
             return True
     except:
@@ -2399,7 +2435,7 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds):
     bounds length of printed sequence, depending on options
     """
-    if isinstance(seq,set):
+    if isinstance(seq, set):
         fmt = u("set([%s])")
     else:
         fmt = u("[%s]") if hasattr(seq, '__setitem__') else u("(%s)")
@@ -2433,8 +2469,8 @@ def _pprint_dict(seq, _nest_lvl=0, **kwds):
     nitems = get_option("max_seq_items") or len(seq)
 
     for k, v in list(seq.items())[:nitems]:
-        pairs.append(pfmt % (pprint_thing(k,_nest_lvl+1,**kwds),
-                             pprint_thing(v,_nest_lvl+1,**kwds)))
+        pairs.append(pfmt % (pprint_thing(k, _nest_lvl+1, **kwds),
+                             pprint_thing(v, _nest_lvl+1, **kwds)))
 
     if nitems < len(seq):
         return fmt % (", ".join(pairs) + ", ...")
@@ -2505,7 +2541,7 @@ def as_escaped_unicode(thing, escape_chars=escape_chars):
             get_option("display.pprint_nest_depth"):
         result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
                              quote_strings=quote_strings)
-    elif isinstance(thing,compat.string_types) and quote_strings:
+    elif isinstance(thing, compat.string_types) and quote_strings:
         if compat.PY3:
             fmt = "'%s'"
         else:
@@ -2539,8 +2575,8 @@ def load(path):  # TODO remove in 0.13
     Load pickled pandas object (or any other pickled object) from the
    specified file path
 
-    Warning: Loading pickled data received from untrusted sources can be unsafe.
-    See: http://docs.python.org/2.7/library/pickle.html
+    Warning: Loading pickled data received from untrusted sources can be
+    unsafe. See: http://docs.python.org/2.7/library/pickle.html
 
     Parameters
     ----------
@@ -2558,7 +2594,7 @@ def load(path):  # TODO remove in 0.13
 
 
 def save(obj, path):  # TODO remove in 0.13
-    '''
+    """
     Pickle (serialize) object to input file path
 
     Parameters
@@ -2566,7 +2602,7 @@ def save(obj, path):  # TODO remove in 0.13
     obj : any object
     path : string
         File path
-    '''
+    """
     import warnings
     warnings.warn("save is deprecated, use obj.to_pickle", FutureWarning)
     from pandas.io.pickle import to_pickle
@@ -2574,8 +2610,8 @@ def save(obj, path):  # TODO remove in 0.13
 
 
 def _maybe_match_name(a, b):
-    a_name = getattr(a,'name',None)
-    b_name = getattr(b,'name',None)
+    a_name = getattr(a, 'name', None)
+    b_name = getattr(b, 'name', None)
     if a_name == b_name:
         return a_name
     return None
diff --git a/pandas/core/config.py b/pandas/core/config.py
index 20ec30398fd64..6eb947119578f 100644
--- a/pandas/core/config.py
+++ b/pandas/core/config.py
@@ -173,16 +173,19 @@ def _reset_option(pat):
 
     if len(keys) > 1 and len(pat) < 4 and pat != 'all':
         raise ValueError('You must specify at least 4 characters when '
-                         'resetting multiple keys, use the special keyword "all" '
-                         'to reset all the options to their default value')
+                         'resetting multiple keys, use the special keyword '
+                         '"all" to reset all the options to their default '
+                         'value')
 
     for k in keys:
         _set_option(k, _registered_options[k].defval)
 
+
 def get_default_val(pat):
-    key  = _get_single_key(pat, silent=True)
+    key = _get_single_key(pat, silent=True)
     return _get_registered_option(key).defval
 
+
 class DictWrapper(object):
     """ provide attribute-style access to a nested dict
     """
@@ -242,7 +245,8 @@ def __doc__(self):
         return self.__doc_tmpl__.format(opts_desc=opts_desc,
                                         opts_list=opts_list)
 
-_get_option_tmpl = """"get_option(pat) - Retrieves the value of the specified option
+_get_option_tmpl = """
+get_option(pat) - Retrieves the value of the specified option
 
 Available options:
 {opts_list}
@@ -266,7 +270,8 @@ def __doc__(self):
 {opts_desc}
 """
 
-_set_option_tmpl = """set_option(pat,value) - Sets the value of the specified option
+_set_option_tmpl = """
+set_option(pat,value) - Sets the value of the specified option
 
 Available options:
 {opts_list}
@@ -292,7 +297,8 @@ def __doc__(self):
 {opts_desc}
 """
 
-_describe_option_tmpl = """describe_option(pat,_print_desc=False) Prints the description
+_describe_option_tmpl = """
+describe_option(pat,_print_desc=False) Prints the description
 for one or more registered options.
 
 Call with not arguments to get a listing for all registered options.
@@ -317,7 +323,8 @@ def __doc__(self):
 {opts_desc}
 """
 
-_reset_option_tmpl = """reset_option(pat) - Reset one or more options to their default value.
+_reset_option_tmpl = """
+reset_option(pat) - Reset one or more options to their default value.
 
 Pass "all" as argument to reset all options.
 
@@ -353,9 +360,11 @@ def __doc__(self):
 class option_context(object):
 
     def __init__(self, *args):
-        if not ( len(args) % 2 == 0 and len(args) >= 2):
-            errmsg = "Need to invoke as option_context(pat,val,[(pat,val),..))."
-            raise AssertionError(errmsg)
+        if not (len(args) % 2 == 0 and len(args) >= 2):
+            raise AssertionError(
+                'Need to invoke as '
+                'option_context(pat, val, [(pat, val), ...]).'
+            )
 
         ops = list(zip(args[::2], args[1::2]))
         undo = []
@@ -425,20 +434,21 @@ def register_option(key, defval, doc='', validator=None, cb=None):
     for i, p in enumerate(path[:-1]):
         if not isinstance(cursor, dict):
             raise OptionError("Path prefix to option '%s' is already an option"
-                               % '.'.join(path[:i]))
+                              % '.'.join(path[:i]))
         if p not in cursor:
             cursor[p] = {}
         cursor = cursor[p]
 
     if not isinstance(cursor, dict):
         raise OptionError("Path prefix to option '%s' is already an option"
-                           % '.'.join(path[:-1]))
+                          % '.'.join(path[:-1]))
 
     cursor[path[-1]] = defval  # initialize

     # save the option metadata
     _registered_options[key] = RegisteredOption(key=key, defval=defval,
-                                                doc=doc, validator=validator, cb=cb)
+                                                doc=doc, validator=validator,
+                                                cb=cb)
 
 
 def deprecate_option(key, msg=None, rkey=None, removal_ver=None):
@@ -484,7 +494,7 @@ def deprecate_option(key, msg=None, rkey=None, removal_ver=None):
 
     if key in _deprecated_options:
         raise OptionError("Option '%s' has already been defined as deprecated."
-                           % key)
+                          % key)
 
     _deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver)
 
@@ -512,6 +522,7 @@ def _get_root(key):
         cursor = cursor[p]
     return cursor, path[-1]
 
+
 def _get_option_fast(key):
     """ internal quick access routine, no error checking """
     path = key.split('.')
@@ -520,6 +531,7 @@ def _get_option_fast(key):
         cursor = cursor[p]
     return cursor
 
+
 def _is_deprecated(key):
     """ Returns True if the given option has been deprecated """
 
@@ -603,7 +615,8 @@ def _build_option_description(k):
     s = u('%s: ') % k
 
     if o:
-        s += u('[default: %s] [currently: %s]') % (o.defval, _get_option(k, True))
+        s += u('[default: %s] [currently: %s]') % (o.defval,
+                                                   _get_option(k, True))
 
     if o.doc:
         s += '\n' + '\n    '.join(o.doc.strip().split('\n'))
@@ -755,12 +768,14 @@ def inner(x):
 
     return inner
 
+
 def is_one_of_factory(legal_values):
     def inner(x):
         from pandas.core.common import pprint_thing as pp
         if not x in legal_values:
             pp_values = lmap(pp, legal_values)
-            raise ValueError("Value must be one of %s" % pp("|".join(pp_values)))
+            raise ValueError("Value must be one of %s"
+                             % pp("|".join(pp_values)))
 
     return inner
 
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 9e95759ac088b..b9b934769793f 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -1,8 +1,3 @@
-import pandas.core.config as cf
-from pandas.core.config import (is_int, is_bool, is_text, is_float,
-                                is_instance_factory,is_one_of_factory,get_default_val)
-from pandas.core.format import detect_console_encoding
-
 """
 This module is imported from the pandas package __init__.py file
 in order to ensure that the core.config options registered here will
@@ -15,6 +10,12 @@
 
 """
 
+import pandas.core.config as cf
+from pandas.core.config import (is_int, is_bool, is_text, is_float,
+                                is_instance_factory, is_one_of_factory,
+                                get_default_val)
+from pandas.core.format import detect_console_encoding
+
 
 ###########################################
 # options from the "display" namespace
@@ -113,8 +114,8 @@
 
 pc_expand_repr_doc = """
 : boolean
-    Whether to print out the full DataFrame repr for wide DataFrames
-    across multiple lines, `max_columns` is still respected, but the output will
+    Whether to print out the full DataFrame repr for wide DataFrames across
+    multiple lines, `max_columns` is still respected, but the output will
     wrap-around across multiple "pages" if it's width exceeds `display.width`.
""" @@ -124,7 +125,8 @@ """ pc_line_width_deprecation_warning = """\ -line_width has been deprecated, use display.width instead (currently both are identical) +line_width has been deprecated, use display.width instead (currently both are +identical) """ pc_height_deprecation_warning = """\ @@ -134,8 +136,8 @@ pc_width_doc = """ : int Width of the display in characters. In case python/IPython is running in - a terminal this can be set to None and pandas will correctly auto-detect the - width. + a terminal this can be set to None and pandas will correctly auto-detect + the width. Note that the IPython notebook, IPython qtconsole, or IDLE do not run in a terminal and hence it is not possible to correctly detect the width. """ @@ -155,8 +157,8 @@ : int or None when pretty-printing a long sequence, no more then `max_seq_items` - will be printed. If items are ommitted, they will be denoted by the addition - of "..." to the resulting string. + will be printed. If items are omitted, they will be denoted by the + addition of "..." to the resulting string. If set to None, the number of items to be printed is unlimited. """ @@ -182,6 +184,8 @@ """ style_backup = dict() + + def mpl_style_cb(key): import sys from pandas.tools.plotting import mpl_stylesheet @@ -190,15 +194,14 @@ def mpl_style_cb(key): val = cf.get_option(key) if 'matplotlib' not in sys.modules.keys(): - if not(val): # starting up, we get reset to None + if not(val): # starting up, we get reset to None return val raise Exception("matplotlib has not been imported. aborting") import matplotlib.pyplot as plt - if val == 'default': - style_backup = dict([(k,plt.rcParams[k]) for k in mpl_stylesheet]) + style_backup = dict([(k, plt.rcParams[k]) for k in mpl_stylesheet]) plt.rcParams.update(mpl_stylesheet) elif not val: if style_backup: @@ -241,10 +244,11 @@ def mpl_style_cb(key): cb=mpl_style_cb) cf.register_option('height', 60, pc_height_doc, validator=is_instance_factory([type(None), int])) - cf.register_option('width',80, pc_width_doc, + cf.register_option('width', 80, pc_width_doc, validator=is_instance_factory([type(None), int])) # redirected to width, make defval identical - cf.register_option('line_width', get_default_val('display.width'), pc_line_width_doc) + cf.register_option('line_width', get_default_val('display.width'), + pc_line_width_doc) cf.deprecate_option('display.line_width', msg=pc_line_width_deprecation_warning, @@ -271,6 +275,7 @@ def mpl_style_cb(key): # We don't want to start importing everything at the global context level # or we'll hit circular deps. + def use_inf_as_null_cb(key): from pandas.core.common import _use_inf_as_null _use_inf_as_null(key) @@ -283,7 +288,8 @@ def use_inf_as_null_cb(key): # user warnings chained_assignment = """ : string - Raise an exception, warn, or no action if trying to use chained assignment, The default is warn + Raise an exception, warn, or no action if trying to use chained assignment, + The default is warn """ with cf.config_prefix('mode'): @@ -294,7 +300,8 @@ def use_inf_as_null_cb(key): # Set up the io.excel specific configuration. writer_engine_doc = """ : string - The default Excel writer engine for '{ext}' files. Available options: '{default}' (the default){others}. + The default Excel writer engine for '{ext}' files. Available options: + '{default}' (the default){others}. 
""" with cf.config_prefix('io.excel'): @@ -309,12 +316,13 @@ def use_inf_as_null_cb(key): doc = writer_engine_doc.format(ext=ext, default=default, others=options) cf.register_option(ext + '.writer', default, doc, validator=str) + def _register_xlsx(engine, other): cf.register_option('xlsx.writer', engine, writer_engine_doc.format(ext='xlsx', default=engine, others=", '%s'" % other), - validator=str) + validator=str) try: # better memory footprint diff --git a/pandas/core/datetools.py b/pandas/core/datetools.py index 91a29259d8f2f..1fb6ae4225f25 100644 --- a/pandas/core/datetools.py +++ b/pandas/core/datetools.py @@ -36,6 +36,7 @@ isMonthEnd = MonthEnd().onOffset isBMonthEnd = BMonthEnd().onOffset + def _resolve_offset(freq, kwds): if 'timeRule' in kwds or 'offset' in kwds: offset = kwds.get('offset', None) @@ -54,4 +55,3 @@ def _resolve_offset(freq, kwds): FutureWarning) return offset - diff --git a/pandas/core/format.py b/pandas/core/format.py index ae0d95b1c3074..9abfe3c43b8e5 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -62,6 +62,7 @@ ------- formatted : string (or unicode, depending on data and options)""" + class CategoricalFormatter(object): def __init__(self, categorical, buf=None, length=True, na_rep='NaN', name=False, footer=True): @@ -78,8 +79,8 @@ def _get_footer(self): if self.name: name = com.pprint_thing(self.categorical.name, escape_chars=('\t', '\r', '\n')) - footer += ('Name: %s' % - name) if self.categorical.name is not None else "" + footer += ('Name: %s' % name if self.categorical.name is not None + else '') if self.length: if footer: @@ -88,7 +89,7 @@ def _get_footer(self): levheader = 'Levels (%d): ' % len(self.categorical.levels) - #TODO: should max_line_width respect a setting? + # TODO: should max_line_width respect a setting? 
levstring = np.array_repr(self.categorical.levels, max_line_width=60) indent = ' ' * (levstring.find('[') + len(levheader) + 1) lines = levstring.split('\n') @@ -140,7 +141,7 @@ def __init__(self, series, buf=None, header=True, length=True, if float_format is None: float_format = get_option("display.float_format") self.float_format = float_format - self.dtype = dtype + self.dtype = dtype def _get_footer(self): footer = u('') @@ -163,10 +164,11 @@ def _get_footer(self): footer += 'Length: %d' % len(self.series) if self.dtype: - if getattr(self.series.dtype,'name',None): + name = getattr(self.series.dtype, 'name', None) + if name: if footer: footer += ', ' - footer += 'dtype: %s' % com.pprint_thing(self.series.dtype.name) + footer += 'dtype: %s' % com.pprint_thing(name) return compat.text_type(footer) @@ -213,6 +215,7 @@ def to_string(self): return compat.text_type(u('\n').join(result)) + def _strlen_func(): if compat.PY3: # pragma: no cover _strlen = len @@ -420,9 +423,10 @@ def get_col_type(dtype): column_format = 'l%s' % ''.join(map(get_col_type, dtypes)) else: column_format = '%s' % ''.join(map(get_col_type, dtypes)) - elif not isinstance(column_format, compat.string_types): # pragma: no cover - raise AssertionError(('column_format must be str or unicode, not %s' - % type(column_format))) + elif not isinstance(column_format, + compat.string_types): # pragma: no cover + raise AssertionError('column_format must be str or unicode, not %s' + % type(column_format)) def write(buf, frame, column_format, strcols): buf.write('\\begin{tabular}{%s}\n' % column_format) @@ -482,10 +486,9 @@ def is_numeric_dtype(dtype): fmt_columns = lzip(*fmt_columns) dtypes = self.frame.dtypes.values need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) - str_columns = list(zip(*[[' ' + y - if y not in self.formatters and need_leadsp[x] - else y for y in x] - for x in fmt_columns])) + str_columns = list(zip(*[ + [' ' + y if y not in self.formatters and need_leadsp[x] + else y for y in x] for x in fmt_columns])) if self.sparsify: str_columns = _sparsify(str_columns) @@ -690,11 +693,12 @@ def _column_header(): sentinal = com.sentinal_factory() levels = self.columns.format(sparsify=sentinal, adjoin=False, names=False) - level_lengths = _get_level_lengths(levels,sentinal) + level_lengths = _get_level_lengths(levels, sentinal) row_levels = self.frame.index.nlevels - for lnum, (records, values) in enumerate(zip(level_lengths, levels)): + for lnum, (records, values) in enumerate(zip(level_lengths, + levels)): name = self.columns.names[lnum] row = [''] * (row_levels - 1) + ['' if name is None else str(name)] @@ -784,8 +788,9 @@ def _write_hierarchical_rows(self, fmt_values, indent): # GH3547 sentinal = com.sentinal_factory() - levels = frame.index.format(sparsify=sentinal, adjoin=False, names=False) - level_lengths = _get_level_lengths(levels,sentinal) + levels = frame.index.format(sparsify=sentinal, adjoin=False, + names=False) + level_lengths = _get_level_lengths(levels, sentinal) for i in range(len(frame)): row = [] @@ -810,15 +815,16 @@ def _write_hierarchical_rows(self, fmt_values, indent): else: for i in range(len(frame)): idx_values = list(zip(*frame.index.format(sparsify=False, - adjoin=False, - names=False))) + adjoin=False, + names=False))) row = [] row.extend(idx_values[i]) row.extend(fmt_values[j][i] for j in range(ncols)) self.write_tr(row, indent, self.indent_delta, tags=None, nindex_levels=frame.index.nlevels) -def _get_level_lengths(levels,sentinal=''): + +def _get_level_lengths(levels, 
sentinal=''): from itertools import groupby def _make_grouper(): @@ -882,8 +888,8 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None, #GH3457 if not self.obj.columns.is_unique and engine == 'python': - msg= "columns.is_unique == False not supported with engine='python'" - raise NotImplementedError(msg) + raise NotImplementedError("columns.is_unique == False not " + "supported with engine='python'") self.tupleize_cols = tupleize_cols self.has_mi_columns = isinstance(obj.columns, MultiIndex @@ -892,24 +898,27 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None, # validate mi options if self.has_mi_columns: if cols is not None: - raise TypeError("cannot specify cols with a MultiIndex on the columns") + raise TypeError("cannot specify cols with a MultiIndex on the " + "columns") if cols is not None: - if isinstance(cols,Index): - cols = cols.to_native_types(na_rep=na_rep,float_format=float_format, + if isinstance(cols, Index): + cols = cols.to_native_types(na_rep=na_rep, + float_format=float_format, date_format=date_format) else: - cols=list(cols) - self.obj = self.obj.loc[:,cols] + cols = list(cols) + self.obj = self.obj.loc[:, cols] # update columns to include possible multiplicity of dupes # and make sure sure cols is just a list of labels cols = self.obj.columns - if isinstance(cols,Index): - cols = cols.to_native_types(na_rep=na_rep,float_format=float_format, + if isinstance(cols, Index): + cols = cols.to_native_types(na_rep=na_rep, + float_format=float_format, date_format=date_format) else: - cols=list(cols) + cols = list(cols) # save it self.cols = cols @@ -917,19 +926,22 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None, # preallocate data 2d list self.blocks = self.obj._data.blocks ncols = sum(len(b.items) for b in self.blocks) - self.data =[None] * ncols + self.data = [None] * ncols self.column_map = self.obj._data.get_items_map(use_cached=False) if chunksize is None: - chunksize = (100000/ (len(self.cols) or 1)) or 1 + chunksize = (100000 / (len(self.cols) or 1)) or 1 self.chunksize = chunksize self.data_index = obj.index if isinstance(obj.index, PeriodIndex): self.data_index = obj.index.to_timestamp() - if isinstance(self.data_index, DatetimeIndex) and date_format is not None: - self.data_index = Index([x.strftime(date_format) if notnull(x) else '' for x in self.data_index]) + if (isinstance(self.data_index, DatetimeIndex) and + date_format is not None): + self.data_index = Index([x.strftime(date_format) + if notnull(x) else '' + for x in self.data_index]) self.nlevels = getattr(self.data_index, 'nlevels', 1) if not index: @@ -961,7 +973,8 @@ def _helper_csv(self, writer, na_rep=None, cols=None, index_label = [''] else: index_label = [index_label] - elif not isinstance(index_label, (list, tuple, np.ndarray)): + elif not isinstance(index_label, + (list, tuple, np.ndarray)): # given a string for a DF with Index index_label = [index_label] @@ -1004,8 +1017,9 @@ def strftime_with_nulls(x): values = self.obj.copy() values.index = data_index - values.columns = values.columns.to_native_types(na_rep=na_rep,float_format=float_format, - date_format=date_format) + values.columns = values.columns.to_native_types( + na_rep=na_rep, float_format=float_format, + date_format=date_format) values = values[cols] series = {} @@ -1018,7 +1032,7 @@ def strftime_with_nulls(x): if index: if nlevels == 1: row_fields = [idx] - else: # handle MultiIndex + else: # handle MultiIndex row_fields = list(idx) for i, col in 
enumerate(cols): val = series[col][j] @@ -1040,7 +1054,8 @@ def save(self): f = self.path_or_buf close = False else: - f = com._get_handle(self.path_or_buf, self.mode, encoding=self.encoding) + f = com._get_handle(self.path_or_buf, self.mode, + encoding=self.encoding) close = True try: @@ -1056,14 +1071,15 @@ def save(self): if self.engine == 'python': # to be removed in 0.13 self._helper_csv(self.writer, na_rep=self.na_rep, - float_format=self.float_format, cols=self.cols, - header=self.header, index=self.index, - index_label=self.index_label, date_format=self.date_format) + float_format=self.float_format, + cols=self.cols, header=self.header, + index=self.index, + index_label=self.index_label, + date_format=self.date_format) else: self._save() - finally: if close: f.close() @@ -1127,7 +1143,8 @@ def _save_header(self): if has_mi_columns: columns = obj.columns - # write out the names for each level, then ALL of the values for each level + # write out the names for each level, then ALL of the values for + # each level for i in range(columns.nlevels): # we need at least 1 index column to write our col names @@ -1135,10 +1152,10 @@ def _save_header(self): if self.index: # name is the first column - col_line.append( columns.names[i] ) + col_line.append(columns.names[i]) - if isinstance(index_label,list) and len(index_label)>1: - col_line.extend([ '' ] * (len(index_label)-1)) + if isinstance(index_label, list) and len(index_label) > 1: + col_line.extend([''] * (len(index_label)-1)) col_line.extend(columns.get_level_values(i)) @@ -1146,7 +1163,7 @@ def _save_header(self): # add blanks for the columns, so that we # have consistent seps - encoded_labels.extend([ '' ] * len(columns)) + encoded_labels.extend([''] * len(columns)) # write out the index label line writer.writerow(encoded_labels) @@ -1171,14 +1188,15 @@ def _save(self): def _save_chunk(self, start_i, end_i): - data_index = self.data_index + data_index = self.data_index # create the data for a chunk - slicer = slice(start_i,end_i) + slicer = slice(start_i, end_i) for i in range(len(self.blocks)): b = self.blocks[i] d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, - float_format=self.float_format, date_format=self.date_format) + float_format=self.float_format, + date_format=self.date_format) for i, item in enumerate(b.items): @@ -1186,7 +1204,8 @@ def _save_chunk(self, start_i, end_i): self.data[self.column_map[b][i]] = d[i] ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep, - float_format=self.float_format, date_format=self.date_format) + float_format=self.float_format, + date_format=self.date_format) lib.write_csv_rows(self.data, ix, self.nlevels, self.cols, self.writer) @@ -1194,6 +1213,7 @@ def _save_chunk(self, start_i, end_i): # ExcelCell = namedtuple("ExcelCell", # 'row, col, val, style, mergestart, mergeend') + class ExcelCell(object): __fields__ = ('row', 'col', 'val', 'style', 'mergestart', 'mergeend') __slots__ = __fields__ @@ -1539,8 +1559,8 @@ def _format_strings(self): else: float_format = self.float_format - formatter = (lambda x: com.pprint_thing(x, escape_chars=('\t', '\r', '\n'))) \ - if self.formatter is None else self.formatter + formatter = self.formatter if self.formatter is not None else \ + (lambda x: com.pprint_thing(x, escape_chars=('\t', '\r', '\n'))) def _format(x): if self.na_rep is not None and lib.checknull(x): @@ -1584,19 +1604,20 @@ def __init__(self, *args, **kwargs): def _format_with(self, fmt_str): def _val(x, threshold): if notnull(x): - if threshold is None or abs(x) > 
get_option("display.chop_threshold"): - return fmt_str % x + if (threshold is None or + abs(x) > get_option("display.chop_threshold")): + return fmt_str % x else: - if fmt_str.endswith("e"): # engineering format - return "0" + if fmt_str.endswith("e"): # engineering format + return "0" else: - return fmt_str % 0 + return fmt_str % 0 else: return self.na_rep threshold = get_option("display.chop_threshold") - fmt_values = [ _val(x, threshold) for x in self.values] + fmt_values = [_val(x, threshold) for x in self.values] return _trim_zeros(fmt_values, self.na_rep) def get_result(self): @@ -1654,6 +1675,7 @@ def get_result(self): fmt_values = [formatter(x) for x in self.values] return _make_fixed_width(fmt_values, self.justify) + def _format_datetime64(x, tz=None): if isnull(x): return 'NaT' @@ -1674,12 +1696,14 @@ def get_result(self): fmt_values = [formatter(x) for x in self.values] return _make_fixed_width(fmt_values, self.justify) + def _format_timedelta64(x): if isnull(x): return 'NaT' return lib.repr_timedelta64(x) + def _make_fixed_width(strings, justify='right', minimum=None): if len(strings) == 0: return strings @@ -1762,6 +1786,8 @@ def _has_names(index): # Global formatting options _initial_defencoding = None + + def detect_console_encoding(): """ Try to find the most capable encoding supported by the console. @@ -1776,13 +1802,15 @@ def detect_console_encoding(): except AttributeError: pass - if not encoding or 'ascii' in encoding.lower(): # try again for something better + # try again for something better + if not encoding or 'ascii' in encoding.lower(): try: encoding = locale.getpreferredencoding() except Exception: pass - if not encoding or 'ascii' in encoding.lower(): # when all else fails. this will usually be "ascii" + # when all else fails. this will usually be "ascii" + if not encoding or 'ascii' in encoding.lower(): encoding = sys.getdefaultencoding() # GH3360, save the reported defencoding at import time @@ -1804,8 +1832,8 @@ def get_console_size(): # Consider # interactive shell terminal, can detect term size - # interactive non-shell terminal (ipnb/ipqtconsole), cannot detect term size - # non-interactive script, should disregard term size + # interactive non-shell terminal (ipnb/ipqtconsole), cannot detect term + # size non-interactive script, should disregard term size # in addition # width,height have default values, but setting to 'None' signals @@ -1823,7 +1851,7 @@ def get_console_size(): # pure terminal terminal_width, terminal_height = get_terminal_size() else: - terminal_width, terminal_height = None,None + terminal_width, terminal_height = None, None # Note if the User sets width/Height to None (auto-detection) # and we're in a script (non-inter), this will return (None,None) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1222b5b93799d..b194c938b13cc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -60,9 +60,8 @@ #---------------------------------------------------------------------- # Docstring templates -_shared_doc_kwargs = dict(axes='index, columns', - klass='DataFrame', - axes_single_arg="{0,1,'index','columns'}") +_shared_doc_kwargs = dict(axes='index, columns', klass='DataFrame', + axes_single_arg="{0,1,'index','columns'}") _numeric_only_doc = """numeric_only : boolean, default None Include only float, int, boolean data. 
If None, will attempt to use @@ -196,15 +195,16 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, data = data._data if isinstance(data, BlockManager): - mgr = self._init_mgr( - data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy) + mgr = self._init_mgr(data, axes=dict(index=index, columns=columns), + dtype=dtype, copy=copy) elif isinstance(data, dict): mgr = self._init_dict(data, index, columns, dtype=dtype) elif isinstance(data, ma.MaskedArray): # masked recarray if isinstance(data, ma.mrecords.MaskedRecords): - mgr = _masked_rec_array_to_mgr(data, index, columns, dtype, copy) + mgr = _masked_rec_array_to_mgr(data, index, columns, dtype, + copy) # a masked array else: @@ -224,8 +224,9 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, if columns is None: columns = data_columns mgr = self._init_dict(data, index, columns, dtype=dtype) - elif getattr(data,'name',None): - mgr = self._init_dict({ data.name : data }, index, columns, dtype=dtype) + elif getattr(data, 'name', None): + mgr = self._init_dict({data.name: data}, index, columns, + dtype=dtype) else: mgr = self._init_ndarray(data, index, columns, dtype=dtype, copy=copy) @@ -236,7 +237,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, if index is None and isinstance(data[0], Series): index = _get_names_from_index(data) - if is_list_like(data[0]) and getattr(data[0],'ndim',1) == 1: + if is_list_like(data[0]) and getattr(data[0], 'ndim', 1) == 1: arrays, columns = _to_arrays(data, columns, dtype=dtype) columns = _ensure_index(columns) @@ -283,7 +284,8 @@ def _init_dict(self, data, index, columns, dtype=None): # prefilter if columns passed - data = dict((k, v) for k, v in compat.iteritems(data) if k in columns) + data = dict((k, v) for k, v in compat.iteritems(data) + if k in columns) if index is None: index = extract_index(list(data.values())) @@ -395,7 +397,8 @@ def _repr_fits_horizontal_(self, ignore_width=False): return False if (ignore_width # used by repr_html under IPython notebook - or not com.in_interactive_session()): # scripts ignore terminal dims + # scripts ignore terminal dims + or not com.in_interactive_session()): return True if (get_option('display.width') is not None or @@ -671,22 +674,25 @@ def to_dict(self, outtype='dict'): else: # pragma: no cover raise ValueError("outtype %s not understood" % outtype) - def to_gbq(self, destination_table, schema=None, col_order=None, if_exists='fail', **kwargs): + def to_gbq(self, destination_table, schema=None, col_order=None, + if_exists='fail', **kwargs): """Write a DataFrame to a Google BigQuery table. - If the table exists, the DataFrame will be appended. If not, a new table - will be created, in which case the schema will have to be specified. By default, - rows will be written in the order they appear in the DataFrame, though - the user may specify an alternative order. + If the table exists, the DataFrame will be appended. If not, a new + table will be created, in which case the schema will have to be + specified. By default, rows will be written in the order they appear + in the DataFrame, though the user may specify an alternative order. Parameters --------------- destination_table : string name of table to be written, in the form 'dataset.tablename' schema : sequence (optional) - list of column types in order for data to be inserted, e.g. ['INTEGER', 'TIMESTAMP', 'BOOLEAN'] + list of column types in order for data to be inserted, e.g. 
+ ['INTEGER', 'TIMESTAMP', 'BOOLEAN'] col_order : sequence (optional) - order which columns are to be inserted, e.g. ['primary_key', 'birthday', 'username'] + order which columns are to be inserted, e.g. ['primary_key', + 'birthday', 'username'] if_exists : {'fail', 'replace', 'append'} (optional) - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. @@ -696,15 +702,19 @@ def to_gbq(self, destination_table, schema=None, col_order=None, if_exists='fail Raises ------ SchemaMissing : - Raised if the 'if_exists' parameter is set to 'replace', but no schema is specified + Raised if the 'if_exists' parameter is set to 'replace', but no + schema is specified TableExists : - Raised if the specified 'destination_table' exists but the 'if_exists' parameter is set to 'fail' (the default) + Raised if the specified 'destination_table' exists but the + 'if_exists' parameter is set to 'fail' (the default) InvalidSchema : - Raised if the 'schema' parameter does not match the provided DataFrame + Raised if the 'schema' parameter does not match the provided + DataFrame """ from pandas.io import gbq - return gbq.to_gbq(self, destination_table, schema=None, col_order=None, if_exists='fail', **kwargs) + return gbq.to_gbq(self, destination_table, schema=None, col_order=None, + if_exists='fail', **kwargs) @classmethod def from_records(cls, data, index=None, exclude=None, columns=None, @@ -757,7 +767,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, values = [first_row] #if unknown length iterable (generator) - if nrows == None: + if nrows is None: #consume whole generator values += list(data) else: @@ -785,7 +795,8 @@ def from_records(cls, data, index=None, exclude=None, columns=None, arr_columns.append(k) arrays.append(v) - arrays, arr_columns = _reorder_arrays(arrays, arr_columns, columns) + arrays, arr_columns = _reorder_arrays(arrays, arr_columns, + columns) elif isinstance(data, (np.ndarray, DataFrame)): arrays, columns = _to_arrays(data, columns) @@ -864,7 +875,7 @@ def to_records(self, index=True, convert_datetime64=True): else: if isinstance(self.index, MultiIndex): # array of tuples to numpy cols. 
copy copy copy - ix_vals = lmap(np.array,zip(*self.index.values)) + ix_vals = lmap(np.array, zip(*self.index.values)) else: ix_vals = [self.index.values] @@ -1017,13 +1028,13 @@ def to_panel(self): from pandas.core.reshape import block2d_to_blocknd # only support this kind for now - if (not isinstance(self.index, MultiIndex) or # pragma: no cover + if (not isinstance(self.index, MultiIndex) or # pragma: no cover len(self.index.levels) != 2): raise NotImplementedError('Only 2-level MultiIndex are supported.') if not self.index.is_unique: raise ValueError("Can't convert non-uniquely indexed " - "DataFrame to Panel") + "DataFrame to Panel") self._consolidate_inplace() @@ -1228,8 +1239,8 @@ def to_stata( >>> writer.write_file() """ from pandas.io.stata import StataWriter - writer = StataWriter( - fname, self, convert_dates=convert_dates, encoding=encoding, byteorder=byteorder) + writer = StataWriter(fname, self, convert_dates=convert_dates, + encoding=encoding, byteorder=byteorder) writer.write_file() def to_sql(self, name, con, flavor='sqlite', if_exists='fail', **kwargs): @@ -1407,7 +1418,7 @@ def info(self, verbose=True, buf=None, max_cols=None): len(self.columns)) space = max([len(com.pprint_thing(k)) for k in self.columns]) + 4 counts = self.count() - if len(cols) != len(counts): # pragma: no cover + if len(cols) != len(counts): # pragma: no cover raise AssertionError('Columns must equal counts (%d != %d)' % (len(cols), len(counts))) for col, count in compat.iteritems(counts): @@ -1516,8 +1527,8 @@ def set_value(self, index, col, value): except KeyError: # set using a non-recursive method & reset the cache - self.loc[index,col] = value - self._item_cache.pop(col,None) + self.loc[index, col] = value + self._item_cache.pop(col, None) return self @@ -1581,7 +1592,7 @@ def _ixs(self, i, axis=0, copy=False): # a numpy error (as numpy should really raise) values = self._data.iget(i) if not len(values): - values = np.array([np.nan]*len(self.index),dtype=object) + values = np.array([np.nan]*len(self.index), dtype=object) return self._constructor_sliced.from_array( values, index=self.index, name=label, fastpath=True) @@ -1824,7 +1835,8 @@ def _box_item_values(self, key, values): def _box_col_values(self, values, items): """ provide boxed values for a column """ - return self._constructor_sliced.from_array(values, index=self.index, name=items, fastpath=True) + return self._constructor_sliced.from_array(values, index=self.index, + name=items, fastpath=True) def __setitem__(self, key, value): # see if we can slice the rows @@ -1877,11 +1889,13 @@ def _setitem_frame(self, key, value): def _ensure_valid_index(self, value): """ - ensure that if we don't have an index, that we can create one from the passed value + ensure that if we don't have an index, that we can create one from the + passed value """ if not len(self.index): if not isinstance(value, Series): - raise ValueError("cannot set a frame with no defined index and a non-series") + raise ValueError('Cannot set a frame with no defined index ' + 'and a non-series') self._data.set_axis(1, value.index.copy(), check_axis=False) def _set_item(self, key, value): @@ -1909,7 +1923,8 @@ def _set_item(self, key, value): def insert(self, loc, column, value, allow_duplicates=False): """ Insert column into DataFrame at specified location. 
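        A minimal sketch of the behavior (values are illustrative):

        >>> from pandas import DataFrame
        >>> df = DataFrame({'a': [1, 2], 'c': [3, 4]})
        >>> df.insert(1, 'b', [5, 6])   # place 'b' between 'a' and 'c'
        >>> list(df.columns)
        ['a', 'b', 'c']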
- if allow_duplicates is False, Raises Exception if column is already contained in the DataFrame + if allow_duplicates is False, Raises Exception if column is already + contained in the DataFrame Parameters ---------- @@ -1945,8 +1960,8 @@ def _sanitize_column(self, key, value): value = value.T elif isinstance(value, Index) or _is_sequence(value): if len(value) != len(self.index): - raise ValueError('Length of values does not match ' - 'length of index') + raise ValueError('Length of values does not match length of ' + 'index') if not isinstance(value, (np.ndarray, Index)): if isinstance(value, list) and len(value) > 0: @@ -1967,7 +1982,8 @@ def _sanitize_column(self, key, value): # broadcast across multiple columns if necessary if key in self.columns and value.ndim == 1: - if not self.columns.is_unique or isinstance(self.columns, MultiIndex): + if not self.columns.is_unique or isinstance(self.columns, + MultiIndex): existing_piece = self[key] if isinstance(existing_piece, DataFrame): value = np.tile(value, (len(existing_piece.columns), 1)) @@ -2053,7 +2069,7 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True): labels = self._get_axis(axis) if level is not None: loc, new_ax = labels.get_loc_level(key, level=level, - drop_level=drop_level) + drop_level=drop_level) if not copy and not isinstance(loc, slice): raise ValueError('Cannot retrieve view (copy=False)') @@ -2088,7 +2104,7 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True): index = self.index if isinstance(index, MultiIndex): loc, new_index = self.index.get_loc_level(key, - drop_level=drop_level) + drop_level=drop_level) else: loc = self.index.get_loc(key) @@ -2146,8 +2162,7 @@ def lookup(self, row_labels, col_labels): """ n = len(row_labels) if n != len(col_labels): - raise ValueError('Row labels must have same size as ' - 'column labels') + raise ValueError('Row labels must have same size as column labels') thresh = 1000 if not self._is_mixed_type or n > thresh: @@ -2173,13 +2188,14 @@ def lookup(self, row_labels, col_labels): #---------------------------------------------------------------------- # Reindexing and alignment - def _reindex_axes(self, axes, level, limit, method, fill_value, copy, takeable=False): + def _reindex_axes(self, axes, level, limit, method, fill_value, copy, + takeable=False): frame = self columns = axes['columns'] if columns is not None: - frame = frame._reindex_columns(columns, copy, level, - fill_value, limit, takeable=takeable) + frame = frame._reindex_columns(columns, copy, level, fill_value, + limit, takeable=takeable) index = axes['index'] if index is not None: @@ -2191,18 +2207,22 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy, takeable=F def _reindex_index(self, new_index, method, copy, level, fill_value=NA, limit=None, takeable=False): new_index, indexer = self.index.reindex(new_index, method, level, - limit=limit, copy_if_needed=True, + limit=limit, + copy_if_needed=True, takeable=takeable) return self._reindex_with_indexers({0: [new_index, indexer]}, - copy=copy, fill_value=fill_value, allow_dups=takeable) + copy=copy, fill_value=fill_value, + allow_dups=takeable) def _reindex_columns(self, new_columns, copy, level, fill_value=NA, limit=None, takeable=False): new_columns, indexer = self.columns.reindex(new_columns, level=level, - limit=limit, copy_if_needed=True, + limit=limit, + copy_if_needed=True, takeable=takeable) return self._reindex_with_indexers({1: [new_columns, indexer]}, - copy=copy, fill_value=fill_value, allow_dups=takeable) 
+ copy=copy, fill_value=fill_value, + allow_dups=takeable) def _reindex_multi(self, axes, copy, fill_value): """ we are guaranteed non-Nones in the axes! """ @@ -2218,7 +2238,9 @@ def _reindex_multi(self, axes, copy, fill_value): columns=new_columns) else: return self._reindex_with_indexers({0: [new_index, row_indexer], - 1: [new_columns, col_indexer]}, copy=copy, fill_value=fill_value) + 1: [new_columns, col_indexer]}, + copy=copy, + fill_value=fill_value) @Appender(_shared_docs['reindex'] % _shared_doc_kwargs) def reindex(self, index=None, columns=None, **kwargs): @@ -2434,7 +2456,8 @@ def _maybe_cast(values, labels=None): #---------------------------------------------------------------------- # Reindex-based selection methods - def dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False): + def dropna(self, axis=0, how='any', thresh=None, subset=None, + inplace=False): """ Return object with labels on given axis omitted where alternately any or all of the data are missing @@ -2493,7 +2516,6 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False): else: return result - def drop_duplicates(self, cols=None, take_last=False, inplace=False): """ Return DataFrame with duplicate rows removed, optionally only @@ -2630,14 +2652,15 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False, from pandas.core.groupby import _lexsort_indexer axis = self._get_axis_number(axis) - if axis not in [0, 1]: # pragma: no cover + if axis not in [0, 1]: # pragma: no cover raise AssertionError('Axis must be 0 or 1, got %s' % str(axis)) labels = self._get_axis(axis) if by is not None: if axis != 0: - raise ValueError('When sorting by column, axis must be 0 (rows)') + raise ValueError('When sorting by column, axis must be 0 ' + '(rows)') if not isinstance(by, (tuple, list)): by = [by] if com._is_sequence(ascending) and len(by) != len(ascending): @@ -2721,9 +2744,9 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False): ax = 'index' if axis == 0 else 'columns' if new_axis.is_unique: - d = { ax : new_axis } + d = {ax: new_axis} else: - d = { ax : indexer, 'takeable' : True } + d = {ax: indexer, 'takeable': True} return self.reindex(**d) if inplace: @@ -2816,18 +2839,23 @@ def _arith_op(left, right): def f(col): r = _arith_op(this[col].values, other[col].values) - return self._constructor_sliced(r,index=new_index,dtype=r.dtype) + return self._constructor_sliced(r, index=new_index, + dtype=r.dtype) - result = dict([ (col, f(col)) for col in this ]) + result = dict([(col, f(col)) for col in this]) # non-unique else: def f(i): - r = _arith_op(this.iloc[:,i].values, other.iloc[:,i].values) - return self._constructor_sliced(r,index=new_index,dtype=r.dtype) - - result = dict([ (i,f(i)) for i, col in enumerate(this.columns) ]) + r = _arith_op(this.iloc[:, i].values, + other.iloc[:, i].values) + return self._constructor_sliced(r, index=new_index, + dtype=r.dtype) + + result = dict([ + (i, f(i)) for i, col in enumerate(this.columns) + ]) result = self._constructor(result, index=new_index, copy=False) result.columns = new_columns return result @@ -2894,7 +2922,6 @@ def _combine_const(self, other, func, raise_on_error=True): new_data = self._data.eval(func, other, raise_on_error=raise_on_error) return self._constructor(new_data) - def _compare_frame_evaluate(self, other, func, str_rep): # unique @@ -2907,7 +2934,8 @@ def _compare(a, b): # non-unique else: def _compare(a, b): - return dict([(i,func(a.iloc[:,i], b.iloc[:,i])) for i, col in 
enumerate(a.columns)]) + return dict([(i, func(a.iloc[:, i], b.iloc[:, i])) + for i, col in enumerate(a.columns)]) new_data = expressions.evaluate(_compare, str_rep, self, other) result = self._constructor(data=new_data, index=self.index, copy=False) @@ -2917,7 +2945,7 @@ def _compare(a, b): def _compare_frame(self, other, func, str_rep): if not self._indexed_same(other): raise ValueError('Can only compare identically-labeled ' - 'DataFrame objects') + 'DataFrame objects') return self._compare_frame_evaluate(other, func, str_rep) def _flex_compare_frame(self, other, func, str_rep, level): @@ -3046,7 +3074,8 @@ def combiner(x, y, needs_i8_conversion=False): else: mask = isnull(x_values) - return expressions.where(mask, y_values, x_values, raise_on_error=True) + return expressions.where(mask, y_values, x_values, + raise_on_error=True) return self.combine(other, combiner, overwrite=False) @@ -3070,7 +3099,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, contain data in the same place. """ # TODO: Support other joins - if join != 'left': # pragma: no cover + if join != 'left': # pragma: no cover raise NotImplementedError("Only left join is supported") if not isinstance(other, DataFrame): @@ -3413,7 +3442,7 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True): series_gen = (Series.from_array(arr, index=res_columns, name=name) for i, (arr, name) in enumerate(zip(values, res_index))) - else: # pragma : no cover + else: # pragma : no cover raise AssertionError('Axis must be 0 or 1, got %s' % str(axis)) i = None @@ -3442,7 +3471,7 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True): if i is not None: k = res_index[i] e.args = e.args + ('occurred at index %s' % - com.pprint_thing(k),) + com.pprint_thing(k),) raise if len(results) > 0 and _is_sequence(results[0]): @@ -3837,13 +3866,13 @@ def pretty_name(x): destat = [] for i in range(len(numdata.columns)): - series = numdata.iloc[:,i] + series = numdata.iloc[:, i] destat.append([series.count(), series.mean(), series.std(), series.min(), series.quantile(lb), series.median(), series.quantile(ub), series.max()]) - return self._constructor(lmap(list, zip(*destat)), index=destat_columns, - columns=numdata.columns) + return self._constructor(lmap(list, zip(*destat)), + index=destat_columns, columns=numdata.columns) #---------------------------------------------------------------------- # ndarray-like stats methods @@ -3920,7 +3949,8 @@ def _count_level(self, level, axis=0, numeric_only=False): else: return result - def any(self, axis=None, bool_only=None, skipna=True, level=None, **kwargs): + def any(self, axis=None, bool_only=None, skipna=True, level=None, + **kwargs): """ Return whether any element is True over requested axis. %(na_action)s @@ -3950,7 +3980,8 @@ def any(self, axis=None, bool_only=None, skipna=True, level=None, **kwargs): return self._reduce(nanops.nanany, axis=axis, skipna=skipna, numeric_only=bool_only, filter_type='bool') - def all(self, axis=None, bool_only=None, skipna=True, level=None, **kwargs): + def all(self, axis=None, bool_only=None, skipna=True, level=None, + **kwargs): """ Return whether all elements are True over requested axis. 
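        A minimal sketch of any/all on a boolean frame (output abbreviated):

        >>> from pandas import DataFrame
        >>> df = DataFrame({'a': [True, False], 'b': [True, True]})
        >>> df.all()
        a    False
        b     True
        dtype: bool
        >>> df.any()
        a    True
        b    True
        dtype: bool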
%(na_action)s @@ -3987,7 +4018,8 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None, labels = self._get_agg_axis(axis) # exclude timedelta/datetime unless we are uniform types - if axis == 1 and self._is_mixed_type and len(set(self.dtypes) & _DATELIKE_DTYPES): + if axis == 1 and self._is_mixed_type and len(set(self.dtypes) & + _DATELIKE_DTYPES): numeric_only = True if numeric_only is None: @@ -4020,7 +4052,7 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None, data = self._get_numeric_data() elif filter_type == 'bool': data = self._get_bool_data() - else: # pragma: no cover + else: # pragma: no cover msg = ("Generating numeric_only data with filter_type %s" "not supported." % filter_type) raise NotImplementedError(msg) @@ -4167,6 +4199,7 @@ def f(arr): data = self._get_numeric_data() if numeric_only else self return data.apply(f, axis=axis) + def rank(self, axis=0, numeric_only=None, method='average', na_option='keep', ascending=True): """ @@ -4242,7 +4275,7 @@ def to_timestamp(self, freq=None, how='start', axis=0, copy=True): new_data.set_axis(1, self.index.to_timestamp(freq=freq, how=how)) elif axis == 1: new_data.set_axis(0, self.columns.to_timestamp(freq=freq, how=how)) - else: # pragma: no cover + else: # pragma: no cover raise AssertionError('Axis must be 0 or 1. Got %s' % str(axis)) return self._constructor(new_data) @@ -4277,7 +4310,7 @@ def to_period(self, freq=None, axis=0, copy=True): if freq is None: freq = self.columns.freqstr or self.columns.inferred_freq new_data.set_axis(0, self.columns.to_period(freq=freq)) - else: # pragma: no cover + else: # pragma: no cover raise AssertionError('Axis must be 0 or 1. Got %s' % str(axis)) return self._constructor(new_data) @@ -4510,7 +4543,7 @@ def extract_index(data): elif isinstance(v, dict): have_dicts = True indexes.append(list(v.keys())) - elif is_list_like(v) and getattr(v,'ndim',1) == 1: + elif is_list_like(v) and getattr(v, 'ndim', 1) == 1: have_raw_arrays = True raw_lengths.append(len(v)) @@ -4658,7 +4691,8 @@ def _masked_rec_array_to_mgr(data, index, columns, dtype, copy): def _reorder_arrays(arrays, arr_columns, columns): # reorder according to the columns - if columns is not None and len(columns) and arr_columns is not None and len(arr_columns): + if (columns is not None and len(columns) and arr_columns is not None and + len(arr_columns)): indexer = _ensure_index( arr_columns).get_indexer(columns) arr_columns = _ensure_index( @@ -4681,13 +4715,15 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): from pandas.core.index import _get_combined_index if columns is None: - columns = _get_combined_index([s.index for s in data if getattr(s,'index',None) is not None ]) + columns = _get_combined_index([ + s.index for s in data if getattr(s, 'index', None) is not None + ]) indexer_cache = {} aligned_values = [] for s in data: - index = getattr(s,'index',None) + index = getattr(s, 'index', None) if index is None: index = _default_index(len(s)) @@ -4741,13 +4777,13 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None): def _get_names_from_index(data): index = lrange(len(data)) - has_some_name = any([getattr(s,'name',None) is not None for s in data]) + has_some_name = any([getattr(s, 'name', None) is not None for s in data]) if not has_some_name: return index count = 0 for i, s in enumerate(data): - n = getattr(s,'name',None) + n = getattr(s, 'name', None) if n is not None: index[i] = n else: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 
efa083e239f63..f960f64e7be16 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7,7 +7,8 @@ import pandas as pd from pandas.core.base import PandasObject -from pandas.core.index import Index, MultiIndex, _ensure_index, InvalidIndexError +from pandas.core.index import (Index, MultiIndex, _ensure_index, + InvalidIndexError) import pandas.core.indexing as indexing from pandas.core.indexing import _maybe_convert_indices from pandas.tseries.index import DatetimeIndex @@ -34,6 +35,7 @@ args_transpose='axes to permute (int or label for' ' object)') + def is_dictlike(x): return isinstance(x, (dict, com.ABCSeries)) @@ -49,7 +51,8 @@ def _single_replace(self, to_replace, method, inplace, limit): if values.dtype == orig_dtype and inplace: return - result = pd.Series(values, index=self.index, dtype=self.dtype).__finalize__(self) + result = pd.Series(values, index=self.index, + dtype=self.dtype).__finalize__(self) if inplace: self._data = result._data @@ -70,13 +73,14 @@ class NDFrame(PandasObject): axes : list copy : boolean, default False """ - _internal_names = [ - '_data', 'name', '_cacher', '_is_copy', '_subtyp', '_index', '_default_kind', '_default_fill_value'] + _internal_names = ['_data', 'name', '_cacher', '_is_copy', '_subtyp', + '_index', '_default_kind', '_default_fill_value'] _internal_names_set = set(_internal_names) _metadata = [] _is_copy = None - def __init__(self, data, axes=None, copy=False, dtype=None, fastpath=False): + def __init__(self, data, axes=None, copy=False, dtype=None, + fastpath=False): if not fastpath: if dtype is not None: @@ -101,7 +105,8 @@ def _validate_dtype(self, dtype): # a compound dtype if dtype.kind == 'V': raise NotImplementedError("compound dtypes are not implemented" - "in the {0} constructor".format(self.__class__.__name__)) + "in the {0} constructor" + .format(self.__class__.__name__)) return dtype def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): @@ -136,7 +141,7 @@ def __unicode__(self): def _local_dir(self): """ add the string-like attributes from the info_axis """ return [c for c in self._info_axis - if isinstance(c, string_types) and isidentifier(c) ] + if isinstance(c, string_types) and isidentifier(c)] @property def _constructor_sliced(self): @@ -156,7 +161,8 @@ def _setup_axes( stat_axis_num : the number of axis for the default stats (int) aliases : other names for a single axis (dict) slicers : how axes slice to others (dict) - axes_are_reversed : boolean whether to treat passed axes as reversed (DataFrame) + axes_are_reversed : boolean whether to treat passed axes as + reversed (DataFrame) build_axes : setup the axis properties (default True) """ @@ -238,7 +244,9 @@ def _construct_axes_from_arguments(self, args, kwargs, require_all=False): if a in kwargs: if alias in kwargs: raise TypeError( - "arguments are multually exclusive for [%s,%s]" % (a, alias)) + "arguments are mutually exclusive for [%s,%s]" % + (a, alias) + ) continue if alias in kwargs: kwargs[a] = kwargs.pop(alias) @@ -277,7 +285,8 @@ def _get_axis_number(self, axis): return self._AXIS_NUMBERS[axis] except: pass - raise ValueError('No axis named {0} for object type {1}'.format(axis,type(self))) + raise ValueError('No axis named {0} for object type {1}' + .format(axis, type(self))) def _get_axis_name(self, axis): axis = self._AXIS_ALIASES.get(axis, axis) @@ -289,7 +298,8 @@ def _get_axis_name(self, axis): return self._AXIS_NAMES[axis] except: pass - raise ValueError('No axis named {0} for object type {1}'.format(axis,type(self))) + raise ValueError('No 
axis named {0} for object type {1}' + .format(axis, type(self))) def _get_axis(self, axis): name = self._get_axis_name(axis) @@ -399,6 +409,7 @@ def _set_axis(self, axis, labels): ------- y : same as input """ + @Appender(_shared_docs['transpose'] % _shared_doc_kwargs) def transpose(self, *args, **kwargs): @@ -458,7 +469,8 @@ def pop(self, item): def squeeze(self): """ squeeze length 1 dimensions """ try: - return self.ix[tuple([slice(None) if len(a) > 1 else a[0] for a in self.axes])] + return self.ix[tuple([slice(None) if len(a) > 1 else a[0] + for a in self.axes])] except: return self @@ -506,6 +518,7 @@ def swaplevel(self, i, j, axis=0): ------- renamed : %(klass)s (new object) """ + @Appender(_shared_docs['rename'] % dict(axes='axes keywords for this' ' object', klass='NDFrame')) def rename(self, *args, **kwargs): @@ -530,14 +543,14 @@ def f(x): return f - self._consolidate_inplace() result = self if inplace else self.copy(deep=copy) # start in the axis order to eliminate too many copies for axis in lrange(self._AXIS_LEN): v = axes.get(self._AXIS_NAMES[axis]) - if v is None: continue + if v is None: + continue f = _get_rename_function(v) baxis = self._get_block_manager_axis(axis) @@ -572,7 +585,7 @@ def rename_axis(self, mapper, axis=0, copy=True, inplace=False): renamed : type of caller """ axis = self._get_axis_name(axis) - d = { 'copy' : copy, 'inplace' : inplace } + d = {'copy': copy, 'inplace': inplace} d[axis] = mapper return self.rename(**d) @@ -580,7 +593,8 @@ def rename_axis(self, mapper, axis=0, copy=True, inplace=False): # Comparisons def _indexed_same(self, other): - return all([self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS]) + return all([self._get_axis(a).equals(other._get_axis(a)) + for a in self._AXIS_ORDERS]) def __neg__(self): arr = operator.neg(_values_from_object(self)) @@ -626,7 +640,8 @@ def iteritems(self): def iterkv(self, *args, **kwargs): "iteritems alias used to get around 2to3. Deprecated" warnings.warn("iterkv is deprecated and will be removed in a future " - "release, use ``iteritems`` instead.", DeprecationWarning) + "release, use ``iteritems`` instead.", + DeprecationWarning) return self.iteritems(*args, **kwargs) def __len__(self): @@ -644,7 +659,8 @@ def empty(self): def __nonzero__(self): raise ValueError("The truth value of a {0} is ambiguous. " - "Use a.empty, a.bool(), a.item(), a.any() or a.all().".format(self.__class__.__name__)) + "Use a.empty, a.bool(), a.item(), a.any() or a.all()." + .format(self.__class__.__name__)) __bool__ = __nonzero__ @@ -655,10 +671,11 @@ def bool(self): Raise a ValueError if the PandasObject does not have exactly 1 element, or that element is not boolean """ v = self.squeeze() - if isinstance(v, (bool,np.bool_)): + if isinstance(v, (bool, np.bool_)): return bool(v) elif np.isscalar(v): - raise ValueError("bool cannot act on a non-boolean single element {0}".format(self.__class__.__name__)) + raise ValueError("bool cannot act on a non-boolean single element " + "{0}".format(self.__class__.__name__)) self.__nonzero__() @@ -823,9 +840,9 @@ def to_hdf(self, path_or_buf, key, **kwargs): fixed(f) : Fixed format Fast writing/reading. 
Not-appendable, nor searchable table(t) : Table format - Write as a PyTables Table structure which may perform worse but - allow more flexible operations like searching / selecting subsets - of the data + Write as a PyTables Table structure which may perform + worse but allow more flexible operations like searching + / selecting subsets of the data append : boolean, default False For Table formats, append the input data to the existing complevel : int, 1-9, default 0 @@ -852,10 +869,11 @@ def to_msgpack(self, path_or_buf=None, **kwargs): Parameters ---------- path : string File path, buffer-like, or None - if None, return generated string + if None, return generated string append : boolean whether to append to an existing msgpack - (default is False) - compress : type of compressor (zlib or blosc), default to None (no compression) + (default is False) + compress : type of compressor (zlib or blosc), default to None (no + compression) """ from pandas.io import packers @@ -956,7 +974,7 @@ def _get_item_cache(self, item): values = self._data.get(item) res = self._box_item_values(item, values) cache[item] = res - res._cacher = (item,weakref.ref(self)) + res._cacher = (item, weakref.ref(self)) return res def _box_item_values(self, key, values): @@ -970,10 +988,10 @@ def _maybe_cache_changed(self, item, value): def _maybe_update_cacher(self, clear=False): """ see if we need to update our parent cacher if clear, then clear our cache """ - cacher = getattr(self,'_cacher',None) + cacher = getattr(self, '_cacher', None) if cacher is not None: try: - cacher[1]()._maybe_cache_changed(cacher[0],self) + cacher[1]()._maybe_cache_changed(cacher[0], self) except: # our referant is dead @@ -984,7 +1002,7 @@ def _maybe_update_cacher(self, clear=False): def _clear_item_cache(self, i=None): if i is not None: - self._item_cache.pop(i,None) + self._item_cache.pop(i, None) else: self._item_cache.clear() @@ -1002,11 +1020,13 @@ def _check_setitem_copy(self): if self._is_copy: value = config._get_option_fast('mode.chained_assignment') - t = "A value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_index,col_indexer] = value instead" + t = ("A value is trying to be set on a copy of a slice from a " + "DataFrame.\nTry using .loc[row_index,col_indexer] = value " + "instead") if value == 'raise': raise SettingWithCopyError(t) elif value == 'warn': - warnings.warn(t,SettingWithCopyWarning) + warnings.warn(t, SettingWithCopyWarning) def __delitem__(self, key): """ @@ -1066,10 +1086,13 @@ def take(self, indices, axis=0, convert=True): if baxis == 0: labels = self._get_axis(axis) new_items = labels.take(indices) - new_data = self._data.reindex_axis(new_items, indexer=indices, axis=0) + new_data = self._data.reindex_axis(new_items, indexer=indices, + axis=0) else: new_data = self._data.take(indices, axis=baxis) - return self._constructor(new_data)._setitem_copy(True).__finalize__(self) + return self._constructor(new_data)\ + ._setitem_copy(True)\ + .__finalize__(self) # TODO: Check if this was clearer in 0.12 def select(self, crit, axis=0): @@ -1149,7 +1172,7 @@ def drop(self, labels, axis=0, level=None, inplace=False, **kwargs): new_axis = axis.drop(labels, level=level) else: new_axis = axis.drop(labels) - dropped = self.reindex(**{ axis_name: new_axis }) + dropped = self.reindex(**{axis_name: new_axis}) try: dropped.axes[axis_].set_names(axis.names, inplace=True) except AttributeError: @@ -1247,7 +1270,8 @@ def sort_index(self, axis=0, ascending=True): Parameters ---------- - %(axes)s : 
array-like, optional (can be specified in order, or as keywords) + %(axes)s : array-like, optional (can be specified in order, or as + keywords) New labels / index to conform to. Preferably an Index object to avoid duplicating data method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None @@ -1277,6 +1301,7 @@ def sort_index(self, axis=0, ascending=True): """ # TODO: Decide if we care about having different examples for different # kinds + @Appender(_shared_docs['reindex'] % dict(axes="axes", klass="NDFrame")) def reindex(self, *args, **kwargs): @@ -1298,18 +1323,21 @@ def reindex(self, *args, **kwargs): except: pass - # if all axes that are requested to reindex are equal, then only copy if indicated - # must have index names equal here as well as values - if all([ self._get_axis(axis).identical(ax) for axis, ax in axes.items() if ax is not None ]): + # if all axes that are requested to reindex are equal, then only copy + # if indicated must have index names equal here as well as values + if all([self._get_axis(axis).identical(ax) + for axis, ax in axes.items() if ax is not None]): if copy: return self.copy() return self # perform the reindex on the axes return self._reindex_axes(axes, level, limit, - method, fill_value, copy, takeable=takeable).__finalize__(self) + method, fill_value, copy, + takeable=takeable).__finalize__(self) - def _reindex_axes(self, axes, level, limit, method, fill_value, copy, takeable=False): + def _reindex_axes(self, axes, level, limit, method, fill_value, copy, + takeable=False): """ perform the reinxed for all the axes """ obj = self for a in self._AXIS_ORDERS: @@ -1324,35 +1352,42 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy, takeable=F axis = self._get_axis_number(a) ax = self._get_axis(a) try: - new_index, indexer = ax.reindex(labels, level=level, - limit=limit, method=method, takeable=takeable) + new_index, indexer = ax.reindex( + labels, level=level, limit=limit, method=method, + takeable=takeable) except (ValueError): - # catch trying to reindex a non-monotonic index with a specialized indexer - # e.g. pad, so fallback to the regular indexer - # this will show up on reindexing a not-naturally ordering series, e.g. - # Series([1,2,3,4],index=['a','b','c','d']).reindex(['c','b','g'],method='pad') - new_index, indexer = ax.reindex(labels, level=level, - limit=limit, method=None, takeable=takeable) + # catch trying to reindex a non-monotonic index with a + # specialized indexer e.g. pad, so fallback to the regular + # indexer this will show up on reindexing a not-naturally + # ordering series, + # e.g. 
+ # Series( + # [1,2,3,4], index=['a','b','c','d'] + # ).reindex(['c','b','g'], method='pad') + new_index, indexer = ax.reindex( + labels, level=level, limit=limit, method=None, + takeable=takeable) obj = obj._reindex_with_indexers( - {axis: [new_index, indexer]}, method=method, fill_value=fill_value, - limit=limit, copy=copy) + {axis: [new_index, indexer]}, method=method, + fill_value=fill_value, limit=limit, copy=copy) return obj def _needs_reindex_multi(self, axes, method, level): """ check if we do need a multi reindex """ - return (com._count_not_none(*axes.values()) == self._AXIS_LEN) and method is None and level is None and not self._is_mixed_type + return ((com._count_not_none(*axes.values()) == self._AXIS_LEN) and + method is None and level is None and not self._is_mixed_type) def _reindex_multi(self, axes, copy, fill_value): return NotImplemented _shared_docs['reindex_axis'] = ( - """Conform input object to new index with optional filling logic, placing - NA/NaN in locations having no value in the previous index. A new object - is produced unless the new index is equivalent to the current one and - copy=False + """Conform input object to new index with optional filling logic, + placing NA/NaN in locations having no value in the previous index. A + new object is produced unless the new index is equivalent to the + current one and copy=False Parameters ---------- @@ -1384,6 +1419,7 @@ def _reindex_multi(self, axes, copy, fill_value): ------- reindexed : %(klass)s """) + @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs) def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, limit=None, fill_value=np.nan): @@ -1392,12 +1428,15 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, axis_name = self._get_axis_name(axis) axis_values = self._get_axis(axis_name) method = com._clean_fill_method(method) - new_index, indexer = axis_values.reindex(labels, method, level, - limit=limit, copy_if_needed=True) - return self._reindex_with_indexers({axis: [new_index, indexer]}, method=method, fill_value=fill_value, - limit=limit, copy=copy).__finalize__(self) - - def _reindex_with_indexers(self, reindexers, method=None, fill_value=np.nan, limit=None, copy=False, allow_dups=False): + new_index, indexer = axis_values.reindex( + labels, method, level, limit=limit, copy_if_needed=True) + return self._reindex_with_indexers( + {axis: [new_index, indexer]}, method=method, fill_value=fill_value, + limit=limit, copy=copy).__finalize__(self) + + def _reindex_with_indexers(self, reindexers, method=None, + fill_value=np.nan, limit=None, copy=False, + allow_dups=False): """ allow_dups indicates an internal call here """ # reindex doing multiple operations on different axes if indiciated @@ -1420,13 +1459,16 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=np.nan, lim # TODO: speed up on homogeneous DataFrame objects indexer = com._ensure_int64(indexer) new_data = new_data.reindex_indexer(index, indexer, axis=baxis, - fill_value=fill_value, allow_dups=allow_dups) + fill_value=fill_value, + allow_dups=allow_dups) - elif baxis == 0 and index is not None and index is not new_data.axes[baxis]: + elif (baxis == 0 and index is not None and + index is not new_data.axes[baxis]): new_data = new_data.reindex_items(index, copy=copy, fill_value=fill_value) - elif baxis > 0 and index is not None and index is not new_data.axes[baxis]: + elif (baxis > 0 and index is not None and + index is not new_data.axes[baxis]): new_data = new_data.copy(deep=copy) 
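+            # Illustrative note, not part of this change: with no indexer the
+            # axis labels are simply swapped onto the (optionally copied)
+            # block manager, i.e. a pure relabel rather than a take; the
+            # fill_value/NaN path above only runs when an indexer is present.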
new_data.set_axis(baxis, index)
@@ -1470,14 +1512,16 @@ def filter(self, items=None, like=None, regex=None, axis=None):
         axis_values = self._get_axis(axis_name)
 
         if items is not None:
-            return self.reindex(**{axis_name: [r for r in items if r in axis_values]})
+            return self.reindex(**{axis_name: [r for r in items
+                                               if r in axis_values]})
         elif like:
             matchf = lambda x: (like in x if isinstance(x, string_types)
                                 else like in str(x))
             return self.select(matchf, axis=axis_name)
         elif regex:
             matcher = re.compile(regex)
-            return self.select(lambda x: matcher.search(x) is not None, axis=axis_name)
+            return self.select(lambda x: matcher.search(x) is not None,
+                               axis=axis_name)
         else:
             raise TypeError('Must pass either `items`, `like`, or `regex`')
 
@@ -1508,9 +1552,10 @@ def __finalize__(self, other, method=None, **kwargs):
 
         Parameters
         ----------
-        other : the object from which to get the attributes that we are going to propagate
-        method : optional, a passed method name ; possibily to take different types
-            of propagation actions based on this
+        other : the object from which to get the attributes that we are going
+            to propagate
+        method : optional, a passed method name; possibly to take different
+            types of propagation actions based on this
 
         """
         for name in self._metadata:
@@ -1518,8 +1563,11 @@ def __getattr__(self, name):
         return self
 
     def __getattr__(self, name):
-        """After regular attribute access, try looking up the name of a the info
-        This allows simpler access to columns for interactive use."""
+        """After regular attribute access, try looking up the name in the
+        info axis.
+
+        This allows simpler access to columns for interactive use.
+        """
         if name in self._info_axis:
             return self[name]
         raise AttributeError("'%s' object has no attribute '%s'" %
@@ -1594,7 +1642,8 @@ def _protect_consolidate(self, f):
         return result
 
     def _get_numeric_data(self):
-        return self._constructor(self._data.get_numeric_data()).__finalize__(self)
+        return self._constructor(
+            self._data.get_numeric_data()).__finalize__(self)
 
     def _get_bool_data(self):
         return self._constructor(self._data.get_bool_data()).__finalize__(self)
@@ -1608,9 +1657,10 @@ def as_matrix(self, columns=None):
         are presented in sorted order unless a specific list of columns is
         provided.
 
-        NOTE: the dtype will be a lower-common-denominator dtype (implicit upcasting)
-              that is to say if the dtypes (even of numeric types) are mixed, the one that accomodates all will be chosen
-              use this with care if you are not dealing with the blocks
+        NOTE: the dtype will be a lower-common-denominator dtype (implicit
+              upcasting) that is to say if the dtypes (even of numeric types)
+              are mixed, the one that accommodates all will be chosen; use this
+              with care if you are not dealing with the blocks
 
        e.g. if the dtypes are float16,float32 -> float32
                               float16,float32,float64 -> float64
@@ -1654,11 +1704,14 @@ def get_ftype_counts(self):
 
     def as_blocks(self, columns=None):
         """
-        Convert the frame to a dict of dtype -> Constructor Types that each has a homogeneous dtype.
+        Convert the frame to a dict of dtype -> Constructor Types that each has
+        a homogeneous dtype.
+
         are presented in sorted order unless a specific list of columns is
        provided.
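Example (not from the patch): the implicit upcasting the as_matrix note above warns about; mixed numeric dtypes are widened to the one that accommodates all values.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'a': np.array([1, 2], dtype='int32'),
                       'b': np.array([0.5, 1.5], dtype='float64')})

    # int32 mixed with float64 upcasts to float64 in the single ndarray.
    mat = df.as_matrix()
    assert mat.dtype == np.float64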
-        NOTE: the dtypes of the blocks WILL BE PRESERVED HERE (unlike in as_matrix)
+        NOTE: the dtypes of the blocks WILL BE PRESERVED HERE (unlike in
+              as_matrix)
 
         Parameters
         ----------
@@ -1720,23 +1773,27 @@ def copy(self, deep=True):
             data = data.copy()
         return self._constructor(data).__finalize__(self)
 
-    def convert_objects(self, convert_dates=True, convert_numeric=False, copy=True):
+    def convert_objects(self, convert_dates=True, convert_numeric=False,
+                        copy=True):
         """
         Attempt to infer better dtype for object columns
 
         Parameters
         ----------
-        convert_dates : if True, attempt to soft convert_dates, if 'coerce', force conversion (and non-convertibles get NaT)
-        convert_numeric : if True attempt to coerce to numerbers (including strings), non-convertibles get NaN
+        convert_dates : if True, attempt to soft convert dates; if 'coerce',
+            force conversion (and non-convertibles get NaT)
+        convert_numeric : if True attempt to coerce to numbers (including
+            strings), non-convertibles get NaN
        copy : Boolean, if True, return copy, default is True
 
        Returns
        -------
        converted : same as input object
        """
-        return self._constructor(self._data.convert(convert_dates=convert_dates,
-                                                    convert_numeric=convert_numeric,
-                                                    copy=copy)).__finalize__(self)
+        return self._constructor(
+            self._data.convert(convert_dates=convert_dates,
+                               convert_numeric=convert_numeric,
+                               copy=copy)).__finalize__(self)
 
     #----------------------------------------------------------------------
     # Filling NA's
@@ -1767,7 +1824,8 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
             Maximum size gap to forward or backward fill
         downcast : dict, default is None, a dict of item->dtype of what to
             downcast if possible, or the string 'infer' which will try to
-            downcast to an appropriate equal type (e.g. float64 to int64 if possible)
+            downcast to an appropriate equal type (e.g. float64 to int64 if
+            possible)
 
         See also
         --------
@@ -1800,13 +1858,16 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
 
             # > 3d
             if self.ndim > 3:
-                raise NotImplementedError('cannot fillna with a method for > 3dims')
+                raise NotImplementedError(
+                    'Cannot fillna with a method for > 3dims'
+                )
 
             # 3d
             elif self.ndim == 3:
 
                 # fill in 2d chunks
-                result = dict([ (col,s.fillna(method=method, value=value)) for col, s in compat.iteritems(self) ])
+                result = dict([(col, s.fillna(method=method, value=value))
+                               for col, s in compat.iteritems(self)])
                 return self._constructor.from_dict(result).__finalize__(self)
 
             # 2d or less
@@ -2036,7 +2097,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
                     raise TypeError('Fill value must be scalar, dict, or '
                                     'Series')
 
-            elif com.is_list_like(to_replace): # [NA, ''] -> [0, 'missing']
+            elif com.is_list_like(to_replace):  # [NA, ''] -> [0, 'missing']
                 if com.is_list_like(value):
                     if len(to_replace) != len(value):
                         raise ValueError('Replacement lists must match '
@@ -2212,8 +2273,8 @@ def isnull(self):
         return isnull(self).__finalize__(self)
 
     def notnull(self):
-        """
-        Return a boolean same-sized object indicating if the values are not null
+        """Return a boolean same-sized object indicating if the values are
+        not null
         """
         return notnull(self).__finalize__(self)
 
@@ -2305,8 +2366,8 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
         group_keys : boolean, default True
             When calling apply, add group keys to index to identify pieces
         squeeze : boolean, default False
-            reduce the dimensionaility of the return type if possible, otherwise
-            return a consistent type
+            reduce the dimensionality of the return type if possible,
+            otherwise return a consistent type
 
         Examples
         --------
@@ -2590,7 +2651,8 @@ def _align_series(self, other, join='outer', axis=None, level=None,
         # series/series compat
         if isinstance(self, ABCSeries) and isinstance(other, ABCSeries):
             if axis:
-                raise ValueError('cannot align series to a series other than axis 0')
+                raise ValueError('cannot align series to a series other than '
+                                 'axis 0')
 
             join_index, lidx, ridx = self.index.join(other.index, how=join,
                                                      level=level,
@@ -2607,8 +2669,8 @@ def _align_series(self, other, join='outer', axis=None, level=None,
             join_index = self.index
             lidx, ridx = None, None
             if not self.index.equals(other.index):
-                join_index, lidx, ridx = self.index.join(other.index, how=join,
-                                                         return_indexers=True)
+                join_index, lidx, ridx = self.index.join(
+                    other.index, how=join, return_indexers=True)
 
             if lidx is not None:
                 fdata = fdata.reindex_indexer(join_index, lidx, axis=1)
@@ -2617,8 +2679,8 @@ def _align_series(self, other, join='outer', axis=None, level=None,
             lidx, ridx = None, None
             if not self.columns.equals(other.index):
                 join_index, lidx, ridx = \
-                    self.columns.join(other.index, how=join,
-                                      return_indexers=True)
+                    self.columns.join(other.index, how=join,
+                                      return_indexers=True)
 
             if lidx is not None:
                 fdata = fdata.reindex_indexer(join_index, lidx, axis=0)
@@ -2639,7 +2701,8 @@ def _align_series(self, other, join='outer', axis=None, level=None,
                     right_result.fillna(fill_value, method=method,
                                         limit=limit))
         else:
-            return left_result.__finalize__(self), right_result.__finalize__(other)
+            return (left_result.__finalize__(self),
+                    right_result.__finalize__(other))
 
     def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
               try_cast=False, raise_on_error=True):
@@ -2669,8 +2732,8 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
                 cond = cond.reindex(**self._construct_axes_dict())
         else:
             if not hasattr(cond, 'shape'):
-                raise ValueError('where requires an ndarray like object for its '
-                                 'condition')
+                raise ValueError('where requires an ndarray like object for '
+                                 'its condition')
             if cond.shape != self.shape:
                 raise ValueError(
                     'Array conditional must be same shape as self')
@@ -2693,12 +2756,16 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
                                       fill_value=np.nan)
 
                 # if we are NOT aligned, raise as we cannot where index
-                if axis is None and not all([ other._get_axis(i).equals(ax) for i, ax in enumerate(self.axes) ]):
+                if (axis is None and
+                        not all([other._get_axis(i).equals(ax)
+                                 for i, ax in enumerate(self.axes)])):
                     raise InvalidIndexError
 
                 # slice me out of the other
                 else:
-                    raise NotImplemented("cannot align with a higher dimensional NDFrame")
+                    raise NotImplementedError(
+                        "cannot align with a higher dimensional NDFrame"
+                    )
 
         elif is_list_like(other):
 
@@ -2770,11 +2837,13 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
         if inplace:
             # we may have different type blocks come out of putmask, so
             # reconstruct the block manager
-            self._data = self._data.putmask(cond, other, align=axis is None, inplace=True)
+            self._data = self._data.putmask(cond, other, align=axis is None,
+                                            inplace=True)
 
         else:
-            new_data = self._data.where(
-                other, cond, align=axis is None, raise_on_error=raise_on_error, try_cast=try_cast)
+            new_data = self._data.where(other, cond, align=axis is None,
+                                        raise_on_error=raise_on_error,
+                                        try_cast=try_cast)
 
             return self._constructor(new_data).__finalize__(self)
 
@@ -2793,7 +2862,6 @@ def mask(self, cond):
         """
         return self.where(~cond, np.nan)
 
-
     def shift(self, periods=1, freq=None, axis=0, **kwds):
         """
         Shift index by desired number of periods with an optional time freq
@@ -2862,7 +2930,6 @@ def tshift(self, periods=1, freq=None, axis=0, **kwds):
                 msg = 'Freq was not given and was not set in the index'
                 raise ValueError(msg)
 
-
         if periods == 0:
             return self
 
@@ -2923,12 +2990,13 @@ def truncate(self, before=None, after=None, axis=None, copy=True):
                 raise ValueError('Truncate: %s must be after %s' %
                                  (after, before))
 
-        slicer = [ slice(None, None) ] * self._AXIS_LEN
-        slicer[axis] = slice(before,after)
+        slicer = [slice(None, None)] * self._AXIS_LEN
+        slicer[axis] = slice(before, after)
         result = self.ix[tuple(slicer)]
 
         if isinstance(ax, MultiIndex):
-            setattr(result,self._get_axis_name(axis),ax.truncate(before, after))
+            setattr(result, self._get_axis_name(axis),
+                    ax.truncate(before, after))
 
         if copy:
             result = result.copy()
@@ -3083,8 +3151,11 @@ def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwds):
     def _add_numeric_operations(cls):
         """ add the operations to the cls; evaluate the doc strings again """
 
-        axis_descr = "{" + ', '.join([ "{0} ({1})".format(a,i) for i, a in enumerate(cls._AXIS_ORDERS)]) + "}"
-        name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else 'scalar'
+        axis_descr = "{%s}" % ', '.join([
+            "{0} ({1})".format(a, i) for i, a in enumerate(cls._AXIS_ORDERS)
+        ])
+        name = (cls._constructor_sliced.__name__
+                if cls._AXIS_LEN > 1 else 'scalar')
 
         _num_doc = """
 
%(desc)s
@@ -3123,8 +3194,8 @@ def _make_stat_function(name, desc, f):
 
         @Substitution(outname=name, desc=desc)
         @Appender(_num_doc)
-        def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None,
-                      **kwargs):
+        def stat_func(self, axis=None, skipna=None, level=None,
+                      numeric_only=None, **kwargs):
             if skipna is None:
                 skipna = True
             if axis is None:
@@ -3137,24 +3208,40 @@ def 
stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, stat_func.__name__ = name return stat_func - cls.sum = _make_stat_function('sum',"Return the sum of the values for the requested axis", nanops.nansum) - cls.mean = _make_stat_function('mean',"Return the mean of the values for the requested axis", nanops.nanmean) - cls.skew = _make_stat_function('skew',"Return unbiased skew over requested axis\nNormalized by N-1", nanops.nanskew) - cls.kurt = _make_stat_function('kurt',"Return unbiased kurtosis over requested axis\nNormalized by N-1", nanops.nankurt) + cls.sum = _make_stat_function( + 'sum', 'Return the sum of the values for the requested axis', + nanops.nansum) + cls.mean = _make_stat_function( + 'mean', 'Return the mean of the values for the requested axis', + nanops.nanmean) + cls.skew = _make_stat_function( + 'skew', + 'Return unbiased skew over requested axis\nNormalized by N-1', + nanops.nanskew) + cls.kurt = _make_stat_function( + 'kurt', + 'Return unbiased kurtosis over requested axis\nNormalized by N-1', + nanops.nankurt) cls.kurtosis = cls.kurt - cls.prod = _make_stat_function('prod',"Return the product of the values for the requested axis", nanops.nanprod) + cls.prod = _make_stat_function( + 'prod', 'Return the product of the values for the requested axis', + nanops.nanprod) cls.product = cls.prod - cls.median = _make_stat_function('median',"Return the median of the values for the requested axis", nanops.nanmedian) - cls.max = _make_stat_function('max',""" + cls.median = _make_stat_function( + 'median', 'Return the median of the values for the requested axis', + nanops.nanmedian) + cls.max = _make_stat_function('max', """ This method returns the maximum of the values in the object. If you want the *index* of the maximum, use ``idxmax``. This is the equivalent of the ``numpy.ndarray`` method ``argmax``.""", nanops.nanmax) - cls.min = _make_stat_function('min',""" + cls.min = _make_stat_function('min', """ This method returns the minimum of the values in the object. If you want the *index* of the minimum, use ``idxmin``. 
This is the equivalent of the ``numpy.ndarray`` method ``argmin``.""", nanops.nanmin) - @Substitution(outname='mad', desc="Return the mean absolute deviation of the values for the requested axis") + @Substitution(outname='mad', + desc="Return the mean absolute deviation of the values " + "for the requested axis") @Appender(_num_doc) def mad(self, axis=None, skipna=None, level=None, **kwargs): if skipna is None: @@ -3173,7 +3260,9 @@ def mad(self, axis=None, skipna=None, level=None, **kwargs): return np.abs(demeaned).mean(axis=axis, skipna=skipna) cls.mad = mad - @Substitution(outname='variance',desc="Return unbiased variance over requested axis\nNormalized by N-1") + @Substitution(outname='variance', + desc="Return unbiased variance over requested " + "axis\nNormalized by N-1") @Appender(_num_doc) def var(self, axis=None, skipna=None, level=None, ddof=1, **kwargs): if skipna is None: @@ -3184,10 +3273,13 @@ def var(self, axis=None, skipna=None, level=None, ddof=1, **kwargs): return self._agg_by_level('var', axis=axis, level=level, skipna=skipna, ddof=ddof) - return self._reduce(nanops.nanvar, axis=axis, skipna=skipna, ddof=ddof) + return self._reduce(nanops.nanvar, axis=axis, skipna=skipna, + ddof=ddof) cls.var = var - @Substitution(outname='stdev',desc="Return unbiased standard deviation over requested axis\nNormalized by N-1") + @Substitution(outname='stdev', + desc="Return unbiased standard deviation over requested " + "axis\nNormalized by N-1") @Appender(_num_doc) def std(self, axis=None, skipna=None, level=None, ddof=1, **kwargs): if skipna is None: @@ -3198,12 +3290,14 @@ def std(self, axis=None, skipna=None, level=None, ddof=1, **kwargs): return self._agg_by_level('std', axis=axis, level=level, skipna=skipna, ddof=ddof) result = self.var(axis=axis, skipna=skipna, ddof=ddof) - if getattr(result,'ndim',0) > 0: + if getattr(result, 'ndim', 0) > 0: return result.apply(np.sqrt) return np.sqrt(result) cls.std = std - @Substitution(outname='compounded',desc="Return the compound percentage of the values for the requested axis") + @Substitution(outname='compounded', + desc="Return the compound percentage of the values for " + "the requested axis") @Appender(_num_doc) def compound(self, axis=None, skipna=None, level=None, **kwargs): if skipna is None: @@ -3214,15 +3308,17 @@ def compound(self, axis=None, skipna=None, level=None, **kwargs): def _make_cum_function(name, accum_func, mask_a, mask_b): @Substitution(outname=name) - @Appender("Return cumulative {0} over requested axis.".format(name) + _cnum_doc) - def func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs): + @Appender("Return cumulative {0} over requested axis.".format(name) + + _cnum_doc) + def func(self, axis=None, dtype=None, out=None, skipna=True, + **kwargs): if axis is None: axis = self._stat_axis_number else: axis = self._get_axis_number(axis) y = _values_from_object(self).copy() - if not issubclass(y.dtype.type, (np.integer,np.bool_)): + if not issubclass(y.dtype.type, (np.integer, np.bool_)): mask = isnull(self) if skipna: np.putmask(y, mask, mask_a) @@ -3239,11 +3335,16 @@ def func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs): func.__name__ = name return func - - cls.cummin = _make_cum_function('min', lambda y, axis: np.minimum.accumulate(y, axis), np.inf, np.nan) - cls.cumsum = _make_cum_function('sum', lambda y, axis: y.cumsum(axis), 0., np.nan) - cls.cumprod = _make_cum_function('prod', lambda y, axis: y.cumprod(axis), 1., np.nan) - cls.cummax = _make_cum_function('max', lambda y, axis: 
np.maximum.accumulate(y, axis), -np.inf, np.nan)
+        cls.cummin = _make_cum_function(
+            'min', lambda y, axis: np.minimum.accumulate(y, axis),
+            np.inf, np.nan)
+        cls.cumsum = _make_cum_function(
+            'sum', lambda y, axis: y.cumsum(axis), 0., np.nan)
+        cls.cumprod = _make_cum_function(
+            'prod', lambda y, axis: y.cumprod(axis), 1., np.nan)
+        cls.cummax = _make_cum_function(
+            'max', lambda y, axis: np.maximum.accumulate(y, axis),
+            -np.inf, np.nan)
 
 # install the indexers
 for _name, _indexer in indexing.get_indexers_list():
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index f37b94cd7f689..18f41917067f2 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -60,7 +60,6 @@
                                    'fillna', 'dtype']) | _plotting_methods
 
-
 class GroupByError(Exception):
     pass
 
@@ -482,17 +481,17 @@ def picker(arr):
         return self.agg(picker)
 
     def cumcount(self):
-        '''
-        Number each item in each group from 0 to the length of that group.
+        """Number each item in each group from 0 to the length of that group.
 
         Essentially this is equivalent to
-
-        >>> self.apply(lambda x: Series(np.arange(len(x)), x.index)).
+
+        >>> self.apply(lambda x: Series(np.arange(len(x)), x.index))
 
         Example
         -------
-        >>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']], columns=['A'])
+        >>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']],
+        ...                   columns=['A'])
        >>> df
           A
        0  a
        1  a
        2  a
        3  b
        4  b
        5  a
        >>> df.groupby('A').cumcount()
        0    0
        1    1
        2    2
        3    0
        4    1
        5    3
        dtype: int64
 
-        '''
+        """
         index = self.obj.index
         cumcounts = np.zeros(len(index), dtype='int64')
         for v in self.indices.values():
             cumcounts[v] = np.arange(len(v), dtype='int64')
         return Series(cumcounts, index)
 
-
     def _try_cast(self, result, obj):
         """
         try to cast the result to our obj original type,
@@ -578,7 +576,7 @@ def _python_agg_general(self, func, *args, **kwargs):
             if _is_numeric_dtype(values.dtype):
                 values = com.ensure_float(values)
 
-            output[name] = self._try_cast(values[mask],result)
+            output[name] = self._try_cast(values[mask], result)
 
         return self._wrap_aggregated_output(output)
 
@@ -620,7 +618,7 @@ def _apply_filter(self, indices, dropna):
             mask[indices.astype(int)] = True
             # mask fails to broadcast when passed to where; broadcast manually.
             mask = np.tile(mask, list(self.obj.shape[1:]) + [1]).T
-            filtered = self.obj.where(mask) # Fill with NaNs.
+            filtered = self.obj.where(mask)  # Fill with NaNs.
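Example (not from the patch) of the NaN-filling that _apply_filter implements above: a groupby filter with dropna=False keeps the original shape and leaves rows of rejected groups as NaN instead of dropping them.

    import pandas as pd

    df = pd.DataFrame({'A': ['a', 'a', 'b'], 'B': [1, 2, 3]})
    grouped = df.groupby('A')

    grouped.filter(lambda g: len(g) > 1)                # drops group 'b'
    grouped.filter(lambda g: len(g) > 1, dropna=False)  # keeps row 2, NaN-filled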
         return filtered
 
@@ -710,7 +708,7 @@ def apply(self, f, data, axis=0):
 
         # oh boy
         if (f.__name__ not in _plotting_methods and
-            hasattr(splitter, 'fast_apply') and axis == 0):
+                hasattr(splitter, 'fast_apply') and axis == 0):
             try:
                 values, mutated = splitter.fast_apply(f, group_keys)
                 return group_keys, values, mutated
@@ -840,16 +838,21 @@ def get_group_levels(self):
 
     # Aggregation functions
     _cython_functions = {
-        'add' : 'group_add',
-        'prod' : 'group_prod',
-        'min' : 'group_min',
-        'max' : 'group_max',
-        'mean' : 'group_mean',
-        'median': dict(name = 'group_median'),
-        'var' : 'group_var',
-        'std' : 'group_var',
-        'first': dict(name = 'group_nth', f = lambda func, a, b, c, d: func(a, b, c, d, 1)),
-        'last' : 'group_last',
+        'add': 'group_add',
+        'prod': 'group_prod',
+        'min': 'group_min',
+        'max': 'group_max',
+        'mean': 'group_mean',
+        'median': {
+            'name': 'group_median'
+        },
+        'var': 'group_var',
+        'std': 'group_var',
+        'first': {
+            'name': 'group_nth',
+            'f': lambda func, a, b, c, d: func(a, b, c, d, 1)
+        },
+        'last': 'group_last',
     }
 
     _cython_transforms = {
@@ -867,18 +870,19 @@ def get_group_levels(self):
     def _get_aggregate_function(self, how, values):
 
         dtype_str = values.dtype.name
-        def get_func(fname):
-            # find the function, or use the object function, or return a generic
-            for dt in [dtype_str,'object']:
-                f = getattr(_algos,"%s_%s" % (fname,dtype_str),None)
+
+        def get_func(fname):
+            # find the function, or use the object function, or return a
+            # generic
+            for dt in [dtype_str, 'object']:
+                f = getattr(_algos, "%s_%s" % (fname, dt), None)
                 if f is not None:
                     return f
-            return getattr(_algos,fname,None)
+            return getattr(_algos, fname, None)
 
         ftype = self._cython_functions[how]
 
-        if isinstance(ftype,dict):
+        if isinstance(ftype, dict):
             func = afunc = get_func(ftype['name'])
 
             # a sub-function
@@ -895,7 +899,9 @@ def wrapper(*args, **kwargs):
             func = get_func(ftype)
 
         if func is None:
-            raise NotImplementedError("function is not implemented for this dtype: [how->%s,dtype->%s]" % (how,dtype_str))
+            raise NotImplementedError("function is not implemented for this "
+                                      "dtype: [how->%s,dtype->%s]" %
+                                      (how, dtype_str))
         return func, dtype_str
 
     def aggregate(self, values, how, axis=0):
@@ -934,11 +940,11 @@ def aggregate(self, values, how, axis=0):
         if self._filter_empty_groups:
             if result.ndim == 2:
                 if is_numeric:
-                    result = lib.row_bool_subset(result,
-                                                 (counts > 0).view(np.uint8))
+                    result = lib.row_bool_subset(
+                        result, (counts > 0).view(np.uint8))
                 else:
-                    result = lib.row_bool_subset_object(result,
-                                                        (counts > 0).view(np.uint8))
+                    result = lib.row_bool_subset_object(
+                        result, (counts > 0).view(np.uint8))
             else:
                 result = result[counts > 0]
 
@@ -957,8 +963,8 @@ def aggregate(self, values, how, axis=0):
         return result, names
 
     def _aggregate(self, result, counts, values, how, is_numeric):
-        agg_func,dtype = self._get_aggregate_function(how, values)
-        trans_func = self._cython_transforms.get(how, lambda x: x)
+        agg_func, dtype = self._get_aggregate_function(how, values)
+        trans_func = self._cython_transforms.get(how, lambda x: x)
 
         comp_ids, _, ngroups = self.group_info
         if values.ndim > 3:
@@ -989,7 +995,7 @@ def _aggregate_series_fast(self, obj, func):
         group_index, _, ngroups = self.group_info
 
         # avoids object / Series creation overhead
-        dummy = obj._get_values(slice(None,0)).to_dense()
+        dummy = obj._get_values(slice(None, 0)).to_dense()
         indexer = _algos.groupsort_indexer(group_index, ngroups)[0]
         obj = obj.take(indexer, convert=False)
         group_index = com.take_nd(group_index, indexer, allow_fill=False)
@@ 
-1010,7 +1016,8 @@ def _aggregate_series_pure_python(self, obj, func): for label, group in splitter: res = func(group) if result is None: - if isinstance(res, (Series, np.ndarray)) or isinstance(res, list): + if (isinstance(res, (Series, np.ndarray)) or + isinstance(res, list)): raise ValueError('Function does not reduce') result = np.empty(ngroups, dtype='O') @@ -1158,16 +1165,19 @@ def names(self): # cython aggregation _cython_functions = { - 'add' : 'group_add_bin', - 'prod' : 'group_prod_bin', - 'mean' : 'group_mean_bin', - 'min' : 'group_min_bin', - 'max' : 'group_max_bin', - 'var' : 'group_var_bin', - 'std' : 'group_var_bin', - 'ohlc' : 'group_ohlc', - 'first': dict(name = 'group_nth_bin', f = lambda func, a, b, c, d: func(a, b, c, d, 1)), - 'last' : 'group_last_bin', + 'add': 'group_add_bin', + 'prod': 'group_prod_bin', + 'mean': 'group_mean_bin', + 'min': 'group_min_bin', + 'max': 'group_max_bin', + 'var': 'group_var_bin', + 'std': 'group_var_bin', + 'ohlc': 'group_ohlc', + 'first': { + 'name': 'group_nth_bin', + 'f': lambda func, a, b, c, d: func(a, b, c, d, 1) + }, + 'last': 'group_last_bin', } _name_functions = { @@ -1178,8 +1188,8 @@ def names(self): def _aggregate(self, result, counts, values, how, is_numeric=True): - agg_func,dtype = self._get_aggregate_function(how, values) - trans_func = self._cython_transforms.get(how, lambda x: x) + agg_func, dtype = self._get_aggregate_function(how, values) + trans_func = self._cython_transforms.get(how, lambda x: x) if values.ndim > 3: # punting for now @@ -1295,14 +1305,14 @@ def __init__(self, index, grouper=None, name=None, level=None, # no level passed if not isinstance(self.grouper, (Series, np.ndarray)): self.grouper = self.index.map(self.grouper) - if not (hasattr(self.grouper,"__len__") and \ - len(self.grouper) == len(self.index)): - errmsg = "Grouper result violates len(labels) == len(data)\n" - errmsg += "result: %s" % com.pprint_thing(self.grouper) - self.grouper = None # Try for sanity + if not (hasattr(self.grouper, "__len__") and + len(self.grouper) == len(self.index)): + errmsg = ('Grouper result violates len(labels) == ' + 'len(data)\nresult: %s' % + com.pprint_thing(self.grouper)) + self.grouper = None # Try for sanity raise AssertionError(errmsg) - def __repr__(self): return 'Grouping(%s)' % self.name @@ -1357,7 +1367,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): if not isinstance(group_axis, MultiIndex): if isinstance(level, compat.string_types): if obj.index.name != level: - raise ValueError('level name %s is not the name of the index' % level) + raise ValueError('level name %s is not the name of the ' + 'index' % level) elif level > 0: raise ValueError('level > 0 only valid with MultiIndex') @@ -1416,7 +1427,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): name = gpr gpr = obj[gpr] - if (isinstance(gpr,Categorical) and len(gpr) != len(obj)): + if isinstance(gpr, Categorical) and len(gpr) != len(obj): errmsg = "Categorical grouper must have len(grouper) == len(data)" raise AssertionError(errmsg) @@ -1628,7 +1639,7 @@ def transform(self, func, *args, **kwargs): transformed : Series """ result = self.obj.copy() - if hasattr(result,'values'): + if hasattr(result, 'values'): result = result.values dtype = result.dtype @@ -1642,7 +1653,7 @@ def transform(self, func, *args, **kwargs): group = com.ensure_float(group) object.__setattr__(group, 'name', name) res = wrapper(group) - if hasattr(res,'values'): + if hasattr(res, 'values'): res = res.values # need to do a safe put here, 
as the dtype may be different
@@ -1653,7 +1664,8 @@ def transform(self, func, *args, **kwargs):
 
         # downcast if we can (and need)
         result = _possibly_downcast_to_dtype(result, dtype)
 
-        return self.obj.__class__(result,index=self.obj.index,name=self.obj.name)
+        return self.obj.__class__(result, index=self.obj.index,
+                                  name=self.obj.name)
 
     def filter(self, func, dropna=True, *args, **kwargs):
         """
@@ -1686,8 +1698,8 @@ def true_and_notnull(x, *args, **kwargs):
             return b and notnull(b)
 
         try:
-            indices = [self.indices[name] if true_and_notnull(group) else []
-                       for name, group in self]
+            indices = [self.indices[name] if true_and_notnull(group) else []
+                       for name, group in self]
         except ValueError:
             raise TypeError("the filter must return a boolean result")
         except TypeError:
@@ -1880,7 +1892,7 @@ def _aggregate_multiple_funcs(self, arg):
                                        grouper=self.grouper)
                 results.append(colg.aggregate(arg))
                 keys.append(col)
-            except (TypeError, DataError) :
+            except (TypeError, DataError):
                 pass
             except SpecificationError:
                 raise
@@ -1901,14 +1913,16 @@ def _aggregate_generic(self, func, *args, **kwargs):
                 for name, data in self:
                     # for name in self.indices:
                     #     data = self.get_group(name, obj=obj)
-                    result[name] = self._try_cast(func(data, *args, **kwargs),data)
+                    result[name] = self._try_cast(func(data, *args, **kwargs),
+                                                  data)
             except Exception:
                 return self._aggregate_item_by_item(func, *args, **kwargs)
         else:
             for name in self.indices:
                 try:
                     data = self.get_group(name, obj=obj)
-                    result[name] = self._try_cast(func(data, *args, **kwargs), data)
+                    result[name] = self._try_cast(func(data, *args, **kwargs),
+                                                  data)
                 except Exception:
                     wrapper = lambda x: func(x, *args, **kwargs)
                     result[name] = data.apply(wrapper, axis=axis)
@@ -1929,7 +1943,8 @@ def _aggregate_item_by_item(self, func, *args, **kwargs):
                 data = obj[item]
                 colg = SeriesGroupBy(data, selection=item,
                                      grouper=self.grouper)
-                result[item] = self._try_cast(colg.aggregate(func, *args, **kwargs), data)
+                result[item] = self._try_cast(
+                    colg.aggregate(func, *args, **kwargs), data)
             except ValueError:
                 cannot_agg.append(item)
                 continue
@@ -1987,12 +2002,15 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
 
         if isinstance(values[0], (np.ndarray, Series)):
             if isinstance(values[0], Series):
-                applied_index = self.obj._get_axis(self.axis)
-                all_indexed_same = _all_indexes_same([x.index for x in values])
-                singular_series = len(values) == 1 and applied_index.nlevels == 1
+                applied_index = self.obj._get_axis(self.axis)
+                all_indexed_same = _all_indexes_same([x.index
+                                                      for x in values])
+                singular_series = (len(values) == 1 and
+                                   applied_index.nlevels == 1)
 
                 # GH3596
-                # provide a reduction (Frame -> Series) if groups are unique
+                # provide a reduction (Frame -> Series) if groups are
+                # unique
                 if self.squeeze:
 
                     # assign the name to this series
@@ -2000,15 +2018,19 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                         values[0].name = keys[0]
 
                         # GH2893
-                        # we have series in the values array, we want to produce a series:
+                        # we have series in the values array, we want to
+                        # produce a series:
                         # if any of the sub-series are not indexed the same
-                        # OR we don't have a multi-index and we have only a single values
-                        return self._concat_objects(keys, values,
-                                                    not_indexed_same=not_indexed_same)
+                        # OR we don't have a multi-index and we have only a
+                        # single value
+                        return self._concat_objects(
+                            keys, values, not_indexed_same=not_indexed_same
+                        )
 
                 if not all_indexed_same:
-                    return self._concat_objects(keys, values,
-                                                not_indexed_same=not_indexed_same)
+ return self._concat_objects( + keys, values, not_indexed_same=not_indexed_same + ) try: if self.axis == 0: @@ -2079,7 +2101,7 @@ def transform(self, func, *args, **kwargs): except TypeError: return self._transform_item_by_item(obj, fast_path) except Exception: # pragma: no cover - res = fast_path(group) + res = fast_path(group) path = fast_path else: res = path(group) @@ -2104,15 +2126,17 @@ def transform(self, func, *args, **kwargs): def _define_paths(self, func, *args, **kwargs): if isinstance(func, compat.string_types): fast_path = lambda group: getattr(group, func)(*args, **kwargs) - slow_path = lambda group: group.apply(lambda x: getattr(x, func)(*args, **kwargs), axis=self.axis) + slow_path = lambda group: group.apply( + lambda x: getattr(x, func)(*args, **kwargs), axis=self.axis) else: fast_path = lambda group: func(group, *args, **kwargs) - slow_path = lambda group: group.apply(lambda x: func(x, *args, **kwargs), axis=self.axis) + slow_path = lambda group: group.apply( + lambda x: func(x, *args, **kwargs), axis=self.axis) return fast_path, slow_path def _choose_path(self, fast_path, slow_path, group): path = slow_path - res = slow_path(group) + res = slow_path(group) # if we make it here, test if we can use the fast path try: @@ -2190,7 +2214,7 @@ def filter(self, func, dropna=True, *args, **kwargs): try: path, res = self._choose_path(fast_path, slow_path, group) except Exception: # pragma: no cover - res = fast_path(group) + res = fast_path(group) path = fast_path else: res = path(group) @@ -2199,11 +2223,11 @@ def add_indices(): indices.append(self.indices[name]) # interpret the result of the filter - if isinstance(res,(bool,np.bool_)): + if isinstance(res, (bool, np.bool_)): if res: add_indices() else: - if getattr(res,'ndim',None) == 1: + if getattr(res, 'ndim', None) == 1: val = res.ravel()[0] if val and notnull(val): add_indices() @@ -2224,7 +2248,8 @@ def __getitem__(self, key): if self._selection is not None: raise Exception('Column(s) %s already selected' % self._selection) - if isinstance(key, (list, tuple, Series, np.ndarray)) or not self.as_index: + if (isinstance(key, (list, tuple, Series, np.ndarray)) or + not self.as_index): return DataFrameGroupBy(self.obj, self.grouper, selection=key, grouper=self.grouper, exclusions=self.exclusions, @@ -2324,16 +2349,17 @@ def _wrap_agged_blocks(self, blocks): def _iterate_column_groupbys(self): for i, colname in enumerate(self.obj.columns): - yield colname, SeriesGroupBy(self.obj.iloc[:, i], selection=colname, + yield colname, SeriesGroupBy(self.obj.iloc[:, i], + selection=colname, grouper=self.grouper, exclusions=self.exclusions) def _apply_to_column_groupbys(self, func): from pandas.tools.merge import concat - return concat((func(col_groupby) - for _, col_groupby in self._iterate_column_groupbys()), - keys=self.obj.columns, - axis=1) + return concat( + (func(col_groupby) for _, col_groupby + in self._iterate_column_groupbys()), + keys=self.obj.columns, axis=1) def ohlc(self): """ @@ -2341,7 +2367,8 @@ def ohlc(self): For multiple groupings, the result index will be a MultiIndex """ - return self._apply_to_column_groupbys(lambda x: x._cython_agg_general('ohlc')) + return self._apply_to_column_groupbys( + lambda x: x._cython_agg_general('ohlc')) from pandas.tools.plotting import boxplot_frame_groupby diff --git a/pandas/core/index.py b/pandas/core/index.py index 096aff548dc9c..65eb8486c36d2 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -56,6 +56,7 @@ def _shouldbe_timestamp(obj): _Identity = object + class 
Index(FrozenNDArray): """ @@ -144,7 +145,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False, inferred = lib.infer_dtype(subarr) if inferred == 'integer': return Int64Index(subarr.astype('i8'), copy=copy, name=name) - elif inferred in ['floating','mixed-integer-float']: + elif inferred in ['floating', 'mixed-integer-float']: return Float64Index(subarr, copy=copy, name=name) elif inferred != 'string': if (inferred.startswith('datetime') or @@ -179,7 +180,7 @@ def is_(self, other): return self._id is getattr(other, '_id', Ellipsis) def _reset_identity(self): - "Initializes or resets ``_id`` attribute with new object" + """Initializes or resets ``_id`` attribute with new object""" self._id = _Identity() def view(self, *args, **kwargs): @@ -191,8 +192,10 @@ def view(self, *args, **kwargs): # construction helpers @classmethod def _scalar_data_error(cls, data): - raise TypeError('{0}(...) must be called with a collection ' - 'of some kind, {1} was passed'.format(cls.__name__,repr(data))) + raise TypeError( + '{0}(...) must be called with a collection of some kind, {1} was ' + 'passed'.format(cls.__name__, repr(data)) + ) @classmethod def _string_data_error(cls, data): @@ -411,7 +414,7 @@ def is_integer(self): return self.inferred_type in ['integer'] def is_floating(self): - return self.inferred_type in ['floating','mixed-integer-float'] + return self.inferred_type in ['floating', 'mixed-integer-float'] def is_numeric(self): return self.inferred_type in ['integer', 'floating'] @@ -423,8 +426,9 @@ def holds_integer(self): return self.inferred_type in ['integer', 'mixed-integer'] def _convert_scalar_indexer(self, key, typ=None): - """ convert a scalar indexer, right now we are converting floats -> ints - if the index supports it """ + """ convert a scalar indexer, right now we are converting + floats -> ints if the index supports it + """ def to_int(): ikey = int(key) @@ -463,7 +467,7 @@ def _convert_slice_indexer_getitem(self, key, is_index_slice=False): whether positional or not """ if self.is_integer() or is_index_slice: return key - return self._convert_slice_indexer(key) + return self._convert_slice_indexer(key) def _convert_slice_indexer(self, key, typ=None): """ convert a slice indexer. 
disallow floats in the start/stop/step """ @@ -494,7 +498,8 @@ def is_int(v): if typ == 'iloc': return self._convert_slice_indexer_iloc(key) elif typ == 'getitem': - return self._convert_slice_indexer_getitem(key, is_index_slice=is_index_slice) + return self._convert_slice_indexer_getitem( + key, is_index_slice=is_index_slice) # convert the slice to an indexer here @@ -535,9 +540,9 @@ def _convert_list_indexer(self, key, typ=None): def _convert_indexer_error(self, key, msg=None): if msg is None: msg = 'label' - raise TypeError("the {0} [{1}] is not a proper indexer for this index type ({2})".format(msg, - key, - self.__class__.__name__)) + raise TypeError("the {0} [{1}] is not a proper indexer for this index " + "type ({2})".format(msg, key, self.__class__.__name__)) + def get_duplicates(self): from collections import defaultdict counter = defaultdict(lambda: 0) @@ -750,11 +755,12 @@ def equals(self, other): return np.array_equal(self, other) def identical(self, other): + """Similar to equals, but check that other comparable attributes are + also equal """ - Similar to equals, but check that other comparable attributes are also equal - """ - return self.equals(other) and all( - (getattr(self, c, None) == getattr(other, c, None) for c in self._comparables)) + return (self.equals(other) and + all((getattr(self, c, None) == getattr(other, c, None) + for c in self._comparables))) def asof(self, label): """ @@ -1213,7 +1219,8 @@ def reindex(self, target, method=None, level=None, limit=None, indexer = None # to avoid aliasing an existing index - if copy_if_needed and target.name != self.name and self.name is not None: + if (copy_if_needed and target.name != self.name and + self.name is not None): if target.name is None: target = self.copy() @@ -1621,9 +1628,10 @@ class Int64Index(Index): """ Immutable ndarray implementing an ordered, sliceable set. The basic object - storing axis labels for all pandas objects. Int64Index is a special case of `Index` - with purely integer labels. This is the default index type used by the DataFrame - and Series ctors when no explicit index is provided by the user. + storing axis labels for all pandas objects. Int64Index is a special case + of `Index` with purely integer labels. This is the default index type used + by the DataFrame and Series ctors when no explicit index is provided by the + user. Parameters ---------- @@ -1664,7 +1672,8 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False): elif issubclass(data.dtype.type, np.integer): # don't force the upcast as we may be dealing # with a platform int - if dtype is None or not issubclass(np.dtype(dtype).type, np.integer): + if dtype is None or not issubclass(np.dtype(dtype).type, + np.integer): dtype = np.int64 subarr = np.array(data, dtype=dtype, copy=copy) @@ -1719,8 +1728,8 @@ def _wrap_joined_index(self, joined, other): class Float64Index(Index): """ Immutable ndarray implementing an ordered, sliceable set. The basic object - storing axis labels for all pandas objects. Float64Index is a special case of `Index` - with purely floating point labels. + storing axis labels for all pandas objects. Float64Index is a special case + of `Index` with purely floating point labels. 
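Example (not from the patch): the constructor inference the Int64Index and Float64Index docstrings above describe, as it behaves on pandas of this vintage (both specialised classes were later removed in pandas 2.0).

    import pandas as pd

    idx_int = pd.Index([1, 2, 3])      # purely integer labels -> Int64Index
    idx_flt = pd.Index([1.0, 2.5, 3])  # floating labels -> Float64Index
    print(type(idx_int).__name__, type(idx_flt).__name__)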
Parameters ---------- @@ -1774,14 +1783,15 @@ def inferred_type(self): def astype(self, dtype): if np.dtype(dtype) != np.object_: - raise TypeError( - "Setting %s dtype to anything other than object is not supported" % self.__class__) - return Index(self.values,name=self.name,dtype=object) + raise TypeError('Setting %s dtype to anything other than object ' + 'is not supported' % self.__class__) + return Index(self.values, name=self.name, dtype=object) def _convert_scalar_indexer(self, key, typ=None): if typ == 'iloc': - return super(Float64Index, self)._convert_scalar_indexer(key, typ=typ) + return super(Float64Index, self)._convert_scalar_indexer(key, + typ=typ) return key def _convert_slice_indexer(self, key, typ=None): @@ -1793,10 +1803,11 @@ def _convert_slice_indexer(self, key, typ=None): pass # allow floats here - self._validate_slicer(key, lambda v: v is None or is_integer(v) or is_float(v)) + self._validate_slicer( + key, lambda v: v is None or is_integer(v) or is_float(v)) # translate to locations - return self.slice_indexer(key.start,key.stop,key.step) + return self.slice_indexer(key.start, key.stop, key.step) def get_value(self, series, key): """ we always want to get an index value, never a value """ @@ -1980,8 +1991,8 @@ def _set_labels(self, labels, copy=False, validate=True, verify_integrity=False): if validate and len(labels) != self.nlevels: raise ValueError("Length of labels must match length of levels") - self._labels = FrozenList(_ensure_frozen(labs, copy=copy)._shallow_copy() - for labs in labels) + self._labels = FrozenList( + _ensure_frozen(labs, copy=copy)._shallow_copy() for labs in labels) self._tuples = None self._reset_cache() @@ -2108,12 +2119,12 @@ def __repr__(self): res = res.encode(encoding) return res - def __unicode__(self): """ Return a string representation for a particular Index - Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. + Invoked by unicode(df) in py2 only. Yields a Unicode String in both + py2/py3. """ rows = self.format(names=True) max_rows = get_option('display.max_rows') @@ -2133,7 +2144,7 @@ def _convert_slice_indexer(self, key, typ=None): if typ == 'iloc': return self._convert_slice_indexer_iloc(key) - return super(MultiIndex,self)._convert_slice_indexer(key, typ=typ) + return super(MultiIndex, self)._convert_slice_indexer(key, typ=typ) def _get_names(self): return FrozenList(level.name for level in self.levels) @@ -2142,8 +2153,8 @@ def _set_names(self, values, validate=True): """ sets names on levels. WARNING: mutates! 
- Note that you generally want to set this *after* changing levels, so that it only - acts on copies""" + Note that you generally want to set this *after* changing levels, so + that it only acts on copies""" values = list(values) if validate and len(values) != self.nlevels: raise ValueError('Length of names must match length of levels') @@ -2189,8 +2200,8 @@ def _get_level_number(self, level): level += self.nlevels # Note: levels are zero-based elif level >= self.nlevels: - raise IndexError('Too many levels: Index has only %d levels, not %d' - % (self.nlevels, level + 1)) + raise IndexError('Too many levels: Index has only %d levels, ' + 'not %d' % (self.nlevels, level + 1)) return level _tuples = None @@ -2288,8 +2299,8 @@ def _try_mi(k): # a Timestamp will raise a TypeError in a multi-index # rather than a KeyError, try it here - if isinstance(key, (datetime.datetime,np.datetime64)) or ( - compat.PY3 and isinstance(key, compat.string_types)): + if isinstance(key, (datetime.datetime, np.datetime64)) or ( + compat.PY3 and isinstance(key, compat.string_types)): try: return _try_mi(Timestamp(key)) except: @@ -2338,7 +2349,8 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, else: # weird all NA case - formatted = [com.pprint_thing(na_rep if isnull(x) else x, escape_chars=('\t', '\r', '\n')) + formatted = [com.pprint_thing(na_rep if isnull(x) else x, + escape_chars=('\t', '\r', '\n')) for x in com.take_1d(lev.values, lab)] stringified_levels.append(formatted) @@ -2347,7 +2359,8 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, level = [] if names: - level.append(com.pprint_thing(name, escape_chars=('\t', '\r', '\n')) + level.append(com.pprint_thing(name, + escape_chars=('\t', '\r', '\n')) if name is not None else '') level.extend(np.array(lev, dtype=object)) @@ -2847,7 +2860,7 @@ def reindex(self, target, method=None, level=None, limit=None, else: if takeable: if method is not None or limit is not None: - raise ValueError("cannot do a takeable reindex with " + raise ValueError("cannot do a takeable reindex " "with a method or limit") return self[target], target @@ -3039,17 +3052,24 @@ def partial_selection(key): raise KeyError(key) ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] - return indexer, _maybe_drop_levels(indexer, ilevels, drop_level) + return indexer, _maybe_drop_levels(indexer, ilevels, + drop_level) if len(key) == self.nlevels: if self.is_unique: - # here we have a completely specified key, but are using some partial string matching here + # here we have a completely specified key, but are + # using some partial string matching here # GH4758 - can_index_exactly = any( - [l.is_all_dates and not isinstance(k, compat.string_types) for k, l in zip(key, self.levels)]) - if any([l.is_all_dates for k, l in zip(key, self.levels)]) and not can_index_exactly: + can_index_exactly = any([ + (l.is_all_dates and + not isinstance(k, compat.string_types)) + for k, l in zip(key, self.levels) + ]) + if any([ + l.is_all_dates for k, l in zip(key, self.levels) + ]) and not can_index_exactly: indexer = slice(*self.slice_locs(key, key)) # we have a multiple selection here @@ -3058,7 +3078,8 @@ def partial_selection(key): key = tuple(self[indexer].tolist()[0]) - return self._engine.get_loc(_values_from_object(key)), None + return (self._engine.get_loc(_values_from_object(key)), + None) else: return partial_selection(key) else: @@ -3089,7 +3110,8 @@ def partial_selection(key): indexer = slice(None, None) ilevels = [i for i in range(len(key)) 
                       if key[i] != slice(None, None)]
-            return indexer, _maybe_drop_levels(indexer, ilevels, drop_level)
+            return indexer, _maybe_drop_levels(indexer, ilevels,
+                                               drop_level)
         else:
             indexer = self._get_level_indexer(key, level=level)
             new_index = _maybe_drop_levels(indexer, [level], drop_level)
@@ -3277,8 +3299,8 @@ def _assert_can_do_setop(self, other):
 
     def astype(self, dtype):
         if np.dtype(dtype) != np.object_:
-            raise TypeError(
-                "Setting %s dtype to anything other than object is not supported" % self.__class__)
+            raise TypeError('Setting %s dtype to anything other than object '
+                            'is not supported' % self.__class__)
         return self._shallow_copy()
 
     def insert(self, loc, item):
@@ -3530,8 +3552,9 @@ def _get_consensus_names(indexes):
 
     # find the non-none names, need to tupleify to make
     # the set hashable, then reverse on return
-    consensus_names = set([tuple(i.names)
-                           for i in indexes if all(n is not None for n in i.names)])
+    consensus_names = set([
+        tuple(i.names) for i in indexes if all(n is not None for n in i.names)
+    ])
     if len(consensus_names) == 1:
         return list(list(consensus_names)[0])
     return [None] * indexes[0].nlevels
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index b462624dde1f5..ab9000fd21a0a 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -12,15 +12,16 @@
 
 import numpy as np
 
+
 # the supported indexers
 def get_indexers_list():
 
     return [
-        ('ix' ,_IXIndexer ),
-        ('iloc',_iLocIndexer ),
-        ('loc' ,_LocIndexer ),
-        ('at' ,_AtIndexer ),
-        ('iat' ,_iAtIndexer ),
+        ('ix', _IXIndexer),
+        ('iloc', _iLocIndexer),
+        ('loc', _LocIndexer),
+        ('at', _AtIndexer),
+        ('iat', _iAtIndexer),
     ]
 
 # "null slice"
@@ -33,7 +34,7 @@ class IndexingError(Exception):
 
 class _NDFrameIndexer(object):
     _valid_types = None
-    _exception = KeyError
+    _exception = KeyError
 
     def __init__(self, obj, name):
         self.obj = obj
@@ -70,7 +71,8 @@ def _get_loc(self, key, axis=0):
         return self.obj._ixs(key, axis=axis)
 
     def _slice(self, obj, axis=0, raise_on_error=False, typ=None):
-        return self.obj._slice(obj, axis=axis, raise_on_error=raise_on_error, typ=typ)
+        return self.obj._slice(obj, axis=axis, raise_on_error=raise_on_error,
+                               typ=typ)
 
     def __setitem__(self, key, value):
         # kludgetastic
@@ -101,8 +103,9 @@ def _has_valid_tuple(self, key):
         for i, k in enumerate(key):
             if i >= self.obj.ndim:
                 raise IndexingError('Too many indexers')
-            if not self._has_valid_type(k,i):
-                raise ValueError("Location based indexing can only have [%s] types" % self._valid_types)
+            if not self._has_valid_type(k, i):
+                raise ValueError("Location based indexing can only have [%s] "
+                                 "types" % self._valid_types)
 
     def _convert_tuple(self, key, is_setter=False):
         keyidx = []
@@ -113,13 +116,13 @@ def _convert_tuple(self, key, is_setter=False):
 
     def _convert_scalar_indexer(self, key, axis):
         # if we are accessing via lowered dim, use the last dim
-        ax = self.obj._get_axis(min(axis,self.ndim-1))
+        ax = self.obj._get_axis(min(axis, self.ndim-1))
         # a scalar
         return ax._convert_scalar_indexer(key, typ=self.name)
 
     def _convert_slice_indexer(self, key, axis):
         # if we are accessing via lowered dim, use the last dim
-        ax = self.obj._get_axis(min(axis,self.ndim-1))
+        ax = self.obj._get_axis(min(axis, self.ndim-1))
         return ax._convert_slice_indexer(key, typ=self.name)
 
     def _has_valid_setitem_indexer(self, indexer):
@@ -129,11 +132,12 @@ def _has_valid_positional_setitem_indexer(self, indexer):
        """ validate that a positional indexer cannot enlarge its target;
        will raise if needed, does not modify the indexer externally """
        if isinstance(indexer, 
dict): - raise IndexError("{0} cannot enlarge its target object".format(self.name)) + raise IndexError("{0} cannot enlarge its target object" + .format(self.name)) else: if not isinstance(indexer, tuple): indexer = self._tuplify(indexer) - for ax, i in zip(self.obj.axes,indexer): + for ax, i in zip(self.obj.axes, indexer): if isinstance(i, slice): # should check the stop slice? pass @@ -142,9 +146,11 @@ def _has_valid_positional_setitem_indexer(self, indexer): pass elif com.is_integer(i): if i >= len(ax): - raise IndexError("{0} cannot enlarge its target object".format(self.name)) + raise IndexError("{0} cannot enlarge its target object" + .format(self.name)) elif isinstance(i, dict): - raise IndexError("{0} cannot enlarge its target object".format(self.name)) + raise IndexError("{0} cannot enlarge its target object" + .format(self.name)) return True @@ -157,34 +163,41 @@ def _setitem_with_indexer(self, indexer, value): # maybe partial set take_split_path = self.obj._is_mixed_type - if isinstance(indexer,tuple): + if isinstance(indexer, tuple): nindexer = [] for i, idx in enumerate(indexer): if isinstance(idx, dict): # reindex the axis to the new value # and set inplace - key,_ = _convert_missing_indexer(idx) + key, _ = _convert_missing_indexer(idx) - # if this is the items axes, then take the main missing path - # first; this correctly sets the dtype and avoids cache issues - # essentially this separates out the block that is needed to possibly - # be modified + # if this is the items axes, then take the main missing + # path first + # this correctly sets the dtype and avoids cache issues + # essentially this separates out the block that is needed + # to possibly be modified if self.ndim > 1 and i == self.obj._info_axis_number: # add the new item, and set the value # must have all defined axes if we have a scalar - # or a list-like on the non-info axes if we have a list-like - len_non_info_axes = [ len(_ax) for _i, _ax in enumerate(self.obj.axes) if _i != i ] - if any([ not l for l in len_non_info_axes ]): + # or a list-like on the non-info axes if we have a + # list-like + len_non_info_axes = [ + len(_ax) for _i, _ax in enumerate(self.obj.axes) + if _i != i + ] + if any([not l for l in len_non_info_axes]): if not is_list_like(value): - raise ValueError("cannot set a frame with no defined index and a scalar") + raise ValueError("cannot set a frame with no " + "defined index and a scalar") self.obj[key] = value return self.obj self.obj[key] = np.nan - new_indexer = _convert_from_missing_indexer_tuple(indexer, self.obj.axes) + new_indexer = _convert_from_missing_indexer_tuple( + indexer, self.obj.axes) self._setitem_with_indexer(new_indexer, value) return self.obj @@ -194,10 +207,10 @@ def _setitem_with_indexer(self, indexer, value): # so the object is the same index = self.obj._get_axis(i) labels = _safe_append_to_index(index, key) - self.obj._data = self.obj.reindex_axis(labels,i)._data + self.obj._data = self.obj.reindex_axis(labels, i)._data self.obj._maybe_update_cacher(clear=True) - if isinstance(labels,MultiIndex): + if isinstance(labels, MultiIndex): self.obj.sortlevel(inplace=True) labels = self.obj._get_axis(i) @@ -225,10 +238,11 @@ def _setitem_with_indexer(self, indexer, value): # this preserves dtype of the value new_values = Series([value]).values if len(self.obj.values): - new_values = np.concatenate([self.obj.values, new_values]) + new_values = np.concatenate([self.obj.values, + new_values]) - self.obj._data = self.obj._constructor(new_values, - index=new_index, 
name=self.obj.name)._data + self.obj._data = self.obj._constructor( + new_values, index=new_index, name=self.obj.name)._data self.obj._maybe_update_cacher(clear=True) return self.obj @@ -236,24 +250,28 @@ def _setitem_with_indexer(self, indexer, value): # no columns and scalar if not len(self.obj.columns): - raise ValueError("cannot set a frame with no defined columns") + raise ValueError( + "cannot set a frame with no defined columns" + ) index = self.obj._get_axis(0) labels = _safe_append_to_index(index, indexer) - self.obj._data = self.obj.reindex_axis(labels,0)._data + self.obj._data = self.obj.reindex_axis(labels, 0)._data self.obj._maybe_update_cacher(clear=True) - return getattr(self.obj,self.name).__setitem__(indexer,value) + return getattr(self.obj, self.name).__setitem__(indexer, + value) # set using setitem (Panel and > dims) elif self.ndim >= 3: - return self.obj.__setitem__(indexer,value) + return self.obj.__setitem__(indexer, value) # set info_axis = self.obj._info_axis_number item_labels = self.obj._get_axis(info_axis) # if we have a complicated setup, take the split path - if isinstance(indexer, tuple) and any([ isinstance(ax,MultiIndex) for ax in self.obj.axes ]): + if (isinstance(indexer, tuple) and + any([isinstance(ax, MultiIndex) for ax in self.obj.axes])): take_split_path = True # align and set the values @@ -270,8 +288,10 @@ def _setitem_with_indexer(self, indexer, value): info_idx = [info_idx] labels = item_labels[info_idx] - # if we have a partial multiindex, then need to adjust the plane indexer here - if len(labels) == 1 and isinstance(self.obj[labels[0]].index,MultiIndex): + # if we have a partial multiindex, then need to adjust the plane + # indexer here + if (len(labels) == 1 and + isinstance(self.obj[labels[0]].index, MultiIndex)): item = labels[0] obj = self.obj[item] index = obj.index @@ -282,19 +302,23 @@ def _setitem_with_indexer(self, indexer, value): except: pass plane_indexer = tuple([idx]) + indexer[info_axis + 1:] - lplane_indexer = _length_of_indexer(plane_indexer[0],index) + lplane_indexer = _length_of_indexer(plane_indexer[0], index) - # require that we are setting the right number of values that we are indexing + # require that we are setting the right number of values that + # we are indexing if is_list_like(value) and lplane_indexer != len(value): if len(obj[idx]) != len(value): - raise ValueError("cannot set using a multi-index selection indexer with a different length than the value") + raise ValueError( + "cannot set using a multi-index selection indexer " + "with a different length than the value" + ) # we can directly set the series here # as we select a slice indexer on the mi idx = index._convert_slice_indexer(idx) obj = obj.copy() - obj._data = obj._data.setitem(tuple([idx]),value) + obj._data = obj._data.setitem(tuple([idx]), value) self.obj[item] = obj return @@ -303,7 +327,8 @@ def _setitem_with_indexer(self, indexer, value): plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:] if info_axis > 0: plane_axis = self.obj.axes[:info_axis][0] - lplane_indexer = _length_of_indexer(plane_indexer[0],plane_axis) + lplane_indexer = _length_of_indexer(plane_indexer[0], + plane_axis) else: lplane_indexer = 0 @@ -313,7 +338,7 @@ def setter(item, v): # set the item, possibly having a dtype change s = s.copy() - s._data = s._data.setitem(pi,v) + s._data = s._data.setitem(pi, v) s._maybe_update_cacher(clear=True) self.obj[item] = s @@ -352,11 +377,11 @@ def can_do_equal_len(): # we have an equal len ndarray to our labels elif 
isinstance(value, np.ndarray) and value.ndim == 2:
                 if len(labels) != value.shape[1]:
-                    raise ValueError('Must have equal len keys and value when'
-                                     ' setting with an ndarray')
+                    raise ValueError('Must have equal len keys and value '
+                                     'when setting with an ndarray')
 
                 for i, item in enumerate(labels):
-                    setter(item, value[:,i])
+                    setter(item, value[:, i])
 
             # we have an equal len list/ndarray
             elif can_do_equal_len():
@@ -366,8 +391,8 @@ def can_do_equal_len():
                 else:
                     if len(labels) != len(value):
-                        raise ValueError('Must have equal len keys and value when'
-                                         ' setting with an iterable')
+                        raise ValueError('Must have equal len keys and value '
+                                         'when setting with an iterable')
 
                     for item, v in zip(labels, value):
                         setter(item, v)
@@ -390,14 +415,14 @@ def can_do_equal_len():
 
             if isinstance(value, ABCPanel):
                 value = self._align_panel(indexer, value)
 
-            self.obj._data = self.obj._data.setitem(indexer,value)
+            self.obj._data = self.obj._data.setitem(indexer, value)
             self.obj._maybe_update_cacher(clear=True)
 
     def _align_series(self, indexer, ser):
 
         # indexer to assign Series can be tuple or scalar
         if isinstance(indexer, tuple):
 
-            aligners = [ not _is_null_slice(idx) for idx in indexer ]
+            aligners = [not _is_null_slice(idx) for idx in indexer]
             sum_aligners = sum(aligners)
             single_aligner = sum_aligners == 1
             is_frame = self.obj.ndim == 2
@@ -415,15 +440,17 @@ def _align_series(self, indexer, ser):
 
             # panel
             elif is_panel:
-                single_aligner = single_aligner and (aligners[1] or aligners[2])
-
-            # we have a frame, with multiple indexers on both axes; and a series,
-            # so need to broadcast (see GH5206)
-            if sum_aligners == self.ndim and all([ com._is_sequence(_) for _ in indexer ]):
-
-                ser = ser.reindex(obj.axes[0][indexer[0].ravel()],copy=True).values
+                single_aligner = (single_aligner and
+                                  (aligners[1] or aligners[2]))
+
+            # we have a frame, with multiple indexers on both axes; and a
+            # series, so need to broadcast (see GH5206)
+            if (sum_aligners == self.ndim and
+                    all([com._is_sequence(_) for _ in indexer])):
+                ser = ser.reindex(obj.axes[0][indexer[0].ravel()],
+                                  copy=True).values
                 l = len(indexer[1].ravel())
-                ser = np.tile(ser,l).reshape(l,-1).T
+                ser = np.tile(ser, l).reshape(l, -1).T
 
                 return ser
 
             for i, idx in enumerate(indexer):
@@ -462,14 +489,14 @@ def _align_series(self, indexer, ser):
                 if len(labels & ser.index):
                     ser = ser.reindex(labels)
             else:
-                broadcast.append((n,len(labels)))
+                broadcast.append((n, len(labels)))
 
         # broadcast along other dims
         ser = ser.values.copy()
-        for (axis,l) in broadcast:
-            shape = [ -1 ] * (len(broadcast)+1)
-            shape[axis] = l
-            ser = np.tile(ser,l).reshape(shape)
+        for (axis, l) in broadcast:
+            shape = [-1] * (len(broadcast)+1)
+            shape[axis] = l
+            ser = np.tile(ser, l).reshape(shape)
 
         if self.obj.ndim == 3:
             ser = ser.T
 
@@ -509,7 +536,7 @@ def _align_frame(self, indexer, df):
         if len(sindexers) == 1 and idx is None and cols is None:
             if sindexers[0] == 0:
                 df = df.T
-            return self.obj.conform(df,axis=sindexers[0])
+            return self.obj.conform(df, axis=sindexers[0])
 
         df = df.T
         if idx is not None and cols is not None:
@@ -551,7 +578,8 @@ def _align_frame(self, indexer, df):
     def _align_panel(self, indexer, df):
         is_frame = self.obj.ndim == 2
         is_panel = self.obj.ndim >= 3
-        raise NotImplementedError("cannot set using an indexer with a Panel yet!")
+        raise NotImplementedError("cannot set using an indexer with a Panel "
+                                  "yet!")
 
     def _getitem_tuple(self, tup):
         try:
@@ -575,7 +603,7 @@ def _getitem_tuple(self, tup):
             if _is_null_slice(key):
                 continue
 
-            retval = getattr(retval,self.name)._getitem_axis(key, axis=i)
+            
retval = getattr(retval, self.name)._getitem_axis(key, axis=i) return retval @@ -590,7 +618,7 @@ def _multi_take_opportunity(self, tup): return False # just too complicated - for indexer, ax in zip(tup,self.obj._data.axes): + for indexer, ax in zip(tup, self.obj._data.axes): if isinstance(ax, MultiIndex): return False elif com._is_bool_indexer(indexer): @@ -599,11 +627,15 @@ def _multi_take_opportunity(self, tup): return True def _multi_take(self, tup): - """ create the reindex map for our objects, raise the _exception if we can't create the indexer """ - + """ create the reindex map for our objects, raise the _exception if we + can't create the indexer + """ try: o = self.obj - d = dict([ (a,self._convert_for_reindex(t, axis=o._get_axis_number(a))) for t, a in zip(tup, o._AXIS_ORDERS) ]) + d = dict([ + (a, self._convert_for_reindex(t, axis=o._get_axis_number(a))) + for t, a in zip(tup, o._AXIS_ORDERS) + ]) return o.reindex(**d) except: raise self._exception @@ -682,7 +714,7 @@ def _getitem_lowerdim(self, tup): if len(new_key) == 1: new_key, = new_key - return getattr(section,self.name)[new_key] + return getattr(section, self.name)[new_key] raise IndexingError('not applicable') @@ -769,7 +801,8 @@ def _reindex(keys, level=None): else: indexer, missing = labels.get_indexer_non_unique(keyarr) check = indexer != -1 - result = self.obj.take(indexer[check], axis=axis, convert=False) + result = self.obj.take(indexer[check], axis=axis, + convert=False) # need to merge the result labels and the missing labels if len(missing): @@ -781,33 +814,39 @@ def _reindex(keys, level=None): cur_labels = result._get_axis(axis).values cur_indexer = com._ensure_int64(l[check]) - new_labels = np.empty(tuple([len(indexer)]),dtype=object) - new_labels[cur_indexer] = cur_labels + new_labels = np.empty(tuple([len(indexer)]), dtype=object) + new_labels[cur_indexer] = cur_labels new_labels[missing_indexer] = missing_labels # reindex with the specified axis ndim = self.obj.ndim if axis+1 > ndim: - raise AssertionError("invalid indexing error with non-unique index") + raise AssertionError("invalid indexing error with " + "non-unique index") # a unique indexer if keyarr_is_unique: - new_indexer = (Index(cur_indexer) + Index(missing_indexer)).values + new_indexer = (Index(cur_indexer) + + Index(missing_indexer)).values new_indexer[missing_indexer] = -1 - # we have a non_unique selector, need to use the original indexer here + # we have a non_unique selector, need to use the original + # indexer here else: # need to retake to have the same size as the indexer rindexer = indexer.values rindexer[~check] = 0 - result = self.obj.take(rindexer, axis=axis, convert=False) + result = self.obj.take(rindexer, axis=axis, + convert=False) # reset the new indexer to account for the new size new_indexer = np.arange(len(result)) new_indexer[~check] = -1 - result = result._reindex_with_indexers({ axis : [ new_labels, new_indexer ] }, copy=True, allow_dups=True) + result = result._reindex_with_indexers({ + axis: [new_labels, new_indexer] + }, copy=True, allow_dups=True) return result @@ -853,11 +892,12 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): # always valid if self.name == 'loc': - return { 'key' : obj } + return {'key': obj} # a positional if obj >= len(self.obj) and not isinstance(labels, MultiIndex): - raise ValueError("cannot set by positional indexing with enlargement") + raise ValueError("cannot set by positional indexing with " + "enlargement") return obj @@ -898,7 +938,8 @@ def _convert_to_indexer(self, obj, 
axis=0, is_setter=False): # non-unique (dups) else: - indexer, missing = labels.get_indexer_non_unique(objarr) + (indexer, + missing) = labels.get_indexer_non_unique(objarr) check = indexer mask = check == -1 @@ -906,7 +947,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): # mi here if isinstance(obj, tuple) and is_setter: - return { 'key' : obj } + return {'key': obj} raise KeyError('%s not in index' % objarr[mask]) return indexer @@ -914,11 +955,10 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): else: try: return labels.get_loc(obj) - except (KeyError): - + except KeyError: # allow a not found key only if we are a setter if not is_list_like(obj) and is_setter: - return { 'key' : obj } + return {'key': obj} raise def _tuplify(self, loc): @@ -938,6 +978,7 @@ def _get_slice_axis(self, slice_obj, axis=0): else: return self.obj.take(indexer, axis=axis) + class _IXIndexer(_NDFrameIndexer): """ A primarily location based indexer, with integer fallback """ @@ -959,8 +1000,9 @@ def _has_valid_type(self, key, axis): return True + class _LocationIndexer(_NDFrameIndexer): - _exception = Exception + _exception = Exception def __getitem__(self, key): if type(key) is tuple: @@ -977,8 +1019,9 @@ def _getbool_axis(self, key, axis=0): inds, = key.nonzero() try: return self.obj.take(inds, axis=axis, convert=False) - except (Exception) as detail: + except Exception as detail: raise self._exception(detail) + def _get_slice_axis(self, slice_obj, axis=0): """ this is pretty simple as we just have to deal with labels """ obj = self.obj @@ -986,17 +1029,21 @@ def _get_slice_axis(self, slice_obj, axis=0): return obj labels = obj._get_axis(axis) - indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step) + indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, + slice_obj.step) if isinstance(indexer, slice): return self._slice(indexer, axis=axis, typ='iloc') else: return self.obj.take(indexer, axis=axis) + class _LocIndexer(_LocationIndexer): """ purely label based location based indexing """ - _valid_types = "labels (MUST BE IN THE INDEX), slices of labels (BOTH endpoints included! Can be slices of integers if the index is integers), listlike of labels, boolean" - _exception = KeyError + _valid_types = ("labels (MUST BE IN THE INDEX), slices of labels (BOTH " + "endpoints included! 
Can be slices of integers if the "
+                    "index is integers), listlike of labels, boolean")
-    _exception = KeyError
+    _exception = KeyError
 
     def _has_valid_type(self, key, axis):
         ax = self.obj._get_axis(axis)
@@ -1016,10 +1063,16 @@ def _has_valid_type(self, key, axis):
             else:
 
                 if key.start is not None:
                     if key.start not in ax:
-                        raise KeyError("start bound [%s] is not the [%s]" % (key.start,self.obj._get_axis_name(axis)))
+                        raise KeyError(
+                            "start bound [%s] is not in the [%s]" %
+                            (key.start, self.obj._get_axis_name(axis))
+                        )
                 if key.stop is not None:
                     if key.stop not in ax:
-                        raise KeyError("stop bound [%s] is not in the [%s]" % (key.stop,self.obj._get_axis_name(axis)))
+                        raise KeyError(
+                            "stop bound [%s] is not in the [%s]" %
+                            (key.stop, self.obj._get_axis_name(axis))
+                        )
 
         elif com._is_bool_indexer(key):
             return True
@@ -1033,7 +1086,8 @@ def _has_valid_type(self, key, axis):
                 # require all elements in the index
                 idx = _ensure_index(key)
                 if not idx.isin(ax).all():
-                    raise KeyError("[%s] are not in ALL in the [%s]" % (key,self.obj._get_axis_name(axis)))
+                    raise KeyError("[%s] are not ALL in the [%s]" %
+                                   (key, self.obj._get_axis_name(axis)))
 
                 return True
 
@@ -1041,8 +1095,10 @@ def _has_valid_type(self, key, axis):
 
             def error():
                 if isnull(key):
-                    raise ValueError("cannot use label indexing with a null key")
-                raise KeyError("the label [%s] is not in the [%s]" % (key,self.obj._get_axis_name(axis)))
+                    raise ValueError(
+                        "cannot use label indexing with a null key")
+                raise KeyError("the label [%s] is not in the [%s]" %
+                               (key, self.obj._get_axis_name(axis)))
 
             try:
                 key = self._convert_scalar_indexer(key, axis)
@@ -1063,7 +1119,7 @@ def _getitem_axis(self, key, axis=0):
         labels = self.obj._get_axis(axis)
 
         if isinstance(key, slice):
-            self._has_valid_type(key,axis)
+            self._has_valid_type(key, axis)
             return self._get_slice_axis(key, axis=axis)
         elif com._is_bool_indexer(key):
             return self._getbool_axis(key, axis=axis)
@@ -1075,23 +1131,31 @@ def _getitem_axis(self, key, axis=0):
             return self._getitem_iterable(key, axis=axis)
         else:
-            self._has_valid_type(key,axis)
+            self._has_valid_type(key, axis)
             return self._get_label(key, axis=axis)
 
+
 class _iLocIndexer(_LocationIndexer):
     """ purely integer based location based indexing """
-    _valid_types = "integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array"
-    _exception = IndexError
+    _valid_types = ("integer, integer slice (START point is INCLUDED, END "
                    "point is EXCLUDED), listlike of integers, boolean array")
+    _exception = IndexError
 
     def _has_valid_type(self, key, axis):
         if com._is_bool_indexer(key):
-            if hasattr(key,'index') and isinstance(key.index,Index):
+            if hasattr(key, 'index') and isinstance(key.index, Index):
                 if key.index.inferred_type == 'integer':
-                    raise NotImplementedError("iLocation based boolean indexing on an integer type is not available")
-                raise ValueError("iLocation based boolean indexing cannot use an indexable as a mask")
+                    raise NotImplementedError(
+                        "iLocation based boolean indexing on an integer type "
+                        "is not available"
+                    )
+                raise ValueError("iLocation based boolean indexing cannot use "
+                                 "an indexable as a mask")
             return True
 
-        return isinstance(key, slice) or com.is_integer(key) or _is_list_like(key)
+        return (isinstance(key, slice) or
+                com.is_integer(key) or
+                _is_list_like(key))
 
     def _has_valid_setitem_indexer(self, indexer):
         self._has_valid_positional_setitem_indexer(indexer)
@@ -1112,7 +1176,7 @@ def _getitem_tuple(self, tup):
             if _is_null_slice(key):
                 continue
 
-            retval = 
getattr(retval,self.name)._getitem_axis(key, axis=i) + retval = getattr(retval, self.name)._getitem_axis(key, axis=i) return retval @@ -1123,18 +1187,19 @@ def _get_slice_axis(self, slice_obj, axis=0): return obj if isinstance(slice_obj, slice): - return self._slice(slice_obj, axis=axis, raise_on_error=True, typ='iloc') + return self._slice(slice_obj, axis=axis, raise_on_error=True, + typ='iloc') else: return self.obj.take(slice_obj, axis=axis) def _getitem_axis(self, key, axis=0): if isinstance(key, slice): - self._has_valid_type(key,axis) + self._has_valid_type(key, axis) return self._get_slice_axis(key, axis=axis) elif com._is_bool_indexer(key): - self._has_valid_type(key,axis) + self._has_valid_type(key, axis) return self._getbool_axis(key, axis=axis) # a single integer or a list of integers @@ -1148,16 +1213,18 @@ def _getitem_axis(self, key, axis=0): key = self._convert_scalar_indexer(key, axis) if not com.is_integer(key): - raise TypeError("Cannot index by location index with a non-integer key") + raise TypeError("Cannot index by location index with a " + "non-integer key") - return self._get_loc(key,axis=axis) + return self._get_loc(key, axis=axis) def _convert_to_indexer(self, obj, axis=0, is_setter=False): """ much simpler as we only have to deal with our valid types """ - if self._has_valid_type(obj,axis): + if self._has_valid_type(obj, axis): return obj - raise ValueError("Can only index by location with a [%s]" % self._valid_types) + raise ValueError("Can only index by location with a [%s]" % + self._valid_types) class _ScalarAccessIndexer(_NDFrameIndexer): @@ -1171,7 +1238,7 @@ def __getitem__(self, key): # we could have a convertible item here (e.g. Timestamp) if not _is_list_like(key): - key = tuple([ key ]) + key = tuple([key]) else: raise ValueError('Invalid call for scalar access (getting)!') @@ -1182,15 +1249,18 @@ def __setitem__(self, key, value): if not isinstance(key, tuple): key = self._tuplify(key) if len(key) != self.obj.ndim: - raise ValueError('Not enough indexers for scalar access (setting)!') + raise ValueError('Not enough indexers for scalar access ' + '(setting)!') key = self._convert_key(key) key.append(value) self.obj.set_value(*key) + class _AtIndexer(_ScalarAccessIndexer): """ label based scalar accessor """ pass + class _iAtIndexer(_ScalarAccessIndexer): """ integer based scalar accessor """ @@ -1200,17 +1270,20 @@ def _has_valid_setitem_indexer(self, indexer): def _convert_key(self, key): """ require integer args (and convert to label arguments) """ ckey = [] - for a, i in zip(self.obj.axes,key): + for a, i in zip(self.obj.axes, key): if not com.is_integer(i): - raise ValueError("iAt based indexing can only have integer indexers") + raise ValueError("iAt based indexing can only have integer " + "indexers") ckey.append(a[i]) return ckey # 32-bit floating point machine epsilon _eps = np.finfo('f4').eps -def _length_of_indexer(indexer,target=None): - """ return the length of a single non-tuple indexer which could be a slice """ + +def _length_of_indexer(indexer, target=None): + """return the length of a single non-tuple indexer which could be a slice + """ if target is not None and isinstance(indexer, slice): l = len(target) start = indexer.start @@ -1235,8 +1308,10 @@ def _length_of_indexer(indexer,target=None): return 1 raise AssertionError("cannot find the length of the indexer") + def _convert_to_index_sliceable(obj, key): - """ if we are index sliceable, then return my slicer, otherwise return None """ + """if we are index sliceable, then return my 
slicer, otherwise return None
+    """
     idx = obj.index
     if isinstance(key, slice):
         return idx._convert_slice_indexer(key, typ='getitem')
@@ -1256,6 +1331,7 @@
 
     return None
 
+
 def _is_index_slice(obj):
     def _is_valid_index(x):
         return (com.is_integer(x) or com.is_float(x)
@@ -1301,11 +1377,13 @@ def _setitem_with_indexer(self, indexer, value):
 
         # need to delegate to the super setter
         if isinstance(indexer, dict):
-            return super(_SeriesIndexer, self)._setitem_with_indexer(indexer, value)
+            return super(_SeriesIndexer, self)._setitem_with_indexer(indexer,
+                                                                     value)
 
         # fast access
         self.obj._set_values(indexer, value)
 
+
 def _check_bool_indexer(ax, key):
     # boolean indexing, need to check that the data are aligned, otherwise
     # disallowed
@@ -1344,14 +1422,18 @@ def _convert_missing_indexer(indexer):
 
     return indexer, False
 
+
 def _convert_from_missing_indexer_tuple(indexer, axes):
     """ create a filtered indexer that doesn't have any missing indexers """
     def get_indexer(_i, _idx):
-        return axes[_i].get_loc(_idx['key']) if isinstance(_idx,dict) else _idx
-    return tuple([ get_indexer(_i, _idx) for _i, _idx in enumerate(indexer) ])
+        return (axes[_i].get_loc(_idx['key'])
+                if isinstance(_idx, dict) else _idx)
+    return tuple([get_indexer(_i, _idx) for _i, _idx in enumerate(indexer)])
+
 
 def _safe_append_to_index(index, key):
-    """ a safe append to an index, if incorrect type, then catch and recreate """
+    """ a safe append to an index, if incorrect type, then catch and recreate
+    """
     try:
         return index.insert(len(index), key)
     except:
@@ -1359,23 +1441,26 @@ def _safe_append_to_index(index, key):
 
         # raise here as this is basically an unsafe operation and we want
         # it to be obvious that you are doing something wrong
-        raise ValueError("unsafe appending to index of "
-                         "type {0} with a key {1}".format(index.__class__.__name__,key))
+        raise ValueError("unsafe appending to index of type {0} with a key "
+                         "{1}".format(index.__class__.__name__, key))
+
 
 def _maybe_convert_indices(indices, n):
     """ if we have negative indicies, translate to postive here
-    if have indicies that are out-of-bounds, raise an IndexError """
+    if we have indices that are out-of-bounds, raise an IndexError
+    """
     if isinstance(indices, list):
         indices = np.array(indices)
 
-    mask = indices<0
+    mask = indices < 0
    if mask.any():
         indices[mask] += n
-    mask = (indices>=n) | (indices<0)
+    mask = (indices >= n) | (indices < 0)
     if mask.any():
         raise IndexError("indices are out-of-bounds")
     return indices
 
+
 def _maybe_convert_ix(*args):
     """
     We likely want to take the cross-product
@@ -1426,6 +1511,7 @@ def _check_slice_bounds(slobj, values):
         if stop < -l-1 or stop > l:
             raise IndexError("out-of-bounds on slice (end)")
 
+
 def _maybe_droplevels(index, key):
     # drop levels
     original_index = index
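Before the diff moves on to pandas/core/internals.py, a quick sketch of the user-visible contract the reflowed indexing helpers above preserve. The Series below is illustrative, and the IndexError text is the one raised by _maybe_convert_indices:

import pandas as pd

s = pd.Series([10, 20, 30], index=['a', 'b', 'c'])

# negative positional indices are translated (_maybe_convert_indices)
assert s.iloc[-1] == 30

# .loc label slices include BOTH endpoints (_LocIndexer._valid_types)
assert list(s.loc['a':'b']) == [10, 20]

# out-of-bounds positional indexers raise instead of silently clipping
try:
    s.iloc[[0, 5]]
except IndexError:
    pass  # "indices are out-of-bounds"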
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index c5e245d2e320c..bb719722fd090 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -8,8 +8,9 @@
 from pandas.core.base import PandasObject
 from pandas.core.common import (_possibly_downcast_to_dtype, isnull, notnull,
-                                _NS_DTYPE, _TD_DTYPE, ABCSeries, ABCSparseSeries,
-                                is_list_like, _infer_dtype_from_scalar, _values_from_object)
+                                _NS_DTYPE, _TD_DTYPE, ABCSeries, is_list_like,
+                                ABCSparseSeries, _infer_dtype_from_scalar,
+                                _values_from_object)
 from pandas.core.index import (Index, MultiIndex, _ensure_index,
                                _handle_legacy_indexes)
 from pandas.core.indexing import (_check_slice_bounds, _maybe_convert_indices,
@@ -25,6 +26,7 @@
 from pandas.compat import range, lrange, lmap, callable, map, zip, u
 from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type
 
+
 class Block(PandasObject):
 
     """
@@ -49,7 +51,8 @@ class Block(PandasObject):
     _verify_integrity = True
     _ftype = 'dense'
 
-    def __init__(self, values, items, ref_items, ndim=None, fastpath=False, placement=None):
+    def __init__(self, values, items, ref_items, ndim=None, fastpath=False,
+                 placement=None):
         if ndim is None:
             ndim = values.ndim
@@ -58,8 +61,8 @@ def __init__(self, values, items, ref_items, ndim=None, fastpath=False, placemen
             raise ValueError('Wrong number of dimensions')
 
         if len(items) != len(values):
-            raise ValueError('Wrong number of items passed %d, indices imply %d'
-                             % (len(items), len(values)))
+            raise ValueError('Wrong number of items passed %d, indices imply '
+                             '%d' % (len(items), len(values)))
 
         self.set_ref_locs(placement)
         self.values = values
@@ -100,10 +103,11 @@ def ref_locs(self):
                 # this means that we have nan's in our block
                 try:
-                    indexer[indexer == -1] = np.arange(len(self.items))[isnull(self.items)]
+                    indexer[indexer == -1] = np.arange(
+                        len(self.items))[isnull(self.items)]
                 except:
-                    raise AssertionError('Some block items were not in block '
-                                         'ref_items')
+                    raise AssertionError('Some block items were not in '
+                                         'block ref_items')
 
             self._ref_locs = indexer
         return self._ref_locs
@@ -113,7 +117,9 @@ def reset_ref_locs(self):
         self._ref_locs = np.empty(len(self.items), dtype='int64')
 
     def set_ref_locs(self, placement):
-        """ explicity set the ref_locs indexer, only necessary for duplicate indicies """
+        """ explicitly set the ref_locs indexer, only necessary for duplicate
+        indices
+        """
         if placement is None:
             self._ref_locs = None
         else:
@@ -195,7 +201,8 @@ def merge(self, other):
         # union_ref = self.ref_items + other.ref_items
         return _merge_blocks([self, other], self.ref_items)
 
-    def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, limit=None, mask_info=None):
+    def reindex_axis(self, indexer, method=None, axis=1, fill_value=None,
+                     limit=None, mask_info=None):
         """
         Reindex using pre-computed indexer information
         """
@@ -206,11 +213,12 @@ def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, limit=None
 
         new_values = com.take_nd(self.values, indexer, axis,
                                  fill_value=fill_value, mask_info=mask_info)
-        return make_block(
-            new_values, self.items, self.ref_items, ndim=self.ndim, fastpath=True,
-            placement=self._ref_locs)
+        return make_block(new_values, self.items, self.ref_items,
+                          ndim=self.ndim, fastpath=True,
+                          placement=self._ref_locs)
 
-    def reindex_items_from(self, new_ref_items, indexer=None, method=None, fill_value=None, limit=None, copy=True):
+    def reindex_items_from(self, new_ref_items, indexer=None, method=None,
+                           fill_value=None, limit=None, copy=True):
         """
         Reindex to only those items contained in the input set of items
 
@@ -222,7 +230,8 @@ def reindex_items_from(self, new_ref_items, indexer=None, method=None, fill_valu
         reindexed : Block
         """
         if indexer is None:
-            new_ref_items, indexer = self.items.reindex(new_ref_items, limit=limit)
+            new_ref_items, indexer = self.items.reindex(new_ref_items,
+                                                        limit=limit)
 
         needs_fill = method is not None and limit is None
         if fill_value is None:
@@ -247,9 +256,11 @@ def reindex_items_from(self, new_ref_items, indexer=None, method=None, fill_valu
 
         # fill if needed
         if needs_fill:
-            new_values = com.interpolate_2d(new_values, method=method, limit=limit, fill_value=fill_value)
+            new_values = com.interpolate_2d(new_values, method=method,
+                                            limit=limit, fill_value=fill_value)
 
-        block = 
make_block(new_values, new_items, new_ref_items, ndim=self.ndim, fastpath=True) + block = make_block(new_values, new_items, new_ref_items, + ndim=self.ndim, fastpath=True) # down cast if needed if not self.is_float and (needs_fill or notnull(fill_value)): @@ -284,7 +295,8 @@ def delete(self, item): loc = self.items.get_loc(item) new_items = self.items.delete(loc) new_values = np.delete(self.values, loc, 0) - return make_block(new_values, new_items, self.ref_items, ndim=self.ndim, klass=self.__class__, fastpath=True) + return make_block(new_values, new_items, self.ref_items, + ndim=self.ndim, klass=self.__class__, fastpath=True) def split_block_at(self, item): """ @@ -344,7 +356,7 @@ def downcast(self, dtypes=None): # turn it off completely if dtypes is False: - return [ self ] + return [self] values = self.values @@ -356,14 +368,16 @@ def downcast(self, dtypes=None): dtypes = 'infer' nv = _possibly_downcast_to_dtype(values, dtypes) - return [ make_block(nv, self.items, self.ref_items, ndim=self.ndim, fastpath=True) ] + return [make_block(nv, self.items, self.ref_items, ndim=self.ndim, + fastpath=True)] # ndim > 1 if dtypes is None: - return [ self ] + return [self] if not (dtypes == 'infer' or isinstance(dtypes, dict)): - raise ValueError("downcast must have a dictionary or 'infer' as its argument") + raise ValueError("downcast must have a dictionary or 'infer' as " + "its argument") # item-by-item # this is expensive as it splits the blocks items-by-item @@ -376,12 +390,13 @@ def downcast(self, dtypes=None): dtype = dtypes.get(item, self._downcast_dtype) if dtype is None: - nv = _block_shape(values[i],ndim=self.ndim) + nv = _block_shape(values[i], ndim=self.ndim) else: nv = _possibly_downcast_to_dtype(values[i], dtype) - nv = _block_shape(nv,ndim=self.ndim) + nv = _block_shape(nv, ndim=self.ndim) - blocks.append(make_block(nv, Index([item]), self.ref_items, ndim=self.ndim, fastpath=True)) + blocks.append(make_block(nv, Index([item]), self.ref_items, + ndim=self.ndim, fastpath=True)) return blocks @@ -405,9 +420,9 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None, # force the copy here if values is None: values = com._astype_nansafe(self.values, dtype, copy=True) - newb = make_block( - values, self.items, self.ref_items, ndim=self.ndim, placement=self._ref_locs, - fastpath=True, dtype=dtype, klass=klass) + newb = make_block(values, self.items, self.ref_items, + ndim=self.ndim, placement=self._ref_locs, + fastpath=True, dtype=dtype, klass=klass) except: if raise_on_error is True: raise @@ -418,15 +433,16 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None, raise TypeError("cannot set astype for copy = [%s] for dtype " "(%s [%s]) with smaller itemsize that current " "(%s [%s])" % (copy, self.dtype.name, - self.itemsize, newb.dtype.name, newb.itemsize)) - return [ newb ] + self.itemsize, newb.dtype.name, + newb.itemsize)) + return [newb] def convert(self, copy=True, **kwargs): """ attempt to coerce any object types to better types return a copy of the block (if copy = True) by definition we are not an ObjectBlock here! 
""" - return [ self.copy() ] if copy else [ self ] + return [self.copy()] if copy else [self] def prepare_for_merge(self, **kwargs): """ a regular block is ok to merge as is """ @@ -445,8 +461,8 @@ def post_merge(self, items, **kwargs): # this is a safe bet with multiple dtypes dtype = list(dtypes)[0] if len(dtypes) == 1 else np.float64 - b = make_block( - SparseArray(self.get(item), dtype=dtype), [item], self.ref_items) + b = make_block(SparseArray(self.get(item), dtype=dtype), + [item], self.ref_items) new_blocks.append(b) return new_blocks @@ -470,18 +486,18 @@ def _try_cast_result(self, result, dtype=None): elif self.is_float and result.dtype == self.dtype: # protect against a bool/object showing up here - if isinstance(dtype,compat.string_types) and dtype == 'infer': + if isinstance(dtype, compat.string_types) and dtype == 'infer': return result - if not isinstance(dtype,type): + if not isinstance(dtype, type): dtype = dtype.type - if issubclass(dtype,(np.bool_,np.object_)): - if issubclass(dtype,np.bool_): + if issubclass(dtype, (np.bool_, np.object_)): + if issubclass(dtype, np.bool_): if isnull(result).all(): return result.astype(np.bool_) else: result = result.astype(np.object_) - result[result==1] = True - result[result==0] = False + result[result == 1] = True + result[result == 0] = False return result else: return result.astype(np.object_) @@ -524,9 +540,9 @@ def copy(self, deep=True, ref_items=None): values = values.copy() if ref_items is None: ref_items = self.ref_items - return make_block( - values, self.items, ref_items, ndim=self.ndim, klass=self.__class__, - fastpath=True, placement=self._ref_locs) + return make_block(values, self.items, ref_items, ndim=self.ndim, + klass=self.__class__, fastpath=True, + placement=self._ref_locs) def replace(self, to_replace, value, inplace=False, filter=None, regex=False): @@ -547,8 +563,12 @@ def replace(self, to_replace, value, inplace=False, filter=None, return self.putmask(mask, value, inplace=inplace) def setitem(self, indexer, value): - """ set the value inplace; return a new block (of a possibly different dtype) - indexer is a direct slice/positional indexer; value must be a compaitable shape """ + """ set the value inplace; return a new block (of a possibly different + dtype) + + indexer is a direct slice/positional indexer; value must be a + compatible shape + """ # coerce args values, value = self._try_coerce_args(self.values, value) @@ -567,15 +587,19 @@ def setitem(self, indexer, value): # boolean with truth values == len of the value is ok too if isinstance(indexer, (np.ndarray, list)): if is_list_like(value) and len(indexer) != len(value): - if not (isinstance(indexer, np.ndarray) and indexer.dtype == np.bool_ and len(indexer[indexer]) == len(value)): - raise ValueError("cannot set using a list-like indexer with a different length than the value") + if not (isinstance(indexer, np.ndarray) and + indexer.dtype == np.bool_ and + len(indexer[indexer]) == len(value)): + raise ValueError("cannot set using a list-like indexer " + "with a different length than the value") # slice elif isinstance(indexer, slice): if is_list_like(value) and l: if len(value) != _length_of_indexer(indexer, values): - raise ValueError("cannot set using a slice indexer with a different length than the value") + raise ValueError("cannot set using a slice indexer with a " + "different length than the value") try: # set and return a block @@ -583,22 +607,25 @@ def setitem(self, indexer, value): # coerce and try to infer the dtypes of the result if 
np.isscalar(value): - dtype,_ = _infer_dtype_from_scalar(value) + dtype, _ = _infer_dtype_from_scalar(value) else: dtype = 'infer' values = self._try_coerce_result(values) values = self._try_cast_result(values, dtype) - return [make_block(transf(values), self.items, self.ref_items, ndim=self.ndim, fastpath=True)] + return [make_block(transf(values), self.items, self.ref_items, + ndim=self.ndim, fastpath=True)] except (ValueError, TypeError) as detail: raise - except (Exception) as detail: + except Exception as detail: pass - return [ self ] + return [self] def putmask(self, mask, new, align=True, inplace=False): - """ putmask the data to the block; it is possible that we may create a new dtype of block - return the resulting block(s) + """ putmask the data to the block; it is possible that we may create a + new dtype of block + + return the resulting block(s) Parameters ---------- @@ -618,7 +645,8 @@ def putmask(self, mask, new, align=True, inplace=False): if hasattr(new, 'reindex_axis'): if align: axis = getattr(new, '_info_axis_number', 0) - new = new.reindex_axis(self.items, axis=axis, copy=False).values.T + new = new.reindex_axis(self.items, axis=axis, + copy=False).values.T else: new = new.values.T @@ -639,8 +667,8 @@ def putmask(self, mask, new, align=True, inplace=False): new = self._try_cast(new) # pseudo-broadcast - if isinstance(new,np.ndarray) and new.ndim == self.ndim-1: - new = np.repeat(new,self.shape[-1]).reshape(self.shape) + if isinstance(new, np.ndarray) and new.ndim == self.ndim-1: + new = np.repeat(new, self.shape[-1]).reshape(self.shape) np.putmask(new_values, mask, new) @@ -712,16 +740,16 @@ def create_block(v, m, n, item, reshape=True): new_blocks.append(block) else: - - new_blocks.append( - create_block(new_values, mask, new, self.items, reshape=False)) + new_blocks.append(create_block(new_values, mask, new, + self.items, reshape=False)) return new_blocks if inplace: return [self] - return [make_block(new_values, self.items, self.ref_items, placement=self._ref_locs, fastpath=True)] + return [make_block(new_values, self.items, self.ref_items, + placement=self._ref_locs, fastpath=True)] def interpolate(self, method='pad', axis=0, index=None, values=None, inplace=False, limit=None, @@ -761,7 +789,8 @@ def interpolate(self, method='pad', axis=0, index=None, raise ValueError("invalid method '{0}' to interpolate.".format(method)) def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, - limit=None, fill_value=None, coerce=False, downcast=None): + limit=None, fill_value=None, coerce=False, + downcast=None): """ fillna but using the interpolate machinery """ # if we are coercing, then don't force the conversion @@ -779,7 +808,9 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, values = com.interpolate_2d(values, method, axis, limit, fill_value) values = self._try_coerce_result(values) - blocks = [ make_block(values, self.items, self.ref_items, ndim=self.ndim, klass=self.__class__, fastpath=True) ] + blocks = [make_block(values, self.items, self.ref_items, + ndim=self.ndim, klass=self.__class__, + fastpath=True)] return self._maybe_downcast(blocks, downcast) def _interpolate(self, method=None, index=None, values=None, @@ -810,8 +841,8 @@ def func(x): # should the axis argument be handled below in apply_along_axis? # i.e. 
not an arg to com.interpolate_1d return com.interpolate_1d(index, x, method=method, limit=limit, - fill_value=fill_value, bounds_error=False, - **kwargs) + fill_value=fill_value, + bounds_error=False, **kwargs) # interp each column independently interp_values = np.apply_along_axis(func, axis, data) @@ -825,7 +856,8 @@ def take(self, indexer, ref_items, axis=1): raise AssertionError('axis must be at least 1, got %d' % axis) new_values = com.take_nd(self.values, indexer, axis=axis, allow_fill=False) - return [make_block(new_values, self.items, ref_items, ndim=self.ndim, klass=self.__class__, fastpath=True)] + return [make_block(new_values, self.items, ref_items, ndim=self.ndim, + klass=self.__class__, fastpath=True)] def get_values(self, dtype=None): return self.values @@ -836,7 +868,8 @@ def get_merge_length(self): def diff(self, n): """ return block for the diff of the values """ new_values = com.diff(self.values, n, axis=1) - return [make_block(new_values, self.items, self.ref_items, ndim=self.ndim, fastpath=True)] + return [make_block(new_values, self.items, self.ref_items, + ndim=self.ndim, fastpath=True)] def shift(self, indexer, periods, axis=0): """ shift the block by periods, possibly upcast """ @@ -859,7 +892,8 @@ def shift(self, indexer, periods, axis=0): new_values[:, :periods] = fill_value else: new_values[:, periods:] = fill_value - return [make_block(new_values, self.items, self.ref_items, ndim=self.ndim, fastpath=True)] + return [make_block(new_values, self.items, self.ref_items, + ndim=self.ndim, fastpath=True)] def eval(self, func, other, raise_on_error=True, try_cast=False): """ @@ -869,8 +903,8 @@ def eval(self, func, other, raise_on_error=True, try_cast=False): ---------- func : how to combine self, other other : a ndarray/object - raise_on_error : if True, raise when I can't perform the function, False by default (and just return - the data that we had coming in) + raise_on_error : if True, raise when I can't perform the function, + False by default (and just return the data that we had coming in) Returns ------- @@ -896,8 +930,9 @@ def eval(self, func, other, raise_on_error=True, try_cast=False): is_transposed = True else: # this is a broadcast error heree - raise ValueError("cannot broadcast shape [%s] with block values [%s]" - % (values.T.shape,other.shape)) + raise ValueError("cannot broadcast shape [%s] with block " + "values [%s]" % (values.T.shape, + other.shape)) transf = (lambda x: x.T) if is_transposed else (lambda x: x) @@ -925,21 +960,22 @@ def handle_error(): result = get_result(other) # if we have an invalid shape/broadcast error - # GH4576, so raise instead of allowing to pass thru - except (ValueError) as detail: + # GH4576, so raise instead of allowing to pass through + except ValueError as detail: raise - except (Exception) as detail: + except Exception as detail: result = handle_error() - # technically a broadcast error in numpy can 'work' by returning a boolean False + # technically a broadcast error in numpy can 'work' by returning a + # boolean False if not isinstance(result, np.ndarray): if not isinstance(result, np.ndarray): - # differentiate between an invalid ndarray-ndarray comparsion and - # an invalid type comparison + # differentiate between an invalid ndarray-ndarray comparison + # and an invalid type comparison if isinstance(values, np.ndarray) and is_list_like(other): - raise ValueError('Invalid broadcasting comparison [%s] with block values' - % repr(other)) + raise ValueError('Invalid broadcasting comparison [%s] ' + 'with block 
values' % repr(other))
 
             raise TypeError('Could not compare [%s] with block values'
                             % repr(other))
@@ -951,9 +987,11 @@ def handle_error():
         if try_cast:
             result = self._try_cast_result(result)
 
-        return [make_block(result, self.items, self.ref_items, ndim=self.ndim, fastpath=True)]
+        return [make_block(result, self.items, self.ref_items, ndim=self.ndim,
+                           fastpath=True)]
 
-    def where(self, other, cond, align=True, raise_on_error=True, try_cast=False):
+    def where(self, other, cond, align=True, raise_on_error=True,
+              try_cast=False):
         """
         evaluate the block; return result block(s) from the result
 
         Parameters
         ----------
         other : a ndarray/object
         cond : the condition to respect
         align : boolean, perform alignment on other/cond
-        raise_on_error : if True, raise when I can't perform the function, False by default (and just return
-            the data that we had coming in)
+        raise_on_error : if True, raise when I can't perform the function,
+            False by default (and just return the data that we had coming in)
 
         Returns
         -------
@@ -976,7 +1014,8 @@ def where(self, other, cond, align=True, raise_on_error=True, try_cast=False):
         if hasattr(other, 'reindex_axis'):
             if align:
                 axis = getattr(other, '_info_axis_number', 0)
-                other = other.reindex_axis(self.items, axis=axis, copy=True).values
+                other = other.reindex_axis(self.items, axis=axis,
+                                           copy=True).values
             else:
                 other = other.values
@@ -985,8 +1024,10 @@ def where(self, other, cond, align=True, raise_on_error=True, try_cast=False):
         if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
             if values.ndim != other.ndim or values.shape == other.shape[::-1]:
 
-                # pseodo broadcast (its a 2d vs 1d say and where needs it in a specific direction)
-                if other.ndim >= 1 and values.ndim-1 == other.ndim and values.shape[0] != other.shape[0]:
+                # pseudo broadcast (it's a 2d vs 1d say and where needs it in a
+                # specific direction)
+                if (other.ndim >= 1 and values.ndim-1 == other.ndim and
+                        values.shape[0] != other.shape[0]):
                     other = _block_shape(other).T
                 else:
                     values = values.T
@@ -1016,11 +1057,13 @@ def func(c, v, o):
 
             v, o = self._try_coerce_args(v, o)
             try:
-                return self._try_coerce_result(expressions.where(c, v, o, raise_on_error=True))
-            except (Exception) as detail:
+                return self._try_coerce_result(
+                    expressions.where(c, v, o, raise_on_error=True)
+                )
+            except Exception as detail:
                 if raise_on_error:
-                    raise TypeError('Could not operate [%s] with block values [%s]'
-                                    % (repr(o), str(detail)))
+                    raise TypeError('Could not operate [%s] with block values '
+                                    '[%s]' % (repr(o), str(detail)))
                 else:
                     # return the values
                     result = np.empty(v.shape, dtype='float64')
@@ -1043,7 +1086,8 @@ def func(c, v, o):
             if try_cast:
                 result = self._try_cast_result(result)
 
-            return make_block(result, self.items, self.ref_items, ndim=self.ndim)
+            return make_block(result, self.items, self.ref_items,
+                              ndim=self.ndim)
 
         # might need to separate out blocks
         axis = cond.ndim - 1
@@ -1076,7 +1120,8 @@ def _can_hold_element(self, element):
         if is_list_like(element):
             element = np.array(element)
             return issubclass(element.dtype.type, (np.floating, np.integer))
-        return isinstance(element, (float, int, np.float_, np.int_)) and not isinstance(bool,np.bool_)
+        return (isinstance(element, (float, int, np.float_, np.int_)) and
+                not isinstance(element, (bool, np.bool_)))
 
     def _try_cast(self, element):
         try:
@@ -1084,7 +1129,8 @@ def _try_cast(self, element):
         except:  # pragma: no cover
             return element
 
-    def to_native_types(self, slicer=None, na_rep='', 
float_format=None, **kwargs): + def to_native_types(self, slicer=None, na_rep='', float_format=None, + **kwargs): """ convert to our native types format, slicing if desired """ values = self.values @@ -1102,7 +1148,8 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None, **kwargs): def should_store(self, value): # when inserting a column should not coerce integers to floats # unnecessarily - return issubclass(value.dtype.type, np.floating) and value.dtype == self.dtype + return (issubclass(value.dtype.type, np.floating) and + value.dtype == self.dtype) class ComplexBlock(NumericBlock): @@ -1176,7 +1223,7 @@ def masker(v): if isnull(other) or (np.isscalar(other) and other == tslib.iNaT): other = np.nan elif isinstance(other, np.timedelta64): - other = _coerce_scalar_to_timedelta_type(other,unit='s').item() + other = _coerce_scalar_to_timedelta_type(other, unit='s').item() if other == tslib.iNaT: other = np.nan else: @@ -1191,7 +1238,7 @@ def _try_operate(self, values): def _try_coerce_result(self, result): """ reverse of try_coerce_args / try_operate """ if isinstance(result, np.ndarray): - if result.dtype.kind in ['i','f','O']: + if result.dtype.kind in ['i', 'f', 'O']: result = result.astype('m8[ns]') elif isinstance(result, np.integer): result = np.timedelta64(result) @@ -1214,7 +1261,8 @@ def to_native_types(self, slicer=None, na_rep=None, **kwargs): rvalues[mask] = na_rep imask = (-mask).ravel() rvalues.flat[imask] = np.array([lib.repr_timedelta64(val) - for val in values.ravel()[imask]], dtype=object) + for val in values.ravel()[imask]], + dtype=object) return rvalues.tolist() @@ -1242,19 +1290,24 @@ class ObjectBlock(Block): is_object = True _can_hold_na = True - def __init__(self, values, items, ref_items, ndim=2, fastpath=False, placement=None): + def __init__(self, values, items, ref_items, ndim=2, fastpath=False, + placement=None): if issubclass(values.dtype.type, compat.string_types): values = np.array(values, dtype=object) - super(ObjectBlock, self).__init__(values, items, ref_items, - ndim=ndim, fastpath=fastpath, placement=placement) + super(ObjectBlock, self).__init__(values, items, ref_items, ndim=ndim, + fastpath=fastpath, + placement=placement) @property def is_bool(self): - """ we can be a bool if we have only bool values but are of type object """ + """ we can be a bool if we have only bool values but are of type + object + """ return lib.is_bool_array(self.values.ravel()) - def convert(self, convert_dates=True, convert_numeric=True, copy=True, by_item=True): + def convert(self, convert_dates=True, convert_numeric=True, copy=True, + by_item=True): """ attempt to coerce any object types to better types return a copy of the block (if copy = True) by definition we ARE an ObjectBlock!!!!! 
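The soft-conversion path being rewrapped here is what this era's public API drives: DataFrame.convert_objects funnels through BlockManager.convert into ObjectBlock.convert. A minimal sketch of the intended behavior; the frame contents are illustrative, and 'coerce' forces date conversion so non-convertible entries become NaT:

import numpy as np
import pandas as pd

# object-dtype columns whose values are really numeric / datetime-like
df = pd.DataFrame({'num': np.array(['1', '2', 'x'], dtype=object),
                   'when': np.array(['2013-11-16', '2013-11-17', None],
                                    dtype=object)})

# soft conversion, column by column (by_item=True above); convertible
# values are coerced, non-convertible entries become NaN / NaT
converted = df.convert_objects(convert_dates='coerce', convert_numeric=True)
print(converted.dtypes)  # num -> float64, when -> datetime64[ns]

Later pandas replaced convert_objects with typed helpers (astype, to_numeric, to_datetime), but at this commit it is the entry point for the block-level convert shown above.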
@@ -1271,20 +1324,24 @@ def convert(self, convert_dates=True, convert_numeric=True, copy=True, by_item=T values = self.iget(i) values = com._possibly_convert_objects( - values.ravel(), convert_dates=convert_dates, convert_numeric=convert_numeric).reshape(values.shape) + values.ravel(), convert_dates=convert_dates, + convert_numeric=convert_numeric + ).reshape(values.shape) values = _block_shape(values, ndim=self.ndim) items = self.items.take([i]) placement = None if is_unique else [i] - newb = make_block( - values, items, self.ref_items, ndim=self.ndim, placement=placement) + newb = make_block(values, items, self.ref_items, + ndim=self.ndim, placement=placement) blocks.append(newb) else: values = com._possibly_convert_objects( - self.values.ravel(), convert_dates=convert_dates, convert_numeric=convert_numeric).reshape(self.values.shape) - blocks.append( - make_block(values, self.items, self.ref_items, ndim=self.ndim)) + self.values.ravel(), convert_dates=convert_dates, + convert_numeric=convert_numeric + ).reshape(self.values.shape) + blocks.append(make_block(values, self.items, self.ref_items, + ndim=self.ndim)) return blocks @@ -1296,7 +1353,8 @@ def _maybe_downcast(self, blocks, downcast=None): # split and convert the blocks result_blocks = [] for blk in blocks: - result_blocks.extend(blk.convert(convert_dates=True,convert_numeric=False)) + result_blocks.extend(blk.convert(convert_dates=True, + convert_numeric=False)) return result_blocks def _can_hold_element(self, element): @@ -1376,7 +1434,8 @@ def _replace_single(self, to_replace, value, inplace=False, filter=None, # the superclass method -> to_replace is some kind of object result = super(ObjectBlock, self).replace(to_replace, value, inplace=inplace, - filter=filter, regex=regex) + filter=filter, + regex=regex) if not isinstance(result, list): result = [result] return result @@ -1417,18 +1476,22 @@ class DatetimeBlock(Block): is_datetime = True _can_hold_na = True - def __init__(self, values, items, ref_items, fastpath=False, placement=None, **kwargs): + def __init__(self, values, items, ref_items, fastpath=False, + placement=None, **kwargs): if values.dtype != _NS_DTYPE: values = tslib.cast_to_nanoseconds(values) super(DatetimeBlock, self).__init__(values, items, ref_items, - fastpath=True, placement=placement, **kwargs) + fastpath=True, placement=placement, + **kwargs) def _can_hold_element(self, element): if is_list_like(element): element = np.array(element) return element.dtype == _NS_DTYPE or element.dtype == np.int64 - return com.is_integer(element) or isinstance(element, datetime) or isnull(element) + return (com.is_integer(element) or + isinstance(element, datetime) or + isnull(element)) def _try_cast(self, element): try: @@ -1460,7 +1523,7 @@ def _try_coerce_result(self, result): if result.dtype == 'i8': result = tslib.array_to_datetime( result.astype(object).ravel()).reshape(result.shape) - elif result.dtype.kind in ['i','f','O']: + elif result.dtype.kind in ['i', 'f', 'O']: result = result.astype('M8[ns]') elif isinstance(result, (np.integer, np.datetime64)): result = lib.Timestamp(result) @@ -1477,11 +1540,12 @@ def fillna(self, value, inplace=False, downcast=None): values = self.values if inplace else self.values.copy() mask = com.isnull(self.values) value = self._try_fill(value) - np.putmask(values,mask,value) - return [self if inplace else make_block(values, self.items, - self.ref_items, fastpath=True)] + np.putmask(values, mask, value) + return [self if inplace else + make_block(values, self.items, self.ref_items, 
fastpath=True)] - def to_native_types(self, slicer=None, na_rep=None, date_format=None, **kwargs): + def to_native_types(self, slicer=None, na_rep=None, date_format=None, + **kwargs): """ convert to our native types format, slicing if desired """ values = self.values @@ -1515,7 +1579,8 @@ def astype(self, dtype, copy=False, raise_on_error=True): klass = None if np.dtype(dtype).type == np.object_: klass = ObjectBlock - return self._astype(dtype, copy=copy, raise_on_error=raise_on_error, klass=klass) + return self._astype(dtype, copy=copy, raise_on_error=raise_on_error, + klass=klass) def set(self, item, value): """ @@ -1535,7 +1600,8 @@ def set(self, item, value): def get_values(self, dtype=None): # return object dtype as Timestamps if dtype == object: - return lib.map_infer(self.values.ravel(), lib.Timestamp).reshape(self.values.shape) + return lib.map_infer(self.values.ravel(), lib.Timestamp)\ + .reshape(self.values.shape) return self.values @@ -1550,7 +1616,8 @@ class SparseBlock(Block): _verify_integrity = False _ftype = 'sparse' - def __init__(self, values, items, ref_items, ndim=None, fastpath=False, placement=None): + def __init__(self, values, items, ref_items, ndim=None, fastpath=False, + placement=None): # kludgetastic if ndim is not None: @@ -1600,8 +1667,9 @@ def sp_values(self): @sp_values.setter def sp_values(self, v): # reset the sparse values - self.values = SparseArray( - v, sparse_index=self.sp_index, kind=self.kind, dtype=v.dtype, fill_value=self.fill_value, copy=False) + self.values = SparseArray(v, sparse_index=self.sp_index, + kind=self.kind, dtype=v.dtype, + fill_value=self.fill_value, copy=False) @property def sp_index(self): @@ -1651,9 +1719,9 @@ def get_values(self, dtype=None): def get_merge_length(self): return 1 - def make_block( - self, values, items=None, ref_items=None, sparse_index=None, kind=None, dtype=None, fill_value=None, - copy=False, fastpath=True): + def make_block(self, values, items=None, ref_items=None, sparse_index=None, + kind=None, dtype=None, fill_value=None, copy=False, + fastpath=True): """ return a new block """ if dtype is None: dtype = self.dtype @@ -1664,8 +1732,10 @@ def make_block( if ref_items is None: ref_items = self.ref_items new_values = SparseArray(values, sparse_index=sparse_index, - kind=kind or self.kind, dtype=dtype, fill_value=fill_value, copy=copy) - return make_block(new_values, items, ref_items, ndim=self.ndim, fastpath=fastpath) + kind=kind or self.kind, dtype=dtype, + fill_value=fill_value, copy=copy) + return make_block(new_values, items, ref_items, ndim=self.ndim, + fastpath=fastpath) def interpolate(self, method='pad', axis=0, inplace=False, limit=None, fill_value=None, **kwargs): @@ -1679,7 +1749,7 @@ def fillna(self, value, inplace=False, downcast=None): if issubclass(self.dtype.type, np.floating): value = float(value) values = self.values if inplace else self.values.copy() - return [ self.make_block(values.get_values(value), fill_value=value) ] + return [self.make_block(values.get_values(value), fill_value=value)] def shift(self, indexer, periods, axis=0): """ shift the block by periods """ @@ -1692,7 +1762,7 @@ def shift(self, indexer, periods, axis=0): new_values[:periods] = fill_value else: new_values[periods:] = fill_value - return [ self.make_block(new_values) ] + return [self.make_block(new_values)] def take(self, indexer, ref_items, axis=1): """ going to take our items @@ -1700,9 +1770,10 @@ def take(self, indexer, ref_items, axis=1): if axis < 1: raise AssertionError('axis must be at least 1, got %d' % 
axis) - return [ self.make_block(self.values.take(indexer)) ] + return [self.make_block(self.values.take(indexer))] - def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, limit=None, mask_info=None): + def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, + limit=None, mask_info=None): """ Reindex using pre-computed indexer information """ @@ -1712,9 +1783,11 @@ def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, limit=None # taking on the 0th axis always here if fill_value is None: fill_value = self.fill_value - return self.make_block(self.values.take(indexer), items=self.items, fill_value=fill_value) + return self.make_block(self.values.take(indexer), items=self.items, + fill_value=fill_value) - def reindex_items_from(self, new_ref_items, indexer=None, method=None, fill_value=None, limit=None, copy=True): + def reindex_items_from(self, new_ref_items, indexer=None, method=None, + fill_value=None, limit=None, copy=True): """ Reindex to only those items contained in the input set of items @@ -1728,7 +1801,8 @@ def reindex_items_from(self, new_ref_items, indexer=None, method=None, fill_valu # 1-d always if indexer is None: - new_ref_items, indexer = self.items.reindex(new_ref_items, limit=limit) + new_ref_items, indexer = self.items.reindex(new_ref_items, + limit=limit) if indexer is None: indexer = np.arange(len(self.items)) @@ -1751,9 +1825,11 @@ def reindex_items_from(self, new_ref_items, indexer=None, method=None, fill_valu if method is not None or limit is not None: if fill_value is None: fill_value = self.fill_value - new_values = com.interpolate_2d(new_values, method=method, limit=limit, fill_value=fill_value) + new_values = com.interpolate_2d(new_values, method=method, + limit=limit, fill_value=fill_value) - return self.make_block(new_values, items=new_items, ref_items=new_ref_items, copy=copy) + return self.make_block(new_values, items=new_items, + ref_items=new_ref_items, copy=copy) def sparse_reindex(self, new_index): """ sparse reindex and return a new block @@ -1772,8 +1848,8 @@ def _try_cast_result(self, result, dtype=None): return result -def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None, fastpath=False, placement=None): - +def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None, + fastpath=False, placement=None): if klass is None: dtype = dtype or values.dtype vtype = dtype.type @@ -1782,9 +1858,11 @@ def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None, fast klass = SparseBlock elif issubclass(vtype, np.floating): klass = FloatBlock - elif issubclass(vtype, np.integer) and issubclass(vtype, np.timedelta64): + elif (issubclass(vtype, np.integer) and + issubclass(vtype, np.timedelta64)): klass = TimeDeltaBlock - elif issubclass(vtype, np.integer) and not issubclass(vtype, np.datetime64): + elif (issubclass(vtype, np.integer) and + not issubclass(vtype, np.datetime64)): klass = IntBlock elif dtype == np.bool_: klass = BoolBlock @@ -1799,10 +1877,10 @@ def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None, fast if np.prod(values.shape): flat = values.ravel() inferred_type = lib.infer_dtype(flat) - if inferred_type in ['datetime','datetime64']: + if inferred_type in ['datetime', 'datetime64']: - # we have an object array that has been inferred as datetime, so - # convert it + # we have an object array that has been inferred as + # datetime, so convert it try: values = tslib.array_to_datetime( flat).reshape(values.shape) @@ -1814,7 +1892,9 @@ 
def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None, fast if klass is None: klass = ObjectBlock - return klass(values, items, ref_items, ndim=ndim, fastpath=fastpath, placement=placement) + return klass(values, items, ref_items, ndim=ndim, fastpath=fastpath, + placement=placement) + # TODO: flexible with index=None and/or items=None @@ -1863,11 +1943,13 @@ def __init__(self, blocks, axes, do_integrity_check=True, fastpath=True): def make_empty(self, axes=None): """ return an empty BlockManager with the items axis of len 0 """ if axes is None: - axes = [_ensure_index([]) ] + [ _ensure_index(a) for a in self.axes[1:] ] + axes = [_ensure_index([])] + [ + _ensure_index(a) for a in self.axes[1:] + ] # preserve dtype if possible dtype = self.dtype if self.ndim == 1 else object - return self.__class__(np.array([],dtype=dtype), axes) + return self.__class__(np.array([], dtype=dtype), axes) def __nonzero__(self): return True @@ -1892,8 +1974,9 @@ def set_axis(self, axis, value, maybe_rename=True, check_axis=True): value = _ensure_index(value) if check_axis and len(value) != len(cur_axis): - raise ValueError('Length mismatch: Expected axis has %d elements, new values have %d elements' - % (len(cur_axis), len(value))) + raise ValueError('Length mismatch: Expected axis has %d elements, ' + 'new values have %d elements' % (len(cur_axis), + len(value))) self.axes[axis] = value self._shape = None @@ -1929,9 +2012,10 @@ def _reset_ref_locs(self): self._items_map = None def _rebuild_ref_locs(self): - """ take _ref_locs and set the individual block ref_locs, skipping Nones - no effect on a unique index """ - if getattr(self,'_ref_locs',None) is not None: + """Take _ref_locs and set the individual block ref_locs, skipping Nones + no effect on a unique index + """ + if getattr(self, '_ref_locs', None) is not None: item_count = 0 for v in self._ref_locs: if v is not None: @@ -1984,9 +2068,10 @@ def _set_ref_locs(self, labels=None, do_refs=False): try: rl = block.ref_locs except: - raise AssertionError("cannot create BlockManager._ref_locs because " - "block [%s] with duplicate items [%s] " - "does not have _ref_locs set" % (block, labels)) + raise AssertionError( + 'Cannot create BlockManager._ref_locs because ' + 'block [%s] with duplicate items [%s] does not ' + 'have _ref_locs set' % (block, labels)) m = maybe_create_block_in_items_map(im, block) for i, item in enumerate(block.items): @@ -2138,7 +2223,8 @@ def apply(self, f, *args, **kwargs): ---------- f : the callable or function name to operate on at the block level axes : optional (if not supplied, use self.axes) - filter : list, if supplied, only call the block if the filter is in the block + filter : list, if supplied, only call the block if the filter is in + the block """ axes = kwargs.pop('axes', None) @@ -2169,8 +2255,8 @@ def apply(self, f, *args, **kwargs): result_blocks.append(applied) if len(result_blocks) == 0: return self.make_empty(axes or self.axes) - bm = self.__class__( - result_blocks, axes or self.axes, do_integrity_check=do_integrity_check) + bm = self.__class__(result_blocks, axes or self.axes, + do_integrity_check=do_integrity_check) bm._consolidate_inplace() return bm @@ -2254,7 +2340,9 @@ def comp(s): return bm def prepare_for_merge(self, *args, **kwargs): - """ prepare for merging, return a new block manager with Sparse -> Dense """ + """ prepare for merging, return a new block manager with + Sparse -> Dense + """ self._consolidate_inplace() if self._has_sparse: return self.apply('prepare_for_merge', *args, 
**kwargs) @@ -2305,7 +2393,8 @@ def is_numeric_mixed_type(self): self._consolidate_inplace() return all([block.is_numeric for block in self.blocks]) - def get_block_map(self, copy=False, typ=None, columns=None, is_numeric=False, is_bool=False): + def get_block_map(self, copy=False, typ=None, columns=None, + is_numeric=False, is_bool=False): """ return a dictionary mapping the ftype -> block list Parameters @@ -2316,7 +2405,8 @@ def get_block_map(self, copy=False, typ=None, columns=None, is_numeric=False, is filter if the type is indicated """ # short circuit - mainly for merging - if typ == 'dict' and columns is None and not is_numeric and not is_bool and not copy: + if (typ == 'dict' and columns is None and not is_numeric and + not is_bool and not copy): bm = defaultdict(list) for b in self.blocks: bm[str(b.ftype)].append(b) @@ -2414,15 +2504,13 @@ def get_slice(self, slobj, axis=0, raise_on_error=False): new_items = new_axes[0] if len(self.blocks) == 1: blk = self.blocks[0] - newb = make_block(blk._slice(slobj), - new_items, - new_items, - klass=blk.__class__, - fastpath=True, + newb = make_block(blk._slice(slobj), new_items, new_items, + klass=blk.__class__, fastpath=True, placement=blk._ref_locs) new_blocks = [newb] else: - return self.reindex_items(new_items, indexer=np.arange(len(self.items))[slobj]) + return self.reindex_items( + new_items, indexer=np.arange(len(self.items))[slobj]) else: new_blocks = self._slice_blocks(slobj, axis) @@ -2477,7 +2565,7 @@ def copy(self, deep=True): else: new_axes = list(self.axes) return self.apply('copy', axes=new_axes, deep=deep, - ref_items=new_axes[0], do_integrity_check=False) + ref_items=new_axes[0], do_integrity_check=False) def as_matrix(self, items=None): if len(self.blocks) == 0: @@ -2947,7 +3035,7 @@ def _add_new_block(self, item, value, loc=None): # need to shift elements to the right if self._ref_locs[loc] is not None: - for i in reversed(lrange(loc+1,len(self._ref_locs))): + for i in reversed(lrange(loc+1, len(self._ref_locs))): self._ref_locs[i] = self._ref_locs[i-1] self._ref_locs[loc] = (new_block, 0) @@ -2966,7 +3054,8 @@ def _check_have(self, item): if item not in self.items: raise KeyError('no item named %s' % com.pprint_thing(item)) - def reindex_axis(self, new_axis, indexer=None, method=None, axis=0, fill_value=None, limit=None, copy=True): + def reindex_axis(self, new_axis, indexer=None, method=None, axis=0, + fill_value=None, limit=None, copy=True): new_axis = _ensure_index(new_axis) cur_axis = self.axes[axis] @@ -2987,19 +3076,25 @@ def reindex_axis(self, new_axis, indexer=None, method=None, axis=0, fill_value=N if axis == 0: if method is not None or limit is not None: - return self.reindex_axis0_with_method(new_axis, indexer=indexer, - method=method, fill_value=fill_value, limit=limit, copy=copy) - return self.reindex_items(new_axis, indexer=indexer, copy=copy, fill_value=fill_value) + return self.reindex_axis0_with_method( + new_axis, indexer=indexer, method=method, + fill_value=fill_value, limit=limit, copy=copy + ) + return self.reindex_items(new_axis, indexer=indexer, copy=copy, + fill_value=fill_value) new_axis, indexer = cur_axis.reindex( new_axis, method, copy_if_needed=True) - return self.reindex_indexer(new_axis, indexer, axis=axis, fill_value=fill_value) + return self.reindex_indexer(new_axis, indexer, axis=axis, + fill_value=fill_value) - def reindex_axis0_with_method(self, new_axis, indexer=None, method=None, fill_value=None, limit=None, copy=True): + def reindex_axis0_with_method(self, new_axis, indexer=None, 
method=None,
+                                  fill_value=None, limit=None, copy=True):
        raise AssertionError('method argument not supported for '
                             'axis == 0')

-    def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=None, allow_dups=False):
+    def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=None,
+                        allow_dups=False):
        """
        pandas-indexer with -1's only.
        """
@@ -3063,7 +3158,8 @@ def _reindex_indexer_items(self, new_items, indexer, fill_value):

        return self.__class__(new_blocks, new_axes)

-    def reindex_items(self, new_items, indexer=None, copy=True, fill_value=None):
+    def reindex_items(self, new_items, indexer=None, copy=True,
+                      fill_value=None):
        """

        """
@@ -3071,10 +3167,12 @@ def reindex_items(self, new_items, indexer=None, copy=True, fill_value=None):
            data = self
            if not data.is_consolidated():
                data = data.consolidate()
-                return data.reindex_items(new_items, copy=copy, fill_value=fill_value)
+                return data.reindex_items(new_items, copy=copy,
+                                          fill_value=fill_value)

        if indexer is None:
-            new_items, indexer = self.items.reindex(new_items, copy_if_needed=True)
+            new_items, indexer = self.items.reindex(new_items,
+                                                    copy_if_needed=True)

        new_axes = [new_items] + self.axes[1:]

        # could have some pathological (MultiIndex) issues here
@@ -3103,12 +3201,9 @@ def reindex_items(self, new_items, indexer=None, copy=True, fill_value=None):
                for i, idx in enumerate(indexer):
                    blk, lidx = rl[idx]
                    item = new_items.take([i])
-                    blk = make_block(_block_shape(blk.iget(lidx)),
-                                     item,
-                                     new_items,
-                                     ndim=self.ndim,
-                                     fastpath=True,
-                                     placement = [i])
+                    blk = make_block(_block_shape(blk.iget(lidx)), item,
+                                     new_items, ndim=self.ndim, fastpath=True,
+                                     placement=[i])
                    new_blocks.append(blk)

        # add a na block if we are missing items
@@ -3122,7 +3217,8 @@ def reindex_items(self, new_items, indexer=None, copy=True, fill_value=None):

        return self.__class__(new_blocks, new_axes)

-    def _make_na_block(self, items, ref_items, placement=None, fill_value=None):
+    def _make_na_block(self, items, ref_items, placement=None,
+                       fill_value=None):

        # TODO: infer dtypes other than float64 from fill_value
        if fill_value is None:
@@ -3157,7 +3253,8 @@ def take(self, indexer, new_index=None, axis=1, verify=True):
            new_index = self.axes[axis].take(indexer)

        new_axes[axis] = new_index
-        return self.apply('take', axes=new_axes, indexer=indexer, ref_items=new_axes[0], axis=axis)
+        return self.apply('take', axes=new_axes, indexer=indexer,
+                          ref_items=new_axes[0], axis=axis)

    def merge(self, other, lsuffix=None, rsuffix=None):
        if not self._is_indexed_like(other):
@@ -3220,7 +3317,8 @@ def rename_axis(self, mapper, axis=1):
        index = self.axes[axis]
        if isinstance(index, MultiIndex):
            new_axis = MultiIndex.from_tuples(
-                [tuple(mapper(y) for y in x) for x in index], names=index.names)
+                [tuple(mapper(y) for y in x) for x in index],
+                names=index.names)
        else:
            new_axis = Index([mapper(x) for x in index], name=index.name)

@@ -3307,8 +3405,8 @@ def __init__(self, block, axis, do_integrity_check=False, fastpath=True):
        self.axes = [axis]
        if isinstance(block, list):
            if len(block) != 1:
-                raise ValueError(
-                    "cannot create SingleBlockManager with more than 1 block")
+                raise ValueError('Cannot create SingleBlockManager with '
+                                 'more than 1 block')
            block = block[0]
        if not isinstance(block, Block):
            block = make_block(block, axis, axis, ndim=1, fastpath=True)
@@ -3327,8 +3425,8 @@ def __init__(self, block, axis, do_integrity_check=False, fastpath=True):
                block = _consolidate(block, axis)
            if len(block) != 1:
-                raise ValueError(
-                    "cannot create SingleBlockManager with more than
1 block") + raise ValueError('Cannot create SingleBlockManager with ' + 'more than 1 block') block = block[0] if not isinstance(block, Block): @@ -3349,39 +3447,46 @@ def shape(self): self._shape = tuple([len(self.axes[0])]) return self._shape - def reindex(self, new_axis, indexer=None, method=None, fill_value=None, limit=None, copy=True): - + def reindex(self, new_axis, indexer=None, method=None, fill_value=None, + limit=None, copy=True): # if we are the same and don't copy, just return if not copy and self.index.equals(new_axis): return self - block = self._block.reindex_items_from(new_axis, indexer=indexer, method=method, - fill_value=fill_value, limit=limit, copy=copy) + block = self._block.reindex_items_from(new_axis, indexer=indexer, + method=method, + fill_value=fill_value, + limit=limit, copy=copy) mgr = SingleBlockManager(block, new_axis) mgr._consolidate_inplace() return mgr def _reindex_indexer_items(self, new_items, indexer, fill_value): # equiv to a reindex - return self.reindex(new_items, indexer=indexer, fill_value=fill_value, copy=False) + return self.reindex(new_items, indexer=indexer, fill_value=fill_value, + copy=False) - def reindex_axis0_with_method(self, new_axis, indexer=None, method=None, fill_value=None, limit=None, copy=True): + def reindex_axis0_with_method(self, new_axis, indexer=None, method=None, + fill_value=None, limit=None, copy=True): if method is None: indexer = None - return self.reindex(new_axis, indexer=indexer, method=method, fill_value=fill_value, limit=limit, copy=copy) + return self.reindex(new_axis, indexer=indexer, method=method, + fill_value=fill_value, limit=limit, copy=copy) def get_slice(self, slobj, raise_on_error=False): if raise_on_error: _check_slice_bounds(slobj, self.index) - return self.__class__(self._block._slice(slobj), self.index._getitem_slice(slobj), fastpath=True) + return self.__class__(self._block._slice(slobj), + self.index._getitem_slice(slobj), fastpath=True) def set_axis(self, axis, value): cur_axis = self.axes[axis] value = _ensure_index(value) if len(value) != len(cur_axis): - raise ValueError('Length mismatch: Expected axis has %d elements, new values have %d elements' - % (len(cur_axis), len(value))) + raise ValueError('Length mismatch: Expected axis has %d elements, ' + 'new values have %d elements' % (len(cur_axis), + len(value))) self.axes[axis] = value self._shape = None @@ -3575,7 +3680,9 @@ def form_blocks(arrays, names, axes): def _simple_blockify(tuples, ref_items, dtype, is_unique=True): - """ return a single array of a block that has a single dtype; if dtype is not None, coerce to this dtype """ + """ return a single array of a block that has a single dtype; if dtype is + not None, coerce to this dtype + """ block_items, values, placement = _stack_arrays(tuples, ref_items, dtype) # CHECK DTYPE? 
@@ -3608,7 +3715,9 @@ def _multi_blockify(tuples, ref_items, dtype=None, is_unique=True): def _sparse_blockify(tuples, ref_items, dtype=None): - """ return an array of blocks that potentially have different dtypes (and are sparse) """ + """ return an array of blocks that potentially have different dtypes (and + are sparse) + """ new_blocks = [] for i, names, array in tuples: @@ -3748,8 +3857,8 @@ def _consolidate(blocks, items): new_blocks = [] for (_can_consolidate, dtype), group_blocks in grouper: - merged_blocks = _merge_blocks( - list(group_blocks), items, dtype=dtype, _can_consolidate=_can_consolidate) + merged_blocks = _merge_blocks(list(group_blocks), items, dtype=dtype, + _can_consolidate=_can_consolidate) if isinstance(merged_blocks, list): new_blocks.extend(merged_blocks) else: @@ -3810,6 +3919,6 @@ def _vstack(to_stack, dtype): def _possibly_convert_to_indexer(loc): if com._is_bool_indexer(loc): loc = [i for i, v in enumerate(loc) if v] - elif isinstance(loc,slice): - loc = lrange(loc.start,loc.stop) + elif isinstance(loc, slice): + loc = lrange(loc.start, loc.stop) return loc diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 45e6a54721bd2..b6ebeb7f96489 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -105,6 +105,7 @@ def _has_infs(result): return False return np.isinf(result) or np.isneginf(result) + def _get_fill_value(dtype, fill_value=None, fill_value_typ=None): """ return the correct fill value for the dtype of the values """ if fill_value is not None: @@ -127,7 +128,9 @@ def _get_fill_value(dtype, fill_value=None, fill_value_typ=None): else: return tslib.iNaT -def _get_values(values, skipna, fill_value=None, fill_value_typ=None, isfinite=False, copy=True): + +def _get_values(values, skipna, fill_value=None, fill_value_typ=None, + isfinite=False, copy=True): """ utility to get the values view, mask, dtype if necessary copy and mask using the specified fill_value copy = True will force the copy """ @@ -137,11 +140,13 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, isfinite=F else: mask = isnull(values) - dtype = values.dtype + dtype = values.dtype dtype_ok = _na_ok_dtype(dtype) - # get our fill value (in case we need to provide an alternative dtype for it) - fill_value = _get_fill_value(dtype, fill_value=fill_value, fill_value_typ=fill_value_typ) + # get our fill value (in case we need to provide an alternative + # dtype for it) + fill_value = _get_fill_value(dtype, fill_value=fill_value, + fill_value_typ=fill_value_typ) if skipna: if copy: @@ -151,7 +156,8 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, isfinite=F # promote if needed else: - values, changed = com._maybe_upcast_putmask(values, mask, fill_value) + values, changed = com._maybe_upcast_putmask(values, mask, + fill_value) elif copy: values = values.copy() @@ -159,20 +165,25 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, isfinite=F values = _view_if_needed(values) return values, mask, dtype + def _isfinite(values): - if issubclass(values.dtype.type, (np.timedelta64,np.datetime64)): + if issubclass(values.dtype.type, (np.timedelta64, np.datetime64)): return isnull(values) return -np.isfinite(values) + def _na_ok_dtype(dtype): - return not issubclass(dtype.type, (np.integer, np.datetime64, np.timedelta64)) + return not issubclass(dtype.type, (np.integer, np.datetime64, + np.timedelta64)) + def _view_if_needed(values): - if issubclass(values.dtype.type, (np.datetime64,np.timedelta64)): + if 
issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
        return values.view(np.int64)
    return values

-def _wrap_results(result,dtype):
+
+def _wrap_results(result, dtype):
    """ wrap our results if needed """

    if issubclass(dtype.type, np.datetime64):
@@ -185,27 +196,30 @@

        # this is a scalar timedelta result!
        # we have series convert then take the element (scalar)
-        # as series will do the right thing in py3 (and deal with numpy 1.6.2
-        # bug in that it results dtype of timedelta64[us]
+        # as series will do the right thing in py3 (and deal with the numpy
+        # 1.6.2 bug in that it results in a dtype of timedelta64[us])
        from pandas import Series

        # coerce float to results
        if is_float(result):
            result = int(result)
-        result = Series([result],dtype='timedelta64[ns]')
+        result = Series([result], dtype='timedelta64[ns]')
    else:
        result = result.view(dtype)

    return result

+
def nanany(values, axis=None, skipna=True):
    values, mask, dtype = _get_values(values, skipna, False, copy=skipna)
    return values.any(axis)

+
def nanall(values, axis=None, skipna=True):
    values, mask, dtype = _get_values(values, skipna, True, copy=skipna)
    return values.all(axis)

+
@disallow('M8')
@bottleneck_switch(zero_value=0)
def nansum(values, axis=None, skipna=True):
@@ -214,6 +228,7 @@ def nansum(values, axis=None, skipna=True):
    the_sum = _maybe_null_out(the_sum, axis, mask)
    return the_sum

+
@disallow('M8')
@bottleneck_switch()
def nanmean(values, axis=None, skipna=True):
@@ -229,7 +244,8 @@ def nanmean(values, axis=None, skipna=True):
    else:
        the_mean = the_sum / count if count > 0 else np.nan

-    return _wrap_results(the_mean,dtype)
+    return _wrap_results(the_mean, dtype)
+

@disallow('M8')
@bottleneck_switch()
@@ -265,7 +281,7 @@ def get_median(x):
        return ret

    # otherwise return a scalar value
-    return _wrap_results(get_median(values),dtype) if notempty else np.nan
+    return _wrap_results(get_median(values), dtype) if notempty else np.nan


@disallow('M8')
@@ -292,7 +308,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1):

@bottleneck_switch()
def nanmin(values, axis=None, skipna=True):
-    values, mask, dtype = _get_values(values, skipna, fill_value_typ = '+inf')
+    values, mask, dtype = _get_values(values, skipna, fill_value_typ='+inf')

    # numpy 1.6.1 workaround in Python 3.x
    if (values.dtype == np.object_ and compat.PY3):
@@ -315,13 +331,13 @@ def nanmin(values, axis=None, skipna=True):
    else:
        result = values.min(axis)

-    result = _wrap_results(result,dtype)
+    result = _wrap_results(result, dtype)
    return _maybe_null_out(result, axis, mask)


@bottleneck_switch()
def nanmax(values, axis=None, skipna=True):
-    values, mask, dtype = _get_values(values, skipna, fill_value_typ ='-inf')
+    values, mask, dtype = _get_values(values, skipna, fill_value_typ='-inf')

    # numpy 1.6.1 workaround in Python 3.x
    if (values.dtype == np.object_ and compat.PY3):
@@ -345,7 +361,7 @@ def nanmax(values, axis=None, skipna=True):
    else:
        result = values.max(axis)

-    result = _wrap_results(result,dtype)
+    result = _wrap_results(result, dtype)
    return _maybe_null_out(result, axis, mask)


@@ -353,7 +369,8 @@ def nanargmax(values, axis=None, skipna=True):
    """
    Returns -1 in the NA case
    """
-    values, mask, dtype = _get_values(values, skipna, fill_value_typ = '-inf', isfinite=True)
+    values, mask, dtype = _get_values(values, skipna, fill_value_typ='-inf',
+                                      isfinite=True)
    result = values.argmax(axis)
    result = _maybe_arg_null_out(result, axis, mask, skipna)
    return result
@@ -363,7 +380,8 @@ def nanargmin(values, axis=None, skipna=True):
    """
    Returns -1
in the NA case
    """
-    values, mask, dtype = _get_values(values, skipna, fill_value_typ = '+inf', isfinite=True)
+    values, mask, dtype = _get_values(values, skipna, fill_value_typ='+inf',
+                                      isfinite=True)
    result = values.argmin(axis)
    result = _maybe_arg_null_out(result, axis, mask, skipna)
    return result
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index 249468c332e0c..0836ac7bc22a6 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -317,13 +317,14 @@ def _convert_to_array(self, values, name=None, other=None):
        if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
            # if we have an 'other' of timedelta, but pd.NaT is used here,
            # we are in the wrong path
-            if other is not None and other.dtype == 'timedelta64[ns]' and all(isnull(v) for v in values):
-                values = np.empty(values.shape,dtype=other.dtype)
+            if (other is not None and other.dtype == 'timedelta64[ns]' and
+                    all(isnull(v) for v in values)):
+                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = tslib.iNaT

            # a datelike
            elif not (isinstance(values, (pa.Array, pd.Series)) and
-                    com.is_datetime64_dtype(values)):
+                      com.is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
        elif isinstance(values, pd.DatetimeIndex):
            values = values.to_series()
@@ -353,11 +354,12 @@ def _convert_to_array(self, values, name=None, other=None):

            # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
            if isnull(values).all():
-                values = np.empty(values.shape,dtype=other.dtype)
+                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = tslib.iNaT
            else:
-                raise TypeError("incompatible type [{0}] for a datetime/timedelta"
-                                " operation".format(pa.array(values).dtype))
+                raise TypeError(
+                    'incompatible type [{0}] for a datetime/timedelta '
+                    'operation'.format(pa.array(values).dtype))
        else:
            raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                            " operation".format(pa.array(values).dtype))
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index 885ec2714c47a..c695dc44dbdb5 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -3,13 +3,13 @@
"""
# pylint: disable=E1103,W0231,W0212,W0621
from __future__ import division
-from pandas.compat import map, zip, range, lrange, lmap, u, OrderedDict, OrderedDefaultdict
+from pandas.compat import (map, zip, range, lrange, lmap, u, OrderedDict,
+                           OrderedDefaultdict)
from pandas import compat
import sys
import numpy as np
-from pandas.core.common import (PandasError,
-                                _try_sort, _default_index, _infer_dtype_from_scalar,
-                                notnull)
+from pandas.core.common import (PandasError, _try_sort, _default_index,
+                                _infer_dtype_from_scalar, notnull)
from pandas.core.categorical import Categorical
from pandas.core.index import (Index, MultiIndex, _ensure_index,
                               _get_combined_index)
@@ -100,8 +100,6 @@ def panel_index(time, panels, names=['time', 'panel']):
                      verify_integrity=False)


-
-
class Panel(NDFrame):

    """
@@ -130,9 +128,8 @@ def _constructor(self):

    def __init__(self, data=None, items=None, major_axis=None,
                 minor_axis=None, copy=False, dtype=None):
-        self._init_data(
-            data=data, items=items, major_axis=major_axis, minor_axis=minor_axis,
-            copy=copy, dtype=dtype)
+        self._init_data(data=data, items=items, major_axis=major_axis,
+                        minor_axis=minor_axis, copy=copy, dtype=dtype)

    def _init_data(self, data, copy, dtype, **kwargs):
        """
@@ -327,8 +324,8 @@ def axis_pretty(a):
            v = getattr(self, a)
            if len(v) > 0:
                return u('%s axis: %s to %s') % (a.capitalize(),
-                                                 com.pprint_thing(v[0]),
-                                                 com.pprint_thing(v[-1]))
+                                                 com.pprint_thing(v[0]),
+
com.pprint_thing(v[-1])) else: return u('%s axis: None') % a.capitalize() @@ -535,9 +532,9 @@ def __setitem__(self, key, value): mat = value.values elif isinstance(value, np.ndarray): if value.shape != shape[1:]: - raise ValueError('shape of value must be {0}, shape of given ' - 'object was {1}'.format(shape[1:], - tuple(map(int, value.shape)))) + raise ValueError( + 'shape of value must be {0}, shape of given object was ' + '{1}'.format(shape[1:], tuple(map(int, value.shape)))) mat = np.asarray(value) elif np.isscalar(value): dtype, value = _infer_dtype_from_scalar(value) @@ -589,7 +586,10 @@ def tail(self, n=5): def _needs_reindex_multi(self, axes, method, level): # only allowing multi-index on Panel (and not > dims) - return method is None and not self._is_mixed_type and self._AXIS_LEN <= 3 and com._count_not_none(*axes.values()) == 3 + return (method is None and + not self._is_mixed_type and + self._AXIS_LEN <= 3 and + com._count_not_none(*axes.values()) == 3) def _reindex_multi(self, axes, copy, fill_value): """ we are guaranteed non-Nones in the axes! """ @@ -780,13 +780,13 @@ def _ixs(self, i, axis=0): # xs cannot handle a non-scalar key, so just reindex here if _is_list_like(key): - indexer = { self._get_axis_name(axis): key } + indexer = {self._get_axis_name(axis): key} return self.reindex(**indexer) # a reduction if axis == 0: values = self._data.iget(i) - return self._box_item_values(key,values) + return self._box_item_values(key, values) # xs by position self._consolidate_inplace() @@ -904,11 +904,11 @@ def _construct_return_type(self, result, axes=None, **kwargs): elif self.ndim == ndim + 1: if axes is None: return self._constructor_sliced(result) - return self._constructor_sliced(result, - **self._extract_axes_for_slice(self, axes)) + return self._constructor_sliced( + result, **self._extract_axes_for_slice(self, axes)) - raise PandasError("invalid _construct_return_type [self->%s] [result->%s]" % - (self.ndim, result.ndim)) + raise PandasError('invalid _construct_return_type [self->%s] ' + '[result->%s]' % (self.ndim, result.ndim)) def _wrap_result(self, result, axis): axis = self._get_axis_name(axis) @@ -920,15 +920,19 @@ def _wrap_result(self, result, axis): @Appender(_shared_docs['reindex'] % _shared_doc_kwargs) def reindex(self, items=None, major_axis=None, minor_axis=None, **kwargs): - major_axis = major_axis if major_axis is not None else kwargs.pop('major', None) - minor_axis = minor_axis if minor_axis is not None else kwargs.pop('minor', None) + major_axis = (major_axis if major_axis is not None + else kwargs.pop('major', None)) + minor_axis = (minor_axis if minor_axis is not None + else kwargs.pop('minor', None)) return super(Panel, self).reindex(items=items, major_axis=major_axis, minor_axis=minor_axis, **kwargs) @Appender(_shared_docs['rename'] % _shared_doc_kwargs) def rename(self, items=None, major_axis=None, minor_axis=None, **kwargs): - major_axis = major_axis if major_axis is not None else kwargs.pop('major', None) - minor_axis = minor_axis if minor_axis is not None else kwargs.pop('minor', None) + major_axis = (major_axis if major_axis is not None + else kwargs.pop('major', None)) + minor_axis = (minor_axis if minor_axis is not None + else kwargs.pop('minor', None)) return super(Panel, self).rename(items=items, major_axis=major_axis, minor_axis=minor_axis, **kwargs) @@ -939,6 +943,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, method=method, level=level, copy=copy, limit=limit, fill_value=fill_value) + 
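The reindex and rename overrides just above both normalize the legacy 'major'/'minor' axis aliases before delegating to the shared NDFrame implementation: the explicit keyword wins, otherwise the alias is popped out of **kwargs. A hedged sketch of that resolution pattern, using hypothetical helper names (resolve_axis_alias and reindex_args are illustrative, not part of pandas):

def resolve_axis_alias(explicit, kwargs, alias):
    # explicit keyword wins; otherwise fall back to the legacy alias
    return explicit if explicit is not None else kwargs.pop(alias, None)

def reindex_args(items=None, major_axis=None, minor_axis=None, **kwargs):
    major_axis = resolve_axis_alias(major_axis, kwargs, 'major')
    minor_axis = resolve_axis_alias(minor_axis, kwargs, 'minor')
    return items, major_axis, minor_axis

# both spellings resolve to the same axes
print(reindex_args(major=[1, 2]))       # (None, [1, 2], None)
print(reindex_args(major_axis=[1, 2]))  # (None, [1, 2], None)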
 @Appender(_shared_docs['transpose'] % _shared_doc_kwargs)
    def transpose(self, *args, **kwargs):
        return super(Panel, self).transpose(*args, **kwargs)

@@ -1225,11 +1230,11 @@ def _add_aggregate_operations(cls, use_numexpr=True):

        # doc string substitutions
        _agg_doc = """
-Wrapper method for %s
+Wrapper method for %%s

Parameters
----------
-other : """ + "%s or %s" % (cls._constructor_sliced.__name__, cls.__name__) + """
+other : %s or %s""" % (cls._constructor_sliced.__name__, cls.__name__) + """
axis : {""" + ', '.join(cls._AXIS_ORDERS) + "}" + """
    Axis to broadcast over

@@ -1237,19 +1242,22 @@ def _add_aggregate_operations(cls, use_numexpr=True):
-------
""" + cls.__name__ + "\n"

-        def _panel_arith_method(op, name, str_rep = None, default_axis=None,
+        def _panel_arith_method(op, name, str_rep=None, default_axis=None,
                                fill_zeros=None, **eval_kwargs):
            def na_op(x, y):
                try:
-                    result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs)
+                    result = expressions.evaluate(op, str_rep, x, y,
+                                                  raise_on_error=True,
+                                                  **eval_kwargs)
                except TypeError:
                    result = op(x, y)

-                # handles discrepancy between numpy and numexpr on division/mod by 0
-                # though, given that these are generally (always?) non-scalars, I'm
-                # not sure whether it's worth it at the moment
-                result = com._fill_zeros(result,y,fill_zeros)
+                # handles discrepancy between numpy and numexpr on division/mod
+                # by 0; though, given that these are generally (always?)
+                # non-scalars, I'm not sure whether it's worth it at the moment
+                result = com._fill_zeros(result, y, fill_zeros)
                return result
+
            @Substitution(name)
            @Appender(_agg_doc)
            def f(self, other, axis=0):
@@ -1258,9 +1266,9 @@ def f(self, other, axis=0):
            return f

        # add `div`, `mul`, `pow`, etc..
-        ops.add_flex_arithmetic_methods(cls, _panel_arith_method,
-                                        use_numexpr=use_numexpr,
-                                        flex_comp_method=ops._comp_method_PANEL)
+        ops.add_flex_arithmetic_methods(
+            cls, _panel_arith_method, use_numexpr=use_numexpr,
+            flex_comp_method=ops._comp_method_PANEL)


Panel._setup_axes(axes=['items', 'major_axis', 'minor_axis'], info_axis=0,
@@ -1276,5 +1284,3 @@ def f(self, other, axis=0):

WidePanel = Panel
LongPanel = DataFrame
-
-
diff --git a/pandas/core/panel4d.py b/pandas/core/panel4d.py
index 5679506cc6bb8..3d480464388c8 100644
--- a/pandas/core/panel4d.py
+++ b/pandas/core/panel4d.py
@@ -5,15 +5,14 @@

Panel4D = create_nd_panel_factory(
    klass_name='Panel4D',
-    orders =['labels', 'items', 'major_axis', 'minor_axis'],
-    slices ={'labels': 'labels', 'items': 'items',
-             'major_axis': 'major_axis',
-             'minor_axis': 'minor_axis'},
+    orders=['labels', 'items', 'major_axis', 'minor_axis'],
+    slices={'labels': 'labels', 'items': 'items', 'major_axis': 'major_axis',
+            'minor_axis': 'minor_axis'},
    slicer=Panel,
-    aliases ={'major': 'major_axis', 'minor': 'minor_axis'},
+    aliases={'major': 'major_axis', 'minor': 'minor_axis'},
    stat_axis=2,
-    ns=dict(__doc__= """
-    Represents a 4 dimensonal structured
+    ns=dict(__doc__="""
+    Represents a 4 dimensional structured data set

    Parameters
    ----------
@@ -28,10 +27,9 @@
        Data type to force, otherwise infer
    copy : boolean, default False
        Copy data from inputs.
Only affects DataFrame / 2d ndarray input - """ + """) +) - ) - ) def panel4d_init(self, data=None, labels=None, items=None, major_axis=None, minor_axis=None, copy=False, dtype=None): diff --git a/pandas/core/panelnd.py b/pandas/core/panelnd.py index 9ccce1edc9067..8ac84c0d91adc 100644 --- a/pandas/core/panelnd.py +++ b/pandas/core/panelnd.py @@ -5,27 +5,24 @@ import pandas.compat as compat - -def create_nd_panel_factory(klass_name, orders, slices, slicer, aliases=None, stat_axis=2, info_axis=0, ns=None): +def create_nd_panel_factory(klass_name, orders, slices, slicer, aliases=None, + stat_axis=2, info_axis=0, ns=None): """ manufacture a n-d class: - parameters + Parameters ---------- klass_name : the klass name - orders : the names of the axes in order (highest to lowest) - slices : a dictionary that defines how the axes map to the sliced axis - slicer : the class representing a slice of this panel - aliases : a dictionary defining aliases for various axes - default = { major : major_axis, minor : minor_axis } - stat_axis : the default statistic axis - default = 2 - info_axis : the info axis - - - returns + orders : the names of the axes in order (highest to lowest) + slices : a dictionary that defines how the axes map to the slice axis + slicer : the class representing a slice of this panel + aliases : a dictionary defining aliases for various axes + default = { major : major_axis, minor : minor_axis } + stat_axis : the default statistic axis default = 2 + info_axis : the info axis + + Returns ------- - a class object reprsenting this panel - + a class object representing this panel """ @@ -42,11 +39,8 @@ def create_nd_panel_factory(klass_name, orders, slices, slicer, aliases=None, st klass = type(klass_name, (slicer,), ns) # setup the axes - klass._setup_axes(axes = orders, - info_axis = info_axis, - stat_axis = stat_axis, - aliases = aliases, - slicers = slices) + klass._setup_axes(axes=orders, info_axis=info_axis, stat_axis=stat_axis, + aliases=aliases, slicers=slices) klass._constructor_sliced = slicer @@ -101,7 +95,8 @@ def _combine_with_constructor(self, other, func): klass._combine_with_constructor = _combine_with_constructor # set as NonImplemented operations which we don't support - for f in ['to_frame', 'to_excel', 'to_sparse', 'groupby', 'join', 'filter', 'dropna', 'shift']: + for f in ['to_frame', 'to_excel', 'to_sparse', 'groupby', 'join', 'filter', + 'dropna', 'shift']: def func(self, *args, **kwargs): raise NotImplementedError setattr(klass, f, func) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index c2c1a2931d4aa..24a4797759dab 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -71,12 +71,14 @@ def __init__(self, values, index, level=-1, value_columns=None): levels = index.levels labels = index.labels - def _make_index(lev,lab): - i = lev.__class__(_make_index_array_level(lev.values,lab)) + + def _make_index(lev, lab): + i = lev.__class__(_make_index_array_level(lev.values, lab)) i.name = lev.name return i - self.new_index_levels = list([ _make_index(lev,lab) for lev,lab in zip(levels,labels) ]) + self.new_index_levels = [_make_index(lev, lab) + for lev, lab in zip(levels, labels)] self.new_index_names = list(index.names) self.removed_name = self.new_index_names.pop(self.level) @@ -154,7 +156,8 @@ def get_result(self): mask = isnull(index) if mask.any(): l = np.arange(len(index)) - values, orig_values = np.empty((len(index),values.shape[1])), values + values, orig_values = (np.empty((len(index), values.shape[1])), + values) 
values.fill(np.nan) values_indexer = com._ensure_int64(l[~mask]) for i, j in enumerate(values_indexer): @@ -224,7 +227,7 @@ def get_new_index(self): result_labels = [] for cur in self.sorted_labels[:-1]: labels = cur.take(self.compressor) - labels = _make_index_array_level(labels,cur) + labels = _make_index_array_level(labels, cur) result_labels.append(labels) # construct the new index @@ -240,26 +243,27 @@ def get_new_index(self): return new_index -def _make_index_array_level(lev,lab): +def _make_index_array_level(lev, lab): """ create the combined index array, preserving nans, return an array """ mask = lab == -1 if not mask.any(): return lev l = np.arange(len(lab)) - mask_labels = np.empty(len(mask[mask]),dtype=object) + mask_labels = np.empty(len(mask[mask]), dtype=object) mask_labels.fill(np.nan) mask_indexer = com._ensure_int64(l[mask]) labels = lev labels_indexer = com._ensure_int64(l[~mask]) - new_labels = np.empty(tuple([len(lab)]),dtype=object) + new_labels = np.empty(tuple([len(lab)]), dtype=object) new_labels[labels_indexer] = labels - new_labels[mask_indexer] = mask_labels + new_labels[mask_indexer] = mask_labels return new_labels + def _unstack_multiple(data, clocs): if len(clocs) == 0: return data @@ -341,7 +345,8 @@ def pivot(self, index=None, columns=None, values=None): return indexed.unstack(columns) else: indexed = Series(self[values].values, - index=MultiIndex.from_arrays([self[index], self[columns]])) + index=MultiIndex.from_arrays([self[index], + self[columns]])) return indexed.unstack(columns) @@ -540,9 +545,10 @@ def _stack_multi_columns(frame, level=-1, dropna=True): # tuple list excluding level for grouping columns if len(frame.columns.levels) > 2: - tuples = list(zip(*[lev.values.take(lab) - for lev, lab in zip(this.columns.levels[:-1], - this.columns.labels[:-1])])) + tuples = list(zip(*[ + lev.values.take(lab) for lev, lab in + zip(this.columns.levels[:-1], this.columns.labels[:-1]) + ])) unique_groups = [key for key, _ in itertools.groupby(tuples)] new_names = this.columns.names[:-1] new_columns = MultiIndex.from_tuples(unique_groups, names=new_names) @@ -678,7 +684,8 @@ def melt(frame, id_vars=None, value_vars=None, frame = frame.copy() if col_level is not None: # allow list or other? 
- frame.columns = frame.columns.get_level_values(col_level) # frame is a copy + # frame is a copy + frame.columns = frame.columns.get_level_values(col_level) if var_name is None: if isinstance(frame.columns, MultiIndex): @@ -848,7 +855,8 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False): 2 0 0 1 """ - cat = Categorical.from_array(Series(data)) # Series avoids inconsistent NaN handling + # Series avoids inconsistent NaN handling + cat = Categorical.from_array(Series(data)) levels = cat.levels # if all NaN @@ -957,6 +965,9 @@ def block2d_to_blocknd(values, items, shape, labels, ref_items=None): def factor_indexer(shape, labels): - """ given a tuple of shape and a list of Categorical labels, return the expanded label indexer """ + """ given a tuple of shape and a list of Categorical labels, return the + expanded label indexer + """ mult = np.array(shape)[::-1].cumprod()[::-1] - return com._ensure_platform_int(np.sum(np.array(labels).T * np.append(mult, [1]), axis=1).T) + return com._ensure_platform_int( + np.sum(np.array(labels).T * np.append(mult, [1]), axis=1).T) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3e8202c7ec0b6..cf704e9aef174 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -22,8 +22,8 @@ _values_from_object, _possibly_cast_to_datetime, _possibly_castable, _possibly_convert_platform, - ABCSparseArray, _maybe_match_name, _ensure_object, - SettingWithCopyError) + ABCSparseArray, _maybe_match_name, + _ensure_object, SettingWithCopyError) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, _ensure_index, _handle_legacy_indexes) @@ -63,6 +63,7 @@ axes_single_arg="{0,'index'}" ) + def _coerce_method(converter): """ install the scalar coercion methods """ @@ -224,8 +225,8 @@ def __init__(self, data=None, index=None, dtype=None, name=None, self._set_axis(0, index, fastpath=True) @classmethod - def from_array(cls, arr, index=None, name=None, copy=False, fastpath=False): - + def from_array(cls, arr, index=None, name=None, copy=False, + fastpath=False): # return a sparse series here if isinstance(arr, ABCSparseArray): from pandas.sparse.series import SparseSeries @@ -336,7 +337,8 @@ def __len__(self): return len(self._data) def view(self, dtype=None): - return self._constructor(self.values.view(dtype), index=self.index).__finalize__(self) + return self._constructor(self.values.view(dtype), + index=self.index).__finalize__(self) def __array__(self, result=None): """ the array interface, return my values """ @@ -346,7 +348,8 @@ def __array_wrap__(self, result): """ Gets called prior to a ufunc (and after) """ - return self._constructor(result, index=self.index, copy=False).__finalize__(self) + return self._constructor(result, index=self.index, + copy=False).__finalize__(self) def __contains__(self, key): return key in self.index @@ -455,7 +458,7 @@ def _ixs(self, i, axis=0): raise except: if isinstance(i, slice): - indexer = self.index._convert_slice_indexer(i,typ='iloc') + indexer = self.index._convert_slice_indexer(i, typ='iloc') return self._get_values(indexer) else: label = self.index[i] @@ -472,8 +475,9 @@ def _is_mixed_type(self): def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): if raise_on_error: _check_slice_bounds(slobj, self.values) - slobj = self.index._convert_slice_indexer(slobj,typ=typ or 'getitem') - return self._constructor(self.values[slobj], index=self.index[slobj]).__finalize__(self) + slobj = self.index._convert_slice_indexer(slobj, typ=typ or 'getitem') + return 
self._constructor(self.values[slobj], + index=self.index[slobj]).__finalize__(self) def __getitem__(self, key): try: @@ -510,7 +514,7 @@ def __getitem__(self, key): def _get_with(self, key): # other: fancy integer or otherwise if isinstance(key, slice): - indexer = self.index._convert_slice_indexer(key,typ='getitem') + indexer = self.index._convert_slice_indexer(key, typ='getitem') return self._get_values(indexer) else: if isinstance(key, tuple): @@ -564,11 +568,13 @@ def _get_values_tuple(self, key): # If key is contained, would have returned by now indexer, new_index = self.index.get_loc_level(key) - return self._constructor(self.values[indexer], index=new_index).__finalize__(self) + return self._constructor(self.values[indexer], + index=new_index).__finalize__(self) def _get_values(self, indexer): try: - return self._constructor(self._data.get_slice(indexer), fastpath=True).__finalize__(self) + return self._constructor(self._data.get_slice(indexer), + fastpath=True).__finalize__(self) except Exception: return self.values[indexer] @@ -605,7 +611,8 @@ def __setitem__(self, key, value): return except TypeError as e: - if isinstance(key, tuple) and not isinstance(self.index, MultiIndex): + if isinstance(key, tuple) and not isinstance(self.index, + MultiIndex): raise ValueError("Can only tuple-index with a MultiIndex") # python 3 type errors should be raised @@ -635,7 +642,7 @@ def _set_with_engine(self, key, value): def _set_with(self, key, value): # other: fancy integer or otherwise if isinstance(key, slice): - indexer = self.index._convert_slice_indexer(key,typ='getitem') + indexer = self.index._convert_slice_indexer(key, typ='getitem') return self._set_values(indexer, value) else: if isinstance(key, tuple): @@ -677,7 +684,7 @@ def _set_labels(self, key, value): def _set_values(self, key, value): if isinstance(key, Series): key = key.values - self._data = self._data.setitem(key,value) + self._data = self._data.setitem(key, value) # help out SparseSeries _get_val_at = ndarray.__getitem__ @@ -705,7 +712,8 @@ def repeat(self, reps): """ new_index = self.index.repeat(reps) new_values = self.values.repeat(reps) - return self._constructor(new_values, index=new_index).__finalize__(self) + return self._constructor(new_values, + index=new_index).__finalize__(self) def reshape(self, *args, **kwargs): """ @@ -722,7 +730,6 @@ def reshape(self, *args, **kwargs): return self.values.reshape(shape, **kwargs) - def get(self, label, default=None): """ Returns value occupying requested label, default to specified @@ -824,7 +831,8 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): # set name if it was passed, otherwise, keep the previous name self.name = name or self.name else: - return self._constructor(self.values.copy(), index=new_index).__finalize__(self) + return self._constructor(self.values.copy(), + index=new_index).__finalize__(self) elif inplace: raise TypeError('Cannot reset_index inplace on a Series ' 'to create a DataFrame') @@ -1035,7 +1043,8 @@ def to_frame(self, name=None): Parameters ---------- name : object, default None - The passed name should substitute for the series name (if it has one). + The passed name should substitute for the series name (if it has + one). 
Returns ------- @@ -1094,18 +1103,21 @@ def count(self, level=None): level_index = self.index.levels[level] if len(self) == 0: - return self._constructor(0, index=level_index).__finalize__(self) + return self._constructor(0, index=level_index)\ + .__finalize__(self) # call cython function max_bin = len(level_index) labels = com._ensure_int64(self.index.labels[level]) counts = lib.count_level_1d(mask.view(pa.uint8), labels, max_bin) - return self._constructor(counts, index=level_index).__finalize__(self) + return self._constructor(counts, + index=level_index).__finalize__(self) return notnull(_values_from_object(self)).sum() - def value_counts(self, normalize=False, sort=True, ascending=False, bins=None): + def value_counts(self, normalize=False, sort=True, ascending=False, + bins=None): """ Returns Series containing counts of unique values. The resulting Series will be in descending order so that the first element is the most @@ -1195,7 +1207,6 @@ def drop_duplicates(self, take_last=False, inplace=False): else: return result - def duplicated(self, take_last=False): """ Return boolean Series denoting duplicate values @@ -1211,7 +1222,8 @@ def duplicated(self, take_last=False): """ keys = _ensure_object(self.values) duplicated = lib.duplicated(keys, take_last=take_last) - return self._constructor(duplicated, index=self.index).__finalize__(self) + return self._constructor(duplicated, + index=self.index).__finalize__(self) def idxmin(self, axis=None, out=None, skipna=True): """ @@ -1276,7 +1288,8 @@ def round(self, decimals=0, out=None): """ result = _values_from_object(self).round(decimals, out=out) if out is None: - result = self._constructor(result, index=self.index).__finalize__(self) + result = self._constructor(result, + index=self.index).__finalize__(self) return result @@ -1448,7 +1461,8 @@ def autocorr(self): def dot(self, other): """ - Matrix multiplication with DataFrame or inner-product with Series objects + Matrix multiplication with DataFrame or inner-product with Series + objects Parameters ---------- @@ -1692,7 +1706,8 @@ def sort_index(self, ascending=True): ascending=ascending) new_values = self.values.take(indexer) - return self._constructor(new_values, index=new_labels).__finalize__(self) + return self._constructor(new_values, + index=new_labels).__finalize__(self) def argsort(self, axis=0, kind='quicksort', order=None): """ @@ -1720,7 +1735,8 @@ def argsort(self, axis=0, kind='quicksort', order=None): -1, index=self.index, name=self.name, dtype='int64') notmask = -mask result[notmask] = np.argsort(values[notmask], kind=kind) - return self._constructor(result, index=self.index).__finalize__(self) + return self._constructor(result, + index=self.index).__finalize__(self) else: return self._constructor( np.argsort(values, kind=kind), index=self.index, @@ -1802,8 +1818,8 @@ def _try_kind_sort(arr): sortedIdx[n:] = idx[good][argsorted] sortedIdx[:n] = idx[bad] - return self._constructor(arr[sortedIdx], - index=self.index[sortedIdx]).__finalize__(self) + return self._constructor(arr[sortedIdx], index=self.index[sortedIdx])\ + .__finalize__(self) def sortlevel(self, level=0, ascending=True): """ @@ -1825,7 +1841,8 @@ def sortlevel(self, level=0, ascending=True): new_index, indexer = self.index.sortlevel(level, ascending=ascending) new_values = self.values.take(indexer) - return self._constructor(new_values, index=new_index).__finalize__(self) + return self._constructor(new_values, + index=new_index).__finalize__(self) def swaplevel(self, i, j, copy=True): """ @@ -1954,10 +1971,12 
@@ def map_f(values, f): indexer = arg.index.get_indexer(values) new_values = com.take_1d(arg.values, indexer) - return self._constructor(new_values, index=self.index).__finalize__(self) + return self._constructor(new_values, + index=self.index).__finalize__(self) else: mapped = map_f(values, arg) - return self._constructor(mapped, index=self.index).__finalize__(self) + return self._constructor(mapped, + index=self.index).__finalize__(self) def apply(self, func, convert_dtype=True, args=(), **kwds): """ @@ -2000,7 +2019,8 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): from pandas.core.frame import DataFrame return DataFrame(mapped.tolist(), index=self.index) else: - return self._constructor(mapped, index=self.index).__finalize__(self) + return self._constructor(mapped, + index=self.index).__finalize__(self) def _reduce(self, op, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): @@ -2018,7 +2038,9 @@ def _reindex_indexer(self, new_index, indexer, copy): return self._constructor(new_values, index=new_index) def _needs_reindex_multi(self, axes, method, level): - """ check if we do need a multi reindex; this is for compat with higher dims """ + """ check if we do need a multi reindex; this is for compat with + higher dims + """ return False @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs) @@ -2057,7 +2079,8 @@ def take(self, indices, axis=0, convert=True): indices = com._ensure_platform_int(indices) new_index = self.index.take(indices) new_values = self.values.take(indices) - return self._constructor(new_values, index=new_index).__finalize__(self) + return self._constructor(new_values, + index=new_index).__finalize__(self) def isin(self, values): """ @@ -2314,7 +2337,8 @@ def asof(self, where): @property def weekday(self): - return self._constructor([d.weekday() for d in self.index], index=self.index).__finalize__(self) + return self._constructor([d.weekday() for d in self.index], + index=self.index).__finalize__(self) def tz_convert(self, tz, copy=True): """ @@ -2336,7 +2360,8 @@ def tz_convert(self, tz, copy=True): if copy: new_values = new_values.copy() - return self._constructor(new_values, index=new_index).__finalize__(self) + return self._constructor(new_values, + index=new_index).__finalize__(self) def tz_localize(self, tz, copy=True, infer_dst=False): """ @@ -2373,7 +2398,8 @@ def tz_localize(self, tz, copy=True, infer_dst=False): if copy: new_values = new_values.copy() - return self._constructor(new_values, index=new_index).__finalize__(self) + return self._constructor(new_values, + index=new_index).__finalize__(self) @cache_readonly def str(self): @@ -2401,7 +2427,8 @@ def to_timestamp(self, freq=None, how='start', copy=True): new_values = new_values.copy() new_index = self.index.to_timestamp(freq=freq, how=how) - return self._constructor(new_values, index=new_index).__finalize__(self) + return self._constructor(new_values, + index=new_index).__finalize__(self) def to_period(self, freq=None, copy=True): """ @@ -2423,7 +2450,8 @@ def to_period(self, freq=None, copy=True): if freq is None: freq = self.index.freqstr or self.index.inferred_freq new_index = self.index.to_period(freq=freq) - return self._constructor(new_values, index=new_index).__finalize__(self) + return self._constructor(new_values, + index=new_index).__finalize__(self) Series._setup_axes(['index'], info_axis=0, stat_axis=0, aliases={'rows': 0}) diff --git a/pandas/core/sparse.py b/pandas/core/sparse.py index 7b9caaa3a0139..84149e5598f82 100644 --- a/pandas/core/sparse.py 
+++ b/pandas/core/sparse.py @@ -1,6 +1,6 @@ """ -Data structures for sparse float data. Life is made simpler by dealing only with -float64 data +Data structures for sparse float data. Life is made simpler by dealing only +with float64 data """ # pylint: disable=W0611 diff --git a/pandas/core/strings.py b/pandas/core/strings.py index c1bd369686969..0df9db2ebd06c 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -9,6 +9,7 @@ import pandas.lib as lib import warnings + def _get_array_list(arr, others): if isinstance(others[0], (list, np.ndarray)): arrays = [arr] + list(others) @@ -115,6 +116,7 @@ def g(x): else: return lib.map_infer(arr, f) + def str_title(arr): """ Convert strings to titlecased version @@ -399,29 +401,31 @@ def f(x): return None m = regex.search(x) if m: - return m.groups()[0] # may be None + return m.groups()[0] # may be None else: return None else: empty_row = Series(regex.groups*[None]) + def f(x): if not isinstance(x, compat.string_types): return empty_row m = regex.search(x) if m: - return Series(list(m.groups())) # may contain None + return Series(list(m.groups())) # may contain None else: return empty_row result = arr.apply(f) result.replace({None: np.nan}, inplace=True) if regex.groups > 1: - result = DataFrame(result) # Don't rely on the wrapper; name columns. + result = DataFrame(result) # Don't rely on the wrapper; name columns. names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) result.columns = [names.get(1 + i, i) for i in range(regex.groups)] else: result.name = regex.groupindex.get(0) return result + def str_join(arr, sep): """ Join lists contained as elements in array, a la str.join