diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 958571fdc2218..b970923ff0fe3 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -32,11 +32,10 @@
 # goal is to be able to define the docs close to function, while still being
 # able to share
 _shared_docs = dict()
-_shared_doc_kwargs = dict(axes='keywords for axes',
-                          klass='NDFrame',
+_shared_doc_kwargs = dict(axes='keywords for axes', klass='NDFrame',
                           axes_single_arg='int or labels for object',
                           args_transpose='axes to permute (int or label for'
-                          ' object)')
+                                         ' object)')
 
 
 def is_dictlike(x):
@@ -69,7 +68,6 @@ def _single_replace(self, to_replace, method, inplace, limit):
 
 
 class NDFrame(PandasObject):
-
     """
     N-dimensional analogue of DataFrame. Store multi-dimensional in a
    size-mutable, labeled data structure
@@ -80,10 +78,10 @@ class NDFrame(PandasObject):
     axes : list
     copy : boolean, default False
     """
-    _internal_names = ['_data', '_cacher', '_item_cache', '_cache',
-                       'is_copy', '_subtyp', '_index',
-                       '_default_kind', '_default_fill_value', '_metadata',
-                       '__array_struct__', '__array_interface__']
+    _internal_names = ['_data', '_cacher', '_item_cache', '_cache', 'is_copy',
+                       '_subtyp', '_index', '_default_kind',
+                       '_default_fill_value', '_metadata', '__array_struct__',
+                       '__array_interface__']
     _internal_names_set = set(_internal_names)
     _accessors = frozenset([])
     _metadata = []
@@ -123,8 +121,9 @@ def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
         """ passed a manager and a axes dict """
         for a, axe in axes.items():
             if axe is not None:
-                mgr = mgr.reindex_axis(
-                    axe, axis=self._get_block_manager_axis(a), copy=False)
+                mgr = mgr.reindex_axis(axe,
+                                       axis=self._get_block_manager_axis(a),
+                                       copy=False)
 
         # make a copy if explicitly requested
         if copy:
@@ -135,7 +134,7 @@ def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
             mgr = mgr.astype(dtype=dtype)
         return mgr
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Construction
 
     @property
@@ -154,7 +153,7 @@ def __unicode__(self):
     def _dir_additions(self):
         """ add the string-like attributes from the info_axis """
         return set([c for c in self._info_axis
-                   if isinstance(c, string_types) and isidentifier(c)])
+                    if isinstance(c, string_types) and isidentifier(c)])
 
     @property
     def _constructor_sliced(self):
@@ -170,31 +169,32 @@ def _constructor_expanddim(self):
         """
         raise NotImplementedError
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Axis
 
     @classmethod
-    def _setup_axes(
-        cls, axes, info_axis=None, stat_axis=None, aliases=None, slicers=None,
-        axes_are_reversed=False, build_axes=True, ns=None):
-        """ provide axes setup for the major PandasObjects
-
-        axes : the names of the axes in order (lowest to highest)
-        info_axis_num : the axis of the selector dimension (int)
-        stat_axis_num : the number of axis for the default stats (int)
-        aliases : other names for a single axis (dict)
-        slicers : how axes slice to others (dict)
-        axes_are_reversed : boolean whether to treat passed axes as
-            reversed (DataFrame)
-        build_axes : setup the axis properties (default True)
-        """
+    def _setup_axes(cls, axes, info_axis=None, stat_axis=None, aliases=None,
+                    slicers=None, axes_are_reversed=False, build_axes=True,
+                    ns=None):
+        """Provide axes setup for the major PandasObjects.
+
+        Parameters
+        ----------
+        axes : the names of the axes in order (lowest to highest)
+        info_axis_num : the axis of the selector dimension (int)
+        stat_axis_num : the number of axis for the default stats (int)
+        aliases : other names for a single axis (dict)
+        slicers : how axes slice to others (dict)
+        axes_are_reversed : boolean whether to treat passed axes as
+            reversed (DataFrame)
+        build_axes : setup the axis properties (default True)
+        """
         cls._AXIS_ORDERS = axes
         cls._AXIS_NUMBERS = dict((a, i) for i, a in enumerate(axes))
         cls._AXIS_LEN = len(axes)
         cls._AXIS_ALIASES = aliases or dict()
-        cls._AXIS_IALIASES = dict((v, k)
-                                  for k, v in cls._AXIS_ALIASES.items())
+        cls._AXIS_IALIASES = dict((v, k) for k, v in cls._AXIS_ALIASES.items())
         cls._AXIS_NAMES = dict(enumerate(axes))
         cls._AXIS_SLICEMAP = slicers or None
         cls._AXIS_REVERSED = axes_are_reversed
@@ -234,29 +234,31 @@ def set_axis(a, i):
             setattr(cls, k, v)
 
     def _construct_axes_dict(self, axes=None, **kwargs):
-        """ return an axes dictionary for myself """
+        """Return an axes dictionary for myself."""
         d = dict([(a, self._get_axis(a)) for a in (axes or self._AXIS_ORDERS)])
         d.update(kwargs)
         return d
 
     @staticmethod
     def _construct_axes_dict_from(self, axes, **kwargs):
-        """ return an axes dictionary for the passed axes """
+        """Return an axes dictionary for the passed axes."""
         d = dict([(a, ax) for a, ax in zip(self._AXIS_ORDERS, axes)])
         d.update(kwargs)
         return d
 
     def _construct_axes_dict_for_slice(self, axes=None, **kwargs):
-        """ return an axes dictionary for myself """
+        """Return an axes dictionary for myself."""
         d = dict([(self._AXIS_SLICEMAP[a], self._get_axis(a))
-                 for a in (axes or self._AXIS_ORDERS)])
+                  for a in (axes or self._AXIS_ORDERS)])
         d.update(kwargs)
         return d
 
     def _construct_axes_from_arguments(self, args, kwargs, require_all=False):
-        """ construct and returns axes if supplied in args/kwargs
-        if require_all, raise if all axis arguments are not supplied
-        return a tuple of (axes, kwargs) """
+        """Construct and return axes if supplied in args/kwargs.
+
+        If require_all, raise if all axis arguments are not supplied
+        return a tuple of (axes, kwargs).
+        """
         # construct the args
         args = list(args)
@@ -267,10 +269,8 @@ def _construct_axes_from_arguments(self, args, kwargs, require_all=False):
             if alias is not None:
                 if a in kwargs:
                     if alias in kwargs:
-                        raise TypeError(
-                            "arguments are mutually exclusive for [%s,%s]" %
-                            (a, alias)
-                        )
+                        raise TypeError("arguments are mutually exclusive "
+                                        "for [%s,%s]" % (a, alias))
                     continue
                 if alias in kwargs:
                     kwargs[a] = kwargs.pop(alias)
@@ -280,10 +280,10 @@ def _construct_axes_from_arguments(self, args, kwargs, require_all=False):
             if a not in kwargs:
                 try:
                     kwargs[a] = args.pop(0)
-                except (IndexError):
+                except IndexError:
                     if require_all:
-                        raise TypeError(
-                            "not enough/duplicate arguments specified!")
+                        raise TypeError("not enough/duplicate arguments "
+                                        "specified!")
 
         axes = dict([(a, kwargs.pop(a, None)) for a in self._AXIS_ORDERS])
         return axes, kwargs
@@ -331,7 +331,7 @@ def _get_axis(self, axis):
         return getattr(self, name)
 
     def _get_block_manager_axis(self, axis):
-        """ map the axis to the block_manager axis """
+        """Map the axis to the block_manager axis."""
         axis = self._get_axis_number(axis)
         if self._AXIS_REVERSED:
             m = self._AXIS_LEN - 1
@@ -384,24 +384,24 @@ def _stat_axis(self):
 
     @property
     def shape(self):
-        "Return a tuple of axis dimensions"
+        """Return a tuple of axis dimensions"""
         return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS)
 
     @property
     def axes(self):
-        "Return index label(s) of the internal NDFrame"
+        """Return index label(s) of the internal NDFrame"""
         # we do it this way because if we have reversed axes, then
         # the block manager shows then reversed
         return [self._get_axis(a) for a in self._AXIS_ORDERS]
 
     @property
     def ndim(self):
-        "Number of axes / array dimensions"
+        """Number of axes / array dimensions"""
         return self._data.ndim
 
     @property
     def size(self):
-        "number of elements in the NDFrame"
+        """number of elements in the NDFrame"""
         return np.prod(self.shape)
 
     def _expand_axes(self, key):
@@ -418,7 +418,7 @@ def _expand_axes(self, key):
 
     def set_axis(self, axis, labels):
         """ public verson of axis assignment """
-        setattr(self,self._get_axis_name(axis),labels)
+        setattr(self, self._get_axis_name(axis), labels)
 
     def _set_axis(self, axis, labels):
         self._data.set_axis(axis, labels)
@@ -448,26 +448,26 @@ def _set_axis(self, axis, labels):
     def transpose(self, *args, **kwargs):
 
         # construct the args
-        axes, kwargs = self._construct_axes_from_arguments(
-            args, kwargs, require_all=True)
+        axes, kwargs = self._construct_axes_from_arguments(args, kwargs,
+                                                           require_all=True)
         axes_names = tuple([self._get_axis_name(axes[a])
                             for a in self._AXIS_ORDERS])
         axes_numbers = tuple([self._get_axis_number(axes[a])
-                             for a in self._AXIS_ORDERS])
+                              for a in self._AXIS_ORDERS])
 
         # we must have unique axes
         if len(axes) != len(set(axes)):
             raise ValueError('Must specify %s unique axes' % self._AXIS_LEN)
 
-        new_axes = self._construct_axes_dict_from(
-            self, [self._get_axis(x) for x in axes_names])
+        new_axes = self._construct_axes_dict_from(self, [self._get_axis(x)
+                                                         for x in axes_names])
         new_values = self.values.transpose(axes_numbers)
         if kwargs.pop('copy', None) or (len(args) and args[-1]):
             new_values = new_values.copy()
 
         if kwargs:
             raise TypeError('transpose() got an unexpected keyword '
-                'argument "{0}"'.format(list(kwargs.keys())[0]))
+                            'argument "{0}"'.format(list(kwargs.keys())[0]))
 
         return self._constructor(new_values, **new_axes).__finalize__(self)
@@ -511,10 +511,10 @@ def pop(self, item):
         return result
 
     def squeeze(self):
-        """ squeeze length 1 dimensions """
+        """Squeeze length 1 dimensions."""
         try:
             return self.iloc[tuple([0 if len(a) == 1 else slice(None)
-                                   for a in self.axes])]
+                                    for a in self.axes])]
         except:
             return self
@@ -537,7 +537,7 @@ def swaplevel(self, i, j, axis=0):
         result._data.set_axis(axis, labels.swaplevel(i, j))
         return result
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Rename
 
     # TODO: define separate funcs for DataFrame, Series and Panel so you can
@@ -573,14 +573,15 @@ def rename(self, *args, **kwargs):
 
         if kwargs:
             raise TypeError('rename() got an unexpected keyword '
-                'argument "{0}"'.format(list(kwargs.keys())[0]))
+                            'argument "{0}"'.format(list(kwargs.keys())[0]))
 
-        if (com._count_not_none(*axes.values()) == 0):
+        if com._count_not_none(*axes.values()) == 0:
             raise TypeError('must pass an index to rename')
 
         # renamer function if passed a dict
         def _get_rename_function(mapper):
             if isinstance(mapper, (dict, ABCSeries)):
+
                 def f(x):
                     if x in mapper:
                         return mapper[x]
@@ -635,7 +636,7 @@ def rename_axis(self, mapper, axis=0, copy=True, inplace=False):
             d[axis] = mapper
             return self.rename(**d)
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Comparisons
 
     def _indexed_same(self, other):
@@ -664,14 +665,14 @@ def __invert__(self):
 
     def equals(self, other):
         """
-        Determines if two NDFrame objects contain the same elements. NaNs in the
-        same location are considered equal.
+        Determines if two NDFrame objects contain the same elements. NaNs in
+        the same location are considered equal.
         """
         if not isinstance(other, self._constructor):
             return False
         return self._data.equals(other._data)
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Iteration
 
    def __hash__(self):
@@ -679,9 +680,7 @@ def __hash__(self):
                         ' hashed'.format(self.__class__.__name__))
 
     def __iter__(self):
-        """
-        Iterate over infor axis
-        """
+        """Iterate over info axis"""
         return iter(self._info_axis)
 
     # can we get a better explanation of this?
@@ -689,7 +688,8 @@ def keys(self):
         """Get the 'info axis' (see Indexing for more)
 
         This is index for Series, columns for DataFrame and major_axis for
-        Panel."""
+        Panel.
+        """
         return self._info_axis
 
     def iteritems(self):
@@ -707,21 +707,21 @@ def iteritems(self):
     def iterkv(self, *args, **kwargs):
         "iteritems alias used to get around 2to3. Deprecated"
         warnings.warn("iterkv is deprecated and will be removed in a future "
-                      "release, use ``iteritems`` instead.",
-                      FutureWarning, stacklevel=2)
+                      "release, use ``iteritems`` instead.", FutureWarning,
+                      stacklevel=2)
         return self.iteritems(*args, **kwargs)
 
     def __len__(self):
-        """Returns length of info axis """
+        """Returns length of info axis"""
         return len(self._info_axis)
 
     def __contains__(self, key):
-        """True if the key is in the info axis """
+        """True if the key is in the info axis"""
         return key in self._info_axis
 
     @property
     def empty(self):
-        "True if NDFrame is entirely empty [no items]"
+        """True if NDFrame is entirely empty [no items]"""
         return not all(len(self._get_axis(a)) > 0 for a in self._AXIS_ORDERS)
 
     def __nonzero__(self):
@@ -732,11 +732,12 @@ def __nonzero__(self):
     __bool__ = __nonzero__
 
     def bool(self):
-        """ Return the bool of a single element PandasObject
-        This must be a boolean scalar value, either True or False
+        """Return the bool of a single element PandasObject.
 
-        Raise a ValueError if the PandasObject does not have exactly
-        1 element, or that element is not boolean """
+        This must be a boolean scalar value, either True or False. Raise a
+        ValueError if the PandasObject does not have exactly 1 element, or that
+        element is not boolean
+        """
         v = self.squeeze()
         if isinstance(v, (bool, np.bool_)):
             return bool(v)
@@ -749,10 +750,10 @@ def bool(self):
     def __abs__(self):
         return self.abs()
 
-    def __round__(self,decimals=0):
+    def __round__(self, decimals=0):
         return self.round(decimals)
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Array Interface
 
     def __array__(self, dtype=None):
@@ -764,24 +765,24 @@ def __array_wrap__(self, result, context=None):
 
     # ideally we would define this to avoid the getattr checks, but
     # is slower
-    #@property
-    #def __array_interface__(self):
+    # @property
+    # def __array_interface__(self):
     #    """ provide numpy array interface method """
     #    values = self.values
     #    return dict(typestr=values.dtype.str,shape=values.shape,data=values)
 
     def to_dense(self):
-        "Return dense representation of NDFrame (as opposed to sparse)"
+        """Return dense representation of NDFrame (as opposed to sparse)"""
         # compat
         return self
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Picklability
 
     def __getstate__(self):
         meta = dict((k, getattr(self, k, None)) for k in self._metadata)
-        return dict(_data=self._data, _typ=self._typ,
-                    _metadata=self._metadata, **meta)
+        return dict(_data=self._data, _typ=self._typ, _metadata=self._metadata,
+                    **meta)
 
     def __setstate__(self, state):
@@ -822,10 +823,10 @@ def __setstate__(self, state):
 
         self._item_cache = {}
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # IO
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # I/O Methods
 
     def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
@@ -886,17 +887,14 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
         """
 
         from pandas.io import json
-        return json.to_json(
-            path_or_buf=path_or_buf,
-            obj=self, orient=orient,
-            date_format=date_format,
-            double_precision=double_precision,
-            force_ascii=force_ascii,
-            date_unit=date_unit,
-            default_handler=default_handler)
+        return json.to_json(path_or_buf=path_or_buf, obj=self, orient=orient,
+                            date_format=date_format,
+                            double_precision=double_precision,
+                            force_ascii=force_ascii, date_unit=date_unit,
+                            default_handler=default_handler)
 
     def to_hdf(self, path_or_buf, key, **kwargs):
-        """ activate the HDFStore
+        """Activate the HDFStore.
 
         Parameters
         ----------
@@ -975,8 +973,8 @@ def to_sql(self, name, con, flavor='sqlite', schema=None, if_exists='fail',
             If a DBAPI2 object, only sqlite3 is supported.
         flavor : {'sqlite', 'mysql'}, default 'sqlite'
             The flavor of SQL to use. Ignored when using SQLAlchemy engine.
-            'mysql' is deprecated and will be removed in future versions, but it
-            will be further supported through SQLAlchemy engines.
+            'mysql' is deprecated and will be removed in future versions, but
+            it will be further supported through SQLAlchemy engines.
         schema : string, default None
             Specify the schema (if database flavor supports this). If None,
             use default schema.
@@ -999,14 +997,13 @@ def to_sql(self, name, con, flavor='sqlite', schema=None, if_exists='fail',
         """
         from pandas.io import sql
-        sql.to_sql(
-            self, name, con, flavor=flavor, schema=schema, if_exists=if_exists,
-            index=index, index_label=index_label, chunksize=chunksize,
-            dtype=dtype)
+        sql.to_sql(self, name, con, flavor=flavor, schema=schema,
+                   if_exists=if_exists, index=index, index_label=index_label,
+                   chunksize=chunksize, dtype=dtype)
 
     def to_pickle(self, path):
         """
-        Pickle (serialize) object to input file path
+        Pickle (serialize) object to input file path.
 
         Parameters
         ----------
@@ -1041,12 +1038,12 @@ def to_clipboard(self, excel=None, sep=None, **kwargs):
         from pandas.io import clipboard
         clipboard.to_clipboard(self, excel=excel, sep=sep, **kwargs)
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Fancy Indexing
 
     @classmethod
     def _create_indexer(cls, name, indexer):
-        """ create an indexer like _name in the class """
+        """Create an indexer like _name in the class."""
         if getattr(cls, name, None) is None:
             iname = '_%s' % name
@@ -1067,7 +1064,7 @@ def _indexer(self):
     def get(self, key, default=None):
         """
         Get item from object for given key (DataFrame column, Panel slice,
-        etc.). Returns default value if not found
+        etc.). Returns default value if not found.
 
         Parameters
         ----------
@@ -1086,7 +1083,7 @@ def __getitem__(self, item):
         return self._get_item_cache(item)
 
     def _get_item_cache(self, item):
-        """ return the cached item, item represents a label indexer """
+        """Return the cached item, item represents a label indexer."""
         cache = self._item_cache
         res = cache.get(item)
         if res is None:
@@ -1100,17 +1097,18 @@ def _get_item_cache(self, item):
         return res
 
     def _set_as_cached(self, item, cacher):
-        """ set the _cacher attribute on the calling object with
-        a weakref to cacher """
+        """Set the _cacher attribute on the calling object with a weakref to
+        cacher.
+        """
         self._cacher = (item, weakref.ref(cacher))
 
     def _reset_cacher(self):
-        """ reset the cacher """
-        if hasattr(self,'_cacher'):
+        """Reset the cacher."""
+        if hasattr(self, '_cacher'):
             del self._cacher
 
     def _iget_item_cache(self, item):
-        """ return the cached item, item represents a positional indexer """
+        """Return the cached item, item represents a positional indexer."""
         ax = self._info_axis
         if ax.is_unique:
             lower = self._get_item_cache(ax[item])
@@ -1122,9 +1120,7 @@ def _box_item_values(self, key, values):
         raise AbstractMethodError(self)
 
     def _maybe_cache_changed(self, item, value):
-        """
-        the object has called back to us saying
-        maybe it has changed
+        """The object has called back to us saying maybe it has changed.
 
         numpy < 1.8 has an issue with object arrays and aliasing
         GH6026
@@ -1133,11 +1129,11 @@ def _maybe_cache_changed(self, item, value):
 
     @property
     def _is_cached(self):
-        """ boolean : return if I am cached """
+        """Return boolean indicating if self is cached or not."""
         return getattr(self, '_cacher', None) is not None
 
     def _get_cacher(self):
-        """ return my cacher or None """
+        """return my cacher or None"""
         cacher = getattr(self, '_cacher', None)
         if cacher is not None:
             cacher = cacher[1]()
@@ -1145,14 +1141,13 @@ def _get_cacher(self):
 
     @property
     def _is_view(self):
-        """ boolean : return if I am a view of another array """
+        """Return boolean indicating if self is a view of another array."""
         return self._data.is_view
 
     def _maybe_update_cacher(self, clear=False, verify_is_copy=True):
         """
-
-        see if we need to update our parent cacher
-        if clear, then clear our cache
+        See if we need to update our parent cacher; if clear, then clear our
+        cache.
 
         Parameters
         ----------
@@ -1194,7 +1189,6 @@ def _slice(self, slobj, axis=0, kind=None):
         Construct a slice of this container. kind parameter is maintained for
         compatibility with Series slicing.
-
         """
         axis = self._get_block_manager_axis(axis)
         result = self._constructor(self._data.get_slice(slobj, axis=axis))
@@ -1202,7 +1196,7 @@ def _slice(self, slobj, axis=0, kind=None):
 
         # this could be a view
         # but only in a single-dtyped view slicable case
-        is_copy = axis!=0 or result._is_view
+        is_copy = axis != 0 or result._is_view
         result._set_is_copy(self, copy=is_copy)
         return result
@@ -1221,18 +1215,20 @@ def _set_is_copy(self, ref=None, copy=True):
 
     def _check_is_chained_assignment_possible(self):
         """
-        check if we are a view, have a cacher, and are of mixed type
-        if so, then force a setitem_copy check
+        Check if we are a view, have a cacher, and are of mixed type.
+        If so, then force a setitem_copy check.
 
-        should be called just near setting a value
+        Should be called just near setting a value
 
-        will return a boolean if it we are a view and are cached, but a single-dtype
-        meaning that the cacher should be updated following setting
+        Will return a boolean if we are a view and are cached, but a
+        single-dtype, meaning that the cacher should be updated following
+        setting.
         """
         if self._is_view and self._is_cached:
             ref = self._get_cacher()
             if ref is not None and ref._is_mixed_type:
-                self._check_setitem_copy(stacklevel=4, t='referant', force=True)
+                self._check_setitem_copy(stacklevel=4, t='referant',
+                                         force=True)
             return True
         elif self.is_copy:
             self._check_setitem_copy(stacklevel=4, t='referant')
@@ -1255,16 +1251,16 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
         user will see the error *at the level of setting*
 
         It is technically possible to figure out that we are setting on
-        a copy even WITH a multi-dtyped pandas object. In other words, some blocks
-        may be views while other are not. Currently _is_view will ALWAYS return False
-        for multi-blocks to avoid having to handle this case.
+        a copy even WITH a multi-dtyped pandas object. In other words, some
+        blocks may be views while others are not. Currently _is_view will ALWAYS
+        return False for multi-blocks to avoid having to handle this case.
 
         df = DataFrame(np.arange(0,9), columns=['count'])
         df['group'] = 'b'
 
-        # this technically need not raise SettingWithCopy if both are view (which is not
-        # generally guaranteed but is usually True
-        # however, this is in general not a good practice and we recommend using .loc
+        # This technically need not raise SettingWithCopy if both are view
+        # (which is not generally guaranteed but is usually True). However,
+        # this is in general not a good practice and we recommend using .loc.
         df.iloc[0:5]['group'] = 'a'
 
         """
@@ -1302,15 +1298,19 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
                     "A value is trying to be set on a copy of a slice from a "
                     "DataFrame\n\n"
                     "See the caveats in the documentation: "
-                    "http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")
+                    "http://pandas.pydata.org/pandas-docs/stable/"
+                    "indexing.html#indexing-view-versus-copy"
+                    )
 
             else:
                 t = ("\n"
                      "A value is trying to be set on a copy of a slice from a "
                      "DataFrame.\n"
-                     "Try using .loc[row_indexer,col_indexer] = value instead\n\n"
-                     "See the caveats in the documentation: "
-                     "http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")
+                     "Try using .loc[row_indexer,col_indexer] = value "
+                     "instead\n\nSee the caveats in the documentation: "
+                     "http://pandas.pydata.org/pandas-docs/stable/"
+                     "indexing.html#indexing-view-versus-copy"
+                     )
 
             if value == 'raise':
                 raise SettingWithCopyError(t)
@@ -1334,7 +1334,7 @@ def __delitem__(self, key):
             # Allow shorthand to delete all columns whose first len(key)
             # elements match key:
             if not isinstance(key, tuple):
-                key = (key,)
+                key = (key, )
             for col in self.columns:
                 if isinstance(col, tuple) and col[:len(key)] == key:
                     del self[col]
@@ -1382,8 +1382,8 @@ def take(self, indices, axis=0, convert=True, is_copy=True):
 
     def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
         """
-        Returns a cross-section (row(s) or column(s)) from the Series/DataFrame.
-        Defaults to cross-section on the rows (axis=0).
+        Returns a cross-section (row(s) or column(s)) from the
+        Series/DataFrame. Defaults to cross-section on the rows (axis=0).
 
         Parameters
         ----------
@@ -1446,8 +1446,9 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
         -----
         xs is only for getting, not setting values.
 
-        MultiIndex Slicers is a generic way to get/set values on any level or levels
-        it is a superset of xs functionality, see :ref:`MultiIndex Slicers `
+        MultiIndex Slicers is a generic way to get/set values on any level or
+        levels. It is a superset of xs functionality, see
+        :ref:`MultiIndex Slicers `
         """
 
         if copy is not None:
@@ -1509,10 +1510,8 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
             if not is_list_like(new_values) or self.ndim == 1:
                 return _maybe_box_datetimelike(new_values)
 
-            result = Series(new_values,
-                            index=self.columns,
-                            name=self.index[loc],
-                            copy=copy,
+            result = Series(new_values, index=self.columns,
+                            name=self.index[loc], copy=copy,
                             dtype=new_values.dtype)
 
         else:
@@ -1555,7 +1554,7 @@ def select(self, crit, axis=0):
 
     def reindex_like(self, other, method=None, copy=True, limit=None,
                      tolerance=None):
-        """ return an object with matching indicies to myself
+        """Return an object with matching indices to myself.
 
         Parameters
         ----------
@@ -1579,15 +1578,15 @@ def reindex_like(self, other, method=None, copy=True, limit=None,
         -------
         reindexed : same as input
         """
-        d = other._construct_axes_dict(axes=self._AXIS_ORDERS,
-                                       method=method, copy=copy, limit=limit,
-                                       tolerance=tolerance)
+        d = other._construct_axes_dict(axes=self._AXIS_ORDERS, method=method,
+                                       copy=copy, limit=limit,
+                                       tolerance=tolerance)
 
         return self.reindex(**d)
 
     def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'):
         """
-        Return new object with labels in requested axis removed
+        Return new object with labels in requested axis removed.
 
         Parameters
         ----------
@@ -1629,8 +1628,8 @@ def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'):
             if level is not None:
                 if not isinstance(axis, MultiIndex):
                     raise AssertionError('axis must be a MultiIndex')
-                indexer = ~lib.ismember(axis.get_level_values(level).values,
-                                        set(labels))
+                indexer = ~lib.ismember(
+                    axis.get_level_values(level).values, set(labels))
             else:
                 indexer = ~axis.isin(labels)
 
@@ -1646,7 +1645,7 @@ def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'):
 
     def _update_inplace(self, result, verify_is_copy=True):
         """
-        replace self internals with result.
+        Replace self internals with result.
 
         Parameters
         ----------
@@ -1659,7 +1658,7 @@ def _update_inplace(self, result, verify_is_copy=True):
 
         self._reset_cache()
         self._clear_item_cache()
-        self._data = getattr(result,'_data',result)
+        self._data = getattr(result, '_data', result)
         self._maybe_update_cacher(verify_is_copy=verify_is_copy)
 
     def add_prefix(self, prefix):
@@ -1679,7 +1678,7 @@ def add_prefix(self, prefix):
 
     def add_suffix(self, suffix):
         """
-        Concatenate suffix string with panel items names
+        Concatenate suffix string with panel items names.
 
         Parameters
         ----------
@@ -1702,14 +1701,16 @@ def add_suffix(self, suffix):
         by : string name or list of names which refer to the axis items
         axis : %(axes)s to direct sorting
         ascending : bool or list of bool
-            Sort ascending vs. descending. Specify list for multiple sort orders.
-            If this is a list of bools, must match the length of the by
+            Sort ascending vs. descending. Specify list for multiple sort
+            orders. If this is a list of bools, must match the length of
+            the by.
         inplace : bool
             if True, perform operation in-place
         kind : {`quicksort`, `mergesort`, `heapsort`}
-            Choice of sorting algorithm. See also ndarray.np.sort for more information.
-            `mergesort` is the only stable algorithm. For DataFrames, this option is
-            only applied when sorting on a single column or label.
+            Choice of sorting algorithm. See also ndarray.np.sort for more
+            information. `mergesort` is the only stable algorithm. For
+            DataFrames, this option is only applied when sorting on a single
+            column or label.
         na_position : {'first', 'last'}
             `first` puts NaNs at the beginning, `last` puts NaNs at the end
 
         Returns
         -------
         sorted_obj : %(klass)s
         """
+
     def sort_values(self, by, axis=0, ascending=True, inplace=False,
                     kind='quicksort', na_position='last'):
         raise AbstractMethodError(self)
@@ -1734,14 +1736,15 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
         inplace : bool
             if True, perform operation in-place
         kind : {`quicksort`, `mergesort`, `heapsort`}
-            Choice of sorting algorithm. See also ndarray.np.sort for more information.
-            `mergesort` is the only stable algorithm. For DataFrames, this option is
-            only applied when sorting on a single column or label.
+            Choice of sorting algorithm. See also ndarray.np.sort for more
+            information. `mergesort` is the only stable algorithm. For
+            DataFrames, this option is only applied when sorting on a single
+            column or label.
         na_position : {'first', 'last'}
             `first` puts NaNs at the beginning, `last` puts NaNs at the end
         sort_remaining : bool
-            if true and sorting by level and index is multilevel, sort by other levels
-            too (in order) after sorting by specified level
+            if true and sorting by level and index is multilevel, sort by other
+            levels too (in order) after sorting by specified level
 
         Returns
         -------
@@ -1784,7 +1787,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
             Please note: this is only applicable to DataFrames/Series with a
             monotonically increasing/decreasing index.
             * default: don't fill gaps
-            * pad / ffill: propagate last valid observation forward to next valid
+            * pad / ffill: propagate last valid observation forward to next
+              valid
             * backfill / bfill: use next valid observation to fill gap
            * nearest: use nearest valid observations to fill gap
         copy : boolean, default True
@@ -1923,6 +1927,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
         -------
         reindexed : %(klass)s
         """
+
     # TODO: Decide if we care about having different examples for different
     # kinds
 
@@ -1940,7 +1945,7 @@ def reindex(self, *args, **kwargs):
 
         if kwargs:
             raise TypeError('reindex() got an unexpected keyword '
-                'argument "{0}"'.format(list(kwargs.keys())[0]))
+                            'argument "{0}"'.format(list(kwargs.keys())[0]))
 
         self._consolidate_inplace()
 
@@ -1960,12 +1965,12 @@ def reindex(self, *args, **kwargs):
                 pass
 
         # perform the reindex on the axes
-        return self._reindex_axes(axes, level, limit, tolerance,
-                                  method, fill_value, copy).__finalize__(self)
+        return self._reindex_axes(axes, level, limit, tolerance, method,
+                                  fill_value, copy).__finalize__(self)
 
-    def _reindex_axes(self, axes, level, limit, tolerance, method,
-                      fill_value, copy):
-        """ perform the reinxed for all the axes """
+    def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
+                      copy):
+        """Perform the reindex for all the axes."""
         obj = self
         for a in self._AXIS_ORDERS:
             labels = axes[a]
@@ -1973,30 +1978,29 @@ def _reindex_axes(self, axes, level, limit, tolerance, method,
                 continue
 
             ax = self._get_axis(a)
-            new_index, indexer = ax.reindex(
-                labels, level=level, limit=limit, tolerance=tolerance,
-                method=method)
+            new_index, indexer = ax.reindex(labels, level=level, limit=limit,
+                                            tolerance=tolerance, method=method)
 
             axis = self._get_axis_number(a)
-            obj = obj._reindex_with_indexers(
-                {axis: [new_index, indexer]},
-                fill_value=fill_value, copy=copy, allow_dups=False)
+            obj = obj._reindex_with_indexers({axis: [new_index, indexer]},
+                                             fill_value=fill_value,
+                                             copy=copy, allow_dups=False)
 
         return obj
 
     def _needs_reindex_multi(self, axes, method, level):
-        """ check if we do need a multi reindex """
+        """Check if we do need a multi reindex."""
         return ((com._count_not_none(*axes.values()) == self._AXIS_LEN) and
                 method is None and level is None and not self._is_mixed_type)
 
     def _reindex_multi(self, axes, copy, fill_value):
         return NotImplemented
 
-    _shared_docs['reindex_axis'] = (
-        """Conform input object to new index with optional filling logic,
-        placing NA/NaN in locations having no value in the previous index. A
-        new object is produced unless the new index is equivalent to the
-        current one and copy=False
+    _shared_docs[
+        'reindex_axis'] = ("""Conform input object to new index with optional
+        filling logic, placing NA/NaN in locations having no value in the
+        previous index. A new object is produced unless the new index is
+        equivalent to the current one and copy=False
 
         Parameters
         ----------
@@ -2007,7 +2011,8 @@ def _reindex_multi(self, axes, copy, fill_value):
         method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional
             Method to use for filling holes in reindexed DataFrame:
             * default: don't fill gaps
-            * pad / ffill: propagate last valid observation forward to next valid
+            * pad / ffill: propagate last valid observation forward to next
+              valid
             * backfill / bfill: use next valid observation to fill gap
            * nearest: use nearest valid observations to fill gap
         copy : boolean, default True
@@ -2047,15 +2052,14 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
         method = mis._clean_reindex_fill_method(method)
         new_index, indexer = axis_values.reindex(labels, method, level,
                                                  limit=limit)
-        return self._reindex_with_indexers(
-            {axis: [new_index, indexer]}, fill_value=fill_value, copy=copy)
+        return self._reindex_with_indexers({axis: [new_index, indexer]},
+                                           fill_value=fill_value, copy=copy)
 
-    def _reindex_with_indexers(self, reindexers,
-                               fill_value=np.nan, copy=False,
+    def _reindex_with_indexers(self, reindexers, fill_value=np.nan, copy=False,
                                allow_dups=False):
-        """ allow_dups indicates an internal call here """
+        """allow_dups indicates an internal call here """
 
-        # reindex doing multiple operations on different axes if indiciated
+        # reindex doing multiple operations on different axes if indicated
         new_data = self._data
         for axis in sorted(reindexers.keys()):
             index, indexer = reindexers[axis]
@@ -2119,11 +2123,11 @@ def filter(self, items=None, like=None, regex=None, axis=None):
         axis_values = self._get_axis(axis_name)
 
         if items is not None:
-            return self.reindex(**{axis_name: [r for r in items
-                                               if r in axis_values]})
+            return self.reindex(**{axis_name:
+                                   [r for r in items if r in axis_values]})
         elif like:
-            matchf = lambda x: (like in x if isinstance(x, string_types)
-                                else like in str(x))
+            matchf = lambda x: (like in x if isinstance(x, string_types) else
+                                like in str(x))
             return self.select(matchf, axis=axis_name)
         elif regex:
             matcher = re.compile(regex)
@@ -2146,8 +2150,8 @@ def tail(self, n=5):
             return self.iloc[0:0]
         return self.iloc[-n:]
 
-
-    def sample(self, n=None, frac=None, replace=False, weights=None, random_state=None, axis=None):
+    def sample(self, n=None, frac=None, replace=False, weights=None,
+               random_state=None, axis=None):
         """
         Returns a random sample of items from an axis of object.
@@ -2252,22 +2256,28 @@ def sample(self, n=None, frac=None, replace=False, weights=None, random_state=No
                     try:
                         weights = self[weights]
                     except KeyError:
-                        raise KeyError("String passed to weights not a valid column")
+                        raise KeyError("String passed to weights not a "
+                                       "valid column")
                 else:
-                    raise ValueError("Strings can only be passed to weights when sampling from rows on a DataFrame")
+                    raise ValueError("Strings can only be passed to "
+                                     "weights when sampling from rows on "
+                                     "a DataFrame")
             else:
-                raise ValueError("Strings cannot be passed as weights when sampling from a Series or Panel.")
+                raise ValueError("Strings cannot be passed as weights "
+                                 "when sampling from a Series or Panel.")
 
             weights = pd.Series(weights, dtype='float64')
 
             if len(weights) != axis_length:
-                raise ValueError("Weights and axis to be sampled must be of same length")
+                raise ValueError("Weights and axis to be sampled must be of "
+                                 "same length")
 
             if (weights == np.inf).any() or (weights == -np.inf).any():
                 raise ValueError("weight vector may not include `inf` values")
 
             if (weights < 0).any():
-                raise ValueError("weight vector many not include negative values")
+                raise ValueError("weight vector may not include negative "
+                                 "values")
 
             # If has nan, set to zero.
             weights = weights.fillna(0)
@@ -2289,16 +2299,17 @@ def sample(self, n=None, frac=None, replace=False, weights=None, random_state=No
         elif n is None and frac is not None:
             n = int(round(frac * axis_length))
         elif n is not None and frac is not None:
-            raise ValueError('Please enter a value for `frac` OR `n`, not both')
+            raise ValueError('Please enter a value for `frac` OR `n`, not '
+                             'both')
 
         # Check for negative sizes
         if n < 0:
-            raise ValueError("A negative number of rows requested. Please provide positive value.")
+            raise ValueError("A negative number of rows requested. Please "
+                             "provide positive value.")
 
         locs = rs.choice(axis_length, size=n, replace=replace, p=weights)
         return self.take(locs, axis=axis, is_copy=False)
 
-
     _shared_docs['pipe'] = ("""
     Apply func(self, \*args, \*\*kwargs)
@@ -2348,26 +2359,26 @@ def sample(self, n=None, frac=None, replace=False, weights=None, random_state=No
     pandas.DataFrame.apply
     pandas.DataFrame.applymap
     pandas.Series.map
-    """
-    )
+    """)
+
     @Appender(_shared_docs['pipe'] % _shared_doc_kwargs)
     def pipe(self, func, *args, **kwargs):
         if isinstance(func, tuple):
             func, target = func
             if target in kwargs:
-                msg = '%s is both the pipe target and a keyword argument' % target
-                raise ValueError(msg)
+                raise ValueError('%s is both the pipe target and a keyword '
+                                 'argument' % target)
             kwargs[target] = self
             return func(*args, **kwargs)
         else:
             return func(self, *args, **kwargs)
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Attribute access
 
     def __finalize__(self, other, method=None, **kwargs):
         """
-        propagate metadata from other to self
+        Propagate metadata from other to self.
 
         Parameters
         ----------
@@ -2386,12 +2397,12 @@ def __getattr__(self, name):
         """After regular attribute access, try looking up the name
         This allows simpler access to columns for interactive use.
         """
+
         # Note: obj.x will always call obj.__getattribute__('x') prior to
         # calling obj.__getattr__('x').
-        if (name in self._internal_names_set
-                or name in self._metadata
-                or name in self._accessors):
+        if (name in self._internal_names_set or name in self._metadata or
+                name in self._accessors):
             return object.__getattribute__(self, name)
         else:
             if name in self._info_axis:
@@ -2400,7 +2411,9 @@ def __getattr__(self, name):
 
     def __setattr__(self, name, value):
         """After regular attribute access, try setting the name
-        This allows simpler access to columns for interactive use."""
+        This allows simpler access to columns for interactive use.
+        """
+
         # first try regular attribute access via __getattribute__, so that
         # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
         # the same attribute.
@@ -2429,14 +2442,16 @@ def __setattr__(self, name, value):
             except (AttributeError, TypeError):
                 object.__setattr__(self, name, value)
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Getting and setting elements
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Consolidation of internals
 
     def _protect_consolidate(self, f):
-        """ consolidate _data. if the blocks have changed, then clear the cache """
+        """Consolidate _data -- if the blocks have changed, then clear the
+        cache
+        """
         blocks_before = len(self._data.blocks)
         result = f()
         if len(self._data.blocks) != blocks_before:
@@ -2444,9 +2459,11 @@ def _protect_consolidate(self, f):
         return result
 
     def _consolidate_inplace(self):
-        """ we are inplace consolidating; return None """
+        """Consolidate data in place and return None"""
+
         def f():
             self._data = self._data.consolidate()
+
         self._protect_consolidate(f)
 
     def consolidate(self, inplace=False):
@@ -2499,8 +2516,8 @@ def _check_inplace_setting(self, value):
                 except:
                     pass
 
-                raise TypeError(
-                    'Cannot do inplace boolean setting on mixed-types with a non np.nan value')
+                raise TypeError('Cannot do inplace boolean setting on '
+                                'mixed-types with a non np.nan value')
 
         return True
 
@@ -2511,7 +2528,7 @@ def _get_numeric_data(self):
     def _get_bool_data(self):
         return self._constructor(self._data.get_bool_data()).__finalize__(self)
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Internal Interface Methods
 
     def as_matrix(self, columns=None):
@@ -2574,7 +2591,7 @@ def values(self):
 
     @property
     def _values(self):
-        """ internal implementation """
+        """internal implementation"""
         return self.values
 
     @property
@@ -2583,22 +2600,22 @@ def _get_values(self):
         return self.as_matrix()
 
     def get_values(self):
-        """ same as values (but handles sparseness conversions) """
+        """same as values (but handles sparseness conversions)"""
         return self.as_matrix()
 
     def get_dtype_counts(self):
-        """ Return the counts of dtypes in this object """
+        """Return the counts of dtypes in this object."""
         from pandas import Series
         return Series(self._data.get_dtype_counts())
 
     def get_ftype_counts(self):
-        """ Return the counts of ftypes in this object """
+        """Return the counts of ftypes in this object."""
         from pandas import Series
         return Series(self._data.get_ftype_counts())
 
     @property
     def dtypes(self):
-        """ Return the dtypes in this object """
+        """Return the dtypes in this object."""
         from pandas import Series
         return Series(self._data.get_dtypes(), index=self._info_axis,
                       dtype=np.object_)
@@ -2648,7 +2665,7 @@ def as_blocks(self, copy=True):
 
     @property
     def blocks(self):
-        "Internal property, property synonym for as_blocks()"
+        """Internal property, property synonym for as_blocks()"""
         return self.as_blocks()
 
     def astype(self, dtype, copy=True, raise_on_error=True, **kwargs):
@@ -2667,8 +2684,8 @@ def astype(self, dtype, copy=True, raise_on_error=True, **kwargs):
         casted : type of caller
         """
 
-        mgr = self._data.astype(
-            dtype=dtype, copy=copy, raise_on_error=raise_on_error, **kwargs)
+        mgr = self._data.astype(dtype=dtype, copy=copy,
+                                raise_on_error=raise_on_error, **kwargs)
         return self._constructor(mgr).__finalize__(self)
 
     def copy(self, deep=True):
@@ -2714,11 +2731,9 @@ def _convert(self, datetime=False, numeric=False, timedelta=False,
         converted : same as input object
         """
         return self._constructor(
-            self._data.convert(datetime=datetime,
-                               numeric=numeric,
-                               timedelta=timedelta,
-                               coerce=coerce,
-                               copy=copy)).__finalize__(self)
+            self._data.convert(datetime=datetime, numeric=numeric,
+                               timedelta=timedelta, coerce=coerce,
+                               copy=copy)).__finalize__(self)
 
     # TODO: Remove in 0.18 or 2017, which ever is sooner
     def convert_objects(self, convert_dates=True, convert_numeric=False,
@@ -2757,20 +2772,20 @@ def convert_objects(self, convert_dates=True, convert_numeric=False,
                                 convert_timedeltas=convert_timedeltas,
                                 copy=copy)).__finalize__(self)
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Filling NA's
 
-    _shared_docs['fillna'] = (
-        """
+    _shared_docs['fillna'] = ("""
         Fill NA/NaN values using the specified method
 
         Parameters
         ----------
         value : scalar, dict, Series, or DataFrame
-            Value to use to fill holes (e.g. 0), alternately a dict/Series/DataFrame of
-            values specifying which value to use for each index (for a Series) or
-            column (for a DataFrame). (values not in the dict/Series/DataFrame will not be
-            filled). This value cannot be a list.
+            Value to use to fill holes (e.g. 0), alternately a
+            dict/Series/DataFrame of values specifying which value to use for
+            each index (for a Series) or column (for a DataFrame). (values not
+            in the dict/Series/DataFrame will not be filled). This value cannot
+            be a list.
         method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
             Method to use for filling holes in reindexed Series
             pad / ffill: propagate last valid observation forward to next valid
@@ -2799,8 +2814,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False,
 
         Returns
         -------
         filled : %(klass)s
-        """
-    )
+        """)
 
     @Appender(_shared_docs['fillna'] % _shared_doc_kwargs)
     def fillna(self, value=None, method=None, axis=None, inplace=False,
@@ -2833,9 +2847,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
 
             # > 3d
             if self.ndim > 3:
-                raise NotImplementedError(
-                    'Cannot fillna with a method for > 3dims'
-                )
+                raise NotImplementedError('Cannot fillna with a method for > '
+                                          '3dims')
 
             # 3d
             elif self.ndim == 3:
@@ -2847,12 +2860,9 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
             # 2d or less
             method = mis._clean_fill_method(method)
-            new_data = self._data.interpolate(method=method,
-                                              axis=axis,
-                                              limit=limit,
-                                              inplace=inplace,
-                                              coerce=True,
-                                              downcast=downcast)
+            new_data = self._data.interpolate(method=method, axis=axis,
+                                              limit=limit, inplace=inplace,
+                                              coerce=True, downcast=downcast)
         else:
             if method is not None:
                 raise ValueError('cannot specify both a fill method and value')
@@ -2867,10 +2877,10 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
                 elif not com.is_list_like(value):
                     pass
                 else:
-                    raise ValueError("invalid fill value with a %s" % type(value))
+                    raise ValueError("invalid fill value with a %s" %
+                                     type(value))
 
-                new_data = self._data.fillna(value=value,
-                                             limit=limit,
+                new_data = self._data.fillna(value=value, limit=limit,
                                              inplace=inplace,
                                              downcast=downcast)
 
@@ -2888,8 +2898,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
                     obj.fillna(v, limit=limit, inplace=True)
                 return result
             elif not com.is_list_like(value):
-                new_data = self._data.fillna(value=value,
-                                             limit=limit,
+                new_data = self._data.fillna(value=value, limit=limit,
                                              inplace=inplace,
                                              downcast=downcast)
             elif isinstance(value, DataFrame) and self.ndim == 2:
@@ -2903,12 +2912,12 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
         return self._constructor(new_data).__finalize__(self)
 
     def ffill(self, axis=None, inplace=False, limit=None, downcast=None):
-        "Synonym for NDFrame.fillna(method='ffill')"
+        """Synonym for NDFrame.fillna(method='ffill')"""
         return self.fillna(method='ffill', axis=axis, inplace=inplace,
                            limit=limit, downcast=downcast)
 
     def bfill(self, axis=None, inplace=False, limit=None, downcast=None):
-        "Synonym for NDFrame.fillna(method='bfill')"
+        """Synonym for NDFrame.fillna(method='bfill')"""
         return self.fillna(method='bfill', axis=axis, inplace=inplace,
                            limit=limit, downcast=downcast)
 
@@ -3085,8 +3094,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
                     if c in value and c in self:
                         res[c] = res[c].replace(to_replace=src,
                                                 value=value[c],
-                                                inplace=False,
-                                                regex=regex)
+                                                inplace=False, regex=regex)
                 return None if inplace else res
 
             # {'A': NA} -> 0
@@ -3116,13 +3124,11 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
                 else:
                     # [NA, ''] -> 0
                     new_data = self._data.replace(to_replace=to_replace,
-                                                  value=value,
-                                                  inplace=inplace,
+                                                  value=value, inplace=inplace,
                                                   regex=regex)
         elif to_replace is None:
             if not (com.is_re_compilable(regex) or
-                    com.is_list_like(regex) or
-                    is_dictlike(regex)):
+                    com.is_list_like(regex) or is_dictlike(regex)):
                 raise TypeError("'regex' must be a string or a compiled "
                                 "regular expression or a list or dict of "
                                 "strings or regular expressions, you "
@@ -3139,14 +3145,14 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
                 for k, v in compat.iteritems(value):
                     if k in self:
                         new_data = new_data.replace(to_replace=to_replace,
-                                                    value=v,
-                                                    filter=[k],
+                                                    value=v, filter=[k],
                                                     inplace=inplace,
                                                     regex=regex)
 
             elif not com.is_list_like(value):  # NA -> 0
-                new_data = self._data.replace(to_replace=to_replace, value=value,
-                                              inplace=inplace, regex=regex)
+                new_data = self._data.replace(to_replace=to_replace,
+                                              value=value, inplace=inplace,
+                                              regex=regex)
             else:
                 msg = ('Invalid "to_replace" type: '
                        '{0!r}').format(type(to_replace).__name__)
@@ -3162,8 +3168,8 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
         """
         Interpolate values according to different methods.
 
-        Please note that only ``method='linear'`` is supported for DataFrames/Series
-        with a MultiIndex.
+        Please note that only ``method='linear'`` is supported for
+        DataFrames/Series with a MultiIndex.
 
         Parameters
         ----------
@@ -3187,8 +3193,8 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
             wrappers around the scipy interpolation methods of similar
            names. These use the actual numerical values of the index. See
            the scipy documentation for more on their behavior
-            `here `__
-            `and here `__
+            `here `__  # noqa
+            `and here `__  # noqa
 
         axis : {0, 1}, default 0
             * 0: fill column-by-column
@@ -3248,16 +3254,19 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
         else:
             alt_ax = ax
 
-        if isinstance(_maybe_transposed_self.index, MultiIndex) and method != 'linear':
+        if (isinstance(_maybe_transposed_self.index, MultiIndex) and
+                method != 'linear'):
             raise ValueError("Only `method=linear` interpolation is supported "
                              "on MultiIndexes.")
 
-        if _maybe_transposed_self._data.get_dtype_counts().get('object') == len(_maybe_transposed_self.T):
+        if _maybe_transposed_self._data.get_dtype_counts().get(
+                'object') == len(_maybe_transposed_self.T):
             raise TypeError("Cannot interpolate with all NaNs.")
 
         # create/use the index
         if method == 'linear':
-            index = np.arange(len(_maybe_transposed_self._get_axis(alt_ax)))  # prior default
+            # prior default
+            index = np.arange(len(_maybe_transposed_self._get_axis(alt_ax)))
         else:
             index = _maybe_transposed_self._get_axis(alt_ax)
 
@@ -3265,17 +3274,13 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
             raise NotImplementedError("Interpolation with NaNs in the index "
                                       "has not been implemented. Try filling "
                                       "those NaNs before interpolating.")
-        new_data = _maybe_transposed_self._data.interpolate(
-            method=method,
-            axis=ax,
-            index=index,
-            values=_maybe_transposed_self,
-            limit=limit,
-            limit_direction=limit_direction,
-            inplace=inplace,
-            downcast=downcast,
-            **kwargs
-        )
+        data = _maybe_transposed_self._data
+        new_data = data.interpolate(method=method, axis=ax, index=index,
+                                    values=_maybe_transposed_self, limit=limit,
+                                    limit_direction=limit_direction,
+                                    inplace=inplace, downcast=downcast,
+                                    **kwargs)
+
         if inplace:
             if axis == 1:
                 new_data = self._constructor(new_data).T._data
@@ -3286,12 +3291,12 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
             res = res.T
         return res
 
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Action Methods
 
     def isnull(self):
         """
-        Return a boolean same-sized object indicating if the values are null
+        Return a boolean same-sized object indicating if the values are null.
 
         See also
         --------
@@ -3301,7 +3306,7 @@ def isnull(self):
 
     def notnull(self):
         """Return a boolean same-sized object indicating if the values are
-        not null
+        not null.
 
         See also
         --------
@@ -3311,7 +3316,7 @@ def notnull(self):
 
     def clip(self, lower=None, upper=None, out=None, axis=None):
         """
-        Trim values at input threshold(s)
+        Trim values at input threshold(s).
 
         Parameters
         ----------
@@ -3373,7 +3378,7 @@ def clip(self, lower=None, upper=None, out=None, axis=None):
 
     def clip_upper(self, threshold, axis=None):
         """
-        Return copy of input with values above given value(s) truncated
+        Return copy of input with values above given value(s) truncated.
 
         Parameters
         ----------
@@ -3397,7 +3402,7 @@ def clip_upper(self, threshold, axis=None):
 
     def clip_lower(self, threshold, axis=None):
         """
-        Return copy of the input with values below given value(s) truncated
+        Return copy of the input with values below given value(s) truncated.
 
         Parameters
         ----------
@@ -3423,7 +3428,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
                 group_keys=True, squeeze=False):
         """
         Group series using mapper (dict or key function, apply given function
-        to group, return result as series) or by a series of columns
+        to group, return result as series) or by a series of columns.
 
         Parameters
         ----------
@@ -3442,8 +3447,8 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
             effectively "SQL-style" grouped output
         sort : boolean, default True
             Sort group keys. Get better performance by turning this off.
-            Note this does not influence the order of observations within each group.
-            groupby preserves the order of rows within each group.
+            Note this does not influence the order of observations within each
+            group. groupby preserves the order of rows within each group.
         group_keys : boolean, default True
             When calling apply, add group keys to index to identify pieces
         squeeze : boolean, default False
@@ -3496,12 +3501,11 @@ def asfreq(self, freq, method=None, how=None, normalize=False):
         converted : type of caller
         """
         from pandas.tseries.resample import asfreq
-        return asfreq(self, freq, method=method, how=how,
-                      normalize=normalize)
+        return asfreq(self, freq, method=method, how=how, normalize=normalize)
 
     def at_time(self, time, asof=False):
         """
-        Select values at particular time of day (e.g. 9:30AM)
+        Select values at particular time of day (e.g. 9:30AM).
 
         Parameters
         ----------
@@ -3520,7 +3524,7 @@ def at_time(self, time, asof=False):
     def between_time(self, start_time, end_time, include_start=True,
                      include_end=True):
         """
-        Select values between particular times of the day (e.g., 9:00-9:30 AM)
+        Select values between particular times of the day (e.g., 9:00-9:30 AM).
 
         Parameters
         ----------
@@ -3541,9 +3545,9 @@ def between_time(self, start_time, end_time, include_start=True,
         except AttributeError:
             raise TypeError('Index must be DatetimeIndex')
 
-    def resample(self, rule, how=None, axis=0, fill_method=None,
-                 closed=None, label=None, convention='start',
-                 kind=None, loffset=None, limit=None, base=0):
+    def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
+                 label=None, convention='start', kind=None, loffset=None,
+                 limit=None, base=0):
         """
         Convenience method for frequency conversion and resampling of regular
         time-series data.
@@ -3684,7 +3688,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None,
     def first(self, offset):
         """
         Convenience method for subsetting initial periods of time series data
-        based on a date offset
+        based on a date offset.
Parameters ---------- @@ -3719,7 +3723,7 @@ def first(self, offset): def last(self, offset): """ Convenience method for subsetting final periods of time series data - based on a date offset + based on a date offset. Parameters ---------- @@ -3747,8 +3751,7 @@ def last(self, offset): start = self.index.searchsorted(start_date, side='right') return self.ix[start:] - _shared_docs['align'] = ( - """ + _shared_docs['align'] = (""" Align two object on their axes with the specified join method for each axis Index @@ -3781,8 +3784,7 @@ def last(self, offset): ------- (left, right) : (%(klass)s, type of other) Aligned objects - """ - ) + """) @Appender(_shared_docs['align'] % _shared_doc_kwargs) def align(self, other, join='outer', axis=None, level=None, copy=True, @@ -3793,15 +3795,18 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, if broadcast_axis == 1 and self.ndim != other.ndim: if isinstance(self, Series): - # this means other is a DataFrame, and we need to broadcast self - df = DataFrame(dict((c, self) for c in other.columns), - **other._construct_axes_dict()) - return df._align_frame(other, join=join, axis=axis, level=level, - copy=copy, fill_value=fill_value, - method=method, limit=limit, - fill_axis=fill_axis) + # this means other is a DataFrame, and we need to broadcast + # self + df = DataFrame( + dict((c, self) for c in other.columns), + **other._construct_axes_dict()) + return df._align_frame(other, join=join, axis=axis, + level=level, copy=copy, + fill_value=fill_value, method=method, + limit=limit, fill_axis=fill_axis) elif isinstance(other, Series): - # this means self is a DataFrame, and we need to broadcast other + # this means self is a DataFrame, and we need to broadcast + # other df = DataFrame(dict((c, other) for c in self.columns), **self._construct_axes_dict()) return self._align_frame(df, join=join, axis=axis, level=level, @@ -3834,15 +3839,13 @@ def _align_frame(self, other, join='outer', axis=None, level=None, if axis is None or axis == 0: if not self.index.equals(other.index): - join_index, ilidx, iridx = \ - self.index.join(other.index, how=join, level=level, - return_indexers=True) + join_index, ilidx, iridx = self.index.join( + other.index, how=join, level=level, return_indexers=True) if axis is None or axis == 1: if not self.columns.equals(other.columns): - join_columns, clidx, cridx = \ - self.columns.join(other.columns, how=join, level=level, - return_indexers=True) + join_columns, clidx, cridx = self.columns.join( + other.columns, how=join, level=level, return_indexers=True) left = self._reindex_with_indexers({0: [join_index, ilidx], 1: [join_columns, clidx]}, @@ -3871,7 +3874,7 @@ def _align_series(self, other, join='outer', axis=None, level=None, 'axis 0') # equal - if self.index.equals(other.index): + if self.index.equals(other.index): join_index, lidx, ridx = None, None, None else: join_index, lidx, ridx = self.index.join(other.index, how=join, @@ -3888,9 +3891,9 @@ def _align_series(self, other, join='outer', axis=None, level=None, join_index = self.index lidx, ridx = None, None if not self.index.equals(other.index): - join_index, lidx, ridx = \ - self.index.join(other.index, how=join, level=level, - return_indexers=True) + join_index, lidx, ridx = self.index.join( + other.index, how=join, level=level, + return_indexers=True) if lidx is not None: fdata = fdata.reindex_indexer(join_index, lidx, axis=1) @@ -3899,9 +3902,9 @@ def _align_series(self, other, join='outer', axis=None, level=None, join_index = self.columns lidx, ridx = 
None, None
             if not self.columns.equals(other.index):
-                join_index, lidx, ridx = \
-                    self.columns.join(other.index, how=join, level=level,
-                                      return_indexers=True)
+                join_index, lidx, ridx = self.columns.join(
+                    other.index, how=join, level=level,
+                    return_indexers=True)

         if lidx is not None:
             fdata = fdata.reindex_indexer(join_index, lidx, axis=0)
@@ -3921,13 +3924,15 @@ def _align_series(self, other, join='outer', axis=None, level=None,
         # fill
         fill_na = notnull(fill_value) or (method is not None)
         if fill_na:
-            left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis)
+            left = left.fillna(fill_value, method=method, limit=limit,
+                               axis=fill_axis)
             right = right.fillna(fill_value, method=method, limit=limit)
-        return (left.__finalize__(self), right.__finalize__(other))
+        return left.__finalize__(self), right.__finalize__(other)

     _shared_docs['where'] = ("""
         Return an object of same shape as self and whose corresponding
-        entries are from self where cond is %(cond)s and otherwise are from other.
+        entries are from self where cond is %(cond)s and otherwise are from
+        other.

         Parameters
         ----------
@@ -3947,6 +3952,7 @@ def _align_series(self, other, join='outer', axis=None, level=None,
         -------
         wh : same type as caller
         """)
+
     @Appender(_shared_docs['where'] % dict(_shared_doc_kwargs, cond="True"))
     def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
               try_cast=False, raise_on_error=True):
@@ -3958,8 +3964,8 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
                 raise ValueError('where requires an ndarray like object for '
                                  'its condition')
             if cond.shape != self.shape:
-                raise ValueError(
-                    'Array conditional must be same shape as self')
+                raise ValueError('Array conditional must be same shape as '
+                                 'self')
             cond = self._constructor(cond, **self._construct_axes_dict())

         if inplace:
@@ -3974,9 +3980,8 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,

             # align with me
             if other.ndim <= self.ndim:
-                _, other = self.align(other, join='left',
-                                      axis=axis, level=level,
-                                      fill_value=np.nan)
+                _, other = self.align(other, join='left', axis=axis,
+                                      level=level, fill_value=np.nan)

                 # if we are NOT aligned, raise as we cannot where index
                 if (axis is None and
@@ -3986,9 +3991,8 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,

             # slice me out of the other
             else:
-                raise NotImplemented(
-                    "cannot align with a higher dimensional NDFrame"
-                )
+                raise NotImplementedError("cannot align with a higher "
+                                          "dimensional NDFrame")

         elif is_list_like(other):
@@ -4018,7 +4022,9 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
                         other = np.array(other)
                     else:
                         other = np.asarray(other)
-                    other = np.asarray(other, dtype=np.common_type(other, new_other))
+                    other = np.asarray(other,
+                                       dtype=np.common_type(other,
+                                                            new_other))

                     # we need to use the new dtype
                     try_quick = False
@@ -4066,8 +4072,8 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
                             other = new_other
                     else:
-                        raise ValueError(
-                            'Length of replacements must equal series length')
+                        raise ValueError('Length of replacements must equal '
+                                         'series length')

             else:
                 raise ValueError('other must be the same shape as self '
@@ -4109,7 +4115,8 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
     def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None,
              try_cast=False, raise_on_error=True):
         return self.where(~cond, other=other, inplace=inplace, axis=axis,
-                          level=level, try_cast=
raise_on_error=raise_on_error) + level=level, try_cast=try_cast, + raise_on_error=raise_on_error) _shared_docs['shift'] = (""" Shift index by desired number of periods with an optional time freq @@ -4133,6 +4140,7 @@ def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None, ------- shifted : %(klass)s """) + @Appender(_shared_docs['shift'] % _shared_doc_kwargs) def shift(self, periods=1, freq=None, axis=0): if periods == 0: @@ -4184,7 +4192,7 @@ def slice_shift(self, periods=1, axis=0): def tshift(self, periods=1, freq=None, axis=0): """ - Shift the time index, using the index's frequency if available + Shift the time index, using the index's frequency if available. Parameters ---------- @@ -4317,10 +4325,10 @@ def _tz_convert(ax, tz): if not hasattr(ax, 'tz_convert'): if len(ax) > 0: ax_name = self._get_axis_name(axis) - raise TypeError('%s is not a valid DatetimeIndex or PeriodIndex' % - ax_name) + raise TypeError('%s is not a valid DatetimeIndex or ' + 'PeriodIndex' % ax_name) else: - ax = DatetimeIndex([],tz=tz) + ax = DatetimeIndex([], tz=tz) else: ax = ax.tz_convert(tz) return ax @@ -4334,18 +4342,19 @@ def _tz_convert(ax, tz): else: if level not in (None, 0, ax.name): raise ValueError("The level {0} is not valid".format(level)) - ax = _tz_convert(ax, tz) + ax = _tz_convert(ax, tz) result = self._constructor(self._data, copy=copy) - result.set_axis(axis,ax) + result.set_axis(axis, ax) return result.__finalize__(self) @deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous', - mapping={True: 'infer', False: 'raise'}) + mapping={True: 'infer', + False: 'raise'}) def tz_localize(self, tz, axis=0, level=None, copy=True, ambiguous='raise'): """ - Localize tz-naive TimeSeries to target time zone + Localize tz-naive TimeSeries to target time zone. 
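A minimal sketch of the localize/convert pair described here, assuming the usual ``pytz`` zone names are available:

    >>> import pandas as pd
    >>> s = pd.Series([1, 2, 3],
    ...               index=pd.date_range('2015-03-01', periods=3, freq='D'))
    >>> s_utc = s.tz_localize('UTC')    # attach a zone to tz-naive stamps
    >>> s_utc.tz_convert('US/Eastern')  # re-express the same instants

``ambiguous`` only comes into play around a fall DST transition, where wall-clock times repeat.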
Parameters
         ----------
@@ -4357,11 +4366,14 @@ def tz_localize(self, tz, axis=0, level=None, copy=True,
         copy : boolean, default True
             Also make a copy of the underlying data
         ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
-            - 'infer' will attempt to infer fall dst-transition hours based on order
+            - 'infer' will attempt to infer fall dst-transition hours based on
+              order
             - bool-ndarray where True signifies a DST time, False designates
-              a non-DST time (note that this flag is only applicable for ambiguous times)
+              a non-DST time (note that this flag is only applicable for
+              ambiguous times)
             - 'NaT' will return NaT where there are ambiguous times
-            - 'raise' will raise an AmbiguousTimeError if there are ambiguous times
+            - 'raise' will raise an AmbiguousTimeError if there are ambiguous
+              times
         infer_dst : boolean, default False (DEPRECATED)
             Attempt to infer fall dst-transition hours based on order
@@ -4380,10 +4392,10 @@ def _tz_localize(ax, tz, ambiguous):
             if not hasattr(ax, 'tz_localize'):
                 if len(ax) > 0:
                     ax_name = self._get_axis_name(axis)
-                    raise TypeError('%s is not a valid DatetimeIndex or PeriodIndex' %
-                                    ax_name)
+                    raise TypeError('%s is not a valid DatetimeIndex or '
+                                    'PeriodIndex' % ax_name)
                 else:
-                    ax = DatetimeIndex([],tz=tz)
+                    ax = DatetimeIndex([], tz=tz)
             else:
                 ax = ax.tz_localize(tz, ambiguous=ambiguous)
             return ax
@@ -4397,18 +4409,18 @@ def _tz_localize(ax, tz, ambiguous):
         else:
             if level not in (None, 0, ax.name):
                 raise ValueError("The level {0} is not valid".format(level))
-            ax =  _tz_localize(ax, tz, ambiguous)
+            ax = _tz_localize(ax, tz, ambiguous)

         result = self._constructor(self._data, copy=copy)
-        result.set_axis(axis,ax)
+        result.set_axis(axis, ax)
         return result.__finalize__(self)

-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
     # Numeric Methods

     def abs(self):
         """
-        Return an object with absolute value taken. Only applicable to objects
-        that are all numeric
+        Return an object with absolute value taken--only applicable to objects
+        that are all numeric.

         Returns
         -------
@@ -4428,8 +4440,8 @@ def abs(self):
         include, exclude : list-like, 'all', or None (default)
             Specify the form of the returned result. Either:

-            - None to both (default). The result will include only numeric-typed
-              columns or, if none are, only categorical columns.
+            - None to both (default). The result will include only
+              numeric-typed columns or, if none are, only categorical columns.
             - A list of dtypes or strings to be included/excluded.
               To select all numeric types use numpy numpy.number. To select
               categorical objects use type object. See also the select_dtypes
@@ -4469,7 +4481,7 @@ def abs(self):
         """

     @Appender(_shared_docs['describe'] % _shared_doc_kwargs)
-    def describe(self, percentiles=None, include=None, exclude=None ):
+    def describe(self, percentiles=None, include=None, exclude=None):
         if self.ndim >= 3:
             msg = "describe is not implemented on Panel or PanelND objects."
raise NotImplementedError(msg) @@ -4496,20 +4508,20 @@ def pretty_name(x): def describe_numeric_1d(series, percentiles): stat_index = (['count', 'mean', 'std', 'min'] + - [pretty_name(x) for x in percentiles] + ['max']) + [pretty_name(x) for x in percentiles] + ['max']) d = ([series.count(), series.mean(), series.std(), series.min()] + [series.quantile(x) for x in percentiles] + [series.max()]) return pd.Series(d, index=stat_index, name=series.name) - def describe_categorical_1d(data): names = ['count', 'unique'] objcounts = data.value_counts() - result = [data.count(), len(objcounts[objcounts!=0])] + result = [data.count(), len(objcounts[objcounts != 0])] if result[1] > 0: top, freq = objcounts.index[0], objcounts.iloc[0] - if data.dtype == object or com.is_categorical_dtype(data.dtype): + if (data.dtype == object or + com.is_categorical_dtype(data.dtype)): names += ['top', 'freq'] result += [top, freq] @@ -4559,7 +4571,7 @@ def describe_1d(data, percentiles): return d def _check_percentile(self, q): - """ Validate percentiles. Used by describe and quantile """ + """Validate percentiles (used by describe and quantile).""" msg = ("percentiles should all be in the interval [0, 1]. " "Try {0} instead.") @@ -4608,8 +4620,8 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, else: data = self.fillna(method=fill_method, limit=limit, axis=axis) - rs = (data.div(data.shift(periods=periods, freq=freq, - axis=axis, **kwargs)) - 1) + rs = (data.div(data.shift(periods=periods, freq=freq, axis=axis, + **kwargs)) - 1) if freq is None: mask = com.isnull(_values_from_object(self)) np.putmask(rs.values, mask, np.nan) @@ -4626,7 +4638,7 @@ def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs): @classmethod def _add_numeric_operations(cls): - """ add the operations to the cls; evaluate the doc strings again """ + """Add the operations to the cls; evaluate the doc strings again""" axis_descr, name, name2 = _doc_parms(cls) @@ -4642,11 +4654,9 @@ def _add_numeric_operations(cls): @Substitution(outname='mad', desc="Return the mean absolute deviation of the values " "for the requested axis", - name1=name, - name2=name2, - axis_descr=axis_descr) + name1=name, name2=name2, axis_descr=axis_descr) @Appender(_num_doc) - def mad(self, axis=None, skipna=None, level=None): + def mad(self, axis=None, skipna=None, level=None): if skipna is None: skipna = True if axis is None: @@ -4661,58 +4671,51 @@ def mad(self, axis=None, skipna=None, level=None): else: demeaned = data.sub(data.mean(axis=1), axis=0) return np.abs(demeaned).mean(axis=axis, skipna=skipna) + cls.mad = mad cls.sem = _make_stat_function_ddof( 'sem', name, name2, axis_descr, - "Return unbiased standard error of the mean over " - "requested axis.\n\nNormalized by N-1 by default. " - "This can be changed using the ddof argument", + "Return unbiased standard error of the mean over requested " + "axis.\n\nNormalized by N-1 by default. This can be changed " + "using the ddof argument", nanops.nansem) cls.var = _make_stat_function_ddof( 'var', name, name2, axis_descr, - "Return unbiased variance over requested " - "axis.\n\nNormalized by N-1 by default. " - "This can be changed using the ddof argument", + "Return unbiased variance over requested axis.\n\nNormalized by " + "N-1 by default. This can be changed using the ddof argument", nanops.nanvar) cls.std = _make_stat_function_ddof( 'std', name, name2, axis_descr, - "Return unbiased standard deviation over requested " - "axis.\n\nNormalized by N-1 by default. 
" - "This can be changed using the ddof argument", + "Return unbiased standard deviation over requested axis." + "\n\nNormalized by N-1 by default. This can be changed using the " + "ddof argument", nanops.nanstd) @Substitution(outname='compounded', desc="Return the compound percentage of the values for " - "the requested axis", - name1=name, - name2=name2, + "the requested axis", name1=name, name2=name2, axis_descr=axis_descr) @Appender(_num_doc) def compound(self, axis=None, skipna=None, level=None): if skipna is None: skipna = True return (1 + self).prod(axis=axis, skipna=skipna, level=level) - 1 + cls.compound = compound cls.cummin = _make_cum_function( - 'min', name, name2, axis_descr, - "cumulative minimum", - lambda y, axis: np.minimum.accumulate(y, axis), - np.inf, np.nan) + 'min', name, name2, axis_descr, "cumulative minimum", + lambda y, axis: np.minimum.accumulate(y, axis), np.inf, np.nan) cls.cumsum = _make_cum_function( - 'sum', name, name2, axis_descr, - "cumulative sum", + 'sum', name, name2, axis_descr, "cumulative sum", lambda y, axis: y.cumsum(axis), 0., np.nan) cls.cumprod = _make_cum_function( - 'prod', name, name2, axis_descr, - "cumulative product", + 'prod', name, name2, axis_descr, "cumulative product", lambda y, axis: y.cumprod(axis), 1., np.nan) cls.cummax = _make_cum_function( - 'max', name, name2, axis_descr, - "cumulative max", - lambda y, axis: np.maximum.accumulate(y, axis), - -np.inf, np.nan) + 'max', name, name2, axis_descr, "cumulative max", + lambda y, axis: np.maximum.accumulate(y, axis), -np.inf, np.nan) cls.sum = _make_stat_function( 'sum', name, name2, axis_descr, @@ -4728,9 +4731,9 @@ def compound(self, axis=None, skipna=None, level=None): nanops.nanskew) cls.kurt = _make_stat_function( 'kurt', name, name2, axis_descr, - 'Return unbiased kurtosis over requested axis using Fisher''s ' - 'definition of\nkurtosis (kurtosis of normal == 0.0). Normalized ' - 'by N-1\n', + "Return unbiased kurtosis over requested axis using Fisher's " + "definition of\nkurtosis (kurtosis of normal == 0.0). Normalized " + "by N-1\n", nanops.nankurt) cls.kurtosis = cls.kurt cls.prod = _make_stat_function( @@ -4742,20 +4745,24 @@ def compound(self, axis=None, skipna=None, level=None): 'median', name, name2, axis_descr, 'Return the median of the values for the requested axis', nanops.nanmedian) - cls.max = _make_stat_function('max', name, name2, axis_descr, - """This method returns the maximum of the values in the object. If you - want the *index* of the maximum, use ``idxmax``. This is the - equivalent of the ``numpy.ndarray`` method ``argmax``.""", - nanops.nanmax) - cls.min = _make_stat_function('min', name, name2, axis_descr, - """This method returns the minimum of the values in the object. If you - want the *index* of the minimum, use ``idxmin``. This is the - equivalent of the ``numpy.ndarray`` method ``argmin``.""", - nanops.nanmin) + cls.max = _make_stat_function( + 'max', name, name2, axis_descr, + """This method returns the maximum of the values in the object. + If you want the *index* of the maximum, use ``idxmax``. This is + the equivalent of the ``numpy.ndarray`` method ``argmax``.""", + nanops.nanmax) + cls.min = _make_stat_function( + 'min', name, name2, axis_descr, + """This method returns the minimum of the values in the object. + If you want the *index* of the minimum, use ``idxmin``. 
This is + the equivalent of the ``numpy.ndarray`` method ``argmin``.""", + nanops.nanmin) @classmethod def _add_series_only_operations(cls): - """ add the series only operations to the cls; evaluate the doc strings again """ + """Add the series only operations to the cls; evaluate the doc + strings again. + """ axis_descr, name, name2 = _doc_parms(cls) @@ -4764,16 +4771,18 @@ def nanptp(values, axis=0, skipna=True): nmin = nanops.nanmin(values, axis, skipna) return nmax - nmin - cls.ptp = _make_stat_function('ptp', name, name2, axis_descr, - """ - Returns the difference between the maximum value and the minimum - value in the object. This is the equivalent of the ``numpy.ndarray`` - method ``ptp``.""", nanptp) - + cls.ptp = _make_stat_function( + 'ptp', name, name2, axis_descr, + """Returns the difference between the maximum value and the + minimum value in the object. This is the equivalent of the + ``numpy.ndarray`` method ``ptp``.""", + nanptp) @classmethod def _add_series_or_dataframe_operations(cls): - """ add the series or dataframe only operations to the cls; evaluate the doc strings again """ + """Add the series or dataframe only operations to the cls; evaluate + the doc strings again. + """ from pandas.core import window as rwindow @@ -4781,35 +4790,41 @@ def _add_series_or_dataframe_operations(cls): def rolling(self, window, min_periods=None, freq=None, center=False, win_type=None, axis=0): axis = self._get_axis_number(axis) - return rwindow.rolling(self, window=window, min_periods=min_periods, freq=freq, center=center, - win_type=win_type, axis=axis) + return rwindow.rolling(self, window=window, + min_periods=min_periods, freq=freq, + center=center, win_type=win_type, axis=axis) + cls.rolling = rolling @Appender(rwindow.expanding.__doc__) def expanding(self, min_periods=1, freq=None, center=False, axis=0): axis = self._get_axis_number(axis) - return rwindow.expanding(self, min_periods=min_periods, freq=freq, center=center, - axis=axis) + return rwindow.expanding(self, min_periods=min_periods, freq=freq, + center=center, axis=axis) + cls.expanding = expanding @Appender(rwindow.ewm.__doc__) - def ewm(self, com=None, span=None, halflife=None, min_periods=0, freq=None, - adjust=True, ignore_na=False, axis=0): + def ewm(self, com=None, span=None, halflife=None, min_periods=0, + freq=None, adjust=True, ignore_na=False, axis=0): axis = self._get_axis_number(axis) - return rwindow.ewm(self, com=com, span=span, halflife=halflife, min_periods=min_periods, - freq=freq, adjust=adjust, ignore_na=ignore_na, axis=axis) + return rwindow.ewm(self, com=com, span=span, halflife=halflife, + min_periods=min_periods, freq=freq, + adjust=adjust, ignore_na=ignore_na, axis=axis) + cls.ewm = ewm + def _doc_parms(cls): - """ return a tuple of the doc parms """ - axis_descr = "{%s}" % ', '.join([ - "{0} ({1})".format(a, i) for i, a in enumerate(cls._AXIS_ORDERS) - ]) + """Return a tuple of the doc parms.""" + axis_descr = "{%s}" % ', '.join(["{0} ({1})".format(a, i) + for i, a in enumerate(cls._AXIS_ORDERS)]) name = (cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else 'scalar') name2 = cls.__name__ return axis_descr, name, name2 + _num_doc = """ %(desc)s @@ -4888,12 +4903,13 @@ def _doc_parms(cls): ------- %(outname)s : %(name1)s\n""" -def _make_stat_function(name, name1, name2, axis_descr, desc, f): - @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) +def _make_stat_function(name, name1, name2, axis_descr, desc, f): + @Substitution(outname=name, desc=desc, 
name1=name1, name2=name2, + axis_descr=axis_descr) @Appender(_num_doc) - def stat_func(self, axis=None, skipna=None, level=None, - numeric_only=None, **kwargs): + def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, + **kwargs): if skipna is None: skipna = True if axis is None: @@ -4901,14 +4917,16 @@ def stat_func(self, axis=None, skipna=None, level=None, if level is not None: return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) - return self._reduce(f, name, axis=axis, - skipna=skipna, numeric_only=numeric_only) + return self._reduce(f, name, axis=axis, skipna=skipna, + numeric_only=numeric_only) + stat_func.__name__ = name return stat_func -def _make_stat_function_ddof(name, name1, name2, axis_descr, desc, f): - @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) +def _make_stat_function_ddof(name, name1, name2, axis_descr, desc, f): + @Substitution(outname=name, desc=desc, name1=name1, name2=name2, + axis_descr=axis_descr) @Appender(_num_ddof_doc) def stat_func(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs): @@ -4919,19 +4937,20 @@ def stat_func(self, axis=None, skipna=None, level=None, ddof=1, if level is not None: return self._agg_by_level(name, axis=axis, level=level, skipna=skipna, ddof=ddof) - return self._reduce(f, name, axis=axis, - numeric_only=numeric_only, + return self._reduce(f, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof) + stat_func.__name__ = name return stat_func -def _make_cum_function(name, name1, name2, axis_descr, desc, accum_func, mask_a, mask_b): - @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) - @Appender("Return cumulative {0} over requested axis.".format(name) - + _cnum_doc) - def func(self, axis=None, dtype=None, out=None, skipna=True, - **kwargs): +def _make_cum_function(name, name1, name2, axis_descr, desc, accum_func, + mask_a, mask_b): + @Substitution(outname=name, desc=desc, name1=name1, name2=name2, + axis_descr=axis_descr) + @Appender("Return cumulative {0} over requested axis.".format(name) + + _cnum_doc) + def func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs): if axis is None: axis = self._stat_axis_number else: @@ -4939,8 +4958,8 @@ def func(self, axis=None, dtype=None, out=None, skipna=True, y = _values_from_object(self).copy() - if skipna and issubclass(y.dtype.type, - (np.datetime64, np.timedelta64)): + if (skipna and + issubclass(y.dtype.type, (np.datetime64, np.timedelta64))): result = accum_func(y, axis) mask = isnull(self) np.putmask(result, mask, pd.tslib.iNaT) @@ -4959,26 +4978,27 @@ def func(self, axis=None, dtype=None, out=None, skipna=True, func.__name__ = name return func -def _make_logical_function(name, name1, name2, axis_descr, desc, f): - @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) +def _make_logical_function(name, name1, name2, axis_descr, desc, f): + @Substitution(outname=name, desc=desc, name1=name1, name2=name2, + axis_descr=axis_descr) @Appender(_bool_doc) - def logical_func(self, axis=None, bool_only=None, skipna=None, - level=None, **kwargs): + def logical_func(self, axis=None, bool_only=None, skipna=None, level=None, + **kwargs): if skipna is None: skipna = True if axis is None: axis = self._stat_axis_number if level is not None: if bool_only is not None: - raise NotImplementedError( - "Option bool_only is not implemented with option " - "level.") + raise NotImplementedError("Option bool_only is 
not " + "implemented with option level.") return self._agg_by_level(name, axis=axis, level=level, - skipna=skipna) + skipna=skipna) return self._reduce(f, axis=axis, skipna=skipna, numeric_only=bool_only, filter_type='bool', name=name) + logical_func.__name__ = name return logical_func diff --git a/pandas/core/index.py b/pandas/core/index.py index 63b748ada6afa..e4a56f7a5f0bd 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -37,21 +37,16 @@ from pandas.core.config import get_option from pandas.io.common import PerformanceWarning - # simplify -default_pprint = lambda x, max_seq_items=None: com.pprint_thing(x, - escape_chars=('\t', '\r', '\n'), - quote_strings=True, - max_seq_items=max_seq_items) - +default_pprint = lambda x, max_seq_items=None: \ + com.pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True, + max_seq_items=max_seq_items) __all__ = ['Index'] - _unsortable_types = frozenset(('mixed', 'mixed-integer')) -_index_doc_kwargs = dict(klass='Index', inplace='', - duplicated='np.array') +_index_doc_kwargs = dict(klass='Index', inplace='', duplicated='np.array') _index_shared_docs = dict() @@ -61,19 +56,23 @@ def _try_get_item(x): except AttributeError: return x + class InvalidIndexError(Exception): pass + _o_dtype = np.dtype(object) _Identity = object + def _new_Index(cls, d): - """ This is called upon unpickling, rather than the default which doesn't have arguments - and breaks __new__ """ + """ This is called upon unpickling, rather than the default which doesn't + have arguments and breaks __new__ + """ return cls.__new__(cls, **d) -class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): +class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): """ Immutable ndarray implementing an ordered, sliceable set. 
The basic object storing axis labels for all pandas objects @@ -124,8 +123,8 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): _engine_type = _index.ObjectEngine - def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, - tupleize_cols=True, **kwargs): + def __new__(cls, data=None, dtype=None, copy=False, name=None, + fastpath=False, tupleize_cols=True, **kwargs): if name is None and hasattr(data, 'name'): name = data.name @@ -147,9 +146,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, # index-like elif isinstance(data, (np.ndarray, Index, ABCSeries)): - if issubclass(data.dtype.type, - np.datetime64) or is_datetimetz(data): - + if (issubclass(data.dtype.type, np.datetime64) or + is_datetimetz(data)): from pandas.tseries.index import DatetimeIndex result = DatetimeIndex(data, copy=copy, name=name, **kwargs) if dtype is not None and _o_dtype == dtype: @@ -192,7 +190,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, if dtype is None: inferred = lib.infer_dtype(subarr) if inferred == 'integer': - return Int64Index(subarr.astype('i8'), copy=copy, name=name) + return Int64Index(subarr.astype('i8'), copy=copy, + name=name) elif inferred in ['floating', 'mixed-integer-float']: return Float64Index(subarr, copy=copy, name=name) elif inferred == 'boolean': @@ -200,18 +199,20 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, pass elif inferred != 'string': if (inferred.startswith('datetime') or - tslib.is_timestamp_array(subarr)): + tslib.is_timestamp_array(subarr)): if (lib.is_datetime_with_singletz_array(subarr) or - 'tz' in kwargs): + 'tz' in kwargs): # only when subarr has the same tz from pandas.tseries.index import DatetimeIndex - return DatetimeIndex(subarr, copy=copy, name=name, **kwargs) + return DatetimeIndex(subarr, copy=copy, name=name, + **kwargs) elif (inferred.startswith('timedelta') or lib.is_timedelta_array(subarr)): from pandas.tseries.tdi import TimedeltaIndex - return TimedeltaIndex(subarr, copy=copy, name=name, **kwargs) + return TimedeltaIndex(subarr, copy=copy, name=name, + **kwargs) elif inferred == 'period': return PeriodIndex(subarr, name=name, **kwargs) return cls._simple_new(subarr, name) @@ -222,17 +223,18 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, elif data is None or np.isscalar(data): cls._scalar_data_error(data) else: - if tupleize_cols and isinstance(data, list) and data and isinstance(data[0], tuple): + if (tupleize_cols and isinstance(data, list) and data and + isinstance(data[0], tuple)): # we must be all tuples, otherwise don't construct # 10697 - if all( isinstance(e, tuple) for e in data ): + if all(isinstance(e, tuple) for e in data): try: # must be orderable in py3 if compat.PY3: sorted(data) - return MultiIndex.from_tuples( - data, names=name or kwargs.get('names')) + return MultiIndex.from_tuples(data, names=name or + kwargs.get('names')) except (TypeError, KeyError): # python2 - MultiIndex fails on mixed types pass @@ -245,11 +247,12 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, - _simple_new: It returns new Index with the same type as the caller. All metadata (such as name) must be provided by caller's responsibility. - Using _shallow_copy is recommended because it fills these metadata otherwise specified. + Using _shallow_copy is recommended because it fills these metadata + otherwise specified. 
- - _shallow_copy: It returns new Index with the same type (using _simple_new), - but fills caller's metadata otherwise specified. Passed kwargs will - overwrite corresponding metadata. + - _shallow_copy: It returns new Index with the same type (using + _simple_new), but fills caller's metadata otherwise specified. Passed + kwargs will overwrite corresponding metadata. - _shallow_copy_with_infer: It returns new Index inferring its type from passed values. It fills caller's metadata otherwise specified as the @@ -270,22 +273,24 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs): if values is None and dtype is not None: values = np.empty(0, dtype=dtype) else: - values = np.array(values,copy=False) + values = np.array(values, copy=False) if is_object_dtype(values): - values = cls(values, name=name, dtype=dtype, **kwargs)._values + values = cls(values, name=name, dtype=dtype, + **kwargs)._values result = object.__new__(cls) result._data = values result.name = name for k, v in compat.iteritems(kwargs): - setattr(result,k,v) + setattr(result, k, v) result._reset_identity() return result def _shallow_copy(self, values=None, **kwargs): """ - create a new Index with the same class as the caller, don't copy the data, - use the same object attributes with passed in attributes taking precedence + create a new Index with the same class as the caller, don't copy the + data, use the same object attributes with passed in attributes taking + precedence *this is an internal non-public method* @@ -302,8 +307,9 @@ def _shallow_copy(self, values=None, **kwargs): def _shallow_copy_with_infer(self, values=None, **kwargs): """ - create a new Index inferring the class with passed value, don't copy the data, - use the same object attributes with passed in attributes taking precedence + create a new Index inferring the class with passed value, don't copy + the data, use the same object attributes with passed in attributes + taking precedence *this is an internal non-public method* @@ -320,7 +326,7 @@ def _shallow_copy_with_infer(self, values=None, **kwargs): if self._infer_as_myclass: try: return self._constructor(values, **attributes) - except (TypeError, ValueError) as e: + except (TypeError, ValueError): pass return Index(values, **attributes) @@ -423,10 +429,9 @@ def ravel(self, order='C'): # construction helpers @classmethod def _scalar_data_error(cls, data): - raise TypeError( - '{0}(...) must be called with a collection of some kind, {1} was ' - 'passed'.format(cls.__name__, repr(data)) - ) + raise TypeError('{0}(...) must be called with a collection of some ' + 'kind, {1} was passed'.format(cls.__name__, + repr(data))) @classmethod def _string_data_error(cls, data): @@ -436,7 +441,8 @@ def _string_data_error(cls, data): @classmethod def _coerce_to_ndarray(cls, data): """coerces data to ndarray, raises on scalar data. Converts other - iterables to list first and then to array. Does not touch ndarrays.""" + iterables to list first and then to array. Does not touch ndarrays. 
+ """ if not isinstance(data, (np.ndarray, Index)): if data is None or np.isscalar(data): @@ -450,13 +456,13 @@ def _coerce_to_ndarray(cls, data): def _get_attributes_dict(self): """ return an attributes dict for my class """ - return dict([ (k,getattr(self,k,None)) for k in self._attributes]) + return dict([(k, getattr(self, k, None)) for k in self._attributes]) def view(self, cls=None): # we need to see if we are subclassing an # index type here - if cls is not None and not hasattr(cls,'_typ'): + if cls is not None and not hasattr(cls, '_typ'): result = self._data.view(cls) else: result = self._shallow_copy() @@ -528,16 +534,14 @@ def __unicode__(self): attrs = self._format_attrs() space = self._format_space() - prepr = (u(",%s") % space).join([u("%s=%s") % (k, v) - for k, v in attrs]) + prepr = (u(",%s") % + space).join([u("%s=%s") % (k, v) for k, v in attrs]) # no data provided, just attributes if data is None: data = '' - res = u("%s(%s%s)") % (klass, - data, - prepr) + res = u("%s(%s%s)") % (klass, data, prepr) return res @@ -546,8 +550,8 @@ def _format_space(self): # using space here controls if the attributes # are line separated or not (the default) - #max_seq_items = get_option('display.max_seq_items') - #if len(self) > max_seq_items: + # max_seq_items = get_option('display.max_seq_items') + # if len(self) > max_seq_items: # space = "\n%s" % (' ' * (len(klass) + 1)) return " " @@ -588,7 +592,8 @@ def _format_data(self): def _extend_line(s, line, value, display_width, next_line_prefix): - if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width: + if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >= + display_width): s += line.rstrip() line = next_line_prefix line += value @@ -612,18 +617,21 @@ def best_len(values): else: if n > max_seq_items: - n = min(max_seq_items//2,10) - head = [ formatter(x) for x in self[:n] ] - tail = [ formatter(x) for x in self[-n:] ] + n = min(max_seq_items // 2, 10) + head = [formatter(x) for x in self[:n]] + tail = [formatter(x) for x in self[-n:]] else: head = [] - tail = [ formatter(x) for x in self ] + tail = [formatter(x) for x in self] # adjust all values to max length if needed if is_justify: - # however, if we are not truncated and we are only a single line, then don't justify - if is_truncated or not (len(', '.join(head)) < display_width and len(', '.join(tail)) < display_width): + # however, if we are not truncated and we are only a single + # line, then don't justify + if (is_truncated or + not (len(', '.join(head)) < display_width and + len(', '.join(tail)) < display_width)): max_len = max(best_len(head), best_len(tail)) head = [x.rjust(max_len) for x in head] tail = [x.rjust(max_len) for x in tail] @@ -641,7 +649,7 @@ def best_len(values): summary += line.rstrip() + space2 + '...' 
line = space2 - for i in range(len(tail)-1): + for i in range(len(tail) - 1): word = tail[i] + sep + ' ' summary, line = _extend_line(summary, line, word, display_width, space2) @@ -667,12 +675,12 @@ def _format_attrs(self): Return a list of tuples of the (attr,formatted_value) """ attrs = [] - attrs.append(('dtype',"'%s'" % self.dtype)) + attrs.append(('dtype', "'%s'" % self.dtype)) if self.name is not None: - attrs.append(('name',default_pprint(self.name))) + attrs.append(('name', default_pprint(self.name))) max_seq_items = get_option('display.max_seq_items') or len(self) if len(self) > max_seq_items: - attrs.append(('length',len(self))) + attrs.append(('length', len(self))) return attrs def to_series(self, **kwargs): @@ -698,8 +706,7 @@ def _to_embed(self, keep_tz=False): return self.values.copy() def astype(self, dtype): - return Index(self.values.astype(dtype), name=self.name, - dtype=dtype) + return Index(self.values.astype(dtype), name=self.name, dtype=dtype) def _to_safe_for_reshape(self): """ convert to object if we are a categorical """ @@ -737,12 +744,12 @@ def nlevels(self): return 1 def _get_names(self): - return FrozenList((self.name,)) + return FrozenList((self.name, )) def _set_names(self, values, level=None): if len(values) != 1: - raise ValueError('Length of new names must be 1, got %d' - % len(values)) + raise ValueError('Length of new names must be 1, got %d' % + len(values)) self.name = values[0] names = property(fset=_set_names, fget=_get_names) @@ -755,9 +762,9 @@ def set_names(self, names, level=None, inplace=False): ---------- names : str or sequence name(s) to set - level : int or level name, or sequence of int / level names (default None) - If the index is a MultiIndex (hierarchical), level(s) to set (None for all levels) - Otherwise level must be None + level : int, level name, or sequence of int/level names (default None) + If the index is a MultiIndex (hierarchical), level(s) to set (None + for all levels). 
Otherwise level must be None inplace : bool if True, mutates in place @@ -786,7 +793,8 @@ def set_names(self, names, level=None, inplace=False): if level is not None and self.nlevels == 1: raise ValueError('Level must be None for non-MultiIndex') - if level is not None and not is_list_like(level) and is_list_like(names): + if level is not None and not is_list_like(level) and is_list_like( + names): raise TypeError("Names must be a string") if not is_list_like(names) and level is None and self.nlevels > 1: @@ -830,12 +838,12 @@ def _has_complex_internals(self): def summary(self, name=None): if len(self) > 0: head = self[0] - if hasattr(head, 'format') and\ - not isinstance(head, compat.string_types): + if (hasattr(head, 'format') and + not isinstance(head, compat.string_types)): head = head.format() tail = self[-1] - if hasattr(tail, 'format') and\ - not isinstance(tail, compat.string_types): + if (hasattr(tail, 'format') and + not isinstance(tail, compat.string_types)): tail = tail.format() index_summary = ', %s to %s' % (com.pprint_thing(head), com.pprint_thing(tail)) @@ -934,16 +942,20 @@ def to_int(): return key elif is_float(key): key = to_int() - warnings.warn("scalar indexers for index type {0} should be integers and not floating point".format( - type(self).__name__), FutureWarning, stacklevel=5) + warnings.warn("scalar indexers for index type {0} should be " + "integers and not floating point".format( + type(self).__name__), + FutureWarning, stacklevel=5) return key return self._invalid_indexer('label', key) if is_float(key): if isnull(key): return self._invalid_indexer('label', key) - warnings.warn("scalar indexers for index type {0} should be integers and not floating point".format( - type(self).__name__), FutureWarning, stacklevel=3) + warnings.warn("scalar indexers for index type {0} should be " + "integers and not floating point".format( + type(self).__name__), + FutureWarning, stacklevel=3) return to_int() return key @@ -974,19 +986,20 @@ def _convert_slice_indexer(self, key, kind=None): # need to coerce to_int if needed def f(c): - v = getattr(key,c) + v = getattr(key, c) if v is None or is_integer(v): return v # warn if it's a convertible float if v == int(v): - warnings.warn("slice indexers when using iloc should be integers " - "and not floating point", FutureWarning, stacklevel=7) + warnings.warn("slice indexers when using iloc should be " + "integers and not floating point", + FutureWarning, stacklevel=7) return int(v) self._invalid_indexer('slice {0} value'.format(c), v) - return slice(*[ f(c) for c in ['start','stop','step']]) + return slice(*[f(c) for c in ['start', 'stop', 'step']]) # validate slicers def validate(v): @@ -1001,8 +1014,9 @@ def validate(v): return False return True - for c in ['start','stop','step']: - v = getattr(key,c) + + for c in ['start', 'stop', 'step']: + v = getattr(key, c) if not validate(v): self._invalid_indexer('slice {0} value'.format(c), v) @@ -1025,10 +1039,11 @@ def is_int(v): # if we are mixed and have integers try: if is_positional and self.is_mixed(): + # TODO: i, j are not used anywhere if start is not None: - i = self.get_loc(start) + i = self.get_loc(start) # noqa if stop is not None: - j = self.get_loc(stop) + j = self.get_loc(stop) # noqa is_positional = False except KeyError: if self.inferred_type == 'mixed-integer-float': @@ -1058,23 +1073,25 @@ def _convert_list_indexer(self, keyarr, kind=None): and we have a mixed index (e.g. number/labels). figure out the indexer. 
return None if we can't help """ - if kind in [None, 'iloc', 'ix'] and is_integer_dtype(keyarr) \ - and not self.is_floating() and not isinstance(keyarr, ABCPeriodIndex): + if (kind in [None, 'iloc', 'ix'] and + is_integer_dtype(keyarr) and not self.is_floating() and + not isinstance(keyarr, ABCPeriodIndex)): if self.inferred_type == 'mixed-integer': indexer = self.get_indexer(keyarr) if (indexer >= 0).all(): return indexer - # missing values are flagged as -1 by get_indexer and negative indices are already - # converted to positive indices in the above if-statement, so the negative flags are changed to - # values outside the range of indices so as to trigger an IndexError in maybe_convert_indices + # missing values are flagged as -1 by get_indexer and negative + # indices are already converted to positive indices in the + # above if-statement, so the negative flags are changed to + # values outside the range of indices so as to trigger an + # IndexError in maybe_convert_indices indexer[indexer < 0] = len(self) from pandas.core.indexing import maybe_convert_indices return maybe_convert_indices(indexer, len(self)) elif not self.inferred_type == 'integer': - keyarr = np.where(keyarr < 0, - len(self) + keyarr, keyarr) + keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr) return keyarr return None @@ -1082,10 +1099,9 @@ def _convert_list_indexer(self, keyarr, kind=None): def _invalid_indexer(self, form, key): """ consistent invalid indexer message """ raise TypeError("cannot do {form} indexing on {klass} with these " - "indexers [{key}] of {kind}".format(form=form, - klass=type(self), - key=key, - kind=type(key))) + "indexers [{key}] of {kind}".format( + form=form, klass=type(self), key=key, + kind=type(key))) def get_duplicates(self): from collections import defaultdict @@ -1119,14 +1135,14 @@ def _validate_index_level(self, level): if isinstance(level, int): if level < 0 and level != -1: raise IndexError("Too many levels: Index has only 1 level," - " %d is not a valid level number" % (level,)) + " %d is not a valid level number" % (level, )) elif level > 0: raise IndexError("Too many levels:" " Index has only 1 level, not %d" % (level + 1)) elif level != self.name: - raise KeyError('Level %s must be same as name (%s)' - % (level, self.name)) + raise KeyError('Level %s must be same as name (%s)' % + (level, self.name)) def _get_level_number(self, level): self._validate_index_level(level) @@ -1178,11 +1194,12 @@ def __setstate__(self, state): self._reset_identity() else: raise Exception("invalid pickle state") + _unpickle_compat = __setstate__ def __deepcopy__(self, memo=None): if memo is None: - memo = {} + memo = {} return self.copy(deep=True) def __nonzero__(self): @@ -1257,9 +1274,8 @@ def _ensure_compat_append(self, other): to_concat.append(other) for obj in to_concat: - if (isinstance(obj, Index) and - obj.name != name and - obj.name is not None): + if (isinstance(obj, Index) and obj.name != name and + obj.name is not None): name = None break @@ -1283,11 +1299,13 @@ def append(self, other): to_concat, name = self._ensure_compat_append(other) attribs = self._get_attributes_dict() attribs['name'] = name - return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs) + return self._shallow_copy_with_infer( + np.concatenate(to_concat), **attribs) @staticmethod def _ensure_compat_concat(indexes): - from pandas.tseries.api import DatetimeIndex, PeriodIndex, TimedeltaIndex + from pandas.tseries.api import (DatetimeIndex, PeriodIndex, + TimedeltaIndex) klasses = DatetimeIndex, 
PeriodIndex, TimedeltaIndex is_ts = [isinstance(idx, klasses) for idx in indexes] @@ -1375,8 +1393,8 @@ def format(self, name=False, formatter=None, **kwargs): header = [] if name: header.append(com.pprint_thing(self.name, - escape_chars=('\t', '\r', '\n')) - if self.name is not None else '') + escape_chars=('\t', '\r', '\n')) if + self.name is not None else '') if formatter is not None: return header + list(self.map(formatter)) @@ -1436,7 +1454,8 @@ def equals(self, other): if not isinstance(other, Index): return False - return array_equivalent(_values_from_object(self), _values_from_object(other)) + return array_equivalent(_values_from_object(self), + _values_from_object(other)) def identical(self, other): """Similar to equals, but check that other comparable attributes are @@ -1504,10 +1523,12 @@ def order(self, return_indexer=False, ascending=True): """ warnings.warn("order is deprecated, use sort_values(...)", FutureWarning, stacklevel=2) - return self.sort_values(return_indexer=return_indexer, ascending=ascending) + return self.sort_values(return_indexer=return_indexer, + ascending=ascending) def sort(self, *args, **kwargs): - raise TypeError("cannot sort an Index object in-place, use sort_values instead") + raise TypeError("cannot sort an Index object in-place, use " + "sort_values instead") def sortlevel(self, level=None, ascending=True, sort_remaining=None): """ @@ -1538,7 +1559,8 @@ def shift(self, periods=1, freq=None): ------- shifted : Index """ - raise NotImplementedError("Not supported for type %s" % type(self).__name__) + raise NotImplementedError("Not supported for type %s" % + type(self).__name__) def argsort(self, *args, **kwargs): """ @@ -1555,23 +1577,26 @@ def argsort(self, *args, **kwargs): def __add__(self, other): if com.is_list_like(other): - warnings.warn("using '+' to provide set union with Indexes is deprecated, " - "use '|' or .union()", FutureWarning, stacklevel=2) + warnings.warn("using '+' to provide set union with Indexes is " + "deprecated, use '|' or .union()", FutureWarning, + stacklevel=2) if isinstance(other, Index): return self.union(other) return Index(np.array(self) + other) def __radd__(self, other): if is_list_like(other): - warnings.warn("using '+' to provide set union with Indexes is deprecated, " - "use '|' or .union()", FutureWarning, stacklevel=2) + warnings.warn("using '+' to provide set union with Indexes is " + "deprecated, use '|' or .union()", FutureWarning, + stacklevel=2) return Index(other + np.array(self)) __iadd__ = __add__ def __sub__(self, other): - warnings.warn("using '-' to provide set differences with Indexes is deprecated, " - "use .difference()",FutureWarning, stacklevel=2) + warnings.warn("using '-' to provide set differences with Indexes is " + "deprecated, use .difference()", FutureWarning, + stacklevel=2) return self.difference(other) def __and__(self, other): @@ -1613,7 +1638,7 @@ def union(self, other): if len(self) == 0: return other - if not is_dtype_equal(self.dtype,other.dtype): + if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') return this.union(other) @@ -1641,8 +1666,7 @@ def union(self, other): self.values[0] < other_diff[0] except TypeError as e: warnings.warn("%s, sort order is undefined for " - "incomparable objects" % e, - RuntimeWarning, + "incomparable objects" % e, RuntimeWarning, stacklevel=3) else: types = frozenset((self.inferred_type, @@ -1657,8 +1681,7 @@ def union(self, other): result = np.sort(result) except TypeError as e: warnings.warn("%s, sort 
order is undefined for " - "incomparable objects" % e, - RuntimeWarning, + "incomparable objects" % e, RuntimeWarning, stacklevel=3) # for subclasses @@ -1698,7 +1721,7 @@ def intersection(self, other): if self.equals(other): return self - if not is_dtype_equal(self.dtype,other.dtype): + if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') return this.intersection(other) @@ -1715,7 +1738,8 @@ def intersection(self, other): indexer = indexer.take((indexer != -1).nonzero()[0]) except: # duplicates - indexer = Index(self.values).get_indexer_non_unique(other._values)[0].unique() + indexer = Index(self.values).get_indexer_non_unique( + other._values)[0].unique() indexer = indexer[indexer != -1] taken = self.take(indexer) @@ -1725,7 +1749,8 @@ def intersection(self, other): def difference(self, other): """ - Return a new Index with elements from the index that are not in `other`. + Return a new Index with elements from the index that are not in + `other`. This is the sorted set difference of two Index objects. @@ -1797,7 +1822,8 @@ def sym_diff(self, other, result_name=None): if result_name is None: result_name = result_name_update - the_diff = sorted(set((self.difference(other)).union(other.difference(self)))) + the_diff = sorted(set((self.difference(other)). + union(other.difference(self)))) attribs = self._get_attributes_dict() attribs['name'] = result_name if 'freq' in attribs: @@ -1835,8 +1861,7 @@ def get_loc(self, key, method=None, tolerance=None): key = _values_from_object(key) return self._engine.get_loc(key) - indexer = self.get_indexer([key], method=method, - tolerance=tolerance) + indexer = self.get_indexer([key], method=method, tolerance=tolerance) if indexer.ndim > 1 or indexer.size > 1: raise TypeError('get_loc requires scalar valued input') loc = indexer.item() @@ -1852,7 +1877,7 @@ def get_value(self, series, key): # if we have something that is Index-like, then # use this, e.g. DatetimeIndex - s = getattr(series,'_values',None) + s = getattr(series, '_values', None) if isinstance(s, Index) and lib.isscalar(key): return s[key] @@ -1866,7 +1891,7 @@ def get_value(self, series, key): try: return self._engine.get_value(s, k) except KeyError as e1: - if len(self) > 0 and self.inferred_type in ['integer','boolean']: + if len(self) > 0 and self.inferred_type in ['integer', 'boolean']: raise try: @@ -1892,8 +1917,8 @@ def set_value(self, arr, key, value): Fast lookup of value from 1-dimensional ndarray. 
Only use this if you know what you're doing """ - self._engine.set_value( - _values_from_object(arr), _values_from_object(key), value) + self._engine.set_value(_values_from_object(arr), + _values_from_object(key), value) def get_level_values(self, level): """ @@ -1991,14 +2016,15 @@ def _convert_tolerance(self, tolerance): def _get_fill_indexer(self, target, method, limit=None, tolerance=None): if self.is_monotonic_increasing and target.is_monotonic_increasing: - method = (self._engine.get_pad_indexer if method == 'pad' - else self._engine.get_backfill_indexer) + method = (self._engine.get_pad_indexer if method == 'pad' else + self._engine.get_backfill_indexer) indexer = method(target._values, limit) else: - indexer = self._get_fill_indexer_searchsorted(target, method, limit) + indexer = self._get_fill_indexer_searchsorted(target, method, + limit) if tolerance is not None: - indexer = self._filter_indexer_tolerance( - target._values, indexer, tolerance) + indexer = self._filter_indexer_tolerance(target._values, indexer, + tolerance) return indexer def _get_fill_indexer_searchsorted(self, target, method, limit=None): @@ -2016,7 +2042,8 @@ def _get_fill_indexer_searchsorted(self, target, method, limit=None): # find exact matches first (this simplifies the algorithm) indexer = self.get_indexer(target) nonexact = (indexer == -1) - indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side) + indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], + side) if side == 'left': # searchsorted returns "indices into a sorted array such that, # if the corresponding elements in v were inserted before the @@ -2045,12 +2072,11 @@ def _get_nearest_indexer(self, target, limit, tolerance): right_distances = abs(self.values[right_indexer] - target) op = operator.lt if self.is_monotonic_increasing else operator.le - indexer = np.where(op(left_distances, right_distances) - | (right_indexer == -1), - left_indexer, right_indexer) + indexer = np.where(op(left_distances, right_distances) | + (right_indexer == -1), left_indexer, right_indexer) if tolerance is not None: - indexer = self._filter_indexer_tolerance( - target, indexer, tolerance) + indexer = self._filter_indexer_tolerance(target, indexer, + tolerance) return indexer def _filter_indexer_tolerance(self, target, indexer, tolerance): @@ -2222,8 +2248,8 @@ def _reindex_non_unique(self, target): """ *this is an internal non-public method* - Create a new index with target's values (move/add/delete values as necessary) - use with non-unique Index and a possibly non-unique target + Create a new index with target's values (move/add/delete values as + necessary) use with non-unique Index and a possibly non-unique target Parameters ---------- @@ -2303,16 +2329,17 @@ def join(self, other, how='left', level=None, return_indexers=False): # try to figure out the join level # GH3662 - if (level is None and (self_is_mi or other_is_mi)): + if level is None and (self_is_mi or other_is_mi): # have the same levels/names so a simple join if self.names == other.names: pass else: - return self._join_multi(other, how=how, return_indexers=return_indexers) + return self._join_multi(other, how=how, + return_indexers=return_indexers) # join on the level - if (level is not None and (self_is_mi or other_is_mi)): + if level is not None and (self_is_mi or other_is_mi): return self._join_level(other, level, how=how, return_indexers=return_indexers) @@ -2343,11 +2370,10 @@ def join(self, other, how='left', level=None, return_indexers=False): result = x, z, y return 
result - if not is_dtype_equal(self.dtype,other.dtype): + if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') - return this.join(other, how=how, - return_indexers=return_indexers) + return this.join(other, how=how, return_indexers=return_indexers) _validate_join_method(how) @@ -2396,15 +2422,18 @@ def _join_multi(self, other, how, return_indexers=True): other_is_mi = isinstance(other, MultiIndex) # figure out join names - self_names = [ n for n in self.names if n is not None ] - other_names = [ n for n in other.names if n is not None ] + self_names = [n for n in self.names if n is not None] + other_names = [n for n in other.names if n is not None] overlap = list(set(self_names) & set(other_names)) # need at least 1 in common, but not more than 1 if not len(overlap): - raise ValueError("cannot join with no level specified and no overlapping names") + raise ValueError("cannot join with no level specified and no " + "overlapping names") if len(overlap) > 1: - raise NotImplementedError("merging with more than one level overlap on a multi-index is not implemented") + raise NotImplementedError("merging with more than one level " + "overlap on a multi-index is not " + "implemented") jl = overlap[0] # make the indices into mi's that match @@ -2427,13 +2456,15 @@ def _join_multi(self, other, how, return_indexers=True): return result # 2 multi-indexes - raise NotImplementedError("merging with both multi-indexes is not implemented") + raise NotImplementedError("merging with both multi-indexes is not " + "implemented") def _join_non_unique(self, other, how='left', return_indexers=False): from pandas.tools.merge import _get_join_indexers - left_idx, right_idx = _get_join_indexers([self.values], [other._values], - how=how, sort=True) + left_idx, right_idx = _get_join_indexers([self.values], + [other._values], how=how, + sort=True) left_idx = com._ensure_platform_int(left_idx) right_idx = com._ensure_platform_int(right_idx) @@ -2449,8 +2480,7 @@ def _join_non_unique(self, other, how='left', return_indexers=False): else: return join_index - def _join_level(self, other, level, how='left', - return_indexers=False, + def _join_level(self, other, level, how='left', return_indexers=False, keep_order=True): """ The join method *only* affects the level of the resulting @@ -2557,10 +2587,8 @@ def _get_leaf_sorter(labels): if not mask_all: left_indexer = mask.nonzero()[0][left_indexer] - join_index = MultiIndex(levels=new_levels, - labels=new_labels, - names=left.names, - verify_integrity=False) + join_index = MultiIndex(levels=new_levels, labels=new_labels, + names=left.names, verify_integrity=False) if right_lev_indexer is not None: right_indexer = com.take_nd(right_lev_indexer, @@ -2646,7 +2674,8 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): ----- This function assumes that the data is sorted, so use at your own peril """ - start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind) + start_slice, end_slice = self.slice_locs(start, end, step=step, + kind=kind) # return a slice if not lib.isscalar(start_slice): @@ -2683,12 +2712,12 @@ def _maybe_cast_slice_bound(self, label, side, kind): # datetimelike Indexes # reject them if is_float(label): - self._invalid_indexer('slice',label) + self._invalid_indexer('slice', label) # we are trying to find integer bounds on a non-integer based index # this is rejected (generally .loc gets you here) elif is_integer(label): - self._invalid_indexer('slice',label) + 
self._invalid_indexer('slice', label)

         return label
@@ -2699,8 +2728,8 @@ def _searchsorted_monotonic(self, label, side='left'):
             # np.searchsorted expects ascending sort order, have to reverse
             # everything for it to work (element ordering, search side and
             # resulting value).
-            pos = self[::-1].searchsorted(
-                label, side='right' if side == 'left' else 'right')
+            pos = self[::-1].searchsorted(label, side='right' if side == 'left'
+                                          else 'left')
             return len(self) - pos

         raise ValueError('index must be monotonic increasing or decreasing')
@@ -2720,9 +2749,9 @@ def get_slice_bound(self, label, side, kind):
         """
         if side not in ('left', 'right'):
-            raise ValueError(
-                "Invalid value for side kwarg,"
-                " must be either 'left' or 'right': %s" % (side,))
+            raise ValueError("Invalid value for side kwarg,"
+                             " must be either 'left' or 'right': %s" %
+                             (side, ))

         original_label = label
@@ -2748,9 +2777,8 @@ def get_slice_bound(self, label, side, kind):
         else:
             slc = lib.maybe_indices_to_slice(slc.astype('i8'), len(self))
             if isinstance(slc, np.ndarray):
-                raise KeyError(
-                    "Cannot get %s slice bound for non-unique label:"
-                    " %r" % (side, original_label))
+                raise KeyError("Cannot get %s slice bound for non-unique "
+                               "label: %r" % (side, original_label))

         if isinstance(slc, slice):
             if side == 'left':
@@ -2854,8 +2882,7 @@ def insert(self, loc, item):
         _self = np.asarray(self)
         item = self._coerce_scalar_to_index(item)._values
-        idx = np.concatenate(
-            (_self[:loc], item, _self[loc:]))
+        idx = np.concatenate((_self[:loc], item, _self[loc:]))
         return self._shallow_copy_with_infer(idx)

     def drop(self, labels, errors='raise'):
@@ -2877,16 +2904,19 @@ def drop(self, labels, errors='raise'):
         mask = indexer == -1
         if mask.any():
             if errors != 'ignore':
-                raise ValueError('labels %s not contained in axis' % labels[mask])
+                raise ValueError('labels %s not contained in axis' %
+                                 labels[mask])
             indexer = indexer[~mask]
         return self.delete(indexer)

-    @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'})
+    @deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
+                                                   False: 'first'})
     @Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs)
     def drop_duplicates(self, keep='first'):
         return super(Index, self).drop_duplicates(keep=keep)

-    @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'})
+    @deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
+                                                   False: 'first'})
     @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
     def duplicated(self, keep='first'):
         return super(Index, self).duplicated(keep=keep)
@@ -2931,7 +2961,6 @@ def _add_comparison_methods(cls):
         """ add in comparison methods """

         def _make_compare(op):
-
             def _evaluate_compare(self, other):
                 if isinstance(other, (np.ndarray, Index, ABCSeries)):
                     if other.ndim > 0 and len(self) != len(other):
@@ -2962,25 +2991,25 @@ def _add_numericlike_set_methods_disabled(cls):
         """ add in the numeric set-like methods to disable """

         def _make_invalid_op(name):
-
             def invalid_op(self, other=None):
-                raise TypeError("cannot perform {name} with this index type: {typ}".format(name=name,
-                                                                                           typ=type(self)))
+                raise TypeError("cannot perform {name} with this index type: "
+                                "{typ}".format(name=name, typ=type(self)))
+
             invalid_op.__name__ = name
             return invalid_op

-        cls.__add__ = cls.__radd__ = __iadd__ = _make_invalid_op('__add__')
-        cls.__sub__ = __isub__ = _make_invalid_op('__sub__')
+        cls.__add__ = cls.__radd__ = __iadd__ = _make_invalid_op('__add__')  # noqa
+        cls.__sub__ = __isub__ = _make_invalid_op('__sub__')  # noqa
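For orientation, the explicit set methods that these deprecations and disabled operators steer users toward; a minimal sketch with illustrative names:

    >>> import pandas as pd
    >>> left, right = pd.Index([1, 2, 3]), pd.Index([2, 3, 4])
    >>> left.union(right)         # replaces the deprecated left + right
    >>> left.difference(right)    # replaces the deprecated left - right
    >>> left.intersection(right)  # also spelled left & right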
@classmethod def _add_numeric_methods_disabled(cls): """ add in numeric methods to disable """ def _make_invalid_op(name): - def invalid_op(self, other=None): - raise TypeError("cannot perform {name} with this index type: {typ}".format(name=name, - typ=type(self))) + raise TypeError("cannot perform {name} with this index type: " + "{typ}".format(name=name, typ=type(self))) + invalid_op.__name__ = name return invalid_op @@ -3063,7 +3092,6 @@ def _add_numeric_methods_binary(cls): """ add in numeric methods """ def _make_evaluate_binop(op, opstr, reversed=False): - def _evaluate_numeric_binop(self, other): from pandas.tseries.offsets import DateOffset @@ -3156,34 +3184,36 @@ def _add_logical_methods(cls): A single element array_like may be converted to bool.""" def _make_logical_function(name, desc, f): - @Substitution(outname=name, desc=desc) @Appender(_doc) def logical_func(self, *args, **kwargs): result = f(self.values) - if isinstance(result, (np.ndarray, ABCSeries, Index)) \ - and result.ndim == 0: + if (isinstance(result, (np.ndarray, ABCSeries, Index)) and + result.ndim == 0): # return NumPy type return result.dtype.type(result.item()) else: # pragma: no cover return result + logical_func.__name__ = name return logical_func - cls.all = _make_logical_function( - 'all', 'Return whether all elements are True', np.all) - cls.any = _make_logical_function( - 'any', 'Return whether any element is True', np.any) + cls.all = _make_logical_function('all', 'Return whether all elements ' + 'are True', + np.all) + cls.any = _make_logical_function('any', + 'Return whether any element is True', + np.any) @classmethod def _add_logical_methods_disabled(cls): """ add in logical methods to disable """ def _make_invalid_op(name): - def invalid_op(self, other=None): - raise TypeError("cannot perform {name} with this index type: {typ}".format(name=name, - typ=type(self))) + raise TypeError("cannot perform {name} with this index type: " + "{typ}".format(name=name, typ=type(self))) + invalid_op.__name__ = name return invalid_op @@ -3195,6 +3225,7 @@ def invalid_op(self, other=None): Index._add_logical_methods() Index._add_comparison_methods() + class CategoricalIndex(Index, PandasDelegate): """ @@ -3221,7 +3252,8 @@ class CategoricalIndex(Index, PandasDelegate): _engine_type = _index.Int64Engine _attributes = ['name'] - def __new__(cls, data=None, categories=None, ordered=None, dtype=None, copy=False, name=None, fastpath=False, **kwargs): + def __new__(cls, data=None, categories=None, ordered=None, dtype=None, + copy=False, name=None, fastpath=False, **kwargs): if fastpath: return cls._simple_new(data, name=name) @@ -3246,7 +3278,8 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None, copy=Fals return cls._simple_new(data, name=name) - def _create_from_codes(self, codes, categories=None, ordered=None, name=None): + def _create_from_codes(self, codes, categories=None, ordered=None, + name=None): """ *this is an internal non-public method* @@ -3271,7 +3304,8 @@ def _create_from_codes(self, codes, categories=None, ordered=None, name=None): ordered = self.ordered if name is None: name = self.name - cat = Categorical.from_codes(codes, categories=categories, ordered=self.ordered) + cat = Categorical.from_codes(codes, categories=categories, + ordered=self.ordered) return CategoricalIndex(cat, name=name) @staticmethod @@ -3303,14 +3337,15 @@ def _create_categorical(self, data, categories=None, ordered=None): return data @classmethod - def _simple_new(cls, values, name=None, categories=None, 
ordered=None, **kwargs): + def _simple_new(cls, values, name=None, categories=None, ordered=None, + **kwargs): result = object.__new__(cls) values = cls._create_categorical(cls, values, categories, ordered) result._data = values result.name = name for k, v in compat.iteritems(kwargs): - setattr(result,k,v) + setattr(result, k, v) result._reset_identity() return result @@ -3319,7 +3354,8 @@ def _is_dtype_compat(self, other): """ *this is an internal non-public method* - provide a comparison between the dtype of self and other (coercing if needed) + provide a comparison between the dtype of self and other (coercing if + needed) Raises ------ @@ -3330,14 +3366,17 @@ def _is_dtype_compat(self, other): if isinstance(other, CategoricalIndex): other = other._values if not other.is_dtype_equal(self): - raise TypeError("categories must match existing categories when appending") + raise TypeError("categories must match existing categories " + "when appending") else: values = other if not is_list_like(values): - values = [ values ] - other = CategoricalIndex(self._create_categorical(self, other, categories=self.categories, ordered=self.ordered)) + values = [values] + other = CategoricalIndex(self._create_categorical( + self, other, categories=self.categories, ordered=self.ordered)) if not other.isin(values).all(): - raise TypeError("cannot append a non-category item to a CategoricalIndex") + raise TypeError("cannot append a non-category item to a " + "CategoricalIndex") return other @@ -3364,16 +3403,17 @@ def _format_attrs(self): """ Return a list of tuples of the (attr,formatted_value) """ - max_categories = (10 if get_option("display.max_categories") == 0 - else get_option("display.max_categories")) - attrs = [('categories', default_pprint(self.categories, max_seq_items=max_categories)), - ('ordered',self.ordered)] + max_categories = (10 if get_option("display.max_categories") == 0 else + get_option("display.max_categories")) + attrs = [('categories', default_pprint(self.categories, + max_seq_items=max_categories)), + ('ordered', self.ordered)] if self.name is not None: - attrs.append(('name',default_pprint(self.name))) - attrs.append(('dtype',"'%s'" % self.dtype)) + attrs.append(('name', default_pprint(self.name))) + attrs.append(('dtype', "'%s'" % self.dtype)) max_seq_items = get_option('display.max_seq_items') or len(self) if len(self) > max_seq_items: - attrs.append(('length',len(self))) + attrs.append(('length', len(self))) return attrs @property @@ -3432,7 +3472,8 @@ def _engine(self): def is_unique(self): return not self.duplicated().any() - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) + @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', + False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas.hashtable import duplicated_int64 @@ -3460,7 +3501,7 @@ def get_loc(self, key, method=None): if (codes == -1): raise KeyError(key) indexer, _ = self._engine.get_indexer_non_unique(np.array([codes])) - if (indexer==-1).any(): + if (indexer == -1).any(): raise KeyError(key) return indexer @@ -3484,11 +3525,14 @@ def reindex(self, target, method=None, level=None, limit=None, """ if method is not None: - raise NotImplementedError("argument method is not implemented for CategoricalIndex.reindex") + raise NotImplementedError("argument method is not implemented for " + "CategoricalIndex.reindex") if level is not None: - raise NotImplementedError("argument level is not implemented for 
CategoricalIndex.reindex") + raise NotImplementedError("argument level is not implemented for " + "CategoricalIndex.reindex") if limit is not None: - raise NotImplementedError("argument limit is not implemented for CategoricalIndex.reindex") + raise NotImplementedError("argument limit is not implemented for " + "CategoricalIndex.reindex") target = _ensure_index(target) @@ -3498,25 +3542,25 @@ def reindex(self, target, method=None, level=None, limit=None, indexer, missing = self.get_indexer_non_unique(np.array(target)) new_target = self.take(indexer) - # filling in missing if needed if len(missing): cats = self.categories.get_indexer(target) - if (cats==-1).any(): + if (cats == -1).any(): # coerce to a regular index here! - result = Index(np.array(self),name=self.name) - new_target, indexer, _ = result._reindex_non_unique(np.array(target)) + result = Index(np.array(self), name=self.name) + new_target, indexer, _ = result._reindex_non_unique( + np.array(target)) else: codes = new_target.codes.copy() - codes[indexer==-1] = cats[missing] + codes[indexer == -1] = cats[missing] new_target = self._create_from_codes(codes) # we always want to return an Index type here - # to be consistent with .reindex for other index types (e.g. they don't coerce - # based on the actual values, only on the dtype) + # to be consistent with .reindex for other index types (e.g. they don't + # coerce based on the actual values, only on the dtype) # unless we had an inital Categorical to begin with # in which case we are going to conform to the passed Categorical new_target = np.asarray(new_target) @@ -3528,11 +3572,13 @@ def reindex(self, target, method=None, level=None, limit=None, return new_target, indexer def _reindex_non_unique(self, target): - """ reindex from a non-unique; which CategoricalIndex's are almost always """ + """ reindex from a non-unique; which CategoricalIndex's are almost + always + """ new_target, indexer = self.reindex(target) new_indexer = None - check = indexer==-1 + check = indexer == -1 if check.any(): new_indexer = np.arange(len(self.take(indexer))) new_indexer[check] = -1 @@ -3580,8 +3626,8 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): target = target.categories if method == 'pad' or method == 'backfill': - raise NotImplementedError("method='pad' and method='backfill' not implemented yet " - 'for CategoricalIndex') + raise NotImplementedError("method='pad' and method='backfill' not " + "implemented yet for CategoricalIndex") elif method == 'nearest': raise NotImplementedError("method='nearest' not implemented yet " 'for CategoricalIndex') @@ -3593,7 +3639,9 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): return com._ensure_platform_int(indexer) def get_indexer_non_unique(self, target): - """ this is the same for a CategoricalIndex for get_indexer; the API returns the missing values as well """ + """ this is the same for a CategoricalIndex for get_indexer; the API + returns the missing values as well + """ target = _ensure_index(target) if isinstance(target, CategoricalIndex): @@ -3605,11 +3653,13 @@ def get_indexer_non_unique(self, target): def _convert_list_indexer(self, keyarr, kind=None): """ we are passed a list indexer. 
- Return our indexer or raise if all of the values are not included in the categories + Return our indexer or raise if all of the values are not included in + the categories """ codes = self.categories.get_indexer(keyarr) - if (codes==-1).any(): - raise KeyError("a list-indexer must only include values that are in the categories") + if (codes == -1).any(): + raise KeyError("a list-indexer must only include values that are " + "in the categories") return None @@ -3661,11 +3711,11 @@ def insert(self, loc, item): """ code = self.categories.get_indexer([item]) if (code == -1): - raise TypeError("cannot insert an item into a CategoricalIndex that is not already an existing category") + raise TypeError("cannot insert an item into a CategoricalIndex " + "that is not already an existing category") codes = self.codes - codes = np.concatenate( - (codes[:loc], code, codes[loc:])) + codes = np.concatenate((codes[:loc], code, codes[loc:])) return self._create_from_codes(codes) def append(self, other): @@ -3685,8 +3735,8 @@ def append(self, other): ValueError if other is not in the categories """ to_concat, name = self._ensure_compat_append(other) - to_concat = [ self._is_dtype_compat(c) for c in to_concat ] - codes = np.concatenate([ c.codes for c in to_concat ]) + to_concat = [self._is_dtype_compat(c) for c in to_concat] + codes = np.concatenate([c.codes for c in to_concat]) return self._create_from_codes(codes, name=name) @classmethod @@ -3694,14 +3744,16 @@ def _add_comparison_methods(cls): """ add in comparison methods """ def _make_compare(op): - def _evaluate_compare(self, other): - # if we have a Categorical type, then must have the same categories + # if we have a Categorical type, then must have the same + # categories if isinstance(other, CategoricalIndex): other = other._values elif isinstance(other, Index): - other = self._create_categorical(self, other._values, categories=self.categories, ordered=self.ordered) + other = self._create_categorical( + self, other._values, categories=self.categories, + ordered=self.ordered) if isinstance(other, (ABCCategorical, np.ndarray, ABCSeries)): if len(self.values) != len(other): @@ -3709,7 +3761,9 @@ def _evaluate_compare(self, other): if isinstance(other, ABCCategorical): if not self.values.is_dtype_equal(other): - raise TypeError("categorical index comparisions must have the same categories and ordered attributes") + raise TypeError("categorical index comparisons must " + "have the same categories and ordered " + "attributes") return getattr(self.values, op)(other) @@ -3722,7 +3776,6 @@ def _evaluate_compare(self, other): cls.__le__ = _make_compare('__le__') cls.__ge__ = _make_compare('__ge__') - def _delegate_method(self, name, *args, **kwargs): """ method delegation to the ._values """ method = getattr(self._values, name) @@ -3738,19 +3791,16 @@ def _add_accessors(cls): """ add in Categorical accessor methods """ from pandas.core.categorical import Categorical - CategoricalIndex._add_delegate_accessors(delegate=Categorical, - accessors=["rename_categories", - "reorder_categories", - "add_categories", - "remove_categories", - "remove_unused_categories", - "set_categories", - "as_ordered", - "as_unordered", - "min", - "max"], - typ='method', - overwrite=True) + CategoricalIndex._add_delegate_accessors( + delegate=Categorical, accessors=["rename_categories", + "reorder_categories", + "add_categories", + "remove_categories", + "remove_unused_categories", + "set_categories", + "as_ordered", "as_unordered", + "min", "max"], + typ='method', 
overwrite=True) CategoricalIndex._add_numericlike_set_methods_disabled() @@ -3794,7 +3844,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): # we are a numeric index, so we accept # integer/floats directly if not (is_integer(label) or is_float(label)): - self._invalid_indexer('slice',label) + self._invalid_indexer('slice', label) return label @@ -3802,12 +3852,11 @@ def _convert_tolerance(self, tolerance): try: return float(tolerance) except ValueError: - raise ValueError('tolerance argument for %s must be numeric: %r' - % (type(self).__name__, tolerance)) + raise ValueError('tolerance argument for %s must be numeric: %r' % + (type(self).__name__, tolerance)) class Int64Index(NumericIndex): - """ Immutable ndarray implementing an ordered, sliceable set. The basic object storing axis labels for all pandas objects. Int64Index is a special case @@ -3841,7 +3890,8 @@ class Int64Index(NumericIndex): _engine_type = _index.Int64Engine - def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, **kwargs): + def __new__(cls, data=None, dtype=None, copy=False, name=None, + fastpath=False, **kwargs): if fastpath: return cls._simple_new(data, name=name) @@ -3855,8 +3905,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, * elif issubclass(data.dtype.type, np.integer): # don't force the upcast as we may be dealing # with a platform int - if dtype is None or not issubclass(np.dtype(dtype).type, - np.integer): + if (dtype is None or + not issubclass(np.dtype(dtype).type, np.integer)): dtype = np.int64 subarr = np.array(data, dtype=dtype, copy=copy) @@ -3896,7 +3946,8 @@ def equals(self, other): # return False try: - return array_equivalent(_values_from_object(self), _values_from_object(other)) + return array_equivalent(_values_from_object(self), + _values_from_object(other)) except TypeError: # e.g. fails in numpy 1.6 with DatetimeIndex #1681 return False @@ -4465,7 +4516,6 @@ def _evaluate_numeric_binop(self, other): class Float64Index(NumericIndex): - """ Immutable ndarray implementing an ordered, sliceable set. The basic object storing axis labels for all pandas objects. 
Float64Index is a special case @@ -4494,7 +4544,8 @@ class Float64Index(NumericIndex): _inner_indexer = _algos.inner_join_indexer_float64 _outer_indexer = _algos.outer_join_indexer_float64 - def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, **kwargs): + def __new__(cls, data=None, dtype=None, copy=False, name=None, + fastpath=False, **kwargs): if fastpath: return cls._simple_new(data, name) @@ -4510,8 +4561,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, * try: subarr = np.array(data, dtype=dtype, copy=copy) except: - raise TypeError('Unsafe NumPy casting, you must ' - 'explicitly cast') + raise TypeError('Unsafe NumPy casting, you must explicitly cast') # coerce to float64 for storage if subarr.dtype != np.float64: @@ -4546,7 +4596,8 @@ def _convert_scalar_indexer(self, key, kind=None): if kind == 'iloc': if is_integer(key): return key - return super(Float64Index, self)._convert_scalar_indexer(key, kind=kind) + return super(Float64Index, self)._convert_scalar_indexer(key, + kind=kind) return key @@ -4572,8 +4623,8 @@ def _convert_slice_indexer(self, key, kind=None): # translate to locations return self.slice_indexer(key.start, key.stop, key.step) - def _format_native_types(self, na_rep='', float_format=None, - decimal='.', quoting=None, **kwargs): + def _format_native_types(self, na_rep='', float_format=None, decimal='.', + quoting=None, **kwargs): from pandas.core.format import FloatArrayFormatter formatter = FloatArrayFormatter(self.values, na_rep=na_rep, float_format=float_format, @@ -4611,7 +4662,8 @@ def equals(self, other): try: if not isinstance(other, Float64Index): other = self._constructor(other) - if not is_dtype_equal(self.dtype,other.dtype) or self.shape != other.shape: + if (not is_dtype_equal(self.dtype, other.dtype) or + self.shape != other.shape): return False left, right = self._values, other._values return ((left == right) | (self._isnan & other._isnan)).all() @@ -4674,7 +4726,6 @@ def isin(self, values, level=None): class MultiIndex(Index): - """ A multi-level, or hierarchical, index object for pandas objects @@ -4704,7 +4755,8 @@ class MultiIndex(Index): rename = Index.set_names def __new__(cls, levels=None, labels=None, sortorder=None, names=None, - copy=False, verify_integrity=True, _set_identity=True, name=None, **kwargs): + copy=False, verify_integrity=True, _set_identity=True, + name=None, **kwargs): # compat with Index if name is not None: @@ -4756,8 +4808,8 @@ def _verify_integrity(self): label_length = len(self.labels[0]) for i, (level, label) in enumerate(zip(levels, labels)): if len(label) != label_length: - raise ValueError("Unequal label lengths: %s" % ( - [len(lab) for lab in labels])) + raise ValueError("Unequal label lengths: %s" % + ([len(lab) for lab in labels])) if len(label) and label.max() >= len(level): raise ValueError("On level %d, label max (%d) >= length of" " level (%d). 
NOTE: this index is in an" @@ -4780,8 +4832,9 @@ def _set_levels(self, levels, level=None, copy=False, validate=True, raise ValueError('Length of levels must match length of level.') if level is None: - new_levels = FrozenList(_ensure_index(lev, copy=copy)._shallow_copy() - for lev in levels) + new_levels = FrozenList( + _ensure_index(lev, copy=copy)._shallow_copy() + for lev in levels) else: level = [self._get_level_number(l) for l in level] new_levels = list(self._levels) @@ -4800,7 +4853,8 @@ def _set_levels(self, levels, level=None, copy=False, validate=True, if verify_integrity: self._verify_integrity() - def set_levels(self, levels, level=None, inplace=False, verify_integrity=True): + def set_levels(self, levels, level=None, inplace=False, + verify_integrity=True): """ Set new levels on MultiIndex. Defaults to returning new index. @@ -4809,7 +4863,7 @@ def set_levels(self, levels, level=None, inplace=False, verify_integrity=True): ---------- levels : sequence or list of sequence new level(s) to apply - level : int or level name, or sequence of int / level names (default None) + level : int, level name, or sequence of int/level names (default None) level(s) to set (None for all levels) inplace : bool if True, mutates in place @@ -4883,13 +4937,15 @@ def _set_labels(self, labels, level=None, copy=False, validate=True, raise ValueError('Length of labels must match length of levels.') if level is None: - new_labels = FrozenList(_ensure_frozen(lab, lev, copy=copy)._shallow_copy() - for lev, lab in zip(self.levels, labels)) + new_labels = FrozenList( + _ensure_frozen(lab, lev, copy=copy)._shallow_copy() + for lev, lab in zip(self.levels, labels)) else: level = [self._get_level_number(l) for l in level] new_labels = list(self._labels) for l, lev, lab in zip(level, self.levels, labels): - new_labels[l] = _ensure_frozen(lab, lev, copy=copy)._shallow_copy() + new_labels[l] = _ensure_frozen( + lab, lev, copy=copy)._shallow_copy() new_labels = FrozenList(new_labels) self._labels = new_labels @@ -4899,7 +4955,8 @@ def _set_labels(self, labels, level=None, copy=False, validate=True, if verify_integrity: self._verify_integrity() - def set_labels(self, labels, level=None, inplace=False, verify_integrity=True): + def set_labels(self, labels, level=None, inplace=False, + verify_integrity=True): """ Set new labels on MultiIndex. Defaults to returning new index. 
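Reviewer note for the `set_levels`/`set_labels` signatures rewrapped above: a short usage sketch (not part of the patch; the index contents are invented):

    import pandas as pd

    mi = pd.MultiIndex.from_tuples([(1, 'a'), (1, 'b'), (2, 'a')],
                                   names=['num', 'let'])
    # swap out the values of one level; a new MultiIndex is returned
    mi2 = mi.set_levels(['x', 'y'], level='let')
    # repoint rows at different level values via the integer labels
    mi3 = mi.set_labels([1, 0, 0], level='let')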
@@ -4908,7 +4965,7 @@ def set_labels(self, labels, level=None, inplace=False, verify_integrity=True): ---------- labels : sequence or list of sequence new labels to apply - level : int or level name, or sequence of int / level names (default None) + level : int, level name, or sequence of int/level names (default None) level(s) to set (None for all levels) inplace : bool if True, mutates in place @@ -5000,11 +5057,8 @@ def copy(self, names=None, dtype=None, levels=None, labels=None, levels = self.levels labels = self.labels names = self.names - return MultiIndex(levels=levels, - labels=labels, - names=names, - sortorder=self.sortorder, - verify_integrity=False, + return MultiIndex(levels=levels, labels=labels, names=names, + sortorder=self.sortorder, verify_integrity=False, _set_identity=_set_identity) def __array__(self, dtype=None): @@ -5023,7 +5077,7 @@ def _shallow_copy_with_infer(self, values=None, **kwargs): def _shallow_copy(self, values=None, **kwargs): if values is not None: if 'name' in kwargs: - kwargs['names'] = kwargs.pop('name',None) + kwargs['names'] = kwargs.pop('name', None) # discards freq kwargs.pop('freq', None) return MultiIndex.from_tuples(values, **kwargs) @@ -5036,9 +5090,9 @@ def dtype(self): @cache_readonly def nbytes(self): """ return the number of bytes in the underlying data """ - level_nbytes = sum(( i.nbytes for i in self.levels )) - label_nbytes = sum(( i.nbytes for i in self.labels )) - names_nbytes = sum(( getsizeof(i) for i in self.names )) + level_nbytes = sum((i.nbytes for i in self.levels)) + label_nbytes = sum((i.nbytes for i in self.labels)) + names_nbytes = sum((getsizeof(i) for i in self.names)) return level_nbytes + label_nbytes + names_nbytes def _format_attrs(self): @@ -5079,8 +5133,8 @@ def _set_names(self, names, level=None, validate=True): if validate and level is not None and len(names) != len(level): raise ValueError('Length of names must match length of level.') if validate and level is None and len(names) != self.nlevels: - raise ValueError( - 'Length of names must match number of levels in MultiIndex.') + raise ValueError('Length of names must match number of levels in ' + 'MultiIndex.') if level is None: level = range(self.nlevels) @@ -5091,8 +5145,8 @@ def _set_names(self, names, level=None, validate=True): for l, name in zip(level, names): self.levels[l].rename(name, inplace=True) - names = property( - fset=_set_names, fget=_get_names, doc="Names of levels in MultiIndex") + names = property(fset=_set_names, fget=_get_names, + doc="Names of levels in MultiIndex") def _reference_duplicate_name(self, name): """ @@ -5151,10 +5205,9 @@ def _get_level_number(self, level): level += self.nlevels if level < 0: orig_level = level - self.nlevels - raise IndexError( - 'Too many levels: Index has only %d levels, ' - '%d is not a valid level number' % (self.nlevels, orig_level) - ) + raise IndexError('Too many levels: Index has only %d ' + 'levels, %d is not a valid level number' % + (self.nlevels, orig_level)) # Note: levels are zero-based elif level >= self.nlevels: raise IndexError('Too many levels: Index has only %d levels, ' @@ -5203,7 +5256,8 @@ def _has_complex_internals(self): def is_unique(self): return not self.duplicated().any() - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) + @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', + False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas.core.groupby import get_group_index @@ 
-5262,8 +5316,8 @@ def _try_mi(k): # rather than a KeyError, try it here # note that a string that 'looks' like a Timestamp will raise # a KeyError! (GH5725) - if isinstance(key, (datetime.datetime, np.datetime64)) or ( - compat.PY3 and isinstance(key, compat.string_types)): + if (isinstance(key, (datetime.datetime, np.datetime64)) or + (compat.PY3 and isinstance(key, compat.string_types))): try: return _try_mi(key) except (KeyError): @@ -5352,13 +5406,11 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, if sparsify not in [True, 1]: sentinel = sparsify # little bit of a kludge job for #1217 - result_levels = _sparsify(result_levels, - start=int(names), + result_levels = _sparsify(result_levels, start=int(names), sentinel=sentinel) - if adjoin: - from pandas.core.format import _get_adjustment + from pandas.core.format import _get_adjustment adj = _get_adjustment() return adj.adjoin(space, *result_levels).split('\n') else: @@ -5366,7 +5418,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, def _to_safe_for_reshape(self): """ convert to object if we are a categorical """ - return self.set_levels([ i._to_safe_for_reshape() for i in self.levels ]) + return self.set_levels([i._to_safe_for_reshape() for i in self.levels]) def to_hierarchical(self, n_repeat, n_shuffle=1): """ @@ -5478,9 +5530,8 @@ def from_arrays(cls, arrays, sortorder=None, names=None): if names is None: names = [getattr(arr, "name", None) for arr in arrays] - return MultiIndex(levels=levels, labels=labels, - sortorder=sortorder, names=names, - verify_integrity=False) + return MultiIndex(levels=levels, labels=labels, sortorder=sortorder, + names=names, verify_integrity=False) @classmethod def from_tuples(cls, tuples, sortorder=None, names=None): @@ -5525,8 +5576,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): else: arrays = lzip(*tuples) - return MultiIndex.from_arrays(arrays, sortorder=sortorder, - names=names) + return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names) @classmethod def from_product(cls, iterables, sortorder=None, names=None): @@ -5565,7 +5615,8 @@ def from_product(cls, iterables, sortorder=None, names=None): from pandas.core.categorical import Categorical from pandas.tools.util import cartesian_product - categoricals = [Categorical.from_array(it, ordered=True) for it in iterables] + categoricals = [Categorical.from_array(it, ordered=True) + for it in iterables] labels = cartesian_product([c.codes for c in categoricals]) return MultiIndex(levels=[c.categories for c in categoricals], @@ -5590,10 +5641,9 @@ def __contains__(self, key): def __reduce__(self): """Necessary for making this object picklable""" - d = dict(levels = [lev for lev in self.levels], - labels = [label for label in self.labels], - sortorder = self.sortorder, - names = list(self.names)) + d = dict(levels=[lev for lev in self.levels], + labels=[label for label in self.labels], + sortorder=self.sortorder, names=list(self.names)) return _new_Index, (self.__class__, d), None def __setstate__(self, state): @@ -5637,10 +5687,8 @@ def __getitem__(self, key): new_labels = [lab[key] for lab in self.labels] - return MultiIndex(levels=self.levels, - labels=new_labels, - names=self.names, - sortorder=sortorder, + return MultiIndex(levels=self.levels, labels=new_labels, + names=self.names, sortorder=sortorder, verify_integrity=False) def take(self, indexer, axis=None): @@ -5664,7 +5712,8 @@ def append(self, other): if not isinstance(other, (list, tuple)): other = [other] - if 
all((isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other): + if all((isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) + for o in other): arrays = [] for i in range(self.nlevels): label = self.get_level_values(i) @@ -5672,7 +5721,7 @@ def append(self, other): arrays.append(label.append(appended)) return MultiIndex.from_arrays(arrays, names=self.names) - to_concat = (self.values,) + tuple(k._values for k in other) + to_concat = (self.values, ) + tuple(k._values for k in other) new_tuples = np.concatenate(to_concat) # if all(isinstance(x, MultiIndex) for x in other): @@ -5686,10 +5735,9 @@ def argsort(self, *args, **kwargs): def repeat(self, n): return MultiIndex(levels=self.levels, - labels=[label.view(np.ndarray).repeat(n) for label in self.labels], - names=self.names, - sortorder=self.sortorder, - verify_integrity=False) + labels=[label.view(np.ndarray).repeat(n) + for label in self.labels], names=self.names, + sortorder=self.sortorder, verify_integrity=False) def drop(self, labels, level=None, errors='raise'): """ @@ -5715,8 +5763,8 @@ def drop(self, labels, level=None, errors='raise'): mask = indexer == -1 if mask.any(): if errors != 'ignore': - raise ValueError('labels %s not contained in axis' - % labels[mask]) + raise ValueError('labels %s not contained in axis' % + labels[mask]) indexer = indexer[~mask] except Exception: pass @@ -5827,9 +5875,9 @@ def reorder_levels(self, order): """ order = [self._get_level_number(i) for i in order] if len(order) != self.nlevels: - raise AssertionError(('Length of order must be same as ' - 'number of levels (%d), got %d') - % (self.nlevels, len(order))) + raise AssertionError('Length of order must be same as ' + 'number of levels (%d), got %d' % + (self.nlevels, len(order))) new_levels = [self.levels[i] for i in order] new_labels = [self.labels[i] for i in order] new_names = [self.names[i] for i in order] @@ -5890,8 +5938,7 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): else: sortorder = level[0] - indexer = _indexer_from_factorized(primary, - primshp, + indexer = _indexer_from_factorized(primary, primshp, compress=False) if not ascending: @@ -6007,8 +6054,7 @@ def reindex(self, target, method=None, level=None, limit=None, limit=limit, tolerance=tolerance) else: - raise Exception( - "cannot handle a non-unique multi-index!") + raise Exception("cannot handle a non-unique multi-index!") if not isinstance(target, MultiIndex): if indexer is None: @@ -6020,7 +6066,7 @@ def reindex(self, target, method=None, level=None, limit=None, target = MultiIndex.from_tuples(target) if (preserve_names and target.nlevels == self.nlevels and - target.names != self.names): + target.names != self.names): target = target.copy(deep=False) target.names = self.names @@ -6102,9 +6148,9 @@ def _partial_tup_index(self, tup, side='left'): def get_loc(self, key, method=None): """ - Get integer location, slice or boolean mask for requested label or tuple - If the key is past the lexsort depth, the return may be a boolean mask - array, otherwise it is always a slice or int. + Get integer location, slice or boolean mask for requested label or + tuple. If the key is past the lexsort depth, the return may be a + boolean mask array, otherwise it is always a slice or int. 
Parameters ---------- @@ -6140,9 +6186,10 @@ def _maybe_to_slice(loc): keylen = len(key) if self.nlevels < keylen: raise KeyError('Key length ({0}) exceeds index depth ({1})' - ''.format(keylen, self.nlevels)) + ''.format(keylen, self.nlevels)) if keylen == self.nlevels and self.is_unique: + def _maybe_str_to_time_stamp(key, lev): if lev.is_all_dates and not isinstance(key, Timestamp): try: @@ -6150,6 +6197,7 @@ def _maybe_str_to_time_stamp(key, lev): except Exception: pass return key + key = _values_from_object(key) key = tuple(map(_maybe_str_to_time_stamp, key, self.levels)) return self._engine.get_loc(key) @@ -6160,8 +6208,8 @@ def _maybe_str_to_time_stamp(key, lev): # needs linear search within the slice i = self.lexsort_depth lead_key, follow_key = key[:i], key[i:] - start, stop = self.slice_locs(lead_key, lead_key) \ - if lead_key else (0, len(self)) + start, stop = (self.slice_locs(lead_key, lead_key) + if lead_key else (0, len(self))) if start == stop: raise KeyError(key) @@ -6181,9 +6229,8 @@ def _maybe_str_to_time_stamp(key, lev): if not len(loc): raise KeyError(key) - return _maybe_to_slice(loc) \ - if len(loc) != stop - start \ - else slice(start, stop) + return (_maybe_to_slice(loc) if len(loc) != stop - start else + slice(start, stop)) def get_loc_level(self, key, level=0, drop_level=True): """ @@ -6198,6 +6245,7 @@ def get_loc_level(self, key, level=0, drop_level=True): ------- loc : int or slice object """ + def maybe_droplevels(indexer, levels, drop_level): if not drop_level: return self[indexer] @@ -6264,19 +6312,18 @@ def partial_selection(key, indexer=None): # here we have a completely specified key, but are # using some partial string matching here # GH4758 - can_index_exactly = any([ - (l.is_all_dates and - not isinstance(k, compat.string_types)) - for k, l in zip(key, self.levels) - ]) - if any([ - l.is_all_dates for k, l in zip(key, self.levels) - ]) and not can_index_exactly: + all_dates = [(l.is_all_dates and + not isinstance(k, compat.string_types)) + for k, l in zip(key, self.levels)] + can_index_exactly = any(all_dates) + if (any([l.is_all_dates + for k, l in zip(key, self.levels)]) and + not can_index_exactly): indexer = self.get_loc(key) # we have a multiple selection here - if not isinstance(indexer, slice) \ - or indexer.stop - indexer.start != 1: + if (not isinstance(indexer, slice) or + indexer.stop - indexer.start != 1): return partial_selection(key, indexer) key = tuple(self[indexer].tolist()[0]) @@ -6313,8 +6360,7 @@ def partial_selection(key, indexer=None): indexer = slice(None, None) ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] - return indexer, maybe_droplevels(indexer, ilevels, - drop_level) + return indexer, maybe_droplevels(indexer, ilevels, drop_level) else: indexer = self._get_level_indexer(key, level=level) return indexer, maybe_droplevels(indexer, [level], drop_level) @@ -6332,15 +6378,15 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): # if we have a provided indexer, then this need not consider # the entire labels set - r = np.arange(start,stop,step) + r = np.arange(start, stop, step) if indexer is not None and len(indexer) != len(labels): - # we have an indexer which maps the locations in the labels that we - # have already selected (and is not an indexer for the entire set) - # otherwise this is wasteful - # so we only need to examine locations that are in this set - # the only magic here is that the result are the mappings to the - # set that we have selected + # we have an indexer which 
maps the locations in the labels + # that we have already selected (and is not an indexer for the + # entire set); otherwise this is wasteful, so we only need to + # examine locations that are in this set. The only magic here is + # that the results are the mappings to the set that we have + # selected. from pandas import Series mapper = Series(indexer) indexer = labels.take(com._ensure_platform_int(indexer)) @@ -6348,8 +6394,8 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): m = result.map(mapper)._values else: - m = np.zeros(len(labels),dtype=bool) - m[np.in1d(labels,r,assume_unique=True)] = True + m = np.zeros(len(labels), dtype=bool) + m[np.in1d(labels, r, assume_unique=True)] = True return m @@ -6363,17 +6409,19 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): else: start = 0 if key.stop is not None: - stop = level_index.get_loc(key.stop) + stop = level_index.get_loc(key.stop) else: - stop = len(level_index)-1 + stop = len(level_index) - 1 step = key.step - except (KeyError): + except KeyError: - # we have a partial slice (like looking up a partial date string) - start = stop = level_index.slice_indexer(key.start, key.stop, key.step) + # we have a partial slice (like looking up a partial date + # string) + start = stop = level_index.slice_indexer(key.start, key.stop, + key.step) step = start.step - if isinstance(start,slice) or isinstance(stop,slice): + if isinstance(start, slice) or isinstance(stop, slice): # we have a slice for start and/or stop # a partial date slicer on a DatetimeIndex generates a slice # note that the stop ALREADY includes the stopped point (if @@ -6384,7 +6432,7 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): # need to have like semantics here to right # searching as when we are using a slice # so include the stop+1 (so we include stop) - return convert_indexer(start,stop+1,step) + return convert_indexer(start, stop + 1, step) else: # sorted, so can return slice object -> view i = labels.searchsorted(start, side='left') @@ -6395,7 +6443,7 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): loc = level_index.get_loc(key) if level > 0 or self.lexsort_depth == 0: - return np.array(labels == loc,dtype=bool) + return np.array(labels == loc, dtype=bool) else: # sorted, so can return slice object -> view i = labels.searchsorted(loc, side='left') @@ -6404,8 +6452,8 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): def get_locs(self, tup): """ - Given a tuple of slices/lists/labels/boolean indexer to a level-wise spec produce an indexer to extract those locations + Given a tuple of slices/lists/labels/boolean indexer to a level-wise + spec produce an indexer to extract those locations Parameters ---------- @@ -6419,8 +6467,9 @@ def get_locs(self, tup): # must be lexsorted to at least as many levels if not self.is_lexsorted_for_tuple(tup): - raise KeyError('MultiIndex Slicing requires the index to be fully lexsorted' ' tuple len ({0}), lexsort depth ({1})'.format(len(tup), self.lexsort_depth)) + raise KeyError('MultiIndex Slicing requires the index to be fully ' 'lexsorted tuple len ({0}), lexsort depth ' '({1})'.format(len(tup), self.lexsort_depth)) # indexer # this is the list of all values that we want to select @@ -6430,13 +6479,14 @@ def get_locs(self, tup): def _convert_to_indexer(r): # return an indexer if isinstance(r, slice): - m = np.zeros(n,dtype=bool) + m = np.zeros(n, dtype=bool) m[r] = True r = m.nonzero()[0] elif 
is_bool_indexer(r): if len(r) != n: - raise ValueError("cannot index with a boolean indexer that is" - " not the same length as the index") + raise ValueError("cannot index with a boolean indexer " + "that is not the same length as the " + "index") r = r.nonzero()[0] return Int64Index(r) @@ -6447,21 +6497,26 @@ def _update_indexer(idxr, indexer=indexer): return indexer return indexer & idxr - for i,k in enumerate(tup): + for i, k in enumerate(tup): if is_bool_indexer(k): # a boolean indexer, must be the same length! k = np.asarray(k) - indexer = _update_indexer(_convert_to_indexer(k), indexer=indexer) + indexer = _update_indexer(_convert_to_indexer(k), + indexer=indexer) elif is_list_like(k): - # a collection of labels to include from this level (these are or'd) + # a collection of labels to include from this level (these + # are or'd) indexers = None for x in k: try: - idxrs = _convert_to_indexer(self._get_level_indexer(x, level=i, indexer=indexer)) - indexers = idxrs if indexers is None else indexers | idxrs - except (KeyError): + idxrs = _convert_to_indexer( + self._get_level_indexer(x, level=i, + indexer=indexer)) + indexers = (idxrs if indexers is None + else indexers | idxrs) + except KeyError: # ignore not founds continue @@ -6477,13 +6532,17 @@ def _update_indexer(idxr, indexer=indexer): # empty slice indexer = _update_indexer(None, indexer=indexer) - elif isinstance(k,slice): + elif isinstance(k, slice): # a slice, include BOTH of the labels - indexer = _update_indexer(_convert_to_indexer(self._get_level_indexer(k,level=i,indexer=indexer)), indexer=indexer) + indexer = _update_indexer(_convert_to_indexer( + self._get_level_indexer(k, level=i, indexer=indexer)), + indexer=indexer) else: # a single label - indexer = _update_indexer(_convert_to_indexer(self.get_loc_level(k,level=i,drop_level=False)[0]), indexer=indexer) + indexer = _update_indexer(_convert_to_indexer( + self.get_loc_level(k, level=i, drop_level=False)[0]), + indexer=indexer) # empty indexer if indexer is None: @@ -6543,10 +6602,10 @@ def equals(self, other): return False for i in range(self.nlevels): - svalues = com.take_nd(np.asarray(self.levels[i]._values), self.labels[i], - allow_fill=False) - ovalues = com.take_nd(np.asarray(other.levels[i]._values), other.labels[i], - allow_fill=False) + svalues = com.take_nd(np.asarray(self.levels[i]._values), + self.labels[i], allow_fill=False) + ovalues = com.take_nd(np.asarray(other.levels[i]._values), + other.labels[i], allow_fill=False) if not array_equivalent(svalues, ovalues): return False @@ -6630,7 +6689,7 @@ def difference(self, other): other, result_names = self._convert_can_do_setop(other) if len(other) == 0: - return self + return self if self.equals(other): return MultiIndex(levels=[[]] * self.nlevels, @@ -6688,10 +6747,10 @@ def insert(self, loc, item): # Pad the key with empty strings if lower levels of the key # aren't specified: if not isinstance(item, tuple): - item = (item,) + ('',) * (self.nlevels - 1) + item = (item, ) + ('', ) * (self.nlevels - 1) elif len(item) != self.nlevels: - raise ValueError( - 'Item must have length equal to number of levels.') + raise ValueError('Item must have length equal to number of ' + 'levels.') new_levels = [] new_labels = [] @@ -6762,9 +6821,9 @@ def isin(self, values, level=None): MultiIndex._add_numeric_methods_disabled() MultiIndex._add_logical_methods_disabled() - # For utility purposes + def _sparsify(label_list, start=0, sentinel=''): pivoted = lzip(*label_list) k = len(label_list) @@ -6814,8 +6873,8 @@ def 
_ensure_index(index_like, copy=False): else: index_like = converted else: - # clean_index_list does the equivalent of copying - # so only need to do this if not list instance + # clean_index_list does the equivalent of copying + # so only need to do this if not list instance if copy: from copy import copy index_like = copy(index_like) @@ -6866,12 +6925,14 @@ def _union_indexes(indexes): return result indexes, kind = _sanitize_and_check(indexes) + def _unique_indices(inds): def conv(i): if isinstance(i, Index): i = i.tolist() return i - return Index(lib.fast_unique_multiple_list([ conv(i) for i in inds ])) + + return Index(lib.fast_unique_multiple_list([conv(i) for i in inds])) if kind == 'special': result = indexes[0] @@ -6908,9 +6969,8 @@ def _sanitize_and_check(indexes): if list in kinds: if len(kinds) > 1: - indexes = [Index(com._try_sort(x)) - if not isinstance(x, Index) else x - for x in indexes] + indexes = [Index(com._try_sort(x)) if not isinstance(x, Index) else + x for x in indexes] kinds.remove(list) else: return indexes, 'list' @@ -6925,9 +6985,8 @@ def _get_consensus_names(indexes): # find the non-none names, need to tupleify to make # the set hashable, then reverse on return - consensus_names = set([ - tuple(i.names) for i in indexes if all(n is not None for n in i.names) - ]) + consensus_names = set([tuple(i.names) for i in indexes + if all(n is not None for n in i.names)]) if len(consensus_names) == 1: return list(list(consensus_names)[0]) return [None] * indexes[0].nlevels @@ -6955,8 +7014,8 @@ def _get_na_rep(dtype): def _get_na_value(dtype): - return {np.datetime64: tslib.NaT, np.timedelta64: tslib.NaT}.get(dtype, - np.nan) + return {np.datetime64: tslib.NaT, + np.timedelta64: tslib.NaT}.get(dtype, np.nan) def _ensure_has_len(seq):
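Closing reviewer note: the `_get_na_value` dispatch reformatted above keys on the NumPy scalar types themselves. A runnable sketch (not part of the patch), with `pd.NaT` standing in for the module-internal `tslib.NaT`:

    import numpy as np
    import pandas as pd

    def _get_na_value(dtype):
        # NaT for datetime-like dtypes, NaN for everything else
        return {np.datetime64: pd.NaT,
                np.timedelta64: pd.NaT}.get(dtype, np.nan)

    assert _get_na_value(np.datetime64) is pd.NaT
    assert np.isnan(_get_na_value(np.float64))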