From c0ef67fcd6555b0d5a692638f5e2968a4aa73235 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Aug 2019 19:20:33 -0700 Subject: [PATCH 1/3] define concat classmethods in the appropriate places --- pandas/core/dtypes/concat.py | 83 ++------------------------- pandas/core/indexes/base.py | 19 +++++- pandas/core/indexes/numeric.py | 4 +- pandas/core/indexes/range.py | 52 ++++++++++++++++- pandas/tests/indexes/test_category.py | 2 +- 5 files changed, 72 insertions(+), 88 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 9c49e91134288..12f3fd2c75dc8 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -20,12 +20,11 @@ is_timedelta64_dtype, ) from pandas.core.dtypes.generic import ( + ABCCategoricalIndex, ABCDatetimeArray, - ABCDatetimeIndex, ABCIndexClass, - ABCPeriodIndex, ABCRangeIndex, - ABCTimedeltaIndex, + ABCSeries, ) @@ -285,14 +284,14 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): [b, c, a, b] Categories (3, object): [b, c, a] """ - from pandas import Index, Categorical, CategoricalIndex, Series + from pandas import Index, Categorical from pandas.core.arrays.categorical import _recode_for_categories if len(to_union) == 0: raise ValueError("No Categoricals to union") def _maybe_unwrap(x): - if isinstance(x, (CategoricalIndex, Series)): + if isinstance(x, (ABCCategoricalIndex, ABCSeries)): return x.values elif isinstance(x, Categorical): return x @@ -450,31 +449,6 @@ def _concat_datetimetz(to_concat, name=None): return sample._concat_same_type(to_concat) -def _concat_index_same_dtype(indexes, klass=None): - klass = klass if klass is not None else indexes[0].__class__ - return klass(np.concatenate([x._values for x in indexes])) - - -def _concat_index_asobject(to_concat, name=None): - """ - concat all inputs as object. DatetimeIndex, TimedeltaIndex and - PeriodIndex are converted to object dtype before concatenation - """ - from pandas import Index - from pandas.core.arrays import ExtensionArray - - klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, ExtensionArray) - to_concat = [x.astype(object) if isinstance(x, klasses) else x for x in to_concat] - - self = to_concat[0] - attribs = self._get_attributes_dict() - attribs["name"] = name - - to_concat = [x._values if isinstance(x, Index) else x for x in to_concat] - - return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs) - - def _concat_sparse(to_concat, axis=0, typs=None): """ provide concatenation of an sparse/dense array of arrays each of which is a @@ -505,52 +479,3 @@ def _concat_sparse(to_concat, axis=0, typs=None): ] return SparseArray._concat_same_type(to_concat) - - -def _concat_rangeindex_same_dtype(indexes): - """ - Concatenates multiple RangeIndex instances. All members of "indexes" must - be of type RangeIndex; result will be RangeIndex if possible, Int64Index - otherwise. E.g.: - indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) - indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) - """ - from pandas import Int64Index, RangeIndex - - start = step = next_ = None - - # Filter the empty indexes - non_empty_indexes = [obj for obj in indexes if len(obj)] - - for obj in non_empty_indexes: - rng = obj._range # type: range - - if start is None: - # This is set by the first non-empty index - start = rng.start - if step is None and len(rng) > 1: - step = rng.step - elif step is None: - # First non-empty index had only one element - if rng.start == start: - return _concat_index_same_dtype(indexes, klass=Int64Index) - step = rng.start - start - - non_consecutive = (step != rng.step and len(rng) > 1) or ( - next_ is not None and rng.start != next_ - ) - if non_consecutive: - return _concat_index_same_dtype(indexes, klass=Int64Index) - - if step is not None: - next_ = rng[-1] + step - - if non_empty_indexes: - # Get the stop value from "next" or alternatively - # from the last non-empty index - stop = non_empty_indexes[-1].stop if next_ is None else next_ - return RangeIndex(start, stop, step) - - # Here all "indexes" had 0 length, i.e. were empty. - # In this case return an empty range index. - return RangeIndex(0, 0) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ce7b73a92b18a..22d30da4f71c6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -51,6 +51,7 @@ ABCDataFrame, ABCDateOffset, ABCDatetimeArray, + ABCDatetimeIndex, ABCIndexClass, ABCMultiIndex, ABCPandasArray, @@ -4309,14 +4310,26 @@ def _concat(self, to_concat, name): if len(typs) == 1: return self._concat_same_dtype(to_concat, name=name) - return _concat._concat_index_asobject(to_concat, name=name) + return Index._concat_same_dtype(self, to_concat, name=name) - def _concat_same_dtype(self, to_concat, name): + @classmethod + def _concat_same_dtype(cls, to_concat, name): """ Concatenate to_concat which has the same class. """ # must be overridden in specific classes - return _concat._concat_index_asobject(to_concat, name) + klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, ExtensionArray) + to_concat = [ + x.astype(object) if isinstance(x, klasses) else x for x in to_concat + ] + + self = to_concat[0] + attribs = self._get_attributes_dict() + attribs["name"] = name + + to_concat = [x._values if isinstance(x, Index) else x for x in to_concat] + + return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs) def putmask(self, mask, value): """ diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 1a1f8ae826ca7..2cdf73788dd9b 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -17,7 +17,6 @@ needs_i8_conversion, pandas_dtype, ) -import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ( ABCFloat64Index, ABCInt64Index, @@ -129,7 +128,8 @@ def _assert_safe_casting(cls, data, subarr): pass def _concat_same_dtype(self, indexes, name): - return _concat._concat_index_same_dtype(indexes).rename(name) + result = type(indexes[0])(np.concatenate([x._values for x in indexes])) + return result.rename(name) @property def is_all_dates(self): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 16098c474a473..5fa8c4a12775f 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -11,7 +11,6 @@ from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, cache_readonly -from pandas.core.dtypes import concat as _concat from pandas.core.dtypes.common import ( ensure_platform_int, ensure_python_int, @@ -646,8 +645,55 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) return super().join(other, how, level, return_indexers, sort) - def _concat_same_dtype(self, indexes, name): - return _concat._concat_rangeindex_same_dtype(indexes).rename(name) + @classmethod + def _concat_same_dtype(cls, indexes, name): + """ + Concatenates multiple RangeIndex instances. All members of "indexes" must + be of type RangeIndex; result will be RangeIndex if possible, Int64Index + otherwise. E.g.: + indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) + indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) + """ + start = step = next_ = None + + # Filter the empty indexes + non_empty_indexes = [obj for obj in indexes if len(obj)] + + for obj in non_empty_indexes: + rng = obj._range # type: range + + if start is None: + # This is set by the first non-empty index + start = rng.start + if step is None and len(rng) > 1: + step = rng.step + elif step is None: + # First non-empty index had only one element + if rng.start == start: + result = Int64Index(np.concatenate([x._values for x in indexes])) + return result.rename(name) + + step = rng.start - start + + non_consecutive = (step != rng.step and len(rng) > 1) or ( + next_ is not None and rng.start != next_ + ) + if non_consecutive: + result = Int64Index(np.concatenate([x._values for x in indexes])) + return result.rename(name) + + if step is not None: + next_ = rng[-1] + step + + if non_empty_indexes: + # Get the stop value from "next" or alternatively + # from the last non-empty index + stop = non_empty_indexes[-1].stop if next_ is None else next_ + return cls(start, stop, step).rename(name) + + # Here all "indexes" had 0 length, i.e. were empty. + # In this case return an empty range index. + return cls(0, 0).rename(name) def __len__(self): """ diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index e79991f652154..280b0a99c7e68 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -411,7 +411,7 @@ def test_append(self): tm.assert_index_equal(result, expected, exact=True) def test_append_to_another(self): - # hits _concat_index_asobject + # hits Index._concat_same_dtype fst = Index(["a", "b"]) snd = CategoricalIndex(["d", "e"]) result = fst.append(snd) From 3990a55c8cac2882fd152eeede09d83cc37c37eb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Aug 2019 19:39:00 -0700 Subject: [PATCH 2/3] declass --- pandas/core/indexes/range.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 5fa8c4a12775f..a026f08a7560d 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -645,8 +645,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) return super().join(other, how, level, return_indexers, sort) - @classmethod - def _concat_same_dtype(cls, indexes, name): + def _concat_same_dtype(self, indexes, name): """ Concatenates multiple RangeIndex instances. All members of "indexes" must be of type RangeIndex; result will be RangeIndex if possible, Int64Index @@ -689,11 +688,11 @@ def _concat_same_dtype(cls, indexes, name): # Get the stop value from "next" or alternatively # from the last non-empty index stop = non_empty_indexes[-1].stop if next_ is None else next_ - return cls(start, stop, step).rename(name) + return RangeIndex(start, stop, step).rename(name) # Here all "indexes" had 0 length, i.e. were empty. # In this case return an empty range index. - return cls(0, 0).rename(name) + return RangeIndex(0, 0).rename(name) def __len__(self): """ From a69cfe919395d249c04dff65920e3cf3f01c3863 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Aug 2019 19:40:05 -0700 Subject: [PATCH 3/3] declass --- pandas/core/indexes/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 22d30da4f71c6..b167f76d16445 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4312,8 +4312,7 @@ def _concat(self, to_concat, name): return self._concat_same_dtype(to_concat, name=name) return Index._concat_same_dtype(self, to_concat, name=name) - @classmethod - def _concat_same_dtype(cls, to_concat, name): + def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class. """