diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 9c49e91134288..12f3fd2c75dc8 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -20,12 +20,11 @@ is_timedelta64_dtype, ) from pandas.core.dtypes.generic import ( + ABCCategoricalIndex, ABCDatetimeArray, - ABCDatetimeIndex, ABCIndexClass, - ABCPeriodIndex, ABCRangeIndex, - ABCTimedeltaIndex, + ABCSeries, ) @@ -285,14 +284,14 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): [b, c, a, b] Categories (3, object): [b, c, a] """ - from pandas import Index, Categorical, CategoricalIndex, Series + from pandas import Index, Categorical from pandas.core.arrays.categorical import _recode_for_categories if len(to_union) == 0: raise ValueError("No Categoricals to union") def _maybe_unwrap(x): - if isinstance(x, (CategoricalIndex, Series)): + if isinstance(x, (ABCCategoricalIndex, ABCSeries)): return x.values elif isinstance(x, Categorical): return x @@ -450,31 +449,6 @@ def _concat_datetimetz(to_concat, name=None): return sample._concat_same_type(to_concat) -def _concat_index_same_dtype(indexes, klass=None): - klass = klass if klass is not None else indexes[0].__class__ - return klass(np.concatenate([x._values for x in indexes])) - - -def _concat_index_asobject(to_concat, name=None): - """ - concat all inputs as object. DatetimeIndex, TimedeltaIndex and - PeriodIndex are converted to object dtype before concatenation - """ - from pandas import Index - from pandas.core.arrays import ExtensionArray - - klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, ExtensionArray) - to_concat = [x.astype(object) if isinstance(x, klasses) else x for x in to_concat] - - self = to_concat[0] - attribs = self._get_attributes_dict() - attribs["name"] = name - - to_concat = [x._values if isinstance(x, Index) else x for x in to_concat] - - return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs) - - def _concat_sparse(to_concat, axis=0, typs=None): """ provide concatenation of an sparse/dense array of arrays each of which is a @@ -505,52 +479,3 @@ def _concat_sparse(to_concat, axis=0, typs=None): ] return SparseArray._concat_same_type(to_concat) - - -def _concat_rangeindex_same_dtype(indexes): - """ - Concatenates multiple RangeIndex instances. All members of "indexes" must - be of type RangeIndex; result will be RangeIndex if possible, Int64Index - otherwise. E.g.: - indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) - indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) - """ - from pandas import Int64Index, RangeIndex - - start = step = next_ = None - - # Filter the empty indexes - non_empty_indexes = [obj for obj in indexes if len(obj)] - - for obj in non_empty_indexes: - rng = obj._range # type: range - - if start is None: - # This is set by the first non-empty index - start = rng.start - if step is None and len(rng) > 1: - step = rng.step - elif step is None: - # First non-empty index had only one element - if rng.start == start: - return _concat_index_same_dtype(indexes, klass=Int64Index) - step = rng.start - start - - non_consecutive = (step != rng.step and len(rng) > 1) or ( - next_ is not None and rng.start != next_ - ) - if non_consecutive: - return _concat_index_same_dtype(indexes, klass=Int64Index) - - if step is not None: - next_ = rng[-1] + step - - if non_empty_indexes: - # Get the stop value from "next" or alternatively - # from the last non-empty index - stop = non_empty_indexes[-1].stop if next_ is None else next_ - return RangeIndex(start, stop, step) - - # Here all "indexes" had 0 length, i.e. were empty. - # In this case return an empty range index. - return RangeIndex(0, 0) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ce7b73a92b18a..b167f76d16445 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -51,6 +51,7 @@ ABCDataFrame, ABCDateOffset, ABCDatetimeArray, + ABCDatetimeIndex, ABCIndexClass, ABCMultiIndex, ABCPandasArray, @@ -4309,14 +4310,25 @@ def _concat(self, to_concat, name): if len(typs) == 1: return self._concat_same_dtype(to_concat, name=name) - return _concat._concat_index_asobject(to_concat, name=name) + return Index._concat_same_dtype(self, to_concat, name=name) def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class. """ # must be overridden in specific classes - return _concat._concat_index_asobject(to_concat, name) + klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, ExtensionArray) + to_concat = [ + x.astype(object) if isinstance(x, klasses) else x for x in to_concat + ] + + self = to_concat[0] + attribs = self._get_attributes_dict() + attribs["name"] = name + + to_concat = [x._values if isinstance(x, Index) else x for x in to_concat] + + return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs) def putmask(self, mask, value): """ diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 1a1f8ae826ca7..2cdf73788dd9b 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -17,7 +17,6 @@ needs_i8_conversion, pandas_dtype, ) -import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ( ABCFloat64Index, ABCInt64Index, @@ -129,7 +128,8 @@ def _assert_safe_casting(cls, data, subarr): pass def _concat_same_dtype(self, indexes, name): - return _concat._concat_index_same_dtype(indexes).rename(name) + result = type(indexes[0])(np.concatenate([x._values for x in indexes])) + return result.rename(name) @property def is_all_dates(self): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 16098c474a473..a026f08a7560d 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -11,7 +11,6 @@ from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, cache_readonly -from pandas.core.dtypes import concat as _concat from pandas.core.dtypes.common import ( ensure_platform_int, ensure_python_int, @@ -647,7 +646,53 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) return super().join(other, how, level, return_indexers, sort) def _concat_same_dtype(self, indexes, name): - return _concat._concat_rangeindex_same_dtype(indexes).rename(name) + """ + Concatenates multiple RangeIndex instances. All members of "indexes" must + be of type RangeIndex; result will be RangeIndex if possible, Int64Index + otherwise. E.g.: + indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) + indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) + """ + start = step = next_ = None + + # Filter the empty indexes + non_empty_indexes = [obj for obj in indexes if len(obj)] + + for obj in non_empty_indexes: + rng = obj._range # type: range + + if start is None: + # This is set by the first non-empty index + start = rng.start + if step is None and len(rng) > 1: + step = rng.step + elif step is None: + # First non-empty index had only one element + if rng.start == start: + result = Int64Index(np.concatenate([x._values for x in indexes])) + return result.rename(name) + + step = rng.start - start + + non_consecutive = (step != rng.step and len(rng) > 1) or ( + next_ is not None and rng.start != next_ + ) + if non_consecutive: + result = Int64Index(np.concatenate([x._values for x in indexes])) + return result.rename(name) + + if step is not None: + next_ = rng[-1] + step + + if non_empty_indexes: + # Get the stop value from "next" or alternatively + # from the last non-empty index + stop = non_empty_indexes[-1].stop if next_ is None else next_ + return RangeIndex(start, stop, step).rename(name) + + # Here all "indexes" had 0 length, i.e. were empty. + # In this case return an empty range index. + return RangeIndex(0, 0).rename(name) def __len__(self): """ diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index e79991f652154..280b0a99c7e68 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -411,7 +411,7 @@ def test_append(self): tm.assert_index_equal(result, expected, exact=True) def test_append_to_another(self): - # hits _concat_index_asobject + # hits Index._concat_same_dtype fst = Index(["a", "b"]) snd = CategoricalIndex(["d", "e"]) result = fst.append(snd)