From ccfe97baf9d0b3c3ff9b06206c74d00584cc5639 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 21 Feb 2020 16:25:19 -0800 Subject: [PATCH 1/2] REF: de-duplicate object-dtype handling --- pandas/core/indexes/base.py | 72 +++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 14ee21ea5614c..63398138e3bd8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -311,14 +311,7 @@ def __new__( # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas.core.indexes.interval import IntervalIndex - closed = kwargs.pop("closed", None) - if is_dtype_equal(_o_dtype, dtype): - return IntervalIndex( - data, name=name, copy=copy, closed=closed, **kwargs - ).astype(object) - return IntervalIndex( - data, dtype=dtype, name=name, copy=copy, closed=closed, **kwargs - ) + return _maybe_asobject(dtype, IntervalIndex, data, copy, name, **kwargs) elif ( is_datetime64_any_dtype(data) @@ -328,39 +321,19 @@ def __new__( # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas import DatetimeIndex - if is_dtype_equal(_o_dtype, dtype): - # GH#23524 passing `dtype=object` to DatetimeIndex is invalid, - # will raise in the where `data` is already tz-aware. So - # we leave it out of this step and cast to object-dtype after - # the DatetimeIndex construction. - # Note we can pass copy=False because the .astype below - # will always make a copy - return DatetimeIndex(data, copy=False, name=name, **kwargs).astype( - object - ) - else: - return DatetimeIndex(data, copy=copy, name=name, dtype=dtype, **kwargs) + return _maybe_asobject(dtype, DatetimeIndex, data, copy, name, **kwargs) elif is_timedelta64_dtype(data) or is_timedelta64_dtype(dtype): # Delay import for perf. 
https://github.com/pandas-dev/pandas/pull/31423 from pandas import TimedeltaIndex - if is_dtype_equal(_o_dtype, dtype): - # Note we can pass copy=False because the .astype below - # will always make a copy - return TimedeltaIndex(data, copy=False, name=name, **kwargs).astype( - object - ) - else: - return TimedeltaIndex(data, copy=copy, name=name, dtype=dtype, **kwargs) + return _maybe_asobject(dtype, TimedeltaIndex, data, copy, name, **kwargs) elif is_period_dtype(data) or is_period_dtype(dtype): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas import PeriodIndex - if is_dtype_equal(_o_dtype, dtype): - return PeriodIndex(data, copy=False, name=name, **kwargs).astype(object) - return PeriodIndex(data, dtype=dtype, copy=copy, name=name, **kwargs) + return _maybe_asobject(dtype, PeriodIndex, data, copy, name, **kwargs) # extension dtype elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype): @@ -5765,3 +5738,40 @@ def _try_convert_to_int_array( pass raise ValueError + + +def _maybe_asobject(dtype, klass, data, copy: bool, name: Label, **kwargs): + """ + If and object dtype was specified, create the non-object Index + and then convert it to object. + + Parameters + ---------- + dtype : np.dtype, ExtensionDtype, str + klass : Index subclass + data : list-like + copy : bool + name : hashable + **kwargs + + Returns + ------- + Index + + Notes + ----- + We assume that calling .astype(object) on this klass will make a copy. + """ + + # GH#23524 passing `dtype=object` to DatetimeIndex is invalid, + # will raise in the case where `data` is already tz-aware. So + # we leave it out of this step and cast to object-dtype after + # the DatetimeIndex construction. 
+ + if is_dtype_equal(_o_dtype, dtype): + # Note we can pass copy=False because the .astype below + # will always make a copy + index = klass(data, copy=False, name=name, **kwargs) + return index.astype(object) + + return klass(data, dtype=dtype, copy=copy, name=name, **kwargs) From d066d64662241391d9b304811d28b39793d7c1b2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 07:27:19 -0800 Subject: [PATCH 2/2] Update pandas/core/indexes/base.py Co-Authored-By: gfyoung --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 63398138e3bd8..e102e4ae14d86 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5742,7 +5742,7 @@ def _try_convert_to_int_array( def _maybe_asobject(dtype, klass, data, copy: bool, name: Label, **kwargs): """ - If and object dtype was specified, create the non-object Index + If an object dtype was specified, create the non-object Index and then convert it to object. Parameters