From c0c87aa6fcc656361733cd3f79ab5d527bdcef8f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 1 Jan 2019 16:47:31 -0500 Subject: [PATCH 01/29] CLN: use idiomatic pandas_dtypes in pandas/dtypes/common.py --- asv_bench/benchmarks/dtypes.py | 44 ++++ asv_bench/benchmarks/pandas_vb_common.py | 10 + pandas/core/dtypes/common.py | 267 +++++++++-------------- pandas/core/indexes/numeric.py | 6 +- pandas/core/internals/construction.py | 9 +- pandas/tests/dtypes/test_common.py | 15 +- 6 files changed, 179 insertions(+), 172 deletions(-) create mode 100644 asv_bench/benchmarks/dtypes.py diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py new file mode 100644 index 0000000000000..1032c267d323a --- /dev/null +++ b/asv_bench/benchmarks/dtypes.py @@ -0,0 +1,44 @@ +from pandas.api.types import pandas_dtype + +import numpy as np +from .pandas_vb_common import ( + numeric_dtypes, datetime_dtypes, string_dtypes, extension_dtypes) + + +_numpy_dtypes = list(map(np.dtype, (numeric_dtypes + + datetime_dtypes + + string_dtypes))) +_dtypes = _numpy_dtypes + extension_dtypes + + +class Dtypes(object): + params = (_dtypes + + list(map(lambda dt: dt.name, _dtypes))) + param_names = ['dtype'] + + def time_pandas_dtype(self, dtype): + pandas_dtype(dtype) + + +class DtypesInvalid(object): + params = ['foo', 1, ['foo'] * 1000, np.array(['foo'] * 1000)] + param_names = ['dtype'] + + def time_pandas_dtype_invalid(self, dtype): + try: + pandas_dtype(dtype) + except TypeError: + pass + + param_names = ['dtype'] + params = ['scalar-string', 'scalar-int', 'list-string', 'array-string'] + data_dict = {'scalar-string': 'foo', + 'scalar-int': 1, + 'list-string': ['foo'] * 1000, + 'array-string': np.array(['foo'] * 1000)} + + def setup(self, dtype): + self.data = self.data_dict[dtype] + + +from .pandas_vb_common import setup # noqa: F401 diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index e7b25d567e03b..ab5e5fd3bfe10 100644 --- a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -2,6 +2,7 @@ from importlib import import_module import numpy as np +import pandas as pd # Compatibility import for lib for imp in ['pandas._libs.lib', 'pandas.lib']: @@ -14,6 +15,15 @@ numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32, np.float64, np.int16, np.int8, np.uint16, np.uint8] datetime_dtypes = [np.datetime64, np.timedelta64] +string_dtypes = [np.object] +extension_dtypes = [pd.Int8Dtype, pd.Int16Dtype, + pd.Int32Dtype, pd.Int64Dtype, + pd.UInt8Dtype, pd.UInt16Dtype, + pd.UInt32Dtype, pd.UInt64Dtype, + pd.CategoricalDtype, + pd.IntervalDtype, + pd.DatetimeTZDtype('ns', 'UTC'), + pd.PeriodDtype('D')] def setup(*args, **kwargs): diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index e1141c6b6b3a8..d889c2c8170ff 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -2,20 +2,17 @@ import warnings import numpy as np - from pandas._libs import algos, lib -from pandas._libs.interval import Interval -from pandas._libs.tslibs import Period, Timestamp, conversion +from pandas._libs.tslibs import conversion from pandas.compat import PY3, PY36, binary_type, string_types, text_type from pandas.core.dtypes.dtypes import ( - CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, ExtensionDtype, + CategoricalDtype, DatetimeTZDtype, ExtensionDtype, IntervalDtype, PandasExtensionDtype, PeriodDtype, _pandas_registry, registry) from pandas.core.dtypes.generic import ( - ABCCategorical, ABCCategoricalIndex, ABCDateOffset, ABCDatetimeIndex, - ABCIndexClass, ABCPeriodArray, ABCPeriodIndex, ABCSeries, ABCSparseArray, - ABCSparseSeries) + ABCCategorical, ABCDateOffset, ABCDatetimeIndex, + ABCIndexClass, ABCPeriodArray, ABCPeriodIndex, ABCSeries) from pandas.core.dtypes.inference import ( # noqa:F401 is_array_like, is_bool, is_complex, is_decimal, is_dict_like, is_file_like, is_float, is_hashable, is_integer, is_interval, is_iterator, is_list_like, @@ -143,11 +140,8 @@ def is_object_dtype(arr_or_dtype): >>> is_object_dtype([1, 2, 3]) False """ - - if arr_or_dtype is None: - return False - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.object_) + return _is_dtype_type( + arr_or_dtype, lambda tipo: issubclass(tipo, np.object_)) def is_sparse(arr): @@ -421,13 +415,8 @@ def is_datetime64_dtype(arr_or_dtype): False """ - if arr_or_dtype is None: - return False - try: - tipo = _get_dtype_type(arr_or_dtype) - except (TypeError, UnicodeEncodeError): - return False - return issubclass(tipo, np.datetime64) + return _is_dtype_type( + arr_or_dtype, lambda tipo: issubclass(tipo, np.datetime64)) def is_datetime64tz_dtype(arr_or_dtype): @@ -496,13 +485,8 @@ def is_timedelta64_dtype(arr_or_dtype): False """ - if arr_or_dtype is None: - return False - try: - tipo = _get_dtype_type(arr_or_dtype) - except (TypeError, ValueError, SyntaxError): - return False - return issubclass(tipo, np.timedelta64) + return _is_dtype_type( + arr_or_dtype, lambda tipo: issubclass(tipo, np.timedelta64)) def is_period_dtype(arr_or_dtype): @@ -636,14 +620,9 @@ def is_string_dtype(arr_or_dtype): """ # TODO: gh-15585: consider making the checks stricter. - - if arr_or_dtype is None: - return False - try: - dtype = _get_dtype(arr_or_dtype) + def condition(dtype): return dtype.kind in ('O', 'S', 'U') and not is_period_dtype(dtype) - except TypeError: - return False + return _is_dtype(arr_or_dtype, condition) def is_period_arraylike(arr): @@ -865,10 +844,9 @@ def is_any_int_dtype(arr_or_dtype): False """ - if arr_or_dtype is None: - return False - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.integer) + def condition(tipo): + return issubclass(tipo, (np.integer, np.timedelta64)) + return _is_dtype_type(arr_or_dtype, condition) def is_integer_dtype(arr_or_dtype): @@ -911,11 +889,10 @@ def is_integer_dtype(arr_or_dtype): False """ - if arr_or_dtype is None: - return False - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, np.integer) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) + def condition(tipo): + return (issubclass(tipo, np.integer) and + not issubclass(tipo, (np.datetime64, np.timedelta64))) + return _is_dtype_type(arr_or_dtype, condition) def is_signed_integer_dtype(arr_or_dtype): @@ -960,11 +937,10 @@ def is_signed_integer_dtype(arr_or_dtype): False """ - if arr_or_dtype is None: - return False - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, np.signedinteger) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) + def condition(tipo): + return (issubclass(tipo, np.signedinteger) and + not issubclass(tipo, (np.datetime64, np.timedelta64))) + return _is_dtype_type(arr_or_dtype, condition) def is_unsigned_integer_dtype(arr_or_dtype): @@ -1001,11 +977,10 @@ def is_unsigned_integer_dtype(arr_or_dtype): True """ - if arr_or_dtype is None: - return False - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, np.unsignedinteger) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) + def condition(tipo): + return (issubclass(tipo, np.unsignedinteger) and + not issubclass(tipo, (np.datetime64, np.timedelta64))) + return _is_dtype_type(arr_or_dtype, condition) def is_int64_dtype(arr_or_dtype): @@ -1049,10 +1024,8 @@ def is_int64_dtype(arr_or_dtype): False """ - if arr_or_dtype is None: - return False - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.int64) + return _is_dtype_type( + arr_or_dtype, lambda tipo: issubclass(tipo, np.int64)) def is_datetime64_any_dtype(arr_or_dtype): @@ -1172,14 +1145,7 @@ def is_timedelta64_ns_dtype(arr_or_dtype): >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) False """ - - if arr_or_dtype is None: - return False - try: - tipo = _get_dtype(arr_or_dtype) - return tipo == _TD_DTYPE - except TypeError: - return False + return _is_dtype(arr_or_dtype, lambda dtype: dtype == _TD_DTYPE) def is_datetime_or_timedelta_dtype(arr_or_dtype): @@ -1217,10 +1183,9 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype): True """ - if arr_or_dtype is None: - return False - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, (np.datetime64, np.timedelta64)) + return _is_dtype_type( + arr_or_dtype, + lambda tipo: issubclass(tipo, (np.datetime64, np.timedelta64))) def _is_unorderable_exception(e): @@ -1495,11 +1460,10 @@ def is_numeric_dtype(arr_or_dtype): False """ - if arr_or_dtype is None: - return False - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, (np.number, np.bool_)) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) + def condition(tipo): + return (issubclass(tipo, (np.number, np.bool_)) and + not issubclass(tipo, (np.datetime64, np.timedelta64))) + return _is_dtype_type(arr_or_dtype, condition) def is_string_like_dtype(arr_or_dtype): @@ -1530,13 +1494,8 @@ def is_string_like_dtype(arr_or_dtype): False """ - if arr_or_dtype is None: - return False - try: - dtype = _get_dtype(arr_or_dtype) - return dtype.kind in ('S', 'U') - except TypeError: - return False + return _is_dtype( + arr_or_dtype, lambda dtype: dtype.kind in ('S', 'U')) def is_float_dtype(arr_or_dtype): @@ -1569,11 +1528,8 @@ def is_float_dtype(arr_or_dtype): >>> is_float_dtype(pd.Index([1, 2.])) True """ - - if arr_or_dtype is None: - return False - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.floating) + return _is_dtype_type( + arr_or_dtype, lambda tipo: issubclass(tipo, np.floating)) def is_bool_dtype(arr_or_dtype): @@ -1618,14 +1574,10 @@ def is_bool_dtype(arr_or_dtype): if arr_or_dtype is None: return False try: - tipo = _get_dtype_type(arr_or_dtype) - except ValueError: - # this isn't even a dtype + dtype = _get_dtype(arr_or_dtype) + except TypeError: return False - if isinstance(arr_or_dtype, (ABCCategorical, ABCCategoricalIndex)): - arr_or_dtype = arr_or_dtype.dtype - if isinstance(arr_or_dtype, CategoricalDtype): arr_or_dtype = arr_or_dtype.categories # now we use the special definition for Index @@ -1642,7 +1594,7 @@ def is_bool_dtype(arr_or_dtype): dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) return dtype._is_boolean - return issubclass(tipo, np.bool_) + return issubclass(dtype.type, np.bool_) def is_extension_type(arr): @@ -1761,10 +1713,33 @@ def is_complex_dtype(arr_or_dtype): True """ + return _is_dtype_type( + arr_or_dtype, lambda tipo: issubclass(tipo, np.complexfloating)) + + +def _is_dtype(arr_or_dtype, condition): + """ + Return a boolean if the the condition is satisfied for the arr_or_dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype object whose dtype we want to extract. + condition : callable[Union[np.dtype, ExtensionDtype]] + + Returns + ------- + bool + + """ + if arr_or_dtype is None: return False - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.complexfloating) + try: + dtype = _get_dtype(arr_or_dtype) + except (TypeError, ValueError, SyntaxError, UnicodeEncodeError): + return False + return condition(dtype) def _get_dtype(arr_or_dtype): @@ -1787,92 +1762,66 @@ def _get_dtype(arr_or_dtype): TypeError : The passed in object is None. """ - # TODO(extension) - # replace with pandas_dtype - if arr_or_dtype is None: raise TypeError("Cannot deduce dtype from null object") - if isinstance(arr_or_dtype, np.dtype): + + # fastpath + elif isinstance(arr_or_dtype, np.dtype): return arr_or_dtype elif isinstance(arr_or_dtype, type): return np.dtype(arr_or_dtype) - elif isinstance(arr_or_dtype, ExtensionDtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, DatetimeTZDtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, PeriodDtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, IntervalDtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, string_types): - if is_categorical_dtype(arr_or_dtype): - return CategoricalDtype.construct_from_string(arr_or_dtype) - elif is_datetime64tz_dtype(arr_or_dtype): - return DatetimeTZDtype.construct_from_string(arr_or_dtype) - elif is_period_dtype(arr_or_dtype): - return PeriodDtype.construct_from_string(arr_or_dtype) - elif is_interval_dtype(arr_or_dtype): - return IntervalDtype.construct_from_string(arr_or_dtype) - elif isinstance(arr_or_dtype, (ABCCategorical, ABCCategoricalIndex, - ABCSparseArray, ABCSparseSeries)): - return arr_or_dtype.dtype - if hasattr(arr_or_dtype, 'dtype'): + # if we have an array-like + elif hasattr(arr_or_dtype, 'dtype'): arr_or_dtype = arr_or_dtype.dtype - return np.dtype(arr_or_dtype) + return pandas_dtype(arr_or_dtype) -def _get_dtype_type(arr_or_dtype): + +def _is_dtype_type(arr_or_dtype, condition): """ - Get the type (NOT dtype) instance associated with - an array or dtype object. + Return a boolean if the the condition is satisfied for the arr_or_dtype. Parameters ---------- arr_or_dtype : array-like - The array-like or dtype object whose type we want to extract. + The array-like or dtype object whose dtype we want to extract. + condition : callable[Union[type(np.dtype), ExtensionDtypeType]] Returns ------- - obj_type : The extract type instance from the - passed in array or dtype object. + bool + """ - # TODO(extension) - # replace with pandas_dtype + if arr_or_dtype is None: + return condition(type(None)) + + # fastpath if isinstance(arr_or_dtype, np.dtype): - return arr_or_dtype.type + return condition(arr_or_dtype.type) elif isinstance(arr_or_dtype, type): - return np.dtype(arr_or_dtype).type - elif isinstance(arr_or_dtype, CategoricalDtype): - return CategoricalDtypeType - elif isinstance(arr_or_dtype, DatetimeTZDtype): - return Timestamp - elif isinstance(arr_or_dtype, IntervalDtype): - return Interval - elif isinstance(arr_or_dtype, PeriodDtype): - return Period - elif isinstance(arr_or_dtype, string_types): - if is_categorical_dtype(arr_or_dtype): - return CategoricalDtypeType - elif is_datetime64tz_dtype(arr_or_dtype): - return Timestamp - elif is_period_dtype(arr_or_dtype): - return Period - elif is_interval_dtype(arr_or_dtype): - return Interval - return _get_dtype_type(np.dtype(arr_or_dtype)) - else: - from pandas.core.arrays.sparse import SparseDtype - if isinstance(arr_or_dtype, (ABCSparseSeries, - ABCSparseArray, - SparseDtype)): - dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) - return dtype.type + return condition(np.dtype(arr_or_dtype).type) + elif arr_or_dtype is None: + return condition(type(None)) + + # if we have an array-like + if hasattr(arr_or_dtype, 'dtype'): + arr_or_dtype = arr_or_dtype.dtype + + # we are not possibly a dtype + elif is_list_like(arr_or_dtype): + return condition(type(None)) + try: - return arr_or_dtype.dtype.type - except AttributeError: - return type(None) + tipo = pandas_dtype(arr_or_dtype).type + except (TypeError, ValueError, SyntaxError, UnicodeEncodeError): + if is_scalar(arr_or_dtype): + return condition(type(None)) + + return False + + return condition(tipo) def _get_dtype_from_object(dtype): @@ -1980,7 +1929,7 @@ def pandas_dtype(dtype): # short-circuit if isinstance(dtype, np.ndarray): return dtype.dtype - elif isinstance(dtype, np.dtype): + elif isinstance(dtype, (np.dtype, PandasExtensionDtype, ExtensionDtype)): return dtype # registered extension types diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 9d6a56200df6e..d2974a516a584 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -8,7 +8,7 @@ from pandas.core.dtypes.common import ( is_bool, is_bool_dtype, is_dtype_equal, is_float, is_integer_dtype, - is_scalar, needs_i8_conversion, pandas_dtype) + is_scalar, needs_i8_conversion, is_extension_array_dtype, pandas_dtype) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.missing import isna @@ -328,7 +328,9 @@ def astype(self, dtype, copy=True): msg = ('Cannot convert Float64Index to dtype {dtype}; integer ' 'values are required for conversion').format(dtype=dtype) raise TypeError(msg) - elif is_integer_dtype(dtype) and self.hasnans: + elif (is_integer_dtype(dtype) and + not is_extension_array_dtype(dtype)) and self.hasnans: + # TODO(jreback); this can change once we have an EA Index type # GH 13149 raise ValueError('Cannot convert NA to integer') return super(Float64Index, self).astype(dtype, copy=copy) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index b18b966406bbb..7adbd8bec8523 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -559,11 +559,12 @@ def sanitize_array(data, index, dtype=None, copy=False, # possibility of nan -> garbage if is_float_dtype(data.dtype) and is_integer_dtype(dtype): - if not isna(data).any(): + try: subarr = _try_cast(data, True, dtype, copy, - raise_cast_failure) - elif copy: - subarr = data.copy() + True) + except ValueError: + if copy: + subarr = data.copy() else: subarr = _try_cast(data, True, dtype, copy, raise_cast_failure) elif isinstance(data, Index): diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 2d6d3101f7371..d3ba6e1d8505a 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -5,7 +5,8 @@ import pandas as pd from pandas.core.dtypes.dtypes import (DatetimeTZDtype, PeriodDtype, - CategoricalDtype, IntervalDtype) + CategoricalDtype, CategoricalDtypeType, + IntervalDtype) from pandas.core.sparse.api import SparseDtype import pandas.core.dtypes.common as com @@ -587,11 +588,11 @@ def test__get_dtype_fails(input_param): (pd.Series(['a', 'b']), np.object_), (pd.Index([1, 2], dtype='int64'), np.int64), (pd.Index(['a', 'b']), np.object_), - ('category', com.CategoricalDtypeType), - (pd.Categorical(['a', 'b']).dtype, com.CategoricalDtypeType), - (pd.Categorical(['a', 'b']), com.CategoricalDtypeType), - (pd.CategoricalIndex(['a', 'b']).dtype, com.CategoricalDtypeType), - (pd.CategoricalIndex(['a', 'b']), com.CategoricalDtypeType), + ('category', CategoricalDtypeType), + (pd.Categorical(['a', 'b']).dtype, CategoricalDtypeType), + (pd.Categorical(['a', 'b']), CategoricalDtypeType), + (pd.CategoricalIndex(['a', 'b']).dtype, CategoricalDtypeType), + (pd.CategoricalIndex(['a', 'b']), CategoricalDtypeType), (pd.DatetimeIndex([1, 2]), np.datetime64), (pd.DatetimeIndex([1, 2]).dtype, np.datetime64), (' Date: Tue, 1 Jan 2019 20:48:13 -0500 Subject: [PATCH 02/29] fix imports --- pandas/core/dtypes/common.py | 10 +++++----- pandas/core/indexes/numeric.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index d889c2c8170ff..428e59f7f43b3 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -2,17 +2,17 @@ import warnings import numpy as np + from pandas._libs import algos, lib from pandas._libs.tslibs import conversion from pandas.compat import PY3, PY36, binary_type, string_types, text_type from pandas.core.dtypes.dtypes import ( - CategoricalDtype, DatetimeTZDtype, ExtensionDtype, - IntervalDtype, PandasExtensionDtype, PeriodDtype, _pandas_registry, - registry) + CategoricalDtype, DatetimeTZDtype, ExtensionDtype, IntervalDtype, + PandasExtensionDtype, PeriodDtype, _pandas_registry, registry) from pandas.core.dtypes.generic import ( - ABCCategorical, ABCDateOffset, ABCDatetimeIndex, - ABCIndexClass, ABCPeriodArray, ABCPeriodIndex, ABCSeries) + ABCCategorical, ABCDateOffset, ABCDatetimeIndex, ABCIndexClass, + ABCPeriodArray, ABCPeriodIndex, ABCSeries) from pandas.core.dtypes.inference import ( # noqa:F401 is_array_like, is_bool, is_complex, is_decimal, is_dict_like, is_file_like, is_float, is_hashable, is_integer, is_interval, is_iterator, is_list_like, diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index d2974a516a584..379464f4fced6 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -7,8 +7,8 @@ from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( - is_bool, is_bool_dtype, is_dtype_equal, is_float, is_integer_dtype, - is_scalar, needs_i8_conversion, is_extension_array_dtype, pandas_dtype) + is_bool, is_bool_dtype, is_dtype_equal, is_extension_array_dtype, is_float, + is_integer_dtype, is_scalar, needs_i8_conversion, pandas_dtype) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.missing import isna From 3e063ba6770d2ddb009b5dd65feaf9909b8e8ee8 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 1 Jan 2019 22:03:04 -0500 Subject: [PATCH 03/29] clean asv --- asv_bench/benchmarks/dtypes.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py index 1032c267d323a..3af06b00a8388 100644 --- a/asv_bench/benchmarks/dtypes.py +++ b/asv_bench/benchmarks/dtypes.py @@ -21,15 +21,6 @@ def time_pandas_dtype(self, dtype): class DtypesInvalid(object): - params = ['foo', 1, ['foo'] * 1000, np.array(['foo'] * 1000)] - param_names = ['dtype'] - - def time_pandas_dtype_invalid(self, dtype): - try: - pandas_dtype(dtype) - except TypeError: - pass - param_names = ['dtype'] params = ['scalar-string', 'scalar-int', 'list-string', 'array-string'] data_dict = {'scalar-string': 'foo', @@ -37,8 +28,11 @@ def time_pandas_dtype_invalid(self, dtype): 'list-string': ['foo'] * 1000, 'array-string': np.array(['foo'] * 1000)} - def setup(self, dtype): - self.data = self.data_dict[dtype] + def time_pandas_dtype_invalid(self, dtype): + try: + pandas_dtype(self.data_dict[dtype]) + except TypeError: + pass from .pandas_vb_common import setup # noqa: F401 From 2e4518576510721d5c1a83d5965ca9740cff74be Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 09:34:29 -0500 Subject: [PATCH 04/29] review comments --- asv_bench/benchmarks/dtypes.py | 7 ++++--- pandas/core/dtypes/common.py | 13 +++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py index 3af06b00a8388..e59154cd99965 100644 --- a/asv_bench/benchmarks/dtypes.py +++ b/asv_bench/benchmarks/dtypes.py @@ -5,9 +5,10 @@ numeric_dtypes, datetime_dtypes, string_dtypes, extension_dtypes) -_numpy_dtypes = list(map(np.dtype, (numeric_dtypes + - datetime_dtypes + - string_dtypes))) +_numpy_dtypes = [np.dtype(dtype) + for dtype in (numeric_dtypes + + datetime_dtypes + + string_dtypes)] _dtypes = _numpy_dtypes + extension_dtypes diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 428e59f7f43b3..36e49868deffb 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1719,11 +1719,11 @@ def is_complex_dtype(arr_or_dtype): def _is_dtype(arr_or_dtype, condition): """ - Return a boolean if the the condition is satisfied for the arr_or_dtype. + Return a boolean if the condition is satisfied for the arr_or_dtype. Parameters ---------- - arr_or_dtype : array-like + arr_or_dtype : array-like, str, np.dtype, or ExtensionArrayType The array-like or dtype object whose dtype we want to extract. condition : callable[Union[np.dtype, ExtensionDtype]] @@ -1780,7 +1780,7 @@ def _get_dtype(arr_or_dtype): def _is_dtype_type(arr_or_dtype, condition): """ - Return a boolean if the the condition is satisfied for the arr_or_dtype. + Return a boolean if the condition is satisfied for the arr_or_dtype. Parameters ---------- @@ -1790,7 +1790,7 @@ def _is_dtype_type(arr_or_dtype, condition): Returns ------- - bool + bool : if the condition is satisifed for the arr_or_dtype """ @@ -1798,7 +1798,8 @@ def _is_dtype_type(arr_or_dtype, condition): return condition(type(None)) # fastpath - if isinstance(arr_or_dtype, np.dtype): + if isinstance(arr_or_dtype, ( + np.dtype, PandasExtensionDtype, ExtensionDtype)): return condition(arr_or_dtype.type) elif isinstance(arr_or_dtype, type): return condition(np.dtype(arr_or_dtype).type) @@ -1815,7 +1816,7 @@ def _is_dtype_type(arr_or_dtype, condition): try: tipo = pandas_dtype(arr_or_dtype).type - except (TypeError, ValueError, SyntaxError, UnicodeEncodeError): + except (TypeError, ValueError, UnicodeEncodeError): if is_scalar(arr_or_dtype): return condition(type(None)) From 929604209aef3ebe5203301fa514ba2c675460ed Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 11:07:16 -0500 Subject: [PATCH 05/29] moar clean --- pandas/core/dtypes/cast.py | 4 ++-- pandas/core/dtypes/common.py | 12 ++++-------- pandas/core/frame.py | 4 ++-- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index eae9eb97f35fe..b050d18b4cdf4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -9,7 +9,7 @@ from pandas.compat import PY3, string_types, text_type, to_str from .common import ( - _INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, _string_dtypes, + _INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, ensure_int8, ensure_int16, ensure_int32, ensure_int64, ensure_object, is_bool, is_bool_dtype, is_categorical_dtype, is_complex, is_complex_dtype, is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype, @@ -541,7 +541,7 @@ def invalidate_string_dtypes(dtype_set): """Change string like dtypes to object for ``DataFrame.select_dtypes()``. """ - non_string_dtypes = dtype_set - _string_dtypes + non_string_dtypes = dtype_set - {np.dtype('S').type, np.dtype(' Date: Wed, 2 Jan 2019 11:40:12 -0500 Subject: [PATCH 06/29] remove extraneous --- pandas/core/dtypes/common.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index da1106fd74ce9..0eac69eda73d6 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1934,10 +1934,6 @@ def pandas_dtype(dtype): if result is not None: return result - # un-registered extension types - elif isinstance(dtype, (PandasExtensionDtype, ExtensionDtype)): - return dtype - # try a numpy dtype # raise a consistent TypeError if failed try: From a4f4cdb8cbf78566a179f61b2958e39bdfbca345 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 11:49:35 -0500 Subject: [PATCH 07/29] use pandas_dtype --- pandas/core/dtypes/common.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 0eac69eda73d6..a7763cc2ba256 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1848,18 +1848,26 @@ def infer_dtype_from_object(dtype): if isinstance(dtype, type) and issubclass(dtype, np.generic): # Type object from a dtype return dtype - elif is_categorical(dtype): - return CategoricalDtype().type - elif is_datetime64tz_dtype(dtype): - return DatetimeTZDtype(dtype).type - elif isinstance(dtype, np.dtype): # dtype object + elif isinstance(dtype, np.dtype): + # dtype object try: _validate_date_like_dtype(dtype) except TypeError: # Should still pass if we don't have a date-like pass return dtype.type + + try: + dtype = pandas_dtype(dtype) + except TypeError: + pass + + if is_extension_array_dtype(dtype): + return dtype.type elif isinstance(dtype, string_types): + + # TODO(jreback) + # should deprecate these if dtype in ['datetimetz', 'datetime64tz']: return DatetimeTZDtype.type elif dtype in ['period']: @@ -1867,7 +1875,6 @@ def infer_dtype_from_object(dtype): if dtype == 'datetime' or dtype == 'timedelta': dtype += '64' - try: return infer_dtype_from_object(getattr(np, dtype)) except (AttributeError, TypeError): From a21164a6bc51a25af793486da86fb4562e738c67 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 12:57:24 -0500 Subject: [PATCH 08/29] isort --- pandas/core/dtypes/cast.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b050d18b4cdf4..4c5e9b7868829 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -9,9 +9,9 @@ from pandas.compat import PY3, string_types, text_type, to_str from .common import ( - _INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, - ensure_int8, ensure_int16, ensure_int32, ensure_int64, ensure_object, - is_bool, is_bool_dtype, is_categorical_dtype, is_complex, is_complex_dtype, + _INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, ensure_int8, + ensure_int16, ensure_int32, ensure_int64, ensure_object, is_bool, + is_bool_dtype, is_categorical_dtype, is_complex, is_complex_dtype, is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, is_datetimelike, is_dtype_equal, is_extension_array_dtype, is_extension_type, is_float, is_float_dtype, From 8f0a4d3c496c8a29aa438bc51d3894ba46ef49c5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 13:22:48 -0500 Subject: [PATCH 09/29] fix --- pandas/core/dtypes/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index f123a954f9ef2..5d775593a9bad 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -9,7 +9,7 @@ from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, ExtensionDtype, IntervalDtype, - PandasExtensionDtype, PeriodDtype, _pandas_registry, registry) + PandasExtensionDtype, PeriodDtype, registry) from pandas.core.dtypes.generic import ( ABCCategorical, ABCDateOffset, ABCDatetimeIndex, ABCIndexClass, ABCPeriodArray, ABCPeriodIndex, ABCSeries) From abd1620eefec676dd40472eb47ce154ec649308c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 19:49:10 -0500 Subject: [PATCH 10/29] remove newline --- pandas/core/dtypes/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 5d775593a9bad..bfb6f19a6504b 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1791,7 +1791,6 @@ def _is_dtype_type(arr_or_dtype, condition): Returns ------- bool : if the condition is satisifed for the arr_or_dtype - """ if arr_or_dtype is None: From b58cf186f00afc845b8e6384fd20d9e2e6e264b2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 21:11:29 -0500 Subject: [PATCH 11/29] parametrize tests --- pandas/conftest.py | 5 ++ pandas/tests/dtypes/test_common.py | 91 +++++++++++++++++------------- 2 files changed, 57 insertions(+), 39 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index f383fb32810e7..30b24e00779a9 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -388,9 +388,14 @@ def tz_aware_fixture(request): return request.param +# ---------------------------------------------------------------- +# Dtypes UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"] +UNSIGNED_EA_INT_DTYPES = ["UInt8", "UInt16", "UInt32", "UInt64"] SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"] +SIGNED_EA_INT_DTYPES = ["Int8", "Int16", "Int32", "Int64"] ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES +ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES FLOAT_DTYPES = [float, "float32", "float64"] COMPLEX_DTYPES = [complex, "complex64", "complex128"] diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index d3ba6e1d8505a..07f9b7349fa5c 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -9,6 +9,9 @@ IntervalDtype) from pandas.core.sparse.api import SparseDtype +from pandas.conftest import ( + ALL_INT_DTYPES, UNSIGNED_INT_DTYPES, SIGNED_INT_DTYPES, + ALL_EA_INT_DTYPES, UNSIGNED_EA_INT_DTYPES, SIGNED_EA_INT_DTYPES) import pandas.core.dtypes.common as com import pandas.util._test_decorators as td import pandas.util.testing as tm @@ -278,45 +281,55 @@ def test_is_datetimelike(): assert com.is_datetimelike(s) -def test_is_integer_dtype(): - assert not com.is_integer_dtype(str) - assert not com.is_integer_dtype(float) - assert not com.is_integer_dtype(np.datetime64) - assert not com.is_integer_dtype(np.timedelta64) - assert not com.is_integer_dtype(pd.Index([1, 2.])) - assert not com.is_integer_dtype(np.array(['a', 'b'])) - assert not com.is_integer_dtype(np.array([], dtype=np.timedelta64)) - - assert com.is_integer_dtype(int) - assert com.is_integer_dtype(np.uint64) - assert com.is_integer_dtype(pd.Series([1, 2])) - - -def test_is_signed_integer_dtype(): - assert not com.is_signed_integer_dtype(str) - assert not com.is_signed_integer_dtype(float) - assert not com.is_signed_integer_dtype(np.uint64) - assert not com.is_signed_integer_dtype(np.datetime64) - assert not com.is_signed_integer_dtype(np.timedelta64) - assert not com.is_signed_integer_dtype(pd.Index([1, 2.])) - assert not com.is_signed_integer_dtype(np.array(['a', 'b'])) - assert not com.is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32)) - assert not com.is_signed_integer_dtype(np.array([], dtype=np.timedelta64)) - - assert com.is_signed_integer_dtype(int) - assert com.is_signed_integer_dtype(pd.Series([1, 2])) - - -def test_is_unsigned_integer_dtype(): - assert not com.is_unsigned_integer_dtype(str) - assert not com.is_unsigned_integer_dtype(int) - assert not com.is_unsigned_integer_dtype(float) - assert not com.is_unsigned_integer_dtype(pd.Series([1, 2])) - assert not com.is_unsigned_integer_dtype(pd.Index([1, 2.])) - assert not com.is_unsigned_integer_dtype(np.array(['a', 'b'])) - - assert com.is_unsigned_integer_dtype(np.uint64) - assert com.is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32)) +@pytest.mark.parametrize( + 'dtype', ALL_INT_DTYPES + ALL_EA_INT_DTYPES + [pd.Series([1, 2])]) +def test_is_integer_dtype(dtype): + assert com.is_integer_dtype(dtype) + + +@pytest.mark.parametrize( + 'dtype', [str, float, np.datetime64, np.timedelta64, + pd.Index([1, 2.]), np.array(['a', 'b']), + np.array([], dtype=np.timedelta64)]) +def test_is_not_integer_dtype(dtype): + assert not com.is_integer_dtype(dtype) + + +@pytest.mark.parametrize( + 'dtype', SIGNED_INT_DTYPES + SIGNED_EA_INT_DTYPES + [pd.Series([1, 2])]) +def test_is_signed_integer_dtype(dtype): + assert com.is_integer_dtype(dtype) + + +@pytest.mark.parametrize( + 'dtype', + [ + str, float, np.datetime64, np.timedelta64, + pd.Index([1, 2.]), np.array(['a', 'b']), + np.array([], dtype=np.timedelta64)] + + UNSIGNED_INT_DTYPES + UNSIGNED_EA_INT_DTYPES) +def test_is_not_signed_integer_dtype(dtype): + assert not com.is_signed_integer_dtype(dtype) + + +@pytest.mark.parametrize( + 'dtype', + UNSIGNED_INT_DTYPES + + UNSIGNED_EA_INT_DTYPES + + [pd.Series([1, 2], dtype=np.uint32)]) +def test_is_unsigned_integer_dtype(dtype): + assert com.is_unsigned_integer_dtype(dtype) + + +@pytest.mark.parametrize( + 'dtype', + [ + str, float, np.datetime64, np.timedelta64, + pd.Index([1, 2.]), np.array(['a', 'b']), + np.array([], dtype=np.timedelta64)] + + SIGNED_INT_DTYPES + SIGNED_EA_INT_DTYPES) +def test_is_not_unsigned_integer_dtype(dtype): + assert not com.is_unsigned_integer_dtype(dtype) def test_is_int64_dtype(): From 7cb889a139d867685e94b95b92842aef1e2c2b74 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 21:32:05 -0500 Subject: [PATCH 12/29] add additional types --- pandas/core/dtypes/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index d52fc1bda95ff..3919607f182b6 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1848,7 +1848,7 @@ def infer_dtype_from_object(dtype): if isinstance(dtype, type) and issubclass(dtype, np.generic): # Type object from a dtype return dtype - elif isinstance(dtype, np.dtype): + elif isinstance(dtype, (np.dtype, PandasExtensionDtype, ExtensionDtype)): # dtype object try: _validate_date_like_dtype(dtype) From 86a47a8bdc4249f56608523dbefeb87fcc129483 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 21:41:33 -0500 Subject: [PATCH 13/29] simplify issubclass a bit --- pandas/core/dtypes/common.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 3919607f182b6..5c37f58647f5a 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -114,6 +114,11 @@ def ensure_int64_or_float64(arr, copy=False): return arr.astype('float64', copy=copy) +def _issubclass(*klasses): + """ evaluate if the tipo is a subclass of the klasses """ + return lambda tipo: issubclass(tipo, klasses) + + def is_object_dtype(arr_or_dtype): """ Check whether an array-like or dtype is of the object dtype. @@ -140,8 +145,7 @@ def is_object_dtype(arr_or_dtype): >>> is_object_dtype([1, 2, 3]) False """ - return _is_dtype_type( - arr_or_dtype, lambda tipo: issubclass(tipo, np.object_)) + return _is_dtype_type(arr_or_dtype, _issubclass(np.object_)) def is_sparse(arr): @@ -415,8 +419,7 @@ def is_datetime64_dtype(arr_or_dtype): False """ - return _is_dtype_type( - arr_or_dtype, lambda tipo: issubclass(tipo, np.datetime64)) + return _is_dtype_type(arr_or_dtype, _issubclass(np.datetime64)) def is_datetime64tz_dtype(arr_or_dtype): @@ -485,8 +488,7 @@ def is_timedelta64_dtype(arr_or_dtype): False """ - return _is_dtype_type( - arr_or_dtype, lambda tipo: issubclass(tipo, np.timedelta64)) + return _is_dtype_type(arr_or_dtype, _issubclass(np.timedelta64)) def is_period_dtype(arr_or_dtype): @@ -845,9 +847,8 @@ def is_any_int_dtype(arr_or_dtype): False """ - def condition(tipo): - return issubclass(tipo, (np.integer, np.timedelta64)) - return _is_dtype_type(arr_or_dtype, condition) + return _is_dtype_type( + arr_or_dtype, _issubclass(np.integer, np.timedelta64)) def is_integer_dtype(arr_or_dtype): @@ -1025,8 +1026,7 @@ def is_int64_dtype(arr_or_dtype): False """ - return _is_dtype_type( - arr_or_dtype, lambda tipo: issubclass(tipo, np.int64)) + return _is_dtype_type(arr_or_dtype, _issubclass(np.int64)) def is_datetime64_any_dtype(arr_or_dtype): @@ -1185,8 +1185,7 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype): """ return _is_dtype_type( - arr_or_dtype, - lambda tipo: issubclass(tipo, (np.datetime64, np.timedelta64))) + arr_or_dtype, _issubclass(np.datetime64, np.timedelta64)) def _is_unorderable_exception(e): @@ -1529,8 +1528,7 @@ def is_float_dtype(arr_or_dtype): >>> is_float_dtype(pd.Index([1, 2.])) True """ - return _is_dtype_type( - arr_or_dtype, lambda tipo: issubclass(tipo, np.floating)) + return _is_dtype_type(arr_or_dtype, _issubclass(np.floating)) def is_bool_dtype(arr_or_dtype): @@ -1714,8 +1712,7 @@ def is_complex_dtype(arr_or_dtype): True """ - return _is_dtype_type( - arr_or_dtype, lambda tipo: issubclass(tipo, np.complexfloating)) + return _is_dtype_type(arr_or_dtype, _issubclass(np.complexfloating)) def _is_dtype(arr_or_dtype, condition): From 7c73269e430b6ed463f8e8fe067c52ba4c7031b0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 21:47:10 -0500 Subject: [PATCH 14/29] moar clean --- pandas/core/dtypes/common.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 5c37f58647f5a..c2353ed5396de 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -119,6 +119,15 @@ def _issubclass(*klasses): return lambda tipo: issubclass(tipo, klasses) +def _issubclass_and_not_datetimelike(*klasses): + """ + evaluate if the tipo is a subclass of the klasses + and not a datetimelike + """ + return lambda tipo: (issubclass(tipo, klasses) and + not issubclass(tipo, (np.datetime64, np.timedelta64))) + + def is_object_dtype(arr_or_dtype): """ Check whether an array-like or dtype is of the object dtype. @@ -891,10 +900,8 @@ def is_integer_dtype(arr_or_dtype): False """ - def condition(tipo): - return (issubclass(tipo, np.integer) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) - return _is_dtype_type(arr_or_dtype, condition) + return _is_dtype_type( + arr_or_dtype, _issubclass_and_not_datetimelike(np.integer)) def is_signed_integer_dtype(arr_or_dtype): @@ -939,10 +946,8 @@ def is_signed_integer_dtype(arr_or_dtype): False """ - def condition(tipo): - return (issubclass(tipo, np.signedinteger) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) - return _is_dtype_type(arr_or_dtype, condition) + return _is_dtype_type( + arr_or_dtype, _issubclass_and_not_datetimelike(np.signedinteger)) def is_unsigned_integer_dtype(arr_or_dtype): @@ -979,10 +984,8 @@ def is_unsigned_integer_dtype(arr_or_dtype): True """ - def condition(tipo): - return (issubclass(tipo, np.unsignedinteger) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) - return _is_dtype_type(arr_or_dtype, condition) + return _is_dtype_type( + arr_or_dtype, _issubclass_and_not_datetimelike(np.unsignedinteger)) def is_int64_dtype(arr_or_dtype): @@ -1460,10 +1463,8 @@ def is_numeric_dtype(arr_or_dtype): False """ - def condition(tipo): - return (issubclass(tipo, (np.number, np.bool_)) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) - return _is_dtype_type(arr_or_dtype, condition) + return _is_dtype_type( + arr_or_dtype, _issubclass_and_not_datetimelike(np.number, np.bool_)) def is_string_like_dtype(arr_or_dtype): From d44b778fba45bd79fd9fee7ca5f2557ebdda6c8a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 21:49:38 -0500 Subject: [PATCH 15/29] remove syntax error --- pandas/core/dtypes/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index c2353ed5396de..0ffafcc8e8cb7 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1736,7 +1736,7 @@ def _is_dtype(arr_or_dtype, condition): return False try: dtype = _get_dtype(arr_or_dtype) - except (TypeError, ValueError, SyntaxError, UnicodeEncodeError): + except (TypeError, ValueError, UnicodeEncodeError): return False return condition(dtype) From 4e9887eac6b36460702898072d1758a14743feab Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 21:53:57 -0500 Subject: [PATCH 16/29] clean --- pandas/tests/dtypes/test_common.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 07f9b7349fa5c..be645e6c2cc62 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -282,7 +282,9 @@ def test_is_datetimelike(): @pytest.mark.parametrize( - 'dtype', ALL_INT_DTYPES + ALL_EA_INT_DTYPES + [pd.Series([1, 2])]) + 'dtype', [ + pd.Series([1, 2])] + + ALL_INT_DTYPES + ALL_EA_INT_DTYPES) def test_is_integer_dtype(dtype): assert com.is_integer_dtype(dtype) @@ -296,7 +298,9 @@ def test_is_not_integer_dtype(dtype): @pytest.mark.parametrize( - 'dtype', SIGNED_INT_DTYPES + SIGNED_EA_INT_DTYPES + [pd.Series([1, 2])]) + 'dtype', [ + pd.Series([1, 2])] + + SIGNED_INT_DTYPES + SIGNED_EA_INT_DTYPES) def test_is_signed_integer_dtype(dtype): assert com.is_integer_dtype(dtype) @@ -314,9 +318,8 @@ def test_is_not_signed_integer_dtype(dtype): @pytest.mark.parametrize( 'dtype', - UNSIGNED_INT_DTYPES + - UNSIGNED_EA_INT_DTYPES + - [pd.Series([1, 2], dtype=np.uint32)]) + [pd.Series([1, 2], dtype=np.uint32)] + + UNSIGNED_INT_DTYPES + UNSIGNED_EA_INT_DTYPES) def test_is_unsigned_integer_dtype(dtype): assert com.is_unsigned_integer_dtype(dtype) From d5cd4d67028503834bc0c14af1db526ca5cfcc8e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 22:04:45 -0500 Subject: [PATCH 17/29] merge --- pandas/tests/dtypes/test_common.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 2bb4214a8b169..f8af7497db49a 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -3,20 +3,18 @@ import numpy as np import pytest -from pandas.core.dtypes.dtypes import (DatetimeTZDtype, PeriodDtype, - CategoricalDtype, CategoricalDtypeType, - IntervalDtype) -from pandas.core.sparse.api import SparseDtype import pandas.util._test_decorators as td -from pandas.conftest import ( - ALL_INT_DTYPES, UNSIGNED_INT_DTYPES, SIGNED_INT_DTYPES, - ALL_EA_INT_DTYPES, UNSIGNED_EA_INT_DTYPES, SIGNED_EA_INT_DTYPES) import pandas.core.dtypes.common as com from pandas.core.dtypes.dtypes import ( - CategoricalDtype, DatetimeTZDtype, IntervalDtype, PeriodDtype) + CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, IntervalDtype, + PeriodDtype) import pandas as pd +from pandas.conftest import ( + ALL_EA_INT_DTYPES, ALL_INT_DTYPES, SIGNED_EA_INT_DTYPES, SIGNED_INT_DTYPES, + UNSIGNED_EA_INT_DTYPES, UNSIGNED_INT_DTYPES) +from pandas.core.sparse.api import SparseDtype import pandas.util.testing as tm From fe26970e58c8191ee9fb4f40671802207f437859 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 3 Jan 2019 08:14:25 -0500 Subject: [PATCH 18/29] moar --- pandas/core/arrays/integer.py | 5 ++++- pandas/core/dtypes/common.py | 31 +++++++++++++++++++++++++++--- pandas/tests/dtypes/test_common.py | 28 +++++++++++++++++++++------ 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index af2c05bbee7c2..2564b5f990268 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -32,6 +32,7 @@ class _IntegerDtype(ExtensionDtype): The attributes name & type are set when these subclasses are created. """ name = None + base = None type = None na_value = np.nan @@ -655,7 +656,9 @@ def integer_arithmetic_method(self, other): else: name = dtype.capitalize() classname = "{}Dtype".format(name) - attributes_dict = {'type': getattr(np, dtype), + numpy_dtype = getattr(np, dtype) + attributes_dict = {'type': numpy_dtype, + 'base': np.dtype(numpy_dtype), 'name': name} dtype_type = register_extension_dtype( type(classname, (_IntegerDtype, ), attributes_dict) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 0ffafcc8e8cb7..0d6969805af4d 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -823,6 +823,10 @@ def is_any_int_dtype(arr_or_dtype): This function is internal and should not be exposed in the public API. + null-able Integer support + + .. versionadded:: 0.24.0 + Parameters ---------- arr_or_dtype : array-like @@ -866,6 +870,10 @@ def is_integer_dtype(arr_or_dtype): Unlike in `in_any_int_dtype`, timedelta64 instances will return False. + null-able Integer support + + .. versionadded:: 0.24.0 + Parameters ---------- arr_or_dtype : array-like @@ -910,6 +918,10 @@ def is_signed_integer_dtype(arr_or_dtype): Unlike in `in_any_int_dtype`, timedelta64 instances will return False. + null-able Integer support + + .. versionadded:: 0.24.0 + Parameters ---------- arr_or_dtype : array-like @@ -954,6 +966,10 @@ def is_unsigned_integer_dtype(arr_or_dtype): """ Check whether the provided array or dtype is of an unsigned integer dtype. + null-able Integer support + + .. versionadded:: 0.24.0 + Parameters ---------- arr_or_dtype : array-like @@ -983,7 +999,6 @@ def is_unsigned_integer_dtype(arr_or_dtype): >>> is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32)) True """ - return _is_dtype_type( arr_or_dtype, _issubclass_and_not_datetimelike(np.unsignedinteger)) @@ -1796,10 +1811,20 @@ def _is_dtype_type(arr_or_dtype, condition): return condition(type(None)) # fastpath - if isinstance(arr_or_dtype, ( - np.dtype, PandasExtensionDtype, ExtensionDtype)): + if isinstance(arr_or_dtype, np.dtype): return condition(arr_or_dtype.type) + elif isinstance(arr_or_dtype, (PandasExtensionDtype, ExtensionDtype)): + + # introspect the underlying type + if hasattr(arr_or_dtype, 'subtype'): + arr_or_dtype = arr_or_dtype.subtype.type + elif hasattr(arr_or_dtype, 'base'): + arr_or_dtype = arr_or_dtype.base.type + + return condition(np.dtype(arr_or_dtype).type) elif isinstance(arr_or_dtype, type): + if issubclass(arr_or_dtype, (PandasExtensionDtype, ExtensionDtype)): + arr_or_dtype = arr_or_dtype.type return condition(np.dtype(arr_or_dtype).type) elif arr_or_dtype is None: return condition(type(None)) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index f8af7497db49a..2427e736c2abd 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -18,6 +18,17 @@ import pandas.util.testing as tm +# EA & Actual Dtypes +def to_ea_dtypes(dtypes): + """ convert list of string dtypes to EA dtype """ + return [getattr(pd, dt + 'Dtype') for dt in dtypes] + + +def to_numpy_dtypes(dtypes): + """ convert list of string dtypes to numpy dtype """ + return [getattr(np, dt) for dt in dtypes if isinstance(dt, str)] + + class TestPandasDtype(object): # Passing invalid dtype, both as a string or object, must raise TypeError @@ -285,7 +296,8 @@ def test_is_datetimelike(): @pytest.mark.parametrize( 'dtype', [ pd.Series([1, 2])] + - ALL_INT_DTYPES + ALL_EA_INT_DTYPES) + ALL_INT_DTYPES + to_numpy_dtypes(ALL_INT_DTYPES) + + ALL_EA_INT_DTYPES + to_ea_dtypes(ALL_EA_INT_DTYPES)) def test_is_integer_dtype(dtype): assert com.is_integer_dtype(dtype) @@ -301,7 +313,8 @@ def test_is_not_integer_dtype(dtype): @pytest.mark.parametrize( 'dtype', [ pd.Series([1, 2])] + - SIGNED_INT_DTYPES + SIGNED_EA_INT_DTYPES) + SIGNED_INT_DTYPES + to_numpy_dtypes(SIGNED_INT_DTYPES) + + SIGNED_EA_INT_DTYPES + to_ea_dtypes(SIGNED_EA_INT_DTYPES)) def test_is_signed_integer_dtype(dtype): assert com.is_integer_dtype(dtype) @@ -312,7 +325,8 @@ def test_is_signed_integer_dtype(dtype): str, float, np.datetime64, np.timedelta64, pd.Index([1, 2.]), np.array(['a', 'b']), np.array([], dtype=np.timedelta64)] + - UNSIGNED_INT_DTYPES + UNSIGNED_EA_INT_DTYPES) + UNSIGNED_INT_DTYPES + to_numpy_dtypes(UNSIGNED_INT_DTYPES) + + UNSIGNED_EA_INT_DTYPES + to_ea_dtypes(UNSIGNED_EA_INT_DTYPES)) def test_is_not_signed_integer_dtype(dtype): assert not com.is_signed_integer_dtype(dtype) @@ -320,7 +334,8 @@ def test_is_not_signed_integer_dtype(dtype): @pytest.mark.parametrize( 'dtype', [pd.Series([1, 2], dtype=np.uint32)] + - UNSIGNED_INT_DTYPES + UNSIGNED_EA_INT_DTYPES) + UNSIGNED_INT_DTYPES + to_numpy_dtypes(UNSIGNED_INT_DTYPES) + + UNSIGNED_EA_INT_DTYPES + to_ea_dtypes(UNSIGNED_EA_INT_DTYPES)) def test_is_unsigned_integer_dtype(dtype): assert com.is_unsigned_integer_dtype(dtype) @@ -331,7 +346,8 @@ def test_is_unsigned_integer_dtype(dtype): str, float, np.datetime64, np.timedelta64, pd.Index([1, 2.]), np.array(['a', 'b']), np.array([], dtype=np.timedelta64)] + - SIGNED_INT_DTYPES + SIGNED_EA_INT_DTYPES) + SIGNED_INT_DTYPES + to_numpy_dtypes(SIGNED_INT_DTYPES) + + SIGNED_EA_INT_DTYPES + to_ea_dtypes(SIGNED_EA_INT_DTYPES)) def test_is_not_unsigned_integer_dtype(dtype): assert not com.is_unsigned_integer_dtype(dtype) @@ -627,5 +643,5 @@ def test__get_dtype_fails(input_param): (1.2, type(None)), (pd.DataFrame([1, 2]), type(None)), # composite dtype ]) -def test__get_dtype_type(input_param, result): +def test__is_dtype_type(input_param, result): assert com._is_dtype_type(input_param, lambda tipo: tipo == result) From badb3bc204b769ef721a83b8e64e4f6f44d57014 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 3 Jan 2019 08:49:37 -0500 Subject: [PATCH 19/29] introspect more types --- pandas/core/dtypes/common.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 0d6969805af4d..1ed6f4052a6fb 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1817,11 +1817,12 @@ def _is_dtype_type(arr_or_dtype, condition): # introspect the underlying type if hasattr(arr_or_dtype, 'subtype'): - arr_or_dtype = arr_or_dtype.subtype.type - elif hasattr(arr_or_dtype, 'base'): - arr_or_dtype = arr_or_dtype.base.type + arr_or_dtype = arr_or_dtype.subtype - return condition(np.dtype(arr_or_dtype).type) + if hasattr(arr_or_dtype, 'base'): + arr_or_dtype = arr_or_dtype.base + + return condition(np.dtype(arr_or_dtype.type).type) elif isinstance(arr_or_dtype, type): if issubclass(arr_or_dtype, (PandasExtensionDtype, ExtensionDtype)): arr_or_dtype = arr_or_dtype.type From dd518fbbc88e10e241015151ca70467e1167f02c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 3 Jan 2019 12:34:01 -0500 Subject: [PATCH 20/29] fix --- pandas/core/dtypes/common.py | 10 ---------- pandas/tests/dtypes/test_common.py | 2 ++ 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 1ed6f4052a6fb..d9bfb88f36971 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1813,16 +1813,6 @@ def _is_dtype_type(arr_or_dtype, condition): # fastpath if isinstance(arr_or_dtype, np.dtype): return condition(arr_or_dtype.type) - elif isinstance(arr_or_dtype, (PandasExtensionDtype, ExtensionDtype)): - - # introspect the underlying type - if hasattr(arr_or_dtype, 'subtype'): - arr_or_dtype = arr_or_dtype.subtype - - if hasattr(arr_or_dtype, 'base'): - arr_or_dtype = arr_or_dtype.base - - return condition(np.dtype(arr_or_dtype.type).type) elif isinstance(arr_or_dtype, type): if issubclass(arr_or_dtype, (PandasExtensionDtype, ExtensionDtype)): arr_or_dtype = arr_or_dtype.type diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 2427e736c2abd..bbdae162343d0 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -408,6 +408,8 @@ def test_is_datetime_or_timedelta_dtype(): assert not com.is_datetime_or_timedelta_dtype(str) assert not com.is_datetime_or_timedelta_dtype(pd.Series([1, 2])) assert not com.is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) + + # TODO(jreback), this is sligthly suspect assert not com.is_datetime_or_timedelta_dtype( DatetimeTZDtype("ns", "US/Eastern")) From d50fb71e690ab2179d1d44e3ce8a59afb8d496d2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 3 Jan 2019 12:43:27 -0500 Subject: [PATCH 21/29] finalize --- pandas/core/dtypes/common.py | 24 ++++++++++++++++++++++++ pandas/tests/dtypes/test_common.py | 26 +++++++++++++++----------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index d9bfb88f36971..71c4981a60063 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -894,6 +894,12 @@ def is_integer_dtype(arr_or_dtype): False >>> is_integer_dtype(np.uint64) True + >>> is_integer_dtype('int8') + True + >>> is_integer_dtype('Int8') + True + >>> is_integer_dtype(pd.Int8Dtype) + True >>> is_integer_dtype(np.datetime64) False >>> is_integer_dtype(np.timedelta64) @@ -942,6 +948,12 @@ def is_signed_integer_dtype(arr_or_dtype): False >>> is_signed_integer_dtype(np.uint64) # unsigned False + >>> is_signed_integer_dtype('int8') + True + >>> is_signed_integer_dtype('Int8') + True + >>> is_signed_dtype(pd.Int8Dtype) + True >>> is_signed_integer_dtype(np.datetime64) False >>> is_signed_integer_dtype(np.timedelta64) @@ -990,6 +1002,12 @@ def is_unsigned_integer_dtype(arr_or_dtype): False >>> is_unsigned_integer_dtype(np.uint64) True + >>> is_unsigned_integer_dtype('uint8') + True + >>> is_unsigned_integer_dtype('UInt8') + True + >>> is_unsigned_integer_dtype(pd.UInt8Dtype) + True >>> is_unsigned_integer_dtype(np.array(['a', 'b'])) False >>> is_unsigned_integer_dtype(pd.Series([1, 2])) # signed @@ -1030,6 +1048,12 @@ def is_int64_dtype(arr_or_dtype): False >>> is_int64_dtype(np.int64) True + >>> is_int64_dtype('int8') + False + >>> is_int64_dtype('Int8') + False + >>> is_int64_dtype(pd.Int64Dtype) + True >>> is_int64_dtype(float) False >>> is_int64_dtype(np.uint64) # unsigned diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index bbdae162343d0..f0f77b4977610 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -352,17 +352,21 @@ def test_is_not_unsigned_integer_dtype(dtype): assert not com.is_unsigned_integer_dtype(dtype) -def test_is_int64_dtype(): - assert not com.is_int64_dtype(str) - assert not com.is_int64_dtype(float) - assert not com.is_int64_dtype(np.int32) - assert not com.is_int64_dtype(np.uint64) - assert not com.is_int64_dtype(pd.Index([1, 2.])) - assert not com.is_int64_dtype(np.array(['a', 'b'])) - assert not com.is_int64_dtype(np.array([1, 2], dtype=np.uint32)) - - assert com.is_int64_dtype(np.int64) - assert com.is_int64_dtype(np.array([1, 2], dtype=np.int64)) +@pytest.mark.parametrize( + 'dtype', + [np.int64, np.array([1, 2], dtype=np.int64), 'Int64', pd.Int64Dtype]) +def test_is_int64_dtype(dtype): + assert com.is_int64_dtype(dtype) + + +@pytest.mark.parametrize( + 'dtype', + [ + str, float, np.int32, np.uint64, pd.Index([1, 2.]), + np.array(['a', 'b']), np.array([1, 2], dtype=np.uint32), + 'int8', 'Int8', pd.Int8Dtype]) +def test_is_not_int64_dtype(dtype): + assert not com.is_int64_dtype(dtype) def test_is_datetime64_any_dtype(): From 0c895f5f4a77c469700928271510670e314e3922 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 3 Jan 2019 12:58:33 -0500 Subject: [PATCH 22/29] remove base --- pandas/core/arrays/integer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 2564b5f990268..ca43f3ef2e45b 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -658,7 +658,6 @@ def integer_arithmetic_method(self, other): classname = "{}Dtype".format(name) numpy_dtype = getattr(np, dtype) attributes_dict = {'type': numpy_dtype, - 'base': np.dtype(numpy_dtype), 'name': name} dtype_type = register_extension_dtype( type(classname, (_IntegerDtype, ), attributes_dict) From f326e37b13ed0ad722a04d48dd7a70e8ae1b1aca Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 3 Jan 2019 17:15:31 -0500 Subject: [PATCH 23/29] review --- pandas/core/dtypes/common.py | 46 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 71c4981a60063..159325e1eca5c 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -114,12 +114,12 @@ def ensure_int64_or_float64(arr, copy=False): return arr.astype('float64', copy=copy) -def _issubclass(*klasses): +def classes(*klasses): """ evaluate if the tipo is a subclass of the klasses """ return lambda tipo: issubclass(tipo, klasses) -def _issubclass_and_not_datetimelike(*klasses): +def classes_and_not_datetimelike(*klasses): """ evaluate if the tipo is a subclass of the klasses and not a datetimelike @@ -154,7 +154,7 @@ def is_object_dtype(arr_or_dtype): >>> is_object_dtype([1, 2, 3]) False """ - return _is_dtype_type(arr_or_dtype, _issubclass(np.object_)) + return _is_dtype_type(arr_or_dtype, classes(np.object_)) def is_sparse(arr): @@ -428,7 +428,7 @@ def is_datetime64_dtype(arr_or_dtype): False """ - return _is_dtype_type(arr_or_dtype, _issubclass(np.datetime64)) + return _is_dtype_type(arr_or_dtype, classes(np.datetime64)) def is_datetime64tz_dtype(arr_or_dtype): @@ -497,7 +497,7 @@ def is_timedelta64_dtype(arr_or_dtype): False """ - return _is_dtype_type(arr_or_dtype, _issubclass(np.timedelta64)) + return _is_dtype_type(arr_or_dtype, classes(np.timedelta64)) def is_period_dtype(arr_or_dtype): @@ -823,9 +823,9 @@ def is_any_int_dtype(arr_or_dtype): This function is internal and should not be exposed in the public API. - null-able Integer support + .. versionchanged:: 0.24.0 - .. versionadded:: 0.24.0 + Nullable Integer support Parameters ---------- @@ -861,7 +861,7 @@ def is_any_int_dtype(arr_or_dtype): """ return _is_dtype_type( - arr_or_dtype, _issubclass(np.integer, np.timedelta64)) + arr_or_dtype, classes(np.integer, np.timedelta64)) def is_integer_dtype(arr_or_dtype): @@ -870,9 +870,9 @@ def is_integer_dtype(arr_or_dtype): Unlike in `in_any_int_dtype`, timedelta64 instances will return False. - null-able Integer support + .. versionchanged:: 0.24.0 - .. versionadded:: 0.24.0 + Nullable Integer support Parameters ---------- @@ -915,7 +915,7 @@ def is_integer_dtype(arr_or_dtype): """ return _is_dtype_type( - arr_or_dtype, _issubclass_and_not_datetimelike(np.integer)) + arr_or_dtype, classes_and_not_datetimelike(np.integer)) def is_signed_integer_dtype(arr_or_dtype): @@ -924,9 +924,9 @@ def is_signed_integer_dtype(arr_or_dtype): Unlike in `in_any_int_dtype`, timedelta64 instances will return False. - null-able Integer support + .. versionchanged:: 0.24.0 - .. versionadded:: 0.24.0 + Nullable Integer support Parameters ---------- @@ -971,16 +971,16 @@ def is_signed_integer_dtype(arr_or_dtype): """ return _is_dtype_type( - arr_or_dtype, _issubclass_and_not_datetimelike(np.signedinteger)) + arr_or_dtype, classes_and_not_datetimelike(np.signedinteger)) def is_unsigned_integer_dtype(arr_or_dtype): """ Check whether the provided array or dtype is of an unsigned integer dtype. - null-able Integer support + .. versionchanged:: 0.24.0 - .. versionadded:: 0.24.0 + Nullable Integer support Parameters ---------- @@ -1018,7 +1018,7 @@ def is_unsigned_integer_dtype(arr_or_dtype): True """ return _is_dtype_type( - arr_or_dtype, _issubclass_and_not_datetimelike(np.unsignedinteger)) + arr_or_dtype, classes_and_not_datetimelike(np.unsignedinteger)) def is_int64_dtype(arr_or_dtype): @@ -1068,7 +1068,7 @@ def is_int64_dtype(arr_or_dtype): False """ - return _is_dtype_type(arr_or_dtype, _issubclass(np.int64)) + return _is_dtype_type(arr_or_dtype, classes(np.int64)) def is_datetime64_any_dtype(arr_or_dtype): @@ -1227,7 +1227,7 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype): """ return _is_dtype_type( - arr_or_dtype, _issubclass(np.datetime64, np.timedelta64)) + arr_or_dtype, classes(np.datetime64, np.timedelta64)) def _is_unorderable_exception(e): @@ -1503,7 +1503,7 @@ def is_numeric_dtype(arr_or_dtype): """ return _is_dtype_type( - arr_or_dtype, _issubclass_and_not_datetimelike(np.number, np.bool_)) + arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_)) def is_string_like_dtype(arr_or_dtype): @@ -1568,7 +1568,7 @@ def is_float_dtype(arr_or_dtype): >>> is_float_dtype(pd.Index([1, 2.])) True """ - return _is_dtype_type(arr_or_dtype, _issubclass(np.floating)) + return _is_dtype_type(arr_or_dtype, classes(np.floating)) def is_bool_dtype(arr_or_dtype): @@ -1752,7 +1752,7 @@ def is_complex_dtype(arr_or_dtype): True """ - return _is_dtype_type(arr_or_dtype, _issubclass(np.complexfloating)) + return _is_dtype_type(arr_or_dtype, classes(np.complexfloating)) def _is_dtype(arr_or_dtype, condition): @@ -1824,7 +1824,7 @@ def _is_dtype_type(arr_or_dtype, condition): ---------- arr_or_dtype : array-like The array-like or dtype object whose dtype we want to extract. - condition : callable[Union[type(np.dtype), ExtensionDtypeType]] + condition : callable[Union[np.dtype, ExtensionDtypeType]] Returns ------- From 1f1d96b08c8475f8d14ccdf4891cd773af2db6f2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 3 Jan 2019 19:28:57 -0500 Subject: [PATCH 24/29] doc-string --- pandas/core/dtypes/common.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 159325e1eca5c..507dacb5322a6 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -825,7 +825,8 @@ def is_any_int_dtype(arr_or_dtype): .. versionchanged:: 0.24.0 - Nullable Integer support + The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered + as integer by this function. Parameters ---------- @@ -872,7 +873,8 @@ def is_integer_dtype(arr_or_dtype): .. versionchanged:: 0.24.0 - Nullable Integer support + The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered + as integer by this function. Parameters ---------- @@ -926,7 +928,8 @@ def is_signed_integer_dtype(arr_or_dtype): .. versionchanged:: 0.24.0 - Nullable Integer support + The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered + as integer by this function. Parameters ---------- @@ -980,7 +983,8 @@ def is_unsigned_integer_dtype(arr_or_dtype): .. versionchanged:: 0.24.0 - Nullable Integer support + The nullable Integer dtypes (e.g. pandas.UInt64Dtype) are also + considered as integer by this function. Parameters ---------- From 95ef3ce573672a3c268e5f07715edbe6348fc49c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 3 Jan 2019 19:44:40 -0500 Subject: [PATCH 25/29] update whatsnew --- doc/source/whatsnew/v0.24.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 7897c28a619b0..4bb06e921e54c 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -432,7 +432,7 @@ Backwards incompatible API changes - The column order of the resultant :class:`DataFrame` from :meth:`MultiIndex.to_frame` is now guaranteed to match the :attr:`MultiIndex.names` order. (:issue:`22420`) - Incorrectly passing a :class:`DatetimeIndex` to :meth:`MultiIndex.from_tuples`, rather than a sequence of tuples, now raises a ``TypeError`` rather than a ``ValueError`` (:issue:`24024`) - :func:`pd.offsets.generate_range` argument ``time_rule`` has been removed; use ``offset`` instead (:issue:`24157`) -- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes (:issue:`21681`) +- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column, though only if it is a numeric column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes; pandas will sill raise on a merge between a numeric and an ``object`` dtyped coumn that is composed only of strings (:issue:`21681`) Percentage change on groupby ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From b9f4004886991153ea39049fc65a27eb6ed061c3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 3 Jan 2019 19:47:58 -0500 Subject: [PATCH 26/29] deprecation about MutableMapping --- pandas/compat/__init__.py | 2 +- pandas/compat/chainmap_impl.py | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index f9c659106a516..8c2b7f5de8e7e 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -139,7 +139,7 @@ def lfilter(*args, **kwargs): Hashable = collections.abc.Hashable Iterable = collections.abc.Iterable Mapping = collections.abc.Mapping - MutableMapping = collections.abc.MutableMapping + MutableMapping = collections.MutableMapping Sequence = collections.abc.Sequence Sized = collections.abc.Sized Set = collections.abc.Set diff --git a/pandas/compat/chainmap_impl.py b/pandas/compat/chainmap_impl.py index 3ea5414cc41eb..c4aa8c8d6ab30 100644 --- a/pandas/compat/chainmap_impl.py +++ b/pandas/compat/chainmap_impl.py @@ -1,11 +1,4 @@ -import sys - -PY3 = sys.version_info[0] >= 3 - -if PY3: - from collections.abc import MutableMapping -else: - from collections import MutableMapping +from collections import MutableMapping try: from thread import get_ident From 76d1d86c07b3ee6a710c3fb468c2db3b536c14e1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 3 Jan 2019 21:13:10 -0500 Subject: [PATCH 27/29] fix warnings --- pandas/core/arrays/integer.py | 1 + pandas/core/dtypes/concat.py | 8 ++------ pandas/core/internals/concat.py | 6 ++++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index ca43f3ef2e45b..f8f87ff1c96f1 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -154,6 +154,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False): # Avoid DeprecationWarning from NumPy about np.dtype("Int64") # https://github.com/numpy/numpy/pull/7476 dtype = dtype.lower() + if not issubclass(type(dtype), _IntegerDtype): try: dtype = _dtypes[str(np.dtype(dtype))] diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index e6967ed2a4d3d..aada777decaa7 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -9,8 +9,7 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, _TD_DTYPE, is_bool_dtype, is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_dtype_equal, - is_extension_array_dtype, is_interval_dtype, is_object_dtype, - is_period_dtype, is_sparse, is_timedelta64_dtype) + is_extension_array_dtype, is_object_dtype, is_sparse, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( ABCDatetimeArray, ABCDatetimeIndex, ABCIndexClass, ABCPeriodIndex, ABCRangeIndex, ABCSparseDataFrame, ABCTimedeltaIndex) @@ -51,9 +50,7 @@ def get_dtype_kinds(l): typ = 'object' elif is_bool_dtype(dtype): typ = 'bool' - elif is_period_dtype(dtype): - typ = str(arr.dtype) - elif is_interval_dtype(dtype): + elif is_extension_array_dtype(dtype): typ = str(arr.dtype) else: typ = dtype.kind @@ -136,7 +133,6 @@ def is_nonempty(x): # np.concatenate which has them both implemented is compiled. typs = get_dtype_kinds(to_concat) - _contains_datetime = any(typ.startswith('datetime') for typ in typs) _contains_period = any(typ.startswith('period') for typ in typs) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 067b95f9d8847..4a16707a376e9 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -11,8 +11,8 @@ from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( _get_dtype, is_categorical_dtype, is_datetime64_dtype, - is_datetime64tz_dtype, is_float_dtype, is_numeric_dtype, is_sparse, - is_timedelta64_dtype) + is_datetime64tz_dtype, is_extension_array_dtype, is_float_dtype, + is_numeric_dtype, is_sparse, is_timedelta64_dtype) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.missing import isna @@ -306,6 +306,8 @@ def get_empty_dtype_and_na(join_units): upcast_cls = 'timedelta' elif is_sparse(dtype): upcast_cls = dtype.subtype.name + elif is_extension_array_dtype(dtype): + upcast_cls = 'object' elif is_float_dtype(dtype) or is_numeric_dtype(dtype): upcast_cls = dtype.name else: From 8c484573e803997fdfa9cd108b029ef416af45fa Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Jan 2019 07:09:12 -0500 Subject: [PATCH 28/29] Update doc/source/whatsnew/v0.24.0.rst Co-Authored-By: jreback --- doc/source/whatsnew/v0.24.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 4bb06e921e54c..684d3bff19ae5 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -432,7 +432,7 @@ Backwards incompatible API changes - The column order of the resultant :class:`DataFrame` from :meth:`MultiIndex.to_frame` is now guaranteed to match the :attr:`MultiIndex.names` order. (:issue:`22420`) - Incorrectly passing a :class:`DatetimeIndex` to :meth:`MultiIndex.from_tuples`, rather than a sequence of tuples, now raises a ``TypeError`` rather than a ``ValueError`` (:issue:`24024`) - :func:`pd.offsets.generate_range` argument ``time_rule`` has been removed; use ``offset`` instead (:issue:`24157`) -- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column, though only if it is a numeric column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes; pandas will sill raise on a merge between a numeric and an ``object`` dtyped coumn that is composed only of strings (:issue:`21681`) +- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes; pandas will still raise on a merge between a numeric and an ``object`` dtyped column that is composed only of strings (:issue:`21681`) Percentage change on groupby ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 7d4bd5e22051c8dedc42ec1ac6156f3449f09948 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 4 Jan 2019 07:10:36 -0500 Subject: [PATCH 29/29] Revert "deprecation about MutableMapping" This reverts commit b9f4004886991153ea39049fc65a27eb6ed061c3. --- pandas/compat/__init__.py | 2 +- pandas/compat/chainmap_impl.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 8c2b7f5de8e7e..f9c659106a516 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -139,7 +139,7 @@ def lfilter(*args, **kwargs): Hashable = collections.abc.Hashable Iterable = collections.abc.Iterable Mapping = collections.abc.Mapping - MutableMapping = collections.MutableMapping + MutableMapping = collections.abc.MutableMapping Sequence = collections.abc.Sequence Sized = collections.abc.Sized Set = collections.abc.Set diff --git a/pandas/compat/chainmap_impl.py b/pandas/compat/chainmap_impl.py index c4aa8c8d6ab30..3ea5414cc41eb 100644 --- a/pandas/compat/chainmap_impl.py +++ b/pandas/compat/chainmap_impl.py @@ -1,4 +1,11 @@ -from collections import MutableMapping +import sys + +PY3 = sys.version_info[0] >= 3 + +if PY3: + from collections.abc import MutableMapping +else: + from collections import MutableMapping try: from thread import get_ident