From bfd9de465a6792fa692714fab78997491dab62f7 Mon Sep 17 00:00:00 2001
From: Brock
Date: Fri, 12 Mar 2021 20:11:16 -0800
Subject: [PATCH 1/3] TYP: fix ignores

---
 pandas/_libs/parsers.pyx              |  13 +--
 pandas/_testing/asserters.py          |   4 +-
 pandas/core/algorithms.py             |  35 +++-----
 pandas/core/arrays/categorical.py     |  28 ++-----
 pandas/core/arrays/datetimelike.py    |   5 +-
 pandas/core/indexes/base.py           | 114 +++++++------------------
 pandas/core/internals/blocks.py       |  15 +---
 pandas/core/internals/concat.py       |  26 ++----
 pandas/core/internals/construction.py |  24 ++----
 pandas/core/reshape/merge.py          |   7 +-
 pandas/core/util/hashing.py           |   9 +-
 pandas/io/parsers/base_parser.py      |   4 +-
 12 files changed, 85 insertions(+), 199 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 493333fded6dd..b9be68071f3f9 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -104,7 +104,6 @@ from pandas.core.dtypes.common import (
     is_bool_dtype,
     is_categorical_dtype,
     is_datetime64_dtype,
-    is_extension_array_dtype,
     is_float_dtype,
     is_integer_dtype,
     is_object_dtype,
@@ -337,7 +336,7 @@ cdef class TextReader:
         object skiprows
        object dtype
         object usecols
-        list dtype_cast_order
+        list dtype_cast_order  # list[np.dtype]
         set unnamed_cols
         set noconvert
@@ -1019,7 +1018,7 @@ cdef class TextReader:
             # don't try to upcast EAs
             try_upcast = upcast_na and na_count > 0
-            if try_upcast and not is_extension_array_dtype(col_dtype):
+            if try_upcast and isinstance(col_dtype, np.dtype):
                 col_res = _maybe_upcast(col_res)
 
             if col_res is None:
@@ -1035,6 +1034,7 @@ cdef class TextReader:
                           object name, bint na_filter,
                           kh_str_starts_t *na_hashset,
                           object na_flist, object col_dtype):
+        # Note: col_dtype is DtypeObj
 
         if col_dtype is not None:
             col_res, na_count = self._convert_with_dtype(
@@ -1095,6 +1095,8 @@ cdef class TextReader:
                             bint user_dtype,
                             kh_str_starts_t *na_hashset,
                             object na_flist):
+        # Note: dtype is a DtypeObj
+
         if is_categorical_dtype(dtype):
             # TODO: I suspect that _categorical_convert could be
             # optimized when dtype is an instance of CategoricalDtype
@@ -1108,7 +1110,8 @@ cdef class TextReader:
                 cats, codes, dtype, true_values=true_values)
             return cat, na_count
 
-        elif is_extension_array_dtype(dtype):
+        elif not isinstance(dtype, np.dtype):
+            # i.e. ExtensionDtype
             result, na_count = self._string_convert(i, start, end, na_filter,
                                                     na_hashset)
@@ -1926,7 +1929,7 @@ def _concatenate_chunks(list chunks):
             result[name] = union_categoricals(arrs,
                                               sort_categories=sort_categories)
         else:
-            if is_extension_array_dtype(dtype):
+            if not isinstance(dtype, np.dtype):
                 array_type = dtype.construct_array_type()
                 result[name] = array_type._concat_same_type(arrs)
             else:
diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py
index 731b55464c11b..2adc70438cce7 100644
--- a/pandas/_testing/asserters.py
+++ b/pandas/_testing/asserters.py
@@ -976,8 +976,8 @@ def assert_series_equal(
         left_values = left._values
         right_values = right._values
         # Only check exact if dtype is numeric
-        if is_extension_array_dtype(left_values) and is_extension_array_dtype(
-            right_values
+        if isinstance(left_values, ExtensionArray) and isinstance(
+            right_values, ExtensionArray
         ):
             assert_extension_array_equal(
                 left_values,
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 0fa02d54b5b78..a888bfabd6f80 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -235,41 +235,26 @@ def _reconstruct_data(
         # Catch DatetimeArray/TimedeltaArray
         return values
 
-    if is_extension_array_dtype(dtype):
-        # error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no
-        # attribute "construct_array_type"
-        cls = dtype.construct_array_type()  # type: ignore[union-attr]
+    if not isinstance(dtype, np.dtype):
+        # i.e. ExtensionDtype
+        cls = dtype.construct_array_type()
         if isinstance(values, cls) and values.dtype == dtype:
             return values
 
         values = cls._from_sequence(values)
     elif is_bool_dtype(dtype):
-        # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has
-        # incompatible type "Union[dtype, ExtensionDtype]"; expected
-        # "Union[dtype, None, type, _SupportsDtype, str, Tuple[Any, int],
-        # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict,
-        # Tuple[Any, Any]]"
-        values = values.astype(dtype, copy=False)  # type: ignore[arg-type]
+        values = values.astype(dtype, copy=False)
 
         # we only support object dtypes bool Index
         if isinstance(original, ABCIndex):
             values = values.astype(object, copy=False)
     elif dtype is not None:
         if is_datetime64_dtype(dtype):
-            # error: Incompatible types in assignment (expression has type
-            # "str", variable has type "Union[dtype, ExtensionDtype]")
-            dtype = "datetime64[ns]"  # type: ignore[assignment]
+            dtype = np.dtype("datetime64[ns]")
         elif is_timedelta64_dtype(dtype):
-            # error: Incompatible types in assignment (expression has type
-            # "str", variable has type "Union[dtype, ExtensionDtype]")
-            dtype = "timedelta64[ns]"  # type: ignore[assignment]
+            dtype = np.dtype("timedelta64[ns]")
 
-        # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has
-        # incompatible type "Union[dtype, ExtensionDtype]"; expected
-        # "Union[dtype, None, type, _SupportsDtype, str, Tuple[Any, int],
-        # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict,
-        # Tuple[Any, Any]]"
-        values = values.astype(dtype, copy=False)  # type: ignore[arg-type]
+        values = values.astype(dtype, copy=False)
 
     return values
@@ -772,7 +757,8 @@ def factorize(
             uniques = Index(uniques)
         return codes, uniques
 
-    if is_extension_array_dtype(values.dtype):
+    if not isinstance(values.dtype, np.dtype):
+        # i.e. ExtensionDtype
         codes, uniques = values.factorize(na_sentinel=na_sentinel)
         dtype = original.dtype
     else:
@@ -1662,7 +1648,8 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
         arr = arr.to_numpy()
     dtype = arr.dtype
 
-    if is_extension_array_dtype(dtype):
+    if not isinstance(dtype, np.dtype):
+        # i.e. ExtensionDtype
         if hasattr(arr, f"__{op.__name__}__"):
             if axis != 0:
                 raise ValueError(f"cannot diff {type(arr).__name__} on axis={axis}")
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 0bf5e05786d4d..2f0a88a2d33bc 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -66,7 +66,10 @@
     needs_i8_conversion,
     pandas_dtype,
 )
-from pandas.core.dtypes.dtypes import CategoricalDtype
+from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
+    ExtensionDtype,
+)
 from pandas.core.dtypes.generic import (
     ABCIndex,
     ABCSeries,
 )
@@ -504,7 +507,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
             result = self._set_dtype(dtype)
 
         # TODO: consolidate with ndarray case?
-        elif is_extension_array_dtype(dtype):
+        elif isinstance(dtype, ExtensionDtype):
             result = pd_array(self, dtype=dtype, copy=copy)
 
         elif is_integer_dtype(dtype) and self.isna().any():
@@ -515,13 +518,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
            # variable has type "Categorical")
            result = np.array(  # type: ignore[assignment]
                 self,
-                # error: Argument "dtype" to "array" has incompatible type
-                # "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float],
-                # Type[int], Type[complex], Type[bool], Type[object]]"; expected
-                # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any,
-                # int], Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict,
-                # Tuple[Any, Any]]]"
-                dtype=dtype,  # type: ignore[arg-type]
+                dtype=dtype,
                 copy=copy,
             )
@@ -529,14 +526,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
             # GH8628 (PERF): astype category codes instead of astyping array
             try:
                 new_cats = np.asarray(self.categories)
-                # error: Argument "dtype" to "astype" of "_ArrayOrScalarCommon" has
-                # incompatible type "Union[ExtensionDtype, dtype[Any]]"; expected
-                # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any,
-                # int], Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict,
-                # Tuple[Any, Any]]]"
-                new_cats = new_cats.astype(
-                    dtype=dtype, copy=copy  # type: ignore[arg-type]
-                )
+                new_cats = new_cats.astype(dtype=dtype, copy=copy)
             except (
                 TypeError,  # downstream error msg for CategoricalIndex is misleading
                 ValueError,
             )
@@ -2658,9 +2648,7 @@ def _get_codes_for_values(values, categories: Index) -> np.ndarray:
 
     # Only hit here when we've already coerced to object dtype.
     hash_klass, vals = get_data_algo(values)
-    # pandas/core/arrays/categorical.py:2661: error: Argument 1 to "get_data_algo" has
-    # incompatible type "Index"; expected "Union[ExtensionArray, ndarray]" [arg-type]
-    _, cats = get_data_algo(categories)  # type: ignore[arg-type]
+    _, cats = get_data_algo(categories._values)
     t = hash_klass(len(cats))
     t.map_locations(cats)
     return coerce_indexer_dtype(t.lookup(vals), cats)
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index bd5cc04659a06..42299aaf46a48 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -878,12 +878,11 @@ def _isnan(self) -> np.ndarray:
         return self.asi8 == iNaT
 
     @property  # NB: override with cache_readonly in immutable subclasses
-    def _hasnans(self) -> np.ndarray:
+    def _hasnans(self) -> bool:
         """
         return if I have any nans; enables various perf speedups
         """
-        # error: Incompatible return value type (got "bool", expected "ndarray")
-        return bool(self._isnan.any())  # type: ignore[return-value]
+        return bool(self._isnan.any())
 
     def _maybe_mask_results(
         self, result: np.ndarray, fill_value=iNaT, convert=None
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 8b67b98b32f7f..26d25645b02c6 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -191,8 +191,7 @@
 
 str_t = str
 
-# error: Value of type variable "_DTypeScalar" of "dtype" cannot be "object"
-_o_dtype = np.dtype(object)  # type: ignore[type-var]
+_o_dtype = np.dtype("object")
 
 _Identity = NewType("_Identity", object)
 
@@ -417,11 +416,7 @@ def __new__(
                 # maybe coerce to a sub-class
                 arr = data
             else:
-                # error: Argument "dtype" to "asarray_tuplesafe" has incompatible type
-                # "Type[object]"; expected "Union[str, dtype[Any], None]"
-                arr = com.asarray_tuplesafe(
-                    data, dtype=object  # type: ignore[arg-type]
-                )
+                arr = com.asarray_tuplesafe(data, dtype=np.dtype("object"))
 
             if dtype is None:
                 arr = _maybe_cast_data_without_dtype(arr)
@@ -456,9 +451,7 @@ def __new__(
             )
             # other iterable of some kind
 
-            # error: Argument "dtype" to "asarray_tuplesafe" has incompatible type
-            # "Type[object]"; expected "Union[str, dtype[Any], None]"
-            subarr = com.asarray_tuplesafe(data, dtype=object)  # type: ignore[arg-type]
+            subarr = com.asarray_tuplesafe(data, dtype=np.dtype("object"))
             return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
 
     @classmethod
@@ -2902,16 +2895,10 @@ def union(self, other, sort=None):
            #   |      -> T
            #   |      -> object
             if not (is_integer_dtype(self.dtype) and is_integer_dtype(other.dtype)):
-                # error: Incompatible types in assignment (expression has type
-                # "str", variable has type "Union[dtype[Any], ExtensionDtype]")
-                dtype = "float64"  # type: ignore[assignment]
+                dtype = np.dtype("float64")
             else:
                 # one is int64 other is uint64
-
-                # error: Incompatible types in assignment (expression has type
-                # "Type[object]", variable has type "Union[dtype[Any],
-                # ExtensionDtype]")
-                dtype = object  # type: ignore[assignment]
+                dtype = np.dtype("object")
 
             left = self.astype(dtype, copy=False)
             right = other.astype(dtype, copy=False)
@@ -3906,6 +3893,9 @@ def join(
         self_is_mi = isinstance(self, ABCMultiIndex)
         other_is_mi = isinstance(other, ABCMultiIndex)
 
+        lindexer: Optional[np.ndarray]
+        rindexer: Optional[np.ndarray]
+
         # try to figure out the join level
         # GH3662
         if level is None and (self_is_mi or other_is_mi):
@@ -4003,15 +3993,11 @@ def join(
 
         if return_indexers:
             if join_index is self:
-                # error: Incompatible types in assignment (expression has type "None",
-                # variable has type "ndarray")
-                lindexer = None  # type: ignore[assignment]
+                lindexer = None
             else:
                 lindexer = self.get_indexer(join_index)
             if join_index is other:
-                # error: Incompatible types in assignment (expression has type "None",
-                # variable has type "ndarray")
-                rindexer = None  # type: ignore[assignment]
+                rindexer = None
             else:
                 rindexer = other.get_indexer(join_index)
             return join_index, lindexer, rindexer
@@ -4114,15 +4100,11 @@ def _join_non_unique(self, other, how="left", return_indexers=False):
         left_idx = ensure_platform_int(left_idx)
         right_idx = ensure_platform_int(right_idx)
 
-        join_index = np.asarray(lvalues.take(left_idx))
+        join_array = np.asarray(lvalues.take(left_idx))
         mask = left_idx == -1
-        np.putmask(join_index, mask, rvalues.take(right_idx))
+        np.putmask(join_array, mask, rvalues.take(right_idx))
 
-        # error: Incompatible types in assignment (expression has type "Index", variable
-        # has type "ndarray")
-        join_index = self._wrap_joined_index(
-            join_index, other  # type: ignore[assignment]
-        )
+        join_index = self._wrap_joined_index(join_array, other)
 
         if return_indexers:
             return join_index, left_idx, right_idx
@@ -4286,6 +4268,9 @@ def _join_monotonic(self, other, how="left", return_indexers=False):
         sv = self._get_engine_target()
         ov = other._get_engine_target()
 
+        ridx: Optional[np.ndarray]
+        lidx: Optional[np.ndarray]
+
         if self.is_unique and other.is_unique:
             # We can perform much better than the general case
             if how == "left":
@@ -4295,61 +4280,24 @@ def _join_monotonic(self, other, how="left", return_indexers=False):
             elif how == "right":
                 join_index = other
                 lidx = self._left_indexer_unique(ov, sv)
-                # error: Incompatible types in assignment (expression has type "None",
-                # variable has type "ndarray")
-                ridx = None  # type: ignore[assignment]
+                ridx = None
             elif how == "inner":
-                # error: Incompatible types in assignment (expression has type
-                # "ndarray", variable has type "Index")
-                join_index, lidx, ridx = self._inner_indexer(  # type:ignore[assignment]
-                    sv, ov
-                )
-                # error: Argument 1 to "_wrap_joined_index" of "Index" has incompatible
-                # type "Index"; expected "ndarray"
-                join_index = self._wrap_joined_index(
-                    join_index, other  # type: ignore[arg-type]
-                )
+                join_array, lidx, ridx = self._inner_indexer(sv, ov)
+                join_index = self._wrap_joined_index(join_array, other)
             elif how == "outer":
-                # error: Incompatible types in assignment (expression has type
-                # "ndarray", variable has type "Index")
-                join_index, lidx, ridx = self._outer_indexer(  # type:ignore[assignment]
-                    sv, ov
-                )
-                # error: Argument 1 to "_wrap_joined_index" of "Index" has incompatible
-                # type "Index"; expected "ndarray"
-                join_index = self._wrap_joined_index(
-                    join_index, other  # type: ignore[arg-type]
-                )
+                join_array, lidx, ridx = self._outer_indexer(sv, ov)
+                join_index = self._wrap_joined_index(join_array, other)
         else:
             if how == "left":
-                # error: Incompatible types in assignment (expression has type
-                # "ndarray", variable has type "Index")
-                join_index, lidx, ridx = self._left_indexer(  # type: ignore[assignment]
-                    sv, ov
-                )
+                join_array, lidx, ridx = self._left_indexer(sv, ov)
             elif how == "right":
-                # error: Incompatible types in assignment (expression has type
-                # "ndarray", variable has type "Index")
-                join_index, ridx, lidx = self._left_indexer(  # type: ignore[assignment]
-                    ov, sv
-                )
+                join_array, ridx, lidx = self._left_indexer(ov, sv)
             elif how == "inner":
-                # error: Incompatible types in assignment (expression has type
-                # "ndarray", variable has type "Index")
-                join_index, lidx, ridx = self._inner_indexer(  # type:ignore[assignment]
-                    sv, ov
-                )
+                join_array, lidx, ridx = self._inner_indexer(sv, ov)
             elif how == "outer":
-                # error: Incompatible types in assignment (expression has type
-                # "ndarray", variable has type "Index")
-                join_index, lidx, ridx = self._outer_indexer(  # type:ignore[assignment]
-                    sv, ov
-                )
-            # error: Argument 1 to "_wrap_joined_index" of "Index" has incompatible type
-            # "Index"; expected "ndarray"
-            join_index = self._wrap_joined_index(
-                join_index, other  # type: ignore[arg-type]
-            )
+                join_array, lidx, ridx = self._outer_indexer(sv, ov)
+
+            join_index = self._wrap_joined_index(join_array, other)
 
         if return_indexers:
             lidx = None if lidx is None else ensure_platform_int(lidx)
@@ -6481,12 +6429,8 @@ def _maybe_cast_data_without_dtype(subarr):
             pass
 
     elif inferred.startswith("timedelta"):
-        # error: Incompatible types in assignment (expression has type
-        # "TimedeltaArray", variable has type "ndarray")
-        data = TimedeltaArray._from_sequence(  # type: ignore[assignment]
-            subarr, copy=False
-        )
-        return data
+        tda = TimedeltaArray._from_sequence(subarr, copy=False)
+        return tda
     elif inferred == "period":
         try:
             data = PeriodArray._from_sequence(subarr)
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 1bcddee4d726e..b1a552cff2274 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -118,9 +118,7 @@
     from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
 
 # comparison is faster than is_object_dtype
-
-# error: Value of type variable "_DTypeScalar" of "dtype" cannot be "object"
-_dtype_obj = np.dtype(object)  # type: ignore[type-var]
+_dtype_obj = np.dtype("object")
 
 
 class Block(PandasObject):
@@ -1598,14 +1596,9 @@ def to_native_types(self, na_rep="nan", quoting=None, **kwargs):
         values = self.values
         mask = isna(values)
 
-        # error: Incompatible types in assignment (expression has type "ndarray",
-        # variable has type "ExtensionArray")
-        values = np.asarray(values.astype(object))  # type: ignore[assignment]
-        values[mask] = na_rep
-
-        # TODO(EA2D): reshape not needed with 2D EAs
-        # we are expected to return a 2-d ndarray
-        return self.make_block(values)
+        new_values = np.asarray(values.astype(object))
+        new_values[mask] = na_rep
+        return self.make_block(new_values)
 
     def take_nd(
         self, indexer, axis: int = 0, new_mgr_locs=None, fill_value=lib.no_default
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index e2949eb227fbf..b82ab807562f4 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -31,6 +31,7 @@
     is_sparse,
 )
 from pandas.core.dtypes.concat import concat_compat
+from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.missing import (
     is_valid_na_for_dtype,
     isna_all,
 )
@@ -331,9 +332,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
 
         if self.is_valid_na_for(empty_dtype):
             blk_dtype = getattr(self.block, "dtype", None)
-            # error: Value of type variable "_DTypeScalar" of "dtype" cannot be
-            # "object"
-            if blk_dtype == np.dtype(object):  # type: ignore[type-var]
+            if blk_dtype == np.dtype("object"):
                 # we want to avoid filling with np.nan if we are
                 # using None; we already know that we are all
                 # nulls
@@ -347,10 +346,8 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
                 return DatetimeArray(i8values, dtype=empty_dtype)
             elif is_extension_array_dtype(blk_dtype):
                 pass
-            elif is_extension_array_dtype(empty_dtype):
-                # error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]"
-                # has no attribute "construct_array_type"
-                cls = empty_dtype.construct_array_type()  # type: ignore[union-attr]
+            elif isinstance(empty_dtype, ExtensionDtype):
+                cls = empty_dtype.construct_array_type()
                 missing_arr = cls._from_sequence([], dtype=empty_dtype)
                 ncols, nrows = self.shape
                 assert ncols == 1, ncols
@@ -362,14 +359,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
 
             # NB: we should never get here with empty_dtype integer or bool;
             #  if we did, the missing_arr.fill would cast to gibberish
-            # error: Argument "dtype" to "empty" has incompatible type
-            # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any],
-            # None, type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any,
-            # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any,
-            # Any]]]"
-            missing_arr = np.empty(
-                self.shape, dtype=empty_dtype  # type: ignore[arg-type]
-            )
+            missing_arr = np.empty(self.shape, dtype=empty_dtype)
             missing_arr.fill(fill_value)
             return missing_arr
 
@@ -449,10 +439,8 @@ def _dtype_to_na_value(dtype: DtypeObj, has_none_blocks: bool):
     """
     Find the NA value to go with this dtype.
""" - if is_extension_array_dtype(dtype): - # error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no - # attribute "na_value" - return dtype.na_value # type: ignore[union-attr] + if isinstance(dtype, ExtensionDtype): + return dtype.na_value elif dtype.kind in ["m", "M"]: return dtype.type("NaT") elif dtype.kind in ["f", "c"]: diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 63a437a91f6e4..93aade8d58a71 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -45,6 +45,7 @@ is_named_tuple, is_object_dtype, ) +from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCDatetimeIndex, @@ -249,7 +250,7 @@ def ndarray_to_mgr( if not len(values) and columns is not None and len(columns): values = np.empty((0, 1), dtype=object) - if is_extension_array_dtype(values) or is_extension_array_dtype(dtype): + if is_extension_array_dtype(values) or isinstance(dtype, ExtensionDtype): # GH#19157 if isinstance(values, np.ndarray) and values.ndim > 1: @@ -365,19 +366,10 @@ def dict_to_mgr( # no obvious "empty" int column if missing.any() and not is_integer_dtype(dtype): if dtype is None or ( - not is_extension_array_dtype(dtype) - # error: Argument 1 to "issubdtype" has incompatible type - # "Union[dtype, ExtensionDtype]"; expected "Union[dtype, None, - # type, _SupportsDtype, str, Tuple[Any, int], Tuple[Any, - # Union[int, Sequence[int]]], List[Any], _DtypeDict, Tuple[Any, - # Any]]" - and np.issubdtype(dtype, np.flexible) # type: ignore[arg-type] + isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.flexible) ): # GH#1783 - - # error: Value of type variable "_DTypeScalar" of "dtype" cannot be - # "object" - nan_dtype = np.dtype(object) # type: ignore[type-var] + nan_dtype = np.dtype("object") else: # error: Incompatible types in assignment (expression has type # "Union[dtype, ExtensionDtype]", variable has type "dtype") @@ -682,13 +674,11 @@ def to_arrays( if not len(data): if isinstance(data, np.ndarray): - # error: Incompatible types in assignment (expression has type - # "Optional[Tuple[str, ...]]", variable has type "Optional[Index]") - columns = data.dtype.names # type: ignore[assignment] - if columns is not None: + if data.dtype.names is not None: # i.e. 
+                columns = ensure_index(data.dtype.names)
                 arrays = [data[name] for name in columns]
-                return arrays, ensure_index(columns)
+                return arrays, columns
         return [], ensure_index([])
 
     elif isinstance(data[0], Categorical):
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index f4de822262cf4..c01bf3931b27a 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -73,6 +73,7 @@
 )
 from pandas.core import groupby
 import pandas.core.algorithms as algos
+from pandas.core.arrays import ExtensionArray
 import pandas.core.common as com
 from pandas.core.construction import extract_array
 from pandas.core.frame import _merge_doc
@@ -2083,12 +2084,10 @@ def _factorize_keys(
         lk = ensure_int64(lk.codes)
         rk = ensure_int64(rk.codes)
 
-    elif is_extension_array_dtype(lk.dtype) and is_dtype_equal(lk.dtype, rk.dtype):
+    elif isinstance(lk, ExtensionArray) and is_dtype_equal(lk.dtype, rk.dtype):
         # error: Incompatible types in assignment (expression has type "ndarray",
         # variable has type "ExtensionArray")
-        # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute
-        # "_values_for_factorize"
-        lk, _ = lk._values_for_factorize()  # type: ignore[union-attr,assignment]
+        lk, _ = lk._values_for_factorize()
 
         # error: Incompatible types in assignment (expression has type
         # "ndarray", variable has type "ExtensionArray")
diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py
index 5e45d36e188a2..0f0b4d583919f 100644
--- a/pandas/core/util/hashing.py
+++ b/pandas/core/util/hashing.py
@@ -25,7 +25,6 @@
 
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
-    is_extension_array_dtype,
     is_list_like,
 )
 from pandas.core.dtypes.generic import (
@@ -297,11 +296,9 @@ def hash_array(
     if is_categorical_dtype(dtype):
         vals = cast("Categorical", vals)
         return _hash_categorical(vals, encoding, hash_key)
-    elif is_extension_array_dtype(dtype):
-        # pandas/core/util/hashing.py:301: error: Item "ndarray" of
-        # "Union[ExtensionArray, ndarray]" has no attribute "_values_for_factorize"
-        # [union-attr]
-        vals, _ = vals._values_for_factorize()  # type: ignore[union-attr]
+    elif not isinstance(dtype, np.dtype):
+        # i.e. ExtensionDtype
+        vals, _ = vals._values_for_factorize()
 
     # error: Argument 1 to "_hash_ndarray" has incompatible type "ExtensionArray";
     # expected "ndarray"
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 8cfbae3cafc18..a011a789bf17c 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -725,9 +725,7 @@ def _cast_types(self, values, cast_type, column):
                 # c-parser which parses all categories
                 # as strings
 
-                # error: Argument 2 to "astype_nansafe" has incompatible type
-                # "Type[str]"; expected "Union[dtype[Any], ExtensionDtype]"
-                values = astype_nansafe(values, str)  # type: ignore[arg-type]
+                values = astype_nansafe(values, np.dtype(str))
             cats = Index(values).unique().dropna()
             values = Categorical._from_inferred_categories(

From 4ba77d194c53d161657c1289466912994703d2c0 Mon Sep 17 00:00:00 2001
From: Brock
Date: Fri, 12 Mar 2021 20:49:23 -0800
Subject: [PATCH 2/3] revert incorrect

---
 pandas/core/arrays/categorical.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 2f0a88a2d33bc..0062ed01e957a 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2648,7 +2648,9 @@ def _get_codes_for_values(values, categories: Index) -> np.ndarray:
 
     # Only hit here when we've already coerced to object dtype.
     hash_klass, vals = get_data_algo(values)
-    _, cats = get_data_algo(categories._values)
+    # pandas/core/arrays/categorical.py:2661: error: Argument 1 to "get_data_algo" has
+    # incompatible type "Index"; expected "Union[ExtensionArray, ndarray]" [arg-type]
+    _, cats = get_data_algo(categories)  # type: ignore[arg-type]
     t = hash_klass(len(cats))
     t.map_locations(cats)
     return coerce_indexer_dtype(t.lookup(vals), cats)

From 83563ce062729093894b4a847d9d3c5df8a8031c Mon Sep 17 00:00:00 2001
From: Brock
Date: Fri, 12 Mar 2021 21:16:21 -0800
Subject: [PATCH 3/3] comment

---
 pandas/_libs/parsers.pyx    | 11 ++++-------
 pandas/core/util/hashing.py |  9 ++++++---
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index b9be68071f3f9..baf5633db0cb3 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -104,6 +104,7 @@ from pandas.core.dtypes.common import (
     is_bool_dtype,
     is_categorical_dtype,
     is_datetime64_dtype,
+    is_extension_array_dtype,
     is_float_dtype,
     is_integer_dtype,
     is_object_dtype,
@@ -1018,7 +1019,7 @@ cdef class TextReader:
             # don't try to upcast EAs
             try_upcast = upcast_na and na_count > 0
-            if try_upcast and isinstance(col_dtype, np.dtype):
+            if try_upcast and not is_extension_array_dtype(col_dtype):
                 col_res = _maybe_upcast(col_res)
 
             if col_res is None:
@@ -1034,7 +1035,6 @@ cdef class TextReader:
                           object name, bint na_filter,
                           kh_str_starts_t *na_hashset,
                           object na_flist, object col_dtype):
-        # Note: col_dtype is DtypeObj
 
         if col_dtype is not None:
             col_res, na_count = self._convert_with_dtype(
@@ -1095,8 +1095,6 @@ cdef class TextReader:
                             bint user_dtype,
                             kh_str_starts_t *na_hashset,
                             object na_flist):
-        # Note: dtype is a DtypeObj
-
         if is_categorical_dtype(dtype):
             # TODO: I suspect that _categorical_convert could be
             # optimized when dtype is an instance of CategoricalDtype
@@ -1110,8 +1108,7 @@ cdef class TextReader:
                 cats, codes, dtype, true_values=true_values)
             return cat, na_count
 
-        elif not isinstance(dtype, np.dtype):
-            # i.e. ExtensionDtype
+        elif is_extension_array_dtype(dtype):
             result, na_count = self._string_convert(i, start, end, na_filter,
                                                     na_hashset)
@@ -1929,7 +1926,7 @@ def _concatenate_chunks(list chunks):
             result[name] = union_categoricals(arrs,
                                               sort_categories=sort_categories)
         else:
-            if not isinstance(dtype, np.dtype):
+            if is_extension_array_dtype(dtype):
                 array_type = dtype.construct_array_type()
                 result[name] = array_type._concat_same_type(arrs)
             else:
diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py
index 0f0b4d583919f..5e45d36e188a2 100644
--- a/pandas/core/util/hashing.py
+++ b/pandas/core/util/hashing.py
@@ -25,6 +25,7 @@
 
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
+    is_extension_array_dtype,
     is_list_like,
 )
 from pandas.core.dtypes.generic import (
@@ -296,9 +297,11 @@ def hash_array(
     if is_categorical_dtype(dtype):
         vals = cast("Categorical", vals)
         return _hash_categorical(vals, encoding, hash_key)
-    elif not isinstance(dtype, np.dtype):
-        # i.e. ExtensionDtype
-        vals, _ = vals._values_for_factorize()
+    elif is_extension_array_dtype(dtype):
+        # pandas/core/util/hashing.py:301: error: Item "ndarray" of
+        # "Union[ExtensionArray, ndarray]" has no attribute "_values_for_factorize"
+        # [union-attr]
+        vals, _ = vals._values_for_factorize()  # type: ignore[union-attr]
 
     # error: Argument 1 to "_hash_ndarray" has incompatible type "ExtensionArray";
    # expected "ndarray"
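
Note on the pattern used throughout this series: is_extension_array_dtype() returns a plain bool, so mypy cannot use it to narrow the DtypeObj union (Union[np.dtype, ExtensionDtype]), and every attribute access on the union needed a "# type: ignore". An isinstance check against np.dtype (or ExtensionDtype) is understood by mypy as type narrowing, which is why the ignores can simply be dropped. A minimal standalone sketch of the idea, not pandas code: empty_like_dtype is a hypothetical helper written only for illustration, and _from_sequence is a private ExtensionArray constructor.

    from typing import Union

    import numpy as np
    from pandas.api.extensions import ExtensionDtype

    # mirrors pandas._typing.DtypeObj
    DtypeObj = Union[np.dtype, ExtensionDtype]

    def empty_like_dtype(dtype: DtypeObj):
        if not isinstance(dtype, np.dtype):
            # mypy narrows dtype to ExtensionDtype in this branch, so
            # construct_array_type() resolves with no ignore[union-attr]
            cls = dtype.construct_array_type()
            return cls._from_sequence([], dtype=dtype)
        # here dtype is narrowed to np.dtype, which np.empty accepts
        # with no ignore[arg-type]
        return np.empty(0, dtype=dtype)

With is_extension_array_dtype(dtype) in place of the isinstance check, mypy still sees dtype as the full union in both branches and reports the union-attr/arg-type errors that the removed comments recorded.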