pandas-dev · jreback · Jan 4, 2022 · Jan 4, 2022
diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py
@@ -107,11 +107,7 @@ def putmask_smart(values: np.ndarray, mask: npt.NDArray[np.bool_], new) -> np.nd
         return values
 
     dtype = find_common_type([values.dtype, new.dtype])
-    # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
-    # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type,
-    # _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]],
-    # List[Any], _DTypeDict, Tuple[Any, Any]]]"
-    values = values.astype(dtype)  # type: ignore[arg-type]
+    values = values.astype(dtype)
 
     np.putmask(values, mask, new)
     return values

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -1130,7 +1130,6 @@ def astype_nansafe(
                 "is deprecated and will raise in a future version. "
                 "Use .view(...) instead.",
                 FutureWarning,
-                # stacklevel chosen to be correct when reached via Series.astype
                 stacklevel=find_stack_level(),
             )
             if isna(arr).any():
@@ -1152,7 +1151,6 @@ def astype_nansafe(
                 "is deprecated and will raise in a future version. "
                 "Use .view(...) instead.",
                 FutureWarning,
-                # stacklevel chosen to be correct when reached via Series.astype
                 stacklevel=find_stack_level(),
             )
             if isna(arr).any():
@@ -1791,8 +1789,22 @@ def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
     return dtype
 
 
-# TODO: overload to clarify that if all types are np.dtype then result is np.dtype
+@overload
+def find_common_type(types: list[np.dtype]) -> np.dtype:
+    ...
+
+
+@overload
+def find_common_type(types: list[ExtensionDtype]) -> DtypeObj:
+    ...
+
+
+@overload
 def find_common_type(types: list[DtypeObj]) -> DtypeObj:
+    ...
+
+
+def find_common_type(types):
     """
     Find a common data type among the given dtypes.
 
@@ -1844,11 +1856,7 @@ def find_common_type(types: list[DtypeObj]) -> DtypeObj:
             if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t):
                 return np.dtype("object")
 
-    # error: Argument 1 to "find_common_type" has incompatible type
-    # "List[Union[dtype, ExtensionDtype]]"; expected "Sequence[Union[dtype,
-    # None, type, _SupportsDtype, str, Tuple[Any, int], Tuple[Any, Union[int,
-    # Sequence[int]]], List[Any], _DtypeDict, Tuple[Any, Any]]]"
-    return np.find_common_type(types, [])  # type: ignore[arg-type]
+    return np.find_common_type(types, [])
 
 
 def construct_2d_arraylike_from_scalar(

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -3943,7 +3943,7 @@ def _check_setitem_copy(self, t="setting", force=False):
         df['group'] = 'b'
 
         # This technically need not raise SettingWithCopy if both are view
-        # (which is not # generally guaranteed but is usually True.  However,
+        # (which is not generally guaranteed but is usually True).  However,
         # this is in general not a good practice and we recommend using .loc.
         df.iloc[0:5]['group'] = 'a'
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -3184,7 +3184,6 @@ def _union(self, other: Index, sort):
         -------
         Index
         """
-        # TODO(EA): setops-refactor, clean all this up
         lvals = self._values
         rvals = other._values
 
@@ -3244,11 +3243,13 @@ def _wrap_setop_result(self, other: Index, result) -> Index:
         else:
             result = self._shallow_copy(result, name=name)
 
-        # TODO(ExtensionIndex): revert this astype; it is a kludge to make
-        #  it possible to split ExtensionEngine from ExtensionIndex PR.
-        return result.astype(self.dtype, copy=False)
+        if type(self) is Index and self.dtype != object:
+            # i.e. ExtensionArray-backed
+            # TODO(ExtensionIndex): revert this astype; it is a kludge to make
+            #  it possible to split ExtensionEngine from ExtensionIndex PR.
+            return result.astype(self.dtype, copy=False)
+        return result
 
-    # TODO: standardize return type of non-union setops type(self vs other)
     @final
     def intersection(self, other, sort=False):
         """
@@ -6537,8 +6538,6 @@ def insert(self, loc: int, item) -> Index:
         -------
         new_index : Index
         """
-        # Note: this method is overridden by all ExtensionIndex subclasses,
-        #  so self is never backed by an EA.
         item = lib.item_from_zerodim(item)
         if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
             item = self._na_value

diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
@@ -798,8 +798,8 @@ def _chunk_to_dataframe(self) -> DataFrame:
             name = self.column_names[j]
 
             if self._column_types[j] == b"d":
-                rslt[name] = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d")
-                rslt[name] = pd.Series(rslt[name], dtype=np.float64, index=ix)
+                col_arr = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d")
+                rslt[name] = pd.Series(col_arr, dtype=np.float64, index=ix)
                 if self.convert_dates:
                     if self.column_formats[j] in const.sas_date_formats:
                         rslt[name] = _convert_datetimes(rslt[name], "d")

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -682,9 +682,9 @@ def _prepare_value_labels(self):
         self.txt: list[bytes] = []
         self.n = 0
         # Offsets (length of categories), converted to int32
-        self.off = np.array([])
+        self.off = np.array([], dtype=np.int32)
         # Values, converted to int32
-        self.val = np.array([])
+        self.val = np.array([], dtype=np.int32)
         self.len = 0
 
         # Compute lengths and setup lists of offsets and labels
@@ -1679,7 +1679,7 @@ def read(
         offset = self._lines_read * dtype.itemsize
         self.path_or_buf.seek(self.data_location + offset)
         read_lines = min(nrows, self.nobs - self._lines_read)
-        data = np.frombuffer(
+        raw_data = np.frombuffer(
             self.path_or_buf.read(read_len), dtype=dtype, count=read_lines
         )
 
@@ -1689,15 +1689,15 @@ def read(
             self._data_read = True
         # if necessary, swap the byte order to native here
         if self.byteorder != self._native_byteorder:
-            data = data.byteswap().newbyteorder()
+            raw_data = raw_data.byteswap().newbyteorder()
 
         if convert_categoricals:
             self._read_value_labels()
 
-        if len(data) == 0:
+        if len(raw_data) == 0:
             data = DataFrame(columns=self.varlist)
         else:
-            data = DataFrame.from_records(data)
+            data = DataFrame.from_records(raw_data)
             data.columns = self.varlist
 
         # If index is not specified, use actual row number rather than

diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py
@@ -1,9 +1,13 @@
 import numpy as np
+import pytest
 
 from pandas import (
     Categorical,
     CategoricalDtype,
+    NaT,
+    Timestamp,
     array,
+    to_datetime,
 )
 import pandas._testing as tm
 
@@ -12,8 +16,74 @@ class TestAstype:
     def test_astype_str_int_categories_to_nullable_int(self):
         # GH#39616
         dtype = CategoricalDtype([str(i) for i in range(5)])
-        arr = Categorical.from_codes(np.random.randint(5, size=20), dtype=dtype)
+        codes = np.random.randint(5, size=20)
+        arr = Categorical.from_codes(codes, dtype=dtype)
 
         res = arr.astype("Int64")
-        expected = array(arr.astype("int64"), dtype="Int64")
+        expected = array(codes, dtype="Int64")
         tm.assert_extension_array_equal(res, expected)
+
+    @pytest.mark.parametrize("ordered", [True, False])
+    def test_astype(self, ordered):
+        # string
+        cat = Categorical(list("abbaaccc"), ordered=ordered)
+        result = cat.astype(object)
+        expected = np.array(cat)
+        tm.assert_numpy_array_equal(result, expected)
+
+        msg = r"Cannot cast object dtype to float64"
+        with pytest.raises(ValueError, match=msg):
+            cat.astype(float)
+
+        # numeric
+        cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
+        result = cat.astype(object)
+        expected = np.array(cat, dtype=object)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = cat.astype(int)
+        expected = np.array(cat, dtype="int")
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = cat.astype(float)
+        expected = np.array(cat, dtype=float)
+        tm.assert_numpy_array_equal(result, expected)
+
+    @pytest.mark.parametrize("dtype_ordered", [True, False])
+    @pytest.mark.parametrize("cat_ordered", [True, False])
+    def test_astype_category(self, dtype_ordered, cat_ordered):
+        # GH#10696/GH#18593
+        data = list("abcaacbab")
+        cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)
+
+        # standard categories
+        dtype = CategoricalDtype(ordered=dtype_ordered)
+        result = cat.astype(dtype)
+        expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered)
+        tm.assert_categorical_equal(result, expected)
+
+        # non-standard categories
+        dtype = CategoricalDtype(list("adc"), dtype_ordered)
+        result = cat.astype(dtype)
+        expected = Categorical(data, dtype=dtype)
+        tm.assert_categorical_equal(result, expected)
+
+        if dtype_ordered is False:
+            # dtype='category' can't specify ordered, so only test once
+            result = cat.astype("category")
+            expected = cat
+            tm.assert_categorical_equal(result, expected)
+
+    def test_astype_object_datetime_categories(self):
+        # GH#40754
+        cat = Categorical(to_datetime(["2021-03-27", NaT]))
+        result = cat.astype(object)
+        expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object")
+        tm.assert_numpy_array_equal(result, expected)
+
+    def test_astype_object_timestamp_categories(self):
+        # GH#18024
+        cat = Categorical([Timestamp("2014-01-01")])
+        result = cat.astype(object)
+        expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object")
+        tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py
@@ -1,4 +1,3 @@
-import numpy as np
 import pytest
 
 from pandas.core.dtypes.dtypes import CategoricalDtype
@@ -7,10 +6,8 @@
     Categorical,
     CategoricalIndex,
     Index,
-    NaT,
     Series,
     Timestamp,
-    to_datetime,
 )
 import pandas._testing as tm
 
@@ -127,71 +124,6 @@ def test_codes_dtypes(self):
         result = result.remove_categories([f"foo{i:05d}" for i in range(300)])
         assert result.codes.dtype == "int8"
 
-    @pytest.mark.parametrize("ordered", [True, False])
-    def test_astype(self, ordered):
-        # string
-        cat = Categorical(list("abbaaccc"), ordered=ordered)
-        result = cat.astype(object)
-        expected = np.array(cat)
-        tm.assert_numpy_array_equal(result, expected)
-
-        msg = r"Cannot cast object dtype to float64"
-        with pytest.raises(ValueError, match=msg):
-            cat.astype(float)
-
-        # numeric
-        cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
-        result = cat.astype(object)
-        expected = np.array(cat, dtype=object)
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = cat.astype(int)
-        expected = np.array(cat, dtype="int")
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = cat.astype(float)
-        expected = np.array(cat, dtype=float)
-        tm.assert_numpy_array_equal(result, expected)
-
-    @pytest.mark.parametrize("dtype_ordered", [True, False])
-    @pytest.mark.parametrize("cat_ordered", [True, False])
-    def test_astype_category(self, dtype_ordered, cat_ordered):
-        # GH 10696/18593
-        data = list("abcaacbab")
-        cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)
-
-        # standard categories
-        dtype = CategoricalDtype(ordered=dtype_ordered)
-        result = cat.astype(dtype)
-        expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered)
-        tm.assert_categorical_equal(result, expected)
-
-        # non-standard categories
-        dtype = CategoricalDtype(list("adc"), dtype_ordered)
-        result = cat.astype(dtype)
-        expected = Categorical(data, dtype=dtype)
-        tm.assert_categorical_equal(result, expected)
-
-        if dtype_ordered is False:
-            # dtype='category' can't specify ordered, so only test once
-            result = cat.astype("category")
-            expected = cat
-            tm.assert_categorical_equal(result, expected)
-
-    def test_astype_object_datetime_categories(self):
-        # GH#40754
-        cat = Categorical(to_datetime(["2021-03-27", NaT]))
-        result = cat.astype(object)
-        expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object")
-        tm.assert_numpy_array_equal(result, expected)
-
-    def test_astype_object_timestamp_categories(self):
-        # GH#18024
-        cat = Categorical([Timestamp("2014-01-01")])
-        result = cat.astype(object)
-        expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object")
-        tm.assert_numpy_array_equal(result, expected)
-
     def test_iter_python_types(self):
         # GH-19909
         cat = Categorical([1, 2])