Skip to content

CLN: assorted follow-ups #45184

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
6 changes: 1 addition & 5 deletions pandas/core/array_algos/putmask.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,7 @@ def putmask_smart(values: np.ndarray, mask: npt.NDArray[np.bool_], new) -> np.nd
return values

dtype = find_common_type([values.dtype, new.dtype])
# error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
# "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type,
# _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]],
# List[Any], _DTypeDict, Tuple[Any, Any]]]"
values = values.astype(dtype) # type: ignore[arg-type]
values = values.astype(dtype)

np.putmask(values, mask, new)
return values
Expand Down
24 changes: 16 additions & 8 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,7 +1130,6 @@ def astype_nansafe(
"is deprecated and will raise in a future version. "
"Use .view(...) instead.",
FutureWarning,
# stacklevel chosen to be correct when reached via Series.astype
stacklevel=find_stack_level(),
)
if isna(arr).any():
Expand All @@ -1152,7 +1151,6 @@ def astype_nansafe(
"is deprecated and will raise in a future version. "
"Use .view(...) instead.",
FutureWarning,
# stacklevel chosen to be correct when reached via Series.astype
stacklevel=find_stack_level(),
)
if isna(arr).any():
Expand Down Expand Up @@ -1791,8 +1789,22 @@ def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
return dtype


# TODO: overload to clarify that if all types are np.dtype then result is np.dtype
@overload
def find_common_type(types: list[np.dtype]) -> np.dtype:
...


@overload
def find_common_type(types: list[ExtensionDtype]) -> DtypeObj:
...


@overload
def find_common_type(types: list[DtypeObj]) -> DtypeObj:
...


def find_common_type(types):
"""
Find a common data type among the given dtypes.

Expand Down Expand Up @@ -1844,11 +1856,7 @@ def find_common_type(types: list[DtypeObj]) -> DtypeObj:
if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t):
return np.dtype("object")

# error: Argument 1 to "find_common_type" has incompatible type
# "List[Union[dtype, ExtensionDtype]]"; expected "Sequence[Union[dtype,
# None, type, _SupportsDtype, str, Tuple[Any, int], Tuple[Any, Union[int,
# Sequence[int]]], List[Any], _DtypeDict, Tuple[Any, Any]]]"
return np.find_common_type(types, []) # type: ignore[arg-type]
return np.find_common_type(types, [])


def construct_2d_arraylike_from_scalar(
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3943,7 +3943,7 @@ def _check_setitem_copy(self, t="setting", force=False):
df['group'] = 'b'

# This technically need not raise SettingWithCopy if both are views
# (which is not # generally guaranteed but is usually True. However,
# (which is not generally guaranteed but is usually True). However,
# this is in general not a good practice and we recommend using .loc.
df.iloc[0:5]['group'] = 'a'

Expand Down
13 changes: 6 additions & 7 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3184,7 +3184,6 @@ def _union(self, other: Index, sort):
-------
Index
"""
# TODO(EA): setops-refactor, clean all this up
lvals = self._values
rvals = other._values

Expand Down Expand Up @@ -3244,11 +3243,13 @@ def _wrap_setop_result(self, other: Index, result) -> Index:
else:
result = self._shallow_copy(result, name=name)

# TODO(ExtensionIndex): revert this astype; it is a kludge to make
# it possible to split ExtensionEngine from ExtensionIndex PR.
return result.astype(self.dtype, copy=False)
if type(self) is Index and self.dtype != object:
# i.e. ExtensionArray-backed
# TODO(ExtensionIndex): revert this astype; it is a kludge to make
# it possible to split ExtensionEngine from ExtensionIndex PR.
return result.astype(self.dtype, copy=False)
return result

# TODO: standardize return type of non-union setops type(self vs other)
@final
def intersection(self, other, sort=False):
"""
Expand Down Expand Up @@ -6537,8 +6538,6 @@ def insert(self, loc: int, item) -> Index:
-------
new_index : Index
"""
# Note: this method is overridden by all ExtensionIndex subclasses,
# so self is never backed by an EA.
item = lib.item_from_zerodim(item)
if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
item = self._na_value
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/sas/sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,8 +798,8 @@ def _chunk_to_dataframe(self) -> DataFrame:
name = self.column_names[j]

if self._column_types[j] == b"d":
rslt[name] = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d")
rslt[name] = pd.Series(rslt[name], dtype=np.float64, index=ix)
col_arr = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d")
rslt[name] = pd.Series(col_arr, dtype=np.float64, index=ix)
if self.convert_dates:
if self.column_formats[j] in const.sas_date_formats:
rslt[name] = _convert_datetimes(rslt[name], "d")
Expand Down
12 changes: 6 additions & 6 deletions pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,9 +682,9 @@ def _prepare_value_labels(self):
self.txt: list[bytes] = []
self.n = 0
# Offsets (length of categories), converted to int32
self.off = np.array([])
self.off = np.array([], dtype=np.int32)
# Values, converted to int32
self.val = np.array([])
self.val = np.array([], dtype=np.int32)
self.len = 0

# Compute lengths and setup lists of offsets and labels
Expand Down Expand Up @@ -1679,7 +1679,7 @@ def read(
offset = self._lines_read * dtype.itemsize
self.path_or_buf.seek(self.data_location + offset)
read_lines = min(nrows, self.nobs - self._lines_read)
data = np.frombuffer(
raw_data = np.frombuffer(
self.path_or_buf.read(read_len), dtype=dtype, count=read_lines
)

Expand All @@ -1689,15 +1689,15 @@ def read(
self._data_read = True
# if necessary, swap the byte order to native here
if self.byteorder != self._native_byteorder:
data = data.byteswap().newbyteorder()
raw_data = raw_data.byteswap().newbyteorder()

if convert_categoricals:
self._read_value_labels()

if len(data) == 0:
if len(raw_data) == 0:
data = DataFrame(columns=self.varlist)
else:
data = DataFrame.from_records(data)
data = DataFrame.from_records(raw_data)
data.columns = self.varlist

# If index is not specified, use actual row number rather than
Expand Down
74 changes: 72 additions & 2 deletions pandas/tests/arrays/categorical/test_astype.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import numpy as np
import pytest

from pandas import (
Categorical,
CategoricalDtype,
NaT,
Timestamp,
array,
to_datetime,
)
import pandas._testing as tm

Expand All @@ -12,8 +16,74 @@ class TestAstype:
def test_astype_str_int_categories_to_nullable_int(self):
# GH#39616
dtype = CategoricalDtype([str(i) for i in range(5)])
arr = Categorical.from_codes(np.random.randint(5, size=20), dtype=dtype)
codes = np.random.randint(5, size=20)
arr = Categorical.from_codes(codes, dtype=dtype)

res = arr.astype("Int64")
expected = array(arr.astype("int64"), dtype="Int64")
expected = array(codes, dtype="Int64")
tm.assert_extension_array_equal(res, expected)

@pytest.mark.parametrize("ordered", [True, False])
def test_astype(self, ordered):
# string
cat = Categorical(list("abbaaccc"), ordered=ordered)
result = cat.astype(object)
expected = np.array(cat)
tm.assert_numpy_array_equal(result, expected)

msg = r"Cannot cast object dtype to float64"
with pytest.raises(ValueError, match=msg):
cat.astype(float)

# numeric
cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
result = cat.astype(object)
expected = np.array(cat, dtype=object)
tm.assert_numpy_array_equal(result, expected)

result = cat.astype(int)
expected = np.array(cat, dtype="int")
tm.assert_numpy_array_equal(result, expected)

result = cat.astype(float)
expected = np.array(cat, dtype=float)
tm.assert_numpy_array_equal(result, expected)

@pytest.mark.parametrize("dtype_ordered", [True, False])
@pytest.mark.parametrize("cat_ordered", [True, False])
def test_astype_category(self, dtype_ordered, cat_ordered):
# GH#10696/GH#18593
data = list("abcaacbab")
cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)

# standard categories
dtype = CategoricalDtype(ordered=dtype_ordered)
result = cat.astype(dtype)
expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered)
tm.assert_categorical_equal(result, expected)

# non-standard categories
dtype = CategoricalDtype(list("adc"), dtype_ordered)
result = cat.astype(dtype)
expected = Categorical(data, dtype=dtype)
tm.assert_categorical_equal(result, expected)

if dtype_ordered is False:
# dtype='category' can't specify ordered, so only test once
result = cat.astype("category")
expected = cat
tm.assert_categorical_equal(result, expected)

def test_astype_object_datetime_categories(self):
# GH#40754
cat = Categorical(to_datetime(["2021-03-27", NaT]))
result = cat.astype(object)
expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object")
tm.assert_numpy_array_equal(result, expected)

def test_astype_object_timestamp_categories(self):
# GH#18024
cat = Categorical([Timestamp("2014-01-01")])
result = cat.astype(object)
expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object")
tm.assert_numpy_array_equal(result, expected)
68 changes: 0 additions & 68 deletions pandas/tests/arrays/categorical/test_dtypes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import numpy as np
import pytest

from pandas.core.dtypes.dtypes import CategoricalDtype
Expand All @@ -7,10 +6,8 @@
Categorical,
CategoricalIndex,
Index,
NaT,
Series,
Timestamp,
to_datetime,
)
import pandas._testing as tm

Expand Down Expand Up @@ -127,71 +124,6 @@ def test_codes_dtypes(self):
result = result.remove_categories([f"foo{i:05d}" for i in range(300)])
assert result.codes.dtype == "int8"

@pytest.mark.parametrize("ordered", [True, False])
def test_astype(self, ordered):
# string
cat = Categorical(list("abbaaccc"), ordered=ordered)
result = cat.astype(object)
expected = np.array(cat)
tm.assert_numpy_array_equal(result, expected)

msg = r"Cannot cast object dtype to float64"
with pytest.raises(ValueError, match=msg):
cat.astype(float)

# numeric
cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
result = cat.astype(object)
expected = np.array(cat, dtype=object)
tm.assert_numpy_array_equal(result, expected)

result = cat.astype(int)
expected = np.array(cat, dtype="int")
tm.assert_numpy_array_equal(result, expected)

result = cat.astype(float)
expected = np.array(cat, dtype=float)
tm.assert_numpy_array_equal(result, expected)

@pytest.mark.parametrize("dtype_ordered", [True, False])
@pytest.mark.parametrize("cat_ordered", [True, False])
def test_astype_category(self, dtype_ordered, cat_ordered):
# GH 10696/18593
data = list("abcaacbab")
cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)

# standard categories
dtype = CategoricalDtype(ordered=dtype_ordered)
result = cat.astype(dtype)
expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered)
tm.assert_categorical_equal(result, expected)

# non-standard categories
dtype = CategoricalDtype(list("adc"), dtype_ordered)
result = cat.astype(dtype)
expected = Categorical(data, dtype=dtype)
tm.assert_categorical_equal(result, expected)

if dtype_ordered is False:
# dtype='category' can't specify ordered, so only test once
result = cat.astype("category")
expected = cat
tm.assert_categorical_equal(result, expected)

def test_astype_object_datetime_categories(self):
# GH#40754
cat = Categorical(to_datetime(["2021-03-27", NaT]))
result = cat.astype(object)
expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object")
tm.assert_numpy_array_equal(result, expected)

def test_astype_object_timestamp_categories(self):
# GH#18024
cat = Categorical([Timestamp("2014-01-01")])
result = cat.astype(object)
expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object")
tm.assert_numpy_array_equal(result, expected)

def test_iter_python_types(self):
# GH-19909
cat = Categorical([1, 2])
Expand Down
Loading