Skip to content

Commit 221f20c

Browse files
authored
Merge branch 'main' into sas/decompress3
2 parents 91f8436 + 37e6239 commit 221f20c

29 files changed

+483
-63
lines changed

.pre-commit-config.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,11 @@ repos:
229229
entry: python scripts/validate_min_versions_in_sync.py
230230
language: python
231231
files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$
232+
- id: flake8-pyi
233+
name: flake8-pyi
234+
entry: flake8 --extend-ignore=E301,E302,E305,E701,E704
235+
types: [pyi]
236+
language: python
237+
additional_dependencies:
238+
- flake8==4.0.1
239+
- flake8-pyi==22.5.1

doc/source/whatsnew/v1.5.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,9 @@ Other enhancements
275275
- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, and :class:`.IndexingError` are now exposed in ``pandas.errors`` (:issue:`27656`)
276276
- Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`)
277277
- Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)
278+
- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`)
278279
- :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`)
280+
-
279281

280282
.. ---------------------------------------------------------------------------
281283
.. _whatsnew_150.notable_bug_fixes:

pandas/_libs/index.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ class BaseMultiIndexCodesEngine:
6969
) -> npt.NDArray[np.intp]: ...
7070

7171
class ExtensionEngine:
72-
def __init__(self, values: "ExtensionArray"): ...
72+
def __init__(self, values: ExtensionArray): ...
7373
def __contains__(self, val: object) -> bool: ...
7474
def get_loc(self, val: object) -> int | slice | np.ndarray: ...
7575
def get_indexer(self, values: np.ndarray) -> npt.NDArray[np.intp]: ...

pandas/_libs/interval.pyi

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ from typing import (
44
Any,
55
Generic,
66
TypeVar,
7-
Union,
87
overload,
98
)
109

@@ -81,11 +80,7 @@ class Interval(IntervalMixin, Generic[_OrderableT]):
8180
self: Interval[_OrderableTimesT], key: _OrderableTimesT
8281
) -> bool: ...
8382
@overload
84-
def __contains__(
85-
self: Interval[_OrderableScalarT], key: Union[int, float]
86-
) -> bool: ...
87-
def __repr__(self) -> str: ...
88-
def __str__(self) -> str: ...
83+
def __contains__(self: Interval[_OrderableScalarT], key: int | float) -> bool: ...
8984
@overload
9085
def __add__(
9186
self: Interval[_OrderableTimesT], y: Timedelta
@@ -95,7 +90,7 @@ class Interval(IntervalMixin, Generic[_OrderableT]):
9590
self: Interval[int], y: _OrderableScalarT
9691
) -> Interval[_OrderableScalarT]: ...
9792
@overload
98-
def __add__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...
93+
def __add__(self: Interval[float], y: int | float) -> Interval[float]: ...
9994
@overload
10095
def __radd__(
10196
self: Interval[_OrderableTimesT], y: Timedelta
@@ -105,7 +100,7 @@ class Interval(IntervalMixin, Generic[_OrderableT]):
105100
self: Interval[int], y: _OrderableScalarT
106101
) -> Interval[_OrderableScalarT]: ...
107102
@overload
108-
def __radd__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...
103+
def __radd__(self: Interval[float], y: int | float) -> Interval[float]: ...
109104
@overload
110105
def __sub__(
111106
self: Interval[_OrderableTimesT], y: Timedelta
@@ -115,7 +110,7 @@ class Interval(IntervalMixin, Generic[_OrderableT]):
115110
self: Interval[int], y: _OrderableScalarT
116111
) -> Interval[_OrderableScalarT]: ...
117112
@overload
118-
def __sub__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...
113+
def __sub__(self: Interval[float], y: int | float) -> Interval[float]: ...
119114
@overload
120115
def __rsub__(
121116
self: Interval[_OrderableTimesT], y: Timedelta
@@ -125,33 +120,31 @@ class Interval(IntervalMixin, Generic[_OrderableT]):
125120
self: Interval[int], y: _OrderableScalarT
126121
) -> Interval[_OrderableScalarT]: ...
127122
@overload
128-
def __rsub__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...
123+
def __rsub__(self: Interval[float], y: int | float) -> Interval[float]: ...
129124
@overload
130125
def __mul__(
131126
self: Interval[int], y: _OrderableScalarT
132127
) -> Interval[_OrderableScalarT]: ...
133128
@overload
134-
def __mul__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...
129+
def __mul__(self: Interval[float], y: int | float) -> Interval[float]: ...
135130
@overload
136131
def __rmul__(
137132
self: Interval[int], y: _OrderableScalarT
138133
) -> Interval[_OrderableScalarT]: ...
139134
@overload
140-
def __rmul__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...
135+
def __rmul__(self: Interval[float], y: int | float) -> Interval[float]: ...
141136
@overload
142137
def __truediv__(
143138
self: Interval[int], y: _OrderableScalarT
144139
) -> Interval[_OrderableScalarT]: ...
145140
@overload
146-
def __truediv__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...
141+
def __truediv__(self: Interval[float], y: int | float) -> Interval[float]: ...
147142
@overload
148143
def __floordiv__(
149144
self: Interval[int], y: _OrderableScalarT
150145
) -> Interval[_OrderableScalarT]: ...
151146
@overload
152-
def __floordiv__(
153-
self: Interval[float], y: Union[int, float]
154-
) -> Interval[float]: ...
147+
def __floordiv__(self: Interval[float], y: int | float) -> Interval[float]: ...
155148
def overlaps(self: Interval[_OrderableT], other: Interval[_OrderableT]) -> bool: ...
156149

157150
def intervals_to_interval_bounds(

pandas/_libs/lib.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ def count_level_2d(
213213
def get_level_sorter(
214214
label: np.ndarray, # const int64_t[:]
215215
starts: np.ndarray, # const intp_t[:]
216-
) -> np.ndarray: ... # np.ndarray[np.intp, ndim=1]
216+
) -> np.ndarray: ... # np.ndarray[np.intp, ndim=1]
217217
def generate_bins_dt64(
218218
values: npt.NDArray[np.int64],
219219
binner: np.ndarray, # const int64_t[:]

pandas/_libs/sparse.pyi

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import numpy as np
77

88
from pandas._typing import npt
99

10-
SparseIndexT = TypeVar("SparseIndexT", bound="SparseIndex")
10+
_SparseIndexT = TypeVar("_SparseIndexT", bound=SparseIndex)
1111

1212
class SparseIndex:
1313
length: int
@@ -24,8 +24,8 @@ class SparseIndex:
2424
def lookup_array(self, indexer: npt.NDArray[np.int32]) -> npt.NDArray[np.int32]: ...
2525
def to_int_index(self) -> IntIndex: ...
2626
def to_block_index(self) -> BlockIndex: ...
27-
def intersect(self: SparseIndexT, y_: SparseIndex) -> SparseIndexT: ...
28-
def make_union(self: SparseIndexT, y_: SparseIndex) -> SparseIndexT: ...
27+
def intersect(self: _SparseIndexT, y_: SparseIndex) -> _SparseIndexT: ...
28+
def make_union(self: _SparseIndexT, y_: SparseIndex) -> _SparseIndexT: ...
2929

3030
class IntIndex(SparseIndex):
3131
indices: npt.NDArray[np.int32]

pandas/_libs/tslibs/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,14 @@
3030
"get_unit_from_dtype",
3131
"periods_per_day",
3232
"periods_per_second",
33+
"is_supported_unit",
3334
]
3435

3536
from pandas._libs.tslibs import dtypes
3637
from pandas._libs.tslibs.conversion import localize_pydatetime
3738
from pandas._libs.tslibs.dtypes import (
3839
Resolution,
40+
is_supported_unit,
3941
periods_per_day,
4042
periods_per_second,
4143
)

pandas/_libs/tslibs/conversion.pyi

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ from datetime import (
55

66
import numpy as np
77

8-
from pandas._typing import npt
9-
108
DT64NS_DTYPE: np.dtype
119
TD64NS_DTYPE: np.dtype
1210

pandas/_libs/tslibs/dtypes.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ _period_code_map: dict[str, int]
77

88
def periods_per_day(reso: int) -> int: ...
99
def periods_per_second(reso: int) -> int: ...
10+
def is_supported_unit(reso: int) -> bool: ...
1011

1112
class PeriodDtypeBase:
1213
_dtype_code: int # PeriodDtypeCode

pandas/_libs/tslibs/dtypes.pyx

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,15 @@ class NpyDatetimeUnit(Enum):
277277
NPY_FR_GENERIC = NPY_DATETIMEUNIT.NPY_FR_GENERIC
278278

279279

280+
def is_supported_unit(NPY_DATETIMEUNIT reso):
281+
return (
282+
reso == NPY_DATETIMEUNIT.NPY_FR_ns
283+
or reso == NPY_DATETIMEUNIT.NPY_FR_us
284+
or reso == NPY_DATETIMEUNIT.NPY_FR_ms
285+
or reso == NPY_DATETIMEUNIT.NPY_FR_s
286+
)
287+
288+
280289
cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
281290
if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
282291
# generic -> default to nanoseconds

pandas/_libs/tslibs/nattype.pyi

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,6 @@ from datetime import (
33
timedelta,
44
tzinfo as _tzinfo,
55
)
6-
from typing import (
7-
Any,
8-
Union,
9-
)
106

117
import numpy as np
128

@@ -18,7 +14,7 @@ nat_strings: set[str]
1814

1915
def is_null_datetimelike(val: object, inat_is_null: bool = ...) -> bool: ...
2016

21-
_NaTComparisonTypes = Union[datetime, timedelta, Period, np.datetime64, np.timedelta64]
17+
_NaTComparisonTypes = datetime | timedelta | Period | np.datetime64 | np.timedelta64
2218

2319
class _NatComparison:
2420
def __call__(self, other: _NaTComparisonTypes) -> bool: ...
@@ -117,8 +113,8 @@ class NaTType:
117113
# inject Period properties
118114
@property
119115
def qyear(self) -> float: ...
120-
def __eq__(self, other: Any) -> bool: ...
121-
def __ne__(self, other: Any) -> bool: ...
116+
def __eq__(self, other: object) -> bool: ...
117+
def __ne__(self, other: object) -> bool: ...
122118
__lt__: _NatComparison
123119
__le__: _NatComparison
124120
__gt__: _NatComparison

pandas/_libs/tslibs/offsets.pyi

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ from .timedeltas import Timedelta
2121

2222
if TYPE_CHECKING:
2323
from pandas.core.indexes.datetimes import DatetimeIndex
24-
_BaseOffsetT = TypeVar("_BaseOffsetT", bound="BaseOffset")
24+
_BaseOffsetT = TypeVar("_BaseOffsetT", bound=BaseOffset)
2525
_DatetimeT = TypeVar("_DatetimeT", bound=datetime)
2626
_TimedeltaT = TypeVar("_TimedeltaT", bound=timedelta)
2727

@@ -76,13 +76,12 @@ class BaseOffset:
7676
def __rmul__(self: _BaseOffsetT, other: int) -> _BaseOffsetT: ...
7777
def __neg__(self: _BaseOffsetT) -> _BaseOffsetT: ...
7878
def copy(self: _BaseOffsetT) -> _BaseOffsetT: ...
79-
def __repr__(self) -> str: ...
8079
@property
8180
def name(self) -> str: ...
8281
@property
8382
def rule_code(self) -> str: ...
8483
def freqstr(self) -> str: ...
85-
def apply_index(self, dtindex: "DatetimeIndex") -> "DatetimeIndex": ...
84+
def apply_index(self, dtindex: DatetimeIndex) -> DatetimeIndex: ...
8685
def _apply_array(self, dtarr) -> None: ...
8786
def rollback(self, dt: datetime) -> datetime: ...
8887
def rollforward(self, dt: datetime) -> datetime: ...

pandas/_libs/tslibs/timedeltas.pyi

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ from datetime import timedelta
22
from typing import (
33
ClassVar,
44
Literal,
5-
Type,
65
TypeVar,
76
overload,
87
)
@@ -84,7 +83,7 @@ class Timedelta(timedelta):
8483
resolution: ClassVar[Timedelta]
8584
value: int # np.int64
8685
def __new__(
87-
cls: Type[_S],
86+
cls: type[_S],
8887
value=...,
8988
unit: str = ...,
9089
**kwargs: int | float | np.integer | np.floating,

pandas/_libs/tslibs/timestamps.pyi

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,9 @@ class Timestamp(datetime):
104104
def utcnow(cls: type[_DatetimeT]) -> _DatetimeT: ...
105105
# error: Signature of "combine" incompatible with supertype "datetime"
106106
@classmethod
107-
def combine(cls, date: _date, time: _time) -> datetime: ... # type: ignore[override]
107+
def combine( # type: ignore[override]
108+
cls, date: _date, time: _time
109+
) -> datetime: ...
108110
@classmethod
109111
def fromisoformat(cls: type[_DatetimeT], date_string: str) -> _DatetimeT: ...
110112
def strftime(self, format: str) -> str: ...

pandas/_libs/writers.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def max_len_string_array(
1717
) -> int: ...
1818
def word_len(val: object) -> int: ...
1919
def string_array_replace_from_nan_rep(
20-
arr: np.ndarray, # np.ndarray[object, ndim=1]
20+
arr: np.ndarray, # np.ndarray[object, ndim=1]
2121
nan_rep: object,
2222
replace: object = ...,
2323
) -> None: ...

pandas/core/arrays/datetimes.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
get_unit_from_dtype,
3232
ints_to_pydatetime,
3333
is_date_array_normalized,
34+
is_supported_unit,
3435
is_unitless,
3536
normalize_i8_timestamps,
3637
timezones,
@@ -603,12 +604,26 @@ def astype(self, dtype, copy: bool = True):
603604
return self.copy()
604605
return self
605606

607+
elif (
608+
self.tz is None
609+
and is_datetime64_dtype(dtype)
610+
and not is_unitless(dtype)
611+
and is_supported_unit(get_unit_from_dtype(dtype))
612+
):
613+
# tz-naive unit conversion to a supported resolution, e.g. datetime64[s]
614+
res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
615+
return type(self)._simple_new(res_values, dtype=res_values.dtype)
616+
# TODO: preserve freq?
617+
606618
elif is_datetime64_ns_dtype(dtype):
607619
return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False)
608620

609-
elif self.tz is None and is_datetime64_dtype(dtype) and dtype != self.dtype:
610-
# unit conversion e.g. datetime64[s]
611-
return self._ndarray.astype(dtype)
621+
elif self.tz is not None and isinstance(dtype, DatetimeTZDtype):
622+
# tzaware unit conversion e.g. datetime64[s, UTC]
623+
np_dtype = np.dtype(dtype.str)
624+
res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
625+
return type(self)._simple_new(res_values, dtype=dtype)
626+
# TODO: preserve freq?
612627

613628
elif is_period_dtype(dtype):
614629
return self.to_period(freq=dtype.freq)

pandas/core/base.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -983,10 +983,12 @@ def unique(self):
983983

984984
if not isinstance(values, np.ndarray):
985985
result: ArrayLike = values.unique()
986-
if self.dtype.kind in ["m", "M"] and isinstance(self, ABCSeries):
987-
# GH#31182 Series._values returns EA, unpack for backward-compat
988-
if getattr(self.dtype, "tz", None) is None:
989-
result = np.asarray(result)
986+
if (
987+
isinstance(self.dtype, np.dtype) and self.dtype.kind in ["m", "M"]
988+
) and isinstance(self, ABCSeries):
989+
# GH#31182 Series._values returns an ExtensionArray (EA);
990+
# unpack numpy-dtype datetime64/timedelta64 results to ndarray for backward-compat
991+
result = np.asarray(result)
990992
else:
991993
result = unique1d(values)
992994

pandas/core/dtypes/astype.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import numpy as np
1616

1717
from pandas._libs import lib
18+
from pandas._libs.tslibs import is_unitless
1819
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
1920
from pandas._typing import (
2021
ArrayLike,
@@ -280,6 +281,20 @@ def astype_array_safe(
280281
# Ensure we don't end up with a PandasArray
281282
dtype = dtype.numpy_dtype
282283

284+
if (
285+
is_datetime64_dtype(values.dtype)
286+
# need to do np.dtype check instead of is_datetime64_dtype
287+
# otherwise pyright complains
288+
and isinstance(dtype, np.dtype)
289+
and dtype.kind == "M"
290+
and not is_unitless(dtype)
291+
and not is_dtype_equal(dtype, values.dtype)
292+
):
293+
# Unit conversion: the result would be re-cast to nanoseconds anyway, so this
294+
# is effectively just a copy (regardless of the ``copy`` keyword)
295+
# TODO(2.0): remove special-case
296+
return values.copy()
297+
283298
try:
284299
new_values = astype_array(values, dtype, copy=copy)
285300
except (ValueError, TypeError):

pandas/core/dtypes/common.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -966,7 +966,9 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
966966
tipo = get_dtype(arr_or_dtype.dtype)
967967
else:
968968
return False
969-
return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE
969+
return tipo == DT64NS_DTYPE or (
970+
isinstance(tipo, DatetimeTZDtype) and tipo._unit == "ns"
971+
)
970972

971973

972974
def is_timedelta64_ns_dtype(arr_or_dtype) -> bool:

pandas/core/indexes/base.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,16 +1064,6 @@ def astype(self, dtype, copy: bool = True):
10641064
# Ensure that self.astype(self.dtype) is self
10651065
return self.copy() if copy else self
10661066

1067-
if (
1068-
self.dtype == np.dtype("M8[ns]")
1069-
and isinstance(dtype, np.dtype)
1070-
and dtype.kind == "M"
1071-
and dtype != np.dtype("M8[ns]")
1072-
):
1073-
# For now DatetimeArray supports this by unwrapping ndarray,
1074-
# but DatetimeIndex doesn't
1075-
raise TypeError(f"Cannot cast {type(self).__name__} to dtype")
1076-
10771067
values = self._data
10781068
if isinstance(values, ExtensionArray):
10791069
with rewrite_exception(type(values).__name__, type(self).__name__):

0 commit comments

Comments
 (0)