diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 94d21f39dc61a..2565d7998c9c7 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -238,8 +238,6 @@ cdef class IndexEngine: return self.unique == 1 cdef _do_unique_check(self): - - # this de-facto the same self._ensure_mapping_populated() @property diff --git a/pandas/_libs/indexing.pyi b/pandas/_libs/indexing.pyi index b219f991f2362..3ae5c5044a2f7 100644 --- a/pandas/_libs/indexing.pyi +++ b/pandas/_libs/indexing.pyi @@ -9,7 +9,7 @@ _IndexingMixinT = TypeVar("_IndexingMixinT", bound=IndexingMixin) class NDFrameIndexerBase(Generic[_IndexingMixinT]): name: str - # in practise obj is either a DataFrame or a Series + # in practice obj is either a DataFrame or a Series obj: _IndexingMixinT def __init__(self, name: str, obj: _IndexingMixinT) -> None: ... diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index ee51a4fd402fb..3333ac1115177 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -33,7 +33,6 @@ from pandas._libs.util cimport ( @cython.final @cython.freelist(32) cdef class BlockPlacement: - # __slots__ = '_as_slice', '_as_array', '_len' cdef: slice _as_slice ndarray _as_array # Note: this still allows `None`; will be intp_t @@ -621,7 +620,7 @@ cdef class NumpyBlock(SharedBlock): public ndarray values def __cinit__(self, ndarray values, BlockPlacement placement, int ndim): - # set values here the (implicit) call to SharedBlock.__cinit__ will + # set values here; the (implicit) call to SharedBlock.__cinit__ will # set placement and ndim self.values = values @@ -643,7 +642,7 @@ cdef class NDArrayBackedBlock(SharedBlock): NDArrayBacked values def __cinit__(self, NDArrayBacked values, BlockPlacement placement, int ndim): - # set values here the (implicit) call to SharedBlock.__cinit__ will + # set values here; the (implicit) call to SharedBlock.__cinit__ will # set placement and ndim self.values = values @@ -662,7 +661,7 @@ cdef class Block(SharedBlock): public object values def __cinit__(self, object values, BlockPlacement placement, int ndim): - # set values here the (implicit) call to SharedBlock.__cinit__ will + # set values here; the (implicit) call to SharedBlock.__cinit__ will # set placement and ndim self.values = values diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 16d5bbaad9de9..2931f178a7c1c 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -492,7 +492,7 @@ def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray: @cython.wraparound(False) @cython.boundscheck(False) -# Can add const once https://github.com/cython/cython/issues/1772 resolved +# TODO(cython3): Can add const once cython#1772 is resolved def has_infs(floating[:] arr) -> bool: cdef: Py_ssize_t i, n = len(arr) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 6150c2b262fc8..998a6f69a930a 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -46,6 +46,7 @@ from libc.string cimport ( cdef extern from "Python.h": + # TODO(cython3): get this from cpython.unicode object PyUnicode_FromString(char *v) @@ -453,14 +454,12 @@ cdef class TextReader: self.skipfooter = skipfooter - # suboptimal if usecols is not None: self.has_usecols = 1 # GH-20558, validate usecols at higher level and only pass clean # usecols into TextReader. self.usecols = usecols - # TODO: XXX? 
if skipfooter > 0: self.parser.on_bad_lines = SKIP @@ -501,7 +500,6 @@ cdef class TextReader: self.dtype = dtype self.use_nullable_dtypes = use_nullable_dtypes - # XXX self.noconvert = set() self.index_col = index_col @@ -761,7 +759,7 @@ cdef class TextReader: # Corner case, not enough lines in the file if self.parser.lines < data_line + 1: field_count = len(header[0]) - else: # not self.has_usecols: + else: field_count = self.parser.line_fields[data_line] @@ -1409,6 +1407,8 @@ def _maybe_upcast(arr, use_nullable_dtypes: bool = False): The casted array. """ if is_extension_array_dtype(arr.dtype): + # TODO: the docstring says arr is an ndarray, in which case this cannot + # be reached. Is that incorrect? return arr na_value = na_values[arr.dtype] diff --git a/pandas/_libs/reduction.pyi b/pandas/_libs/reduction.pyi index ad73e94137583..525546f26c854 100644 --- a/pandas/_libs/reduction.pyi +++ b/pandas/_libs/reduction.pyi @@ -1,8 +1,6 @@ from typing import Any -import numpy as np +from pandas._typing import DtypeObj -from pandas._typing import ExtensionDtype - -def check_result_array(obj: object, dtype: np.dtype | ExtensionDtype) -> None: ... +def check_result_array(obj: object, dtype: DtypeObj) -> None: ... def extract_result(res: object) -> Any: ... diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 45ddade7b4eb5..74f7653ebbe0c 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -301,9 +301,6 @@ cdef class BlockIndex(SparseIndex): self.nblocks = np.int32(len(self.blocs)) self.npoints = self.blengths.sum() - # self.block_start = blocs - # self.block_end = blocs + blengths - self.check_integrity() def __reduce__(self): diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index 0e310e91fab74..ac0c705fafbcd 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -137,16 +137,16 @@ cdef tuple block_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, {{dtype}}_t[:] y_, BlockIndex yindex, {{dtype}}_t yfill): - ''' + """ Binary operator on BlockIndex objects with fill values - ''' + """ cdef: BlockIndex out_index - Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices - int32_t xbp = 0, ybp = 0 # block positions + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xbp = 0, ybp = 0 # block positions int32_t xloc, yloc - Py_ssize_t xblock = 0, yblock = 0 # block numbers + Py_ssize_t xblock = 0, yblock = 0 # block numbers {{dtype}}_t[:] x, y ndarray[{{rdtype}}_t, ndim=1] out diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 9d9b93f274c60..783b8b8939c74 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -115,7 +115,7 @@ def format_array_from_datetime( Parameters ---------- - values : a 1-d i8 array + values : ndarray[int64_t], arbitrary ndim tz : tzinfo or None, default None format : str or None, default None a strftime capable string @@ -260,9 +260,9 @@ def array_with_unit_to_datetime( cdef: Py_ssize_t i, n=len(values) int64_t mult - bint is_ignore = errors=="ignore" - bint is_coerce = errors=="coerce" - bint is_raise = errors=="raise" + bint is_ignore = errors == "ignore" + bint is_coerce = errors == "coerce" + bint is_raise = errors == "raise" ndarray[int64_t] iresult tzinfo tz = None float fval @@ -446,9 +446,9 @@ cpdef array_to_datetime( npy_datetimestruct dts bint utc_convert = bool(utc) bint seen_datetime_offset = False - bint is_raise = errors=="raise" - bint is_ignore = errors=="ignore" - bint is_coerce = errors=="coerce" + bint is_raise = errors 
== "raise" + bint is_ignore = errors == "ignore" + bint is_coerce = errors == "coerce" bint is_same_offsets _TSObject _ts float tz_offset diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 5b636ff69a6a6..62fff2ace1627 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -53,7 +53,6 @@ from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.timezones cimport ( get_utcoffset, is_utc, - maybe_get_tz, ) from pandas._libs.tslibs.util cimport ( is_datetime64_object, @@ -124,7 +123,7 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1: dt64obj = np.datetime64(ts, unit) return get_datetime64_nanos(dt64obj, NPY_FR_ns) - # cast the unit, multiply base/frace separately + # cast the unit, multiply base/frac separately # to avoid precision issues from float -> int try: base = ts @@ -380,7 +379,6 @@ cdef _TSObject convert_datetime_to_tsobject( obj.creso = reso obj.fold = ts.fold if tz is not None: - tz = maybe_get_tz(tz) if ts.tzinfo is not None: # Convert the current timezone to the passed timezone diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index e36198da03d04..bd56773cddb18 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -2612,11 +2612,7 @@ class Period(_Period): if freq is None and ordinal != NPY_NAT: # Skip NaT, since it doesn't have a resolution - try: - freq = attrname_to_abbrevs[reso] - except KeyError: - raise ValueError(f"Invalid frequency or could not " - f"infer: {reso}") + freq = attrname_to_abbrevs[reso] freq = to_offset(freq) elif PyDateTime_Check(value): diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 3f37ef7eb1e3f..8a354c4cb631f 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -10,7 +10,6 @@ cpdef int64_t delta_to_nanoseconds( ) except? -1 cdef convert_to_timedelta64(object ts, str unit) cdef bint is_any_td_scalar(object obj) -cdef object ensure_td64ns(object ts) cdef class _Timedelta(timedelta): diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index feae4d1c28f83..d7352f80132a2 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -691,10 +691,6 @@ cdef timedelta_from_spec(object number, object frac, object unit): "values and are not supported." ) - if unit == "M": - # To parse ISO 8601 string, 'M' should be treated as minute, - # not month - unit = "m" unit = parse_timedelta_unit(unit) n = "".join(number) + "." 
+ "".join(frac) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 547286bd40b64..c5f3b0ab7154f 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -545,7 +545,7 @@ cdef _get_utc_bounds_zoneinfo(ndarray vals, tz, NPY_DATETIMEUNIT creso): pandas_datetime_to_datetimestruct(val, creso, &dts) # casting to pydatetime drops nanoseconds etc, which we will - # need to re-add later as 'extra'' + # need to re-add later as 'extra' extra = (dts.ps // 1000) * (pps // 1_000_000_000) dt = datetime_new(dts.year, dts.month, dts.day, dts.hour, diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index d8bc9363f1a23..4e55bc1c48fd0 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -2,15 +2,6 @@ from cpython.object cimport PyTypeObject -cdef extern from *: - """ - PyObject* char_to_string(const char* data) { - return PyUnicode_FromString(data); - } - """ - object char_to_string(const char* data) - - cdef extern from "Python.h": # Note: importing extern-style allows us to declare these as nogil # functions, whereas `from cpython cimport` does not. diff --git a/pandas/_libs/writers.pyi b/pandas/_libs/writers.pyi index 611c0c7cd1512..7b41856525dad 100644 --- a/pandas/_libs/writers.pyi +++ b/pandas/_libs/writers.pyi @@ -17,5 +17,4 @@ def word_len(val: object) -> int: ... def string_array_replace_from_nan_rep( arr: np.ndarray, # np.ndarray[object, ndim=1] nan_rep: object, - replace: object = ..., ) -> None: ... diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index fbd08687d7c82..bd5c0290b2879 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -161,15 +161,13 @@ cpdef inline Py_ssize_t word_len(object val): def string_array_replace_from_nan_rep( ndarray[object, ndim=1] arr, object nan_rep, - object replace=np.nan ) -> None: """ - Replace the values in the array with 'replacement' if - they are 'nan_rep'. Return the same array. + Replace the values in the array with np.nan if they are nan_rep. 
""" cdef: Py_ssize_t length = len(arr), i = 0 for i in range(length): if arr[i] == nan_rep: - arr[i] = replace + arr[i] = np.nan diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d570a8822649a..49e2d3e8a6637 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2030,17 +2030,11 @@ def _sequence_to_dt64ns( ) if tz and inferred_tz: # two timezones: convert to intended from base UTC repr - if data.dtype == "i8": - # GH#42505 - # by convention, these are _already_ UTC, e.g - return data.view(DT64NS_DTYPE), tz, None - - if timezones.is_utc(tz): - # Fastpath, avoid copy made in tzconversion - utc_vals = data.view("i8") - else: - utc_vals = tz_convert_from_utc(data.view("i8"), tz) - data = utc_vals.view(DT64NS_DTYPE) + assert data.dtype == "i8" + # GH#42505 + # by convention, these are _already_ UTC, e.g + return data.view(DT64NS_DTYPE), tz, None + elif inferred_tz: tz = inferred_tz diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 5efdef4a9385a..bce2a82f057f3 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -134,7 +134,7 @@ def __eq__(self, other: Any) -> bool: def __hash__(self) -> int: # for python>=3.10, different nan objects have different hashes - # we need to avoid that und thus use hash function with old behavior + # we need to avoid that and thus use hash function with old behavior return object_hash(tuple(getattr(self, attr) for attr in self._metadata)) def __ne__(self, other: Any) -> bool: diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 7d2e0a4d71c52..33ff6d1eee686 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -28,8 +28,8 @@ tz_compare, ) from pandas._libs.tslibs.dtypes import ( - NpyDatetimeUnit, PeriodDtypeBase, + abbrev_to_npy_unit, ) from pandas._typing import ( Dtype, @@ -722,13 +722,7 @@ def _creso(self) -> int: """ The NPY_DATETIMEUNIT corresponding to this dtype's resolution. """ - reso = { - "s": NpyDatetimeUnit.NPY_FR_s, - "ms": NpyDatetimeUnit.NPY_FR_ms, - "us": NpyDatetimeUnit.NPY_FR_us, - "ns": NpyDatetimeUnit.NPY_FR_ns, - }[self.unit] - return reso.value + return abbrev_to_npy_unit(self.unit) @property def unit(self) -> str_type: diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 2340c36d14301..68c7ba72729e6 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -503,6 +503,7 @@ def _cython_transform( "transform", obj._values, how, axis, **kwargs ) except NotImplementedError as err: + # e.g. test_groupby_raises_string raise TypeError(f"{how} is not supported for {obj.dtype} dtype") from err return obj._constructor(result, index=self.obj.index, name=obj.name) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 2bf0a8f0b4293..eba97f41dc66a 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -49,7 +49,7 @@ class providing the base-class of operations. ArrayLike, Axis, AxisInt, - Dtype, + DtypeObj, FillnaOptions, IndexLabel, NDFrameT, @@ -3175,13 +3175,13 @@ def quantile( f"numeric_only={numeric_only} and dtype {self.obj.dtype}" ) - def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, Dtype | None]: + def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]: if is_object_dtype(vals): raise TypeError( "'quantile' cannot be performed against 'object' dtypes!" 
) - inference: Dtype | None = None + inference: DtypeObj | None = None if isinstance(vals, BaseMaskedArray) and is_numeric_dtype(vals.dtype): out = vals.to_numpy(dtype=float, na_value=np.nan) inference = vals.dtype @@ -3209,7 +3209,7 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, Dtype | None]: def post_processor( vals: np.ndarray, - inference: Dtype | None, + inference: DtypeObj | None, result_mask: np.ndarray | None, orig_vals: ArrayLike, ) -> ArrayLike: @@ -3773,22 +3773,12 @@ def blk_func(values: ArrayLike) -> ArrayLike: # Operate block-wise instead of column-by-column is_ser = obj.ndim == 1 mgr = self._get_data_to_aggregate() - orig_mgr_len = len(mgr) if numeric_only: mgr = mgr.get_numeric_data() res_mgr = mgr.grouped_reduce(blk_func) - if not is_ser and len(res_mgr.items) != orig_mgr_len: - if len(res_mgr.items) == 0: - # We re-call grouped_reduce to get the right exception message - mgr.grouped_reduce(blk_func) - # grouped_reduce _should_ raise, so this should not be reached - raise TypeError( # pragma: no cover - "All columns were dropped in grouped_reduce" - ) - if is_ser: out = self._wrap_agged_manager(res_mgr) else: diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index ced3190206f37..d28812c8aa275 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -15,7 +15,6 @@ Generic, Hashable, Iterator, - NoReturn, Sequence, final, ) @@ -906,9 +905,6 @@ def codes_info(self) -> npt.NDArray[np.intp]: if self.indexer is not None: sorter = np.lexsort((ids, self.indexer)) ids = ids[sorter] - ids = ensure_platform_int(ids) - # TODO: if numpy annotates np.lexsort, this ensure_platform_int - # may become unnecessary return ids @final @@ -1012,21 +1008,14 @@ def agg_series( # test_groupby_empty_with_category gets here with self.ngroups == 0 # and len(obj) > 0 - if len(obj) == 0: - # SeriesGrouper would raise if we were to call _aggregate_series_fast - result = self._aggregate_series_pure_python(obj, func) - - elif not isinstance(obj._values, np.ndarray): - result = self._aggregate_series_pure_python(obj, func) - + if len(obj) > 0 and not isinstance(obj._values, np.ndarray): # we can preserve a little bit more aggressively with EA dtype # because maybe_cast_pointwise_result will do a try/except # with _from_sequence. NB we are assuming here that _from_sequence # is sufficiently strict that it casts appropriately. 
preserve_dtype = True - else: - result = self._aggregate_series_pure_python(obj, func) + result = self._aggregate_series_pure_python(obj, func) npvalues = lib.maybe_convert_objects(result, try_float=False) if preserve_dtype: @@ -1223,12 +1212,6 @@ def groupings(self) -> list[grouper.Grouping]: ) return [ping] - def _aggregate_series_fast(self, obj: Series, func: Callable) -> NoReturn: - # -> np.ndarray[object] - raise NotImplementedError( - "This should not be reached; use _aggregate_series_pure_python" - ) - def _is_indexed_like(obj, axes, axis: AxisInt) -> bool: if isinstance(obj, Series): @@ -1261,7 +1244,7 @@ def __init__( assert isinstance(axis, int), axis @cache_readonly - def slabels(self) -> npt.NDArray[np.intp]: + def _slabels(self) -> npt.NDArray[np.intp]: # Sorted labels return self.labels.take(self._sort_idx) @@ -1271,20 +1254,20 @@ def _sort_idx(self) -> npt.NDArray[np.intp]: return get_group_index_sorter(self.labels, self.ngroups) def __iter__(self) -> Iterator: - sdata = self.sorted_data + sdata = self._sorted_data if self.ngroups == 0: # we are inside a generator, rather than raise StopIteration # we merely return signal the end return - starts, ends = lib.generate_slices(self.slabels, self.ngroups) + starts, ends = lib.generate_slices(self._slabels, self.ngroups) for start, end in zip(starts, ends): yield self._chop(sdata, slice(start, end)) @cache_readonly - def sorted_data(self) -> NDFrameT: + def _sorted_data(self) -> NDFrameT: return self.data.take(self._sort_idx, axis=self.axis) def _chop(self, sdata, slice_obj: slice) -> NDFrame: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index be84e292b63e7..1cb577e4f3c92 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1416,6 +1416,7 @@ def to_flat_index(self: _IndexT) -> _IndexT: """ return self + @final def to_series(self, index=None, name: Hashable = None) -> Series: """ Create a Series with both index and values equal to the index keys. @@ -5226,6 +5227,7 @@ def putmask(self, mask, value) -> Index: if is_object_dtype(self): # pragma: no cover raise err + # See also: Block.coerce_to_target_dtype dtype = self._find_common_type_compat(value) return self.astype(dtype).putmask(mask, value) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index fc40ae4a99be0..b2305d0fe1cbf 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -24,7 +24,6 @@ timezones, to_offset, ) -from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas._libs.tslibs.offsets import prefix_mapping from pandas._typing import ( Dtype, @@ -420,53 +419,23 @@ def _get_time_micros(self) -> npt.NDArray[np.int64]: """ values = self._data._local_timestamps() - reso = self._data._creso - ppd = periods_per_day(reso) + ppd = periods_per_day(self._data._creso) frac = values % ppd - if reso == NpyDatetimeUnit.NPY_FR_ns.value: + if self.unit == "ns": micros = frac // 1000 - elif reso == NpyDatetimeUnit.NPY_FR_us.value: + elif self.unit == "us": micros = frac - elif reso == NpyDatetimeUnit.NPY_FR_ms.value: + elif self.unit == "ms": micros = frac * 1000 - elif reso == NpyDatetimeUnit.NPY_FR_s.value: + elif self.unit == "s": micros = frac * 1_000_000 else: # pragma: no cover - raise NotImplementedError(reso) + raise NotImplementedError(self.unit) micros[self._isnan] = -1 return micros - def to_series(self, index=None, name=None): - """ - Create a Series with both index and values equal to the index keys. 
- - Useful with map for returning an indexer based on an index. - - Parameters - ---------- - index : Index, optional - Index of resulting Series. If None, defaults to original index. - name : str, optional - Name of resulting Series. If None, defaults to name of original - index. - - Returns - ------- - Series - """ - from pandas import Series - - if index is None: - index = self._view() - if name is None: - name = self.name - - values = self._values.copy() - - return Series(values, index=index, name=name) - def snap(self, freq: Frequency = "S") -> DatetimeIndex: """ Snap time stamps to nearest occurring frequency. @@ -502,7 +471,7 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: dt.datetime): Parameters ---------- - reso : str + reso : Resolution Resolution provided by parsed string. parsed : datetime Datetime from parsed string. diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b32f0954a7c30..bcd073051cb27 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -413,7 +413,6 @@ def get_loc(self, key): try: return self._partial_date_slice(reso, parsed) except KeyError as err: - # TODO: pass if method is not None, like DTI does? raise KeyError(key) from err if reso == self._resolution_obj: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8fb6a18ca137a..afbf2735865dc 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1180,6 +1180,7 @@ def fillna( def interpolate( self, + *, method: FillnaOptions = "pad", axis: AxisInt = 0, index: Index | None = None, @@ -1212,15 +1213,15 @@ def interpolate( # split improves performance in ndarray.copy() return self.split_and_operate( type(self).interpolate, - method, - axis, - index, - inplace, - limit, - limit_direction, - limit_area, - fill_value, - downcast, + method=method, + axis=axis, + index=index, + inplace=inplace, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value=fill_value, + downcast=downcast, **kwargs, ) @@ -1598,8 +1599,7 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: def values_for_json(self) -> np.ndarray: return np.asarray(self.values) - # error: Signature of "interpolate" incompatible with supertype "Block" - def interpolate( # type: ignore[override] + def interpolate( self, method: FillnaOptions = "pad", axis: int = 0, diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index c39da8b8156b4..c203792be2694 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -775,8 +775,7 @@ def test_inv(self, simple_index): # check that we are matching Series behavior res2 = ~Series(idx) - # TODO(2.0): once we preserve dtype, check_dtype can be True - tm.assert_series_equal(res2, Series(expected), check_dtype=False) + tm.assert_series_equal(res2, Series(expected)) else: if idx.dtype.kind == "f": msg = "ufunc 'invert' not supported for the input types" diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index 83a5e7165ae85..458a37c994091 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -43,15 +43,15 @@ def listlike_box(request): @pytest.fixture( - params=[ - *tm.ALL_REAL_NUMPY_DTYPES, + params=tm.ALL_REAL_NUMPY_DTYPES + + [ "object", "category", "datetime64[ns]", "timedelta64[ns]", ] ) -def any_numpy_dtype_for_small_pos_integer_indexes(request): +def any_dtype_for_small_pos_integer_indexes(request): """ Dtypes that can be given to an 
Index with small positive integers. diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 2b05a42e9e526..001efe07b5d2b 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -561,10 +561,10 @@ def test_intersection_duplicates_all_indexes(index): def test_union_duplicate_index_subsets_of_each_other( - any_numpy_dtype_for_small_pos_integer_indexes, + any_dtype_for_small_pos_integer_indexes, ): # GH#31326 - dtype = any_numpy_dtype_for_small_pos_integer_indexes + dtype = any_dtype_for_small_pos_integer_indexes a = Index([1, 2, 2, 3], dtype=dtype) b = Index([3, 3, 4], dtype=dtype) @@ -578,10 +578,10 @@ def test_union_duplicate_index_subsets_of_each_other( def test_union_with_duplicate_index_and_non_monotonic( - any_numpy_dtype_for_small_pos_integer_indexes, + any_dtype_for_small_pos_integer_indexes, ): # GH#36289 - dtype = any_numpy_dtype_for_small_pos_integer_indexes + dtype = any_dtype_for_small_pos_integer_indexes a = Index([1, 0, 0], dtype=dtype) b = Index([0, 1], dtype=dtype) expected = Index([0, 0, 1], dtype=dtype) @@ -622,10 +622,10 @@ def test_union_nan_in_both(dup): def test_union_with_duplicate_index_not_subset_and_non_monotonic( - any_numpy_dtype_for_small_pos_integer_indexes, + any_dtype_for_small_pos_integer_indexes, ): # GH#36289 - dtype = any_numpy_dtype_for_small_pos_integer_indexes + dtype = any_dtype_for_small_pos_integer_indexes a = Index([1, 0, 2], dtype=dtype) b = Index([0, 0, 1], dtype=dtype) expected = Index([0, 0, 1, 2], dtype=dtype)
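# --- Illustration (not part of the patch) --------------------------------
# The pandas/_libs/writers.pyx hunk above drops the `replace` parameter
# (default np.nan) from string_array_replace_from_nan_rep, so np.nan is now
# hard-coded. A minimal pure-Python sketch of the simplified in-place
# behavior; this is an assumption-level mirror of the Cython loop, not the
# actual implementation:
import numpy as np

def string_array_replace_from_nan_rep(arr: np.ndarray, nan_rep: object) -> None:
    # Replace, in place, every element equal to nan_rep with np.nan.
    for i in range(len(arr)):
        if arr[i] == nan_rep:
            arr[i] = np.nan

# Usage: an object-dtype array containing a "nan" sentinel string.
arr = np.array(["a", "nan", "b"], dtype=object)
string_array_replace_from_nan_rep(arr, "nan")   # arr -> ["a", nan, "b"]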