Skip to content

CLN: assorted #51078

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,6 @@ cdef class IndexEngine:
return self.unique == 1

cdef _do_unique_check(self):

# this de-facto the same
self._ensure_mapping_populated()

@property
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/indexing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ _IndexingMixinT = TypeVar("_IndexingMixinT", bound=IndexingMixin)

class NDFrameIndexerBase(Generic[_IndexingMixinT]):
name: str
# in practise obj is either a DataFrame or a Series
# in practice obj is either a DataFrame or a Series
obj: _IndexingMixinT

def __init__(self, name: str, obj: _IndexingMixinT) -> None: ...
Expand Down
7 changes: 3 additions & 4 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ from pandas._libs.util cimport (
@cython.final
@cython.freelist(32)
cdef class BlockPlacement:
# __slots__ = '_as_slice', '_as_array', '_len'
cdef:
slice _as_slice
ndarray _as_array # Note: this still allows `None`; will be intp_t
Expand Down Expand Up @@ -621,7 +620,7 @@ cdef class NumpyBlock(SharedBlock):
public ndarray values

def __cinit__(self, ndarray values, BlockPlacement placement, int ndim):
# set values here the (implicit) call to SharedBlock.__cinit__ will
# set values here; the (implicit) call to SharedBlock.__cinit__ will
# set placement and ndim
self.values = values

Expand All @@ -643,7 +642,7 @@ cdef class NDArrayBackedBlock(SharedBlock):
NDArrayBacked values

def __cinit__(self, NDArrayBacked values, BlockPlacement placement, int ndim):
# set values here the (implicit) call to SharedBlock.__cinit__ will
# set values here; the (implicit) call to SharedBlock.__cinit__ will
# set placement and ndim
self.values = values

Expand All @@ -662,7 +661,7 @@ cdef class Block(SharedBlock):
public object values

def __cinit__(self, object values, BlockPlacement placement, int ndim):
# set values here the (implicit) call to SharedBlock.__cinit__ will
# set values here; the (implicit) call to SharedBlock.__cinit__ will
# set placement and ndim
self.values = values

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray:

@cython.wraparound(False)
@cython.boundscheck(False)
# Can add const once https://github.com/cython/cython/issues/1772 resolved
# TODO(cython3): Can add const once cython#1772 is resolved
def has_infs(floating[:] arr) -> bool:
cdef:
Py_ssize_t i, n = len(arr)
Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ from libc.string cimport (


cdef extern from "Python.h":
# TODO(cython3): get this from cpython.unicode
object PyUnicode_FromString(char *v)


Expand Down Expand Up @@ -453,14 +454,12 @@ cdef class TextReader:

self.skipfooter = skipfooter

# suboptimal
if usecols is not None:
self.has_usecols = 1
# GH-20558, validate usecols at higher level and only pass clean
# usecols into TextReader.
self.usecols = usecols

# TODO: XXX?
if skipfooter > 0:
self.parser.on_bad_lines = SKIP

Expand Down Expand Up @@ -501,7 +500,6 @@ cdef class TextReader:
self.dtype = dtype
self.use_nullable_dtypes = use_nullable_dtypes

# XXX
self.noconvert = set()

self.index_col = index_col
Expand Down Expand Up @@ -761,7 +759,7 @@ cdef class TextReader:
# Corner case, not enough lines in the file
if self.parser.lines < data_line + 1:
field_count = len(header[0])
else: # not self.has_usecols:
else:

field_count = self.parser.line_fields[data_line]

Expand Down Expand Up @@ -1409,6 +1407,8 @@ def _maybe_upcast(arr, use_nullable_dtypes: bool = False):
The casted array.
"""
if is_extension_array_dtype(arr.dtype):
# TODO: the docstring says arr is an ndarray, in which case this cannot
# be reached. Is that incorrect?
return arr

na_value = na_values[arr.dtype]
Expand Down
6 changes: 2 additions & 4 deletions pandas/_libs/reduction.pyi
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from typing import Any

import numpy as np
from pandas._typing import DtypeObj

from pandas._typing import ExtensionDtype

def check_result_array(obj: object, dtype: np.dtype | ExtensionDtype) -> None: ...
def check_result_array(obj: object, dtype: DtypeObj) -> None: ...
def extract_result(res: object) -> Any: ...
3 changes: 0 additions & 3 deletions pandas/_libs/sparse.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,6 @@ cdef class BlockIndex(SparseIndex):
self.nblocks = np.int32(len(self.blocs))
self.npoints = self.blengths.sum()

# self.block_start = blocs
# self.block_end = blocs + blengths

self.check_integrity()

def __reduce__(self):
Expand Down
10 changes: 5 additions & 5 deletions pandas/_libs/sparse_op_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -137,16 +137,16 @@ cdef tuple block_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_,
{{dtype}}_t[:] y_,
BlockIndex yindex,
{{dtype}}_t yfill):
'''
"""
Binary operator on BlockIndex objects with fill values
'''
"""

cdef:
BlockIndex out_index
Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices
int32_t xbp = 0, ybp = 0 # block positions
Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices
int32_t xbp = 0, ybp = 0 # block positions
int32_t xloc, yloc
Py_ssize_t xblock = 0, yblock = 0 # block numbers
Py_ssize_t xblock = 0, yblock = 0 # block numbers

{{dtype}}_t[:] x, y
ndarray[{{rdtype}}_t, ndim=1] out
Expand Down
14 changes: 7 additions & 7 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def format_array_from_datetime(

Parameters
----------
values : a 1-d i8 array
values : ndarray[int64_t], arbitrary ndim
tz : tzinfo or None, default None
format : str or None, default None
a strftime capable string
Expand Down Expand Up @@ -260,9 +260,9 @@ def array_with_unit_to_datetime(
cdef:
Py_ssize_t i, n=len(values)
int64_t mult
bint is_ignore = errors=="ignore"
bint is_coerce = errors=="coerce"
bint is_raise = errors=="raise"
bint is_ignore = errors == "ignore"
bint is_coerce = errors == "coerce"
bint is_raise = errors == "raise"
ndarray[int64_t] iresult
tzinfo tz = None
float fval
Expand Down Expand Up @@ -446,9 +446,9 @@ cpdef array_to_datetime(
npy_datetimestruct dts
bint utc_convert = bool(utc)
bint seen_datetime_offset = False
bint is_raise = errors=="raise"
bint is_ignore = errors=="ignore"
bint is_coerce = errors=="coerce"
bint is_raise = errors == "raise"
bint is_ignore = errors == "ignore"
bint is_coerce = errors == "coerce"
bint is_same_offsets
_TSObject _ts
float tz_offset
Expand Down
4 changes: 1 addition & 3 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
from pandas._libs.tslibs.timezones cimport (
get_utcoffset,
is_utc,
maybe_get_tz,
)
from pandas._libs.tslibs.util cimport (
is_datetime64_object,
Expand Down Expand Up @@ -124,7 +123,7 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1:
dt64obj = np.datetime64(ts, unit)
return get_datetime64_nanos(dt64obj, NPY_FR_ns)

# cast the unit, multiply base/frace separately
# cast the unit, multiply base/frac separately
# to avoid precision issues from float -> int
try:
base = <int64_t>ts
Expand Down Expand Up @@ -380,7 +379,6 @@ cdef _TSObject convert_datetime_to_tsobject(
obj.creso = reso
obj.fold = ts.fold
if tz is not None:
tz = maybe_get_tz(tz)

if ts.tzinfo is not None:
# Convert the current timezone to the passed timezone
Expand Down
6 changes: 1 addition & 5 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2612,11 +2612,7 @@ class Period(_Period):

if freq is None and ordinal != NPY_NAT:
# Skip NaT, since it doesn't have a resolution
try:
freq = attrname_to_abbrevs[reso]
except KeyError:
raise ValueError(f"Invalid frequency or could not "
f"infer: {reso}")
freq = attrname_to_abbrevs[reso]
freq = to_offset(freq)

elif PyDateTime_Check(value):
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ cpdef int64_t delta_to_nanoseconds(
) except? -1
cdef convert_to_timedelta64(object ts, str unit)
cdef bint is_any_td_scalar(object obj)
cdef object ensure_td64ns(object ts)


cdef class _Timedelta(timedelta):
Expand Down
4 changes: 0 additions & 4 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -691,10 +691,6 @@ cdef timedelta_from_spec(object number, object frac, object unit):
"values and are not supported."
)

if unit == "M":
# To parse ISO 8601 string, 'M' should be treated as minute,
# not month
unit = "m"
unit = parse_timedelta_unit(unit)

n = "".join(number) + "." + "".join(frac)
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/tzconversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ cdef _get_utc_bounds_zoneinfo(ndarray vals, tz, NPY_DATETIMEUNIT creso):

pandas_datetime_to_datetimestruct(val, creso, &dts)
# casting to pydatetime drops nanoseconds etc, which we will
# need to re-add later as 'extra''
# need to re-add later as 'extra'
extra = (dts.ps // 1000) * (pps // 1_000_000_000)

dt = datetime_new(dts.year, dts.month, dts.day, dts.hour,
Expand Down
9 changes: 0 additions & 9 deletions pandas/_libs/tslibs/util.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,6 @@
from cpython.object cimport PyTypeObject


cdef extern from *:
"""
PyObject* char_to_string(const char* data) {
return PyUnicode_FromString(data);
}
"""
object char_to_string(const char* data)


cdef extern from "Python.h":
# Note: importing extern-style allows us to declare these as nogil
# functions, whereas `from cpython cimport` does not.
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/writers.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,4 @@ def word_len(val: object) -> int: ...
def string_array_replace_from_nan_rep(
arr: np.ndarray, # np.ndarray[object, ndim=1]
nan_rep: object,
replace: object = ...,
) -> None: ...
6 changes: 2 additions & 4 deletions pandas/_libs/writers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -161,15 +161,13 @@ cpdef inline Py_ssize_t word_len(object val):
def string_array_replace_from_nan_rep(
ndarray[object, ndim=1] arr,
object nan_rep,
object replace=np.nan
) -> None:
"""
Replace the values in the array with 'replacement' if
they are 'nan_rep'. Return the same array.
Replace the values in the array with np.nan if they are nan_rep.
"""
cdef:
Py_ssize_t length = len(arr), i = 0

for i in range(length):
if arr[i] == nan_rep:
arr[i] = replace
arr[i] = np.nan
16 changes: 5 additions & 11 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2030,17 +2030,11 @@ def _sequence_to_dt64ns(
)
if tz and inferred_tz:
# two timezones: convert to intended from base UTC repr
if data.dtype == "i8":
# GH#42505
# by convention, these are _already_ UTC, e.g
return data.view(DT64NS_DTYPE), tz, None

if timezones.is_utc(tz):
# Fastpath, avoid copy made in tzconversion
utc_vals = data.view("i8")
else:
utc_vals = tz_convert_from_utc(data.view("i8"), tz)
data = utc_vals.view(DT64NS_DTYPE)
assert data.dtype == "i8"
# GH#42505
# by convention, these are _already_ UTC, e.g
return data.view(DT64NS_DTYPE), tz, None

elif inferred_tz:
tz = inferred_tz

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def __eq__(self, other: Any) -> bool:

def __hash__(self) -> int:
# for python>=3.10, different nan objects have different hashes
# we need to avoid that und thus use hash function with old behavior
# we need to avoid that and thus use hash function with old behavior
return object_hash(tuple(getattr(self, attr) for attr in self._metadata))

def __ne__(self, other: Any) -> bool:
Expand Down
10 changes: 2 additions & 8 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
tz_compare,
)
from pandas._libs.tslibs.dtypes import (
NpyDatetimeUnit,
PeriodDtypeBase,
abbrev_to_npy_unit,
)
from pandas._typing import (
Dtype,
Expand Down Expand Up @@ -722,13 +722,7 @@ def _creso(self) -> int:
"""
The NPY_DATETIMEUNIT corresponding to this dtype's resolution.
"""
reso = {
"s": NpyDatetimeUnit.NPY_FR_s,
"ms": NpyDatetimeUnit.NPY_FR_ms,
"us": NpyDatetimeUnit.NPY_FR_us,
"ns": NpyDatetimeUnit.NPY_FR_ns,
}[self.unit]
return reso.value
return abbrev_to_npy_unit(self.unit)

@property
def unit(self) -> str_type:
Expand Down
1 change: 1 addition & 0 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,7 @@ def _cython_transform(
"transform", obj._values, how, axis, **kwargs
)
except NotImplementedError as err:
# e.g. test_groupby_raises_string
raise TypeError(f"{how} is not supported for {obj.dtype} dtype") from err

return obj._constructor(result, index=self.obj.index, name=obj.name)
Expand Down
Loading