diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 9d5922f8a50bd..aba635e19995a 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -516,9 +516,9 @@ def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True): Returns ------- - tuple of tuples - left : (ndarray, object, array) - right : (ndarray, object, array) + tuple of + left : ndarray + right : ndarray closed: str """ cdef: diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi new file mode 100644 index 0000000000000..1177e82906190 --- /dev/null +++ b/pandas/_libs/missing.pyi @@ -0,0 +1,15 @@ +import numpy as np +from numpy import typing as npt + +class NAType: ... + +NA: NAType + +def is_matching_na( + left: object, right: object, nan_matches_none: bool = ... +) -> bool: ... +def isposinf_scalar(val: object) -> bool: ... +def isneginf_scalar(val: object) -> bool: ... +def checknull(val: object, inf_as_na: bool = ...) -> bool: ... +def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ... +def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ... diff --git a/pandas/_libs/tslibs/dtypes.pyi b/pandas/_libs/tslibs/dtypes.pyi index 8c510b05de4ce..8e47993e9d85f 100644 --- a/pandas/_libs/tslibs/dtypes.pyi +++ b/pandas/_libs/tslibs/dtypes.pyi @@ -18,33 +18,33 @@ class PeriodDtypeBase: def resolution(self) -> Resolution: ... class FreqGroup(Enum): - FR_ANN: int = ... - FR_QTR: int = ... - FR_MTH: int = ... - FR_WK: int = ... - FR_BUS: int = ... - FR_DAY: int = ... - FR_HR: int = ... - FR_MIN: int = ... - FR_SEC: int = ... - FR_MS: int = ... - FR_US: int = ... - FR_NS: int = ... - FR_UND: int = ... + FR_ANN: int + FR_QTR: int + FR_MTH: int + FR_WK: int + FR_BUS: int + FR_DAY: int + FR_HR: int + FR_MIN: int + FR_SEC: int + FR_MS: int + FR_US: int + FR_NS: int + FR_UND: int @staticmethod def get_freq_group(code: int) -> FreqGroup: ... class Resolution(Enum): - RESO_NS: int = ... - RESO_US: int = ... 
- RESO_MS: int = ... - RESO_SEC: int = ... - RESO_MIN: int = ... - RESO_HR: int = ... - RESO_DAY: int = ... - RESO_MTH: int = ... - RESO_QTR: int = ... - RESO_YR: int = ... + RESO_NS: int + RESO_US: int + RESO_MS: int + RESO_SEC: int + RESO_MIN: int + RESO_HR: int + RESO_DAY: int + RESO_MTH: int + RESO_QTR: int + RESO_YR: int def __lt__(self, other: Resolution) -> bool: ... def __ge__(self, other: Resolution) -> bool: ... @property diff --git a/pandas/_libs/tslibs/nattype.pyi b/pandas/_libs/tslibs/nattype.pyi index 6a5555cfff030..a7ee9a70342d4 100644 --- a/pandas/_libs/tslibs/nattype.pyi +++ b/pandas/_libs/tslibs/nattype.pyi @@ -12,6 +12,8 @@ NaT: NaTType iNaT: int nat_strings: set[str] +def is_null_datetimelike(val: object, inat_is_null: bool = ...) -> bool: ... + class NaTType(datetime): value: np.int64 def asm8(self) -> np.datetime64: ... diff --git a/pandas/_libs/tslibs/np_datetime.pyi b/pandas/_libs/tslibs/np_datetime.pyi new file mode 100644 index 0000000000000..db0c277b73bd5 --- /dev/null +++ b/pandas/_libs/tslibs/np_datetime.pyi @@ -0,0 +1 @@ +class OutOfBoundsDatetime(ValueError): ... diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index f293557a51ac2..7e6d8fa38aa45 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -3573,7 +3573,7 @@ cpdef to_offset(freq): Parameters ---------- - freq : str, tuple, datetime.timedelta, DateOffset or None + freq : str, datetime.timedelta, BaseOffset or None Returns ------- @@ -3586,7 +3586,7 @@ cpdef to_offset(freq): See Also -------- - DateOffset : Standard kind of date increment used for a date range. + BaseOffset : Standard kind of date increment used for a date range. 
Examples -------- diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi index a89d0aecfc26c..17df594a39c44 100644 --- a/pandas/_libs/tslibs/timestamps.pyi +++ b/pandas/_libs/tslibs/timestamps.pyi @@ -17,7 +17,6 @@ import numpy as np from pandas._libs.tslibs import ( BaseOffset, - NaT, NaTType, Period, Timedelta, @@ -25,7 +24,7 @@ from pandas._libs.tslibs import ( _S = TypeVar("_S") -def integer_op_not_supported(obj) -> None: ... +def integer_op_not_supported(obj) -> TypeError: ... class Timestamp(datetime): min: ClassVar[Timestamp] diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index df71501d55b20..c6987d9a11e4c 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -512,7 +512,9 @@ def _cmp_method(self, other, op): # ------------------------------------------------------------------------ # String methods interface - _str_na_value = StringDtype.na_value + # error: Incompatible types in assignment (expression has type "NAType", + # base class "PandasArray" defined the type as "float") + _str_na_value = StringDtype.na_value # type: ignore[assignment] def _str_map( self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 79ea7731466d4..3b04490ae098c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -912,6 +912,10 @@ def maybe_upcast( # We get a copy in all cases _except_ (values.dtype == new_dtype and not copy) upcast_values = values.astype(new_dtype, copy=copy) + # error: Incompatible return value type (got "Tuple[ndarray[Any, dtype[Any]], + # Union[Union[str, int, float, bool], Union[Period, Timestamp, Timedelta, Any]]]", + # expected "Tuple[NumpyArrayT, Union[Union[str, int, float, bool], Union[Period, + # Timestamp, Timedelta, Any]]]") return upcast_values, fill_value # type: ignore[return-value] diff --git a/pandas/core/dtypes/dtypes.py 
b/pandas/core/dtypes/dtypes.py index 71da0a4b20b41..e74d73b84e94b 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -876,15 +876,15 @@ def freq(self): @classmethod def _parse_dtype_strict(cls, freq: str_type) -> BaseOffset: - if isinstance(freq, str): + if isinstance(freq, str): # note: freq is already of type str! if freq.startswith("period[") or freq.startswith("Period["): m = cls._match.search(freq) if m is not None: freq = m.group("freq") - freq = to_offset(freq) - if freq is not None: - return freq + freq_offset = to_offset(freq) + if freq_offset is not None: + return freq_offset raise ValueError("could not construct PeriodDtype") diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 47949334df021..4e3306e84c1a1 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -241,7 +241,10 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False): if inf_as_na and is_categorical_dtype(dtype): result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na) else: - result = values.isna() + # error: Incompatible types in assignment (expression has type + # "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has + # type "ndarray[Any, dtype[bool_]]") + result = values.isna() # type: ignore[assignment] elif is_string_or_object_np_dtype(values.dtype): result = _isna_string_dtype(values, inf_as_na=inf_as_na) elif needs_i8_conversion(dtype): diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index d21c80b81b582..57bacba0d4bee 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -12,8 +12,8 @@ def kleene_or( - left: bool | np.ndarray, - right: bool | np.ndarray, + left: bool | np.ndarray | libmissing.NAType, + right: bool | np.ndarray | libmissing.NAType, left_mask: np.ndarray | None, right_mask: np.ndarray | None, ): @@ -37,12 +37,13 @@ def kleene_or( The result of the logical or, and the new mask. 
""" # To reduce the number of cases, we ensure that `left` & `left_mask` - # always come from an array, not a scalar. This is safe, since because + # always come from an array, not a scalar. This is safe, since # A | B == B | A if left_mask is None: return kleene_or(right, left, right_mask, left_mask) - assert isinstance(left, np.ndarray) + if not isinstance(left, np.ndarray): + raise TypeError("Either `left` or `right` need to be a np.ndarray.") raise_for_nan(right, method="or") @@ -73,8 +74,8 @@ def kleene_or( def kleene_xor( - left: bool | np.ndarray, - right: bool | np.ndarray, + left: bool | np.ndarray | libmissing.NAType, + right: bool | np.ndarray | libmissing.NAType, left_mask: np.ndarray | None, right_mask: np.ndarray | None, ): @@ -99,16 +100,20 @@ def kleene_xor( result, mask: ndarray[bool] The result of the logical xor, and the new mask. """ + # To reduce the number of cases, we ensure that `left` & `left_mask` + # always come from an array, not a scalar. This is safe, since + # A ^ B == B ^ A if left_mask is None: return kleene_xor(right, left, right_mask, left_mask) + if not isinstance(left, np.ndarray): + raise TypeError("Either `left` or `right` need to be a np.ndarray.") + raise_for_nan(right, method="xor") if right is libmissing.NA: result = np.zeros_like(left) else: - # error: Incompatible types in assignment (expression has type - # "Union[bool, Any]", variable has type "ndarray") - result = left ^ right # type: ignore[assignment] + result = left ^ right if right_mask is None: if right is libmissing.NA: @@ -146,12 +151,13 @@ def kleene_and( The result of the logical xor, and the new mask. """ # To reduce the number of cases, we ensure that `left` & `left_mask` - # always come from an array, not a scalar. This is safe, since because - # A | B == B | A + # always come from an array, not a scalar. 
This is safe, since + # A & B == B & A if left_mask is None: return kleene_and(right, left, right_mask, left_mask) - assert isinstance(left, np.ndarray) + if not isinstance(left, np.ndarray): + raise TypeError("Either `left` or `right` need to be a np.ndarray.") raise_for_nan(right, method="and") if right is libmissing.NA: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index f132dd88d5147..e00defcfcffd1 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2012,30 +2012,30 @@ def _adjust_dates_anchored( if closed == "right": if foffset > 0: # roll back - fresult = first.value - foffset + fresult_int = first.value - foffset else: - fresult = first.value - freq.nanos + fresult_int = first.value - freq.nanos if loffset > 0: # roll forward - lresult = last.value + (freq.nanos - loffset) + lresult_int = last.value + (freq.nanos - loffset) else: # already the end of the road - lresult = last.value + lresult_int = last.value else: # closed == 'left' if foffset > 0: - fresult = first.value - foffset + fresult_int = first.value - foffset else: # start of the road - fresult = first.value + fresult_int = first.value if loffset > 0: # roll forward - lresult = last.value + (freq.nanos - loffset) + lresult_int = last.value + (freq.nanos - loffset) else: - lresult = last.value + freq.nanos - fresult = Timestamp(fresult) - lresult = Timestamp(lresult) + lresult_int = last.value + freq.nanos + fresult = Timestamp(fresult_int) + lresult = Timestamp(lresult_int) if first_tzinfo is not None: fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo) if last_tzinfo is not None: diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 2ce5c0cbea272..6b0380a292f07 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -193,7 +193,7 @@ def rep(x, r): return result def _str_match( - self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None + self, pat: str, case: bool = 
True, flags: int = 0, na: Scalar | None = None ): if not case: flags |= re.IGNORECASE @@ -208,7 +208,7 @@ def _str_fullmatch( pat: str | re.Pattern, case: bool = True, flags: int = 0, - na: Scalar = None, + na: Scalar | None = None, ): if not case: flags |= re.IGNORECASE diff --git a/pandas/tests/arrays/boolean/test_logical.py b/pandas/tests/arrays/boolean/test_logical.py index 938fa8f1a5d6a..b4cca635fa238 100644 --- a/pandas/tests/arrays/boolean/test_logical.py +++ b/pandas/tests/arrays/boolean/test_logical.py @@ -6,6 +6,11 @@ import pandas as pd import pandas._testing as tm from pandas.arrays import BooleanArray +from pandas.core.ops.mask_ops import ( + kleene_and, + kleene_or, + kleene_xor, +) from pandas.tests.extension.base import BaseOpsUtil @@ -239,3 +244,11 @@ def test_no_masked_assumptions(self, other, all_logical_operators): result = getattr(a, all_logical_operators)(other) expected = getattr(b, all_logical_operators)(other) tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("operation", [kleene_or, kleene_xor, kleene_and]) +def test_error_both_scalar(operation): + msg = r"Either `left` or `right` need to be a np\.ndarray." + with pytest.raises(TypeError, match=msg): + # masks need to be non-None, otherwise it ends up in an infinite recursion + operation(True, True, np.zeros(1), np.zeros(1))