TYP: Subset of "Improved the type stubs in the _libs directory to help with type checking" (#44251)

twoertwein · web-flow · commit 447ef5722951 · 2021-12-13T20:38:43.000-05:00
diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx
@@ -516,9 +516,9 @@ def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True):
 
     Returns
     -------
-    tuple of tuples
-        left : (ndarray, object, array)
-        right : (ndarray, object, array)
+    tuple of
+        left : ndarray
+        right : ndarray
         closed: str
     """
     cdef:
diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi
@@ -0,0 +1,15 @@
+import numpy as np
+from numpy import typing as npt
+
+class NAType: ...
+
+NA: NAType
+
+def is_matching_na(
+    left: object, right: object, nan_matches_none: bool = ...
+) -> bool: ...
+def isposinf_scalar(val: object) -> bool: ...
+def isneginf_scalar(val: object) -> bool: ...
+def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
+def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
+def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
diff --git a/pandas/_libs/tslibs/dtypes.pyi b/pandas/_libs/tslibs/dtypes.pyi
@@ -18,33 +18,33 @@ class PeriodDtypeBase:
     def resolution(self) -> Resolution: ...
 
 class FreqGroup(Enum):
-    FR_ANN: int = ...
-    FR_QTR: int = ...
-    FR_MTH: int = ...
-    FR_WK: int = ...
-    FR_BUS: int = ...
-    FR_DAY: int = ...
-    FR_HR: int = ...
-    FR_MIN: int = ...
-    FR_SEC: int = ...
-    FR_MS: int = ...
-    FR_US: int = ...
-    FR_NS: int = ...
-    FR_UND: int = ...
+    FR_ANN: int
+    FR_QTR: int
+    FR_MTH: int
+    FR_WK: int
+    FR_BUS: int
+    FR_DAY: int
+    FR_HR: int
+    FR_MIN: int
+    FR_SEC: int
+    FR_MS: int
+    FR_US: int
+    FR_NS: int
+    FR_UND: int
     @staticmethod
     def get_freq_group(code: int) -> FreqGroup: ...
 
 class Resolution(Enum):
-    RESO_NS: int = ...
-    RESO_US: int = ...
-    RESO_MS: int = ...
-    RESO_SEC: int = ...
-    RESO_MIN: int = ...
-    RESO_HR: int = ...
-    RESO_DAY: int = ...
-    RESO_MTH: int = ...
-    RESO_QTR: int = ...
-    RESO_YR: int = ...
+    RESO_NS: int
+    RESO_US: int
+    RESO_MS: int
+    RESO_SEC: int
+    RESO_MIN: int
+    RESO_HR: int
+    RESO_DAY: int
+    RESO_MTH: int
+    RESO_QTR: int
+    RESO_YR: int
     def __lt__(self, other: Resolution) -> bool: ...
     def __ge__(self, other: Resolution) -> bool: ...
     @property
diff --git a/pandas/_libs/tslibs/nattype.pyi b/pandas/_libs/tslibs/nattype.pyi
@@ -12,6 +12,8 @@ NaT: NaTType
 iNaT: int
 nat_strings: set[str]
 
+def is_null_datetimelike(val: object, inat_is_null: bool = ...) -> bool: ...
+
 class NaTType(datetime):
     value: np.int64
     def asm8(self) -> np.datetime64: ...
diff --git a/pandas/_libs/tslibs/np_datetime.pyi b/pandas/_libs/tslibs/np_datetime.pyi
@@ -0,0 +1 @@
+class OutOfBoundsDatetime(ValueError): ...
diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
@@ -3573,7 +3573,7 @@ cpdef to_offset(freq):
 
     Parameters
     ----------
-    freq : str, tuple, datetime.timedelta, DateOffset or None
+    freq : str, datetime.timedelta, BaseOffset or None
 
     Returns
     -------
@@ -3586,7 +3586,7 @@ cpdef to_offset(freq):
 
     See Also
     --------
-    DateOffset : Standard kind of date increment used for a date range.
+    BaseOffset : Standard kind of date increment used for a date range.
 
     Examples
     --------
diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi
@@ -17,15 +17,14 @@ import numpy as np
 
 from pandas._libs.tslibs import (
     BaseOffset,
-    NaT,
     NaTType,
     Period,
     Timedelta,
 )
 
 _S = TypeVar("_S")
 
-def integer_op_not_supported(obj) -> None: ...
+def integer_op_not_supported(obj) -> TypeError: ...
 
 class Timestamp(datetime):
     min: ClassVar[Timestamp]
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -512,7 +512,9 @@ def _cmp_method(self, other, op):
 
     # ------------------------------------------------------------------------
     # String methods interface
-    _str_na_value = StringDtype.na_value
+    # error: Incompatible types in assignment (expression has type "NAType",
+    # base class "PandasArray" defined the type as "float")
+    _str_na_value = StringDtype.na_value  # type: ignore[assignment]
 
     def _str_map(
         self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -912,6 +912,10 @@ def maybe_upcast(
     # We get a copy in all cases _except_ (values.dtype == new_dtype and not copy)
     upcast_values = values.astype(new_dtype, copy=copy)
 
+    # error: Incompatible return value type (got "Tuple[ndarray[Any, dtype[Any]],
+    # Union[Union[str, int, float, bool], Union[Period, Timestamp, Timedelta, Any]]]",
+    # expected "Tuple[NumpyArrayT, Union[Union[str, int, float, bool], Union[Period,
+    # Timestamp, Timedelta, Any]]]")
     return upcast_values, fill_value  # type: ignore[return-value]
 
 
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -876,15 +876,15 @@ def freq(self):
 
     @classmethod
     def _parse_dtype_strict(cls, freq: str_type) -> BaseOffset:
-        if isinstance(freq, str):
+        if isinstance(freq, str):  # note: freq is already of type str!
             if freq.startswith("period[") or freq.startswith("Period["):
                 m = cls._match.search(freq)
                 if m is not None:
                     freq = m.group("freq")
 
-            freq = to_offset(freq)
-            if freq is not None:
-                return freq
+            freq_offset = to_offset(freq)
+            if freq_offset is not None:
+                return freq_offset
 
         raise ValueError("could not construct PeriodDtype")
 
diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
@@ -241,7 +241,10 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False):
         if inf_as_na and is_categorical_dtype(dtype):
             result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na)
         else:
-            result = values.isna()
+            # error: Incompatible types in assignment (expression has type
+            # "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has
+            # type "ndarray[Any, dtype[bool_]]")
+            result = values.isna()  # type: ignore[assignment]
     elif is_string_or_object_np_dtype(values.dtype):
         result = _isna_string_dtype(values, inf_as_na=inf_as_na)
     elif needs_i8_conversion(dtype):
diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py
@@ -12,8 +12,8 @@
 
 
 def kleene_or(
-    left: bool | np.ndarray,
-    right: bool | np.ndarray,
+    left: bool | np.ndarray | libmissing.NAType,
+    right: bool | np.ndarray | libmissing.NAType,
     left_mask: np.ndarray | None,
     right_mask: np.ndarray | None,
 ):
@@ -37,12 +37,13 @@ def kleene_or(
         The result of the logical or, and the new mask.
     """
     # To reduce the number of cases, we ensure that `left` & `left_mask`
-    # always come from an array, not a scalar. This is safe, since because
+    # always come from an array, not a scalar. This is safe, since
     # A | B == B | A
     if left_mask is None:
         return kleene_or(right, left, right_mask, left_mask)
 
-    assert isinstance(left, np.ndarray)
+    if not isinstance(left, np.ndarray):
+        raise TypeError("Either `left` or `right` need to be a np.ndarray.")
 
     raise_for_nan(right, method="or")
 
@@ -73,8 +74,8 @@ def kleene_or(
 
 
 def kleene_xor(
-    left: bool | np.ndarray,
-    right: bool | np.ndarray,
+    left: bool | np.ndarray | libmissing.NAType,
+    right: bool | np.ndarray | libmissing.NAType,
     left_mask: np.ndarray | None,
     right_mask: np.ndarray | None,
 ):
@@ -99,16 +100,20 @@ def kleene_xor(
     result, mask: ndarray[bool]
         The result of the logical xor, and the new mask.
     """
+    # To reduce the number of cases, we ensure that `left` & `left_mask`
+    # always come from an array, not a scalar. This is safe, since
+    # A ^ B == B ^ A
     if left_mask is None:
         return kleene_xor(right, left, right_mask, left_mask)
 
+    if not isinstance(left, np.ndarray):
+        raise TypeError("Either `left` or `right` need to be a np.ndarray.")
+
     raise_for_nan(right, method="xor")
     if right is libmissing.NA:
         result = np.zeros_like(left)
     else:
-        # error: Incompatible types in assignment (expression has type
-        # "Union[bool, Any]", variable has type "ndarray")
-        result = left ^ right  # type: ignore[assignment]
+        result = left ^ right
 
     if right_mask is None:
         if right is libmissing.NA:
@@ -146,12 +151,13 @@ def kleene_and(
         The result of the logical and, and the new mask.
     """
     # To reduce the number of cases, we ensure that `left` & `left_mask`
-    # always come from an array, not a scalar. This is safe, since because
-    # A | B == B | A
+    # always come from an array, not a scalar. This is safe, since
+    # A & B == B & A
     if left_mask is None:
         return kleene_and(right, left, right_mask, left_mask)
 
-    assert isinstance(left, np.ndarray)
+    if not isinstance(left, np.ndarray):
+        raise TypeError("Either `left` or `right` need to be a np.ndarray.")
     raise_for_nan(right, method="and")
 
     if right is libmissing.NA:
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -2012,30 +2012,30 @@ def _adjust_dates_anchored(
     if closed == "right":
         if foffset > 0:
             # roll back
-            fresult = first.value - foffset
+            fresult_int = first.value - foffset
         else:
-            fresult = first.value - freq.nanos
+            fresult_int = first.value - freq.nanos
 
         if loffset > 0:
             # roll forward
-            lresult = last.value + (freq.nanos - loffset)
+            lresult_int = last.value + (freq.nanos - loffset)
         else:
             # already the end of the road
-            lresult = last.value
+            lresult_int = last.value
     else:  # closed == 'left'
         if foffset > 0:
-            fresult = first.value - foffset
+            fresult_int = first.value - foffset
         else:
             # start of the road
-            fresult = first.value
+            fresult_int = first.value
 
         if loffset > 0:
             # roll forward
-            lresult = last.value + (freq.nanos - loffset)
+            lresult_int = last.value + (freq.nanos - loffset)
         else:
-            lresult = last.value + freq.nanos
-    fresult = Timestamp(fresult)
-    lresult = Timestamp(lresult)
+            lresult_int = last.value + freq.nanos
+    fresult = Timestamp(fresult_int)
+    lresult = Timestamp(lresult_int)
     if first_tzinfo is not None:
         fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo)
     if last_tzinfo is not None:
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
@@ -193,7 +193,7 @@ def rep(x, r):
             return result
 
     def _str_match(
-        self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
+        self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
     ):
         if not case:
             flags |= re.IGNORECASE
@@ -208,7 +208,7 @@ def _str_fullmatch(
         pat: str | re.Pattern,
         case: bool = True,
         flags: int = 0,
-        na: Scalar = None,
+        na: Scalar | None = None,
     ):
         if not case:
             flags |= re.IGNORECASE
diff --git a/pandas/tests/arrays/boolean/test_logical.py b/pandas/tests/arrays/boolean/test_logical.py
@@ -6,6 +6,11 @@
 import pandas as pd
 import pandas._testing as tm
 from pandas.arrays import BooleanArray
+from pandas.core.ops.mask_ops import (
+    kleene_and,
+    kleene_or,
+    kleene_xor,
+)
 from pandas.tests.extension.base import BaseOpsUtil
 
 
@@ -239,3 +244,11 @@ def test_no_masked_assumptions(self, other, all_logical_operators):
             result = getattr(a, all_logical_operators)(other)
             expected = getattr(b, all_logical_operators)(other)
             tm.assert_extension_array_equal(result, expected)
+
+
+@pytest.mark.parametrize("operation", [kleene_or, kleene_xor, kleene_and])
+def test_error_both_scalar(operation):
+    msg = r"Either `left` or `right` need to be a np\.ndarray."
+    with pytest.raises(TypeError, match=msg):
+        # masks need to be non-None, otherwise it ends up in an infinite recursion
+        operation(True, True, np.zeros(1), np.zeros(1))

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+class OutOfBoundsDatetime(ValueError): ...`