diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi new file mode 100644 index 0000000000000..477c9fd655a4a --- /dev/null +++ b/pandas/_libs/lib.pyi @@ -0,0 +1,200 @@ +# TODO(npdtypes): Many types specified here can be made more specific/accurate; +# the more specific versions are specified in comments + +from typing import ( + Any, + Callable, +) + +import numpy as np + +from pandas._typing import ArrayLike + +# placeholder until we can specify np.ndarray[object, ndim=2] +ndarray_obj_2d = np.ndarray + +from enum import Enum + +class NoDefault(Enum): + ... + +no_default: NoDefault + + +def item_from_zerodim(val: object) -> object: ... +def infer_dtype(value: object, skipna: bool = True) -> str: ... + +def is_iterator(obj: object) -> bool: ... +def is_scalar(val: object) -> bool: ... +def is_list_like(obj: object, allow_sets: bool = True) -> bool: ... + +def is_period(val: object) -> bool: ... +def is_interval(val: object) -> bool: ... +def is_decimal(val: object) -> bool: ... +def is_complex(val: object) -> bool: ... +def is_bool(val: object) -> bool: ... +def is_integer(val: object) -> bool: ... +def is_float(val: object) -> bool: ... + +def is_interval_array(values: np.ndarray) -> bool: ... +def is_period_array(values: np.ndarray) -> bool: ... +def is_datetime64_array(values: np.ndarray) -> bool: ... +def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ... +def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ... + +def is_time_array(values: np.ndarray, skipna: bool = False): ... +def is_date_array(values: np.ndarray, skipna: bool = False): ... +def is_datetime_array(values: np.ndarray, skipna: bool = False): ... +def is_string_array(values: np.ndarray, skipna: bool = False): ... +def is_float_array(values: np.ndarray, skipna: bool = False): ... +def is_integer_array(values: np.ndarray, skipna: bool = False): ... +def is_bool_array(values: np.ndarray, skipna: bool = False): ... 
+ +def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> ArrayLike: ... + +# TODO: gen: Generator? +def fast_unique_multiple_list_gen(gen: object, sort: bool = True) -> list: ... +def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: ... +def fast_unique_multiple(arrays: list, sort: bool = True) -> list: ... + +def map_infer( + arr: np.ndarray, f: Callable[[Any], Any], convert: bool = True, ignore_na: bool = False +) -> ArrayLike: ... + +def maybe_convert_objects( + objects: np.ndarray, # np.ndarray[object] + try_float: bool = False, + safe: bool = False, + convert_datetime: bool = False, + convert_timedelta: bool = False, + convert_to_nullable_integer: bool = False, +) -> ArrayLike: ... + +def maybe_convert_numeric( + values: np.ndarray, # np.ndarray[object] + na_values: set, + convert_empty: bool = True, + coerce_numeric: bool = False, +) -> np.ndarray: ... + +# TODO: restrict `arr`? +def ensure_string_array( + arr, + na_value: object = np.nan, + convert_na_value: bool = True, + copy: bool = True, + skipna: bool = True, +) -> np.ndarray: ... # np.ndarray[object] + +def infer_datetimelike_array( + arr: np.ndarray # np.ndarray[object] +) -> str: ... + +# TODO: new_dtype -> np.dtype? +def astype_intsafe( + arr: np.ndarray, # np.ndarray[object] + new_dtype, +) -> np.ndarray: ... + +def fast_zip(ndarrays: list) -> np.ndarray: ... # np.ndarray[object] + +# TODO: can we be more specific about rows? +def to_object_array_tuples(rows: object) -> ndarray_obj_2d: ... + +def tuples_to_object_array( + tuples: np.ndarray # np.ndarray[object] +) -> ndarray_obj_2d: ... + +# TODO: can we be more specific about rows? +def to_object_array(rows: object, min_width: int = 0) -> ndarray_obj_2d: ... + +def dicts_to_array(dicts: list, columns: list) -> ndarray_obj_2d: ... + + +def maybe_booleans_to_slice( + mask: np.ndarray # ndarray[uint8_t] +) -> slice | np.ndarray: ... 
# np.ndarray[np.uint8] + +def maybe_indices_to_slice( + indices: np.ndarray, # np.ndarray[np.intp] + max_len: int, +) -> slice | np.ndarray: ... # np.ndarray[np.intp] + +def clean_index_list(obj: list) -> tuple[ + list | np.ndarray, # np.ndarray[object] | np.ndarray[np.int64] + bool, +]: ... + + +# ----------------------------------------------------------------- +# Functions which in reality take memoryviews + +def memory_usage_of_objects( + arr: np.ndarray # object[:] +) -> int: ... # np.int64 + + +# TODO: f: Callable? +# TODO: dtype -> DtypeObj? +def map_infer_mask( + arr: np.ndarray, + f: Callable[[Any], Any], + mask: np.ndarray, # const uint8_t[:] + convert: bool = ..., + na_value: Any = ..., + dtype: Any = ..., +) -> ArrayLike: ... + +def indices_fast( + index: np.ndarray, # ndarray[intp_t] + labels: np.ndarray, # const int64_t[:] + keys: list, + sorted_labels: list[np.ndarray], # list[ndarray[np.int64]] +) -> dict: ... + +def generate_slices( + labels: np.ndarray, # const intp_t[:] + ngroups: int +) -> tuple[ + np.ndarray, # np.ndarray[np.int64] + np.ndarray, # np.ndarray[np.int64] +]: ... + +def count_level_2d( + mask: np.ndarray, # ndarray[uint8_t, ndim=2, cast=True], + labels: np.ndarray, # const intp_t[:] + max_bin: int, + axis: int +) -> np.ndarray: ... # np.ndarray[np.int64, ndim=2] + +def get_level_sorter( + label: np.ndarray, # const int64_t[:] + starts: np.ndarray, # const intp_t[:] +) -> np.ndarray: ... # np.ndarray[np.intp, ndim=1] + + +def generate_bins_dt64( + values: np.ndarray, # np.ndarray[np.int64] + binner: np.ndarray, # const int64_t[:] + closed: object = "left", + hasnans: bool = False, +) -> np.ndarray: ... # np.ndarray[np.int64, ndim=1] + + +def array_equivalent_object( + left: np.ndarray, # object[:] + right: np.ndarray, # object[:] +) -> bool: ... + +def has_infs_f8( + arr: np.ndarray # const float64_t[:] +) -> bool: ... + +def has_infs_f4( + arr: np.ndarray # const float32_t[:] +) -> bool: ...
+ +def get_reverse_indexer( + indexer: np.ndarray, # const intp_t[:] + length: int, +) -> np.ndarray: ... # np.ndarray[np.intp] diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 62205b9203bf0..4f61ceee4d66b 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -6,7 +6,10 @@ import numpy as np -from pandas._libs.lib import no_default +from pandas._libs.lib import ( + NoDefault, + no_default, +) from pandas._libs.missing import is_matching_na import pandas._libs.testing as _testing @@ -54,7 +57,7 @@ def assert_almost_equal( left, right, check_dtype: Union[bool, str] = "equiv", - check_less_precise: Union[bool, int] = no_default, + check_less_precise: Union[bool, int, NoDefault] = no_default, rtol: float = 1.0e-5, atol: float = 1.0e-8, **kwargs, @@ -104,7 +107,11 @@ def assert_almost_equal( FutureWarning, stacklevel=2, ) - rtol = atol = _get_tol_from_less_precise(check_less_precise) + # error: Argument 1 to "_get_tol_from_less_precise" has incompatible + # type "Union[bool, int, NoDefault]"; expected "Union[bool, int]" + rtol = atol = _get_tol_from_less_precise( + check_less_precise # type: ignore[arg-type] + ) if isinstance(left, Index): assert_index_equal( @@ -242,7 +249,7 @@ def assert_index_equal( right: Index, exact: Union[bool, str] = "equiv", check_names: bool = True, - check_less_precise: Union[bool, int] = no_default, + check_less_precise: Union[bool, int, NoDefault] = no_default, check_exact: bool = True, check_categorical: bool = True, check_order: bool = True, @@ -331,7 +338,11 @@ def _get_ilevel_values(index, level): FutureWarning, stacklevel=2, ) - rtol = atol = _get_tol_from_less_precise(check_less_precise) + # error: Argument 1 to "_get_tol_from_less_precise" has incompatible + # type "Union[bool, int, NoDefault]"; expected "Union[bool, int]" + rtol = atol = _get_tol_from_less_precise( + check_less_precise # type: ignore[arg-type] + ) # instance validation _check_isinstance(left, right, Index) diff 
--git a/pandas/core/apply.py b/pandas/core/apply.py index 305b4eafd0cec..9d6e81ed8dda5 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1022,7 +1022,16 @@ def apply_standard(self) -> FrameOrSeriesUnion: mapped = obj._values.map(f) else: values = obj.astype(object)._values - mapped = lib.map_infer(values, f, convert=self.convert_dtype) + # error: Argument 2 to "map_infer" has incompatible type + # "Union[Callable[..., Any], str, List[Union[Callable[..., Any], str]], + # Dict[Hashable, Union[Union[Callable[..., Any], str], + # List[Union[Callable[..., Any], str]]]]]"; expected + # "Callable[[Any], Any]" + mapped = lib.map_infer( + values, + f, # type: ignore[arg-type] + convert=self.convert_dtype, + ) if len(mapped) and isinstance(mapped[0], ABCSeries): # GH 25959 use pd.array instead of tolist diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2e7f18965d2b2..05bc945cc0c23 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -290,7 +290,9 @@ def _box_values(self, values) -> np.ndarray: """ apply box func to passed values """ - return lib.map_infer(values, self._box_func) + # error: Incompatible return value type (got + # "Union[ExtensionArray, ndarray]", expected "ndarray") + return lib.map_infer(values, self._box_func) # type: ignore[return-value] def __iter__(self): if self.ndim > 1: diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index e5b8d512367f7..c4b70fa9613bf 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -441,7 +441,9 @@ def _str_map(self, f, na_value=None, dtype: Dtype | None = None): if not na_value_is_na: mask[:] = False - return constructor(result, mask) + # error: Argument 1 to "maybe_convert_objects" has incompatible + # type "Union[ExtensionArray, ndarray]"; expected "ndarray" + return constructor(result, mask) # type: ignore[arg-type] elif is_string_dtype(dtype) and not is_object_dtype(dtype): # 
i.e. StringDtype diff --git a/pandas/core/base.py b/pandas/core/base.py index b0c2af89ad0c7..3b6ff4ac9aee4 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1194,7 +1194,8 @@ def _memory_usage(self, deep: bool = False) -> int: v = self.array.nbytes if deep and is_object_dtype(self) and not PYPY: - v += lib.memory_usage_of_objects(self._values) + values = cast(np.ndarray, self._values) + v += lib.memory_usage_of_objects(values) return v @doc( diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 714e659f99894..c01c289d9e77b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -121,18 +121,19 @@ def maybe_convert_platform( values: list | tuple | range | np.ndarray | ExtensionArray, ) -> ArrayLike: """ try to do platform conversion, allow ndarray or list here """ + arr: ArrayLike + if isinstance(values, (list, tuple, range)): arr = construct_1d_object_array_from_listlike(values) else: # The caller is responsible for ensuring that we have np.ndarray # or ExtensionArray here. 
- - # error: Incompatible types in assignment (expression has type "Union[ndarray, - # ExtensionArray]", variable has type "ndarray") - arr = values # type: ignore[assignment] + arr = values if arr.dtype == object: - arr = lib.maybe_convert_objects(arr) + # error: Argument 1 to "maybe_convert_objects" has incompatible type + # "Union[ExtensionArray, ndarray]"; expected "ndarray" + arr = lib.maybe_convert_objects(arr) # type: ignore[arg-type] return arr @@ -1436,9 +1437,13 @@ def convert_dtypes( Returns ------- + str, np.dtype, or ExtensionDtype dtype new dtype """ + inferred_dtype: str | np.dtype | ExtensionDtype + # TODO: rule out str + if ( convert_string or convert_integer or convert_boolean or convert_floating ) and isinstance(input_array, np.ndarray): diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 58da2570015b5..1360b66e77dc0 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -440,6 +440,9 @@ def is_inferred_bool_dtype(arr: ArrayLike) -> bool: This does not include the special treatment is_bool_dtype uses for Categorical. 
""" + if not isinstance(arr, np.ndarray): + return False + dtype = arr.dtype if dtype == np.dtype(bool): return True diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5d7a994c4b1f6..5ad0114d8fd01 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2021,7 +2021,13 @@ def from_records( if coerce_float: for i, arr in enumerate(arrays): if arr.dtype == object: - arrays[i] = lib.maybe_convert_objects(arr, try_float=True) + # error: Argument 1 to "maybe_convert_objects" has + # incompatible type "Union[ExtensionArray, ndarray]"; + # expected "ndarray" + arrays[i] = lib.maybe_convert_objects( + arr, # type: ignore[arg-type] + try_float=True, + ) arr_columns = ensure_index(arr_columns) if columns is None: @@ -7388,7 +7394,7 @@ def groupby( as_index: bool = True, sort: bool = True, group_keys: bool = True, - squeeze: bool = no_default, + squeeze: bool | lib.NoDefault = no_default, observed: bool = False, dropna: bool = True, ) -> DataFrameGroupBy: @@ -7410,6 +7416,8 @@ def groupby( raise TypeError("You have to supply one of 'by' and 'level'") axis = self._get_axis_number(axis) + # error: Argument "squeeze" to "DataFrameGroupBy" has incompatible type + # "Union[bool, NoDefault]"; expected "bool" return DataFrameGroupBy( obj=self, keys=by, @@ -7418,7 +7426,7 @@ def groupby( as_index=as_index, sort=sort, group_keys=group_keys, - squeeze=squeeze, + squeeze=squeeze, # type: ignore[arg-type] observed=observed, dropna=dropna, ) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4ef5aa1109074..c74c10bfbabd3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2019,15 +2019,15 @@ def __array_wrap__( ----- Series implements __array_ufunc_ so this not called for ufunc on Series. """ - result = lib.item_from_zerodim(result) - if is_scalar(result): + res = lib.item_from_zerodim(result) + if is_scalar(res): # e.g. 
we get here with np.ptp(series) # ptp also requires the item_from_zerodim - return result + return res d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False) # error: Argument 1 to "NDFrame" has incompatible type "ndarray"; # expected "BlockManager" - return self._constructor(result, **d).__finalize__( # type: ignore[arg-type] + return self._constructor(res, **d).__finalize__( # type: ignore[arg-type] self, method="__array_wrap__" ) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index de6d6c8e07144..172553205f039 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1144,6 +1144,7 @@ def _format_with_header( values = self._values if is_object_dtype(values.dtype): + values = cast(np.ndarray, values) values = lib.maybe_convert_objects(values, safe=True) result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values] diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index bac00b2399121..7bc0655ea9529 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -550,7 +550,10 @@ def _get_delete_freq(self, loc: Union[int, slice, Sequence[int]]): freq = self.freq else: if is_list_like(loc): - loc = lib.maybe_indices_to_slice( + # error: Incompatible types in assignment (expression has + # type "Union[slice, ndarray]", variable has type + # "Union[int, slice, Sequence[int]]") + loc = lib.maybe_indices_to_slice( # type: ignore[assignment] np.asarray(loc, dtype=np.intp), len(self) ) if isinstance(loc, slice) and loc.step in (1, None): diff --git a/pandas/core/series.py b/pandas/core/series.py index 36623695d7569..155ee58d505ea 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1757,7 +1757,7 @@ def groupby( as_index: bool = True, sort: bool = True, group_keys: bool = True, - squeeze: bool = no_default, + squeeze: bool | lib.NoDefault = no_default, observed: bool = False, dropna: bool = True, ) -> SeriesGroupBy: @@ -1779,6 
+1779,8 @@ def groupby( raise TypeError("You have to supply one of 'by' and 'level'") axis = self._get_axis_number(axis) + # error: Argument "squeeze" to "SeriesGroupBy" has incompatible type + # "Union[bool, NoDefault]"; expected "bool" return SeriesGroupBy( obj=self, keys=by, @@ -1787,7 +1789,7 @@ def groupby( as_index=as_index, sort=sort, group_keys=group_keys, - squeeze=squeeze, + squeeze=squeeze, # type: ignore[arg-type] observed=observed, dropna=dropna, ) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 45f1faa637b85..f2027f2707a8b 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -92,6 +92,8 @@ def g(x): return na_value return self._str_map(g, na_value=na_value, dtype=dtype) + if not isinstance(result, np.ndarray): + return result if na_value is not np.nan: np.putmask(result, mask, na_value) if result.dtype == object: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6c13350df2fa3..0333bd75a9eaf 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1318,9 +1318,14 @@ def _format(x): return str(formatter(x)) vals = extract_array(self.values, extract_numpy=True) - + if not isinstance(vals, np.ndarray): + raise TypeError( + "ExtensionArray formatting should use ExtensionArrayFormatter" + ) + inferred = lib.map_infer(vals, is_float) + inferred = cast(np.ndarray, inferred) is_float_type = ( - lib.map_infer(vals, is_float) + inferred # vals may have 2 or more dimensions & np.all(notna(vals), axis=tuple(range(1, len(vals.shape)))) ) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 6f4a6d87c7959..7986d2e4338cb 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -258,6 +258,7 @@ def _write_col_header(self, indent: int) -> None: if isinstance(self.columns, MultiIndex): template = 'colspan="{span:d}" halign="left"' + sentinel: lib.NoDefault | bool if self.fmt.sparsify: # GH3547 sentinel 
= lib.no_default