From bfd1550f9aa9cc2e934e2f156f26815289a49fb2 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 30 Sep 2022 18:55:00 +0100 Subject: [PATCH 1/2] ENH: Improve hash functions --- pandas-stubs/core/util/hashing.pyi | 12 ++++++++++-- tests/test_pandas.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/pandas-stubs/core/util/hashing.pyi b/pandas-stubs/core/util/hashing.pyi index 538f5b2a3..a5fa21003 100644 --- a/pandas-stubs/core/util/hashing.pyi +++ b/pandas-stubs/core/util/hashing.pyi @@ -1,10 +1,18 @@ +from pandas import ( + DataFrame, + Index, + Series, +) + +from pandas._typing import ArrayLike + def hash_pandas_object( - obj, + obj: Index | Series | DataFrame, index: bool = ..., encoding: str = ..., hash_key: str | None = ..., categorize: bool = ..., ): ... def hash_array( - vals, encoding: str = ..., hash_key: str = ..., categorize: bool = ... + vals: ArrayLike, encoding: str = ..., hash_key: str = ..., categorize: bool = ... ): ... diff --git a/tests/test_pandas.py b/tests/test_pandas.py index cd42caa34..864799560 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -275,3 +275,19 @@ def test_arrow_dtype() -> None: ), pd.ArrowDtype, ) + + +def test_hashing() -> None: + a = np.array([1, 2, 3]) + pd.util.hash_array(a) + pd.util.hash_array(a, encoding="latin1", hash_key="1", categorize=True) + + b = pd.Series(a) + c = pd.DataFrame({"a": a, "b": a}) + d = pd.Index(b) + pd.util.hash_pandas_object(b) + pd.util.hash_pandas_object(c) + pd.util.hash_pandas_object(d) + pd.util.hash_pandas_object( + d, index=True, encoding="latin1", hash_key="apple", categorize=True + ) From 0f6d7628332cfa63b8995f336a99f4b4f3150072 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 30 Sep 2022 19:12:42 +0100 Subject: [PATCH 2/2] ENH: Improve eval --- pandas-stubs/__init__.pyi | 1 + pandas-stubs/core/computation/eval.pyi | 40 +++++++++++++++++++------- pandas-stubs/core/util/hashing.pyi | 10 +++++-- tests/test_pandas.py | 39
++++++++++++++++++++----- 4 files changed, 69 insertions(+), 21 deletions(-) diff --git a/pandas-stubs/__init__.pyi b/pandas-stubs/__init__.pyi index bbac8bd73..b1dba7b4b 100644 --- a/pandas-stubs/__init__.pyi +++ b/pandas-stubs/__init__.pyi @@ -6,6 +6,7 @@ from . import ( plotting as plotting, testing as testing, tseries as tseries, + util as util, ) from ._config import ( describe_option as describe_option, diff --git a/pandas-stubs/core/computation/eval.pyi b/pandas-stubs/core/computation/eval.pyi index 388513783..ad4e2ed11 100644 --- a/pandas-stubs/core/computation/eval.pyi +++ b/pandas-stubs/core/computation/eval.pyi @@ -1,12 +1,30 @@ +from typing import ( + Any, + Literal, + Mapping, +) + +from pandas import ( + DataFrame, + Series, +) +from pandas.core.computation.ops import BinOp + +from pandas._typing import ( + Scalar, + npt, +) + def eval( - expr, - parser=..., - engine: str | None = ..., - truediv=..., - local_dict=..., - global_dict=..., - resolvers=..., - level=..., - target=..., - inplace=..., -): ... + expr: str | BinOp, + parser: Literal["pandas", "python"] = ..., + engine: Literal["python", "numexpr"] | None = ..., + # Keyword only due to omitted deprecated argument + *, + local_dict: dict[str, Any] | None = ..., + global_dict: dict[str, Any] | None = ..., + resolvers: list[Mapping] | None = ..., + level: int = ..., + target: object | None = ..., + inplace: bool = ..., +) -> npt.NDArray | Scalar | DataFrame | Series | None: ...
diff --git a/pandas-stubs/core/util/hashing.pyi b/pandas-stubs/core/util/hashing.pyi index a5fa21003..cedae1308 100644 --- a/pandas-stubs/core/util/hashing.pyi +++ b/pandas-stubs/core/util/hashing.pyi @@ -1,10 +1,14 @@ +import numpy as np from pandas import ( DataFrame, Index, Series, ) -from pandas._typing import ArrayLike +from pandas._typing import ( + ArrayLike, + npt, +) def hash_pandas_object( obj: Index | Series | DataFrame, @@ -12,7 +16,7 @@ def hash_pandas_object( encoding: str = ..., hash_key: str | None = ..., categorize: bool = ..., -): ... +) -> Series: ... def hash_array( vals: ArrayLike, encoding: str = ..., hash_key: str = ..., categorize: bool = ... -): ... +) -> npt.NDArray[np.uint64]: ... diff --git a/tests/test_pandas.py b/tests/test_pandas.py index 864799560..f44afc595 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -14,6 +14,8 @@ import pytest from typing_extensions import assert_type +from pandas._typing import Scalar + from tests import check @@ -279,15 +281,38 @@ def test_arrow_dtype() -> None: def test_hashing() -> None: a = np.array([1, 2, 3]) - pd.util.hash_array(a) - pd.util.hash_array(a, encoding="latin1", hash_key="1", categorize=True) + check(assert_type(pd.util.hash_array(a), npt.NDArray[np.uint64]), np.ndarray) + check( + assert_type( + pd.util.hash_array(a, encoding="latin1", hash_key="1", categorize=True), + npt.NDArray[np.uint64], + ), + np.ndarray, + ) b = pd.Series(a) c = pd.DataFrame({"a": a, "b": a}) d = pd.Index(b) - pd.util.hash_pandas_object(b) - pd.util.hash_pandas_object(c) - pd.util.hash_pandas_object(d) - pd.util.hash_pandas_object( - d, index=True, encoding="latin1", hash_key="apple", categorize=True + check(assert_type(pd.util.hash_pandas_object(b), pd.Series), pd.Series) + check(assert_type(pd.util.hash_pandas_object(c), pd.Series), pd.Series) + check(assert_type(pd.util.hash_pandas_object(d), pd.Series), pd.Series) + check( + assert_type( + pd.util.hash_pandas_object( + d, index=True, encoding="latin1",
hash_key="apple", categorize=True + ), + pd.Series, + ), + pd.Series, + ) + + +def test_eval() -> None: + df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]}) + check( + assert_type( + pd.eval("double_age = df.age * 2", target=df), + Union[npt.NDArray, Scalar, pd.DataFrame, pd.Series, None], + ), + pd.DataFrame, )