Skip to content

Commit 511876d

Browse files
authored
ENH: Improve hashing and eval (#347)
* ENH: Improve hash functions
* ENH: Improve eval
1 parent 7bcb0c4 commit 511876d

File tree

4 files changed

+87
-15
lines changed

4 files changed

+87
-15
lines changed

pandas-stubs/__init__.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ from . import (
66
plotting as plotting,
77
testing as testing,
88
tseries as tseries,
9+
util as util,
910
)
1011
from ._config import (
1112
describe_option as describe_option,

pandas-stubs/core/computation/eval.pyi

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,30 @@
1+
from typing import (
2+
Any,
3+
Literal,
4+
Mapping,
5+
)
6+
7+
from pandas import (
8+
DataFrame,
9+
Series,
10+
)
11+
from pandas.core.computation.ops import BinOp
12+
13+
from pandas._typing import (
14+
Scalar,
15+
npt,
16+
)
17+
118
def eval(
2-
expr,
3-
parser=...,
4-
engine: str | None = ...,
5-
truediv=...,
6-
local_dict=...,
7-
global_dict=...,
8-
resolvers=...,
9-
level=...,
10-
target=...,
11-
inplace=...,
12-
): ...
19+
expr: str | BinOp,
20+
parser: Literal["pandas", "python"] = ...,
21+
engine: Literal["python", "numexpr"] | None = ...,
22+
# Keyword only due to omitted deprecated argument
23+
*,
24+
local_dict: dict[str, Any] | None = ...,
25+
global_dict: dict[str, Any] | None = ...,
26+
resolvers: list[Mapping] | None = ...,
27+
level: int = ...,
28+
target: object | None = ...,
29+
inplace: bool = ...,
30+
) -> npt.NDArray | Scalar | DataFrame | Series | None: ...

pandas-stubs/core/util/hashing.pyi

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,22 @@
1+
import numpy as np
2+
from pandas import (
3+
DataFrame,
4+
Index,
5+
Series,
6+
)
7+
8+
from pandas._typing import (
9+
ArrayLike,
10+
npt,
11+
)
12+
113
def hash_pandas_object(
2-
obj,
14+
obj: Index | Series | DataFrame,
315
index: bool = ...,
416
encoding: str = ...,
517
hash_key: str | None = ...,
618
categorize: bool = ...,
7-
): ...
19+
) -> Series: ...
820
def hash_array(
9-
vals, encoding: str = ..., hash_key: str = ..., categorize: bool = ...
10-
): ...
21+
vals: ArrayLike, encoding: str = ..., hash_key: str = ..., categorize: bool = ...
22+
) -> npt.NDArray[np.uint64]: ...

tests/test_pandas.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
import pytest
1515
from typing_extensions import assert_type
1616

17+
from pandas._typing import Scalar
18+
1719
from tests import check
1820

1921

@@ -275,3 +277,42 @@ def test_arrow_dtype() -> None:
275277
),
276278
pd.ArrowDtype,
277279
)
280+
281+
282+
def test_hashing():
283+
a = np.array([1, 2, 3])
284+
check(assert_type(pd.util.hash_array(a), npt.NDArray[np.uint64]), np.ndarray)
285+
check(
286+
assert_type(
287+
pd.util.hash_array(a, encoding="latin1", hash_key="1", categorize=True),
288+
npt.NDArray[np.uint64],
289+
),
290+
np.ndarray,
291+
)
292+
293+
b = pd.Series(a)
294+
c = pd.DataFrame({"a": a, "b": a})
295+
d = pd.Index(b)
296+
check(assert_type(pd.util.hash_pandas_object(b), pd.Series), pd.Series)
297+
check(assert_type(pd.util.hash_pandas_object(c), pd.Series), pd.Series)
298+
check(assert_type(pd.util.hash_pandas_object(d), pd.Series), pd.Series)
299+
check(
300+
assert_type(
301+
pd.util.hash_pandas_object(
302+
d, index=True, encoding="latin1", hash_key="apple", categorize=True
303+
),
304+
pd.Series,
305+
),
306+
pd.Series,
307+
)
308+
309+
310+
def test_eval():
311+
df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]})
312+
check(
313+
assert_type(
314+
pd.eval("double_age = df.age * 2", target=df),
315+
Union[npt.NDArray, Scalar, pd.DataFrame, pd.Series, None],
316+
),
317+
pd.DataFrame,
318+
)

0 commit comments

Comments
 (0)