Skip to content

Commit d2ae5ee

Browse files
bashtageKevin Sheppard
and
Kevin Sheppard
authored
ENH: Improve typing of some general functions (#355)
* ENH: Improve typing of some general functions * Further typing * ENH: Add overload to factorize * BUG: Correct npt import * ENH: Improve unique * ENH: Improve typing in merge functions, cut and qcut * CLN: Remove unused imports * CLN: Catch warning * TST: Add tests for cut and fixes * TST: Add final test for cut * TST: Add tests and fixes for qcut * TYP: Add type ignore for overlapping defs * TST: Add tests for merge * TST: Add tests for merge_ordered and improve typing * TST: Add tests for merge_asof and improve typing accuracy * CLN: Remove TODO since pandas PR opened * TYP: Final refinements * TST: Fix intentionally failing test * MAINT: Refactor MergeHow and add JoinHow * TYP: Correct use of MergeHow and add test Co-authored-by: Kevin Sheppard <kevin.sheppard@gmail.com>
1 parent ba654d1 commit d2ae5ee

File tree

10 files changed

+1142
-143
lines changed

10 files changed

+1142
-143
lines changed

pandas-stubs/_typing.pyi

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,8 @@ FillnaOptions: TypeAlias = Literal["backfill", "bfill", "ffill", "pad"]
287287
ReplaceMethod: TypeAlias = Literal["pad", "ffill", "bfill"]
288288
SortKind: TypeAlias = Literal["quicksort", "mergesort", "heapsort", "stable"]
289289
NaPosition: TypeAlias = Literal["first", "last"]
290-
MergeHow: TypeAlias = Literal["left", "right", "outer", "inner"]
290+
JoinHow: TypeAlias = Literal["left", "right", "outer", "inner"]
291+
MergeHow: TypeAlias = Union[JoinHow, Literal["cross"]]
291292
JsonFrameOrient: TypeAlias = Literal[
292293
"split", "records", "index", "columns", "values", "table"
293294
]
@@ -333,4 +334,14 @@ class StyleExportDict(TypedDict, total=False):
333334

334335
CalculationMethod: TypeAlias = Literal["single", "table"]
335336

337+
ValidationOptions: TypeAlias = Literal[
338+
"one_to_one",
339+
"1:1",
340+
"one_to_many",
341+
"1:m",
342+
"many_to_one",
343+
"m:1",
344+
"many_to_many",
345+
"m:m",
346+
]
336347
__all__ = ["npt", "type_t"]

pandas-stubs/core/algorithms.pyi

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,32 @@
11
from typing import (
2-
Any,
2+
Sequence,
33
overload,
44
)
55

66
import numpy as np
7+
import pandas as pd
78
from pandas import (
89
Categorical,
10+
CategoricalIndex,
911
Index,
12+
IntervalIndex,
13+
PeriodIndex,
1014
Series,
1115
)
1216
from pandas.api.extensions import ExtensionArray
1317

1418
from pandas._typing import AnyArrayLike
1519

20+
# These are type: ignored because the Index types overlap due to inheritance but indices
21+
# with extension types return the same type while standard types return ndarray
1622
@overload
17-
def unique(values: Index) -> Index: ...
23+
def unique(values: PeriodIndex) -> PeriodIndex: ... # type: ignore[misc]
24+
@overload
25+
def unique(values: CategoricalIndex) -> CategoricalIndex: ... # type: ignore[misc]
26+
@overload
27+
def unique(values: IntervalIndex) -> IntervalIndex: ... # type: ignore[misc]
28+
@overload
29+
def unique(values: Index) -> np.ndarray: ...
1830
@overload
1931
def unique(values: Categorical) -> Categorical: ...
2032
@overload
@@ -23,14 +35,33 @@ def unique(values: Series) -> np.ndarray | ExtensionArray: ...
2335
def unique(values: np.ndarray | list) -> np.ndarray: ...
2436
@overload
2537
def unique(values: ExtensionArray) -> ExtensionArray: ...
38+
@overload
39+
def factorize(
40+
values: Sequence,
41+
sort: bool = ...,
42+
# Not actually keyword-only at runtime; the bare `*` is used to handle deprecations in 1.5.0
43+
*,
44+
use_na_sentinel: bool = ...,
45+
size_hint: int | None = ...,
46+
) -> tuple[np.ndarray, np.ndarray]: ...
47+
@overload
48+
def factorize(
49+
values: Index | Series,
50+
sort: bool = ...,
51+
# Not actually keyword-only at runtime; the bare `*` is used to handle deprecations in 1.5.0
52+
*,
53+
use_na_sentinel: bool = ...,
54+
size_hint: int | None = ...,
55+
) -> tuple[np.ndarray, Index]: ...
56+
@overload
2657
def factorize(
27-
values: Any,
58+
values: Categorical,
2859
sort: bool = ...,
2960
# Not actually keyword-only at runtime; the bare `*` is used to handle deprecations in 1.5.0
3061
*,
3162
use_na_sentinel: bool = ...,
3263
size_hint: int | None = ...,
33-
) -> tuple[np.ndarray, np.ndarray | Index]: ...
64+
) -> tuple[np.ndarray, Categorical]: ...
3465
def value_counts(
3566
values: AnyArrayLike | list | tuple,
3667
sort: bool = ...,

pandas-stubs/core/frame.pyi

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ from pandas._typing import (
7474
IndexLabel,
7575
IndexType,
7676
IntervalClosedType,
77+
JoinHow,
7778
JsonFrameOrient,
7879
Label,
7980
Level,
@@ -97,6 +98,7 @@ from pandas._typing import (
9798
Suffixes,
9899
T as TType,
99100
TimestampConvention,
101+
ValidationOptions,
100102
WriteBuffer,
101103
XMLParsers,
102104
np_ndarray_bool,
@@ -531,7 +533,7 @@ class DataFrame(NDFrame, OpsMixin):
531533
def align(
532534
self,
533535
other: DataFrame | Series,
534-
join: MergeHow = ...,
536+
join: JoinHow = ...,
535537
axis: AxisType | None = ...,
536538
level: Level | None = ...,
537539
copy: _bool = ...,
@@ -1101,21 +1103,11 @@ class DataFrame(NDFrame, OpsMixin):
11011103
self,
11021104
other: DataFrame | Series | list[DataFrame | Series],
11031105
on: _str | list[_str] | None = ...,
1104-
how: MergeHow = ...,
1106+
how: JoinHow = ...,
11051107
lsuffix: _str = ...,
11061108
rsuffix: _str = ...,
11071109
sort: _bool = ...,
1108-
validate: Literal[
1109-
"one_to_one",
1110-
"1:1",
1111-
"one_to_many",
1112-
"1:m",
1113-
"many_to_one",
1114-
"m:1",
1115-
"many_to_many",
1116-
"m:m",
1117-
]
1118-
| None = ...,
1110+
validate: ValidationOptions | None = ...,
11191111
) -> DataFrame: ...
11201112
def merge(
11211113
self,

pandas-stubs/core/reshape/melt.pyi

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,27 @@
1+
from typing import Hashable
2+
13
import numpy as np
24
from pandas.core.frame import DataFrame
35

6+
from pandas._typing import HashableT
7+
48
def melt(
59
frame: DataFrame,
610
id_vars: tuple | list | np.ndarray | None = ...,
711
value_vars: tuple | list | np.ndarray | None = ...,
812
var_name: str | None = ...,
9-
value_name: str = ...,
13+
value_name: Hashable = ...,
1014
col_level: int | str | None = ...,
1115
ignore_index: bool = ...,
1216
) -> DataFrame: ...
13-
def lreshape(data: DataFrame, groups, dropna: bool = ..., label=...) -> DataFrame: ...
17+
def lreshape(
18+
data: DataFrame, groups: dict[HashableT, list[HashableT]], dropna: bool = ...
19+
) -> DataFrame: ...
1420
def wide_to_long(
15-
df: DataFrame, stubnames, i, j, sep: str = ..., suffix: str = ...
21+
df: DataFrame,
22+
stubnames: str | list[str],
23+
i: str | list[str],
24+
j: str,
25+
sep: str = ...,
26+
suffix: str = ...,
1627
) -> DataFrame: ...

pandas-stubs/core/reshape/merge.pyi

Lines changed: 72 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from typing import Sequence
1+
from typing import (
2+
Literal,
3+
overload,
4+
)
25

36
from pandas import (
47
DataFrame,
@@ -8,131 +11,95 @@ from pandas import (
811
from pandas._libs.tslibs import Timedelta
912
from pandas._typing import (
1013
AnyArrayLike,
14+
HashableT,
15+
JoinHow,
1116
Label,
17+
MergeHow,
18+
ValidationOptions,
1219
)
1320

1421
def merge(
1522
left: DataFrame | Series,
1623
right: DataFrame | Series,
17-
how: str = ...,
18-
on: Label | Sequence | AnyArrayLike | None = ...,
19-
left_on: Label | Sequence | AnyArrayLike | None = ...,
20-
right_on: Label | Sequence | AnyArrayLike | None = ...,
24+
how: MergeHow = ...,
25+
on: Label | list[HashableT] | AnyArrayLike | None = ...,
26+
left_on: Label | list[HashableT] | AnyArrayLike | None = ...,
27+
right_on: Label | list[HashableT] | AnyArrayLike | None = ...,
2128
left_index: bool = ...,
2229
right_index: bool = ...,
2330
sort: bool = ...,
24-
suffixes: Sequence[str | None] = ...,
31+
suffixes: list[str | None]
32+
| tuple[str, str]
33+
| tuple[None, str]
34+
| tuple[str, None] = ...,
2535
copy: bool = ...,
2636
indicator: bool | str = ...,
27-
validate: str = ...,
37+
validate: ValidationOptions = ...,
2838
) -> DataFrame: ...
39+
@overload
2940
def merge_ordered(
30-
left: DataFrame | Series,
41+
left: DataFrame,
42+
right: DataFrame,
43+
on: Label | list[HashableT] | None = ...,
44+
left_on: Label | list[HashableT] | None = ...,
45+
right_on: Label | list[HashableT] | None = ...,
46+
left_by: Label | list[HashableT] | None = ...,
47+
right_by: Label | list[HashableT] | None = ...,
48+
fill_method: Literal["ffill"] | None = ...,
49+
suffixes: list[str | None]
50+
| tuple[str, str]
51+
| tuple[None, str]
52+
| tuple[str, None] = ...,
53+
how: JoinHow = ...,
54+
) -> DataFrame: ...
55+
@overload
56+
def merge_ordered(
57+
left: Series,
3158
right: DataFrame | Series,
32-
on: Label | Sequence | AnyArrayLike | None = ...,
33-
left_on: Label | Sequence | AnyArrayLike | None = ...,
34-
right_on: Label | Sequence | AnyArrayLike | None = ...,
35-
left_by: str | list[str] | None = ...,
36-
right_by: str | list[str] | None = ...,
37-
fill_method: str | None = ...,
38-
suffixes: Sequence[str | None] = ...,
39-
how: str = ...,
59+
on: Label | list[HashableT] | None = ...,
60+
left_on: Label | list[HashableT] | None = ...,
61+
right_on: Label | list[HashableT] | None = ...,
62+
left_by: None = ...,
63+
right_by: None = ...,
64+
fill_method: Literal["ffill"] | None = ...,
65+
suffixes: list[str | None]
66+
| tuple[str, str]
67+
| tuple[None, str]
68+
| tuple[str, None] = ...,
69+
how: JoinHow = ...,
70+
) -> DataFrame: ...
71+
@overload
72+
def merge_ordered(
73+
left: DataFrame | Series,
74+
right: Series,
75+
on: Label | list[HashableT] | None = ...,
76+
left_on: Label | list[HashableT] | None = ...,
77+
right_on: Label | list[HashableT] | None = ...,
78+
left_by: None = ...,
79+
right_by: None = ...,
80+
fill_method: Literal["ffill"] | None = ...,
81+
suffixes: list[str | None]
82+
| tuple[str, str]
83+
| tuple[None, str]
84+
| tuple[str, None] = ...,
85+
how: JoinHow = ...,
4086
) -> DataFrame: ...
4187
def merge_asof(
4288
left: DataFrame | Series,
4389
right: DataFrame | Series,
4490
on: Label | None = ...,
45-
left_on: Label | AnyArrayLike | None = ...,
46-
right_on: Label | AnyArrayLike | None = ...,
91+
left_on: Label | None = ...,
92+
right_on: Label | None = ...,
4793
left_index: bool = ...,
4894
right_index: bool = ...,
49-
by: str | list[str] | None = ...,
50-
left_by: str | None = ...,
51-
right_by: str | None = ...,
52-
suffixes: Sequence[str | None] = ...,
95+
by: Label | list[HashableT] | None = ...,
96+
left_by: Label | list[HashableT] | None = ...,
97+
right_by: Label | list[HashableT] | None = ...,
98+
suffixes: list[str | None]
99+
| tuple[str, str]
100+
| tuple[None, str]
101+
| tuple[str, None] = ...,
53102
tolerance: int | Timedelta | None = ...,
54103
allow_exact_matches: bool = ...,
55-
direction: str = ...,
104+
direction: Literal["backward", "forward", "nearest"] = ...,
56105
) -> DataFrame: ...
57-
58-
class _MergeOperation:
59-
left = ...
60-
right = ...
61-
how = ...
62-
axis = ...
63-
on = ...
64-
left_on = ...
65-
right_on = ...
66-
copy = ...
67-
suffixes = ...
68-
sort = ...
69-
left_index = ...
70-
right_index = ...
71-
indicator = ...
72-
indicator_name = ...
73-
def __init__(
74-
self,
75-
left: Series | DataFrame,
76-
right: Series | DataFrame,
77-
how: str = ...,
78-
on=...,
79-
left_on=...,
80-
right_on=...,
81-
axis=...,
82-
left_index: bool = ...,
83-
right_index: bool = ...,
84-
sort: bool = ...,
85-
suffixes=...,
86-
copy: bool = ...,
87-
indicator: bool = ...,
88-
validate=...,
89-
) -> None: ...
90-
def get_result(self): ...
91-
92-
class _OrderedMerge(_MergeOperation):
93-
fill_method = ...
94-
def __init__(
95-
self,
96-
left,
97-
right,
98-
on=...,
99-
left_on=...,
100-
right_on=...,
101-
left_index: bool = ...,
102-
right_index: bool = ...,
103-
axis=...,
104-
suffixes=...,
105-
copy: bool = ...,
106-
fill_method=...,
107-
how: str = ...,
108-
) -> None: ...
109-
def get_result(self): ...
110-
111-
class _AsOfMerge(_OrderedMerge):
112-
by = ...
113-
left_by = ...
114-
right_by = ...
115-
tolerance = ...
116-
allow_exact_matches = ...
117-
direction = ...
118-
def __init__(
119-
self,
120-
left,
121-
right,
122-
on=...,
123-
left_on=...,
124-
right_on=...,
125-
left_index: bool = ...,
126-
right_index: bool = ...,
127-
by=...,
128-
left_by=...,
129-
right_by=...,
130-
axis=...,
131-
suffixes=...,
132-
copy: bool = ...,
133-
fill_method=...,
134-
how: str = ...,
135-
tolerance=...,
136-
allow_exact_matches: bool = ...,
137-
direction: str = ...,
138-
) -> None: ...

0 commit comments

Comments
 (0)