From e812d1a7076977a9f17f17f95930cbaff451ffec Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 2 Sep 2022 06:55:44 +0100 Subject: [PATCH 1/8] TYP: Remove deprecated inplace for set_index --- pandas-stubs/core/frame.pyi | 5 ----- tests/test_frame.py | 18 ++++++++++++------ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 4bf3d5b8e..745351a5a 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -645,8 +645,6 @@ class DataFrame(NDFrame, OpsMixin): drop: _bool = ..., append: _bool = ..., verify_integrity: _bool = ..., - *, - inplace: Literal[True], ) -> None: ... @overload def set_index( @@ -660,8 +658,6 @@ class DataFrame(NDFrame, OpsMixin): drop: _bool = ..., append: _bool = ..., verify_integrity: _bool = ..., - *, - inplace: Literal[False], ) -> DataFrame: ... @overload def set_index( @@ -688,7 +684,6 @@ class DataFrame(NDFrame, OpsMixin): | list[HashableT], drop: _bool = ..., append: _bool = ..., - inplace: _bool | None = ..., verify_integrity: _bool = ..., ) -> DataFrame | None: ... @overload diff --git a/tests/test_frame.py b/tests/test_frame.py index ab873e856..d8ba15bf8 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -287,7 +287,6 @@ def test_types_set_index() -> None: res3: pd.DataFrame = df.set_index("col1", append=True) res4: pd.DataFrame = df.set_index("col1", verify_integrity=True) res5: pd.DataFrame = df.set_index(["col1", "col2"]) - res6: None = df.set_index("col1", inplace=True) # GH 140 res7: pd.DataFrame = df.set_index(pd.Index(["w", "x", "y", "z"])) @@ -1505,7 +1504,8 @@ def test_frame_scalars_slice() -> None: # Note: bool_ cannot be tested since the index is object and pandas does not # support boolean access using loc except when the index is boolean - check(assert_type(df.loc[str_], pd.Series), pd.Series) + with pytest.warns(FutureWarning, match="Comparison of Timestamp with datetime"): + check(assert_type(df.loc[str_], pd.Series), pd.Series) check(assert_type(df.loc[bytes_], pd.Series), pd.Series) check(assert_type(df.loc[date], pd.Series), pd.Series) check(assert_type(df.loc[datetime_], pd.Series), pd.Series) @@ -1513,11 +1513,13 @@ def test_frame_scalars_slice() -> None: check(assert_type(df.loc[int_], pd.Series), pd.Series) check(assert_type(df.loc[float_], pd.Series), pd.Series) check(assert_type(df.loc[complex_], pd.Series), pd.Series) - check(assert_type(df.loc[timestamp], pd.Series), pd.Series) + with pytest.warns(FutureWarning, match="Comparison of Timestamp with datetime"): + check(assert_type(df.loc[timestamp], pd.Series), pd.Series) check(assert_type(df.loc[pd_timedelta], pd.Series), pd.Series) check(assert_type(df.loc[none], pd.Series), pd.Series) - check(assert_type(df.loc[:, str_], pd.Series), pd.Series) + with pytest.warns(FutureWarning, match="Comparison of Timestamp with datetime"): + check(assert_type(df.loc[:, str_], pd.Series), pd.Series) check(assert_type(df.loc[:, bytes_], pd.Series), pd.Series) check(assert_type(df.loc[:, date], pd.Series), pd.Series) check(assert_type(df.loc[:, datetime_], pd.Series), pd.Series) @@ -1525,7 +1527,8 @@ def test_frame_scalars_slice() -> None: check(assert_type(df.loc[:, int_], pd.Series), pd.Series) check(assert_type(df.loc[:, float_], pd.Series), pd.Series) check(assert_type(df.loc[:, complex_], pd.Series), pd.Series) - check(assert_type(df.loc[:, timestamp], pd.Series), pd.Series) + with pytest.warns(FutureWarning, match="Comparison of Timestamp with datetime"): + check(assert_type(df.loc[:, timestamp], pd.Series), pd.Series) check(assert_type(df.loc[:, pd_timedelta], pd.Series), pd.Series) check(assert_type(df.loc[:, none], pd.Series), pd.Series) @@ -1635,7 +1638,10 @@ def sample_to_df(x: pd.DataFrame) -> pd.DataFrame: return x.sample() check( - assert_type(df.groupby("col1").apply(sample_to_df), pd.DataFrame), pd.DataFrame + assert_type( + df.groupby("col1").apply(sample_to_df, group_keys=True), pd.DataFrame + ), + pd.DataFrame, ) From eb0464e65a2a052a0a77f80901d09b13b8a860e3 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 2 Sep 2022 06:56:14 +0100 Subject: [PATCH 2/8] TYP: Make read_stata position only and fix arg name --- pandas-stubs/io/stata.pyi | 26 ++++++++++++++------------ tests/test_io.py | 16 +--------------- 2 files changed, 15 insertions(+), 27 deletions(-) diff --git a/pandas-stubs/io/stata.pyi b/pandas-stubs/io/stata.pyi index 12b8dad74..9b3387f66 100644 --- a/pandas-stubs/io/stata.pyi +++ b/pandas-stubs/io/stata.pyi @@ -25,7 +25,8 @@ from pandas._typing import ( @overload def read_stata( - path: FilePath | ReadBuffer[bytes], + filepath_or_buffer: FilePath | ReadBuffer[bytes], + *, convert_dates: bool = ..., convert_categoricals: bool = ..., index_col: str | None = ..., @@ -34,29 +35,30 @@ def read_stata( columns: list[HashableT] | None = ..., order_categoricals: bool = ..., chunksize: int | None = ..., - *, iterator: Literal[True], compression: CompressionOptions = ..., storage_options: StorageOptions = ..., ) -> StataReader: ... @overload def read_stata( - path: FilePath | ReadBuffer[bytes], - convert_dates: bool, - convert_categoricals: bool, - index_col: str | None, - convert_missing: bool, - preserve_dtypes: bool, - columns: list[HashableT] | None, - order_categoricals: bool, - chunksize: int | None, - iterator: Literal[True], + filepath_or_buffer: FilePath | ReadBuffer[bytes], + *, + convert_dates: bool = ..., + convert_categoricals: bool = ..., + index_col: str | None = ..., + convert_missing: bool = ..., + preserve_dtypes: bool = ..., + columns: list[HashableT] | None = ..., + order_categoricals: bool = ..., + chunksize: int, + iterator: bool = ..., compression: CompressionOptions = ..., storage_options: StorageOptions = ..., ) -> StataReader: ... @overload def read_stata( path: FilePath | ReadBuffer[bytes], + *, convert_dates: bool = ..., convert_categoricals: bool = ..., index_col: str | None = ..., diff --git a/tests/test_io.py b/tests/test_io.py index f87265710..711dfa174 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -109,21 +109,6 @@ def test_read_stata_df(): check(assert_type(read_stata(path), pd.DataFrame), pd.DataFrame) -def test_read_stata_iterator_positional(): - with ensure_clean() as path: - str_path = str(path) - DF.to_stata(str_path) - check( - assert_type( - read_stata( - str_path, False, False, None, False, False, None, False, 2, True - ), - StataReader, - ), - StataReader, - ) - - def test_read_stata_iterator(): with ensure_clean() as path: str_path = str(path) @@ -131,6 +116,7 @@ def test_read_stata_iterator(): check( assert_type(read_stata(str_path, iterator=True), StataReader), StataReader ) + check(assert_type(read_stata(str_path, chunksize=1), StataReader), StataReader) def test_clipboard(): From 6e3113ebd8a3ab7d68cdde262b893ac9b99b0adc Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 2 Sep 2022 06:56:48 +0100 Subject: [PATCH 3/8] MAINT: Silence warning about rank and object array --- tests/test_series.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_series.py b/tests/test_series.py index 322c5bd44..354e80794 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -250,7 +250,8 @@ def test_types_rank() -> None: s.rank(method="min", pct=True) with pytest.warns(FutureWarning, match="Dropping of nuisance columns"): s.rank(method="dense", ascending=True) - s.rank(method="first", numeric_only=True) + with pytest.warns(FutureWarning, match="Calling series.rank with numeric"): + s.rank(method="first", numeric_only=True) def test_types_mean() -> None: From 9a87f8514a7c0a37ee54a0a07ce0f03813f8079e Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 2 Sep 2022 08:15:58 +0100 Subject: [PATCH 4/8] TYP: Simplify types --- pandas-stubs/core/frame.pyi | 41 ------------------------------------- tests/test_frame.py | 2 +- tests/test_series.py | 4 ++-- 3 files changed, 3 insertions(+), 44 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 745351a5a..39566a02d 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -633,20 +633,6 @@ class DataFrame(NDFrame, OpsMixin): axis: AxisType = ..., fill_value: Hashable | None = ..., ) -> DataFrame: ... - @overload - def set_index( - self, - keys: Label - | Series - | Index - | np.ndarray - | Iterator[HashableT] - | list[HashableT], - drop: _bool = ..., - append: _bool = ..., - verify_integrity: _bool = ..., - ) -> None: ... - @overload def set_index( self, keys: Label @@ -660,33 +646,6 @@ class DataFrame(NDFrame, OpsMixin): verify_integrity: _bool = ..., ) -> DataFrame: ... @overload - def set_index( - self, - keys: Label - | Series - | Index - | np.ndarray - | Iterator[HashableT] - | list[HashableT], - drop: _bool = ..., - append: _bool = ..., - *, - verify_integrity: _bool = ..., - ) -> DataFrame: ... - @overload - def set_index( - self, - keys: Label - | Series - | Index - | np.ndarray - | Iterator[HashableT] - | list[HashableT], - drop: _bool = ..., - append: _bool = ..., - verify_integrity: _bool = ..., - ) -> DataFrame | None: ... - @overload def reset_index( self, level: Level | Sequence[Level] = ..., diff --git a/tests/test_frame.py b/tests/test_frame.py index d8ba15bf8..56df3e62d 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -1639,7 +1639,7 @@ def sample_to_df(x: pd.DataFrame) -> pd.DataFrame: check( assert_type( - df.groupby("col1").apply(sample_to_df, group_keys=True), pd.DataFrame + df.groupby("col1", group_keys=True).apply(sample_to_df), pd.DataFrame ), pd.DataFrame, ) diff --git a/tests/test_series.py b/tests/test_series.py index 354e80794..3e043d908 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -250,8 +250,8 @@ def test_types_rank() -> None: s.rank(method="min", pct=True) with pytest.warns(FutureWarning, match="Dropping of nuisance columns"): s.rank(method="dense", ascending=True) - with pytest.warns(FutureWarning, match="Calling series.rank with numeric"): - s.rank(method="first", numeric_only=True) + s2 = pd.Series([1, 1, 2, 5, 6, np.nan]) + s2.rank(method="first", numeric_only=True) def test_types_mean() -> None: From b54e6b235dea5800c57f8d332cb7995d7bc5e554 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sat, 3 Sep 2022 08:50:07 +0100 Subject: [PATCH 5/8] MAINT: Remove warning checks --- tests/test_frame.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index 030a98081..247820b20 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -1504,8 +1504,7 @@ def test_frame_scalars_slice() -> None: # Note: bool_ cannot be tested since the index is object and pandas does not # support boolean access using loc except when the index is boolean - with pytest.warns(FutureWarning, match="Comparison of Timestamp with datetime"): - check(assert_type(df.loc[str_], pd.Series), pd.Series) + check(assert_type(df.loc[str_], pd.Series), pd.Series) check(assert_type(df.loc[bytes_], pd.Series), pd.Series) check(assert_type(df.loc[date], pd.Series), pd.Series) check(assert_type(df.loc[datetime_], pd.Series), pd.Series) @@ -1513,13 +1512,11 @@ def test_frame_scalars_slice() -> None: check(assert_type(df.loc[int_], pd.Series), pd.Series) check(assert_type(df.loc[float_], pd.Series), pd.Series) check(assert_type(df.loc[complex_], pd.Series), pd.Series) - with pytest.warns(FutureWarning, match="Comparison of Timestamp with datetime"): - check(assert_type(df.loc[timestamp], pd.Series), pd.Series) + check(assert_type(df.loc[timestamp], pd.Series), pd.Series) check(assert_type(df.loc[pd_timedelta], pd.Series), pd.Series) check(assert_type(df.loc[none], pd.Series), pd.Series) - with pytest.warns(FutureWarning, match="Comparison of Timestamp with datetime"): - check(assert_type(df.loc[:, str_], pd.Series), pd.Series) + check(assert_type(df.loc[:, str_], pd.Series), pd.Series) check(assert_type(df.loc[:, bytes_], pd.Series), pd.Series) check(assert_type(df.loc[:, date], pd.Series), pd.Series) check(assert_type(df.loc[:, datetime_], pd.Series), pd.Series) @@ -1527,8 +1524,7 @@ def test_frame_scalars_slice() -> None: check(assert_type(df.loc[:, int_], pd.Series), pd.Series) check(assert_type(df.loc[:, float_], pd.Series), pd.Series) check(assert_type(df.loc[:, complex_], pd.Series), pd.Series) - with pytest.warns(FutureWarning, match="Comparison of Timestamp with datetime"): - check(assert_type(df.loc[:, timestamp], pd.Series), pd.Series) + check(assert_type(df.loc[:, timestamp], pd.Series), pd.Series) check(assert_type(df.loc[:, pd_timedelta], pd.Series), pd.Series) check(assert_type(df.loc[:, none], pd.Series), pd.Series) From 3c2b00b20bcdac0f9acd843b9e9c9e9e4569be76 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Thu, 15 Sep 2022 23:29:43 +0100 Subject: [PATCH 6/8] MAINT: Restore positional version Restore positional for 1.4, and note removal in 1.5 --- pandas-stubs/io/stata.pyi | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas-stubs/io/stata.pyi b/pandas-stubs/io/stata.pyi index c401c494d..f7c845e7c 100644 --- a/pandas-stubs/io/stata.pyi +++ b/pandas-stubs/io/stata.pyi @@ -23,6 +23,22 @@ from pandas._typing import ( WriteBuffer, ) +# TODO: Remove after 1.5.0 since positional is required +@overload +def read_stata( + filepath_or_buffer: FilePath | ReadBuffer[bytes], + convert_dates: bool, + convert_categoricals: bool, + index_col: str | None, + convert_missing: bool, + preserve_dtypes: bool, + columns: list[HashableT] | None, + order_categoricals: bool, + chunksize: int | None, + iterator: Literal[True], + compression: CompressionOptions = ..., + storage_options: StorageOptions = ..., +) -> StataReader: ... @overload def read_stata( filepath_or_buffer: FilePath | ReadBuffer[bytes], From 381ebcbf166a412681f4a7a16965d2b530bf4c4d Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 16 Sep 2022 08:55:49 +0100 Subject: [PATCH 7/8] MAINT: Restore inplace --- pandas-stubs/core/frame.pyi | 31 +++++++++++++++++++++++++++++++ tests/test_frame.py | 1 + 2 files changed, 32 insertions(+) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 943b60a8e..84f5232fb 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -684,6 +684,37 @@ class DataFrame(NDFrame, OpsMixin): axis: AxisType = ..., fill_value: Hashable | None = ..., ) -> DataFrame: ... + @overload + def set_index( + self, + keys: Label + | Series + | Index + | np.ndarray + | Iterator[HashableT] + | list[HashableT], + drop: _bool = ..., + append: _bool = ..., + verify_integrity: _bool = ..., + *, + inplace: Literal[True], + ) -> None: ... + @overload + def set_index( + self, + keys: Label + | Series + | Index + | np.ndarray + | Iterator[HashableT] + | list[HashableT], + drop: _bool = ..., + append: _bool = ..., + verify_integrity: _bool = ..., + *, + inplace: Literal[False], + ) -> DataFrame: ... + @overload def set_index( self, keys: Label diff --git a/tests/test_frame.py b/tests/test_frame.py index 25ccac5ab..635bb6ee8 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -296,6 +296,7 @@ def test_types_set_index() -> None: res3: pd.DataFrame = df.set_index("col1", append=True) res4: pd.DataFrame = df.set_index("col1", verify_integrity=True) res5: pd.DataFrame = df.set_index(["col1", "col2"]) + res6: None = df.set_index("col1", inplace=True) # GH 140 res7: pd.DataFrame = df.set_index(pd.Index(["w", "x", "y", "z"])) From ad9d879ab3a28aca17f516190256a8adf6ffefc6 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 27 Sep 2022 10:42:23 +0100 Subject: [PATCH 8/8] BUG: Correct read_stata --- pandas-stubs/io/stata.pyi | 14 +++++++------- tests/test_io.py | 3 ++- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas-stubs/io/stata.pyi b/pandas-stubs/io/stata.pyi index 04f3d5c1d..d2ff3f130 100644 --- a/pandas-stubs/io/stata.pyi +++ b/pandas-stubs/io/stata.pyi @@ -43,13 +43,13 @@ def read_stata( def read_stata( filepath_or_buffer: FilePath | ReadBuffer[bytes], *, - convert_dates: bool, - convert_categoricals: bool, - index_col: str | None, - convert_missing: bool, - preserve_dtypes: bool, - columns: list[HashableT] | None, - order_categoricals: bool, + convert_dates: bool = ..., + convert_categoricals: bool = ..., + index_col: str | None = ..., + convert_missing: bool = ..., + preserve_dtypes: bool = ..., + columns: list[HashableT] | None = ..., + order_categoricals: bool = ..., chunksize: int, iterator: bool = ..., compression: CompressionOptions = ..., diff --git a/tests/test_io.py b/tests/test_io.py index 0292ce5e8..bf9484e07 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -196,7 +196,8 @@ def test_read_stata_iterator(): check( assert_type(read_stata(str_path, iterator=True), StataReader), StataReader ) - check(assert_type(read_stata(str_path, chunksize=1), StataReader), StataReader) + reader = read_stata(str_path, chunksize=1) + check(assert_type(reader, StataReader), StataReader) def test_clipboard():