From 4599f2dfab8a15c0d64f777bbafc48ecdf65cc50 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Wed, 17 Aug 2022 16:47:39 +0100 Subject: [PATCH 1/6] ENH: Improve clipboard Synchronize clipboard with upstream pandas --- pandas-stubs/io/clipboards.pyi | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas-stubs/io/clipboards.pyi b/pandas-stubs/io/clipboards.pyi index cdad8fccb..4f6e738e9 100644 --- a/pandas-stubs/io/clipboards.pyi +++ b/pandas-stubs/io/clipboards.pyi @@ -1,4 +1,8 @@ +from typing import Any + from pandas.core.frame import DataFrame -def read_clipboard(sep: str = ..., **kwargs) -> DataFrame: ... -def to_clipboard(obj, excel: bool = ..., sep=..., **kwargs) -> None: ... +def read_clipboard(sep: str = ..., **kwargs: Any) -> DataFrame: ... +def to_clipboard( + obj, excel: bool = ..., sep: str | None = ..., **kwargs: Any +) -> None: ... From 009cbaf6e23f0bd1116e1dc15894d75f5664f8a2 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Thu, 18 Aug 2022 08:04:08 +0100 Subject: [PATCH 2/6] TST: Add a test for clipboard --- tests/test_io.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/test_io.py diff --git a/tests/test_io.py b/tests/test_io.py new file mode 100644 index 000000000..9eec34488 --- /dev/null +++ b/tests/test_io.py @@ -0,0 +1,13 @@ +from pandas import DataFrame +from typing_extensions import assert_type + +from tests import check + +from pandas.io.clipboards import read_clipboard + +DF = DataFrame({"a": [1, 2, 3], "b": [0.0, 0.0, 0.0]}) + + +def test_clipboard(): + DF.to_clipboard() + check(assert_type(read_clipboard(), DataFrame), DataFrame) From 03c8f98393612032575ab5473a4e949cc93d56da Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Thu, 18 Aug 2022 08:27:55 +0100 Subject: [PATCH 3/6] TST: Make test skippable if not available --- tests/test_io.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_io.py b/tests/test_io.py index 9eec34488..ecea3a1c4 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,13 +1,18 @@ from pandas import DataFrame +import pytest from typing_extensions import assert_type from tests import check +from pandas.io.clipboard import PyperclipException from pandas.io.clipboards import read_clipboard DF = DataFrame({"a": [1, 2, 3], "b": [0.0, 0.0, 0.0]}) def test_clipboard(): - DF.to_clipboard() + try: + DF.to_clipboard() + except PyperclipException: + pytest.skip("clipboard not available for testing") check(assert_type(read_clipboard(), DataFrame), DataFrame) From 649dacd907b0dc42235b130eaa3a7846e616422f Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 22 Aug 2022 10:59:31 +0100 Subject: [PATCH 4/6] CLN: Import from main namespace --- tests/test_io.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_io.py b/tests/test_io.py index ecea3a1c4..822ab6df0 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,11 +1,13 @@ -from pandas import DataFrame +from pandas import ( + DataFrame, + read_clipboard, +) import pytest from typing_extensions import assert_type from tests import check from pandas.io.clipboard import PyperclipException -from pandas.io.clipboards import read_clipboard DF = DataFrame({"a": [1, 2, 3], "b": [0.0, 0.0, 0.0]}) From 7aeb11b1de7da58e2abff7ee83c7977ad3a726e9 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 22 Aug 2022 14:52:52 +0100 Subject: [PATCH 5/6] ENH: Expand kwargs Expand kwargs Add types for all read_csv args --- pandas-stubs/_typing.pyi | 2 + pandas-stubs/io/clipboards.pyi | 221 ++++++++++++++++++++++++++++++++- 2 files changed, 221 insertions(+), 2 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index bd2e87e9b..4cb504d17 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -211,4 +211,6 @@ MergeHow = Literal["left", "right", "outer", "inner"] JsonOrient = Literal["split", "records", "index", "columns", "values", "table"] TimestampConvention = Literal["start", "end", "s", "e"] +CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"] + __all__ = ["npt", "type_t"] diff --git a/pandas-stubs/io/clipboards.pyi b/pandas-stubs/io/clipboards.pyi index 4f6e738e9..744156d48 100644 --- a/pandas-stubs/io/clipboards.pyi +++ b/pandas-stubs/io/clipboards.pyi @@ -1,8 +1,225 @@ -from typing import Any +import csv +from typing import ( + Any, + Callable, + Literal, + Sequence, + overload, +) +import numpy as np from pandas.core.frame import DataFrame +from pandas.core.indexes.base import Index +from pandas.core.series import Series -def read_clipboard(sep: str = ..., **kwargs: Any) -> DataFrame: ... +from pandas._typing import ( + CompressionOptions, + CSVEngine, + DtypeArg, + StorageOptions, + npt, +) + +from pandas.io.parsers import TextFileReader + +@overload +def read_clipboard( + sep: str | None = ..., + *, + delimiter: str | None = ..., + header: int | Sequence[int] | Literal["infer"] | None = ..., + names: list[str] = ..., + index_col: int | str | Sequence[str | int] | Literal[False] | None = ..., + usecols: list[str] + | Sequence[int] + | Series + | Index + | npt.NDArray + | Callable[[str], bool] + | None = ..., + # squeeze: bool | None = ..., # deprecated: 1.4.0 + # prefix: str | None = ..., # deprecated: 1.4.0 + mangle_dupe_cols: bool = ..., # deprecated: 1.5.0 + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters: dict[int | str, Callable[[str], Any]] = ..., + true_values: list[str] = ..., + false_values: list[str] = ..., + skipinitialspace: bool = ..., + skiprows: int | Sequence[int] | Callable[[int], bool] = ..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values: Sequence[str] | dict[str, Sequence[str]] = ..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates: bool + | Sequence[int] + | list[str] + | Sequence[Sequence[int]] + | dict[str, Sequence[int]] = ..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser: Callable = ..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[True], + chunksize: int | None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect = ..., + # error_bad_lines: bool | None = ..., # Deprecated: 1.3.0 + # warn_bad_lines: bool | None = ..., # Deprecated: 1.3.0 + on_bad_lines: Literal["error", "warn", "skip"] + | Callable[[list[str]], list[str] | None] = ..., + delim_whitespace: bool = ..., + low_memory: bool = ..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy", "round_trip"] | None = ..., + storage_options: StorageOptions | None = ..., +) -> TextFileReader: ... +@overload +def read_clipboard( + sep: str | None = ..., + *, + delimiter: str | None = ..., + header: int | Sequence[int] | Literal["infer"] | None = ..., + names: list[str] = ..., + index_col: int | str | Sequence[str | int] | Literal[False] | None = ..., + usecols: list[str] + | Sequence[int] + | Series + | Index + | npt.NDArray + | Callable[[str], bool] + | None = ..., + # squeeze: bool | None = ..., # deprecated: 1.4.0 + # prefix: str | None = ..., # deprecated: 1.4.0 + mangle_dupe_cols: bool = ..., # deprecated: 1.5.0 + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters: dict[int | str, Callable[[str], Any]] = ..., + true_values: list[str] = ..., + false_values: list[str] = ..., + skipinitialspace: bool = ..., + skiprows: int | Sequence[int] | Callable[[int], bool] = ..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values: Sequence[str] | dict[str, Sequence[str]] = ..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates: bool + | Sequence[int] + | list[str] + | Sequence[Sequence[int]] + | dict[str, Sequence[int]] = ..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser: Callable = ..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: bool | None, + chunksize: int, + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect = ..., + # error_bad_lines: bool | None = ..., # Deprecated: 1.3.0 + # warn_bad_lines: bool | None = ..., # Deprecated: 1.3.0 + on_bad_lines: Literal["error", "warn", "skip"] + | Callable[[list[str]], list[str] | None] = ..., + delim_whitespace: bool = ..., + low_memory: bool = ..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy", "round_trip"] | None = ..., + storage_options: StorageOptions | None = ..., +) -> TextFileReader: ... +@overload +def read_clipboard( + sep: str | None = ..., + *, + delimiter: str | None = ..., + header: int | Sequence[int] | Literal["infer"] | None = ..., + names: list[str] = ..., + index_col: int | str | Sequence[str | int] | Literal[False] | None = ..., + usecols: list[str] + | Sequence[int] + | Series + | Index + | npt.NDArray + | Callable[[str], bool] + | None = ..., + # squeeze: bool | None = ..., # deprecated: 1.4.0 + # prefix: str | None = ..., # deprecated: 1.4.0 + mangle_dupe_cols: bool = ..., # deprecated: 1.5.0 + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters: dict[int | str, Callable[[str], Any]] = ..., + true_values: list[str] = ..., + false_values: list[str] = ..., + skipinitialspace: bool = ..., + skiprows: int | Sequence[int] | Callable[[int], bool] = ..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values: Sequence[str] | dict[str, Sequence[str]] = ..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates: bool + | Sequence[int] + | list[str] + | Sequence[Sequence[int]] + | dict[str, Sequence[int]] = ..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser: Callable = ..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[False] = ..., + chunksize: None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect = ..., + # error_bad_lines: bool | None = ..., # Deprecated: 1.3.0 + # warn_bad_lines: bool | None = ..., # Deprecated: 1.3.0 + on_bad_lines: Literal["error", "warn", "skip"] + | Callable[[list[str]], list[str] | None] = ..., + delim_whitespace: bool = ..., + low_memory: bool = ..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy", "round_trip"] | None = ..., + storage_options: StorageOptions | None = ..., +) -> DataFrame: ... def to_clipboard( obj, excel: bool = ..., sep: str | None = ..., **kwargs: Any ) -> None: ... From bd2c856705f1cc777e8efe748a1b17e576297e53 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 22 Aug 2022 15:01:11 +0100 Subject: [PATCH 6/6] TST: Add tests for overloads --- pandas-stubs/io/clipboards.pyi | 2 +- tests/test_io.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/pandas-stubs/io/clipboards.pyi b/pandas-stubs/io/clipboards.pyi index 744156d48..6531f4641 100644 --- a/pandas-stubs/io/clipboards.pyi +++ b/pandas-stubs/io/clipboards.pyi @@ -130,7 +130,7 @@ def read_clipboard( date_parser: Callable = ..., dayfirst: bool = ..., cache_dates: bool = ..., - iterator: bool | None, + iterator: bool = ..., chunksize: int, compression: CompressionOptions = ..., thousands: str | None = ..., diff --git a/tests/test_io.py b/tests/test_io.py index 822ab6df0..70a05144e 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -8,6 +8,7 @@ from tests import check from pandas.io.clipboard import PyperclipException +from pandas.io.parsers import TextFileReader DF = DataFrame({"a": [1, 2, 3], "b": [0.0, 0.0, 0.0]}) @@ -18,3 +19,22 @@ def test_clipboard(): except PyperclipException: pytest.skip("clipboard not available for testing") check(assert_type(read_clipboard(), DataFrame), DataFrame) + check(assert_type(read_clipboard(iterator=False), DataFrame), DataFrame) + check(assert_type(read_clipboard(chunksize=None), DataFrame), DataFrame) + + +def test_clipboard_iterator(): + try: + DF.to_clipboard() + except PyperclipException: + pytest.skip("clipboard not available for testing") + check(assert_type(read_clipboard(iterator=True), TextFileReader), TextFileReader) + check( + assert_type(read_clipboard(iterator=True, chunksize=None), TextFileReader), + TextFileReader, + ) + check(assert_type(read_clipboard(chunksize=1), TextFileReader), TextFileReader) + check( + assert_type(read_clipboard(iterator=False, chunksize=1), TextFileReader), + TextFileReader, + )