From 7603dde6440a0114aab15b0d6d1b36bdf90e474b Mon Sep 17 00:00:00 2001 From: Loic Diridollou Date: Sat, 16 Nov 2024 18:04:44 -0500 Subject: [PATCH 1/5] GHXXX Add overloads of engine for pd.read_json --- pandas-stubs/io/json/_json.pyi | 81 +++++++++++++++++++++++++++++++++- tests/test_io.py | 25 +++++++++++ 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/pandas-stubs/io/json/_json.pyi b/pandas-stubs/io/json/_json.pyi index 8151a6459..e87dc0224 100644 --- a/pandas-stubs/io/json/_json.pyi +++ b/pandas-stubs/io/json/_json.pyi @@ -42,16 +42,42 @@ def read_json( Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] | None ) = ..., + lines: bool, + chunksize: int, + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., + dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["ujson"] = ..., +) -> JsonReader[Series]: ... +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[bytes], + *, + orient: JsonSeriesOrient | None = ..., + typ: Literal["series"], + dtype: bool | Mapping[HashableT, DtypeArg] | None = ..., + convert_axes: bool | None = ..., + convert_dates: bool | list[str] = ..., + keep_default_dates: bool = ..., + precise_float: bool = ..., + date_unit: TimeUnit | None = ..., + encoding: str | None = ..., + encoding_errors: ( + Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] + | None + ) = ..., lines: Literal[True], chunksize: int, compression: CompressionOptions = ..., nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["pyarrow"] = ..., ) -> JsonReader[Series]: ... @overload def read_json( - path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + path_or_buf: FilePath | ReadBuffer[bytes], *, orient: JsonFrameOrient | None = ..., typ: Literal["frame"] = ..., @@ -72,6 +98,7 @@ def read_json( nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["pyarrow"] = ..., ) -> JsonReader[DataFrame]: ... @overload def read_json( @@ -96,6 +123,32 @@ def read_json( nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["ujson"] = ..., +) -> Series: ... +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[bytes], + *, + orient: JsonSeriesOrient | None = ..., + typ: Literal["series"], + dtype: bool | Mapping[HashableT, DtypeArg] | None = ..., + convert_axes: bool | None = ..., + convert_dates: bool | list[str] = ..., + keep_default_dates: bool = ..., + precise_float: bool = ..., + date_unit: TimeUnit | None = ..., + encoding: str | None = ..., + encoding_errors: ( + Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] + | None + ) = ..., + lines: Literal[True] = ..., + chunksize: None = ..., + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., + dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["pyarrow"] = ..., ) -> Series: ... @overload def read_json( @@ -120,6 +173,32 @@ def read_json( nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["ujson"] = ..., +) -> DataFrame: ... +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[bytes], + *, + orient: JsonFrameOrient | None = ..., + typ: Literal["frame"] = ..., + dtype: bool | Mapping[HashableT, DtypeArg] | None = ..., + convert_axes: bool | None = ..., + convert_dates: bool | list[str] = ..., + keep_default_dates: bool = ..., + precise_float: bool = ..., + date_unit: TimeUnit | None = ..., + encoding: str | None = ..., + encoding_errors: ( + Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] + | None + ) = ..., + lines: Literal[True] = ..., + chunksize: None = ..., + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., + dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["pyarrow"] = ..., ) -> DataFrame: ... class JsonReader(abc.Iterator, Generic[NDFrameT]): diff --git a/tests/test_io.py b/tests/test_io.py index 66573ff82..e0b8c549c 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1625,3 +1625,28 @@ def test_read_excel_index_col() -> None: ), pd.DataFrame, ) + + +def test_read_json_engine() -> None: + """Test the engine argument for `pd.read_json` introduced with pandas 2.0.""" + data = """{"index": {"0": 0, "1": 1}, + "a": {"0": 1, "1": null}, + "b": {"0": 2.5, "1": 4.5}, + "c": {"0": true, "1": false}, + "d": {"0": "a", "1": "b"}, + "e": {"0": 1577.2, "1": 1577.1}}""" + check( + assert_type(pd.read_json(io.StringIO(data), engine="ujson"), pd.DataFrame), + pd.DataFrame, + ) + + data_lines = b"""{"col 1":"a","col 2":"b"} + {"col 1":"c","col 2":"d"}""" + dd = io.BytesIO(data_lines) + check( + assert_type( + pd.read_json(dd, lines=True, engine="pyarrow"), + pd.DataFrame, + ), + pd.DataFrame, + ) From e64c0854d77d505c039c764fdf63ead9e6235a94 Mon Sep 17 00:00:00 2001 From: Loic Diridollou Date: Wed, 20 Nov 2024 18:00:02 -0500 Subject: [PATCH 2/5] GH1033 PR Feedback --- pandas-stubs/io/json/_json.pyi | 14 +++++++------- tests/test_io.py | 5 +++++ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pandas-stubs/io/json/_json.pyi b/pandas-stubs/io/json/_json.pyi index e87dc0224..1b25f8c9e 100644 --- a/pandas-stubs/io/json/_json.pyi +++ b/pandas-stubs/io/json/_json.pyi @@ -73,7 +73,7 @@ def read_json( nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., - engine: Literal["pyarrow"] = ..., + engine: Literal["pyarrow"], ) -> JsonReader[Series]: ... @overload def read_json( @@ -92,13 +92,13 @@ def read_json( Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] | None ) = ..., - lines: Literal[True], + lines: Literal[True] = True, chunksize: int, compression: CompressionOptions = ..., nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., - engine: Literal["pyarrow"] = ..., + engine: Literal["pyarrow"] = "pyarrow", ) -> JsonReader[DataFrame]: ... @overload def read_json( @@ -142,13 +142,13 @@ def read_json( Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] | None ) = ..., - lines: Literal[True] = ..., + lines: Literal[True] = True, chunksize: None = ..., compression: CompressionOptions = ..., nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., - engine: Literal["pyarrow"] = ..., + engine: Literal["pyarrow"] = "pyarrow", ) -> Series: ... @overload def read_json( @@ -192,13 +192,13 @@ def read_json( Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] | None ) = ..., - lines: Literal[True] = ..., + lines: Literal[True] = True, chunksize: None = ..., compression: CompressionOptions = ..., nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., - engine: Literal["pyarrow"] = ..., + engine: Literal["pyarrow"], ) -> DataFrame: ... class JsonReader(abc.Iterator, Generic[NDFrameT]): diff --git a/tests/test_io.py b/tests/test_io.py index e0b8c549c..addc4511d 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1650,3 +1650,8 @@ def test_read_json_engine() -> None: ), pd.DataFrame, ) + + if TYPE_CHECKING_INVALID_USAGE: + pd.read_json(dd, lines=False, engine="pyarrow") # type: ignore # pyright: ignore + pd.read_json(io.StringIO(data), engine="pyarrow") # type: ignore # pyright: ignore + pd.read_json(io.StringIO(data), lines=True, engine="pyarrow") # type: ignore # pyright: ignore From b7eb663dde700064f14431ed56a828e6b9eebd59 Mon Sep 17 00:00:00 2001 From: Loic Diridollou Date: Wed, 20 Nov 2024 18:05:38 -0500 Subject: [PATCH 3/5] GH1033 PR Feedback --- pandas-stubs/io/json/_json.pyi | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/pandas-stubs/io/json/_json.pyi b/pandas-stubs/io/json/_json.pyi index 1b25f8c9e..a8eb04ccd 100644 --- a/pandas-stubs/io/json/_json.pyi +++ b/pandas-stubs/io/json/_json.pyi @@ -98,7 +98,32 @@ def read_json( nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., - engine: Literal["pyarrow"] = "pyarrow", + engine: Literal["ujson"] = ..., +) -> JsonReader[DataFrame]: ... +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[bytes], + *, + orient: JsonFrameOrient | None = ..., + typ: Literal["frame"] = ..., + dtype: bool | Mapping[HashableT, DtypeArg] | None = ..., + convert_axes: bool | None = ..., + convert_dates: bool | list[str] = ..., + keep_default_dates: bool = ..., + precise_float: bool = ..., + date_unit: TimeUnit | None = ..., + encoding: str | None = ..., + encoding_errors: ( + Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] + | None + ) = ..., + lines: Literal[True] = True, + chunksize: int, + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., + dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["pyarrow"], ) -> JsonReader[DataFrame]: ... @overload def read_json( @@ -148,7 +173,7 @@ def read_json( nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., - engine: Literal["pyarrow"] = "pyarrow", + engine: Literal["pyarrow"], ) -> Series: ... @overload def read_json( From 53af84659b71d353d672a0113f168c93dfdeb4f8 Mon Sep 17 00:00:00 2001 From: Loic Diridollou Date: Wed, 20 Nov 2024 18:32:05 -0500 Subject: [PATCH 4/5] GH1033 Fix ignore type --- tests/test_io.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_io.py b/tests/test_io.py index 7247b44fd..e806c0240 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1741,6 +1741,6 @@ def test_read_json_engine() -> None: ) if TYPE_CHECKING_INVALID_USAGE: - pd.read_json(dd, lines=False, engine="pyarrow") # type: ignore # pyright: ignore - pd.read_json(io.StringIO(data), engine="pyarrow") # type: ignore # pyright: ignore - pd.read_json(io.StringIO(data), lines=True, engine="pyarrow") # type: ignore # pyright: ignore + pd.read_json(dd, lines=False, engine="pyarrow") # type: ignore[call-overload] # pyright: ignore[reportArgumentType, reportCallIssue] + pd.read_json(io.StringIO(data), engine="pyarrow") # type: ignore[call-overload] # pyright: ignore[reportArgumentType, reportCallIssue] + pd.read_json(io.StringIO(data), lines=True, engine="pyarrow") # type: ignore[call-overload] # pyright: ignore[reportArgumentType, reportCallIssue] From 62ce800077855b0484578290f9238847ffa5a928 Mon Sep 17 00:00:00 2001 From: Loic Diridollou Date: Thu, 21 Nov 2024 17:38:24 -0500 Subject: [PATCH 5/5] GH1033 PR feedback --- pandas-stubs/io/json/_json.pyi | 10 +++++----- tests/test_io.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas-stubs/io/json/_json.pyi b/pandas-stubs/io/json/_json.pyi index a8eb04ccd..8e023631c 100644 --- a/pandas-stubs/io/json/_json.pyi +++ b/pandas-stubs/io/json/_json.pyi @@ -42,7 +42,7 @@ def read_json( Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] | None ) = ..., - lines: bool, + lines: Literal[True], chunksize: int, compression: CompressionOptions = ..., nrows: int | None = ..., @@ -92,7 +92,7 @@ def read_json( Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] | None ) = ..., - lines: Literal[True] = True, + lines: Literal[True], chunksize: int, compression: CompressionOptions = ..., nrows: int | None = ..., @@ -117,7 +117,7 @@ def read_json( Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] | None ) = ..., - lines: Literal[True] = True, + lines: Literal[True], chunksize: int, compression: CompressionOptions = ..., nrows: int | None = ..., @@ -167,7 +167,7 @@ def read_json( Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] | None ) = ..., - lines: Literal[True] = True, + lines: Literal[True], chunksize: None = ..., compression: CompressionOptions = ..., nrows: int | None = ..., @@ -217,7 +217,7 @@ def read_json( Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] | None ) = ..., - lines: Literal[True] = True, + lines: Literal[True], chunksize: None = ..., compression: CompressionOptions = ..., nrows: int | None = ..., diff --git a/tests/test_io.py b/tests/test_io.py index e806c0240..17c702d20 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1742,5 +1742,5 @@ def test_read_json_engine() -> None: if TYPE_CHECKING_INVALID_USAGE: pd.read_json(dd, lines=False, engine="pyarrow") # type: ignore[call-overload] # pyright: ignore[reportArgumentType, reportCallIssue] - pd.read_json(io.StringIO(data), engine="pyarrow") # type: ignore[call-overload] # pyright: ignore[reportArgumentType, reportCallIssue] + pd.read_json(io.StringIO(data), engine="pyarrow") # type: ignore[call-overload] # pyright: ignore[reportArgumentType] pd.read_json(io.StringIO(data), lines=True, engine="pyarrow") # type: ignore[call-overload] # pyright: ignore[reportArgumentType, reportCallIssue]