Merge remote-tracking branch 'upstream/main' into io-fwf-csv-table2

Kevin Sheppard · Kevin Sheppard · commit 53a9e3f9fdfa · 2022-09-04T23:37:12.000+01:00
diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi
@@ -40,6 +40,8 @@ from pandas._libs.tslibs import (
 
 from pandas.core.dtypes.dtypes import ExtensionDtype
 
+from pandas.io.formats.format import EngFormatter
+
 ArrayLike = Union[ExtensionArray, np.ndarray]
 AnyArrayLike = Union[Index, Series, np.ndarray]
 PythonScalar = Union[str, bool, complex]
@@ -195,7 +197,10 @@ CompressionDict = dict[str, Any]
 CompressionOptions = Optional[
     Union[Literal["infer", "gzip", "bz2", "zip", "xz", "zstd"], CompressionDict]
 ]
-
+FormattersType = Union[
+    list[Callable], tuple[Callable, ...], Mapping[Union[str, int], Callable]
+]
+FloatFormatType = str | Callable | EngFormatter
 # converters
 ConvertersArg = dict[Hashable, Callable[[Dtype], Dtype]]
 
diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi
@@ -52,11 +52,14 @@ from pandas._typing import (
     AxisType,
     ColspaceArgType,
     CompressionOptions,
+    CSVQuoting,
     Dtype,
     DtypeNp,
     FilePath,
     FilePathOrBuffer,
     FillnaOptions,
+    FloatFormatType,
+    FormattersType,
     GroupByObjectNonScalar,
     HashableT,
     IgnoreRaise,
@@ -347,7 +350,7 @@ class DataFrame(NDFrame, OpsMixin):
     def to_html(
         self,
         buf: FilePath | WriteBuffer[str],
-        columns: list[HashableT] | None = ...,
+        columns: list[HashableT] | Index | Series | None = ...,
         col_space: ColspaceArgType | None = ...,
         header: _bool = ...,
         index: _bool = ...,
@@ -1915,6 +1918,55 @@ class DataFrame(NDFrame, OpsMixin):
         self, excel: _bool = ..., sep: _str | None = ..., **kwargs
     ) -> None: ...
     @overload
+    def to_csv(
+        self,
+        path_or_buf: FilePathOrBuffer | None,
+        sep: _str = ...,
+        na_rep: _str = ...,
+        float_format: _str | None = ...,
+        columns: Sequence[Hashable] | None = ...,
+        header: _bool | list[_str] = ...,
+        index: _bool = ...,
+        index_label: _bool | _str | Sequence[Hashable] | None = ...,
+        mode: _str = ...,
+        encoding: _str | None = ...,
+        compression: _str | Mapping[_str, _str] = ...,
+        quoting: CSVQuoting | None = ...,
+        quotechar: _str = ...,
+        line_terminator: _str | None = ...,
+        chunksize: int | None = ...,
+        date_format: _str | None = ...,
+        doublequote: _bool = ...,
+        escapechar: _str | None = ...,
+        decimal: _str = ...,
+        errors: _str = ...,
+        storage_options: dict[_str, Any] | None = ...,
+    ) -> None: ...
+    @overload
+    def to_csv(
+        self,
+        sep: _str = ...,
+        na_rep: _str = ...,
+        float_format: _str | None = ...,
+        columns: Sequence[Hashable] | None = ...,
+        header: _bool | list[_str] = ...,
+        index: _bool = ...,
+        index_label: _bool | _str | Sequence[Hashable] | None = ...,
+        mode: _str = ...,
+        encoding: _str | None = ...,
+        compression: _str | Mapping[_str, _str] = ...,
+        quoting: CSVQuoting | None = ...,
+        quotechar: _str = ...,
+        line_terminator: _str | None = ...,
+        chunksize: int | None = ...,
+        date_format: _str | None = ...,
+        doublequote: _bool = ...,
+        escapechar: _str | None = ...,
+        decimal: _str = ...,
+        errors: _str = ...,
+        storage_options: dict[_str, Any] | None = ...,
+    ) -> _str: ...
+    @overload
     def to_json(
         self,
         path_or_buf: FilePathOrBuffer | None,
@@ -1954,45 +2006,46 @@ class DataFrame(NDFrame, OpsMixin):
     @overload
     def to_string(
         self,
-        buf: FilePathOrBuffer | None,
-        columns: Sequence[_str] | None = ...,
-        col_space: int | list[int] | dict[_str | int, int] | None = ...,
-        header: _bool | Sequence[_str] = ...,
+        buf: FilePath | WriteBuffer[str],
+        columns: list[HashableT] | Index | Series | None = ...,
+        col_space: int | list[int] | dict[HashableT, int] | None = ...,
+        header: _bool | list[_str] | tuple[str, ...] = ...,
         index: _bool = ...,
         na_rep: _str = ...,
-        formatters=...,
-        float_format=...,
+        formatters: FormattersType | None = ...,
+        float_format: FloatFormatType | None = ...,
         sparsify: _bool | None = ...,
         index_names: _bool = ...,
         justify: _str | None = ...,
         max_rows: int | None = ...,
-        min_rows: int | None = ...,
         max_cols: int | None = ...,
         show_dimensions: _bool = ...,
         decimal: _str = ...,
         line_width: int | None = ...,
+        min_rows: int | None = ...,
         max_colwidth: int | None = ...,
         encoding: _str | None = ...,
     ) -> None: ...
     @overload
     def to_string(
         self,
-        columns: Sequence[_str] | None = ...,
-        col_space: int | list[int] | dict[_str | int, int] | None = ...,
+        buf: None = ...,
+        columns: list[HashableT] | Index | Series | None = ...,
+        col_space: int | list[int] | dict[Hashable, int] | None = ...,
         header: _bool | Sequence[_str] = ...,
         index: _bool = ...,
         na_rep: _str = ...,
-        formatters=...,
-        float_format=...,
+        formatters: FormattersType | None = ...,
+        float_format: FloatFormatType | None = ...,
         sparsify: _bool | None = ...,
         index_names: _bool = ...,
         justify: _str | None = ...,
         max_rows: int | None = ...,
-        min_rows: int | None = ...,
         max_cols: int | None = ...,
         show_dimensions: _bool = ...,
         decimal: _str = ...,
         line_width: int | None = ...,
+        min_rows: int | None = ...,
         max_colwidth: int | None = ...,
         encoding: _str | None = ...,
     ) -> _str: ...
diff --git a/pandas-stubs/core/generic.pyi b/pandas-stubs/core/generic.pyi
@@ -242,7 +242,7 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
         mode: FileWriteMode = ...,
         encoding: _str | None = ...,
         compression: CompressionOptions = ...,
-        quoting: int | None = ...,
+        quoting: Literal[0, 1, 2, 3] | None = ...,
         quotechar: _str = ...,
         line_terminator: _str | None = ...,
         chunksize: int | None = ...,
@@ -267,7 +267,7 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
         mode: FileWriteMode = ...,
         encoding: _str | None = ...,
         compression: CompressionOptions = ...,
-        quoting: int | None = ...,
+        quoting: Literal[0, 1, 2, 3] | None = ...,
         quotechar: _str = ...,
         line_terminator: _str | None = ...,
         chunksize: int | None = ...,
diff --git a/pandas-stubs/io/parsers/readers.pyi b/pandas-stubs/io/parsers/readers.pyi
@@ -44,6 +44,7 @@ def read_csv(
     | npt.NDArray
     | Callable[[str], bool]
     | None = ...,
+    mangle_dupe_cols: bool = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: dict[int | str, Callable[[str], Any]] = ...,
@@ -107,6 +108,7 @@ def read_csv(
     | npt.NDArray
     | Callable[[str], bool]
     | None = ...,
+    mangle_dupe_cols: bool = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: dict[int | str, Callable[[str], Any]] = ...,
@@ -170,6 +172,7 @@ def read_csv(
     | npt.NDArray
     | Callable[[str], bool]
     | None = ...,
+    mangle_dupe_cols: bool = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: dict[int | str, Callable[[str], Any]] = ...,
@@ -405,13 +408,39 @@ def read_table(
     float_precision: Literal["high", "legacy", "round_trip"] | None = ...,
     storage_options: StorageOptions | None = ...,
 ) -> DataFrame: ...
+@overload
 def read_fwf(
     filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
-    colspecs: Sequence[tuple[int, int]] | str | None = ...,
+    colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ...,
     widths: Sequence[int] | None = ...,
     infer_nrows: int = ...,
+    *,
+    iterator: Literal[True],
+    chunksize: int | None = ...,
     **kwds: Any,
-) -> DataFrame | TextFileReader: ...
+) -> TextFileReader: ...
+@overload
+def read_fwf(
+    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
+    colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ...,
+    widths: Sequence[int] | None = ...,
+    infer_nrows: int = ...,
+    *,
+    iterator: bool = ...,
+    chunksize: int,
+    **kwds: Any,
+) -> TextFileReader: ...
+@overload
+def read_fwf(
+    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
+    colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ...,
+    widths: Sequence[int] | None = ...,
+    infer_nrows: int = ...,
+    *,
+    iterator: Literal[False] = ...,
+    chunksize: None = ...,
+    **kwds: Any,
+) -> DataFrame: ...
 
 class TextFileReader(abc.Iterator):
     engine: CSVEngine
diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 from collections import defaultdict
+import csv
 import datetime
 import io
 from pathlib import Path
@@ -114,6 +115,9 @@ def test_types_to_csv() -> None:
     # Testing support for binary file handles, added in 1.2.0 https://pandas.pydata.org/docs/whatsnew/v1.2.0.html
     df.to_csv(io.BytesIO(), encoding="utf-8", compression="gzip")
 
+    # Testing that csv module quoting constants are accepted for the quoting argument
+    df.to_csv(io.BytesIO(), quoting=csv.QUOTE_ALL, encoding="utf-8", compression="gzip")
+
 
 def test_types_to_csv_when_path_passed() -> None:
     df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
diff --git a/tests/test_io.py b/tests/test_io.py
@@ -1,3 +1,4 @@
+import csv
 import io
 import os.path
 import pathlib
@@ -443,34 +444,28 @@ def test_read_table_iterator():
         )
 
 
-def test_read_fwf():
+def btest_read_fwf():
     with ensure_clean() as path:
         DF.to_string(path, index=False)
-        check(assert_type(read_fwf(path), Union[DataFrame, TextFileReader]), DataFrame)
-        check(
-            assert_type(read_fwf(pathlib.Path(path)), Union[DataFrame, TextFileReader]),
-            DataFrame,
-        )
+        check(assert_type(read_fwf(path), DataFrame), DataFrame)
+        check(assert_type(read_fwf(pathlib.Path(path)), DataFrame), DataFrame)
 
         with open(path) as fwf_file:
             check(
-                assert_type(read_fwf(fwf_file), Union[DataFrame, TextFileReader]),
+                assert_type(read_fwf(fwf_file), DataFrame),
                 DataFrame,
             )
         with open(path) as fwf_file:
             sio = io.StringIO(fwf_file.read())
-            check(
-                assert_type(read_fwf(sio), Union[DataFrame, TextFileReader]), DataFrame
-            )
+            check(assert_type(read_fwf(sio), DataFrame), DataFrame)
         with open(path, "rb") as fwf_file:
             bio = io.BytesIO(fwf_file.read())
-            check(
-                assert_type(read_fwf(bio), Union[DataFrame, TextFileReader]), DataFrame
-            )
+            check(assert_type(read_fwf(bio), DataFrame), DataFrame)
         fwf_iterator = read_fwf(path, iterator=True)
-        check(
-            assert_type(fwf_iterator, Union[DataFrame, TextFileReader]), TextFileReader
-        )
+        check(assert_type(fwf_iterator, TextFileReader), TextFileReader)
+        fwf_iterator.close()
+        fwf_iterator2 = read_fwf(path, chunksize=1)
+        check(assert_type(fwf_iterator2, TextFileReader), TextFileReader)
         fwf_iterator.close()
 
 
@@ -498,6 +493,17 @@ def test_to_csv_series():
         check(assert_type(s.to_csv(path), None), type(None))
 
 
+def test_to_string():
+    check(assert_type(DF.to_string(), str), str)
+    with ensure_clean() as path:
+        check(assert_type(DF.to_string(path), None), type(None))
+        check(assert_type(DF.to_string(pathlib.Path(path)), None), type(None))
+        with open(path, "wt") as df_string:
+            check(assert_type(DF.to_string(df_string), None), type(None))
+        sio = io.StringIO()
+        check(assert_type(DF.to_string(sio), None), type(None))
+
+
 def test_read_sql():
     with ensure_clean() as path:
         con = sqlite3.connect(path)
@@ -572,3 +578,13 @@ def test_read_html():
     with ensure_clean() as path:
         check(assert_type(DF.to_html(path), None), type(None))
         check(assert_type(read_html(path), List[DataFrame]), list)
+
+
+def test_csv_quoting():
+    with ensure_clean() as path:
+        check(assert_type(DF.to_csv(path, quoting=csv.QUOTE_ALL), None), type(None))
+        check(assert_type(DF.to_csv(path, quoting=csv.QUOTE_NONE), None), type(None))
+        check(
+            assert_type(DF.to_csv(path, quoting=csv.QUOTE_NONNUMERIC), None), type(None)
+        )
+        check(assert_type(DF.to_csv(path, quoting=csv.QUOTE_MINIMAL), None), type(None))