ENH: Improve remaining io (#252)

bashtage · web-flow · commit f73002f45acc · 2022-09-03T10:27:27.000-04:00
* ENH: Improve to_dict typing

* ENH: Improve to_records typing

* ENH: Improve xarray and dict

* TYP: Restore usual case for to_dict

* TYP: Restore add overloads for to_dict

* TST: Add tests for final io funcs

* TST: Add tests for final io funcs and final fixes

* Use type rather than actual for 3.8
diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi
@@ -238,6 +238,9 @@ CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"]
 
 HDFCompLib = Literal["zlib", "lzo", "bzip2", "blosc"]
 ParquetEngine = Literal["auto", "pyarrow", "fastparquet"]
+FileWriteMode = Literal[
+    "a", "w", "x", "at", "wt", "xt", "ab", "wb", "xb", "w+", "w+b", "a+", "a+b"
+]
 ColspaceArgType = str | int | Sequence[int | str] | Mapping[Hashable, str | int]
 
 __all__ = ["npt", "type_t"]
diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi
@@ -38,6 +38,7 @@ from pandas.core.window.rolling import (
     Rolling,
     Window,
 )
+import xarray as xr
 
 from pandas._typing import (
     S1,
@@ -87,6 +88,7 @@ from pandas._typing import (
     XMLParsers,
     np_ndarray_bool,
     np_ndarray_str,
+    npt,
     num,
 )
 
@@ -228,15 +230,32 @@ class DataFrame(NDFrame, OpsMixin):
     @overload
     def to_dict(
         self,
-        orient: Literal["records"],
-        into: Hashable = ...,
-    ) -> list[dict[_str, Any]]: ...
+        orient: Literal["dict", "list", "series", "split", "tight", "index"],
+        into: Mapping | type[Mapping],
+    ) -> Mapping[Hashable, Any]: ...
     @overload
     def to_dict(
         self,
         orient: Literal["dict", "list", "series", "split", "tight", "index"] = ...,
-        into: Hashable = ...,
-    ) -> dict[_str, Any]: ...
+        *,
+        into: Mapping | type[Mapping],
+    ) -> Mapping[Hashable, Any]: ...
+    @overload
+    def to_dict(
+        self,
+        orient: Literal["dict", "list", "series", "split", "tight", "index"] = ...,
+        into: None = ...,
+    ) -> dict[Hashable, Any]: ...
+    @overload
+    def to_dict(
+        self,
+        orient: Literal["records"],
+        into: Mapping | type[Mapping],
+    ) -> list[Mapping[Hashable, Any]]: ...
+    @overload
+    def to_dict(
+        self, orient: Literal["records"], into: None = ...
+    ) -> list[dict[Hashable, Any]]: ...
     def to_gbq(
         self,
         destination_table: str,
@@ -258,8 +277,14 @@ class DataFrame(NDFrame, OpsMixin):
     def to_records(
         self,
         index: _bool = ...,
-        columnDTypes: _str | dict | None = ...,
-        indexDTypes: _str | dict | None = ...,
+        column_dtypes: _str
+        | npt.DTypeLike
+        | Mapping[HashableT, npt.DTypeLike]
+        | None = ...,
+        index_dtypes: _str
+        | npt.DTypeLike
+        | Mapping[HashableT, npt.DTypeLike]
+        | None = ...,
     ) -> np.recarray: ...
     def to_stata(
         self,
@@ -279,12 +304,6 @@ class DataFrame(NDFrame, OpsMixin):
     ) -> None: ...
     def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None: ...
     @overload
-    def to_markdown(
-        self, buf: FilePathOrBuffer | None, mode: _str | None = ..., **kwargs
-    ) -> None: ...
-    @overload
-    def to_markdown(self, mode: _str | None = ..., **kwargs) -> _str: ...
-    @overload
     def to_parquet(
         self,
         path: FilePath | WriteBuffer[bytes],
@@ -2038,7 +2057,7 @@ class DataFrame(NDFrame, OpsMixin):
         max_colwidth: int | None = ...,
         encoding: _str | None = ...,
     ) -> _str: ...
-    def to_xarray(self): ...
+    def to_xarray(self) -> xr.Dataset: ...
     def truediv(
         self,
         other: num | ListLike | DataFrame,
diff --git a/pandas-stubs/core/generic.pyi b/pandas-stubs/core/generic.pyi
@@ -22,6 +22,7 @@ from pandas._typing import (
     Dtype,
     FilePath,
     FilePathOrBuffer,
+    FileWriteMode,
     FillnaOptions,
     FrameOrSeries,
     FrameOrSeriesUnion,
@@ -34,6 +35,7 @@ from pandas._typing import (
     Scalar,
     SeriesAxisType,
     SortKind,
+    StorageOptions,
     T,
 )
 
@@ -129,6 +131,24 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
         ] = ...,
         encoding: _str = ...,
     ) -> None: ...
+    @overload
+    def to_markdown(
+        self,
+        buf: FilePathOrBuffer,
+        mode: FileWriteMode | None = ...,
+        index: _bool = ...,
+        storage_options: StorageOptions = ...,
+        **kwargs: Any,
+    ) -> None: ...
+    @overload
+    def to_markdown(
+        self,
+        buf: None = ...,
+        mode: FileWriteMode | None = ...,
+        index: _bool = ...,
+        storage_options: StorageOptions = ...,
+        **kwargs: Any,
+    ) -> _str: ...
     def to_sql(
         self,
         name: _str,
@@ -150,7 +170,6 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
     def to_clipboard(
         self, excel: _bool = ..., sep: _str | None = ..., **kwargs
     ) -> None: ...
-    def to_xarray(self): ...
     @overload
     def to_latex(
         self,
diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi
@@ -54,6 +54,7 @@ from pandas.core.window.rolling import (
     Rolling,
     Window,
 )
+import xarray as xr
 
 from pandas._typing import (
     S1,
@@ -354,22 +355,6 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
         encoding: _str | None = ...,
     ) -> _str: ...
     @overload
-    def to_markdown(
-        self,
-        buf: FilePathOrBuffer | None,
-        mode: _str | None = ...,
-        index: _bool = ...,
-        storage_options: dict | None = ...,
-        **kwargs,
-    ) -> None: ...
-    @overload
-    def to_markdown(
-        self,
-        mode: _str | None = ...,
-        index: _bool = ...,
-        storage_options: dict | None = ...,
-    ) -> _str: ...
-    @overload
     def to_json(
         self,
         path_or_buf: FilePathOrBuffer | None,
@@ -400,10 +385,14 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
         index: _bool = ...,
         indent: int | None = ...,
     ) -> _str: ...
+    def to_xarray(self) -> xr.DataArray: ...
     def items(self) -> Iterable[tuple[Hashable, S1]]: ...
     def iteritems(self) -> Iterable[tuple[Label, S1]]: ...
     def keys(self) -> list: ...
-    def to_dict(self, into: Hashable = ...) -> dict[Any, S1]: ...
+    @overload
+    def to_dict(self) -> dict[Hashable, S1]: ...
+    @overload
+    def to_dict(self, into: type[Mapping] | Mapping) -> Mapping[Hashable, S1]: ...
     def to_frame(self, name: object | None = ...) -> DataFrame: ...
     @overload
     def groupby(
diff --git a/pyproject.toml b/pyproject.toml
@@ -51,6 +51,8 @@ openpyxl = ">=3.0.10"
 tables = ">=3.7.0"
 lxml = ">=4.7.1,<4.9.0"
 pyreadstat = ">=1.1.9"
+xarray = ">=2022.6.0"
+tabulate = ">=0.8.10"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -1,16 +1,20 @@
 from __future__ import annotations
 
+from collections import defaultdict
 import datetime
 import io
 from pathlib import Path
 from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
+    Dict,
     Generic,
     Hashable,
     Iterable,
     Iterator,
+    List,
+    Mapping,
     Tuple,
     TypeVar,
     Union,
@@ -24,13 +28,16 @@
 )
 import pytest
 from typing_extensions import assert_type
+import xarray as xr
 
 from pandas._typing import Scalar
 
 from tests import check
 
 from pandas.io.parsers import TextFileReader
 
+DF = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
+
 
 def test_types_init() -> None:
     pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
@@ -777,12 +784,18 @@ def test_types_to_numpy() -> None:
 
 
 def test_to_markdown() -> None:
-    pytest.importorskip("tabulate")
     df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]})
-    df.to_markdown()
-    df.to_markdown(buf=None, mode="wt")
+    check(assert_type(df.to_markdown(), str), str)
+    check(assert_type(df.to_markdown(None), str), str)
+    check(assert_type(df.to_markdown(buf=None, mode="wt"), str), str)
     # index param was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
-    df.to_markdown(index=False)
+    check(assert_type(df.to_markdown(index=False), str), str)
+    with ensure_clean() as path:
+        check(assert_type(df.to_markdown(path), None), type(None))
+    with ensure_clean() as path:
+        check(assert_type(df.to_markdown(Path(path)), None), type(None))
+    sio = io.StringIO()
+    check(assert_type(df.to_markdown(sio), None), type(None))
 
 
 def test_types_to_feather() -> None:
@@ -1687,6 +1700,43 @@ def func() -> MyDataFrame[int]:
     func()
 
 
+def test_to_xarray() -> None:
+    check(assert_type(DF.to_xarray(), xr.Dataset), xr.Dataset)
+
+
+def test_to_records() -> None:
+    check(assert_type(DF.to_records(False, "int8"), np.recarray), np.recarray)
+    check(
+        assert_type(DF.to_records(False, index_dtypes=np.int8), np.recarray),
+        np.recarray,
+    )
+    check(
+        assert_type(
+            DF.to_records(False, {"col1": np.int8, "col2": np.int16}), np.recarray
+        ),
+        np.recarray,
+    )
+
+
+def test_to_dict() -> None:
+    check(assert_type(DF.to_dict(), Dict[Hashable, Any]), dict)
+    check(assert_type(DF.to_dict("split"), Dict[Hashable, Any]), dict)
+
+    target: Mapping = defaultdict(list)
+    check(assert_type(DF.to_dict(into=target), Mapping[Hashable, Any]), defaultdict)
+    target = defaultdict(list)
+    check(
+        assert_type(DF.to_dict("tight", into=target), Mapping[Hashable, Any]),
+        defaultdict,
+    )
+    target = defaultdict(list)
+    check(assert_type(DF.to_dict("records"), List[Dict[Hashable, Any]]), list)
+    check(
+        assert_type(DF.to_dict("records", into=target), List[Mapping[Hashable, Any]]),
+        list,
+    )
+
+
 def test_neg() -> None:
     # GH 253
     df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
diff --git a/tests/test_series.py b/tests/test_series.py
@@ -7,6 +7,7 @@
     TYPE_CHECKING,
     Any,
     Dict,
+    Hashable,
     Iterable,
     Iterator,
     List,
@@ -22,6 +23,7 @@
 from pandas.core.window import ExponentialMovingWindow
 import pytest
 from typing_extensions import assert_type
+import xarray as xr
 
 from pandas._typing import Scalar
 
@@ -909,7 +911,7 @@ def test_types_to_list() -> None:
 
 def test_types_to_dict() -> None:
     s = pd.Series(["a", "b", "c"], dtype=str)
-    assert_type(s.to_dict(), Dict[Any, str])
+    check(assert_type(s.to_dict(), Dict[Hashable, str]), dict)
 
 
 def test_categorical_codes():
@@ -1126,6 +1128,11 @@ def test_resample() -> None:
     check(assert_type(df.resample("2T").ohlc(), pd.DataFrame), pd.DataFrame)
 
 
+def test_to_xarray() -> None:
+    s = pd.Series([1, 2])
+    check(assert_type(s.to_xarray(), xr.DataArray), xr.DataArray)
+
+
 def test_neg() -> None:
     # GH 253
     sr = pd.Series([1, 2, 3])