ENH: Improve typing of some general functions

bashtage · bashtage · commit 3da08b10b11b · 2022-10-05T07:23:43.000+01:00
diff --git a/pandas-stubs/core/algorithms.pyi b/pandas-stubs/core/algorithms.pyi
@@ -4,17 +4,30 @@ from typing import (
 )
 
 import numpy as np
+import pandas as pd
 from pandas import (
     Categorical,
+    CategoricalIndex,
+    DatetimeIndex,
     Index,
+    PeriodIndex,
+    RangeIndex,
     Series,
 )
 from pandas.api.extensions import ExtensionArray
 
 from pandas._typing import AnyArrayLike
 
 @overload
-def unique(values: Index) -> Index: ...
+def unique(values: DatetimeIndex) -> DatetimeIndex: ...
+@overload
+def unique(values: PeriodIndex) -> PeriodIndex: ...
+@overload
+def unique(values: CategoricalIndex) -> CategoricalIndex: ...
+@overload
+def unique(values: RangeIndex | pd.Float64Index) -> np.ndarray: ...
+@overload
+def unique(values: Index) -> Index | np.ndarray: ...
 @overload
 def unique(values: Categorical) -> Categorical: ...
 @overload
diff --git a/pandas-stubs/core/reshape/melt.pyi b/pandas-stubs/core/reshape/melt.pyi
@@ -1,16 +1,27 @@
+from typing import Hashable
+
 import numpy as np
 from pandas.core.frame import DataFrame
 
+from pandas._typing import HashableT
+
 def melt(
     frame: DataFrame,
     id_vars: tuple | list | np.ndarray | None = ...,
     value_vars: tuple | list | np.ndarray | None = ...,
     var_name: str | None = ...,
-    value_name: str = ...,
+    value_name: Hashable = ...,  # any hashable label, not only str
     col_level: int | str | None = ...,
     ignore_index: bool = ...,
 ) -> DataFrame: ...
-def lreshape(data: DataFrame, groups, dropna: bool = ..., label=...) -> DataFrame: ...
+def lreshape(
+    data: DataFrame, groups: dict[HashableT, list[HashableT]], dropna: bool = ...
+) -> DataFrame: ...  # NOTE(review): keys and list items share one HashableT
 def wide_to_long(
-    df: DataFrame, stubnames, i, j, sep: str = ..., suffix: str = ...
+    df: DataFrame,
+    stubnames: str | list[str],  # one stub name or a list of them
+    i: str | list[str],  # one column name or a list of them
+    j: str,
+    sep: str = ...,
+    suffix: str = ...,
 ) -> DataFrame: ...
diff --git a/tests/test_pandas.py b/tests/test_pandas.py
@@ -208,7 +208,7 @@ def test_unique() -> None:
                     ]
                 )
             ),
-            pd.Index,
+            Union[pd.Index, np.ndarray],
         ),
         pd.DatetimeIndex,
     )
@@ -246,6 +246,34 @@ def test_unique() -> None:
         ),
         np.ndarray,
     )
+    check(
+        assert_type(
+            pd.unique(pd.Index(["a", "b", "c", "a"])), Union[pd.Index, np.ndarray]
+        ),
+        np.ndarray,
+    )
+    check(
+        assert_type(pd.unique(pd.RangeIndex(0, 10)), np.ndarray),
+        np.ndarray,
+    )
+    check(
+        assert_type(pd.unique(pd.Categorical(["a", "b", "c", "a"])), pd.Categorical),
+        pd.Categorical,
+    )
+    check(
+        assert_type(
+            pd.unique(pd.period_range("2001Q1", periods=10, freq="D")),
+            pd.PeriodIndex,
+        ),
+        pd.PeriodIndex,
+    )
+    check(
+        assert_type(
+            pd.unique(pd.timedelta_range(start="1 day", periods=4)),
+            Union[pd.Index, np.ndarray],
+        ),
+        np.ndarray,
+    )
 
 
 # GH 200
@@ -423,3 +451,106 @@ def test_to_numeric_array_series() -> None:
         assert_type(pd.to_numeric(pd.Series([1, 2, 3]), downcast="float"), pd.Series),
         pd.Series,
     )
+
+
+def test_wide_to_long() -> None:
+    df = pd.DataFrame(
+        {
+            "A1970": {0: "a", 1: "b", 2: "c"},
+            "A1980": {0: "d", 1: "e", 2: "f"},
+            "B1970": {0: 2.5, 1: 1.2, 2: 0.7},
+            "B1980": {0: 3.2, 1: 1.3, 2: 0.1},
+            "X": dict(zip(range(3), np.random.randn(3))),
+        }
+    )
+    df["id"] = df.index  # column used for the scalar ``i`` case
+    df["id2"] = df.index + 1  # second column for the list-valued ``i`` case
+    check(  # ``i`` given as a single str
+        assert_type(pd.wide_to_long(df, ["A", "B"], i="id", j="year"), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(  # ``i`` given as a list of str
+        assert_type(
+            pd.wide_to_long(df, ["A", "B"], i=["id", "id2"], j="year"), pd.DataFrame
+        ),
+        pd.DataFrame,
+    )
+
+
+def test_melt() -> None:
+    df = pd.DataFrame(
+        {
+            "A": {0: "a", 1: "b", 2: "c"},
+            "B": {0: 1, 1: 3, 2: 5},
+            "C": {0: 2, 1: 4, 2: 6},
+            "D": {0: 3, 1: 6, 2: 9},
+            "E": {0: 3, 1: 6, 2: 9},
+        }
+    )
+    check(  # flat columns, keeping the original index
+        assert_type(
+            pd.melt(df, id_vars=["A"], value_vars=["B"], ignore_index=False),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+    check(  # value_name given as a tuple rather than a str
+        assert_type(
+            pd.melt(df, id_vars=["A"], value_vars=["B"], value_name=("F",)),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+    df.columns = pd.MultiIndex.from_arrays([list("ABCDE"), list("FGHIJ")])
+    check(  # MultiIndex columns, with col_level passed as an int
+        assert_type(
+            pd.melt(
+                df, id_vars=["A"], value_vars=["B"], ignore_index=False, col_level=0
+            ),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+
+
+def test_lreshape() -> None:
+    data = pd.DataFrame(
+        {
+            "hr1": [514, 573],
+            "hr2": [545, 526],
+            "team": ["Red Sox", "Yankees"],
+            "year1": [2007, 2007],
+            "year2": [2008, 2008],
+        }
+    )
+    check(  # plain str labels, groups passed positionally
+        assert_type(
+            pd.lreshape(
+                data, {"year": ["year1", "year2"], "hr": ["hr1", "hr2"]}, dropna=True
+            ),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+    data2 = pd.DataFrame(  # like data, but some labels are tuples
+        {
+            "hr1": [514, 573],
+            ("hr2",): [545, 526],
+            "team": ["Red Sox", "Yankees"],
+            ("year1",): [2007, 2007],
+            "year2": [2008, 2008],
+        }
+    )
+    from typing import Hashable
+
+    groups: dict[Hashable, list[Hashable]] = {  # labels mix str and tuple
+        ("year",): [("year1",), "year2"],
+        ("hr",): ["hr1", ("hr2",)],
+    }
+    check(  # mixed labels, groups passed by keyword
+        assert_type(
+            pd.lreshape(data2, groups=groups),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )