Skip to content

CLN/TST: Remove tm.makeUnicodeIndex #47050

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def time_get_dummies(self, dtype):

class Encode:
def setup(self):
self.ser = Series(tm.makeUnicodeIndex())
self.ser = Series(tm.makeStringIndex())

def time_encode_decode(self):
self.ser.str.encode("utf-8").str.decode("utf-8")
Expand Down
20 changes: 7 additions & 13 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
randbool,
rands,
rands_array,
randu_array,
)
from pandas._testing._warnings import ( # noqa:F401
assert_produces_warning,
Expand Down Expand Up @@ -305,10 +304,6 @@ def makeStringIndex(k=10, name=None):
return Index(rands_array(nchars=10, size=k), name=name)


def makeUnicodeIndex(k=10, name=None):
return Index(randu_array(nchars=10, size=k), name=name)


def makeCategoricalIndex(k=10, n=3, name=None, **kwargs):
"""make a length k index or n categories"""
x = rands_array(nchars=4, size=n, replace=False)
Expand Down Expand Up @@ -521,10 +516,10 @@ def makeCustomIndex(
label will repeated at the corresponding level, you can specify just
the first few, the rest will use the default ndupe_l of 1.
len(ndupe_l) <= nlevels.
idx_type - "i"/"f"/"s"/"u"/"dt"/"p"/"td".
idx_type - "i"/"f"/"s"/"dt"/"p"/"td".
If idx_type is not None, `idx_nlevels` must be 1.
"i"/"f" creates an integer/float index,
"s"/"u" creates a string/unicode index
    "s" creates a string index
"dt" create a datetime index.
    "td" create a timedelta index.

Expand Down Expand Up @@ -554,7 +549,6 @@ def makeCustomIndex(
"i": makeIntIndex,
"f": makeFloatIndex,
"s": makeStringIndex,
"u": makeUnicodeIndex,
"dt": makeDateIndex,
"td": makeTimedeltaIndex,
"p": makePeriodIndex,
Expand All @@ -569,7 +563,7 @@ def makeCustomIndex(
elif idx_type is not None:
raise ValueError(
f"{repr(idx_type)} is not a legal value for `idx_type`, "
"use 'i'/'f'/'s'/'u'/'dt'/'p'/'td'."
"use 'i'/'f'/'s'/'dt'/'p'/'td'."
)

if len(ndupe_l) < nlevels:
Expand Down Expand Up @@ -651,10 +645,10 @@ def makeCustomDataframe(
nrows/ncol, the last label might have lower multiplicity.
dtype - passed to the DataFrame constructor as is, in case you wish to
have more control in conjunction with a custom `data_gen_f`
r_idx_type, c_idx_type - "i"/"f"/"s"/"u"/"dt"/"td".
r_idx_type, c_idx_type - "i"/"f"/"s"/"dt"/"td".
If idx_type is not None, `idx_nlevels` must be 1.
"i"/"f" creates an integer/float index,
"s"/"u" creates a string/unicode index
"s" creates a string index
"dt" create a datetime index.
"td" create a timedelta index.

Expand Down Expand Up @@ -689,10 +683,10 @@ def makeCustomDataframe(
assert c_idx_nlevels > 0
assert r_idx_nlevels > 0
assert r_idx_type is None or (
r_idx_type in ("i", "f", "s", "u", "dt", "p", "td") and r_idx_nlevels == 1
r_idx_type in ("i", "f", "s", "dt", "p", "td") and r_idx_nlevels == 1
)
assert c_idx_type is None or (
c_idx_type in ("i", "f", "s", "u", "dt", "p", "td") and c_idx_nlevels == 1
c_idx_type in ("i", "f", "s", "dt", "p", "td") and c_idx_nlevels == 1
)

columns = makeCustomIndex(
Expand Down
12 changes: 0 additions & 12 deletions pandas/_testing/_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,6 @@ def rands_array(nchars, size, dtype="O", replace=True):
return retval.astype(dtype)


def randu_array(nchars, size, dtype="O"):
"""
Generate an array of unicode strings.
"""
retval = (
np.random.choice(RANDU_CHARS, size=nchars * np.prod(size))
.view((np.unicode_, nchars))
.reshape(size)
)
return retval.astype(dtype)


def rands(nchars):
"""
Generate one random byte string.
Expand Down
1 change: 0 additions & 1 deletion pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,6 @@ def _create_mi_with_dt64tz_level():


indices_dict = {
"unicode": tm.makeUnicodeIndex(100),
"string": tm.makeStringIndex(100),
"datetime": tm.makeDateIndex(100),
"datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"),
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/computation/test_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,7 @@ def should_warn(*args):

class TestAlignment:

index_types = ["i", "u", "dt"]
index_types = ["i", "s", "dt"]
lhs_index_types = index_types + ["s"] # 'p'

def test_align_nested_unary_op(self, engine, parser):
Expand Down Expand Up @@ -829,7 +829,7 @@ def test_basic_frame_series_alignment(
@pytest.mark.parametrize("index_name", ["index", "columns"])
@pytest.mark.parametrize(
"r_idx_type, c_idx_type",
list(product(["i", "u", "s"], ["i", "u", "s"])) + [("dt", "dt")],
list(product(["i", "s"], ["i", "s"])) + [("dt", "dt")],
)
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
def test_basic_series_frame_alignment(
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ def test_to_csv_nrows(self, nrows):
"nrows", [2, 10, 99, 100, 101, 102, 198, 199, 200, 201, 202, 249, 250, 251]
)
@pytest.mark.parametrize(
"r_idx_type, c_idx_type", [("i", "i"), ("s", "s"), ("u", "dt"), ("p", "p")]
"r_idx_type, c_idx_type", [("i", "i"), ("s", "s"), ("s", "dt"), ("p", "p")]
)
@pytest.mark.parametrize("ncols", [1, 2, 3, 4])
def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols):
Expand Down
24 changes: 13 additions & 11 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,24 +150,26 @@ def test_indices_grouped_by_tuple_with_lambda(self):


class TestGrouping:
def test_grouper_index_types(self):
# related GH5375
# groupby misbehaving when using a Floatlike index
df = DataFrame(np.arange(10).reshape(5, 2), columns=list("AB"))
for index in [
@pytest.mark.parametrize(
"index",
[
tm.makeFloatIndex,
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeIntIndex,
tm.makeDateIndex,
tm.makePeriodIndex,
]:
],
)
def test_grouper_index_types(self, index):
# related GH5375
# groupby misbehaving when using a Floatlike index
df = DataFrame(np.arange(10).reshape(5, 2), columns=list("AB"))

df.index = index(len(df))
df.groupby(list("abcde"), group_keys=False).apply(lambda x: x)
df.index = index(len(df))
df.groupby(list("abcde"), group_keys=False).apply(lambda x: x)

df.index = list(reversed(df.index.tolist()))
df.groupby(list("abcde"), group_keys=False).apply(lambda x: x)
df.index = list(reversed(df.index.tolist()))
df.groupby(list("abcde"), group_keys=False).apply(lambda x: x)

def test_grouper_multilevel_freq(self):

Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,6 @@ def test_view_with_args(self, index):
@pytest.mark.parametrize(
"index",
[
"unicode",
"string",
pytest.param("categorical", marks=pytest.mark.xfail(reason="gh-25464")),
"bool-object",
Expand Down Expand Up @@ -927,7 +926,7 @@ def test_slice_keep_name(self):

@pytest.mark.parametrize(
"index",
["unicode", "string", "datetime", "int", "uint", "float"],
["string", "datetime", "int", "uint", "float"],
indirect=True,
)
def test_join_self(self, index, join_type):
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/indexing/test_floats.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ def check(self, result, original, indexer, getitem):
"index_func",
[
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeCategoricalIndex,
tm.makeDateIndex,
tm.makeTimedeltaIndex,
Expand Down Expand Up @@ -83,7 +82,6 @@ def test_scalar_non_numeric(self, index_func, frame_or_series, indexer_sl):
"index_func",
[
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeCategoricalIndex,
tm.makeDateIndex,
tm.makeTimedeltaIndex,
Expand Down Expand Up @@ -220,7 +218,6 @@ def test_scalar_float(self, frame_or_series):
"index_func",
[
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeDateIndex,
tm.makeTimedeltaIndex,
tm.makePeriodIndex,
Expand Down
51 changes: 25 additions & 26 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -950,36 +950,35 @@ def test_to_string_with_column_specific_col_space(self):
result = df.to_string(col_space=[10, 11, 12])
assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12)

def test_to_string_truncate_indices(self):
for index in [
@pytest.mark.parametrize(
"index",
[
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeIntIndex,
tm.makeDateIndex,
tm.makePeriodIndex,
]:
for column in [tm.makeStringIndex]:
for h in [10, 20]:
for w in [10, 20]:
with option_context("display.expand_frame_repr", False):
df = DataFrame(index=index(h), columns=column(w))
with option_context("display.max_rows", 15):
if h == 20:
assert has_vertically_truncated_repr(df)
else:
assert not has_vertically_truncated_repr(df)
with option_context("display.max_columns", 15):
if w == 20:
assert has_horizontally_truncated_repr(df)
else:
assert not (has_horizontally_truncated_repr(df))
with option_context(
"display.max_rows", 15, "display.max_columns", 15
):
if h == 20 and w == 20:
assert has_doubly_truncated_repr(df)
else:
assert not has_doubly_truncated_repr(df)
],
)
@pytest.mark.parametrize("h", [10, 20])
@pytest.mark.parametrize("w", [10, 20])
def test_to_string_truncate_indices(self, index, h, w):
with option_context("display.expand_frame_repr", False):
df = DataFrame(index=index(h), columns=tm.makeStringIndex(w))
with option_context("display.max_rows", 15):
if h == 20:
assert has_vertically_truncated_repr(df)
else:
assert not has_vertically_truncated_repr(df)
with option_context("display.max_columns", 15):
if w == 20:
assert has_horizontally_truncated_repr(df)
else:
assert not (has_horizontally_truncated_repr(df))
with option_context("display.max_rows", 15, "display.max_columns", 15):
if h == 20 and w == 20:
assert has_doubly_truncated_repr(df)
else:
assert not has_doubly_truncated_repr(df)

def test_to_string_truncate_multilevel(self):
arrays = [
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/io/pytables/test_put.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,6 @@ def test_put_mixed_type(setup_path):
["fixed", tm.makeDateIndex],
["table", tm.makePeriodIndex], # GH#7796
["fixed", tm.makePeriodIndex],
["table", tm.makeUnicodeIndex],
["fixed", tm.makeUnicodeIndex],
],
)
def test_store_index_types(setup_path, format, index):
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,6 @@ def test_to_hdf_with_object_column_names(setup_path):
types_should_run = [
tm.makeStringIndex,
tm.makeCategoricalIndex,
tm.makeUnicodeIndex,
]

for index in types_should_fail:
Expand Down
33 changes: 18 additions & 15 deletions pandas/tests/io/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,17 @@
import pandas._testing as tm
from pandas.util import _test_decorators as td

df1 = DataFrame(
{
"int": [1, 3],
"float": [2.0, np.nan],
"str": ["t", "s"],
"dt": date_range("2018-06-18", periods=2),
}
)
text = str(df1.to_csv(index=False)).encode()

@pytest.fixture
def df1():
return DataFrame(
{
"int": [1, 3],
"float": [2.0, np.nan],
"str": ["t", "s"],
"dt": date_range("2018-06-18", periods=2),
}
)


@pytest.fixture
Expand All @@ -40,7 +42,8 @@ def cleared_fs():
memfs.store.clear()


def test_read_csv(cleared_fs):
def test_read_csv(cleared_fs, df1):
text = str(df1.to_csv(index=False)).encode()
with cleared_fs.open("test/test.csv", "wb") as w:
w.write(text)
df2 = read_csv("memory://test/test.csv", parse_dates=["dt"])
Expand All @@ -65,7 +68,7 @@ def test_reasonable_error(monkeypatch, cleared_fs):
read_csv("couldexist://test/test.csv")


def test_to_csv(cleared_fs):
def test_to_csv(cleared_fs, df1):
df1.to_csv("memory://test/test.csv", index=True)

df2 = read_csv("memory://test/test.csv", parse_dates=["dt"], index_col=0)
Expand All @@ -74,7 +77,7 @@ def test_to_csv(cleared_fs):


@pytest.mark.parametrize("ext", ["xls", "xlsx"])
def test_to_excel(cleared_fs, ext):
def test_to_excel(cleared_fs, ext, df1):
if ext == "xls":
pytest.importorskip("xlwt")
else:
Expand All @@ -89,7 +92,7 @@ def test_to_excel(cleared_fs, ext):


@pytest.mark.parametrize("binary_mode", [False, True])
def test_to_csv_fsspec_object(cleared_fs, binary_mode):
def test_to_csv_fsspec_object(cleared_fs, binary_mode, df1):
fsspec = pytest.importorskip("fsspec")

path = "memory://test/test.csv"
Expand Down Expand Up @@ -153,7 +156,7 @@ def test_excel_options(fsspectest, extension):


@td.skip_if_no("fastparquet")
def test_to_parquet_new_file(cleared_fs):
def test_to_parquet_new_file(cleared_fs, df1):
"""Regression test for writing to a not-yet-existent GCS Parquet file."""
df1.to_parquet(
"memory://test/test.csv", index=True, engine="fastparquet", compression=None
Expand Down Expand Up @@ -230,7 +233,7 @@ def test_s3_protocols(s3_resource, tips_file, protocol, s3so):
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) fastparquet
@td.skip_if_no("s3fs")
@td.skip_if_no("fastparquet")
def test_s3_parquet(s3_resource, s3so):
def test_s3_parquet(s3_resource, s3so, df1):
fn = "s3://pandas-test/test.parquet"
df1.to_parquet(
fn, index=False, engine="fastparquet", compression=None, storage_options=s3so
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def get_objs():
tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern"),
tm.makePeriodIndex(10, name="a"),
tm.makeStringIndex(10, name="a"),
tm.makeUnicodeIndex(10, name="a"),
]

arr = np.random.randn(10)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_time_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def f(df):
"name, func",
[
("Int64Index", tm.makeIntIndex),
("Index", tm.makeUnicodeIndex),
("Index", tm.makeStringIndex),
("Float64Index", tm.makeFloatIndex),
("MultiIndex", lambda m: tm.makeCustomIndex(m, 2)),
],
Expand Down
Loading