Skip to content

CLN/TST: Remove tm.makeUnicodeIndex #47050

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def time_get_dummies(self, dtype):

class Encode:
def setup(self):
self.ser = Series(tm.makeUnicodeIndex())
self.ser = Series(tm.makeStringIndex())

def time_encode_decode(self):
self.ser.str.encode("utf-8").str.decode("utf-8")
Expand Down
20 changes: 7 additions & 13 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
randbool,
rands,
rands_array,
randu_array,
)
from pandas._testing._warnings import ( # noqa:F401
assert_produces_warning,
Expand Down Expand Up @@ -305,10 +304,6 @@ def makeStringIndex(k=10, name=None):
return Index(rands_array(nchars=10, size=k), name=name)


def makeUnicodeIndex(k=10, name=None):
return Index(randu_array(nchars=10, size=k), name=name)


def makeCategoricalIndex(k=10, n=3, name=None, **kwargs):
"""make a length k index or n categories"""
x = rands_array(nchars=4, size=n, replace=False)
Expand Down Expand Up @@ -521,10 +516,10 @@ def makeCustomIndex(
label will repeated at the corresponding level, you can specify just
the first few, the rest will use the default ndupe_l of 1.
len(ndupe_l) <= nlevels.
idx_type - "i"/"f"/"s"/"u"/"dt"/"p"/"td".
idx_type - "i"/"f"/"s"/"dt"/"p"/"td".
If idx_type is not None, `idx_nlevels` must be 1.
"i"/"f" creates an integer/float index,
"s"/"u" creates a string/unicode index
    "s" creates a string index
"dt" create a datetime index.
    "td" create a timedelta index.

Expand Down Expand Up @@ -554,7 +549,6 @@ def makeCustomIndex(
"i": makeIntIndex,
"f": makeFloatIndex,
"s": makeStringIndex,
"u": makeUnicodeIndex,
"dt": makeDateIndex,
"td": makeTimedeltaIndex,
"p": makePeriodIndex,
Expand All @@ -569,7 +563,7 @@ def makeCustomIndex(
elif idx_type is not None:
raise ValueError(
f"{repr(idx_type)} is not a legal value for `idx_type`, "
"use 'i'/'f'/'s'/'u'/'dt'/'p'/'td'."
"use 'i'/'f'/'s'/'dt'/'p'/'td'."
)

if len(ndupe_l) < nlevels:
Expand Down Expand Up @@ -651,10 +645,10 @@ def makeCustomDataframe(
nrows/ncol, the last label might have lower multiplicity.
dtype - passed to the DataFrame constructor as is, in case you wish to
have more control in conjunction with a custom `data_gen_f`
r_idx_type, c_idx_type - "i"/"f"/"s"/"u"/"dt"/"td".
r_idx_type, c_idx_type - "i"/"f"/"s"/"dt"/"td".
If idx_type is not None, `idx_nlevels` must be 1.
"i"/"f" creates an integer/float index,
"s"/"u" creates a string/unicode index
"s" creates a string index
"dt" create a datetime index.
"td" create a timedelta index.

Expand Down Expand Up @@ -689,10 +683,10 @@ def makeCustomDataframe(
assert c_idx_nlevels > 0
assert r_idx_nlevels > 0
assert r_idx_type is None or (
r_idx_type in ("i", "f", "s", "u", "dt", "p", "td") and r_idx_nlevels == 1
r_idx_type in ("i", "f", "s", "dt", "p", "td") and r_idx_nlevels == 1
)
assert c_idx_type is None or (
c_idx_type in ("i", "f", "s", "u", "dt", "p", "td") and c_idx_nlevels == 1
c_idx_type in ("i", "f", "s", "dt", "p", "td") and c_idx_nlevels == 1
)

columns = makeCustomIndex(
Expand Down
12 changes: 0 additions & 12 deletions pandas/_testing/_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,6 @@ def rands_array(nchars, size, dtype="O", replace=True):
return retval.astype(dtype)


def randu_array(nchars, size, dtype="O"):
"""
Generate an array of unicode strings.
"""
retval = (
np.random.choice(RANDU_CHARS, size=nchars * np.prod(size))
.view((np.unicode_, nchars))
.reshape(size)
)
return retval.astype(dtype)


def rands(nchars):
"""
Generate one random byte string.
Expand Down
1 change: 0 additions & 1 deletion pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,6 @@ def _create_mi_with_dt64tz_level():


indices_dict = {
"unicode": tm.makeUnicodeIndex(100),
"string": tm.makeStringIndex(100),
"datetime": tm.makeDateIndex(100),
"datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"),
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/computation/test_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,7 @@ def should_warn(*args):

class TestAlignment:

index_types = ["i", "u", "dt"]
index_types = ["i", "s", "dt"]
lhs_index_types = index_types + ["s"] # 'p'

def test_align_nested_unary_op(self, engine, parser):
Expand Down Expand Up @@ -829,7 +829,7 @@ def test_basic_frame_series_alignment(
@pytest.mark.parametrize("index_name", ["index", "columns"])
@pytest.mark.parametrize(
"r_idx_type, c_idx_type",
list(product(["i", "u", "s"], ["i", "u", "s"])) + [("dt", "dt")],
list(product(["i", "s"], ["i", "s"])) + [("dt", "dt")],
)
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
def test_basic_series_frame_alignment(
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ def test_to_csv_nrows(self, nrows):
"nrows", [2, 10, 99, 100, 101, 102, 198, 199, 200, 201, 202, 249, 250, 251]
)
@pytest.mark.parametrize(
"r_idx_type, c_idx_type", [("i", "i"), ("s", "s"), ("u", "dt"), ("p", "p")]
"r_idx_type, c_idx_type", [("i", "i"), ("s", "s"), ("s", "dt"), ("p", "p")]
)
@pytest.mark.parametrize("ncols", [1, 2, 3, 4])
def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols):
Expand Down
24 changes: 13 additions & 11 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,24 +150,26 @@ def test_indices_grouped_by_tuple_with_lambda(self):


class TestGrouping:
def test_grouper_index_types(self):
# related GH5375
# groupby misbehaving when using a Floatlike index
df = DataFrame(np.arange(10).reshape(5, 2), columns=list("AB"))
for index in [
@pytest.mark.parametrize(
"index",
[
tm.makeFloatIndex,
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeIntIndex,
tm.makeDateIndex,
tm.makePeriodIndex,
]:
],
)
def test_grouper_index_types(self, index):
# related GH5375
# groupby misbehaving when using a Floatlike index
df = DataFrame(np.arange(10).reshape(5, 2), columns=list("AB"))

df.index = index(len(df))
df.groupby(list("abcde"), group_keys=False).apply(lambda x: x)
df.index = index(len(df))
df.groupby(list("abcde"), group_keys=False).apply(lambda x: x)

df.index = list(reversed(df.index.tolist()))
df.groupby(list("abcde"), group_keys=False).apply(lambda x: x)
df.index = list(reversed(df.index.tolist()))
df.groupby(list("abcde"), group_keys=False).apply(lambda x: x)

def test_grouper_multilevel_freq(self):

Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,6 @@ def test_view_with_args(self, index):
@pytest.mark.parametrize(
"index",
[
"unicode",
"string",
pytest.param("categorical", marks=pytest.mark.xfail(reason="gh-25464")),
"bool-object",
Expand Down Expand Up @@ -927,7 +926,7 @@ def test_slice_keep_name(self):

@pytest.mark.parametrize(
"index",
["unicode", "string", "datetime", "int", "uint", "float"],
["string", "datetime", "int", "uint", "float"],
indirect=True,
)
def test_join_self(self, index, join_type):
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/indexing/test_floats.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ def check(self, result, original, indexer, getitem):
"index_func",
[
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeCategoricalIndex,
tm.makeDateIndex,
tm.makeTimedeltaIndex,
Expand Down Expand Up @@ -83,7 +82,6 @@ def test_scalar_non_numeric(self, index_func, frame_or_series, indexer_sl):
"index_func",
[
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeCategoricalIndex,
tm.makeDateIndex,
tm.makeTimedeltaIndex,
Expand Down Expand Up @@ -220,7 +218,6 @@ def test_scalar_float(self, frame_or_series):
"index_func",
[
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeDateIndex,
tm.makeTimedeltaIndex,
tm.makePeriodIndex,
Expand Down
51 changes: 25 additions & 26 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -950,36 +950,35 @@ def test_to_string_with_column_specific_col_space(self):
result = df.to_string(col_space=[10, 11, 12])
assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12)

def test_to_string_truncate_indices(self):
for index in [
@pytest.mark.parametrize(
"index",
[
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeIntIndex,
tm.makeDateIndex,
tm.makePeriodIndex,
]:
for column in [tm.makeStringIndex]:
for h in [10, 20]:
for w in [10, 20]:
with option_context("display.expand_frame_repr", False):
df = DataFrame(index=index(h), columns=column(w))
with option_context("display.max_rows", 15):
if h == 20:
assert has_vertically_truncated_repr(df)
else:
assert not has_vertically_truncated_repr(df)
with option_context("display.max_columns", 15):
if w == 20:
assert has_horizontally_truncated_repr(df)
else:
assert not (has_horizontally_truncated_repr(df))
with option_context(
"display.max_rows", 15, "display.max_columns", 15
):
if h == 20 and w == 20:
assert has_doubly_truncated_repr(df)
else:
assert not has_doubly_truncated_repr(df)
],
)
@pytest.mark.parametrize("h", [10, 20])
@pytest.mark.parametrize("w", [10, 20])
def test_to_string_truncate_indices(self, index, h, w):
with option_context("display.expand_frame_repr", False):
df = DataFrame(index=index(h), columns=tm.makeStringIndex(w))
with option_context("display.max_rows", 15):
if h == 20:
assert has_vertically_truncated_repr(df)
else:
assert not has_vertically_truncated_repr(df)
with option_context("display.max_columns", 15):
if w == 20:
assert has_horizontally_truncated_repr(df)
else:
assert not (has_horizontally_truncated_repr(df))
with option_context("display.max_rows", 15, "display.max_columns", 15):
if h == 20 and w == 20:
assert has_doubly_truncated_repr(df)
else:
assert not has_doubly_truncated_repr(df)

def test_to_string_truncate_multilevel(self):
arrays = [
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/io/pytables/test_put.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,6 @@ def test_put_mixed_type(setup_path):
["fixed", tm.makeDateIndex],
["table", tm.makePeriodIndex], # GH#7796
["fixed", tm.makePeriodIndex],
["table", tm.makeUnicodeIndex],
["fixed", tm.makeUnicodeIndex],
],
)
def test_store_index_types(setup_path, format, index):
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,6 @@ def test_to_hdf_with_object_column_names(setup_path):
types_should_run = [
tm.makeStringIndex,
tm.makeCategoricalIndex,
tm.makeUnicodeIndex,
]

for index in types_should_fail:
Expand Down
33 changes: 18 additions & 15 deletions pandas/tests/io/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,17 @@
import pandas._testing as tm
from pandas.util import _test_decorators as td

df1 = DataFrame(
{
"int": [1, 3],
"float": [2.0, np.nan],
"str": ["t", "s"],
"dt": date_range("2018-06-18", periods=2),
}
)
text = str(df1.to_csv(index=False)).encode()

@pytest.fixture
def df1():
return DataFrame(
{
"int": [1, 3],
"float": [2.0, np.nan],
"str": ["t", "s"],
"dt": date_range("2018-06-18", periods=2),
}
)


@pytest.fixture
Expand All @@ -40,7 +42,8 @@ def cleared_fs():
memfs.store.clear()


def test_read_csv(cleared_fs):
def test_read_csv(cleared_fs, df1):
text = str(df1.to_csv(index=False)).encode()
with cleared_fs.open("test/test.csv", "wb") as w:
w.write(text)
df2 = read_csv("memory://test/test.csv", parse_dates=["dt"])
Expand All @@ -65,7 +68,7 @@ def test_reasonable_error(monkeypatch, cleared_fs):
read_csv("couldexist://test/test.csv")


def test_to_csv(cleared_fs):
def test_to_csv(cleared_fs, df1):
df1.to_csv("memory://test/test.csv", index=True)

df2 = read_csv("memory://test/test.csv", parse_dates=["dt"], index_col=0)
Expand All @@ -74,7 +77,7 @@ def test_to_csv(cleared_fs):


@pytest.mark.parametrize("ext", ["xls", "xlsx"])
def test_to_excel(cleared_fs, ext):
def test_to_excel(cleared_fs, ext, df1):
if ext == "xls":
pytest.importorskip("xlwt")
else:
Expand All @@ -89,7 +92,7 @@ def test_to_excel(cleared_fs, ext):


@pytest.mark.parametrize("binary_mode", [False, True])
def test_to_csv_fsspec_object(cleared_fs, binary_mode):
def test_to_csv_fsspec_object(cleared_fs, binary_mode, df1):
fsspec = pytest.importorskip("fsspec")

path = "memory://test/test.csv"
Expand Down Expand Up @@ -153,7 +156,7 @@ def test_excel_options(fsspectest, extension):


@td.skip_if_no("fastparquet")
def test_to_parquet_new_file(cleared_fs):
def test_to_parquet_new_file(cleared_fs, df1):
"""Regression test for writing to a not-yet-existent GCS Parquet file."""
df1.to_parquet(
"memory://test/test.csv", index=True, engine="fastparquet", compression=None
Expand Down Expand Up @@ -230,7 +233,7 @@ def test_s3_protocols(s3_resource, tips_file, protocol, s3so):
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) fastparquet
@td.skip_if_no("s3fs")
@td.skip_if_no("fastparquet")
def test_s3_parquet(s3_resource, s3so):
def test_s3_parquet(s3_resource, s3so, df1):
fn = "s3://pandas-test/test.parquet"
df1.to_parquet(
fn, index=False, engine="fastparquet", compression=None, storage_options=s3so
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def get_objs():
tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern"),
tm.makePeriodIndex(10, name="a"),
tm.makeStringIndex(10, name="a"),
tm.makeUnicodeIndex(10, name="a"),
]

arr = np.random.randn(10)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_time_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def f(df):
"name, func",
[
("Int64Index", tm.makeIntIndex),
("Index", tm.makeUnicodeIndex),
("Index", tm.makeStringIndex),
("Float64Index", tm.makeFloatIndex),
("MultiIndex", lambda m: tm.makeCustomIndex(m, 2)),
],
Expand Down
Loading