Skip to content

TST/CLN: Remove getSeriesData/makeObjectSeries/makeDatetimeIndex #56241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 1 addition & 35 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import collections
from collections import Counter
from datetime import datetime
from decimal import Decimal
import operator
import os
Expand Down Expand Up @@ -36,12 +35,10 @@
ArrowDtype,
Categorical,
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
RangeIndex,
Series,
bdate_range,
date_range,
period_range,
timedelta_range,
Expand Down Expand Up @@ -348,34 +345,12 @@ def getCols(k) -> str:
return string.ascii_uppercase[:k]


def makeDateIndex(
k: int = 10, freq: Frequency = "B", name=None, **kwargs
) -> DatetimeIndex:
dt = datetime(2000, 1, 1)
dr = bdate_range(dt, periods=k, freq=freq, name=name)
return DatetimeIndex(dr, name=name, **kwargs)


def makeObjectSeries(name=None) -> Series:
data = [f"foo_{i}" for i in range(_N)]
index = Index([f"bar_{i}" for i in range(_N)])
return Series(data, index=index, name=name, dtype=object)


def getSeriesData() -> dict[str, Series]:
index = Index([f"foo_{i}" for i in range(_N)])
return {
c: Series(np.random.default_rng(i).standard_normal(_N), index=index)
for i, c in enumerate(getCols(_K))
}


def makeTimeSeries(nper=None, freq: Frequency = "B", name=None) -> Series:
if nper is None:
nper = _N
return Series(
np.random.default_rng(2).standard_normal(nper),
index=makeDateIndex(nper, freq=freq),
index=date_range("2000-01-01", periods=nper, freq=freq),
name=name,
)

Expand All @@ -390,11 +365,6 @@ def makeTimeDataFrame(nper=None, freq: Frequency = "B") -> DataFrame:
return DataFrame(data)


def makeDataFrame() -> DataFrame:
data = getSeriesData()
return DataFrame(data)


def makeCustomIndex(
nentries,
nlevels,
Expand Down Expand Up @@ -925,16 +895,12 @@ def shares_memory(left, right) -> bool:
"get_finest_unit",
"get_obj",
"get_op_from_name",
"getSeriesData",
"getTimeSeriesData",
"iat",
"iloc",
"loc",
"makeCustomDataframe",
"makeCustomIndex",
"makeDataFrame",
"makeDateIndex",
"makeObjectSeries",
"makeTimeDataFrame",
"makeTimeSeries",
"maybe_produces_warning",
Expand Down
67 changes: 19 additions & 48 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
Series,
Timedelta,
Timestamp,
date_range,
period_range,
timedelta_range,
)
Expand Down Expand Up @@ -608,15 +609,15 @@ def _create_mi_with_dt64tz_level():
"""
# GH#8367 round trip with pickle
return MultiIndex.from_product(
[[1, 2], ["a", "b"], pd.date_range("20130101", periods=3, tz="US/Eastern")],
[[1, 2], ["a", "b"], date_range("20130101", periods=3, tz="US/Eastern")],
names=["one", "two", "three"],
)


indices_dict = {
"string": Index([f"pandas_{i}" for i in range(100)]),
"datetime": tm.makeDateIndex(100),
"datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"),
"datetime": date_range("2020-01-01", periods=100),
"datetime-tz": date_range("2020-01-01", periods=100, tz="US/Pacific"),
"period": period_range("2020-01-01", periods=100, freq="D"),
"timedelta": timedelta_range(start="1 day", periods=100, freq="D"),
"range": RangeIndex(100),
Expand All @@ -631,7 +632,7 @@ def _create_mi_with_dt64tz_level():
"float32": Index(np.arange(100), dtype="float32"),
"float64": Index(np.arange(100), dtype="float64"),
"bool-object": Index([True, False] * 5, dtype=object),
"bool-dtype": Index(np.random.default_rng(2).standard_normal(10) < 0),
"bool-dtype": Index([True, False] * 5, dtype=bool),
"complex64": Index(
np.arange(100, dtype="complex64") + 1.0j * np.arange(100, dtype="complex64")
),
Expand Down Expand Up @@ -751,9 +752,9 @@ def object_series() -> Series:
"""
Fixture for Series of dtype object with Index of unique strings
"""
s = tm.makeObjectSeries()
s.name = "objects"
return s
data = [f"foo_{i}" for i in range(30)]
index = Index([f"bar_{i}" for i in range(30)], dtype=object)
return Series(data, index=index, name="objects", dtype=object)


@pytest.fixture
Expand Down Expand Up @@ -839,27 +840,12 @@ def int_frame() -> DataFrame:
Fixture for DataFrame of ints with index of unique strings

Columns are ['A', 'B', 'C', 'D']

A B C D
vpBeWjM651 1 0 1 0
5JyxmrP1En -1 0 0 0
qEDaoD49U2 -1 1 0 0
m66TkTfsFe 0 0 0 0
EHPaNzEUFm -1 0 -1 0
fpRJCevQhi 2 0 0 0
OlQvnmfi3Q 0 0 -2 0
... .. .. .. ..
uB1FPlz4uP 0 0 0 1
EcSe6yNzCU 0 0 -1 0
L50VudaiI8 -1 1 -2 0
y3bpw4nwIp 0 -1 0 0
H0RdLLwrCT 1 1 0 0
rY82K0vMwm 0 0 0 0
1OPIUjnkjk 2 0 0 0

[30 rows x 4 columns]
"""
return DataFrame(tm.getSeriesData()).astype("int64")
return DataFrame(
np.ones((30, 4), dtype=np.int64),
index=Index([f"foo_{i}" for i in range(30)], dtype=object),
columns=Index(list("ABCD"), dtype=object),
)


@pytest.fixture
Expand All @@ -868,27 +854,12 @@ def float_frame() -> DataFrame:
Fixture for DataFrame of floats with index of unique strings

Columns are ['A', 'B', 'C', 'D'].

A B C D
P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465
qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901
tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433
wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651
M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938
QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053
r78Jwns6dn -0.653707 0.883127 0.682199 0.206159
... ... ... ... ...
IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316
lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999
qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121
yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962
65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987
eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871
xSucinXxuV -1.263557 0.252799 -0.552247 0.400426

[30 rows x 4 columns]
"""
return DataFrame(tm.getSeriesData())
"""
return DataFrame(
np.random.default_rng(2).standard_normal((30, 4)),
index=Index([f"foo_{i}" for i in range(30)], dtype=object),
columns=Index(list("ABCD"), dtype=object),
)


@pytest.fixture
Expand Down
7 changes: 4 additions & 3 deletions pandas/tests/apply/test_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,10 @@ def test_numba_vs_python_indexing():
"reduction",
[lambda x: x.mean(), lambda x: x.min(), lambda x: x.max(), lambda x: x.sum()],
)
def test_numba_vs_python_reductions(float_frame, reduction, apply_axis):
result = float_frame.apply(reduction, engine="numba", axis=apply_axis)
expected = float_frame.apply(reduction, engine="python", axis=apply_axis)
def test_numba_vs_python_reductions(reduction, apply_axis):
df = DataFrame(np.ones((4, 4), dtype=np.float64))
result = df.apply(reduction, engine="numba", axis=apply_axis)
expected = df.apply(reduction, engine="python", axis=apply_axis)
tm.assert_series_equal(result, expected)


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ def test_dt64_compare_datetime_scalar(self, datetimelike, op, expected):
class TestDatetimeIndexComparisons:
# TODO: moved from tests.indexes.test_base; parametrize and de-duplicate
def test_comparators(self, comparison_op):
index = tm.makeDateIndex(100)
index = date_range("2020-01-01", periods=10)
element = index[len(index) // 2]
element = Timestamp(element).to_datetime64()

Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/arithmetic/test_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,7 @@ def test_objarr_add_invalid(self, op, box_with_array):
# invalid ops
box = box_with_array

obj_ser = tm.makeObjectSeries()
obj_ser.name = "objects"
obj_ser = Series(list("abc"), dtype=object, name="objects")

obj_ser = tm.box_expected(obj_ser, box)
msg = "|".join(
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,12 @@ def test_notna_notnull(notna_f):
@pytest.mark.parametrize(
"ser",
[
tm.makeObjectSeries(),
tm.makeTimeSeries(),
Series(
[str(i) for i in range(5)],
index=Index([str(i) for i in range(5)], dtype=object),
dtype=object,
),
Series(range(5), date_range("2020-01-01", periods=5)),
Series(range(5), period_range("2020-01-01", periods=5)),
],
)
Expand Down
93 changes: 24 additions & 69 deletions pandas/tests/frame/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from pandas import (
DataFrame,
Index,
NaT,
date_range,
)
Expand Down Expand Up @@ -44,27 +45,12 @@ def float_string_frame():
Fixture for DataFrame of floats and strings with index of unique strings

Columns are ['A', 'B', 'C', 'D', 'foo'].

A B C D foo
w3orJvq07g -1.594062 -1.084273 -1.252457 0.356460 bar
PeukuVdmz2 0.109855 -0.955086 -0.809485 0.409747 bar
ahp2KvwiM8 -1.533729 -0.142519 -0.154666 1.302623 bar
3WSJ7BUCGd 2.484964 0.213829 0.034778 -2.327831 bar
khdAmufk0U -0.193480 -0.743518 -0.077987 0.153646 bar
LE2DZiFlrE -0.193566 -1.343194 -0.107321 0.959978 bar
HJXSJhVn7b 0.142590 1.257603 -0.659409 -0.223844 bar
... ... ... ... ... ...
9a1Vypttgw -1.316394 1.601354 0.173596 1.213196 bar
h5d1gVFbEy 0.609475 1.106738 -0.155271 0.294630 bar
mK9LsTQG92 1.303613 0.857040 -1.019153 0.369468 bar
oOLksd9gKH 0.558219 -0.134491 -0.289869 -0.951033 bar
9jgoOjKyHg 0.058270 -0.496110 -0.413212 -0.852659 bar
jZLDHclHAO 0.096298 1.267510 0.549206 -0.005235 bar
lR0nxDp1C2 -2.119350 -0.794384 0.544118 0.145849 bar

[30 rows x 5 columns]
"""
df = DataFrame(tm.getSeriesData())
df = DataFrame(
np.random.default_rng(2).standard_normal((30, 4)),
index=Index([f"foo_{i}" for i in range(30)], dtype=object),
columns=Index(list("ABCD"), dtype=object),
)
df["foo"] = "bar"
return df

Expand All @@ -75,31 +61,18 @@ def mixed_float_frame():
Fixture for DataFrame of different float types with index of unique strings

Columns are ['A', 'B', 'C', 'D'].

A B C D
GI7bbDaEZe -0.237908 -0.246225 -0.468506 0.752993
KGp9mFepzA -1.140809 -0.644046 -1.225586 0.801588
VeVYLAb1l2 -1.154013 -1.677615 0.690430 -0.003731
kmPME4WKhO 0.979578 0.998274 -0.776367 0.897607
CPyopdXTiz 0.048119 -0.257174 0.836426 0.111266
0kJZQndAj0 0.274357 -0.281135 -0.344238 0.834541
tqdwQsaHG8 -0.979716 -0.519897 0.582031 0.144710
... ... ... ... ...
7FhZTWILQj -2.906357 1.261039 -0.780273 -0.537237
4pUDPM4eGq -2.042512 -0.464382 -0.382080 1.132612
B8dUgUzwTi -1.506637 -0.364435 1.087891 0.297653
hErlVYjVv9 1.477453 -0.495515 -0.713867 1.438427
1BKN3o7YLs 0.127535 -0.349812 -0.881836 0.489827
9S4Ekn7zga 1.445518 -2.095149 0.031982 0.373204
xN1dNn6OV6 1.425017 -0.983995 -0.363281 -0.224502

[30 rows x 4 columns]
"""
df = DataFrame(tm.getSeriesData())
df.A = df.A.astype("float32")
df.B = df.B.astype("float32")
df.C = df.C.astype("float16")
df.D = df.D.astype("float64")
df = DataFrame(
{
col: np.random.default_rng(2).random(30, dtype=dtype)
for col, dtype in zip(
list("ABCD"), ["float32", "float32", "float32", "float64"]
)
},
index=Index([f"foo_{i}" for i in range(30)], dtype=object),
)
# float16 is not supported by numpy's random Generator, so draw float32 and cast
df["C"] = df["C"].astype("float16")
return df


Expand All @@ -109,32 +82,14 @@ def mixed_int_frame():
Fixture for DataFrame of different int types with index of unique strings

Columns are ['A', 'B', 'C', 'D'].

A B C D
mUrCZ67juP 0 1 2 2
rw99ACYaKS 0 1 0 0
7QsEcpaaVU 0 1 1 1
xkrimI2pcE 0 1 0 0
dz01SuzoS8 0 1 255 255
ccQkqOHX75 -1 1 0 0
DN0iXaoDLd 0 1 0 0
... .. .. ... ...
Dfb141wAaQ 1 1 254 254
IPD8eQOVu5 0 1 0 0
CcaKulsCmv 0 1 0 0
rIBa8gu7E5 0 1 0 0
RP6peZmh5o 0 1 1 1
NMb9pipQWQ 0 1 0 0
PqgbJEzjib 0 1 3 3

[30 rows x 4 columns]
"""
df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
df.A = df.A.astype("int32")
df.B = np.ones(len(df.B), dtype="uint64")
df.C = df.C.astype("uint8")
df.D = df.C.astype("int64")
return df
return DataFrame(
{
col: np.ones(30, dtype=dtype)
for col, dtype in zip(list("ABCD"), ["int32", "uint64", "uint8", "int64"])
},
index=Index([f"foo_{i}" for i in range(30)], dtype=object),
)


@pytest.fixture
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/methods/test_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,11 +532,11 @@ def test_info_compute_numba():

with option_context("compute.use_numba", True):
buf = StringIO()
df.info()
df.info(buf=buf)
result = buf.getvalue()

buf = StringIO()
df.info()
df.info(buf=buf)
expected = buf.getvalue()
assert result == expected

Expand Down
Loading