Skip to content

TST: parametrize test_partial_slicing tests #33873

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 29 additions & 75 deletions pandas/tests/indexes/datetimes/test_partial_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Expand All @@ -21,69 +20,26 @@


class TestSlicing:
def test_slice_with_negative_step(self):
ts = Series(np.arange(20), date_range("2014-01-01", periods=20, freq="MS"))
SLC = pd.IndexSlice

def assert_slices_equivalent(l_slc, i_slc):
expected = ts.iloc[i_slc]

tm.assert_series_equal(ts[l_slc], expected)
tm.assert_series_equal(ts.loc[l_slc], expected)
tm.assert_series_equal(ts.loc[l_slc], expected)

assert_slices_equivalent(SLC[Timestamp("2014-10-01") :: -1], SLC[9::-1])
assert_slices_equivalent(SLC["2014-10-01"::-1], SLC[9::-1])

assert_slices_equivalent(SLC[: Timestamp("2014-10-01") : -1], SLC[:8:-1])
assert_slices_equivalent(SLC[:"2014-10-01":-1], SLC[:8:-1])

assert_slices_equivalent(SLC["2015-02-01":"2014-10-01":-1], SLC[13:8:-1])
assert_slices_equivalent(
SLC[Timestamp("2015-02-01") : Timestamp("2014-10-01") : -1], SLC[13:8:-1]
)
assert_slices_equivalent(
SLC["2015-02-01" : Timestamp("2014-10-01") : -1], SLC[13:8:-1]
)
assert_slices_equivalent(
SLC[Timestamp("2015-02-01") : "2014-10-01" : -1], SLC[13:8:-1]
)

assert_slices_equivalent(SLC["2014-10-01":"2015-02-01":-1], SLC[0:0:-1])

def test_slice_with_zero_step_raises(self):
    """A zero slice step must raise ``ValueError`` for ``[]`` and ``.loc``."""
    ts = Series(np.arange(20), date_range("2014-01-01", periods=20, freq="MS"))
    msg = "slice step cannot be zero"

    # Each access path is exercised once; the .loc check used to be
    # repeated verbatim.
    for indexer in (ts, ts.loc):
        with pytest.raises(ValueError, match=msg):
            indexer[::0]

def test_monotone_DTI_indexing_bug(self):
# GH 19362
# Testing accessing the first element in a monotonic descending
# partial string indexing.

df = pd.DataFrame(list(range(5)))
df = DataFrame(list(range(5)))
date_list = [
"2018-01-02",
"2017-02-10",
"2016-03-10",
"2015-03-15",
"2014-03-16",
]
date_index = pd.to_datetime(date_list)
date_index = DatetimeIndex(date_list)
df["date"] = date_index
expected = pd.DataFrame({0: list(range(5)), "date": date_index})
expected = DataFrame({0: list(range(5)), "date": date_index})
tm.assert_frame_equal(df, expected)

df = pd.DataFrame(
{"A": [1, 2, 3]}, index=pd.date_range("20170101", periods=3)[::-1]
)
expected = pd.DataFrame(
{"A": 1}, index=pd.date_range("20170103", periods=1)[::-1]
)
df = DataFrame({"A": [1, 2, 3]}, index=date_range("20170101", periods=3)[::-1])
expected = DataFrame({"A": 1}, index=date_range("20170103", periods=1)[::-1])
tm.assert_frame_equal(df.loc["2017-01-03"], expected)

def test_slice_year(self):
Expand Down Expand Up @@ -120,7 +76,7 @@ def test_slice_end_of_period_resolution(self, partial_dtime):
# GH#31064
dti = date_range("2019-12-31 23:59:55.999999999", periods=10, freq="s")

ser = pd.Series(range(10), index=dti)
ser = Series(range(10), index=dti)
result = ser[partial_dtime]
expected = ser.iloc[:5]
tm.assert_series_equal(result, expected)
Expand Down Expand Up @@ -321,7 +277,7 @@ def test_partial_slicing_with_multiindex(self):
tm.assert_frame_equal(result, expected)

expected = df_multi.loc[
(pd.Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC")
(Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC")
]
result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")]
tm.assert_series_equal(result, expected)
Expand All @@ -334,31 +290,31 @@ def test_partial_slicing_with_multiindex(self):

# GH 4294
# partial slice on a series mi
s = pd.DataFrame(
np.random.rand(1000, 1000), index=pd.date_range("2000-1-1", periods=1000)
s = DataFrame(
np.random.rand(1000, 1000), index=date_range("2000-1-1", periods=1000)
).stack()

s2 = s[:-1].copy()
expected = s2["2000-1-4"]
result = s2[pd.Timestamp("2000-1-4")]
result = s2[Timestamp("2000-1-4")]
tm.assert_series_equal(result, expected)

result = s[pd.Timestamp("2000-1-4")]
result = s[Timestamp("2000-1-4")]
expected = s["2000-1-4"]
tm.assert_series_equal(result, expected)

df2 = pd.DataFrame(s)
df2 = DataFrame(s)
expected = df2.xs("2000-1-4")
result = df2.loc[pd.Timestamp("2000-1-4")]
result = df2.loc[Timestamp("2000-1-4")]
tm.assert_frame_equal(result, expected)

def test_partial_slice_doesnt_require_monotonicity(self):
# For historical reasons.
s = pd.Series(np.arange(10), pd.date_range("2014-01-01", periods=10))
s = Series(np.arange(10), date_range("2014-01-01", periods=10))

nonmonotonic = s[[3, 5, 4]]
expected = nonmonotonic.iloc[:0]
timestamp = pd.Timestamp("2014-01-10")
timestamp = Timestamp("2014-01-10")

tm.assert_series_equal(nonmonotonic["2014-01-10":], expected)
with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
Expand All @@ -370,9 +326,9 @@ def test_partial_slice_doesnt_require_monotonicity(self):

def test_loc_datetime_length_one(self):
# GH16071
df = pd.DataFrame(
df = DataFrame(
columns=["1"],
index=pd.date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"),
index=date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"),
)
result = df.loc[datetime(2016, 10, 1) :]
tm.assert_frame_equal(result, df)
Expand Down Expand Up @@ -403,10 +359,10 @@ def test_selection_by_datetimelike(self, datetimelike, op, expected):
df = DataFrame(
{
"A": [
pd.Timestamp("20120101"),
pd.Timestamp("20130101"),
Timestamp("20120101"),
Timestamp("20130101"),
np.nan,
pd.Timestamp("20130103"),
Timestamp("20130103"),
]
}
)
Expand All @@ -418,26 +374,26 @@ def test_selection_by_datetimelike(self, datetimelike, op, expected):
"start",
[
"2018-12-02 21:50:00+00:00",
pd.Timestamp("2018-12-02 21:50:00+00:00"),
pd.Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(),
Timestamp("2018-12-02 21:50:00+00:00"),
Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(),
],
)
@pytest.mark.parametrize(
"end",
[
"2018-12-02 21:52:00+00:00",
pd.Timestamp("2018-12-02 21:52:00+00:00"),
pd.Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(),
Timestamp("2018-12-02 21:52:00+00:00"),
Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(),
],
)
def test_getitem_with_datestring_with_UTC_offset(self, start, end):
# GH 24076
idx = pd.date_range(
idx = date_range(
start="2018-12-02 14:50:00-07:00",
end="2018-12-02 14:50:00-07:00",
freq="1min",
)
df = pd.DataFrame(1, index=idx, columns=["A"])
df = DataFrame(1, index=idx, columns=["A"])
result = df[start:end]
expected = df.iloc[0:3, :]
tm.assert_frame_equal(result, expected)
Expand All @@ -454,11 +410,9 @@ def test_getitem_with_datestring_with_UTC_offset(self, start, end):

def test_slice_reduce_to_series(self):
# GH 27516
df = pd.DataFrame(
{"A": range(24)}, index=pd.date_range("2000", periods=24, freq="M")
)
expected = pd.Series(
range(12), index=pd.date_range("2000", periods=12, freq="M"), name="A"
df = DataFrame({"A": range(24)}, index=date_range("2000", periods=24, freq="M"))
expected = Series(
range(12), index=date_range("2000", periods=12, freq="M"), name="A"
)
result = df.loc["2000", "A"]
tm.assert_series_equal(result, expected)
61 changes: 33 additions & 28 deletions pandas/tests/indexes/multi/test_partial_indexing.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, MultiIndex, date_range
from pandas import DataFrame, IndexSlice, MultiIndex, date_range
import pandas._testing as tm


def test_partial_string_timestamp_multiindex():
# GH10331
dr = pd.date_range("2016-01-01", "2016-01-03", freq="12H")
abc = ["a", "b", "c"]
ix = pd.MultiIndex.from_product([dr, abc])
df = pd.DataFrame({"c1": range(0, 15)}, index=ix)
idx = pd.IndexSlice

@pytest.fixture
def df():
# c1
# 2016-01-01 00:00:00 a 0
# b 1
Expand All @@ -30,33 +22,39 @@ def test_partial_string_timestamp_multiindex():
# 2016-01-03 00:00:00 a 12
# b 13
# c 14
dr = date_range("2016-01-01", "2016-01-03", freq="12H")
abc = ["a", "b", "c"]
mi = MultiIndex.from_product([dr, abc])
frame = DataFrame({"c1": range(0, 15)}, index=mi)
return frame


def test_partial_string_matching_single_index(df):
# partial string matching on a single index
for df_swap in (df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)):
for df_swap in [df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)]:
df_swap = df_swap.sort_index()
just_a = df_swap.loc["a"]
result = just_a.loc["2016-01-01"]
expected = df.loc[idx[:, "a"], :].iloc[0:2]
expected = df.loc[IndexSlice[:, "a"], :].iloc[0:2]
expected.index = expected.index.droplevel(1)
tm.assert_frame_equal(result, expected)


def test_partial_string_timestamp_multiindex(df):
# GH10331
df_swap = df.swaplevel(0, 1).sort_index()
SLC = IndexSlice

# indexing with IndexSlice
result = df.loc[idx["2016-01-01":"2016-02-01", :], :]
result = df.loc[SLC["2016-01-01":"2016-02-01", :], :]
expected = df
tm.assert_frame_equal(result, expected)

# match on secondary index
result = df_swap.loc[idx[:, "2016-01-01":"2016-01-01"], :]
result = df_swap.loc[SLC[:, "2016-01-01":"2016-01-01"], :]
expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
tm.assert_frame_equal(result, expected)

# Even though this syntax works on a single index, this is somewhat
# ambiguous and we don't want to extend this behavior forward to work
# in multi-indexes. This would amount to selecting a scalar from a
# column.
with pytest.raises(KeyError, match="'2016-01-01'"):
df["2016-01-01"]

# partial string match on year only
result = df.loc["2016"]
expected = df
Expand All @@ -73,7 +71,7 @@ def test_partial_string_timestamp_multiindex():
tm.assert_frame_equal(result, expected)

# partial string match on secondary index
result = df_swap.loc[idx[:, "2016-01-02"], :]
result = df_swap.loc[SLC[:, "2016-01-02"], :]
expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
tm.assert_frame_equal(result, expected)

Expand All @@ -86,11 +84,18 @@ def test_partial_string_timestamp_multiindex():
with pytest.raises(KeyError, match="'2016-01-01'"):
df_swap.loc["2016-01-01"]

# GH12685 (partial string with daily resolution or below)
dr = date_range("2013-01-01", periods=100, freq="D")
ix = MultiIndex.from_product([dr, ["a", "b"]])
df = DataFrame(np.random.randn(200, 1), columns=["A"], index=ix)

result = df.loc[idx["2013-03":"2013-03", :], :]
def test_partial_string_timestamp_multiindex_str_key_raises(df):
    """``df[date_string]`` on a MultiIndex-ed frame must raise ``KeyError``.

    Even though this syntax works on a single index, it is somewhat
    ambiguous and we don't want to extend the behavior to multi-indexes:
    it would amount to selecting a scalar from a column.
    """
    expected_error = pytest.raises(KeyError, match="'2016-01-01'")
    with expected_error:
        df["2016-01-01"]


def test_partial_string_timestamp_multiindex_daily_resolution(df):
# GH12685 (partial string with daily resolution or below)
result = df.loc[IndexSlice["2013-03":"2013-03", :], :]
expected = df.iloc[118:180]
tm.assert_frame_equal(result, expected)
Loading