diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index d462917277f99..38f6b6d38008c 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -791,15 +791,15 @@ def _join_by_hand(a, b, how="left"): def test_join_inner_multiindex_deterministic_order(): # GH: 36910 - left = pd.DataFrame( + left = DataFrame( data={"e": 5}, index=pd.MultiIndex.from_tuples([(1, 2, 4)], names=("a", "b", "d")), ) - right = pd.DataFrame( + right = DataFrame( data={"f": 6}, index=pd.MultiIndex.from_tuples([(2, 3)], names=("b", "c")) ) result = left.join(right, how="inner") - expected = pd.DataFrame( + expected = DataFrame( {"e": [5], "f": [6]}, index=pd.MultiIndex.from_tuples([(2, 1, 4, 3)], names=("b", "a", "d", "c")), ) diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py deleted file mode 100644 index 3f88f4193e770..0000000000000 --- a/pandas/tests/series/indexing/test_boolean.py +++ /dev/null @@ -1,146 +0,0 @@ -import numpy as np -import pytest - -from pandas import Index, Series, date_range -import pandas._testing as tm -from pandas.core.indexing import IndexingError - -from pandas.tseries.offsets import BDay - - -def test_getitem_boolean(string_series): - s = string_series - mask = s > s.median() - - # passing list is OK - result = s[list(mask)] - expected = s[mask] - tm.assert_series_equal(result, expected) - tm.assert_index_equal(result.index, s.index[mask]) - - -def test_getitem_boolean_empty(): - s = Series([], dtype=np.int64) - s.index.name = "index_name" - s = s[s.isna()] - assert s.index.name == "index_name" - assert s.dtype == np.int64 - - # GH5877 - # indexing with empty series - s = Series(["A", "B"]) - expected = Series(dtype=object, index=Index([], dtype="int64")) - result = s[Series([], dtype=object)] - tm.assert_series_equal(result, expected) - - # invalid because of the boolean indexer - # that's empty or not-aligned - msg = ( - r"Unalignable boolean Series provided as indexer \(index of " - r"the boolean Series and of the indexed object do not match" - ) - with pytest.raises(IndexingError, match=msg): - s[Series([], dtype=bool)] - - with pytest.raises(IndexingError, match=msg): - s[Series([True], dtype=bool)] - - -def test_getitem_boolean_object(string_series): - # using column from DataFrame - - s = string_series - mask = s > s.median() - omask = mask.astype(object) - - # getitem - result = s[omask] - expected = s[mask] - tm.assert_series_equal(result, expected) - - # setitem - s2 = s.copy() - cop = s.copy() - cop[omask] = 5 - s2[mask] = 5 - tm.assert_series_equal(cop, s2) - - # nans raise exception - omask[5:10] = np.nan - msg = "Cannot mask with non-boolean array containing NA / NaN values" - with pytest.raises(ValueError, match=msg): - s[omask] - with pytest.raises(ValueError, match=msg): - s[omask] = 5 - - -def test_getitem_setitem_boolean_corner(datetime_series): - ts = datetime_series - mask_shifted = ts.shift(1, freq=BDay()) > ts.median() - - # these used to raise...?? - - msg = ( - r"Unalignable boolean Series provided as indexer \(index of " - r"the boolean Series and of the indexed object do not match" - ) - with pytest.raises(IndexingError, match=msg): - ts[mask_shifted] - with pytest.raises(IndexingError, match=msg): - ts[mask_shifted] = 1 - - with pytest.raises(IndexingError, match=msg): - ts.loc[mask_shifted] - with pytest.raises(IndexingError, match=msg): - ts.loc[mask_shifted] = 1 - - -def test_setitem_boolean(string_series): - mask = string_series > string_series.median() - - # similar indexed series - result = string_series.copy() - result[mask] = string_series * 2 - expected = string_series * 2 - tm.assert_series_equal(result[mask], expected[mask]) - - # needs alignment - result = string_series.copy() - result[mask] = (string_series * 2)[0:5] - expected = (string_series * 2)[0:5].reindex_like(string_series) - expected[-mask] = string_series[mask] - tm.assert_series_equal(result[mask], expected[mask]) - - -def test_get_set_boolean_different_order(string_series): - ordered = string_series.sort_values() - - # setting - copy = string_series.copy() - copy[ordered > 0] = 0 - - expected = string_series.copy() - expected[expected > 0] = 0 - - tm.assert_series_equal(copy, expected) - - # getting - sel = string_series[ordered > 0] - exp = string_series[string_series > 0] - tm.assert_series_equal(sel, exp) - - -def test_getitem_boolean_dt64_copies(): - # GH#36210 - dti = date_range("2016-01-01", periods=4, tz="US/Pacific") - key = np.array([True, True, False, False]) - - ser = Series(dti._data) - - res = ser[key] - assert res._values._data.base is None - - # compare with numeric case for reference - ser2 = Series(range(4)) - res2 = ser2[key] - assert res2._values.base is None diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 06c14a95ab04e..b4c861312ad3d 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -9,8 +9,11 @@ from pandas._libs.tslibs import conversion, timezones import pandas as pd -from pandas import Series, Timestamp, date_range, period_range +from pandas import Index, Series, Timestamp, date_range, period_range import pandas._testing as tm +from pandas.core.indexing import IndexingError + +from pandas.tseries.offsets import BDay class TestSeriesGetitemScalars: @@ -124,6 +127,107 @@ def test_getitem_intlist_multiindex_numeric_level(self, dtype, box): ser[key] +class TestGetitemBooleanMask: + def test_getitem_boolean(self, string_series): + ser = string_series + mask = ser > ser.median() + + # passing list is OK + result = ser[list(mask)] + expected = ser[mask] + tm.assert_series_equal(result, expected) + tm.assert_index_equal(result.index, ser.index[mask]) + + def test_getitem_boolean_empty(self): + ser = Series([], dtype=np.int64) + ser.index.name = "index_name" + ser = ser[ser.isna()] + assert ser.index.name == "index_name" + assert ser.dtype == np.int64 + + # GH#5877 + # indexing with empty series + ser = Series(["A", "B"]) + expected = Series(dtype=object, index=Index([], dtype="int64")) + result = ser[Series([], dtype=object)] + tm.assert_series_equal(result, expected) + + # invalid because of the boolean indexer + # that's empty or not-aligned + msg = ( + r"Unalignable boolean Series provided as indexer \(index of " + r"the boolean Series and of the indexed object do not match" + ) + with pytest.raises(IndexingError, match=msg): + ser[Series([], dtype=bool)] + + with pytest.raises(IndexingError, match=msg): + ser[Series([True], dtype=bool)] + + def test_getitem_boolean_object(self, string_series): + # using column from DataFrame + + ser = string_series + mask = ser > ser.median() + omask = mask.astype(object) + + # getitem + result = ser[omask] + expected = ser[mask] + tm.assert_series_equal(result, expected) + + # setitem + s2 = ser.copy() + cop = ser.copy() + cop[omask] = 5 + s2[mask] = 5 + tm.assert_series_equal(cop, s2) + + # nans raise exception + omask[5:10] = np.nan + msg = "Cannot mask with non-boolean array containing NA / NaN values" + with pytest.raises(ValueError, match=msg): + ser[omask] + with pytest.raises(ValueError, match=msg): + ser[omask] = 5 + + def test_getitem_boolean_dt64_copies(self): + # GH#36210 + dti = date_range("2016-01-01", periods=4, tz="US/Pacific") + key = np.array([True, True, False, False]) + + ser = Series(dti._data) + + res = ser[key] + assert res._values._data.base is None + + # compare with numeric case for reference + ser2 = Series(range(4)) + res2 = ser2[key] + assert res2._values.base is None + + def test_getitem_boolean_corner(self, datetime_series): + ts = datetime_series + mask_shifted = ts.shift(1, freq=BDay()) > ts.median() + + msg = ( + r"Unalignable boolean Series provided as indexer \(index of " + r"the boolean Series and of the indexed object do not match" + ) + with pytest.raises(IndexingError, match=msg): + ts[mask_shifted] + + with pytest.raises(IndexingError, match=msg): + ts.loc[mask_shifted] + + def test_getitem_boolean_different_order(self, string_series): + ordered = string_series.sort_values() + + sel = string_series[ordered > 0] + exp = string_series[string_series > 0] + tm.assert_series_equal(sel, exp) + + def test_getitem_generator(string_series): gen = (x > 0 for x in string_series) result = string_series[gen] diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 6ac1397fa7695..c069b689c1710 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -4,8 +4,11 @@ import pytest from pandas import MultiIndex, NaT, Series, Timestamp, date_range, period_range +from pandas.core.indexing import IndexingError import pandas.testing as tm +from pandas.tseries.offsets import BDay + class TestSetitemDT64Values: def test_setitem_none_nan(self): @@ -61,3 +64,46 @@ def test_setitem_na_period_dtype_casts_to_nat(self, na_val): ser[3:5] = na_val assert ser[4] is NaT + + +class TestSetitemBooleanMask: + def test_setitem_boolean(self, string_series): + mask = string_series > string_series.median() + + # similar indexed series + result = string_series.copy() + result[mask] = string_series * 2 + expected = string_series * 2 + tm.assert_series_equal(result[mask], expected[mask]) + + # needs alignment + result = string_series.copy() + result[mask] = (string_series * 2)[0:5] + expected = (string_series * 2)[0:5].reindex_like(string_series) + expected[-mask] = string_series[mask] + tm.assert_series_equal(result[mask], expected[mask]) + + def test_setitem_boolean_corner(self, datetime_series): + ts = datetime_series + mask_shifted = ts.shift(1, freq=BDay()) > ts.median() + + msg = ( + r"Unalignable boolean Series provided as indexer \(index of " + r"the boolean Series and of the indexed object do not match" + ) + with pytest.raises(IndexingError, match=msg): + ts[mask_shifted] = 1 + + with pytest.raises(IndexingError, match=msg): + ts.loc[mask_shifted] = 1 + + def test_setitem_boolean_different_order(self, string_series): + ordered = string_series.sort_values() + + copy = string_series.copy() + copy[ordered > 0] = 0 + + expected = string_series.copy() + expected[expected > 0] = 0 + + tm.assert_series_equal(copy, expected)