From e6c8d5e56e03f97eb47e721f272130b7eb8b7a71 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 24 Oct 2020 19:36:12 -0700 Subject: [PATCH 1/2] TST/REF: collect tests by method --- pandas/tests/frame/indexing/test_setitem.py | 16 +++++++ pandas/tests/frame/test_period.py | 16 +------ pandas/tests/groupby/test_counting.py | 14 ++++++ pandas/tests/series/indexing/test_setitem.py | 35 ++++++++++++++ pandas/tests/series/methods/test_astype.py | 14 ++++++ pandas/tests/series/methods/test_isna.py | 32 +++++++++++++ pandas/tests/series/test_block_internals.py | 39 ---------------- pandas/tests/series/test_constructors.py | 23 +++++++++ pandas/tests/series/test_internals.py | 49 -------------------- pandas/tests/series/test_missing.py | 29 +++++------- pandas/tests/series/test_period.py | 12 ----- pandas/tests/series/test_timeseries.py | 14 ------ 12 files changed, 147 insertions(+), 146 deletions(-) create mode 100644 pandas/tests/series/methods/test_isna.py delete mode 100644 pandas/tests/series/test_block_internals.py delete mode 100644 pandas/tests/series/test_internals.py diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 87c6ae09aac11..c317e90181a8f 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -10,10 +10,12 @@ Interval, NaT, Period, + PeriodIndex, Series, Timestamp, date_range, notna, + period_range, ) import pandas._testing as tm from pandas.core.arrays import SparseArray @@ -213,3 +215,17 @@ def test_setitem_dt64tz(self, timezone_frame): result = df2["B"] tm.assert_series_equal(notna(result), Series([True, False, True], name="B")) tm.assert_series_equal(df2.dtypes, df.dtypes) + + def test_setitem_periodindex(self): + rng = period_range("1/1/2000", periods=5, name="index") + df = DataFrame(np.random.randn(5, 3), index=rng) + + df["Index"] = rng + rs = Index(df["Index"]) + tm.assert_index_equal(rs, rng, check_names=False) + assert rs.name == "Index" + assert rng.name == "index" + + rs = df.reset_index().set_index("index") + assert isinstance(rs.index, PeriodIndex) + tm.assert_index_equal(rs.index, rng) diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py index 37a4d7ffcf04f..cf467e3eda60a 100644 --- a/pandas/tests/frame/test_period.py +++ b/pandas/tests/frame/test_period.py @@ -1,6 +1,6 @@ import numpy as np -from pandas import DataFrame, Index, PeriodIndex, period_range +from pandas import DataFrame, period_range import pandas._testing as tm @@ -17,17 +17,3 @@ def test_as_frame_columns(self): ts = df["1/1/2000"] tm.assert_series_equal(ts, df.iloc[:, 0]) - - def test_frame_setitem(self): - rng = period_range("1/1/2000", periods=5, name="index") - df = DataFrame(np.random.randn(5, 3), index=rng) - - df["Index"] = rng - rs = Index(df["Index"]) - tm.assert_index_equal(rs, rng, check_names=False) - assert rs.name == "Index" - assert rng.name == "index" - - rs = df.reset_index().set_index("index") - assert isinstance(rs.index, PeriodIndex) - tm.assert_index_equal(rs.index, rng) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index c03ed00e1a081..04b73b16ae2c7 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -241,6 +241,20 @@ def test_count_groupby_column_with_nan_in_groupby_column(self): ) tm.assert_frame_equal(expected, res) + def test_groupby_count_dateparseerror(self): + dr = date_range(start="1/1/2012", freq="5min", periods=10) + + # BAD Example, datetimes first + ser = Series(np.arange(10), index=[dr, np.arange(10)]) + grouped = ser.groupby(lambda x: x[1] % 2 == 0) + result = grouped.count() + + ser = Series(np.arange(10), index=[np.arange(10), dr]) + grouped = ser.groupby(lambda x: x[0] % 2 == 0) + expected = grouped.count() + + tm.assert_series_equal(result, expected) + def test_groupby_timedelta_cython_count(): df = DataFrame( diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index c069b689c1710..3ea5f3729d793 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -107,3 +107,38 @@ def test_setitem_boolean_different_order(self, string_series): expected[expected > 0] = 0 tm.assert_series_equal(copy, expected) + + +class TestSetitemViewCopySemantics: + def test_setitem_invalidates_datetime_index_freq(self): + # GH#24096 altering a datetime64tz Series inplace invalidates the + # `freq` attribute on the underlying DatetimeIndex + + dti = date_range("20130101", periods=3, tz="US/Eastern") + ts = dti[1] + ser = Series(dti) + assert ser._values is not dti + assert ser._values._data.base is not dti._data._data.base + assert dti.freq == "D" + ser.iloc[1] = NaT + assert ser._values.freq is None + + # check that the DatetimeIndex was not altered in place + assert ser._values is not dti + assert ser._values._data.base is not dti._data._data.base + assert dti[1] == ts + assert dti.freq == "D" + + def test_dt64tz_setitem_does_not_mutate_dti(self): + # GH#21907, GH#24096 + dti = date_range("2016-01-01", periods=10, tz="US/Pacific") + ts = dti[0] + ser = Series(dti) + assert ser._values is not dti + assert ser._values._data.base is not dti._data._data.base + assert ser._mgr.blocks[0].values is not dti + assert ser._mgr.blocks[0].values._data.base is not dti._data._data.base + + ser[::3] = NaT + assert ser[0] is NaT + assert dti[0] == ts diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index dc9edccb640b5..8044b590b3463 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -14,6 +14,7 @@ CategoricalDtype, Index, Interval, + NaT, Series, Timedelta, Timestamp, @@ -75,6 +76,19 @@ def test_astype_dict_like(self, dtype_class): class TestAstype: + def test_astype_float_to_period(self): + result = Series([np.nan]).astype("period[D]") + expected = Series([NaT], dtype="period[D]") + tm.assert_series_equal(result, expected) + + def test_astype_no_pandas_dtype(self): + # https://github.com/pandas-dev/pandas/pull/24866 + ser = Series([1, 2], dtype="int64") + # Don't have PandasDtype in the public API, so we use `.array.dtype`, + # which is a PandasDtype. + result = ser.astype(ser.array.dtype) + tm.assert_series_equal(result, ser) + @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64]) def test_astype_generic_timestamp_no_frequency(self, dtype, request): # see GH#15524, GH#15987 diff --git a/pandas/tests/series/methods/test_isna.py b/pandas/tests/series/methods/test_isna.py new file mode 100644 index 0000000000000..1760b0b9726e0 --- /dev/null +++ b/pandas/tests/series/methods/test_isna.py @@ -0,0 +1,32 @@ +""" +We also test Series.notna in this file. +""" +import numpy as np + +from pandas import Period, Series +import pandas._testing as tm + + +class TestIsna: + def test_isna_period_dtype(self): + # GH#13737 + ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")]) + + expected = Series([False, True]) + + result = ser.isna() + tm.assert_series_equal(result, expected) + + result = ser.notna() + tm.assert_series_equal(result, ~expected) + + def test_isna(self): + ser = Series([0, 5.4, 3, np.nan, -0.001]) + expected = Series([False, False, False, True, False]) + tm.assert_series_equal(ser.isna(), expected) + tm.assert_series_equal(ser.notna(), ~expected) + + ser = Series(["hi", "", np.nan]) + expected = Series([False, False, True]) + tm.assert_series_equal(ser.isna(), expected) + tm.assert_series_equal(ser.notna(), ~expected) diff --git a/pandas/tests/series/test_block_internals.py b/pandas/tests/series/test_block_internals.py deleted file mode 100644 index d0dfbe6f5b569..0000000000000 --- a/pandas/tests/series/test_block_internals.py +++ /dev/null @@ -1,39 +0,0 @@ -import pandas as pd - -# Segregated collection of methods that require the BlockManager internal data -# structure - - -class TestSeriesBlockInternals: - def test_setitem_invalidates_datetime_index_freq(self): - # GH#24096 altering a datetime64tz Series inplace invalidates the - # `freq` attribute on the underlying DatetimeIndex - - dti = pd.date_range("20130101", periods=3, tz="US/Eastern") - ts = dti[1] - ser = pd.Series(dti) - assert ser._values is not dti - assert ser._values._data.base is not dti._data._data.base - assert dti.freq == "D" - ser.iloc[1] = pd.NaT - assert ser._values.freq is None - - # check that the DatetimeIndex was not altered in place - assert ser._values is not dti - assert ser._values._data.base is not dti._data._data.base - assert dti[1] == ts - assert dti.freq == "D" - - def test_dt64tz_setitem_does_not_mutate_dti(self): - # GH#21907, GH#24096 - dti = pd.date_range("2016-01-01", periods=10, tz="US/Pacific") - ts = dti[0] - ser = pd.Series(dti) - assert ser._values is not dti - assert ser._values._data.base is not dti._data._data.base - assert ser._mgr.blocks[0].values is not dti - assert ser._mgr.blocks[0].values._data.base is not dti._data._data.base - - ser[::3] = pd.NaT - assert ser[0] is pd.NaT - assert dti[0] == ts diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 1958e44b6d1a3..25ef82c8eecbc 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -35,6 +35,7 @@ ) import pandas._testing as tm from pandas.core.arrays import IntervalArray, period_array +from pandas.core.internals.blocks import IntBlock class TestSeriesConstructors: @@ -1527,3 +1528,25 @@ def test_series_constructor_datetimelike_index_coercion(self): with tm.assert_produces_warning(FutureWarning): assert ser.index.is_all_dates assert isinstance(ser.index, DatetimeIndex) + + +class TestSeriesConstructorInternals: + def test_constructor_no_pandas_array(self): + ser = Series([1, 2, 3]) + result = Series(ser.array) + tm.assert_series_equal(ser, result) + assert isinstance(result._mgr.blocks[0], IntBlock) + + def test_from_array(self): + result = Series(pd.array(["1H", "2H"], dtype="timedelta64[ns]")) + assert result._mgr.blocks[0].is_extension is False + + result = Series(pd.array(["2015"], dtype="datetime64[ns]")) + assert result._mgr.blocks[0].is_extension is False + + def test_from_list_dtype(self): + result = Series(["1H", "2H"], dtype="timedelta64[ns]") + assert result._mgr.blocks[0].is_extension is False + + result = Series(["2015"], dtype="datetime64[ns]") + assert result._mgr.blocks[0].is_extension is False diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py deleted file mode 100644 index be106e8b1fef4..0000000000000 --- a/pandas/tests/series/test_internals.py +++ /dev/null @@ -1,49 +0,0 @@ -import numpy as np - -import pandas as pd -from pandas import Series -import pandas._testing as tm -from pandas.core.internals.blocks import IntBlock - - -class TestSeriesInternals: - def test_constructor_no_pandas_array(self): - ser = Series([1, 2, 3]) - result = Series(ser.array) - tm.assert_series_equal(ser, result) - assert isinstance(result._mgr.blocks[0], IntBlock) - - def test_astype_no_pandas_dtype(self): - # https://github.com/pandas-dev/pandas/pull/24866 - ser = Series([1, 2], dtype="int64") - # Don't have PandasDtype in the public API, so we use `.array.dtype`, - # which is a PandasDtype. - result = ser.astype(ser.array.dtype) - tm.assert_series_equal(result, ser) - - def test_from_array(self): - result = Series(pd.array(["1H", "2H"], dtype="timedelta64[ns]")) - assert result._mgr.blocks[0].is_extension is False - - result = Series(pd.array(["2015"], dtype="datetime64[ns]")) - assert result._mgr.blocks[0].is_extension is False - - def test_from_list_dtype(self): - result = Series(["1H", "2H"], dtype="timedelta64[ns]") - assert result._mgr.blocks[0].is_extension is False - - result = Series(["2015"], dtype="datetime64[ns]") - assert result._mgr.blocks[0].is_extension is False - - -def test_hasnans_uncached_for_series(): - # GH#19700 - idx = pd.Index([0, 1]) - assert idx.hasnans is False - assert "hasnans" in idx._cache - ser = idx.to_series() - assert ser.hasnans is False - assert not hasattr(ser, "_cache") - ser.iloc[-1] = np.nan - assert ser.hasnans is True - assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__ diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index f601d7cb655b3..530a22904dedf 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -92,20 +92,15 @@ def test_valid(self, datetime_series): tm.assert_series_equal(result, ts[1::2]) tm.assert_series_equal(result, ts[pd.notna(ts)]) - def test_isna(self): - ser = Series([0, 5.4, 3, np.nan, -0.001]) - expected = Series([False, False, False, True, False]) - tm.assert_series_equal(ser.isna(), expected) - - ser = Series(["hi", "", np.nan]) - expected = Series([False, False, True]) - tm.assert_series_equal(ser.isna(), expected) - - def test_notna(self): - ser = Series([0, 5.4, 3, np.nan, -0.001]) - expected = Series([True, True, True, False, True]) - tm.assert_series_equal(ser.notna(), expected) - - ser = Series(["hi", "", np.nan]) - expected = Series([True, True, False]) - tm.assert_series_equal(ser.notna(), expected) + +def test_hasnans_uncached_for_series(): + # GH#19700 + idx = pd.Index([0, 1]) + assert idx.hasnans is False + assert "hasnans" in idx._cache + ser = idx.to_series() + assert ser.hasnans is False + assert not hasattr(ser, "_cache") + ser.iloc[-1] = np.nan + assert ser.hasnans is True + assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__ diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index c73c57c3d2d91..d079111aa12d6 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -3,19 +3,12 @@ import pandas as pd from pandas import DataFrame, Series, period_range -import pandas._testing as tm class TestSeriesPeriod: def setup_method(self, method): self.series = Series(period_range("2000-01-01", periods=10, freq="D")) - def test_isna(self): - # GH 13737 - s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")]) - tm.assert_series_equal(s.isna(), Series([False, True])) - tm.assert_series_equal(s.notna(), Series([True, False])) - # --------------------------------------------------------------------- # NaT support @@ -29,11 +22,6 @@ def test_NaT_scalar(self): series[2] = val assert pd.isna(series[2]) - def test_NaT_cast(self): - result = Series([np.nan]).astype("period[D]") - expected = Series([pd.NaT], dtype="period[D]") - tm.assert_series_equal(result, expected) - def test_intercept_astype_object(self): expected = self.series.astype("object") diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index ba270448c19ed..295f70935a786 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -46,20 +46,6 @@ def test_promote_datetime_date(self): expected = rng.get_indexer(ts_slice.index) tm.assert_numpy_array_equal(result, expected) - def test_groupby_count_dateparseerror(self): - dr = date_range(start="1/1/2012", freq="5min", periods=10) - - # BAD Example, datetimes first - s = Series(np.arange(10), index=[dr, np.arange(10)]) - grouped = s.groupby(lambda x: x[1] % 2 == 0) - result = grouped.count() - - s = Series(np.arange(10), index=[np.arange(10), dr]) - grouped = s.groupby(lambda x: x[0] % 2 == 0) - expected = grouped.count() - - tm.assert_series_equal(result, expected) - def test_series_map_box_timedelta(self): # GH 11349 s = Series(timedelta_range("1 day 1 s", periods=5, freq="h")) From 8d7b081b7fb5ff10844e4ae069ee439bdd266a77 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 25 Oct 2020 08:22:43 -0700 Subject: [PATCH 2/2] lint fixup --- pandas/tests/series/test_missing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 530a22904dedf..6fefeaa818a77 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -95,7 +95,7 @@ def test_valid(self, datetime_series): def test_hasnans_uncached_for_series(): # GH#19700 - idx = pd.Index([0, 1]) + idx = Index([0, 1]) assert idx.hasnans is False assert "hasnans" in idx._cache ser = idx.to_series() @@ -103,4 +103,4 @@ def test_hasnans_uncached_for_series(): assert not hasattr(ser, "_cache") ser.iloc[-1] = np.nan assert ser.hasnans is True - assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__ + assert Series.hasnans.__doc__ == Index.hasnans.__doc__