From 0ed276f7018abc83ffeaa9dfaa361ee0ba22e696 Mon Sep 17 00:00:00 2001 From: Dea Leon Date: Wed, 5 Apr 2023 12:56:22 +0200 Subject: [PATCH 1/9] BUG first --- pandas/core/generic.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8c6b163647f65..d112361937d10 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -40,7 +40,9 @@ from pandas._libs.tslibs import ( Period, Tick, + Timedelta, Timestamp, + offsets, to_offset, ) from pandas._typing import ( @@ -9078,7 +9080,18 @@ def first(self, offset) -> Self: return self.copy(deep=False) offset = to_offset(offset) - if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): + + if type(offset) is offsets.DateOffset and offset.is_on_offset(self.index[0]): + end_date = self.index[0] - Timedelta(1, unit="d") + offset + + return self.loc[:end_date] + + # if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): + if ( + not isinstance(offset, Tick) + and offset.is_on_offset(self.index[0]) + and type(offset) is not offsets.DateOffset + ): # GH#29623 if first value is end of period, remove offset with n = 1 # before adding the real offset end_date = end = self.index[0] - offset.base + offset From d76aa112333ed13cdd8a7b5231b1a00661d321d4 Mon Sep 17 00:00:00 2001 From: Dea Leon Date: Thu, 6 Apr 2023 15:34:57 +0200 Subject: [PATCH 2/9] BUG first --- pandas/core/generic.py | 4 +- .../frame/methods/test_first_and_last.py | 40 +++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 526780e2afcf0..657f610130d4f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9078,8 +9078,8 @@ def first(self, offset) -> Self: if len(self.index) == 0: return self.copy(deep=False) - - offset = to_offset(offset) + if type(offset) is not offsets.DateOffset: + offset = to_offset(offset) if type(offset) is offsets.DateOffset and offset.is_on_offset(self.index[0]): end_date = self.index[0] - Timedelta(1, unit="d") + offset diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index 64f6665ecd709..5c57c07c375f1 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -1,13 +1,17 @@ """ Note: includes tests for `last` """ +import numpy as np import pytest import pandas as pd from pandas import ( DataFrame, + Series, bdate_range, ) + +# from pandas._libs.tslibs import offsets import pandas._testing as tm @@ -95,3 +99,39 @@ def test_empty_not_input(self, func): result = getattr(df, func)(offset=1) tm.assert_frame_equal(df, result) assert df is not result + + @pytest.mark.parametrize("start, periods", [("2010-03-31", 1), ("2010-03-30", 2)]) + def test_first_with_first_day_last_of_m_DO(self, frame_or_series, start, periods): + x = frame_or_series([1] * 100, index=bdate_range(start, periods=100)) + result = x.first(pd.DateOffset(days=periods)) + expected = frame_or_series( + [1] * periods, index=bdate_range(start, periods=periods) + ) + tm.assert_equal(result, expected) + + def test_first_with_first_day_end_of_frq_n_greater_one_DO(self, frame_or_series): + x = frame_or_series([1] * 100, index=bdate_range("2010-03-31", periods=100)) + result = x.first(pd.DateOffset(days=2)) + expected = frame_or_series( + [1] * 2, index=bdate_range("2010-03-31", "2010-04-01") + ) + tm.assert_equal(result, expected) + + def test_first_w_DateOffset(self): + # GH#51284 + i = pd.date_range("2018-04-09", periods=4, freq="2D") + x = DataFrame({"A": [1, 2, 3, 4]}, index=i) + result = x.first(pd.DateOffset(days=3)) + expected = DataFrame( + {"A": [1, 2]}, index=pd.date_range("2018-04-09", periods=2, freq="2D") + ) + tm.assert_equal(result, expected) + + def test_first_w_DateOffset_other(self): + # GH#45908 + i = pd.date_range("2018-04-09", periods=30, freq="2D") + x = DataFrame({"A": Series(np.arange(30), index=i)}, index=i) + result = x.first(pd.DateOffset(days=15)) + i2 = pd.date_range("2018-04-09", periods=8, freq="2D") + expected = DataFrame({"A": Series(np.arange(8), index=i2)}, index=i2) + tm.assert_equal(result, expected) From fbf2d103dd4b3e4df0cdc4a5bf6c05bb59bd4c79 Mon Sep 17 00:00:00 2001 From: Dea Leon Date: Wed, 12 Apr 2023 19:59:08 +0200 Subject: [PATCH 3/9] BUG fixing func first with DO --- doc/source/whatsnew/v2.0.1.rst | 1 + pandas/core/generic.py | 20 ++++++++----------- .../frame/methods/test_first_and_last.py | 9 +++++++++ 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index 9071d242e25b5..a58fbb47cb8ff 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -20,6 +20,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- Fixed bug in :func:`first` when used with a DateOffset (:issue:`45908`) - Fixed bug in :func:`merge` when merging with ``ArrowDtype`` one one and a NumPy dtype on the other side (:issue:`52406`) - Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`) - Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9c375a46eb0a0..8f72f4370628d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -40,7 +40,6 @@ from pandas._libs.tslibs import ( Period, Tick, - Timedelta, Timestamp, offsets, to_offset, @@ -9078,20 +9077,17 @@ def first(self, offset) -> Self: if len(self.index) == 0: return self.copy(deep=False) - if type(offset) is not offsets.DateOffset: - offset = to_offset(offset) - if type(offset) is offsets.DateOffset and offset.is_on_offset(self.index[0]): - end_date = self.index[0] - Timedelta(1, unit="d") + offset + if isinstance(offset, offsets.DateOffset): + end_date = end = self.index[0] + offset + if end_date in self.index: + end = self.index.searchsorted(end_date, side="left") + return self.iloc[:end] + return self.loc[:end] - return self.loc[:end_date] + offset = to_offset(offset) - # if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): - if ( - not isinstance(offset, Tick) - and offset.is_on_offset(self.index[0]) - and type(offset) is not offsets.DateOffset - ): + if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): # GH#29623 if first value is end of period, remove offset with n = 1 # before adding the real offset end_date = end = self.index[0] - offset.base + offset diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index 5c57c07c375f1..579b9fd72083d 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -135,3 +135,12 @@ def test_first_w_DateOffset_other(self): i2 = pd.date_range("2018-04-09", periods=8, freq="2D") expected = DataFrame({"A": Series(np.arange(8), index=i2)}, index=i2) tm.assert_equal(result, expected) + + def test_first_with_fst_day_last_of_m_DO_kwds_months(self, frame_or_series): + periods = 10 + x = frame_or_series([1] * 10, index=bdate_range("2010-03-31", periods=periods)) + result = x.first(pd.DateOffset(months=1)) + expected = frame_or_series( + [1] * periods, index=bdate_range("2010-03-31", periods=periods) + ) + tm.assert_equal(result, expected) From 45e594f8a22086a5171cc9e0b427d4d6c09f03e2 Mon Sep 17 00:00:00 2001 From: Dea Leon Date: Mon, 17 Apr 2023 09:32:50 +0200 Subject: [PATCH 4/9] Modified as suggestions --- .../frame/methods/test_first_and_last.py | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index 579b9fd72083d..4e285832b6b8b 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -7,11 +7,8 @@ import pandas as pd from pandas import ( DataFrame, - Series, bdate_range, ) - -# from pandas._libs.tslibs import offsets import pandas._testing as tm @@ -101,46 +98,48 @@ def test_empty_not_input(self, func): assert df is not result @pytest.mark.parametrize("start, periods", [("2010-03-31", 1), ("2010-03-30", 2)]) - def test_first_with_first_day_last_of_m_DO(self, frame_or_series, start, periods): - x = frame_or_series([1] * 100, index=bdate_range(start, periods=100)) + def test_last_day_of_months_with_date_offset(self, frame_or_series, start, periods): + x = frame_or_series([1] * 100, index=pd.date_range(start, periods=100)) result = x.first(pd.DateOffset(days=periods)) expected = frame_or_series( - [1] * periods, index=bdate_range(start, periods=periods) + [1] * periods, index=pd.date_range(start, periods=periods) ) tm.assert_equal(result, expected) - def test_first_with_first_day_end_of_frq_n_greater_one_DO(self, frame_or_series): - x = frame_or_series([1] * 100, index=bdate_range("2010-03-31", periods=100)) + def test_date_offset_multiple_days(self, frame_or_series): + x = frame_or_series([1] * 100, index=pd.date_range("2010-03-31", periods=100)) result = x.first(pd.DateOffset(days=2)) expected = frame_or_series( - [1] * 2, index=bdate_range("2010-03-31", "2010-04-01") + [1] * 2, index=pd.date_range("2010-03-31", "2010-04-01") ) tm.assert_equal(result, expected) - def test_first_w_DateOffset(self): + def test_first_with_date_offset(self): # GH#51284 - i = pd.date_range("2018-04-09", periods=4, freq="2D") + i = pd.to_datetime(["2018-04-09", "2018-04-10", "2018-04-11", "2018-04-12"]) x = DataFrame({"A": [1, 2, 3, 4]}, index=i) - result = x.first(pd.DateOffset(days=3)) + result = x.first(pd.DateOffset(days=2)) expected = DataFrame( - {"A": [1, 2]}, index=pd.date_range("2018-04-09", periods=2, freq="2D") + {"A": [1, 2]}, index=pd.to_datetime(["2018-04-09", "2018-04-10"]) ) tm.assert_equal(result, expected) - def test_first_w_DateOffset_other(self): + def test_date_offset_15_days(self): # GH#45908 i = pd.date_range("2018-04-09", periods=30, freq="2D") - x = DataFrame({"A": Series(np.arange(30), index=i)}, index=i) + x = DataFrame({"A": np.arange(30)}, index=i) result = x.first(pd.DateOffset(days=15)) i2 = pd.date_range("2018-04-09", periods=8, freq="2D") - expected = DataFrame({"A": Series(np.arange(8), index=i2)}, index=i2) + expected = DataFrame({"A": np.arange(8)}, index=i2) tm.assert_equal(result, expected) - def test_first_with_fst_day_last_of_m_DO_kwds_months(self, frame_or_series): - periods = 10 - x = frame_or_series([1] * 10, index=bdate_range("2010-03-31", periods=periods)) + def test_first_with_date_offset_months(self, frame_or_series): + periods = 40 + x = frame_or_series( + [1] * periods, index=pd.date_range("2010-03-31", periods=periods) + ) result = x.first(pd.DateOffset(months=1)) expected = frame_or_series( - [1] * periods, index=bdate_range("2010-03-31", periods=periods) + [1] * 30, index=pd.date_range("2010-03-31", periods=30) ) tm.assert_equal(result, expected) From 0d808945637e29a4fde2cb250bfcc40e773d7482 Mon Sep 17 00:00:00 2001 From: Dea Leon Date: Mon, 17 Apr 2023 19:13:19 +0200 Subject: [PATCH 5/9] Corrected whatsnew --- doc/source/whatsnew/v2.0.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index dc7d7972c95e3..43c790b55b0bf 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -33,7 +33,7 @@ Bug fixes - Bug in :meth:`ArrowDtype.__from_arrow__` not respecting if dtype is explicitly given (:issue:`52533`) - Bug in :meth:`DataFrame.max` and related casting different :class:`Timestamp` resolutions always to nanoseconds (:issue:`52524`) - Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`) -- Fixed bug in :func:`first` when used with a DateOffset (:issue:`45908`) +- Fixed bug in :func:`first` when used with a :class:`DateOffset` (:issue:`45908`) - Fixed bug in :func:`merge` when merging with ``ArrowDtype`` one one and a NumPy dtype on the other side (:issue:`52406`) - Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`) From 621c02862db9740595b90d0911b6984e502cd445 Mon Sep 17 00:00:00 2001 From: Dea Leon Date: Thu, 20 Apr 2023 09:10:55 +0200 Subject: [PATCH 6/9] fixing first --- pandas/core/generic.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 51e215c5d4f01..a55400d9b7607 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9071,16 +9071,11 @@ def first(self, offset) -> Self: if len(self.index) == 0: return self.copy(deep=False) - if isinstance(offset, offsets.DateOffset): - end_date = end = self.index[0] + offset - if end_date in self.index: - end = self.index.searchsorted(end_date, side="left") - return self.iloc[:end] - return self.loc[:end] - offset = to_offset(offset) - if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): + if (not isinstance(offset, Tick) and offset.is_on_offset(self.index[0])) and ( + type(offset) is not offsets.DateOffset + ): # GH#29623 if first value is end of period, remove offset with n = 1 # before adding the real offset end_date = end = self.index[0] - offset.base + offset @@ -9088,7 +9083,9 @@ def first(self, offset) -> Self: end_date = end = self.index[0] + offset # Tick-like, e.g. 3 weeks - if isinstance(offset, Tick) and end_date in self.index: + if ( + isinstance(offset, Tick) or (type(offset) is offsets.DateOffset) + ) and end_date in self.index: end = self.index.searchsorted(end_date, side="left") return self.iloc[:end] From 4d3754207296e0dc4e3af4be78bb1300fcc46a98 Mon Sep 17 00:00:00 2001 From: Dea Leon Date: Thu, 20 Apr 2023 11:02:11 +0200 Subject: [PATCH 7/9] added var --- pandas/core/generic.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a55400d9b7607..abf90e469922a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9071,10 +9071,14 @@ def first(self, offset) -> Self: if len(self.index) == 0: return self.copy(deep=False) - offset = to_offset(offset) + if isinstance(offset, offsets.DateOffset): + input_is_offset = True + else: + input_is_offset = False + offset = to_offset(offset) if (not isinstance(offset, Tick) and offset.is_on_offset(self.index[0])) and ( - type(offset) is not offsets.DateOffset + not input_is_offset ): # GH#29623 if first value is end of period, remove offset with n = 1 # before adding the real offset @@ -9083,9 +9087,7 @@ def first(self, offset) -> Self: end_date = end = self.index[0] + offset # Tick-like, e.g. 3 weeks - if ( - isinstance(offset, Tick) or (type(offset) is offsets.DateOffset) - ) and end_date in self.index: + if (isinstance(offset, Tick) or (input_is_offset)) and end_date in self.index: end = self.index.searchsorted(end_date, side="left") return self.iloc[:end] From ac12fdcae991533d06b9c8d87ea06acb86ae2adb Mon Sep 17 00:00:00 2001 From: Dea Leon Date: Thu, 20 Apr 2023 11:20:57 +0200 Subject: [PATCH 8/9] removed parenthesis --- pandas/core/generic.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index abf90e469922a..9623c82a66f0c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9077,8 +9077,10 @@ def first(self, offset) -> Self: input_is_offset = False offset = to_offset(offset) - if (not isinstance(offset, Tick) and offset.is_on_offset(self.index[0])) and ( - not input_is_offset + if ( + not isinstance(offset, Tick) + and offset.is_on_offset(self.index[0]) + and not input_is_offset ): # GH#29623 if first value is end of period, remove offset with n = 1 # before adding the real offset @@ -9087,7 +9089,7 @@ def first(self, offset) -> Self: end_date = end = self.index[0] + offset # Tick-like, e.g. 3 weeks - if (isinstance(offset, Tick) or (input_is_offset)) and end_date in self.index: + if isinstance(offset, Tick) or input_is_offset and end_date in self.index: end = self.index.searchsorted(end_date, side="left") return self.iloc[:end] From 27b6377c3833c4c632c6b3614fbbdfb77c09da47 Mon Sep 17 00:00:00 2001 From: Dea Leon Date: Thu, 20 Apr 2023 11:36:00 +0200 Subject: [PATCH 9/9] Removed duplicated code --- doc/source/whatsnew/v2.0.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index 43c790b55b0bf..a033a594327c5 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -33,8 +33,8 @@ Bug fixes - Bug in :meth:`ArrowDtype.__from_arrow__` not respecting if dtype is explicitly given (:issue:`52533`) - Bug in :meth:`DataFrame.max` and related casting different :class:`Timestamp` resolutions always to nanoseconds (:issue:`52524`) - Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`) -- Fixed bug in :func:`first` when used with a :class:`DateOffset` (:issue:`45908`) - Fixed bug in :func:`merge` when merging with ``ArrowDtype`` one one and a NumPy dtype on the other side (:issue:`52406`) +- Fixed bug in :meth:`DataFrame.first` when used with a :class:`DateOffset` (:issue:`45908`) - Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`) .. ---------------------------------------------------------------------------