From 8a98f2cca529fe865f57908b7d9804c88037bf5f Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 6 Sep 2020 19:18:56 -0700 Subject: [PATCH 1/7] DEPR: string indexing along index for datetimes --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/indexing.py | 11 ++++++++++- .../tests/indexes/datetimes/test_partial_slicing.py | 4 +++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 9a778acba4764..0eca656a26a2a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -195,7 +195,7 @@ Deprecations ~~~~~~~~~~~~ - Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`) - Deprecated parameter ``dtype`` in :~meth:`Index.copy` on method all index classes. Use the :meth:`Index.astype` method instead for changing dtype(:issue:`35853`) -- +- Deprecated indexing :class:`DataFrame` rows with datetime-like strings ``df[string]``, use ``df.loc[string]`` instead (:issue:`????`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index fe2fec1c52063..a933e822953c6 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,4 +1,5 @@ from typing import TYPE_CHECKING, Hashable, List, Tuple, Union +import warnings import numpy as np @@ -2154,7 +2155,15 @@ def convert_to_index_sliceable(obj: "DataFrame", key): # slice here via partial string indexing if idx._supports_partial_string_indexing: try: - return idx._get_string_slice(key) + res = idx._get_string_slice(key) + warnings.warn( + "Indexing on datetimelike rows with `frame[string]` is " + "deprecated and will be removed in a future version. " + "Use `frame.loc[string]` instead.", + FutureWarning, + stacklevel=3, + ) + return res except (KeyError, ValueError, NotImplementedError): return None diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 635470b930252..7d5b94381dfec 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -228,7 +228,9 @@ def test_partial_slicing_dataframe(self): tm.assert_series_equal(result, expected) # Frame should return slice as well - result = df[ts_string] + with tm.assert_produces_warning(FutureWarning): + # GH#???? deprecated this indexing + result = df[ts_string] expected = df[theslice] tm.assert_frame_equal(result, expected) From 2a40762328e5e2c2cd99c41cd144aa89a6988993 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 7 Sep 2020 14:41:00 -0700 Subject: [PATCH 2/7] GH references --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/tests/indexes/datetimes/test_partial_slicing.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 0eca656a26a2a..93658cf003592 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -195,7 +195,7 @@ Deprecations ~~~~~~~~~~~~ - Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`) - Deprecated parameter ``dtype`` in :~meth:`Index.copy` on method all index classes. Use the :meth:`Index.astype` method instead for changing dtype(:issue:`35853`) -- Deprecated indexing :class:`DataFrame` rows with datetime-like strings ``df[string]``, use ``df.loc[string]`` instead (:issue:`????`) +- Deprecated indexing :class:`DataFrame` rows with datetime-like strings ``df[string]``, use ``df.loc[string]`` instead (:issue:`36179`) .. --------------------------------------------------------------------------- diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 7d5b94381dfec..57dc46e1fb415 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -229,7 +229,7 @@ def test_partial_slicing_dataframe(self): # Frame should return slice as well with tm.assert_produces_warning(FutureWarning): - # GH#???? deprecated this indexing + # GH#36179 deprecated this indexing result = df[ts_string] expected = df[theslice] tm.assert_frame_equal(result, expected) From 47bfa61b6e510ac7c1e369c1b9001d5526de6812 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 11 Sep 2020 15:12:49 -0700 Subject: [PATCH 3/7] catch warnings --- pandas/tests/series/indexing/test_datetime.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 088f8681feb99..4cc156746bd39 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -11,6 +11,7 @@ from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range import pandas._testing as tm + """ Also test support for datetime64[ns] in Series / DataFrame """ @@ -605,7 +606,9 @@ def test_indexing(): expected.name = "A" df = DataFrame(dict(A=ts)) - result = df["2001"]["A"] + with tm.assert_produces_warning(FutureWarning): + # GH#36179 string indexing on rows for DataFrame deprecated + result = df["2001"]["A"] tm.assert_series_equal(expected, result) # setting @@ -615,7 +618,9 @@ def test_indexing(): df.loc["2001", "A"] = 1 - result = df["2001"]["A"] + with tm.assert_produces_warning(FutureWarning): + # GH#36179 string indexing on rows for DataFrame deprecated + result = df["2001"]["A"] tm.assert_series_equal(expected, result) # GH3546 (not including times on the last day) From 7a9b8b0e831a65d437faf34415a4a45f253e1f20 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 11 Sep 2020 16:02:30 -0700 Subject: [PATCH 4/7] isort fixup --- pandas/tests/series/indexing/test_datetime.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 4cc156746bd39..b7fbed2b325b3 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -1,3 +1,6 @@ +""" +Also test support for datetime64[ns] in Series / DataFrame +""" from datetime import datetime, timedelta import re @@ -12,11 +15,6 @@ import pandas._testing as tm -""" -Also test support for datetime64[ns] in Series / DataFrame -""" - - def test_fancy_getitem(): dti = date_range( freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1) From 42c5c73855d6317405af25d71e0836e64aca12ae Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 11 Sep 2020 17:59:20 -0700 Subject: [PATCH 5/7] update docs --- doc/source/user_guide/timeseries.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 71eefb9a76562..91be744b64972 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -581,7 +581,12 @@ This type of slicing will work on a ``DataFrame`` with a ``DatetimeIndex`` as we partial string selection is a form of label slicing, the endpoints **will be** included. This would include matching times on an included date: +.. warning:: + + Indexing ``DataFrame`` rows with strings is deprecated in pandas 1.2.0 and will be removed in a future version. Use ``frame.loc[dtstring]`` instead. + .. ipython:: python + :okwarning: dft = pd.DataFrame(np.random.randn(100000, 1), columns=['A'], index=pd.date_range('20130101', periods=100000, freq='T')) @@ -592,24 +597,28 @@ This starts on the very first time in the month, and includes the last date and time for the month: .. ipython:: python + :okwarning: dft['2013-1':'2013-2'] This specifies a stop time **that includes all of the times on the last day**: .. ipython:: python + :okwarning: dft['2013-1':'2013-2-28'] This specifies an **exact** stop time (and is not the same as the above): .. ipython:: python + :okwarning: dft['2013-1':'2013-2-28 00:00:00'] We are stopping on the included end-point as it is part of the index: .. ipython:: python + :okwarning: dft['2013-1-15':'2013-1-15 12:30:00'] @@ -633,6 +642,7 @@ We are stopping on the included end-point as it is part of the index: Slicing with string indexing also honors UTC offset. .. ipython:: python + :okwarning: df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific')) df @@ -685,6 +695,7 @@ If index resolution is second, then the minute-accurate timestamp gives a If the timestamp string is treated as a slice, it can be used to index ``DataFrame`` with ``[]`` as well. .. ipython:: python + :okwarning: dft_minute = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=series_minute.index) From 9c4a8fab511653eec3e86ba90488137c639af0d6 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 12 Sep 2020 07:59:46 -0700 Subject: [PATCH 6/7] update doc --- doc/source/user_guide/timeseries.rst | 1 + doc/source/whatsnew/v0.11.0.rst | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 91be744b64972..07780209b7bdf 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2042,6 +2042,7 @@ You can pass in dates and strings to ``Series`` and ``DataFrame`` with ``PeriodI Passing a string representing a lower frequency than ``PeriodIndex`` returns partial sliced data. .. ipython:: python + :okwarning: ps['2011'] diff --git a/doc/source/whatsnew/v0.11.0.rst b/doc/source/whatsnew/v0.11.0.rst index 6c13a125a4e54..c0bc74c9ff036 100644 --- a/doc/source/whatsnew/v0.11.0.rst +++ b/doc/source/whatsnew/v0.11.0.rst @@ -367,6 +367,7 @@ Enhancements - You can now select with a string from a DataFrame with a datelike index, in a similar way to a Series (:issue:`3070`) .. ipython:: python + :okwarning: idx = pd.date_range("2001-10-1", periods=5, freq='M') ts = pd.Series(np.random.rand(len(idx)), index=idx) From 0bff387a815bd216991c17c09d1f46a30f3bfd43 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 24 Sep 2020 10:00:39 -0700 Subject: [PATCH 7/7] whitespace fixup --- doc/source/user_guide/timeseries.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index e0c3a692fdda7..868bf5a1672ff 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -691,7 +691,7 @@ If index resolution is second, then the minute-accurate timestamp gives a If the timestamp string is treated as a slice, it can be used to index ``DataFrame`` with ``[]`` as well. .. ipython:: python - :okwarning: + :okwarning: dft_minute = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=series_minute.index)