From bae9342ecabc88799466985cf40128ed188bfaa2 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 1 May 2023 12:35:57 -0700 Subject: [PATCH 01/27] POC: infer time objects to ArrowDtype[time] --- pandas/_libs/lib.pyi | 1 + pandas/_libs/lib.pyx | 42 +++++++++++++++- pandas/core/config_init.py | 11 ++++ pandas/core/construction.py | 29 +++++++++++ pandas/core/indexes/accessors.py | 4 +- pandas/tests/arithmetic/test_datetime64.py | 29 +++++++---- pandas/tests/dtypes/test_inference.py | 50 +++++++++++++++++++ pandas/tests/extension/test_arrow.py | 34 +++++++++++++ pandas/tests/groupby/test_apply.py | 12 ++++- pandas/tests/io/excel/test_readers.py | 10 ++-- pandas/tests/io/test_sql.py | 21 ++++++-- pandas/tests/plotting/frame/test_frame.py | 18 ++++++- pandas/tests/plotting/test_datetimelike.py | 7 ++- .../series/accessors/test_dt_accessor.py | 5 +- pandas/tests/strings/test_api.py | 11 +++- pandas/tests/tools/test_to_time.py | 10 +++- 16 files changed, 266 insertions(+), 28 deletions(-) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index e9d4e45c07925..3f15e50a95b7a 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -71,6 +71,7 @@ def map_infer( convert: bool = ..., ignore_na: bool = ..., ) -> np.ndarray: ... 
+ @overload def maybe_convert_objects( objects: npt.NDArray[np.object_], diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a622de742a840..0e5974a8186cb 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -6,6 +6,7 @@ from typing import ( Literal, _GenericAlias, ) +import warnings cimport cython from cpython.datetime cimport ( @@ -99,6 +100,8 @@ cdef extern from "pandas/parser/pd_parser.h": PandasParser_IMPORT +from pandas._config import get_option + from pandas._libs cimport util from pandas._libs.util cimport ( INT64_MAX, @@ -1267,6 +1270,7 @@ cdef class Seen: bint datetimetz_ # seen_datetimetz bint period_ # seen_period bint interval_ # seen_interval + bint time_ def __cinit__(self, bint coerce_numeric=False): """ @@ -1293,6 +1297,7 @@ cdef class Seen: self.datetimetz_ = False self.period_ = False self.interval_ = False + self.time_ = False self.coerce_numeric = coerce_numeric cdef bint check_uint64_conflict(self) except -1: @@ -2583,6 +2588,12 @@ def maybe_convert_objects(ndarray[object] objects, else: seen.object_ = True break + elif PyTime_Check(val): + if convert_time: + seen.time_ = True + else: + seen.object_ = True + break else: seen.object_ = True break @@ -2647,7 +2658,36 @@ def maybe_convert_objects(ndarray[object] objects, seen.object_ = True - elif seen.nat_: + elif seen.time_: + if is_time_array(objects): + opt = get_option("future.infer_time") + if opt is True: + import pyarrow as pa + + from pandas.core.arrays.arrow import ArrowDtype + + obj = pa.array(objects) + dtype = ArrowDtype(obj.type) + return dtype.construct_array_type()(obj) + elif opt is False: + # explicitly set to keep the old behavior and avoid the warning + pass + else: + from pandas.util._exceptions import find_stack_level + warnings.warn( + "Pandas type inference with a sequence of `datetime.time` " + "objects is deprecated. In a future version, this will give " + "time32[pyarrow] dtype, which will require pyarrow to be " + "installed. 
To opt in to the new behavior immediately set " + "`pd.set_option('future.infer_time', True)`. To keep the " + "old behavior pass `dtype=object`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + seen.object_ = True + + if seen.nat_: if not seen.object_ and not seen.numeric_ and not seen.bool_: # all NaT, None, or nan (at least one NaT) # see GH#49340 for discussion of desired behavior diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 3f662073f0357..5f7d448a1092a 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -889,3 +889,14 @@ def register_converter_cb(key) -> None: styler_environment, validator=is_instance_factory([type(None), str]), ) + + +with cf.config_prefix("future"): + cf.register_option( + "future.infer_time", + None, + "Whether to infer sequence of datetime.time objects as pyarrow time " + "dtype, which will be the default in pandas 3.0 " + "(at which point this option will be deprecated).", + validator=is_one_of_factory([True, False, None]), + ) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 9b4d67a20a7cd..330555468e8cc 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -14,10 +14,13 @@ cast, overload, ) +import warnings import numpy as np from numpy import ma +from pandas._config import get_option + from pandas._libs import lib from pandas._libs.tslibs import ( Period, @@ -31,6 +34,7 @@ DtypeObj, T, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.cast import ( @@ -293,6 +297,7 @@ def array( PeriodArray, TimedeltaArray, ) + from pandas.core.arrays.arrow import ArrowDtype from pandas.core.arrays.string_ import StringDtype if lib.is_scalar(data): @@ -360,6 +365,30 @@ def array( elif inferred_dtype == "boolean": return BooleanArray._from_sequence(data, copy=copy) + elif inferred_dtype == "time": + opt = get_option("future.infer_time") + + if opt is True: + 
import pyarrow as pa + + obj = pa.array(data) + dtype = ArrowDtype(obj.type) + return dtype.construct_array_type()(obj) + elif opt is False: + # explicitly set to keep the old behavior and avoid the warning + pass + else: + warnings.warn( + "Pandas type inference with a sequence of `datetime.time` " + "objects is deprecated. In a future version, this will give " + "time32[pyarrow] dtype, which will require pyarrow to be " + "installed. To opt in to the new behavior immediately set " + "`pd.set_option('future.infer_time', True)`. To keep the " + "old behavior pass `dtype=object`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # Pandas overrides NumPy for # 1. datetime64[ns,us,ms,s] # 2. timedelta64[ns,us,ms,s] diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index c6da7d847c363..dfae72064354f 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -108,7 +108,9 @@ def _delegate_property_get(self, name: str): # type: ignore[override] else: index = self._parent.index # return the result as a Series - result = Series(result, index=index, name=self.name).__finalize__(self._parent) + result = Series( + result, index=index, name=self.name, dtype=result.dtype + ).__finalize__(self._parent) # setting this object will show a SettingWithCopyWarning/Error result._is_copy = ( diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 6a0584485be42..1f323d059d86d 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1166,7 +1166,10 @@ def test_dt64arr_add_sub_parr( ) assert_invalid_addsub_type(dtarr, parr, msg) - def test_dt64arr_addsub_time_objects_raises(self, box_with_array, tz_naive_fixture): + @pytest.mark.parametrize("future", [True, False, None]) + def test_dt64arr_addsub_time_objects_raises( + self, box_with_array, tz_naive_fixture, future + ): # https://github.com/pandas-dev/pandas/issues/10329 
tz = tz_naive_fixture @@ -1175,15 +1178,23 @@ def test_dt64arr_addsub_time_objects_raises(self, box_with_array, tz_naive_fixtu obj2 = [time(i, i, i) for i in range(3)] obj1 = tm.box_expected(obj1, box_with_array) - obj2 = tm.box_expected(obj2, box_with_array) - - msg = "|".join( - [ - "unsupported operand", - "cannot subtract DatetimeArray from ndarray", - ] - ) + msgs = [ + "unsupported operand", + "cannot subtract DatetimeArray from ndarray", + ] + warn_msg = "Pandas type inference with a sequence of `datetime.time` objects" + warn = None + if future is True: + msgs.append("cannot subtract DatetimeArray from ArrowExtensionArray") + elif future is None: + warn = FutureWarning + + with pd.option_context("future.infer_time", future): + with tm.assert_produces_warning(warn, match=warn_msg): + obj2 = tm.box_expected(obj2, box_with_array) + + msg = "|".join(msgs) with warnings.catch_warnings(record=True): # pandas.errors.PerformanceWarning: Non-vectorized DateOffset being # applied to Series or DatetimeIndex diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index bbce40727c669..8579b91d47013 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1037,6 +1037,56 @@ def test_maybe_convert_objects_ea(self, idx): ) tm.assert_extension_array_equal(result, idx._data) + @pytest.mark.parametrize("future", [True, False, None]) + def test_maybe_convert_objects_time(self, future): + ts = Timestamp.now() + objs = np.array([ts.time()], dtype=object) + + msg = "Pandas type inference with a sequence of `datetime.time` objects" + warn = None + if future is True: + pa = pytest.importorskip("pyarrow") + dtype = pd.ArrowDtype(pa.time64("us")) + exp = dtype.construct_array_type()._from_sequence(objs, dtype=dtype) + else: + if future is None: + warn = FutureWarning + exp = objs + + with pd.option_context("future.infer_time", future): + with tm.assert_produces_warning(warn, match=msg): + out = 
lib.maybe_convert_objects(objs, convert_time=True) + with tm.assert_produces_warning(warn, match=msg): + ser = Series(objs) + with tm.assert_produces_warning(warn, match=msg): + ser2 = Series(list(objs)) + with tm.assert_produces_warning(warn, match=msg): + df = DataFrame(objs) + with tm.assert_produces_warning(warn, match=msg): + df2 = DataFrame(list(objs)) + with tm.assert_produces_warning(warn, match=msg): + idx = Index(objs) + with tm.assert_produces_warning(warn, match=msg): + idx2 = Index(list(objs)) + with tm.assert_produces_warning(warn, match=msg): + arr = pd.array(objs) + with tm.assert_produces_warning(warn, match=msg): + arr2 = pd.array(list(objs)) + + tm.assert_equal(out, exp) + if future: + tm.assert_equal(arr, exp) + tm.assert_equal(arr2, exp) + else: + tm.assert_equal(arr, pd.core.arrays.PandasArray(exp)) + tm.assert_equal(arr2, pd.core.arrays.PandasArray(exp)) + tm.assert_series_equal(ser, Series(exp, dtype=exp.dtype)) + tm.assert_series_equal(ser2, Series(exp, dtype=exp.dtype)) + tm.assert_frame_equal(df, DataFrame(exp, dtype=exp.dtype)) + tm.assert_frame_equal(df2, DataFrame(exp, dtype=exp.dtype)) + tm.assert_index_equal(idx, Index(exp, dtype=exp.dtype)) + tm.assert_index_equal(idx2, Index(exp, dtype=exp.dtype)) + class TestTypeInference: # Dummy class used for testing with Python objects diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index abcca16340365..61231974a878f 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -716,6 +716,28 @@ class TestBaseReshaping(base.BaseReshapingTests): def test_transpose(self, data): super().test_transpose(data) + @pytest.mark.parametrize( + "columns", + [ + ["A", "B"], + pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b")], names=["outer", "inner"] + ), + ], + ) + def test_stack(self, data, columns): + warn = None + warn_msg = "Pandas type inference with a sequence of `datetime.time` objects" + + pa_dtype = 
data.dtype.pyarrow_dtype + if pa.types.is_time(pa_dtype): + # FIXME: need to avoid doing inference when calling frame._constructor + # in _stack_multi_columns + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=warn_msg, check_stacklevel=False): + super().test_stack(data, columns) + class TestBaseSetitem(base.BaseSetitemTests): @pytest.mark.xfail( @@ -778,6 +800,18 @@ def test_invert(self, data, request): class TestBaseMethods(base.BaseMethodsTests): + def test_hash_pandas_object_works(self, data, as_frame): + pa_dtype = data.dtype.pyarrow_dtype + warn_msg = "Pandas type inference with a sequence of `datetime.time`" + warn = None + if pa.types.is_time(pa_dtype): + # TODO(#48964) This warning will be avoided by implementing + # ArrowExtensionArray.hash_pandas_object + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=warn_msg, check_stacklevel=False): + super().test_hash_pandas_object_works(data, as_frame) + @pytest.mark.parametrize("periods", [1, -2]) def test_diff(self, data, periods, request): pa_dtype = data.dtype.pyarrow_dtype diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 832192d8a33e6..0797e70acb4a9 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1,6 +1,7 @@ from datetime import ( date, datetime, + time, ) from io import StringIO @@ -836,7 +837,16 @@ def test_apply_datetime_issue(group_column_dtlike): # is a datetime object and the column labels are different from # standard int values in range(len(num_columns)) - df = DataFrame({"a": ["foo"], "b": [group_column_dtlike]}) + warn = None + warn_msg = ( + "Pandas type inference with a sequence of `datetime.time` " + "objects is deprecated" + ) + if isinstance(group_column_dtlike, time): + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=warn_msg): + df = DataFrame({"a": ["foo"], "b": [group_column_dtlike]}) result = df.groupby("a").apply(lambda x: Series(["spam"], 
index=[42])) expected = DataFrame( diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 66dd090ec0783..b0b05c14fb980 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -993,13 +993,17 @@ def test_reader_seconds(self, request, engine, read_ext): time(16, 37, 0, 900000), time(18, 20, 54), ] - } + }, + dtype=object, ) - actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1") + warn_msg = "Pandas type inference with a sequence of `datetime.time` objects" + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1") tm.assert_frame_equal(actual, expected) - actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1") + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1") tm.assert_frame_equal(actual, expected) def test_read_excel_multiindex(self, request, read_ext): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 75fcef09535d4..535ff19ebc7dc 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2249,9 +2249,14 @@ def test_datetime_date(self): def test_datetime_time(self, sqlite_buildin): # test support for datetime.time - df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"]) + + warn_msg = "Pandas type inference with a sequence of `datetime.time`" + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"]) assert df.to_sql("test_time", self.conn, index=False) == 2 - res = read_sql_table("test_time", self.conn) + + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + res = read_sql_table("test_time", self.conn) tm.assert_frame_equal(res, df) # GH8341 @@ -2267,7 +2272,9 @@ def test_datetime_time(self, sqlite_buildin): res = sql.read_sql_query("SELECT * FROM 
test_time3", self.conn) ref = df.map(lambda _: _.strftime("%H:%M:%S.%f")) tm.assert_frame_equal(ref, res) - res = sql.read_sql_table("test_time3", self.conn) + + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + res = sql.read_sql_table("test_time3", self.conn) tm.assert_frame_equal(df, res) def test_mixed_dtype_insert(self): @@ -3104,13 +3111,17 @@ def test_datetime_date(self): @pytest.mark.parametrize("tz_aware", [False, True]) def test_datetime_time(self, tz_aware): # test support for datetime.time, GH #8341 + + warn_msg = "Pandas type inference with a sequence of `datetime.time`" if not tz_aware: tz_times = [time(9, 0, 0), time(9, 1, 30)] else: tz_dt = date_range("2013-01-01 09:00:00", periods=2, tz="US/Pacific") - tz_times = Series(tz_dt.to_pydatetime()).map(lambda dt: dt.timetz()) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + tz_times = Series(tz_dt.to_pydatetime()).map(lambda dt: dt.timetz()) - df = DataFrame(tz_times, columns=["a"]) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + df = DataFrame(tz_times, columns=["a"]) assert df.to_sql("test_time", self.conn, index=False) == 2 res = read_sql_query("SELECT * FROM test_time", self.conn) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 01762e39c36c1..44bacb21d6ebc 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -673,11 +673,25 @@ def test_plot_scatter(self): def test_raise_error_on_datetime_time_data(self): # GH 8113, datetime.time type is not supported by matplotlib in scatter df = DataFrame(np.random.randn(10), columns=["a"]) - df["dtime"] = date_range(start="2014-01-01", freq="h", periods=10).time + warn_msg = ( + "Pandas type inference with a sequence of `datetime.time` " + "objects is deprecated" + ) + + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + df["dtime"] = date_range(start="2014-01-01", freq="h", 
periods=10).time + msg = "must be a string or a (real )?number, not 'datetime.time'" with pytest.raises(TypeError, match=msg): - df.plot(kind="scatter", x="dtime", y="a") + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + # warns bc it calls infer_objects inside df.plot + df.plot(kind="scatter", x="dtime", y="a") + + with pd.option_context("future.infer_time", True): + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(None): + df.plot(kind="scatter", x="dtime", y="a") def test_scatterplot_datetime_data(self): # GH 30391 diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index dda71328d4e6c..30cb04186efe6 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -1008,6 +1008,7 @@ def test_time(self): t = datetime(1, 1, 1, 3, 30, 0) deltas = np.random.randint(1, 20, 3).cumsum() ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]) + ts = Index(ts, dtype=object) df = DataFrame( {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts ) @@ -1031,7 +1032,10 @@ def test_time(self): def test_time_change_xlim(self): t = datetime(1, 1, 1, 3, 30, 0) deltas = np.random.randint(1, 20, 3).cumsum() - ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]) + ts = Index( + np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]), + dtype=object, + ) df = DataFrame( {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts ) @@ -1073,6 +1077,7 @@ def test_time_musec(self): t = datetime(1, 1, 1, 3, 30, 0) deltas = np.random.randint(1, 20, 3).cumsum() ts = np.array([(t + timedelta(microseconds=int(x))).time() for x in deltas]) + ts = Index(ts, dtype=object) df = DataFrame( {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts ) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 
5cdeee20f3435..2d92d034a2ed6 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -87,7 +87,7 @@ def get_expected(ser, prop): result = result.astype("int64") elif not is_list_like(result) or isinstance(result, DataFrame): return result - return Series(result, index=ser.index, name=ser.name) + return Series(result, index=ser.index, name=ser.name, dtype=result.dtype) left = getattr(ser.dt, name) right = get_expected(ser, name) @@ -725,7 +725,8 @@ def test_dt_timetz_accessor(self, tz_naive_fixture): ) ser = Series(dtindex) expected = Series( - [time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)] + [time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)], + dtype=object, ) result = ser.dt.timetz tm.assert_series_equal(result, expected) diff --git a/pandas/tests/strings/test_api.py b/pandas/tests/strings/test_api.py index c439a5f006922..c3b77569cc751 100644 --- a/pandas/tests/strings/test_api.py +++ b/pandas/tests/strings/test_api.py @@ -31,7 +31,16 @@ def test_api_per_dtype(index_or_series, dtype, any_skipna_inferred_dtype): box = index_or_series inferred_dtype, values = any_skipna_inferred_dtype - t = box(values, dtype=dtype) # explicit dtype to avoid casting + warn_msg = ( + "Pandas type inference with a sequence of `datetime.time` objects " + "is deprecated" + ) + warn = None + if dtype == "category" and inferred_dtype == "time": + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=warn_msg): + t = box(values, dtype=dtype) # explicit dtype to avoid casting types_passing_constructor = [ "string", diff --git a/pandas/tests/tools/test_to_time.py b/pandas/tests/tools/test_to_time.py index 5046fd9d0edc1..eb987d8a63b39 100644 --- a/pandas/tests/tools/test_to_time.py +++ b/pandas/tests/tools/test_to_time.py @@ -61,9 +61,15 @@ def test_arraylike(self): with pytest.raises(ValueError, match=msg): to_time(arg, format="%I:%M%p", errors="raise") - 
tm.assert_series_equal( - to_time(Series(arg, name="test")), Series(expected_arr, name="test") + warn_msg = ( + "Pandas type inference with a sequence of `datetime.time` objects " + "is deprecated" ) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + res_ser = to_time(Series(arg, name="test")) + exp_ser = Series(expected_arr, name="test", dtype=object) + + tm.assert_series_equal(res_ser, exp_ser) res = to_time(np.array(arg)) assert isinstance(res, list) From d0cce3d8f91b70d892ffbe27e6b95c4df7b04f05 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 1 May 2023 15:30:18 -0700 Subject: [PATCH 02/27] skip if no pyarrow --- pandas/tests/arithmetic/test_datetime64.py | 5 ++++- pandas/tests/dtypes/test_inference.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 1f323d059d86d..72fc44ecd3fcf 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -20,6 +20,7 @@ from pandas._libs.tslibs.conversion import localize_pydatetime from pandas._libs.tslibs.offsets import shift_months from pandas.errors import PerformanceWarning +import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -1166,7 +1167,9 @@ def test_dt64arr_add_sub_parr( ) assert_invalid_addsub_type(dtarr, parr, msg) - @pytest.mark.parametrize("future", [True, False, None]) + @pytest.mark.parametrize( + "future", [pytest.param(True, marks=td.skip_if_no("pyarrow")), False, None] + ) def test_dt64arr_addsub_time_objects_raises( self, box_with_array, tz_naive_fixture, future ): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 8579b91d47013..280941153e7f5 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1037,7 +1037,9 @@ def test_maybe_convert_objects_ea(self, idx): ) tm.assert_extension_array_equal(result, idx._data) 
- @pytest.mark.parametrize("future", [True, False, None]) + @pytest.mark.parametrize( + "future", [pytest.param(True, marks=td.skip_if_no("pyarrow")), False, None] + ) def test_maybe_convert_objects_time(self, future): ts = Timestamp.now() objs = np.array([ts.time()], dtype=object) From 5857d67eea7f8a1ac75bc8a65711ceae1dd8b486 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 3 May 2023 09:46:37 -0700 Subject: [PATCH 03/27] dt.time --- pandas/_libs/lib.pyx | 4 +-- pandas/core/arrays/datetimes.py | 28 ++++++++++++++- pandas/core/construction.py | 3 +- pandas/core/indexes/accessors.py | 2 -- pandas/tests/arrays/test_datetimes.py | 10 ++++-- pandas/tests/generic/test_finalize.py | 9 ++++- .../indexes/datetimes/test_scalar_compat.py | 4 ++- .../tests/indexes/datetimes/test_timezones.py | 5 ++- pandas/tests/io/parser/test_parse_dates.py | 4 ++- .../series/accessors/test_cat_accessor.py | 8 +++-- .../series/accessors/test_dt_accessor.py | 35 +++++++++++++++++-- 11 files changed, 94 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 0e5974a8186cb..e23652f5e5a22 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2589,7 +2589,7 @@ def maybe_convert_objects(ndarray[object] objects, seen.object_ = True break elif PyTime_Check(val): - if convert_time: + if convert_non_numeric: seen.time_ = True else: seen.object_ = True @@ -2664,7 +2664,7 @@ def maybe_convert_objects(ndarray[object] objects, if opt is True: import pyarrow as pa - from pandas.core.arrays.arrow import ArrowDtype + from pandas.core.dtypes.dtypes import ArrowDtype obj = pa.array(objects) dtype = ArrowDtype(obj.type) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d6afba8c34904..f0efbf1232304 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -15,6 +15,8 @@ import numpy as np +from pandas._config import get_option + from pandas._libs import ( lib, tslib, @@ -58,6 +60,7 @@ DatetimeTZDtype, 
ExtensionDtype, PeriodDtype, + ArrowDtype, ) from pandas.core.dtypes.missing import isna @@ -1368,7 +1371,30 @@ def time(self) -> npt.NDArray[np.object_]: # keeping their timezone and not using UTC timestamps = self._local_timestamps() - return ints_to_pydatetime(timestamps, box="time", reso=self._creso) + result = ints_to_pydatetime(timestamps, box="time", reso=self._creso) + + opt = get_option("future.infer_time") + if opt is None: + warnings.warn( + f"The behavior of {type(self).__name__}.time is deprecated. " + "In a future version, this will an array with pyarrow time " + "dtype instead of object dtype. To opt in to the future behavior, " + "set `pd.set_option('future.infer_time', True)`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif opt is True: + # TODO: optimize this to avoid going through ints_to_pydatetime + import pyarrow as pa + + pa_type = pa.time64(self.unit) + result[self.isna()] = None + obj = pa.array(result, type=pa_type) + dtype = ArrowDtype(obj.type) + out = dtype.construct_array_type()(obj) + return out + + return result @property def timetz(self) -> npt.NDArray[np.object_]: diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 330555468e8cc..3f9bb0bc4cc60 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -51,7 +51,7 @@ is_object_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import PandasDtype +from pandas.core.dtypes.dtypes import PandasDtype, ArrowDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCExtensionArray, @@ -297,7 +297,6 @@ def array( PeriodArray, TimedeltaArray, ) - from pandas.core.arrays.arrow import ArrowDtype from pandas.core.arrays.string_ import StringDtype if lib.is_scalar(data): diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index dfae72064354f..a8e9649906cc0 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -101,8 +101,6 @@ def _delegate_property_get(self, name: 
str): # type: ignore[override] elif not is_list_like(result): return result - result = np.asarray(result) - if self.orig is not None: index = self.orig.index else: diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 2acc7bdc0d902..47759de1bbda5 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -155,8 +155,14 @@ def test_to_pydatetime(self, dta_dti): def test_time_date(self, dta_dti, meth): dta, dti = dta_dti - result = getattr(dta, meth) - expected = getattr(dti, meth) + warn = None + msg = "In a future version, this will an array with pyarrow time dtype" + if meth == "time": + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=msg): + result = getattr(dta, meth) + expected = getattr(dti, meth) tm.assert_numpy_array_equal(result, expected) def test_format_native_types(self, unit, dtype, dta_dti): diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index f827eaf63a342..73ca85b612031 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -671,7 +671,14 @@ def test_datetime_method(method): def test_datetime_property(attr): s = pd.Series(pd.date_range("2000", periods=4)) s.attrs = {"a": 1} - result = getattr(s.dt, attr) + + warn = None + msg = "In a future version, this will an array with pyarrow time dtype" + if attr == "time": + warn = FutureWarning + with tm.assert_produces_warning(warn, match=msg): + result = getattr(s.dt, attr) + assert result.attrs == {"a": 1} diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index f07a9dce5f6ae..79ab191855945 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -24,7 +24,9 @@ class TestDatetimeIndexOps: def test_dti_time(self): rng = date_range("1/1/2000", freq="12min", periods=10) - result = 
pd.Index(rng).time + msg = "In a future version, this will an array with pyarrow time dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = pd.Index(rng).time expected = [t.time() for t in rng] assert (result == expected).all() diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 6f3c83b999e94..f4daa50113dad 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -853,7 +853,10 @@ def test_time_accessor(self, dtype): expected = np.array([time(10, 20, 30), pd.NaT]) index = DatetimeIndex(["2018-06-04 10:20:30", pd.NaT], dtype=dtype) - result = index.time + + msg = "In a future version, this will an array with pyarrow time dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = index.time tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 571e09bb5e9dd..dcc3d9f8f13a3 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -479,7 +479,9 @@ def test_date_col_as_index_col(all_parsers): if parser.engine == "pyarrow": # https://github.com/pandas-dev/pandas/issues/44231 # pyarrow 6.0 starts to infer time type - expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time + msg = "In a future version, this will an array with pyarrow time dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/accessors/test_cat_accessor.py b/pandas/tests/series/accessors/test_cat_accessor.py index cc64a9388fd7c..069b2e10f2ad1 100644 --- a/pandas/tests/series/accessors/test_cat_accessor.py +++ b/pandas/tests/series/accessors/test_cat_accessor.py @@ -212,8 +212,12 @@ def 
test_dt_accessor_api_for_categorical(self, idx): tm.assert_equal(res, exp) for attr in attr_names: - res = getattr(cat.dt, attr) - exp = getattr(ser.dt, attr) + with warnings.catch_warnings(): + if attr == "time": + # deprecated to return pyarrow time dtype + warnings.simplefilter("ignore", FutureWarning) + res = getattr(cat.dt, attr) + exp = getattr(ser.dt, attr) tm.assert_equal(res, exp) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 2d92d034a2ed6..858fc0b3bad85 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -12,6 +12,7 @@ import pytz from pandas._libs.tslibs.timezones import maybe_get_tz +from pandas.compat import pa_version_under7p0 from pandas.errors import SettingWithCopyError from pandas.core.dtypes.common import ( @@ -89,8 +90,15 @@ def get_expected(ser, prop): return result return Series(result, index=ser.index, name=ser.name, dtype=result.dtype) - left = getattr(ser.dt, name) - right = get_expected(ser, name) + if name == "time": + msg = "In a future version, this will an array with pyarrow time dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + left = getattr(ser.dt, name) + right = get_expected(ser, name) + else: + left = getattr(ser.dt, name) + right = get_expected(ser, name) + if not (is_list_like(left) and is_list_like(right)): assert left == right elif isinstance(left, DataFrame): @@ -672,10 +680,31 @@ def test_valid_dt_with_missing_values(self): ) tm.assert_series_equal(result, expected) - result = ser.dt.time + msg = "In a future version, this will an array with pyarrow time" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.dt.time expected = Series([time(0), time(0), pd.NaT, time(0), time(0)], dtype="object") tm.assert_series_equal(result, expected) + with pd.option_context("future.infer_time", False): + with tm.assert_produces_warning(None): + result = 
ser.dt.time + tm.assert_series_equal(result, expected) + + if pa_version_under7p0: + return + + with pd.option_context("future.infer_time", True): + with tm.assert_produces_warning(None): + result_pa = ser.dt.time + + import pyarrow as pa + + pa_dtype = pa.time64("ns") + dtype = pd.ArrowDtype(pa_dtype) + expected_pa = expected.astype(dtype) + tm.assert_series_equal(result_pa, expected_pa) + def test_dt_accessor_api(self): # GH 9322 from pandas.core.indexes.accessors import ( From fa6421c4268bf0e9944cbb9d31e575cd0de28b25 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 3 May 2023 11:53:26 -0700 Subject: [PATCH 04/27] mypy fixup --- pandas/core/arrays/datetimes.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f0efbf1232304..976dc77332c66 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -85,7 +85,10 @@ ) from pandas import DataFrame - from pandas.core.arrays import PeriodArray + from pandas.core.arrays import ( + ArrowExtensionArray, + PeriodArray, + ) _midnight = time(0, 0) @@ -1338,7 +1341,7 @@ def day_name(self, locale=None) -> npt.NDArray[np.object_]: return result @property - def time(self) -> npt.NDArray[np.object_]: + def time(self) -> npt.NDArray[np.object_] | ArrowExtensionArray: """ Returns numpy array of :class:`datetime.time` objects. 
From d498ea1821e2128e0cdde4c4755d91d9bb02d5d6 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 4 May 2023 09:05:52 -0700 Subject: [PATCH 05/27] Handle construction from scalar --- pandas/_libs/lib.pyx | 3 ++- pandas/core/dtypes/cast.py | 26 +++++++++++++++++++ pandas/tests/frame/test_constructors.py | 34 +++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e23652f5e5a22..4a05a4ddef420 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2589,7 +2589,7 @@ def maybe_convert_objects(ndarray[object] objects, seen.object_ = True break elif PyTime_Check(val): - if convert_non_numeric: + if convert_non_numeric and val.tzinfo is None: seen.time_ = True else: seen.object_ = True @@ -2660,6 +2660,7 @@ def maybe_convert_objects(ndarray[object] objects, elif seen.time_: if is_time_array(objects): + # FIXME: need to ensure this is not timetz opt = get_option("future.infer_time") if opt is True: import pyarrow as pa diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f65cb94df293e..c0197f44431ee 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -20,6 +20,8 @@ import numpy as np +from pandas._config import get_option + from pandas._libs import lib from pandas._libs.missing import ( NA, @@ -40,6 +42,7 @@ IntCastingNaNError, LossySetitemError, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( ensure_int8, @@ -819,6 +822,29 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]: val = val.asm8 dtype = val.dtype + elif isinstance(val, dt.time): + if val.tzinfo is None: + # pyarrow doesn't have a dtype for timetz. + opt = get_option("future.infer_time") + if opt is None: + warnings.warn( + "Pandas type inference with a `datetime.time` " + "object is deprecated. In a future version, this will give " + "time32[pyarrow] dtype, which will require pyarrow to be " + "installed. 
To opt in to the new behavior immediately set " + "`pd.set_option('future.infer_time', True)`. To keep the " + "old behavior pass `dtype=object`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif opt is True: + import pyarrow as pa + + pa_dtype = pa.time64("us") + from pandas.core.arrays.arrow import ArrowDtype + + dtype = ArrowDtype(pa_dtype) + elif is_bool(val): dtype = np.dtype(np.bool_) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 06e244b93016c..a424a672ed31a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3154,6 +3154,40 @@ def test_tzaware_data_tznaive_dtype(self, constructor, box, frame_or_series): with pytest.raises(err, match=msg): constructor(ts, dtype="M8[ns]") + @pytest.mark.parametrize( + "future", [pytest.param(True, marks=td.skip_if_no("pyarrow")), False, None] + ) + def test_from_pytime(self, constructor, box, frame_or_series, future): + item = Timestamp("2023-05-04 08:53").time() + + warn = None + if box is list or (box is dict and frame_or_series is Series): + msg = ( + "Pandas type inference with a sequence of `datetime.time` " + "objects is deprecated" + ) + else: + msg = "Pandas type inference with a `datetime.time` object is deprecated" + exp_dtype = np.dtype(object) + if future is None: + warn = FutureWarning + elif future is True: + import pyarrow as pa + + pa_type = pa.time64("us") + exp_dtype = pd.ArrowDtype(pa_type) + + with pd.option_context("future.infer_time", future): + with tm.assert_produces_warning(warn, match=msg): + result = constructor(item) + dtype = tm.get_dtype(result) + assert dtype == exp_dtype + + aware = Timestamp("2023-05-04 08:53", tz="US/Pacific").timetz() + result2 = constructor(aware) + dtype = tm.get_dtype(result2) + assert dtype == np.dtype(object) + # TODO: better location for this test? 
class TestAllowNonNano: From 037a0c64bad461d7b4e155d35a2b29e46ebc4fbe Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 4 May 2023 12:20:22 -0700 Subject: [PATCH 06/27] fix timetz sqlite tet --- pandas/tests/io/test_sql.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 535ff19ebc7dc..6f4e0da1b6764 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -3112,15 +3112,17 @@ def test_datetime_date(self): def test_datetime_time(self, tz_aware): # test support for datetime.time, GH #8341 - warn_msg = "Pandas type inference with a sequence of `datetime.time`" if not tz_aware: tz_times = [time(9, 0, 0), time(9, 1, 30)] else: tz_dt = date_range("2013-01-01 09:00:00", periods=2, tz="US/Pacific") - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - tz_times = Series(tz_dt.to_pydatetime()).map(lambda dt: dt.timetz()) + tz_times = Series(tz_dt.to_pydatetime()).map(lambda dt: dt.timetz()) - with tm.assert_produces_warning(FutureWarning, match=warn_msg): + warn_msg = "Pandas type inference with a sequence of `datetime.time`" + warn = None + if not tz_aware: + warn = FutureWarning + with tm.assert_produces_warning(warn, match=warn_msg): df = DataFrame(tz_times, columns=["a"]) assert df.to_sql("test_time", self.conn, index=False) == 2 From 94fd426785557bbb79f3d4fb55c5ed868b0dc639 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 May 2023 10:53:22 -0700 Subject: [PATCH 07/27] update --- pandas/core/arrays/arrow/array.py | 10 +++++++++- pandas/core/arrays/datetimes.py | 1 + pandas/core/construction.py | 5 ++++- pandas/core/dtypes/cast.py | 1 - pandas/tests/arithmetic/test_datetime64.py | 8 +++++++- 5 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 85a75fff25ebd..565800823a9f2 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ 
-651,7 +651,15 @@ def _evaluate_op_method(self, other, op, arrow_funcs): if pc_func is NotImplemented: raise NotImplementedError(f"{op.__name__} not implemented.") - result = pc_func(self._pa_array, other) + try: + result = pc_func(self._pa_array, other) + except pa.lib.ArrowNotImplementedError: + if op in [operator.add, roperator.radd, operator.sub, roperator.rsub]: + # By returning NotImplemented we get standard message with a + # TypeError + return NotImplemented + raise + return type(self)(result) def _logical_method(self, other, op): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 976dc77332c66..d12f3c67781b9 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -57,6 +57,7 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ( + ArrowDtype, DatetimeTZDtype, ExtensionDtype, PeriodDtype, diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 3f9bb0bc4cc60..b4101d23e6153 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -51,7 +51,10 @@ is_object_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import PandasDtype, ArrowDtype +from pandas.core.dtypes.dtypes import ( + ArrowDtype, + PandasDtype, +) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCExtensionArray, diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c0197f44431ee..d62d793054639 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -841,7 +841,6 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]: import pyarrow as pa pa_dtype = pa.time64("us") - from pandas.core.arrays.arrow import ArrowDtype dtype = ArrowDtype(pa_dtype) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 72fc44ecd3fcf..60760c24d8321 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1171,11 +1171,17 @@ def test_dt64arr_add_sub_parr( 
"future", [pytest.param(True, marks=td.skip_if_no("pyarrow")), False, None] ) def test_dt64arr_addsub_time_objects_raises( - self, box_with_array, tz_naive_fixture, future + self, box_with_array, tz_naive_fixture, future, request ): # https://github.com/pandas-dev/pandas/issues/10329 tz = tz_naive_fixture + if str(tz) == "tzlocal()" and future is True: + # TODO(GH#53278) + mark = pytest.mark.xfail( + reason="Incorrectly raises AttributeError instead of TypeError" + ) + request.node.add_marker(mark) obj1 = date_range("2012-01-01", periods=3, tz=tz) obj2 = [time(i, i, i) for i in range(3)] From 2b80d582e5a7c364897a57f1e2b4467ce9e48333 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 19 May 2023 13:31:53 -0700 Subject: [PATCH 08/27] update test --- pandas/tests/dtypes/test_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 280941153e7f5..f6dc73c81ee48 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1057,7 +1057,7 @@ def test_maybe_convert_objects_time(self, future): with pd.option_context("future.infer_time", future): with tm.assert_produces_warning(warn, match=msg): - out = lib.maybe_convert_objects(objs, convert_time=True) + out = lib.maybe_convert_objects(objs, convert_non_numeric=True) with tm.assert_produces_warning(warn, match=msg): ser = Series(objs) with tm.assert_produces_warning(warn, match=msg): From b365fe225fada0bb668ff94689ca264f7d5daf31 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 19 May 2023 14:28:53 -0700 Subject: [PATCH 09/27] update test --- pandas/core/arrays/arrow/array.py | 10 +--------- pandas/tests/arithmetic/common.py | 21 ++++++++++++++++----- pandas/tests/arithmetic/test_datetime64.py | 4 ++-- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 565800823a9f2..85a75fff25ebd 100644 --- 
a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -651,15 +651,7 @@ def _evaluate_op_method(self, other, op, arrow_funcs): if pc_func is NotImplemented: raise NotImplementedError(f"{op.__name__} not implemented.") - try: - result = pc_func(self._pa_array, other) - except pa.lib.ArrowNotImplementedError: - if op in [operator.add, roperator.radd, operator.sub, roperator.rsub]: - # By returning NotImplemented we get standard message with a - # TypeError - return NotImplemented - raise - + result = pc_func(self._pa_array, other) return type(self)(result) def _logical_method(self, other, op): diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py index f3173e8f0eb57..1534346eb86cc 100644 --- a/pandas/tests/arithmetic/common.py +++ b/pandas/tests/arithmetic/common.py @@ -33,7 +33,9 @@ def assert_cannot_add(left, right, msg="cannot add"): right + left -def assert_invalid_addsub_type(left, right, msg=None): +def assert_invalid_addsub_type( + left, right, msg=None, can_be_not_implemented: bool = False +): """ Helper to assert that left and right can be neither added nor subtracted. 
@@ -42,14 +44,23 @@ def assert_invalid_addsub_type(left, right, msg=None): left : object right : object msg : str or None, default None + can_be_not_implemented : bool, default False + Whether to accept NotImplementedError in addition to TypeError """ - with pytest.raises(TypeError, match=msg): + + errs = TypeError + if can_be_not_implemented: + # really we are interested in pa.lib.ArrowNotImplementedError, which + # is a subclass of NotImplementedError + errs = (TypeError, NotImplementedError) + + with pytest.raises(errs, match=msg): left + right - with pytest.raises(TypeError, match=msg): + with pytest.raises(errs, match=msg): right + left - with pytest.raises(TypeError, match=msg): + with pytest.raises(errs, match=msg): left - right - with pytest.raises(TypeError, match=msg): + with pytest.raises(errs, match=msg): right - left diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 60760c24d8321..0fe7491eae88d 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1195,7 +1195,7 @@ def test_dt64arr_addsub_time_objects_raises( warn_msg = "Pandas type inference with a sequence of `datetime.time` objects" warn = None if future is True: - msgs.append("cannot subtract DatetimeArray from ArrowExtensionArray") + msgs.append(r"Function '(add|subtract)_checked' has no kernel") elif future is None: warn = FutureWarning @@ -1210,7 +1210,7 @@ def test_dt64arr_addsub_time_objects_raises( # we aren't testing that here, so ignore. 
warnings.simplefilter("ignore", PerformanceWarning) - assert_invalid_addsub_type(obj1, obj2, msg=msg) + assert_invalid_addsub_type(obj1, obj2, msg=msg, can_be_not_implemented=True) # ------------------------------------------------------------- # Other invalid operations From 8971611c48eb3b36854938bb94850897955b47dd Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 May 2023 10:42:12 -0700 Subject: [PATCH 10/27] unstrict xfail --- pandas/tests/arithmetic/test_datetime64.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 0fe7491eae88d..959ab146e5e7c 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1179,7 +1179,9 @@ def test_dt64arr_addsub_time_objects_raises( if str(tz) == "tzlocal()" and future is True: # TODO(GH#53278) mark = pytest.mark.xfail( - reason="Incorrectly raises AttributeError instead of TypeError" + reason="Incorrectly raises AttributeError instead of TypeError", + # some but not all CI builds + strict=False, ) request.node.add_marker(mark) From 9e0b4bdd5fc8bf1d523cc947b3858432fb81ac0d Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 May 2023 11:05:38 -0700 Subject: [PATCH 11/27] lint fixup --- pandas/_libs/lib.pyi | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 3f15e50a95b7a..e9d4e45c07925 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -71,7 +71,6 @@ def map_infer( convert: bool = ..., ignore_na: bool = ..., ) -> np.ndarray: ... 
- @overload def maybe_convert_objects( objects: npt.NDArray[np.object_], From 3d9ec018e7061a776874bed21422837e0219b92f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 22 May 2023 12:20:27 -0700 Subject: [PATCH 12/27] Fix doctest, typo --- pandas/conftest.py | 2 ++ pandas/core/arrays/datetimes.py | 2 +- pandas/tests/arrays/test_datetimes.py | 2 +- pandas/tests/generic/test_finalize.py | 2 +- pandas/tests/indexes/datetimes/test_scalar_compat.py | 2 +- pandas/tests/indexes/datetimes/test_timezones.py | 2 +- pandas/tests/io/parser/test_parse_dates.py | 2 +- pandas/tests/series/accessors/test_dt_accessor.py | 6 ++++-- 8 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index b2f1377a9fb32..9e694d2b5ae7e 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -137,6 +137,8 @@ def pytest_collection_modifyitems(items, config) -> None: ("is_sparse", "is_sparse is deprecated"), ("NDFrame.replace", "The 'method' keyword"), ("NDFrame.replace", "Series.replace without 'value'"), + ("DatetimeArray.time", "with pyarrow time dtype"), + ("DatetimeIndex.time", "with pyarrow time dtype"), # Docstring divides by zero to show behavior difference ("missing.mask_zero_div_zero", "divide by zero encountered"), ( diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d12f3c67781b9..08c514bcf043b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1381,7 +1381,7 @@ def time(self) -> npt.NDArray[np.object_] | ArrowExtensionArray: if opt is None: warnings.warn( f"The behavior of {type(self).__name__}.time is deprecated. " - "In a future version, this will an array with pyarrow time " + "In a future version, this will return an array with pyarrow time " "dtype instead of object dtype. 
To opt in to the future behavior, " "set `pd.set_option('future.infer_time', True)`.", FutureWarning, diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 47759de1bbda5..400b4d76af922 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -156,7 +156,7 @@ def test_time_date(self, dta_dti, meth): dta, dti = dta_dti warn = None - msg = "In a future version, this will an array with pyarrow time dtype" + msg = "In a future version, this will return an array with pyarrow time dtype" if meth == "time": warn = FutureWarning diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 73ca85b612031..7efb5a20249a9 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -673,7 +673,7 @@ def test_datetime_property(attr): s.attrs = {"a": 1} warn = None - msg = "In a future version, this will an array with pyarrow time dtype" + msg = "In a future version, this will return an array with pyarrow time dtype" if attr == "time": warn = FutureWarning with tm.assert_produces_warning(warn, match=msg): diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 79ab191855945..e2bd5450d1f57 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -24,7 +24,7 @@ class TestDatetimeIndexOps: def test_dti_time(self): rng = date_range("1/1/2000", freq="12min", periods=10) - msg = "In a future version, this will an array with pyarrow time dtype" + msg = "In a future version, this will return an array with pyarrow time dtype" with tm.assert_produces_warning(FutureWarning, match=msg): result = pd.Index(rng).time expected = [t.time() for t in rng] diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index f4daa50113dad..eda4e98da8fd0 100644 
--- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -854,7 +854,7 @@ def test_time_accessor(self, dtype): index = DatetimeIndex(["2018-06-04 10:20:30", pd.NaT], dtype=dtype) - msg = "In a future version, this will an array with pyarrow time dtype" + msg = "In a future version, this will return an array with pyarrow time dtype" with tm.assert_produces_warning(FutureWarning, match=msg): result = index.time diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index dcc3d9f8f13a3..ff1c209cec852 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -479,7 +479,7 @@ def test_date_col_as_index_col(all_parsers): if parser.engine == "pyarrow": # https://github.com/pandas-dev/pandas/issues/44231 # pyarrow 6.0 starts to infer time type - msg = "In a future version, this will an array with pyarrow time dtype" + msg = "In a future version, this will return an array with pyarrow time dtype" with tm.assert_produces_warning(FutureWarning, match=msg): expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 858fc0b3bad85..b24b362e11d06 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -91,7 +91,9 @@ def get_expected(ser, prop): return Series(result, index=ser.index, name=ser.name, dtype=result.dtype) if name == "time": - msg = "In a future version, this will an array with pyarrow time dtype" + msg = ( + "In a future version, this will return an array with pyarrow time dtype" + ) with tm.assert_produces_warning(FutureWarning, match=msg): left = getattr(ser.dt, name) right = get_expected(ser, name) @@ -680,7 +682,7 @@ def test_valid_dt_with_missing_values(self): ) tm.assert_series_equal(result, expected) - msg = "In a 
future version, this will an array with pyarrow time" + msg = "In a future version, this will return an array with pyarrow time" with tm.assert_produces_warning(FutureWarning, match=msg): result = ser.dt.time expected = Series([time(0), time(0), pd.NaT, time(0), time(0)], dtype="object") From 65a702c39858fec0ecbf64d975e7c21ff3a6587c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 May 2023 09:29:41 -0700 Subject: [PATCH 13/27] remove extra import --- pandas/core/arrays/datetimes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 08c514bcf043b..a0eaf6493ac5d 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -61,7 +61,6 @@ DatetimeTZDtype, ExtensionDtype, PeriodDtype, - ArrowDtype, ) from pandas.core.dtypes.missing import isna From 24f848bddf2f57ff2bc5ce91fba1290f91245d51 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 31 May 2023 08:44:13 -0700 Subject: [PATCH 14/27] doctest warnings --- pandas/conftest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 9e694d2b5ae7e..106ea3aeed8a1 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -129,6 +129,9 @@ def pytest_collection_modifyitems(items, config) -> None: # Warnings from doctests that can be ignored; place reason in comment above. 
# Each entry specifies (path, message) - see the ignore_doctest_warning function ignored_doctest_warnings = [ + ("DatetimeProperties.time", "with pyarrow time dtype"), + ("DatetimeArray.time", "with pyarrow time dtype"), + ("DatetimeIndex.time", "with pyarrow time dtype"), ("is_int64_dtype", "is_int64_dtype is deprecated"), ("is_interval_dtype", "is_interval_dtype is deprecated"), ("is_period_dtype", "is_period_dtype is deprecated"), From 1f01e6eb46224d6bf5b4051df862cf71f90cfec2 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 3 May 2023 09:46:37 -0700 Subject: [PATCH 15/27] dt.time --- pandas/core/arrays/datetimes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a0eaf6493ac5d..08c514bcf043b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -61,6 +61,7 @@ DatetimeTZDtype, ExtensionDtype, PeriodDtype, + ArrowDtype, ) from pandas.core.dtypes.missing import isna From 7568a8ba0972a1dc21d46b0e2d005b4f958cfe2b Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 3 May 2023 09:46:37 -0700 Subject: [PATCH 16/27] dt.time --- pandas/tests/series/accessors/test_dt_accessor.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index b24b362e11d06..88b58ada19265 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -91,9 +91,7 @@ def get_expected(ser, prop): return Series(result, index=ser.index, name=ser.name, dtype=result.dtype) if name == "time": - msg = ( - "In a future version, this will return an array with pyarrow time dtype" - ) + msg = "In a future version, this will an array with pyarrow time dtype" with tm.assert_produces_warning(FutureWarning, match=msg): left = getattr(ser.dt, name) right = get_expected(ser, name) From 0a7562a4633352dbb1333d75b9e4ba26bef65895 Mon Sep 17 
00:00:00 2001 From: Brock Date: Thu, 4 May 2023 09:05:52 -0700 Subject: [PATCH 17/27] Handle construction from scalar --- pandas/core/dtypes/cast.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d62d793054639..7fbfcc9d47570 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -841,7 +841,6 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]: import pyarrow as pa pa_dtype = pa.time64("us") - dtype = ArrowDtype(pa_dtype) elif is_bool(val): From 62d03cad2a355a98d7c78420fe3af258b91d8cfc Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 May 2023 10:53:22 -0700 Subject: [PATCH 18/27] update --- pandas/core/arrays/arrow/array.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 85a75fff25ebd..565800823a9f2 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -651,7 +651,15 @@ def _evaluate_op_method(self, other, op, arrow_funcs): if pc_func is NotImplemented: raise NotImplementedError(f"{op.__name__} not implemented.") - result = pc_func(self._pa_array, other) + try: + result = pc_func(self._pa_array, other) + except pa.lib.ArrowNotImplementedError: + if op in [operator.add, roperator.radd, operator.sub, roperator.rsub]: + # By returning NotImplemented we get standard message with a + # TypeError + return NotImplemented + raise + return type(self)(result) def _logical_method(self, other, op): From 47e1601319daa8a5bb6da2e10abe0b00736c24d9 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 22 May 2023 12:20:27 -0700 Subject: [PATCH 19/27] Fix doctest, typo --- pandas/tests/series/accessors/test_dt_accessor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 88b58ada19265..b24b362e11d06 100644 --- 
a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -91,7 +91,9 @@ def get_expected(ser, prop): return Series(result, index=ser.index, name=ser.name, dtype=result.dtype) if name == "time": - msg = "In a future version, this will an array with pyarrow time dtype" + msg = ( + "In a future version, this will return an array with pyarrow time dtype" + ) with tm.assert_produces_warning(FutureWarning, match=msg): left = getattr(ser.dt, name) right = get_expected(ser, name) From 423496a7acff5381ed7a7252fbc73008887dbf9f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 1 May 2023 12:35:57 -0700 Subject: [PATCH 20/27] POC: infer time objects to ArrowDtype[time] --- pandas/_libs/lib.pyx | 1 - pandas/tests/io/test_sql.py | 7 +++---- pandas/tests/series/accessors/test_dt_accessor.py | 1 + 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 4a05a4ddef420..aa42138b77b32 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2660,7 +2660,6 @@ def maybe_convert_objects(ndarray[object] objects, elif seen.time_: if is_time_array(objects): - # FIXME: need to ensure this is not timetz opt = get_option("future.infer_time") if opt is True: import pyarrow as pa diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 6f4e0da1b6764..858b8346cce3f 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -3112,16 +3112,15 @@ def test_datetime_date(self): def test_datetime_time(self, tz_aware): # test support for datetime.time, GH #8341 + warn_msg = "Pandas type inference with a sequence of `datetime.time`" if not tz_aware: tz_times = [time(9, 0, 0), time(9, 1, 30)] + warn = FutureWarning else: tz_dt = date_range("2013-01-01 09:00:00", periods=2, tz="US/Pacific") tz_times = Series(tz_dt.to_pydatetime()).map(lambda dt: dt.timetz()) + warn = None - warn_msg = "Pandas type inference with a sequence of `datetime.time`" - warn = 
None - if not tz_aware: - warn = FutureWarning with tm.assert_produces_warning(warn, match=warn_msg): df = DataFrame(tz_times, columns=["a"]) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index b24b362e11d06..483fbf5e359f3 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -101,6 +101,7 @@ def get_expected(ser, prop): left = getattr(ser.dt, name) right = get_expected(ser, name) + if not (is_list_like(left) and is_list_like(right)): assert left == right elif isinstance(left, DataFrame): From ce1bc00a0b936c8ddf5ad07dae58268e0f6778a0 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 4 May 2023 09:05:52 -0700 Subject: [PATCH 21/27] Handle construction from scalar --- pandas/_libs/lib.pyx | 1 + pandas/core/dtypes/cast.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index aa42138b77b32..4a05a4ddef420 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2660,6 +2660,7 @@ def maybe_convert_objects(ndarray[object] objects, elif seen.time_: if is_time_array(objects): + # FIXME: need to ensure this is not timetz opt = get_option("future.infer_time") if opt is True: import pyarrow as pa diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7fbfcc9d47570..933a7c7431fd0 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -843,6 +843,29 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]: pa_dtype = pa.time64("us") dtype = ArrowDtype(pa_dtype) + elif isinstance(val, dt.time): + if val.tzinfo is None: + # pyarrow doesn't have a dtype for timetz. + opt = get_option("future.infer_time") + if opt is None: + warnings.warn( + "Pandas type inference with a `datetime.time` " + "object is deprecated. In a future version, this will give " + "time32[pyarrow] dtype, which will require pyarrow to be " + "installed. 
To opt in to the new behavior immediately set " + "`pd.set_option('future.infer_time', True)`. To keep the " + "old behavior pass `dtype=object`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif opt is True: + import pyarrow as pa + + pa_dtype = pa.time64("us") + from pandas.core.arrays.arrow import ArrowDtype + + dtype = ArrowDtype(pa_dtype) + elif is_bool(val): dtype = np.dtype(np.bool_) From ae011015945b7541f2f9aaece8eb1eda76c537eb Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 1 May 2023 12:35:57 -0700 Subject: [PATCH 22/27] POC: infer time objects to ArrowDtype[time] --- pandas/_libs/lib.pyx | 1 - pandas/core/construction.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 4a05a4ddef420..aa42138b77b32 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2660,7 +2660,6 @@ def maybe_convert_objects(ndarray[object] objects, elif seen.time_: if is_time_array(objects): - # FIXME: need to ensure this is not timetz opt = get_option("future.infer_time") if opt is True: import pyarrow as pa diff --git a/pandas/core/construction.py b/pandas/core/construction.py index b4101d23e6153..4ed48bc231c77 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -300,6 +300,7 @@ def array( PeriodArray, TimedeltaArray, ) + from pandas.core.arrays.arrow import ArrowDtype from pandas.core.arrays.string_ import StringDtype if lib.is_scalar(data): From 208789c604807ea9085c6bb25caf8b855f667978 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 4 May 2023 09:05:52 -0700 Subject: [PATCH 23/27] Handle construction from scalar --- pandas/_libs/lib.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index aa42138b77b32..4a05a4ddef420 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2660,6 +2660,7 @@ def maybe_convert_objects(ndarray[object] objects, elif seen.time_: if is_time_array(objects): + # FIXME: need to ensure 
this is not timetz opt = get_option("future.infer_time") if opt is True: import pyarrow as pa From f6d055d39cd3798830a3c737305879f6d17e53e3 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 10 May 2023 07:11:37 -0700 Subject: [PATCH 24/27] ENH/DEPR: infer date objects to date[pyarrow] dtype --- pandas/_libs/lib.pyx | 38 ++++++++++++- pandas/core/config_init.py | 9 ++++ pandas/core/construction.py | 24 +++++++++ .../arrays/categorical/test_constructors.py | 7 ++- pandas/tests/dtypes/test_inference.py | 2 +- pandas/tests/extension/test_arrow.py | 21 ++++++-- pandas/tests/frame/methods/test_asfreq.py | 7 ++- pandas/tests/frame/methods/test_join.py | 18 +++++-- pandas/tests/frame/methods/test_reindex.py | 4 +- pandas/tests/frame/test_constructors.py | 14 ++++- pandas/tests/groupby/aggregate/test_other.py | 48 +++++++++-------- pandas/tests/groupby/test_apply.py | 52 +++++++++++------- pandas/tests/groupby/test_min_max.py | 7 ++- pandas/tests/groupby/test_nunique.py | 7 ++- .../tests/indexes/categorical/test_astype.py | 7 ++- .../tests/indexes/datetimes/test_indexing.py | 12 +++-- .../tests/indexes/multi/test_constructors.py | 18 ++++--- pandas/tests/io/excel/test_writers.py | 2 +- pandas/tests/io/json/test_pandas.py | 8 ++- .../io/parser/dtypes/test_categorical.py | 9 +++- pandas/tests/io/parser/test_parse_dates.py | 10 ++-- pandas/tests/io/pytables/test_round_trip.py | 1 + pandas/tests/io/pytables/test_timezones.py | 9 ++-- pandas/tests/io/test_orc.py | 8 ++- pandas/tests/io/test_sql.py | 13 +++-- pandas/tests/plotting/frame/test_frame.py | 5 +- pandas/tests/reshape/concat/test_datetimes.py | 39 ++++++++------ pandas/tests/reshape/merge/test_merge.py | 53 +++++++++++-------- pandas/tests/reshape/test_pivot.py | 10 ++-- .../series/accessors/test_dt_accessor.py | 13 ++++- pandas/tests/series/test_arithmetic.py | 13 +++-- pandas/tests/series/test_constructors.py | 7 ++- pandas/tests/strings/test_api.py | 6 +++ pandas/tests/test_multilevel.py | 10 +++- 34 files changed, 376 
insertions(+), 135 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 4a05a4ddef420..c66322aa99fd1 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1271,6 +1271,7 @@ cdef class Seen: bint period_ # seen_period bint interval_ # seen_interval bint time_ + bint date_ def __cinit__(self, bint coerce_numeric=False): """ @@ -1298,6 +1299,7 @@ cdef class Seen: self.period_ = False self.interval_ = False self.time_ = False + self.date_ = False self.coerce_numeric = coerce_numeric cdef bint check_uint64_conflict(self) except -1: @@ -2565,6 +2567,11 @@ def maybe_convert_objects(ndarray[object] objects, else: seen.object_ = True break + elif PyDate_Check(val): + if convert_non_numeric: + seen.date_ = True + else: + seen.object_ = True elif is_period_object(val): if convert_non_numeric: seen.period_ = True @@ -2688,7 +2695,36 @@ def maybe_convert_objects(ndarray[object] objects, seen.object_ = True - if seen.nat_: + elif seen.date_: + if is_date_array(objects, skipna=True): + opt = get_option("future.infer_date") + if opt is True: + import pyarrow as pa + + from pandas.core.dtypes.dtypes import ArrowDtype + + obj = pa.array(objects) + dtype = ArrowDtype(obj.type) + return dtype.construct_array_type()(obj) + elif opt is False: + # explicitly set to keep the old behavior and avoid the warning + pass + else: + from pandas.util._exceptions import find_stack_level + warnings.warn( + "Pandas type inference with a sequence of `datetime.date` " + "objects is deprecated. In a future version, this will give " + "date32[pyarrow] dtype, which will require pyarrow to be " + "installed. To opt in to the new behavior immediately set " + "`pd.set_option('future.infer_date', True)`. 
To keep the " + "old behavior pass `dtype=object`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + seen.object_ = True + + elif seen.nat_: if not seen.object_ and not seen.numeric_ and not seen.bool_: # all NaT, None, or nan (at least one NaT) # see GH#49340 for discussion of desired behavior diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 5f7d448a1092a..828712b5290d6 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -900,3 +900,12 @@ def register_converter_cb(key) -> None: "(at which point this option will be deprecated).", validator=is_one_of_factory([True, False, None]), ) + + cf.register_option( + "future.infer_date", + None, + "Whether to infer sequence of datetime.date objects as pyarrow date " + "dtype, which will be the default in pandas 3.0 " + "(at which point this option will be deprecated).", + validator=is_one_of_factory([True, False, None]), + ) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 4ed48bc231c77..9854377dc1796 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -392,6 +392,30 @@ def array( stacklevel=find_stack_level(), ) + elif inferred_dtype == "date": + opt = get_option("future.infer_date") + + if opt is True: + import pyarrow as pa + + obj = pa.array(data) + dtype = ArrowDtype(obj.type) + return dtype.construct_array_type()(obj) + elif opt is False: + # explicitly set to keep the old behavior and avoid the warning + pass + else: + warnings.warn( + "Pandas type inference with a sequence of `datetime.date` " + "objects is deprecated. In a future version, this will give " + "date32[pyarrow] dtype, which will require pyarrow to be " + "installed. To opt in to the new behavior immediately set " + "`pd.set_option('future.infer_date', True)`. To keep the " + "old behavior pass `dtype=object`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # Pandas overrides NumPy for # 1. datetime64[ns,us,ms,s] # 2. 
timedelta64[ns,us,ms,s] diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 5eb7f37a4ae34..7fad0a31c0de7 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -369,7 +369,12 @@ def test_constructor_date_objects(self): # we dont cast date objects to timestamps, matching Index constructor v = date.today() - cat = Categorical([v, v]) + msg = ( + "Pandas type inference with a sequence of `datetime.date` " + "objects is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + cat = Categorical([v, v]) assert cat.categories.dtype == object assert type(cat.categories[0]) is date diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index f6dc73c81ee48..eeebf8cae0fa4 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1564,7 +1564,7 @@ def test_other_dtypes_for_array(self, func): def test_date(self): dates = [date(2012, 1, day) for day in range(1, 20)] - index = Index(dates) + index = Index(dates, dtype=object) assert index.inferred_type == "date" dates = [date(2012, 1, day) for day in range(1, 20)] + [np.nan] diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 61231974a878f..633676ea4fd3e 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -734,6 +734,13 @@ def test_stack(self, data, columns): # FIXME: need to avoid doing inference when calling frame._constructor # in _stack_multi_columns warn = FutureWarning + if pa.types.is_date(pa_dtype): + # FIXME: need to avoid doing inference when calling frame._constructor + # in _stack_multi_columns + warn = FutureWarning + warn_msg = ( + "Pandas type inference with a sequence of `datetime.date` objects" + ) with tm.assert_produces_warning(warn, match=warn_msg, check_stacklevel=False): 
super().test_stack(data, columns) @@ -802,9 +809,9 @@ def test_invert(self, data, request): class TestBaseMethods(base.BaseMethodsTests): def test_hash_pandas_object_works(self, data, as_frame): pa_dtype = data.dtype.pyarrow_dtype - warn_msg = "Pandas type inference with a sequence of `datetime.time`" + warn_msg = "Pandas type inference with a sequence of `datetime.(time|date)`" warn = None - if pa.types.is_time(pa_dtype): + if pa.types.is_time(pa_dtype) or pa.types.is_date(pa_dtype): # TODO(#48964) This warning will be avoided by implementing # ArrowExtensionArray.hash_pandas_object warn = FutureWarning @@ -1693,7 +1700,15 @@ def test_pickle_roundtrip(data): def test_astype_from_non_pyarrow(data): # GH49795 - pd_array = data._pa_array.to_pandas().array + msg = ( + "Pandas type inference with a sequence of `datetime.date` objects is deprecated" + ) + warn = None + if pa.types.is_date(data.dtype.pyarrow_dtype): + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=msg): + pd_array = data._pa_array.to_pandas().array result = pd_array.astype(data.dtype) assert not isinstance(pd_array.dtype, ArrowDtype) assert isinstance(result.dtype, ArrowDtype) diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index 47cebd31451e3..bbba8d77334e2 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -186,7 +186,12 @@ def test_asfreq_with_date_object_index(self, frame_or_series): ts = frame_or_series(np.random.randn(20), index=rng) ts2 = ts.copy() - ts2.index = [x.date() for x in ts2.index] + msg = ( + "Pandas type inference with a sequence of `datetime.date` " + "objects is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + ts2.index = [x.date() for x in ts2.index] result = ts2.asfreq("4H", method="ffill") expected = ts.asfreq("4H", method="ffill") diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py 
index 98f3926968ad0..de59f0e912cfd 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -510,16 +510,26 @@ def test_join_multiindex_dates(self): # GH 33692 date = pd.Timestamp(2000, 1, 1).date() - df1_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"]) + msg = ( + "Pandas type inference with a sequence of `datetime.date` " + "objects is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + df1_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"]) df1 = DataFrame({"col1": [0]}, index=df1_index) - df2_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"]) + with tm.assert_produces_warning(FutureWarning, match=msg): + df2_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"]) df2 = DataFrame({"col2": [0]}, index=df2_index) - df3_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"]) + with tm.assert_produces_warning(FutureWarning, match=msg): + df3_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"]) df3 = DataFrame({"col3": [0]}, index=df3_index) result = df1.join([df2, df3]) - expected_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"]) + with tm.assert_produces_warning(FutureWarning, match=msg): + expected_index = MultiIndex.from_tuples( + [(0, date)], names=["index_0", "date"] + ) expected = DataFrame( {"col1": [0], "col2": [0], "col3": [0]}, index=expected_index ) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 63e2eb790a4ea..ee8ac468dbfed 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -200,7 +200,9 @@ def test_reindex_date_fill_value(self): ts = df.iloc[0, 0] fv = ts.date() - res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv) + msg = "type inference with a sequence of `datetime.date` objects is deprecated" + with 
tm.assert_produces_warning(FutureWarning, match=msg): + res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv) expected = DataFrame( {"A": df["A"].tolist() + [fv], "B": df["B"].tolist() + [fv], "C": [fv] * 4}, diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a424a672ed31a..9f714f3ff16a7 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1895,7 +1895,12 @@ def test_constructor_with_datetimes2(self): datetimes = [ts.to_pydatetime() for ts in ind] dates = [ts.date() for ts in ind] df = DataFrame(datetimes, columns=["datetimes"]) - df["dates"] = dates + msg = ( + "Pandas type inference with a sequence of `datetime.date` " + "objects is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + df["dates"] = dates result = df.dtypes expected = Series( [np.dtype("datetime64[ns]"), np.dtype("object")], @@ -2361,7 +2366,12 @@ def test_datetime_date_tuple_columns_from_dict(self): # GH 10863 v = date.today() tup = v, v - result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup]) + msg = ( + "Pandas type inference with a sequence of `datetime.date` " + "objects is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup]) expected = DataFrame([0, 1, 2], columns=Index(Series([tup]))) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index aad1218190a84..64848c350f65f 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -68,19 +68,22 @@ def test_agg_datetimes_mixed(): for row in data ] - df2 = DataFrame( - { - "key": [x[0] for x in data], - "date": [x[1] for x in data], - "value": [x[2] for x in data], - } - ) + msg = "Pandas type inference with a sequence of `datetime.date` 
objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + df2 = DataFrame( + { + "key": [x[0] for x in data], + "date": [x[1] for x in data], + "value": [x[2] for x in data], + } + ) df1["weights"] = df1["value"] / df1["value"].sum() gb1 = df1.groupby("date").aggregate(np.sum) df2["weights"] = df1["value"] / df1["value"].sum() - gb2 = df2.groupby("date").aggregate(np.sum) + with tm.assert_produces_warning(FutureWarning, match=msg): + gb2 = df2.groupby("date").aggregate(np.sum) assert len(gb1) == len(gb2) @@ -367,22 +370,25 @@ def test_agg_consistency(): def P1(a): return np.percentile(a.dropna(), q=1) - df = DataFrame( - { - "col1": [1, 2, 3, 4], - "col2": [10, 25, 26, 31], - "date": [ - dt.date(2013, 2, 10), - dt.date(2013, 2, 10), - dt.date(2013, 2, 11), - dt.date(2013, 2, 11), - ], - } - ) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame( + { + "col1": [1, 2, 3, 4], + "col2": [10, 25, 26, 31], + "date": [ + dt.date(2013, 2, 10), + dt.date(2013, 2, 10), + dt.date(2013, 2, 11), + dt.date(2013, 2, 11), + ], + } + ) g = df.groupby("date") - expected = g.agg([P1]) + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = g.agg([P1]) expected.columns = expected.columns.levels[0] result = g.agg(P1) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 0797e70acb4a9..ec137666c6e14 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -61,8 +61,11 @@ def test_apply_issues(): ) df = df.set_index("date_time") - expected = df.groupby(df.index.date).idxmax() - result = df.groupby(df.index.date).apply(lambda x: x.idxmax()) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + gb = df.groupby(df.index.date) + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = gb.idxmax() + result = gb.apply(lambda x: x.idxmax()) 
tm.assert_frame_equal(result, expected) # GH 5789 @@ -844,6 +847,9 @@ def test_apply_datetime_issue(group_column_dtlike): ) if isinstance(group_column_dtlike, time): warn = FutureWarning + elif type(group_column_dtlike) is date: + warn = FutureWarning + warn_msg = warn_msg.replace("datetime.time", "datetime.date") with tm.assert_produces_warning(warn, match=warn_msg): df = DataFrame({"a": ["foo"], "b": [group_column_dtlike]}) @@ -1098,27 +1104,31 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func): def test_apply_with_date_in_multiindex_does_not_convert_to_timestamp(): # GH 29617 + msg = "Pandas type inference with a sequence of `datetime.date` objects" - df = DataFrame( - { - "A": ["a", "a", "a", "b"], - "B": [ - date(2020, 1, 10), - date(2020, 1, 10), - date(2020, 2, 10), - date(2020, 2, 10), - ], - "C": [1, 2, 3, 4], - }, - index=Index([100, 101, 102, 103], name="idx"), - ) + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame( + { + "A": ["a", "a", "a", "b"], + "B": [ + date(2020, 1, 10), + date(2020, 1, 10), + date(2020, 2, 10), + date(2020, 2, 10), + ], + "C": [1, 2, 3, 4], + }, + index=Index([100, 101, 102, 103], name="idx"), + ) grp = df.groupby(["A", "B"]) - result = grp.apply(lambda x: x.head(1)) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grp.apply(lambda x: x.head(1)) expected = df.iloc[[0, 2, 3]] expected = expected.reset_index() - expected.index = MultiIndex.from_frame(expected[["A", "B", "idx"]]) + with tm.assert_produces_warning(FutureWarning, match=msg): + expected.index = MultiIndex.from_frame(expected[["A", "B", "idx"]]) expected = expected.drop(columns="idx") tm.assert_frame_equal(result, expected) @@ -1217,9 +1227,11 @@ def test_positional_slice_groups_datetimelike(): "let": list("abcde"), } ) - result = expected.groupby( - [expected.let, expected.date.dt.date], group_keys=False - ).apply(lambda x: x.iloc[0:]) + msg = "Pandas type inference with a sequence of 
`datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = expected.groupby( + [expected.let, expected.date.dt.date], group_keys=False + ).apply(lambda x: x.iloc[0:]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_min_max.py b/pandas/tests/groupby/test_min_max.py index 37eb52be0b37b..d447a22dce64d 100644 --- a/pandas/tests/groupby/test_min_max.py +++ b/pandas/tests/groupby/test_min_max.py @@ -67,13 +67,16 @@ def test_min_date_with_nans(): ).dt.date df = DataFrame({"a": [np.nan, "1", np.nan], "b": [0, 1, 1], "c": dates}) - result = df.groupby("b", as_index=False)["c"].min()["c"] + msg = "Pandas type inference with a sequence of `datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("b", as_index=False)["c"].min()["c"] expected = pd.to_datetime( Series(["2019-05-09", "2019-05-09"], name="c"), format="%Y-%m-%d" ).dt.date tm.assert_series_equal(result, expected) - result = df.groupby("b")["c"].min() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("b")["c"].min() expected.index.name = "b" tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py index f4ebd54a7a1a9..e9d1d0cac3489 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -156,7 +156,12 @@ def test_nunique_with_timegrouper(): ) def test_nunique_with_NaT(key, data, dropna, expected): # GH 27951 - df = DataFrame({"key": key, "data": data}) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + warn = None + if type(data[0]) is dt.date: + warn = FutureWarning + with tm.assert_produces_warning(warn, match=msg): + df = DataFrame({"key": key, "data": data}) result = df.groupby(["key"])["data"].nunique(dropna=dropna) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/categorical/test_astype.py 
b/pandas/tests/indexes/categorical/test_astype.py index da1d692f9eb2d..cf307408102e7 100644 --- a/pandas/tests/indexes/categorical/test_astype.py +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -78,12 +78,15 @@ def test_categorical_date_roundtrip(self, box): # astype to categorical and back should preserve date objects v = date.today() - obj = Index([v, v]) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + obj = Index([v, v]) assert obj.dtype == object if box: obj = obj.array - cat = obj.astype("category") + with tm.assert_produces_warning(FutureWarning, match=msg): + cat = obj.astype("category") rtrip = cat.astype(object) assert rtrip.dtype == object diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index ecdea9ea25c9d..84afa84299447 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -513,7 +513,9 @@ class TestGetIndexer: def test_get_indexer_date_objs(self): rng = date_range("1/1/2000", periods=20) - result = rng.get_indexer(rng.map(lambda x: x.date())) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = rng.get_indexer(rng.map(lambda x: x.date())) expected = rng.get_indexer(rng) tm.assert_numpy_array_equal(result, expected) @@ -568,7 +570,9 @@ def test_get_indexer(self): def test_get_indexer_mixed_dtypes(self, target): # https://github.com/pandas-dev/pandas/issues/33741 values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")]) - result = values.get_indexer(target) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = values.get_indexer(target) expected = np.array([0, 1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) @@ -583,7 
+587,9 @@ def test_get_indexer_mixed_dtypes(self, target): def test_get_indexer_out_of_bounds_date(self, target, positions): values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")]) - result = values.get_indexer(target) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = values.get_indexer(target) expected = np.array(positions, dtype=np.intp) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index cabc2bfd61db6..9843bf3f1cc52 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -801,7 +801,9 @@ def test_datetimeindex(): # but NOT date objects, matching Index behavior date4 = date.today() - index = MultiIndex.from_product([[date4], [date2]]) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + index = MultiIndex.from_product([[date4], [date2]]) assert not isinstance(index.levels[0], pd.DatetimeIndex) assert isinstance(index.levels[1], pd.DatetimeIndex) @@ -829,23 +831,27 @@ def test_constructor_with_tz(): def test_multiindex_inference_consistency(): # check that inference behavior matches the base class - + msg = "Pandas type inference with a sequence of `datetime.date` objects" v = date.today() arr = [v, v] - idx = Index(arr) + with tm.assert_produces_warning(FutureWarning, match=msg): + idx = Index(arr) assert idx.dtype == object - mi = MultiIndex.from_arrays([arr]) + with tm.assert_produces_warning(FutureWarning, match=msg): + mi = MultiIndex.from_arrays([arr]) lev = mi.levels[0] assert lev.dtype == object - mi = MultiIndex.from_product([arr]) + with tm.assert_produces_warning(FutureWarning, match=msg): + mi = MultiIndex.from_product([arr]) lev = mi.levels[0] assert lev.dtype == object - mi = 
MultiIndex.from_tuples([(x,) for x in arr]) + with tm.assert_produces_warning(FutureWarning, match=msg): + mi = MultiIndex.from_tuples([(x,) for x in arr]) lev = mi.levels[0] assert lev.dtype == object diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 0560e12a00bf5..15c78aa985e7f 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -633,7 +633,7 @@ def test_excel_roundtrip_datetime(self, merge_cells, tsframe, path): tsf = tsframe.copy() - tsf.index = [x.date() for x in tsframe.index] + tsf.index = Index([x.date() for x in tsframe.index], dtype=object) tsf.to_excel(path, "test1", merge_cells=merge_cells) with ExcelFile(path) as reader: diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index ea996e82ae3a6..0558c301b998f 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -836,7 +836,13 @@ def test_date_index_and_values(self, date_format, as_object, date_typ): if as_object: data.append("a") - ser = Series(data, index=data) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + warn = None + if date_typ is datetime.date and not as_object: + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=msg): + ser = Series(data, index=data) result = ser.to_json(date_format=date_format) if date_format == "epoch": diff --git a/pandas/tests/io/parser/dtypes/test_categorical.py b/pandas/tests/io/parser/dtypes/test_categorical.py index 33422d41c2f93..870332e316210 100644 --- a/pandas/tests/io/parser/dtypes/test_categorical.py +++ b/pandas/tests/io/parser/dtypes/test_categorical.py @@ -264,7 +264,14 @@ def test_categorical_coerces_timestamp(all_parsers): data = "b\n2014-01-01\n2014-01-01" expected = DataFrame({"b": Categorical([Timestamp("2014")] * 2)}) - result = parser.read_csv(StringIO(data), dtype=dtype) + msg = ( + "Pandas type inference with a sequence of `datetime.date` 
objects is deprecated" + ) + warn = None + if parser.engine == "pyarrow": + warn = FutureWarning + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): + result = parser.read_csv(StringIO(data), dtype=dtype) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index ff1c209cec852..d4ded75e201c8 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1621,10 +1621,12 @@ def parse_function(yy, mm): parse_dates={"ym": [0, 1]}, date_parser=parse_function, ) - expected = DataFrame( - [[date(2001, 1, 1), 10, 10.0], [date(2001, 2, 1), 1, 11.0]], - columns=["ym", "day", "a"], - ) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = DataFrame( + [[date(2001, 1, 1), 10, 10.0], [date(2001, 2, 1), 1, 11.0]], + columns=["ym", "day", "a"], + ) expected["ym"] = expected["ym"].astype("datetime64[ns]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index 42f020a8f3708..f1ddb34fcd582 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -407,6 +407,7 @@ def test_empty_series(dtype, setup_path): def test_can_serialize_dates(setup_path): rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")] + rng = Index(rng, dtype=object) frame = DataFrame(np.random.randn(len(rng), 4), index=rng) _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index e6c0c918a73cc..55f58c7028fbd 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -277,10 +277,12 @@ def test_store_timezone(setup_path): # issue storing datetime.date with a 
timezone as it resets when read # back in a new timezone + today = date(2013, 9, 10) + idx = pd.Index([today, today, today], dtype=object) + # original method with ensure_clean_store(setup_path) as store: - today = date(2013, 9, 10) - df = DataFrame([1, 2, 3], index=[today, today, today]) + df = DataFrame([1, 2, 3], index=idx) store["obj1"] = df result = store["obj1"] tm.assert_frame_equal(result, df) @@ -288,8 +290,7 @@ def test_store_timezone(setup_path): # with tz setting with ensure_clean_store(setup_path) as store: with tm.set_timezone("EST5EDT"): - today = date(2013, 9, 10) - df = DataFrame([1, 2, 3], index=[today, today, today]) + df = DataFrame([1, 2, 3], index=idx) store["obj1"] = df with tm.set_timezone("CST6CDT"): diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 571d9d5536e20..cad83fc914a03 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -153,7 +153,9 @@ def test_orc_reader_date_low(dirpath): dtype="object", ), } - expected = pd.DataFrame.from_dict(data) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = pd.DataFrame.from_dict(data) inputfile = os.path.join(dirpath, "TestOrcFile.testDate1900.orc") got = read_orc(inputfile).iloc[:10] @@ -194,7 +196,9 @@ def test_orc_reader_date_high(dirpath): dtype="object", ), } - expected = pd.DataFrame.from_dict(data) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = pd.DataFrame.from_dict(data) inputfile = os.path.join(dirpath, "TestOrcFile.testDate2038.orc") got = read_orc(inputfile).iloc[:10] diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 858b8346cce3f..116fde9b75af8 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2239,9 +2239,12 @@ def test_datetime_NaT(self): def test_datetime_date(self): # test support 
for datetime.date - df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) assert df.to_sql("test_date", self.conn, index=False) == 2 - res = read_sql_table("test_date", self.conn) + with tm.assert_produces_warning(FutureWarning, match=msg): + res = read_sql_table("test_date", self.conn) result = res["a"] expected = to_datetime(df["a"]) # comes back as datetime64 @@ -3099,7 +3102,9 @@ def test_execute_sql(self): def test_datetime_date(self): # test support for datetime.date - df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) + msg = "Pandas type inference with a sequence of `datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) assert df.to_sql("test_date", self.conn, index=False) == 2 res = read_sql_query("SELECT * FROM test_date", self.conn) if self.flavor == "sqlite": @@ -3112,7 +3117,7 @@ def test_datetime_date(self): def test_datetime_time(self, tz_aware): # test support for datetime.time, GH #8341 - warn_msg = "Pandas type inference with a sequence of `datetime.time`" + warn_msg = "Pandas type inference with a sequence of `datetime.time` objects" if not tz_aware: tz_times = [time(9, 0, 0), time(9, 1, 30)] warn = FutureWarning diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 44bacb21d6ebc..832f6a24623d5 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -1230,9 +1230,12 @@ def test_specified_props_kwd_plot_box(self, props, expected): assert result[expected][0].get_color() == "C1" def test_unordered_ts(self): + idx = pd.Index( + [date(2012, 10, 1), date(2012, 9, 1), date(2012, 8, 1)], dtype=object + ) df = DataFrame( 
np.array([3.0, 2.0, 1.0]), - index=[date(2012, 10, 1), date(2012, 9, 1), date(2012, 8, 1)], + index=idx, columns=["test"], ) ax = df.plot() diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 9ec0071ba9afa..d5ff454fe9645 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -131,34 +131,43 @@ def test_concat_multiindex_datetime_object_index(self): dtype="object", ) - s = Series( - ["a", "b"], - index=MultiIndex.from_arrays( + msg = "Pandas type inference with a sequence of `datetime.date` objects" + with tm.assert_produces_warning(FutureWarning, match=msg): + # TODO: should this be not-inferring since we already specified + # object dtype? + mi = MultiIndex.from_arrays( [ [1, 2], idx[:-1], ], names=["first", "second"], - ), - ) - s2 = Series( + ) + s = Series( ["a", "b"], - index=MultiIndex.from_arrays( + index=mi, + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + mi2 = MultiIndex.from_arrays( [[1, 2], idx[::2]], names=["first", "second"], - ), - ) - mi = MultiIndex.from_arrays( - [[1, 2, 2], idx], - names=["first", "second"], + ) + s2 = Series( + ["a", "b"], + index=mi2, ) - assert mi.levels[1].dtype == object + with tm.assert_produces_warning(FutureWarning, match=msg): + mi3 = MultiIndex.from_arrays( + [[1, 2, 2], idx], + names=["first", "second"], + ) + assert mi3.levels[1].dtype == object expected = DataFrame( [["a", "a"], ["b", np.nan], [np.nan, "b"]], - index=mi, + index=mi3, ) - result = concat([s, s2], axis=1) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = concat([s, s2], axis=1) tm.assert_frame_equal(result, expected) def test_concat_NaT_series(self): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 896f1a9be52be..e27a798554467 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2036,31 
+2036,42 @@ def test_dtype_on_categorical_dates(self): # GH 16900 # dates should not be coerced to ints - df = DataFrame( - [[date(2001, 1, 1), 1.1], [date(2001, 1, 2), 1.3]], columns=["date", "num2"] - ) - df["date"] = df["date"].astype("category") - - df2 = DataFrame( - [[date(2001, 1, 1), 1.3], [date(2001, 1, 3), 1.4]], columns=["date", "num4"] + msg = ( + "Pandas type inference with a sequence of `datetime.date` " + "objects is deprecated" ) - df2["date"] = df2["date"].astype("category") + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame( + [[date(2001, 1, 1), 1.1], [date(2001, 1, 2), 1.3]], + columns=["date", "num2"], + ) + df["date"] = df["date"].astype("category") - expected_outer = DataFrame( - [ - [pd.Timestamp("2001-01-01").date(), 1.1, 1.3], - [pd.Timestamp("2001-01-02").date(), 1.3, np.nan], - [pd.Timestamp("2001-01-03").date(), np.nan, 1.4], - ], - columns=["date", "num2", "num4"], - ) - result_outer = merge(df, df2, how="outer", on=["date"]) + with tm.assert_produces_warning(FutureWarning, match=msg): + df2 = DataFrame( + [[date(2001, 1, 1), 1.3], [date(2001, 1, 3), 1.4]], + columns=["date", "num4"], + ) + df2["date"] = df2["date"].astype("category") + + with tm.assert_produces_warning(FutureWarning, match=msg): + expected_outer = DataFrame( + [ + [pd.Timestamp("2001-01-01").date(), 1.1, 1.3], + [pd.Timestamp("2001-01-02").date(), 1.3, np.nan], + [pd.Timestamp("2001-01-03").date(), np.nan, 1.4], + ], + columns=["date", "num2", "num4"], + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result_outer = merge(df, df2, how="outer", on=["date"]) tm.assert_frame_equal(result_outer, expected_outer) - expected_inner = DataFrame( - [[pd.Timestamp("2001-01-01").date(), 1.1, 1.3]], - columns=["date", "num2", "num4"], - ) + with tm.assert_produces_warning(FutureWarning, match=msg): + expected_inner = DataFrame( + [[pd.Timestamp("2001-01-01").date(), 1.1, 1.3]], + columns=["date", "num2", "num4"], + ) result_inner = 
merge(df, df2, how="inner", on=["date"]) tm.assert_frame_equal(result_inner, expected_inner) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index b6fcb27faf146..13be50ec46496 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1087,11 +1087,15 @@ def test_pivot_integer_columns(self): [1.0], ) ) - df = DataFrame(data) - table = df.pivot_table(values=4, index=[0, 1, 3], columns=[2]) + msg = "type inference with a sequence of `datetime.date` objects is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame(data) + with tm.assert_produces_warning(FutureWarning, match=msg): + table = df.pivot_table(values=4, index=[0, 1, 3], columns=[2]) df2 = df.rename(columns=str) - table2 = df2.pivot_table(values="4", index=["0", "1", "3"], columns=["2"]) + with tm.assert_produces_warning(FutureWarning, match=msg): + table2 = df2.pivot_table(values="4", index=["0", "1", "3"], columns=["2"]) tm.assert_frame_equal(table, table2, check_names=False) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 483fbf5e359f3..aa017fa1703d5 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -744,9 +744,18 @@ def test_date_tz(self): tz="US/Eastern", ) ser = Series(rng) - expected = Series([date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)]) + + warn_msg = ( + "Pandas type inference with a sequence of `datetime.date` objects " + "is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + expected = Series([date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)]) tm.assert_series_equal(ser.dt.date, expected) - tm.assert_series_equal(ser.apply(lambda x: x.date()), expected) + + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + res = ser.apply(lambda x: x.date()) + tm.assert_series_equal(res, expected) 
def test_dt_timetz_accessor(self, tz_naive_fixture): # GH21358 diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index a0edfae606e3f..8a63b9c4906f6 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -219,9 +219,8 @@ def test_add_with_duplicate_index(self): tm.assert_series_equal(result, expected) def test_add_na_handling(self): - ser = Series( - [Decimal("1.3"), Decimal("2.3")], index=[date(2012, 1, 1), date(2012, 1, 2)] - ) + index = Index([date(2012, 1, 1), date(2012, 1, 2)], dtype=object) + ser = Series([Decimal("1.3"), Decimal("2.3")], index=index) result = ser + ser.shift(1) result2 = ser.shift(1) + ser @@ -761,7 +760,13 @@ def test_align_date_objects_with_datetimeindex(self): ts_slice = ts[5:] ts2 = ts_slice.copy() - ts2.index = [x.date() for x in ts2.index] + + warn_msg = ( + "Pandas type inference with a sequence of `datetime.date` objects " + "is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + ts2.index = [x.date() for x in ts2.index] result = ts + ts2 result2 = ts2 + ts diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index ceb283ca9e9e7..6f5d9e1424e7c 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1043,7 +1043,12 @@ def test_constructor_dtype_datetime64_7(self): # leave datetime.date alone dates2 = np.array([d.date() for d in dates.to_pydatetime()], dtype=object) - series1 = Series(dates2, dates) + warn_msg = ( + "Pandas type inference with a sequence of `datetime.date` objects " + "is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + series1 = Series(dates2, dates) tm.assert_numpy_array_equal(series1.values, dates2) assert series1.dtype == object diff --git a/pandas/tests/strings/test_api.py b/pandas/tests/strings/test_api.py index c3b77569cc751..61fa400bf9348 100644 --- 
a/pandas/tests/strings/test_api.py +++ b/pandas/tests/strings/test_api.py @@ -38,6 +38,12 @@ def test_api_per_dtype(index_or_series, dtype, any_skipna_inferred_dtype): warn = None if dtype == "category" and inferred_dtype == "time": warn = FutureWarning + if dtype == "category" and inferred_dtype == "date": + warn = FutureWarning + warn_msg = ( + "Pandas type inference with a sequence of `datetime.date` objects " + "is deprecated" + ) with tm.assert_produces_warning(warn, match=warn_msg): t = box(values, dtype=dtype) # explicit dtype to avoid casting diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 8c5f9a894f2f7..f920c141366c9 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -269,13 +269,18 @@ def test_subsets_multiindex_dtype(self): tm.assert_series_equal(result, expected) def test_datetime_object_multiindex(self): + msg = ( + "Pandas type inference with a sequence of `datetime.date` " + "objects is deprecated" + ) data_dic = { (0, datetime.date(2018, 3, 3)): {"A": 1, "B": 10}, (0, datetime.date(2018, 3, 4)): {"A": 2, "B": 11}, (1, datetime.date(2018, 3, 3)): {"A": 3, "B": 12}, (1, datetime.date(2018, 3, 4)): {"A": 4, "B": 13}, } - result = DataFrame.from_dict(data_dic, orient="index") + with tm.assert_produces_warning(FutureWarning, match=msg): + result = DataFrame.from_dict(data_dic, orient="index") data = {"A": [1, 2, 3, 4], "B": [10, 11, 12, 13]} index = [ [0, 0, 1, 1], @@ -286,7 +291,8 @@ def test_datetime_object_multiindex(self): datetime.date(2018, 3, 4), ], ] - expected = DataFrame(data=data, index=index) + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = DataFrame(data=data, index=index) tm.assert_frame_equal(result, expected) From 5ff5333b3de8f63fbff31743c047049cf16afe92 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 10 May 2023 16:40:23 -0700 Subject: [PATCH 25/27] deprecate inference with scalar date --- pandas/core/dtypes/cast.py | 37 +++++++++----------- 
pandas/tests/dtypes/cast/test_infer_dtype.py | 5 ++- pandas/tests/dtypes/cast/test_promote.py | 12 ++++++- pandas/tests/indexing/test_loc.py | 5 ++- pandas/tests/io/pytables/test_errors.py | 7 ++-- 5 files changed, 41 insertions(+), 25 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 933a7c7431fd0..a102bec99b5ee 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -843,28 +843,25 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]: pa_dtype = pa.time64("us") dtype = ArrowDtype(pa_dtype) - elif isinstance(val, dt.time): - if val.tzinfo is None: - # pyarrow doesn't have a dtype for timetz. - opt = get_option("future.infer_time") - if opt is None: - warnings.warn( - "Pandas type inference with a `datetime.time` " - "object is deprecated. In a future version, this will give " - "time32[pyarrow] dtype, which will require pyarrow to be " - "installed. To opt in to the new behavior immediately set " - "`pd.set_option('future.infer_time', True)`. To keep the " - "old behavior pass `dtype=object`.", - FutureWarning, - stacklevel=find_stack_level(), - ) - elif opt is True: - import pyarrow as pa + elif isinstance(val, dt.date): + opt = get_option("future.infer_date") + if opt is None: + warnings.warn( + "Pandas type inference with a `datetime.date` " + "object is deprecated. In a future version, this will give " + "date32[pyarrow] dtype, which will require pyarrow to be " + "installed. To opt in to the new behavior immediately set " + "`pd.set_option('future.infer_date', True)`. 
To keep the " + "old behavior pass `dtype=object`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif opt is True: + import pyarrow as pa - pa_dtype = pa.time64("us") - from pandas.core.arrays.arrow import ArrowDtype + pa_dtype = pa.date32() - dtype = ArrowDtype(pa_dtype) + dtype = ArrowDtype(pa_dtype) elif is_bool(val): dtype = np.dtype(np.bool_) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index b5d761b3549fa..4b0bd8d4754cd 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -23,6 +23,7 @@ Timestamp, date_range, ) +import pandas._testing as tm def test_infer_dtype_from_int_scalar(any_int_numpy_dtype): @@ -102,7 +103,9 @@ def test_infer_dtype_from_period(freq): def test_infer_dtype_misc(): dt = date(2000, 1, 1) - dtype, val = infer_dtype_from_scalar(dt) + msg = "type inference with a `datetime.date` object" + with tm.assert_produces_warning(FutureWarning, match=msg): + dtype, val = infer_dtype_from_scalar(dt) assert dtype == np.object_ ts = Timestamp(1, tz="US/Eastern") diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 1becf3b9843b7..ac641e3f3c5d5 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -16,6 +16,7 @@ from pandas.core.dtypes.missing import isna import pandas as pd +import pandas._testing as tm def _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar=None): @@ -354,12 +355,21 @@ def test_maybe_promote_any_with_datetime64(any_numpy_dtype, fill_value): expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value + msg = "type inference with a `datetime.date` object" + warn = None + if type(fill_value) is datetime.date and any_numpy_dtype in [ + "datetime64[ns]", + "timedelta64[ns]", + ]: + warn = FutureWarning + if type(fill_value) is datetime.date and dtype.kind == "M": # Casting date to dt64 is 
deprecated, in 2.0 enforced to cast to object expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value - _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + with tm.assert_produces_warning(warn, match=msg): + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) @pytest.mark.parametrize( diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4017a0e3a2f80..b33aec1b5913a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1443,7 +1443,10 @@ def test_loc_setitem_datetime_coercion(self): df.loc[0:1, "c"] = np.datetime64("2008-08-08") assert Timestamp("2008-08-08") == df.loc[0, "c"] assert Timestamp("2008-08-08") == df.loc[1, "c"] - df.loc[2, "c"] = date(2005, 5, 5) + + warn_msg = "type inference with a `datetime.date` object" + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + df.loc[2, "c"] = date(2005, 5, 5) assert Timestamp("2005-05-05").date() == df.loc[2, "c"] @pytest.mark.parametrize("idxer", ["var", ["var"]]) diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py index 295cce970889c..4f5b32cdd7e70 100644 --- a/pandas/tests/io/pytables/test_errors.py +++ b/pandas/tests/io/pytables/test_errors.py @@ -56,13 +56,15 @@ def test_table_index_incompatible_dtypes(setup_path): def test_unimplemented_dtypes_table_columns(setup_path): + warn_msg = "type inference with a `datetime.date` object" with ensure_clean_store(setup_path) as store: dtypes = [("date", datetime.date(2001, 1, 2))] # currently not supported dtypes #### for n, f in dtypes: df = tm.makeDataFrame() - df[n] = f + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + df[n] = f msg = re.escape(f"[{n}] is not implemented as a table column") with pytest.raises(TypeError, match=msg): store.append(f"df1_{n}", df) @@ -71,7 +73,8 @@ def test_unimplemented_dtypes_table_columns(setup_path): df = tm.makeDataFrame() df["obj1"] = "foo" 
df["obj2"] = "bar" - df["datetime1"] = datetime.date(2001, 1, 2) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + df["datetime1"] = datetime.date(2001, 1, 2) df = df._consolidate() with ensure_clean_store(setup_path) as store: From 70470f74473a3dbc754e6cc2d36d56173b8c6940 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 19 May 2023 16:40:28 -0700 Subject: [PATCH 26/27] remove extra import --- pandas/core/construction.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 9854377dc1796..41fce78ba750a 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -300,7 +300,6 @@ def array( PeriodArray, TimedeltaArray, ) - from pandas.core.arrays.arrow import ArrowDtype from pandas.core.arrays.string_ import StringDtype if lib.is_scalar(data): From 985e7af3e946521bd127828190b10d718bfe0ccf Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 May 2023 10:03:50 -0700 Subject: [PATCH 27/27] whitespace fixup --- pandas/tests/series/accessors/test_dt_accessor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index aa017fa1703d5..ada580d4f694a 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -101,7 +101,6 @@ def get_expected(ser, prop): left = getattr(ser.dt, name) right = get_expected(ser, name) - if not (is_list_like(left) and is_list_like(right)): assert left == right elif isinstance(left, DataFrame):