From 8ad465cc9ae0ae673efd0089ecb66684eafc84cf Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 22 Nov 2023 16:06:25 -0800 Subject: [PATCH] TST: dt64 unit in tests --- pandas/core/tools/datetimes.py | 3 +- pandas/tests/arithmetic/test_timedelta64.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 3 +- pandas/tests/frame/methods/test_transpose.py | 3 +- .../indexes/datetimes/test_date_range.py | 5 +- .../indexes/datetimes/test_scalar_compat.py | 2 +- pandas/tests/indexes/datetimes/test_setops.py | 8 +- .../tests/indexes/interval/test_interval.py | 2 +- .../indexes/period/test_partial_slicing.py | 2 +- pandas/tests/io/formats/test_format.py | 12 +-- pandas/tests/io/parser/test_parse_dates.py | 21 ++--- pandas/tests/resample/test_period_index.py | 8 +- .../tests/resample/test_resampler_grouper.py | 83 ++++++++---------- pandas/tests/resample/test_time_grouper.py | 85 ++++++++++--------- .../reshape/concat/test_append_common.py | 2 +- pandas/tests/reshape/concat/test_datetimes.py | 7 +- pandas/tests/reshape/merge/test_join.py | 4 +- pandas/tests/reshape/test_pivot.py | 56 ++++++------ pandas/tests/series/indexing/test_setitem.py | 5 +- pandas/tests/series/methods/test_clip.py | 7 +- pandas/tests/series/test_constructors.py | 7 +- pandas/tests/test_algos.py | 12 +-- pandas/tests/tools/test_to_datetime.py | 37 ++++---- .../tseries/offsets/test_business_hour.py | 32 ------- 24 files changed, 182 insertions(+), 226 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 863fb414a75f2..076b506670d40 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -495,7 +495,8 @@ def _convert_listlike_datetimes( if tz_parsed is not None: # We can take a shortcut since the datetime64 numpy array # is in UTC - dtype = cast(DatetimeTZDtype, tz_to_dtype(tz_parsed)) + out_unit = np.datetime_data(result.dtype)[0] + dtype = cast(DatetimeTZDtype, tz_to_dtype(tz_parsed, out_unit)) dt64_values = result.view(f"M8[{dtype.unit}]") dta = DatetimeArray._simple_new(dt64_values, dtype=dtype) return DatetimeIndex._simple_new(dta, name=name) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 9f0e99b829739..e88bde07aee90 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1039,7 +1039,7 @@ def test_td64arr_add_datetime64_nat(self, box_with_array): other = np.datetime64("NaT") tdi = timedelta_range("1 day", periods=3) - expected = DatetimeIndex(["NaT", "NaT", "NaT"]) + expected = DatetimeIndex(["NaT", "NaT", "NaT"], dtype="M8[ns]") tdser = tm.box_expected(tdi, box_with_array) expected = tm.box_expected(expected, box_with_array) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 04b3feabb9854..1c5514eff46d5 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -200,7 +200,8 @@ def test_is_boolean(self, categories, expected): def test_dtype_specific_categorical_dtype(self): expected = "datetime64[ns]" - result = str(Categorical(DatetimeIndex([])).categories.dtype) + dti = DatetimeIndex([], dtype=expected) + result = str(Categorical(dti).categories.dtype) assert result == expected def test_not_string(self): diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index f96f4e0558fa6..93a2f8d80019c 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -29,7 +29,8 @@ def test_transpose_td64_intervals(self): def test_transpose_empty_preserves_datetimeindex(self): # GH#41382 - df = DataFrame(index=DatetimeIndex([])) + dti = DatetimeIndex([], dtype="M8[ns]") + df = DataFrame(index=dti) expected = DatetimeIndex([], dtype="datetime64[ns]", freq=None) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index cd7a5996984de..dfad4d7ad01ea 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -183,6 +183,7 @@ def test_date_range_edges(self, freq): ) exp = DatetimeIndex( [ts + n * td for n in range(1, 5)], + dtype="M8[ns]", freq=freq, ) tm.assert_index_equal(idx, exp) @@ -193,7 +194,7 @@ def test_date_range_edges(self, freq): end=ts + td, freq=freq, ) - exp = DatetimeIndex([], freq=freq) + exp = DatetimeIndex([], dtype="M8[ns]", freq=freq) tm.assert_index_equal(idx, exp) # start matches end @@ -202,7 +203,7 @@ def test_date_range_edges(self, freq): end=ts + td, freq=freq, ) - exp = DatetimeIndex([ts + td], freq=freq) + exp = DatetimeIndex([ts + td], dtype="M8[ns]", freq=freq) tm.assert_index_equal(idx, exp) def test_date_range_near_implementation_bound(self): diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index c361c15c79758..81992219d71b4 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -304,7 +304,7 @@ def test_dti_fields(self, tz): exp = dti[[0, 90, 181, 273]] tm.assert_index_equal(res, exp) res = dti[dti.is_leap_year] - exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name") + exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name").as_unit("ns") tm.assert_index_equal(res, exp) def test_dti_is_year_quarter_start(self): diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 62b5e72d5e025..dde680665a8bc 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -73,7 +73,7 @@ def test_union(self, tz, sort): expected2_notsorted = DatetimeIndex(list(other2) + list(rng2[:3])) rng3 = date_range("1/1/2000", freq="D", periods=5, tz=tz) - other3 = DatetimeIndex([], tz=tz) + other3 = DatetimeIndex([], tz=tz).as_unit("ns") expected3 = date_range("1/1/2000", freq="D", periods=5, tz=tz) expected3_notsorted = rng3 @@ -235,7 +235,7 @@ def test_intersection(self, tz, sort): expected3 = date_range("6/1/2000", "6/20/2000", freq="D", name=None) rng4 = date_range("7/1/2000", "7/31/2000", freq="D", name="idx") - expected4 = DatetimeIndex([], freq="D", name="idx") + expected4 = DatetimeIndex([], freq="D", name="idx", dtype="M8[ns]") for rng, expected in [ (rng2, expected2), @@ -265,7 +265,7 @@ def test_intersection(self, tz, sort): # GH 7880 rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx") - expected4 = DatetimeIndex([], tz=tz, name="idx") + expected4 = DatetimeIndex([], tz=tz, name="idx").as_unit("ns") assert expected4.freq is None for rng, expected in [ @@ -536,7 +536,7 @@ def test_intersection(self): # non-overlapping the_int = rng[:10].intersection(rng[10:]) - expected = DatetimeIndex([]) + expected = DatetimeIndex([]).as_unit("ns") tm.assert_index_equal(the_int, expected) def test_intersection_bug(self): diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index e19b1700236f5..63f9b2176dddd 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -388,7 +388,7 @@ def test_maybe_convert_i8_nat(self, breaks): # GH 20636 index = IntervalIndex.from_breaks(breaks) - to_convert = breaks._constructor([pd.NaT] * 3) + to_convert = breaks._constructor([pd.NaT] * 3).as_unit("ns") expected = Index([np.nan] * 3, dtype=np.float64) result = index._maybe_convert_i8(to_convert) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 4af6b5ca1a6a7..4fab12f195dc0 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -117,7 +117,7 @@ def test_range_slice_outofbounds(self, make_range): idx = make_range(start="2013/10/01", freq="D", periods=10) df = DataFrame({"units": [100 + i for i in range(10)]}, index=idx) - empty = DataFrame(index=type(idx)([], freq="D"), columns=["units"]) + empty = DataFrame(index=idx[:0], columns=["units"]) empty["units"] = empty["units"].astype("int64") tm.assert_frame_equal(df["2013/09/01":"2013/09/30"], empty) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index b520e2463353c..edac82193d1c8 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -872,20 +872,22 @@ def test_to_string_truncate_multilevel(self): with option_context("display.max_rows", 7, "display.max_columns", 7): assert has_doubly_truncated_repr(df) - def test_truncate_with_different_dtypes(self): + @pytest.mark.parametrize("dtype", ["object", "datetime64[us]"]) + def test_truncate_with_different_dtypes(self, dtype): # 11594, 12045 # when truncated the dtypes of the splits can differ # 11594 - s = Series( + ser = Series( [datetime(2012, 1, 1)] * 10 + [datetime(1012, 1, 2)] - + [datetime(2012, 1, 3)] * 10 + + [datetime(2012, 1, 3)] * 10, + dtype=dtype, ) with option_context("display.max_rows", 8): - result = str(s) - assert "object" in result + result = str(ser) + assert dtype in result def test_truncate_with_different_dtypes2(self): # 12045 diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index ee19a20a155aa..113402cda1b9a 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -831,9 +831,7 @@ def test_parse_dates_string(all_parsers): parser = all_parsers result = parser.read_csv(StringIO(data), index_col="date", parse_dates=["date"]) # freq doesn't round-trip - index = DatetimeIndex( - list(date_range("1/1/2009", periods=3)), name="date", freq=None - ) + index = date_range("1/1/2009", periods=3, name="date")._with_freq(None) expected = DataFrame( {"A": ["a", "b", "c"], "B": [1, 3, 4], "C": [2, 4, 5]}, index=index @@ -1736,17 +1734,12 @@ def test_parse_timezone(all_parsers): 2018-01-04 09:05:00+09:00,23400""" result = parser.read_csv(StringIO(data), parse_dates=["dt"]) - dti = DatetimeIndex( - list( - date_range( - start="2018-01-04 09:01:00", - end="2018-01-04 09:05:00", - freq="1min", - tz=timezone(timedelta(minutes=540)), - ) - ), - freq=None, - ) + dti = date_range( + start="2018-01-04 09:01:00", + end="2018-01-04 09:05:00", + freq="1min", + tz=timezone(timedelta(minutes=540)), + )._with_freq(None) expected_data = {"dt": dti, "val": [23350, 23400, 23400, 23400, 23400]} expected = DataFrame(expected_data) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 5d233a940de2f..864ad4605b54d 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -504,8 +504,8 @@ def test_resample_weekly_all_na(self): expected = ts.asfreq("W-THU").ffill() tm.assert_series_equal(result, expected) - def test_resample_tz_localized(self): - dr = date_range(start="2012-4-13", end="2012-5-1") + def test_resample_tz_localized(self, unit): + dr = date_range(start="2012-4-13", end="2012-5-1", unit=unit) ts = Series(range(len(dr)), index=dr) ts_utc = ts.tz_localize("UTC") @@ -514,9 +514,7 @@ def test_resample_tz_localized(self): result = ts_local.resample("W").mean() ts_local_naive = ts_local.copy() - ts_local_naive.index = [ - x.replace(tzinfo=None) for x in ts_local_naive.index.to_pydatetime() - ] + ts_local_naive.index = ts_local_naive.index.tz_localize(None) exp = ts_local_naive.resample("W").mean().tz_localize("America/Los_Angeles") exp.index = pd.DatetimeIndex(exp.index, freq="W") diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index fd02216934576..4afd3b477c3ee 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -349,9 +349,9 @@ def weighted_quantile(series, weights, q): tm.assert_series_equal(result, expected) -def test_resample_groupby_with_label(): +def test_resample_groupby_with_label(unit): # GH 13235 - index = date_range("2000-01-01", freq="2D", periods=5) + index = date_range("2000-01-01", freq="2D", periods=5, unit=unit) df = DataFrame(index=index, data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]}) msg = "DataFrameGroupBy.resample operated on the grouping columns" with tm.assert_produces_warning(FutureWarning, match=msg): @@ -359,8 +359,9 @@ def test_resample_groupby_with_label(): mi = [ np.array([0, 0, 1, 2], dtype=np.int64), - pd.to_datetime( - np.array(["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"]) + np.array( + ["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"], + dtype=f"M8[{unit}]", ), ] mindex = pd.MultiIndex.from_arrays(mi, names=["col0", None]) @@ -505,7 +506,9 @@ def test_resample_groupby_agg_object_dtype_all_nan(consolidate): idx = pd.MultiIndex.from_arrays( [ ["A"] * 3 + ["B"] * 3, - pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2), + pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2).as_unit( + "ns" + ), ], names=["key", "date"], ) @@ -530,19 +533,15 @@ def test_groupby_resample_with_list_of_keys(): } ) result = df.groupby("group").resample("2D", on="date")[["val"]].mean() + + mi_exp = pd.MultiIndex.from_arrays( + [[0, 0, 1, 1], df["date"]._values[::2]], names=["group", "date"] + ) expected = DataFrame( data={ "val": [4.0, 3.5, 6.5, 3.0], }, - index=Index( - data=[ - (0, Timestamp("2016-01-01")), - (0, Timestamp("2016-01-03")), - (1, Timestamp("2016-01-05")), - (1, Timestamp("2016-01-07")), - ], - name=("group", "date"), - ), + index=mi_exp, ) tm.assert_frame_equal(result, expected) @@ -605,17 +604,17 @@ def test_groupby_resample_size_all_index_same(): msg = "DataFrameGroupBy.resample operated on the grouping columns" with tm.assert_produces_warning(FutureWarning, match=msg): result = df.groupby("A").resample("D").size() + + mi_exp = pd.MultiIndex.from_arrays( + [ + [1, 1, 2, 2], + pd.DatetimeIndex(["2000-12-31", "2001-01-01"] * 2, dtype="M8[ns]"), + ], + names=["A", None], + ) expected = Series( 3, - index=pd.MultiIndex.from_tuples( - [ - (1, Timestamp("2000-12-31")), - (1, Timestamp("2001-01-01")), - (2, Timestamp("2000-12-31")), - (2, Timestamp("2001-01-01")), - ], - names=["A", None], - ), + index=mi_exp, ) tm.assert_series_equal(result, expected) @@ -627,25 +626,18 @@ def test_groupby_resample_on_index_with_list_of_keys(): "group": [0, 0, 0, 0, 1, 1, 1, 1], "val": [3, 1, 4, 1, 5, 9, 2, 6], }, - index=Series( - date_range(start="2016-01-01", periods=8), - name="date", - ), + index=date_range(start="2016-01-01", periods=8, name="date"), ) result = df.groupby("group").resample("2D")[["val"]].mean() + + mi_exp = pd.MultiIndex.from_arrays( + [[0, 0, 1, 1], df.index[::2]], names=["group", "date"] + ) expected = DataFrame( data={ "val": [2.0, 2.5, 7.0, 4.0], }, - index=Index( - data=[ - (0, Timestamp("2016-01-01")), - (0, Timestamp("2016-01-03")), - (1, Timestamp("2016-01-05")), - (1, Timestamp("2016-01-07")), - ], - name=("group", "date"), - ), + index=mi_exp, ) tm.assert_frame_equal(result, expected) @@ -659,26 +651,19 @@ def test_groupby_resample_on_index_with_list_of_keys_multi_columns(): "second_val": [2, 7, 1, 8, 2, 8, 1, 8], "third_val": [1, 4, 1, 4, 2, 1, 3, 5], }, - index=Series( - date_range(start="2016-01-01", periods=8), - name="date", - ), + index=date_range(start="2016-01-01", periods=8, name="date"), ) result = df.groupby("group").resample("2D")[["first_val", "second_val"]].mean() + + mi_exp = pd.MultiIndex.from_arrays( + [[0, 0, 1, 1], df.index[::2]], names=["group", "date"] + ) expected = DataFrame( data={ "first_val": [2.0, 2.5, 7.0, 4.0], "second_val": [4.5, 4.5, 5.0, 4.5], }, - index=Index( - data=[ - (0, Timestamp("2016-01-01")), - (0, Timestamp("2016-01-03")), - (1, Timestamp("2016-01-05")), - (1, Timestamp("2016-01-07")), - ], - name=("group", "date"), - ), + index=mi_exp, ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 0d4b4d1865d45..1dc25cb9d4c1e 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -7,6 +7,7 @@ import pandas as pd from pandas import ( DataFrame, + Index, Series, Timestamp, ) @@ -138,13 +139,17 @@ def test_aggregate_normal(resample_method): normal_df["key"] = [1, 2, 3, 4, 5] * 4 dt_df = DataFrame(data, columns=["A", "B", "C", "D"]) - dt_df["key"] = [ - datetime(2013, 1, 1), - datetime(2013, 1, 2), - datetime(2013, 1, 3), - datetime(2013, 1, 4), - datetime(2013, 1, 5), - ] * 4 + dt_df["key"] = Index( + [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + datetime(2013, 1, 3), + datetime(2013, 1, 4), + datetime(2013, 1, 5), + ] + * 4, + dtype="M8[ns]", + ) normal_grouped = normal_df.groupby("key") dt_grouped = dt_df.groupby(Grouper(key="key", freq="D")) @@ -216,13 +221,17 @@ def test_aggregate_with_nat(func, fill_value): normal_df["key"] = [1, 2, np.nan, 4, 5] * 4 dt_df = DataFrame(data, columns=["A", "B", "C", "D"]) - dt_df["key"] = [ - datetime(2013, 1, 1), - datetime(2013, 1, 2), - pd.NaT, - datetime(2013, 1, 4), - datetime(2013, 1, 5), - ] * 4 + dt_df["key"] = Index( + [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + pd.NaT, + datetime(2013, 1, 4), + datetime(2013, 1, 5), + ] + * 4, + dtype="M8[ns]", + ) normal_grouped = normal_df.groupby("key") dt_grouped = dt_df.groupby(Grouper(key="key", freq="D")) @@ -253,13 +262,17 @@ def test_aggregate_with_nat_size(): normal_df["key"] = [1, 2, np.nan, 4, 5] * 4 dt_df = DataFrame(data, columns=["A", "B", "C", "D"]) - dt_df["key"] = [ - datetime(2013, 1, 1), - datetime(2013, 1, 2), - pd.NaT, - datetime(2013, 1, 4), - datetime(2013, 1, 5), - ] * 4 + dt_df["key"] = Index( + [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + pd.NaT, + datetime(2013, 1, 4), + datetime(2013, 1, 5), + ] + * 4, + dtype="M8[ns]", + ) normal_grouped = normal_df.groupby("key") dt_grouped = dt_df.groupby(Grouper(key="key", freq="D")) @@ -314,10 +327,11 @@ def test_repr(): ], ) def test_upsample_sum(method, method_args, expected_values): - s = Series(1, index=date_range("2017", periods=2, freq="h")) - resampled = s.resample("30min") + ser = Series(1, index=date_range("2017", periods=2, freq="h")) + resampled = ser.resample("30min") index = pd.DatetimeIndex( ["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"], + dtype="M8[ns]", freq="30min", ) result = methodcaller(method, **method_args)(resampled) @@ -342,25 +356,12 @@ def test_groupby_resample_interpolate(): .interpolate(method="linear") ) - expected_ind = pd.MultiIndex.from_tuples( - [ - (50, Timestamp("2018-01-07")), - (50, Timestamp("2018-01-08")), - (50, Timestamp("2018-01-09")), - (50, Timestamp("2018-01-10")), - (50, Timestamp("2018-01-11")), - (50, Timestamp("2018-01-12")), - (50, Timestamp("2018-01-13")), - (50, Timestamp("2018-01-14")), - (50, Timestamp("2018-01-15")), - (50, Timestamp("2018-01-16")), - (50, Timestamp("2018-01-17")), - (50, Timestamp("2018-01-18")), - (50, Timestamp("2018-01-19")), - (50, Timestamp("2018-01-20")), - (50, Timestamp("2018-01-21")), - (60, Timestamp("2018-01-14")), - ], + volume = [50] * 15 + [60] + week_starting = list(date_range("2018-01-07", "2018-01-21")) + [ + Timestamp("2018-01-14") + ] + expected_ind = pd.MultiIndex.from_arrays( + [volume, week_starting], names=["volume", "week_starting"], ) diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index a87042180df8f..ab20e8c8f6930 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -315,7 +315,7 @@ def test_concatlike_datetimetz_short(self, tz): exp_idx = pd.DatetimeIndex( ["2014-07-15", "2014-07-16", "2014-07-17", "2014-07-11", "2014-07-21"], tz=tz, - ) + ).as_unit("ns") exp = DataFrame(0, index=exp_idx, columns=["A", "B"]) tm.assert_frame_equal(df1._append(df2), exp) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 22e1d75255b3f..c061a393bb1a6 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -63,6 +63,7 @@ def test_concat_datetime_timezone(self): ) .tz_convert("UTC") .tz_convert("Europe/Paris") + .as_unit("ns") ) expected = DataFrame( @@ -84,7 +85,7 @@ def test_concat_datetime_timezone(self): "2010-12-31 16:00:00+00:00", "2010-12-31 17:00:00+00:00", ] - ) + ).as_unit("ns") expected = DataFrame( [ @@ -197,8 +198,8 @@ def test_concat_NaT_series2(self): def test_concat_NaT_dataframes(self, tz): # GH 12396 - first = DataFrame([[pd.NaT], [pd.NaT]]) - first = first.apply(lambda x: x.dt.tz_localize(tz)) + dti = DatetimeIndex([pd.NaT, pd.NaT], tz=tz) + first = DataFrame({0: dti}) second = DataFrame( [[Timestamp("2015/01/01", tz=tz)], [Timestamp("2016/01/01", tz=tz)]], index=[2, 3], diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 021cbe9a695e2..a38e4ffe2eaf7 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -771,13 +771,13 @@ def test_join_datetime_string(self): ], columns=["x", "y", "a"], ) - dfa["x"] = pd.to_datetime(dfa["x"]).dt.as_unit("ns") + dfa["x"] = pd.to_datetime(dfa["x"]).astype("M8[ns]") dfb = DataFrame( [["2012-08-02", "J", 1], ["2013-04-06", "L", 2]], columns=["x", "y", "z"], index=[2, 4], ) - dfb["x"] = pd.to_datetime(dfb["x"]).dt.as_unit("ns") + dfb["x"] = pd.to_datetime(dfb["x"]).astype("M8[ns]") result = dfb.join(dfa.set_index(["x", "y"]), on=["x", "y"]) expected = DataFrame( [ diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 6ca6fde7c120d..f8ececf6c0540 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -545,40 +545,48 @@ def test_pivot_index_with_nan_dates(self, method): tm.assert_frame_equal(result, pv.T) @pytest.mark.parametrize("method", [True, False]) - def test_pivot_with_tz(self, method): + def test_pivot_with_tz(self, method, unit): # GH 5878 df = DataFrame( { - "dt1": [ - datetime(2013, 1, 1, 9, 0), - datetime(2013, 1, 2, 9, 0), - datetime(2013, 1, 1, 9, 0), - datetime(2013, 1, 2, 9, 0), - ], - "dt2": [ - datetime(2014, 1, 1, 9, 0), - datetime(2014, 1, 1, 9, 0), - datetime(2014, 1, 2, 9, 0), - datetime(2014, 1, 2, 9, 0), - ], + "dt1": pd.DatetimeIndex( + [ + datetime(2013, 1, 1, 9, 0), + datetime(2013, 1, 2, 9, 0), + datetime(2013, 1, 1, 9, 0), + datetime(2013, 1, 2, 9, 0), + ], + dtype=f"M8[{unit}, US/Pacific]", + ), + "dt2": pd.DatetimeIndex( + [ + datetime(2014, 1, 1, 9, 0), + datetime(2014, 1, 1, 9, 0), + datetime(2014, 1, 2, 9, 0), + datetime(2014, 1, 2, 9, 0), + ], + dtype=f"M8[{unit}, Asia/Tokyo]", + ), "data1": np.arange(4, dtype="int64"), "data2": np.arange(4, dtype="int64"), } ) - df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d, tz="US/Pacific")) - df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d, tz="Asia/Tokyo")) - exp_col1 = Index(["data1", "data1", "data2", "data2"]) exp_col2 = pd.DatetimeIndex( - ["2014/01/01 09:00", "2014/01/02 09:00"] * 2, name="dt2", tz="Asia/Tokyo" + ["2014/01/01 09:00", "2014/01/02 09:00"] * 2, + name="dt2", + dtype=f"M8[{unit}, Asia/Tokyo]", ) exp_col = MultiIndex.from_arrays([exp_col1, exp_col2]) + exp_idx = pd.DatetimeIndex( + ["2013/01/01 09:00", "2013/01/02 09:00"], + name="dt1", + dtype=f"M8[{unit}, US/Pacific]", + ) expected = DataFrame( [[0, 2, 0, 2], [1, 3, 1, 3]], - index=pd.DatetimeIndex( - ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" - ), + index=exp_idx, columns=exp_col, ) @@ -590,12 +598,8 @@ def test_pivot_with_tz(self, method): expected = DataFrame( [[0, 2], [1, 3]], - index=pd.DatetimeIndex( - ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" - ), - columns=pd.DatetimeIndex( - ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" - ), + index=exp_idx, + columns=exp_col2[:2], ) if method: diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 02d51d5119469..168be838ec768 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -502,10 +502,11 @@ def test_setitem_empty_series(self): def test_setitem_empty_series_datetimeindex_preserves_freq(self): # GH#33573 our index should retain its freq - series = Series([], DatetimeIndex([], freq="D"), dtype=object) + dti = DatetimeIndex([], freq="D", dtype="M8[ns]") + series = Series([], index=dti, dtype=object) key = Timestamp("2012-01-01") series[key] = 47 - expected = Series(47, DatetimeIndex([key], freq="D")) + expected = Series(47, DatetimeIndex([key], freq="D").as_unit("ns")) tm.assert_series_equal(series, expected) assert series.index.freq == expected.index.freq diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index 130b77db0a1fb..5bbf35f6e39bb 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -135,11 +135,12 @@ def test_clip_with_datetimes(self): ) tm.assert_series_equal(result, expected) - def test_clip_with_timestamps_and_oob_datetimes(self): + @pytest.mark.parametrize("dtype", [object, "M8[us]"]) + def test_clip_with_timestamps_and_oob_datetimes(self, dtype): # GH-42794 - ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)]) + ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=dtype) result = ser.clip(lower=Timestamp.min, upper=Timestamp.max) - expected = Series([Timestamp.min, Timestamp.max], dtype="object") + expected = Series([Timestamp.min, Timestamp.max], dtype=dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 587c73cc535d8..65726eb8fcbb8 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1169,9 +1169,10 @@ def test_constructor_with_datetime_tz3(self): def test_constructor_with_datetime_tz2(self): # with all NaT - s = Series(NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]") - expected = Series(DatetimeIndex(["NaT", "NaT"], tz="US/Eastern")) - tm.assert_series_equal(s, expected) + ser = Series(NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]") + dti = DatetimeIndex(["NaT", "NaT"], tz="US/Eastern").as_unit("ns") + expected = Series(dti) + tm.assert_series_equal(ser, expected) def test_constructor_no_partial_datetime_casting(self): # GH#40111 diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index ce8d962fc3115..a6e63dfd5f409 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1246,9 +1246,10 @@ def test_value_counts_nat(self): result_td = algos.value_counts(td) tm.assert_series_equal(result_td, exp_td) - def test_value_counts_datetime_outofbounds(self): + @pytest.mark.parametrize("dtype", [object, "M8[us]"]) + def test_value_counts_datetime_outofbounds(self, dtype): # GH 13663 - s = Series( + ser = Series( [ datetime(3000, 1, 1), datetime(5000, 1, 1), @@ -1256,13 +1257,14 @@ def test_value_counts_datetime_outofbounds(self): datetime(6000, 1, 1), datetime(3000, 1, 1), datetime(3000, 1, 1), - ] + ], + dtype=dtype, ) - res = s.value_counts() + res = ser.value_counts() exp_index = Index( [datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)], - dtype=object, + dtype=dtype, ) exp = Series([3, 2, 1], index=exp_index, name="count") tm.assert_series_equal(res, exp) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index b7db5545a9e26..503032293dc81 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1585,28 +1585,23 @@ def test_convert_object_to_datetime_with_cache( @pytest.mark.parametrize("cache", [True, False]) @pytest.mark.parametrize( - ("input", "expected"), - ( - ( - Series([NaT] * 20 + [None] * 20, dtype="object"), - Series([NaT] * 40, dtype="datetime64[ns]"), - ), - ( - Series([NaT] * 60 + [None] * 60, dtype="object"), - Series([NaT] * 120, dtype="datetime64[ns]"), - ), - (Series([None] * 20), Series([NaT] * 20, dtype="datetime64[ns]")), - (Series([None] * 60), Series([NaT] * 60, dtype="datetime64[ns]")), - (Series([""] * 20), Series([NaT] * 20, dtype="datetime64[ns]")), - (Series([""] * 60), Series([NaT] * 60, dtype="datetime64[ns]")), - (Series([pd.NA] * 20), Series([NaT] * 20, dtype="datetime64[ns]")), - (Series([pd.NA] * 60), Series([NaT] * 60, dtype="datetime64[ns]")), - (Series([np.nan] * 20), Series([NaT] * 20, dtype="datetime64[ns]")), - (Series([np.nan] * 60), Series([NaT] * 60, dtype="datetime64[ns]")), - ), + "input", + [ + Series([NaT] * 20 + [None] * 20, dtype="object"), + Series([NaT] * 60 + [None] * 60, dtype="object"), + Series([None] * 20), + Series([None] * 60), + Series([""] * 20), + Series([""] * 60), + Series([pd.NA] * 20), + Series([pd.NA] * 60), + Series([np.nan] * 20), + Series([np.nan] * 60), + ], ) - def test_to_datetime_converts_null_like_to_nat(self, cache, input, expected): + def test_to_datetime_converts_null_like_to_nat(self, cache, input): # GH35888 + expected = Series([NaT] * len(input), dtype="M8[ns]") result = to_datetime(input, cache=cache) tm.assert_series_equal(result, expected) @@ -1948,7 +1943,7 @@ def test_unit_array_mixed_nans_large_int(self, cache): tm.assert_index_equal(result, expected) result = to_datetime(values, errors="coerce", unit="s", cache=cache) - expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"]) + expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"], dtype="M8[ns]") tm.assert_index_equal(result, expected) msg = "cannot convert input 1420043460000000000000000 with the unit 's'" diff --git a/pandas/tests/tseries/offsets/test_business_hour.py b/pandas/tests/tseries/offsets/test_business_hour.py index 227cf6a495c91..2779100f5355c 100644 --- a/pandas/tests/tseries/offsets/test_business_hour.py +++ b/pandas/tests/tseries/offsets/test_business_hour.py @@ -946,38 +946,6 @@ def test_apply_nanoseconds(self): for base, expected in cases.items(): assert_offset_equal(offset, base, expected) - def test_datetimeindex(self): - idx1 = date_range(start="2014-07-04 15:00", end="2014-07-08 10:00", freq="bh") - idx2 = date_range(start="2014-07-04 15:00", periods=12, freq="bh") - idx3 = date_range(end="2014-07-08 10:00", periods=12, freq="bh") - expected = DatetimeIndex( - [ - "2014-07-04 15:00", - "2014-07-04 16:00", - "2014-07-07 09:00", - "2014-07-07 10:00", - "2014-07-07 11:00", - "2014-07-07 12:00", - "2014-07-07 13:00", - "2014-07-07 14:00", - "2014-07-07 15:00", - "2014-07-07 16:00", - "2014-07-08 09:00", - "2014-07-08 10:00", - ], - freq="bh", - ) - for idx in [idx1, idx2, idx3]: - tm.assert_index_equal(idx, expected) - - idx1 = date_range(start="2014-07-04 15:45", end="2014-07-08 10:45", freq="bh") - idx2 = date_range(start="2014-07-04 15:45", periods=12, freq="bh") - idx3 = date_range(end="2014-07-08 10:45", periods=12, freq="bh") - - expected = idx1 - for idx in [idx1, idx2, idx3]: - tm.assert_index_equal(idx, expected) - @pytest.mark.parametrize("td_unit", ["s", "ms", "us", "ns"]) @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) def test_bday_ignores_timedeltas(self, unit, td_unit):