diff --git a/pandas/tests/arrays/datetimes/__init__.py b/pandas/tests/arrays/datetimes/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/arrays/datetimes/test_constructors.py b/pandas/tests/arrays/datetimes/test_constructors.py new file mode 100644 index 0000000000000..cd7d9a479ab38 --- /dev/null +++ b/pandas/tests/arrays/datetimes/test_constructors.py @@ -0,0 +1,156 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray +from pandas.core.arrays.datetimes import sequence_to_dt64ns + + +class TestDatetimeArrayConstructor: + def test_from_sequence_invalid_type(self): + mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)]) + with pytest.raises(TypeError, match="Cannot create a DatetimeArray"): + DatetimeArray._from_sequence(mi) + + def test_only_1dim_accepted(self): + arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]") + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 + DatetimeArray(arr.reshape(2, 2, 1)) + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 0-dim + DatetimeArray(arr[[0]].squeeze()) + + def test_freq_validation(self): + # GH#24623 check that invalid instances cannot be created with the + # public constructor + arr = np.arange(5, dtype=np.int64) * 3600 * 10 ** 9 + + msg = ( + "Inferred frequency H from passed values does not " + "conform to passed frequency W-SUN" + ) + with pytest.raises(ValueError, match=msg): + DatetimeArray(arr, freq="W") + + @pytest.mark.parametrize( + "meth", + [ + DatetimeArray._from_sequence, + sequence_to_dt64ns, + pd.to_datetime, + pd.DatetimeIndex, + ], + ) + def test_mixing_naive_tzaware_raises(self, meth): + # GH#24569 + arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]) + + msg = ( + "Cannot mix tz-aware with tz-naive values|" + "Tz-aware datetime.datetime cannot be converted " + "to datetime64 unless utc=True" + ) + + for obj in [arr, arr[::-1]]: + # check that we raise regardless of whether naive is found + # before aware or vice-versa + with pytest.raises(ValueError, match=msg): + meth(obj) + + def test_from_pandas_array(self): + arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10 ** 9 + + result = DatetimeArray._from_sequence(arr)._with_freq("infer") + + expected = pd.date_range("1970-01-01", periods=5, freq="H")._data + tm.assert_datetime_array_equal(result, expected) + + def test_mismatched_timezone_raises(self): + arr = DatetimeArray( + np.array(["2000-01-01T06:00:00"], dtype="M8[ns]"), + dtype=DatetimeTZDtype(tz="US/Central"), + ) + dtype = DatetimeTZDtype(tz="US/Eastern") + with pytest.raises(TypeError, match="Timezone of the array"): + DatetimeArray(arr, dtype=dtype) + + def test_non_array_raises(self): + with pytest.raises(ValueError, match="list"): + DatetimeArray([1, 2, 3]) + + def test_bool_dtype_raises(self): + arr = np.array([1, 2, 3], dtype="bool") + + with pytest.raises( + ValueError, match="The dtype of 'values' is incorrect.*bool" + ): + DatetimeArray(arr) + + msg = r"dtype bool cannot be converted to datetime64\[ns\]" + with pytest.raises(TypeError, match=msg): + DatetimeArray._from_sequence(arr) + + with pytest.raises(TypeError, match=msg): + sequence_to_dt64ns(arr) + + with pytest.raises(TypeError, match=msg): + pd.DatetimeIndex(arr) + + with pytest.raises(TypeError, match=msg): + pd.to_datetime(arr) + + def test_incorrect_dtype_raises(self): + with pytest.raises(ValueError, match="Unexpected value for 'dtype'."): + DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="category") + + def test_freq_infer_raises(self): + with pytest.raises(ValueError, match="Frequency inference"): + DatetimeArray(np.array([1, 2, 3], dtype="i8"), freq="infer") + + def test_copy(self): + data = np.array([1, 2, 3], dtype="M8[ns]") + arr = DatetimeArray(data, copy=False) + assert arr._data is data + + arr = DatetimeArray(data, copy=True) + assert arr._data is not data + + +class TestSequenceToDT64NS: + def test_tz_dtype_mismatch_raises(self): + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) + with pytest.raises(TypeError, match="data is already tz-aware"): + sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="UTC")) + + def test_tz_dtype_matches(self): + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) + result, _, _ = sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="US/Central")) + tm.assert_numpy_array_equal(arr._data, result) + + @pytest.mark.parametrize("order", ["F", "C"]) + def test_2d(self, order): + dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific") + arr = np.array(dti, dtype=object).reshape(3, 2) + if order == "F": + arr = arr.T + + res = sequence_to_dt64ns(arr) + expected = sequence_to_dt64ns(arr.ravel()) + + tm.assert_numpy_array_equal(res[0].ravel(), expected[0]) + assert res[1] == expected[1] + assert res[2] == expected[2] + + res = DatetimeArray._from_sequence(arr) + expected = DatetimeArray._from_sequence(arr.ravel()).reshape(arr.shape) + tm.assert_datetime_array_equal(res, expected) diff --git a/pandas/tests/arrays/datetimes/test_reductions.py b/pandas/tests/arrays/datetimes/test_reductions.py new file mode 100644 index 0000000000000..0d30dc1bf2a6d --- /dev/null +++ b/pandas/tests/arrays/datetimes/test_reductions.py @@ -0,0 +1,175 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas import NaT +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray + + +class TestReductions: + @pytest.fixture + def arr1d(self, tz_naive_fixture): + tz = tz_naive_fixture + dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") + arr = DatetimeArray._from_sequence( + [ + "2000-01-03", + "2000-01-03", + "NaT", + "2000-01-02", + "2000-01-05", + "2000-01-04", + ], + dtype=dtype, + ) + return arr + + def test_min_max(self, arr1d): + arr = arr1d + tz = arr.tz + + result = arr.min() + expected = pd.Timestamp("2000-01-02", tz=tz) + assert result == expected + + result = arr.max() + expected = pd.Timestamp("2000-01-05", tz=tz) + assert result == expected + + result = arr.min(skipna=False) + assert result is pd.NaT + + result = arr.max(skipna=False) + assert result is pd.NaT + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_empty(self, skipna, tz): + dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") + arr = DatetimeArray._from_sequence([], dtype=dtype) + result = arr.min(skipna=skipna) + assert result is pd.NaT + + result = arr.max(skipna=skipna) + assert result is pd.NaT + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_median_empty(self, skipna, tz): + dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") + arr = DatetimeArray._from_sequence([], dtype=dtype) + result = arr.median(skipna=skipna) + assert result is pd.NaT + + arr = arr.reshape(0, 3) + result = arr.median(axis=0, skipna=skipna) + expected = type(arr)._from_sequence([pd.NaT, pd.NaT, pd.NaT], dtype=arr.dtype) + tm.assert_equal(result, expected) + + result = arr.median(axis=1, skipna=skipna) + expected = type(arr)._from_sequence([], dtype=arr.dtype) + tm.assert_equal(result, expected) + + def test_median(self, arr1d): + arr = arr1d + + result = arr.median() + assert result == arr[0] + result = arr.median(skipna=False) + assert result is pd.NaT + + result = arr.dropna().median(skipna=False) + assert result == arr[0] + + result = arr.median(axis=0) + assert result == arr[0] + + def test_median_axis(self, arr1d): + arr = arr1d + assert arr.median(axis=0) == arr.median() + assert arr.median(axis=0, skipna=False) is pd.NaT + + msg = r"abs\(axis\) must be less than ndim" + with pytest.raises(ValueError, match=msg): + arr.median(axis=1) + + @pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning") + def test_median_2d(self, arr1d): + arr = arr1d.reshape(1, -1) + + # axis = None + assert arr.median() == arr1d.median() + assert arr.median(skipna=False) is pd.NaT + + # axis = 0 + result = arr.median(axis=0) + expected = arr1d + tm.assert_equal(result, expected) + + # Since column 3 is all-NaT, we get NaT there with or without skipna + result = arr.median(axis=0, skipna=False) + expected = arr1d + tm.assert_equal(result, expected) + + # axis = 1 + result = arr.median(axis=1) + expected = type(arr)._from_sequence([arr1d.median()]) + tm.assert_equal(result, expected) + + result = arr.median(axis=1, skipna=False) + expected = type(arr)._from_sequence([pd.NaT], dtype=arr.dtype) + tm.assert_equal(result, expected) + + def test_mean(self, arr1d): + arr = arr1d + + # manually verified result + expected = arr[0] + 0.4 * pd.Timedelta(days=1) + + result = arr.mean() + assert result == expected + result = arr.mean(skipna=False) + assert result is pd.NaT + + result = arr.dropna().mean(skipna=False) + assert result == expected + + result = arr.mean(axis=0) + assert result == expected + + def test_mean_2d(self): + dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific") + dta = dti._data.reshape(3, 2) + + result = dta.mean(axis=0) + expected = dta[1] + tm.assert_datetime_array_equal(result, expected) + + result = dta.mean(axis=1) + expected = dta[:, 0] + pd.Timedelta(hours=12) + tm.assert_datetime_array_equal(result, expected) + + result = dta.mean(axis=None) + expected = dti.mean() + assert result == expected + + @pytest.mark.parametrize("skipna", [True, False]) + def test_mean_empty(self, arr1d, skipna): + arr = arr1d[:0] + + assert arr.mean(skipna=skipna) is NaT + + arr2d = arr.reshape(0, 3) + result = arr2d.mean(axis=0, skipna=skipna) + expected = DatetimeArray._from_sequence([NaT, NaT, NaT], dtype=arr.dtype) + tm.assert_datetime_array_equal(result, expected) + + result = arr2d.mean(axis=1, skipna=skipna) + expected = arr # i.e. 1D, empty + tm.assert_datetime_array_equal(result, expected) + + result = arr2d.mean(axis=None, skipna=skipna) + assert result is NaT diff --git a/pandas/tests/arrays/period/__init__.py b/pandas/tests/arrays/period/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py new file mode 100644 index 0000000000000..8dc9d2a996728 --- /dev/null +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -0,0 +1,115 @@ +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.dtypes import PeriodDtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import PeriodArray, period_array + +pyarrow_skip = pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.15.1.dev") + + +@pyarrow_skip +def test_arrow_extension_type(): + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + p1 = ArrowPeriodType("D") + p2 = ArrowPeriodType("D") + p3 = ArrowPeriodType("M") + + assert p1.freq == "D" + assert p1 == p2 + assert not p1 == p3 + assert hash(p1) == hash(p2) + assert not hash(p1) == hash(p3) + + +@pyarrow_skip +@pytest.mark.parametrize( + "data, freq", + [ + (pd.date_range("2017", periods=3), "D"), + (pd.date_range("2017", periods=3, freq="A"), "A-DEC"), + ], +) +def test_arrow_array(data, freq): + import pyarrow as pa + + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + periods = period_array(data, freq=freq) + result = pa.array(periods) + assert isinstance(result.type, ArrowPeriodType) + assert result.type.freq == freq + expected = pa.array(periods.asi8, type="int64") + assert result.storage.equals(expected) + + # convert to its storage type + result = pa.array(periods, type=pa.int64()) + assert result.equals(expected) + + # unsupported conversions + msg = "Not supported to convert PeriodArray to 'double' type" + with pytest.raises(TypeError, match=msg): + pa.array(periods, type="float64") + + with pytest.raises(TypeError, match="different 'freq'"): + pa.array(periods, type=ArrowPeriodType("T")) + + +@pyarrow_skip +def test_arrow_array_missing(): + import pyarrow as pa + + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + arr = PeriodArray([1, 2, 3], freq="D") + arr[1] = pd.NaT + + result = pa.array(arr) + assert isinstance(result.type, ArrowPeriodType) + assert result.type.freq == "D" + expected = pa.array([1, None, 3], type="int64") + assert result.storage.equals(expected) + + +@pyarrow_skip +def test_arrow_table_roundtrip(): + import pyarrow as pa + + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + arr = PeriodArray([1, 2, 3], freq="D") + arr[1] = pd.NaT + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + assert isinstance(table.field("a").type, ArrowPeriodType) + result = table.to_pandas() + assert isinstance(result["a"].dtype, PeriodDtype) + tm.assert_frame_equal(result, df) + + table2 = pa.concat_tables([table, table]) + result = table2.to_pandas() + expected = pd.concat([df, df], ignore_index=True) + tm.assert_frame_equal(result, expected) + + +@pyarrow_skip +def test_arrow_table_roundtrip_without_metadata(): + import pyarrow as pa + + arr = PeriodArray([1, 2, 3], freq="H") + arr[1] = pd.NaT + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + # remove the metadata + table = table.replace_schema_metadata() + assert table.schema.metadata is None + + result = table.to_pandas() + assert isinstance(result["a"].dtype, PeriodDtype) + tm.assert_frame_equal(result, df) diff --git a/pandas/tests/arrays/period/test_astype.py b/pandas/tests/arrays/period/test_astype.py new file mode 100644 index 0000000000000..52cd28c8d5acc --- /dev/null +++ b/pandas/tests/arrays/period/test_astype.py @@ -0,0 +1,70 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import PeriodDtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import period_array + + +@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) +def test_astype(dtype): + # We choose to ignore the sign and size of integers for + # Period/Datetime/Timedelta astype + arr = period_array(["2000", "2001", None], freq="D") + with tm.assert_produces_warning(FutureWarning): + # astype(int..) deprecated + result = arr.astype(dtype) + + if np.dtype(dtype).kind == "u": + expected_dtype = np.dtype("uint64") + else: + expected_dtype = np.dtype("int64") + + with tm.assert_produces_warning(FutureWarning): + # astype(int..) deprecated + expected = arr.astype(expected_dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_copies(): + arr = period_array(["2000", "2001", None], freq="D") + with tm.assert_produces_warning(FutureWarning): + # astype(int..) deprecated + result = arr.astype(np.int64, copy=False) + + # Add the `.base`, since we now use `.asi8` which returns a view. + # We could maybe override it in PeriodArray to return ._data directly. + assert result.base is arr._data + + with tm.assert_produces_warning(FutureWarning): + # astype(int..) deprecated + result = arr.astype(np.int64, copy=True) + assert result is not arr._data + tm.assert_numpy_array_equal(result, arr._data.view("i8")) + + +def test_astype_categorical(): + arr = period_array(["2000", "2001", "2001", None], freq="D") + result = arr.astype("category") + categories = pd.PeriodIndex(["2000", "2001"], freq="D") + expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories) + tm.assert_categorical_equal(result, expected) + + +def test_astype_period(): + arr = period_array(["2000", "2001", None], freq="D") + result = arr.astype(PeriodDtype("M")) + expected = period_array(["2000", "2001", None], freq="M") + tm.assert_period_array_equal(result, expected) + + +@pytest.mark.parametrize("other", ["datetime64[ns]", "timedelta64[ns]"]) +def test_astype_datetime(other): + arr = period_array(["2000", "2001", None], freq="D") + # slice off the [ns] so that the regex matches. + with pytest.raises(TypeError, match=other[:-4]): + arr.astype(other) diff --git a/pandas/tests/arrays/period/test_constructors.py b/pandas/tests/arrays/period/test_constructors.py new file mode 100644 index 0000000000000..0a8a106767fb6 --- /dev/null +++ b/pandas/tests/arrays/period/test_constructors.py @@ -0,0 +1,95 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import iNaT +from pandas._libs.tslibs.period import IncompatibleFrequency + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import PeriodArray, period_array + + +@pytest.mark.parametrize( + "data, freq, expected", + [ + ([pd.Period("2017", "D")], None, [17167]), + ([pd.Period("2017", "D")], "D", [17167]), + ([2017], "D", [17167]), + (["2017"], "D", [17167]), + ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]), + ([pd.Period("2017", "D"), None], None, [17167, iNaT]), + (pd.Series(pd.date_range("2017", periods=3)), None, [17167, 17168, 17169]), + (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]), + (pd.period_range("2017", periods=4, freq="Q"), None, [188, 189, 190, 191]), + ], +) +def test_period_array_ok(data, freq, expected): + result = period_array(data, freq=freq).asi8 + expected = np.asarray(expected, dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + +def test_period_array_readonly_object(): + # https://github.com/pandas-dev/pandas/issues/25403 + pa = period_array([pd.Period("2019-01-01")]) + arr = np.asarray(pa, dtype="object") + arr.setflags(write=False) + + result = period_array(arr) + tm.assert_period_array_equal(result, pa) + + result = pd.Series(arr) + tm.assert_series_equal(result, pd.Series(pa)) + + result = pd.DataFrame({"A": arr}) + tm.assert_frame_equal(result, pd.DataFrame({"A": pa})) + + +def test_from_datetime64_freq_changes(): + # https://github.com/pandas-dev/pandas/issues/23438 + arr = pd.date_range("2017", periods=3, freq="D") + result = PeriodArray._from_datetime64(arr, freq="M") + expected = period_array(["2017-01-01", "2017-01-01", "2017-01-01"], freq="M") + tm.assert_period_array_equal(result, expected) + + +@pytest.mark.parametrize( + "data, freq, msg", + [ + ( + [pd.Period("2017", "D"), pd.Period("2017", "A")], + None, + "Input has different freq", + ), + ([pd.Period("2017", "D")], "A", "Input has different freq"), + ], +) +def test_period_array_raises(data, freq, msg): + with pytest.raises(IncompatibleFrequency, match=msg): + period_array(data, freq) + + +def test_period_array_non_period_series_raies(): + ser = pd.Series([1, 2, 3]) + with pytest.raises(TypeError, match="dtype"): + PeriodArray(ser, freq="D") + + +def test_period_array_freq_mismatch(): + arr = period_array(["2000", "2001"], freq="D") + with pytest.raises(IncompatibleFrequency, match="freq"): + PeriodArray(arr, freq="M") + + with pytest.raises(IncompatibleFrequency, match="freq"): + PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd()) + + +def test_from_sequence_disallows_i8(): + arr = period_array(["2000", "2001"], freq="D") + + msg = str(arr[0].ordinal) + with pytest.raises(TypeError, match=msg): + PeriodArray._from_sequence(arr.asi8, dtype=arr.dtype) + + with pytest.raises(TypeError, match=msg): + PeriodArray._from_sequence(list(arr.asi8), dtype=arr.dtype) diff --git a/pandas/tests/arrays/period/test_reductions.py b/pandas/tests/arrays/period/test_reductions.py new file mode 100644 index 0000000000000..2889cc786dd71 --- /dev/null +++ b/pandas/tests/arrays/period/test_reductions.py @@ -0,0 +1,42 @@ +import pytest + +import pandas as pd +from pandas.core.arrays import period_array + + +class TestReductions: + def test_min_max(self): + arr = period_array( + [ + "2000-01-03", + "2000-01-03", + "NaT", + "2000-01-02", + "2000-01-05", + "2000-01-04", + ], + freq="D", + ) + + result = arr.min() + expected = pd.Period("2000-01-02", freq="D") + assert result == expected + + result = arr.max() + expected = pd.Period("2000-01-05", freq="D") + assert result == expected + + result = arr.min(skipna=False) + assert result is pd.NaT + + result = arr.max(skipna=False) + assert result is pd.NaT + + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_empty(self, skipna): + arr = period_array([], freq="D") + result = arr.min(skipna=skipna) + assert result is pd.NaT + + result = arr.max(skipna=skipna) + assert result is pd.NaT diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 587d3c466c631..86c4b4c5ce63d 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -9,123 +9,8 @@ from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd -from pandas import NaT import pandas._testing as tm from pandas.core.arrays import DatetimeArray -from pandas.core.arrays.datetimes import sequence_to_dt64ns - - -class TestDatetimeArrayConstructor: - def test_from_sequence_invalid_type(self): - mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)]) - with pytest.raises(TypeError, match="Cannot create a DatetimeArray"): - DatetimeArray._from_sequence(mi) - - def test_only_1dim_accepted(self): - arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]") - - with pytest.raises(ValueError, match="Only 1-dimensional"): - # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 - DatetimeArray(arr.reshape(2, 2, 1)) - - with pytest.raises(ValueError, match="Only 1-dimensional"): - # 0-dim - DatetimeArray(arr[[0]].squeeze()) - - def test_freq_validation(self): - # GH#24623 check that invalid instances cannot be created with the - # public constructor - arr = np.arange(5, dtype=np.int64) * 3600 * 10 ** 9 - - msg = ( - "Inferred frequency H from passed values does not " - "conform to passed frequency W-SUN" - ) - with pytest.raises(ValueError, match=msg): - DatetimeArray(arr, freq="W") - - @pytest.mark.parametrize( - "meth", - [ - DatetimeArray._from_sequence, - sequence_to_dt64ns, - pd.to_datetime, - pd.DatetimeIndex, - ], - ) - def test_mixing_naive_tzaware_raises(self, meth): - # GH#24569 - arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]) - - msg = ( - "Cannot mix tz-aware with tz-naive values|" - "Tz-aware datetime.datetime cannot be converted " - "to datetime64 unless utc=True" - ) - - for obj in [arr, arr[::-1]]: - # check that we raise regardless of whether naive is found - # before aware or vice-versa - with pytest.raises(ValueError, match=msg): - meth(obj) - - def test_from_pandas_array(self): - arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10 ** 9 - - result = DatetimeArray._from_sequence(arr)._with_freq("infer") - - expected = pd.date_range("1970-01-01", periods=5, freq="H")._data - tm.assert_datetime_array_equal(result, expected) - - def test_mismatched_timezone_raises(self): - arr = DatetimeArray( - np.array(["2000-01-01T06:00:00"], dtype="M8[ns]"), - dtype=DatetimeTZDtype(tz="US/Central"), - ) - dtype = DatetimeTZDtype(tz="US/Eastern") - with pytest.raises(TypeError, match="Timezone of the array"): - DatetimeArray(arr, dtype=dtype) - - def test_non_array_raises(self): - with pytest.raises(ValueError, match="list"): - DatetimeArray([1, 2, 3]) - - def test_bool_dtype_raises(self): - arr = np.array([1, 2, 3], dtype="bool") - - with pytest.raises( - ValueError, match="The dtype of 'values' is incorrect.*bool" - ): - DatetimeArray(arr) - - msg = r"dtype bool cannot be converted to datetime64\[ns\]" - with pytest.raises(TypeError, match=msg): - DatetimeArray._from_sequence(arr) - - with pytest.raises(TypeError, match=msg): - sequence_to_dt64ns(arr) - - with pytest.raises(TypeError, match=msg): - pd.DatetimeIndex(arr) - - with pytest.raises(TypeError, match=msg): - pd.to_datetime(arr) - - def test_incorrect_dtype_raises(self): - with pytest.raises(ValueError, match="Unexpected value for 'dtype'."): - DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="category") - - def test_freq_infer_raises(self): - with pytest.raises(ValueError, match="Frequency inference"): - DatetimeArray(np.array([1, 2, 3], dtype="i8"), freq="infer") - - def test_copy(self): - data = np.array([1, 2, 3], dtype="M8[ns]") - arr = DatetimeArray(data, copy=False) - assert arr._data is data - - arr = DatetimeArray(data, copy=True) - assert arr._data is not data class TestDatetimeArrayComparisons: @@ -471,203 +356,3 @@ def test_tz_localize_t2d(self): roundtrip = expected.tz_localize("US/Pacific") tm.assert_datetime_array_equal(roundtrip, dta) - - -class TestSequenceToDT64NS: - def test_tz_dtype_mismatch_raises(self): - arr = DatetimeArray._from_sequence( - ["2000"], dtype=DatetimeTZDtype(tz="US/Central") - ) - with pytest.raises(TypeError, match="data is already tz-aware"): - sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="UTC")) - - def test_tz_dtype_matches(self): - arr = DatetimeArray._from_sequence( - ["2000"], dtype=DatetimeTZDtype(tz="US/Central") - ) - result, _, _ = sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="US/Central")) - tm.assert_numpy_array_equal(arr._data, result) - - @pytest.mark.parametrize("order", ["F", "C"]) - def test_2d(self, order): - dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific") - arr = np.array(dti, dtype=object).reshape(3, 2) - if order == "F": - arr = arr.T - - res = sequence_to_dt64ns(arr) - expected = sequence_to_dt64ns(arr.ravel()) - - tm.assert_numpy_array_equal(res[0].ravel(), expected[0]) - assert res[1] == expected[1] - assert res[2] == expected[2] - - res = DatetimeArray._from_sequence(arr) - expected = DatetimeArray._from_sequence(arr.ravel()).reshape(arr.shape) - tm.assert_datetime_array_equal(res, expected) - - -class TestReductions: - @pytest.fixture - def arr1d(self, tz_naive_fixture): - tz = tz_naive_fixture - dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") - arr = DatetimeArray._from_sequence( - [ - "2000-01-03", - "2000-01-03", - "NaT", - "2000-01-02", - "2000-01-05", - "2000-01-04", - ], - dtype=dtype, - ) - return arr - - def test_min_max(self, arr1d): - arr = arr1d - tz = arr.tz - - result = arr.min() - expected = pd.Timestamp("2000-01-02", tz=tz) - assert result == expected - - result = arr.max() - expected = pd.Timestamp("2000-01-05", tz=tz) - assert result == expected - - result = arr.min(skipna=False) - assert result is pd.NaT - - result = arr.max(skipna=False) - assert result is pd.NaT - - @pytest.mark.parametrize("tz", [None, "US/Central"]) - @pytest.mark.parametrize("skipna", [True, False]) - def test_min_max_empty(self, skipna, tz): - dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") - arr = DatetimeArray._from_sequence([], dtype=dtype) - result = arr.min(skipna=skipna) - assert result is pd.NaT - - result = arr.max(skipna=skipna) - assert result is pd.NaT - - @pytest.mark.parametrize("tz", [None, "US/Central"]) - @pytest.mark.parametrize("skipna", [True, False]) - def test_median_empty(self, skipna, tz): - dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") - arr = DatetimeArray._from_sequence([], dtype=dtype) - result = arr.median(skipna=skipna) - assert result is pd.NaT - - arr = arr.reshape(0, 3) - result = arr.median(axis=0, skipna=skipna) - expected = type(arr)._from_sequence([pd.NaT, pd.NaT, pd.NaT], dtype=arr.dtype) - tm.assert_equal(result, expected) - - result = arr.median(axis=1, skipna=skipna) - expected = type(arr)._from_sequence([], dtype=arr.dtype) - tm.assert_equal(result, expected) - - def test_median(self, arr1d): - arr = arr1d - - result = arr.median() - assert result == arr[0] - result = arr.median(skipna=False) - assert result is pd.NaT - - result = arr.dropna().median(skipna=False) - assert result == arr[0] - - result = arr.median(axis=0) - assert result == arr[0] - - def test_median_axis(self, arr1d): - arr = arr1d - assert arr.median(axis=0) == arr.median() - assert arr.median(axis=0, skipna=False) is pd.NaT - - msg = r"abs\(axis\) must be less than ndim" - with pytest.raises(ValueError, match=msg): - arr.median(axis=1) - - @pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning") - def test_median_2d(self, arr1d): - arr = arr1d.reshape(1, -1) - - # axis = None - assert arr.median() == arr1d.median() - assert arr.median(skipna=False) is pd.NaT - - # axis = 0 - result = arr.median(axis=0) - expected = arr1d - tm.assert_equal(result, expected) - - # Since column 3 is all-NaT, we get NaT there with or without skipna - result = arr.median(axis=0, skipna=False) - expected = arr1d - tm.assert_equal(result, expected) - - # axis = 1 - result = arr.median(axis=1) - expected = type(arr)._from_sequence([arr1d.median()]) - tm.assert_equal(result, expected) - - result = arr.median(axis=1, skipna=False) - expected = type(arr)._from_sequence([pd.NaT], dtype=arr.dtype) - tm.assert_equal(result, expected) - - def test_mean(self, arr1d): - arr = arr1d - - # manually verified result - expected = arr[0] + 0.4 * pd.Timedelta(days=1) - - result = arr.mean() - assert result == expected - result = arr.mean(skipna=False) - assert result is pd.NaT - - result = arr.dropna().mean(skipna=False) - assert result == expected - - result = arr.mean(axis=0) - assert result == expected - - def test_mean_2d(self): - dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific") - dta = dti._data.reshape(3, 2) - - result = dta.mean(axis=0) - expected = dta[1] - tm.assert_datetime_array_equal(result, expected) - - result = dta.mean(axis=1) - expected = dta[:, 0] + pd.Timedelta(hours=12) - tm.assert_datetime_array_equal(result, expected) - - result = dta.mean(axis=None) - expected = dti.mean() - assert result == expected - - @pytest.mark.parametrize("skipna", [True, False]) - def test_mean_empty(self, arr1d, skipna): - arr = arr1d[:0] - - assert arr.mean(skipna=skipna) is NaT - - arr2d = arr.reshape(0, 3) - result = arr2d.mean(axis=0, skipna=skipna) - expected = DatetimeArray._from_sequence([NaT, NaT, NaT], dtype=arr.dtype) - tm.assert_datetime_array_equal(result, expected) - - result = arr2d.mean(axis=1, skipna=skipna) - expected = arr # i.e. 1D, empty - tm.assert_datetime_array_equal(result, expected) - - result = arr2d.mean(axis=None, skipna=skipna) - assert result is NaT diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 443eced3922ac..d044b191cf279 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -3,7 +3,6 @@ from pandas._libs.tslibs import iNaT from pandas._libs.tslibs.period import IncompatibleFrequency -import pandas.util._test_decorators as td from pandas.core.dtypes.base import registry from pandas.core.dtypes.dtypes import PeriodDtype @@ -27,92 +26,6 @@ def test_registered(): # period_array -@pytest.mark.parametrize( - "data, freq, expected", - [ - ([pd.Period("2017", "D")], None, [17167]), - ([pd.Period("2017", "D")], "D", [17167]), - ([2017], "D", [17167]), - (["2017"], "D", [17167]), - ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]), - ([pd.Period("2017", "D"), None], None, [17167, iNaT]), - (pd.Series(pd.date_range("2017", periods=3)), None, [17167, 17168, 17169]), - (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]), - (pd.period_range("2017", periods=4, freq="Q"), None, [188, 189, 190, 191]), - ], -) -def test_period_array_ok(data, freq, expected): - result = period_array(data, freq=freq).asi8 - expected = np.asarray(expected, dtype=np.int64) - tm.assert_numpy_array_equal(result, expected) - - -def test_period_array_readonly_object(): - # https://github.com/pandas-dev/pandas/issues/25403 - pa = period_array([pd.Period("2019-01-01")]) - arr = np.asarray(pa, dtype="object") - arr.setflags(write=False) - - result = period_array(arr) - tm.assert_period_array_equal(result, pa) - - result = pd.Series(arr) - tm.assert_series_equal(result, pd.Series(pa)) - - result = pd.DataFrame({"A": arr}) - tm.assert_frame_equal(result, pd.DataFrame({"A": pa})) - - -def test_from_datetime64_freq_changes(): - # https://github.com/pandas-dev/pandas/issues/23438 - arr = pd.date_range("2017", periods=3, freq="D") - result = PeriodArray._from_datetime64(arr, freq="M") - expected = period_array(["2017-01-01", "2017-01-01", "2017-01-01"], freq="M") - tm.assert_period_array_equal(result, expected) - - -@pytest.mark.parametrize( - "data, freq, msg", - [ - ( - [pd.Period("2017", "D"), pd.Period("2017", "A")], - None, - "Input has different freq", - ), - ([pd.Period("2017", "D")], "A", "Input has different freq"), - ], -) -def test_period_array_raises(data, freq, msg): - with pytest.raises(IncompatibleFrequency, match=msg): - period_array(data, freq) - - -def test_period_array_non_period_series_raies(): - ser = pd.Series([1, 2, 3]) - with pytest.raises(TypeError, match="dtype"): - PeriodArray(ser, freq="D") - - -def test_period_array_freq_mismatch(): - arr = period_array(["2000", "2001"], freq="D") - with pytest.raises(IncompatibleFrequency, match="freq"): - PeriodArray(arr, freq="M") - - with pytest.raises(IncompatibleFrequency, match="freq"): - PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd()) - - -def test_from_sequence_disallows_i8(): - arr = period_array(["2000", "2001"], freq="D") - - msg = str(arr[0].ordinal) - with pytest.raises(TypeError, match=msg): - PeriodArray._from_sequence(arr.asi8, dtype=arr.dtype) - - with pytest.raises(TypeError, match=msg): - PeriodArray._from_sequence(list(arr.asi8), dtype=arr.dtype) - - def test_asi8(): result = period_array(["2000", "2001", None], freq="D").asi8 expected = np.array([10957, 11323, iNaT]) @@ -129,68 +42,6 @@ def test_take_raises(): arr.take([0, -1], allow_fill=True, fill_value="foo") -@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) -def test_astype(dtype): - # We choose to ignore the sign and size of integers for - # Period/Datetime/Timedelta astype - arr = period_array(["2000", "2001", None], freq="D") - with tm.assert_produces_warning(FutureWarning): - # astype(int..) deprecated - result = arr.astype(dtype) - - if np.dtype(dtype).kind == "u": - expected_dtype = np.dtype("uint64") - else: - expected_dtype = np.dtype("int64") - - with tm.assert_produces_warning(FutureWarning): - # astype(int..) deprecated - expected = arr.astype(expected_dtype) - - assert result.dtype == expected_dtype - tm.assert_numpy_array_equal(result, expected) - - -def test_astype_copies(): - arr = period_array(["2000", "2001", None], freq="D") - with tm.assert_produces_warning(FutureWarning): - # astype(int..) deprecated - result = arr.astype(np.int64, copy=False) - - # Add the `.base`, since we now use `.asi8` which returns a view. - # We could maybe override it in PeriodArray to return ._data directly. - assert result.base is arr._data - - with tm.assert_produces_warning(FutureWarning): - # astype(int..) deprecated - result = arr.astype(np.int64, copy=True) - assert result is not arr._data - tm.assert_numpy_array_equal(result, arr._data.view("i8")) - - -def test_astype_categorical(): - arr = period_array(["2000", "2001", "2001", None], freq="D") - result = arr.astype("category") - categories = pd.PeriodIndex(["2000", "2001"], freq="D") - expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories) - tm.assert_categorical_equal(result, expected) - - -def test_astype_period(): - arr = period_array(["2000", "2001", None], freq="D") - result = arr.astype(PeriodDtype("M")) - expected = period_array(["2000", "2001", None], freq="M") - tm.assert_period_array_equal(result, expected) - - -@pytest.mark.parametrize("other", ["datetime64[ns]", "timedelta64[ns]"]) -def test_astype_datetime(other): - arr = period_array(["2000", "2001", None], freq="D") - # slice off the [ns] so that the regex matches. - with pytest.raises(TypeError, match=other[:-4]): - arr.astype(other) - - def test_fillna_raises(): arr = period_array(["2000", "2001", "2002"], freq="D") with pytest.raises(ValueError, match="Length"): @@ -306,155 +157,3 @@ def test_repr_large(): "Length: 1000, dtype: period[D]" ) assert result == expected - - -# ---------------------------------------------------------------------------- -# Reductions - - -class TestReductions: - def test_min_max(self): - arr = period_array( - [ - "2000-01-03", - "2000-01-03", - "NaT", - "2000-01-02", - "2000-01-05", - "2000-01-04", - ], - freq="D", - ) - - result = arr.min() - expected = pd.Period("2000-01-02", freq="D") - assert result == expected - - result = arr.max() - expected = pd.Period("2000-01-05", freq="D") - assert result == expected - - result = arr.min(skipna=False) - assert result is pd.NaT - - result = arr.max(skipna=False) - assert result is pd.NaT - - @pytest.mark.parametrize("skipna", [True, False]) - def test_min_max_empty(self, skipna): - arr = period_array([], freq="D") - result = arr.min(skipna=skipna) - assert result is pd.NaT - - result = arr.max(skipna=skipna) - assert result is pd.NaT - - -# ---------------------------------------------------------------------------- -# Arrow interaction - -pyarrow_skip = pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.15.1.dev") - - -@pyarrow_skip -def test_arrow_extension_type(): - from pandas.core.arrays._arrow_utils import ArrowPeriodType - - p1 = ArrowPeriodType("D") - p2 = ArrowPeriodType("D") - p3 = ArrowPeriodType("M") - - assert p1.freq == "D" - assert p1 == p2 - assert not p1 == p3 - assert hash(p1) == hash(p2) - assert not hash(p1) == hash(p3) - - -@pyarrow_skip -@pytest.mark.parametrize( - "data, freq", - [ - (pd.date_range("2017", periods=3), "D"), - (pd.date_range("2017", periods=3, freq="A"), "A-DEC"), - ], -) -def test_arrow_array(data, freq): - import pyarrow as pa - - from pandas.core.arrays._arrow_utils import ArrowPeriodType - - periods = period_array(data, freq=freq) - result = pa.array(periods) - assert isinstance(result.type, ArrowPeriodType) - assert result.type.freq == freq - expected = pa.array(periods.asi8, type="int64") - assert result.storage.equals(expected) - - # convert to its storage type - result = pa.array(periods, type=pa.int64()) - assert result.equals(expected) - - # unsupported conversions - msg = "Not supported to convert PeriodArray to 'double' type" - with pytest.raises(TypeError, match=msg): - pa.array(periods, type="float64") - - with pytest.raises(TypeError, match="different 'freq'"): - pa.array(periods, type=ArrowPeriodType("T")) - - -@pyarrow_skip -def test_arrow_array_missing(): - import pyarrow as pa - - from pandas.core.arrays._arrow_utils import ArrowPeriodType - - arr = PeriodArray([1, 2, 3], freq="D") - arr[1] = pd.NaT - - result = pa.array(arr) - assert isinstance(result.type, ArrowPeriodType) - assert result.type.freq == "D" - expected = pa.array([1, None, 3], type="int64") - assert result.storage.equals(expected) - - -@pyarrow_skip -def test_arrow_table_roundtrip(): - import pyarrow as pa - - from pandas.core.arrays._arrow_utils import ArrowPeriodType - - arr = PeriodArray([1, 2, 3], freq="D") - arr[1] = pd.NaT - df = pd.DataFrame({"a": arr}) - - table = pa.table(df) - assert isinstance(table.field("a").type, ArrowPeriodType) - result = table.to_pandas() - assert isinstance(result["a"].dtype, PeriodDtype) - tm.assert_frame_equal(result, df) - - table2 = pa.concat_tables([table, table]) - result = table2.to_pandas() - expected = pd.concat([df, df], ignore_index=True) - tm.assert_frame_equal(result, expected) - - -@pyarrow_skip -def test_arrow_table_roundtrip_without_metadata(): - import pyarrow as pa - - arr = PeriodArray([1, 2, 3], freq="H") - arr[1] = pd.NaT - df = pd.DataFrame({"a": arr}) - - table = pa.table(df) - # remove the metadata - table = table.replace_schema_metadata() - assert table.schema.metadata is None - - result = table.to_pandas() - assert isinstance(result["a"].dtype, PeriodDtype) - tm.assert_frame_equal(result, df) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 9d9ca41779b5a..9e2b8e0f1603e 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -4,81 +4,10 @@ import pandas as pd from pandas import Timedelta import pandas._testing as tm -from pandas.core import nanops from pandas.core.arrays import TimedeltaArray -class TestTimedeltaArrayConstructor: - def test_only_1dim_accepted(self): - # GH#25282 - arr = np.array([0, 1, 2, 3], dtype="m8[h]").astype("m8[ns]") - - with pytest.raises(ValueError, match="Only 1-dimensional"): - # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 - TimedeltaArray(arr.reshape(2, 2, 1)) - - with pytest.raises(ValueError, match="Only 1-dimensional"): - # 0-dim - TimedeltaArray(arr[[0]].squeeze()) - - def test_freq_validation(self): - # ensure that the public constructor cannot create an invalid instance - arr = np.array([0, 0, 1], dtype=np.int64) * 3600 * 10 ** 9 - - msg = ( - "Inferred frequency None from passed values does not " - "conform to passed frequency D" - ) - with pytest.raises(ValueError, match=msg): - TimedeltaArray(arr.view("timedelta64[ns]"), freq="D") - - def test_non_array_raises(self): - with pytest.raises(ValueError, match="list"): - TimedeltaArray([1, 2, 3]) - - def test_other_type_raises(self): - with pytest.raises(ValueError, match="dtype bool cannot be converted"): - TimedeltaArray(np.array([1, 2, 3], dtype="bool")) - - def test_incorrect_dtype_raises(self): - # TODO: why TypeError for 'category' but ValueError for i8? - with pytest.raises( - ValueError, match=r"category cannot be converted to timedelta64\[ns\]" - ): - TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype="category") - - with pytest.raises( - ValueError, match=r"dtype int64 cannot be converted to timedelta64\[ns\]" - ): - TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype=np.dtype("int64")) - - def test_copy(self): - data = np.array([1, 2, 3], dtype="m8[ns]") - arr = TimedeltaArray(data, copy=False) - assert arr._data is data - - arr = TimedeltaArray(data, copy=True) - assert arr._data is not data - assert arr._data.base is not data - - class TestTimedeltaArray: - # TODO: de-duplicate with test_npsum below - def test_np_sum(self): - # GH#25282 - vals = np.arange(5, dtype=np.int64).view("m8[h]").astype("m8[ns]") - arr = TimedeltaArray(vals) - result = np.sum(arr) - assert result == vals.sum() - - result = np.sum(pd.TimedeltaIndex(arr)) - assert result == vals.sum() - - def test_from_sequence_dtype(self): - msg = "dtype .*object.* cannot be converted to timedelta64" - with pytest.raises(ValueError, match=msg): - TimedeltaArray._from_sequence([], dtype=object) - @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) def test_astype_int(self, dtype): arr = TimedeltaArray._from_sequence([Timedelta("1H"), Timedelta("2H")]) @@ -179,209 +108,3 @@ def test_neg_freq(self): result = -arr tm.assert_timedelta_array_equal(result, expected) - - -class TestReductions: - @pytest.mark.parametrize("name", ["std", "min", "max", "median", "mean"]) - @pytest.mark.parametrize("skipna", [True, False]) - def test_reductions_empty(self, name, skipna): - tdi = pd.TimedeltaIndex([]) - arr = tdi.array - - result = getattr(tdi, name)(skipna=skipna) - assert result is pd.NaT - - result = getattr(arr, name)(skipna=skipna) - assert result is pd.NaT - - @pytest.mark.parametrize("skipna", [True, False]) - def test_sum_empty(self, skipna): - tdi = pd.TimedeltaIndex([]) - arr = tdi.array - - result = tdi.sum(skipna=skipna) - assert isinstance(result, Timedelta) - assert result == Timedelta(0) - - result = arr.sum(skipna=skipna) - assert isinstance(result, Timedelta) - assert result == Timedelta(0) - - def test_min_max(self): - arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"]) - - result = arr.min() - expected = Timedelta("2H") - assert result == expected - - result = arr.max() - expected = Timedelta("5H") - assert result == expected - - result = arr.min(skipna=False) - assert result is pd.NaT - - result = arr.max(skipna=False) - assert result is pd.NaT - - def test_sum(self): - tdi = pd.TimedeltaIndex(["3H", "3H", "NaT", "2H", "5H", "4H"]) - arr = tdi.array - - result = arr.sum(skipna=True) - expected = Timedelta(hours=17) - assert isinstance(result, Timedelta) - assert result == expected - - result = tdi.sum(skipna=True) - assert isinstance(result, Timedelta) - assert result == expected - - result = arr.sum(skipna=False) - assert result is pd.NaT - - result = tdi.sum(skipna=False) - assert result is pd.NaT - - result = arr.sum(min_count=9) - assert result is pd.NaT - - result = tdi.sum(min_count=9) - assert result is pd.NaT - - result = arr.sum(min_count=1) - assert isinstance(result, Timedelta) - assert result == expected - - result = tdi.sum(min_count=1) - assert isinstance(result, Timedelta) - assert result == expected - - def test_npsum(self): - # GH#25335 np.sum should return a Timedelta, not timedelta64 - tdi = pd.TimedeltaIndex(["3H", "3H", "2H", "5H", "4H"]) - arr = tdi.array - - result = np.sum(tdi) - expected = Timedelta(hours=17) - assert isinstance(result, Timedelta) - assert result == expected - - result = np.sum(arr) - assert isinstance(result, Timedelta) - assert result == expected - - def test_sum_2d_skipna_false(self): - arr = np.arange(8).astype(np.int64).view("m8[s]").astype("m8[ns]").reshape(4, 2) - arr[-1, -1] = "Nat" - - tda = TimedeltaArray(arr) - - result = tda.sum(skipna=False) - assert result is pd.NaT - - result = tda.sum(axis=0, skipna=False) - expected = pd.TimedeltaIndex([Timedelta(seconds=12), pd.NaT])._values - tm.assert_timedelta_array_equal(result, expected) - - result = tda.sum(axis=1, skipna=False) - expected = pd.TimedeltaIndex( - [ - Timedelta(seconds=1), - Timedelta(seconds=5), - Timedelta(seconds=9), - pd.NaT, - ] - )._values - tm.assert_timedelta_array_equal(result, expected) - - # Adding a Timestamp makes this a test for DatetimeArray.std - @pytest.mark.parametrize( - "add", - [ - Timedelta(0), - pd.Timestamp.now(), - pd.Timestamp.now("UTC"), - pd.Timestamp.now("Asia/Tokyo"), - ], - ) - def test_std(self, add): - tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"]) + add - arr = tdi.array - - result = arr.std(skipna=True) - expected = Timedelta(hours=2) - assert isinstance(result, Timedelta) - assert result == expected - - result = tdi.std(skipna=True) - assert isinstance(result, Timedelta) - assert result == expected - - if getattr(arr, "tz", None) is None: - result = nanops.nanstd(np.asarray(arr), skipna=True) - assert isinstance(result, Timedelta) - assert result == expected - - result = arr.std(skipna=False) - assert result is pd.NaT - - result = tdi.std(skipna=False) - assert result is pd.NaT - - if getattr(arr, "tz", None) is None: - result = nanops.nanstd(np.asarray(arr), skipna=False) - assert result is pd.NaT - - def test_median(self): - tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"]) - arr = tdi.array - - result = arr.median(skipna=True) - expected = Timedelta(hours=2) - assert isinstance(result, Timedelta) - assert result == expected - - result = tdi.median(skipna=True) - assert isinstance(result, Timedelta) - assert result == expected - - result = arr.median(skipna=False) - assert result is pd.NaT - - result = tdi.median(skipna=False) - assert result is pd.NaT - - def test_mean(self): - tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"]) - arr = tdi._data - - # manually verified result - expected = Timedelta(arr.dropna()._ndarray.mean()) - - result = arr.mean() - assert result == expected - result = arr.mean(skipna=False) - assert result is pd.NaT - - result = arr.dropna().mean(skipna=False) - assert result == expected - - result = arr.mean(axis=0) - assert result == expected - - def test_mean_2d(self): - tdi = pd.timedelta_range("14 days", periods=6) - tda = tdi._data.reshape(3, 2) - - result = tda.mean(axis=0) - expected = tda[1] - tm.assert_timedelta_array_equal(result, expected) - - result = tda.mean(axis=1) - expected = tda[:, 0] + Timedelta(hours=12) - tm.assert_timedelta_array_equal(result, expected) - - result = tda.mean(axis=None) - expected = tdi.mean() - assert result == expected diff --git a/pandas/tests/arrays/timedeltas/__init__.py b/pandas/tests/arrays/timedeltas/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/arrays/timedeltas/test_constructors.py b/pandas/tests/arrays/timedeltas/test_constructors.py new file mode 100644 index 0000000000000..d297e745f107b --- /dev/null +++ b/pandas/tests/arrays/timedeltas/test_constructors.py @@ -0,0 +1,63 @@ +import numpy as np +import pytest + +from pandas.core.arrays import TimedeltaArray + + +class TestTimedeltaArrayConstructor: + def test_only_1dim_accepted(self): + # GH#25282 + arr = np.array([0, 1, 2, 3], dtype="m8[h]").astype("m8[ns]") + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 + TimedeltaArray(arr.reshape(2, 2, 1)) + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 0-dim + TimedeltaArray(arr[[0]].squeeze()) + + def test_freq_validation(self): + # ensure that the public constructor cannot create an invalid instance + arr = np.array([0, 0, 1], dtype=np.int64) * 3600 * 10 ** 9 + + msg = ( + "Inferred frequency None from passed values does not " + "conform to passed frequency D" + ) + with pytest.raises(ValueError, match=msg): + TimedeltaArray(arr.view("timedelta64[ns]"), freq="D") + + def test_non_array_raises(self): + with pytest.raises(ValueError, match="list"): + TimedeltaArray([1, 2, 3]) + + def test_other_type_raises(self): + with pytest.raises(ValueError, match="dtype bool cannot be converted"): + TimedeltaArray(np.array([1, 2, 3], dtype="bool")) + + def test_incorrect_dtype_raises(self): + # TODO: why TypeError for 'category' but ValueError for i8? + with pytest.raises( + ValueError, match=r"category cannot be converted to timedelta64\[ns\]" + ): + TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype="category") + + with pytest.raises( + ValueError, match=r"dtype int64 cannot be converted to timedelta64\[ns\]" + ): + TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype=np.dtype("int64")) + + def test_copy(self): + data = np.array([1, 2, 3], dtype="m8[ns]") + arr = TimedeltaArray(data, copy=False) + assert arr._data is data + + arr = TimedeltaArray(data, copy=True) + assert arr._data is not data + assert arr._data.base is not data + + def test_from_sequence_dtype(self): + msg = "dtype .*object.* cannot be converted to timedelta64" + with pytest.raises(ValueError, match=msg): + TimedeltaArray._from_sequence([], dtype=object) diff --git a/pandas/tests/arrays/timedeltas/test_reductions.py b/pandas/tests/arrays/timedeltas/test_reductions.py new file mode 100644 index 0000000000000..5f278b09dc818 --- /dev/null +++ b/pandas/tests/arrays/timedeltas/test_reductions.py @@ -0,0 +1,225 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Timedelta +import pandas._testing as tm +from pandas.core import nanops +from pandas.core.arrays import TimedeltaArray + + +class TestReductions: + @pytest.mark.parametrize("name", ["std", "min", "max", "median", "mean"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_reductions_empty(self, name, skipna): + tdi = pd.TimedeltaIndex([]) + arr = tdi.array + + result = getattr(tdi, name)(skipna=skipna) + assert result is pd.NaT + + result = getattr(arr, name)(skipna=skipna) + assert result is pd.NaT + + @pytest.mark.parametrize("skipna", [True, False]) + def test_sum_empty(self, skipna): + tdi = pd.TimedeltaIndex([]) + arr = tdi.array + + result = tdi.sum(skipna=skipna) + assert isinstance(result, Timedelta) + assert result == Timedelta(0) + + result = arr.sum(skipna=skipna) + assert isinstance(result, Timedelta) + assert result == Timedelta(0) + + def test_min_max(self): + arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"]) + + result = arr.min() + expected = Timedelta("2H") + assert result == expected + + result = arr.max() + expected = Timedelta("5H") + assert result == expected + + result = arr.min(skipna=False) + assert result is pd.NaT + + result = arr.max(skipna=False) + assert result is pd.NaT + + def test_sum(self): + tdi = pd.TimedeltaIndex(["3H", "3H", "NaT", "2H", "5H", "4H"]) + arr = tdi.array + + result = arr.sum(skipna=True) + expected = Timedelta(hours=17) + assert isinstance(result, Timedelta) + assert result == expected + + result = tdi.sum(skipna=True) + assert isinstance(result, Timedelta) + assert result == expected + + result = arr.sum(skipna=False) + assert result is pd.NaT + + result = tdi.sum(skipna=False) + assert result is pd.NaT + + result = arr.sum(min_count=9) + assert result is pd.NaT + + result = tdi.sum(min_count=9) + assert result is pd.NaT + + result = arr.sum(min_count=1) + assert isinstance(result, Timedelta) + assert result == expected + + result = tdi.sum(min_count=1) + assert isinstance(result, Timedelta) + assert result == expected + + # TODO: de-duplicate with test_npsum below + def test_np_sum(self): + # GH#25282 + vals = np.arange(5, dtype=np.int64).view("m8[h]").astype("m8[ns]") + arr = TimedeltaArray(vals) + result = np.sum(arr) + assert result == vals.sum() + + result = np.sum(pd.TimedeltaIndex(arr)) + assert result == vals.sum() + + def test_npsum(self): + # GH#25335 np.sum should return a Timedelta, not timedelta64 + tdi = pd.TimedeltaIndex(["3H", "3H", "2H", "5H", "4H"]) + arr = tdi.array + + result = np.sum(tdi) + expected = Timedelta(hours=17) + assert isinstance(result, Timedelta) + assert result == expected + + result = np.sum(arr) + assert isinstance(result, Timedelta) + assert result == expected + + def test_sum_2d_skipna_false(self): + arr = np.arange(8).astype(np.int64).view("m8[s]").astype("m8[ns]").reshape(4, 2) + arr[-1, -1] = "Nat" + + tda = TimedeltaArray(arr) + + result = tda.sum(skipna=False) + assert result is pd.NaT + + result = tda.sum(axis=0, skipna=False) + expected = pd.TimedeltaIndex([Timedelta(seconds=12), pd.NaT])._values + tm.assert_timedelta_array_equal(result, expected) + + result = tda.sum(axis=1, skipna=False) + expected = pd.TimedeltaIndex( + [ + Timedelta(seconds=1), + Timedelta(seconds=5), + Timedelta(seconds=9), + pd.NaT, + ] + )._values + tm.assert_timedelta_array_equal(result, expected) + + # Adding a Timestamp makes this a test for DatetimeArray.std + @pytest.mark.parametrize( + "add", + [ + Timedelta(0), + pd.Timestamp.now(), + pd.Timestamp.now("UTC"), + pd.Timestamp.now("Asia/Tokyo"), + ], + ) + def test_std(self, add): + tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"]) + add + arr = tdi.array + + result = arr.std(skipna=True) + expected = Timedelta(hours=2) + assert isinstance(result, Timedelta) + assert result == expected + + result = tdi.std(skipna=True) + assert isinstance(result, Timedelta) + assert result == expected + + if getattr(arr, "tz", None) is None: + result = nanops.nanstd(np.asarray(arr), skipna=True) + assert isinstance(result, Timedelta) + assert result == expected + + result = arr.std(skipna=False) + assert result is pd.NaT + + result = tdi.std(skipna=False) + assert result is pd.NaT + + if getattr(arr, "tz", None) is None: + result = nanops.nanstd(np.asarray(arr), skipna=False) + assert result is pd.NaT + + def test_median(self): + tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"]) + arr = tdi.array + + result = arr.median(skipna=True) + expected = Timedelta(hours=2) + assert isinstance(result, Timedelta) + assert result == expected + + result = tdi.median(skipna=True) + assert isinstance(result, Timedelta) + assert result == expected + + result = arr.median(skipna=False) + assert result is pd.NaT + + result = tdi.median(skipna=False) + assert result is pd.NaT + + def test_mean(self): + tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"]) + arr = tdi._data + + # manually verified result + expected = Timedelta(arr.dropna()._ndarray.mean()) + + result = arr.mean() + assert result == expected + result = arr.mean(skipna=False) + assert result is pd.NaT + + result = arr.dropna().mean(skipna=False) + assert result == expected + + result = arr.mean(axis=0) + assert result == expected + + def test_mean_2d(self): + tdi = pd.timedelta_range("14 days", periods=6) + tda = tdi._data.reshape(3, 2) + + result = tda.mean(axis=0) + expected = tda[1] + tm.assert_timedelta_array_equal(result, expected) + + result = tda.mean(axis=1) + expected = tda[:, 0] + Timedelta(hours=12) + tm.assert_timedelta_array_equal(result, expected) + + result = tda.mean(axis=None) + expected = tdi.mean() + assert result == expected