From a56276926bbd1b633c70912d0db20d733b1b6b5b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 4 Jan 2019 11:32:14 -0800 Subject: [PATCH 1/9] revert treatment of i8values --- pandas/core/arrays/datetimes.py | 101 ++++++++--------------- pandas/core/frame.py | 5 +- pandas/core/indexes/datetimes.py | 4 +- pandas/core/internals/blocks.py | 2 +- pandas/io/packers.py | 13 ++- pandas/tests/arrays/test_datetimelike.py | 2 +- pandas/tests/arrays/test_datetimes.py | 6 +- pandas/tests/test_base.py | 6 +- 8 files changed, 60 insertions(+), 79 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a55e8759deedb..fcc6fd9b75d00 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -236,83 +236,52 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, _dtype = None # type: Union[np.dtype, DatetimeTZDtype] _freq = None - def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False): - if isinstance(values, (ABCSeries, ABCIndexClass)): - values = values._values - - if isinstance(values, type(self)): - # validation - dtz = getattr(dtype, 'tz', None) - if dtz and values.tz is None: - dtype = DatetimeTZDtype(tz=dtype.tz) - elif dtz and values.tz: - if not timezones.tz_compare(dtz, values.tz): - msg = ( - "Timezone of the array and 'dtype' do not match. " - "'{}' != '{}'" - ) - raise TypeError(msg.format(dtz, values.tz)) - elif values.tz: - dtype = values.dtype - # freq = validate_values_freq(values, freq) - if freq is None: - freq = values.freq - values = values._data + def __init__(self, values, dtype=None, freq=None, copy=False): + if freq == "infer": + raise ValueError( + "Frequency inference not allowed in DatetimeArray.__init__. " + "Use 'pd.array()' instead.") - if not isinstance(values, np.ndarray): - msg = ( - "Unexpected type '{}'. 'values' must be a DatetimeArray " + if not hasattr(values, "dtype"): + # e.g. list + raise ValueError( + "Unexpected type '{vals}'. 'values' must be a DatetimeArray " "ndarray, or Series or Index containing one of those." - ) - raise ValueError(msg.format(type(values).__name__)) + .format(vals=type(values).__name__)) - if values.dtype == 'i8': - # for compat with datetime/timedelta/period shared methods, - # we can sometimes get here with int64 values. These represent - # nanosecond UTC (or tz-naive) unix timestamps - values = values.view(_NS_DTYPE) - - if values.dtype != _NS_DTYPE: - msg = ( + if values.dtype == np.bool_: + raise ValueError( "The dtype of 'values' is incorrect. Must be 'datetime64[ns]'." - " Got {} instead." - ) - raise ValueError(msg.format(values.dtype)) + " Got {dtype} instead." + .format(dtype=values.dtype)) - dtype = _validate_dt64_dtype(dtype) - - if freq == "infer": - msg = ( - "Frequency inference not allowed in DatetimeArray.__init__. " - "Use 'pd.array()' instead." - ) - raise ValueError(msg) - - if copy: - values = values.copy() - if freq: - freq = to_offset(freq) - if getattr(dtype, 'tz', None): - # https://github.com/pandas-dev/pandas/issues/18595 - # Ensure that we have a standard timezone for pytz objects. - # Without this, things like adding an array of timedeltas and - # a tz-aware Timestamp (with a tz specific to its datetime) will - # be incorrect(ish?) for the array as a whole - dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz)) - - self._data = values - self._dtype = dtype - self._freq = freq + arr = type(self)._from_sequence(values, dtype=dtype, + freq=freq, copy=copy) + self._data = arr._data + self._freq = arr._freq + self._dtype = arr._dtype @classmethod - def _simple_new(cls, values, freq=None, tz=None): + def _simple_new(cls, values, freq=None, tz=None, dtype=None): """ we require the we have a dtype compat for the values if we are passed a non-dtype compat, then coerce using the constructor """ - dtype = DatetimeTZDtype(tz=tz) if tz else _NS_DTYPE - - return cls(values, freq=freq, dtype=dtype) + if tz is not None: + # TODO: get tz out of here altogether + assert dtype is None + tz = timezones.tz_standardize(tz) + dtype = DatetimeTZDtype(tz=tz) + elif dtype is None: + dtype = _NS_DTYPE + + assert isinstance(values, np.ndarray), type(values) + + result = object.__new__(cls) + result._data = values.view('datetime64[ns]') + result._freq = freq + result._dtype = dtype + return result @classmethod def _from_sequence(cls, data, dtype=None, copy=False, diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a50def7357826..745fa2f0123a2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4360,7 +4360,7 @@ def _maybe_casted_values(index, labels=None): values.fill(np.nan) else: values = values.take(labels) - + # # TODO(https://github.com/pandas-dev/pandas/issues/24206) # Push this into maybe_upcast_putmask? # We can't pass EAs there right now. Looks a bit @@ -4377,7 +4377,8 @@ def _maybe_casted_values(index, labels=None): values, mask, np.nan) if issubclass(values_type, DatetimeLikeArray): - values = values_type(values, dtype=values_dtype) + values = values_type._simple_new(values, + dtype=values_dtype) return values diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f396f081267b3..cd1b886810022 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -316,12 +316,12 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): tz = validate_tz_from_dtype(dtype, tz) dtype = DatetimeTZDtype(tz=tz) elif dtype is None: - dtype = _NS_DTYPE + dtype = values.dtype values = DatetimeArray(values, freq=freq, dtype=dtype) tz = values.tz freq = values.freq - values = values._data + values = values._data.view('i8') # DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes if isinstance(values, DatetimeIndex): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f88114e1c9e20..9d5b9ca7c2660 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2362,7 +2362,7 @@ def _try_coerce_args(self, values, other): raise TypeError elif is_datetime64_dtype(other): # add the tz back - other = self._holder(other, dtype=self.dtype) + other = self._holder(other.ravel(), dtype=self.dtype) elif (is_null_datelike_scalar(other) or (lib.is_scalar(other) and isna(other))): diff --git a/pandas/io/packers.py b/pandas/io/packers.py index e6d18d5d4193a..124872a9ccc60 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -53,14 +53,15 @@ BadMove as _BadMove, move_into_mutable_buffer as _move_into_mutable_buffer) from pandas.core.dtypes.common import ( - is_categorical_dtype, is_object_dtype, needs_i8_conversion, pandas_dtype) + is_categorical_dtype, is_datetime64tz_dtype, is_object_dtype, + needs_i8_conversion, pandas_dtype) from pandas import ( # noqa:F401 Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Float64Index, Index, Int64Index, Interval, IntervalIndex, MultiIndex, NaT, Panel, Period, PeriodIndex, RangeIndex, Series, TimedeltaIndex, Timestamp) from pandas.core import internals -from pandas.core.arrays import IntervalArray, PeriodArray +from pandas.core.arrays import DatetimeArray, IntervalArray, PeriodArray from pandas.core.arrays.sparse import BlockIndex, IntIndex from pandas.core.generic import NDFrame from pandas.core.internals import BlockManager, _safe_reshape, make_block @@ -651,6 +652,14 @@ def create_block(b): placement = b[u'locs'] else: placement = axes[0].get_indexer(b[u'items']) + + if is_datetime64tz_dtype(b[u'dtype']): + assert isinstance(values, np.ndarray), type(values) + assert values.dtype == 'datetime64[ns]', values.dtype + # These values are interpreted as unix timestamps, so we + # view as i8 + values = DatetimeArray(values.view('i8'), dtype=b[u'dtype']) + return make_block(values=values, klass=getattr(internals, b[u'klass']), placement=placement, diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index db88d94be1cab..0e7b0c07f874a 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -293,7 +293,7 @@ def test_from_array_keeps_base(self): arr = np.array(['2000-01-01', '2000-01-02'], dtype='M8[ns]') dta = DatetimeArray(arr) - assert dta._data is arr + assert dta._data.base is arr dta = DatetimeArray(arr[:0]) assert dta._data.base is arr diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 8890593b1fa9d..d17c98f14d8e0 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -28,7 +28,8 @@ def test_mismatched_timezone_raises(self): arr = DatetimeArray(np.array(['2000-01-01T06:00:00'], dtype='M8[ns]'), dtype=DatetimeTZDtype(tz='US/Central')) dtype = DatetimeTZDtype(tz='US/Eastern') - with pytest.raises(TypeError, match='Timezone of the array'): + with pytest.raises(TypeError, + match='data is already tz-aware US/Central'): DatetimeArray(arr, dtype=dtype) def test_non_array_raises(self): @@ -51,10 +52,11 @@ def test_freq_infer_raises(self): def test_copy(self): data = np.array([1, 2, 3], dtype='M8[ns]') arr = DatetimeArray(data, copy=False) - assert arr._data is data + assert arr._data.base is data arr = DatetimeArray(data, copy=True) assert arr._data is not data + assert arr._data.base is not data class TestDatetimeArrayComparisons(object): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 657f5f193c85e..91a127cde9d9b 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1241,7 +1241,7 @@ def test_numpy_array_all_dtypes(any_numpy_dtype): # tz-aware Datetime (DatetimeArray(np.array(['2000-01-01T12:00:00', '2000-01-02T12:00:00'], - dtype='M8[ns]'), + dtype='M8[ns]').view('i8'), dtype=DatetimeTZDtype(tz="US/Central")), '_data'), ]) @@ -1255,7 +1255,7 @@ def test_array(array, attr, box): array = getattr(array, attr) result = getattr(result, attr) - assert result is array + assert result is array or result.base is array.base def test_array_multiindex_raises(): @@ -1282,7 +1282,7 @@ def test_array_multiindex_raises(): # tz-aware stays tz`-aware (DatetimeArray(np.array(['2000-01-01T06:00:00', '2000-01-02T06:00:00'], - dtype='M8[ns]'), + dtype='M8[ns]').view('i8'), dtype=DatetimeTZDtype(tz='US/Central')), np.array([pd.Timestamp('2000-01-01', tz='US/Central'), pd.Timestamp('2000-01-02', tz='US/Central')])), From 21d722822a1358fce5f2f2f0ae5c77dce9adfe05 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 4 Jan 2019 11:35:37 -0800 Subject: [PATCH 2/9] remove debug pound --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 745fa2f0123a2..b55284c3eb541 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4360,7 +4360,7 @@ def _maybe_casted_values(index, labels=None): values.fill(np.nan) else: values = values.take(labels) - # + # TODO(https://github.com/pandas-dev/pandas/issues/24206) # Push this into maybe_upcast_putmask? # We can't pass EAs there right now. Looks a bit From 401abd8d287a4452f6e87f1b7b53b356b7126583 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 5 Jan 2019 15:38:58 -0800 Subject: [PATCH 3/9] best guess at feather fix --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 315ee3591e439..a84e87bd8b1c5 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -3078,7 +3078,7 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None, elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values): # TODO: This is no longer hit internally; does it need to be retained # for e.g. pyarrow? - values = DatetimeArray(values, dtype) + values = DatetimeArray(values.view('i8'), dtype) return klass(values, ndim=ndim, placement=placement) From a46a2cc47bc9c0c80eb8a147dbfda3100f44af58 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 5 Jan 2019 16:28:00 -0800 Subject: [PATCH 4/9] troubleshoot pyarrow fail --- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/datetimes.py | 19 +++++++++---------- pandas/tests/arrays/test_datetimes.py | 2 +- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2de22e062b29b..50e37cd3b779e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -611,7 +611,7 @@ def _concat_same_type(cls, to_concat): def copy(self, deep=False): values = self.asi8.copy() - return type(self)(values, dtype=self.dtype, freq=self.freq) + return type(self)._simple_new(values, dtype=self.dtype, freq=self.freq) def _values_for_factorize(self): return self.asi8, iNaT diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 868e3b5d5bd04..596aae13b4b83 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -14,10 +14,10 @@ from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( - _INT64_DTYPE, _NS_DTYPE, is_categorical_dtype, is_datetime64_dtype, - is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal, - is_extension_type, is_float_dtype, is_object_dtype, is_period_dtype, - is_string_dtype, is_timedelta64_dtype, pandas_dtype) + _INT64_DTYPE, _NS_DTYPE, is_bool_dtype, is_categorical_dtype, + is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype, + is_dtype_equal, is_extension_type, is_float_dtype, is_object_dtype, + is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCIndexClass, ABCPandasArray, ABCSeries) @@ -249,12 +249,6 @@ def __init__(self, values, dtype=None, freq=None, copy=False): "ndarray, or Series or Index containing one of those." .format(vals=type(values).__name__)) - if values.dtype == np.bool_: - raise ValueError( - "The dtype of 'values' is incorrect. Must be 'datetime64[ns]'." - " Got {dtype} instead." - .format(dtype=values.dtype)) - arr = type(self)._from_sequence(values, dtype=dtype, freq=freq, copy=copy) self._data = arr._data @@ -1810,6 +1804,11 @@ def maybe_convert_dtype(data, copy): FutureWarning, stacklevel=5) data = data.view(_NS_DTYPE) + elif is_bool_dtype(data): + raise TypeError("The dtype of 'data' is incorrect. Must be " + "'datetime64[ns]'. Got {dtype} instead." + .format(dtype=data.dtype)) + elif is_period_dtype(data): # Note: without explicitly raising here, PeriodIndex # test_setops.test_join_does_not_recur fails diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 160b9c35edef7..1de19df4d9e50 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -38,7 +38,7 @@ def test_non_array_raises(self): def test_other_type_raises(self): with pytest.raises(ValueError, - match="The dtype of 'values' is incorrect.*bool"): + match="The dtype of 'data' is incorrect.*bool"): DatetimeArray(np.array([1, 2, 3], dtype='bool')) def test_incorrect_dtype_raises(self): From 27e5fb7c07a34e5d699978d83ef378f386e2f7f6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 5 Jan 2019 17:06:41 -0800 Subject: [PATCH 5/9] revert --- pandas/core/arrays/datetimes.py | 18 +++++++++--------- pandas/tests/arrays/test_datetimes.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 596aae13b4b83..d19d25b127e80 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -14,10 +14,10 @@ from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( - _INT64_DTYPE, _NS_DTYPE, is_bool_dtype, is_categorical_dtype, - is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype, - is_dtype_equal, is_extension_type, is_float_dtype, is_object_dtype, - is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype) + _INT64_DTYPE, _NS_DTYPE, is_categorical_dtype, is_datetime64_dtype, + is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal, + is_extension_type, is_float_dtype, is_object_dtype, is_period_dtype, + is_string_dtype, is_timedelta64_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCIndexClass, ABCPandasArray, ABCSeries) @@ -249,6 +249,11 @@ def __init__(self, values, dtype=None, freq=None, copy=False): "ndarray, or Series or Index containing one of those." .format(vals=type(values).__name__)) + if values.dtype == np.bool_: + raise ValueError( + "The dtype of 'values' is incorrect. Must be 'datetime64[ns]'." + " Got {dtype} instead." .format(dtype=values.dtype)) + arr = type(self)._from_sequence(values, dtype=dtype, freq=freq, copy=copy) self._data = arr._data @@ -1804,11 +1809,6 @@ def maybe_convert_dtype(data, copy): FutureWarning, stacklevel=5) data = data.view(_NS_DTYPE) - elif is_bool_dtype(data): - raise TypeError("The dtype of 'data' is incorrect. Must be " - "'datetime64[ns]'. Got {dtype} instead." - .format(dtype=data.dtype)) - elif is_period_dtype(data): # Note: without explicitly raising here, PeriodIndex # test_setops.test_join_does_not_recur fails diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 1de19df4d9e50..160b9c35edef7 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -38,7 +38,7 @@ def test_non_array_raises(self): def test_other_type_raises(self): with pytest.raises(ValueError, - match="The dtype of 'data' is incorrect.*bool"): + match="The dtype of 'values' is incorrect.*bool"): DatetimeArray(np.array([1, 2, 3], dtype='bool')) def test_incorrect_dtype_raises(self): From 1715eba78546fc52af9addc34f152df1e25373eb Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 6 Jan 2019 17:31:14 -0800 Subject: [PATCH 6/9] remove unnecessary casting, add tests --- pandas/core/indexes/datetimes.py | 8 ++-- pandas/core/internals/blocks.py | 2 +- pandas/tests/arrays/test_datetimes.py | 61 +++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 05467468ae3e7..6eb46ede6fd92 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -321,13 +321,13 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): values = DatetimeArray(values, freq=freq, dtype=dtype) tz = values.tz freq = values.freq - values = values._data.view('i8') + values = values._data + else: + tz = tz or getattr(dtype, 'tz', None) # DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes - if isinstance(values, DatetimeIndex): - values = values._data + assert isinstance(values, np.ndarray) dtarr = DatetimeArray._simple_new(values, freq=freq, tz=tz) - assert isinstance(dtarr, DatetimeArray) result = object.__new__(cls) result._data = dtarr diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a84e87bd8b1c5..d8292e11afa35 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2352,7 +2352,7 @@ def _try_coerce_args(self, values, other): raise TypeError elif is_datetime64_dtype(other): # add the tz back - other = self._holder(other.ravel(), dtype=self.dtype) + other = self._holder(other, dtype=self.dtype) elif is_null_datetimelike(other): other = tslibs.iNaT diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 160b9c35edef7..cab4a72831e5d 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -16,7 +16,68 @@ class TestDatetimeArrayConstructor(object): + + @pytest.mark.parametrize('tz', [None, 'Asia/Singapore']) + def test_constructor_equivalence(self, tz): + # GH#24623 check that DatetimeArray.__init__ behavior matches: + # Timestamp.__new__ for int64 + # DatetimeArray._from_sequence for int64, datetime64[ns] + # DatetimeArray._simple_new for int64 + # DatetimeIndex.__new__ for int64, datetime64[ns] + # DatetimeIndex._simple_new for int64, datetime64[ns] + # + # and that DatetimeArray._simple_new behaves like + # DatetimeIndex._simple_new for both int64 and datetime64[ns] inputs + arr = np.random.randint(-10**9, 10**9, size=5, dtype=np.int64) + dti = pd.date_range('1960-01-01', periods=1, tz=tz) + + v1 = DatetimeArray._simple_new(arr.view('i8'), dtype=dti.dtype) + v2 = DatetimeArray(arr.view('i8'), dtype=dti.dtype) + v3 = DatetimeArray._from_sequence(arr.view('i8'), dtype=dti.dtype) + v4 = pd.DatetimeIndex._simple_new(arr.view('i8'), tz=dti.tz) + v5 = pd.DatetimeIndex(arr.view('i8'), tz=dti.tz) + v6 = pd.to_datetime(arr, utc=True).tz_convert(dti.tz) + + # when dealing with _simple_new, i8 and M8[ns] are interchangeable + v7 = DatetimeArray._simple_new(arr.view('M8[ns]'), dtype=dti.dtype) + v8 = pd.DatetimeIndex._simple_new(arr.view('M8[ns]'), dtype=dti.dtype) + + tm.assert_datetime_array_equal(v1, v2) + tm.assert_datetime_array_equal(v1, v3) + tm.assert_datetime_array_equal(v1, v4._data) + tm.assert_datetime_array_equal(v1, v5._data) + tm.assert_datetime_array_equal(v1, v6._data) + tm.assert_datetime_array_equal(v1, v7) + tm.assert_datetime_array_equal(v1, v8._data) + + expected = [pd.Timestamp(i8, tz=dti.tz) for i8 in arr] + assert list(v1) == expected + + # The guarantees for datetime64 data are fewer + dt64arr = arr.view('datetime64[ns]') + v1 = DatetimeArray(dt64arr, dtype=dti.dtype) + v2 = DatetimeArray._from_sequence(dt64arr, dtype=dti.dtype) + v3 = DatetimeArray._from_sequence(dt64arr, tz=dti.tz) + v4 = pd.DatetimeIndex(dt64arr, dtype=dti.dtype) + v5 = pd.DatetimeIndex(dt64arr, tz=dti.tz) + + tm.assert_datetime_array_equal(v1, v2) + tm.assert_datetime_array_equal(v1, v3) + tm.assert_datetime_array_equal(v1, v4._data) + tm.assert_datetime_array_equal(v1, v5._data) + + def test_freq_validation(self): + # GH#24623 check that invalid instances cannot be created with the + # public constructor + arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9 + + msg = ("Inferred frequency H from passed values does not " + "conform to passed frequency W-SUN") + with pytest.raises(ValueError, match=msg): + DatetimeArray(arr, freq="W") + def test_from_pandas_array(self): + # GH#24623, GH#24615 arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9 result = DatetimeArray._from_sequence(arr, freq='infer') From c109018914f6b6573e2c2e363e7d76b1b3d14bd8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 9 Jan 2019 08:30:40 -0800 Subject: [PATCH 7/9] standardize tzinfos --- pandas/core/arrays/datetimes.py | 1 + pandas/core/indexes/datetimes.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 653d808696b9c..d642fbd44b866 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -50,6 +50,7 @@ def tz_to_dtype(tz): if tz is None: return _NS_DTYPE else: + tz = timezones.tz_standardize(tz) return DatetimeTZDtype(tz=tz) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1d45f33cc0aae..aa61632441906 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -314,6 +314,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): if isinstance(values, DatetimeArray): if tz: tz = validate_tz_from_dtype(dtype, tz) + tz = timezones.tz_standardize(tz) dtype = DatetimeTZDtype(tz=tz) elif dtype is None: dtype = values.dtype From 14cf1363b12541e8cca10dfa3df8d819bb4ff9d4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 9 Jan 2019 09:53:25 -0800 Subject: [PATCH 8/9] revert DTA behavior --- pandas/core/arrays/datetimes.py | 12 +++++++++++- pandas/io/packers.py | 2 -- pandas/tests/arrays/test_datetimes.py | 19 +++++++++++-------- pandas/tests/test_base.py | 4 ++-- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d642fbd44b866..d6678d3734aa6 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -268,7 +268,17 @@ def __init__(self, values, dtype=None, freq=None, copy=False): "ndarray, or Series or Index containing one of those." .format(vals=type(values).__name__)) - if values.dtype == np.bool_: + if is_datetime64_dtype(values.dtype) and hasattr(dtype, "tz"): + # cast to make _from_sequence treat as unix instead of wall-times; + # see GH#24559 + values = type(self)._simple_new( + np.asarray(values), + freq=getattr(values, "freq", None), + dtype=tz_to_dtype(utc)).tz_convert(dtype.tz) + + elif not (is_datetime64tz_dtype(values.dtype) or + is_datetime64_dtype(values.dtype) or + values.dtype == 'i8'): raise ValueError( "The dtype of 'values' is incorrect. Must be 'datetime64[ns]'." " Got {dtype} instead." .format(dtype=values.dtype)) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 9effc2aa0003b..c2a495cf6eaf2 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -656,8 +656,6 @@ def create_block(b): if is_datetime64tz_dtype(b[u'dtype']): assert isinstance(values, np.ndarray), type(values) assert values.dtype == 'M8[ns]', values.dtype - # These values are interpreted as unix timestamps, so we - # view as i8 values = DatetimeArray(values.view('i8'), dtype=b[u'dtype']) return make_block(values=values, diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 4bdb214c136ca..32cafad584d01 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -29,6 +29,7 @@ def test_constructor_equivalence(self, tz): # and that DatetimeArray._simple_new behaves like # DatetimeIndex._simple_new for both int64 and datetime64[ns] inputs arr = np.random.randint(-10**9, 10**9, size=5, dtype=np.int64) + dt64arr = arr.view('datetime64[ns]') dti = pd.date_range('1960-01-01', periods=1, tz=tz) v1 = DatetimeArray._simple_new(arr.view('i8'), dtype=dti.dtype) @@ -42,6 +43,10 @@ def test_constructor_equivalence(self, tz): v7 = DatetimeArray._simple_new(arr.view('M8[ns]'), dtype=dti.dtype) v8 = pd.DatetimeIndex._simple_new(arr.view('M8[ns]'), dtype=dti.dtype) + # GH#24623 DatetimeArray.__init__ treats M8[ns] as unix timestamps, + # unlike DatetimeIndex.__new__. + v9 = DatetimeArray(dt64arr, dtype=dti.dtype) + tm.assert_datetime_array_equal(v1, v2) tm.assert_datetime_array_equal(v1, v3) tm.assert_datetime_array_equal(v1, v4._data) @@ -49,22 +54,20 @@ def test_constructor_equivalence(self, tz): tm.assert_datetime_array_equal(v1, v6._data) tm.assert_datetime_array_equal(v1, v7) tm.assert_datetime_array_equal(v1, v8._data) + tm.assert_datetime_array_equal(v1, v9) expected = [pd.Timestamp(i8, tz=dti.tz) for i8 in arr] assert list(v1) == expected # The guarantees for datetime64 data are fewer - dt64arr = arr.view('datetime64[ns]') - v1 = DatetimeArray(dt64arr, dtype=dti.dtype) - v2 = DatetimeArray._from_sequence(dt64arr, dtype=dti.dtype) - v3 = DatetimeArray._from_sequence(dt64arr, tz=dti.tz) - v4 = pd.DatetimeIndex(dt64arr, dtype=dti.dtype) - v5 = pd.DatetimeIndex(dt64arr, tz=dti.tz) + v1 = DatetimeArray._from_sequence(dt64arr, dtype=dti.dtype) + v2 = DatetimeArray._from_sequence(dt64arr, tz=dti.tz) + v3 = pd.DatetimeIndex(dt64arr, dtype=dti.dtype) + v4 = pd.DatetimeIndex(dt64arr, tz=dti.tz) tm.assert_datetime_array_equal(v1, v2) - tm.assert_datetime_array_equal(v1, v3) + tm.assert_datetime_array_equal(v1, v3._data) tm.assert_datetime_array_equal(v1, v4._data) - tm.assert_datetime_array_equal(v1, v5._data) def test_freq_validation(self): # GH#24623 check that invalid instances cannot be created with the diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 91a127cde9d9b..f3e78a3157399 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1241,7 +1241,7 @@ def test_numpy_array_all_dtypes(any_numpy_dtype): # tz-aware Datetime (DatetimeArray(np.array(['2000-01-01T12:00:00', '2000-01-02T12:00:00'], - dtype='M8[ns]').view('i8'), + dtype='M8[ns]'), dtype=DatetimeTZDtype(tz="US/Central")), '_data'), ]) @@ -1282,7 +1282,7 @@ def test_array_multiindex_raises(): # tz-aware stays tz`-aware (DatetimeArray(np.array(['2000-01-01T06:00:00', '2000-01-02T06:00:00'], - dtype='M8[ns]').view('i8'), + dtype='M8[ns]'), dtype=DatetimeTZDtype(tz='US/Central')), np.array([pd.Timestamp('2000-01-01', tz='US/Central'), pd.Timestamp('2000-01-02', tz='US/Central')])), From 20b91dd30416c5436274d03f7564af015221bfa0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 9 Jan 2019 09:54:31 -0800 Subject: [PATCH 9/9] flake8 fixup --- pandas/core/arrays/datetimes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d6678d3734aa6..2f7cd3768b6ab 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -277,8 +277,8 @@ def __init__(self, values, dtype=None, freq=None, copy=False): dtype=tz_to_dtype(utc)).tz_convert(dtype.tz) elif not (is_datetime64tz_dtype(values.dtype) or - is_datetime64_dtype(values.dtype) or - values.dtype == 'i8'): + is_datetime64_dtype(values.dtype) or + values.dtype == 'i8'): raise ValueError( "The dtype of 'values' is incorrect. Must be 'datetime64[ns]'." " Got {dtype} instead." .format(dtype=values.dtype))