From 5990de6ae425dc6b59495418c8b38daeceaa5285 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 28 May 2016 15:06:40 +0900 Subject: [PATCH] ENH: PeriodIndex now accepts pd.NaT --- doc/source/whatsnew/v0.18.2.txt | 1 + pandas/src/period.pyx | 37 ++++++++++++-- pandas/tseries/period.py | 30 ++++------- pandas/tseries/tests/test_period.py | 78 +++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index e469cbf79b31a..45a9b75556bf6 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -288,6 +288,7 @@ Other API changes - ``Float64Index.astype(int)`` will now raise ``ValueError`` if ``Float64Index`` contains ``NaN`` values (:issue:`13149`) - ``TimedeltaIndex.astype(int)`` and ``DatetimeIndex.astype(int)`` will now return ``Int64Index`` instead of ``np.array`` (:issue:`13209`) - ``.filter()`` enforces mutual exclusion of the keyword arguments. (:issue:`12399`) +- ``PeriodIndex`` can now accept ``list`` and ``array`` which contain ``pd.NaT`` (:issue:`13430`) .. 
_whatsnew_0182.deprecations: diff --git a/pandas/src/period.pyx b/pandas/src/period.pyx index 858aa58df8d7d..aca0d0dbc107b 100644 --- a/pandas/src/period.pyx +++ b/pandas/src/period.pyx @@ -24,6 +24,7 @@ cimport cython from datetime cimport * cimport util cimport lib +from lib cimport is_null_datetimelike import lib from pandas import tslib from tslib import Timedelta, Timestamp, iNaT, NaT @@ -458,13 +459,39 @@ def extract_ordinals(ndarray[object] values, freq): for i in range(n): p = values[i] - ordinals[i] = p.ordinal - if p.freqstr != freqstr: - msg = _DIFFERENT_FREQ_INDEX.format(freqstr, p.freqstr) - raise IncompatibleFrequency(msg) + + if is_null_datetimelike(p): + ordinals[i] = tslib.iNaT + else: + try: + ordinals[i] = p.ordinal + + if p.freqstr != freqstr: + msg = _DIFFERENT_FREQ_INDEX.format(freqstr, p.freqstr) + raise IncompatibleFrequency(msg) + + except AttributeError: + p = Period(p, freq=freq) + ordinals[i] = p.ordinal return ordinals + +def extract_freq(ndarray[object] values): + cdef: + Py_ssize_t i, n = len(values) + object p + + for i in range(n): + p = values[i] + try: + return p.freq + except AttributeError: + pass + + raise ValueError('freq not specified and cannot be inferred') + + cpdef resolution(ndarray[int64_t] stamps, tz=None): cdef: Py_ssize_t i, n = len(stamps) @@ -719,7 +746,7 @@ cdef class Period(object): converted = other.asfreq(freq) ordinal = converted.ordinal - elif lib.is_null_datetimelike(value) or value in tslib._nat_strings: + elif is_null_datetimelike(value) or value in tslib._nat_strings: ordinal = tslib.iNaT if freq is None: raise ValueError("If value is NaT, freq cannot be None " diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 8a3ac1f080c90..750e7a5553ef6 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -40,14 +40,6 @@ def f(self): return property(f) -def _get_ordinals(data, freq): - f = lambda x: Period(x, freq=freq).ordinal - if isinstance(data[0], Period): - return 
period.extract_ordinals(data, freq) - else: - return lib.map_infer(data, f) - - def dt64arr_to_periodarr(data, freq, tz): if data.dtype != np.dtype('M8[ns]'): raise ValueError('Wrong dtype: %s' % data.dtype) @@ -235,14 +227,9 @@ def _from_arraylike(cls, data, freq, tz): except (TypeError, ValueError): data = com._ensure_object(data) - if freq is None and len(data) > 0: - freq = getattr(data[0], 'freq', None) - if freq is None: - raise ValueError('freq not specified and cannot be ' - 'inferred from first element') - - data = _get_ordinals(data, freq) + freq = period.extract_freq(data) + data = period.extract_ordinals(data, freq) else: if isinstance(data, PeriodIndex): if freq is None or freq == data.freq: @@ -254,12 +241,15 @@ def _from_arraylike(cls, data, freq, tz): data = period.period_asfreq_arr(data.values, base1, base2, 1) else: - if freq is None and len(data) > 0: - freq = getattr(data[0], 'freq', None) + + if freq is None and com.is_object_dtype(data): + # must contain Period instance and thus extract ordinals + freq = period.extract_freq(data) + data = period.extract_ordinals(data, freq) if freq is None: - raise ValueError('freq not specified and cannot be ' - 'inferred from first element') + msg = 'freq not specified and cannot be inferred' + raise ValueError(msg) if data.dtype != np.int64: if np.issubdtype(data.dtype, np.datetime64): @@ -269,7 +259,7 @@ def _from_arraylike(cls, data, freq, tz): data = com._ensure_int64(data) except (TypeError, ValueError): data = com._ensure_object(data) - data = _get_ordinals(data, freq) + data = period.extract_ordinals(data, freq) return data, freq diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index de23306c80b71..807fb86b1b4da 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1742,6 +1742,84 @@ def test_constructor_datetime64arr(self): self.assertRaises(ValueError, PeriodIndex, vals, freq='D') + def test_constructor_empty(self): + idx 
= pd.PeriodIndex([], freq='M') + tm.assertIsInstance(idx, PeriodIndex) + self.assertEqual(len(idx), 0) + self.assertEqual(idx.freq, 'M') + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + pd.PeriodIndex([]) + + def test_constructor_pi_nat(self): + idx = PeriodIndex([Period('2011-01', freq='M'), pd.NaT, + Period('2011-01', freq='M')]) + exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M') + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT, + Period('2011-01', freq='M')])) + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex([pd.NaT, pd.NaT, Period('2011-01', freq='M'), + Period('2011-01', freq='M')]) + exp = PeriodIndex(['NaT', 'NaT', '2011-01', '2011-01'], freq='M') + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex(np.array([pd.NaT, pd.NaT, + Period('2011-01', freq='M'), + Period('2011-01', freq='M')])) + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'], freq='M') + tm.assert_index_equal(idx, exp) + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + PeriodIndex([pd.NaT, pd.NaT]) + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + PeriodIndex(np.array([pd.NaT, pd.NaT])) + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + PeriodIndex(['NaT', 'NaT']) + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + PeriodIndex(np.array(['NaT', 'NaT'])) + + def test_constructor_incompat_freq(self): + msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)" + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex([Period('2011-01', freq='M'), pd.NaT, + Period('2011-01', freq='D')]) + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT, + Period('2011-01', freq='D')])) + + # first element is pd.NaT + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex([pd.NaT, 
Period('2011-01', freq='M'), + Period('2011-01', freq='D')]) + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex(np.array([pd.NaT, Period('2011-01', freq='M'), + Period('2011-01', freq='D')])) + + def test_constructor_mixed(self): + idx = PeriodIndex(['2011-01', pd.NaT, Period('2011-01', freq='M')]) + exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M') + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex(['NaT', pd.NaT, Period('2011-01', freq='M')]) + exp = PeriodIndex(['NaT', 'NaT', '2011-01'], freq='M') + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex([Period('2011-01-01', freq='D'), pd.NaT, + '2012-01-01']) + exp = PeriodIndex(['2011-01-01', 'NaT', '2012-01-01'], freq='D') + tm.assert_index_equal(idx, exp) + def test_constructor_simple_new(self): idx = period_range('2007-01', name='p', periods=2, freq='M') result = idx._simple_new(idx, 'p', freq=idx.freq)