From 23f4a50d7d719b2bb58662f747f5f4a2f7d42d77 Mon Sep 17 00:00:00 2001 From: Licht-T Date: Fri, 6 Oct 2017 00:16:52 +0900 Subject: [PATCH 1/7] BUG: Create PeriodEngine --- pandas/_libs/index.pyx | 77 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index e98c0131e9c44..e7c18f19eb41e 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -17,7 +17,7 @@ from tslib cimport _to_i8 from hashtable cimport HashTable -from pandas._libs import algos, hashtable as _hash +from pandas._libs import algos, period as periodlib, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta from datetime import datetime, timedelta @@ -270,13 +270,16 @@ cdef class IndexEngine: values = self._get_index_values() self.mapping = self._make_hash_table(len(values)) - self.mapping.map_locations(values) + self._call_map_locations(values) if len(self.mapping) == len(values): self.unique = 1 self.need_unique_check = 0 + cpdef _call_map_locations(self, values): + self.mapping.map_locations(values) + def clear_mapping(self): self.mapping = None self.need_monotonic_check = 1 @@ -490,6 +493,76 @@ cdef class TimedeltaEngine(DatetimeEngine): cdef _get_box_dtype(self): return 'm8[ns]' + +cdef class PeriodEngine(Int64Engine): + + cdef _get_index_values(self): + return self.vgetter() + + cpdef _call_map_locations(self, values): + super(PeriodEngine, self)._call_map_locations(values.view('i8')) + + def _call_monotonic(self, values): + return super(PeriodEngine, self)._call_monotonic(values.view('i8')) + + cdef _maybe_get_bool_indexer(self, object val): + cdef: + ndarray[uint8_t, cast=True] indexer + ndarray[int64_t] values + int count = 0 + Py_ssize_t i, n + int last_true + + if not util.is_integer_object(val): + raise KeyError(val) + + values = self._get_index_values().view('i8') + n = len(values) + + result = np.empty(n, dtype=bool) + indexer = result.view(np.uint8) + + for i in range(n): + if values[i] == val: + count += 1 + indexer[i] = 1 + last_true = i + else: + indexer[i] = 0 + + if count == 0: + raise KeyError(val) + if count == 1: + return last_true + + return result + + def get_indexer(self, values): + cdef ndarray[int64_t, ndim=1] ordinals + + self._ensure_mapping_populated() + ordinals = periodlib.extract_ordinals(values, self.vgetter().freq) + return self.mapping.lookup(ordinals) + + def get_pad_indexer(self, other, limit=None): + ordinal = periodlib.extract_ordinals(other, self.vgetter().freq) + + return algos.pad_int64(self._get_index_values(), + np.asarray(ordinal), limit=limit) + + def get_backfill_indexer(self, other, limit=None): + ordinal = periodlib.extract_ordinals(other, self.vgetter().freq) + + return algos.backfill_int64(self._get_index_values(), + np.asarray(ordinal), limit=limit) + + def get_indexer_non_unique(self, targets): + ordinal = periodlib.extract_ordinals(targets, self.vgetter().freq) + ordinal_array = np.asarray(ordinal) + + return super(PeriodEngine, self).get_indexer_non_unique(ordinal_array) + + cpdef convert_scalar(ndarray arr, object value): # we don't turn integers # into datetimes/timedeltas From 6bab80a8466865e4f80cfacf380a7c556b5126eb Mon Sep 17 00:00:00 2001 From: Licht-T Date: Fri, 6 Oct 2017 00:17:30 +0900 Subject: [PATCH 2/7] BUG: Change the PeriodIndex engine to PeriodEngine --- pandas/core/indexes/period.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 148ca2725fbdc..c4938b556c8dd 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -31,7 +31,7 @@ import pandas.tseries.offsets as offsets from pandas._libs.lib import infer_dtype -from pandas._libs import tslib, period +from pandas._libs import tslib, period, index as libindex from pandas._libs.period import (Period, IncompatibleFrequency, get_period_field_arr, _validate_end_alias, _quarter_to_myear) @@ -192,6 +192,8 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): freq = None + _engine_type = libindex.PeriodEngine + __eq__ = _period_index_cmp('__eq__') __ne__ = _period_index_cmp('__ne__', nat_result=True) __lt__ = _period_index_cmp('__lt__') @@ -275,6 +277,10 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, data = period.extract_ordinals(data, freq) return cls._from_ordinals(data, name=name, freq=freq) + @cache_readonly + def _engine(self): + return self._engine_type(lambda: self, len(self)) + @classmethod def _generate_range(cls, start, end, periods, freq, fields): if freq is not None: From 0635b88c9d989065e18e918f03465c4848a96c7c Mon Sep 17 00:00:00 2001 From: Licht-T Date: Tue, 3 Oct 2017 08:31:23 +0900 Subject: [PATCH 3/7] TST: Add PeriodIndex/PeriodEngine tests --- pandas/tests/indexes/period/test_indexing.py | 197 ++++++++++++++++++- pandas/tests/series/test_period.py | 30 +++ 2 files changed, 226 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index efc13a56cd77e..2b87699105d82 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -3,10 +3,11 @@ import pytest import numpy as np +from numpy import testing as ntm import pandas as pd from pandas.util import testing as tm from pandas.compat import lrange -from pandas._libs import tslib +from pandas._libs import tslib, tslibs from pandas import (PeriodIndex, Series, DatetimeIndex, period_range, Period) @@ -310,3 +311,197 @@ def test_take_fill_value(self): with pytest.raises(IndexError): idx.take(np.array([1, -5])) + + def test_get_loc(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + # get the location of p1/p2 from + # monotonic increasing PeriodIndex with non-duplicate + idx0 = pd.PeriodIndex([p0, p1, p2]) + expected_idx1_p1 = 1 + expected_idx1_p2 = 2 + + assert idx0.get_loc(p1) == expected_idx1_p1 + assert idx0.get_loc(str(p1)) == expected_idx1_p1 + assert idx0.get_loc(p2) == expected_idx1_p2 + assert idx0.get_loc(str(p2)) == expected_idx1_p2 + + pytest.raises(tslibs.parsing.DateParseError, idx0.get_loc, 'foo') + pytest.raises(KeyError, idx0.get_loc, 1.1) + pytest.raises(TypeError, idx0.get_loc, idx0) + + # get the location of p1/p2 from + # monotonic increasing PeriodIndex with duplicate + idx1 = pd.PeriodIndex([p1, p1, p2]) + expected_idx1_p1 = slice(0, 2) + expected_idx1_p2 = 2 + + assert idx1.get_loc(p1) == expected_idx1_p1 + assert idx1.get_loc(str(p1)) == expected_idx1_p1 + assert idx1.get_loc(p2) == expected_idx1_p2 + assert idx1.get_loc(str(p2)) == expected_idx1_p2 + + pytest.raises(tslibs.parsing.DateParseError, idx1.get_loc, 'foo') + pytest.raises(KeyError, idx1.get_loc, 1.1) + pytest.raises(TypeError, idx1.get_loc, idx1) + + # get the location of p1/p2 from + # non-monotonic increasing/decreasing PeriodIndex with duplicate + idx2 = pd.PeriodIndex([p2, p1, p2]) + expected_idx2_p1 = 1 + expected_idx2_p2 = np.array([True, False, True]) + + assert idx2.get_loc(p1) == expected_idx2_p1 + assert idx2.get_loc(str(p1)) == expected_idx2_p1 + ntm.assert_array_equal(idx2.get_loc(p2), expected_idx2_p2) + ntm.assert_array_equal(idx2.get_loc(str(p2)), expected_idx2_p2) + + def test_is_monotonic_increasing(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + idx_inc0 = pd.PeriodIndex([p0, p1, p2]) + idx_inc1 = pd.PeriodIndex([p0, p1, p1]) + idx_dec0 = pd.PeriodIndex([p2, p1, p0]) + idx_dec1 = pd.PeriodIndex([p2, p1, p1]) + idx = pd.PeriodIndex([p1, p2, p0]) + + assert idx_inc0.is_monotonic_increasing + assert idx_inc1.is_monotonic_increasing + assert not idx_dec0.is_monotonic_increasing + assert not idx_dec1.is_monotonic_increasing + assert not idx.is_monotonic_increasing + + def test_is_monotonic_decreasing(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + idx_inc0 = pd.PeriodIndex([p0, p1, p2]) + idx_inc1 = pd.PeriodIndex([p0, p1, p1]) + idx_dec0 = pd.PeriodIndex([p2, p1, p0]) + idx_dec1 = pd.PeriodIndex([p2, p1, p1]) + idx = pd.PeriodIndex([p1, p2, p0]) + + assert not idx_inc0.is_monotonic_decreasing + assert not idx_inc1.is_monotonic_decreasing + assert idx_dec0.is_monotonic_decreasing + assert idx_dec1.is_monotonic_decreasing + assert not idx.is_monotonic_decreasing + + def test_is_unique(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + idx0 = pd.PeriodIndex([p0, p1, p2]) + assert idx0.is_unique + + idx1 = pd.PeriodIndex([p1, p1, p2]) + assert not idx1.is_unique + + def test_contains(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + p3 = pd.Period('2017-09-04') + + ps0 = [p0, p1, p2] + idx0 = pd.PeriodIndex(ps0) + + for p in ps0: + assert idx0.contains(p) + assert p in idx0 + + assert idx0.contains(str(p)) + assert str(p) in idx0 + + assert idx0.contains('2017-09-01 00:00:01') + assert '2017-09-01 00:00:01' in idx0 + + assert idx0.contains('2017-09') + assert '2017-09' in idx0 + + assert not idx0.contains(p3) + assert p3 not in idx0 + + def test_get_value(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + idx0 = pd.PeriodIndex([p0, p1, p2]) + input0 = np.array([1, 2, 3]) + expected0 = 2 + + result0 = idx0.get_value(input0, p1) + assert result0 == expected0 + + idx1 = pd.PeriodIndex([p1, p1, p2]) + input1 = np.array([1, 2, 3]) + expected1 = np.array([1, 2]) + + result1 = idx1.get_value(input1, p1) + tm.assert_numpy_array_equal(result1, expected1) + + idx2 = pd.PeriodIndex([p1, p2, p1]) + input2 = np.array([1, 2, 3]) + expected2 = np.array([1, 3]) + + result2 = idx2.get_value(input2, p1) + tm.assert_numpy_array_equal(result2, expected2) + + def test_get_indexer(self): + # GH 17717 + p1 = pd.Period('2017-09-01') + p2 = pd.Period('2017-09-04') + p3 = pd.Period('2017-09-07') + + tp0 = pd.Period('2017-08-31') + tp1 = pd.Period('2017-09-02') + tp2 = pd.Period('2017-09-05') + tp3 = pd.Period('2017-09-09') + + idx = pd.PeriodIndex([p1, p2, p3]) + + tm.assert_numpy_array_equal(idx.get_indexer(idx), + np.array([0, 1, 2], dtype=np.intp)) + + target = pd.PeriodIndex([tp0, tp1, tp2, tp3]) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2, -1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 0, 1, 2], dtype=np.intp)) + + res = idx.get_indexer(target, 'nearest', + tolerance=pd.Timedelta('1 day')) + tm.assert_numpy_array_equal(res, + np.array([0, 0, 1, -1], dtype=np.intp)) + + def test_get_indexer_non_unique(self): + # GH 17717 + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + p3 = pd.Period('2017-09-04') + p4 = pd.Period('2017-09-05') + + idx1 = pd.PeriodIndex([p1, p2, p1]) + idx2 = pd.PeriodIndex([p2, p1, p3, p4]) + + result = idx1.get_indexer_non_unique(idx2) + expected_indexer = np.array([1, 0, 2, -1, -1], dtype=np.int64) + expected_missing = np.array([2, 3], dtype=np.int64) + + tm.assert_numpy_array_equal(result[0], expected_indexer) + tm.assert_numpy_array_equal(result[1], expected_missing) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index e907b0edd5c6a..b4ff25d2630b8 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -249,3 +249,33 @@ def test_align_series(self): msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)" with tm.assert_raises_regex(period.IncompatibleFrequency, msg): ts + ts.asfreq('D', how="end") + + def test_truncate(self): + # GH 17717 + idx1 = pd.PeriodIndex([ + pd.Period('2017-09-02'), + pd.Period('2017-09-02'), + pd.Period('2017-09-03') + ]) + series1 = pd.Series([1, 2, 3], index=idx1) + result1 = series1.truncate(after='2017-09-02') + + expected_idx1 = pd.PeriodIndex([ + pd.Period('2017-09-02'), + pd.Period('2017-09-02') + ]) + tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1)) + + idx2 = pd.PeriodIndex([ + pd.Period('2017-09-03'), + pd.Period('2017-09-02'), + pd.Period('2017-09-03') + ]) + series2 = pd.Series([1, 2, 3], index=idx2) + result2 = series2.truncate(after='2017-09-02') + + expected_idx2 = pd.PeriodIndex([ + pd.Period('2017-09-03'), + pd.Period('2017-09-02') + ]) + tm.assert_series_equal(result2, pd.Series([1, 2], index=expected_idx2)) From a0532c8719fe0603146f7d5a3a2f970bdef88895 Mon Sep 17 00:00:00 2001 From: Licht-T Date: Sun, 29 Oct 2017 13:43:22 +0900 Subject: [PATCH 4/7] DOC: Add whatsnew note of PeriodIndex.truncate bug --- doc/source/whatsnew/v0.22.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index c41da4d67afe5..5c64b0a55c09b 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -100,7 +100,7 @@ Conversion Indexing ^^^^^^^^ -- +- Bug in :func:`PeriodIndex.truncate` which raises ``TypeError`` when ``PeriodIndex`` is monotonic (:issue:`17717`) - - From 88a85b75d3dcbd443929d0a1da584592cbb674ac Mon Sep 17 00:00:00 2001 From: Licht-T Date: Fri, 3 Nov 2017 23:30:31 +0900 Subject: [PATCH 5/7] BUG: Force calling super methods --- pandas/_libs/index.pyx | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index e7c18f19eb41e..f15b375543015 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -497,7 +497,7 @@ cdef class TimedeltaEngine(DatetimeEngine): cdef class PeriodEngine(Int64Engine): cdef _get_index_values(self): - return self.vgetter() + return super(PeriodEngine, self).vgetter() cpdef _call_map_locations(self, values): super(PeriodEngine, self)._call_map_locations(values.view('i8')) @@ -540,24 +540,30 @@ cdef class PeriodEngine(Int64Engine): def get_indexer(self, values): cdef ndarray[int64_t, ndim=1] ordinals - self._ensure_mapping_populated() - ordinals = periodlib.extract_ordinals(values, self.vgetter().freq) + super(PeriodEngine, self)._ensure_mapping_populated() + + freq = super(PeriodEngine, self).vgetter().freq + ordinals = periodlib.extract_ordinals(values, freq) + return self.mapping.lookup(ordinals) def get_pad_indexer(self, other, limit=None): - ordinal = periodlib.extract_ordinals(other, self.vgetter().freq) + freq = super(PeriodEngine, self).vgetter().freq + ordinal = periodlib.extract_ordinals(other, freq) return algos.pad_int64(self._get_index_values(), np.asarray(ordinal), limit=limit) def get_backfill_indexer(self, other, limit=None): - ordinal = periodlib.extract_ordinals(other, self.vgetter().freq) + freq = super(PeriodEngine, self).vgetter().freq + ordinal = periodlib.extract_ordinals(other, freq) return algos.backfill_int64(self._get_index_values(), np.asarray(ordinal), limit=limit) def get_indexer_non_unique(self, targets): - ordinal = periodlib.extract_ordinals(targets, self.vgetter().freq) + freq = super(PeriodEngine, self).vgetter().freq + ordinal = periodlib.extract_ordinals(targets, freq) ordinal_array = np.asarray(ordinal) return super(PeriodEngine, self).get_indexer_non_unique(ordinal_array) From b2a281aaad66342731b9f7db7ddccc86ef1059d7 Mon Sep 17 00:00:00 2001 From: Licht-T Date: Sat, 4 Nov 2017 00:51:41 +0900 Subject: [PATCH 6/7] Remove duplicate codes --- pandas/_libs/index.pyx | 35 +++----------------------- pandas/_libs/index_class_helper.pxi.in | 5 +++- 2 files changed, 7 insertions(+), 33 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index f15b375543015..78eb7b3ae483e 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -505,38 +505,6 @@ cdef class PeriodEngine(Int64Engine): def _call_monotonic(self, values): return super(PeriodEngine, self)._call_monotonic(values.view('i8')) - cdef _maybe_get_bool_indexer(self, object val): - cdef: - ndarray[uint8_t, cast=True] indexer - ndarray[int64_t] values - int count = 0 - Py_ssize_t i, n - int last_true - - if not util.is_integer_object(val): - raise KeyError(val) - - values = self._get_index_values().view('i8') - n = len(values) - - result = np.empty(n, dtype=bool) - indexer = result.view(np.uint8) - - for i in range(n): - if values[i] == val: - count += 1 - indexer[i] = 1 - last_true = i - else: - indexer[i] = 0 - - if count == 0: - raise KeyError(val) - if count == 1: - return last_true - - return result - def get_indexer(self, values): cdef ndarray[int64_t, ndim=1] ordinals @@ -568,6 +536,9 @@ cdef class PeriodEngine(Int64Engine): return super(PeriodEngine, self).get_indexer_non_unique(ordinal_array) + cdef _get_index_values_for_bool_indexer(self): + return self._get_index_values().view('i8') + cpdef convert_scalar(ndarray arr, object value): # we don't turn integers diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index 76c0deef7ebee..b9fc0ddd7ea1c 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -66,7 +66,7 @@ cdef class {{name}}Engine(IndexEngine): raise KeyError(val) {{endif}} - values = self._get_index_values() + values = self._get_index_values_for_bool_indexer() n = len(values) result = np.empty(n, dtype=bool) @@ -86,6 +86,9 @@ cdef class {{name}}Engine(IndexEngine): return last_true return result + + cdef _get_index_values_for_bool_indexer(self): + return self._get_index_values() {{endif}} {{endfor}} From ec608001111df27b2d30cd7e5ca99bbc7310e593 Mon Sep 17 00:00:00 2001 From: Licht-T Date: Sat, 4 Nov 2017 09:38:53 +0900 Subject: [PATCH 7/7] Remove numpy test module --- pandas/tests/indexes/period/test_indexing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 2b87699105d82..d99eba3e2d5e9 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -3,7 +3,6 @@ import pytest import numpy as np -from numpy import testing as ntm import pandas as pd from pandas.util import testing as tm from pandas.compat import lrange @@ -356,8 +355,8 @@ def test_get_loc(self): assert idx2.get_loc(p1) == expected_idx2_p1 assert idx2.get_loc(str(p1)) == expected_idx2_p1 - ntm.assert_array_equal(idx2.get_loc(p2), expected_idx2_p2) - ntm.assert_array_equal(idx2.get_loc(str(p2)), expected_idx2_p2) + tm.assert_numpy_array_equal(idx2.get_loc(p2), expected_idx2_p2) + tm.assert_numpy_array_equal(idx2.get_loc(str(p2)), expected_idx2_p2) def test_is_monotonic_increasing(self): # GH 17717