From b341e49335db7623c5d17851de271d580adeae0b Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 12 Dec 2017 20:54:27 -0700 Subject: [PATCH 1/2] BUG: Fix IntervalIndex.to_tuples() with NA values --- doc/source/whatsnew/v0.22.0.txt | 1 + pandas/core/indexes/interval.py | 6 +++++- pandas/tests/indexes/test_interval.py | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 37032ff6bc313..0efd403620461 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -264,6 +264,7 @@ Conversion - Fixed a bug where ``FY5253`` date offsets could incorrectly raise an ``AssertionError`` in arithmetic operatons (:issue:`14774`) - Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) - Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) +- Bug in ``IntervalIndex.to_tuples()`` where NA values are returned as a tuple of NA values instead of the NA value itself (:issue:`18756`) Indexing diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 292b0f638f821..7f30fdf91be8f 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -546,7 +546,11 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): def to_tuples(self): """Return an Index of tuples of the form (left, right)""" - return Index(_asarray_tuplesafe(zip(self.left, self.right))) + tuples = _asarray_tuplesafe(zip(self.left, self.right)) + if self.hasnans: + # GH 18756 + tuples = np.where(~self._isnan, tuples, np.nan) + return Index(tuples) @cache_readonly def _multiindex(self): diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index abad930793d7f..881af01906c62 100644 --- 
a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -7,6 +7,7 @@ Interval, IntervalIndex, Index, isna, notna, interval_range, Timestamp, Timedelta, compat, date_range, timedelta_range, DateOffset) from pandas.compat import lzip +from pandas.core.common import _asarray_tuplesafe from pandas.tseries.offsets import Day from pandas._libs.interval import IntervalTree from pandas.tests.indexes.common import Base @@ -1072,6 +1073,19 @@ def test_is_non_overlapping_monotonic(self, closed): idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is True + @pytest.mark.parametrize('tuples', [ + lzip(range(10), range(1, 11)), + lzip(range(10), range(1, 11)) + [np.nan], + lzip(date_range('20170101', periods=10), + date_range('20170101', periods=10)), + [np.nan] + lzip(date_range('20170101', periods=10), + date_range('20170101', periods=10))]) + def test_to_tuples(self, tuples): + # GH 18756 + result = IntervalIndex.from_tuples(tuples).to_tuples() + expected = Index(_asarray_tuplesafe(tuples)) + tm.assert_index_equal(result, expected) + class TestIntervalRange(object): From 7c330c662f056b80c39c58364cf55aa932faafe0 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 14 Dec 2017 18:27:11 -0700 Subject: [PATCH 2/2] Add na_tuple kwarg --- doc/source/whatsnew/v0.22.0.txt | 2 +- pandas/core/indexes/interval.py | 24 ++++++++++++++++--- pandas/tests/indexes/test_interval.py | 34 +++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 0efd403620461..9dc10a09378f8 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -138,6 +138,7 @@ Other Enhancements - :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. 
(:issue:`16326`) - :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`) - :func:``DataFrame.to_json`` and ``Series.to_json`` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`) +- ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`) .. _whatsnew_0220.api_breaking: @@ -264,7 +265,6 @@ Conversion - Fixed a bug where ``FY5253`` date offsets could incorrectly raise an ``AssertionError`` in arithmetic operatons (:issue:`14774`) - Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) - Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) -- Bug in ``IntervalIndex.to_tuples()`` where NA values are returned as a tuple of NA values instead of the NA value itself (:issue:`18756`) Indexing diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 7f30fdf91be8f..cb786574909db 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -544,10 +544,28 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): return cls.from_arrays(left, right, closed, name=name, copy=False) - def to_tuples(self): - """Return an Index of tuples of the form (left, right)""" + def to_tuples(self, na_tuple=True): + """ + Return an Index of tuples of the form (left, right) + + Parameters + ---------- + na_tuple : boolean, default True + Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA + value itself if False, ``nan``. 
+ .. versionadded:: 0.22.0 + + Examples + -------- + >>> idx = pd.IntervalIndex.from_arrays([0, np.nan, 2], [1, np.nan, 3]) + >>> idx.to_tuples() + Index([(0.0, 1.0), (nan, nan), (2.0, 3.0)], dtype='object') + >>> idx.to_tuples(na_tuple=False) + Index([(0.0, 1.0), nan, (2.0, 3.0)], dtype='object') + """ tuples = _asarray_tuplesafe(zip(self.left, self.right)) - if self.hasnans: + if not na_tuple: # GH 18756 tuples = np.where(~self._isnan, tuples, np.nan) return Index(tuples) diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 881af01906c62..c809127a66ab8 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -1075,17 +1075,43 @@ def test_is_non_overlapping_monotonic(self, closed): @pytest.mark.parametrize('tuples', [ lzip(range(10), range(1, 11)), - lzip(range(10), range(1, 11)) + [np.nan], lzip(date_range('20170101', periods=10), date_range('20170101', periods=10)), - [np.nan] + lzip(date_range('20170101', periods=10), - date_range('20170101', periods=10))]) + lzip(timedelta_range('0 days', periods=10), + timedelta_range('1 day', periods=10))]) def test_to_tuples(self, tuples): # GH 18756 - result = IntervalIndex.from_tuples(tuples).to_tuples() + idx = IntervalIndex.from_tuples(tuples) + result = idx.to_tuples() expected = Index(_asarray_tuplesafe(tuples)) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('tuples', [ + lzip(range(10), range(1, 11)) + [np.nan], + lzip(date_range('20170101', periods=10), + date_range('20170101', periods=10)) + [np.nan], + lzip(timedelta_range('0 days', periods=10), + timedelta_range('1 day', periods=10)) + [np.nan]]) + @pytest.mark.parametrize('na_tuple', [True, False]) + def test_to_tuples_na(self, tuples, na_tuple): + # GH 18756 + idx = IntervalIndex.from_tuples(tuples) + result = idx.to_tuples(na_tuple=na_tuple) + + # check the non-NA portion + expected_notna = Index(_asarray_tuplesafe(tuples[:-1])) + result_notna = 
result[:-1] + tm.assert_index_equal(result_notna, expected_notna) + + # check the NA portion + result_na = result[-1] + if na_tuple: + assert isinstance(result_na, tuple) + assert len(result_na) == 2 + assert all(isna(x) for x in result_na) + else: + assert isna(result_na) + class TestIntervalRange(object):