From 5fc66c3fc759ca646376afb1e9098d209b19514a Mon Sep 17 00:00:00 2001 From: Nate Yoder Date: Wed, 26 Oct 2016 13:24:49 -0700 Subject: [PATCH 01/11] make map return an index if it operates on an index, multi index, or categorical index; map on a categorical will either return a categorical or an index (rather than a numpy array) --- pandas/core/categorical.py | 2 +- pandas/indexes/base.py | 9 ++++-- pandas/indexes/category.py | 10 +++---- pandas/tests/indexes/test_base.py | 40 ++++++++++++++++++++------- pandas/tests/indexes/test_category.py | 22 +++++++-------- pandas/tests/test_categorical.py | 2 +- 6 files changed, 54 insertions(+), 31 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 922fb84684729..4f72f6d59c7a8 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -943,7 +943,7 @@ def map(self, mapper): Returns ------- - applied : Categorical or np.ndarray. + applied : Categorical or Index. """ new_categories = self.categories.map(mapper) try: diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 512abfd88c78c..acef3817944e6 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2428,7 +2428,7 @@ def groupby(self, values): def map(self, mapper): """ - Apply mapper function to its values. + Apply mapper function to an index. Parameters ---------- @@ -2437,9 +2437,12 @@ def map(self, mapper): Returns ------- - applied : array + applied : Index + The output of the mapping function applied to the index. 
""" - return self._arrmap(self.values, mapper) + attributes = self._get_attributes_dict() + attributes['copy'] = False + return Index(self._arrmap(self.values, mapper), **attributes) def isin(self, values, level=None): """ diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index c1f5d47e1e04f..e87d44155d388 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -524,15 +524,15 @@ def map(self, mapper): ---------- mapper : callable Function to be applied. When all categories are mapped - to different categories, the result will be Categorical which has - the same order property as the original. Otherwise, the result will - be np.ndarray. + to different categories, the result will be a CategoricalIndex + which has the same order property as the original. Otherwise, + the result will be a Index. Returns ------- - applied : Categorical or np.ndarray. + applied : CategoricalIndex or Index """ - return self.values.map(mapper) + return self._shallow_copy_with_infer(self.values.map(mapper)) def delete(self, loc): """ diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 9be4935716989..129859d922ed9 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -767,6 +767,26 @@ def test_sub(self): self.assertRaises(TypeError, lambda: idx - idx.tolist()) self.assertRaises(TypeError, lambda: idx.tolist() - idx) + def test_map_identity_mapping(self): + for name, cur_index in self.indices.items(): + if name == 'tuples': + expected = Index(cur_index.values, tupleize_cols=False) + self.assert_index_equal(expected, cur_index.map(lambda x: x)) + else: + self.assert_index_equal(cur_index, cur_index.map(lambda x: x)) + + def test_map_that_returns_tuples_creates_index_not_multi_index(self): + boolean_index = tm.makeIntIndex(3).map(lambda x: (x, x == 1)) + expected = Index([(0, False), (1, True), (2, False)], + tupleize_cols=False) + self.assert_index_equal(boolean_index, expected) + + 
def test_map_that_reduces_multi_index_to_single_index_returns_index(self): + first_level = ['foo', 'bar', 'baz'] + multi_index = MultiIndex.from_tuples(lzip(first_level, [1, 2, 3])) + reduced_index = multi_index.map(lambda x: x[0]) + self.assert_index_equal(reduced_index, Index(first_level)) + def test_append_multiple(self): index = Index(['a', 'b', 'c', 'd', 'e', 'f']) @@ -1194,16 +1214,16 @@ def check_slice(in_slice, expected): self.assert_index_equal(result, expected) for in_slice, expected in [ - (SLC[::-1], 'yxdcb'), (SLC['b':'y':-1], ''), - (SLC['b'::-1], 'b'), (SLC[:'b':-1], 'yxdcb'), - (SLC[:'y':-1], 'y'), (SLC['y'::-1], 'yxdcb'), - (SLC['y'::-4], 'yb'), - # absent labels - (SLC[:'a':-1], 'yxdcb'), (SLC[:'a':-2], 'ydb'), - (SLC['z'::-1], 'yxdcb'), (SLC['z'::-3], 'yc'), - (SLC['m'::-1], 'dcb'), (SLC[:'m':-1], 'yx'), - (SLC['a':'a':-1], ''), (SLC['z':'z':-1], ''), - (SLC['m':'m':-1], '') + (SLC[::-1], 'yxdcb'), (SLC['b':'y':-1], ''), + (SLC['b'::-1], 'b'), (SLC[:'b':-1], 'yxdcb'), + (SLC[:'y':-1], 'y'), (SLC['y'::-1], 'yxdcb'), + (SLC['y'::-4], 'yb'), + # absent labels + (SLC[:'a':-1], 'yxdcb'), (SLC[:'a':-2], 'ydb'), + (SLC['z'::-1], 'yxdcb'), (SLC['z'::-3], 'yc'), + (SLC['m'::-1], 'dcb'), (SLC[:'m':-1], 'yx'), + (SLC['a':'a':-1], ''), (SLC['z':'z':-1], ''), + (SLC['m':'m':-1], '') ]: check_slice(in_slice, expected) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 819b88bf4c5d3..b9ae0a21086e9 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -207,19 +207,19 @@ def test_map(self): ci = pd.CategoricalIndex(list('ABABC'), categories=list('CBA'), ordered=True) result = ci.map(lambda x: x.lower()) - exp = pd.Categorical(list('ababc'), categories=list('cba'), - ordered=True) - tm.assert_categorical_equal(result, exp) + exp = pd.CategoricalIndex(list('ababc'), categories=list('cba'), + ordered=True) + tm.assert_index_equal(result, exp) ci = 
pd.CategoricalIndex(list('ABABC'), categories=list('BAC'), ordered=False, name='XXX') result = ci.map(lambda x: x.lower()) - exp = pd.Categorical(list('ababc'), categories=list('bac'), - ordered=False) - tm.assert_categorical_equal(result, exp) + exp = pd.CategoricalIndex(list('ababc'), categories=list('bac'), + ordered=False, name='XXX') + tm.assert_index_equal(result, exp) - tm.assert_numpy_array_equal(ci.map(lambda x: 1), - np.array([1] * 5, dtype=np.int64)) + tm.assert_index_equal(ci.map(lambda x: 1), + Index(np.array([1] * 5, dtype=np.int64), name='XXX')) # change categories dtype ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'), @@ -228,9 +228,9 @@ def f(x): return {'A': 10, 'B': 20, 'C': 30}.get(x) result = ci.map(f) - exp = pd.Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30], - ordered=False) - tm.assert_categorical_equal(result, exp) + exp = pd.CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30], + ordered=False) + tm.assert_index_equal(result, exp) def test_where(self): i = self.create_index() diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 5320b2216ee40..f45a57279a258 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1669,7 +1669,7 @@ def test_map(self): tm.assert_categorical_equal(result, exp) result = c.map(lambda x: 1) - tm.assert_numpy_array_equal(result, np.array([1] * 5, dtype=np.int64)) + tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64))) class TestCategoricalAsBlock(tm.TestCase): From a596744ad187fdbf6607bd5fd648389812859c15 Mon Sep 17 00:00:00 2001 From: Nate Yoder Date: Wed, 26 Oct 2016 16:25:55 -0700 Subject: [PATCH 02/11] introspect results from map so that if the output array has tuples we create a multiindex instead of an index --- doc/source/whatsnew/v0.20.0.txt | 19 +++++++++++++++++++ pandas/indexes/base.py | 9 ++++++++- pandas/tests/indexes/test_base.py | 20 ++++++++++++-------- 
pandas/tests/indexes/test_category.py | 1 + pandas/tests/test_categorical.py | 1 + 5 files changed, 41 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 2855cde95ac2a..95e52dc25c64a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -91,6 +91,25 @@ Other enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- ``map`` on an ``Index`` now returns an ``Index``, not an array (:issue:`12766`) +.. ipython:: python + + idx = Index([1, 2]) + idx + +Previous Behavior: + +.. code-block:: ipython + + In [3]: idx.map(lambda x: x * 2) + Out[3]: array([2, 4]) + +New Behavior: + +.. ipython:: python + + idx.map(lambda x: x * 2) + .. _whatsnew_0200.api: diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index acef3817944e6..baec61ada92e3 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2439,10 +2439,17 @@ def map(self, mapper): ------- applied : Index The output of the mapping function applied to the index. 
+ If the function returns a tuple a """ + from .multi import MultiIndex + mapped_values = self._arrmap(self.values, mapper) attributes = self._get_attributes_dict() + if mapped_values.size and isinstance(mapped_values[0], tuple): + return MultiIndex.from_tuples(mapped_values, + names=attributes.get('name')) + attributes['copy'] = False - return Index(self._arrmap(self.values, mapper), **attributes) + return Index(mapped_values, **attributes) def isin(self, values, level=None): """ diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 129859d922ed9..a81acd97d7dd7 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -768,20 +768,24 @@ def test_sub(self): self.assertRaises(TypeError, lambda: idx.tolist() - idx) def test_map_identity_mapping(self): + # GH 12766 for name, cur_index in self.indices.items(): - if name == 'tuples': - expected = Index(cur_index.values, tupleize_cols=False) - self.assert_index_equal(expected, cur_index.map(lambda x: x)) - else: - self.assert_index_equal(cur_index, cur_index.map(lambda x: x)) + self.assert_index_equal(cur_index, cur_index.map(lambda x: x)) - def test_map_that_returns_tuples_creates_index_not_multi_index(self): + def test_map_that_returns_tuples_creates_multi_index(self): + # GH 12766 boolean_index = tm.makeIntIndex(3).map(lambda x: (x, x == 1)) - expected = Index([(0, False), (1, True), (2, False)], - tupleize_cols=False) + expected = MultiIndex.from_tuples([(0, False), (1, True), (2, False)]) + self.assert_index_equal(boolean_index, expected) + + def test_map_that_returns_a_length_one_tuple_creates_an_index(self): + # GH 12766 + boolean_index = tm.makeIntIndex(3).map(lambda x: (x, )) + expected = Index([(0, ), (1, ), (2, )]) self.assert_index_equal(boolean_index, expected) def test_map_that_reduces_multi_index_to_single_index_returns_index(self): + # GH 12766 first_level = ['foo', 'bar', 'baz'] multi_index = MultiIndex.from_tuples(lzip(first_level, [1, 
2, 3])) reduced_index = multi_index.map(lambda x: x[0]) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index b9ae0a21086e9..708f424d9bad1 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -218,6 +218,7 @@ def test_map(self): ordered=False, name='XXX') tm.assert_index_equal(result, exp) + # GH 12766: Return an index not an array tm.assert_index_equal(ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name='XXX')) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index f45a57279a258..5d2c317cc0f81 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1669,6 +1669,7 @@ def test_map(self): tm.assert_categorical_equal(result, exp) result = c.map(lambda x: 1) + # GH 12766: Return an index not an array tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64))) From a110be926b99baa72d7683871112ad06d91c7454 Mon Sep 17 00:00:00 2001 From: Nate Yoder Date: Thu, 27 Oct 2016 11:33:25 -0700 Subject: [PATCH 03/11] make map on time tseries indices return index if dtype of output is not a tseries; sphinx changes; fix docstring --- doc/source/whatsnew/v0.20.0.txt | 22 +++++++++---------- pandas/indexes/base.py | 3 ++- pandas/tests/indexes/test_base.py | 36 +++++++++++++++++++++++-------- pandas/tseries/base.py | 4 ++-- 4 files changed, 42 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 95e52dc25c64a..82af2e664135b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -92,26 +92,26 @@ Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ``map`` on an ``Index`` now returns an ``Index``, not an array (:issue:`12766`) -.. ipython:: python - idx = Index([1, 2]) - idx + .. ipython:: python -Previous Behavior: + idx = Index([1, 2]) + idx -.. 
code-block:: ipython + Previous Behavior: - In [3]: idx.map(lambda x: x * 2) - Out[3]: array([2, 4]) + .. code-block:: ipython -New Behavior: + In [3]: idx.map(lambda x: x * 2) + Out[3]: array([2, 4]) -.. ipython:: python + New Behavior: - idx.map(lambda x: x * 2) + .. ipython:: python -.. _whatsnew_0200.api: + idx.map(lambda x: x * 2) + .. _whatsnew_0200.api: - ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`) - ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index baec61ada92e3..7fc6c1fb6e990 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2439,7 +2439,8 @@ def map(self, mapper): ------- applied : Index The output of the mapping function applied to the index. - If the function returns a tuple a + If the function returns a tuple with more than one element + a MultiIndex will be returned. """ from .multi import MultiIndex mapped_values = self._arrmap(self.values, mapper) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index a81acd97d7dd7..56565c496cbf5 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -772,25 +772,43 @@ def test_map_identity_mapping(self): for name, cur_index in self.indices.items(): self.assert_index_equal(cur_index, cur_index.map(lambda x: x)) - def test_map_that_returns_tuples_creates_multi_index(self): + def test_map_with_tuples(self): # GH 12766 - boolean_index = tm.makeIntIndex(3).map(lambda x: (x, x == 1)) - expected = MultiIndex.from_tuples([(0, False), (1, True), (2, False)]) + + # Test that returning a single tuple from an Index + # returns an Index. 
+ boolean_index = tm.makeIntIndex(3).map(lambda x: (x,)) + expected = Index([(0,), (1,), (2,)]) self.assert_index_equal(boolean_index, expected) - def test_map_that_returns_a_length_one_tuple_creates_an_index(self): - # GH 12766 - boolean_index = tm.makeIntIndex(3).map(lambda x: (x, )) - expected = Index([(0, ), (1, ), (2, )]) + # Test that returning a tuple from a map of a single index + # returns a MultiIndex object. + boolean_index = tm.makeIntIndex(3).map(lambda x: (x, x == 1)) + expected = MultiIndex.from_tuples([(0, False), (1, True), (2, False)]) self.assert_index_equal(boolean_index, expected) - def test_map_that_reduces_multi_index_to_single_index_returns_index(self): - # GH 12766 + # Test that returning a single object from a MultiIndex + # returns an Index. first_level = ['foo', 'bar', 'baz'] multi_index = MultiIndex.from_tuples(lzip(first_level, [1, 2, 3])) reduced_index = multi_index.map(lambda x: x[0]) self.assert_index_equal(reduced_index, Index(first_level)) + def test_map_tseries_indices_return_index(self): + date_index = tm.makeDateIndex(10) + exp = Index([1] * 10) + self.assert_index_equal(exp, date_index.map(lambda x: 1)) + + period_index = tm.makePeriodIndex(10) + self.assert_index_equal(exp, period_index.map(lambda x: 1)) + + tdelta_index = tm.makeTimedeltaIndex(10) + self.assert_index_equal(exp, tdelta_index.map(lambda x: 1)) + + date_index = tm.makeDateIndex(24, freq='h', name='hourly') + exp = Index(range(24), name='hourly') + self.assert_index_equal(exp, date_index.map(lambda x: x.hour)) + def test_append_multiple(self): index = Index(['a', 'b', 'c', 'd', 'e', 'f']) diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 4645ae24684ff..aea23634d1e7a 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -330,11 +330,11 @@ def _nat_new(self, box=True): def map(self, f): try: result = f(self) - if not isinstance(result, (np.ndarray, Index)): + if not isinstance(result, Index): raise TypeError return result except 
Exception: - return _algos.arrmap_object(self.asobject.values, f) + return self.asobject.map(f) def sort_values(self, return_indexer=False, ascending=True): """ From 07b772a049d31beeaf5c8aca7c37cfcb1b2e4116 Mon Sep 17 00:00:00 2001 From: Nate Yoder Date: Thu, 27 Oct 2016 12:17:41 -0700 Subject: [PATCH 04/11] use the numpy results if we can to avoid repeating the computation just to create the object --- pandas/tseries/base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index aea23634d1e7a..aafa0dfb1e68a 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -330,6 +330,11 @@ def _nat_new(self, box=True): def map(self, f): try: result = f(self) + + # Try to use this result if we can + if isinstance(result, np.ndarray): + self._shallow_copy(result) + if not isinstance(result, Index): raise TypeError return result From 23c133dd7a874b13b90e8d5fd8b0188d24c56241 Mon Sep 17 00:00:00 2001 From: Nate Yoder Date: Sat, 5 Nov 2016 18:23:24 -0700 Subject: [PATCH 05/11] Update tests to match dtype int64 --- pandas/tests/series/test_apply.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 8d7676bef4d72..e7acc841c76ec 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -124,7 +124,7 @@ def test_apply_datetimetz(self): # change dtype result = s.apply(lambda x: x.hour) - exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int32) + exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64) tm.assert_series_equal(result, exp) # not vectorized @@ -318,7 +318,7 @@ def test_map_datetimetz(self): # change dtype result = s.map(lambda x: x.hour) - exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int32) + exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64) tm.assert_series_equal(result, exp) with tm.assertRaises(NotImplementedError): From 
504c2a2f166ad8af2877cce378c46c890347e52d Mon Sep 17 00:00:00 2001 From: Nate Yoder Date: Sat, 5 Nov 2016 20:03:10 -0700 Subject: [PATCH 06/11] Fix tests that weren't run by PyCharm --- pandas/tseries/tests/test_converter.py | 4 ++-- pandas/tseries/tests/test_period.py | 15 ++++++--------- pandas/tseries/tests/test_timedeltas.py | 4 ++-- pandas/tseries/tests/test_timeseries.py | 8 ++++---- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py index 37d9c35639c32..1317f8e977234 100644 --- a/pandas/tseries/tests/test_converter.py +++ b/pandas/tseries/tests/test_converter.py @@ -3,7 +3,7 @@ import nose import numpy as np -from pandas import Timestamp, Period +from pandas import Timestamp, Period, Index from pandas.compat import u import pandas.util.testing as tm from pandas.tseries.offsets import Second, Milli, Micro @@ -104,7 +104,7 @@ def test_dateindex_conversion(self): for freq in ('B', 'L', 'S'): dateindex = tm.makeDateIndex(k=10, freq=freq) rs = self.dtc.convert(dateindex, None, None) - xp = converter.dates.date2num(dateindex._mpl_repr()) + xp = Index(converter.dates.date2num(dateindex._mpl_repr())) tm.assert_almost_equal(rs, xp, decimals) def test_resolution(self): diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index fe0d28dd9c508..dae1554c0930e 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -3521,8 +3521,8 @@ def test_map(self): tm.assert_index_equal(result, expected) result = index.map(lambda x: x.ordinal) - exp = np.array([x.ordinal for x in index], dtype=np.int64) - tm.assert_numpy_array_equal(result, exp) + exp = Index([x.ordinal for x in index]) + tm.assert_index_equal(result, exp) def test_map_with_string_constructor(self): raw = [2005, 2007, 2009] @@ -3534,20 +3534,17 @@ def test_map_with_string_constructor(self): types += text_type, for t in types: - expected = np.array(lmap(t, 
raw), dtype=object) + expected = Index(lmap(t, raw)) res = index.map(t) - # should return an array - tm.assertIsInstance(res, np.ndarray) + # should return an Index + tm.assertIsInstance(res, Index) # preserve element types self.assertTrue(all(isinstance(resi, t) for resi in res)) - # dtype should be object - self.assertEqual(res.dtype, np.dtype('object').type) - # lastly, values should compare equal - tm.assert_numpy_array_equal(res, expected) + tm.assert_index_equal(res, expected) def test_convert_array_of_periods(self): rng = period_range('1/1/2000', periods=20, freq='D') diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index f0d14014d6559..d1e5379071b26 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -1513,8 +1513,8 @@ def test_map(self): f = lambda x: x.days result = rng.map(f) - exp = np.array([f(x) for x in rng], dtype=np.int64) - self.assert_numpy_array_equal(result, exp) + exp = Int64Index([f(x) for x in rng]) + self.assert_index_equal(result, exp) def test_misc_coverage(self): diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index beacc21912edc..9aec391b33d9d 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -3003,8 +3003,8 @@ def test_map(self): f = lambda x: x.strftime('%Y%m%d') result = rng.map(f) - exp = np.array([f(x) for x in rng], dtype='=U8') - tm.assert_almost_equal(result, exp) + exp = Index([f(x) for x in rng], dtype=' Date: Mon, 28 Nov 2016 16:22:51 -0500 Subject: [PATCH 07/11] Update whatsnew and add git PR to tests to denote changes --- doc/source/whatsnew/v0.20.0.txt | 100 ++++++++++++++++++++++++++++++ pandas/tests/series/test_apply.py | 2 + 2 files changed, 102 insertions(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 82af2e664135b..eefd142eb8a86 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ 
b/doc/source/whatsnew/v0.20.0.txt @@ -91,6 +91,10 @@ Other enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Map on Index types now return other Index types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + - ``map`` on an ``Index`` now returns an ``Index``, not an array (:issue:`12766`) .. ipython:: python @@ -111,6 +115,102 @@ Backwards incompatible API changes idx.map(lambda x: x * 2) +- ``map`` on an ``Index`` or ``MultiIndex`` returns the appropriate type depending on output dimensionality + + .. ipython:: python + + mi = MultiIndex.from_tuples([(1, 2), (2, 4)]) + mi + + Previous Behavior: + + .. code-block:: ipython + + In [5]: idx.map(lambda x: (x, x * 2)) + Out[5]: array([(1, 2), (2, 4)], dtype=object) + + + In [6]: mi.map(lambda x: x[0]) + Out[6]: array([1, 2]) + + New Behavior: + + .. ipython:: python + + idx.map(lambda x: (x, x * 2)) + + mi.map(lambda x: x[0]) + + +- ``map`` on an ``CategoricalIndex`` now returns a ``CategoricalIndex``, not a Categorical + + .. ipython:: python + + ci = CategoricalIndex(list('ABABC'), categories=list('CBA'), ordered=True) + ci + + Previous Behavior: + + .. code-block:: ipython + + In [7]: ci.map(lambda x: x.lower()) + Out[7]: + [a, b, a, b, c] + Categories (3, object): [c < b < a] + + New Behavior: + + .. ipython:: python + + ci.map(lambda x: x.lower()) + +- ``map`` on an ``DatetimeIndex`` or ``TimedeltaIndex`` now returns an ``Index`` instance + + .. ipython:: python + + dtidx = date_range(start='2016-01-01', end='2016-01-02') + dtidx + + Previous Behavior: + + .. code-block:: ipython + + In [8]: dtidx.map(lambda x: x.day) + Out[8]: array([1, 2]) + + New Behavior: + + .. ipython:: python + + dtidx.map(lambda x: x.day) + + +- ``map`` on a Series withe datetime64 values may return int64 dtypes rather than int32 + + .. ipython:: python + + s = Series(date_range('2011-01-02T00:00', '2011-01-02T02:00', freq='H').tz_localize('Asia/Tokyo')) + s + + Previous Behavior: + + .. 
code-block:: ipython + + In [9]: s.map(lambda x: x.hour) + Out[9]: + 0 0 + 1 1 + 2 2 + dtype: int32 + + + New Behavior: + + .. ipython:: python + + s.map(lambda x: x.hour) + + .. _whatsnew_0200.api: - ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index e7acc841c76ec..ec7ffde344d31 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -123,6 +123,7 @@ def test_apply_datetimetz(self): tm.assert_series_equal(result, exp) # change dtype + # GH 14506 : Returned dtype changed from int32 to int64 result = s.apply(lambda x: x.hour) exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64) tm.assert_series_equal(result, exp) @@ -317,6 +318,7 @@ def test_map_datetimetz(self): tm.assert_series_equal(result, exp) # change dtype + # GH 14506 : Returned dtype changed from int32 to int64 result = s.map(lambda x: x.hour) exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64) tm.assert_series_equal(result, exp) From a17ddab941b43c8fa6afdb7d9435fd217f9d60c9 Mon Sep 17 00:00:00 2001 From: Nate Yoder Date: Tue, 29 Nov 2016 06:45:19 -0800 Subject: [PATCH 08/11] Fix unused import and docstrings per pep8radius docformatter; change other uses of assert_index_equal to testing instead os self --- pandas/core/categorical.py | 4 ++-- pandas/indexes/base.py | 4 ++-- pandas/indexes/category.py | 4 ++-- pandas/tests/indexes/test_base.py | 16 ++++++++-------- pandas/tseries/base.py | 1 - pandas/tseries/tests/test_timedeltas.py | 2 +- pandas/tseries/tests/test_timeseries.py | 2 +- 7 files changed, 16 insertions(+), 17 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 4f72f6d59c7a8..5124dc44e2fc8 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -930,8 +930,7 @@ def remove_unused_categories(self, inplace=False): return cat 
def map(self, mapper): - """ - Apply mapper function to its categories (not codes). + """Apply mapper function to its categories (not codes). Parameters ---------- @@ -944,6 +943,7 @@ def map(self, mapper): Returns ------- applied : Categorical or Index. + """ new_categories = self.categories.map(mapper) try: diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 7fc6c1fb6e990..4589c1cef5206 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2427,8 +2427,7 @@ def groupby(self, values): return result def map(self, mapper): - """ - Apply mapper function to an index. + """Apply mapper function to an index. Parameters ---------- @@ -2441,6 +2440,7 @@ def map(self, mapper): The output of the mapping function applied to the index. If the function returns a tuple with more than one element a MultiIndex will be returned. + """ from .multi import MultiIndex mapped_values = self._arrmap(self.values, mapper) diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index e87d44155d388..2c89f72975ade 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -517,8 +517,7 @@ def take(self, indices, axis=0, allow_fill=True, return self._create_from_codes(taken) def map(self, mapper): - """ - Apply mapper function to its categories (not codes). + """Apply mapper function to its categories (not codes). 
Parameters ---------- @@ -531,6 +530,7 @@ def map(self, mapper): Returns ------- applied : CategoricalIndex or Index + """ return self._shallow_copy_with_infer(self.values.map(mapper)) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 56565c496cbf5..3536a52432b8c 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -770,7 +770,7 @@ def test_sub(self): def test_map_identity_mapping(self): # GH 12766 for name, cur_index in self.indices.items(): - self.assert_index_equal(cur_index, cur_index.map(lambda x: x)) + tm.assert_index_equal(cur_index, cur_index.map(lambda x: x)) def test_map_with_tuples(self): # GH 12766 @@ -779,35 +779,35 @@ def test_map_with_tuples(self): # returns an Index. boolean_index = tm.makeIntIndex(3).map(lambda x: (x,)) expected = Index([(0,), (1,), (2,)]) - self.assert_index_equal(boolean_index, expected) + tm.assert_index_equal(boolean_index, expected) # Test that returning a tuple from a map of a single index # returns a MultiIndex object. boolean_index = tm.makeIntIndex(3).map(lambda x: (x, x == 1)) expected = MultiIndex.from_tuples([(0, False), (1, True), (2, False)]) - self.assert_index_equal(boolean_index, expected) + tm.assert_index_equal(boolean_index, expected) # Test that returning a single object from a MultiIndex # returns an Index. 
first_level = ['foo', 'bar', 'baz'] multi_index = MultiIndex.from_tuples(lzip(first_level, [1, 2, 3])) reduced_index = multi_index.map(lambda x: x[0]) - self.assert_index_equal(reduced_index, Index(first_level)) + tm.assert_index_equal(reduced_index, Index(first_level)) def test_map_tseries_indices_return_index(self): date_index = tm.makeDateIndex(10) exp = Index([1] * 10) - self.assert_index_equal(exp, date_index.map(lambda x: 1)) + tm.assert_index_equal(exp, date_index.map(lambda x: 1)) period_index = tm.makePeriodIndex(10) - self.assert_index_equal(exp, period_index.map(lambda x: 1)) + tm.assert_index_equal(exp, period_index.map(lambda x: 1)) tdelta_index = tm.makeTimedeltaIndex(10) - self.assert_index_equal(exp, tdelta_index.map(lambda x: 1)) + tm.assert_index_equal(exp, tdelta_index.map(lambda x: 1)) date_index = tm.makeDateIndex(24, freq='h', name='hourly') exp = Index(range(24), name='hourly') - self.assert_index_equal(exp, date_index.map(lambda x: x.hour)) + tm.assert_index_equal(exp, date_index.map(lambda x: x.hour)) def test_append_multiple(self): index = Index(['a', 'b', 'c', 'd', 'e', 'f']) diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index aafa0dfb1e68a..b48f26e226540 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -27,7 +27,6 @@ from pandas.util.decorators import Appender, cache_readonly import pandas.types.concat as _concat import pandas.tseries.frequencies as frequencies -import pandas.algos as _algos class DatelikeOps(object): diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index d1e5379071b26..ca957ca0394d1 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -1514,7 +1514,7 @@ def test_map(self): f = lambda x: x.days result = rng.map(f) exp = Int64Index([f(x) for x in rng]) - self.assert_index_equal(result, exp) + tm.assert_index_equal(result, exp) def test_misc_coverage(self): diff --git 
a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 9aec391b33d9d..cd22ac561c6f7 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -3701,7 +3701,7 @@ def test_map_bug_1677(self): result = index.map(f) expected = Index([f(index[0])]) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) def test_groupby_function_tuple_1677(self): df = DataFrame(np.random.rand(100), From 4635e6a7c3caec7e508facd33fc76673b636f56b Mon Sep 17 00:00:00 2001 From: Nate Yoder Date: Mon, 5 Dec 2016 20:08:49 -0800 Subject: [PATCH 09/11] compare as index --- pandas/tseries/tests/test_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py index 1317f8e977234..7e4ed288e31c1 100644 --- a/pandas/tseries/tests/test_converter.py +++ b/pandas/tseries/tests/test_converter.py @@ -105,7 +105,7 @@ def test_dateindex_conversion(self): dateindex = tm.makeDateIndex(k=10, freq=freq) rs = self.dtc.convert(dateindex, None, None) xp = Index(converter.dates.date2num(dateindex._mpl_repr())) - tm.assert_almost_equal(rs, xp, decimals) + tm.assert_index_equal(rs, xp, decimals) def test_resolution(self): def _assert_less(ts1, ts2): From b36e83c41689221f22f9c24805fb1f11eac8158c Mon Sep 17 00:00:00 2001 From: Nate Yoder Date: Thu, 15 Dec 2016 19:27:05 -0800 Subject: [PATCH 10/11] update whatsnew, fix documentation --- doc/source/whatsnew/v0.20.0.txt | 78 ++++++--------------------------- pandas/indexes/base.py | 2 +- pandas/tseries/base.py | 2 +- 3 files changed, 15 insertions(+), 67 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index eefd142eb8a86..790ce0af4c3aa 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -95,30 +95,12 @@ Backwards incompatible API changes Map on Index types now return other Index types 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- ``map`` on an ``Index`` now returns an ``Index``, not an array (:issue:`12766`) +- ``map`` on an ``Index`` now returns an ``Index``, not a numpy array (:issue:`12766`) .. ipython:: python idx = Index([1, 2]) idx - - Previous Behavior: - - .. code-block:: ipython - - In [3]: idx.map(lambda x: x * 2) - Out[3]: array([2, 4]) - - New Behavior: - - .. ipython:: python - - idx.map(lambda x: x * 2) - -- ``map`` on an ``Index`` or ``MultiIndex`` returns the appropriate type depending on output dimensionality - - .. ipython:: python - mi = MultiIndex.from_tuples([(1, 2), (2, 4)]) mi @@ -126,63 +108,29 @@ Map on Index types now return other Index types .. code-block:: ipython - In [5]: idx.map(lambda x: (x, x * 2)) - Out[5]: array([(1, 2), (2, 4)], dtype=object) - - - In [6]: mi.map(lambda x: x[0]) - Out[6]: array([1, 2]) - - New Behavior: - - .. ipython:: python - - idx.map(lambda x: (x, x * 2)) - - mi.map(lambda x: x[0]) - - -- ``map`` on an ``CategoricalIndex`` now returns a ``CategoricalIndex``, not a Categorical - - .. ipython:: python - - ci = CategoricalIndex(list('ABABC'), categories=list('CBA'), ordered=True) - ci + In [5]: idx.map(lambda x: x * 2) + Out[5]: array([2, 4]) - Previous Behavior: + In [6]: idx.map(lambda x: (x, x * 2)) + Out[6]: array([(1, 2), (2, 4)], dtype=object) - .. code-block:: ipython + In [7]: mi.map(lambda x: x) + Out[7]: array([(1, 2), (2, 4)], dtype=object) - In [7]: ci.map(lambda x: x.lower()) - Out[7]: - [a, b, a, b, c] - Categories (3, object): [c < b < a] + In [8]: mi.map(lambda x: x[0]) + Out[8]: array([1, 2]) New Behavior: .. ipython:: python - ci.map(lambda x: x.lower()) - -- ``map`` on an ``DatetimeIndex`` or ``TimedeltaIndex`` now returns an ``Index`` instance - - .. ipython:: python - - dtidx = date_range(start='2016-01-01', end='2016-01-02') - dtidx - - Previous Behavior: - - .. 
code-block:: ipython - - In [8]: dtidx.map(lambda x: x.day) - Out[8]: array([1, 2]) + idx.map(lambda x: x * 2) - New Behavior: + idx.map(lambda x: (x, x * 2)) - .. ipython:: python + mi.map(lambda x: x) - dtidx.map(lambda x: x.day) + mi.map(lambda x: x[0]) - ``map`` on a Series withe datetime64 values may return int64 dtypes rather than int32 diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 4589c1cef5206..1cc546629589d 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2436,7 +2436,7 @@ def map(self, mapper): Returns ------- - applied : Index + applied : Union[Index, MultiIndex], inferred The output of the mapping function applied to the index. If the function returns a tuple with more than one element a MultiIndex will be returned. diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index b48f26e226540..8e24f430108b3 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -335,7 +335,7 @@ def map(self, f): self._shallow_copy(result) if not isinstance(result, Index): - raise TypeError + raise TypeError('The map function must return an Index object') return result except Exception: return self.asobject.map(f) From 95e4440d18b499c79e1ae5de81af0bb0d86f7eae Mon Sep 17 00:00:00 2001 From: Nate Yoder Date: Fri, 16 Dec 2016 10:17:11 -0800 Subject: [PATCH 11/11] fix typo and add ref tag in whatsnew --- doc/source/whatsnew/v0.20.0.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 790ce0af4c3aa..e264fb15f3e67 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -92,6 +92,8 @@ Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
_whatsnew.api_breaking.index_map: + Map on Index types now return other Index types ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -133,7 +135,7 @@ Map on Index types now return other Index types mi.map(lambda x: x[0]) -- ``map`` on a Series withe datetime64 values may return int64 dtypes rather than int32 +- ``map`` on a Series with datetime64 values may return int64 dtypes rather than int32 .. ipython:: python