From a88a208be21fdfce82179ae34e0eb2d0e21177c7 Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 14 Jan 2014 21:48:58 -0500 Subject: [PATCH] BUG: pd.match not returning passed sentinel --- doc/source/release.rst | 1 + pandas/core/algorithms.py | 12 ++++++++++-- pandas/tests/test_algos.py | 17 ++++++++++++++++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 6d550d4f0b588..9a0854494a897 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -103,6 +103,7 @@ Bug Fixes - Bug in propogating metadata on ``resample`` (:issue:`5862`) - Fixed string-representation of ``NaT`` to be "NaT" (:issue:`5708`) - Fixed string-representation for Timestamp to show nanoseconds if present (:issue:`5912`) + - ``pd.match`` not returning passed sentinel pandas 0.13.0 ------------- diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 24c14a5d7f215..f76f952c53d1d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -11,7 +11,6 @@ import pandas.hashtable as htable import pandas.compat as compat - def match(to_match, values, na_sentinel=-1): """ Compute locations of to_match into values @@ -37,7 +36,16 @@ def match(to_match, values, na_sentinel=-1): values = np.array(values, dtype='O') f = lambda htype, caster: _match_generic(to_match, values, htype, caster) - return _hashtable_algo(f, values.dtype) + result = _hashtable_algo(f, values.dtype) + + if na_sentinel != -1: + + # replace but return a numpy array + # use a Series because it handles dtype conversions properly + from pandas.core.series import Series + result = Series(result.ravel()).replace(-1,na_sentinel).values.reshape(result.shape) + + return result def unique(values): diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 2cbccbaf5c66b..027e7c5fab191 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -8,7 +8,6 @@ import pandas.core.algorithms as algos import pandas.util.testing as tm - class TestMatch(tm.TestCase): _multiprocess_can_split_ = True @@ -20,6 +19,19 @@ def test_ints(self): expected = np.array([0, 2, 1, 1, 0, 2, -1, 0]) self.assert_(np.array_equal(result, expected)) + result = Series(algos.match(to_match, values, np.nan)) + expected = Series(np.array([0, 2, 1, 1, 0, 2, np.nan, 0])) + tm.assert_series_equal(result,expected) + + s = pd.Series(np.arange(5),dtype=np.float32) + result = algos.match(s, [2,4]) + expected = np.array([-1, -1, 0, -1, 1]) + self.assert_(np.array_equal(result, expected)) + + result = Series(algos.match(s, [2,4], np.nan)) + expected = Series(np.array([np.nan, np.nan, 0, np.nan, 1])) + tm.assert_series_equal(result,expected) + def test_strings(self): values = ['foo', 'bar', 'baz'] to_match = ['bar', 'foo', 'qux', 'foo', 'bar', 'baz', 'qux'] @@ -28,6 +40,9 @@ def test_strings(self): expected = np.array([1, 0, -1, 0, 1, 2, -1]) self.assert_(np.array_equal(result, expected)) + result = Series(algos.match(to_match, values, np.nan)) + expected = Series(np.array([1, 0, np.nan, 0, 1, 2, np.nan])) + tm.assert_series_equal(result,expected) class TestUnique(tm.TestCase): _multiprocess_can_split_ = True