Skip to content

Commit 2d571e6

Browse files
committed
Merge pull request #5943 from jreback/match
BUG: pd.match not returning passed sentinel
2 parents 7d0c26e + a88a208 commit 2d571e6

File tree

3 files changed

+27
-3
lines changed

3 files changed

+27
-3
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ Bug Fixes
103103
- Bug in propagating metadata on ``resample`` (:issue:`5862`)
104104
- Fixed string-representation of ``NaT`` to be "NaT" (:issue:`5708`)
105105
- Fixed string-representation for Timestamp to show nanoseconds if present (:issue:`5912`)
106+
- Bug in ``pd.match`` not returning the passed sentinel (:issue:`5943`)
106107

107108
pandas 0.13.0
108109
-------------

pandas/core/algorithms.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import pandas.hashtable as htable
1212
import pandas.compat as compat
1313

14-
1514
def match(to_match, values, na_sentinel=-1):
1615
"""
1716
Compute locations of to_match into values
@@ -37,7 +36,16 @@ def match(to_match, values, na_sentinel=-1):
3736
values = np.array(values, dtype='O')
3837

3938
f = lambda htype, caster: _match_generic(to_match, values, htype, caster)
40-
return _hashtable_algo(f, values.dtype)
39+
result = _hashtable_algo(f, values.dtype)
40+
41+
if na_sentinel != -1:
42+
43+
# replace but return a numpy array
44+
# use a Series because it handles dtype conversions properly
45+
from pandas.core.series import Series
46+
result = Series(result.ravel()).replace(-1,na_sentinel).values.reshape(result.shape)
47+
48+
return result
4149

4250

4351
def unique(values):

pandas/tests/test_algos.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import pandas.core.algorithms as algos
99
import pandas.util.testing as tm
1010

11-
1211
class TestMatch(tm.TestCase):
1312
_multiprocess_can_split_ = True
1413

@@ -20,6 +19,19 @@ def test_ints(self):
2019
expected = np.array([0, 2, 1, 1, 0, 2, -1, 0])
2120
self.assert_(np.array_equal(result, expected))
2221

22+
result = Series(algos.match(to_match, values, np.nan))
23+
expected = Series(np.array([0, 2, 1, 1, 0, 2, np.nan, 0]))
24+
tm.assert_series_equal(result,expected)
25+
26+
s = pd.Series(np.arange(5),dtype=np.float32)
27+
result = algos.match(s, [2,4])
28+
expected = np.array([-1, -1, 0, -1, 1])
29+
self.assert_(np.array_equal(result, expected))
30+
31+
result = Series(algos.match(s, [2,4], np.nan))
32+
expected = Series(np.array([np.nan, np.nan, 0, np.nan, 1]))
33+
tm.assert_series_equal(result,expected)
34+
2335
def test_strings(self):
2436
values = ['foo', 'bar', 'baz']
2537
to_match = ['bar', 'foo', 'qux', 'foo', 'bar', 'baz', 'qux']
@@ -28,6 +40,9 @@ def test_strings(self):
2840
expected = np.array([1, 0, -1, 0, 1, 2, -1])
2941
self.assert_(np.array_equal(result, expected))
3042

43+
result = Series(algos.match(to_match, values, np.nan))
44+
expected = Series(np.array([1, 0, np.nan, 0, 1, 2, np.nan]))
45+
tm.assert_series_equal(result,expected)
3146

3247
class TestUnique(tm.TestCase):
3348
_multiprocess_can_split_ = True

0 commit comments

Comments
 (0)