From a88a208be21fdfce82179ae34e0eb2d0e21177c7 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Tue, 14 Jan 2014 21:48:58 -0500
Subject: [PATCH] BUG: pd.match not returning passed sentinel

---
 doc/source/release.rst     |  1 +
 pandas/core/algorithms.py  | 12 ++++++++++--
 pandas/tests/test_algos.py | 17 ++++++++++++++++-
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 6d550d4f0b588..9a0854494a897 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -103,6 +103,7 @@ Bug Fixes
   - Bug in propogating metadata on ``resample`` (:issue:`5862`)
   - Fixed string-representation of ``NaT`` to be "NaT" (:issue:`5708`)
   - Fixed string-representation for Timestamp to show nanoseconds if present (:issue:`5912`)
+  - Bug in ``pd.match`` not returning passed sentinel
 
 pandas 0.13.0
 -------------
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 24c14a5d7f215..f76f952c53d1d 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -11,7 +11,6 @@
 import pandas.hashtable as htable
 import pandas.compat as compat
 
-
 def match(to_match, values, na_sentinel=-1):
     """
     Compute locations of to_match into values
@@ -37,7 +36,16 @@ def match(to_match, values, na_sentinel=-1):
         values = np.array(values, dtype='O')
 
     f = lambda htype, caster: _match_generic(to_match, values, htype, caster)
-    return _hashtable_algo(f, values.dtype)
+    result = _hashtable_algo(f, values.dtype)
+
+    if na_sentinel != -1:
+
+        # swap the -1 "no match" marker for na_sentinel; return a numpy array
+        # Series.replace handles any dtype change (e.g. int -> float for NaN)
+        from pandas.core.series import Series
+        result = Series(result.ravel()).replace(-1,na_sentinel).values.reshape(result.shape)
+
+    return result
 
 
 def unique(values):
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 2cbccbaf5c66b..027e7c5fab191 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -8,7 +8,6 @@
 import pandas.core.algorithms as algos
 import pandas.util.testing as tm
 
-
 class TestMatch(tm.TestCase):
     _multiprocess_can_split_ = True
 
@@ -20,6 +19,19 @@ def test_ints(self):
         expected = np.array([0, 2, 1, 1, 0, 2, -1, 0])
         self.assert_(np.array_equal(result, expected))
 
+        result = Series(algos.match(to_match, values, np.nan))
+        expected = Series(np.array([0, 2, 1, 1, 0, 2, np.nan, 0]))
+        tm.assert_series_equal(result,expected)
+
+        s = pd.Series(np.arange(5),dtype=np.float32)
+        result = algos.match(s, [2,4])
+        expected = np.array([-1, -1, 0, -1, 1])
+        self.assert_(np.array_equal(result, expected))
+
+        result = Series(algos.match(s, [2,4], np.nan))
+        expected = Series(np.array([np.nan, np.nan, 0, np.nan, 1]))
+        tm.assert_series_equal(result,expected)
+
     def test_strings(self):
         values = ['foo', 'bar', 'baz']
         to_match = ['bar', 'foo', 'qux', 'foo', 'bar', 'baz', 'qux']
@@ -28,6 +40,9 @@ def test_strings(self):
         expected = np.array([1, 0, -1, 0, 1, 2, -1])
         self.assert_(np.array_equal(result, expected))
 
+        result = Series(algos.match(to_match, values, np.nan))
+        expected = Series(np.array([1, 0, np.nan, 0, 1, 2, np.nan]))
+        tm.assert_series_equal(result,expected)
 
 class TestUnique(tm.TestCase):
     _multiprocess_can_split_ = True