Merge pull request #5145 from cpcloud/replace-exact-fix

cpcloud · cpcloud · commit c9d004f0b304 · 2013-10-07T21:35:21.000-07:00
BUG: allow tuples in recursive call to replace
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -573,6 +573,8 @@ Bug Fixes
   - Fix bound checking for Timestamp() with datetime64 input (:issue:`4065`)
   - Fix a bug where ``TestReadHtml`` wasn't calling the correct ``read_html()``
     function (:issue:`5150`).
+  - Fix a bug in ``NDFrame.replace()`` that made replacements appear as
+    though they were (incorrectly) using regular expressions (:issue:`5143`).
 
 pandas 0.12.0
 -------------
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -7,23 +7,16 @@
 import numbers
 import codecs
 import csv
-import sys
 import types
 
-from datetime import timedelta
-
-from distutils.version import LooseVersion
-
 from numpy.lib.format import read_array, write_array
 import numpy as np
 
 import pandas.algos as algos
 import pandas.lib as lib
 import pandas.tslib as tslib
 from pandas import compat
-from pandas.compat import (StringIO, BytesIO, range, long, u, zip, map,
-                           string_types)
-from datetime import timedelta
+from pandas.compat import StringIO, BytesIO, range, long, u, zip, map
 
 from pandas.core.config import get_option
 from pandas.core import array as pa
@@ -36,6 +29,7 @@ class PandasError(Exception):
 class AmbiguousIndexError(PandasError, KeyError):
     pass
 
+
 _POSSIBLY_CAST_DTYPES = set([np.dtype(t)
                             for t in ['M8[ns]', 'm8[ns]', 'O', 'int8',
                                       'uint8', 'int16', 'uint16', 'int32',
@@ -101,6 +95,7 @@ class to receive bound method
     else:
         setattr(cls, name, func)
 
+
 def isnull(obj):
     """Detect missing values (NaN in numeric arrays, None/NaN in object arrays)
 
@@ -772,6 +767,7 @@ def diff(arr, n, axis=0):
 
     return out_arr
 
+
 def _coerce_to_dtypes(result, dtypes):
     """ given a dtypes and a result set, coerce the result elements to the dtypes """
     if len(result) != len(dtypes):
@@ -800,6 +796,7 @@ def conv(r,dtype):
 
     return np.array([ conv(r,dtype) for r, dtype in zip(result,dtypes) ])
 
+
 def _infer_dtype_from_scalar(val):
     """ interpret the dtype from a scalar, upcast floats and ints
         return the new value and the dtype """
@@ -986,6 +983,7 @@ def changeit():
 
     return result, False
 
+
 def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
     """ provide explicty type promotion and coercion
 
@@ -1166,6 +1164,7 @@ def pad_1d(values, limit=None, mask=None):
     _method(values, mask, limit=limit)
     return values
 
+
 def backfill_1d(values, limit=None, mask=None):
 
     dtype = values.dtype.name
@@ -1190,6 +1189,7 @@ def backfill_1d(values, limit=None, mask=None):
     _method(values, mask, limit=limit)
     return values
 
+
 def pad_2d(values, limit=None, mask=None):
 
     dtype = values.dtype.name
@@ -1218,6 +1218,7 @@ def pad_2d(values, limit=None, mask=None):
         pass
     return values
 
+
 def backfill_2d(values, limit=None, mask=None):
 
     dtype = values.dtype.name
@@ -1246,6 +1247,7 @@ def backfill_2d(values, limit=None, mask=None):
         pass
     return values
 
+
 def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None):
     """ perform an actual interpolation of values, values will be make 2-d if needed
         fills inplace, returns the result """
@@ -1371,6 +1373,7 @@ def _possibly_convert_platform(values):
 
     return values
 
+
 def _possibly_cast_to_datetime(value, dtype, coerce=False):
     """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """
 
@@ -1787,6 +1790,7 @@ def is_datetime64_dtype(arr_or_dtype):
         tipo = arr_or_dtype.dtype.type
     return issubclass(tipo, np.datetime64)
 
+
 def is_datetime64_ns_dtype(arr_or_dtype):
     if isinstance(arr_or_dtype, np.dtype):
         tipo = arr_or_dtype
@@ -1796,6 +1800,7 @@ def is_datetime64_ns_dtype(arr_or_dtype):
         tipo = arr_or_dtype.dtype
     return tipo == _NS_DTYPE
 
+
 def is_timedelta64_dtype(arr_or_dtype):
     if isinstance(arr_or_dtype, np.dtype):
         tipo = arr_or_dtype.type
@@ -1851,6 +1856,7 @@ def _is_sequence(x):
     except (TypeError, AttributeError):
         return False
 
+
 _ensure_float64 = algos.ensure_float64
 _ensure_float32 = algos.ensure_float32
 _ensure_int64 = algos.ensure_int64
@@ -1987,6 +1993,7 @@ def _get_handle(path, mode, encoding=None, compression=None):
 
     return f
 
+
 if compat.PY3:  # pragma: no cover
     def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
         # ignore encoding
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -12,7 +12,6 @@
 # pylint: disable=E1101,E1103
 # pylint: disable=W0212,W0231,W0703,W0622
 
-import operator
 import sys
 import collections
 import warnings
@@ -25,7 +24,7 @@
 from pandas.core.common import (isnull, notnull, PandasError, _try_sort,
                                 _default_index, _maybe_upcast, _is_sequence,
                                 _infer_dtype_from_scalar, _values_from_object,
-                                _coerce_to_dtypes, _DATELIKE_DTYPES, is_list_like)
+                                _DATELIKE_DTYPES, is_list_like)
 from pandas.core.generic import NDFrame, _shared_docs
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import (_maybe_droplevels,
@@ -48,7 +47,6 @@
 from pandas.tseries.index import DatetimeIndex
 
 import pandas.core.algorithms as algos
-import pandas.core.datetools as datetools
 import pandas.core.common as com
 import pandas.core.format as fmt
 import pandas.core.nanops as nanops
@@ -4292,6 +4290,7 @@ def combineMult(self, other):
         """
         return self.mul(other, fill_value=1.)
 
+
 DataFrame._setup_axes(
     ['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True)
 DataFrame._add_numeric_operations()
@@ -4552,6 +4551,7 @@ def _masked_rec_array_to_mgr(data, index, columns, dtype, copy):
         mgr = mgr.copy()
     return mgr
 
+
 def _reorder_arrays(arrays, arr_columns, columns):
     # reorder according to the columns
     if columns is not None and len(columns) and arr_columns is not None and len(arr_columns):
@@ -4562,6 +4562,7 @@ def _reorder_arrays(arrays, arr_columns, columns):
         arrays = [arrays[i] for i in indexer]
     return arrays, arr_columns
 
+
 def _list_to_arrays(data, columns, coerce_float=False, dtype=None):
     if len(data) > 0 and isinstance(data[0], tuple):
         content = list(lib.to_object_array_tuples(data).T)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -18,9 +18,7 @@
 from pandas import compat, _np_version_under1p7
 from pandas.compat import map, zip, lrange, string_types, isidentifier
 from pandas.core.common import (isnull, notnull, is_list_like,
-                                _values_from_object,
-                                _infer_dtype_from_scalar, _maybe_promote,
-                                ABCSeries)
+                                _values_from_object, _maybe_promote, ABCSeries)
 import pandas.core.nanops as nanops
 from pandas.util.decorators import Appender, Substitution
 
@@ -36,6 +34,7 @@
 def is_dictlike(x):
     return isinstance(x, (dict, com.ABCSeries))
 
+
 def _single_replace(self, to_replace, method, inplace, limit):
     orig_dtype = self.dtype
     result = self if inplace else self.copy()
@@ -1844,7 +1843,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
         self._consolidate_inplace()
 
         if value is None:
-            if isinstance(to_replace, list):
+            if isinstance(to_replace, (tuple, list)):
                 return _single_replace(self, to_replace, method, inplace,
                                        limit)
 
@@ -1856,7 +1855,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
                 to_replace = regex
                 regex = True
 
-            items = to_replace.items()
+            items = list(compat.iteritems(to_replace))
             keys, values = zip(*items)
 
             are_mappings = [is_dictlike(v) for v in values]
@@ -1899,7 +1898,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
                                                         regex=regex)
 
                 # {'A': NA} -> 0
-                elif not isinstance(value, (list, np.ndarray)):
+                elif not com.is_list_like(value):
                     new_data = self._data
                     for k, src in compat.iteritems(to_replace):
                         if k in self:
@@ -1911,9 +1910,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
                     raise TypeError('Fill value must be scalar, dict, or '
                                     'Series')
 
-            elif isinstance(to_replace, (list, np.ndarray)):
-                # [NA, ''] -> [0, 'missing']
-                if isinstance(value, (list, np.ndarray)):
+            elif com.is_list_like(to_replace): # [NA, ''] -> [0, 'missing']
+                if com.is_list_like(value):
                     if len(to_replace) != len(value):
                         raise ValueError('Replacement lists must match '
                                          'in length. Expecting %d got %d ' %
@@ -1928,11 +1926,13 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
                                                   inplace=inplace, regex=regex)
             elif to_replace is None:
                 if not (com.is_re_compilable(regex) or
-                        isinstance(regex, (list, np.ndarray)) or is_dictlike(regex)):
+                        com.is_list_like(regex) or
+                        is_dictlike(regex)):
                     raise TypeError("'regex' must be a string or a compiled "
                                     "regular expression or a list or dict of "
                                     "strings or regular expressions, you "
-                                    "passed a {0}".format(type(regex)))
+                                    "passed a"
+                                    " {0!r}".format(type(regex).__name__))
                 return self.replace(regex, value, inplace=inplace, limit=limit,
                                     regex=True)
             else:
@@ -1948,12 +1948,13 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
                                                         inplace=inplace,
                                                         regex=regex)
 
-                elif not isinstance(value, (list, np.ndarray)):  # NA -> 0
+                elif not com.is_list_like(value):  # NA -> 0
                     new_data = self._data.replace(to_replace, value,
                                                   inplace=inplace, regex=regex)
                 else:
-                    raise TypeError('Invalid "to_replace" type: '
-                                    '{0}'.format(type(to_replace)))  # pragma: no cover
+                    msg = ('Invalid "to_replace" type: '
+                           '{0!r}').format(type(to_replace).__name__)
+                    raise TypeError(msg)  # pragma: no cover
 
         new_data = new_data.convert(copy=not inplace, convert_numeric=False)
 
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -992,6 +992,7 @@ class NumericBlock(Block):
     is_numeric = True
     _can_hold_na = True
 
+
 class FloatBlock(NumericBlock):
     is_float = True
     _downcast_dtype = 'int64'
@@ -1064,6 +1065,7 @@ def _try_cast(self, element):
     def should_store(self, value):
         return com.is_integer_dtype(value) and value.dtype == self.dtype
 
+
 class TimeDeltaBlock(IntBlock):
     is_timedelta = True
     _can_hold_na = True
@@ -1130,6 +1132,7 @@ def to_native_types(self, slicer=None, na_rep=None, **kwargs):
                                         for val in values.ravel()[imask]], dtype=object)
         return rvalues.tolist()
 
+
 class BoolBlock(NumericBlock):
     is_bool = True
     _can_hold_na = False
@@ -1677,6 +1680,7 @@ def split_block_at(self, item):
     def _try_cast_result(self, result, dtype=None):
         return result
 
+
 def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None, fastpath=False, placement=None):
 
     if klass is None:
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py