Skip to content

Commit c9d004f

Browse files
committed
Merge pull request #5145 from cpcloud/replace-exact-fix
BUG: allow tuples in recursive call to replace
2 parents 239d1be + ba6c088 commit c9d004f

File tree

6 files changed

+83
-35
lines changed

6 files changed

+83
-35
lines changed

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,8 @@ Bug Fixes
573573
- Fix bound checking for Timestamp() with datetime64 input (:issue:`4065`)
574574
- Fix a bug where ``TestReadHtml`` wasn't calling the correct ``read_html()``
575575
function (:issue:`5150`).
576+
- Fix a bug with ``NDFrame.replace()`` which made replacement appear as
577+
though it was (incorrectly) using regular expressions (:issue:`5143`).
576578

577579
pandas 0.12.0
578580
-------------

pandas/core/common.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,16 @@
77
import numbers
88
import codecs
99
import csv
10-
import sys
1110
import types
1211

13-
from datetime import timedelta
14-
15-
from distutils.version import LooseVersion
16-
1712
from numpy.lib.format import read_array, write_array
1813
import numpy as np
1914

2015
import pandas.algos as algos
2116
import pandas.lib as lib
2217
import pandas.tslib as tslib
2318
from pandas import compat
24-
from pandas.compat import (StringIO, BytesIO, range, long, u, zip, map,
25-
string_types)
26-
from datetime import timedelta
19+
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map
2720

2821
from pandas.core.config import get_option
2922
from pandas.core import array as pa
@@ -36,6 +29,7 @@ class PandasError(Exception):
3629
class AmbiguousIndexError(PandasError, KeyError):
3730
pass
3831

32+
3933
_POSSIBLY_CAST_DTYPES = set([np.dtype(t)
4034
for t in ['M8[ns]', 'm8[ns]', 'O', 'int8',
4135
'uint8', 'int16', 'uint16', 'int32',
@@ -101,6 +95,7 @@ class to receive bound method
10195
else:
10296
setattr(cls, name, func)
10397

98+
10499
def isnull(obj):
105100
"""Detect missing values (NaN in numeric arrays, None/NaN in object arrays)
106101
@@ -772,6 +767,7 @@ def diff(arr, n, axis=0):
772767

773768
return out_arr
774769

770+
775771
def _coerce_to_dtypes(result, dtypes):
776772
""" given dtypes and a result set, coerce the result elements to the dtypes """
777773
if len(result) != len(dtypes):
@@ -800,6 +796,7 @@ def conv(r,dtype):
800796

801797
return np.array([ conv(r,dtype) for r, dtype in zip(result,dtypes) ])
802798

799+
803800
def _infer_dtype_from_scalar(val):
804801
""" interpret the dtype from a scalar, upcast floats and ints
805802
return the new value and the dtype """
@@ -986,6 +983,7 @@ def changeit():
986983

987984
return result, False
988985

986+
989987
def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
990988
""" provide explicit type promotion and coercion
991989
@@ -1166,6 +1164,7 @@ def pad_1d(values, limit=None, mask=None):
11661164
_method(values, mask, limit=limit)
11671165
return values
11681166

1167+
11691168
def backfill_1d(values, limit=None, mask=None):
11701169

11711170
dtype = values.dtype.name
@@ -1190,6 +1189,7 @@ def backfill_1d(values, limit=None, mask=None):
11901189
_method(values, mask, limit=limit)
11911190
return values
11921191

1192+
11931193
def pad_2d(values, limit=None, mask=None):
11941194

11951195
dtype = values.dtype.name
@@ -1218,6 +1218,7 @@ def pad_2d(values, limit=None, mask=None):
12181218
pass
12191219
return values
12201220

1221+
12211222
def backfill_2d(values, limit=None, mask=None):
12221223

12231224
dtype = values.dtype.name
@@ -1246,6 +1247,7 @@ def backfill_2d(values, limit=None, mask=None):
12461247
pass
12471248
return values
12481249

1250+
12491251
def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None):
12501252
""" perform an actual interpolation of values, values will be made 2-d if needed
12511253
fills inplace, returns the result """
@@ -1371,6 +1373,7 @@ def _possibly_convert_platform(values):
13711373

13721374
return values
13731375

1376+
13741377
def _possibly_cast_to_datetime(value, dtype, coerce=False):
13751378
""" try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """
13761379

@@ -1787,6 +1790,7 @@ def is_datetime64_dtype(arr_or_dtype):
17871790
tipo = arr_or_dtype.dtype.type
17881791
return issubclass(tipo, np.datetime64)
17891792

1793+
17901794
def is_datetime64_ns_dtype(arr_or_dtype):
17911795
if isinstance(arr_or_dtype, np.dtype):
17921796
tipo = arr_or_dtype
@@ -1796,6 +1800,7 @@ def is_datetime64_ns_dtype(arr_or_dtype):
17961800
tipo = arr_or_dtype.dtype
17971801
return tipo == _NS_DTYPE
17981802

1803+
17991804
def is_timedelta64_dtype(arr_or_dtype):
18001805
if isinstance(arr_or_dtype, np.dtype):
18011806
tipo = arr_or_dtype.type
@@ -1851,6 +1856,7 @@ def _is_sequence(x):
18511856
except (TypeError, AttributeError):
18521857
return False
18531858

1859+
18541860
_ensure_float64 = algos.ensure_float64
18551861
_ensure_float32 = algos.ensure_float32
18561862
_ensure_int64 = algos.ensure_int64
@@ -1987,6 +1993,7 @@ def _get_handle(path, mode, encoding=None, compression=None):
19871993

19881994
return f
19891995

1996+
19901997
if compat.PY3: # pragma: no cover
19911998
def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
19921999
# ignore encoding

pandas/core/frame.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
# pylint: disable=E1101,E1103
1313
# pylint: disable=W0212,W0231,W0703,W0622
1414

15-
import operator
1615
import sys
1716
import collections
1817
import warnings
@@ -25,7 +24,7 @@
2524
from pandas.core.common import (isnull, notnull, PandasError, _try_sort,
2625
_default_index, _maybe_upcast, _is_sequence,
2726
_infer_dtype_from_scalar, _values_from_object,
28-
_coerce_to_dtypes, _DATELIKE_DTYPES, is_list_like)
27+
_DATELIKE_DTYPES, is_list_like)
2928
from pandas.core.generic import NDFrame, _shared_docs
3029
from pandas.core.index import Index, MultiIndex, _ensure_index
3130
from pandas.core.indexing import (_maybe_droplevels,
@@ -48,7 +47,6 @@
4847
from pandas.tseries.index import DatetimeIndex
4948

5049
import pandas.core.algorithms as algos
51-
import pandas.core.datetools as datetools
5250
import pandas.core.common as com
5351
import pandas.core.format as fmt
5452
import pandas.core.nanops as nanops
@@ -4292,6 +4290,7 @@ def combineMult(self, other):
42924290
"""
42934291
return self.mul(other, fill_value=1.)
42944292

4293+
42954294
DataFrame._setup_axes(
42964295
['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True)
42974296
DataFrame._add_numeric_operations()
@@ -4552,6 +4551,7 @@ def _masked_rec_array_to_mgr(data, index, columns, dtype, copy):
45524551
mgr = mgr.copy()
45534552
return mgr
45544553

4554+
45554555
def _reorder_arrays(arrays, arr_columns, columns):
45564556
# reorder according to the columns
45574557
if columns is not None and len(columns) and arr_columns is not None and len(arr_columns):
@@ -4562,6 +4562,7 @@ def _reorder_arrays(arrays, arr_columns, columns):
45624562
arrays = [arrays[i] for i in indexer]
45634563
return arrays, arr_columns
45644564

4565+
45654566
def _list_to_arrays(data, columns, coerce_float=False, dtype=None):
45664567
if len(data) > 0 and isinstance(data[0], tuple):
45674568
content = list(lib.to_object_array_tuples(data).T)

pandas/core/generic.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@
1818
from pandas import compat, _np_version_under1p7
1919
from pandas.compat import map, zip, lrange, string_types, isidentifier
2020
from pandas.core.common import (isnull, notnull, is_list_like,
21-
_values_from_object,
22-
_infer_dtype_from_scalar, _maybe_promote,
23-
ABCSeries)
21+
_values_from_object, _maybe_promote, ABCSeries)
2422
import pandas.core.nanops as nanops
2523
from pandas.util.decorators import Appender, Substitution
2624

@@ -36,6 +34,7 @@
3634
def is_dictlike(x):
3735
return isinstance(x, (dict, com.ABCSeries))
3836

37+
3938
def _single_replace(self, to_replace, method, inplace, limit):
4039
orig_dtype = self.dtype
4140
result = self if inplace else self.copy()
@@ -1844,7 +1843,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
18441843
self._consolidate_inplace()
18451844

18461845
if value is None:
1847-
if isinstance(to_replace, list):
1846+
if isinstance(to_replace, (tuple, list)):
18481847
return _single_replace(self, to_replace, method, inplace,
18491848
limit)
18501849

@@ -1856,7 +1855,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
18561855
to_replace = regex
18571856
regex = True
18581857

1859-
items = to_replace.items()
1858+
items = list(compat.iteritems(to_replace))
18601859
keys, values = zip(*items)
18611860

18621861
are_mappings = [is_dictlike(v) for v in values]
@@ -1899,7 +1898,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
18991898
regex=regex)
19001899

19011900
# {'A': NA} -> 0
1902-
elif not isinstance(value, (list, np.ndarray)):
1901+
elif not com.is_list_like(value):
19031902
new_data = self._data
19041903
for k, src in compat.iteritems(to_replace):
19051904
if k in self:
@@ -1911,9 +1910,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
19111910
raise TypeError('Fill value must be scalar, dict, or '
19121911
'Series')
19131912

1914-
elif isinstance(to_replace, (list, np.ndarray)):
1915-
# [NA, ''] -> [0, 'missing']
1916-
if isinstance(value, (list, np.ndarray)):
1913+
elif com.is_list_like(to_replace): # [NA, ''] -> [0, 'missing']
1914+
if com.is_list_like(value):
19171915
if len(to_replace) != len(value):
19181916
raise ValueError('Replacement lists must match '
19191917
'in length. Expecting %d got %d ' %
@@ -1928,11 +1926,13 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
19281926
inplace=inplace, regex=regex)
19291927
elif to_replace is None:
19301928
if not (com.is_re_compilable(regex) or
1931-
isinstance(regex, (list, np.ndarray)) or is_dictlike(regex)):
1929+
com.is_list_like(regex) or
1930+
is_dictlike(regex)):
19321931
raise TypeError("'regex' must be a string or a compiled "
19331932
"regular expression or a list or dict of "
19341933
"strings or regular expressions, you "
1935-
"passed a {0}".format(type(regex)))
1934+
"passed a"
1935+
" {0!r}".format(type(regex).__name__))
19361936
return self.replace(regex, value, inplace=inplace, limit=limit,
19371937
regex=True)
19381938
else:
@@ -1948,12 +1948,13 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
19481948
inplace=inplace,
19491949
regex=regex)
19501950

1951-
elif not isinstance(value, (list, np.ndarray)): # NA -> 0
1951+
elif not com.is_list_like(value): # NA -> 0
19521952
new_data = self._data.replace(to_replace, value,
19531953
inplace=inplace, regex=regex)
19541954
else:
1955-
raise TypeError('Invalid "to_replace" type: '
1956-
'{0}'.format(type(to_replace))) # pragma: no cover
1955+
msg = ('Invalid "to_replace" type: '
1956+
'{0!r}').format(type(to_replace).__name__)
1957+
raise TypeError(msg) # pragma: no cover
19571958

19581959
new_data = new_data.convert(copy=not inplace, convert_numeric=False)
19591960

pandas/core/internals.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,7 @@ class NumericBlock(Block):
992992
is_numeric = True
993993
_can_hold_na = True
994994

995+
995996
class FloatBlock(NumericBlock):
996997
is_float = True
997998
_downcast_dtype = 'int64'
@@ -1064,6 +1065,7 @@ def _try_cast(self, element):
10641065
def should_store(self, value):
10651066
return com.is_integer_dtype(value) and value.dtype == self.dtype
10661067

1068+
10671069
class TimeDeltaBlock(IntBlock):
10681070
is_timedelta = True
10691071
_can_hold_na = True
@@ -1130,6 +1132,7 @@ def to_native_types(self, slicer=None, na_rep=None, **kwargs):
11301132
for val in values.ravel()[imask]], dtype=object)
11311133
return rvalues.tolist()
11321134

1135+
11331136
class BoolBlock(NumericBlock):
11341137
is_bool = True
11351138
_can_hold_na = False
@@ -1677,6 +1680,7 @@ def split_block_at(self, item):
16771680
def _try_cast_result(self, result, dtype=None):
16781681
return result
16791682

1683+
16801684
def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None, fastpath=False, placement=None):
16811685

16821686
if klass is None:

0 commit comments

Comments
 (0)