Skip to content

Commit f5001a1

Browse files
committed
ENH: return NA (label -1) for out-of-range values in cut/qcut instead of raising ValueError, closes #1463
1 parent 3850dd7 commit f5001a1

File tree

2 files changed: 9 additions and 20 deletions

2 files changed

+9
-20
lines changed

pandas/tools/tests/test_tile.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,12 @@ def test_cut_out_of_bounds(self):
120120
np.random.seed(12345)
121121

122122
arr = np.random.randn(100)
123-
self.assertRaises(ValueError, cut, arr, [-1, 0, 1])
124123

125-
arr = np.where(arr < -1, 0, arr)
126-
self.assertRaises(ValueError, cut, arr, [-1, 0, 1])
124+
result = cut(arr, [-1, 0, 1])
125+
126+
mask = result.labels == -1
127+
ex_mask = (arr < -1) | (arr > 1)
128+
self.assert_(np.array_equal(mask, ex_mask))
127129

128130
def test_cut_pass_labels(self):
129131
arr = [50, 5, 10, 15, 20, 30, 70]

pandas/tools/tile.py

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -148,18 +148,8 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
148148
side = 'left' if right else 'right'
149149
ids = bins.searchsorted(x, side=side)
150150

151-
na_mask = com.notnull(x)
152-
above = na_mask & (ids == len(bins))
153-
below = na_mask & (ids == 0)
154-
155-
if above.any():
156-
raise ValueError('Values fall past last bin: %s' % str(x[above]))
157-
158-
if below.any():
159-
raise ValueError('Values fall before first bin: %s' % str(x[below]))
160-
161-
mask = com.isnull(x)
162-
has_nas = mask.any()
151+
na_mask = com.isnull(x) | (ids == len(bins)) | (ids == 0)
152+
has_nas = na_mask.any()
163153

164154
if labels is not False:
165155
if labels is None:
@@ -177,16 +167,13 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
177167
levels = labels
178168

179169
levels = np.asarray(levels, dtype=object)
180-
181-
if has_nas:
182-
np.putmask(ids, mask, 0)
183-
170+
np.putmask(ids, na_mask, 0)
184171
fac = Categorical(ids - 1, levels, name=name)
185172
else:
186173
fac = ids - 1
187174
if has_nas:
188175
fac = ids.astype(np.float64)
189-
np.putmask(fac, mask, np.nan)
176+
np.putmask(fac, na_mask, np.nan)
190177

191178
if not retbins:
192179
return fac

0 commit comments

Comments
 (0)