Skip to content

Commit f3323f4

Browse files
committed
P Addressed review comments on f483d6a
1 parent f483d6a commit f3323f4

File tree

3 files changed

+20
-8
lines changed

3 files changed

+20
-8
lines changed

doc/source/whatsnew/v0.24.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ Other Enhancements
236236
- Compatibility with Matplotlib 3.0 (:issue:`22790`).
237237
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`)
238238
- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`8917`)
239+
- :func:`cut` ``bins`` kwarg now accepts a string, which is dispatched to :func:`numpy.histogram_bin_edges` (:issue:`14627`)
239240

240241
.. _whatsnew_0240.api_breaking:
241242

pandas/core/reshape/tile.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
is_scalar, is_timedelta64_dtype)
1414
from pandas.core.dtypes.missing import isna
1515

16+
from pandas.compat import string_types
17+
1618
from pandas import (
1719
Categorical, Index, Interval, IntervalIndex, Series, Timedelta, Timestamp,
1820
to_datetime, to_timedelta)
@@ -42,7 +44,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
4244
range of `x` is extended by .1% on each side to include the minimum
4345
and maximum values of `x`.
4446
* str : Bin calculation dispatched to `np.histogram_bin_edges`. See that
45-
documentation for details.
47+
documentation for details. (versionadded:: 0.24.0)
4648
* sequence of scalars : Defines the bin edges allowing for non-uniform
4749
width. No extension of the range of `x` is done.
4850
* IntervalIndex : Defines the exact bins to be used.
@@ -87,9 +89,9 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
8789
8890
bins : numpy.ndarray or IntervalIndex
8991
The computed or specified bins. Only returned when `retbins=True`.
90-
For scalar or sequence `bins`, this is an ndarray with the computed
91-
bins. If set `duplicates=drop`, `bins` will drop non-unique bin. For
92-
an IntervalIndex `bins`, this is equal to `bins`.
92+
For scalar, str, or sequence `bins`, this is an ndarray with the
93+
computed bins. If set `duplicates=drop`, `bins` will drop non-unique
94+
bin. For an IntervalIndex `bins`, this is equal to `bins`.
9395
9496
See Also
9597
--------
@@ -100,6 +102,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
100102
Series : One-dimensional array with axis labels (including time series).
101103
pandas.IntervalIndex : Immutable Index implementing an ordered,
102104
sliceable set.
105+
numpy.histogram_bin_edges : Bin calculation dispatched to this method when
106+
`bins` is a string.
103107
104108
Notes
105109
-----
@@ -185,7 +189,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
185189
Categories (3, interval[int64]): [(0, 1] < (2, 3] < (4, 5]]
186190
187191
Passing a string for `bins` dispatches the bin calculation to numpy's
188-
`histogram_bin_edges`.
192+
`histogram_bin_edges`. (Starting in version 0.24.)
189193
>>> pd.cut(array([0.1, 0.1, 0.2, 0.5, 0.5, 0.9, 1.0]),
190194
... bins="auto")
191195
... # doctest: +ELLIPSIS
@@ -200,7 +204,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
200204
x_is_series, series_index, name, x = _preprocess_for_cut(x)
201205
x, dtype = _coerce_to_type(x)
202206

203-
if isinstance(bins, str):
207+
if isinstance(bins, string_types):
208+
# GH 14627
204209
bins = np.histogram_bin_edges(x, bins)
205210
mn, mx = bins[0], bins[-1]
206211
adj = (mx - mn)

pandas/tests/reshape/test_tile.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616
from pandas.core.algorithms import quantile
1717
import pandas.core.reshape.tile as tmod
1818

19-
import pdb
20-
2119
class TestCut(object):
2220

2321
def test_simple(self):
@@ -39,6 +37,7 @@ def test_bins(self):
3937
6.53333333, 9.7]))
4038

4139
def test_str_bins(self):
40+
# GH 14627
4241
data = np.array([0.1, 0.1, 0.2, 0.5, 0.5, 0.9, 1.0])
4342
result, bins_cut = cut(data, bins="auto",
4443
retbins=True)
@@ -55,6 +54,13 @@ def test_str_bins(self):
5554
expected = Categorical(intervals, ordered=True)
5655
tm.assert_index_equal(result.categories,
5756
expected.categories)
57+
58+
59+
# Test that a `bin` string not present in `np.histogram_bin_edges`
60+
# throws a ValueError.
61+
tm.assert_raises_regex(ValueError,
62+
"'*' is not a valid estimator for `bins`",
63+
cut, data, "bad bins")
5864

5965
def test_right(self):
6066
data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575])

0 commit comments

Comments
 (0)