From 4be1ffae17cda4c08adc08b3afe2d93fc7acf25e Mon Sep 17 00:00:00 2001 From: "B. L. Alterman" Date: Thu, 8 Nov 2018 09:36:50 -0500 Subject: [PATCH 1/9] ENH: numpy histogram bin edges in cut (GH 14627) Passing a string to `pd.cut` bins kwarg dispatches bin calculation to `np.histogram_bin_edges`. --- pandas/core/reshape/tile.py | 31 ++++++++++++++++++++++++++++--- pandas/tests/reshape/test_tile.py | 19 +++++++++++++++++++ 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 4a863372eea13..334e652a9e813 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -35,12 +35,14 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, ---------- x : array-like The input array to be binned. Must be 1-dimensional. - bins : int, sequence of scalars, or pandas.IntervalIndex + bins : int, str, sequence of scalars, or pandas.IntervalIndex The criteria to bin by. * int : Defines the number of equal-width bins in the range of `x`. The range of `x` is extended by .1% on each side to include the minimum and maximum values of `x`. + * str : Bin calculaton dispatched to `np.histogram_bin_edges`. See that + documentation for details. * sequence of scalars : Defines the bin edges allowing for non-uniform width. No extension of the range of `x` is done. * IntervalIndex : Defines the exact bins to be used. @@ -83,7 +85,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, * False : returns an ndarray of integers. - bins : numpy.ndarray or IntervalIndex. + bins : numpy.ndarray or IntervalIndex The computed or specified bins. Only returned when `retbins=True`. For scalar or sequence `bins`, this is an ndarray with the computed bins. If set `duplicates=drop`, `bins` will drop non-unique bin. 
For @@ -181,6 +183,16 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins) [NaN, (0, 1], NaN, (2, 3], (4, 5]] Categories (3, interval[int64]): [(0, 1] < (2, 3] < (4, 5]] + + Passng a string for `bins` dispatches the bin calculation to numpy's + `histogram_bin_edges`. + >>> pd.cut(array([0.1, 0.1, 0.2, 0.5, 0.5, 0.9, 1.0]), + ... bins="auto") + ... # doctest: +ELLIPSIS` + [(0.0991, 0.325], (0.0991, 0.325], (0.0991, 0.325], (0.325, 0.55], + (0.325, 0.55], (0.775, 1.0], (0.775, 1.0]] + Categories (4, interval[float64]): [(0.0991, 0.325] < (0.325, 0.55] < + (0.55, 0.775] < (0.775, 1.0]] """ # NOTE: this binning code is changed a bit from histogram for var(x) == 0 @@ -188,7 +200,20 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, x_is_series, series_index, name, x = _preprocess_for_cut(x) x, dtype = _coerce_to_type(x) - if not np.iterable(bins): + if isinstance(bins, str): + bins = np.histogram_bin_edges(x, bins) + mn, mx = bins[0], bins[-1] + adj = (mx - mn) + if adj: + adj *= 0.001 # 0.1% of the range + else: + adj = 0.001 + if right: + bins[0] -= adj + else: + bins[-1] += adj + + elif not np.iterable(bins): if is_scalar(bins) and bins < 1: raise ValueError("`bins` should be a positive integer.") diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 44de3e93d42bf..c690876257d73 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -16,6 +16,7 @@ from pandas.core.algorithms import quantile import pandas.core.reshape.tile as tmod +import pdb class TestCut(object): @@ -37,6 +38,24 @@ def test_bins(self): tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667, 6.53333333, 9.7])) + def test_str_bins(self): + data = np.array([0.1, 0.1, 0.2, 0.5, 0.5, 0.9, 1.0]) + result, bins_cut = cut(data, bins="auto", + retbins=True) + + bins_np = np.histogram_bin_edges(data, "auto") + adj = (bins_np[-1] - bins_np[0]) * 0.001 + 
bins_np[0] -= adj + tm.assert_almost_equal(bins_cut, bins_np) + tm.assert_almost_equal(np.round(bins_cut, 4), + np.array([0.0991, 0.325, 0.55, 0.775, 1.0])) + + intervals = IntervalIndex.from_breaks(np.round(bins_np, 4), + closed="right") + expected = Categorical(intervals, ordered=True) + tm.assert_index_equal(result.categories, + expected.categories) + def test_right(self): data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) result, bins = cut(data, 4, right=True, retbins=True) From f483d6aa601bf3e332b81d9e3b22ab5625f7d9da Mon Sep 17 00:00:00 2001 From: "B. L. Alterman" Date: Thu, 8 Nov 2018 09:49:17 -0500 Subject: [PATCH 2/9] ENH: GH 14627 PEP8 fixes --- pandas/core/reshape/tile.py | 6 +++--- pandas/tests/reshape/test_tile.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 334e652a9e813..7708955e21c10 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -189,9 +189,9 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, >>> pd.cut(array([0.1, 0.1, 0.2, 0.5, 0.5, 0.9, 1.0]), ... bins="auto") ... 
# doctest: +ELLIPSIS` - [(0.0991, 0.325], (0.0991, 0.325], (0.0991, 0.325], (0.325, 0.55], + [(0.0991, 0.325], (0.0991, 0.325], (0.0991, 0.325], (0.325, 0.55], (0.325, 0.55], (0.775, 1.0], (0.775, 1.0]] - Categories (4, interval[float64]): [(0.0991, 0.325] < (0.325, 0.55] < + Categories (4, interval[float64]): [(0.0991, 0.325] < (0.325, 0.55] < (0.55, 0.775] < (0.775, 1.0]] """ # NOTE: this binning code is changed a bit from histogram for var(x) == 0 @@ -207,7 +207,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, if adj: adj *= 0.001 # 0.1% of the range else: - adj = 0.001 + adj = 0.001 if right: bins[0] -= adj else: diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index c690876257d73..10cadd643340e 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -40,20 +40,20 @@ def test_bins(self): def test_str_bins(self): data = np.array([0.1, 0.1, 0.2, 0.5, 0.5, 0.9, 1.0]) - result, bins_cut = cut(data, bins="auto", - retbins=True) - + result, bins_cut = cut(data, bins="auto", + retbins=True) + bins_np = np.histogram_bin_edges(data, "auto") adj = (bins_np[-1] - bins_np[0]) * 0.001 bins_np[0] -= adj tm.assert_almost_equal(bins_cut, bins_np) - tm.assert_almost_equal(np.round(bins_cut, 4), + tm.assert_almost_equal(np.round(bins_cut, 4), np.array([0.0991, 0.325, 0.55, 0.775, 1.0])) - - intervals = IntervalIndex.from_breaks(np.round(bins_np, 4), + + intervals = IntervalIndex.from_breaks(np.round(bins_np, 4), closed="right") - expected = Categorical(intervals, ordered=True) - tm.assert_index_equal(result.categories, + expected = Categorical(intervals, ordered=True) + tm.assert_index_equal(result.categories, expected.categories) def test_right(self): From f3323f4f2122ee1b3ea2ae05d85b8672e6e20a06 Mon Sep 17 00:00:00 2001 From: "B. L. 
Alterman" Date: Sun, 11 Nov 2018 21:35:35 -0500 Subject: [PATCH 3/9] P Addressed review comments on f483d6a --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/reshape/tile.py | 17 +++++++++++------ pandas/tests/reshape/test_tile.py | 10 ++++++++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 6ace245a4bae1..fd11d25836ed9 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -236,6 +236,7 @@ Other Enhancements - Compatibility with Matplotlib 3.0 (:issue:`22790`). - Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) - :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`8917`) +- :func: `~cut` `bins` kwarg now accepts a string, which is dispatched to `numpy.histogram_bin_edges`. (:issue:`14627`) .. _whatsnew_0240.api_breaking: diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 7708955e21c10..782feeca62582 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -13,6 +13,8 @@ is_scalar, is_timedelta64_dtype) from pandas.core.dtypes.missing import isna +from pandas.compat import string_types + from pandas import ( Categorical, Index, Interval, IntervalIndex, Series, Timedelta, Timestamp, to_datetime, to_timedelta) @@ -42,7 +44,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, range of `x` is extended by .1% on each side to include the minimum and maximum values of `x`. * str : Bin calculaton dispatched to `np.histogram_bin_edges`. See that - documentation for details. + documentation for details. 
(versionadded:: 0.24.0) * sequence of scalars : Defines the bin edges allowing for non-uniform width. No extension of the range of `x` is done. * IntervalIndex : Defines the exact bins to be used. @@ -87,9 +89,9 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, bins : numpy.ndarray or IntervalIndex The computed or specified bins. Only returned when `retbins=True`. - For scalar or sequence `bins`, this is an ndarray with the computed - bins. If set `duplicates=drop`, `bins` will drop non-unique bin. For - an IntervalIndex `bins`, this is equal to `bins`. + For scalar, str, or sequence `bins`, this is an ndarray with the + computed bins. If set `duplicates=drop`, `bins` will drop non-unique + bin. For an IntervalIndex `bins`, this is equal to `bins`. See Also -------- @@ -100,6 +102,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, Series : One-dimensional array with axis labels (including time series). pandas.IntervalIndex : Immutable Index implementing an ordered, sliceable set. + numpy.histogram_bin_edges : Bin calculation dispatched to this method when + `bins` is a string. Notes ----- @@ -185,7 +189,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, Categories (3, interval[int64]): [(0, 1] < (2, 3] < (4, 5]] Passng a string for `bins` dispatches the bin calculation to numpy's - `histogram_bin_edges`. + `histogram_bin_edges`. (Starting in version 0.24.) >>> pd.cut(array([0.1, 0.1, 0.2, 0.5, 0.5, 0.9, 1.0]), ... bins="auto") ... 
# doctest: +ELLIPSIS` @@ -200,7 +204,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, x_is_series, series_index, name, x = _preprocess_for_cut(x) x, dtype = _coerce_to_type(x) - if isinstance(bins, str): + if isinstance(bins, string_types): + # GH 14627 bins = np.histogram_bin_edges(x, bins) mn, mx = bins[0], bins[-1] adj = (mx - mn) diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 10cadd643340e..47918b29f16d8 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -16,8 +16,6 @@ from pandas.core.algorithms import quantile import pandas.core.reshape.tile as tmod -import pdb - class TestCut(object): def test_simple(self): @@ -39,6 +37,7 @@ def test_bins(self): 6.53333333, 9.7])) def test_str_bins(self): + # GH 14627 data = np.array([0.1, 0.1, 0.2, 0.5, 0.5, 0.9, 1.0]) result, bins_cut = cut(data, bins="auto", retbins=True) @@ -55,6 +54,13 @@ def test_str_bins(self): expected = Categorical(intervals, ordered=True) tm.assert_index_equal(result.categories, expected.categories) + + + # Test that a `bin` string not present in `np.histogram_bin_edges` + # throws a ValueError. + tm.assert_raises_regex(ValueError, + "'*' is not a valid estimator for `bins`", + cut, data, "bad bins") def test_right(self): data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) From d7b2e3e69186604c35b2d9d82550473b092bdff0 Mon Sep 17 00:00:00 2001 From: "B. L. Alterman" Date: Mon, 12 Nov 2018 08:20:40 -0500 Subject: [PATCH 4/9] P converted tm to pytest --- pandas/tests/reshape/test_tile.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 47918b29f16d8..3e870208b02b0 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -58,9 +58,10 @@ def test_str_bins(self): # Test that a `bin` string not present in `np.histogram_bin_edges` # throws a ValueError. 
- tm.assert_raises_regex(ValueError, - "'*' is not a valid estimator for `bins`", - cut, data, "bad bins") + with pytest.raises(ValueError, + match="'*' is not a valid estimator for `bins`", + message="Verify acceptable bins in `np.histogram_bin_edges`."): + cut(data, bins="bad bins") def test_right(self): data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) From 79b71455e685cc923114906c06e3d766a442d076 Mon Sep 17 00:00:00 2001 From: "B. L. Alterman" Date: Thu, 8 Nov 2018 09:36:50 -0500 Subject: [PATCH 5/9] ENH: Accept string for `bins` in `pd.cut` Passing a string to the `pd.cut` bins kwarg dispatches bin calculation to `np.histogram_bin_edges`. Closes gh-14627. --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/reshape/tile.py | 42 ++++++++++++++++++++++++++----- pandas/tests/reshape/test_tile.py | 28 ++++++++++++++++++++- 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 44c467795d1ed..64cf5a33d8f5d 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -233,6 +233,7 @@ Other Enhancements - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`) - :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support a ``nonexistent`` argument for handling datetimes that are rounded to nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`22647`) +- :func: `~cut` `bins` kwarg now accepts a string, which is dispatched to `numpy.histogram_bin_edges`. (:issue:`14627`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). 
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). - :meth:`pandas.core.dtypes.is_list_like` has gained a keyword ``allow_sets`` which is ``True`` by default; if ``False``, diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 4a863372eea13..782feeca62582 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -13,6 +13,8 @@ is_scalar, is_timedelta64_dtype) from pandas.core.dtypes.missing import isna +from pandas.compat import string_types + from pandas import ( Categorical, Index, Interval, IntervalIndex, Series, Timedelta, Timestamp, to_datetime, to_timedelta) @@ -35,12 +37,14 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, ---------- x : array-like The input array to be binned. Must be 1-dimensional. - bins : int, sequence of scalars, or pandas.IntervalIndex + bins : int, str, sequence of scalars, or pandas.IntervalIndex The criteria to bin by. * int : Defines the number of equal-width bins in the range of `x`. The range of `x` is extended by .1% on each side to include the minimum and maximum values of `x`. + * str : Bin calculaton dispatched to `np.histogram_bin_edges`. See that + documentation for details. (versionadded:: 0.24.0) * sequence of scalars : Defines the bin edges allowing for non-uniform width. No extension of the range of `x` is done. * IntervalIndex : Defines the exact bins to be used. @@ -83,11 +87,11 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, * False : returns an ndarray of integers. - bins : numpy.ndarray or IntervalIndex. + bins : numpy.ndarray or IntervalIndex The computed or specified bins. Only returned when `retbins=True`. - For scalar or sequence `bins`, this is an ndarray with the computed - bins. If set `duplicates=drop`, `bins` will drop non-unique bin. For - an IntervalIndex `bins`, this is equal to `bins`. 
+ For scalar, str, or sequence `bins`, this is an ndarray with the + computed bins. If set `duplicates=drop`, `bins` will drop non-unique + bin. For an IntervalIndex `bins`, this is equal to `bins`. See Also -------- @@ -98,6 +102,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, Series : One-dimensional array with axis labels (including time series). pandas.IntervalIndex : Immutable Index implementing an ordered, sliceable set. + numpy.histogram_bin_edges : Bin calculation dispatched to this method when + `bins` is a string. Notes ----- @@ -181,6 +187,16 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins) [NaN, (0, 1], NaN, (2, 3], (4, 5]] Categories (3, interval[int64]): [(0, 1] < (2, 3] < (4, 5]] + + Passng a string for `bins` dispatches the bin calculation to numpy's + `histogram_bin_edges`. (Starting in version 0.24.) + >>> pd.cut(array([0.1, 0.1, 0.2, 0.5, 0.5, 0.9, 1.0]), + ... bins="auto") + ... 
# doctest: +ELLIPSIS` + [(0.0991, 0.325], (0.0991, 0.325], (0.0991, 0.325], (0.325, 0.55], + (0.325, 0.55], (0.775, 1.0], (0.775, 1.0]] + Categories (4, interval[float64]): [(0.0991, 0.325] < (0.325, 0.55] < + (0.55, 0.775] < (0.775, 1.0]] """ # NOTE: this binning code is changed a bit from histogram for var(x) == 0 @@ -188,7 +204,21 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, x_is_series, series_index, name, x = _preprocess_for_cut(x) x, dtype = _coerce_to_type(x) - if not np.iterable(bins): + if isinstance(bins, string_types): + # GH 14627 + bins = np.histogram_bin_edges(x, bins) + mn, mx = bins[0], bins[-1] + adj = (mx - mn) + if adj: + adj *= 0.001 # 0.1% of the range + else: + adj = 0.001 + if right: + bins[0] -= adj + else: + bins[-1] += adj + + elif not np.iterable(bins): if is_scalar(bins) and bins < 1: raise ValueError("`bins` should be a positive integer.") diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index f04e9a55a6c8d..a95ecf9938d41 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -16,7 +16,6 @@ from pandas.core.algorithms import quantile import pandas.core.reshape.tile as tmod - class TestCut(object): def test_simple(self): @@ -37,6 +36,33 @@ def test_bins(self): tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667, 6.53333333, 9.7])) + def test_str_bins(self): + # GH 14627 + data = np.array([0.1, 0.1, 0.2, 0.5, 0.5, 0.9, 1.0]) + result, bins_cut = cut(data, bins="auto", + retbins=True) + + bins_np = np.histogram_bin_edges(data, "auto") + adj = (bins_np[-1] - bins_np[0]) * 0.001 + bins_np[0] -= adj + tm.assert_almost_equal(bins_cut, bins_np) + tm.assert_almost_equal(np.round(bins_cut, 4), + np.array([0.0991, 0.325, 0.55, 0.775, 1.0])) + + intervals = IntervalIndex.from_breaks(np.round(bins_np, 4), + closed="right") + expected = Categorical(intervals, ordered=True) + tm.assert_index_equal(result.categories, + expected.categories) + + + # 
Test that a `bin` string not present in `np.histogram_bin_edges` + # throws a ValueError. + with pytest.raises(ValueError, + match="'*' is not a valid estimator for `bins`", + message="Verify acceptable bins in `np.histogram_bin_edges`."): + cut(data, bins="bad bins") + def test_right(self): data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) result, bins = cut(data, 4, right=True, retbins=True) From 1d9e47518a99e7846b20d2849554f39ec2d490de Mon Sep 17 00:00:00 2001 From: "B. L. Alterman" Date: Mon, 19 Nov 2018 11:12:55 -0500 Subject: [PATCH 6/9] P Added try-except statement for numpy compat 1) Should bring compatibility down from 1.15 to 1.11. --- pandas/core/reshape/tile.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 782feeca62582..c9d5d8b03ee5e 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -206,7 +206,14 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, if isinstance(bins, string_types): # GH 14627 - bins = np.histogram_bin_edges(x, bins) + # NOTE: when the minimum numpy requirement is + # increased to 1.15, the try-except statement + # can be removed. + try: + bins = np.histogram_bin_edges(x, bins) + except AttributeError: + cnt, bins = np.histogram(x, bins) + mn, mx = bins[0], bins[-1] adj = (mx - mn) if adj: From 43d61f90e7d58b06a684ea8d452f3daff57bdabf Mon Sep 17 00:00:00 2001 From: "B. L. 
Alterman" Date: Tue, 27 Nov 2018 13:51:45 -0500 Subject: [PATCH 7/9] P Fixing docstring for Travis --- pandas/core/reshape/tile.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index c9d5d8b03ee5e..e6735a025d50a 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -44,7 +44,10 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, range of `x` is extended by .1% on each side to include the minimum and maximum values of `x`. * str : Bin calculaton dispatched to `np.histogram_bin_edges`. See that - documentation for details. (versionadded:: 0.24.0) + documentation for details. + + .. versionadded:: 0.24.0 + * sequence of scalars : Defines the bin edges allowing for non-uniform width. No extension of the range of `x` is done. * IntervalIndex : Defines the exact bins to be used. @@ -190,9 +193,9 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, Passng a string for `bins` dispatches the bin calculation to numpy's `histogram_bin_edges`. (Starting in version 0.24.) + >>> pd.cut(array([0.1, 0.1, 0.2, 0.5, 0.5, 0.9, 1.0]), - ... bins="auto") - ... # doctest: +ELLIPSIS` + ... 
bins="auto") # doctest: +ELLIPSIS` [(0.0991, 0.325], (0.0991, 0.325], (0.0991, 0.325], (0.325, 0.55], (0.325, 0.55], (0.775, 1.0], (0.775, 1.0]] Categories (4, interval[float64]): [(0.0991, 0.325] < (0.325, 0.55] < From 3bc1f87f92e011de6dc6730db5a55dc8595ed8bd Mon Sep 17 00:00:00 2001 From: balterman Date: Sun, 9 Dec 2018 23:11:32 -0500 Subject: [PATCH 8/9] P Attempting to fix docstring errors --- pandas/core/reshape/tile.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index e6735a025d50a..57a7d12015b57 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -43,14 +43,13 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, * int : Defines the number of equal-width bins in the range of `x`. The range of `x` is extended by .1% on each side to include the minimum and maximum values of `x`. + * sequence of scalars : Defines the bin edges allowing for non-uniform + width. No extension of the range of `x` is done. + * IntervalIndex : Defines the exact bins to be used. * str : Bin calculaton dispatched to `np.histogram_bin_edges`. See that documentation for details. .. versionadded:: 0.24.0 - - * sequence of scalars : Defines the bin edges allowing for non-uniform - width. No extension of the range of `x` is done. - * IntervalIndex : Defines the exact bins to be used. right : bool, default True Indicates whether `bins` includes the rightmost edge or not. 
If From e6b2df3ef90aa8796a8efdc72c60d3ca38c87348 Mon Sep 17 00:00:00 2001 From: balterman Date: Sat, 5 Jan 2019 12:32:38 -0500 Subject: [PATCH 9/9] P Updating for TomAuspurger comments --- doc/source/whatsnew/v0.24.0.rst | 2 +- pandas/core/reshape/tile.py | 2 +- pandas/tests/reshape/test_tile.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 64cf5a33d8f5d..86ee1cd226b13 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -233,7 +233,7 @@ Other Enhancements - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`) - :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support a ``nonexistent`` argument for handling datetimes that are rounded to nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`22647`) -- :func: `~cut` `bins` kwarg now accepts a string, which is dispatched to `numpy.histogram_bin_edges`. (:issue:`14627`) +- :func:`~cut` `bins` kwarg now accepts a string, which is dispatched to `numpy.histogram_bin_edges`. (:issue:`14627`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). 
- :meth:`pandas.core.dtypes.is_list_like` has gained a keyword ``allow_sets`` which is ``True`` by default; if ``False``, diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 57a7d12015b57..bcea4360af4ea 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -46,7 +46,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, * sequence of scalars : Defines the bin edges allowing for non-uniform width. No extension of the range of `x` is done. * IntervalIndex : Defines the exact bins to be used. - * str : Bin calculaton dispatched to `np.histogram_bin_edges`. See that + * str : Bin calculaton dispatched to :func:`np.histogram_bin_edges`. See that documentation for details. .. versionadded:: 0.24.0 diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index a95ecf9938d41..1c520b6b20c2d 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -59,7 +59,6 @@ def test_str_bins(self): # Test that a `bin` string not present in `np.histogram_bin_edges` # throws a ValueError. with pytest.raises(ValueError, - match="'*' is not a valid estimator for `bins`", message="Verify acceptable bins in `np.histogram_bin_edges`."): cut(data, bins="bad bins")