|
12 | 12 |
|
13 | 13 | import numpy as np
|
14 | 14 |
|
| 15 | +def _generate_bins(x=None, bins=None, min_val=None, max_val=None, right=True): |
| 16 | + """ |
| 17 | + Generate bins for cut, must either pass x (an array-like) or a min and max |
| 18 | + value. If min or max are passed, ignores x. |
| 19 | +
|
| 20 | + Adds .1% space around bins if integer. |
| 21 | + """ |
| 22 | + if bins is None: |
| 23 | + raise ValueError("bins cannot be None.") |
| 24 | + # ignore x if min and max are passed |
| 25 | + if min_val is not None or max_val is not None: |
| 26 | + assert min_val is not None and max_val is not None, ( |
| 27 | + "Must pass *both* min_val and max_val") |
| 28 | + else: |
| 29 | + assert x is not None, "Must pass either min/max vals or array-like" |
| 30 | + |
| 31 | + # NOTE: this binning code is changed a bit from histogram for var(x) == 0 |
| 32 | + if not np.iterable(bins): |
| 33 | + if np.isscalar(bins) and bins < 1: |
| 34 | + raise ValueError("`bins` should be a positive integer.") |
| 35 | + if min_val is not None: |
| 36 | + mn, mx = min_val, max_val |
| 37 | + else: |
| 38 | + try: # for array-like |
| 39 | + sz = x.size |
| 40 | + except AttributeError: |
| 41 | + x = np.asarray(x) |
| 42 | + sz = x.size |
| 43 | + if sz == 0: |
| 44 | + raise ValueError('Cannot cut empty array') |
| 45 | + # handle empty arrays. Can't determine range, so use 0-1. |
| 46 | + # rng = (0, 1) |
| 47 | + else: |
| 48 | + rng = (nanops.nanmin(x), nanops.nanmax(x)) |
| 49 | + mn, mx = [mi + 0.0 for mi in rng] |
| 50 | + |
| 51 | + if mn == mx: # adjust end points before binning |
| 52 | + mn -= .001 * mn |
| 53 | + mx += .001 * mx |
| 54 | + bins = np.linspace(mn, mx, bins + 1, endpoint=True) |
| 55 | + else: # adjust end points after binning |
| 56 | + bins = np.linspace(mn, mx, bins + 1, endpoint=True) |
| 57 | + adj = (mx - mn) * 0.001 # 0.1% of the range |
| 58 | + if right: |
| 59 | + bins[0] -= adj |
| 60 | + else: |
| 61 | + bins[-1] += adj |
| 62 | + |
| 63 | + else: |
| 64 | + bins = np.asarray(bins) |
| 65 | + if (np.diff(bins) < 0).any(): |
| 66 | + raise ValueError('bins must increase monotonically.') |
| 67 | + return bins |
| 68 | + |
15 | 69 |
|
16 | 70 | def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
|
17 | 71 | include_lowest=False):
|
@@ -75,39 +129,10 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
|
75 | 129 | >>> pd.cut(np.ones(5), 4, labels=False)
|
76 | 130 | array([1, 1, 1, 1, 1], dtype=int64)
|
77 | 131 | """
|
78 |
| - # NOTE: this binning code is changed a bit from histogram for var(x) == 0 |
79 |
| - if not np.iterable(bins): |
80 |
| - if np.isscalar(bins) and bins < 1: |
81 |
| - raise ValueError("`bins` should be a positive integer.") |
82 |
| - try: # for array-like |
83 |
| - sz = x.size |
84 |
| - except AttributeError: |
85 |
| - x = np.asarray(x) |
86 |
| - sz = x.size |
87 |
| - if sz == 0: |
88 |
| - raise ValueError('Cannot cut empty array') |
89 |
| - # handle empty arrays. Can't determine range, so use 0-1. |
90 |
| - # rng = (0, 1) |
91 |
| - else: |
92 |
| - rng = (nanops.nanmin(x), nanops.nanmax(x)) |
93 |
| - mn, mx = [mi + 0.0 for mi in rng] |
| 132 | + if x is None: |
| 133 | + raise TypeError("Must pass array-like as first argument, not None") |
94 | 134 |
|
95 |
| - if mn == mx: # adjust end points before binning |
96 |
| - mn -= .001 * mn |
97 |
| - mx += .001 * mx |
98 |
| - bins = np.linspace(mn, mx, bins + 1, endpoint=True) |
99 |
| - else: # adjust end points after binning |
100 |
| - bins = np.linspace(mn, mx, bins + 1, endpoint=True) |
101 |
| - adj = (mx - mn) * 0.001 # 0.1% of the range |
102 |
| - if right: |
103 |
| - bins[0] -= adj |
104 |
| - else: |
105 |
| - bins[-1] += adj |
106 |
| - |
107 |
| - else: |
108 |
| - bins = np.asarray(bins) |
109 |
| - if (np.diff(bins) < 0).any(): |
110 |
| - raise ValueError('bins must increase monotonically.') |
| 135 | + bins = _generate_bins(x, bins, right=right) |
111 | 136 |
|
112 | 137 | return _bins_to_cuts(x, bins, right=right, labels=labels,retbins=retbins, precision=precision,
|
113 | 138 | include_lowest=include_lowest)
|
|
0 commit comments