11
11
import numpy as np
12
12
13
13
14
- def cut (x , bins , right = True , labels = None , retbins = False , precision = 3 ):
14
+ def cut (x , bins , right = True , labels = None , retbins = False , precision = 3 ,
15
+ include_lowest = False ):
15
16
"""
16
17
Return indices of half-open bins to which each value of `x` belongs.
17
18
@@ -38,9 +39,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3):
38
39
39
40
Returns
40
41
-------
41
- out : ndarray of labels
42
- Same shape as `x`. Array of strings by default, integers if
43
- labels=False
42
+ out : Categorical or array of integers if labels is False
44
43
bins : ndarray of floats
45
44
Returned only if `retbins` is True.
46
45
@@ -50,7 +49,9 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3):
50
49
a categorical variable. For example, `cut` could convert ages to groups
51
50
of age ranges.
52
51
53
- Any NA values will be NA in the result
52
+ Any NA values will be NA in the result. Out of bounds values will be NA in
53
the resulting Categorical object.
54
+
54
55
55
56
Examples
56
57
--------
@@ -95,11 +96,12 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3):
95
96
raise ValueError ('bins must increase monotonically.' )
96
97
97
98
return _bins_to_cuts (x , bins , right = right , labels = labels ,
98
- retbins = retbins , precision = precision )
99
+ retbins = retbins , precision = precision ,
100
+ include_lowest = include_lowest )
99
101
100
102
101
103
102
- def qcut (x , q = 4 , labels = None , retbins = False , precision = 3 ):
104
+ def qcut (x , q , labels = None , retbins = False , precision = 3 ):
103
105
"""
104
106
Quantile-based discretization function. Discretize variable into
105
107
equal-sized buckets based on rank or based on sample quantiles. For example
@@ -111,8 +113,7 @@ def qcut(x, q=4, labels=None, retbins=False, precision=3):
111
113
x : ndarray or Series
112
114
q : integer or array of quantiles
113
115
Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately
114
- array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles. Array of
115
- quantiles must span [0, 1]
116
+ array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles
116
117
labels : array or boolean, default None
117
118
Labels to use for bin edges, or False to return integer bin labels
118
119
retbins : bool, optional
@@ -121,9 +122,11 @@ def qcut(x, q=4, labels=None, retbins=False, precision=3):
121
122
122
123
Returns
123
124
-------
125
+ cat : Categorical
124
126
125
127
Notes
126
128
-----
129
+ Out of bounds values will be NA in the resulting Categorical object.
127
130
128
131
Examples
129
132
--------
@@ -133,21 +136,22 @@ def qcut(x, q=4, labels=None, retbins=False, precision=3):
133
136
else :
134
137
quantiles = q
135
138
bins = algos .quantile (x , quantiles )
136
- bins [0 ] -= 0.001 * (x .max () - x .min ())
137
-
138
139
return _bins_to_cuts (x , bins , labels = labels , retbins = retbins ,
139
- precision = precision )
140
+ precision = precision , include_lowest = True )
140
141
141
142
142
143
def _bins_to_cuts (x , bins , right = True , labels = None , retbins = False ,
143
- precision = 3 , name = None ):
144
+ precision = 3 , name = None , include_lowest = False ):
144
145
if name is None and isinstance (x , Series ):
145
146
name = x .name
146
147
x = np .asarray (x )
147
148
148
149
side = 'left' if right else 'right'
149
150
ids = bins .searchsorted (x , side = side )
150
151
152
+ if include_lowest :
153
+ ids [x == bins [0 ]] = 1
154
+
151
155
na_mask = com .isnull (x ) | (ids == len (bins )) | (ids == 0 )
152
156
has_nas = na_mask .any ()
153
157
@@ -157,9 +161,12 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
157
161
if right :
158
162
levels = ['(%s, %s]' % (fmt (a ), fmt (b ))
159
163
for a , b in zip (bins , bins [1 :])]
164
+ if include_lowest :
165
+ levels [0 ] = '[' + levels [0 ][1 :]
160
166
else :
161
167
levels = ['[%s, %s)' % (fmt (a ), fmt (b ))
162
168
for a , b in zip (bins , bins [1 :])]
169
+
163
170
else :
164
171
if len (labels ) != len (bins ) - 1 :
165
172
raise ValueError ('Bin labels must be one fewer than '
0 commit comments