Skip to content

Commit c6730ea

Browse files
author
Tom Augspurger
committed
Merge pull request #6955 from TomAugspurger/quantiles
ENH: Quantiles accepts an array
2 parents 7fe619f + 1c2e106 commit c6730ea

File tree

5 files changed

+100
-15
lines changed

5 files changed

+100
-15
lines changed

doc/source/v0.14.0.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,8 @@ Enhancements
482482
- Added ``how`` option to rolling-moment functions to dictate how to handle resampling; :func:`rolling_max` defaults to max,
483483
:func:`rolling_min` defaults to min, and all others default to mean (:issue:`6297`)
484484
- ``CustomBusinessMonthBegin`` and ``CustomBusinessMonthEnd`` are now available (:issue:`6866`)
485+
- :meth:`Series.quantile` and :meth:`DataFrame.quantile` now accept an array of
486+
quantiles.
485487

486488
Performance
487489
~~~~~~~~~~~

pandas/core/frame.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4145,22 +4145,41 @@ def mode(self, axis=0, numeric_only=False):
41454145
def quantile(self, q=0.5, axis=0, numeric_only=True):
41464146
"""
41474147
Return values at the given quantile over requested axis, a la
4148-
scoreatpercentile in scipy.stats
4148+
numpy.percentile.
41494149
41504150
Parameters
41514151
----------
4152-
q : quantile, default 0.5 (50% quantile)
4153-
0 <= q <= 1
4152+
q : float or array-like, default 0.5 (50% quantile)
4153+
0 <= q <= 1, the quantile(s) to compute
41544154
axis : {0, 1}
41554155
0 for row-wise, 1 for column-wise
41564156
41574157
Returns
41584158
-------
4159-
quantiles : Series
4159+
quantiles : Series or DataFrame
4160+
If ``q`` is an array, a DataFrame will be returned where the
4161+
index is ``q``, the columns are the columns of self, and the
4162+
values are the quantiles.
4163+
If ``q`` is a float, a Series will be returned where the
4164+
index is the columns of self and the values are the quantiles.
4165+
4166+
Examples
4167+
--------
4168+
4169+
>>> df = DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]),
4170+
...                columns=['a', 'b'])
4171+
>>> df.quantile(.1)
4172+
a 1.3
4173+
b 3.7
4174+
dtype: float64
4175+
>>> df.quantile([.1, .5])
4176+
a b
4177+
0.1 1.3 3.7
4178+
0.5 2.5 55.0
41604179
"""
4161-
per = q * 100
4180+
per = np.asarray(q) * 100
41624181

4163-
def f(arr):
4182+
def f(arr, per):
41644183
arr = arr.values
41654184
if arr.dtype != np.float_:
41664185
arr = arr.astype(float)
@@ -4171,7 +4190,12 @@ def f(arr):
41714190
return _quantile(arr, per)
41724191

41734192
data = self._get_numeric_data() if numeric_only else self
4174-
return data.apply(f, axis=axis)
4193+
if com.is_list_like(per):
4194+
from pandas.tools.merge import concat
4195+
return concat([data.apply(f, axis=axis, args=(x,)) for x in per],
4196+
axis=1, keys=per/100.).T
4197+
else:
4198+
return data.apply(f, axis=axis, args=(per,))
41754199

41764200
def rank(self, axis=0, numeric_only=None, method='average',
41774201
na_option='keep', ascending=True, pct=False):

pandas/core/series.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1220,26 +1220,51 @@ def round(self, decimals=0, out=None):
12201220

12211221
def quantile(self, q=0.5):
12221222
"""
1223-
Return value at the given quantile, a la scoreatpercentile in
1224-
scipy.stats
1223+
Return value at the given quantile, a la numpy.percentile.
12251224
12261225
Parameters
12271226
----------
1228-
q : quantile
1229-
0 <= q <= 1
1227+
q : float or array-like, default 0.5 (50% quantile)
1228+
0 <= q <= 1, the quantile(s) to compute
12301229
12311230
Returns
12321231
-------
1233-
quantile : float
1232+
quantile : float or Series
1233+
If ``q`` is an array, a Series will be returned where the
1234+
index is ``q`` and the values are the quantiles.
1235+
1236+
Examples
1237+
--------
1238+
1239+
>>> s = Series([1, 2, 3, 4])
1240+
>>> s.quantile(.5)
1241+
2.5
1242+
>>> s.quantile([.25, .5, .75])
1243+
0.25 1.75
1244+
0.50 2.50
1245+
0.75 3.25
1246+
dtype: float64
12341247
"""
12351248
valid_values = self.dropna().values
12361249
if len(valid_values) == 0:
12371250
return pa.NA
1251+
1252+
def multi(values, qs):
1253+
if com.is_list_like(qs):
1254+
return Series([_quantile(values, x*100)
1255+
for x in qs], index=qs)
1256+
else:
1257+
return _quantile(values, qs*100)
1258+
12381259
if com.is_datetime64_dtype(self):
12391260
values = _values_from_object(self).view('i8')
1240-
result = lib.Timestamp(_quantile(values, q * 100))
1261+
result = multi(values, q)
1262+
if com.is_list_like(q):
1263+
result = result.map(lib.Timestamp)
1264+
else:
1265+
result = lib.Timestamp(result)
12411266
else:
1242-
result = _quantile(valid_values, q * 100)
1267+
result = multi(valid_values, q)
12431268

12441269
return result
12451270

pandas/tests/test_frame.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10945,6 +10945,25 @@ def test_quantile(self):
1094510945
xp = df.median()
1094610946
assert_series_equal(rs, xp)
1094710947

10948+
def test_quantile_multi(self):
10949+
df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]],
10950+
columns=['a', 'b', 'c'])
10951+
result = df.quantile([.25, .5])
10952+
expected = DataFrame([[1.5, 1.5, 1.5], [2., 2., 2.]],
10953+
index=[.25, .5], columns=['a', 'b', 'c'])
10954+
assert_frame_equal(result, expected)
10955+
10956+
# axis = 1
10957+
result = df.quantile([.25, .5], axis=1)
10958+
expected = DataFrame([[1.5, 1.5, 1.5], [2., 2., 2.]],
10959+
index=[.25, .5], columns=[0, 1, 2])
10960+
10961+
# empty
10962+
result = DataFrame({'x': [], 'y': []}).quantile([0.1, .9], axis=0)
10963+
expected = DataFrame({'x': [np.nan, np.nan], 'y': [np.nan, np.nan]},
10964+
index=[.1, .9])
10965+
assert_frame_equal(result, expected)
10966+
1094810967
def test_cumsum(self):
1094910968
self.tsframe.ix[5:10, 0] = nan
1095010969
self.tsframe.ix[10:15, 1] = nan
@@ -12728,7 +12747,6 @@ def check_query_with_unnamed_multiindex(self, parser, engine):
1272812747
df = DataFrame(randn(10, 2), index=index)
1272912748
ind = Series(df.index.get_level_values(0).values, index=index)
1273012749

12731-
#import ipdb; ipdb.set_trace()
1273212750
res1 = df.query('ilevel_0 == "red"', parser=parser, engine=engine)
1273312751
res2 = df.query('"red" == ilevel_0', parser=parser, engine=engine)
1273412752
exp = df[ind == 'red']

pandas/tests/test_series.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2203,6 +2203,22 @@ def test_quantile(self):
22032203
q = tds.quantile(.25)
22042204
self.assertEqual(q, pd.to_timedelta('24:00:00'))
22052205

2206+
def test_quantile_multi(self):
2207+
from numpy import percentile
2208+
2209+
qs = [.1, .9]
2210+
result = self.ts.quantile(qs)
2211+
expected = pd.Series([percentile(self.ts.valid(), 10),
2212+
percentile(self.ts.valid(), 90)],
2213+
index=qs)
2214+
assert_series_equal(result, expected)
2215+
2216+
dts = self.ts.index.to_series()
2217+
result = dts.quantile((.2, .2))
2218+
assert_series_equal(result, Series([Timestamp('2000-01-10 19:12:00'),
2219+
Timestamp('2000-01-10 19:12:00')],
2220+
index=[.2, .2]))
2221+
22062222
def test_describe(self):
22072223
_ = self.series.describe()
22082224
_ = self.ts.describe()

0 commit comments

Comments
 (0)