diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index a1a2149da7cf6..ce7fa940d018a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -293,6 +293,7 @@ Other enhancements - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) - :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) +- :meth:`DataFrame.quantile` gained a ``method`` argument that can accept ``table`` to evaluate multi-column quantiles (:issue:`43881`) - :class:`Interval` now supports checking whether one interval is contained by another interval (:issue:`46613`) - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support a ``copy`` argument. If ``False``, the underlying data is not copied in the returned object (:issue:`47934`) - :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6cfca4ebdc612..74d4184fe985d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -83,7 +83,10 @@ npt, ) from pandas.compat._optional import import_optional_dependency -from pandas.compat.numpy import function as nv +from pandas.compat.numpy import ( + function as nv, + np_percentile_argname, +) from pandas.util._decorators import ( Appender, Substitution, @@ -11129,6 +11132,7 @@ def quantile( axis: Axis = 0, numeric_only: bool | lib.NoDefault = no_default, interpolation: QuantileInterpolation = "linear", + method: Literal["single", "table"] = "single", ) -> Series | DataFrame: """ Return values at the given quantile over requested axis. @@ -11157,6 +11161,10 @@ def quantile( * higher: `j`. * nearest: `i` or `j` whichever is nearest. * midpoint: (`i` + `j`) / 2. + method : {'single', 'table'}, default 'single' + Whether to compute quantiles per-column ('single') or over all columns + ('table'). When 'table', the only allowed interpolation methods are + 'nearest', 'lower', and 'higher'. Returns ------- @@ -11186,6 +11194,17 @@ def quantile( 0.1 1.3 3.7 0.5 2.5 55.0 + Specifying `method='table'` will compute the quantile over all columns. + + >>> df.quantile(.1, method="table", interpolation="nearest") + a 1 + b 1 + Name: 0.1, dtype: int64 + >>> df.quantile([.1, .5], method="table", interpolation="nearest") + a b + 0.1 1 1 + 0.5 3 100 + Specifying `numeric_only=False` will also compute the quantile of datetime and timedelta data. @@ -11212,13 +11231,18 @@ def quantile( # error: List item 0 has incompatible type "Union[float, Union[Union[ # ExtensionArray, ndarray[Any, Any]], Index, Series], Sequence[float]]"; # expected "float" - res_df = self.quantile( - [q], # type: ignore[list-item] + res_df = self.quantile( # type: ignore[call-overload] + [q], axis=axis, numeric_only=numeric_only, interpolation=interpolation, + method=method, ) - res = res_df.iloc[0] + if method == "single": + res = res_df.iloc[0] + else: + # cannot directly iloc over sparse arrays + res = res_df.T.iloc[:, 0] if axis == 1 and len(self) == 0: # GH#41544 try to get an appropriate dtype dtype = find_common_type(list(self.dtypes)) @@ -11246,11 +11270,47 @@ def quantile( res = self._constructor([], index=q, columns=cols, dtype=dtype) return res.__finalize__(self, method="quantile") - # error: Argument "qs" to "quantile" of "BlockManager" has incompatible type - # "Index"; expected "Float64Index" - res = data._mgr.quantile( - qs=q, axis=1, interpolation=interpolation # type: ignore[arg-type] - ) + valid_method = {"single", "table"} + if method not in valid_method: + raise ValueError( + f"Invalid method: {method}. Method must be in {valid_method}." + ) + if method == "single": + # error: Argument "qs" to "quantile" of "BlockManager" has incompatible type + # "Index"; expected "Float64Index" + res = data._mgr.quantile( + qs=q, axis=1, interpolation=interpolation # type: ignore[arg-type] + ) + elif method == "table": + valid_interpolation = {"nearest", "lower", "higher"} + if interpolation not in valid_interpolation: + raise ValueError( + f"Invalid interpolation: {interpolation}. " + f"Interpolation must be in {valid_interpolation}" + ) + # handle degenerate case + if len(data) == 0: + if data.ndim == 2: + dtype = find_common_type(list(self.dtypes)) + else: + dtype = self.dtype + return self._constructor([], index=q, columns=data.columns, dtype=dtype) + + q_idx = np.quantile( # type: ignore[call-overload] + np.arange(len(data)), q, **{np_percentile_argname: interpolation} + ) + + by = data.columns + if len(by) > 1: + keys = [data._get_label_or_level_values(x) for x in by] + indexer = lexsort_indexer(keys) + else: + by = by[0] + k = data._get_label_or_level_values(by) # type: ignore[arg-type] + indexer = nargsort(k) + + res = data._mgr.take(indexer[q_idx], verify=False) + res.axes[1] = q result = self._constructor(res) return result.__finalize__(self, method="quantile") diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 16b82727fd069..14b416011b956 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -16,6 +16,14 @@ import pandas._testing as tm +@pytest.fixture( + params=[["linear", "single"], ["nearest", "table"]], ids=lambda x: "-".join(x) +) +def interp_method(request): + """(interpolation, method) arguments for quantile""" + return request.param + + class TestDataFrameQuantile: @pytest.mark.parametrize( "non_num_col", @@ -25,8 +33,11 @@ class TestDataFrameQuantile: [DataFrame, Series, Timestamp], ], ) - def test_numeric_only_default_false_warning(self, non_num_col): + def test_numeric_only_default_false_warning( + self, non_num_col, interp_method, request, using_array_manager + ): # GH #7308 + interpolation, method = interp_method df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}) df["C"] = non_num_col @@ -35,8 +46,14 @@ def test_numeric_only_default_false_warning(self, non_num_col): index=["A", "B"], name=0.5, ) + if interpolation == "nearest": + expected = expected.astype(np.int64) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) with tm.assert_produces_warning(FutureWarning, match="numeric_only"): - result = df.quantile(0.5) + result = df.quantile(0.5, interpolation=interpolation, method=method) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -64,66 +81,142 @@ def test_quantile_sparse(self, df, expected): tm.assert_series_equal(result, expected) - def test_quantile(self, datetime_frame): - from numpy import percentile - + def test_quantile( + self, datetime_frame, interp_method, using_array_manager, request + ): + interpolation, method = interp_method df = datetime_frame - q = df.quantile(0.1, axis=0, numeric_only=True) - assert q["A"] == percentile(df["A"], 10) - tm.assert_index_equal(q.index, df.columns) - - q = df.quantile(0.9, axis=1, numeric_only=True) - assert q["2000-01-17"] == percentile(df.loc["2000-01-17"], 90) - tm.assert_index_equal(q.index, df.index) - - # test degenerate case - q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0, numeric_only=True) + result = df.quantile( + 0.1, axis=0, numeric_only=True, interpolation=interpolation, method=method + ) + expected = Series( + [np.percentile(df[col], 10) for col in df.columns], + index=df.columns, + name=0.1, + ) + if interpolation == "linear": + # np.percentile values only comparable to linear interpolation + tm.assert_series_equal(result, expected) + else: + tm.assert_index_equal(result.index, expected.index) + request.node.add_marker( + pytest.mark.xfail( + using_array_manager, reason="Name set incorrectly for arraymanager" + ) + ) + assert result.name == expected.name + + result = df.quantile( + 0.9, axis=1, numeric_only=True, interpolation=interpolation, method=method + ) + expected = Series( + [np.percentile(df.loc[date], 90) for date in df.index], + index=df.index, + name=0.9, + ) + if interpolation == "linear": + # np.percentile values only comparable to linear interpolation + tm.assert_series_equal(result, expected) + else: + tm.assert_index_equal(result.index, expected.index) + request.node.add_marker( + pytest.mark.xfail( + using_array_manager, reason="Name set incorrectly for arraymanager" + ) + ) + assert result.name == expected.name + + def test_empty(self, interp_method): + interpolation, method = interp_method + q = DataFrame({"x": [], "y": []}).quantile( + 0.1, axis=0, numeric_only=True, interpolation=interpolation, method=method + ) assert np.isnan(q["x"]) and np.isnan(q["y"]) - # non-numeric exclusion + def test_non_numeric_exclusion(self, interp_method, request, using_array_manager): + interpolation, method = interp_method df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]}) - rs = df.quantile(0.5, numeric_only=True) + rs = df.quantile( + 0.5, numeric_only=True, interpolation=interpolation, method=method + ) with tm.assert_produces_warning(FutureWarning, match="Select only valid"): xp = df.median().rename(0.5) + if interpolation == "nearest": + xp = (xp + 0.5).astype(np.int64) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) tm.assert_series_equal(rs, xp) + def test_axis(self, interp_method, request, using_array_manager): # axis + interpolation, method = interp_method df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) - result = df.quantile(0.5, axis=1) + result = df.quantile(0.5, axis=1, interpolation=interpolation, method=method) expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5) + if interpolation == "nearest": + expected = expected.astype(np.int64) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) tm.assert_series_equal(result, expected) - result = df.quantile([0.5, 0.75], axis=1) + result = df.quantile( + [0.5, 0.75], axis=1, interpolation=interpolation, method=method + ) expected = DataFrame( {1: [1.5, 1.75], 2: [2.5, 2.75], 3: [3.5, 3.75]}, index=[0.5, 0.75] ) + if interpolation == "nearest": + expected.iloc[0, :] -= 0.5 + expected.iloc[1, :] += 0.25 + expected = expected.astype(np.int64) tm.assert_frame_equal(result, expected, check_index_type=True) + def test_axis_numeric_only_true(self, interp_method, request, using_array_manager): # We may want to break API in the future to change this # so that we exclude non-numeric along the same axis # See GH #7312 + interpolation, method = interp_method df = DataFrame([[1, 2, 3], ["a", "b", 4]]) - result = df.quantile(0.5, axis=1, numeric_only=True) + result = df.quantile( + 0.5, axis=1, numeric_only=True, interpolation=interpolation, method=method + ) expected = Series([3.0, 4.0], index=[0, 1], name=0.5) + if interpolation == "nearest": + expected = expected.astype(np.int64) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) tm.assert_series_equal(result, expected) - def test_quantile_date_range(self): + def test_quantile_date_range(self, interp_method, request, using_array_manager): # GH 2460 - + interpolation, method = interp_method dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") ser = Series(dti) df = DataFrame(ser) - result = df.quantile(numeric_only=False) + result = df.quantile( + numeric_only=False, interpolation=interpolation, method=method + ) expected = Series( ["2016-01-02 00:00:00"], name=0.5, dtype="datetime64[ns, US/Pacific]" ) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) tm.assert_series_equal(result, expected) - def test_quantile_axis_mixed(self): + def test_quantile_axis_mixed(self, interp_method, request, using_array_manager): # mixed on axis=1 + interpolation, method = interp_method df = DataFrame( { "A": [1, 2, 3], @@ -132,8 +225,16 @@ def test_quantile_axis_mixed(self): "D": ["foo", "bar", "baz"], } ) - result = df.quantile(0.5, axis=1, numeric_only=True) + result = df.quantile( + 0.5, axis=1, numeric_only=True, interpolation=interpolation, method=method + ) expected = Series([1.5, 2.5, 3.5], name=0.5) + if interpolation == "nearest": + expected -= 0.5 + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) tm.assert_series_equal(result, expected) # must raise @@ -141,30 +242,44 @@ def test_quantile_axis_mixed(self): with pytest.raises(TypeError, match=msg): df.quantile(0.5, axis=1, numeric_only=False) - def test_quantile_axis_parameter(self): + def test_quantile_axis_parameter(self, interp_method, request, using_array_manager): # GH 9543/9544 - + interpolation, method = interp_method + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) - result = df.quantile(0.5, axis=0) + result = df.quantile(0.5, axis=0, interpolation=interpolation, method=method) expected = Series([2.0, 3.0], index=["A", "B"], name=0.5) + if interpolation == "nearest": + expected = expected.astype(np.int64) tm.assert_series_equal(result, expected) - expected = df.quantile(0.5, axis="index") + expected = df.quantile( + 0.5, axis="index", interpolation=interpolation, method=method + ) + if interpolation == "nearest": + expected = expected.astype(np.int64) tm.assert_series_equal(result, expected) - result = df.quantile(0.5, axis=1) + result = df.quantile(0.5, axis=1, interpolation=interpolation, method=method) expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5) + if interpolation == "nearest": + expected = expected.astype(np.int64) tm.assert_series_equal(result, expected) - result = df.quantile(0.5, axis="columns") + result = df.quantile( + 0.5, axis="columns", interpolation=interpolation, method=method + ) tm.assert_series_equal(result, expected) msg = "No axis named -1 for object type DataFrame" with pytest.raises(ValueError, match=msg): - df.quantile(0.1, axis=-1) + df.quantile(0.1, axis=-1, interpolation=interpolation, method=method) msg = "No axis named column for object type DataFrame" with pytest.raises(ValueError, match=msg): df.quantile(0.1, axis="column") @@ -247,24 +362,45 @@ def test_quantile_interpolation_int(self, int_frame): assert q1["A"] == np.percentile(df["A"], 10) tm.assert_series_equal(q, q1) - def test_quantile_multi(self): + def test_quantile_multi(self, interp_method, request, using_array_manager): + interpolation, method = interp_method df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) - result = df.quantile([0.25, 0.5]) + result = df.quantile([0.25, 0.5], interpolation=interpolation, method=method) expected = DataFrame( [[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]], index=[0.25, 0.5], columns=["a", "b", "c"], ) + if interpolation == "nearest": + expected = expected.astype(np.int64) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) tm.assert_frame_equal(result, expected) - # axis = 1 - result = df.quantile([0.25, 0.5], axis=1) + def test_quantile_multi_axis_1(self, interp_method, request, using_array_manager): + interpolation, method = interp_method + df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) + result = df.quantile( + [0.25, 0.5], axis=1, interpolation=interpolation, method=method + ) expected = DataFrame( - [[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]], index=[0.25, 0.5], columns=[0, 1, 2] + [[1.0, 2.0, 3.0]] * 2, index=[0.25, 0.5], columns=[0, 1, 2] ) + if interpolation == "nearest": + expected = expected.astype(np.int64) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + tm.assert_frame_equal(result, expected) - # empty - result = DataFrame({"x": [], "y": []}).quantile([0.1, 0.9], axis=0) + def test_quantile_multi_empty(self, interp_method): + interpolation, method = interp_method + result = DataFrame({"x": [], "y": []}).quantile( + [0.1, 0.9], axis=0, interpolation=interpolation, method=method + ) expected = DataFrame( {"x": [np.nan, np.nan], "y": [np.nan, np.nan]}, index=[0.1, 0.9] ) @@ -275,7 +411,8 @@ def test_quantile_datetime(self): # exclude datetime result = df.quantile(0.5, numeric_only=True) - expected = Series([2.5], index=["b"]) + expected = Series([2.5], index=["b"], name=0.5) + tm.assert_series_equal(result, expected) # datetime result = df.quantile(0.5, numeric_only=False) @@ -327,26 +464,41 @@ def test_quantile_datetime(self): "Period[D]", ], ) - def test_quantile_dt64_empty(self, dtype): + def test_quantile_dt64_empty(self, dtype, interp_method): # GH#41544 + interpolation, method = interp_method df = DataFrame(columns=["a", "b"], dtype=dtype) - res = df.quantile(0.5, axis=1, numeric_only=False) + res = df.quantile( + 0.5, axis=1, numeric_only=False, interpolation=interpolation, method=method + ) expected = Series([], index=[], name=0.5, dtype=dtype) tm.assert_series_equal(res, expected) # no columns in result, so no dtype preservation - res = df.quantile([0.5], axis=1, numeric_only=False) + res = df.quantile( + [0.5], + axis=1, + numeric_only=False, + interpolation=interpolation, + method=method, + ) expected = DataFrame(index=[0.5]) tm.assert_frame_equal(res, expected) - def test_quantile_invalid(self, datetime_frame): + @pytest.mark.parametrize("invalid", [-1, 2, [0.5, -1], [0.5, 2]]) + def test_quantile_invalid(self, invalid, datetime_frame, interp_method): msg = "percentiles should all be in the interval \\[0, 1\\]" - for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: - with pytest.raises(ValueError, match=msg): - datetime_frame.quantile(invalid) - - def test_quantile_box(self): + interpolation, method = interp_method + with pytest.raises(ValueError, match=msg): + datetime_frame.quantile(invalid, interpolation=interpolation, method=method) + + def test_quantile_box(self, interp_method, request, using_array_manager): + interpolation, method = interp_method + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) df = DataFrame( { "A": [ @@ -367,7 +519,9 @@ def test_quantile_box(self): } ) - res = df.quantile(0.5, numeric_only=False) + res = df.quantile( + 0.5, numeric_only=False, interpolation=interpolation, method=method + ) exp = Series( [ @@ -380,7 +534,9 @@ def test_quantile_box(self): ) tm.assert_series_equal(res, exp) - res = df.quantile([0.5], numeric_only=False) + res = df.quantile( + [0.5], numeric_only=False, interpolation=interpolation, method=method + ) exp = DataFrame( [ [ @@ -394,6 +550,7 @@ def test_quantile_box(self): ) tm.assert_frame_equal(res, exp) + def test_quantile_box_nat(self): # DatetimeLikeBlock may be consolidated and contain NaT in different loc df = DataFrame( { @@ -469,49 +626,73 @@ def test_quantile_box(self): ) tm.assert_frame_equal(res, exp) - def test_quantile_nan(self): - + def test_quantile_nan(self, interp_method, request, using_array_manager): + interpolation, method = interp_method + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) # GH 14357 - float block where some cols have missing values df = DataFrame({"a": np.arange(1, 6.0), "b": np.arange(1, 6.0)}) df.iloc[-1, 1] = np.nan - res = df.quantile(0.5) - exp = Series([3.0, 2.5], index=["a", "b"], name=0.5) + res = df.quantile(0.5, interpolation=interpolation, method=method) + exp = Series( + [3.0, 2.5 if interpolation == "linear" else 3.0], index=["a", "b"], name=0.5 + ) tm.assert_series_equal(res, exp) - res = df.quantile([0.5, 0.75]) - exp = DataFrame({"a": [3.0, 4.0], "b": [2.5, 3.25]}, index=[0.5, 0.75]) + res = df.quantile([0.5, 0.75], interpolation=interpolation, method=method) + exp = DataFrame( + { + "a": [3.0, 4.0], + "b": [2.5, 3.25] if interpolation == "linear" else [3.0, 4.0], + }, + index=[0.5, 0.75], + ) tm.assert_frame_equal(res, exp) - res = df.quantile(0.5, axis=1) + res = df.quantile(0.5, axis=1, interpolation=interpolation, method=method) exp = Series(np.arange(1.0, 6.0), name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5, 0.75], axis=1) + res = df.quantile( + [0.5, 0.75], axis=1, interpolation=interpolation, method=method + ) exp = DataFrame([np.arange(1.0, 6.0)] * 2, index=[0.5, 0.75]) + if interpolation == "nearest": + exp.iloc[1, -1] = np.nan tm.assert_frame_equal(res, exp) # full-nan column df["b"] = np.nan - res = df.quantile(0.5) + res = df.quantile(0.5, interpolation=interpolation, method=method) exp = Series([3.0, np.nan], index=["a", "b"], name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5, 0.75]) + res = df.quantile([0.5, 0.75], interpolation=interpolation, method=method) exp = DataFrame({"a": [3.0, 4.0], "b": [np.nan, np.nan]}, index=[0.5, 0.75]) tm.assert_frame_equal(res, exp) - def test_quantile_nat(self): - + def test_quantile_nat(self, interp_method, request, using_array_manager): + interpolation, method = interp_method + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) # full NaT column df = DataFrame({"a": [pd.NaT, pd.NaT, pd.NaT]}) - res = df.quantile(0.5, numeric_only=False) + res = df.quantile( + 0.5, numeric_only=False, interpolation=interpolation, method=method + ) exp = Series([pd.NaT], index=["a"], name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5], numeric_only=False) + res = df.quantile( + [0.5], numeric_only=False, interpolation=interpolation, method=method + ) exp = DataFrame({"a": [pd.NaT]}, index=[0.5]) tm.assert_frame_equal(res, exp) @@ -527,50 +708,57 @@ def test_quantile_nat(self): } ) - res = df.quantile(0.5, numeric_only=False) + res = df.quantile( + 0.5, numeric_only=False, interpolation=interpolation, method=method + ) exp = Series([Timestamp("2012-01-02"), pd.NaT], index=["a", "b"], name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5], numeric_only=False) + res = df.quantile( + [0.5], numeric_only=False, interpolation=interpolation, method=method + ) exp = DataFrame( [[Timestamp("2012-01-02"), pd.NaT]], index=[0.5], columns=["a", "b"] ) tm.assert_frame_equal(res, exp) - def test_quantile_empty_no_rows_floats(self): + def test_quantile_empty_no_rows_floats(self, interp_method): + interpolation, method = interp_method - # floats df = DataFrame(columns=["a", "b"], dtype="float64") - res = df.quantile(0.5) + res = df.quantile(0.5, interpolation=interpolation, method=method) exp = Series([np.nan, np.nan], index=["a", "b"], name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5]) + res = df.quantile([0.5], interpolation=interpolation, method=method) exp = DataFrame([[np.nan, np.nan]], columns=["a", "b"], index=[0.5]) tm.assert_frame_equal(res, exp) - res = df.quantile(0.5, axis=1) + res = df.quantile(0.5, axis=1, interpolation=interpolation, method=method) exp = Series([], index=[], dtype="float64", name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5], axis=1) + res = df.quantile([0.5], axis=1, interpolation=interpolation, method=method) exp = DataFrame(columns=[], index=[0.5]) tm.assert_frame_equal(res, exp) - def test_quantile_empty_no_rows_ints(self): - # ints + def test_quantile_empty_no_rows_ints(self, interp_method): + interpolation, method = interp_method df = DataFrame(columns=["a", "b"], dtype="int64") - res = df.quantile(0.5) + res = df.quantile(0.5, interpolation=interpolation, method=method) exp = Series([np.nan, np.nan], index=["a", "b"], name=0.5) tm.assert_series_equal(res, exp) - def test_quantile_empty_no_rows_dt64(self): + def test_quantile_empty_no_rows_dt64(self, interp_method): + interpolation, method = interp_method # datetimes df = DataFrame(columns=["a", "b"], dtype="datetime64[ns]") - res = df.quantile(0.5, numeric_only=False) + res = df.quantile( + 0.5, numeric_only=False, interpolation=interpolation, method=method + ) exp = Series( [pd.NaT, pd.NaT], index=["a", "b"], dtype="datetime64[ns]", name=0.5 ) @@ -578,43 +766,61 @@ def test_quantile_empty_no_rows_dt64(self): # Mixed dt64/dt64tz df["a"] = df["a"].dt.tz_localize("US/Central") - res = df.quantile(0.5, numeric_only=False) + res = df.quantile( + 0.5, numeric_only=False, interpolation=interpolation, method=method + ) exp = exp.astype(object) tm.assert_series_equal(res, exp) # both dt64tz df["b"] = df["b"].dt.tz_localize("US/Central") - res = df.quantile(0.5, numeric_only=False) + res = df.quantile( + 0.5, numeric_only=False, interpolation=interpolation, method=method + ) exp = exp.astype(df["b"].dtype) tm.assert_series_equal(res, exp) - def test_quantile_empty_no_columns(self): + def test_quantile_empty_no_columns(self, interp_method): # GH#23925 _get_numeric_data may drop all columns + interpolation, method = interp_method df = DataFrame(pd.date_range("1/1/18", periods=5)) df.columns.name = "captain tightpants" - result = df.quantile(0.5, numeric_only=True) + result = df.quantile( + 0.5, numeric_only=True, interpolation=interpolation, method=method + ) expected = Series([], index=[], name=0.5, dtype=np.float64) expected.index.name = "captain tightpants" tm.assert_series_equal(result, expected) - result = df.quantile([0.5], numeric_only=True) + result = df.quantile( + [0.5], numeric_only=True, interpolation=interpolation, method=method + ) expected = DataFrame([], index=[0.5], columns=[]) expected.columns.name = "captain tightpants" tm.assert_frame_equal(result, expected) - def test_quantile_item_cache(self, using_array_manager): + def test_quantile_item_cache(self, using_array_manager, interp_method): # previous behavior incorrect retained an invalid _item_cache entry + interpolation, method = interp_method df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) df["D"] = df["A"] * 2 ser = df["A"] if not using_array_manager: assert len(df._mgr.blocks) == 2 - df.quantile(numeric_only=False) + df.quantile(numeric_only=False, interpolation=interpolation, method=method) ser.values[0] = 99 assert df.iloc[0, 0] == df["A"][0] + def test_invalid_method(self): + with pytest.raises(ValueError, match="Invalid method: foo"): + DataFrame(range(1)).quantile(0.5, method="foo") + + def test_table_invalid_interpolation(self): + with pytest.raises(ValueError, match="Invalid interpolation: foo"): + DataFrame(range(1)).quantile(0.5, method="table", interpolation="foo") + class TestQuantileExtensionDtype: # TODO: tests for axis=1?