Skip to content

REF: simplify BlockManager.quantile #39618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 8, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9410,6 +9410,13 @@ def quantile(
"""
validate_percentile(q)

if not is_list_like(q):
# BlockManager.quantile expects listlike, so we wrap and unwrap here
res = self.quantile(
[q], axis=axis, numeric_only=numeric_only, interpolation=interpolation
)
return res.iloc[0]

data = self._get_numeric_data() if numeric_only else self
axis = self._get_axis_number(axis)
is_transposed = axis == 1
Expand Down
16 changes: 4 additions & 12 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1399,14 +1399,12 @@ def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block:
"""
# We should always have ndim == 2 because Series dispatches to DataFrame
assert self.ndim == 2
assert axis == 1 # only ever called this way
assert is_list_like(qs) # caller is responsible for this

values = self.get_values()

is_empty = values.shape[axis] == 0
orig_scalar = not is_list_like(qs)
if orig_scalar:
# make list-like, unpack later
qs = [qs]

if is_empty:
# create the array of na_values
Expand All @@ -1430,14 +1428,7 @@ def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block:
result = np.array(result, copy=False)
result = result.T

if orig_scalar and not lib.is_scalar(result):
# result could be scalar in case with is_empty and self.ndim == 1
assert result.shape[-1] == 1, result.shape
result = result[..., 0]
result = lib.item_from_zerodim(result)

ndim = np.ndim(result)
return make_block(result, placement=np.arange(len(result)), ndim=ndim)
return make_block(result, placement=self.mgr_locs, ndim=2)

def _replace_coerce(
self,
Expand Down Expand Up @@ -2185,6 +2176,7 @@ def fillna(
)

def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block:
assert axis == 1 # only ever called this way
naive = self.values.view("M8[ns]")

# TODO(EA2D): kludge for 2D block with 1D values
Expand Down
80 changes: 15 additions & 65 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
is_extension_array_dtype,
is_list_like,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import ABCDataFrame, ABCPandasArray, ABCSeries
from pandas.core.dtypes.missing import array_equals, isna
Expand All @@ -40,7 +39,7 @@
from pandas.core.arrays.sparse import SparseDtype
from pandas.core.construction import extract_array
from pandas.core.indexers import maybe_convert_indices
from pandas.core.indexes.api import Index, ensure_index
from pandas.core.indexes.api import Float64Index, Index, ensure_index
from pandas.core.internals.base import DataManager
from pandas.core.internals.blocks import (
Block,
Expand Down Expand Up @@ -445,7 +444,6 @@ def quantile(
transposed: bool = False,
interpolation="linear",
qs=None,
numeric_only=None,
) -> BlockManager:
"""
Iterate over blocks applying quantile reduction.
Expand All @@ -460,8 +458,7 @@ def quantile(
transposed: bool, default False
we are holding transposed data
interpolation : type of interpolation, default 'linear'
qs : a scalar or list of the quantiles to be computed
numeric_only : ignored
qs : list of the quantiles to be computed

Returns
-------
Expand All @@ -470,73 +467,26 @@ def quantile(
# Series dispatches to DataFrame for quantile, which allows us to
# simplify some of the code here and in the blocks
assert self.ndim >= 2
assert is_list_like(qs) # caller is responsible for this
assert axis == 1 # only ever called this way

def get_axe(block, qs, axes):
# Because Series dispatches to DataFrame, we will always have
# block.ndim == 2
from pandas import Float64Index

if is_list_like(qs):
ax = Float64Index(qs)
else:
ax = axes[0]
return ax
qs_axe = Float64Index(qs)
new_axes = list(self.axes)
new_axes[1] = qs_axe

axes, blocks = [], []
blocks = []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you make this a list comprehension?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure

for b in self.blocks:
block = b.quantile(axis=axis, qs=qs, interpolation=interpolation)

axe = get_axe(b, qs, axes=self.axes)

axes.append(axe)
blocks.append(block)

# note that some DatetimeTZ, Categorical are always ndim==1
ndim = {b.ndim for b in blocks}
assert 0 not in ndim, ndim

if 2 in ndim:

new_axes = list(self.axes)

# multiple blocks that are reduced
if len(blocks) > 1:
new_axes[1] = axes[0]

# reset the placement to the original
for b, sb in zip(blocks, self.blocks):
b.mgr_locs = sb.mgr_locs

else:
new_axes[axis] = Index(np.concatenate([ax._values for ax in axes]))

if transposed:
new_axes = new_axes[::-1]
blocks = [
b.make_block(b.values.T, placement=np.arange(b.shape[1]))
for b in blocks
]

return type(self)(blocks, new_axes)

# single block, i.e. ndim == {1}
values = concat_compat([b.values for b in blocks])

# compute the orderings of our original data
if len(self.blocks) > 1:

indexer = np.empty(len(self.axes[0]), dtype=np.intp)
i = 0
for b in self.blocks:
for j in b.mgr_locs:
indexer[j] = i
i = i + 1

values = values.take(indexer)
if transposed:
new_axes = new_axes[::-1]
blocks = [
b.make_block(b.values.T, placement=np.arange(b.shape[1]))
for b in blocks
]

return SingleBlockManager(
make_block(values, ndim=1, placement=np.arange(len(values))), axes[0]
)
return type(self)(blocks, new_axes)

def isna(self, func) -> BlockManager:
return self.apply("apply", func=func)
Expand Down