Skip to content

API/CLN: Have toplevel pd.pivot mirror pivot instead of pivot_simple #22209

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Aug 8, 2018
11 changes: 7 additions & 4 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5346,8 +5346,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
# ----------------------------------------------------------------------
# Data reshaping

def pivot(self, index=None, columns=None, values=None):
"""
_shared_docs['pivot'] = """
Return reshaped DataFrame organized by given index / column values.

Reshape data (produce a "pivot" table) based on column values. Uses
Expand All @@ -5357,7 +5356,7 @@ def pivot(self, index=None, columns=None, values=None):
columns. See the :ref:`User Guide <reshaping>` for more on reshaping.

Parameters
----------
----------%s
index : string or object, optional
Column to use to make new frame's index. If None, uses
existing index.
Expand Down Expand Up @@ -5449,7 +5448,11 @@ def pivot(self, index=None, columns=None, values=None):
...
ValueError: Index contains duplicate entries, cannot reshape
"""
from pandas.core.reshape.reshape import pivot

@Substitution('')
@Appender(_shared_docs['pivot'])
def pivot(self, index=None, columns=None, values=None):
    # No inline docstring: the decorators above appear to build it from
    # _shared_docs['pivot'].
    # Imported at call time; pandas.core.reshape.pivot imports DataFrame
    # from pandas.core.frame, so a module-level import here would
    # presumably be circular.
    from pandas.core.reshape.pivot import pivot as _pivot

    return _pivot(self, index=index, columns=columns, values=values)

_shared_docs['pivot_table'] = """
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/reshape/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pandas.core.reshape.concat import concat
from pandas.core.reshape.melt import melt, lreshape, wide_to_long
from pandas.core.reshape.reshape import pivot_simple as pivot, get_dummies
from pandas.core.reshape.reshape import get_dummies
from pandas.core.reshape.merge import merge, merge_ordered, merge_asof
from pandas.core.reshape.pivot import pivot_table, crosstab
from pandas.core.reshape.pivot import pivot_table, pivot, crosstab
from pandas.core.reshape.tile import cut, qcut
92 changes: 91 additions & 1 deletion pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@

from pandas.core.reshape.concat import concat
from pandas.core.series import Series
from pandas.core.frame import DataFrame
from pandas.core.groupby import Grouper
from pandas.core.reshape.util import cartesian_product
from pandas.core.index import Index, _get_objs_combined_axis
from pandas.core.index import Index, MultiIndex, _get_objs_combined_axis
from pandas.compat import range, lrange, zip
from pandas import compat
import pandas.core.common as com
Expand Down Expand Up @@ -369,6 +370,30 @@ def _convert_by(by):
return by


@Substitution('\ndata : DataFrame')
@Appender(_shared_docs['pivot'], indents=1)
def pivot(data, index=None, columns=None, values=None):
    # No inline docstring: the decorators above appear to build it from
    # _shared_docs['pivot'].
    if values is None:
        # Every remaining column is a value column: move the key column(s)
        # into the index (keeping the existing index when ``index`` is not
        # given) and unstack on ``columns``.
        keep_index = index is None
        if keep_index:
            keys = [columns]
        else:
            keys = [index, columns]
        indexed = data.set_index(keys, append=keep_index)
        return indexed.unstack(columns)

    # Explicit value column(s): build the (row, column) MultiIndex by hand.
    row_labels = data.index if index is None else data[index]
    hindex = MultiIndex.from_arrays([row_labels, data[columns]])

    if isinstance(values, tuple) or not is_list_like(values):
        # A tuple is treated as a single column name, like any scalar label.
        indexed = data._constructor_sliced(data[values].values,
                                           index=hindex)
    else:
        indexed = data._constructor(data[values].values, index=hindex,
                                    columns=values)
    return indexed.unstack(columns)


def crosstab(index, columns, values=None, rownames=None, colnames=None,
aggfunc=None, margins=False, margins_name='All', dropna=True,
normalize=False):
Expand Down Expand Up @@ -598,3 +623,68 @@ def _get_names(arrs, names, prefix='row'):
names = list(names)

return names


def _pivot_simple(index, columns, values):
"""
Produce 'pivot' table based on 3 columns of this DataFrame.
Uses unique values from index / columns and fills with values.
Parameters
----------
index : ndarray
Labels to use to make new frame's index
columns : ndarray
Labels to use to make new frame's columns
values : ndarray
Values to use for populating new frame's values
Notes
-----
Obviously, all 3 of the input arguments must have the same length
This is ONLY used for testing in pandas/test/test_panel.py
Returns
-------
DataFrame
See also
--------
DataFrame.pivot_table : generalization of pivot that can handle
duplicate values for one index/column pair
"""
if (len(index) != len(columns)) or (len(columns) != len(values)):
raise AssertionError('Length of index, columns, and values must be the'
' same')
if len(index) == 0:
return DataFrame(index=[])
hindex = MultiIndex.from_arrays([index, columns])
series = Series(values.ravel(), index=hindex)
series = series.sort_index(level=0)
return series.unstack()


def _slow_pivot(index, columns, values):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we are using this anywhere except the test_panel test. If so, I would just remove it along with the test.

"""
Produce 'pivot' table based on 3 columns of this DataFrame.
Uses unique values from index / columns and fills with values.

Parameters
----------
index : string or object
Column name to use to make new frame's index
columns : string or object
Column name to use to make new frame's columns
values : string or object
Column name to use for populating new frame's values

Could benefit from some Cython here.

Note
----
This is ONLY used for testing in pandas/test/test_panel.py
"""
tree = {}
for i, (idx, col) in enumerate(zip(index, columns)):
if col not in tree:
tree[col] = {}
branch = tree[col]
branch[idx] = values[i]

return DataFrame(tree)
91 changes: 0 additions & 91 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,97 +383,6 @@ def _unstack_multiple(data, clocs, fill_value=None):
return unstacked


def pivot(self, index=None, columns=None, values=None):
    """
    Reshape ``self`` as described in DataFrame.pivot.
    """
    if values is None:
        # Every remaining column is a value column: move the key column(s)
        # into the index (keeping the existing index when ``index`` is not
        # given) and unstack on ``columns``.
        keep_index = index is None
        if keep_index:
            keys = [columns]
        else:
            keys = [index, columns]
        indexed = self.set_index(keys, append=keep_index)
        return indexed.unstack(columns)

    # Explicit value column(s): build the (row, column) MultiIndex by hand.
    row_labels = self.index if index is None else self[index]
    hindex = MultiIndex.from_arrays([row_labels, self[columns]])

    if isinstance(values, tuple) or not is_list_like(values):
        # A tuple is treated as a single column name, like any scalar label.
        indexed = self._constructor_sliced(self[values].values,
                                           index=hindex)
    else:
        indexed = self._constructor(self[values].values, index=hindex,
                                    columns=values)
    return indexed.unstack(columns)


def pivot_simple(index, columns, values):
    """
    Build a 'pivot' table from three equally sized label/value arrays.

    The unique entries of ``index`` become the rows, the unique entries of
    ``columns`` become the columns, and ``values`` fills the cells.

    Parameters
    ----------
    index : ndarray
        Labels to use to make new frame's index
    columns : ndarray
        Labels to use to make new frame's columns
    values : ndarray
        Values to use for populating new frame's values

    Returns
    -------
    DataFrame

    Raises
    ------
    AssertionError
        If the three inputs do not all have the same length.

    See Also
    --------
    DataFrame.pivot_table : generalization of pivot that can handle
        duplicate values for one index/column pair
    """
    if not (len(index) == len(columns) == len(values)):
        raise AssertionError('Length of index, columns, and values must be the'
                             ' same')

    if not len(index):
        # Nothing to reshape: hand back an empty frame.
        return DataFrame(index=[])

    pairs = MultiIndex.from_arrays([index, columns])
    flat = Series(values.ravel(), index=pairs)
    # Sort on the row labels before spreading the column labels out.
    return flat.sort_index(level=0).unstack()


def _slow_pivot(index, columns, values):
"""
Produce 'pivot' table based on 3 columns of this DataFrame.
Uses unique values from index / columns and fills with values.

Parameters
----------
index : string or object
Column name to use to make new frame's index
columns : string or object
Column name to use to make new frame's columns
values : string or object
Column name to use for populating new frame's values

Could benefit from some Cython here.
"""
tree = {}
for i, (idx, col) in enumerate(zip(index, columns)):
if col not in tree:
tree[col] = {}
branch = tree[col]
branch[idx] = values[i]

return DataFrame(tree)


def unstack(obj, level, fill_value=None):
if isinstance(level, (tuple, list)):
if len(level) != 1:
Expand Down
Loading