From 8918e6078c249900f22ce42db5cce1688ce32bd0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Mar 2020 11:41:57 -0700 Subject: [PATCH 1/4] BUG: pivot_table losing tz --- pandas/core/indexes/multi.py | 1 + pandas/core/reshape/util.py | 64 ++++++++++++++++++++++++++---- pandas/tests/reshape/test_pivot.py | 8 ++++ pandas/tests/reshape/test_util.py | 16 ++++++++ 4 files changed, 82 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c1efa512f326a..81527fda6d276 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -563,6 +563,7 @@ def from_product(cls, iterables, sortorder=None, names=lib.no_default): if names is lib.no_default: names = [getattr(it, "name", None) for it in iterables] + # codes are all ndarrays, so cartesian_product is lossless codes = cartesian_product(codes) return MultiIndex(levels, codes, sortorder=sortorder, names=names) diff --git a/pandas/core/reshape/util.py b/pandas/core/reshape/util.py index d8652c9b4fac9..03398ce18ce2c 100644 --- a/pandas/core/reshape/util.py +++ b/pandas/core/reshape/util.py @@ -1,8 +1,9 @@ import numpy as np from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.generic import ABCCategorical -import pandas.core.common as com +from pandas.core.indexes.api import Index, IntervalIndex def cartesian_product(X): @@ -51,9 +52,58 @@ def cartesian_product(X): # if any factor is empty, the cartesian product is empty b = np.zeros_like(cumprodX) - return [ - np.tile( - np.repeat(np.asarray(com.values_from_object(x)), b[i]), np.product(a[i]) - ) - for i, x in enumerate(X) - ] + return [_tile_compat(np.repeat(x, b[i]), np.product(a[i])) for i, x in enumerate(X)] + + +def _broadcast_tile(arr: np.ndarray, num: int) -> np.ndarray: + """ + Emulate np.tile but using views instead of copies. + """ + shape = (len(arr), num) + middle = arr.reshape(len(arr), 1) + new_arr = np.broadcast_to(middle, shape) + + # Note: doing `ravel` gives us the wrong order + return new_arr.reshape(-1, order="F") + + +def _tile_compat(arr, num: int): + """ + Index compat for np.tile. + + Notes + ----- + Does not support multi-dimensional `num`. + """ + if isinstance(arr, np.ndarray): + return _broadcast_tile(arr, num) + + # Otherwise we have an Index + values = arr._data + + if isinstance(values, np.ndarray): + result = _broadcast_tile(values, num) + return type(arr)._simple_new(result, name=arr.name) + + elif isinstance(values, ABCCategorical): + codes = _broadcast_tile(values.codes, num) + result = type(values).from_codes(codes, dtype=values.dtype) + return type(arr)._simple_new(result, name=arr.name) + + elif isinstance(arr, IntervalIndex): + new_left = _tile_compat(values.left, num) + new_right = _tile_compat(values.right, num) + result = type(values).from_arrays(new_left, new_right, closed=values.closed) + return type(arr)._simple_new(result, name=arr.name) + + elif isinstance(values._data, np.ndarray): + # DatetimeIndex, TimedeltaIndex, PeriodIndex + data = _broadcast_tile(values._data, num) + result = type(values)._simple_new(data, dtype=values.dtype) + return type(arr)._simple_new(result, name=arr.name) + + else: + # As of now this just leaves RangeIndex, which cannot + # use type(self)._simple_new + result = _broadcast_tile(values, num) + return Index(result, name=arr.name) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index e09a2a7907177..66d9d685cef6a 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1026,6 +1026,14 @@ def test_pivot_table_multiindex_only(self, cols): tm.assert_frame_equal(result, expected) + def test_pivot_table_retains_tz(self): + dti = date_range("2016-01-01", periods=3, tz="Europe/Amsterdam") + df = DataFrame({"A": np.random.randn(3), "B": np.random.randn(3), "C": dti}) + result = df.pivot_table(index=["B", "C"], dropna=False) + + # check tz retention + assert result.index.levels[1].equals(dti) + def test_pivot_integer_columns(self): # caused by upstream bug in unstack diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index cd518dda4edbf..9d074b5ade425 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -25,6 +25,22 @@ def test_datetimeindex(self): tm.assert_index_equal(result1, expected1) tm.assert_index_equal(result2, expected2) + def test_tzaware_retained(self): + x = date_range("2000-01-01", periods=2, tz="US/Pacific") + y = np.array([3, 4]) + result1, result2 = cartesian_product([x, y]) + + expected = x.repeat(2) + tm.assert_index_equal(result1, expected) + + def test_tzaware_retained_categorical(self): + x = date_range("2000-01-01", periods=2, tz="US/Pacific").astype("category") + y = np.array([3, 4]) + result1, result2 = cartesian_product([x, y]) + + expected = x.repeat(2) + tm.assert_index_equal(result1, expected) + def test_empty(self): # product of empty factors X = [[], [0, 1], []] From f561dd962abf6eb19cbf6794509701ebfb738eae Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 12 Mar 2020 08:56:08 -0700 Subject: [PATCH 2/4] Simpler implementation --- pandas/core/reshape/util.py | 47 +++---------------------------------- 1 file changed, 3 insertions(+), 44 deletions(-) diff --git a/pandas/core/reshape/util.py b/pandas/core/reshape/util.py index 03398ce18ce2c..7abb14303f8cc 100644 --- a/pandas/core/reshape/util.py +++ b/pandas/core/reshape/util.py @@ -1,9 +1,6 @@ import numpy as np from pandas.core.dtypes.common import is_list_like -from pandas.core.dtypes.generic import ABCCategorical - -from pandas.core.indexes.api import Index, IntervalIndex def cartesian_product(X): @@ -55,18 +52,6 @@ def cartesian_product(X): return [_tile_compat(np.repeat(x, b[i]), np.product(a[i])) for i, x in enumerate(X)] -def _broadcast_tile(arr: np.ndarray, num: int) -> np.ndarray: - """ - Emulate np.tile but using views instead of copies. - """ - shape = (len(arr), num) - middle = arr.reshape(len(arr), 1) - new_arr = np.broadcast_to(middle, shape) - - # Note: doing `ravel` gives us the wrong order - return new_arr.reshape(-1, order="F") - - def _tile_compat(arr, num: int): """ Index compat for np.tile. @@ -76,34 +61,8 @@ def _tile_compat(arr, num: int): Does not support multi-dimensional `num`. """ if isinstance(arr, np.ndarray): - return _broadcast_tile(arr, num) + return np.tile(arr, num) # Otherwise we have an Index - values = arr._data - - if isinstance(values, np.ndarray): - result = _broadcast_tile(values, num) - return type(arr)._simple_new(result, name=arr.name) - - elif isinstance(values, ABCCategorical): - codes = _broadcast_tile(values.codes, num) - result = type(values).from_codes(codes, dtype=values.dtype) - return type(arr)._simple_new(result, name=arr.name) - - elif isinstance(arr, IntervalIndex): - new_left = _tile_compat(values.left, num) - new_right = _tile_compat(values.right, num) - result = type(values).from_arrays(new_left, new_right, closed=values.closed) - return type(arr)._simple_new(result, name=arr.name) - - elif isinstance(values._data, np.ndarray): - # DatetimeIndex, TimedeltaIndex, PeriodIndex - data = _broadcast_tile(values._data, num) - result = type(values)._simple_new(data, dtype=values.dtype) - return type(arr)._simple_new(result, name=arr.name) - - else: - # As of now this just leaves RangeIndex, which cannot - # use type(self)._simple_new - result = _broadcast_tile(values, num) - return Index(result, name=arr.name) + taker = np.tile(np.arange(len(arr)), num) + return arr.take(taker) From f7607601ae6c79707d13724c2bf14692733963c6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 14 Mar 2020 09:56:17 -0700 Subject: [PATCH 3/4] whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 48eff0543ad4d..dc66c4cd5ee3b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -335,6 +335,7 @@ Reshaping - Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`) - :meth:`Series.append` will now raise a ``TypeError`` when passed a DataFrame or a sequence containing Dataframe (:issue:`31413`) - :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`) +- Bug in :meth:`DataFrame.pivot_table` losing timezone information when creating a :class:`MultiIndex` level from a column with timezone-aware dtype (:issue:`32558`) Sparse From 3662259b612475746d259f45ddf5a109f37c1963 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 14 Mar 2020 12:15:50 -0700 Subject: [PATCH 4/4] dummy to force CI