pandas-dev · jreback · Dec 28, 2021 · Dec 28, 2021 · Dec 28, 2021 · Dec 28, 2021
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -364,10 +364,29 @@ second column is instead renamed to ``a.2``.
 
     res
 
-.. _whatsnew_140.notable_bug_fixes.notable_bug_fix3:
+.. _whatsnew_140.notable_bug_fixes.unstack_pivot_int32_limit:
 
-notable_bug_fix3
-^^^^^^^^^^^^^^^^
+unstack and pivot_table no longer raise a ValueError for results larger than the int32 limit
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Previously :meth:`DataFrame.pivot_table` and :meth:`DataFrame.unstack` would raise a ``ValueError`` if the operation
+could produce a result with more than ``2**31 - 1`` elements. This operation now raises a :class:`errors.PerformanceWarning`
+instead (:issue:`26314`).
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+    In [3]: df = DataFrame({"ind1": np.arange(2 ** 16), "ind2": np.arange(2 ** 16), "count": 0})
+    In [4]: df.pivot_table(index="ind1", columns="ind2", values="count", aggfunc="count")
+    ValueError: Unstacked DataFrame is too big, causing int32 overflow
+
+*New behavior*:
+
+.. code-block:: ipython
+
+    In [4]: df.pivot_table(index="ind1", columns="ind2", values="count", aggfunc="count")
+    PerformanceWarning: The following operation may generate 4294967296 cells in the resulting pandas object.
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
@@ -2,6 +2,7 @@
 
 import itertools
 from typing import TYPE_CHECKING
+import warnings
 
 import numpy as np
 
@@ -11,6 +12,7 @@
     Dtype,
     npt,
 )
+from pandas.errors import PerformanceWarning
 from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.cast import maybe_promote
@@ -125,10 +127,15 @@ def __init__(self, index: MultiIndex, level=-1, constructor=None):
         num_columns = self.removed_level.size
 
         # GH20601: This forces an overflow if the number of cells is too high.
-        num_cells = np.multiply(num_rows, num_columns, dtype=np.int32)
-
-        if num_rows > 0 and num_columns > 0 and num_cells <= 0:
-            raise ValueError("Unstacked DataFrame is too big, causing int32 overflow")
+        num_cells = num_rows * num_columns
+
+        # GH 26314: The previous ValueError was too restrictive for many users.
+        if num_cells > np.iinfo(np.int32).max:
+            warnings.warn(
+                f"The following operation may generate {num_cells} cells "
+                f"in the resulting pandas object.",
+                PerformanceWarning,
+            )
 
         self._make_selectors()
 

diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
@@ -5,6 +5,8 @@
 import numpy as np
 import pytest
 
+from pandas.errors import PerformanceWarning
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -1819,11 +1821,17 @@ def test_unstack_unobserved_keys(self):
     @pytest.mark.slow
     def test_unstack_number_of_levels_larger_than_int32(self):
         # GH#20601
+        # GH 26314: Change ValueError to PerformanceWarning
         df = DataFrame(
             np.random.randn(2 ** 16, 2), index=[np.arange(2 ** 16), np.arange(2 ** 16)]
         )
-        with pytest.raises(ValueError, match="int32 overflow"):
-            df.unstack()
+        msg = "The following operation may generate"
+        with tm.assert_produces_warning(PerformanceWarning, match=msg):
+            try:
+                df.unstack()
+            except MemoryError:
+                # Only the warning is under test; the result may not fit in memory
+                return
 
     def test_stack_order_with_unsorted_levels(self):
         # GH#16323

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -8,6 +8,8 @@
 import numpy as np
 import pytest
 
+from pandas.errors import PerformanceWarning
+
 import pandas as pd
 from pandas import (
     Categorical,
@@ -1991,15 +1993,20 @@ def test_pivot_string_func_vs_func(self, f, f_numpy):
     @pytest.mark.slow
     def test_pivot_number_of_levels_larger_than_int32(self):
         # GH 20601
+        # GH 26314: Change ValueError to PerformanceWarning
         df = DataFrame(
             {"ind1": np.arange(2 ** 16), "ind2": np.arange(2 ** 16), "count": 0}
         )
 
-        msg = "Unstacked DataFrame is too big, causing int32 overflow"
-        with pytest.raises(ValueError, match=msg):
-            df.pivot_table(
-                index="ind1", columns="ind2", values="count", aggfunc="count"
-            )
+        msg = "The following operation may generate"
+        with tm.assert_produces_warning(PerformanceWarning, match=msg):
+            try:
+                df.pivot_table(
+                    index="ind1", columns="ind2", values="count", aggfunc="count"
+                )
+            except MemoryError:
+                # Only the warning is under test; the result may not fit in memory
+                return
 
     def test_pivot_table_aggfunc_dropna(self, dropna):
         # GH 22159