From b196878940f6c56d07667b5e987f7e25ad390693 Mon Sep 17 00:00:00 2001 From: David Hoffman Date: Mon, 30 Jan 2017 13:30:19 -0500 Subject: [PATCH 1/5] Fix overflow error in cartesian_product When the arrays in `X` are long, the cumulative product of their lengths can overflow on Windows machines, where the native `int` is 32-bit. Switching to np.intp alleviates this problem. Other fixes would include switching to np.uint32 or np.uint64. --- pandas/tools/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tools/util.py b/pandas/tools/util.py index b50bf9dc448bc..5cdb81a0098d8 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -55,7 +55,7 @@ def cartesian_product(X): if len(X) == 0: return [] - lenX = np.fromiter((len(x) for x in X), dtype=int) + lenX = np.fromiter((len(x) for x in X), dtype=np.intp) cumprodX = np.cumproduct(lenX) a = np.roll(cumprodX, 1) From 7aeee85546f4fbff257e9e9b2b71c474b5c39f84 Mon Sep 17 00:00:00 2001 From: David Hoffman Date: Mon, 30 Jan 2017 15:26:26 -0500 Subject: [PATCH 2/5] Added tests for large numbers --- pandas/tools/tests/test_util.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index f9647721e3c5b..c7de0aa381088 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -57,6 +57,15 @@ def test_invalid_input(self): msg = "Input must be a list-like of list-likes" for X in invalid_inputs: tm.assertRaisesRegexp(TypeError, msg, cartesian_product, X=X) + + def test_large_input(self): + # test failure of large inputs on windows OS + X = np.arange(65536) + Y = np.arange(65535) + result1, result2 = cartesian_product([X, Y]) + expected1, expected2 = np.asarray(list(pd.compat.product(X, Y))).T + tm.assert_numpy_array_equal(result1, expected1) + tm.assert_numpy_array_equal(result2, expected2) class TestLocaleUtils(tm.TestCase): From 47a6c6c9e65d24c59612836dddfdc162fefecc6f Mon Sep 17 00:00:00 2001 From: David Hoffman 
Date: Mon, 30 Jan 2017 16:18:36 -0500 Subject: [PATCH 3/5] Update test so that it will actually run on "normal" machine --- pandas/tools/tests/test_util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index c7de0aa381088..1576d3f25efc8 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -63,9 +63,9 @@ def test_large_input(self): X = np.arange(65536) Y = np.arange(65535) result1, result2 = cartesian_product([X, Y]) - expected1, expected2 = np.asarray(list(pd.compat.product(X, Y))).T - tm.assert_numpy_array_equal(result1, expected1) - tm.assert_numpy_array_equal(result2, expected2) + expected_size = X.size * Y.size + tm.assert_equal(result1.size, expected_size) + tm.assert_equal(result2.size, expected_size) class TestLocaleUtils(tm.TestCase): From d54583e9ca09fe5e48636abec056dd553a58c0a7 Mon Sep 17 00:00:00 2001 From: David Hoffman Date: Mon, 30 Jan 2017 16:51:49 -0500 Subject: [PATCH 4/5] Remove `test_large_input` because it's too big --- pandas/tools/tests/test_util.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index 1576d3f25efc8..f9647721e3c5b 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -57,15 +57,6 @@ def test_invalid_input(self): msg = "Input must be a list-like of list-likes" for X in invalid_inputs: tm.assertRaisesRegexp(TypeError, msg, cartesian_product, X=X) - - def test_large_input(self): - # test failure of large inputs on windows OS - X = np.arange(65536) - Y = np.arange(65535) - result1, result2 = cartesian_product([X, Y]) - expected_size = X.size * Y.size - tm.assert_equal(result1.size, expected_size) - tm.assert_equal(result2.size, expected_size) class TestLocaleUtils(tm.TestCase): From c9c8d5e3f250299e9888da239ac9ea9246c047c1 Mon Sep 17 00:00:00 2001 From: David Hoffman Date: Mon, 30 Jan 2017 16:55:47 
-0500 Subject: [PATCH 5/5] Update v0.19.2.txt --- doc/source/whatsnew/v0.19.2.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index 722e494c9e614..bf52d1a74e3ea 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -80,3 +80,4 @@ Bug Fixes - Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`) - Bug in ``.plot(kind='kde')`` which did not drop missing values to generate the KDE Plot, instead generating an empty plot. (:issue:`14821`) - Bug in ``unstack()`` if called with a list of column(s) as an argument, regardless of the dtypes of all columns, they get coerced to ``object`` (:issue:`11847`) +- Bug in ``cartesian_product`` in ``pandas.tools.util`` that caused an uncaught overflow error when called with large inputs (:issue:`15265`)