From b83151619379435b3a8236d9de378eb5b263768b Mon Sep 17 00:00:00 2001
From: Piotr Jucha <pi.jucha@gmail.com>
Date: Mon, 5 Sep 2016 00:26:37 -0400
Subject: [PATCH] BUG/TST: Empty input arrays in cartesian_product and
 MultiIndex (#12258)

---
 doc/source/whatsnew/v0.19.0.txt    |  1 +
 pandas/core/categorical.py         | 22 ++++++++++-
 pandas/tests/indexes/test_multi.py | 63 ++++++++++++++++++++++++++++++
 pandas/tools/tests/test_util.py    | 23 +++++++++++
 pandas/tools/util.py               | 29 +++++++++++++-
 5 files changed, 135 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 454ffc5e5c685..67beb468dce8a 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -1471,6 +1471,7 @@ Bug Fixes
 - Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`)
 - Bug in ``.str.replace`` does not raise ``TypeError`` for invalid replacement (:issue:`13438`)
 - Bug in ``MultiIndex.from_arrays`` which didn't check for input array lengths matching (:issue:`13599`)
+- Bug in ``cartesian_product`` and ``MultiIndex.from_product`` which could raise an error when passed empty input arrays (:issue:`12258`)
 
 
 - Bug in ``pd.read_csv()`` which may cause a segfault or corruption when iterating in large chunks over a stream/file under rare circumstances (:issue:`13703`)
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 0a13c8936eeec..6b37a5e2cd202 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -1979,13 +1979,16 @@ def _factorize_from_iterable(values):
 
     Returns
     -------
-    codes : np.array
+    codes : ndarray
     categories : Index
         If `values` has a categorical dtype, then `categories` is
         a CategoricalIndex keeping the categories and order of `values`.
     """
     from pandas.indexes.category import CategoricalIndex
 
+    if not is_list_like(values):
+        raise TypeError("Input must be list-like")
+
     if is_categorical(values):
         if isinstance(values, (ABCCategoricalIndex, ABCSeries)):
             values = values._values
@@ -2003,8 +2006,23 @@ def _factorize_from_iterable(values):
 def _factorize_from_iterables(iterables):
     """
     A higher-level wrapper over `_factorize_from_iterable`.
-    See `_factorize_from_iterable` for more info.
 
     *This is an internal function*
+
+    Parameters
+    ----------
+    iterables : list-like of list-likes
+
+    Returns
+    -------
+    codes_tuple : tuple of ndarrays
+    categories_tuple : tuple of Indexes
+
+    Notes
+    -----
+    See `_factorize_from_iterable` for more info.
     """
+    if len(iterables) == 0:
+        # No iterables: still return a list of 2 tuples (codes, categories).
+        return [(), ()]
     return lzip(*[_factorize_from_iterable(it) for it in iterables])
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 5248f0775d22f..92061eab61b78 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -691,6 +691,32 @@ def test_from_arrays_index_series_categorical(self):
         tm.assert_index_equal(result3.get_level_values(0), idx1)
         tm.assert_index_equal(result3.get_level_values(1), idx2)
 
+    def test_from_arrays_empty(self):
+        # 0 levels
+        with tm.assertRaisesRegexp(
+                ValueError, "Must pass non-zero number of levels/labels"):
+            MultiIndex.from_arrays(arrays=[])
+
+        # 1 level
+        result = MultiIndex.from_arrays(arrays=[[]], names=['A'])
+        expected = Index([], name='A')
+        tm.assert_index_equal(result, expected)
+
+        # N levels
+        for N in [2, 3]:
+            arrays = [[]] * N
+            names = list('ABC')[:N]
+            result = MultiIndex.from_arrays(arrays=arrays, names=names)
+            expected = MultiIndex(levels=[np.array([])] * N, labels=[[]] * N,
+                                  names=names)
+            tm.assert_index_equal(result, expected)
+
+    def test_from_arrays_invalid_input(self):
+        invalid_inputs = [1, [1], [1, 2], [[1], 2],
+                          'a', ['a'], ['a', 'b'], [['a'], 'b']]
+        for i in invalid_inputs:
+            tm.assertRaises(TypeError, MultiIndex.from_arrays, arrays=i)
+
     def test_from_arrays_different_lengths(self):
         # GH13599
         idx1 = [1, 2, 3]
@@ -723,6 +749,43 @@ def test_from_product(self):
         tm.assert_index_equal(result, expected)
         self.assertEqual(result.names, names)
 
+    def test_from_product_empty(self):
+        # 0 levels
+        with tm.assertRaisesRegexp(
+                ValueError, "Must pass non-zero number of levels/labels"):
+            MultiIndex.from_product([])
+
+        # 1 level
+        result = MultiIndex.from_product([[]], names=['A'])
+        expected = pd.Float64Index([], name='A')
+        tm.assert_index_equal(result, expected)
+
+        # 2 levels
+        l1 = [[], ['foo', 'bar', 'baz'], []]
+        l2 = [[], [], ['a', 'b', 'c']]
+        names = ['A', 'B']
+        for first, second in zip(l1, l2):
+            result = MultiIndex.from_product([first, second], names=names)
+            expected = MultiIndex(levels=[np.array(first), np.array(second)],
+                                  labels=[[], []], names=names)
+            tm.assert_index_equal(result, expected)
+
+        # GH12258
+        names = ['A', 'B', 'C']
+        for N in range(4):
+            lvl2 = lrange(N)
+            result = MultiIndex.from_product([[], lvl2, []], names=names)
+            expected = MultiIndex(levels=[np.array(A)
+                                          for A in [[], lvl2, []]],
+                                  labels=[[], [], []], names=names)
+            tm.assert_index_equal(result, expected)
+
+    def test_from_product_invalid_input(self):
+        invalid_inputs = [1, [1], [1, 2], [[1], 2],
+                          'a', ['a'], ['a', 'b'], [['a'], 'b']]
+        for i in invalid_inputs:
+            tm.assertRaises(TypeError, MultiIndex.from_product, iterables=i)
+
     def test_from_product_datetimeindex(self):
         dt_index = date_range('2000-01-01', periods=2)
         mi = pd.MultiIndex.from_product([[1, 2], dt_index])
diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py
index d8a98bbb3fd27..8c16308d79a31 100644
--- a/pandas/tools/tests/test_util.py
+++ b/pandas/tools/tests/test_util.py
@@ -34,6 +34,29 @@ def test_datetimeindex(self):
         tm.assert_numpy_array_equal(result1, expected1)
         tm.assert_numpy_array_equal(result2, expected2)
 
+    def test_empty(self):
+        # product of empty factors
+        X = [[], [0, 1], []]
+        Y = [[], [], ['a', 'b', 'c']]
+        for x, y in zip(X, Y):
+            expected1 = np.array([], dtype=np.asarray(x).dtype)
+            expected2 = np.array([], dtype=np.asarray(y).dtype)
+            result1, result2 = cartesian_product([x, y])
+            tm.assert_numpy_array_equal(result1, expected1)
+            tm.assert_numpy_array_equal(result2, expected2)
+
+        # empty product (empty input):
+        result = cartesian_product([])
+        expected = []
+        tm.assert_equal(result, expected)
+
+    def test_invalid_input(self):
+        invalid_inputs = [1, [1], [1, 2], [[1], 2],
+                          'a', ['a'], ['a', 'b'], [['a'], 'b']]
+        msg = "Input must be a list-like of list-likes"
+        for X in invalid_inputs:
+            tm.assertRaisesRegexp(TypeError, msg, cartesian_product, X=X)
+
 
 class TestLocaleUtils(tm.TestCase):
 
diff --git a/pandas/tools/util.py b/pandas/tools/util.py
index b8b28663387cc..fec56328c1721 100644
--- a/pandas/tools/util.py
+++ b/pandas/tools/util.py
@@ -4,6 +4,7 @@
 from pandas.types.common import (is_number,
                                  is_numeric_dtype,
                                  is_datetime_or_timedelta_dtype,
+                                 is_list_like,
                                  _ensure_object)
 from pandas.types.cast import _possibly_downcast_to_dtype
 
@@ -24,13 +25,35 @@ def cartesian_product(X):
     Numpy version of itertools.product or pandas.compat.product.
     Sometimes faster (for large inputs)...
 
+    Parameters
+    ----------
+    X : list-like of list-likes
+
+    Returns
+    -------
+    product : list of ndarrays
+
     Examples
     --------
     >>> cartesian_product([list('ABC'), [1, 2]])
     [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'),
     array([1, 2, 1, 2, 1, 2])]
 
+    See Also
+    --------
+    itertools.product : Cartesian product of input iterables.  Equivalent to
+        nested for-loops.
+    pandas.compat.product : An alias for itertools.product.
     """
+    msg = "Input must be a list-like of list-likes"
+    if not is_list_like(X):
+        raise TypeError(msg)
+    for x in X:
+        if not is_list_like(x):
+            raise TypeError(msg)
+
+    if len(X) == 0:
+        return []
 
     lenX = np.fromiter((len(x) for x in X), dtype=int)
     cumprodX = np.cumproduct(lenX)
@@ -38,7 +61,11 @@ def cartesian_product(X):
     a = np.roll(cumprodX, 1)
     a[0] = 1
 
-    b = cumprodX[-1] / cumprodX
+    if cumprodX[-1] != 0:
+        b = cumprodX[-1] / cumprodX
+    else:
+        # if any factor is empty, the cartesian product is empty
+        b = np.zeros_like(cumprodX)
 
     return [np.tile(np.repeat(np.asarray(com._values_from_object(x)), b[i]),
                     np.product(a[i]))