Rewrite explode method of frame so it doesn't require exploding a series twice

Marco Gorelli · Marco Gorelli · commit 8865cad2796f · 2019-08-20T09:41:47.000+01:00
Syntax: python explode.py infile template [range]

The template argument is used to construct the names of the
individual frame files.  The frames are numbered file001.ext,
file002.ext, etc.  You can insert %d to control the placement
and syntax of the frame number.

The optional range argument specifies which frames to extract.
You can give one or more ranges like 1-10, 5, -15 etc.  If
omitted, all frames are extracted.
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2096,7 +2096,7 @@ def to_stata(
             data_label=data_label,
             write_index=write_index,
             variable_labels=variable_labels,
-            **kwargs
+            **kwargs,
         )
         writer.write_file()
 
@@ -2122,7 +2122,7 @@ def to_parquet(
         compression="snappy",
         index=None,
         partition_cols=None,
-        **kwargs
+        **kwargs,
     ):
         """
         Write a DataFrame to the binary parquet format.
@@ -2198,7 +2198,7 @@ def to_parquet(
             compression=compression,
             index=index,
             partition_cols=partition_cols,
-            **kwargs
+            **kwargs,
         )
 
     @Substitution(
@@ -4172,7 +4172,7 @@ def fillna(
         inplace=False,
         limit=None,
         downcast=None,
-        **kwargs
+        **kwargs,
     ):
         return super().fillna(
             value=value,
@@ -4181,7 +4181,7 @@ def fillna(
             inplace=inplace,
             limit=limit,
             downcast=downcast,
-            **kwargs
+            **kwargs,
         )
 
     @Appender(_shared_docs["replace"] % _shared_doc_kwargs)
@@ -6237,15 +6237,31 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
         if not self.columns.is_unique:
             raise ValueError("columns must be unique")
 
-        result = self[column].explode()
-        return (
+        if isinstance(self.index, MultiIndex):
+            index_names = [
+                f"level_{num}" if val is None else val
+                for num, val in enumerate(self.index.names)
+            ]
+        else:
+            index_names = [i if i else "index" for i in [self.index.name]]
+
+        column_with_index = self[column].reset_index()
+
+        result = (
             self.drop([column], axis=1)
-            .reset_index(drop=True)
-            .join(self[column].reset_index(drop=True).explode())
+            .reset_index()
+            .join(column_with_index[column].explode())
+            .set_index(index_names)
             .reindex(columns=self.columns, copy=False)
-            .set_index(result.index)
         )
 
+        if isinstance(self.index, MultiIndex):
+            result.index.names = self.index.names
+        else:
+            result.index.name = self.index.name
+
+        return result
+
     def unstack(self, level=-1, fill_value=None):
         """
         Pivot a level of the (necessarily hierarchical) index labels, returning
@@ -6617,7 +6633,7 @@ def _gotitem(
         see_also=_agg_summary_and_see_also_doc,
         examples=_agg_examples_doc,
         versionadded="\n.. versionadded:: 0.20.0\n",
-        **_shared_doc_kwargs
+        **_shared_doc_kwargs,
     )
     @Appender(_shared_docs["aggregate"])
     def aggregate(self, func, axis=0, *args, **kwargs):
@@ -6659,7 +6675,7 @@ def apply(
         reduce=None,
         result_type=None,
         args=(),
-        **kwds
+        **kwds,
     ):
         """
         Apply a function along an axis of the DataFrame.
diff --git a/pandas/tests/frame/test_explode.py b/pandas/tests/frame/test_explode.py
@@ -137,6 +137,19 @@ def test_usecase():
                 dtype=object,
             ),
         ),
+        (
+            pd.DataFrame(
+                {"col": [[1, 2], [3, 4]], "other_col": ["a", "b"], "my_index": [0, 0]}
+            ).set_index("my_index"),
+            pd.DataFrame(
+                {
+                    "col": [1, 2, 3, 4],
+                    "other_col": ["a", "a", "b", "b"],
+                    "my_index": [0, 0, 0, 0],
+                },
+                dtype=object,
+            ).set_index("my_index"),
+        ),
     ],
 )
 def test_duplicate_index(df, expected):