Skip to content

Commit 8865cad

Browse files
author
Marco Gorelli
committed
Rewrite explode method of frame so it doesn't require exploding a series twice
(Stray text captured in place of the word "explode" in the title above, apparently the usage notes of an unrelated explode.py script: "Syntax: python explode.py infile template [range]. The template argument is used to construct the names of the individual frame files. The frames are numbered file001.ext, file002.ext, etc. You can insert %d to control the placement and syntax of the frame number. The optional range argument specifies which frames to extract. You can give one or more ranges like 1-10, 5, -15 etc. If omitted, all frames are extracted.")
1 parent d31a765 commit 8865cad

File tree

2 files changed

+41
-12
lines changed

2 files changed

+41
-12
lines changed

pandas/core/frame.py

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2096,7 +2096,7 @@ def to_stata(
20962096
data_label=data_label,
20972097
write_index=write_index,
20982098
variable_labels=variable_labels,
2099-
**kwargs
2099+
**kwargs,
21002100
)
21012101
writer.write_file()
21022102

@@ -2122,7 +2122,7 @@ def to_parquet(
21222122
compression="snappy",
21232123
index=None,
21242124
partition_cols=None,
2125-
**kwargs
2125+
**kwargs,
21262126
):
21272127
"""
21282128
Write a DataFrame to the binary parquet format.
@@ -2198,7 +2198,7 @@ def to_parquet(
21982198
compression=compression,
21992199
index=index,
22002200
partition_cols=partition_cols,
2201-
**kwargs
2201+
**kwargs,
22022202
)
22032203

22042204
@Substitution(
@@ -4172,7 +4172,7 @@ def fillna(
41724172
inplace=False,
41734173
limit=None,
41744174
downcast=None,
4175-
**kwargs
4175+
**kwargs,
41764176
):
41774177
return super().fillna(
41784178
value=value,
@@ -4181,7 +4181,7 @@ def fillna(
41814181
inplace=inplace,
41824182
limit=limit,
41834183
downcast=downcast,
4184-
**kwargs
4184+
**kwargs,
41854185
)
41864186

41874187
@Appender(_shared_docs["replace"] % _shared_doc_kwargs)
@@ -6237,15 +6237,31 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
62376237
if not self.columns.is_unique:
62386238
raise ValueError("columns must be unique")
62396239

6240-
result = self[column].explode()
6241-
return (
6240+
if isinstance(self.index, MultiIndex):
6241+
index_names = [
6242+
f"level_{num}" if val is None else val
6243+
for num, val in enumerate(self.index.names)
6244+
]
6245+
else:
6246+
index_names = [i if i else "index" for i in [self.index.name]]
6247+
6248+
column_with_index = self[column].reset_index()
6249+
6250+
result = (
62426251
self.drop([column], axis=1)
6243-
.reset_index(drop=True)
6244-
.join(self[column].reset_index(drop=True).explode())
6252+
.reset_index()
6253+
.join(column_with_index[column].explode())
6254+
.set_index(index_names)
62456255
.reindex(columns=self.columns, copy=False)
6246-
.set_index(result.index)
62476256
)
62486257

6258+
if isinstance(self.index, MultiIndex):
6259+
result.index.names = self.index.names
6260+
else:
6261+
result.index.name = self.index.name
6262+
6263+
return result
6264+
62496265
def unstack(self, level=-1, fill_value=None):
62506266
"""
62516267
Pivot a level of the (necessarily hierarchical) index labels, returning
@@ -6617,7 +6633,7 @@ def _gotitem(
66176633
see_also=_agg_summary_and_see_also_doc,
66186634
examples=_agg_examples_doc,
66196635
versionadded="\n.. versionadded:: 0.20.0\n",
6620-
**_shared_doc_kwargs
6636+
**_shared_doc_kwargs,
66216637
)
66226638
@Appender(_shared_docs["aggregate"])
66236639
def aggregate(self, func, axis=0, *args, **kwargs):
@@ -6659,7 +6675,7 @@ def apply(
66596675
reduce=None,
66606676
result_type=None,
66616677
args=(),
6662-
**kwds
6678+
**kwds,
66636679
):
66646680
"""
66656681
Apply a function along an axis of the DataFrame.

pandas/tests/frame/test_explode.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,19 @@ def test_usecase():
137137
dtype=object,
138138
),
139139
),
140+
(
141+
pd.DataFrame(
142+
{"col": [[1, 2], [3, 4]], "other_col": ["a", "b"], "my_index": [0, 0]}
143+
).set_index("my_index"),
144+
pd.DataFrame(
145+
{
146+
"col": [1, 2, 3, 4],
147+
"other_col": ["a", "a", "b", "b"],
148+
"my_index": [0, 0, 0, 0],
149+
},
150+
dtype=object,
151+
).set_index("my_index"),
152+
),
140153
],
141154
)
142155
def test_duplicate_index(df, expected):

0 commit comments

Comments
 (0)