Skip to content

Commit 94f319e

Browse files
committed
remove multi subset support
1 parent dc17ef6 commit 94f319e

File tree

2 files changed

+19
-61
lines changed

2 files changed

+19
-61
lines changed

pandas/core/frame.py

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6237,15 +6237,15 @@ def stack(self, level=-1, dropna=True):
62376237
else:
62386238
return stack(self, level, dropna=dropna)
62396239

6240-
def explode(self, subset: Iterable) -> "DataFrame":
6240+
def explode(self, column: str) -> "DataFrame":
62416241
"""
6242-
Create new DataFrame expanding a list-like columns.
6242+
Create new DataFrame expanding a specified list-like column.
62436243
62446244
.. versionadded:: 0.25.0
62456245
62466246
Parameters
62476247
----------
6248-
subset : list-like
6248+
column : str
62496249
62506250
Returns
62516251
-------
@@ -6256,9 +6256,7 @@ def explode(self, subset: Iterable) -> "DataFrame":
62566256
Raises
62576257
------
62586258
ValueError :
6259-
if columns & subset are not unique.
6260-
ValueError :
6261-
subset must be list-like
6259+
if columns of the frame are not unique, or if `column` is not a scalar.
62626260
62636261
See Also
62646262
--------
@@ -6285,7 +6283,7 @@ def explode(self, subset: Iterable) -> "DataFrame":
62856283
2 [] 1
62866284
3 [3, 4] 1
62876285
6288-
>>> df.explode(['A'])
6286+
>>> df.explode('A')
62896287
A B
62906288
0 1 1
62916289
0 2 1
@@ -6297,24 +6295,16 @@ def explode(self, subset: Iterable) -> "DataFrame":
62976295
62986296
"""
62996297

6300-
if not is_list_like(subset):
6301-
raise ValueError("subset must be a list-like")
6302-
if not Index(subset).is_unique:
6303-
raise ValueError("subset must be unique")
6298+
if not is_scalar(column):
6299+
raise ValueError("column must be a scalar")
63046300
if not self.columns.is_unique:
63056301
raise ValueError("columns must be unique")
63066302

6307-
results = [self[s].explode() for s in subset]
6308-
result = self.drop(subset, axis=1)
6309-
6310-
# recursive merge
6311-
from pandas.core.reshape.merge import merge
6312-
6313-
def merger(left, right):
6314-
return merge(left, right, left_index=True, right_index=True)
6315-
6316-
return functools.reduce(merger, [result] + results).reindex(
6317-
columns=self.columns, copy=False
6303+
result = self[column].explode()
6304+
return (
6305+
self.drop([column], axis=1)
6306+
.join(result)
6307+
.reindex(columns=self.columns, copy=False)
63186308
)
63196309

63206310
def unstack(self, level=-1, fill_value=None):

pandas/tests/frame/test_explode.py

Lines changed: 7 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,19 @@ def test_error():
99
df = pd.DataFrame(
1010
{"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1}
1111
)
12+
with pytest.raises(ValueError):
13+
df.explode(list("AA"))
14+
1215
df.columns = list("AA")
1316
with pytest.raises(ValueError):
14-
df.explode(subset=list("AA"))
17+
df.explode("A")
1518

1619

1720
def test_basic():
1821
df = pd.DataFrame(
1922
{"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1}
2023
)
21-
result = df.explode(subset=["A"])
24+
result = df.explode("A")
2225
expected = pd.DataFrame(
2326
{
2427
"A": pd.Series(
@@ -30,48 +33,13 @@ def test_basic():
3033
tm.assert_frame_equal(result, expected)
3134

3235

33-
def test_all_columns():
34-
df = pd.DataFrame(
35-
{"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1}
36-
)
37-
result = df.explode(subset=["A", "B"])
38-
expected = pd.DataFrame(
39-
{
40-
"A": pd.Series(
41-
[0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object
42-
),
43-
"B": 1,
44-
}
45-
)
46-
tm.assert_frame_equal(result, expected)
47-
48-
49-
def test_multiple_columns():
50-
df = pd.DataFrame(
51-
{
52-
"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")),
53-
"B": pd.Series([[0, 1, 2], np.nan, np.nan, 3], index=list("abcd")),
54-
}
55-
)
56-
result = df.explode(subset=["A", "B"])
57-
expected = pd.DataFrame(
58-
{
59-
"A": [0, 0, 0, 1, 1, 1, 2, 2, 2, np.nan, np.nan, 3, 4],
60-
"B": [0, 1, 2, 0, 1, 2, 0, 1, 2, np.nan, np.nan, 3, 3],
61-
},
62-
dtype=object,
63-
index=list("aaaaaaaaabcdd"),
64-
)
65-
tm.assert_frame_equal(result, expected)
66-
67-
6836
def test_usecase():
6937
# explode a single column
7038
# gh-10511
7139
df = pd.DataFrame(
7240
[[11, range(5), 10], [22, range(3), 20]], columns=["A", "B", "C"]
7341
).set_index("C")
74-
result = df.explode(["B"])
42+
result = df.explode("B")
7543

7644
expected = pd.DataFrame(
7745
{
@@ -89,7 +57,7 @@ def test_usecase():
8957
[["2014-01-01", "Alice", "A B"], ["2014-01-02", "Bob", "C D"]],
9058
columns=["dt", "name", "text"],
9159
)
92-
result = df.assign(text=df.text.str.split(" ")).explode(["text"])
60+
result = df.assign(text=df.text.str.split(" ")).explode("text")
9361
expected = pd.DataFrame(
9462
[
9563
["2014-01-01", "Alice", "A"],

0 commit comments

Comments
 (0)