Skip to content

Commit 9d20b61

Browse files
committed
try adding frame
1 parent c426627 commit 9d20b61

File tree

5 files changed

+181
-10
lines changed

5 files changed

+181
-10
lines changed

doc/source/reference/frame.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ Reshaping, sorting, transposing
239239
DataFrame.unstack
240240
DataFrame.swapaxes
241241
DataFrame.melt
242+
DataFrame.explode
242243
DataFrame.squeeze
243244
DataFrame.to_xarray
244245
DataFrame.T

pandas/core/frame.py

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import itertools
1616
import sys
1717
from textwrap import dedent
18-
from typing import FrozenSet, List, Optional, Set, Type, Union
18+
from typing import FrozenSet, Iterable, List, Optional, Set, Type, Union
1919
import warnings
2020

2121
import numpy as np
@@ -6237,6 +6237,78 @@ def stack(self, level=-1, dropna=True):
62376237
else:
62386238
return stack(self, level, dropna=dropna)
62396239

6240+
def explode(self, subset: Iterable) -> "DataFrame":
    """
    Create a new DataFrame, expanding list-like columns into rows.

    .. versionadded:: 0.25.0

    Parameters
    ----------
    subset : list-like
        Labels of the columns to explode. The labels must be unique.
        A single column is passed as a one-element list-like,
        e.g. ``["A"]``.

    Returns
    -------
    DataFrame
        Exploded lists to rows of the subset columns; index will be
        duplicated for these rows.

    Raises
    ------
    ValueError
        If `subset` is not list-like.
    ValueError
        If `subset` contains duplicate labels.
    ValueError
        If the columns of the frame are not unique.

    See Also
    --------
    Series.str.split : Split string values on specified separator.
    Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex to
        produce DataFrame.
    DataFrame.melt : Unpivot a DataFrame from wide format to long
        format.
    Series.explode : Explode a Series of list-likes to long format.

    Notes
    -----
    This routine will explode list-likes including lists, tuples,
    Series, and np.ndarray.
    The result dtype of the subset rows will be object.
    Scalars will be returned unchanged.
    Empty list-likes will result in a np.nan for that row.

    Each column in `subset` is exploded on its own and the pieces are
    joined back together on the index, so when several columns hold
    list-likes for the same index label the result contains every
    combination of their elements for that label.

    Examples
    --------
    >>> df = pd.DataFrame({'A': [[1, 2, 3], 'foo', [], [3, 4]], 'B': 1})
    >>> df.explode(['A'])
         A  B
    0    1  1
    0    2  1
    0    3  1
    1  foo  1
    2  NaN  1
    3    3  1
    3    4  1
    """

    if not is_list_like(subset):
        raise ValueError("subset must be a list-like")

    # ``subset`` is walked several times below (uniqueness check, per-column
    # explode, drop).  Materialise it once so that one-shot iterators are not
    # exhausted after the first pass, and so that a tuple of labels is not
    # handed to ``drop`` where it could be read as one single label.
    subset = list(subset)

    if not Index(subset).is_unique:
        raise ValueError("subset must be unique")
    if not self.columns.is_unique:
        raise ValueError("columns must be unique")

    # explode every requested column independently ...
    results = [self[s].explode() for s in subset]
    # ... and keep the untouched columns as the left-most join operand
    result = self.drop(subset, axis=1)

    # recursive merge (imported locally, as in the original implementation)
    from pandas.core.reshape.merge import merge

    def merger(left, right):
        # index-on-index join; duplicated index labels fan out the rows
        return merge(left, right, left_index=True, right_index=True)

    # restore the caller's column order after the joins
    return functools.reduce(merger, [result] + results).reindex(
        columns=self.columns, copy=False
    )
6311+
62406312
def unstack(self, level=-1, fill_value=None):
62416313
"""
62426314
Pivot a level of the (necessarily hierarchical) index labels, returning

pandas/core/series.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3652,6 +3652,7 @@ def explode(self) -> "Series":
36523652
Series.str.split : Split string values on specified separator.
36533653
Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex to produce DataFrame.
36543654
DataFrame.melt : Unpivot a DataFrame from wide format to long format
3655+
DataFrame.explode : Explode a DataFrame from list-like columns to long format.
36553656
36563657
Notes
36573658
-----

pandas/tests/frame/test_explode.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas as pd
5+
from pandas.util import testing as tm
6+
7+
8+
def test_error():
    """DataFrame.explode raises ValueError for each kind of invalid input.

    Every validation branch is checked against its own message so that a
    failure in one check cannot be masked by another check raising first.
    """
    df = pd.DataFrame(
        {"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1}
    )

    # a bare string is not list-like
    with pytest.raises(ValueError, match="subset must be a list-like"):
        df.explode(subset="A")

    # duplicated labels in subset, unique columns
    with pytest.raises(ValueError, match="subset must be unique"):
        df.explode(subset=list("AA"))

    df.columns = list("AA")

    # unique subset, duplicated columns
    with pytest.raises(ValueError, match="columns must be unique"):
        df.explode(subset=["A"])

    # both duplicated: the subset check is performed first
    with pytest.raises(ValueError, match="subset must be unique"):
        df.explode(subset=list("AA"))
15+
16+
17+
def test_basic():
    """Exploding one list-like column repeats index labels and other columns."""
    column_a = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"))
    frame = pd.DataFrame({"A": column_a, "B": 1})

    # NaN and the empty list each yield one NaN row; lists/tuples fan out
    exploded_a = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object
    )
    expected = pd.DataFrame({"A": exploded_a, "B": 1})

    result = frame.explode(subset=["A"])
    tm.assert_frame_equal(result, expected)
31+
32+
33+
def test_all_columns():
    """Exploding every column leaves an all-scalar column unchanged."""
    column_a = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"))
    frame = pd.DataFrame({"A": column_a, "B": 1})

    # "B" only holds scalars, so only "A" contributes additional rows
    exploded_a = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object
    )
    expected = pd.DataFrame({"A": exploded_a, "B": 1})

    result = frame.explode(subset=["A", "B"])
    tm.assert_frame_equal(result, expected)
47+
48+
49+
def test_multiple_columns():
    """Exploding two list-like columns combines their elements per label."""
    column_a = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"))
    column_b = pd.Series([[0, 1, 2], np.nan, np.nan, 3], index=list("abcd"))
    frame = pd.DataFrame({"A": column_a, "B": column_b})

    # label "a" has three elements in each column -> 3 x 3 = 9 rows
    expected_values = {
        "A": [0, 0, 0, 1, 1, 1, 2, 2, 2, np.nan, np.nan, 3, 4],
        "B": [0, 1, 2, 0, 1, 2, 0, 1, 2, np.nan, np.nan, 3, 3],
    }
    expected = pd.DataFrame(
        expected_values, dtype=object, index=list("aaaaaaaaabcdd")
    )

    result = frame.explode(subset=["A", "B"])
    tm.assert_frame_equal(result, expected)
66+
67+
68+
def test_usecase():
    """Check DataFrame.explode against two reported use cases."""
    # explode a single column
    # gh-10511
    raw_rows = [[11, range(5), 10], [22, range(3), 20]]
    frame = pd.DataFrame(raw_rows, columns=["A", "B", "C"]).set_index("C")

    expected_data = {
        "A": [11, 11, 11, 11, 11, 22, 22, 22],
        "B": np.array([0, 1, 2, 3, 4, 0, 1, 2], dtype=object),
        "C": [10, 10, 10, 10, 10, 20, 20, 20],
    }
    expected = pd.DataFrame(expected_data, columns=list("ABC")).set_index("C")

    result = frame.explode(["B"])
    tm.assert_frame_equal(result, expected)

    # gh-8517
    frame = pd.DataFrame(
        [["2014-01-01", "Alice", "A B"], ["2014-01-02", "Bob", "C D"]],
        columns=["dt", "name", "text"],
    )
    expected_rows = [
        ["2014-01-01", "Alice", "A"],
        ["2014-01-01", "Alice", "B"],
        ["2014-01-02", "Bob", "C"],
        ["2014-01-02", "Bob", "D"],
    ]
    expected = pd.DataFrame(
        expected_rows, columns=["dt", "name", "text"], index=[0, 0, 1, 1]
    )

    # split the text into word lists first, then give each word its own row
    tokenized = frame.assign(text=frame.text.str.split(" "))
    result = tokenized.explode(["text"])
    tm.assert_frame_equal(result, expected)

pandas/tests/series/test_explode.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,10 @@
66

77

88
def test_basic():
    """Series.explode fans list-likes out to rows, keeping the name."""
    values = [[0, 1, 2], np.nan, [], (3, 4)]
    ser = pd.Series(values, index=list("abcd"), name="foo")

    # NaN and the empty list each become a single NaN row
    expected = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4],
        index=list("aaabcdd"),
        dtype=object,
        name="foo",
    )

    tm.assert_series_equal(ser.explode(), expected)
2015

@@ -43,8 +38,7 @@ def test_empty():
4338
def test_nested_lists():
    """Series.explode unpacks only the outermost level of nesting."""
    nested = pd.Series([[[1, 2, 3]], [1, 2], 1])

    # the inner list [1, 2, 3] survives as a single element
    expected = pd.Series([[1, 2, 3], 1, 2, 1], index=[0, 1, 1, 2])

    tm.assert_series_equal(nested.explode(), expected)
4943

5044

0 commit comments

Comments
 (0)