Skip to content

Commit 3667b76

Browse files
navreetaft and jreback
authored and committed
PERF: improved performance of pd.concat, by not forcing C ordering when testing for isnull, #119678
1 parent 44e4c96 commit 3667b76

File tree

4 files changed

+58
-4
lines changed

4 files changed

+58
-4
lines changed

asv_bench/benchmarks/join_merge.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,59 @@ def time_concat_small_frames(self):
91 91
concat(([self.df] * 1000))
92 92

93 93

94+
class concat_panels(object):
95+
goal_time = 0.2
96+
97+
def setup(self):
98+
dataset = np.zeros((10000, 200, 2), dtype=np.float32)
99+
self.panels_f = [pd.Panel(np.copy(dataset, order='F'))
100+
for i in range(20)]
101+
self.panels_c = [pd.Panel(np.copy(dataset, order='C'))
102+
for i in range(20)]
103+
104+
def time_concat_c_ordered_axis0(self):
105+
concat(self.panels_c, axis=0, ignore_index=True)
106+
107+
def time_concat_f_ordered_axis0(self):
108+
concat(self.panels_f, axis=0, ignore_index=True)
109+
110+
def time_concat_c_ordered_axis1(self):
111+
concat(self.panels_c, axis=1, ignore_index=True)
112+
113+
def time_concat_f_ordered_axis1(self):
114+
concat(self.panels_f, axis=1, ignore_index=True)
115+
116+
def time_concat_c_ordered_axis2(self):
117+
concat(self.panels_c, axis=2, ignore_index=True)
118+
119+
def time_concat_f_ordered_axis2(self):
120+
concat(self.panels_f, axis=2, ignore_index=True)
121+
122+
123+
class concat_dataframes(object):
124+
goal_time = 0.2
125+
126+
def setup(self):
127+
dataset = np.zeros((10000, 200), dtype=np.float32)
128+
129+
self.frames_f = [pd.DataFrame(np.copy(dataset, order='F'))
130+
for i in range(20)]
131+
self.frames_c = [pd.DataFrame(np.copy(dataset, order='C'))
132+
for i in range(20)]
133+
134+
def time_concat_c_ordered_axis0(self):
135+
concat(self.frames_c, axis=0, ignore_index=True)
136+
137+
def time_concat_f_ordered_axis0(self):
138+
concat(self.frames_f, axis=0, ignore_index=True)
139+
140+
def time_concat_c_ordered_axis1(self):
141+
concat(self.frames_c, axis=1, ignore_index=True)
142+
143+
def time_concat_f_ordered_axis1(self):
144+
concat(self.frames_f, axis=1, ignore_index=True)
145+
146+
94 147
class i8merge(object):
95 148
goal_time = 0.2
96 149

@@ -356,4 +409,4 @@ def time_series_align_left_monotonic(self):
356 409

357 410
def sample(self, values, k):
358 411
self.sampler = np.random.permutation(len(values))
359-
return values.take(self.sampler[:k])
412+
return values.take(self.sampler[:k])

asv_bench/benchmarks/plotting.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ class plot_andrews_curves(object):
24 24
def setup(self):
25 25
self.N = 500
26 26
self.M = 10
27-
data_dict = {x: np.random.randn(self.N) for x in range(self.M)}
28-
data_dict["Name"] = ["A"] * self.N
27+
data_dict = {x: np.random.randn(self.N) for x in range(self.M)}
28+
data_dict["Name"] = ["A"] * self.N
29 29
self.df = DataFrame(data_dict)
30 30

31 31
def time_plot_andrews_curves(self):

doc/source/whatsnew/v0.18.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,7 @@ Performance Improvements
413 413
- Improved performance of ``andrews_curves`` (:issue:`11534`)
414 414

415 415
- Improved huge ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex``'s ops performance including ``NaT`` (:issue:`10277`)
416+
- Improved performance of ``pandas.concat`` (:issue:`11958`)
416 417

417 418

418 419

pandas/core/internals.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4769,7 +4769,7 @@ def is_null(self):
4769 4769
if self.block.is_categorical:
4770 4770
values_flat = values.categories
4771 4771
else:
4772-
values_flat = values.ravel()
4772+
values_flat = values.ravel(order='K')
4773 4773
total_len = values_flat.shape[0]
4774 4774
chunk_len = max(total_len // 40, 1000)
4775 4775
for i in range(0, total_len, chunk_len):

0 commit comments

Comments
 (0)