Skip to content

Commit df39bd7

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into less24024b
2 parents 184f59f + 2a09706 commit df39bd7

File tree

7 files changed

+86
-75
lines changed

7 files changed

+86
-75
lines changed

asv_bench/benchmarks/frame_methods.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ def setup(self):
103103
self.df2 = DataFrame(np.random.randn(N * 50, 10))
104104
self.df3 = DataFrame(np.random.randn(N, 5 * N),
105105
columns=['C' + str(c) for c in range(N * 5)])
106+
self.df4 = DataFrame(np.random.randn(N * 1000, 10))
106107

107108
def time_iteritems(self):
108109
# (monitor no-copying behaviour)
@@ -119,10 +120,70 @@ def time_iteritems_indexing(self):
119120
for col in self.df3:
120121
self.df3[col]
121122

123+
def time_itertuples_start(self):
124+
self.df4.itertuples()
125+
126+
def time_itertuples_read_first(self):
127+
next(self.df4.itertuples())
128+
122129
def time_itertuples(self):
123-
for row in self.df2.itertuples():
130+
for row in self.df4.itertuples():
131+
pass
132+
133+
def time_itertuples_to_list(self):
134+
list(self.df4.itertuples())
135+
136+
def mem_itertuples_start(self):
137+
return self.df4.itertuples()
138+
139+
def peakmem_itertuples_start(self):
140+
self.df4.itertuples()
141+
142+
def mem_itertuples_read_first(self):
143+
return next(self.df4.itertuples())
144+
145+
def peakmem_itertuples(self):
146+
for row in self.df4.itertuples():
147+
pass
148+
149+
def mem_itertuples_to_list(self):
150+
return list(self.df4.itertuples())
151+
152+
def peakmem_itertuples_to_list(self):
153+
list(self.df4.itertuples())
154+
155+
def time_itertuples_raw_start(self):
156+
self.df4.itertuples(index=False, name=None)
157+
158+
def time_itertuples_raw_read_first(self):
159+
next(self.df4.itertuples(index=False, name=None))
160+
161+
def time_itertuples_raw_tuples(self):
162+
for row in self.df4.itertuples(index=False, name=None):
124163
pass
125164

165+
def time_itertuples_raw_tuples_to_list(self):
166+
list(self.df4.itertuples(index=False, name=None))
167+
168+
def mem_itertuples_raw_start(self):
169+
return self.df4.itertuples(index=False, name=None)
170+
171+
def peakmem_itertuples_raw_start(self):
172+
self.df4.itertuples(index=False, name=None)
173+
174+
def peakmem_itertuples_raw_read_first(self):
175+
next(self.df4.itertuples(index=False, name=None))
176+
177+
def peakmem_itertuples_raw(self):
178+
for row in self.df4.itertuples(index=False, name=None):
179+
pass
180+
181+
def mem_itertuples_raw_to_list(self):
182+
return list(self.df4.itertuples(index=False, name=None))
183+
184+
def peakmem_itertuples_raw_to_list(self):
185+
list(self.df4.itertuples(index=False, name=None))
186+
126187
def time_iterrows(self):
127188
for row in self.df.iterrows():
128189
pass

doc/source/io.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
.. _io:
22

3+
.. currentmodule:: pandas
4+
5+
36
{{ header }}
47

58
.. ipython:: python

doc/source/whatsnew/v0.24.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1251,6 +1251,8 @@ Performance Improvements
12511251
- Fixed a performance regression on Windows with Python 3.7 of :func:`read_csv` (:issue:`23516`)
12521252
- Improved performance of :class:`Categorical` constructor for ``Series`` objects (:issue:`23814`)
12531253
- Improved performance of :meth:`~DataFrame.where` for Categorical data (:issue:`24077`)
1254+
- Improved performance of iterating over a :class:`Series`. Using :meth:`DataFrame.itertuples` now creates iterators
1255+
without internally allocating lists of all elements (:issue:`20783`)
12541256

12551257
.. _whatsnew_0240.docs:
12561258

pandas/core/base.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
import pandas._libs.lib as lib
1010
import pandas.compat as compat
11-
from pandas.compat import PYPY, OrderedDict, builtins
11+
from pandas.compat import PYPY, OrderedDict, builtins, map, range
1212
from pandas.compat.numpy import function as nv
1313
from pandas.errors import AbstractMethodError
1414
from pandas.util._decorators import Appender, Substitution, cache_readonly
@@ -1072,7 +1072,13 @@ def __iter__(self):
10721072
(for str, int, float) or a pandas scalar
10731073
(for Timestamp/Timedelta/Interval/Period)
10741074
"""
1075-
return iter(self.tolist())
1075+
# We are explicitly making element iterators.
1076+
if is_datetimelike(self._values):
1077+
return map(com.maybe_box_datetimelike, self._values)
1078+
elif is_extension_array_dtype(self._values):
1079+
return iter(self._values)
1080+
else:
1081+
return map(self._values.item, range(self._values.size))
10761082

10771083
@cache_readonly
10781084
def hasnans(self):

pandas/core/frame.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -898,10 +898,10 @@ def itertuples(self, index=True, name="Pandas"):
898898
Animal(Index='hawk', num_legs=2, num_wings=2)
899899
"""
900900
arrays = []
901-
fields = []
901+
fields = list(self.columns)
902902
if index:
903903
arrays.append(self.index)
904-
fields.append("Index")
904+
fields.insert(0, "Index")
905905

906906
# use integer indexing because of possible duplicate column names
907907
arrays.extend(self.iloc[:, k] for k in range(len(self.columns)))
@@ -911,10 +911,9 @@ def itertuples(self, index=True, name="Pandas"):
911911
if name is not None and len(self.columns) + index < 256:
912912
# `rename` is unsupported in Python 2.6
913913
try:
914-
itertuple = collections.namedtuple(name,
915-
fields + list(self.columns),
916-
rename=True)
914+
itertuple = collections.namedtuple(name, fields, rename=True)
917915
return map(itertuple._make, zip(*arrays))
916+
918917
except Exception:
919918
pass
920919

pandas/core/groupby/base.py

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -69,35 +69,22 @@ def _gotitem(self, key, ndim, subset=None):
6969

7070
# special case to prevent duplicate plots when catching exceptions when
7171
# forwarding methods from NDFrames
72-
plotting_methods = frozenset(['plot', 'boxplot', 'hist'])
72+
plotting_methods = frozenset(['plot', 'hist'])
7373

7474
common_apply_whitelist = frozenset([
75-
'last', 'first',
76-
'head', 'tail', 'median',
77-
'mean', 'sum', 'min', 'max',
78-
'cumcount', 'ngroup',
79-
'resample',
80-
'rank', 'quantile',
81-
'fillna',
82-
'mad',
83-
'any', 'all',
84-
'take',
85-
'idxmax', 'idxmin',
86-
'shift', 'tshift',
87-
'ffill', 'bfill',
88-
'pct_change', 'skew',
89-
'corr', 'cov', 'diff',
75+
'quantile', 'fillna', 'mad', 'take',
76+
'idxmax', 'idxmin', 'tshift',
77+
'skew', 'corr', 'cov', 'diff'
9078
]) | plotting_methods
9179

9280
series_apply_whitelist = ((common_apply_whitelist |
9381
{'nlargest', 'nsmallest',
9482
'is_monotonic_increasing',
95-
'is_monotonic_decreasing'}) -
96-
{'boxplot'}) | frozenset(['dtype', 'unique'])
83+
'is_monotonic_decreasing'})
84+
) | frozenset(['dtype', 'unique'])
9785

9886
dataframe_apply_whitelist = ((common_apply_whitelist |
99-
frozenset(['dtypes', 'corrwith'])) -
100-
{'boxplot'})
87+
frozenset(['dtypes', 'corrwith'])))
10188

10289
cython_transforms = frozenset(['cumprod', 'cumsum', 'shift',
10390
'cummin', 'cummax'])

pandas/tests/groupby/test_whitelist.py

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -14,35 +14,16 @@
1414
AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad']
1515

1616
df_whitelist = [
17-
'last',
18-
'first',
19-
'mean',
20-
'sum',
21-
'min',
22-
'max',
23-
'head',
24-
'tail',
25-
'cumcount',
26-
'ngroup',
27-
'resample',
28-
'rank',
2917
'quantile',
3018
'fillna',
3119
'mad',
32-
'any',
33-
'all',
3420
'take',
3521
'idxmax',
3622
'idxmin',
37-
'shift',
3823
'tshift',
39-
'ffill',
40-
'bfill',
41-
'pct_change',
4224
'skew',
4325
'plot',
4426
'hist',
45-
'median',
4627
'dtypes',
4728
'corrwith',
4829
'corr',
@@ -57,35 +38,16 @@ def df_whitelist_fixture(request):
5738

5839

5940
s_whitelist = [
60-
'last',
61-
'first',
62-
'mean',
63-
'sum',
64-
'min',
65-
'max',
66-
'head',
67-
'tail',
68-
'cumcount',
69-
'ngroup',
70-
'resample',
71-
'rank',
7241
'quantile',
7342
'fillna',
7443
'mad',
75-
'any',
76-
'all',
7744
'take',
7845
'idxmax',
7946
'idxmin',
80-
'shift',
8147
'tshift',
82-
'ffill',
83-
'bfill',
84-
'pct_change',
8548
'skew',
8649
'plot',
8750
'hist',
88-
'median',
8951
'dtype',
9052
'corr',
9153
'cov',
@@ -150,17 +112,8 @@ def test_groupby_whitelist(df_letters, whitelist):
150112
def check_whitelist(obj, df, m):
151113
# check the obj for a particular whitelist m
152114

153-
# these are aliases so ok to have the alias __name__
154-
alias = {'bfill': 'backfill',
155-
'ffill': 'pad',
156-
'boxplot': None}
157-
158115
gb = obj.groupby(df.letters)
159116

160-
m = alias.get(m, m)
161-
if m is None:
162-
return
163-
164117
f = getattr(type(gb), m)
165118

166119
# name

0 commit comments

Comments
 (0)