Skip to content

Commit 9c8c145

Browse files
mitarrok
authored and committed
ENH: Implemented lazy iteration.
Fixes GH20783.
1 parent 6111f64 commit 9c8c145

File tree

4 files changed

+23
-7
lines changed

4 files changed

+23
-7
lines changed

asv_bench/benchmarks/frame_methods.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,16 @@ def time_itertuples(self):
123123
for row in self.df2.itertuples():
124124
pass
125125

126+
def time_itertuples_to_list(self):
127+
list(self.df2.itertuples())
128+
129+
def time_itertuples_raw_tuples(self):
130+
for row in self.df2.itertuples(index=False, name=None):
131+
pass
132+
133+
def time_itertuples_raw_tuples_to_list(self):
134+
list(self.df2.itertuples(index=False, name=None))
135+
126136
def time_iterrows(self):
127137
for row in self.df.iterrows():
128138
pass

doc/source/whatsnew/v0.24.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,6 +1258,7 @@ Performance Improvements
12581258
- Fixed a performance regression on Windows with Python 3.7 of :func:`read_csv` (:issue:`23516`)
12591259
- Improved performance of :class:`Categorical` constructor for ``Series`` objects (:issue:`23814`)
12601260
- Improved performance of :meth:`~DataFrame.where` for Categorical data (:issue:`24077`)
1261+
- Iterating over a :class:`Series` and using :meth:`DataFrame.itertuples` now create iterators without internally allocating lists of all elements (:issue:`20783`)
12611262

12621263
.. _whatsnew_0240.docs:
12631264

pandas/core/base.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
import pandas._libs.lib as lib
1010
import pandas.compat as compat
11-
from pandas.compat import PYPY, OrderedDict, builtins
11+
from pandas.compat import PYPY, OrderedDict, builtins, map, range
1212
from pandas.compat.numpy import function as nv
1313
from pandas.errors import AbstractMethodError
1414
from pandas.util._decorators import Appender, Substitution, cache_readonly
@@ -1035,7 +1035,13 @@ def __iter__(self):
10351035
(for str, int, float) or a pandas scalar
10361036
(for Timestamp/Timedelta/Interval/Period)
10371037
"""
1038-
return iter(self.tolist())
1038+
# We are explicitly making element iterators.
1039+
if is_datetimelike(self._values):
1040+
return map(com.maybe_box_datetimelike, self._values)
1041+
elif is_extension_array_dtype(self._values):
1042+
return iter(self._values)
1043+
else:
1044+
return map(self._values.item, range(self._values.size))
10391045

10401046
@cache_readonly
10411047
def hasnans(self):

pandas/core/frame.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -898,10 +898,10 @@ def itertuples(self, index=True, name="Pandas"):
898898
Animal(Index='hawk', num_legs=2, num_wings=2)
899899
"""
900900
arrays = []
901-
fields = []
901+
fields = list(self.columns)
902902
if index:
903903
arrays.append(self.index)
904-
fields.append("Index")
904+
fields.insert(0, "Index")
905905

906906
# use integer indexing because of possible duplicate column names
907907
arrays.extend(self.iloc[:, k] for k in range(len(self.columns)))
@@ -911,10 +911,9 @@ def itertuples(self, index=True, name="Pandas"):
911911
if name is not None and len(self.columns) + index < 256:
912912
# `rename` is unsupported in Python 2.6
913913
try:
914-
itertuple = collections.namedtuple(name,
915-
fields + list(self.columns),
916-
rename=True)
914+
itertuple = collections.namedtuple(name, fields, rename=True)
917915
return map(itertuple._make, zip(*arrays))
916+
918917
except Exception:
919918
pass
920919

0 commit comments

Comments
 (0)