From aa08a6de4d32d2338a6850118fb6764e6deab1e1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 16:39:04 -0600 Subject: [PATCH 01/11] Fixed warnings in asv files (cherry picked from commit f566b46390e8f87f0819801aeb9be52047f10e8f) --- asv_bench/benchmarks/join_merge.py | 2 +- asv_bench/benchmarks/panel_ctor.py | 12 ++++++------ asv_bench/benchmarks/reindex.py | 4 ++-- asv_bench/benchmarks/timedelta.py | 9 +++++---- asv_bench/benchmarks/timestamp.py | 7 ++++--- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 88a59fea375ea..a1cdb00260fc4 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -50,7 +50,7 @@ def setup(self, axis): self.empty_right = [df, DataFrame()] def time_concat_series(self, axis): - concat(self.series, axis=axis) + concat(self.series, axis=axis, sort=False) def time_concat_small_frames(self, axis): concat(self.small_frames, axis=axis) diff --git a/asv_bench/benchmarks/panel_ctor.py b/asv_bench/benchmarks/panel_ctor.py index 47b3ad612f9b1..627705284481b 100644 --- a/asv_bench/benchmarks/panel_ctor.py +++ b/asv_bench/benchmarks/panel_ctor.py @@ -1,7 +1,7 @@ import warnings from datetime import datetime, timedelta -from pandas import DataFrame, Panel, DatetimeIndex, date_range +from pandas import DataFrame, Panel, date_range class DifferentIndexes(object): @@ -23,9 +23,9 @@ def time_from_dict(self): class SameIndexes(object): def setup(self): - idx = DatetimeIndex(start=datetime(1990, 1, 1), - end=datetime(2012, 1, 1), - freq='D') + idx = date_range(start=datetime(1990, 1, 1), + end=datetime(2012, 1, 1), + freq='D') df = DataFrame({'a': 0, 'b': 1, 'c': 2}, index=idx) self.data_frames = dict(enumerate([df] * 100)) @@ -40,10 +40,10 @@ def setup(self): start = datetime(1990, 1, 1) end = datetime(2012, 1, 1) df1 = DataFrame({'a': 0, 'b': 1, 'c': 2}, - index=DatetimeIndex(start=start, end=end, freq='D')) + 
index=date_range(start=start, end=end, freq='D')) end += timedelta(days=1) df2 = DataFrame({'a': 0, 'b': 1, 'c': 2}, - index=DatetimeIndex(start=start, end=end, freq='D')) + index=date_range(start=start, end=end, freq='D')) dfs = [df1] * 50 + [df2] * 50 self.data_frames = dict(enumerate(dfs)) diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 576dc495eb984..fb47fa81d8dfd 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -1,6 +1,6 @@ import numpy as np import pandas.util.testing as tm -from pandas import (DataFrame, Series, DatetimeIndex, MultiIndex, Index, +from pandas import (DataFrame, Series, MultiIndex, Index, date_range) from .pandas_vb_common import lib @@ -8,7 +8,7 @@ class Reindex(object): def setup(self): - rng = DatetimeIndex(start='1/1/1970', periods=10000, freq='1min') + rng = date_range(start='1/1/1970', periods=10000, freq='1min') self.df = DataFrame(np.random.rand(10000, 10), index=rng, columns=range(10)) self.df['foo'] = 'bar' diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 7ee73fb7ac7b6..0cfbbd536bc8b 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -1,8 +1,9 @@ import datetime import numpy as np -from pandas import Series, timedelta_range, to_timedelta, Timestamp, \ - Timedelta, TimedeltaIndex, DataFrame + +from pandas import ( + DataFrame, Series, Timedelta, Timestamp, timedelta_range, to_timedelta) class TimedeltaConstructor(object): @@ -122,8 +123,8 @@ def time_timedelta_nanoseconds(self, series): class TimedeltaIndexing(object): def setup(self): - self.index = TimedeltaIndex(start='1985', periods=1000, freq='D') - self.index2 = TimedeltaIndex(start='1986', periods=1000, freq='D') + self.index = timedelta_range(start='1985', periods=1000, freq='D') + self.index2 = timedelta_range(start='1986', periods=1000, freq='D') self.series = Series(range(1000), index=self.index) self.timedelta = 
self.index[500] diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py index 64f46fe378e53..4c1d6e8533408 100644 --- a/asv_bench/benchmarks/timestamp.py +++ b/asv_bench/benchmarks/timestamp.py @@ -1,8 +1,9 @@ import datetime -from pandas import Timestamp -import pytz import dateutil +import pytz + +from pandas import Timestamp class TimestampConstruction(object): @@ -46,7 +47,7 @@ def time_dayofweek(self, tz, freq): self.ts.dayofweek def time_weekday_name(self, tz, freq): - self.ts.weekday_name + self.ts.day_name() def time_dayofyear(self, tz, freq): self.ts.dayofyear From ae026b227b56f1ad73fcccc6a20b62a7cc069c13 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 21:21:12 -0600 Subject: [PATCH 02/11] avoid series constructor (cherry picked from commit eb219acf481d5c0a9f4d7fe745a886d81e57ccc4) --- pandas/core/reshape/reshape.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 8319a8cc5417c..9a9693bfd4c9e 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -853,6 +853,7 @@ def check_len(item, name): def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False, sparse=False, drop_first=False, dtype=None): + from pandas.core.reshape.concat import concat # Series avoids inconsistent NaN handling codes, levels = _factorize_from_iterable(Series(data)) @@ -909,7 +910,7 @@ def _make_col_name(prefix, prefix_sep, level): index = None if sparse: - sparse_series = {} + sparse_series = [] N = len(data) sp_indices = [[] for _ in range(len(dummy_cols))] mask = codes != -1 @@ -928,10 +929,9 @@ def _make_col_name(prefix, prefix_sep, level): sarr = SparseArray(np.ones(len(ixs), dtype=dtype), sparse_index=IntIndex(N, ixs), fill_value=0, dtype=dtype) - sparse_series[col] = Series(data=sarr, index=index) + sparse_series.append(Series(data=sarr, index=index, name=col)) - out = DataFrame(sparse_series, index=index, 
columns=dummy_cols, - dtype=dtype) + out = concat(sparse_series, axis=1, copy=False) return out else: From b253674bbdf2771221af82b5637fee63efa7f8d0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 20 Dec 2018 06:40:44 -0600 Subject: [PATCH 03/11] BUG: Fix concat(Series[sparse], axis=1) * Preserve sparsity * Preserve fill value --- doc/source/whatsnew/v0.24.0.rst | 3 ++- pandas/core/dtypes/concat.py | 8 ++++---- pandas/core/reshape/reshape.py | 15 ++++++++++++--- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 1fb43de5f4c5a..c996539f3e50a 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -647,7 +647,7 @@ changes were made: * The ``out`` and ``mode`` parameters are now longer accepted (previously, this raised if they were specified). * Passing a scalar for ``indices`` is no longer allowed. -- The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``. +- The result of :func:`concat` with a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``. - ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer supports combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray. - Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed. - ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`). 
@@ -1613,6 +1613,7 @@ Sparse - Bug in :meth:`SparseArary.unique` not returning the unique values (:issue:`19595`) - Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`) - Bug in :meth:`DataFrame.apply` where dtypes would lose sparseness (:issue:`23744`) +- Bug in :func:`concat` when concatenating a list of :class:`Series` with all-sparse values changing the ``fill_value`` and converting to a dense Series (:issue:`24371`) Style ^^^^^ diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 0df0c01dbd47a..a90cfa4e4c906 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -66,19 +66,19 @@ def _get_series_result_type(result, objs=None): return appropriate class of Series concat input is either dict or array-like """ + from pandas import SparseSeries, SparseDataFrame, DataFrame + # concat Series with axis 1 if isinstance(result, dict): # concat Series with axis 1 - if all(is_sparse(c) for c in compat.itervalues(result)): - from pandas.core.sparse.api import SparseDataFrame + if all(isinstance(c, (SparseSeries, SparseDataFrame)) + for c in compat.itervalues(result)): return SparseDataFrame else: - from pandas.core.frame import DataFrame return DataFrame # otherwise it is a SingleBlockManager (axis = 0) if result._block.is_sparse: - from pandas.core.sparse.api import SparseSeries return SparseSeries else: return objs[0]._constructor diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 9a9693bfd4c9e..713a4b19c1fd5 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -11,8 +11,8 @@ from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( - ensure_platform_int, is_bool_dtype, is_extension_array_dtype, is_list_like, - is_object_dtype, needs_i8_conversion) + ensure_platform_int, is_bool_dtype, is_extension_array_dtype, + is_integer_dtype, is_list_like, 
is_object_dtype, needs_i8_conversion) from pandas.core.dtypes.missing import notna from pandas import compat @@ -910,6 +910,14 @@ def _make_col_name(prefix, prefix_sep, level): index = None if sparse: + + if is_integer_dtype(dtype): + fill_value = 0 + elif dtype == bool: + fill_value = False + else: + fill_value = 0.0 + sparse_series = [] N = len(data) sp_indices = [[] for _ in range(len(dummy_cols))] @@ -927,7 +935,8 @@ def _make_col_name(prefix, prefix_sep, level): dummy_cols = dummy_cols[1:] for col, ixs in zip(dummy_cols, sp_indices): sarr = SparseArray(np.ones(len(ixs), dtype=dtype), - sparse_index=IntIndex(N, ixs), fill_value=0, + sparse_index=IntIndex(N, ixs), + fill_value=fill_value, dtype=dtype) sparse_series.append(Series(data=sarr, index=index, name=col)) From 6a65cbc6c3b640bf1ec489d587d65c772160093b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 20 Dec 2018 07:21:34 -0600 Subject: [PATCH 04/11] SparseSeries unstack --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/tests/sparse/test_reshape.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index c996539f3e50a..cb4241b8d1bfc 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1103,6 +1103,7 @@ Other API Changes - The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`) - :meth:`CategoricalIndex.reindex` now raises a ``ValueError`` if the target index is non-unique and not equal to the current index. It previously only raised if the target index was not of a categorical dtype (:issue:`23963`). - :func:`Series.to_list` and :func:`Index.to_list` are now aliases of ``Series.tolist`` respectively ``Index.tolist`` (:issue:`8826`) +- The result of ``SparseSeries.unstack`` is now a :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`24372`). .. 
_whatsnew_0240.deprecations: diff --git a/pandas/tests/sparse/test_reshape.py b/pandas/tests/sparse/test_reshape.py index b492c47375bcf..d4ba672607982 100644 --- a/pandas/tests/sparse/test_reshape.py +++ b/pandas/tests/sparse/test_reshape.py @@ -35,4 +35,8 @@ def test_sparse_frame_unstack(sparse_df): def test_sparse_series_unstack(sparse_df, multi_index3): frame = pd.SparseSeries(np.ones(3), index=multi_index3).unstack() - tm.assert_sp_frame_equal(frame, sparse_df) + + arr = np.array([1, np.nan, np.nan]) + arrays = {i: pd.SparseArray(np.roll(arr, i)) for i in range(3)} + expected = pd.DataFrame(arrays) + tm.assert_frame_equal(frame, expected) From 080f0bd5790cf4f56e7baf1a330015e7f3d60fd4 Mon Sep 17 00:00:00 2001 From: Mitar Date: Mon, 23 Apr 2018 07:44:16 -0700 Subject: [PATCH 05/11] ENH: Implemented lazy iteration. Fixes GH20783. --- asv_bench/benchmarks/frame_methods.py | 10 ++++++++++ doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/base.py | 10 ++++++++-- pandas/core/frame.py | 9 ++++----- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 3c0dd646aa502..dbb3bfa0977c0 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -123,6 +123,16 @@ def time_itertuples(self): for row in self.df2.itertuples(): pass + def time_itertuples_to_list(self): + list(self.df2.itertuples()) + + def time_itertuples_raw_tuples(self): + for row in self.df2.itertuples(index=False, name=None): + pass + + def time_itertuples_raw_tuples_to_list(self): + list(self.df2.itertuples(index=False, name=None)) + def time_iterrows(self): for row in self.df.iterrows(): pass diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index cb4241b8d1bfc..b4e17bd36fdeb 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1260,6 +1260,7 @@ Performance Improvements - Fixed a performance regression on Windows with 
Python 3.7 of :func:`read_csv` (:issue:`23516`) - Improved performance of :class:`Categorical` constructor for ``Series`` objects (:issue:`23814`) - Improved performance of :meth:`~DataFrame.where` for Categorical data (:issue:`24077`) +- Iterating over a :class:`Series` and using :meth:`DataFrame.itertuples` now create iterators without internally allocating lists of all elements (:issue:`20783`) .. _whatsnew_0240.docs: diff --git a/pandas/core/base.py b/pandas/core/base.py index 46f61c353056e..7caec9f16e472 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -8,7 +8,7 @@ import pandas._libs.lib as lib import pandas.compat as compat -from pandas.compat import PYPY, OrderedDict, builtins +from pandas.compat import PYPY, OrderedDict, builtins, map, range from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -1072,7 +1072,13 @@ def __iter__(self): (for str, int, float) or a pandas scalar (for Timestamp/Timedelta/Interval/Period) """ - return iter(self.tolist()) + # We are explicitly making element iterators. 
+ if is_datetimelike(self._values): + return map(com.maybe_box_datetimelike, self._values) + elif is_extension_array_dtype(self._values): + return iter(self._values) + else: + return map(self._values.item, range(self._values.size)) @cache_readonly def hasnans(self): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c4537db254132..c8ef958750379 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -898,10 +898,10 @@ def itertuples(self, index=True, name="Pandas"): Animal(Index='hawk', num_legs=2, num_wings=2) """ arrays = [] - fields = [] + fields = list(self.columns) if index: arrays.append(self.index) - fields.append("Index") + fields.insert(0, "Index") # use integer indexing because of possible duplicate column names arrays.extend(self.iloc[:, k] for k in range(len(self.columns))) @@ -911,10 +911,9 @@ def itertuples(self, index=True, name="Pandas"): if name is not None and len(self.columns) + index < 256: # `rename` is unsupported in Python 2.6 try: - itertuple = collections.namedtuple(name, - fields + list(self.columns), - rename=True) + itertuple = collections.namedtuple(name, fields, rename=True) return map(itertuple._make, zip(*arrays)) + except Exception: pass From 4bc7a7818516bd5b0670fee22a813595fec4de6b Mon Sep 17 00:00:00 2001 From: Rok Date: Tue, 18 Dec 2018 12:45:09 +0100 Subject: [PATCH 06/11] adding benchmarks for itertuples --- asv_bench/benchmarks/frame_methods.py | 50 +++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index dbb3bfa0977c0..d0527fde2f1ae 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -119,6 +119,12 @@ def time_iteritems_indexing(self): for col in self.df3: self.df3[col] + def time_itertuples_start(self): + self.df2.itertuples() + + def time_itertuples_read_first(self): + next(self.df2.itertuples()) + def time_itertuples(self): for row in self.df2.itertuples(): 
pass @@ -126,6 +132,31 @@ def time_itertuples(self): def time_itertuples_to_list(self): list(self.df2.itertuples()) + def mem_itertuples_start(self): + return self.df2.itertuples() + + def peakmem_itertuples_start(self): + self.df2.itertuples() + + def mem_itertuples_read_first(self): + return next(self.df2.itertuples()) + + def peakmem_itertuples(self): + for row in self.df2.itertuples(): + pass + + def mem_itertuples_to_list(self): + return list(self.df2.itertuples()) + + def peakmem_itertuples_to_list(self): + list(self.df2.itertuples()) + + def time_itertuples_raw_start(self): + self.df2.itertuples(index=False, name=None) + + def time_itertuples_raw_read_first(self): + next(self.df2.itertuples(index=False, name=None)) + def time_itertuples_raw_tuples(self): for row in self.df2.itertuples(index=False, name=None): pass @@ -133,6 +164,25 @@ def time_itertuples_raw_tuples(self): def time_itertuples_raw_tuples_to_list(self): list(self.df2.itertuples(index=False, name=None)) + def mem_itertuples_raw_start(self): + return self.df2.itertuples(index=False, name=None) + + def peakmem_itertuples_raw_start(self): + self.df2.itertuples(index=False, name=None) + + def mem_itertuples_raw_read_first(self): + return next(self.df2.itertuples(index=False, name=None)) + + def peakmem_itertuples_raw(self): + for row in self.df2.itertuples(index=False, name=None): + pass + + def mem_itertuples_raw_to_list(self): + return list(self.df2.itertuples(index=False, name=None)) + + def peakmem_itertuples_raw_to_list(self): + list(self.df2.itertuples(index=False, name=None)) + def time_iterrows(self): for row in self.df.iterrows(): pass From ff3174a7544a55f7d084d8e619db30ccf270f3fe Mon Sep 17 00:00:00 2001 From: Rok Date: Wed, 19 Dec 2018 01:58:58 +0100 Subject: [PATCH 07/11] making dataframe size greater for itertuples benchmark --- asv_bench/benchmarks/frame_methods.py | 41 ++++++++++++++------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git 
a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index d0527fde2f1ae..4cdbe14b445ee 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -103,6 +103,7 @@ def setup(self): self.df2 = DataFrame(np.random.randn(N * 50, 10)) self.df3 = DataFrame(np.random.randn(N, 5 * N), columns=['C' + str(c) for c in range(N * 5)]) + self.df4 = DataFrame(np.random.randn(N * 1000, 10)) def time_iteritems(self): # (monitor no-copying behaviour) @@ -120,68 +121,68 @@ def time_iteritems_indexing(self): self.df3[col] def time_itertuples_start(self): - self.df2.itertuples() + self.df4.itertuples() def time_itertuples_read_first(self): - next(self.df2.itertuples()) + next(self.df4.itertuples()) def time_itertuples(self): - for row in self.df2.itertuples(): + for row in self.df4.itertuples(): pass def time_itertuples_to_list(self): - list(self.df2.itertuples()) + list(self.df4.itertuples()) def mem_itertuples_start(self): - return self.df2.itertuples() + return self.df4.itertuples() def peakmem_itertuples_start(self): - self.df2.itertuples() + self.df4.itertuples() def mem_itertuples_read_first(self): - return next(self.df2.itertuples()) + return next(self.df4.itertuples()) def peakmem_itertuples(self): - for row in self.df2.itertuples(): + for row in self.df4.itertuples(): pass def mem_itertuples_to_list(self): - return list(self.df2.itertuples()) + return list(self.df4.itertuples()) def peakmem_itertuples_to_list(self): - list(self.df2.itertuples()) + list(self.df4.itertuples()) def time_itertuples_raw_start(self): - self.df2.itertuples(index=False, name=None) + self.df4.itertuples(index=False, name=None) def time_itertuples_raw_read_first(self): - next(self.df2.itertuples(index=False, name=None)) + next(self.df4.itertuples(index=False, name=None)) def time_itertuples_raw_tuples(self): - for row in self.df2.itertuples(index=False, name=None): + for row in self.df4.itertuples(index=False, name=None): pass def 
time_itertuples_raw_tuples_to_list(self): - list(self.df2.itertuples(index=False, name=None)) + list(self.df4.itertuples(index=False, name=None)) def mem_itertuples_raw_start(self): - return self.df2.itertuples(index=False, name=None) + return self.df4.itertuples(index=False, name=None) def peakmem_itertuples_raw_start(self): - self.df2.itertuples(index=False, name=None) + self.df4.itertuples(index=False, name=None) def mem_itertuples_raw_read_first(self): - return next(self.df2.itertuples(index=False, name=None)) + return next(self.df4.itertuples(index=False, name=None)) def peakmem_itertuples_raw(self): - for row in self.df2.itertuples(index=False, name=None): + for row in self.df4.itertuples(index=False, name=None): pass def mem_itertuples_raw_to_list(self): - return list(self.df2.itertuples(index=False, name=None)) + return list(self.df4.itertuples(index=False, name=None)) def peakmem_itertuples_raw_to_list(self): - list(self.df2.itertuples(index=False, name=None)) + list(self.df4.itertuples(index=False, name=None)) def time_iterrows(self): for row in self.df.iterrows(): From 854ac015cd26debd3f46cdf930ab8d7740e33b17 Mon Sep 17 00:00:00 2001 From: Rok Date: Wed, 19 Dec 2018 14:21:39 +0100 Subject: [PATCH 08/11] switching mem_itertuples_raw_read_first to peakmem_itertuples_raw_read_first --- asv_bench/benchmarks/frame_methods.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 4cdbe14b445ee..ba2e63c20d3f8 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -171,8 +171,8 @@ def mem_itertuples_raw_start(self): def peakmem_itertuples_raw_start(self): self.df4.itertuples(index=False, name=None) - def mem_itertuples_raw_read_first(self): - return next(self.df4.itertuples(index=False, name=None)) + def peakmem_itertuples_raw_read_first(self): + next(self.df4.itertuples(index=False, name=None)) def 
peakmem_itertuples_raw(self): for row in self.df4.itertuples(index=False, name=None): From 49248d27a3e9053838ed9b708f78e92b961e3cc9 Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 20 Dec 2018 03:39:22 +0100 Subject: [PATCH 09/11] reduce number of rows in reshape.GetDummies.time_get_dummies_1d_sparse benchmark --- asv_bench/benchmarks/reshape.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index e5c2f54263a3c..2405ce6582a6c 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -135,7 +135,7 @@ def time_pivot_table(self): class GetDummies(object): def setup(self): categories = list(string.ascii_letters[:12]) - s = pd.Series(np.random.choice(categories, size=1000000), + s = pd.Series(np.random.choice(categories, size=10000), dtype=pd.api.types.CategoricalDtype(categories)) self.s = s From 2780c6f324906a71bed1cc1cc1023ec22ca141ab Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 20 Dec 2018 04:46:46 +0100 Subject: [PATCH 10/11] Revert "reduce number of rows in reshape.GetDummies.time_get_dummies_1d_sparse benchmark" This reverts commit e2f892a58fa4d9681bf69072c08195c7d67e108d. 
--- asv_bench/benchmarks/reshape.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index 2405ce6582a6c..e5c2f54263a3c 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -135,7 +135,7 @@ def time_pivot_table(self): class GetDummies(object): def setup(self): categories = list(string.ascii_letters[:12]) - s = pd.Series(np.random.choice(categories, size=10000), + s = pd.Series(np.random.choice(categories, size=1000000), dtype=pd.api.types.CategoricalDtype(categories)) self.s = s From 05ef2f82e7e07631977bfad5d646d74fb5d3bd1f Mon Sep 17 00:00:00 2001 From: Rok Date: Mon, 24 Dec 2018 02:32:08 +0100 Subject: [PATCH 11/11] change to whatsnew perf section --- doc/source/whatsnew/v0.24.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index b4e17bd36fdeb..414c1c435ed3f 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1260,7 +1260,8 @@ Performance Improvements - Fixed a performance regression on Windows with Python 3.7 of :func:`read_csv` (:issue:`23516`) - Improved performance of :class:`Categorical` constructor for ``Series`` objects (:issue:`23814`) - Improved performance of :meth:`~DataFrame.where` for Categorical data (:issue:`24077`) -- Iterating over a :class:`Series` and using :meth:`DataFrame.itertuples` now create iterators without internally allocating lists of all elements (:issue:`20783`) +- Improved performance of iterating over a :class:`Series`. Using :meth:`DataFrame.itertuples` now creates iterators + without internally allocating lists of all elements (:issue:`20783`) .. _whatsnew_0240.docs: