From b8200e4c35a9d41dabfa791e189ec01041da6988 Mon Sep 17 00:00:00 2001
From: Ben Kandel <ben.kandel@gmail.com>
Date: Sat, 5 Nov 2016 22:05:18 -0400
Subject: [PATCH 01/12] BUG: read_csv with empty df

read_csv would fail when the header rows passed in cover every line of the
file, i.e. the file contains only header lines and no data rows. This commit
fixes that bug.
---
 doc/source/whatsnew/v0.19.1.txt  |  1 +
 pandas/io/tests/parser/common.py | 18 ++++++++++++++++++
 pandas/parser.pyx                |  6 ++++--
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt
index db5bd22393e64..595fda7086811 100644
--- a/doc/source/whatsnew/v0.19.1.txt
+++ b/doc/source/whatsnew/v0.19.1.txt
@@ -57,5 +57,6 @@ Bug Fixes
 - Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`)
 - Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`)
 - Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`)
+- Bug in ``pd.read_csv`` where reading files fails if the number of headers is equal to the number of lines in the file (:issue:`14515`)
 - Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns``
   is not scalar and ``values`` is not specified (:issue:`14380`)
\ No newline at end of file
diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
index 4cb00c48976a4..6c25c9875d8c5 100644
--- a/pandas/io/tests/parser/common.py
+++ b/pandas/io/tests/parser/common.py
@@ -606,6 +606,24 @@ def test_multi_index_no_level_names(self):
         expected = self.read_csv(StringIO(data), index_col=[1, 0])
         tm.assert_frame_equal(df, expected, check_names=False)
 
+    def test_multi_index_blank_df(self):
+        # GH 14515
+        data = """a,b
+"""
+        df = self.read_csv(StringIO(data), header=[0])
+        expected = DataFrame(columns=[('a'),('b')])
+        tm.assert_frame_equal(df, expected)
+        expected_csv = expected.to_csv()
+        round_trip = self.read_csv(StringIO(expected_csv))
+        tm.assert_frame_equal(expected, round_trip)
+
+        data_multiline = """a,b
+c,d
+"""
+        df2 = self.read_csv(StringIO(data_multiline), header=[0,1])
+        expected2 = DataFrame(columns=[('a', 'c'), ('b', 'd')])
+        tm.assert_frame_equal(df2, expected2)
+
     def test_no_unnamed_index(self):
         data = """ id c0 c1 c2
 0 1 0 a b
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index 9fb99637731be..af3e19ba8d4ee 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -717,7 +717,9 @@ cdef class TextReader:
                     start = self.parser.line_start[0]
 
                 # e.g., if header=3 and file only has 2 lines
-                elif self.parser.lines < hr + 1:
+                if (self.parser.lines < hr + 1
+                    and not isinstance(self.orig_header, list)) or (
+                            self.parser.lines < hr):
                     msg = self.orig_header
                     if isinstance(msg, list):
                         msg = "[%s], len of %d," % (
@@ -940,7 +942,7 @@ cdef class TextReader:
                 raise_parser_error('Error tokenizing data', self.parser)
             footer = self.skipfooter
 
-        if self.parser_start == self.parser.lines:
+        if self.parser_start >= self.parser.lines:
             raise StopIteration
         self._end_clock('Tokenization')
 

From 2f64d578506ab86321aa7ec5c41a5c2b3fd92e90 Mon Sep 17 00:00:00 2001
From: Ben Kandel <ben.kandel@gmail.com>
Date: Sat, 5 Nov 2016 22:11:48 -0400
Subject: [PATCH 02/12] pep8

---
 pandas/io/tests/parser/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
index 6c25c9875d8c5..2db14a1685608 100644
--- a/pandas/io/tests/parser/common.py
+++ b/pandas/io/tests/parser/common.py
@@ -611,7 +611,7 @@ def test_multi_index_blank_df(self):
         data = """a,b
 """
         df = self.read_csv(StringIO(data), header=[0])
-        expected = DataFrame(columns=[('a'),('b')])
+        expected = DataFrame(columns=[('a'), ('b')])
         tm.assert_frame_equal(df, expected)
         expected_csv = expected.to_csv()
         round_trip = self.read_csv(StringIO(expected_csv))
@@ -620,7 +620,7 @@ def test_multi_index_blank_df(self):
         data_multiline = """a,b
 c,d
 """
-        df2 = self.read_csv(StringIO(data_multiline), header=[0,1])
+        df2 = self.read_csv(StringIO(data_multiline), header=[0, 1])
         expected2 = DataFrame(columns=[('a', 'c'), ('b', 'd')])
         tm.assert_frame_equal(df2, expected2)
 

From bfe0423921fa11bc5d22caca8e09cea1ac3543b0 Mon Sep 17 00:00:00 2001
From: Ben Kandel <ben.kandel@gmail.com>
Date: Mon, 7 Nov 2016 10:10:18 -0500
Subject: [PATCH 03/12] typo

---
 pandas/parser.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index af3e19ba8d4ee..3376b338685f1 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -717,7 +717,7 @@ cdef class TextReader:
                     start = self.parser.line_start[0]
 
                 # e.g., if header=3 and file only has 2 lines
-                if (self.parser.lines < hr + 1
+                elif (self.parser.lines < hr + 1
                     and not isinstance(self.orig_header, list)) or (
                             self.parser.lines < hr):
                     msg = self.orig_header

From 72adaf210895d379ba57875103610782a2bedea3 Mon Sep 17 00:00:00 2001
From: Ben Kandel <ben.kandel@gmail.com>
Date: Mon, 7 Nov 2016 12:23:02 -0500
Subject: [PATCH 04/12] remove unnecessary test

---
 pandas/io/tests/parser/common.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
index 2db14a1685608..0cb4dffe0e139 100644
--- a/pandas/io/tests/parser/common.py
+++ b/pandas/io/tests/parser/common.py
@@ -611,11 +611,8 @@ def test_multi_index_blank_df(self):
         data = """a,b
 """
         df = self.read_csv(StringIO(data), header=[0])
-        expected = DataFrame(columns=[('a'), ('b')])
+        expected = DataFrame(columns=[('a',), ('b',)])
         tm.assert_frame_equal(df, expected)
-        expected_csv = expected.to_csv()
-        round_trip = self.read_csv(StringIO(expected_csv))
-        tm.assert_frame_equal(expected, round_trip)
 
         data_multiline = """a,b
 c,d

From 17e44dd3e49f294f9518ac2173eaa06489faf997 Mon Sep 17 00:00:00 2001
From: Ben Kandel <ben.kandel@gmail.com>
Date: Fri, 11 Nov 2016 09:23:38 -0500
Subject: [PATCH 05/12] fix python parser too

---
 pandas/io/parsers.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 092cba093421a..4a501573c8cc4 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2083,6 +2083,12 @@ def _infer_columns(self):
                     # We have an empty file, so check
                     # if columns are provided. That will
                     # serve as the 'line' for parsing
+                    if have_mi_columns:
+                        if clear_buffer:
+                            self._clear_buffer()
+                        columns.append([None] * len(this_columns))
+                        return columns, num_original_columns
+
                     if not self.names:
                         raise EmptyDataError(
                             "No columns to parse from file")

From 68eadf3afaa4815c97e5875acc4f0f8202048e0f Mon Sep 17 00:00:00 2001
From: Ben Kandel <ben.kandel@gmail.com>
Date: Fri, 11 Nov 2016 10:37:16 -0500
Subject: [PATCH 06/12] Modify test.

A test in test_to_csv asserted exactly the behavior this series fixes: it
expected a file with 5 lines, read with a header of length 5, to raise. That
case should now succeed and return an empty DataFrame instead of an error, so
5 is removed from the header lengths that are expected to fail.
---
 pandas/tests/frame/test_to_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
index 4d6a5bb32038d..1eb3454519ce3 100644
--- a/pandas/tests/frame/test_to_csv.py
+++ b/pandas/tests/frame/test_to_csv.py
@@ -587,7 +587,7 @@ def _make_frame(names=None):
             df = _make_frame(True)
             df.to_csv(path, tupleize_cols=False)
 
-            for i in [5, 6, 7]:
+            for i in [6, 7]:
                 msg = 'len of {i}, but only 5 lines in file'.format(i=i)
                 with assertRaisesRegexp(ParserError, msg):
                     read_csv(path, tupleize_cols=False,

From 3d9bbddea2e22d4ee86111a44cf21cbbfce12895 Mon Sep 17 00:00:00 2001
From: Ben Kandel <ben.kandel@gmail.com>
Date: Fri, 11 Nov 2016 10:39:54 -0500
Subject: [PATCH 07/12] whatsnew

---
 doc/source/whatsnew/v0.19.1.txt | 1 -
 doc/source/whatsnew/v0.20.0.txt | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt
index 595fda7086811..db5bd22393e64 100644
--- a/doc/source/whatsnew/v0.19.1.txt
+++ b/doc/source/whatsnew/v0.19.1.txt
@@ -57,6 +57,5 @@ Bug Fixes
 - Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`)
 - Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`)
 - Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`)
-- Bug in ``pd.read_csv`` where reading files fails if the number of headers is equal to the number of lines in the file (:issue:`14515`)
 - Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns``
   is not scalar and ``values`` is not specified (:issue:`14380`)
\ No newline at end of file
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 581106924c77e..e4cd76c6da36a 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -81,3 +81,4 @@ Performance Improvements
 
 Bug Fixes
 ~~~~~~~~~
+- Bug in ``pd.read_csv`` where reading files fails if the number of headers is equal to the number of lines in the file (:issue:`14515`)

From fc23e5c899e78a6cdaf8a6d420eb8ceffe3f7584 Mon Sep 17 00:00:00 2001
From: Ben Kandel <ben.kandel@gmail.com>
Date: Sat, 12 Nov 2016 21:47:18 -0500
Subject: [PATCH 08/12] fix errant this_columns

---
 pandas/io/parsers.py | 4 ++--
 pandas/parser.pyx    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 4a501573c8cc4..fa1904a8fb955 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2083,10 +2083,10 @@ def _infer_columns(self):
                     # We have an empty file, so check
                     # if columns are provided. That will
                     # serve as the 'line' for parsing
-                    if have_mi_columns:
+                    if have_mi_columns and hr > 0:
                         if clear_buffer:
                             self._clear_buffer()
-                        columns.append([None] * len(this_columns))
+                        columns.append([None] * len(columns[-1]))
                         return columns, num_original_columns
 
                     if not self.names:
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index 3376b338685f1..0b1c9eba63ba7 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -718,7 +718,7 @@ cdef class TextReader:
 
                 # e.g., if header=3 and file only has 2 lines
                 elif (self.parser.lines < hr + 1
-                    and not isinstance(self.orig_header, list)) or (
+                      and not isinstance(self.orig_header, list)) or (
                             self.parser.lines < hr):
                     msg = self.orig_header
                     if isinstance(msg, list):

From 518982d1890ae8a58d159cca91ab45242d3b1880 Mon Sep 17 00:00:00 2001
From: Ben Kandel <ben.kandel@gmail.com>
Date: Sun, 13 Nov 2016 08:28:26 -0500
Subject: [PATCH 09/12] move to 0.19.2

---
 doc/source/whatsnew/v0.19.2.txt | 2 +-
 doc/source/whatsnew/v0.20.0.txt | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt
index ecbd6e9b3b288..a58e3499ac38f 100644
--- a/doc/source/whatsnew/v0.19.2.txt
+++ b/doc/source/whatsnew/v0.19.2.txt
@@ -29,7 +29,7 @@ Bug Fixes
 
 - Compat with ``dateutil==2.6.0``; segfault reported in the testing suite (:issue:`14621`)
 - Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`)
-
+- Bug in ``pd.read_csv`` where reading files fails if the number of headers is equal to the number of lines in the file (:issue:`14515`)
 
 
 
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index e4cd76c6da36a..581106924c77e 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -81,4 +81,3 @@ Performance Improvements
 
 Bug Fixes
 ~~~~~~~~~
-- Bug in ``pd.read_csv`` where reading files fails if the number of headers is equal to the number of lines in the file (:issue:`14515`)

From fedfff8231d53045ec6900b85f29d2e5863ab70b Mon Sep 17 00:00:00 2001
From: Ben Kandel <ben.kandel@gmail.com>
Date: Tue, 15 Nov 2016 10:14:10 -0500
Subject: [PATCH 10/12] fix multiindex column parsing

---
 pandas/io/parsers.py             |  6 ++++--
 pandas/io/tests/parser/common.py | 12 ++++++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index fa1904a8fb955..13b67068985f3 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1509,10 +1509,10 @@ def read(self, nrows=None):
             if self._first_chunk:
                 self._first_chunk = False
                 names = self._maybe_dedup_names(self.orig_names)
-
                 index, columns, col_dict = _get_empty_meta(
                     names, self.index_col, self.index_names,
                     dtype=self.kwds.get('dtype'))
+                columns = self._maybe_make_multi_index_columns(columns, self.col_names)
 
                 if self.usecols is not None:
                     columns = self._filter_usecols(columns)
@@ -1979,8 +1979,10 @@ def read(self, rows=None):
         if not len(content):  # pragma: no cover
             # DataFrame with the right metadata, even though it's length 0
             names = self._maybe_dedup_names(self.orig_names)
-            return _get_empty_meta(names, self.index_col,
+            index, columns, col_dict = _get_empty_meta(names, self.index_col,
                                    self.index_names)
+            columns = self._maybe_make_multi_index_columns(columns, self.col_names)
+            return index, columns, col_dict
 
         # handle new style for names in index
         count_empty_content_vals = count_empty_vals(content[0])
diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
index 0cb4dffe0e139..941cd9db8c71a 100644
--- a/pandas/io/tests/parser/common.py
+++ b/pandas/io/tests/parser/common.py
@@ -611,15 +611,23 @@ def test_multi_index_blank_df(self):
         data = """a,b
 """
         df = self.read_csv(StringIO(data), header=[0])
-        expected = DataFrame(columns=[('a',), ('b',)])
+        expected = DataFrame(columns=['a', 'b'])
         tm.assert_frame_equal(df, expected)
+        round_trip = self.read_csv(StringIO(
+            expected.to_csv(index=False)), header=[0])
+        tm.assert_frame_equal(round_trip, expected)
 
         data_multiline = """a,b
 c,d
 """
         df2 = self.read_csv(StringIO(data_multiline), header=[0, 1])
-        expected2 = DataFrame(columns=[('a', 'c'), ('b', 'd')])
+        cols = MultiIndex.from_tuples([('a','c'), ('b', 'd')])
+        expected2 = DataFrame(columns=cols)
         tm.assert_frame_equal(df2, expected2)
+        round_trip = self.read_csv(StringIO(
+            expected2.to_csv(index=False)), header=[0, 1])
+        tm.assert_frame_equal(round_trip, expected2)
+
 
     def test_no_unnamed_index(self):
         data = """ id c0 c1 c2

From e6b1237791fa8392f359a89a6a8a6a4f4da5aed3 Mon Sep 17 00:00:00 2001
From: Ben Kandel <ben.kandel@gmail.com>
Date: Tue, 15 Nov 2016 10:53:27 -0500
Subject: [PATCH 11/12] lint

---
 pandas/io/parsers.py             | 10 ++++++----
 pandas/io/tests/parser/common.py |  3 +--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 13b67068985f3..3fe5e5e826ebd 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1512,7 +1512,8 @@ def read(self, nrows=None):
                 index, columns, col_dict = _get_empty_meta(
                     names, self.index_col, self.index_names,
                     dtype=self.kwds.get('dtype'))
-                columns = self._maybe_make_multi_index_columns(columns, self.col_names)
+                columns = self._maybe_make_multi_index_columns(
+                    columns, self.col_names)
 
                 if self.usecols is not None:
                     columns = self._filter_usecols(columns)
@@ -1979,9 +1980,10 @@ def read(self, rows=None):
         if not len(content):  # pragma: no cover
             # DataFrame with the right metadata, even though it's length 0
             names = self._maybe_dedup_names(self.orig_names)
-            index, columns, col_dict = _get_empty_meta(names, self.index_col,
-                                   self.index_names)
-            columns = self._maybe_make_multi_index_columns(columns, self.col_names)
+            index, columns, col_dict = _get_empty_meta(
+                names, self.index_col, self.index_names)
+            columns = self._maybe_make_multi_index_columns(
+                columns, self.col_names)
             return index, columns, col_dict
 
         # handle new style for names in index
diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
index 941cd9db8c71a..6eb73876c11dd 100644
--- a/pandas/io/tests/parser/common.py
+++ b/pandas/io/tests/parser/common.py
@@ -621,14 +621,13 @@ def test_multi_index_blank_df(self):
 c,d
 """
         df2 = self.read_csv(StringIO(data_multiline), header=[0, 1])
-        cols = MultiIndex.from_tuples([('a','c'), ('b', 'd')])
+        cols = MultiIndex.from_tuples([('a', 'c'), ('b', 'd')])
         expected2 = DataFrame(columns=cols)
         tm.assert_frame_equal(df2, expected2)
         round_trip = self.read_csv(StringIO(
             expected2.to_csv(index=False)), header=[0, 1])
         tm.assert_frame_equal(round_trip, expected2)
 
-
     def test_no_unnamed_index(self):
         data = """ id c0 c1 c2
 0 1 0 a b

From 32e3b0a01930c48a51d1a7c209dee9724fd5ac19 Mon Sep 17 00:00:00 2001
From: Ben Kandel <ben.kandel@gmail.com>
Date: Tue, 15 Nov 2016 13:19:38 -0500
Subject: [PATCH 12/12] lint

---
 pandas/parser.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index 0b1c9eba63ba7..6b43dfbabc4a0 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -719,7 +719,7 @@ cdef class TextReader:
                 # e.g., if header=3 and file only has 2 lines
                 elif (self.parser.lines < hr + 1
                       and not isinstance(self.orig_header, list)) or (
-                            self.parser.lines < hr):
+                          self.parser.lines < hr):
                     msg = self.orig_header
                     if isinstance(msg, list):
                         msg = "[%s], len of %d," % (