From 6744b910f0c61d50d17ab86686f9fc0a6003d12c Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 27 Nov 2021 23:55:13 +0100
Subject: [PATCH 1/2] BUG: read_csv raising ParserError when some chunks have
 fewer columns than header

---
 doc/source/whatsnew/v1.4.0.rst                |  1 +
 pandas/_libs/parsers.pyx                      |  9 ++--
 .../tests/io/parser/common/test_chunksize.py  | 45 +++++++++++++++++++
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index e87f5f53256cf..4ba8f756254ee 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -655,6 +655,7 @@ I/O
 - Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy's ``Row`` object (:issue:`40682`)
 - Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
 - Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
+- Bug in :func:`read_csv` raising ``ParserError`` when reading a file in chunks and some chunk blocks have fewer columns than the header for ``engine="c"`` (:issue:`21211`)
 - Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`)
 - Bug in :func:`read_csv` and :func:`read_fwf` ignoring all ``skiprows`` except first when ``nrows`` is specified for ``engine='python'`` (:issue:`44021`, :issue:`10261`)
 - Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`)
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index d2975f83b97d7..b7d81ed59352d 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -927,9 +927,12 @@ cdef class TextReader:
                 (num_cols >= self.parser.line_fields[i]) * num_cols
 
         if self.table_width - self.leading_cols > num_cols:
-            raise ParserError(f"Too many columns specified: expected "
-                              f"{self.table_width - self.leading_cols} "
-                              f"and found {num_cols}")
+            if ((not callable(self.usecols) and self.usecols and
+                    self.table_width - self.leading_cols < len(self.usecols))
+                    or (self.names and len(self.names) - self.leading_cols > num_cols)):
+                raise ParserError(f"Too many columns specified: expected "
+                                  f"{self.table_width - self.leading_cols} "
+                                  f"and found {num_cols}")
 
         if (self.usecols is not None and not callable(self.usecols) and
                 all(isinstance(u, int) for u in self.usecols)):
diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py
index e8a8769bc6291..4c26047d98acc 100644
--- a/pandas/tests/io/parser/common/test_chunksize.py
+++ b/pandas/tests/io/parser/common/test_chunksize.py
@@ -248,3 +248,48 @@ def test_read_csv_memory_growth_chunksize(all_parsers):
         with parser.read_csv(path, chunksize=20) as result:
             for _ in result:
                 pass
+
+
+def test_chunksize_with_usecols_second_block_shorter(all_parsers):
+    # GH#21211
+    parser = all_parsers
+    data = """1,2,3,4
+5,6,7,8
+9,10,11
+"""
+
+    result_chunks = parser.read_csv(
+        StringIO(data),
+        names=["a", "b"],
+        chunksize=2,
+        usecols=[0, 1],
+        header=None,
+    )
+
+    expected_frames = [
+        DataFrame({"a": [1, 5], "b": [2, 6]}),
+        DataFrame({"a": [9], "b": [10]}, index=[2]),
+    ]
+
+    for i, result in enumerate(result_chunks):
+        tm.assert_frame_equal(result, expected_frames[i])
+
+
+def test_chunksize_second_block_shorter(all_parsers):
+    # GH#21211
+    parser = all_parsers
+    data = """a,b,c,d
+1,2,3,4
+5,6,7,8
+9,10,11
+"""
+
+    result_chunks = parser.read_csv(StringIO(data), chunksize=2)
+
+    expected_frames = [
+        DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}),
+        DataFrame({"a": [9], "b": [10], "c": [11], "d": [np.nan]}, index=[2]),
+    ]
+
+    for i, result in enumerate(result_chunks):
+        tm.assert_frame_equal(result, expected_frames[i])

From 12cfc0c85d33aa461e5b406749eb55aef0a9f99b Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sun, 28 Nov 2021 03:18:03 +0100
Subject: [PATCH 2/2] Rename condition

---
 pandas/_libs/parsers.pyx | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index b7d81ed59352d..5a6ad2194f99f 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -926,15 +926,19 @@ cdef class TextReader:
                 self.parser.line_fields[i] + \
                 (num_cols >= self.parser.line_fields[i]) * num_cols
 
+        usecols_not_callable_and_exists = not callable(self.usecols) and self.usecols
+        names_larger_num_cols = (self.names and
+                                 len(self.names) - self.leading_cols > num_cols)
+
         if self.table_width - self.leading_cols > num_cols:
-            if ((not callable(self.usecols) and self.usecols and
-                    self.table_width - self.leading_cols < len(self.usecols))
-                    or (self.names and len(self.names) - self.leading_cols > num_cols)):
+            if (usecols_not_callable_and_exists
+                    and self.table_width - self.leading_cols < len(self.usecols)
+                    or names_larger_num_cols):
                 raise ParserError(f"Too many columns specified: expected "
                                   f"{self.table_width - self.leading_cols} "
                                   f"and found {num_cols}")
 
-        if (self.usecols is not None and not callable(self.usecols) and
+        if (usecols_not_callable_and_exists and
                 all(isinstance(u, int) for u in self.usecols)):
             missing_usecols = [col for col in self.usecols if col >= num_cols]
             if missing_usecols: