From 95265477e5edace9a1ebf4bed3bb6e9db1d87f7b Mon Sep 17 00:00:00 2001
From: debnathshoham <debnathshoham@gmail.com>
Date: Sat, 7 Aug 2021 01:09:06 +0530
Subject: [PATCH 1/8] BUG: read_fwf raise if len colspec doesnt match len names

---
 pandas/io/parsers/readers.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index c639a4a9d494e..6828d6f3c2de2 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -756,6 +756,10 @@ def read_fwf(
             colspecs.append((col, col + w))
             col += w
 
+    if kwds.get("names") is not None:
+        if len(kwds.get("names")) != len(colspecs):
+            raise ValueError("Length of colspecs must match length of names")
+
     kwds["colspecs"] = colspecs
     kwds["infer_nrows"] = infer_nrows
     kwds["engine"] = "python-fwf"

From 4e813a0fedca597a6920a87459b24efc588471d4 Mon Sep 17 00:00:00 2001
From: debnathshoham <debnathshoham@gmail.com>
Date: Sat, 7 Aug 2021 13:17:52 +0530
Subject: [PATCH 2/8] length of columns considered

---
 pandas/io/parsers/readers.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 6828d6f3c2de2..c0ab9ded1e906 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -756,8 +756,17 @@ def read_fwf(
             colspecs.append((col, col + w))
             col += w
 
+    # GH#40830
+    # Ensure length of `colspecs` matches length of `names`
     if kwds.get("names") is not None:
-        if len(kwds.get("names")) != len(colspecs):
+        len_index = 0
+        if kwds.get("index_col") is not None:
+            index_col = kwds.get("index_col")
+            if isinstance(index_col, int) or isinstance(index_col, str):
+                len_index = 1
+            else:
+                len_index = len(index_col)
+        if len(kwds.get("names")) + len_index != len(colspecs):
             raise ValueError("Length of colspecs must match length of names")
 
     kwds["colspecs"] = colspecs

From b126fb4fba81307dc74147cf00bb9aa152b13ddb Mon Sep 17 00:00:00 2001
From: debnathshoham <debnathshoham@gmail.com>
Date: Sat, 7 Aug 2021 15:26:16 +0530
Subject: [PATCH 3/8] added test; whatsnew

---
 doc/source/whatsnew/v1.4.0.rst          |  2 +-
 pandas/io/parsers/readers.py            | 27 ++++++++++++++++---------
 pandas/tests/io/parser/test_read_fwf.py | 17 ++++++++++++++++
 3 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index fa9c424351b00..8d3d9fc41fa37 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -258,7 +258,7 @@ I/O
 - Bug in :func:`read_excel` attempting to read chart sheets from .xlsx files (:issue:`41448`)
 - Bug in :func:`json_normalize` where ``errors=ignore`` could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`)
 - Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`)
--
+- Bug in :func:`read_fwf` where difference in length of ``colspecs`` and ``names`` did not throw any error (:issue:`40830`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index c0ab9ded1e906..203e5dd6a13b2 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -758,16 +758,23 @@ def read_fwf(
 
     # GH#40830
     # Ensure length of `colspecs` matches length of `names`
-    if kwds.get("names") is not None:
-        len_index = 0
-        if kwds.get("index_col") is not None:
-            index_col = kwds.get("index_col")
-            if isinstance(index_col, int) or isinstance(index_col, str):
-                len_index = 1
-            else:
-                len_index = len(index_col)
-        if len(kwds.get("names")) + len_index != len(colspecs):
-            raise ValueError("Length of colspecs must match length of names")
+    names = kwds.get("names")
+    if names is not None:
+        if len(names) != len(colspecs):
+            # maybe name of index(s) not present in `names`
+            len_index = 0
+            if kwds.get("index_col") is not None:
+                index_col = kwds.get("index_col")
+                if isinstance(index_col, int) or isinstance(index_col, str):
+                    len_index = 1
+                elif index_col is False:
+                    len_index = 0
+                else:
+                    # error: Argument 1 to "len" has incompatible type
+                    # "Optional[Any]"; expected "Sized"  [arg-type]
+                    len_index = len(index_col)  # type: ignore[arg-type]
+            if len(names) + len_index != len(colspecs):
+                raise ValueError("Length of colspecs must match length of names")
 
     kwds["colspecs"] = colspecs
     kwds["infer_nrows"] = infer_nrows
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
index 9739a2a75886a..c54590992c22d 100644
--- a/pandas/tests/io/parser/test_read_fwf.py
+++ b/pandas/tests/io/parser/test_read_fwf.py
@@ -710,3 +710,20 @@ def test_encoding_mmap(memory_map):
     data.seek(0)
     df_reference = DataFrame([[1, "A", "Ä", 2]])
     tm.assert_frame_equal(df, df_reference)
+
+
+@pytest.mark.parametrize(
+    "colspecs, names, widths",
+    [
+        ([(0, 6), (6, 12), (12, 18), (18, None)], list("abcde"), None),
+        ([6] * 4, list("abcde"), None),
+        (None, list("abcde"), [6] * 4),
+    ],
+)
+def test_len_colspecs_len_names(colspecs, names, widths):
+    # GH#40830
+    data = """col1  col2  col3  col4
+    bab   ba    2"""
+    msg = "Length of colspecs must match length of names"
+    with pytest.raises(ValueError, match=msg):
+        read_fwf(StringIO(data), colspecs=colspecs, names=names, widths=widths)

From e955b14379d4751cdcd06d829f54f771591c90f9 Mon Sep 17 00:00:00 2001
From: debnathshoham <debnathshoham@gmail.com>
Date: Sun, 8 Aug 2021 14:00:13 +0530
Subject: [PATCH 4/8] suggested edits

---
 doc/source/whatsnew/v1.4.0.rst          | 2 +-
 pandas/io/parsers/readers.py            | 2 +-
 pandas/tests/io/parser/test_read_fwf.py | 9 ++++++++-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 8d3d9fc41fa37..47892ace8fddc 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -258,7 +258,7 @@ I/O
 - Bug in :func:`read_excel` attempting to read chart sheets from .xlsx files (:issue:`41448`)
 - Bug in :func:`json_normalize` where ``errors=ignore`` could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`)
 - Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`)
-- Bug in :func:`read_fwf` where difference in length of ``colspecs`` and ``names`` did not throw any error (:issue:`40830`)
+- Bug in :func:`read_fwf`, where difference in lengths of ``colspecs`` and ``names`` was not raising ``ValueError`` (:issue:`40830`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 203e5dd6a13b2..ed51655d636e0 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -765,7 +765,7 @@ def read_fwf(
             len_index = 0
             if kwds.get("index_col") is not None:
                 index_col = kwds.get("index_col")
-                if isinstance(index_col, int) or isinstance(index_col, str):
+                if isinstance(index_col, (int, str)):
                     len_index = 1
                 elif index_col is False:
                     len_index = 0
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
index c54590992c22d..87a820470453b 100644
--- a/pandas/tests/io/parser/test_read_fwf.py
+++ b/pandas/tests/io/parser/test_read_fwf.py
@@ -716,7 +716,6 @@ def test_encoding_mmap(memory_map):
     "colspecs, names, widths",
     [
         ([(0, 6), (6, 12), (12, 18), (18, None)], list("abcde"), None),
-        ([6] * 4, list("abcde"), None),
         (None, list("abcde"), [6] * 4),
     ],
 )
@@ -727,3 +726,11 @@ def test_len_colspecs_len_names(colspecs, names, widths):
     msg = "Length of colspecs must match length of names"
     with pytest.raises(ValueError, match=msg):
         read_fwf(StringIO(data), colspecs=colspecs, names=names, widths=widths)
+
+
+def test_index_col_True():
+    data = """col1  col2  col3  col4
+    bab   ba    2"""
+    msg = "The value of index_col couldn't be 'True'"
+    with pytest.raises(ValueError, match=msg):
+        read_fwf(StringIO(data), index_col=True)

From ae0cfaa805e00c3649f98e146ed2e01473e3e2c9 Mon Sep 17 00:00:00 2001
From: debnathshoham <debnathshoham@gmail.com>
Date: Sun, 8 Aug 2021 23:13:52 +0530
Subject: [PATCH 5/8] included index_col params alts in test

---
 pandas/io/parsers/readers.py            |  4 +-
 pandas/tests/io/parser/test_read_fwf.py | 65 +++++++++++++++++++------
 2 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index ed51655d636e0..ce02f9e25e1a5 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -765,7 +765,9 @@ def read_fwf(
             len_index = 0
             if kwds.get("index_col") is not None:
                 index_col = kwds.get("index_col")
-                if isinstance(index_col, (int, str)):
+                if index_col is True:
+                    raise ValueError("The value of index_col couldn't be 'True'")
+                elif isinstance(index_col, (int, str)):
                     len_index = 1
                 elif index_col is False:
                     len_index = 0
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
index 87a820470453b..5cf42378a3bc1 100644
--- a/pandas/tests/io/parser/test_read_fwf.py
+++ b/pandas/tests/io/parser/test_read_fwf.py
@@ -713,24 +713,61 @@ def test_encoding_mmap(memory_map):
 
 
 @pytest.mark.parametrize(
-    "colspecs, names, widths",
+    "colspecs, names, widths, index_col, msg",
     [
-        ([(0, 6), (6, 12), (12, 18), (18, None)], list("abcde"), None),
-        (None, list("abcde"), [6] * 4),
+        (
+            [(0, 6), (6, 12), (12, 18), (18, None)],
+            list("abcde"),
+            None,
+            None,
+            "Length of colspecs must match length of names",
+        ),
+        (
+            None,
+            list("abcde"),
+            [6] * 4,
+            None,
+            "Length of colspecs must match length of names",
+        ),
+        (
+            [(0, 6), (6, 12), (12, 18), (18, None)],
+            list("abcde"),
+            None,
+            True,
+            "The value of index_col couldn't be 'True'",
+        ),
+        (
+            None,
+            list("abcde"),
+            [6] * 4,
+            False,
+            "Length of colspecs must match length of names",
+        ),
+        (
+            None,
+            list("abcde"),
+            [6] * 4,
+            True,
+            "The value of index_col couldn't be 'True'",
+        ),
+        (
+            [(0, 6), (6, 12), (12, 18), (18, None)],
+            list("abcde"),
+            None,
+            False,
+            "Length of colspecs must match length of names",
+        ),
     ],
 )
-def test_len_colspecs_len_names(colspecs, names, widths):
+def test_len_colspecs_len_names(colspecs, names, widths, index_col, msg):
     # GH#40830
     data = """col1  col2  col3  col4
     bab   ba    2"""
-    msg = "Length of colspecs must match length of names"
-    with pytest.raises(ValueError, match=msg):
-        read_fwf(StringIO(data), colspecs=colspecs, names=names, widths=widths)
-
-
-def test_index_col_True():
-    data = """col1  col2  col3  col4
-    bab   ba    2"""
-    msg = "The value of index_col couldn't be 'True'"
     with pytest.raises(ValueError, match=msg):
-        read_fwf(StringIO(data), index_col=True)
+        read_fwf(
+            StringIO(data),
+            colspecs=colspecs,
+            names=names,
+            widths=widths,
+            index_col=index_col,
+        )

From 28924f0ca17087916699c90173e1e0c9f645b35c Mon Sep 17 00:00:00 2001
From: debnathshoham <debnathshoham@gmail.com>
Date: Mon, 9 Aug 2021 21:07:41 +0530
Subject: [PATCH 6/8] suggested changes

---
 pandas/io/parsers/readers.py            | 15 +++++----------
 pandas/tests/io/parser/test_read_fwf.py | 11 +++--------
 2 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index ce02f9e25e1a5..024456dbd6d47 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -765,16 +765,11 @@ def read_fwf(
             len_index = 0
             if kwds.get("index_col") is not None:
                 index_col = kwds.get("index_col")
-                if index_col is True:
-                    raise ValueError("The value of index_col couldn't be 'True'")
-                elif isinstance(index_col, (int, str)):
-                    len_index = 1
-                elif index_col is False:
-                    len_index = 0
-                else:
-                    # error: Argument 1 to "len" has incompatible type
-                    # "Optional[Any]"; expected "Sized"  [arg-type]
-                    len_index = len(index_col)  # type: ignore[arg-type]
+                if index_col is not False:
+                    if not is_list_like(index_col):
+                        len_index = 1
+                    else:
+                        len_index = len(index_col)
             if len(names) + len_index != len(colspecs):
                 raise ValueError("Length of colspecs must match length of names")
 
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
index 5cf42378a3bc1..524438b4395b4 100644
--- a/pandas/tests/io/parser/test_read_fwf.py
+++ b/pandas/tests/io/parser/test_read_fwf.py
@@ -713,56 +713,51 @@ def test_encoding_mmap(memory_map):
 
 
 @pytest.mark.parametrize(
-    "colspecs, names, widths, index_col, msg",
+    "colspecs, names, widths, index_col",
     [
         (
             [(0, 6), (6, 12), (12, 18), (18, None)],
             list("abcde"),
             None,
             None,
-            "Length of colspecs must match length of names",
         ),
         (
             None,
             list("abcde"),
             [6] * 4,
             None,
-            "Length of colspecs must match length of names",
         ),
         (
             [(0, 6), (6, 12), (12, 18), (18, None)],
             list("abcde"),
             None,
             True,
-            "The value of index_col couldn't be 'True'",
         ),
         (
             None,
             list("abcde"),
             [6] * 4,
             False,
-            "Length of colspecs must match length of names",
         ),
         (
             None,
             list("abcde"),
             [6] * 4,
             True,
-            "The value of index_col couldn't be 'True'",
         ),
         (
             [(0, 6), (6, 12), (12, 18), (18, None)],
             list("abcde"),
             None,
             False,
-            "Length of colspecs must match length of names",
         ),
     ],
 )
-def test_len_colspecs_len_names(colspecs, names, widths, index_col, msg):
+def test_len_colspecs_len_names(colspecs, names, widths, index_col):
     # GH#40830
     data = """col1  col2  col3  col4
     bab   ba    2"""
+    msg = "Length of colspecs must match length of names"
     with pytest.raises(ValueError, match=msg):
         read_fwf(
             StringIO(data),

From d5c8c38e803095a7cc3d7708ca23d5f3659b23b1 Mon Sep 17 00:00:00 2001
From: debnathshoham <debnathshoham@gmail.com>
Date: Mon, 9 Aug 2021 22:07:46 +0530
Subject: [PATCH 7/8] added type

---
 pandas/io/parsers/readers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 024456dbd6d47..9ddea2cc263fb 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -764,7 +764,7 @@ def read_fwf(
             # maybe name of index(s) not present in `names`
             len_index = 0
             if kwds.get("index_col") is not None:
-                index_col = kwds.get("index_col")
+                index_col: Any = kwds.get("index_col")
                 if index_col is not False:
                     if not is_list_like(index_col):
                         len_index = 1

From 32d8092a6495be92db9e6c4e211e899a64ea0f1f Mon Sep 17 00:00:00 2001
From: debnathshoham <debnathshoham@gmail.com>
Date: Sat, 14 Aug 2021 23:40:10 +0530
Subject: [PATCH 8/8] changed comment; added test for index_col

---
 pandas/io/parsers/readers.py            |  3 +-
 pandas/tests/io/parser/test_read_fwf.py | 87 +++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 9ddea2cc263fb..5a4a82bd341f1 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -761,7 +761,8 @@ def read_fwf(
     names = kwds.get("names")
     if names is not None:
         if len(names) != len(colspecs):
-            # maybe name of index(s) not present in `names`
+            # need to check len(index_col) as it might contain
+            # unnamed indices, in which case it's name is not required
             len_index = 0
             if kwds.get("index_col") is not None:
                 index_col: Any = kwds.get("index_col")
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
index 524438b4395b4..6b136618de721 100644
--- a/pandas/tests/io/parser/test_read_fwf.py
+++ b/pandas/tests/io/parser/test_read_fwf.py
@@ -766,3 +766,90 @@ def test_len_colspecs_len_names(colspecs, names, widths, index_col):
             widths=widths,
             index_col=index_col,
         )
+
+
+@pytest.mark.parametrize(
+    "colspecs, names, widths, index_col, expected",
+    [
+        (
+            [(0, 6), (6, 12), (12, 18), (18, None)],
+            list("abc"),
+            None,
+            0,
+            DataFrame(
+                index=["col1", "ba"],
+                columns=["a", "b", "c"],
+                data=[["col2", "col3", "col4"], ["b   ba", "2", np.nan]],
+            ),
+        ),
+        (
+            [(0, 6), (6, 12), (12, 18), (18, None)],
+            list("ab"),
+            None,
+            [0, 1],
+            DataFrame(
+                index=[["col1", "ba"], ["col2", "b   ba"]],
+                columns=["a", "b"],
+                data=[["col3", "col4"], ["2", np.nan]],
+            ),
+        ),
+        (
+            [(0, 6), (6, 12), (12, 18), (18, None)],
+            list("a"),
+            None,
+            [0, 1, 2],
+            DataFrame(
+                index=[["col1", "ba"], ["col2", "b   ba"], ["col3", "2"]],
+                columns=["a"],
+                data=[["col4"], [np.nan]],
+            ),
+        ),
+        (
+            None,
+            list("abc"),
+            [6] * 4,
+            0,
+            DataFrame(
+                index=["col1", "ba"],
+                columns=["a", "b", "c"],
+                data=[["col2", "col3", "col4"], ["b   ba", "2", np.nan]],
+            ),
+        ),
+        (
+            None,
+            list("ab"),
+            [6] * 4,
+            [0, 1],
+            DataFrame(
+                index=[["col1", "ba"], ["col2", "b   ba"]],
+                columns=["a", "b"],
+                data=[["col3", "col4"], ["2", np.nan]],
+            ),
+        ),
+        (
+            None,
+            list("a"),
+            [6] * 4,
+            [0, 1, 2],
+            DataFrame(
+                index=[["col1", "ba"], ["col2", "b   ba"], ["col3", "2"]],
+                columns=["a"],
+                data=[["col4"], [np.nan]],
+            ),
+        ),
+    ],
+)
+def test_len_colspecs_len_names_with_index_col(
+    colspecs, names, widths, index_col, expected
+):
+    # GH#40830
+    data = """col1  col2  col3  col4
+    bab   ba    2"""
+    result = read_fwf(
+        StringIO(data),
+        colspecs=colspecs,
+        names=names,
+        widths=widths,
+        index_col=index_col,
+    )
+    tm.assert_frame_equal(result, expected)