pandas-dev · jreback · Mar 30, 2019 · Mar 12, 2019 · Mar 12, 2019 · Mar 13, 2019
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -123,7 +123,7 @@ Bug Fixes
 ~~~~~~~~~
 - Bug in :func:`to_datetime` which would raise an (incorrect) ``ValueError`` when called with a date far into the future and the ``format`` argument specified instead of raising ``OutOfBoundsDatetime`` (:issue:`23830`)
 - Bug in an error message in :meth:`DataFrame.plot`. Improved the error message if non-numerics are passed to :meth:`DataFrame.plot` (:issue:`25481`)
--
+- Bug in :func:`read_csv` which would not raise ``ValueError`` if a column index in ``usecols`` was out of bounds (:issue:`25623`)
 
 Categorical
 ^^^^^^^^^^^

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1894,6 +1894,11 @@ def __init__(self, src, **kwds):
                     not set(usecols).issubset(self.orig_names)):
                 _validate_usecols_names(usecols, self.orig_names)
 
+            # GH 25623: check integer usecols against the table width
+            elif self.usecols_dtype == 'integer':
+                indices = lrange(self._reader.table_width)
+                _validate_usecols_names(usecols, indices)
+
             if len(self.names) > len(usecols):
                 self.names = [n for i, n in enumerate(self.names)
                               if (i in usecols or n in usecols)]
@@ -2197,7 +2202,8 @@ def __init__(self, f, **kwds):
         self.skipinitialspace = kwds['skipinitialspace']
         self.lineterminator = kwds['lineterminator']
         self.quoting = kwds['quoting']
-        self.usecols, _ = _validate_usecols_arg(kwds['usecols'])
+        self.usecols, self.usecols_dtype = _validate_usecols_arg(
+            kwds['usecols'])
         self.skip_blank_lines = kwds['skip_blank_lines']
 
         self.warn_bad_lines = kwds['warn_bad_lines']
@@ -2588,6 +2594,12 @@ def _infer_columns(self):
             if clear_buffer:
                 self._clear_buffer()
 
+            # GH 25623: check integer usecols against each row of columns
+            if self.usecols_dtype == 'integer':
+                for col in columns:
+                    indices = lrange(len(col))
+                    _validate_usecols_names(self.usecols, indices)
+
             if names is not None:
                 if ((self.usecols is not None and
                      len(names) != len(self.usecols)) or
@@ -2623,6 +2635,10 @@ def _infer_columns(self):
             ncols = len(line)
             num_original_columns = ncols
 
+            # GH 25623: check integer usecols against range(ncols)
+            if self.usecols_dtype == 'integer':
+                _validate_usecols_names(self.usecols, lrange(ncols))
+
             if not names:
                 if self.prefix:
                     columns = [['%s%d' % (self.prefix, i)

diff --git a/pandas/tests/io/parser/test_usecols.py b/pandas/tests/io/parser/test_usecols.py
@@ -21,6 +21,20 @@
                                "expected but not found: {0}")
 
 
+@pytest.mark.parametrize("names,usecols", [
+    (None, [0, 3]),
+    (["a", "b", "c"], [0, -1, 2]),
+    (None, [3]),
+    (["a"], [3])
+])
+def test_usecols_out_of_bounds(all_parsers, names, usecols):
+    data = "a,b,c\n1,2,3\n4,5,6"
+    parser = all_parsers
+
+    with pytest.raises(ValueError, match=_msg_validate_usecols_names):
+        parser.read_csv(StringIO(data), usecols=usecols, names=names)
+
+
 def test_raise_on_mixed_dtype_usecols(all_parsers):
     # See gh-12678
     data = """a,b,c