From f9c120ab08808e4be3029228eb5e9dd6a10b411b Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Fri, 26 Nov 2021 23:05:07 +0100
Subject: [PATCH 1/2] BUG: read_csv not applying dtype to index col

---
 doc/source/whatsnew/v1.4.0.rst           |  1 +
 pandas/io/parsers/base_parser.py         | 26 +++++++++++++++++++++++-
 pandas/io/parsers/python_parser.py       | 19 ++---------------
 pandas/tests/io/parser/test_index_col.py | 11 ++++++++++
 4 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 39e3894f86302..4b67ab33bbbc6 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -658,6 +658,7 @@ I/O
 - Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`)
 - Bug in :func:`json_normalize` where reading data with missing multi-level metadata would not respect errors="ignore" (:issue:`44312`)
 - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
+- Bug in :func:`read_csv` not applying dtype for ``index_col`` (:issue:`9435`)
 - Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`)
 - Bug in :class:`ExcelWriter`, where ``engine_kwargs`` were not passed through to all engines (:issue:`43442`)
 - Bug in :func:`read_csv` raising ``ValueError`` when ``parse_dates`` was used with ``MultiIndex`` columns (:issue:`8991`)
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 25a89d1c57006..c56032b49d081 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 from collections import defaultdict
+from copy import copy
 import csv
 import datetime
 from enum import Enum
@@ -148,6 +149,8 @@ def __init__(self, kwds):
         self.na_filter = kwds.get("na_filter", False)
         self.keep_default_na = kwds.get("keep_default_na", True)
 
+        self.dtype = copy(kwds.get("dtype", None))
+
         self.true_values = kwds.get("true_values")
         self.false_values = kwds.get("false_values")
         self.mangle_dupe_cols = kwds.get("mangle_dupe_cols", True)
@@ -498,6 +501,17 @@ def _get_name(icol):
 
         return index
 
+    def _clean_mapping(self, mapping):
+        """converts col numbers to names"""
+        if not isinstance(mapping, dict):
+            return mapping
+        clean = {}
+        for col, v in mapping.items():
+            if isinstance(col, int) and col not in self.orig_names:
+                col = self.orig_names[col]
+            clean[col] = v
+        return clean
+
     @final
     def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
         arrays = []
@@ -522,7 +536,17 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
                         col_name, self.na_values, self.na_fvalues, self.keep_default_na
                     )
 
-            arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
+            clean_dtypes = self._clean_mapping(self.dtype)
+
+            cast_type = None
+            if isinstance(clean_dtypes, dict) and self.index_names is not None:
+                cast_type = clean_dtypes.get(self.index_names[i], None)
+
+            try_num_bool = not (cast_type and is_string_dtype(cast_type))
+
+            arr, _ = self._infer_types(
+                arr, col_na_values | col_na_fvalues, try_num_bool
+            )
             arrays.append(arr)
 
         names = self.index_names
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index f5420618c0235..6800fca508adc 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -4,7 +4,6 @@
     abc,
     defaultdict,
 )
-from copy import copy
 import csv
 from io import StringIO
 import re
@@ -89,7 +88,6 @@ def __init__(
         self.verbose = kwds["verbose"]
         self.converters = kwds["converters"]
 
-        self.dtype = copy(kwds["dtype"])
         self.thousands = kwds["thousands"]
         self.decimal = kwds["decimal"]
 
@@ -308,21 +306,8 @@ def get_chunk(self, size=None):
 
     def _convert_data(self, data):
         # apply converters
-        def _clean_mapping(mapping):
-            """converts col numbers to names"""
-            clean = {}
-            for col, v in mapping.items():
-                if isinstance(col, int) and col not in self.orig_names:
-                    col = self.orig_names[col]
-                clean[col] = v
-            return clean
-
-        clean_conv = _clean_mapping(self.converters)
-        if not isinstance(self.dtype, dict):
-            # handles single dtype applied to all columns
-            clean_dtypes = self.dtype
-        else:
-            clean_dtypes = _clean_mapping(self.dtype)
+        clean_conv = self._clean_mapping(self.converters)
+        clean_dtypes = self._clean_mapping(self.dtype)
 
         # Apply NA values.
         clean_na_values = {}
diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py
index 7315dcc0c4c07..58b5eebbec344 100644
--- a/pandas/tests/io/parser/test_index_col.py
+++ b/pandas/tests/io/parser/test_index_col.py
@@ -321,3 +321,14 @@ def test_infer_types_boolean_sum(all_parsers):
     # index column of dtype 'object', and the Python parser will return a
     # index column of dtype 'int64'.
     tm.assert_frame_equal(result, expected, check_index_type=False)
+
+
+@skip_pyarrow
+@pytest.mark.parametrize("dtype, val", [(object, "01"), ("int64", 1)])
+def test_specify_dtype_for_index_col(all_parsers, dtype, val):
+    # GH#9435
+    data = "a,b\n01,2"
+    parser = all_parsers
+    result = parser.read_csv(StringIO(data), index_col="a", dtype={"a": dtype})
+    expected = DataFrame({"b": [2]}, index=Index([val], name="a"))
+    tm.assert_frame_equal(result, expected)

From 0cf6eeeffae69106c65472d164d9d3e2477ba1de Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 27 Nov 2021 00:15:16 +0100
Subject: [PATCH 2/2] Fix typing

---
 pandas/io/parsers/base_parser.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index c56032b49d081..f06950e3450a7 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -507,6 +507,8 @@ def _clean_mapping(self, mapping):
             return mapping
         clean = {}
         for col, v in mapping.items():
+            # for mypy
+            assert self.orig_names is not None
             if isinstance(col, int) and col not in self.orig_names:
                 col = self.orig_names[col]
             clean[col] = v