From 9b351a188a8d193fb16cff49688083d28a0f714e Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 10 Nov 2021 23:19:56 +0100 Subject: [PATCH 01/14] Start typing parsers --- pandas/io/parsers/base_parser.py | 12 +++- pandas/io/parsers/python_parser.py | 99 ++++++++++++++++++------------ 2 files changed, 69 insertions(+), 42 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 8cdcc05f60266..096a2c8d997fd 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -27,6 +27,7 @@ ArrayLike, DtypeArg, FilePathOrBuffer, + Scalar, ) from pandas.errors import ( ParserError, @@ -231,7 +232,7 @@ def _open_handles(self, src: FilePathOrBuffer, kwds: dict[str, Any]) -> None: errors=kwds.get("encoding_errors", "strict"), ) - def _validate_parse_dates_presence(self, columns: list[str]) -> None: + def _validate_parse_dates_presence(self, columns: list[Scalar]) -> None: """ Check if parse_dates are in columns. @@ -314,12 +315,17 @@ def _should_parse_dates(self, i: int) -> bool: @final def _extract_multi_indexer_columns( - self, header, index_names, col_names, passed_names: bool = False + self, + header: list[list[Scalar | None]], + index_names: list | None, + col_names: None, + passed_names: bool = False, ): """ extract and return the names, index_names, col_names header is a list-of-lists returned from the parsers """ + assert col_names is None if len(header) < 2: return header[0], index_names, col_names, passed_names @@ -610,7 +616,7 @@ def _convert_to_ndarrays( @final def _set_noconvert_dtype_columns( - self, col_indices: list[int], names: list[int | str | tuple] + self, col_indices: list[int], names: list[Scalar | tuple] ) -> set[int]: """ Set the columns that should not undergo dtype conversions. diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index af253fc062632..ce0dfc567ba43 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -19,12 +19,21 @@ import numpy as np import pandas._libs.lib as lib -from pandas._typing import FilePathOrBuffer +from pandas._typing import ( + FilePathOrBuffer, + Scalar, +) from pandas.errors import ( EmptyDataError, ParserError, ) +# BOM character (byte order mark) +# This exists at the beginning of a file to indicate endianness +# of a file (stream). Unfortunately, this marker screws up parsing, +# so we need to remove it if we see it. +from pandas.util._exceptions import find_stack_level + from pandas.core.dtypes.common import is_integer from pandas.core.dtypes.inference import is_dict_like @@ -33,10 +42,6 @@ parser_defaults, ) -# BOM character (byte order mark) -# This exists at the beginning of a file to indicate endianness -# of a file (stream). Unfortunately, this marker screws up parsing, -# so we need to remove it if we see it. _BOM = "\ufeff" @@ -105,9 +110,10 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds): # Get columns in two steps: infer from data, then # infer column indices from self.usecols if it is specified. self._col_indices: list[int] | None = None + columns: list[list[Scalar | None]] try: ( - self.columns, + columns, self.num_original_columns, self.unnamed_cols, ) = self._infer_columns() @@ -117,7 +123,8 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds): # Now self.columns has the set of columns that we will process. # The original set is stored in self.original_columns. 
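# A note on the new `header: list[list[Scalar | None]]` annotation for
# _extract_multi_indexer_columns above: with a multi-row header, the parser
# hands over one inner list per header row, and None marks unnamed header
# cells.  A minimal standalone sketch of that shape (illustration only, not
# parser internals):

header = [
    ["a", "a", None],  # header row 0; the third column is unnamed
    ["x", "y", "z"],   # header row 1
]

# _extract_multi_indexer_columns zips such rows into per-column tuples that
# later become the levels of a MultiIndex, roughly:
names = list(zip(*header))
print(names)  # [('a', 'x'), ('a', 'y'), (None, 'z')]
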
- if len(self.columns) > 1: + self.columns: list[Scalar | tuple] + if len(columns) > 1: # we are processing a multi index column # error: Cannot determine type of 'index_names' # error: Cannot determine type of 'col_names' @@ -127,17 +134,17 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds): self.col_names, _, ) = self._extract_multi_indexer_columns( - self.columns, + columns, self.index_names, # type: ignore[has-type] self.col_names, # type: ignore[has-type] ) # Update list of original names to include all indices. self.num_original_columns = len(self.columns) else: - self.columns = self.columns[0] + self.columns = columns[0] # get popped off for index - self.orig_names: list[int | str | tuple] = list(self.columns) + self.orig_names: list[Scalar | tuple] = list(self.columns) # needs to be cleaned/refactored # multiple date column thing turning into a real spaghetti factory @@ -352,11 +359,13 @@ def _clean_mapping(mapping): clean_dtypes, ) - def _infer_columns(self): + def _infer_columns( + self, + ) -> tuple[list[list[Scalar | None]], int, set[Scalar | None]]: names = self.names num_original_columns = 0 clear_buffer = True - unnamed_cols: set[str | int | None] = set() + unnamed_cols: set[Scalar | None] = set() if self.header is not None: header = self.header @@ -370,7 +379,7 @@ def _infer_columns(self): have_mi_columns = False header = [header] - columns: list[list[int | str | None]] = [] + columns: list[list[Scalar | None]] = [] for level, hr in enumerate(header): try: line = self._buffered_line() @@ -399,7 +408,7 @@ def _infer_columns(self): line = self.names[:] - this_columns: list[int | str | None] = [] + this_columns: list[Scalar | None] = [] this_unnamed_cols = [] for i, c in enumerate(line): @@ -527,10 +536,10 @@ def _infer_columns(self): def _handle_usecols( self, - columns: list[list[str | int | None]], - usecols_key: list[str | int | None], + columns: list[list[Scalar | None]], + usecols_key: list[Scalar | None], num_original_columns: int, - ): + ) -> list[list[Scalar | None]]: """ Sets self._col_indices @@ -563,7 +572,7 @@ def _handle_usecols( "Defining usecols with out of bounds indices is deprecated " "and will raise a ParserError in a future version.", FutureWarning, - stacklevel=8, + stacklevel=find_stack_level(), ) col_indices = self.usecols @@ -583,7 +592,7 @@ def _buffered_line(self): else: return self._next_line() - def _check_for_bom(self, first_row): + def _check_for_bom(self, first_row: list[Scalar]) -> list[Scalar]: """ Checks whether the file begins with the BOM character. If it does, remove it. In addition, if there is quoting @@ -614,6 +623,7 @@ def _check_for_bom(self, first_row): return first_row first_row_bom = first_row[0] + new_row: str if len(first_row_bom) > 1 and first_row_bom[1] == self.quotechar: start = 2 @@ -632,9 +642,11 @@ def _check_for_bom(self, first_row): # No quotation so just remove BOM from first element new_row = first_row_bom[1:] - return [new_row] + first_row[1:] - def _is_line_empty(self, line): + new_row_list: list[Scalar] = [new_row] + return new_row_list + first_row[1:] + + def _is_line_empty(self, line: list[Scalar]) -> bool: """ Check if a line is empty or not. 
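The quote-aware BOM stripping in `_check_for_bom` above is easiest to see in
isolation. A self-contained sketch of the same logic (simplified: the real
method operates on the parser's buffered first row and its configured
`self.quotechar`):

    from __future__ import annotations

    _BOM = "\ufeff"

    def strip_bom(first_row: list[str], quotechar: str = '"') -> list[str]:
        # Only the first cell of the first row can carry the BOM.
        first = first_row[0]
        if not first.startswith(_BOM):
            return first_row
        if len(first) > 1 and first[1] == quotechar:
            # BOM sits before the opening quote: keep the data between the
            # quote pair, plus anything after the closing quote.
            end = first[2:].index(quotechar) + 2
            new_first = first[2:end]
            if len(first) > end + 1:
                new_first += first[end + 1 :]
        else:
            # No quoting, so just drop the BOM from the first element.
            new_first = first[1:]
        return [new_first] + first_row[1:]

    print(strip_bom(["\ufeffa", "b"]))    # ['a', 'b']
    print(strip_bom(['\ufeff"a"', "b"]))  # ['a', 'b']
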
@@ -649,7 +661,7 @@ def _is_line_empty(self, line): """ return not line or all(not x for x in line) - def _next_line(self): + def _next_line(self) -> list[Scalar]: if isinstance(self.data, list): while self.skipfunc(self.pos): self.pos += 1 @@ -703,7 +715,7 @@ def _next_line(self): self.buf.append(line) return line - def _alert_malformed(self, msg, row_num): + def _alert_malformed(self, msg: str, row_num: int) -> None: """ Alert a user about a malformed row, depending on value of `self.on_bad_lines` enum. @@ -713,10 +725,12 @@ def _alert_malformed(self, msg, row_num): Parameters ---------- - msg : The error message to display. - row_num : The row number where the parsing error occurred. - Because this row number is displayed, we 1-index, - even though we 0-index internally. + msg: str + The error message to display. + row_num: int + The row number where the parsing error occurred. + Because this row number is displayed, we 1-index, + even though we 0-index internally. """ if self.on_bad_lines == self.BadLineHandleMethod.ERROR: raise ParserError(msg) @@ -724,7 +738,7 @@ def _alert_malformed(self, msg, row_num): base = f"Skipping line {row_num}: " sys.stderr.write(base + msg + "\n") - def _next_iter_line(self, row_num): + def _next_iter_line(self, row_num: int) -> list[Scalar] | None: """ Wrapper around iterating through `self.data` (CSV source). @@ -734,12 +748,15 @@ def _next_iter_line(self, row_num): Parameters ---------- - row_num : The row number of the line being parsed. + row_num: int + The row number of the line being parsed. """ try: # assert for mypy, data is Iterator[str] or None, would error in next assert self.data is not None - return next(self.data) + line = next(self.data) + assert isinstance(line, list) + return line except csv.Error as e: if ( self.on_bad_lines == self.BadLineHandleMethod.ERROR @@ -768,7 +785,7 @@ def _next_iter_line(self, row_num): self._alert_malformed(msg, row_num) return None - def _check_comments(self, lines): + def _check_comments(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: if self.comment is None: return lines ret = [] @@ -789,19 +806,19 @@ def _check_comments(self, lines): ret.append(rl) return ret - def _remove_empty_lines(self, lines): + def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: """ Iterate through the lines and remove any that are either empty or contain only one whitespace value Parameters ---------- - lines : array-like + lines : list of list of Scalars The array of lines that we are to filter. Returns ------- - filtered_lines : array-like + filtered_lines : list of list of Scalars The same array of lines with the "empty" ones removed. 
""" ret = [] @@ -815,7 +832,7 @@ def _remove_empty_lines(self, lines): ret.append(line) return ret - def _check_thousands(self, lines): + def _check_thousands(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: if self.thousands is None: return lines @@ -823,7 +840,9 @@ def _check_thousands(self, lines): lines=lines, search=self.thousands, replace="" ) - def _search_replace_num_columns(self, lines, search, replace): + def _search_replace_num_columns( + self, lines: list[list[Scalar]], search: str, replace: str + ) -> list[list[Scalar]]: ret = [] for line in lines: rl = [] @@ -840,7 +859,7 @@ def _search_replace_num_columns(self, lines, search, replace): ret.append(rl) return ret - def _check_decimal(self, lines): + def _check_decimal(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: if self.decimal == parser_defaults["decimal"]: return lines @@ -848,12 +867,12 @@ def _check_decimal(self, lines): lines=lines, search=self.decimal, replace="." ) - def _clear_buffer(self): + def _clear_buffer(self) -> None: self.buf = [] _implicit_index = False - def _get_index_name(self, columns): + def _get_index_name(self, columns: list[Scalar | tuple]): """ Try several cases to get lines: @@ -868,11 +887,13 @@ def _get_index_name(self, columns): orig_names = list(columns) columns = list(columns) + line: list[Scalar] | None try: line = self._next_line() except StopIteration: line = None + next_line: list[Scalar] | None try: next_line = self._next_line() except StopIteration: From a1a465edcaf459039e88369430c400312c9e6f02 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 11 Nov 2021 21:27:57 +0100 Subject: [PATCH 02/14] Remove parameter --- pandas/io/parsers/base_parser.py | 6 ++---- pandas/io/parsers/c_parser_wrapper.py | 1 - pandas/io/parsers/python_parser.py | 1 - 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 096a2c8d997fd..5c1bca9cd81bb 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -316,18 +316,16 @@ def _should_parse_dates(self, i: int) -> bool: @final def _extract_multi_indexer_columns( self, - header: list[list[Scalar | None]], + header, index_names: list | None, - col_names: None, passed_names: bool = False, ): """ extract and return the names, index_names, col_names header is a list-of-lists returned from the parsers """ - assert col_names is None if len(header) < 2: - return header[0], index_names, col_names, passed_names + return header[0], index_names, None, passed_names # the names are the tuples of the header that are not the index cols # 0 is the name of the index, assuming index_col is a list of column diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index 32ca3aaeba6cc..40a070326a1c0 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -91,7 +91,6 @@ def __init__(self, src: FilePathOrBuffer, **kwds): ) = self._extract_multi_indexer_columns( self._reader.header, self.index_names, # type: ignore[has-type] - self.col_names, # type: ignore[has-type] passed_names, ) else: diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index ce0dfc567ba43..df55564f8a9e8 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -136,7 +136,6 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds): ) = self._extract_multi_indexer_columns( columns, self.index_names, # type: ignore[has-type] - self.col_names, # type: 
ignore[has-type] ) # Update list of original names to include all indices. self.num_original_columns = len(self.columns) From 9db189bf8950eb8f30453c6dd159670859d634a0 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 12 Nov 2021 11:27:28 +0100 Subject: [PATCH 03/14] Move comment --- pandas/io/parsers/python_parser.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index a6f57bad08b2d..e740bec7a9e52 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -27,11 +27,6 @@ EmptyDataError, ParserError, ) - -# BOM character (byte order mark) -# This exists at the beginning of a file to indicate endianness -# of a file (stream). Unfortunately, this marker screws up parsing, -# so we need to remove it if we see it. from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import is_integer @@ -42,6 +37,10 @@ parser_defaults, ) +# BOM character (byte order mark) +# This exists at the beginning of a file to indicate endianness +# of a file (stream). Unfortunately, this marker screws up parsing, +# so we need to remove it if we see it. _BOM = "\ufeff" From 467421b0c849c2ff5d7d4a8e0b709d9b6cbe9efd Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 12 Nov 2021 13:54:54 +0100 Subject: [PATCH 04/14] Continue typing --- pandas/io/parsers/arrow_parser_wrapper.py | 7 +++- pandas/io/parsers/base_parser.py | 46 ++++++++++++++++++++--- pandas/io/parsers/python_parser.py | 30 ++++++++++----- 3 files changed, 67 insertions(+), 16 deletions(-) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 5b1b178c4f610..596a29bad04b3 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -107,7 +107,12 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame: multi_index_named = False frame.columns = self.names # we only need the frame not the names - frame.columns, frame = self._do_date_conversions(frame.columns, frame) + # error: Incompatible types in assignment (expression has type + # "Union[List[Union[Union[str, int, float, bool], Union[Period, Timestamp, + # Timedelta, Any]]], Index]", variable has type "Index") [assignment] + frame.columns, frame = self._do_date_conversions( # type: ignore[assignment] + frame.columns, frame + ) if self.index_col is not None: for i, item in enumerate(self.index_col): if is_integer(item): diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 5c1bca9cd81bb..3073d7239fbc6 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -13,6 +13,7 @@ Sequence, cast, final, + overload, ) import warnings @@ -33,6 +34,7 @@ ParserError, ParserWarning, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( @@ -54,6 +56,7 @@ from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.missing import isna +from pandas import DataFrame from pandas.core import algorithms from pandas.core.arrays import Categorical from pandas.core.indexes.api import ( @@ -381,7 +384,7 @@ def extract(r): return names, index_names, col_names, passed_names @final - def _maybe_dedup_names(self, names): + def _maybe_dedup_names(self, names: list[Scalar | tuple]) -> list[Scalar | tuple]: # see gh-7160 and gh-9424: this helps to provide # immediate alleviation of the duplicate names # issue and 
appears to be satisfactory to users, @@ -389,7 +392,7 @@ def _maybe_dedup_names(self, names): # would be nice! if self.mangle_dupe_cols: names = list(names) # so we can index - counts: DefaultDict[int | str | tuple, int] = defaultdict(int) + counts: DefaultDict[Scalar | tuple, int] = defaultdict(int) is_potential_mi = _is_potential_multi_index(names, self.index_col) for i, col in enumerate(names): @@ -399,6 +402,7 @@ def _maybe_dedup_names(self, names): counts[col] = cur_count + 1 if is_potential_mi: + assert isinstance(col, tuple) col = col[:-1] + (f"{col[-1]}.{cur_count}",) else: col = f"{col}.{cur_count}" @@ -798,7 +802,35 @@ def _cast_types(self, values, cast_type, column): ) from err return values - def _do_date_conversions(self, names, data): + @overload + def _do_date_conversions( + self, + names: Index, + data: DataFrame, + ) -> tuple[list[Scalar] | Index, DataFrame]: + ... + + @overload + def _do_date_conversions( + self, + names: list[Scalar | tuple], + data: dict[Scalar | tuple, ArrayLike] | dict[Scalar | tuple, np.ndarray], + ) -> tuple[ + list[Scalar | tuple], + dict[Scalar | tuple, ArrayLike] | dict[Scalar | tuple, np.ndarray], + ]: + ... + + def _do_date_conversions( + self, + names: list[Scalar | tuple] | Index, + data: dict[Scalar | tuple, ArrayLike] + | dict[Scalar | tuple, np.ndarray] + | DataFrame, + ) -> tuple[ + list[Scalar | tuple] | Index, + dict[Scalar | tuple, ArrayLike] | dict[Scalar | tuple, np.ndarray] | DataFrame, + ]: # returns data, columns if self.parse_dates is not None: @@ -814,7 +846,11 @@ def _do_date_conversions(self, names, data): return names, data - def _check_data_length(self, columns: list[str], data: list[ArrayLike]) -> None: + def _check_data_length( + self, + columns: list[Scalar | tuple], + data: list[ArrayLike] | list[np.ndarray], + ) -> None: """Checks if length of data is equal to length of column names. One set of trailing commas is allowed. self.index_col not False @@ -834,7 +870,7 @@ def _check_data_length(self, columns: list[str], data: list[ArrayLike]) -> None: "Length of header or names does not match length of data. 
This leads " "to a loss of data with index_col=False.", ParserWarning, - stacklevel=6, + stacklevel=find_stack_level(), ) def _evaluate_usecols(self, usecols, names): diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index e740bec7a9e52..9d8408fa38b02 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -11,7 +11,10 @@ import sys from typing import ( DefaultDict, + Dict, Iterator, + Tuple, + Union, cast, ) import warnings @@ -20,6 +23,7 @@ import pandas._libs.lib as lib from pandas._typing import ( + ArrayLike, FilePathOrBuffer, Scalar, ) @@ -173,7 +177,7 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds): ) self.num = re.compile(regex) - def _make_reader(self, f): + def _make_reader(self, f) -> None: sep = self.delimiter if sep is None or len(sep) == 1: @@ -239,7 +243,7 @@ def _read(): # TextIOWrapper, mmap, None]") self.data = reader # type: ignore[assignment] - def read(self, rows=None): + def read(self, rows: int | None = None): try: content = self._get_lines(rows) except StopIteration: @@ -276,14 +280,18 @@ def read(self, rows=None): alldata = self._rows_to_cols(content) data, columns = self._exclude_implicit_index(alldata) - columns, data = self._do_date_conversions(columns, data) + columns, date_data = self._do_date_conversions(columns, data) + data = cast(Dict[Union[Scalar, Tuple], np.ndarray], date_data) - data = self._convert_data(data) - index, columns = self._make_index(data, alldata, columns, indexnamerow) + conv_data = self._convert_data(data) + index, columns = self._make_index(conv_data, alldata, columns, indexnamerow) - return index, columns, data + return index, columns, conv_data - def _exclude_implicit_index(self, alldata): + def _exclude_implicit_index( + self, + alldata: list[np.ndarray], + ) -> tuple[dict[Scalar | tuple, np.ndarray], list[Scalar | tuple]]: names = self._maybe_dedup_names(self.orig_names) offset = 0 @@ -305,7 +313,9 @@ def get_chunk(self, size=None): size = self.chunksize # type: ignore[attr-defined] return self.read(rows=size) - def _convert_data(self, data): + def _convert_data( + self, data: dict[Scalar | tuple, np.ndarray] + ) -> dict[Scalar | tuple, ArrayLike]: # apply converters def _clean_mapping(mapping): """converts col numbers to names""" @@ -931,7 +941,7 @@ def _get_index_name(self, columns: list[Scalar | tuple]): return index_name, orig_names, columns - def _rows_to_cols(self, content): + def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]: col_len = self.num_original_columns if self._implicit_index: @@ -1014,7 +1024,7 @@ def _rows_to_cols(self, content): ] return zipped_content - def _get_lines(self, rows=None): + def _get_lines(self, rows: int | None = None): lines = self.buf new_rows = None From a719b926c2ccef69416d9792ead700c15989bbb0 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 12 Nov 2021 20:24:10 +0100 Subject: [PATCH 05/14] Adjust type hints --- pandas/io/parsers/base_parser.py | 33 ++++++++++++++++++++---------- pandas/io/parsers/python_parser.py | 10 +++++---- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 3073d7239fbc6..5faac175cb32e 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -384,7 +384,9 @@ def extract(r): return names, index_names, col_names, passed_names @final - def _maybe_dedup_names(self, names: list[Scalar | tuple]) -> list[Scalar | tuple]: + def _maybe_dedup_names( + self, names: 
list[Scalar] | list[tuple] + ) -> list[Scalar] | list[tuple]: # see gh-7160 and gh-9424: this helps to provide # immediate alleviation of the duplicate names # issue and appears to be satisfactory to users, @@ -618,7 +620,7 @@ def _convert_to_ndarrays( @final def _set_noconvert_dtype_columns( - self, col_indices: list[int], names: list[Scalar | tuple] + self, col_indices: list[int], names: list[Scalar] | list[tuple] ) -> set[int]: """ Set the columns that should not undergo dtype conversions. @@ -813,23 +815,32 @@ def _do_date_conversions( @overload def _do_date_conversions( self, - names: list[Scalar | tuple], + names: list[Scalar] | list[tuple], data: dict[Scalar | tuple, ArrayLike] | dict[Scalar | tuple, np.ndarray], ) -> tuple[ - list[Scalar | tuple], - dict[Scalar | tuple, ArrayLike] | dict[Scalar | tuple, np.ndarray], + list[Scalar] | list[tuple], + dict[Scalar, ArrayLike] + | dict[tuple, ArrayLike] + | dict[Scalar, np.ndarray] + | dict[tuple, np.ndarray], ]: ... def _do_date_conversions( self, - names: list[Scalar | tuple] | Index, - data: dict[Scalar | tuple, ArrayLike] - | dict[Scalar | tuple, np.ndarray] + names: list[Scalar] | list[tuple] | Index, + data: dict[Scalar, ArrayLike] + | dict[tuple, ArrayLike] + | dict[Scalar, np.ndarray] + | dict[tuple, np.ndarray] | DataFrame, ) -> tuple[ - list[Scalar | tuple] | Index, - dict[Scalar | tuple, ArrayLike] | dict[Scalar | tuple, np.ndarray] | DataFrame, + list[Scalar] | list[tuple] | Index, + dict[Scalar, ArrayLike] + | dict[tuple, ArrayLike] + | dict[Scalar, np.ndarray] + | dict[tuple, np.ndarray] + | DataFrame, ]: # returns data, columns @@ -848,7 +859,7 @@ def _do_date_conversions( def _check_data_length( self, - columns: list[Scalar | tuple], + columns: list[Scalar] | list[tuple], data: list[ArrayLike] | list[np.ndarray], ) -> None: """Checks if length of data is equal to length of column names. diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 9d8408fa38b02..4ab47e5e38ef9 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -127,7 +127,7 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds): # Now self.columns has the set of columns that we will process. # The original set is stored in self.original_columns. 
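# For reference, the mangle_dupe_cols scheme that _maybe_dedup_names (retyped
# above) implements, reduced to a standalone sketch that mirrors the method
# body minus the mangle_dupe_cols/index_col plumbing:

from collections import defaultdict

def dedup(names, is_potential_mi):
    names = list(names)  # so we can index
    counts = defaultdict(int)
    for i, col in enumerate(names):
        cur_count = counts[col]
        while cur_count > 0:
            counts[col] = cur_count + 1
            if is_potential_mi:
                # mangle only the last level of a MultiIndex tuple
                col = col[:-1] + (f"{col[-1]}.{cur_count}",)
            else:
                col = f"{col}.{cur_count}"
            cur_count = counts[col]
        names[i] = col
        counts[col] = cur_count + 1
    return names

print(dedup(["a", "a", "b"], False))      # ['a', 'a.1', 'b']
print(dedup([("x", 1), ("x", 1)], True))  # [('x', 1), ('x', '1.1')]
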
# error: Cannot determine type of 'index_names' - self.columns: list[Scalar | tuple] + self.columns: list[Scalar] | list[tuple] ( self.columns, self.index_names, @@ -139,7 +139,7 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds): ) # get popped off for index - self.orig_names: list[Scalar | tuple] = list(self.columns) + self.orig_names: list[Scalar] | list[tuple] = list(self.columns) # needs to be cleaned/refactored # multiple date column thing turning into a real spaghetti factory @@ -291,7 +291,9 @@ def read(self, rows: int | None = None): def _exclude_implicit_index( self, alldata: list[np.ndarray], - ) -> tuple[dict[Scalar | tuple, np.ndarray], list[Scalar | tuple]]: + ) -> tuple[ + dict[Scalar, np.ndarray] | dict[tuple, np.ndarray], list[Scalar] | list[tuple] + ]: names = self._maybe_dedup_names(self.orig_names) offset = 0 @@ -873,7 +875,7 @@ def _clear_buffer(self) -> None: _implicit_index = False - def _get_index_name(self, columns: list[Scalar | tuple]): + def _get_index_name(self, columns: list[Scalar] | list[tuple]): """ Try several cases to get lines: From b39888c4123f9286121e522f3c019e4d6663c928 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 12 Nov 2021 20:36:54 +0100 Subject: [PATCH 06/14] Restrict types --- pandas/io/parsers/base_parser.py | 9 +++++++-- pandas/io/parsers/python_parser.py | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 5faac175cb32e..a5e42f10269eb 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -235,7 +235,9 @@ def _open_handles(self, src: FilePathOrBuffer, kwds: dict[str, Any]) -> None: errors=kwds.get("encoding_errors", "strict"), ) - def _validate_parse_dates_presence(self, columns: list[Scalar]) -> None: + def _validate_parse_dates_presence( + self, columns: list[Scalar] | list[tuple] + ) -> None: """ Check if parse_dates are in columns. 
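The direction of this "Restrict types" patch is worth spelling out: a parsed
header is homogeneous, either every label is a scalar or, for MultiIndex
columns, every label is a tuple, so `list[Scalar] | list[tuple]` encodes that
invariant, while the earlier `list[Scalar | tuple]` would also admit mixed
lists the parser never produces. A hypothetical illustration:

    flat_header: list[str] = ["a", "b"]            # plain columns
    mi_header: list[tuple] = [("a", 1), ("a", 2)]  # MultiIndex columns

    # Both satisfy list[Scalar] | list[tuple].  A mixed list such as
    # ["a", ("a", 2)] satisfies only the looser list[Scalar | tuple],
    # so the union of list types is the more precise annotation.
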
@@ -816,7 +818,10 @@ def _do_date_conversions( def _do_date_conversions( self, names: list[Scalar] | list[tuple], - data: dict[Scalar | tuple, ArrayLike] | dict[Scalar | tuple, np.ndarray], + data: dict[Scalar, ArrayLike] + | dict[tuple, ArrayLike] + | dict[Scalar, np.ndarray] + | dict[tuple, np.ndarray], ) -> tuple[ list[Scalar] | list[tuple], dict[Scalar, ArrayLike] diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 4ab47e5e38ef9..e364a17513ce9 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -256,7 +256,7 @@ def read(self, rows: int | None = None): # done with first read, next time raise StopIteration self._first_chunk = False - columns = list(self.orig_names) + columns: list[Scalar] | list[tuple] = list(self.orig_names) if not len(content): # pragma: no cover # DataFrame with the right metadata, even though it's length 0 names = self._maybe_dedup_names(self.orig_names) From 16c5ccce14c40314e86591634900cd18d1ecec0c Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 13 Nov 2021 20:28:17 +0100 Subject: [PATCH 07/14] Improve tuple type hint --- pandas/io/parsers/base_parser.py | 36 +++++++++++++++--------------- pandas/io/parsers/python_parser.py | 20 ++++++++++------- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index a5e42f10269eb..a76f49a1cdbb6 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -236,7 +236,7 @@ def _open_handles(self, src: FilePathOrBuffer, kwds: dict[str, Any]) -> None: ) def _validate_parse_dates_presence( - self, columns: list[Scalar] | list[tuple] + self, columns: list[Scalar] | list[tuple[Scalar, ...]] ) -> None: """ Check if parse_dates are in columns. @@ -387,8 +387,8 @@ def extract(r): @final def _maybe_dedup_names( - self, names: list[Scalar] | list[tuple] - ) -> list[Scalar] | list[tuple]: + self, names: list[Scalar] | list[tuple[Scalar, ...]] + ) -> list[Scalar] | list[tuple[Scalar, ...]]: # see gh-7160 and gh-9424: this helps to provide # immediate alleviation of the duplicate names # issue and appears to be satisfactory to users, @@ -396,7 +396,7 @@ def _maybe_dedup_names( # would be nice! if self.mangle_dupe_cols: names = list(names) # so we can index - counts: DefaultDict[Scalar | tuple, int] = defaultdict(int) + counts: DefaultDict[Scalar | tuple[Scalar, ...], int] = defaultdict(int) is_potential_mi = _is_potential_multi_index(names, self.index_col) for i, col in enumerate(names): @@ -622,7 +622,7 @@ def _convert_to_ndarrays( @final def _set_noconvert_dtype_columns( - self, col_indices: list[int], names: list[Scalar] | list[tuple] + self, col_indices: list[int], names: list[Scalar] | list[tuple[Scalar, ...]] ) -> set[int]: """ Set the columns that should not undergo dtype conversions. @@ -817,34 +817,34 @@ def _do_date_conversions( @overload def _do_date_conversions( self, - names: list[Scalar] | list[tuple], + names: list[Scalar] | list[tuple[Scalar, ...]], data: dict[Scalar, ArrayLike] - | dict[tuple, ArrayLike] + | dict[tuple[Scalar, ...], ArrayLike] | dict[Scalar, np.ndarray] - | dict[tuple, np.ndarray], + | dict[tuple[Scalar, ...], np.ndarray], ) -> tuple[ - list[Scalar] | list[tuple], + list[Scalar] | list[tuple[Scalar, ...]], dict[Scalar, ArrayLike] - | dict[tuple, ArrayLike] + | dict[tuple[Scalar, ...], ArrayLike] | dict[Scalar, np.ndarray] - | dict[tuple, np.ndarray], + | dict[tuple[Scalar, ...], np.ndarray], ]: ... 
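# The @overload stubs above are erased at runtime; they exist purely to tell
# the type checker which argument/return pairings _do_date_conversions
# supports.  A minimal standalone pair showing the pattern (names here are
# hypothetical):

from __future__ import annotations

from typing import overload

@overload
def first_label(names: list[str]) -> str: ...
@overload
def first_label(names: list[int]) -> int: ...

def first_label(names: list[str] | list[int]) -> str | int:
    # The single real implementation must be compatible with every stub;
    # callers get back the narrowed type matching their argument.
    return names[0]

label = first_label(["a", "b"])  # checker infers str
num = first_label([1, 2])        # checker infers int
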
def _do_date_conversions( self, - names: list[Scalar] | list[tuple] | Index, + names: list[Scalar] | list[tuple[Scalar, ...]] | Index, data: dict[Scalar, ArrayLike] - | dict[tuple, ArrayLike] + | dict[tuple[Scalar, ...], ArrayLike] | dict[Scalar, np.ndarray] - | dict[tuple, np.ndarray] + | dict[tuple[Scalar, ...], np.ndarray] | DataFrame, ) -> tuple[ - list[Scalar] | list[tuple] | Index, + list[Scalar] | list[tuple[Scalar, ...]] | Index, dict[Scalar, ArrayLike] - | dict[tuple, ArrayLike] + | dict[tuple[Scalar, ...], ArrayLike] | dict[Scalar, np.ndarray] - | dict[tuple, np.ndarray] + | dict[tuple[Scalar, ...], np.ndarray] | DataFrame, ]: # returns data, columns @@ -864,7 +864,7 @@ def _do_date_conversions( def _check_data_length( self, - columns: list[Scalar] | list[tuple], + columns: list[Scalar] | list[tuple[Scalar, ...]], data: list[ArrayLike] | list[np.ndarray], ) -> None: """Checks if length of data is equal to length of column names. diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index e364a17513ce9..703a33659b8db 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -127,7 +127,7 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds): # Now self.columns has the set of columns that we will process. # The original set is stored in self.original_columns. # error: Cannot determine type of 'index_names' - self.columns: list[Scalar] | list[tuple] + self.columns: list[Scalar] | list[tuple[Scalar, ...]] ( self.columns, self.index_names, @@ -139,7 +139,7 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds): ) # get popped off for index - self.orig_names: list[Scalar] | list[tuple] = list(self.columns) + self.orig_names: list[Scalar] | list[tuple[Scalar, ...]] = list(self.columns) # needs to be cleaned/refactored # multiple date column thing turning into a real spaghetti factory @@ -256,7 +256,7 @@ def read(self, rows: int | None = None): # done with first read, next time raise StopIteration self._first_chunk = False - columns: list[Scalar] | list[tuple] = list(self.orig_names) + columns: list[Scalar] | list[tuple[Scalar, ...]] = list(self.orig_names) if not len(content): # pragma: no cover # DataFrame with the right metadata, even though it's length 0 names = self._maybe_dedup_names(self.orig_names) @@ -281,7 +281,10 @@ def read(self, rows: int | None = None): data, columns = self._exclude_implicit_index(alldata) columns, date_data = self._do_date_conversions(columns, data) - data = cast(Dict[Union[Scalar, Tuple], np.ndarray], date_data) + data = cast( + Union[Dict[Scalar, np.ndarray], Dict[Tuple[Scalar, ...], np.ndarray]], + date_data, + ) conv_data = self._convert_data(data) index, columns = self._make_index(conv_data, alldata, columns, indexnamerow) @@ -292,7 +295,8 @@ def _exclude_implicit_index( self, alldata: list[np.ndarray], ) -> tuple[ - dict[Scalar, np.ndarray] | dict[tuple, np.ndarray], list[Scalar] | list[tuple] + dict[Scalar, np.ndarray] | dict[tuple[Scalar, ...], np.ndarray], + list[Scalar] | list[tuple[Scalar, ...]], ]: names = self._maybe_dedup_names(self.orig_names) @@ -316,8 +320,8 @@ def get_chunk(self, size=None): return self.read(rows=size) def _convert_data( - self, data: dict[Scalar | tuple, np.ndarray] - ) -> dict[Scalar | tuple, ArrayLike]: + self, data: dict[Scalar, np.ndarray] | dict[tuple[Scalar, ...], np.ndarray] + ) -> dict[Scalar, ArrayLike] | dict[tuple[Scalar, ...], ArrayLike]: # apply converters def _clean_mapping(mapping): """converts col numbers to names""" @@ -875,7 +879,7 
@@ def _clear_buffer(self) -> None: _implicit_index = False - def _get_index_name(self, columns: list[Scalar] | list[tuple]): + def _get_index_name(self, columns: list[Scalar] | list[tuple[Scalar, ...]]): """ Try several cases to get lines: From ddb1f23676e1f017370c0737217dc471eec42a1f Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 23 Nov 2021 22:13:41 +0100 Subject: [PATCH 08/14] Adjust typing --- pandas/io/parsers/base_parser.py | 51 +++++++++------------------ pandas/io/parsers/c_parser_wrapper.py | 8 ++--- pandas/io/parsers/python_parser.py | 26 ++++++-------- 3 files changed, 32 insertions(+), 53 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index eacc8d15f3e41..0d4bfb9c175d7 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1,6 +1,9 @@ from __future__ import annotations -from collections import defaultdict +from collections import ( + Hashable, + defaultdict, +) import csv import datetime from enum import Enum @@ -10,6 +13,7 @@ Callable, DefaultDict, Iterable, + Mapping, Sequence, cast, final, @@ -392,9 +396,7 @@ def extract(r): return names, index_names, col_names, passed_names @final - def _maybe_dedup_names( - self, names: list[Scalar] | list[tuple[Scalar, ...]] - ) -> list[Scalar] | list[tuple[Scalar, ...]]: + def _maybe_dedup_names(self, names: Sequence[Hashable]) -> Sequence[Hashable]: # see gh-7160 and gh-9424: this helps to provide # immediate alleviation of the duplicate names # issue and appears to be satisfactory to users, @@ -412,6 +414,7 @@ def _maybe_dedup_names( counts[col] = cur_count + 1 if is_potential_mi: + # for mypy assert isinstance(col, tuple) col = col[:-1] + (f"{col[-1]}.{cur_count}",) else: @@ -544,7 +547,7 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index: @final def _convert_to_ndarrays( self, - dct: dict, + dct: Mapping, na_values, na_fvalues, verbose: bool = False, @@ -817,42 +820,22 @@ def _do_date_conversions( self, names: Index, data: DataFrame, - ) -> tuple[list[Scalar] | Index, DataFrame]: + ) -> tuple[Sequence[Hashable] | Index, DataFrame]: ... @overload def _do_date_conversions( self, - names: list[Scalar] | list[tuple[Scalar, ...]], - data: dict[Scalar, ArrayLike] - | dict[tuple[Scalar, ...], ArrayLike] - | dict[Scalar, np.ndarray] - | dict[tuple[Scalar, ...], np.ndarray], - ) -> tuple[ - list[Scalar] | list[tuple[Scalar, ...]], - dict[Scalar, ArrayLike] - | dict[tuple[Scalar, ...], ArrayLike] - | dict[Scalar, np.ndarray] - | dict[tuple[Scalar, ...], np.ndarray], - ]: + names: Sequence[Hashable], + data: Mapping[Hashable, ArrayLike], + ) -> tuple[Sequence[Hashable], Mapping[Hashable, ArrayLike]]: ... 
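# Part of what the Mapping/Sequence/Hashable rewrite above buys: the abstract,
# read-only container types compose with subtyping where the concrete ones do
# not, because Sequence is covariant in its element type while list is
# invariant.  A small sketch of the distinction (function names hypothetical):

from __future__ import annotations

from typing import Hashable, Sequence

def takes_sequence(cols: Sequence[Hashable]) -> None: ...

def takes_list(cols: list[Hashable]) -> None: ...

names: list[str] = ["a", "b"]

takes_sequence(names)  # OK: list[str] is a valid Sequence[Hashable]
takes_list(names)      # mypy error: list is invariant, because a
                       # list[Hashable] would allow appending non-str items
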
def _do_date_conversions( self, - names: list[Scalar] | list[tuple[Scalar, ...]] | Index, - data: dict[Scalar, ArrayLike] - | dict[tuple[Scalar, ...], ArrayLike] - | dict[Scalar, np.ndarray] - | dict[tuple[Scalar, ...], np.ndarray] - | DataFrame, - ) -> tuple[ - list[Scalar] | list[tuple[Scalar, ...]] | Index, - dict[Scalar, ArrayLike] - | dict[tuple[Scalar, ...], ArrayLike] - | dict[Scalar, np.ndarray] - | dict[tuple[Scalar, ...], np.ndarray] - | DataFrame, - ]: + names: Sequence[Hashable] | Index, + data: Mapping[Hashable, ArrayLike] | DataFrame, + ) -> tuple[Sequence[Hashable] | Index, Mapping[Hashable, ArrayLike] | DataFrame]: # returns data, columns if self.parse_dates is not None: @@ -870,8 +853,8 @@ def _do_date_conversions( def _check_data_length( self, - columns: list[Scalar] | list[tuple[Scalar, ...]], - data: list[ArrayLike] | list[np.ndarray], + columns: Sequence[Hashable], + data: Sequence[ArrayLike], ) -> None: """Checks if length of data is equal to length of column names. diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index e96df3b3f3782..05c963f2d2552 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -279,7 +279,7 @@ def read(self, nrows=None): data_tups = sorted(data.items()) data = {k: v for k, (i, v) in zip(names, data_tups)} - names, data = self._do_date_conversions(names, data) + names, date_data = self._do_date_conversions(names, data) else: # rename dict keys @@ -302,13 +302,13 @@ def read(self, nrows=None): data = {k: v for k, (i, v) in zip(names, data_tups)} - names, data = self._do_date_conversions(names, data) - index, names = self._make_index(data, alldata, names) + names, date_data = self._do_date_conversions(names, data) + index, names = self._make_index(date_data, alldata, names) # maybe create a mi on the columns names = self._maybe_make_multi_index_columns(names, self.col_names) - return index, names, data + return index, names, date_data def _filter_usecols(self, names): # hackish diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 910b0605c486b..e0572a6f2f63a 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -1,6 +1,8 @@ from __future__ import annotations from collections import ( + Mapping, + Sequence, abc, defaultdict, ) @@ -11,10 +13,8 @@ import sys from typing import ( DefaultDict, - Dict, + Hashable, Iterator, - Tuple, - Union, cast, ) import warnings @@ -259,7 +259,7 @@ def read(self, rows: int | None = None): # done with first read, next time raise StopIteration self._first_chunk = False - columns: list[Scalar] | list[tuple[Scalar, ...]] = list(self.orig_names) + columns: Sequence[Hashable] = list(self.orig_names) if not len(content): # pragma: no cover # DataFrame with the right metadata, even though it's length 0 names = self._maybe_dedup_names(self.orig_names) @@ -284,10 +284,7 @@ def read(self, rows: int | None = None): data, columns = self._exclude_implicit_index(alldata) columns, date_data = self._do_date_conversions(columns, data) - data = cast( - Union[Dict[Scalar, np.ndarray], Dict[Tuple[Scalar, ...], np.ndarray]], - date_data, - ) + data = cast(Mapping[Hashable, np.ndarray], date_data) conv_data = self._convert_data(data) index, columns = self._make_index(conv_data, alldata, columns, indexnamerow) @@ -297,10 +294,7 @@ def read(self, rows: int | None = None): def _exclude_implicit_index( self, alldata: list[np.ndarray], - ) -> tuple[ - dict[Scalar, np.ndarray] | 
dict[tuple[Scalar, ...], np.ndarray], - list[Scalar] | list[tuple[Scalar, ...]], - ]: + ) -> tuple[Mapping[Hashable, np.ndarray], Sequence[Hashable]]: names = self._maybe_dedup_names(self.orig_names) offset = 0 @@ -323,8 +317,9 @@ def get_chunk(self, size=None): return self.read(rows=size) def _convert_data( - self, data: dict[Scalar, np.ndarray] | dict[tuple[Scalar, ...], np.ndarray] - ) -> dict[Scalar, ArrayLike] | dict[tuple[Scalar, ...], ArrayLike]: + self, + data: Mapping[Hashable, np.ndarray], + ) -> Mapping[Hashable, ArrayLike]: # apply converters def _clean_mapping(mapping): """converts col numbers to names""" @@ -765,6 +760,7 @@ def _next_iter_line(self, row_num: int) -> list[Scalar] | None: # assert for mypy, data is Iterator[str] or None, would error in next assert self.data is not None line = next(self.data) + # for mypy assert isinstance(line, list) return line except csv.Error as e: @@ -882,7 +878,7 @@ def _clear_buffer(self) -> None: _implicit_index = False - def _get_index_name(self, columns: list[Scalar] | list[tuple[Scalar, ...]]): + def _get_index_name(self, columns: Sequence[Hashable]): """ Try several cases to get lines: From 97425f14a2e318a97ea74e60bd1bf12bf632f1e7 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 23 Nov 2021 22:27:44 +0100 Subject: [PATCH 09/14] Adjust types --- pandas/io/parsers/base_parser.py | 9 +++------ pandas/io/parsers/python_parser.py | 6 +++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 0d4bfb9c175d7..c793c3713cb60 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -33,7 +33,6 @@ DtypeArg, FilePath, ReadCsvBuffer, - Scalar, ) from pandas.errors import ( ParserError, @@ -244,9 +243,7 @@ def _open_handles( errors=kwds.get("encoding_errors", "strict"), ) - def _validate_parse_dates_presence( - self, columns: list[Scalar] | list[tuple[Scalar, ...]] - ) -> None: + def _validate_parse_dates_presence(self, columns: Sequence[Hashable]) -> None: """ Check if parse_dates are in columns. @@ -404,7 +401,7 @@ def _maybe_dedup_names(self, names: Sequence[Hashable]) -> Sequence[Hashable]: # would be nice! if self.mangle_dupe_cols: names = list(names) # so we can index - counts: DefaultDict[Scalar | tuple[Scalar, ...], int] = defaultdict(int) + counts: DefaultDict[Hashable, int] = defaultdict(int) is_potential_mi = _is_potential_multi_index(names, self.index_col) for i, col in enumerate(names): @@ -631,7 +628,7 @@ def _convert_to_ndarrays( @final def _set_noconvert_dtype_columns( - self, col_indices: list[int], names: list[Scalar] | list[tuple[Scalar, ...]] + self, col_indices: list[int], names: Sequence[Hashable] ) -> set[int]: """ Set the columns that should not undergo dtype conversions. diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index e0572a6f2f63a..db8e210add447 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -130,7 +130,7 @@ def __init__( # Now self.columns has the set of columns that we will process. # The original set is stored in self.original_columns. 
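# On the cast(...) call in read() above: typing.cast performs no runtime
# conversion or validation -- it returns its argument unchanged and merely
# tells the checker to treat the value as the stated type where the
# _do_date_conversions overloads infer something wider.  A minimal sketch:

from typing import Hashable, Mapping, cast

import numpy as np

def narrow(data: object) -> Mapping[Hashable, np.ndarray]:
    # Nothing is checked here; an incorrect cast only surfaces later.
    return cast(Mapping[Hashable, np.ndarray], data)

d = narrow({"a": np.array([1, 2])})
print(d["a"].sum())  # 3
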
         # error: Cannot determine type of 'index_names'
-        self.columns: list[Scalar] | list[tuple[Scalar, ...]]
+        self.columns: list[Hashable]
         (
             self.columns,
             self.index_names,
@@ -142,7 +142,7 @@ def __init__(
         )
 
         # get popped off for index
-        self.orig_names: list[Scalar] | list[tuple[Scalar, ...]] = list(self.columns)
+        self.orig_names: list[Hashable] = list(self.columns)
@@ -878,7 +878,7 @@ def _clear_buffer(self) -> None:
 
     _implicit_index = False
 
-    def _get_index_name(self, columns: Sequence[Hashable]):
+    def _get_index_name(self, columns: list[Hashable]):
         """
         Try several cases to get lines:

From 287098713220b120c3e92a406001c57d76c56bdf Mon Sep 17 00:00:00 2001
From: phofl
Date: Tue, 23 Nov 2021 22:38:50 +0100
Subject: [PATCH 10/14] Add docstring

---
 pandas/io/parsers/base_parser.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index c793c3713cb60..0b58f917dd6f9 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -333,8 +333,18 @@ def _extract_multi_indexer_columns(
         passed_names: bool = False,
     ):
         """
-        extract and return the names, index_names, col_names
-        header is a list-of-lists returned from the parsers
+        Extract and return the names, index_names, col_names if the column
+        names are a MultiIndex.
+
+        Parameters
+        ----------
+        header: list of lists
+            The header rows
+        index_names: list, optional
+            The names of the future index
+        passed_names: bool, default False
+            A flag specifying if names were passed
+
         """
         if len(header) < 2:
             return header[0], index_names, None, passed_names

From 2e85ac54d1aa34a8771113ad030b30d68e2250e1 Mon Sep 17 00:00:00 2001
From: phofl
Date: Wed, 24 Nov 2021 00:04:42 +0100
Subject: [PATCH 11/14] Fix cast

---
 pandas/io/parsers/python_parser.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index db8e210add447..c4d5e6bf91f7a 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -11,6 +11,7 @@
 from io import StringIO
 import re
 import sys
+import typing
 from typing import (
     DefaultDict,
     Hashable,
@@ -284,7 +285,7 @@ def read(self, rows: int | None = None):
         data, columns = self._exclude_implicit_index(alldata)
 
         columns, date_data = self._do_date_conversions(columns, data)
-        data = cast(Mapping[Hashable, np.ndarray], date_data)
+        data = cast(typing.Mapping[typing.Hashable, np.ndarray], date_data)
 
         conv_data = self._convert_data(data)
         index, columns = self._make_index(conv_data, alldata, columns, indexnamerow)

From 8bd226bacb2a01879ae84c1f5f4ddaea7886edf8 Mon Sep 17 00:00:00 2001
From: phofl
Date: Wed, 24 Nov 2021 19:06:44 +0100
Subject: [PATCH 12/14] Move import

---
 pandas/io/parsers/base_parser.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 0b58f917dd6f9..0cc5643a8e593 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -1,9 +1,6 @@
 from __future__ import annotations
 
-from collections import (
-    Hashable,
-    defaultdict,
-)
+from collections import defaultdict
 import csv
 import datetime
 from enum import Enum
@@ -12,6 +9,7 @@
     Any,
     Callable,
     DefaultDict,
+    Hashable,
     Iterable,
     Mapping,
     Sequence,

From 91177a116b1493a50b193f41f9ca175287fe1e32 Mon Sep 17 00:00:00 2001
From: phofl
Date: Wed, 24 Nov 2021 19:30:29 +0100 Subject: [PATCH 13/14] Move import --- pandas/io/parsers/python_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index c4d5e6bf91f7a..6148f147bb5b2 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -1,8 +1,6 @@ from __future__ import annotations from collections import ( - Mapping, - Sequence, abc, defaultdict, ) @@ -16,6 +14,8 @@ DefaultDict, Hashable, Iterator, + Mapping, + Sequence, cast, ) import warnings From 41aca2ff043399637c7fa8b55223792e12d57c02 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 28 Nov 2021 22:12:36 +0100 Subject: [PATCH 14/14] Fix merge conflicts in typing --- pandas/io/parsers/python_parser.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 4cc240265542a..2d1433a8f21c8 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -281,12 +281,12 @@ def read(self, rows: int | None = None): alldata = self._rows_to_cols(content) data, columns = self._exclude_implicit_index(alldata) - data = self._convert_data(data) - columns, data = self._do_date_conversions(columns, data) + conv_data = self._convert_data(data) + columns, conv_data = self._do_date_conversions(columns, conv_data) - index, columns = self._make_index(data, alldata, columns, indexnamerow) + index, columns = self._make_index(conv_data, alldata, columns, indexnamerow) - return index, columns, data + return index, columns, conv_data def _exclude_implicit_index( self, @@ -461,6 +461,7 @@ def _infer_columns( if clear_buffer: self._clear_buffer() + first_line: list[Scalar] | None if names is not None: # Read first row after header to check if data are longer try:
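
A closing note on a change that recurs in patches 01 and 04: hard-coded
warning stacklevels (`stacklevel=8`, `stacklevel=6`) break silently whenever
the internal call depth changes, which is why they were replaced with
`pandas.util._exceptions.find_stack_level()`, computed at warning time so the
warning points at the first frame outside the library. A simplified sketch of
the idea (not pandas' exact implementation; `_PKG_DIR` is a stand-in for the
installed package directory):

    import inspect
    import os
    import warnings

    _PKG_DIR = os.path.dirname(os.path.abspath(__file__))  # stand-in

    def find_stack_level() -> int:
        # Count how many frames, starting from this one, still live inside
        # the package; warnings.warn() can then use that count as stacklevel
        # so the reported location is the caller's code, not the library's.
        n = 0
        for frame_info in inspect.stack():
            if frame_info.filename.startswith(_PKG_DIR):
                n += 1
            else:
                break
        return max(n, 1)

    def deprecated_api() -> None:
        warnings.warn(
            "deprecated; use the new API instead",
            FutureWarning,
            stacklevel=find_stack_level(),
        )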