Skip to content

Commit 3d99081

Browse files
authored
TYP: Typ part of python_parser (#44406)
1 parent 7f06a8a commit 3d99081

File tree

4 files changed

+133
-57
lines changed

4 files changed

+133
-57
lines changed

pandas/io/parsers/arrow_parser_wrapper.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,12 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame:
110110
multi_index_named = False
111111
frame.columns = self.names
112112
# we only need the frame not the names
113-
frame.columns, frame = self._do_date_conversions(frame.columns, frame)
113+
# error: Incompatible types in assignment (expression has type
114+
# "Union[List[Union[Union[str, int, float, bool], Union[Period, Timestamp,
115+
# Timedelta, Any]]], Index]", variable has type "Index") [assignment]
116+
frame.columns, frame = self._do_date_conversions( # type: ignore[assignment]
117+
frame.columns, frame
118+
)
114119
if self.index_col is not None:
115120
for i, item in enumerate(self.index_col):
116121
if is_integer(item):

pandas/io/parsers/base_parser.py

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,13 @@
1010
Any,
1111
Callable,
1212
DefaultDict,
13+
Hashable,
1314
Iterable,
15+
Mapping,
1416
Sequence,
1517
cast,
1618
final,
19+
overload,
1720
)
1821
import warnings
1922

@@ -56,6 +59,7 @@
5659
from pandas.core.dtypes.dtypes import CategoricalDtype
5760
from pandas.core.dtypes.missing import isna
5861

62+
from pandas import DataFrame
5963
from pandas.core import algorithms
6064
from pandas.core.arrays import Categorical
6165
from pandas.core.indexes.api import (
@@ -241,7 +245,7 @@ def _open_handles(
241245
errors=kwds.get("encoding_errors", "strict"),
242246
)
243247

244-
def _validate_parse_dates_presence(self, columns: list[str]) -> Iterable:
248+
def _validate_parse_dates_presence(self, columns: Sequence[Hashable]) -> Iterable:
245249
"""
246250
Check if parse_dates are in columns.
247251
@@ -337,11 +341,24 @@ def _should_parse_dates(self, i: int) -> bool:
337341

338342
@final
339343
def _extract_multi_indexer_columns(
340-
self, header, index_names, passed_names: bool = False
344+
self,
345+
header,
346+
index_names: list | None,
347+
passed_names: bool = False,
341348
):
342349
"""
343-
extract and return the names, index_names, col_names
344-
header is a list-of-lists returned from the parsers
350+
Extract and return the names, index_names, col_names if the column
351+
names are a MultiIndex.
352+
353+
Parameters
354+
----------
355+
header: list of lists
356+
The header rows
357+
index_names: list, optional
358+
The names of the future index
359+
passed_names: bool, default False
360+
A flag specifying if names were passed
361+
345362
"""
346363
if len(header) < 2:
347364
return header[0], index_names, None, passed_names
@@ -400,15 +417,15 @@ def extract(r):
400417
return names, index_names, col_names, passed_names
401418

402419
@final
403-
def _maybe_dedup_names(self, names):
420+
def _maybe_dedup_names(self, names: Sequence[Hashable]) -> Sequence[Hashable]:
404421
# see gh-7160 and gh-9424: this helps to provide
405422
# immediate alleviation of the duplicate names
406423
# issue and appears to be satisfactory to users,
407424
# but ultimately, not needing to butcher the names
408425
# would be nice!
409426
if self.mangle_dupe_cols:
410427
names = list(names) # so we can index
411-
counts: DefaultDict[int | str | tuple, int] = defaultdict(int)
428+
counts: DefaultDict[Hashable, int] = defaultdict(int)
412429
is_potential_mi = _is_potential_multi_index(names, self.index_col)
413430

414431
for i, col in enumerate(names):
@@ -418,6 +435,8 @@ def _maybe_dedup_names(self, names):
418435
counts[col] = cur_count + 1
419436

420437
if is_potential_mi:
438+
# for mypy
439+
assert isinstance(col, tuple)
421440
col = col[:-1] + (f"{col[-1]}.{cur_count}",)
422441
else:
423442
col = f"{col}.{cur_count}"
@@ -572,7 +591,7 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
572591
@final
573592
def _convert_to_ndarrays(
574593
self,
575-
dct: dict,
594+
dct: Mapping,
576595
na_values,
577596
na_fvalues,
578597
verbose: bool = False,
@@ -664,7 +683,7 @@ def _convert_to_ndarrays(
664683

665684
@final
666685
def _set_noconvert_dtype_columns(
667-
self, col_indices: list[int], names: list[int | str | tuple]
686+
self, col_indices: list[int], names: Sequence[Hashable]
668687
) -> set[int]:
669688
"""
670689
Set the columns that should not undergo dtype conversions.
@@ -848,7 +867,27 @@ def _cast_types(self, values, cast_type, column):
848867
) from err
849868
return values
850869

851-
def _do_date_conversions(self, names, data):
870+
@overload
871+
def _do_date_conversions(
872+
self,
873+
names: Index,
874+
data: DataFrame,
875+
) -> tuple[Sequence[Hashable] | Index, DataFrame]:
876+
...
877+
878+
@overload
879+
def _do_date_conversions(
880+
self,
881+
names: Sequence[Hashable],
882+
data: Mapping[Hashable, ArrayLike],
883+
) -> tuple[Sequence[Hashable], Mapping[Hashable, ArrayLike]]:
884+
...
885+
886+
def _do_date_conversions(
887+
self,
888+
names: Sequence[Hashable] | Index,
889+
data: Mapping[Hashable, ArrayLike] | DataFrame,
890+
) -> tuple[Sequence[Hashable] | Index, Mapping[Hashable, ArrayLike] | DataFrame]:
852891
# returns names, data
853892

854893
if self.parse_dates is not None:
@@ -864,7 +903,11 @@ def _do_date_conversions(self, names, data):
864903

865904
return names, data
866905

867-
def _check_data_length(self, columns: list[str], data: list[ArrayLike]) -> None:
906+
def _check_data_length(
907+
self,
908+
columns: Sequence[Hashable],
909+
data: Sequence[ArrayLike],
910+
) -> None:
868911
"""Checks if length of data is equal to length of column names.
869912
870913
One set of trailing commas is allowed. self.index_col not False

pandas/io/parsers/c_parser_wrapper.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ def read(self, nrows=None):
279279
data_tups = sorted(data.items())
280280
data = {k: v for k, (i, v) in zip(names, data_tups)}
281281

282-
names, data = self._do_date_conversions(names, data)
282+
names, date_data = self._do_date_conversions(names, data)
283283

284284
else:
285285
# rename dict keys
@@ -302,13 +302,13 @@ def read(self, nrows=None):
302302

303303
data = {k: v for k, (i, v) in zip(names, data_tups)}
304304

305-
names, data = self._do_date_conversions(names, data)
306-
index, names = self._make_index(data, alldata, names)
305+
names, date_data = self._do_date_conversions(names, data)
306+
index, names = self._make_index(date_data, alldata, names)
307307

308308
# maybe create a mi on the columns
309309
names = self._maybe_make_multi_index_columns(names, self.col_names)
310310

311-
return index, names, data
311+
return index, names, date_data
312312

313313
def _filter_usecols(self, names):
314314
# hackish

0 commit comments

Comments
 (0)