10
10
Any ,
11
11
Callable ,
12
12
DefaultDict ,
13
+ Hashable ,
13
14
Iterable ,
15
+ Mapping ,
14
16
Sequence ,
15
17
cast ,
16
18
final ,
19
+ overload ,
17
20
)
18
21
import warnings
19
22
56
59
from pandas .core .dtypes .dtypes import CategoricalDtype
57
60
from pandas .core .dtypes .missing import isna
58
61
62
+ from pandas import DataFrame
59
63
from pandas .core import algorithms
60
64
from pandas .core .arrays import Categorical
61
65
from pandas .core .indexes .api import (
@@ -241,7 +245,7 @@ def _open_handles(
241
245
errors = kwds .get ("encoding_errors" , "strict" ),
242
246
)
243
247
244
- def _validate_parse_dates_presence (self , columns : list [ str ]) -> Iterable :
248
+ def _validate_parse_dates_presence (self , columns : Sequence [ Hashable ]) -> Iterable :
245
249
"""
246
250
Check if parse_dates are in columns.
247
251
@@ -337,11 +341,24 @@ def _should_parse_dates(self, i: int) -> bool:
337
341
338
342
@final
339
343
def _extract_multi_indexer_columns (
340
- self , header , index_names , passed_names : bool = False
344
+ self ,
345
+ header ,
346
+ index_names : list | None ,
347
+ passed_names : bool = False ,
341
348
):
342
349
"""
343
- extract and return the names, index_names, col_names
344
- header is a list-of-lists returned from the parsers
350
+ Extract and return the names, index_names, col_names if the column
351
+ names are a MultiIndex.
352
+
353
+ Parameters
354
+ ----------
355
+ header: list of lists
356
+ The header rows
357
+ index_names: list, optional
358
+ The names of the future index
359
+ passed_names: bool, default False
360
+ A flag specifying if names were passed
361
+
345
362
"""
346
363
if len (header ) < 2 :
347
364
return header [0 ], index_names , None , passed_names
@@ -400,15 +417,15 @@ def extract(r):
400
417
return names , index_names , col_names , passed_names
401
418
402
419
@final
403
- def _maybe_dedup_names (self , names ) :
420
+ def _maybe_dedup_names (self , names : Sequence [ Hashable ]) -> Sequence [ Hashable ] :
404
421
# see gh-7160 and gh-9424: this helps to provide
405
422
# immediate alleviation of the duplicate names
406
423
# issue and appears to be satisfactory to users,
407
424
# but ultimately, not needing to butcher the names
408
425
# would be nice!
409
426
if self .mangle_dupe_cols :
410
427
names = list (names ) # so we can index
411
- counts : DefaultDict [int | str | tuple , int ] = defaultdict (int )
428
+ counts : DefaultDict [Hashable , int ] = defaultdict (int )
412
429
is_potential_mi = _is_potential_multi_index (names , self .index_col )
413
430
414
431
for i , col in enumerate (names ):
@@ -418,6 +435,8 @@ def _maybe_dedup_names(self, names):
418
435
counts [col ] = cur_count + 1
419
436
420
437
if is_potential_mi :
438
+ # for mypy
439
+ assert isinstance (col , tuple )
421
440
col = col [:- 1 ] + (f"{ col [- 1 ]} .{ cur_count } " ,)
422
441
else :
423
442
col = f"{ col } .{ cur_count } "
@@ -572,7 +591,7 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
572
591
@final
573
592
def _convert_to_ndarrays (
574
593
self ,
575
- dct : dict ,
594
+ dct : Mapping ,
576
595
na_values ,
577
596
na_fvalues ,
578
597
verbose : bool = False ,
@@ -664,7 +683,7 @@ def _convert_to_ndarrays(
664
683
665
684
@final
666
685
def _set_noconvert_dtype_columns (
667
- self , col_indices : list [int ], names : list [ int | str | tuple ]
686
+ self , col_indices : list [int ], names : Sequence [ Hashable ]
668
687
) -> set [int ]:
669
688
"""
670
689
Set the columns that should not undergo dtype conversions.
@@ -848,7 +867,27 @@ def _cast_types(self, values, cast_type, column):
848
867
) from err
849
868
return values
850
869
851
- def _do_date_conversions (self , names , data ):
870
+ @overload
871
+ def _do_date_conversions (
872
+ self ,
873
+ names : Index ,
874
+ data : DataFrame ,
875
+ ) -> tuple [Sequence [Hashable ] | Index , DataFrame ]:
876
+ ...
877
+
878
+ @overload
879
+ def _do_date_conversions (
880
+ self ,
881
+ names : Sequence [Hashable ],
882
+ data : Mapping [Hashable , ArrayLike ],
883
+ ) -> tuple [Sequence [Hashable ], Mapping [Hashable , ArrayLike ]]:
884
+ ...
885
+
886
+ def _do_date_conversions (
887
+ self ,
888
+ names : Sequence [Hashable ] | Index ,
889
+ data : Mapping [Hashable , ArrayLike ] | DataFrame ,
890
+ ) -> tuple [Sequence [Hashable ] | Index , Mapping [Hashable , ArrayLike ] | DataFrame ]:
852
891
# returns names, data
853
892
854
893
if self .parse_dates is not None :
@@ -864,7 +903,11 @@ def _do_date_conversions(self, names, data):
864
903
865
904
return names , data
866
905
867
- def _check_data_length (self , columns : list [str ], data : list [ArrayLike ]) -> None :
906
+ def _check_data_length (
907
+ self ,
908
+ columns : Sequence [Hashable ],
909
+ data : Sequence [ArrayLike ],
910
+ ) -> None :
868
911
"""Checks if length of data is equal to length of column names.
869
912
870
913
One set of trailing commas is allowed. self.index_col not False
0 commit comments