From f2deab1db3457d12c3a5afbecbd9bf1823da8d0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 29 Sep 2023 22:03:55 -0400 Subject: [PATCH 1/3] TYP: read_csv's usecols --- pandas/_typing.py | 9 +++++++ pandas/io/parsers/readers.py | 52 ++++++++---------------------------- 2 files changed, 20 insertions(+), 41 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index f18c67fcb0c90..0c73a5d221135 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -509,3 +509,12 @@ def closed(self) -> bool: # Offsets OffsetCalendar = Union[np.busdaycalendar, "AbstractHolidayCalendar"] + +# read_csv: usecols +UsecolsArgType = Union[ + SequenceNotStr[Hashable], + Sequence[int], + AnyArrayLike, + Callable[[HashableT], bool], + None, +] diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 6ce6ac71b1ddd..1de0cc43a1250 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -77,11 +77,11 @@ DtypeArg, DtypeBackend, FilePath, - HashableT, IndexLabel, ReadCsvBuffer, Self, StorageOptions, + UsecolsArgType, ) _doc_read_csv_and_table = ( r""" @@ -645,10 +645,7 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: list[HashableT] - | tuple[HashableT] - | Callable[[Hashable], bool] - | None = ..., + usecols: UsecolsArgType = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -707,10 +704,7 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: list[HashableT] - | tuple[HashableT] - | Callable[[Hashable], bool] - | None = ..., + usecols: UsecolsArgType = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -770,10 +764,7 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: list[HashableT] - | tuple[HashableT] - | Callable[[Hashable], bool] - | None = ..., + usecols: UsecolsArgType = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -833,10 +824,7 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: list[HashableT] - | tuple[HashableT] - | Callable[[Hashable], bool] - | None = ..., + usecols: UsecolsArgType = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -907,10 +895,7 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = "infer", names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default, index_col: IndexLabel | Literal[False] | None = None, - usecols: list[HashableT] - | tuple[HashableT] - | Callable[[Hashable], bool] - | None = None, + usecols: UsecolsArgType = None, # General Parsing Configuration dtype: DtypeArg | None = None, engine: CSVEngine | None = None, @@ -1005,10 +990,7 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: list[HashableT] - | tuple[HashableT] - | Callable[[Hashable], bool] - | None = ..., + usecols: UsecolsArgType = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -1065,10 +1047,7 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: list[HashableT] - | tuple[HashableT] - | Callable[[Hashable], bool] - | None = ..., + usecols: UsecolsArgType = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -1125,10 +1104,7 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: list[HashableT] - | tuple[HashableT] - | Callable[[Hashable], bool] - | None = ..., + usecols: UsecolsArgType = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -1185,10 +1161,7 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: list[HashableT] - | tuple[HashableT] - | Callable[[Hashable], bool] - | None = ..., + usecols: UsecolsArgType = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -1258,10 +1231,7 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = "infer", names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default, index_col: IndexLabel | Literal[False] | None = None, - usecols: list[HashableT] - | tuple[HashableT] - | Callable[[Hashable], bool] - | None = None, + usecols: UsecolsArgType = None, # General Parsing Configuration dtype: DtypeArg | None = None, engine: CSVEngine | None = None, From fe656ea22813c1e86c9bd1c1c56be5e5a2e69516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Mon, 2 Oct 2023 21:24:49 -0400 Subject: [PATCH 2/3] adjust doc --- pandas/io/parsers/readers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 1de0cc43a1250..32f8a1fe81a9f 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -142,7 +142,7 @@ Note: ``index_col=False`` can be used to force pandas to *not* use the first column as the index, e.g., when you have a malformed file with delimiters at the end of each line. -usecols : list of Hashable or Callable, optional +usecols : Sequence of Hashable or Callable, optional Subset of columns to select, denoted either by column labels or column indices. If list-like, all elements must either be positional (i.e. integer indices into the document columns) or strings From f3b158c7d56e9fa6b29676801a603a68c3f2632a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 3 Oct 2023 19:48:30 -0400 Subject: [PATCH 3/3] use range --- pandas/_typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 0c73a5d221135..de01434c09c39 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -513,7 +513,7 @@ def closed(self) -> bool: # read_csv: usecols UsecolsArgType = Union[ SequenceNotStr[Hashable], - Sequence[int], + range, AnyArrayLike, Callable[[HashableT], bool], None,