
Commit 3173270

format
1 parent 2a445f3 commit 3173270


2 files changed (+53, -23 lines)


pandas/io/common.py

Lines changed: 16 additions & 12 deletions
@@ -9,7 +9,6 @@
 import codecs
 from collections import defaultdict
 from collections.abc import (
-    Generator,
     Hashable,
     Iterable,
     Mapping,
@@ -1304,7 +1303,8 @@ def _infer_protocol(path: str) -> str:
 def _match_file(
     path: Path | PurePosixPath, extensions: set[str] | None, glob: str | None
 ) -> bool:
-    """Check if the file matches the given extensions and glob pattern.
+    """
+    Check if the file matches the given extensions and glob pattern.
 
     Parameters
     ----------
     path : Path or PurePosixPath
@@ -1324,10 +1324,10 @@ def _match_file(
 
 
 def iterdir(
-    path: FilePath,
+    path: FilePath | BaseBuffer,
     extensions: str | Iterable[str] | None = None,
     glob: str | None = None,
-) -> Generator[Path | PurePosixPath]:
+) -> list[Path | PurePosixPath] | BaseBuffer:
     """Yield file paths in a directory (no nesting allowed).
 
     Supports:
@@ -1346,7 +1346,7 @@ def iterdir(
         Only yield files matching the given glob pattern.
         If None, all files are yielded.
 
-    Yields
+    Returns
     ------
     pathlib.Path or pathlib.PurePosixPath
         File paths within the directory.
@@ -1358,6 +1358,9 @@ def iterdir(
     ImportError
         If fsspec is required but not installed.
     """
+    if hasattr(path, "read") or hasattr(path, "write"):
+        return path
+
     if extensions is not None:
         if isinstance(extensions, str):
             extensions = {extensions.lower()}
@@ -1375,18 +1378,18 @@ def iterdir(
                 extensions,
                 glob,
             ):
-                yield resolved_path
-            return
+                return [resolved_path]
 
+        result = []
         for entry in resolved_path.iterdir():
             if entry.is_file():
                 if _match_file(
                     entry,
                     extensions,
                     glob,
                 ):
-                    yield entry
-        return
+                    result.append(entry)
+        return result
 
     # Remote paths
     fsspec = import_optional_dependency("fsspec", extra=scheme)
@@ -1398,9 +1401,9 @@ def iterdir(
             extensions,
             glob,
         ):
-            yield PurePosixPath(path_without_scheme)
-        return
+            return [PurePosixPath(path_without_scheme)]
 
+    result = []
     for file in fs.ls(path_without_scheme, detail=True):
         if file["type"] == "file":
             path_obj = PurePosixPath(file["name"])
@@ -1409,4 +1412,5 @@ def iterdir(
                 extensions,
                 glob,
             ):
-                yield path_obj
+                result.append(path_obj)
+    return result
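
The reworked iterdir returns eagerly instead of yielding: buffers are passed straight through, a single matching file comes back as a one-element list, and directories (local or fsspec-backed) come back as a list of matching paths. A minimal sketch of that behaviour based only on the hunks above; iterdir is a private pandas helper on this branch, the directory name and glob pattern are made up, and the exact extension/glob matching rules live in _match_file, outside this diff:

from io import StringIO

from pandas.io.common import iterdir  # private helper, as changed on this branch

# Hypothetical local directory: expect a list of pathlib.Path objects for
# files that pass the extension/glob filters (matching rules not shown here).
matching = iterdir("data/", extensions="csv", glob="2024-*")
for path in matching:
    print(path)

# Buffers short-circuit: anything with .read()/.write() is returned as-is,
# so existing read_csv(buffer) code paths keep working.
buf = StringIO("a,b\n1,2\n")
assert iterdir(buf) is buf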

pandas/io/parsers/readers.py

Lines changed: 37 additions & 11 deletions
Original file line number	Diff line number	Diff line change
@@ -55,6 +55,7 @@
 from pandas.io.common import (
     IOHandles,
     get_handle,
+    iterdir,
     stringify_path,
     validate_header_arg,
 )
@@ -73,6 +74,7 @@
 if TYPE_CHECKING:
     from collections.abc import (
         Callable,
+        Generator,
         Hashable,
         Iterable,
         Mapping,
@@ -668,9 +670,23 @@ def _validate_names(names: Sequence[Hashable] | None) -> None:
         raise ValueError("Names should be an ordered collection.")
 
 
+def _multi_file_generator(
+    list_of_files: list[str], kwds
+) -> Generator[DataFrame] | Generator[TextFileReader]:
+    """Generator for multiple files."""
+    for file in list_of_files:
+        parser = TextFileReader(file, **kwds)
+
+        if kwds.get("chunksize", None) or kwds.get("iterator", False):
+            yield parser
+        else:
+            with parser:
+                yield parser.read(kwds.get("nrows", None))
+
+
 def _read(
     filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], kwds
-) -> DataFrame | TextFileReader:
+) -> DataFrame | TextFileReader | Generator[DataFrame] | Generator[TextFileReader]:
     """Generic reader of line files."""
     # if we pass a date_format and parse_dates=False, we should not parse the
     # dates GH#44366
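
The new _multi_file_generator is the multi-file counterpart of the single-file path in _read: per input file it lazily produces either a TextFileReader (when chunksize/iterator is requested) or a fully read DataFrame. A standalone sketch of the same pattern using only public pandas API; the file names are placeholders and this is not the internal implementation:

from collections.abc import Iterator

import pandas as pd


def read_many(paths: list[str], **kwds) -> Iterator[pd.DataFrame]:
    # One DataFrame per file, produced lazily like _multi_file_generator above,
    # but built on public pd.read_csv instead of the internal TextFileReader.
    for path in paths:
        yield pd.read_csv(path, **kwds)


# Placeholder file names; nothing is read until the generator is consumed.
frames = read_many(["part-0.csv", "part-1.csv"])
combined = pd.concat(frames, ignore_index=True)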
@@ -709,14 +725,25 @@ def _read(
     # Check for duplicates in names.
     _validate_names(kwds.get("names", None))
 
-    # Create the parser.
-    parser = TextFileReader(filepath_or_buffer, **kwds)
+    extensions = kwds.get("extensions", None)
+    glob = kwds.get("glob", None)
+    files = iterdir(filepath_or_buffer, extensions, glob)
+
+    if len(files) == 0:
+        raise FileNotFoundError(
+            f"No files found in {filepath_or_buffer}, "
+            f"with extension(s) {extensions} and glob pattern {glob}"
+        )
+    elif len(files) == 1:
+        parser = TextFileReader(files[0], **kwds)
 
-    if chunksize or iterator:
-        return parser
+        if chunksize or iterator:
+            return parser
 
-    with parser:
-        return parser.read(nrows)
+        with parser:
+            return parser.read(nrows)
+    else:
+        return _multi_file_generator(files, kwds)
 
 
 @overload
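
The rewired _read dispatches on how many paths iterdir found: none raises FileNotFoundError, exactly one keeps the existing single-file behaviour (a DataFrame, or a TextFileReader when chunksize/iterator is set), and several defer to _multi_file_generator. A rough standalone sketch of that dispatch using public API only; dispatch, its arguments, and the error message are illustrative, not the pandas code:

import pandas as pd


def dispatch(files: list[str], chunksize: int | None = None, **kwds):
    # Mirrors the branch structure in the hunk above.
    if len(files) == 0:
        raise FileNotFoundError("no files matched the requested extensions/glob")
    if len(files) == 1:
        if chunksize:
            # pd.read_csv with chunksize returns a TextFileReader.
            return pd.read_csv(files[0], chunksize=chunksize, **kwds)
        return pd.read_csv(files[0], **kwds)
    # Several files: a lazy generator of DataFrames, one per file.
    return (pd.read_csv(f, **kwds) for f in files)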
@@ -932,10 +959,9 @@ def read_table(
     skipfooter: int = 0,
     nrows: int | None = None,
     # NA and Missing Data Handling
-    na_values: Hashable
-    | Iterable[Hashable]
-    | Mapping[Hashable, Iterable[Hashable]]
-    | None = None,
+    na_values: (
+        Hashable | Iterable[Hashable] | Mapping[Hashable, Iterable[Hashable]] | None
+    ) = None,
     keep_default_na: bool = True,
     na_filter: bool = True,
     skip_blank_lines: bool = True,
