
Commit 3173270

format
1 parent 2a445f3 commit 3173270


2 files changed (+53, -23 lines)


pandas/io/common.py

Lines changed: 16 additions & 12 deletions
@@ -9,7 +9,6 @@
 import codecs
 from collections import defaultdict
 from collections.abc import (
-    Generator,
     Hashable,
     Iterable,
     Mapping,
@@ -1304,7 +1303,8 @@ def _infer_protocol(path: str) -> str:
 def _match_file(
     path: Path | PurePosixPath, extensions: set[str] | None, glob: str | None
 ) -> bool:
-    """Check if the file matches the given extensions and glob pattern.
+    """
+    Check if the file matches the given extensions and glob pattern.
 
     Parameters
     ----------
     path : Path or PurePosixPath
@@ -1324,10 +1324,10 @@ def _match_file(
 
 
 def iterdir(
-    path: FilePath,
+    path: FilePath | BaseBuffer,
     extensions: str | Iterable[str] | None = None,
     glob: str | None = None,
-) -> Generator[Path | PurePosixPath]:
+) -> list[Path | PurePosixPath] | BaseBuffer:
     """Yield file paths in a directory (no nesting allowed).
 
     Supports:
@@ -1346,7 +1346,7 @@ def iterdir(
         Only yield files matching the given glob pattern.
         If None, all files are yielded.
 
-    Yields
+    Returns
     ------
     pathlib.Path or pathlib.PurePosixPath
         File paths within the directory.
@@ -1358,6 +1358,9 @@ def iterdir(
     ImportError
         If fsspec is required but not installed.
     """
+    if hasattr(path, "read") or hasattr(path, "write"):
+        return path
+
     if extensions is not None:
         if isinstance(extensions, str):
             extensions = {extensions.lower()}
@@ -1375,18 +1378,18 @@ def iterdir(
                 extensions,
                 glob,
             ):
-                yield resolved_path
-            return
+                return [resolved_path]
 
+        result = []
         for entry in resolved_path.iterdir():
             if entry.is_file():
                 if _match_file(
                     entry,
                     extensions,
                     glob,
                 ):
-                    yield entry
-        return
+                    result.append(entry)
+        return result
 
     # Remote paths
     fsspec = import_optional_dependency("fsspec", extra=scheme)
@@ -1398,9 +1401,9 @@ def iterdir(
             extensions,
             glob,
         ):
-            yield PurePosixPath(path_without_scheme)
-        return
+            return [PurePosixPath(path_without_scheme)]
 
+    result = []
     for file in fs.ls(path_without_scheme, detail=True):
         if file["type"] == "file":
             path_obj = PurePosixPath(file["name"])
@@ -1409,4 +1412,5 @@ def iterdir(
                 extensions,
                 glob,
             ):
-                yield path_obj
+                result.append(path_obj)
+    return result
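
The reworked iterdir returns eagerly instead of yielding: buffers are passed straight through, a single matching file comes back as a one-element list, and directories (local or fsspec-backed) come back as a list of matching paths. A minimal sketch of that behaviour based only on the hunks above; iterdir is a private pandas helper on this branch, the directory name and glob pattern are made up, and the exact extension/glob matching rules live in _match_file, outside this diff:

from io import StringIO

from pandas.io.common import iterdir  # private helper, as changed on this branch

# Hypothetical local directory: expect a list of pathlib.Path objects for
# files that pass the extension/glob filters (matching rules not shown here).
matching = iterdir("data/", extensions="csv", glob="2024-*")
for path in matching:
    print(path)

# Buffers short-circuit: anything with .read()/.write() is returned as-is,
# so existing read_csv(buffer) code paths keep working.
buf = StringIO("a,b\n1,2\n")
assert iterdir(buf) is buf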

pandas/io/parsers/readers.py

Lines changed: 37 additions & 11 deletions
Original file line number	Diff line number	Diff line change
@@ -55,6 +55,7 @@
 from pandas.io.common import (
     IOHandles,
     get_handle,
+    iterdir,
     stringify_path,
     validate_header_arg,
 )
@@ -73,6 +74,7 @@
 if TYPE_CHECKING:
     from collections.abc import (
         Callable,
+        Generator,
         Hashable,
         Iterable,
         Mapping,
@@ -668,9 +670,23 @@ def _validate_names(names: Sequence[Hashable] | None) -> None:
         raise ValueError("Names should be an ordered collection.")
 
 
+def _multi_file_generator(
+    list_of_files: list[str], kwds
+) -> Generator[DataFrame] | Generator[TextFileReader]:
+    """Generator for multiple files."""
+    for file in list_of_files:
+        parser = TextFileReader(file, **kwds)
+
+        if kwds.get("chunksize", None) or kwds.get("iterator", False):
+            yield parser
+        else:
+            with parser:
+                yield parser.read(kwds.get("nrows", None))
+
+
 def _read(
     filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], kwds
-) -> DataFrame | TextFileReader:
+) -> DataFrame | TextFileReader | Generator[DataFrame] | Generator[TextFileReader]:
     """Generic reader of line files."""
     # if we pass a date_format and parse_dates=False, we should not parse the
     # dates GH#44366
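
The new _multi_file_generator is the multi-file counterpart of the single-file path in _read: per input file it lazily produces either a TextFileReader (when chunksize/iterator is requested) or a fully read DataFrame. A standalone sketch of the same pattern using only public pandas API; the file names are placeholders and this is not the internal implementation:

from collections.abc import Iterator

import pandas as pd


def read_many(paths: list[str], **kwds) -> Iterator[pd.DataFrame]:
    # One DataFrame per file, produced lazily like _multi_file_generator above,
    # but built on public pd.read_csv instead of the internal TextFileReader.
    for path in paths:
        yield pd.read_csv(path, **kwds)


# Placeholder file names; nothing is read until the generator is consumed.
frames = read_many(["part-0.csv", "part-1.csv"])
combined = pd.concat(frames, ignore_index=True)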
@@ -709,14 +725,25 @@ def _read(
     # Check for duplicates in names.
     _validate_names(kwds.get("names", None))
 
-    # Create the parser.
-    parser = TextFileReader(filepath_or_buffer, **kwds)
+    extensions = kwds.get("extensions", None)
+    glob = kwds.get("glob", None)
+    files = iterdir(filepath_or_buffer, extensions, glob)
+
+    if len(files) == 0:
+        raise FileNotFoundError(
+            f"No files found in {filepath_or_buffer}, "
+            f"with extension(s) {extensions} and glob pattern {glob}"
+        )
+    elif len(files) == 1:
+        parser = TextFileReader(files[0], **kwds)
 
-    if chunksize or iterator:
-        return parser
+        if chunksize or iterator:
+            return parser
 
-    with parser:
-        return parser.read(nrows)
+        with parser:
+            return parser.read(nrows)
+    else:
+        return _multi_file_generator(files, kwds)
 
 
 @overload
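
The rewired _read dispatches on how many paths iterdir found: none raises FileNotFoundError, exactly one keeps the existing single-file behaviour (a DataFrame, or a TextFileReader when chunksize/iterator is set), and several defer to _multi_file_generator. A rough standalone sketch of that dispatch using public API only; dispatch, its arguments, and the error message are illustrative, not the pandas code:

import pandas as pd


def dispatch(files: list[str], chunksize: int | None = None, **kwds):
    # Mirrors the branch structure in the hunk above.
    if len(files) == 0:
        raise FileNotFoundError("no files matched the requested extensions/glob")
    if len(files) == 1:
        if chunksize:
            # pd.read_csv with chunksize returns a TextFileReader.
            return pd.read_csv(files[0], chunksize=chunksize, **kwds)
        return pd.read_csv(files[0], **kwds)
    # Several files: a lazy generator of DataFrames, one per file.
    return (pd.read_csv(f, **kwds) for f in files)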
@@ -932,10 +959,9 @@ def read_table(
     skipfooter: int = 0,
     nrows: int | None = None,
     # NA and Missing Data Handling
-    na_values: Hashable
-    | Iterable[Hashable]
-    | Mapping[Hashable, Iterable[Hashable]]
-    | None = None,
+    na_values: (
+        Hashable | Iterable[Hashable] | Mapping[Hashable, Iterable[Hashable]] | None
+    ) = None,
     keep_default_na: bool = True,
     na_filter: bool = True,
     skip_blank_lines: bool = True,
