diff --git a/pandas/io/common.py b/pandas/io/common.py index d692f26ab0576..ef8b466145daa 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -925,7 +925,7 @@ class _BufferedWriter(BytesIO, ABC): # type: ignore[misc] """ @abstractmethod - def write_to_buffer(self): + def write_to_buffer(self) -> None: ... def close(self) -> None: diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py index 077524fbee465..85e92da8c2a54 100644 --- a/pandas/io/date_converters.py +++ b/pandas/io/date_converters.py @@ -1,13 +1,16 @@ """This module is designed for community supported date conversion functions""" +from __future__ import annotations + import warnings import numpy as np from pandas._libs.tslibs import parsing +from pandas._typing import npt from pandas.util._exceptions import find_stack_level -def parse_date_time(date_col, time_col): +def parse_date_time(date_col, time_col) -> npt.NDArray[np.object_]: """ Parse columns with dates and times into a single datetime column. @@ -26,7 +29,7 @@ def parse_date_time(date_col, time_col): return parsing.try_parse_date_and_time(date_col, time_col) -def parse_date_fields(year_col, month_col, day_col): +def parse_date_fields(year_col, month_col, day_col) -> npt.NDArray[np.object_]: """ Parse columns with years, months and days into a single date column. @@ -48,7 +51,9 @@ def parse_date_fields(year_col, month_col, day_col): return parsing.try_parse_year_month_day(year_col, month_col, day_col) -def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, second_col): +def parse_all_fields( + year_col, month_col, day_col, hour_col, minute_col, second_col +) -> npt.NDArray[np.object_]: """ Parse columns with datetime information into a single datetime column. @@ -78,7 +83,7 @@ def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, second_ ) -def generic_parser(parse_func, *cols): +def generic_parser(parse_func, *cols) -> np.ndarray: """ Use dateparser to parse columns with data information into a single datetime column. diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 24b881bda4805..58bb4b1a8f849 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1069,7 +1069,7 @@ class ExcelWriter(metaclass=abc.ABCMeta): _supported_extensions: tuple[str, ...] def __new__( - cls, + cls: type[ExcelWriter], path: FilePath | WriteExcelBuffer | ExcelWriter, engine: str | None = None, date_format: str | None = None, @@ -1079,7 +1079,7 @@ def __new__( if_sheet_exists: Literal["error", "new", "replace", "overlay"] | None = None, engine_kwargs: dict | None = None, **kwargs, - ): + ) -> ExcelWriter: if kwargs: if engine_kwargs is not None: raise ValueError("Cannot use both engine_kwargs and **kwargs") @@ -1325,7 +1325,7 @@ def cur_sheet(self): return self._cur_sheet @property - def handles(self): + def handles(self) -> IOHandles[bytes]: """ Handles to Excel sheets. @@ -1344,7 +1344,7 @@ def path(self): self._deprecate("path") return self._path - def __fspath__(self): + def __fspath__(self) -> str: return getattr(self._handles.handle, "name", "") def _get_sheet_name(self, sheet_name: str | None) -> str: @@ -1402,10 +1402,10 @@ def check_extension(cls, ext: str) -> Literal[True]: return True # Allow use as a contextmanager - def __enter__(self): + def __enter__(self) -> ExcelWriter: return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__(self, exc_type, exc_value, traceback) -> None: self.close() def close(self) -> None: @@ -1699,13 +1699,13 @@ def close(self) -> None: """close io if necessary""" self._reader.close() - def __enter__(self): + def __enter__(self) -> ExcelFile: return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__(self, exc_type, exc_value, traceback) -> None: self.close() - def __del__(self): + def __del__(self) -> None: # Ensure we don't leak file descriptors, but put in try/except in case # attributes are already deleted try: diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 87cc07d3fd21d..c3cd3fbe9e853 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -33,6 +33,7 @@ if TYPE_CHECKING: from openpyxl.descriptors.serialisable import Serialisable + from openpyxl.workbook import Workbook class OpenpyxlWriter(ExcelWriter): @@ -79,7 +80,7 @@ def __init__( self.book.remove(self.book.worksheets[0]) @property - def book(self): + def book(self) -> Workbook: """ Book instance of class openpyxl.workbook.Workbook. diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 9813b91419060..4ecd5b7604088 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -31,7 +31,7 @@ def to_feather( path: FilePath | WriteBuffer[bytes], storage_options: StorageOptions = None, **kwargs, -): +) -> None: """ Write a DataFrame to the binary Feather format. diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index cdd24a1194a45..531fa5400f466 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -220,7 +220,7 @@ def _validate_parse_dates_presence(self, columns: Sequence[Hashable]) -> Iterabl for col in cols_needed ] - def close(self): + def close(self) -> None: pass @final diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index ec953c9df036c..3e897f9b1334e 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -308,7 +308,11 @@ def _exclude_implicit_index( }, names # legacy - def get_chunk(self, size=None): + def get_chunk( + self, size: int | None = None + ) -> tuple[ + Index | None, Sequence[Hashable] | MultiIndex, Mapping[Hashable, ArrayLike] + ]: if size is None: # error: "PythonParser" has no attribute "chunksize" size = self.chunksize # type: ignore[attr-defined] diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index e811ace78f1f5..4b14b99f91f3f 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -493,7 +493,22 @@ class _DeprecationConfig(NamedTuple): } -def validate_integer(name, val, min_val=0): +@overload +def validate_integer(name, val: None, min_val=...) -> None: + ... + + +@overload +def validate_integer(name, val: int | float, min_val=...) -> int: + ... + + +@overload +def validate_integer(name, val: int | None, min_val=...) -> int | None: + ... + + +def validate_integer(name, val: int | float | None, min_val=0) -> int | None: """ Checks whether the 'name' parameter for parsing is either an integer OR float that can SAFELY be cast to an integer @@ -509,17 +524,18 @@ def validate_integer(name, val, min_val=0): min_val : int Minimum allowed value (val < min_val will result in a ValueError) """ - msg = f"'{name:s}' must be an integer >={min_val:d}" + if val is None: + return val - if val is not None: - if is_float(val): - if int(val) != val: - raise ValueError(msg) - val = int(val) - elif not (is_integer(val) and val >= min_val): + msg = f"'{name:s}' must be an integer >={min_val:d}" + if is_float(val): + if int(val) != val: raise ValueError(msg) + val = int(val) + elif not (is_integer(val) and val >= min_val): + raise ValueError(msg) - return val + return int(val) def _validate_names(names: Sequence[Hashable] | None) -> None: @@ -1784,7 +1800,7 @@ def __exit__(self, exc_type, exc_value, traceback) -> None: self.close() -def TextParser(*args, **kwds): +def TextParser(*args, **kwds) -> TextFileReader: """ Converts lists of lists/tuples into DataFrames with proper type inference and optional (e.g. string to datetime) conversion. Also enables iterating diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 500e88eb0ef76..a2e217767d1d4 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -396,7 +396,7 @@ def _read_header(self): dtype = np.dtype(dtypel) self._dtype = dtype - def __next__(self): + def __next__(self) -> pd.DataFrame: return self.read(nrows=self._chunksize or 1) def _record_count(self) -> int: @@ -434,7 +434,7 @@ def _record_count(self) -> int: return (total_records_length - tail_pad) // self.record_length - def get_chunk(self, size=None): + def get_chunk(self, size=None) -> pd.DataFrame: """ Reads lines from Xport file and returns as dataframe diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 701642ad2cfe2..24290b8370ed2 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -14,6 +14,7 @@ from functools import partial import re from typing import ( + TYPE_CHECKING, Any, Iterator, Sequence, @@ -50,6 +51,9 @@ import pandas.core.common as com from pandas.core.tools.datetimes import to_datetime +if TYPE_CHECKING: + from sqlalchemy import Table + class DatabaseError(OSError): pass @@ -277,7 +281,9 @@ def read_sql_table( if not pandas_sql.has_table(table_name): raise ValueError(f"Table {table_name} not found") - table = pandas_sql.read_table( + # error: Item "SQLiteDatabase" of "Union[SQLDatabase, SQLiteDatabase]" + # has no attribute "read_table" + table = pandas_sql.read_table( # type: ignore[union-attr] table_name, index_col=index_col, coerce_float=coerce_float, @@ -701,7 +707,7 @@ def to_sql( ) -def has_table(table_name: str, con, schema: str | None = None): +def has_table(table_name: str, con, schema: str | None = None) -> bool: """ Check if DataBase has named table. @@ -728,7 +734,7 @@ def has_table(table_name: str, con, schema: str | None = None): table_exists = has_table -def pandasSQL_builder(con, schema: str | None = None): +def pandasSQL_builder(con, schema: str | None = None) -> SQLDatabase | SQLiteDatabase: """ Convenience function to return the correct PandasSQL subclass based on the provided parameters. @@ -806,7 +812,7 @@ def __init__( def exists(self): return self.pd_sql.has_table(self.name, self.schema) - def sql_schema(self): + def sql_schema(self) -> str: from sqlalchemy.schema import CreateTable return str(CreateTable(self.table).compile(self.pd_sql.connectable)) @@ -816,7 +822,7 @@ def _execute_create(self): self.table = self.table.to_metadata(self.pd_sql.meta) self.table.create(bind=self.pd_sql.connectable) - def create(self): + def create(self) -> None: if self.exists(): if self.if_exists == "fail": raise ValueError(f"Table '{self.name}' already exists.") @@ -862,7 +868,7 @@ def _execute_insert_multi(self, conn, keys: list[str], data_iter) -> int: result = conn.execute(stmt) return result.rowcount - def insert_data(self): + def insert_data(self) -> tuple[list[str], list[np.ndarray]]: if self.index is not None: temp = self.frame.copy() temp.index.names = self.index @@ -875,7 +881,9 @@ def insert_data(self): column_names = list(map(str, temp.columns)) ncols = len(column_names) - data_list = [None] * ncols + # this just pre-allocates the list: None's will be replaced with ndarrays + # error: List item 0 has incompatible type "None"; expected "ndarray" + data_list: list[np.ndarray] = [None] * ncols # type: ignore[list-item] for i, (_, ser) in enumerate(temp.items()): vals = ser._values @@ -894,9 +902,7 @@ def insert_data(self): mask = isna(d) d[mask] = None - # error: No overload variant of "__setitem__" of "list" matches - # argument types "int", "ndarray" - data_list[i] = d # type: ignore[call-overload] + data_list[i] = d return column_names, data_list @@ -974,7 +980,13 @@ def _query_iterator( yield self.frame - def read(self, coerce_float=True, parse_dates=None, columns=None, chunksize=None): + def read( + self, + coerce_float=True, + parse_dates=None, + columns=None, + chunksize=None, + ) -> DataFrame | Iterator[DataFrame]: from sqlalchemy import select if columns is not None and len(columns) > 0: @@ -1398,7 +1410,7 @@ def read_table( columns=None, schema: str | None = None, chunksize: int | None = None, - ): + ) -> DataFrame | Iterator[DataFrame]: """ Read SQL database table into a DataFrame. @@ -1487,13 +1499,13 @@ def _query_iterator( def read_query( self, sql: str, - index_col: str | None = None, + index_col: str | Sequence[str] | None = None, coerce_float: bool = True, parse_dates=None, params=None, chunksize: int | None = None, dtype: DtypeArg | None = None, - ): + ) -> DataFrame | Iterator[DataFrame]: """ Read SQL query into a DataFrame. @@ -1620,7 +1632,7 @@ def check_case_sensitive( self, name, schema, - ): + ) -> None: """ Checks table name for issues with case-sensitivity. Method is called after data is inserted. @@ -1741,7 +1753,7 @@ def has_table(self, name: str, schema: str | None = None): insp = inspect(self.connectable) return insp.has_table(name, schema or self.meta.schema) - def get_table(self, table_name: str, schema: str | None = None): + def get_table(self, table_name: str, schema: str | None = None) -> Table: from sqlalchemy import ( Numeric, Table, @@ -1756,7 +1768,7 @@ def get_table(self, table_name: str, schema: str | None = None): column.type.asdecimal = False return tbl - def drop_table(self, table_name: str, schema: str | None = None): + def drop_table(self, table_name: str, schema: str | None = None) -> None: schema = schema or self.meta.schema if self.has_table(table_name, schema): self.meta.reflect(bind=self.connectable, only=[table_name], schema=schema) @@ -1839,7 +1851,7 @@ def __init__(self, *args, **kwargs) -> None: sqlite3.register_adapter(time, lambda _: _.strftime("%H:%M:%S.%f")) super().__init__(*args, **kwargs) - def sql_schema(self): + def sql_schema(self) -> str: return str(";\n".join(self.table)) def _execute_create(self): @@ -1847,7 +1859,7 @@ def _execute_create(self): for stmt in self.table: conn.execute(stmt) - def insert_statement(self, *, num_rows: int): + def insert_statement(self, *, num_rows: int) -> str: names = list(map(str, self.frame.columns)) wld = "?" # wildcard char escape = _get_valid_sqlite_name @@ -2049,7 +2061,7 @@ def read_query( parse_dates=None, chunksize: int | None = None, dtype: DtypeArg | None = None, - ): + ) -> DataFrame | Iterator[DataFrame]: args = _convert_params(sql, params) cursor = self.execute(*args) @@ -2164,17 +2176,17 @@ def to_sql( table.create() return table.insert(chunksize, method) - def has_table(self, name: str, schema: str | None = None): + def has_table(self, name: str, schema: str | None = None) -> bool: wld = "?" query = f"SELECT name FROM sqlite_master WHERE type='table' AND name={wld};" return len(self.execute(query, [name]).fetchall()) > 0 - def get_table(self, table_name: str, schema: str | None = None): + def get_table(self, table_name: str, schema: str | None = None) -> None: return None # not supported in fallback mode - def drop_table(self, name: str, schema: str | None = None): + def drop_table(self, name: str, schema: str | None = None) -> None: drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}" self.execute(drop_sql) @@ -2205,7 +2217,7 @@ def get_schema( con=None, dtype: DtypeArg | None = None, schema: str | None = None, -): +) -> str: """ Get the SQL db table schema for the given frame.