Skip to content

TYP: misc IO return types #57228

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,3 +529,6 @@ def closed(self) -> bool:
Callable[[HashableT], bool],
None,
]

# maintain the sub-type of any hashable sequence
SequenceT = TypeVar("SequenceT", bound=Sequence[Hashable])
3 changes: 2 additions & 1 deletion pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
if TYPE_CHECKING:
from openpyxl import Workbook
from openpyxl.descriptors.serialisable import Serialisable
from openpyxl.styles import Fill

from pandas._typing import (
ExcelWriterIfSheetExists,
Expand Down Expand Up @@ -244,7 +245,7 @@ def _convert_to_stop(cls, stop_seq):
return map(cls._convert_to_color, stop_seq)

@classmethod
def _convert_to_fill(cls, fill_dict: dict[str, Any]):
def _convert_to_fill(cls, fill_dict: dict[str, Any]) -> Fill:
"""
Convert ``fill_dict`` to an openpyxl v2 Fill object.

Expand Down
4 changes: 3 additions & 1 deletion pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,9 @@ def build_border(
for side in ["top", "right", "bottom", "left"]
}

def _border_style(self, style: str | None, width: str | None, color: str | None):
def _border_style(
self, style: str | None, width: str | None, color: str | None
) -> str | None:
# convert styles and widths to openxml, one of:
# 'dashDot'
# 'dashDotDot'
Expand Down
6 changes: 4 additions & 2 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1346,7 +1346,9 @@ def get_result_as_array(self) -> np.ndarray:
the parameters given at initialisation, as a numpy array
"""

def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
def format_with_na_rep(
values: ArrayLike, formatter: Callable, na_rep: str
) -> np.ndarray:
mask = isna(values)
formatted = np.array(
[
Expand All @@ -1358,7 +1360,7 @@ def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):

def format_complex_with_na_rep(
values: ArrayLike, formatter: Callable, na_rep: str
):
) -> np.ndarray:
real_values = np.real(values).ravel() # type: ignore[arg-type]
imag_values = np.imag(values).ravel() # type: ignore[arg-type]
real_mask, imag_mask = isna(real_values), isna(imag_values)
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/formats/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def dtype_counts(self) -> Mapping[str, int]:

@property
@abstractmethod
def non_null_counts(self) -> Sequence[int]:
def non_null_counts(self) -> list[int] | Series:
"""Sequence of non-null counts for all columns or column (if series)."""

@property
Expand Down Expand Up @@ -486,7 +486,7 @@ def col_count(self) -> int:
return len(self.ids)

@property
def non_null_counts(self) -> Sequence[int]:
def non_null_counts(self) -> Series:
"""Sequence of non-null counts for all columns or column (if series)."""
return self.data.count()

Expand Down Expand Up @@ -546,7 +546,7 @@ def render(
printer.to_buffer(buf)

@property
def non_null_counts(self) -> Sequence[int]:
def non_null_counts(self) -> list[int]:
return [self.data.count()]

@property
Expand Down Expand Up @@ -750,7 +750,7 @@ def memory_usage_string(self) -> str:
return self.info.memory_usage_string

@property
def non_null_counts(self) -> Sequence[int]:
def non_null_counts(self) -> list[int] | Series:
return self.info.non_null_counts

def add_object_type_line(self) -> None:
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -3827,7 +3827,7 @@ def _background_gradient(
vmax: float | None = None,
gmap: Sequence | np.ndarray | DataFrame | Series | None = None,
text_only: bool = False,
):
) -> list[str] | DataFrame:
"""
Color background in a range according to the data or a gradient map
"""
Expand Down
4 changes: 3 additions & 1 deletion pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -2030,7 +2030,9 @@ def _class_styles(self):
}
]

def _pseudo_css(self, uuid: str, name: str, row: int, col: int, text: str):
def _pseudo_css(
self, uuid: str, name: str, row: int, col: int, text: str
) -> list[CSSDict]:
"""
For every table data-cell that has a valid tooltip (not None, NaN or
empty string) must create two pseudo CSS entries for the specific
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ def row_is_all_th(row):

def _expand_colspan_rowspan(
self, rows, section: Literal["header", "footer", "body"]
):
) -> list[list]:
"""
Given a list of <tr>s, return a list of text rows.

Expand Down
27 changes: 25 additions & 2 deletions pandas/io/json/_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
TYPE_CHECKING,
Any,
DefaultDict,
overload,
)

import numpy as np
Expand Down Expand Up @@ -42,13 +43,35 @@ def convert_to_line_delimits(s: str) -> str:
return convert_json_to_lines(s)


@overload
def nested_to_record(
ds,
ds: dict,
prefix: str = ...,
sep: str = ...,
level: int = ...,
max_level: int | None = ...,
) -> dict[str, Any]:
...


@overload
def nested_to_record(
ds: list[dict],
prefix: str = ...,
sep: str = ...,
level: int = ...,
max_level: int | None = ...,
) -> list[dict[str, Any]]:
...


def nested_to_record(
ds: dict | list[dict],
prefix: str = "",
sep: str = ".",
level: int = 0,
max_level: int | None = None,
):
) -> dict[str, Any] | list[dict[str, Any]]:
"""
A simplified json_normalize

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def validate_dataframe(df: DataFrame) -> None:
if not isinstance(df, DataFrame):
raise ValueError("to_parquet only supports IO with DataFrames")

def write(self, df: DataFrame, path, compression, **kwargs):
def write(self, df: DataFrame, path, compression, **kwargs) -> None:
raise AbstractMethodError(self)

def read(self, path, columns=None, **kwargs) -> DataFrame:
Expand Down
36 changes: 20 additions & 16 deletions pandas/io/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@

if TYPE_CHECKING:
from collections.abc import (
Hashable,
Iterable,
Mapping,
Sequence,
Expand All @@ -94,7 +93,10 @@
ArrayLike,
DtypeArg,
DtypeObj,
Hashable,
HashableT,
Scalar,
SequenceT,
)


Expand Down Expand Up @@ -350,13 +352,13 @@ def extract(r):
@final
def _maybe_make_multi_index_columns(
self,
columns: Sequence[Hashable],
columns: SequenceT,
col_names: Sequence[Hashable] | None = None,
) -> Sequence[Hashable] | MultiIndex:
) -> SequenceT | MultiIndex:
# possibly create a column mi here
if is_potential_multi_index(columns):
list_columns = cast(list[tuple], columns)
return MultiIndex.from_tuples(list_columns, names=col_names)
columns_mi = cast("Sequence[tuple[Hashable, ...]]", columns)
return MultiIndex.from_tuples(columns_mi, names=col_names)
return columns

@final
Expand Down Expand Up @@ -520,7 +522,7 @@ def _convert_to_ndarrays(
verbose: bool = False,
converters=None,
dtypes=None,
):
) -> dict[Any, np.ndarray]:
result = {}
for c, values in dct.items():
conv_f = None if converters is None else converters.get(c, None)
Expand Down Expand Up @@ -923,23 +925,23 @@ def _check_data_length(
@overload
def _evaluate_usecols(
self,
usecols: set[int] | Callable[[Hashable], object],
names: Sequence[Hashable],
usecols: Callable[[Hashable], object],
names: Iterable[Hashable],
) -> set[int]:
...

@overload
def _evaluate_usecols(
self, usecols: set[str], names: Sequence[Hashable]
) -> set[str]:
self, usecols: SequenceT, names: Iterable[Hashable]
) -> SequenceT:
...

@final
def _evaluate_usecols(
self,
usecols: Callable[[Hashable], object] | set[str] | set[int],
names: Sequence[Hashable],
) -> set[str] | set[int]:
usecols: Callable[[Hashable], object] | SequenceT,
names: Iterable[Hashable],
) -> SequenceT | set[int]:
"""
Check whether or not the 'usecols' parameter
is a callable. If so, enumerates the 'names'
Expand All @@ -952,7 +954,7 @@ def _evaluate_usecols(
return usecols

@final
def _validate_usecols_names(self, usecols, names: Sequence):
def _validate_usecols_names(self, usecols: SequenceT, names: Sequence) -> SequenceT:
"""
Validates that all usecols are present in a given
list of names. If not, raise a ValueError that
Expand Down Expand Up @@ -1072,7 +1074,9 @@ def _clean_index_names(self, columns, index_col) -> tuple[list | None, list, lis
return index_names, columns, index_col

@final
def _get_empty_meta(self, columns, dtype: DtypeArg | None = None):
def _get_empty_meta(
self, columns: Sequence[HashableT], dtype: DtypeArg | None = None
) -> tuple[Index, list[HashableT], dict[HashableT, Series]]:
columns = list(columns)

index_col = self.index_col
Expand Down Expand Up @@ -1275,7 +1279,7 @@ def _process_date_conversion(
columns,
keep_date_col: bool = False,
dtype_backend=lib.no_default,
):
) -> tuple[dict, list]:
def _isindex(colspec):
return (isinstance(index_col, list) and colspec in index_col) or (
isinstance(index_names, list) and colspec in index_names
Expand Down
14 changes: 10 additions & 4 deletions pandas/io/parsers/c_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@
)

from pandas._typing import (
AnyArrayLike,
ArrayLike,
DtypeArg,
DtypeObj,
ReadCsvBuffer,
SequenceT,
)

from pandas import (
Expand Down Expand Up @@ -225,7 +227,7 @@ def read(
) -> tuple[
Index | MultiIndex | None,
Sequence[Hashable] | MultiIndex,
Mapping[Hashable, ArrayLike],
Mapping[Hashable, AnyArrayLike],
]:
index: Index | MultiIndex | None
column_names: Sequence[Hashable] | MultiIndex
Expand All @@ -248,7 +250,11 @@ def read(
names,
dtype=self.dtype,
)
columns = self._maybe_make_multi_index_columns(columns, self.col_names)
# error: Incompatible types in assignment (expression has type
# "list[Hashable] | MultiIndex", variable has type "list[Hashable]")
columns = self._maybe_make_multi_index_columns( # type: ignore[assignment]
columns, self.col_names
)

if self.usecols is not None:
columns = self._filter_usecols(columns)
Expand Down Expand Up @@ -334,11 +340,11 @@ def read(

return index, column_names, date_data

def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]:
def _filter_usecols(self, names: SequenceT) -> SequenceT | list[Hashable]:
# hackish
usecols = self._evaluate_usecols(self.usecols, names)
if usecols is not None and len(names) != len(usecols):
names = [
return [
name for i, name in enumerate(names) if i in usecols or name in usecols
]
return names
Expand Down
1 change: 1 addition & 0 deletions pandas/io/parsers/python_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ def read(
# done with first read, next time raise StopIteration
self._first_chunk = False

index: Index | None
columns: Sequence[Hashable] = list(self.orig_names)
if not len(content): # pragma: no cover
# DataFrame with the right metadata, even though it's length 0
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2101,7 +2101,7 @@ def _floatify_na_values(na_values):
return result


def _stringify_na_values(na_values, floatify: bool):
def _stringify_na_values(na_values, floatify: bool) -> set[str | float]:
"""return a stringified and numeric for these values"""
result: list[str | float] = []
for x in na_values:
Expand Down
5 changes: 3 additions & 2 deletions pandas/io/sas/sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from pandas._typing import (
CompressionOptions,
FilePath,
NaTType,
ReadBuffer,
)

Expand All @@ -62,7 +63,7 @@
_sas_origin = Timestamp("1960-01-01")


def _parse_datetime(sas_datetime: float, unit: str):
def _parse_datetime(sas_datetime: float, unit: str) -> datetime | NaTType:
if isna(sas_datetime):
return pd.NaT

Expand Down Expand Up @@ -326,7 +327,7 @@ def __next__(self) -> DataFrame:
return da

# Read a single float of the given width (4 or 8).
def _read_float(self, offset: int, width: int):
def _read_float(self, offset: int, width: int) -> float:
assert self._cached_page is not None
if width == 4:
return read_float_with_byteswap(
Expand Down
Loading