-
-
Notifications
You must be signed in to change notification settings - Fork 143
ENH: Synchronize io/stata with pandas master #202
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
34efdbc
163cebd
49734ca
646a60f
5d3648c
52e104d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,9 +3,9 @@ import datetime | |
from io import BytesIO | ||
from types import TracebackType | ||
from typing import ( | ||
Hashable, | ||
Literal, | ||
Sequence, | ||
overload, | ||
) | ||
|
||
import numpy as np | ||
|
@@ -18,10 +18,12 @@ from pandas._typing import ( | |
FilePath, | ||
HashableT, | ||
ReadBuffer, | ||
StataDateFormat, | ||
StorageOptions, | ||
WriteBuffer, | ||
) | ||
|
||
@overload | ||
def read_stata( | ||
path: FilePath | ReadBuffer[bytes], | ||
convert_dates: bool = ..., | ||
|
@@ -32,70 +34,47 @@ def read_stata( | |
columns: list[HashableT] | None = ..., | ||
order_categoricals: bool = ..., | ||
chunksize: int | None = ..., | ||
iterator: bool = ..., | ||
*, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So the second and third overloads differ only in whether the arguments are keyword-only or can also be provided as positional arguments? If pandas had deprecated positional arguments, I would remove the positional overloads - but they are not yet deprecated. (Will try to open a PR for that later today at pandas.) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yup. They are needed to handle the cases where [inline code missing from the extracted text]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I opened pandas-dev/pandas#48128 to make most arguments keyword-only. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Passing these arguments positionally will now be deprecated in 1.5 (except for the first non-self argument). |
||
iterator: Literal[True], | ||
compression: CompressionOptions = ..., | ||
storage_options: StorageOptions = ..., | ||
) -> DataFrame | StataReader: ... | ||
|
||
stata_epoch: datetime.datetime = ... | ||
excessive_string_length_error: str | ||
) -> StataReader: ... | ||
@overload | ||
def read_stata( | ||
path: FilePath | ReadBuffer[bytes], | ||
convert_dates: bool, | ||
convert_categoricals: bool, | ||
index_col: str | None, | ||
convert_missing: bool, | ||
preserve_dtypes: bool, | ||
columns: list[HashableT] | None, | ||
order_categoricals: bool, | ||
chunksize: int | None, | ||
iterator: Literal[True], | ||
compression: CompressionOptions = ..., | ||
storage_options: StorageOptions = ..., | ||
) -> StataReader: ... | ||
@overload | ||
def read_stata( | ||
path: FilePath | ReadBuffer[bytes], | ||
convert_dates: bool = ..., | ||
convert_categoricals: bool = ..., | ||
index_col: str | None = ..., | ||
convert_missing: bool = ..., | ||
preserve_dtypes: bool = ..., | ||
columns: list[HashableT] | None = ..., | ||
order_categoricals: bool = ..., | ||
chunksize: int | None = ..., | ||
iterator: Literal[False] = ..., | ||
compression: CompressionOptions = ..., | ||
storage_options: StorageOptions = ..., | ||
) -> DataFrame: ... | ||
|
||
class PossiblePrecisionLoss(Warning): ... | ||
|
||
precision_loss_doc: str | ||
|
||
class ValueLabelTypeMismatch(Warning): ... | ||
|
||
value_label_mismatch_doc: str | ||
|
||
class InvalidColumnName(Warning): ... | ||
|
||
invalid_name_doc: str | ||
|
||
class StataValueLabel: | ||
labname: Hashable = ... | ||
value_labels: list[tuple[float, str]] = ... | ||
text_len: int = ... | ||
off: npt.NDArray[np.int32] = ... | ||
val: npt.NDArray[np.int32] = ... | ||
txt: list[bytes] = ... | ||
n: int = ... | ||
len: int = ... | ||
def __init__( | ||
self, catarray: pd.Series, encoding: Literal["latin-1", "utf-8"] = ... | ||
) -> None: ... | ||
def generate_value_label(self, byteorder: str) -> bytes: ... | ||
|
||
class StataMissingValue: | ||
MISSING_VALUES: dict[float, str] = ... | ||
bases: tuple[int, int, int] = ... | ||
float32_base: bytes = ... | ||
increment: int = ... | ||
int_value: int = ... | ||
float64_base: bytes = ... | ||
BASE_MISSING_VALUES: dict[str, int] = ... | ||
def __init__(self, value: float) -> None: ... | ||
def __eq__(self, other: object) -> bool: ... | ||
@property | ||
def string(self) -> str: ... | ||
@property | ||
def value(self) -> float: ... | ||
@classmethod | ||
def get_base_missing_value(cls, dtype): ... | ||
|
||
class StataParser: | ||
Dr-Irv marked this conversation as resolved.
Show resolved
Hide resolved
|
||
DTYPE_MAP: dict[int, np.dtype] = ... | ||
DTYPE_MAP_XML: dict[int, np.dtype] = ... | ||
TYPE_MAP: list[tuple[int | str, ...]] = ... | ||
TYPE_MAP_XML: dict[int, str] = ... | ||
VALID_RANGE: dict[ | ||
str, | ||
tuple[int, int] | tuple[np.float32, np.float32] | tuple[np.float64, np.float64], | ||
] = ... | ||
OLD_TYPE_MAPPING: dict[int, int] = ... | ||
MISSING_VALUES: dict[str, int] = ... | ||
NUMPY_TYPE_MAP: dict[str, str] = ... | ||
RESERVED_WORDS: tuple[str, ...] = ... | ||
def __init__(self) -> None: ... | ||
|
||
class StataReader(StataParser, abc.Iterator): | ||
|
@@ -142,70 +121,19 @@ class StataReader(StataParser, abc.Iterator): | |
def value_labels(self) -> dict[str, dict[float, str]]: ... | ||
|
||
class StataWriter(StataParser): | ||
type_converters: dict[str, type[np.dtype]] = ... | ||
def __init__( | ||
self, | ||
Dr-Irv marked this conversation as resolved.
Show resolved
Hide resolved
|
||
fname: FilePath | WriteBuffer[bytes], | ||
data: DataFrame, | ||
convert_dates: dict[Hashable, str] | None = ..., | ||
convert_dates: dict[HashableT, StataDateFormat] | None = ..., | ||
write_index: bool = ..., | ||
byteorder: str | None = ..., | ||
time_stamp: datetime.datetime | None = ..., | ||
data_label: str | None = ..., | ||
variable_labels: dict[Hashable, str] | None = ..., | ||
variable_labels: dict[HashableT, str] | None = ..., | ||
compression: CompressionOptions = ..., | ||
storage_options: StorageOptions = ..., | ||
*, | ||
value_labels: dict[Hashable, dict[float, str]] | None = ..., | ||
value_labels: dict[HashableT, dict[float, str]] | None = ..., | ||
) -> None: ... | ||
def write_file(self) -> None: ... | ||
|
||
class StataStrLWriter: | ||
df: DataFrame = ... | ||
columns: Sequence[str] = ... | ||
def __init__( | ||
self, | ||
df: DataFrame, | ||
columns: Sequence[str], | ||
version: int = ..., | ||
byteorder: str | None = ..., | ||
) -> None: ... | ||
def generate_table(self) -> tuple[dict[str, tuple[int, int]], DataFrame]: ... | ||
def generate_blob(self, gso_table: dict[str, tuple[int, int]]) -> bytes: ... | ||
|
||
class StataWriter117(StataWriter): | ||
def __init__( | ||
self, | ||
fname: FilePath | WriteBuffer[bytes], | ||
data: DataFrame, | ||
convert_dates: dict[Hashable, str] | None = ..., | ||
write_index: bool = ..., | ||
byteorder: str | None = ..., | ||
time_stamp: datetime.datetime | None = ..., | ||
data_label: str | None = ..., | ||
variable_labels: dict[Hashable, str] | None = ..., | ||
convert_strl: Sequence[Hashable] | None = ..., | ||
compression: CompressionOptions = ..., | ||
storage_options: StorageOptions = ..., | ||
*, | ||
value_labels: dict[Hashable, dict[float, str]] | None = ..., | ||
) -> None: ... | ||
|
||
class StataWriterUTF8(StataWriter117): | ||
def __init__( | ||
self, | ||
fname: FilePath | WriteBuffer[bytes], | ||
data: DataFrame, | ||
convert_dates: dict[Hashable, str] | None = ..., | ||
write_index: bool = ..., | ||
byteorder: str | None = ..., | ||
time_stamp: datetime.datetime | None = ..., | ||
data_label: str | None = ..., | ||
variable_labels: dict[Hashable, str] | None = ..., | ||
convert_strl: Sequence[Hashable] | None = ..., | ||
version: int | None = ..., | ||
compression: CompressionOptions = ..., | ||
storage_options: StorageOptions = ..., | ||
*, | ||
value_labels: dict[Hashable, dict[float, str]] | None = ..., | ||
) -> None: ... |
Uh oh!
There was an error while loading. Please reload this page.