1
1
from collections import abc
2
2
import datetime
3
+ from io import BytesIO
4
+ from types import TracebackType
3
5
from typing import (
4
- Dict ,
5
6
Hashable ,
6
- List ,
7
- Optional ,
7
+ Literal ,
8
8
Sequence ,
9
9
)
10
10
11
+ import numpy as np
12
+ import numpy .typing as npt
13
+ import pandas as pd
11
14
from pandas .core .frame import DataFrame
12
15
13
- from pandas ._typing import FilePathOrBuffer
16
+ from pandas ._typing import (
17
+ CompressionOptions ,
18
+ FilePath ,
19
+ HashableT ,
20
+ ReadBuffer ,
21
+ StorageOptions ,
22
+ WriteBuffer ,
23
+ )
14
24
15
25
def read_stata (
16
- path : FilePathOrBuffer ,
26
+ path : FilePath | ReadBuffer [ bytes ] ,
17
27
convert_dates : bool = ...,
18
28
convert_categoricals : bool = ...,
19
- index_col : Optional [ str ] = ...,
29
+ index_col : str | None = ...,
20
30
convert_missing : bool = ...,
21
31
preserve_dtypes : bool = ...,
22
- columns : Optional [ List [ str ]] = ...,
32
+ columns : list [ HashableT ] | None = ...,
23
33
order_categoricals : bool = ...,
24
- chunksize : Optional [ int ] = ...,
34
+ chunksize : int | None = ...,
25
35
iterator : bool = ...,
26
- ) -> DataFrame : ...
36
+ compression : CompressionOptions = ...,
37
+ storage_options : StorageOptions = ...,
38
+ ) -> DataFrame | StataReader : ...
27
39
28
- stata_epoch = ...
40
+ stata_epoch : datetime . datetime = ...
29
41
excessive_string_length_error : str
30
42
31
43
class PossiblePrecisionLoss (Warning ): ...
@@ -41,127 +53,159 @@ class InvalidColumnName(Warning): ...
41
53
invalid_name_doc : str
42
54
43
55
class StataValueLabel :
44
- labname = ...
45
- value_labels = ...
46
- text_len = ...
47
- off = ...
48
- val = ...
49
- txt = ...
56
+ labname : Hashable = ...
57
+ value_labels : list [ tuple [ int | float , str ]] = ...
58
+ text_len : int = ...
59
+ off : npt . NDArray [ np . int32 ] = ...
60
+ val : npt . NDArray [ np . int32 ] = ...
61
+ txt : list [ bytes ] = ...
50
62
n : int = ...
51
- len = ...
52
- def __init__ (self , catarray , encoding : str = ...): ...
53
- def generate_value_label (self , byteorder ): ...
63
+ len : int = ...
64
+ def __init__ (
65
+ self , catarray : pd .Series , encoding : Literal ["latin-1" , "utf-8" ] = ...
66
+ ) -> None : ...
67
+ def generate_value_label (self , byteorder : str ) -> bytes : ...
54
68
55
69
class StataMissingValue :
56
- MISSING_VALUES = ...
57
- bases = ...
70
+ MISSING_VALUES : dict [ float , str ] = ...
71
+ bases : tuple [ int , int , int ] = ...
58
72
float32_base : bytes = ...
59
- increment = ...
60
- value = ...
61
- int_value = ...
73
+ increment : int = ...
74
+ int_value : int = ...
62
75
float64_base : bytes = ...
63
- BASE_MISSING_VALUES = ...
64
- def __init__ (self , value ) -> None : ...
65
- string = ...
66
- def __eq__ (self , other ) -> bool : ...
76
+ BASE_MISSING_VALUES : dict [str , int ] = ...
77
+ def __init__ (self , value : int | float ) -> None : ...
78
+ def __eq__ (self , other : object ) -> bool : ...
79
+ @property
80
+ def string (self ) -> str : ...
81
+ @property
82
+ def value (self ) -> int | float : ...
67
83
@classmethod
68
84
def get_base_missing_value (cls , dtype ): ...
69
85
70
86
class StataParser :
71
- DTYPE_MAP = ...
72
- DTYPE_MAP_XML = ...
73
- TYPE_MAP = ...
74
- TYPE_MAP_XML = ...
75
- VALID_RANGE = ...
76
- OLD_TYPE_MAPPING = ...
77
- MISSING_VALUES = ...
78
- NUMPY_TYPE_MAP = ...
79
- RESERVED_WORDS = ...
87
+ DTYPE_MAP : dict [int , np .dtype ] = ...
88
+ DTYPE_MAP_XML : dict [int , np .dtype ] = ...
89
+ TYPE_MAP : list [tuple [int | str , ...]] = ...
90
+ TYPE_MAP_XML : dict [int , str ] = ...
91
+ VALID_RANGE : dict [
92
+ str ,
93
+ tuple [int , int ] | tuple [np .float32 , np .float32 ] | tuple [np .float64 , np .float64 ],
94
+ ] = ...
95
+ OLD_TYPE_MAPPING : dict [int , int ] = ...
96
+ MISSING_VALUES : dict [str , int ] = ...
97
+ NUMPY_TYPE_MAP : dict [str , str ] = ...
98
+ RESERVED_WORDS : tuple [str , ...] = ...
80
99
def __init__ (self ) -> None : ...
81
100
82
101
class StataReader (StataParser , abc .Iterator ):
83
- col_sizes = ...
84
- path_or_buf = ...
102
+ col_sizes : list [ int ] = ...
103
+ path_or_buf : BytesIO = ...
85
104
def __init__ (
86
105
self ,
87
- path_or_buf ,
106
+ path_or_buf : FilePath | ReadBuffer [ bytes ] ,
88
107
convert_dates : bool = ...,
89
108
convert_categoricals : bool = ...,
90
- index_col = ...,
109
+ index_col : str | None = ...,
91
110
convert_missing : bool = ...,
92
111
preserve_dtypes : bool = ...,
93
- columns = ...,
112
+ columns : Sequence [ str ] | None = ...,
94
113
order_categoricals : bool = ...,
95
- chunksize = ...,
114
+ chunksize : int | None = ...,
115
+ compression : CompressionOptions = ...,
116
+ storage_options : StorageOptions = ...,
117
+ ) -> None : ...
118
+ def __enter__ (self ) -> StataReader : ...
119
+ def __exit__ (
120
+ self ,
121
+ exc_type : type [BaseException ] | None ,
122
+ exc_value : BaseException | None ,
123
+ traceback : TracebackType | None ,
96
124
) -> None : ...
97
- def __enter__ (self ): ...
98
- def __exit__ (self , exc_type , exc_value , traceback ) -> None : ...
99
125
def close (self ) -> None : ...
100
- def __next__ (self ): ...
101
- def get_chunk (self , size = ...): ...
126
+ def __next__ (self ) -> DataFrame : ...
127
+ def get_chunk (self , size : int | None = ...) -> DataFrame : ...
102
128
def read (
103
129
self ,
104
- nrows = ...,
105
- convert_dates = ...,
106
- convert_categoricals = ...,
107
- index_col = ...,
108
- convert_missing = ...,
109
- preserve_dtypes = ...,
110
- columns = ...,
111
- order_categoricals = ...,
130
+ nrows : int | None = ...,
131
+ convert_dates : bool | None = ...,
132
+ convert_categoricals : bool | None = ...,
133
+ index_col : str | None = ...,
134
+ convert_missing : bool | None = ...,
135
+ preserve_dtypes : bool | None = ...,
136
+ columns : list [ str ] | None = ...,
137
+ order_categoricals : bool | None = ...,
112
138
): ...
113
139
@property
114
- def data_label (self ): ...
115
- def variable_labels (self ): ...
116
- def value_labels (self ): ...
140
+ def data_label (self ) -> str : ...
141
+ def variable_labels (self ) -> dict [ str , str ] : ...
142
+ def value_labels (self ) -> dict [ str , dict [ int | float , str ]] : ...
117
143
118
144
class StataWriter (StataParser ):
119
- type_converters = ...
145
+ type_converters : dict [ str , type [ np . dtype ]] = ...
120
146
def __init__ (
121
147
self ,
122
- fname ,
123
- data ,
124
- convert_dates = ...,
148
+ fname : FilePath | WriteBuffer [ bytes ] ,
149
+ data : DataFrame ,
150
+ convert_dates : dict [ Hashable , str ] | None = ...,
125
151
write_index : bool = ...,
126
- byteorder = ...,
127
- time_stamp = ...,
128
- data_label = ...,
129
- variable_labels = ...,
152
+ byteorder : str | None = ...,
153
+ time_stamp : datetime .datetime | None = ...,
154
+ data_label : str | None = ...,
155
+ variable_labels : dict [Hashable , str ] | None = ...,
156
+ compression : CompressionOptions = ...,
157
+ storage_options : StorageOptions = ...,
158
+ * ,
159
+ value_labels : dict [Hashable , dict [float | int , str ]] | None = ...,
130
160
) -> None : ...
131
161
def write_file (self ) -> None : ...
132
162
133
163
class StataStrLWriter :
134
- df = ...
135
- columns = ...
136
- def __init__ (self , df , columns , version : int = ..., byteorder = ...) -> None : ...
137
- def generate_table (self ): ...
138
- def generate_blob (self , gso_table ): ...
164
+ df : DataFrame = ...
165
+ columns : Sequence [str ] = ...
166
+ def __init__ (
167
+ self ,
168
+ df : DataFrame ,
169
+ columns : Sequence [str ],
170
+ version : int = ...,
171
+ byteorder : str | None = ...,
172
+ ) -> None : ...
173
+ def generate_table (self ) -> tuple [dict [str , tuple [int , int ]], DataFrame ]: ...
174
+ def generate_blob (self , gso_table : dict [str , tuple [int , int ]]) -> bytes : ...
139
175
140
176
class StataWriter117 (StataWriter ):
141
177
def __init__ (
142
178
self ,
143
- fname ,
144
- data ,
145
- convert_dates = ...,
179
+ fname : FilePath | WriteBuffer [ bytes ] ,
180
+ data : DataFrame ,
181
+ convert_dates : dict [ Hashable , str ] | None = ...,
146
182
write_index : bool = ...,
147
- byteorder = ...,
148
- time_stamp = ...,
149
- data_label = ...,
150
- variable_labels = ...,
151
- convert_strl = ...,
183
+ byteorder : str | None = ...,
184
+ time_stamp : datetime .datetime | None = ...,
185
+ data_label : str | None = ...,
186
+ variable_labels : dict [Hashable , str ] | None = ...,
187
+ convert_strl : Sequence [Hashable ] | None = ...,
188
+ compression : CompressionOptions = ...,
189
+ storage_options : StorageOptions = ...,
190
+ * ,
191
+ value_labels : dict [Hashable , dict [float | int , str ]] | None = ...,
152
192
) -> None : ...
153
193
154
194
class StataWriterUTF8 (StataWriter117 ):
155
195
def __init__ (
156
196
self ,
157
- fname : FilePathOrBuffer ,
197
+ fname : FilePath | WriteBuffer [ bytes ] ,
158
198
data : DataFrame ,
159
- convert_dates : Optional [ Dict [ Hashable , str ]] = ...,
199
+ convert_dates : dict [ Hashable , str ] | None = ...,
160
200
write_index : bool = ...,
161
- byteorder : Optional [str ] = ...,
162
- time_stamp : Optional [datetime .datetime ] = ...,
163
- data_label : Optional [str ] = ...,
164
- variable_labels : Optional [Dict [Hashable , str ]] = ...,
165
- convert_strl : Optional [Sequence [Hashable ]] = ...,
166
- version : Optional [int ] = ...,
201
+ byteorder : str | None = ...,
202
+ time_stamp : datetime .datetime | None = ...,
203
+ data_label : str | None = ...,
204
+ variable_labels : dict [Hashable , str ] | None = ...,
205
+ convert_strl : Sequence [Hashable ] | None = ...,
206
+ version : int | None = ...,
207
+ compression : CompressionOptions = ...,
208
+ storage_options : StorageOptions = ...,
209
+ * ,
210
+ value_labels : dict [Hashable , dict [float | int , str ]] | None = ...,
167
211
) -> None : ...
0 commit comments