@@ -72,7 +72,7 @@ def test_invalid_flavor():
72
72
msg = r"\{" + flavor + r"\} is not a valid set of flavors"
73
73
74
74
with pytest .raises (ValueError , match = msg ):
75
- read_html (url , "google" , flavor = flavor )
75
+ read_html (url , match = "google" , flavor = flavor )
76
76
77
77
78
78
@td .skip_if_no ("bs4" )
@@ -121,13 +121,26 @@ def test_to_html_compat(self):
121
121
res = self .read_html (out , attrs = {"class" : "dataframe" }, index_col = 0 )[0 ]
122
122
tm .assert_frame_equal (res , df )
123
123
124
+ @tm .network
125
+ def test_banklist_url_positional_match (self ):
126
+ url = "http://www.fdic.gov/bank/individual/failed/banklist.html"
127
+ # Passing match argument as positional should cause a FutureWarning.
128
+ with tm .assert_produces_warning (FutureWarning ):
129
+ df1 = self .read_html (
130
+ url , "First Federal Bank of Florida" , attrs = {"id" : "table" }
131
+ )
132
+ with tm .assert_produces_warning (FutureWarning ):
133
+ df2 = self .read_html (url , "Metcalf Bank" , attrs = {"id" : "table" })
134
+
135
+ assert_framelist_equal (df1 , df2 )
136
+
124
137
@tm .network
125
138
def test_banklist_url (self ):
126
139
url = "http://www.fdic.gov/bank/individual/failed/banklist.html"
127
140
df1 = self .read_html (
128
- url , "First Federal Bank of Florida" , attrs = {"id" : "table" }
141
+ url , match = "First Federal Bank of Florida" , attrs = {"id" : "table" }
129
142
)
130
- df2 = self .read_html (url , "Metcalf Bank" , attrs = {"id" : "table" })
143
+ df2 = self .read_html (url , match = "Metcalf Bank" , attrs = {"id" : "table" })
131
144
132
145
assert_framelist_equal (df1 , df2 )
133
146
@@ -137,21 +150,25 @@ def test_spam_url(self):
137
150
"https://raw.githubusercontent.com/pandas-dev/pandas/master/"
138
151
"pandas/tests/io/data/html/spam.html"
139
152
)
140
- df1 = self .read_html (url , ".*Water.*" )
141
- df2 = self .read_html (url , "Unit" )
153
+ df1 = self .read_html (url , match = ".*Water.*" )
154
+ df2 = self .read_html (url , match = "Unit" )
142
155
143
156
assert_framelist_equal (df1 , df2 )
144
157
145
158
@pytest .mark .slow
146
159
def test_banklist (self ):
147
- df1 = self .read_html (self .banklist_data , ".*Florida.*" , attrs = {"id" : "table" })
148
- df2 = self .read_html (self .banklist_data , "Metcalf Bank" , attrs = {"id" : "table" })
160
+ df1 = self .read_html (
161
+ self .banklist_data , match = ".*Florida.*" , attrs = {"id" : "table" }
162
+ )
163
+ df2 = self .read_html (
164
+ self .banklist_data , match = "Metcalf Bank" , attrs = {"id" : "table" }
165
+ )
149
166
150
167
assert_framelist_equal (df1 , df2 )
151
168
152
169
def test_spam (self ):
153
- df1 = self .read_html (self .spam_data , ".*Water.*" )
154
- df2 = self .read_html (self .spam_data , "Unit" )
170
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" )
171
+ df2 = self .read_html (self .spam_data , match = "Unit" )
155
172
assert_framelist_equal (df1 , df2 )
156
173
157
174
assert df1 [0 ].iloc [0 , 0 ] == "Proximates"
@@ -168,81 +185,88 @@ def test_banklist_no_match(self):
168
185
assert isinstance (df , DataFrame )
169
186
170
187
def test_spam_header (self ):
171
- df = self .read_html (self .spam_data , ".*Water.*" , header = 2 )[0 ]
188
+ df = self .read_html (self .spam_data , match = ".*Water.*" , header = 2 )[0 ]
172
189
assert df .columns [0 ] == "Proximates"
173
190
assert not df .empty
174
191
175
192
def test_skiprows_int (self ):
176
- df1 = self .read_html (self .spam_data , ".*Water.*" , skiprows = 1 )
177
- df2 = self .read_html (self .spam_data , "Unit" , skiprows = 1 )
193
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" , skiprows = 1 )
194
+ df2 = self .read_html (self .spam_data , match = "Unit" , skiprows = 1 )
178
195
179
196
assert_framelist_equal (df1 , df2 )
180
197
181
198
def test_skiprows_range (self ):
182
- df1 = self .read_html (self .spam_data , ".*Water.*" , skiprows = range (2 ))[0 ]
183
- df2 = self .read_html (self .spam_data , "Unit" , skiprows = range (2 ))[0 ]
199
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" , skiprows = range (2 ))
200
+ df2 = self .read_html (self .spam_data , match = "Unit" , skiprows = range (2 ))
201
+
202
+ assert_framelist_equal (df1 , df2 )
203
+
204
+ def test_skiprows_range_single_frame (self ):
205
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" , skiprows = range (2 ))[0 ]
206
+ df2 = self .read_html (self .spam_data , match = "Unit" , skiprows = range (2 ))[0 ]
207
+
184
208
tm .assert_frame_equal (df1 , df2 )
185
209
186
210
def test_skiprows_list (self ):
187
- df1 = self .read_html (self .spam_data , ".*Water.*" , skiprows = [1 , 2 ])
188
- df2 = self .read_html (self .spam_data , "Unit" , skiprows = [2 , 1 ])
211
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" , skiprows = [1 , 2 ])
212
+ df2 = self .read_html (self .spam_data , match = "Unit" , skiprows = [2 , 1 ])
189
213
190
214
assert_framelist_equal (df1 , df2 )
191
215
192
216
def test_skiprows_set (self ):
193
- df1 = self .read_html (self .spam_data , ".*Water.*" , skiprows = {1 , 2 })
194
- df2 = self .read_html (self .spam_data , "Unit" , skiprows = {2 , 1 })
217
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" , skiprows = {1 , 2 })
218
+ df2 = self .read_html (self .spam_data , match = "Unit" , skiprows = {2 , 1 })
195
219
196
220
assert_framelist_equal (df1 , df2 )
197
221
198
222
def test_skiprows_slice (self ):
199
- df1 = self .read_html (self .spam_data , ".*Water.*" , skiprows = 1 )
200
- df2 = self .read_html (self .spam_data , "Unit" , skiprows = 1 )
223
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" , skiprows = 1 )
224
+ df2 = self .read_html (self .spam_data , match = "Unit" , skiprows = 1 )
201
225
202
226
assert_framelist_equal (df1 , df2 )
203
227
204
228
def test_skiprows_slice_short (self ):
205
- df1 = self .read_html (self .spam_data , ".*Water.*" , skiprows = slice (2 ))
206
- df2 = self .read_html (self .spam_data , "Unit" , skiprows = slice (2 ))
229
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" , skiprows = slice (2 ))
230
+ df2 = self .read_html (self .spam_data , match = "Unit" , skiprows = slice (2 ))
207
231
208
232
assert_framelist_equal (df1 , df2 )
209
233
210
234
def test_skiprows_slice_long (self ):
211
- df1 = self .read_html (self .spam_data , ".*Water.*" , skiprows = slice (2 , 5 ))
212
- df2 = self .read_html (self .spam_data , "Unit" , skiprows = slice (4 , 1 , - 1 ))
235
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" , skiprows = slice (2 , 5 ))
236
+ df2 = self .read_html (self .spam_data , match = "Unit" , skiprows = slice (4 , 1 , - 1 ))
213
237
214
238
assert_framelist_equal (df1 , df2 )
215
239
216
240
def test_skiprows_ndarray (self ):
217
- df1 = self .read_html (self .spam_data , ".*Water.*" , skiprows = np .arange (2 ))
218
- df2 = self .read_html (self .spam_data , "Unit" , skiprows = np .arange (2 ))
241
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" , skiprows = np .arange (2 ))
242
+ df2 = self .read_html (self .spam_data , match = "Unit" , skiprows = np .arange (2 ))
219
243
220
244
assert_framelist_equal (df1 , df2 )
221
245
222
246
def test_skiprows_invalid (self ):
223
247
with pytest .raises (TypeError , match = ("is not a valid type for skipping rows" )):
224
- self .read_html (self .spam_data , ".*Water.*" , skiprows = "asdf" )
248
+ self .read_html (self .spam_data , match = ".*Water.*" , skiprows = "asdf" )
225
249
226
250
def test_index (self ):
227
- df1 = self .read_html (self .spam_data , ".*Water.*" , index_col = 0 )
228
- df2 = self .read_html (self .spam_data , "Unit" , index_col = 0 )
251
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" , index_col = 0 )
252
+ df2 = self .read_html (self .spam_data , match = "Unit" , index_col = 0 )
229
253
assert_framelist_equal (df1 , df2 )
230
254
231
255
def test_header_and_index_no_types (self ):
232
- df1 = self .read_html (self .spam_data , ".*Water.*" , header = 1 , index_col = 0 )
233
- df2 = self .read_html (self .spam_data , "Unit" , header = 1 , index_col = 0 )
256
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" , header = 1 , index_col = 0 )
257
+ df2 = self .read_html (self .spam_data , match = "Unit" , header = 1 , index_col = 0 )
234
258
assert_framelist_equal (df1 , df2 )
235
259
236
260
def test_header_and_index_with_types (self ):
237
- df1 = self .read_html (self .spam_data , ".*Water.*" , header = 1 , index_col = 0 )
238
- df2 = self .read_html (self .spam_data , "Unit" , header = 1 , index_col = 0 )
261
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" , header = 1 , index_col = 0 )
262
+ df2 = self .read_html (self .spam_data , match = "Unit" , header = 1 , index_col = 0 )
239
263
assert_framelist_equal (df1 , df2 )
240
264
241
265
def test_infer_types (self ):
242
266
243
267
# 10892 infer_types removed
244
- df1 = self .read_html (self .spam_data , ".*Water.*" , index_col = 0 )
245
- df2 = self .read_html (self .spam_data , "Unit" , index_col = 0 )
268
+ df1 = self .read_html (self .spam_data , match = ".*Water.*" , index_col = 0 )
269
+ df2 = self .read_html (self .spam_data , match = "Unit" , index_col = 0 )
246
270
assert_framelist_equal (df1 , df2 )
247
271
248
272
def test_string_io (self ):
@@ -252,25 +276,25 @@ def test_string_io(self):
252
276
with open (self .spam_data , ** self .spam_data_kwargs ) as f :
253
277
data2 = StringIO (f .read ())
254
278
255
- df1 = self .read_html (data1 , ".*Water.*" )
256
- df2 = self .read_html (data2 , "Unit" )
279
+ df1 = self .read_html (data1 , match = ".*Water.*" )
280
+ df2 = self .read_html (data2 , match = "Unit" )
257
281
assert_framelist_equal (df1 , df2 )
258
282
259
283
def test_string (self ):
260
284
with open (self .spam_data , ** self .spam_data_kwargs ) as f :
261
285
data = f .read ()
262
286
263
- df1 = self .read_html (data , ".*Water.*" )
264
- df2 = self .read_html (data , "Unit" )
287
+ df1 = self .read_html (data , match = ".*Water.*" )
288
+ df2 = self .read_html (data , match = "Unit" )
265
289
266
290
assert_framelist_equal (df1 , df2 )
267
291
268
292
def test_file_like (self ):
269
293
with open (self .spam_data , ** self .spam_data_kwargs ) as f :
270
- df1 = self .read_html (f , ".*Water.*" )
294
+ df1 = self .read_html (f , match = ".*Water.*" )
271
295
272
296
with open (self .spam_data , ** self .spam_data_kwargs ) as f :
273
- df2 = self .read_html (f , "Unit" )
297
+ df2 = self .read_html (f , match = "Unit" )
274
298
275
299
assert_framelist_equal (df1 , df2 )
276
300
@@ -292,7 +316,7 @@ def test_invalid_url(self):
292
316
def test_file_url (self ):
293
317
url = self .banklist_data
294
318
dfs = self .read_html (
295
- file_path_to_url (os .path .abspath (url )), "First" , attrs = {"id" : "table" }
319
+ file_path_to_url (os .path .abspath (url )), match = "First" , attrs = {"id" : "table" }
296
320
)
297
321
assert isinstance (dfs , list )
298
322
for df in dfs :
@@ -308,7 +332,7 @@ def test_invalid_table_attrs(self):
308
332
309
333
def _bank_data (self , * args , ** kwargs ):
310
334
return self .read_html (
311
- self .banklist_data , "Metcalf" , attrs = {"id" : "table" }, * args , ** kwargs
335
+ self .banklist_data , match = "Metcalf" , attrs = {"id" : "table" }, * args , ** kwargs
312
336
)
313
337
314
338
@pytest .mark .slow
@@ -358,7 +382,7 @@ def test_regex_idempotency(self):
358
382
def test_negative_skiprows (self ):
359
383
msg = r"\(you passed a negative value\)"
360
384
with pytest .raises (ValueError , match = msg ):
361
- self .read_html (self .spam_data , "Water" , skiprows = - 1 )
385
+ self .read_html (self .spam_data , match = "Water" , skiprows = - 1 )
362
386
363
387
@tm .network
364
388
def test_multiple_matches (self ):
@@ -600,7 +624,9 @@ def test_gold_canyon(self):
600
624
raw_text = f .read ()
601
625
602
626
assert gc in raw_text
603
- df = self .read_html (self .banklist_data , "Gold Canyon" , attrs = {"id" : "table" })[0 ]
627
+ df = self .read_html (
628
+ self .banklist_data , match = "Gold Canyon" , attrs = {"id" : "table" }
629
+ )[0 ]
604
630
assert gc in df .to_string ()
605
631
606
632
def test_different_number_of_cols (self ):
@@ -855,7 +881,7 @@ def test_wikipedia_states_table(self, datapath):
855
881
data = datapath ("io" , "data" , "html" , "wikipedia_states.html" )
856
882
assert os .path .isfile (data ), f"{ repr (data )} is not a file"
857
883
assert os .path .getsize (data ), f"{ repr (data )} is an empty file"
858
- result = self .read_html (data , "Arizona" , header = 1 )[0 ]
884
+ result = self .read_html (data , match = "Arizona" , header = 1 )[0 ]
859
885
assert result .shape == (60 , 12 )
860
886
assert "Unnamed" in result .columns [- 1 ]
861
887
assert result ["sq mi" ].dtype == np .dtype ("float64" )
@@ -1065,7 +1091,7 @@ def test_works_on_valid_markup(self, datapath):
1065
1091
@pytest .mark .slow
1066
1092
def test_fallback_success (self , datapath ):
1067
1093
banklist_data = datapath ("io" , "data" , "html" , "banklist.html" )
1068
- self .read_html (banklist_data , ".*Water.*" , flavor = ["lxml" , "html5lib" ])
1094
+ self .read_html (banklist_data , match = ".*Water.*" , flavor = ["lxml" , "html5lib" ])
1069
1095
1070
1096
def test_to_html_timestamp (self ):
1071
1097
rng = date_range ("2000-01-01" , periods = 10 )
0 commit comments