Skip to content

Commit ce2bcc5

Browse files
committed
Deprecate use of most positional arguments for read_html and read_json
1 parent 38e16c4 commit ce2bcc5

File tree

8 files changed

+320
-49
lines changed

8 files changed

+320
-49
lines changed

doc/source/whatsnew/v1.0.0.rst

100755 → 100644
File mode changed.

doc/source/whatsnew/v1.1.0.rst

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,9 +73,19 @@ Backwards incompatible API changes
7373

7474
Deprecations
7575
~~~~~~~~~~~~
76+
7677
- Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version. Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`)
78+
7779
- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
78-
-
80+
81+
- Passing any arguments except `io` to :func:`read_html` as positional
82+
arguments is deprecated since version 1.0. All other arguments should
83+
be given as keyword arguments (:issue:`27573`).
84+
85+
- Passing any arguments except `path_or_buf` to :func:`read_json` as positional
86+
arguments is deprecated since version 1.0. All other arguments should
87+
be given as keyword arguments (:issue:`27573`).
88+
7989
-
8090

8191
.. ---------------------------------------------------------------------------

pandas/io/html.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111

1212
from pandas.compat._optional import import_optional_dependency
1313
from pandas.errors import AbstractMethodError, EmptyDataError
14+
from pandas.util._decorators import deprecate_nonkeyword_arguments
1415

1516
from pandas.core.dtypes.common import is_list_like
1617

@@ -921,6 +922,7 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
921922
return ret
922923

923924

925+
@deprecate_nonkeyword_arguments(version="1.4")
924926
def read_html(
925927
io,
926928
match=".+",

pandas/io/json/_json.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
from pandas._libs.tslibs import iNaT
1212
from pandas._typing import JSONSerializable
1313
from pandas.errors import AbstractMethodError
14-
from pandas.util._decorators import deprecate_kwarg
14+
from pandas.util._decorators import deprecate_kwarg, deprecate_nonkeyword_arguments
1515

1616
from pandas.core.dtypes.common import ensure_str, is_period_dtype
1717

@@ -345,6 +345,9 @@ def _write(
345345

346346

347347
@deprecate_kwarg(old_arg_name="numpy", new_arg_name=None)
348+
@deprecate_nonkeyword_arguments(
349+
version="1.4", allowed_args=["path_or_buf"], stacklevel=3
350+
)
348351
def read_json(
349352
path_or_buf=None,
350353
orient=None,
Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,31 @@
1+
"""
2+
Tests for the deprecated keyword arguments for `read_json`.
3+
"""
4+
5+
import pandas as pd
6+
import pandas.util.testing as tm
7+
8+
from pandas.io.json import read_json
9+
10+
11+
def test_deprecated_kwargs():
12+
df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2])
13+
buf = df.to_json(orient="split")
14+
with tm.assert_produces_warning(FutureWarning):
15+
tm.assert_frame_equal(df, read_json(buf, "split"))
16+
buf = df.to_json(orient="columns")
17+
with tm.assert_produces_warning(FutureWarning):
18+
tm.assert_frame_equal(df, read_json(buf, "columns"))
19+
buf = df.to_json(orient="index")
20+
with tm.assert_produces_warning(FutureWarning):
21+
tm.assert_frame_equal(df, read_json(buf, "index"))
22+
23+
24+
def test_good_kwargs():
25+
df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2])
26+
with tm.assert_produces_warning(None):
27+
tm.assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split"))
28+
tm.assert_frame_equal(
29+
df, read_json(df.to_json(orient="columns"), orient="columns")
30+
)
31+
tm.assert_frame_equal(df, read_json(df.to_json(orient="index"), orient="index"))

pandas/tests/io/test_html.py

Lines changed: 73 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ def test_invalid_flavor():
7272
msg = r"\{" + flavor + r"\} is not a valid set of flavors"
7373

7474
with pytest.raises(ValueError, match=msg):
75-
read_html(url, "google", flavor=flavor)
75+
read_html(url, match="google", flavor=flavor)
7676

7777

7878
@td.skip_if_no("bs4")
@@ -121,13 +121,26 @@ def test_to_html_compat(self):
121121
res = self.read_html(out, attrs={"class": "dataframe"}, index_col=0)[0]
122122
tm.assert_frame_equal(res, df)
123123

124+
@tm.network
125+
def test_banklist_url_positional_match(self):
126+
url = "http://www.fdic.gov/bank/individual/failed/banklist.html"
127+
# Passing match argument as positional should cause a FutureWarning.
128+
with tm.assert_produces_warning(FutureWarning):
129+
df1 = self.read_html(
130+
url, "First Federal Bank of Florida", attrs={"id": "table"}
131+
)
132+
with tm.assert_produces_warning(FutureWarning):
133+
df2 = self.read_html(url, "Metcalf Bank", attrs={"id": "table"})
134+
135+
assert_framelist_equal(df1, df2)
136+
124137
@tm.network
125138
def test_banklist_url(self):
126139
url = "http://www.fdic.gov/bank/individual/failed/banklist.html"
127140
df1 = self.read_html(
128-
url, "First Federal Bank of Florida", attrs={"id": "table"}
141+
url, match="First Federal Bank of Florida", attrs={"id": "table"}
129142
)
130-
df2 = self.read_html(url, "Metcalf Bank", attrs={"id": "table"})
143+
df2 = self.read_html(url, match="Metcalf Bank", attrs={"id": "table"})
131144

132145
assert_framelist_equal(df1, df2)
133146

@@ -137,21 +150,25 @@ def test_spam_url(self):
137150
"https://raw.githubusercontent.com/pandas-dev/pandas/master/"
138151
"pandas/tests/io/data/html/spam.html"
139152
)
140-
df1 = self.read_html(url, ".*Water.*")
141-
df2 = self.read_html(url, "Unit")
153+
df1 = self.read_html(url, match=".*Water.*")
154+
df2 = self.read_html(url, match="Unit")
142155

143156
assert_framelist_equal(df1, df2)
144157

145158
@pytest.mark.slow
146159
def test_banklist(self):
147-
df1 = self.read_html(self.banklist_data, ".*Florida.*", attrs={"id": "table"})
148-
df2 = self.read_html(self.banklist_data, "Metcalf Bank", attrs={"id": "table"})
160+
df1 = self.read_html(
161+
self.banklist_data, match=".*Florida.*", attrs={"id": "table"}
162+
)
163+
df2 = self.read_html(
164+
self.banklist_data, match="Metcalf Bank", attrs={"id": "table"}
165+
)
149166

150167
assert_framelist_equal(df1, df2)
151168

152169
def test_spam(self):
153-
df1 = self.read_html(self.spam_data, ".*Water.*")
154-
df2 = self.read_html(self.spam_data, "Unit")
170+
df1 = self.read_html(self.spam_data, match=".*Water.*")
171+
df2 = self.read_html(self.spam_data, match="Unit")
155172
assert_framelist_equal(df1, df2)
156173

157174
assert df1[0].iloc[0, 0] == "Proximates"
@@ -168,81 +185,88 @@ def test_banklist_no_match(self):
168185
assert isinstance(df, DataFrame)
169186

170187
def test_spam_header(self):
171-
df = self.read_html(self.spam_data, ".*Water.*", header=2)[0]
188+
df = self.read_html(self.spam_data, match=".*Water.*", header=2)[0]
172189
assert df.columns[0] == "Proximates"
173190
assert not df.empty
174191

175192
def test_skiprows_int(self):
176-
df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=1)
177-
df2 = self.read_html(self.spam_data, "Unit", skiprows=1)
193+
df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=1)
194+
df2 = self.read_html(self.spam_data, match="Unit", skiprows=1)
178195

179196
assert_framelist_equal(df1, df2)
180197

181198
def test_skiprows_range(self):
182-
df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=range(2))[0]
183-
df2 = self.read_html(self.spam_data, "Unit", skiprows=range(2))[0]
199+
df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=range(2))
200+
df2 = self.read_html(self.spam_data, match="Unit", skiprows=range(2))
201+
202+
assert_framelist_equal(df1, df2)
203+
204+
def test_skiprows_range_single_frame(self):
205+
df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=range(2))[0]
206+
df2 = self.read_html(self.spam_data, match="Unit", skiprows=range(2))[0]
207+
184208
tm.assert_frame_equal(df1, df2)
185209

186210
def test_skiprows_list(self):
187-
df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=[1, 2])
188-
df2 = self.read_html(self.spam_data, "Unit", skiprows=[2, 1])
211+
df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=[1, 2])
212+
df2 = self.read_html(self.spam_data, match="Unit", skiprows=[2, 1])
189213

190214
assert_framelist_equal(df1, df2)
191215

192216
def test_skiprows_set(self):
193-
df1 = self.read_html(self.spam_data, ".*Water.*", skiprows={1, 2})
194-
df2 = self.read_html(self.spam_data, "Unit", skiprows={2, 1})
217+
df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows={1, 2})
218+
df2 = self.read_html(self.spam_data, match="Unit", skiprows={2, 1})
195219

196220
assert_framelist_equal(df1, df2)
197221

198222
def test_skiprows_slice(self):
199-
df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=1)
200-
df2 = self.read_html(self.spam_data, "Unit", skiprows=1)
223+
df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=1)
224+
df2 = self.read_html(self.spam_data, match="Unit", skiprows=1)
201225

202226
assert_framelist_equal(df1, df2)
203227

204228
def test_skiprows_slice_short(self):
205-
df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=slice(2))
206-
df2 = self.read_html(self.spam_data, "Unit", skiprows=slice(2))
229+
df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=slice(2))
230+
df2 = self.read_html(self.spam_data, match="Unit", skiprows=slice(2))
207231

208232
assert_framelist_equal(df1, df2)
209233

210234
def test_skiprows_slice_long(self):
211-
df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=slice(2, 5))
212-
df2 = self.read_html(self.spam_data, "Unit", skiprows=slice(4, 1, -1))
235+
df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=slice(2, 5))
236+
df2 = self.read_html(self.spam_data, match="Unit", skiprows=slice(4, 1, -1))
213237

214238
assert_framelist_equal(df1, df2)
215239

216240
def test_skiprows_ndarray(self):
217-
df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=np.arange(2))
218-
df2 = self.read_html(self.spam_data, "Unit", skiprows=np.arange(2))
241+
df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=np.arange(2))
242+
df2 = self.read_html(self.spam_data, match="Unit", skiprows=np.arange(2))
219243

220244
assert_framelist_equal(df1, df2)
221245

222246
def test_skiprows_invalid(self):
223247
with pytest.raises(TypeError, match=("is not a valid type for skipping rows")):
224-
self.read_html(self.spam_data, ".*Water.*", skiprows="asdf")
248+
self.read_html(self.spam_data, match=".*Water.*", skiprows="asdf")
225249

226250
def test_index(self):
227-
df1 = self.read_html(self.spam_data, ".*Water.*", index_col=0)
228-
df2 = self.read_html(self.spam_data, "Unit", index_col=0)
251+
df1 = self.read_html(self.spam_data, match=".*Water.*", index_col=0)
252+
df2 = self.read_html(self.spam_data, match="Unit", index_col=0)
229253
assert_framelist_equal(df1, df2)
230254

231255
def test_header_and_index_no_types(self):
232-
df1 = self.read_html(self.spam_data, ".*Water.*", header=1, index_col=0)
233-
df2 = self.read_html(self.spam_data, "Unit", header=1, index_col=0)
256+
df1 = self.read_html(self.spam_data, match=".*Water.*", header=1, index_col=0)
257+
df2 = self.read_html(self.spam_data, match="Unit", header=1, index_col=0)
234258
assert_framelist_equal(df1, df2)
235259

236260
def test_header_and_index_with_types(self):
237-
df1 = self.read_html(self.spam_data, ".*Water.*", header=1, index_col=0)
238-
df2 = self.read_html(self.spam_data, "Unit", header=1, index_col=0)
261+
df1 = self.read_html(self.spam_data, match=".*Water.*", header=1, index_col=0)
262+
df2 = self.read_html(self.spam_data, match="Unit", header=1, index_col=0)
239263
assert_framelist_equal(df1, df2)
240264

241265
def test_infer_types(self):
242266

243267
# 10892 infer_types removed
244-
df1 = self.read_html(self.spam_data, ".*Water.*", index_col=0)
245-
df2 = self.read_html(self.spam_data, "Unit", index_col=0)
268+
df1 = self.read_html(self.spam_data, match=".*Water.*", index_col=0)
269+
df2 = self.read_html(self.spam_data, match="Unit", index_col=0)
246270
assert_framelist_equal(df1, df2)
247271

248272
def test_string_io(self):
@@ -252,25 +276,25 @@ def test_string_io(self):
252276
with open(self.spam_data, **self.spam_data_kwargs) as f:
253277
data2 = StringIO(f.read())
254278

255-
df1 = self.read_html(data1, ".*Water.*")
256-
df2 = self.read_html(data2, "Unit")
279+
df1 = self.read_html(data1, match=".*Water.*")
280+
df2 = self.read_html(data2, match="Unit")
257281
assert_framelist_equal(df1, df2)
258282

259283
def test_string(self):
260284
with open(self.spam_data, **self.spam_data_kwargs) as f:
261285
data = f.read()
262286

263-
df1 = self.read_html(data, ".*Water.*")
264-
df2 = self.read_html(data, "Unit")
287+
df1 = self.read_html(data, match=".*Water.*")
288+
df2 = self.read_html(data, match="Unit")
265289

266290
assert_framelist_equal(df1, df2)
267291

268292
def test_file_like(self):
269293
with open(self.spam_data, **self.spam_data_kwargs) as f:
270-
df1 = self.read_html(f, ".*Water.*")
294+
df1 = self.read_html(f, match=".*Water.*")
271295

272296
with open(self.spam_data, **self.spam_data_kwargs) as f:
273-
df2 = self.read_html(f, "Unit")
297+
df2 = self.read_html(f, match="Unit")
274298

275299
assert_framelist_equal(df1, df2)
276300

@@ -292,7 +316,7 @@ def test_invalid_url(self):
292316
def test_file_url(self):
293317
url = self.banklist_data
294318
dfs = self.read_html(
295-
file_path_to_url(os.path.abspath(url)), "First", attrs={"id": "table"}
319+
file_path_to_url(os.path.abspath(url)), match="First", attrs={"id": "table"}
296320
)
297321
assert isinstance(dfs, list)
298322
for df in dfs:
@@ -308,7 +332,7 @@ def test_invalid_table_attrs(self):
308332

309333
def _bank_data(self, *args, **kwargs):
310334
return self.read_html(
311-
self.banklist_data, "Metcalf", attrs={"id": "table"}, *args, **kwargs
335+
self.banklist_data, match="Metcalf", attrs={"id": "table"}, *args, **kwargs
312336
)
313337

314338
@pytest.mark.slow
@@ -358,7 +382,7 @@ def test_regex_idempotency(self):
358382
def test_negative_skiprows(self):
359383
msg = r"\(you passed a negative value\)"
360384
with pytest.raises(ValueError, match=msg):
361-
self.read_html(self.spam_data, "Water", skiprows=-1)
385+
self.read_html(self.spam_data, match="Water", skiprows=-1)
362386

363387
@tm.network
364388
def test_multiple_matches(self):
@@ -600,7 +624,9 @@ def test_gold_canyon(self):
600624
raw_text = f.read()
601625

602626
assert gc in raw_text
603-
df = self.read_html(self.banklist_data, "Gold Canyon", attrs={"id": "table"})[0]
627+
df = self.read_html(
628+
self.banklist_data, match="Gold Canyon", attrs={"id": "table"}
629+
)[0]
604630
assert gc in df.to_string()
605631

606632
def test_different_number_of_cols(self):
@@ -855,7 +881,7 @@ def test_wikipedia_states_table(self, datapath):
855881
data = datapath("io", "data", "html", "wikipedia_states.html")
856882
assert os.path.isfile(data), f"{repr(data)} is not a file"
857883
assert os.path.getsize(data), f"{repr(data)} is an empty file"
858-
result = self.read_html(data, "Arizona", header=1)[0]
884+
result = self.read_html(data, match="Arizona", header=1)[0]
859885
assert result.shape == (60, 12)
860886
assert "Unnamed" in result.columns[-1]
861887
assert result["sq mi"].dtype == np.dtype("float64")
@@ -1065,7 +1091,7 @@ def test_works_on_valid_markup(self, datapath):
10651091
@pytest.mark.slow
10661092
def test_fallback_success(self, datapath):
10671093
banklist_data = datapath("io", "data", "html", "banklist.html")
1068-
self.read_html(banklist_data, ".*Water.*", flavor=["lxml", "html5lib"])
1094+
self.read_html(banklist_data, match=".*Water.*", flavor=["lxml", "html5lib"])
10691095

10701096
def test_to_html_timestamp(self):
10711097
rng = date_range("2000-01-01", periods=10)

0 commit comments

Comments
 (0)