Skip to content

Commit 8eb0b1b

Browse files
authored
Deprecate prefix argument in read_csv and read_table (#44713)
1 parent 1895062 commit 8eb0b1b

File tree

7 files changed

+84
-26
lines changed

7 files changed

+84
-26
lines changed

doc/source/user_guide/io.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,18 @@ squeeze : boolean, default ``False``
161161
the data.
162162
prefix : str, default ``None``
163163
Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
164+
165+
.. deprecated:: 1.4.0
166+
Use a list comprehension on the DataFrame's columns after calling ``read_csv``.
167+
168+
.. ipython:: python
169+
170+
data = "col1,col2,col3\na,b,1"
171+
172+
df = pd.read_csv(StringIO(data))
173+
df.columns = [f"pre_{col}" for col in df.columns]
174+
df
175+
164176
mangle_dupe_cols : boolean, default ``True``
165177
Duplicate columns will be specified as 'X', 'X.1'...'X.N', rather than 'X'...'X'.
166178
Passing in ``False`` will cause data to be overwritten if there are duplicate

doc/source/whatsnew/v1.4.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,7 @@ Other Deprecations
517517
- Deprecated casting behavior when setting timezone-aware value(s) into a timezone-aware :class:`Series` or :class:`DataFrame` column when the timezones do not match. Previously this cast to object dtype. In a future version, the values being inserted will be converted to the series or column's existing timezone (:issue:`37605`)
518518
- Deprecated casting behavior when passing an item with mismatched-timezone to :meth:`DatetimeIndex.insert`, :meth:`DatetimeIndex.putmask`, :meth:`DatetimeIndex.where`, :meth:`DatetimeIndex.fillna`, :meth:`Series.mask`, :meth:`Series.where`, :meth:`Series.fillna`, :meth:`Series.shift`, :meth:`Series.replace`, :meth:`Series.reindex` (and :class:`DataFrame` column analogues). In the past this has cast to object dtype. In a future version, these will cast the passed item to the index or series's timezone (:issue:`37605`)
519519
- Deprecated the 'errors' keyword argument in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, and :meth:`DataFrame.mask`; in a future version the argument will be removed (:issue:`44294`)
520+
- Deprecated the ``prefix`` keyword argument in :func:`read_csv` and :func:`read_table`; in a future version the argument will be removed (:issue:`43396`)
520521
- Deprecated :meth:`PeriodIndex.astype` to ``datetime64[ns]`` or ``DatetimeTZDtype``, use ``obj.to_timestamp(how).tz_localize(dtype.tz)`` instead (:issue:`44398`)
521522
- Deprecated passing non boolean argument to sort in :func:`concat` (:issue:`41518`)
522523
- Deprecated passing arguments as positional for :func:`read_fwf` other than ``filepath_or_buffer`` (:issue:`41485`):

pandas/io/parsers/readers.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@
143143
the data.
144144
prefix : str, optional
145145
Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
146+
147+
.. deprecated:: 1.4.0
148+
Use a list comprehension on the DataFrame's columns after calling ``read_csv``.
146149
mangle_dupe_cols : bool, default True
147150
Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
148151
'X'...'X'. Passing in False will cause data to be overwritten if there
@@ -461,6 +464,9 @@ class _DeprecationConfig(NamedTuple):
461464
"squeeze": _DeprecationConfig(
462465
None, 'Append .squeeze("columns") to the call to squeeze.'
463466
),
467+
"prefix": _DeprecationConfig(
468+
None, "Use a list comprehension on the column names in the future."
469+
),
464470
}
465471

466472

pandas/tests/io/parser/common/test_common_basic.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -823,7 +823,8 @@ def test_names_and_prefix_not_None_raises(all_parsers, func):
823823
parser = all_parsers
824824
msg = "Specified named and prefix; you can only specify one."
825825
with pytest.raises(ValueError, match=msg):
826-
getattr(parser, func)(f, names=["a", "b"], prefix="x")
826+
with tm.assert_produces_warning(FutureWarning):
827+
getattr(parser, func)(f, names=["a", "b"], prefix="x")
827828

828829

829830
@pytest.mark.parametrize("func", ["read_csv", "read_table"])
@@ -833,7 +834,15 @@ def test_names_and_prefix_explicit_None(all_parsers, names, prefix, func):
833834
f = StringIO("a,b\n1,2")
834835
expected = DataFrame({"x0": ["a", "1"], "x1": ["b", "2"]})
835836
parser = all_parsers
836-
result = getattr(parser, func)(f, names=names, sep=",", prefix=prefix, header=None)
837+
if prefix is not None:
838+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
839+
result = getattr(parser, func)(
840+
f, names=names, sep=",", prefix=prefix, header=None
841+
)
842+
else:
843+
result = getattr(parser, func)(
844+
f, names=names, sep=",", prefix=prefix, header=None
845+
)
837846
tm.assert_frame_equal(result, expected)
838847

839848

pandas/tests/io/parser/common/test_read_errors.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,8 @@ def test_read_csv_raises_on_header_prefix(all_parsers):
128128
s = StringIO("0,1\n2,3")
129129

130130
with pytest.raises(ValueError, match=msg):
131-
parser.read_csv(s, header=0, prefix="_X")
131+
with tm.assert_produces_warning(FutureWarning):
132+
parser.read_csv(s, header=0, prefix="_X")
132133

133134

134135
def test_unexpected_keyword_parameter_exception(all_parsers):

pandas/tests/io/parser/test_header.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ def test_no_header_prefix(all_parsers):
8282
6,7,8,9,10
8383
11,12,13,14,15
8484
"""
85-
result = parser.read_csv(StringIO(data), prefix="Field", header=None)
85+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
86+
result = parser.read_csv(StringIO(data), prefix="Field", header=None)
8687
expected = DataFrame(
8788
[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]],
8889
columns=["Field0", "Field1", "Field2", "Field3", "Field4"],
@@ -457,7 +458,11 @@ def test_no_header(all_parsers, kwargs, names):
457458
expected = DataFrame(
458459
[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]], columns=names
459460
)
460-
result = parser.read_csv(StringIO(data), header=None, **kwargs)
461+
if "prefix" in kwargs.keys():
462+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
463+
result = parser.read_csv(StringIO(data), header=None, **kwargs)
464+
else:
465+
result = parser.read_csv(StringIO(data), header=None, **kwargs)
461466
tm.assert_frame_equal(result, expected)
462467

463468

pandas/tests/io/parser/test_parse_dates.py

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -171,14 +171,21 @@ def date_parser(*date_cols):
171171
"""
172172
return parsing.try_parse_dates(parsing.concat_date_cols(date_cols))
173173

174-
result = parser.read_csv(
174+
kwds = {
175+
"header": None,
176+
"date_parser": date_parser,
177+
"prefix": "X",
178+
"parse_dates": {"actual": [1, 2], "nominal": [1, 3]},
179+
"keep_date_col": keep_date_col,
180+
}
181+
result = parser.read_csv_check_warnings(
182+
FutureWarning,
183+
"The prefix argument has been deprecated "
184+
"and will be removed in a future version. .*\n\n",
175185
StringIO(data),
176-
header=None,
177-
date_parser=date_parser,
178-
prefix="X",
179-
parse_dates={"actual": [1, 2], "nominal": [1, 3]},
180-
keep_date_col=keep_date_col,
186+
**kwds,
181187
)
188+
182189
expected = DataFrame(
183190
[
184191
[
@@ -309,13 +316,20 @@ def test_multiple_date_col(all_parsers, keep_date_col):
309316
KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000
310317
"""
311318
parser = all_parsers
312-
result = parser.read_csv(
319+
kwds = {
320+
"header": None,
321+
"prefix": "X",
322+
"parse_dates": [[1, 2], [1, 3]],
323+
"keep_date_col": keep_date_col,
324+
}
325+
result = parser.read_csv_check_warnings(
326+
FutureWarning,
327+
"The prefix argument has been deprecated "
328+
"and will be removed in a future version. .*\n\n",
313329
StringIO(data),
314-
header=None,
315-
prefix="X",
316-
parse_dates=[[1, 2], [1, 3]],
317-
keep_date_col=keep_date_col,
330+
**kwds,
318331
)
332+
319333
expected = DataFrame(
320334
[
321335
[
@@ -427,8 +441,13 @@ def test_date_col_as_index_col(all_parsers):
427441
KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
428442
"""
429443
parser = all_parsers
430-
result = parser.read_csv(
431-
StringIO(data), header=None, prefix="X", parse_dates=[1], index_col=1
444+
kwds = {"header": None, "prefix": "X", "parse_dates": [1], "index_col": 1}
445+
result = parser.read_csv_check_warnings(
446+
FutureWarning,
447+
"The prefix argument has been deprecated "
448+
"and will be removed in a future version. .*\n\n",
449+
StringIO(data),
450+
**kwds,
432451
)
433452

434453
index = Index(
@@ -477,14 +496,19 @@ def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning):
477496
parse_dates = {"actual": [1, 2], "nominal": [1, 3]}
478497
parser = all_parsers
479498

480-
with tm.assert_produces_warning(warning, check_stacklevel=False):
481-
result = parser.read_csv(
482-
StringIO(data),
483-
header=None,
484-
date_parser=date_parser,
485-
parse_dates=parse_dates,
486-
prefix="X",
487-
)
499+
kwds = {
500+
"header": None,
501+
"prefix": "X",
502+
"parse_dates": parse_dates,
503+
"date_parser": date_parser,
504+
}
505+
result = parser.read_csv_check_warnings(
506+
FutureWarning,
507+
"The prefix argument has been deprecated "
508+
"and will be removed in a future version. .*\n\n",
509+
StringIO(data),
510+
**kwds,
511+
)
488512

489513
expected = DataFrame(
490514
[

0 commit comments

Comments
 (0)