Skip to content

Commit 841a6cc

Browse files
WIP: Implementing suggested changes
1 parent 15d4786 commit 841a6cc

File tree

2 files changed: 25 additions and 23 deletions.

2 files changed

+25
-23
lines changed

pandas/io/parsers.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,6 +1065,20 @@ def _evaluate_usecols(usecols, names):
10651065
return usecols
10661066

10671067

1068+
def _validate_usecols(usecols, names):
1069+
"""
1070+
Validates that all usecols are present in a given
1071+
list of names, if not, raise a ValueError that
1072+
shows what usecols are missing.
1073+
"""
1074+
missing = [c for c in usecols if c not in names]
1075+
if len(missing) > 0:
1076+
raise ValueError(
1077+
"Usecols do not match columns, "
1078+
"columns expected but not found: {missing}".format(missing=missing)
1079+
)
1080+
1081+
10681082
def _validate_skipfooter_arg(skipfooter):
10691083
"""
10701084
Validate the 'skipfooter' parameter.
@@ -1662,22 +1676,14 @@ def __init__(self, src, **kwds):
16621676
# GH 14671
16631677
if (self.usecols_dtype == 'string' and
16641678
not set(usecols).issubset(self.orig_names)):
1665-
missing = [c for c in usecols if c not in self.orig_names]
1666-
raise ValueError(
1667-
"Usecols do not match columns, "
1668-
"columns expected but not found: {}".format(missing)
1669-
)
1679+
_validate_usecols(usecols, self.orig_names)
16701680

16711681
if len(self.names) > len(usecols):
16721682
self.names = [n for i, n in enumerate(self.names)
16731683
if (i in usecols or n in usecols)]
16741684

16751685
if len(self.names) < len(usecols):
1676-
missing = [c for c in usecols if c not in self.names]
1677-
raise ValueError(
1678-
"Usecols do not match columns, "
1679-
"columns expected but not found: {}".format(missing)
1680-
)
1686+
_validate_usecols(usecols, self.names)
16811687

16821688
self._set_noconvert_columns()
16831689

@@ -2451,16 +2457,12 @@ def _handle_usecols(self, columns, usecols_key):
24512457
"be integers.")
24522458
col_indices = []
24532459

2454-
missing = [c for c in self.usecols if c not in usecols_key]
2455-
if len(missing) > 0:
2456-
raise ValueError(
2457-
"Usecols do not match columns, "
2458-
"columns expected but not found: {}".format(missing)
2459-
)
2460-
24612460
for col in self.usecols:
24622461
if isinstance(col, string_types):
2463-
col_indices.append(usecols_key.index(col))
2462+
try:
2463+
col_indices.append(usecols_key.index(col))
2464+
except ValueError:
2465+
_validate_usecols(self.usecols, usecols_key)
24642466
else:
24652467
col_indices.append(col)
24662468
else:

pandas/tests/io/parser/usecols.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ def test_raise_on_usecols_names_mismatch(self):
482482

483483
msg = (
484484
"Usecols do not match columns, "
485-
"columns expected but not found: {}"
485+
"columns expected but not found: {missing}"
486486
)
487487

488488
usecols = ['a', 'b', 'c', 'd']
@@ -492,11 +492,11 @@ def test_raise_on_usecols_names_mismatch(self):
492492
tm.assert_frame_equal(df, expected)
493493

494494
usecols = ['a', 'b', 'c', 'f']
495-
with tm.assert_raises_regex(ValueError, msg.format(['f'])):
495+
with tm.assert_raises_regex(ValueError, msg.format(missing=['f'])):
496496
self.read_csv(StringIO(data), usecols=usecols)
497497

498498
usecols = ['a', 'b', 'f']
499-
with tm.assert_raises_regex(ValueError, msg.format(['f'])):
499+
with tm.assert_raises_regex(ValueError, msg.format(missing=['f'])):
500500
self.read_csv(StringIO(data), usecols=usecols)
501501

502502
names = ['A', 'B', 'C', 'D']
@@ -520,9 +520,9 @@ def test_raise_on_usecols_names_mismatch(self):
520520
# tm.assert_frame_equal(df, expected)
521521

522522
usecols = ['A', 'B', 'C', 'f']
523-
with tm.assert_ra(ValueError, msg.format(['f'])):
523+
with tm.assert_ra(ValueError, msg.format(missing=['f'])):
524524
self.read_csv(StringIO(data), header=0, names=names,
525525
usecols=usecols)
526526
usecols = ['A', 'B', 'f']
527-
with tm.assert_raises_regex(ValueError, msg.format(['f'])):
527+
with tm.assert_raises_regex(ValueError, msg.format(missing=['f'])):
528528
self.read_csv(StringIO(data), names=names, usecols=usecols)

0 commit comments

Comments
 (0)