Skip to content

Commit b2ffe2d

Browse files
committed
ENH: Accept callable for skiprows
1 parent 4de5cdc commit b2ffe2d

File tree

2 files changed

+15
-6
lines changed

2 files changed

+15
-6
lines changed

doc/source/whatsnew/v0.20.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ Other enhancements
110110
- ``pd.qcut`` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`7751`)
111111
- ``Series`` provides a ``to_excel`` method to output Excel files (:issue:`8825`)
112112
- The ``usecols`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`14154`)
113+
- The ``skiprows`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`10882`)
113114
- ``pd.DataFrame.plot`` now prints a title above each subplot if ``subplots=True`` and ``title`` is a list of strings (:issue:`14753`)
114115
- ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
115116
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)

pandas/io/parsers.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -919,7 +919,10 @@ def _clean_options(self, options, engine):
919919
if engine != 'c':
920920
if is_integer(skiprows):
921921
skiprows = lrange(skiprows)
922-
skiprows = set() if skiprows is None else set(skiprows)
922+
if skiprows is None:
923+
skiprows = set()
924+
elif not callable(skiprows):
925+
skiprows = set(skiprows)
923926

924927
# put stuff back
925928
result['names'] = names
@@ -1840,6 +1843,11 @@ def __init__(self, f, **kwds):
18401843
self.memory_map = kwds['memory_map']
18411844
self.skiprows = kwds['skiprows']
18421845

1846+
if callable(self.skiprows):
1847+
self.skipfunc = self.skiprows
1848+
else:
1849+
self.skipfunc = lambda x: x in self.skiprows
1850+
18431851
self.skipfooter = kwds['skipfooter']
18441852
self.delimiter = kwds['delimiter']
18451853

@@ -1995,7 +2003,7 @@ class MyDialect(csv.Dialect):
19952003
# attempt to sniff the delimiter
19962004
if sniff_sep:
19972005
line = f.readline()
1998-
while self.pos in self.skiprows:
2006+
while self.skipfunc(self.pos):
19992007
self.pos += 1
20002008
line = f.readline()
20012009

@@ -2402,7 +2410,7 @@ def _empty(self, line):
24022410

24032411
def _next_line(self):
24042412
if isinstance(self.data, list):
2405-
while self.pos in self.skiprows:
2413+
while self.skipfunc(self.pos):
24062414
self.pos += 1
24072415

24082416
while True:
@@ -2421,7 +2429,7 @@ def _next_line(self):
24212429
except IndexError:
24222430
raise StopIteration
24232431
else:
2424-
while self.pos in self.skiprows:
2432+
while self.skipfunc(self.pos):
24252433
self.pos += 1
24262434
next(self.data)
24272435

@@ -2673,7 +2681,7 @@ def _get_lines(self, rows=None):
26732681
# Check for stop rows. n.b.: self.skiprows is a set or a callable (a callable is always truthy).
26742682
if self.skiprows:
26752683
new_rows = [row for i, row in enumerate(new_rows)
2676-
if i + self.pos not in self.skiprows]
2684+
if not self.skipfunc(i + self.pos)]
26772685

26782686
lines.extend(new_rows)
26792687
self.pos = new_pos
@@ -2701,7 +2709,7 @@ def _get_lines(self, rows=None):
27012709
except StopIteration:
27022710
if self.skiprows:
27032711
new_rows = [row for i, row in enumerate(new_rows)
2704-
if self.pos + i not in self.skiprows]
2712+
if not self.skipfunc(i + self.pos)]
27052713
lines.extend(new_rows)
27062714
if len(lines) == 0:
27072715
raise

0 commit comments

Comments
 (0)