From 8ba3dd0e879b8c47b57a7530114416d247f0d7c7 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 1 Mar 2016 16:31:17 +0000 Subject: [PATCH] BUG: Fixed grow_buffer to grow when capacity is reached Closes gh-12494. --- doc/source/whatsnew/v0.18.0.txt | 1 + pandas/io/tests/test_parsers.py | 20 ++++++++++++++++++++ pandas/src/parser/tokenizer.c | 2 +- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 8d36d323f48f9..537dfde4a024f 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -1199,3 +1199,4 @@ Bug Fixes - Bug in ``DataFrame.apply`` in which reduction was not being prevented for cases in which ``dtype`` was not a numpy dtype (:issue:`12244`) - Bug when initializing categorical series with a scalar value. (:issue:`12336`) - Bug when specifying a UTC ``DatetimeIndex`` by setting ``utc=True`` in ``.to_datetime`` (:issue:`11934`) +- Bug in ``read_csv`` in which the parser buffer was not grown when the requested space would fill it exactly to capacity, leading to a buffer overflow (:issue:`12494`) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index d3020e337322b..f32dfd37e837c 100755 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -2635,6 +2635,26 @@ def test_eof_states(self): self.assertRaises(Exception, self.read_csv, StringIO(data), escapechar='\\') + def test_grow_boundary_at_cap(self): + # See gh-12494 + # + # Cause of error was the fact that pandas + # was not increasing the buffer size when + # the desired space would fill the buffer + # to capacity, which later would cause a + # buffer overflow error when checking the + # EOF terminator of the CSV stream + def test_empty_header_read(count): + s = StringIO(',' * count) + expected = DataFrame(columns=[ + 'Unnamed: {i}'.format(i=i) + for i in range(count + 1)]) + df = read_csv(s) + tm.assert_frame_equal(df, expected) + + for count in range(1, 101): + test_empty_header_read(count) + class TestPythonParser(ParserTests, 
tm.TestCase): diff --git a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c index a19930a5cef30..dae15215929b7 100644 --- a/pandas/src/parser/tokenizer.c +++ b/pandas/src/parser/tokenizer.c @@ -111,7 +111,7 @@ static void *grow_buffer(void *buffer, int length, int *capacity, void *newbuffer = buffer; // Can we fit potentially nbytes tokens (+ null terminators) in the stream? - while ( (length + space > cap) && (newbuffer != NULL) ){ + while ( (length + space >= cap) && (newbuffer != NULL) ){ cap = cap? cap << 1 : 2; buffer = newbuffer; newbuffer = safe_realloc(newbuffer, elsize * cap);