From 4b22dbce65c4dcbed6cf2e9dec4f61e399bf3c21 Mon Sep 17 00:00:00 2001
From: Will Ayd
Date: Sat, 11 Mar 2023 10:56:16 -0800
Subject: [PATCH 1/3] Try simplifying delim macro

---
 pandas/_libs/src/parser/tokenizer.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
index fed9b26d479cb..0c664564b85be 100644
--- a/pandas/_libs/src/parser/tokenizer.c
+++ b/pandas/_libs/src/parser/tokenizer.c
@@ -664,9 +664,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes,
     ((!self->delim_whitespace && c == ' ' && self->skipinitialspace))
 
 // applied when in a field
-#define IS_DELIMITER(c) \
-    ((!self->delim_whitespace && c == self->delimiter) || \
-     (self->delim_whitespace && isblank(c)))
+#define IS_DELIMITER(c) (c == self->delimiter)
 
 #define _TOKEN_CLEANUP() \
     self->stream_len = slen; \

From 398a8097e386d63b289113b80fbc0f5b07e4d138 Mon Sep 17 00:00:00 2001
From: Will Ayd
Date: Tue, 11 Apr 2023 15:34:12 -0700
Subject: [PATCH 2/3] no dereference impl

---
 pandas/_libs/src/parser/tokenizer.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
index 0c664564b85be..e60fc6bf75f91 100644
--- a/pandas/_libs/src/parser/tokenizer.c
+++ b/pandas/_libs/src/parser/tokenizer.c
@@ -664,7 +664,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes,
     ((!self->delim_whitespace && c == ' ' && self->skipinitialspace))
 
 // applied when in a field
-#define IS_DELIMITER(c) (c == self->delimiter)
+#define IS_DELIMITER(c) ((c == delimiter) || (delim_whitespace && isblank(c)))
 
 #define _TOKEN_CLEANUP() \
     self->stream_len = slen; \
@@ -719,6 +719,9 @@ int tokenize_bytes(parser_t *self,
     const char lineterminator = (self->lineterminator == '\0') ?
             '\n' : self->lineterminator;
 
+    const int delim_whitespace = self->delim_whitespace;
+    const char delimiter = self->delimiter;
+
     // 1000 is something that couldn't fit in "char"
     // thus comparing a char to it would always be "false"
     const int carriage_symbol = (self->lineterminator == '\0') ? '\r' : 1000;

From 1f677f537333934061d15379fe249cf16868f574 Mon Sep 17 00:00:00 2001
From: Will Ayd
Date: Wed, 12 Apr 2023 15:25:59 -0700
Subject: [PATCH 3/3] whatsnew

---
 doc/source/whatsnew/v2.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index fca355069ae74..5dfccb0c12576 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -87,7 +87,7 @@ Other enhancements
 - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`)
 - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).
 - Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`)
--
+- Performance improvement in :func:`read_csv` (:issue:`52632`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.notable_bug_fixes: