From d475fb8694ab5ccf478ee7d269a0924ffbe97f07 Mon Sep 17 00:00:00 2001 From: Calvin Buckley Date: Wed, 8 Mar 2023 15:33:32 -0400 Subject: [PATCH 1/3] pdo_odbc: Don't fetch 256 byte blocks for long columns Fetching 256 byte blocks can confuse some drivers with conversion routines. That, and it seems to me the round trips to and from a database could be a major performance impact. Instead, we try to fetch all at once, and continue fetching if a driver somehow has more for us. This has been tested with a problematic case with the Db2i driver with stateful MBCS encodings. See GH-10733 for discussion about this and issues it can resolve. --- ext/pdo_odbc/odbc_stmt.c | 46 +++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/ext/pdo_odbc/odbc_stmt.c b/ext/pdo_odbc/odbc_stmt.c index ea0dd0e53aac6..41c8a576fc99f 100644 --- a/ext/pdo_odbc/odbc_stmt.c +++ b/ext/pdo_odbc/odbc_stmt.c @@ -671,23 +671,29 @@ static int odbc_stmt_get_col(pdo_stmt_t *stmt, int colno, zval *result, enum pdo } if (rc == SQL_SUCCESS_WITH_INFO || rc == SQL_SUCCESS) { - /* this is a 'long column' - - read the column in 255 byte blocks until the end of the column is reached, reassembling those blocks - in order into the output buffer; 255 bytes are an optimistic assumption, since the driver may assert - more or less NUL bytes at the end; we cater to that later, if actual length information is available - - this loop has to work whether or not SQLGetData() provides the total column length. - calling SQLDescribeCol() or other, specifically to get the column length, then doing a single read - for that size would be slower except maybe for extremely long columns.*/ - char *buf2 = emalloc(256); - zend_string *str = zend_string_init(C->data, 256, 0); - size_t used = 255; /* not 256; the driver NUL terminated the buffer */ + /* + * This is a long column. + * + * Try to get as much as we can at once. If the + * driver somehow has more for us, get more. We'll + * assemble it into one big buffer at the end. + * + * N.B. with n and n+1 mentioned in the comments: + * n is the size returned without null terminator. + * + * The extension previously tried getting it in 256 + * byte blocks, but this could have created trouble + * with some drivers. + */ + SQLLEN to_fetch_len = orig_fetched_len + 1; /* for 0 at end */ + char *buf2 = emalloc(to_fetch_len); + zend_string *str = zend_string_init(C->data, to_fetch_len, 0); + size_t used = orig_fetched_len; /* not n + 1; the driver NUL terminated the buffer */ do { C->fetched_len = 0; - /* read block. 256 bytes => 255 bytes are actually read, the last 1 is NULL */ - rc = SQLGetData(S->stmt, colno+1, C->is_unicode ? SQL_C_BINARY : SQL_C_CHAR, buf2, 256, &C->fetched_len); + /* read block. n + 1 bytes => n bytes are actually read, the last 1 is NULL */ + rc = SQLGetData(S->stmt, colno+1, C->is_unicode ? SQL_C_BINARY : SQL_C_CHAR, buf2, to_fetch_len, &C->fetched_len); /* adjust `used` in case we have length info from the driver */ if (orig_fetched_len >= 0 && C->fetched_len >= 0) { @@ -697,13 +703,13 @@ static int odbc_stmt_get_col(pdo_stmt_t *stmt, int colno, zval *result, enum pdo } /* resize output buffer and reassemble block */ - if (rc==SQL_SUCCESS_WITH_INFO || (rc==SQL_SUCCESS && C->fetched_len > 255)) { + if (rc==SQL_SUCCESS_WITH_INFO || (rc==SQL_SUCCESS && C->fetched_len > orig_fetched_len)) { /* point 5, in section "Retrieving Data with SQLGetData" in http://msdn.microsoft.com/en-us/library/windows/desktop/ms715441(v=vs.85).aspx - states that if SQL_SUCCESS_WITH_INFO, fetched_len will be > 255 (greater than buf2's size) - (if a driver fails to follow that and wrote less than 255 bytes to buf2, this will AV or read garbage into buf) */ - str = zend_string_realloc(str, used + 256, 0); - memcpy(ZSTR_VAL(str) + used, buf2, 256); - used = used + 255; + states that if SQL_SUCCESS_WITH_INFO, fetched_len will be > n (greater than buf2's size) + (if a driver fails to follow that and wrote less than n bytes to buf2, this will AV or read garbage into buf) */ + str = zend_string_realloc(str, used + to_fetch_len, 0); + memcpy(ZSTR_VAL(str) + used, buf2, to_fetch_len); + used = used + orig_fetched_len; } else if (rc==SQL_SUCCESS) { str = zend_string_realloc(str, used + C->fetched_len, 0); memcpy(ZSTR_VAL(str) + used, buf2, C->fetched_len); From c4f8b0baf3df30f4aebca7b6bc39d377cd1202b1 Mon Sep 17 00:00:00 2001 From: SakiTakamachi Date: Sat, 21 Oct 2023 00:32:00 +0900 Subject: [PATCH 2/3] change to separate by 256 bytes, when C->fetched_len == SQL_NO_TOTAL change to separate by 256 bytes, when C->fetched_len == SQL_NO_TOTAL changed from 256 byte to 2048 byte buf block. --- ext/pdo_odbc/odbc_stmt.c | 41 ++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/ext/pdo_odbc/odbc_stmt.c b/ext/pdo_odbc/odbc_stmt.c index 41c8a576fc99f..18bf7ec2bd0df 100644 --- a/ext/pdo_odbc/odbc_stmt.c +++ b/ext/pdo_odbc/odbc_stmt.c @@ -615,7 +615,7 @@ static int odbc_stmt_describe(pdo_stmt_t *stmt, int colno) /* tell ODBC to put it straight into our buffer, but only if it * isn't "long" data, and only if we haven't already bound a long * column. */ - if (colsize < 256 && !S->going_long) { + if (colsize < 2048 && !S->going_long) { S->cols[colno].data = emalloc(colsize+1); S->cols[colno].is_long = 0; @@ -631,7 +631,7 @@ static int odbc_stmt_describe(pdo_stmt_t *stmt, int colno) } else { /* allocate a smaller buffer to keep around for smaller * "long" columns */ - S->cols[colno].data = emalloc(256); + S->cols[colno].data = emalloc(2048); S->going_long = 1; S->cols[colno].is_long = 1; } @@ -657,14 +657,14 @@ static int odbc_stmt_get_col(pdo_stmt_t *stmt, int colno, zval *result, enum pdo RETCODE rc; /* fetch it into C->data, which is allocated with a length - * of 256 bytes; if there is more to be had, we then allocate + * of 2048 bytes; if there is more to be had, we then allocate * bigger buffer for the caller to free */ rc = SQLGetData(S->stmt, colno+1, C->is_unicode ? SQL_C_BINARY : SQL_C_CHAR, C->data, - 256, &C->fetched_len); + 2048, &C->fetched_len); orig_fetched_len = C->fetched_len; - if (rc == SQL_SUCCESS && C->fetched_len < 256) { + if (rc == SQL_SUCCESS && C->fetched_len < 2048) { /* all the data fit into our little buffer; * jump down to the generic bound data case */ goto in_data; @@ -684,16 +684,29 @@ static int odbc_stmt_get_col(pdo_stmt_t *stmt, int colno, zval *result, enum pdo * The extension previously tried getting it in 256 * byte blocks, but this could have created trouble * with some drivers. + * + * However, depending on the driver, fetched_len may + * not contain the number of bytes and SQL_NO_TOTAL + * may be passed. + * The behavior in this case is the same as before, + * dividing the data into blocks. However, it has been + * changed from 256 byte to 2048 byte block. */ - SQLLEN to_fetch_len = orig_fetched_len + 1; /* for 0 at end */ - char *buf2 = emalloc(to_fetch_len); - zend_string *str = zend_string_init(C->data, to_fetch_len, 0); - size_t used = orig_fetched_len; /* not n + 1; the driver NUL terminated the buffer */ + ssize_t to_fetch_len; + if (orig_fetched_len == SQL_NO_TOTAL) { + to_fetch_len = C->datalen > 2047 ? 2047 : C->datalen; + } else { + to_fetch_len = orig_fetched_len; + } + ssize_t to_fetch_byte = to_fetch_len + 1; + char *buf2 = emalloc(to_fetch_byte); + zend_string *str = zend_string_init(C->data, to_fetch_byte, 0); + size_t used = to_fetch_len; do { C->fetched_len = 0; /* read block. n + 1 bytes => n bytes are actually read, the last 1 is NULL */ - rc = SQLGetData(S->stmt, colno+1, C->is_unicode ? SQL_C_BINARY : SQL_C_CHAR, buf2, to_fetch_len, &C->fetched_len); + rc = SQLGetData(S->stmt, colno+1, C->is_unicode ? SQL_C_BINARY : SQL_C_CHAR, buf2, to_fetch_byte, &C->fetched_len); /* adjust `used` in case we have length info from the driver */ if (orig_fetched_len >= 0 && C->fetched_len >= 0) { @@ -703,13 +716,13 @@ static int odbc_stmt_get_col(pdo_stmt_t *stmt, int colno, zval *result, enum pdo } /* resize output buffer and reassemble block */ - if (rc==SQL_SUCCESS_WITH_INFO || (rc==SQL_SUCCESS && C->fetched_len > orig_fetched_len)) { + if (rc==SQL_SUCCESS_WITH_INFO || (rc==SQL_SUCCESS && C->fetched_len > to_fetch_len)) { /* point 5, in section "Retrieving Data with SQLGetData" in http://msdn.microsoft.com/en-us/library/windows/desktop/ms715441(v=vs.85).aspx states that if SQL_SUCCESS_WITH_INFO, fetched_len will be > n (greater than buf2's size) (if a driver fails to follow that and wrote less than n bytes to buf2, this will AV or read garbage into buf) */ - str = zend_string_realloc(str, used + to_fetch_len, 0); - memcpy(ZSTR_VAL(str) + used, buf2, to_fetch_len); - used = used + orig_fetched_len; + str = zend_string_realloc(str, used + to_fetch_byte, 0); + memcpy(ZSTR_VAL(str) + used, buf2, to_fetch_byte); + used = used + to_fetch_len; } else if (rc==SQL_SUCCESS) { str = zend_string_realloc(str, used + C->fetched_len, 0); memcpy(ZSTR_VAL(str) + used, buf2, C->fetched_len); From a97752e0fbbe8c2694c6344e3fb58247d247059b Mon Sep 17 00:00:00 2001 From: Calvin Buckley Date: Wed, 25 Oct 2023 12:55:35 -0300 Subject: [PATCH 3/3] Make long column buffer size single define Could be configurable maybe, but best to avoid magic numbers even for a compile-time constant. --- ext/pdo_odbc/odbc_stmt.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ext/pdo_odbc/odbc_stmt.c b/ext/pdo_odbc/odbc_stmt.c index 18bf7ec2bd0df..5520597fd876d 100644 --- a/ext/pdo_odbc/odbc_stmt.c +++ b/ext/pdo_odbc/odbc_stmt.c @@ -26,6 +26,9 @@ #include "php_pdo_odbc.h" #include "php_pdo_odbc_int.h" +/* Buffer size; bigger columns than this become a "long column" */ +#define LONG_COLUMN_BUFFER_SIZE 2048 + enum pdo_odbc_conv_result { PDO_ODBC_CONV_NOT_REQUIRED, PDO_ODBC_CONV_OK, @@ -615,7 +618,7 @@ static int odbc_stmt_describe(pdo_stmt_t *stmt, int colno) /* tell ODBC to put it straight into our buffer, but only if it * isn't "long" data, and only if we haven't already bound a long * column. */ - if (colsize < 2048 && !S->going_long) { + if (colsize < LONG_COLUMN_BUFFER_SIZE && !S->going_long) { S->cols[colno].data = emalloc(colsize+1); S->cols[colno].is_long = 0; @@ -631,7 +634,7 @@ static int odbc_stmt_describe(pdo_stmt_t *stmt, int colno) } else { /* allocate a smaller buffer to keep around for smaller * "long" columns */ - S->cols[colno].data = emalloc(2048); + S->cols[colno].data = emalloc(LONG_COLUMN_BUFFER_SIZE); S->going_long = 1; S->cols[colno].is_long = 1; } @@ -661,10 +664,10 @@ static int odbc_stmt_get_col(pdo_stmt_t *stmt, int colno, zval *result, enum pdo * bigger buffer for the caller to free */ rc = SQLGetData(S->stmt, colno+1, C->is_unicode ? SQL_C_BINARY : SQL_C_CHAR, C->data, - 2048, &C->fetched_len); + LONG_COLUMN_BUFFER_SIZE, &C->fetched_len); orig_fetched_len = C->fetched_len; - if (rc == SQL_SUCCESS && C->fetched_len < 2048) { + if (rc == SQL_SUCCESS && C->fetched_len < LONG_COLUMN_BUFFER_SIZE) { /* all the data fit into our little buffer; * jump down to the generic bound data case */ goto in_data; @@ -694,7 +697,7 @@ static int odbc_stmt_get_col(pdo_stmt_t *stmt, int colno, zval *result, enum pdo */ ssize_t to_fetch_len; if (orig_fetched_len == SQL_NO_TOTAL) { - to_fetch_len = C->datalen > 2047 ? 2047 : C->datalen; + to_fetch_len = C->datalen > (LONG_COLUMN_BUFFER_SIZE - 1) ? (LONG_COLUMN_BUFFER_SIZE - 1) : C->datalen; } else { to_fetch_len = orig_fetched_len; }