|
| 1 | +--TEST-- |
| 2 | +PDO_MYSQL: Test quoting of multibyte sequence with GBK vs utf8mb4 |
| 3 | +--EXTENSIONS-- |
| 4 | +pdo_mysql |
| 5 | +--SKIPIF-- |
| 6 | +<?php |
| 7 | +require_once __DIR__ . '/inc/mysql_pdo_test.inc'; |
| 8 | +MySQLPDOTest::skip(); |
| 9 | +?> |
| 10 | +--FILE-- |
| 11 | +<?php |
| 12 | + require_once __DIR__ . '/inc/mysql_pdo_test.inc'; |
| 13 | + |
| 14 | + $link = MySQLPDOTest::factory('PDO', ['charset' => 'GBK']); |
| 15 | + $quoted = $link->quote("\xbf\x27"); |
| 16 | + $quoted_without_outer_quotes = substr($quoted, 1, -1); |
| 17 | + |
| 18 | + /* This should result in 5C BF 5C 27 for GBK instead of BF 5C 27 like with UTF8MB4. |
| 19 | + * To explain why the extra escaping takes place, let's assume we don't do that and see what happens. |
| 20 | + * |
| 21 | + * 1. First iteration, i.e. *from == 0xBF. This isn't a valid GBK multibyte sequence start, |
| 22 | + * so the mb validity check fails. |
| 23 | + * Without the character length check, we'd check if we need to escape the current character 0xBF. |
| 24 | + * The character 0xBF isn't handled in the switch case so we don't escape it and append 0xBF to the output buffer. |
| 25 | + * 2. Second iteration, i.e. *from == 0x27. This isn't a valid start either, so we go to the escape logic. |
| 26 | + * Note that 0x27 is the character ', so we have to escape! We write two bytes to the output: |
| 27 | + * \ (this is 0x5C) and ' (this is 0x27). |
| 28 | + * 3. The function finished, let's look at the output: 0xBF 0x5C 0x27. |
| 29 | + * Now we have actually created a problem: 0xBF 0x5C is a valid GBK multibyte sequence! |
| 30 | + * So we transformed an invalid multibyte sequence into a valid one, potentially corrupting data. |
| 31 | + * The solution is a less strict check for whether the byte could have been part of a multibyte sequence; if so, it is escaped too (hence the leading 5C). */ |
| 32 | + var_dump(bin2hex($quoted_without_outer_quotes)); |
| 33 | + |
| 34 | + unset($link); |
| 35 | + |
| 36 | + // Compare with utf8mb4: the same input is expected to have only the quote escaped (BF 5C 27) |
| 37 | + $link = MySQLPDOTest::factory('PDO', ['charset' => 'utf8mb4']); |
| 38 | + $quoted = $link->quote("\xbf\x27"); |
| 39 | + $quoted_without_outer_quotes = substr($quoted, 1, -1); |
| 40 | + var_dump(bin2hex($quoted_without_outer_quotes)); |
| 41 | +?> |
| 42 | +--EXPECT-- |
| 43 | +string(8) "5cbf5c27" |
| 44 | +string(6) "bf5c27" |
0 commit comments