Skip to content

Commit 364cfbf

Browse files
committed
Implement fast text conversion interface for Base64
1 parent 838f2f3 commit 364cfbf

File tree

2 files changed

+171
-2
lines changed

2 files changed

+171
-2
lines changed

ext/mbstring/libmbfl/filters/mbfilter_base64.c

Lines changed: 120 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
#include "mbfilter.h"
3232
#include "mbfilter_base64.h"
3333

34+
static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
35+
static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
36+
3437
const mbfl_encoding mbfl_encoding_base64 = {
3538
mbfl_no_encoding_base64,
3639
"BASE64",
@@ -40,8 +43,8 @@ const mbfl_encoding mbfl_encoding_base64 = {
4043
MBFL_ENCTYPE_GL_UNSAFE,
4144
NULL,
4245
NULL,
43-
NULL,
44-
NULL
46+
mb_base64_to_wchar,
47+
mb_wchar_to_base64
4548
};
4649

4750
const struct mbfl_convert_vtbl vtbl_8bit_b64 = {
@@ -212,3 +215,118 @@ int mbfl_filt_conv_base64dec_flush(mbfl_convert_filter *filter)
212215
}
213216
return 0;
214217
}
218+
219+
/* Translate a single character of the Base64 alphabet into its 6-bit value.
 *
 * 'A'..'Z' map to 0..25, 'a'..'z' to 26..51, '0'..'9' to 52..61,
 * '+' to 62 and '/' to 63.
 *
 * Returns -1 for every other character (including the '=' padding character,
 * which callers are expected to filter out themselves). */
static int decode_base64(char c)
{
	/* The two punctuation characters have no range to test; handle them first */
	switch (c) {
	case '+':
		return 62;
	case '/':
		return 63;
	default:
		break;
	}

	if ('A' <= c && c <= 'Z') {
		return c - 'A';
	}
	if ('a' <= c && c <= 'z') {
		return 26 + (c - 'a');
	}
	if ('0' <= c && c <= '9') {
		return 52 + (c - '0');
	}

	/* Not part of the Base64 alphabet */
	return -1;
}
234+
235+
static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
236+
{
237+
unsigned char *p = *in, *e = p + *in_len;
238+
/* Reserve two slots at the end of the output buffer so that we always have
239+
* space to emit any trailing bytes when we hit the end of the input string */
240+
uint32_t *out = buf, *limit = buf + bufsize - 2;
241+
242+
unsigned int bits = *state & 0xFF, cache = *state >> 8;
243+
244+
while (p < e && (limit - out) >= 3) {
245+
unsigned char c = *p++;
246+
247+
if (c == '\r' || c == '\n' || c == ' ' || c == '\t' || c == '=') {
248+
continue;
249+
}
250+
251+
int value = decode_base64(c);
252+
253+
if (value == -1) {
254+
*out++ = MBFL_BAD_INPUT;
255+
} else {
256+
bits += 6;
257+
cache = (cache << 6) | (value & 0x3F);
258+
if (bits == 24) {
259+
*out++ = (cache >> 16) & 0xFF;
260+
*out++ = (cache >> 8) & 0xFF;
261+
*out++ = cache & 0xFF;
262+
bits = cache = 0;
263+
}
264+
}
265+
}
266+
267+
if (p == e) {
268+
if (bits) {
269+
if (bits == 18) {
270+
*out++ = (cache >> 10) & 0xFF;
271+
*out++ = (cache >> 2) & 0xFF;
272+
} else if (bits == 12) {
273+
*out++ = (cache >> 4) & 0xFF;
274+
}
275+
}
276+
} else {
277+
*state = (cache << 8) | (bits & 0xFF);
278+
}
279+
280+
*in_len = e - p;
281+
*in = p;
282+
return out - buf;
283+
}
284+
285+
/* Encode a run of 8-bit values as Base64 text and append it to `buf`.
 *
 * in / len  Input code units; only the low 8 bits of each are encoded.
 * buf       Output conversion buffer. `buf->state` carries data between calls:
 *             bits 0-1  number of input bytes waiting in the cache (0..2)
 *             bits 2-7  number of 4-character groups already on the current line
 *             bits 8+   the waiting input bytes themselves
 * end       True on the final call; any waiting bytes are then flushed as a
 *           '='-padded group.
 *
 * A CR+LF is inserted before any group that would make a line longer than 76
 * characters, including the final padded group. */
static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	/* Unpack the carried-over state first: the space calculation below needs to
	 * know how many input bytes are still pending from the previous call */
	unsigned int bits = (buf->state & 0x3) * 8;
	unsigned int chars_output = ((buf->state >> 2) & 0x3F) * 4;
	unsigned int cache = buf->state >> 8;

	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Every 3 bytes of input converts to 4 bytes of output... but if the number of input
	 * bytes is not a multiple of 3, we still pad the output out to a multiple of 4
	 * That's `(len + 2) * 4 / 3`, to calculate the amount of space needed in the output buffer
	 *
	 * But also, we add a CR+LF line ending (2 bytes) for every 76 bytes of output
	 * That means we must multiply the above number by 78/76
	 * Use `zend_safe_address_guarded` to check that the multiplication doesn't overflow
	 *
	 * This function may be entered several times while converting one long string, so:
	 *  - up to 2 bytes cached by the previous call are encoded together with this call's
	 *    input; count them (`bits / 8`) as part of the input length
	 *  - the current line may already be full on entry, in which case a CR+LF is emitted
	 *    before the very first group; reserve 2 extra bytes for it
	 * Without these two adjustments, the reservation can fall short of what is written
	 * (e.g. len == 1 with two cached bytes and a full line reserves 4 bytes but writes 6) */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, (zend_safe_address_guarded(len + (bits / 8), 26, 52) / 19) + 2);

	while (len--) {
		uint32_t w = *in++;
		cache = (cache << 8) | (w & 0xFF);
		bits += 8;
		if (bits == 24) {
			/* 76 characters already on this line; start a new one */
			if (chars_output > 72) {
				out = mb_convert_buf_add2(out, '\r', '\n');
				chars_output = 0;
			}
			out = mb_convert_buf_add4(out,
				mbfl_base64_table[(cache >> 18) & 0x3F],
				mbfl_base64_table[(cache >> 12) & 0x3F],
				mbfl_base64_table[(cache >> 6) & 0x3F],
				mbfl_base64_table[cache & 0x3F]);
			chars_output += 4;
			bits = cache = 0;
		}
	}

	if (end && bits) {
		/* The final, padded group is subject to the same line-length limit as
		 * every other group; without this check it would be appended to an
		 * already-full line, making that line 80 characters long */
		if (chars_output > 72) {
			out = mb_convert_buf_add2(out, '\r', '\n');
			chars_output = 0;
		}
		if (bits == 8) {
			/* One byte left: 2 data characters + "==" */
			out = mb_convert_buf_add4(out, mbfl_base64_table[(cache >> 2) & 0x3F], mbfl_base64_table[(cache & 0x3) << 4], '=', '=');
		} else {
			/* Two bytes left: 3 data characters + "=" */
			out = mb_convert_buf_add4(out, mbfl_base64_table[(cache >> 10) & 0x3F], mbfl_base64_table[(cache >> 4) & 0x3F], mbfl_base64_table[(cache & 0xF) << 2], '=');
		}
	} else {
		/* Save pending bytes and the current line position for the next call */
		buf->state = (cache << 8) | (((chars_output / 4) & 0x3F) << 2) | ((bits / 8) & 0x3);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
--TEST--
2+
Temporary test of mbstring's Base64 'encoding'
3+
--EXTENSIONS--
4+
mbstring
5+
--FILE--
6+
<?php
7+
8+
/* Using mbstring to convert strings to and from Base64 has already been deprecated
9+
* So this test should be removed when the Base64 'encoding' is */
10+
11+
/* Round-trip check: $raw must encode to exactly $base64, and $base64 must
 * decode back to exactly $raw. On the first mismatch the script is aborted
 * with a message showing the expected and actual values. */
function testConversion($raw, $base64) {
12+
// Encode direction: raw bytes ('8bit') -> Base64 text
$converted = mb_convert_encoding($raw, 'Base64', '8bit');
13+
if ($converted !== $base64)
14+
die('Expected ' . bin2hex($raw) . ' to convert to "' . $base64 . '"; actually got "' . $converted . '"');
15+
// Decode direction: Base64 text -> raw bytes ('8bit')
$converted = mb_convert_encoding($base64, '8bit', 'Base64');
16+
if ($converted !== $raw)
17+
die('Expected "' . $base64 . '" to convert to ' . bin2hex($raw) . '; actually got ' . bin2hex($converted));
18+
}
19+
20+
testConversion('', '');
21+
testConversion('a', 'YQ==');
22+
testConversion('ab', 'YWI=');
23+
testConversion("\x01\x02\x03", 'AQID');
24+
testConversion("\xFF\xFE\x11\x22", '//4RIg==');
25+
testConversion("\x00", 'AA==');
26+
testConversion("\x00\x00", 'AAA=');
27+
testConversion("\x00\x00\x00", 'AAAA');
28+
29+
testConversion(str_repeat("ABCDEFGHIJ", 20), "QUJDREVGR0hJSkFCQ0RFRkdISUpBQkNERUZHSElKQUJDREVGR0hJSkFCQ0RFRkdISUpBQkNERUZH\r\nSElKQUJDREVGR0hJSkFCQ0RFRkdISUpBQkNERUZHSElKQUJDREVGR0hJSkFCQ0RFRkdISUpBQkNE\r\nRUZHSElKQUJDREVGR0hJSkFCQ0RFRkdISUpBQkNERUZHSElKQUJDREVGR0hJSkFCQ0RFRkdISUpB\r\nQkNERUZHSElKQUJDREVGR0hJSkFCQ0RFRkdISUo=");
30+
31+
echo "Done!\n";
32+
?>
33+
--EXPECTF--
34+
Deprecated: mb_convert_encoding(): Handling Base64 via mbstring is deprecated; use base64_encode/base64_decode instead in %s
35+
36+
Deprecated: mb_convert_encoding(): Handling Base64 via mbstring is deprecated; use base64_encode/base64_decode instead in %s
37+
38+
Deprecated: mb_convert_encoding(): Handling Base64 via mbstring is deprecated; use base64_encode/base64_decode instead in %s
39+
40+
Deprecated: mb_convert_encoding(): Handling Base64 via mbstring is deprecated; use base64_encode/base64_decode instead in %s
41+
42+
Deprecated: mb_convert_encoding(): Handling Base64 via mbstring is deprecated; use base64_encode/base64_decode instead in %s
43+
44+
Deprecated: mb_convert_encoding(): Handling Base64 via mbstring is deprecated; use base64_encode/base64_decode instead in %s
45+
46+
Deprecated: mb_convert_encoding(): Handling Base64 via mbstring is deprecated; use base64_encode/base64_decode instead in %s
47+
48+
Deprecated: mb_convert_encoding(): Handling Base64 via mbstring is deprecated; use base64_encode/base64_decode instead in %s
49+
50+
Deprecated: mb_convert_encoding(): Handling Base64 via mbstring is deprecated; use base64_encode/base64_decode instead in %s
51+
Done!

0 commit comments

Comments
 (0)