Skip to content

Commit 82e2bea

Browse files
committed
Implement fast text conversion interface for QPrint
1 parent f7f3519 commit 82e2bea

File tree

2 files changed

+153
-21
lines changed

2 files changed

+153
-21
lines changed

ext/mbstring/libmbfl/filters/mbfilter_qprint.c

Lines changed: 119 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
#include "mbfilter_qprint.h"
3232
#include "unicode_prop.h"
3333

34+
static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
35+
static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
36+
3437
static const char *mbfl_encoding_qprint_aliases[] = {"qprint", NULL};
3538

3639
const mbfl_encoding mbfl_encoding_qprint = {
@@ -42,8 +45,8 @@ const mbfl_encoding mbfl_encoding_qprint = {
4245
MBFL_ENCTYPE_GL_UNSAFE,
4346
NULL,
4447
NULL,
45-
NULL,
46-
NULL
48+
mb_qprint_to_wchar,
49+
mb_wchar_to_qprint
4750
};
4851

4952
const struct mbfl_convert_vtbl vtbl_8bit_qprint = {
@@ -153,32 +156,32 @@ int mbfl_filt_conv_qprintenc_flush(mbfl_convert_filter *filter)
153156
return 0;
154157
}
155158

159+
/*
 * Lookup table for decoding one hexadecimal digit, indexed by byte value (0x00-0xFF).
 * '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15; every other byte maps to -1.
 * Users of the table only read it, so it is declared const.
 */
static const int hex2code_map[256] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
	/* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
177+
156178
/*
157179
* Quoted-Printable => any
158180
*/
159181
int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter)
160182
{
161183
int n, m;
162184

163-
static int hex2code_map[] = {
164-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
165-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
166-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
167-
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
168-
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
169-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
170-
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
171-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
172-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
173-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
174-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
175-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
176-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
177-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
178-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
179-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
180-
};
181-
182185
switch (filter->status) {
183186
case 1:
184187
if (hex2code_map[c & 0xff] >= 0) {
@@ -242,3 +245,98 @@ int mbfl_filt_conv_qprintdec_flush(mbfl_convert_filter *filter)
242245

243246
return 0;
244247
}
248+
249+
static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
250+
{
251+
unsigned char *p = *in, *e = p + *in_len;
252+
uint32_t *out = buf, *limit = buf + bufsize - 2;
253+
254+
while (p < e && out < limit) {
255+
unsigned char c = *p++;
256+
257+
if (c == '=' && p < e) {
258+
unsigned char c2 = *p++;
259+
260+
if (hex2code_map[c2] >= 0 && p < e) {
261+
unsigned char c3 = *p++;
262+
263+
if (hex2code_map[c3] >= 0) {
264+
*out++ = hex2code_map[c2] << 4 | hex2code_map[c3];
265+
} else {
266+
*out++ = '=';
267+
*out++ = c2;
268+
*out++ = c3;
269+
}
270+
} else if (c2 == '\r' && p < e) {
271+
unsigned char c3 = *p++;
272+
273+
if (c3 != '\n') {
274+
*out++ = c3;
275+
}
276+
} else if (c2 != '\n') {
277+
*out++ = '=';
278+
*out++ = c2;
279+
}
280+
} else {
281+
*out++ = c;
282+
}
283+
}
284+
285+
*in_len = e - p;
286+
*in = p;
287+
return out - buf;
288+
}
289+
290+
/* Convert a 4-bit value (0-15) to the matching uppercase hexadecimal digit */
static unsigned char qprint_enc_nibble(unsigned char nibble)
{
	return (nibble >= 10) ? (nibble - 10 + 'A') : (nibble + '0');
}
298+
299+
/*
 * Raw bytes => Quoted-Printable (fast conversion interface)
 *
 * Encodes `len` values from `in` and appends the result to `buf`. The number of
 * characters already written on the current output line is carried between calls
 * in buf->state. `end` is not used.
 *
 * Output rules:
 *   0x00          -> written as is; resets the line length counter
 *   '\n'          -> "\r\n"; resets the line length counter
 *   '\r'          -> dropped
 *   >= 0x80, '='  -> "=XX" (uppercase hex)
 *   anything else -> written as is
 * A soft line break ("=\r\n") is inserted before handling an input value once
 * 72 or more characters have been written on the current line.
 *
 * Buffer accounting (assumes MB_CONVERT_BUF_ENSURE(buf, out, limit, n) guarantees
 * at least n writable bytes starting at `out`): one byte per input value is
 * reserved up front, and every path which may write more than one byte for a
 * single input value tops up the reservation before writing. Inside the loop,
 * `len` is the number of input values remaining AFTER the current one.
 */
static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	unsigned int chars_output = buf->state;

	while (len--) {
		/* We assume that all the input 'codepoints' are not really Unicode codepoints at all,
		 * but raw bytes from 0x00-0xFF */
		uint32_t w = *in++;

		/* QPrint actually mandates that line length should not be more than 76 characters,
		 * but mbstring stops slightly short of that */
		if (chars_output >= 72) {
			/* 3 bytes for the soft line break, at least 1 byte for the current input
			 * value, and 1 byte for each of the `len` values still to come */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
			out = mb_convert_buf_add3(out, '=', '\r', '\n');
			chars_output = 0;
		}

		if (!w) {
			out = mb_convert_buf_add(out, '\0');
			chars_output = 0;
		} else if (w == '\n') {
			/* One input value expands to two output bytes, so reserve the extra byte */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, '\r', '\n');
			chars_output = 0;
		} else if (w == '\r') {
			/* No output */
		} else if (w >= 0x80 || w == '=') {
			/* Not ASCII */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3);
			out = mb_convert_buf_add3(out, '=', qprint_enc_nibble((w >> 4) & 0xF), qprint_enc_nibble(w & 0xF));
			chars_output += 3;
		} else {
			/* Plain ASCII */
			out = mb_convert_buf_add(out, w);
			chars_output++;
		}
	}

	buf->state = chars_output;
	MB_CONVERT_BUF_STORE(buf, out, limit);
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
--TEST--
2+
Temporary test of mbstring's QPrint 'encoding'
3+
--EXTENSIONS--
4+
mbstring
5+
--FILE--
6+
<?php
7+
8+
/* Using mbstring to convert strings to and from QPrint has already been deprecated
9+
* So this test should be removed when the QPrint 'encoding' is */
10+
11+
/* Verify that $raw encodes to exactly $qprint, and that $qprint decodes back to
 * $raw with every "\n" replaced by "\r\n". Terminates the script with a
 * diagnostic message on the first mismatch. */
function testConversion($raw, $qprint) {
	$encoded = mb_convert_encoding($raw, 'QPrint', '8bit');
	if ($encoded !== $qprint) {
		die('Expected ' . bin2hex($raw) . ' to convert to "' . $qprint . '"; actually got "' . $encoded . '"');
	}

	$decoded = mb_convert_encoding($qprint, '8bit', 'QPrint');
	$expected = str_replace("\n", "\r\n", $raw);
	if ($decoded !== $expected) {
		die('Expected "' . $qprint . '" to convert to ' . bin2hex($raw) . '; actually got ' . bin2hex($decoded));
	}
}
19+
20+
testConversion('', '');
/* U+3042 (HIRAGANA LETTER A) as UTF-8 bytes; written with escapes so that the
 * input does not depend on how this file is stored or rendered */
testConversion("\xE3\x81\x82", '=E3=81=82');

/* Long text containing bytes >= 0x80 and a hard line break; exercises soft line
 * breaks as well. The attribution line starts with U+2014 (EM DASH) followed by
 * U+2009 (THIN SPACE), both written as escaped UTF-8 bytes. */
testConversion("J'interdis aux marchands de vanter trop leurs marchandises. Car ils se font vite pédagogues et t'enseignent comme but ce qui n'est par essence qu'un moyen, et te trompant ainsi sur la route à suivre les voilà bientôt qui te dégradent, car si leur musique est vulgaire ils te fabriquent pour te la vendre une âme vulgaire.
\xE2\x80\x94\xE2\x80\x89Antoine de Saint-Exupéry, Citadelle", "J'interdis aux marchands de vanter trop leurs marchandises. Car ils se f=\r\nont vite p=C3=A9dagogues et t'enseignent comme but ce qui n'est par esse=\r\nnce qu'un moyen, et te trompant ainsi sur la route =C3=A0 suivre les voi=\r\nl=C3=A0 bient=C3=B4t qui te d=C3=A9gradent, car si leur musique est vulg=\r\naire ils te fabriquent pour te la vendre une =C3=A2me vulgaire.\r\n=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry, Citadelle");

echo "Done!\n";
27+
?>
28+
--EXPECTF--
29+
Deprecated: mb_convert_encoding(): Handling QPrint via mbstring is deprecated; use quoted_printable_encode/quoted_printable_decode instead in %s
30+
31+
Deprecated: mb_convert_encoding(): Handling QPrint via mbstring is deprecated; use quoted_printable_encode/quoted_printable_decode instead in %s
32+
33+
Deprecated: mb_convert_encoding(): Handling QPrint via mbstring is deprecated; use quoted_printable_encode/quoted_printable_decode instead in %s
34+
Done!

0 commit comments

Comments
 (0)