Skip to content

Major overhaul of mbstring (part 31) #10591

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ PHP NEWS
MB_CASE_LOWER_SIMPLE and MB_CASE_TITLE_SIMPLE. (Alex Dowad)
. mb_detect_encoding is better able to identify UTF-8 and UTF-16 strings
with a byte-order mark. (Alex Dowad)
. mb_decode_mimeheader handles underscores in QPrint-encoded MIME encoded
words properly according to the standard (RFC 2047). (Alex Dowad)

- Opcache:
. Added start, restart and force restart time to opcache's
Expand Down
4 changes: 4 additions & 0 deletions UPGRADING
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ PHP 8.3 UPGRADE NOTES
casing rules for the Greek letter sigma. For mb_convert_case, conditional
casing only applies to MB_CASE_LOWER and MB_CASE_TITLE modes, not to
MB_CASE_LOWER_SIMPLE and MB_CASE_TITLE_SIMPLE. (Alex Dowad)
. mb_decode_mimeheader handles underscores in QPrint-encoded MIME encoded
words as dictated by RFC 2047; they are converted to spaces (byte 0x20).
To include an underscore in a QPrint-encoded MIME encoded word, it must
be encoded as "=5F". (Alex Dowad)

- Standard:
. E_NOTICEs emitted by unserialize() have been promoted to E_WARNING.
Expand Down
18 changes: 3 additions & 15 deletions ext/mbstring/libmbfl/filters/mbfilter_sjis.c
Original file line number Diff line number Diff line change
Expand Up @@ -2250,11 +2250,7 @@ static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *bu
/* Continue what we were doing on the previous call */
w = buf->state;
buf->state = 0;
if (len) {
goto reprocess_wchar;
} else {
goto emit_output;
}
goto reprocess_wchar;
}

while (len--) {
Expand Down Expand Up @@ -2482,11 +2478,7 @@ static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf,
if (buf->state) {
w = buf->state;
buf->state = 0;
if (len) {
goto reprocess_wchar;
} else {
goto emit_output;
}
goto reprocess_wchar;
}

while (len--) {
Expand Down Expand Up @@ -2793,11 +2785,7 @@ static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, b
if (buf->state) {
w = buf->state;
buf->state = 0;
if (len) {
goto reprocess_wchar;
} else {
goto emit_output;
}
goto reprocess_wchar;
}

while (len--) {
Expand Down
273 changes: 0 additions & 273 deletions ext/mbstring/libmbfl/mbfl/mbfilter.c
Original file line number Diff line number Diff line change
Expand Up @@ -832,276 +832,3 @@ mbfl_mime_header_encode(

return result;
}


/*
* MIME header decode
*/
/* Working state of one MIME header decoding run. Created by
 * mime_header_decoder_new() and released by mime_header_decoder_delete(). */
struct mime_header_decoder_data {
mbfl_convert_filter *deco_filter; /* transfer-encoding decoder (base64/qprint -> 8bit); output is piped into conv1_filter */
mbfl_convert_filter *conv1_filter; /* input charset -> wchar; output is piped into conv2_filter */
mbfl_convert_filter *conv2_filter; /* wchar -> outcode; output is written to outdev */
mbfl_memory_device outdev; /* accumulates the final converted output */
mbfl_memory_device tmpdev; /* bytes held back while it is undecided whether they start an encoded word */
size_t cspos; /* offset in tmpdev at which the charset name begins (just after "=?") */
int status; /* current state of the collector state machine; 0 = plain text */
const mbfl_encoding *encoding; /* transfer encoding of the current encoded word (base64 or qprint) */
const mbfl_encoding *incode; /* charset named by the current encoded word */
const mbfl_encoding *outcode; /* encoding requested by the caller for the result */
};

static int
mime_header_decoder_collector(int c, void* data)
{
const mbfl_encoding *encoding;
struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;

switch (pd->status) {
case 1:
if (c == 0x3f) { /* ? */
mbfl_memory_device_output(c, &pd->tmpdev);
pd->cspos = pd->tmpdev.pos;
pd->status = 2;
} else {
mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
mbfl_memory_device_reset(&pd->tmpdev);
if (c == 0x3d) { /* = */
mbfl_memory_device_output(c, &pd->tmpdev);
} else if (c == 0x0d || c == 0x0a) { /* CR or LF */
pd->status = 9;
} else {
(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
pd->status = 0;
}
}
break;
case 2: /* store charset string */
if (c == 0x3f) { /* ? */
/* identify charset */
mbfl_memory_device_output('\0', &pd->tmpdev);
encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
if (encoding != NULL) {
pd->incode = encoding;
pd->status = 3;
}
mbfl_memory_device_unput(&pd->tmpdev);
mbfl_memory_device_output(c, &pd->tmpdev);
} else {
mbfl_memory_device_output(c, &pd->tmpdev);
if (pd->tmpdev.pos > 100) { /* too long charset string */
pd->status = 0;
} else if (c == 0x0d || c == 0x0a) { /* CR or LF */
mbfl_memory_device_unput(&pd->tmpdev);
pd->status = 9;
}
if (pd->status != 2) {
mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
mbfl_memory_device_reset(&pd->tmpdev);
}
}
break;
case 3: /* identify encoding */
mbfl_memory_device_output(c, &pd->tmpdev);
if (c == 0x42 || c == 0x62) { /* 'B' or 'b' */
pd->encoding = &mbfl_encoding_base64;
pd->status = 4;
} else if (c == 0x51 || c == 0x71) { /* 'Q' or 'q' */
pd->encoding = &mbfl_encoding_qprint;
pd->status = 4;
} else {
if (c == 0x0d || c == 0x0a) { /* CR or LF */
mbfl_memory_device_unput(&pd->tmpdev);
pd->status = 9;
} else {
pd->status = 0;
}
mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
mbfl_memory_device_reset(&pd->tmpdev);
}
break;
case 4: /* reset filter */
mbfl_memory_device_output(c, &pd->tmpdev);
if (c == 0x3f) { /* ? */
/* charset convert filter */
mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, &mbfl_encoding_wchar);
/* decode filter */
mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, &mbfl_encoding_8bit);
pd->status = 5;
} else {
if (c == 0x0d || c == 0x0a) { /* CR or LF */
mbfl_memory_device_unput(&pd->tmpdev);
pd->status = 9;
} else {
pd->status = 0;
}
mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
}
mbfl_memory_device_reset(&pd->tmpdev);
break;
case 5: /* encoded block */
if (c == 0x3f) { /* ? */
pd->status = 6;
} else {
(*pd->deco_filter->filter_function)(c, pd->deco_filter);
}
break;
case 6: /* check end position */
if (c == 0x3d) { /* = */
/* flush and reset filter */
(*pd->deco_filter->filter_flush)(pd->deco_filter);
(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
mbfl_convert_filter_reset(pd->conv1_filter, &mbfl_encoding_ascii, &mbfl_encoding_wchar);
pd->status = 7;
} else {
(*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
if (c != 0x3f) { /* ? */
(*pd->deco_filter->filter_function)(c, pd->deco_filter);
pd->status = 5;
}
}
break;
case 7: /* after encoded block */
if (c == 0x0d || c == 0x0a) { /* CR LF */
pd->status = 8;
} else {
mbfl_memory_device_output(c, &pd->tmpdev);
if (c == 0x3d) { /* = */
pd->status = 1;
} else if (c != 0x20 && c != 0x09) { /* not space */
mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
mbfl_memory_device_reset(&pd->tmpdev);
pd->status = 0;
}
}
break;
case 8: /* folding */
case 9: /* folding */
if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
if (c == 0x3d) { /* = */
if (pd->status == 8) {
mbfl_memory_device_output(0x20, &pd->tmpdev); /* SPACE */
} else {
(*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
}
mbfl_memory_device_output(c, &pd->tmpdev);
pd->status = 1;
} else {
mbfl_memory_device_output(0x20, &pd->tmpdev);
mbfl_memory_device_output(c, &pd->tmpdev);
mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
mbfl_memory_device_reset(&pd->tmpdev);
pd->status = 0;
}
}
break;
default: /* non encoded block */
if (c == 0x0d || c == 0x0a) { /* CR LF */
pd->status = 9;
} else if (c == 0x3d) { /* = */
mbfl_memory_device_output(c, &pd->tmpdev);
pd->status = 1;
} else {
(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
}
break;
}

return 0;
}

/*
 * Finish a decoding run and hand the accumulated output to `result`.
 *
 * Whatever is still pending is pushed through first: if input stopped inside
 * an encoded word (states 5 and 6) the decode and charset filters are
 * flushed; in the other non-zero states (1-4 and 7-9) the bytes held in
 * tmpdev are forwarded to conv1_filter. The decoder is then returned to
 * state 0 with an empty tmpdev.
 *
 * Returns the value of mbfl_memory_device_result() for outdev.
 */
mbfl_string *
mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
{
	const int state = pd->status;

	if (state == 5 || state == 6) {
		/* Input ended in the middle of an encoded word */
		(*pd->deco_filter->filter_flush)(pd->deco_filter);
		(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
	} else if ((state >= 1 && state <= 4) || (state >= 7 && state <= 9)) {
		/* Held-back bytes never became an encoded word; forward them */
		mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
	}

	(*pd->conv2_filter->filter_flush)(pd->conv2_filter);
	mbfl_memory_device_reset(&pd->tmpdev);
	pd->status = 0;

	return mbfl_memory_device_result(&pd->outdev, result);
}

/*
 * Allocate and initialise a MIME header decoder whose output is encoded in
 * `outcode`.
 *
 * Three filters are chained: deco_filter (transfer decoding) feeds
 * conv1_filter (input charset -> wchar), which feeds conv2_filter
 * (wchar -> outcode), which writes to outdev. They are created downstream
 * first, and each result is checked before it is used as the output target
 * of the next filter, so a failed filter is never wired into the chain.
 *
 * Returns the new decoder, or NULL if any filter could not be created; in
 * that case everything allocated so far is released here. A successful
 * result must be released with mime_header_decoder_delete().
 */
struct mime_header_decoder_data*
mime_header_decoder_new(const mbfl_encoding *outcode)
{
	struct mime_header_decoder_data *pd = emalloc(sizeof(struct mime_header_decoder_data));

	mbfl_memory_device_init(&pd->outdev, 0, 0);
	mbfl_memory_device_init(&pd->tmpdev, 0, 0);
	pd->cspos = 0;
	pd->status = 0;
	pd->encoding = &mbfl_encoding_8bit;
	pd->incode = &mbfl_encoding_ascii;
	pd->outcode = outcode;
	/* Start from a known state so the failure path knows what exists */
	pd->conv2_filter = NULL;
	pd->conv1_filter = NULL;
	pd->deco_filter = NULL;

	/* charset convert filter */
	pd->conv2_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
	if (pd->conv2_filter == NULL) {
		goto fail;
	}
	pd->conv1_filter = mbfl_convert_filter_new(pd->incode, &mbfl_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
	if (pd->conv1_filter == NULL) {
		goto fail;
	}
	/* decode filter */
	pd->deco_filter = mbfl_convert_filter_new(pd->encoding, &mbfl_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);
	if (pd->deco_filter == NULL) {
		goto fail;
	}

	return pd;

fail:
	/* Only filters that were actually created are deleted; deco_filter is
	 * always NULL on this path because it is the last one created */
	if (pd->conv2_filter != NULL) {
		mbfl_convert_filter_delete(pd->conv2_filter);
	}
	if (pd->conv1_filter != NULL) {
		mbfl_convert_filter_delete(pd->conv1_filter);
	}
	mbfl_memory_device_clear(&pd->outdev);
	mbfl_memory_device_clear(&pd->tmpdev);
	efree((void*)pd);
	return NULL;
}

/*
 * Release a decoder: its three conversion filters, both memory devices and
 * the decoder structure itself. Passing NULL is a no-op.
 */
void
mime_header_decoder_delete(struct mime_header_decoder_data *pd)
{
	if (!pd) {
		return;
	}

	mbfl_convert_filter_delete(pd->conv2_filter);
	mbfl_convert_filter_delete(pd->conv1_filter);
	mbfl_convert_filter_delete(pd->deco_filter);

	mbfl_memory_device_clear(&pd->outdev);
	mbfl_memory_device_clear(&pd->tmpdev);

	efree((void*)pd);
}

/*
 * Decode the MIME header text in `string` and store the output, encoded as
 * `outcode`, in `result`.
 *
 * `result` is initialised and tagged with `outcode` before anything else.
 * Returns NULL if the decoder could not be created; otherwise returns
 * whatever mime_header_decoder_result() returns.
 */
mbfl_string *
mbfl_mime_header_decode(
	mbfl_string *string,
	mbfl_string *result,
	const mbfl_encoding *outcode)
{
	struct mime_header_decoder_data *decoder;
	const unsigned char *bytes;
	size_t len;
	size_t i;

	mbfl_string_init(result);
	result->encoding = outcode;

	decoder = mime_header_decoder_new(outcode);
	if (decoder == NULL) {
		return NULL;
	}

	/* Push the input through the state machine one byte at a time */
	len = string->len;
	bytes = string->val;
	for (i = 0; i < len; i++) {
		mime_header_decoder_collector(bytes[i], decoder);
	}

	result = mime_header_decoder_result(decoder, result);
	mime_header_decoder_delete(decoder);

	return result;
}
20 changes: 0 additions & 20 deletions ext/mbstring/libmbfl/mbfl/mbfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,24 +193,4 @@ mbfl_mime_header_encode(
const char *linefeed,
int indent);

/*
 * MIME header decode
 *
 * The decoder state is opaque to callers; it is created with
 * mime_header_decoder_new() and released with mime_header_decoder_delete().
 */
struct mime_header_decoder_data; /* forward declaration */

/* Create a decoder producing output in `outcode`.
 * Returns NULL if one of its conversion filters cannot be created. */
MBFLAPI extern struct mime_header_decoder_data *
mime_header_decoder_new(const mbfl_encoding *outcode);

/* Release a decoder and everything it owns. `pd` may be NULL. */
MBFLAPI extern void
mime_header_decoder_delete(struct mime_header_decoder_data *pd);

/* Flush pending input, reset the decoder state to 0 and hand the
 * accumulated output to `result`. */
MBFLAPI extern mbfl_string *
mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result);

/* One-shot helper: decode all of `string` into `result` using encoding
 * `outcode`. Returns NULL if the decoder cannot be created. */
MBFLAPI extern mbfl_string *
mbfl_mime_header_decode(
mbfl_string *string,
mbfl_string *result,
const mbfl_encoding *outcode);

#endif /* MBFL_MBFILTER_H */
11 changes: 0 additions & 11 deletions ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,12 +212,6 @@ const mbfl_encoding *mbfl_no2encoding(enum mbfl_no_encoding no_encoding)
return NULL;
}

/* Map an encoding name to its mbfl_no_encoding identifier.
 * Returns mbfl_no_encoding_invalid when the name is not recognised. */
enum mbfl_no_encoding mbfl_name2no_encoding(const char *name)
{
	const mbfl_encoding *found = mbfl_name2encoding(name);

	if (!found) {
		return mbfl_no_encoding_invalid;
	}
	return found->no_encoding;
}

const char *mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding)
{
const mbfl_encoding *encoding = mbfl_no2encoding(no_encoding);
Expand All @@ -229,11 +223,6 @@ const mbfl_encoding **mbfl_get_supported_encodings(void)
return mbfl_encoding_ptr_list;
}

const char *mbfl_no2preferred_mime_name(enum mbfl_no_encoding no_encoding)
{
return mbfl_encoding_preferred_mime_name(mbfl_no2encoding(no_encoding));
}

const char *mbfl_encoding_preferred_mime_name(const mbfl_encoding *encoding)
{
if (encoding->mime_name && encoding->mime_name[0] != '\0') {
Expand Down
2 changes: 0 additions & 2 deletions ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,10 +248,8 @@ static inline zend_string* mb_convert_buf_result(mb_convert_buf *buf, const mbfl

MBFLAPI extern const mbfl_encoding *mbfl_name2encoding(const char *name);
MBFLAPI extern const mbfl_encoding *mbfl_no2encoding(enum mbfl_no_encoding no_encoding);
MBFLAPI extern enum mbfl_no_encoding mbfl_name2no_encoding(const char *name);
MBFLAPI extern const mbfl_encoding **mbfl_get_supported_encodings(void);
MBFLAPI extern const char *mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding);
MBFLAPI extern const char *mbfl_no2preferred_mime_name(enum mbfl_no_encoding no_encoding);
MBFLAPI extern const char *mbfl_encoding_preferred_mime_name(const mbfl_encoding *encoding);

#endif /* MBFL_ENCODING_H */
Loading