@@ -229,6 +229,12 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
229
229
ZEND_ARG_INFO (0 , status )
230
230
ZEND_END_ARG_INFO ()
231
231
232
+ ZEND_BEGIN_ARG_INFO_EX (arginfo_mb_str_split , 0 , 0 , 1 )
233
+ ZEND_ARG_INFO (0 , str )
234
+ ZEND_ARG_INFO (0 , split_length )
235
+ ZEND_ARG_INFO (0 , encoding )
236
+ ZEND_END_ARG_INFO ()
237
+
232
238
ZEND_BEGIN_ARG_INFO_EX (arginfo_mb_strlen , 0 , 0 , 1 )
233
239
ZEND_ARG_INFO (0 , str )
234
240
ZEND_ARG_INFO (0 , encoding )
@@ -526,6 +532,7 @@ static const zend_function_entry mbstring_functions[] = {
526
532
PHP_FE (mb_parse_str , arginfo_mb_parse_str )
527
533
PHP_FE (mb_output_handler , arginfo_mb_output_handler )
528
534
PHP_FE (mb_preferred_mime_name , arginfo_mb_preferred_mime_name )
535
+ PHP_FE (mb_str_split , arginfo_mb_str_split )
529
536
PHP_FE (mb_strlen , arginfo_mb_strlen )
530
537
PHP_FE (mb_strpos , arginfo_mb_strpos )
531
538
PHP_FE (mb_strrpos , arginfo_mb_strrpos )
@@ -2273,6 +2280,169 @@ PHP_FUNCTION(mb_output_handler)
2273
2280
}
2274
2281
/* }}} */
2275
2282
2283
+ /* {{{ proto array mb_str_split(string str [, int split_length] [, string encoding])
2284
+ Convert a multibyte string to an array. If split_length is specified,
2285
+ break the string down into chunks each split_length characters long. */
2286
+
2287
+ /* structure to pass split params to the callback */
2288
+ struct mbfl_split_params {
2289
+ zval * return_value ; /* php function return value structure pointer */
2290
+ mbfl_string * result_string ; /* string to store result chunk */
2291
+ size_t mb_chunk_length ; /* actual chunk length in chars */
2292
+ size_t split_length ; /* split length in chars */
2293
+ mbfl_convert_filter * next_filter ; /* widechar to encoding converter */
2294
+ };
2295
+
2296
+ /* callback function to fill split array */
2297
+ static int mbfl_split_output (int c , void * data )
2298
+ {
2299
+ struct mbfl_split_params * params = (struct mbfl_split_params * )data ; /* cast passed data */
2300
+
2301
+ (* params -> next_filter -> filter_function )(c , params -> next_filter ); /* decoder filter */
2302
+
2303
+ if (params -> split_length == ++ params -> mb_chunk_length ) { /* if current chunk size reached defined chunk size or last char reached */
2304
+ mbfl_convert_filter_flush (params -> next_filter );/* concatenate separate decoded chars to the solid string */
2305
+ mbfl_memory_device * device = (mbfl_memory_device * )params -> next_filter -> data ; /* chars container */
2306
+ mbfl_string * chunk = params -> result_string ;
2307
+ mbfl_memory_device_result (device , chunk ); /* make chunk */
2308
+ add_next_index_stringl (params -> return_value , (const char * )chunk -> val , chunk -> len ); /* add chunk to the array */
2309
+ efree (chunk -> val );
2310
+ params -> mb_chunk_length = 0 ; /* reset mb_chunk size */
2311
+ }
2312
+ return 0 ;
2313
+ }
2314
+
2315
+ PHP_FUNCTION (mb_str_split )
2316
+ {
2317
+ zend_string * str , * encoding = NULL ;
2318
+ size_t mb_len , chunks , chunk_len ;
2319
+ const char * p , * last ; /* pointer for the string cursor and last string char */
2320
+ mbfl_string string , result_string ;
2321
+ const mbfl_encoding * mbfl_encoding ;
2322
+ zend_long split_length = 1 ;
2323
+
2324
+ ZEND_PARSE_PARAMETERS_START (1 , 3 )
2325
+ Z_PARAM_STR (str )
2326
+ Z_PARAM_OPTIONAL
2327
+ Z_PARAM_LONG (split_length )
2328
+ Z_PARAM_STR (encoding )
2329
+ ZEND_PARSE_PARAMETERS_END ();
2330
+
2331
+ if (split_length <= 0 ) {
2332
+ php_error_docref (NULL , E_WARNING , "The length of each segment must be greater than zero" );
2333
+ RETURN_FALSE ;
2334
+ }
2335
+
2336
+ /* fill mbfl_string structure */
2337
+ string .val = (unsigned char * ) ZSTR_VAL (str );
2338
+ string .len = ZSTR_LEN (str );
2339
+ string .no_language = MBSTRG (language );
2340
+ string .encoding = php_mb_get_encoding (encoding );
2341
+ if (!string .encoding ) {
2342
+ RETURN_FALSE ;
2343
+ }
2344
+
2345
+ p = ZSTR_VAL (str ); /* string cursor pointer */
2346
+ last = ZSTR_VAL (str ) + ZSTR_LEN (str ); /* last string char pointer */
2347
+
2348
+ mbfl_encoding = string .encoding ;
2349
+
2350
+ /* first scenario: 1,2,4-bytes fixed width encodings (head part) */
2351
+ if (mbfl_encoding -> flag & MBFL_ENCTYPE_SBCS ) { /* 1 byte */
2352
+ mb_len = string .len ;
2353
+ chunk_len = (size_t )split_length ; /* chunk length in bytes */
2354
+ } else if (mbfl_encoding -> flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE )) { /* 2 bytes */
2355
+ mb_len = string .len / 2 ;
2356
+ chunk_len = split_length * 2 ;
2357
+ } else if (mbfl_encoding -> flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE )) { /* 4 bytes */
2358
+ mb_len = string .len / 4 ;
2359
+ chunk_len = split_length * 4 ;
2360
+ } else if (mbfl_encoding -> mblen_table != NULL ) {
2361
+ /* second scenario: variable width encodings with length table */
2362
+ char unsigned const * mbtab = mbfl_encoding -> mblen_table ;
2363
+
2364
+ /* assume that we have 1-bytes characters */
2365
+ array_init_size (return_value , (string .len + split_length ) / split_length ); /* round up */
2366
+
2367
+ while (p < last ) { /* split cycle work until the cursor has reached the last byte */
2368
+ char const * chunk_p = p ; /* chunk first byte pointer */
2369
+ chunk_len = 0 ; /* chunk length in bytes */
2370
+ for (zend_long char_count = 0 ; char_count < split_length && p < last ; ++ char_count ) {
2371
+ char unsigned const m = mbtab [* (const unsigned char * )p ]; /* single character length table */
2372
+ chunk_len += m ;
2373
+ p += m ;
2374
+ }
2375
+ if (p >= last ) chunk_len -= p - last ; /* check if chunk is in bounds */
2376
+ add_next_index_stringl (return_value , chunk_p , chunk_len );
2377
+ }
2378
+ return ;
2379
+ } else {
2380
+ /* third scenario: other multibyte encodings */
2381
+ mbfl_convert_filter * filter , * decoder ;
2382
+
2383
+ /* assume that we have 1-bytes characters */
2384
+ array_init_size (return_value , (string .len + split_length ) / split_length ); /* round up */
2385
+
2386
+ /* decoder filter to decode wchar to encoding */
2387
+ mbfl_memory_device device ;
2388
+ mbfl_memory_device_init (& device , split_length + 1 , 0 );
2389
+
2390
+ decoder = mbfl_convert_filter_new (
2391
+ & mbfl_encoding_wchar ,
2392
+ string .encoding ,
2393
+ mbfl_memory_device_output ,
2394
+ NULL ,
2395
+ & device );
2396
+ /* if something wrong with the decoded */
2397
+ if (decoder == NULL ) {
2398
+ RETURN_FALSE ;
2399
+ }
2400
+
2401
+ /* wchar filter */
2402
+ mbfl_string_init (& result_string ); /* mbfl_string to store chunk in the callback */
2403
+ struct mbfl_split_params params = { /* init callback function params structure */
2404
+ .return_value = return_value ,
2405
+ .result_string = & result_string ,
2406
+ .mb_chunk_length = 0 ,
2407
+ .split_length = (size_t )split_length ,
2408
+ .next_filter = decoder ,
2409
+ };
2410
+
2411
+ filter = mbfl_convert_filter_new (
2412
+ string .encoding ,
2413
+ & mbfl_encoding_wchar ,
2414
+ mbfl_split_output ,
2415
+ NULL ,
2416
+ & params );
2417
+ /* if something wrong with the filter */
2418
+ if (filter == NULL ){
2419
+ mbfl_convert_filter_delete (decoder ); /* this will free allocated memory for the decoded */
2420
+ RETURN_FALSE ;
2421
+ }
2422
+
2423
+ while (p < last - 1 ) { /* cycle each byte except last with callback function */
2424
+ (* filter -> filter_function )(* p ++ , filter );
2425
+ }
2426
+ params .mb_chunk_length = split_length - 1 ; /* force to finish current chunk */
2427
+ (* filter -> filter_function )(* p ++ , filter ); /*process last char */
2428
+
2429
+ mbfl_convert_filter_delete (decoder );
2430
+ mbfl_convert_filter_delete (filter );
2431
+ return ;
2432
+ }
2433
+
2434
+ /* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
2435
+ chunks = (mb_len + split_length - 1 ) / split_length ; /* (round up idiom) */
2436
+ array_init_size (return_value , chunks );
2437
+ if (chunks != 0 ) {
2438
+ for (zend_long i = 0 ; i < chunks - 1 ; p += chunk_len , ++ i ) {
2439
+ add_next_index_stringl (return_value , p , chunk_len );
2440
+ }
2441
+ add_next_index_stringl (return_value , p , last - p );
2442
+ }
2443
+ }
2444
+ /* }}} */
2445
+
2276
2446
/* {{{ proto int mb_strlen(string str [, string encoding])
2277
2447
Get character numbers of a string */
2278
2448
PHP_FUNCTION (mb_strlen )
0 commit comments