@@ -456,7 +456,7 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
456
456
/* }}} */
457
457
458
458
/* {{{ php_utf32_utf8 */
459
- size_t php_utf32_utf8 (unsigned char * buf , int k )
459
+ static size_t php_utf32_utf8 (unsigned char * buf , unsigned k )
460
460
{
461
461
size_t retval = 0 ;
462
462
@@ -487,6 +487,47 @@ size_t php_utf32_utf8(unsigned char *buf, int k)
487
487
}
488
488
/* }}} */
489
489
490
/* {{{ php_mb2_int_to_char
 * Convert back big endian int representation of sequence of one or two 8-bit code units.
 *
 * buf: destination; receives the code units starting at buf[0]. Up to two
 *      bytes are written, so the caller must provide room for two.
 * k:   packed code units, most significant unit first. Must be <= 0xFFFF
 *      (checked with assert(), i.e. only in builds without NDEBUG).
 *
 * Returns the number of bytes stored in buf (1 or 2). */
static size_t php_mb2_int_to_char(unsigned char *buf, unsigned k)
{
	assert(k <= 0xFFFFU);

	if (k <= 0xFFU) { /* 1 */
		buf[0] = (unsigned char)k;
		return 1U;
	}

	/* 2: emit the high-order unit first (big endian). Each store is masked
	 * and cast so the narrowing to a single byte is explicit. */
	buf[0] = (unsigned char)((k >> 8) & 0xFFU);
	buf[1] = (unsigned char)(k & 0xFFU);
	return 2U;
}
/* }}} */
506
+
507
/* {{{ php_mb3_int_to_char
 * Convert back big endian int representation of sequence of one to three 8-bit code units.
 * For EUC-JP.
 *
 * buf: destination; receives the code units starting at buf[0]. Up to three
 *      bytes are written, so the caller must provide room for three.
 * k:   packed code units, most significant unit first. Must be <= 0xFFFFFF
 *      (checked with assert(), i.e. only in builds without NDEBUG).
 *
 * Returns the number of bytes stored in buf (1, 2 or 3). */
static size_t php_mb3_int_to_char(unsigned char *buf, unsigned k)
{
	assert(k <= 0xFFFFFFU);

	if (k <= 0xFFU) { /* 1 */
		buf[0] = (unsigned char)k;
		return 1U;
	}

	if (k <= 0xFFFFU) { /* 2 */
		buf[0] = (unsigned char)((k >> 8) & 0xFFU);
		buf[1] = (unsigned char)(k & 0xFFU);
		return 2U;
	}

	/* 3: emit the units from most to least significant (big endian). Each
	 * store is masked and cast so the narrowing to a single byte is explicit. */
	buf[0] = (unsigned char)((k >> 16) & 0xFFU);
	buf[1] = (unsigned char)((k >> 8) & 0xFFU);
	buf[2] = (unsigned char)(k & 0xFFU);
	return 3U;
}
/* }}} */
529
+
530
+
490
531
/* {{{ unimap_bsearc_cmp
491
532
* Binary search of unicode code points in unicode <--> charset mapping.
492
533
* Returns the code point in the target charset (whose mapping table was given) or 0 if
@@ -817,21 +858,23 @@ static void traverse_for_entities(char *ret, int *retlen_p, int all, int quote_s
817
858
case cs_big5hkscs :
818
859
case cs_sjis :
819
860
case cs_gb2312 :
820
- /* one or two bytes */
821
- * (q ++ ) = (code & 0xFFU );
822
- if (0xFF00U & code ) { /* 2 */
823
- * (q ++ ) = (code >> 8 );
824
- }
861
+ /* we don't have named entity or unicode mappings for these yet,
862
+ * so we're guaranteed code <= 0xFF */
863
+ #if 0
864
+ q += php_mb2_int_to_char ((unsigned char * )q , code );
865
+ #else
866
+ assert (code <= 0xFFU );
867
+ * (q ++ ) = code ;
868
+ #endif
825
869
break ;
826
870
827
871
case cs_eucjp :
828
- /* one to three bytes */
829
- * (q ++ ) = code & 0xFFU ;
830
- if (0xFFFF00U & code ) { /* 2 */
831
- * (q ++ ) = ((code >> 8 ) & 0xFFU );
832
- if (0xFF0000U & code ) /* 3 */
833
- * (q ++ ) = (code >> 16 );
834
- }
872
+ #if 0 /* idem */
873
+ q += php_mb2_int_to_char ((unsigned char * )q , code );
874
+ #else
875
+ assert (code <= 0xFFU );
876
+ * (q ++ ) = code ;
877
+ #endif
835
878
break ;
836
879
837
880
default :
0 commit comments