@@ -1263,60 +1263,91 @@ PHP_FUNCTION(htmlentities)
1263
1263
}
1264
1264
/* }}} */
1265
1265
1266
- /* {{{ proto array get_html_translation_table([int table [, int quote_style]])
1266
+ /* {{{ proto array get_html_translation_table([int table [, int quote_style [, string charset_hint] ]])
1267
1267
Returns the internal translation table used by htmlspecialchars and htmlentities */
1268
1268
PHP_FUNCTION (get_html_translation_table )
1269
1269
{
1270
1270
long which = HTML_SPECIALCHARS , quote_style = ENT_COMPAT ;
1271
1271
unsigned int i ;
1272
1272
int j ;
1273
- char ind [2 ];
1274
- enum entity_charset charset = determine_charset (NULL TSRMLS_CC );
1273
+ unsigned char ind [5 ]; /* max # of 8-bit code units (4; for UTF-8) + 1 for \0 */
1274
+ void * dummy ;
1275
+ char * charset_hint = NULL ;
1276
+ int charset_hint_len ;
1277
+ enum entity_charset charset ;
1275
1278
1276
- if (zend_parse_parameters (ZEND_NUM_ARGS () TSRMLS_CC , "|ll" , & which , & quote_style ) == FAILURE ) {
1279
+ if (zend_parse_parameters (ZEND_NUM_ARGS () TSRMLS_CC , "|lls" ,
1280
+ & which , & quote_style , & charset_hint , & charset_hint_len ) == FAILURE ) {
1277
1281
return ;
1278
1282
}
1279
1283
1280
- array_init ( return_value );
1284
+ charset = determine_charset ( charset_hint TSRMLS_CC );
1281
1285
1282
- ind [ 1 ] = 0 ;
1286
+ array_init ( return_value ) ;
1283
1287
1284
1288
switch (which ) {
1285
- case HTML_ENTITIES :
1286
- for (j = 0 ; entity_map [j ].charset != cs_terminator ; j ++ ) {
1287
- if (entity_map [j ].charset != charset )
1289
+ case HTML_ENTITIES :
1290
+ for (j = 0 ; entity_map [j ].charset != cs_terminator ; j ++ ) {
1291
+ if (entity_map [j ].charset != charset )
1292
+ continue ;
1293
+ for (i = 0 ; i <= entity_map [j ].endchar - entity_map [j ].basechar ; i ++ ) {
1294
+ char buffer [16 ];
1295
+ unsigned k ;
1296
+ size_t written ;
1297
+
1298
+ if (entity_map [j ].table [i ] == NULL )
1288
1299
continue ;
1289
- for ( i = 0 ; i <= entity_map [ j ]. endchar - entity_map [ j ]. basechar ; i ++ ) {
1290
- char buffer [ 16 ] ;
1300
+
1301
+ k = i + entity_map [ j ]. basechar ;
1291
1302
1292
- if (entity_map [j ].table [i ] == NULL )
1293
- continue ;
1294
- /* what about wide chars here ?? */
1295
- ind [0 ] = i + entity_map [j ].basechar ;
1296
- snprintf (buffer , sizeof (buffer ), "&%s;" , entity_map [j ].table [i ]);
1297
- add_assoc_string (return_value , ind , buffer , 1 );
1303
+ switch (charset ) {
1304
+ case cs_utf_8 :
1305
+ written = php_utf32_utf8 (ind , k );
1306
+ ind [written ] = '\0' ;
1307
+ break ;
1308
+ /* we have no mappings for these, but if we had... */
1309
+ case cs_big5 :
1310
+ case cs_gb2312 :
1311
+ case cs_big5hkscs :
1312
+ case cs_sjis :
1313
+ written = php_mb2_int_to_char (ind , k );
1314
+ ind [written ] = '\0' ;
1315
+ break ;
1316
+ case cs_eucjp :
1317
+ written = php_mb3_int_to_char (ind , k );
1318
+ ind [written ] = '\0' ;
1319
+ break ;
1320
+ default : /* one byte */
1321
+ written = 1 ;
1322
+ ind [0 ] = (unsigned char )k ;
1323
+ ind [1 ] = '\0' ;
1324
+ break ;
1325
+ }
1298
1326
1327
+ snprintf (buffer , sizeof (buffer ), "&%s;" , entity_map [j ].table [i ]);
1328
+ if (zend_hash_find (Z_ARRVAL_P (return_value ), (const char * )ind , written + 1 , & dummy ) == FAILURE ) {
1329
+ /* in case of the single quote, which is repeated, the first one wins,
1330
+ * so don't replace the existing mapping */
1331
+ add_assoc_string (return_value , (const char * )ind , buffer , 1 );
1299
1332
}
1300
1333
}
1301
- /* break thru */
1302
-
1303
- case HTML_SPECIALCHARS :
1304
- for (j = 0 ; basic_entities_ex [j ].charcode != 0 ; j ++ ) {
1305
- void * dummy ;
1334
+ }
1335
+ /* fall through: HTML_ENTITIES also includes the basic entities below */
1306
1336
1307
- if (basic_entities_ex [j ].flags && (quote_style & basic_entities_ex [j ].flags ) == 0 )
1308
- continue ;
1337
+ case HTML_SPECIALCHARS :
1338
+ for (j = 0 ; basic_entities_ex [j ].charcode != 0 ; j ++ ) {
1339
+ if (basic_entities_ex [j ].flags && (quote_style & basic_entities_ex [j ].flags ) == 0 )
1340
+ continue ;
1309
1341
1310
- ind [0 ] = (unsigned char )basic_entities_ex [j ].charcode ;
1311
- if (zend_hash_find (Z_ARRVAL_P (return_value ), ind , sizeof (ind ), & dummy ) == FAILURE ) {
1312
- /* in case of the single quote, which is repeated, the first one wins,
1313
- * so don't replace the existint mapping */
1314
- add_assoc_stringl (return_value , ind , basic_entities_ex [j ].entity ,
1315
- basic_entities_ex [j ].entitylen , 1 );
1316
- }
1342
+ ind [0 ] = (unsigned char )basic_entities_ex [j ].charcode ;
1343
+ ind [1 ] = '\0' ;
1344
+ if (zend_hash_find (Z_ARRVAL_P (return_value ), (const char * )ind , 2 , & dummy ) == FAILURE ) {
1345
+ add_assoc_stringl (return_value , ind , basic_entities_ex [j ].entity ,
1346
+ basic_entities_ex [j ].entitylen , 1 );
1317
1347
}
1348
+ }
1318
1349
1319
- break ;
1350
+ break ;
1320
1351
}
1321
1352
}
1322
1353
/* }}} */
0 commit comments