|
2 | 2 | /* This is a generated file, do not modify */
|
3 | 3 | /* Usage: php create_data_file.php /path/to/magic.mgc > data_file.c */
|
4 | 4 | <?php
|
| 5 | + /*--- Initialization of our translation table ---*/ |
| 6 | + |
| 7 | + // By default, everything gets mapped to its \o notation |
| 8 | + // (not \x, because per the C standard \x consumes as many hex digits as follow it, while an octal escape stops after at most 3 digits; |
| 9 | + // thus \x0ABACK_2_MALICE is interpreted as hex \x0ABAC (which overflows) followed by string K_2_MALICE, |
| 10 | + // while \0120 is unambiguously an LF (\012) followed by digit 0). |
| 11 | + for ($i = 0; $i < 0x100; ++$i) { |
| 12 | + $map[chr($i)] = sprintf('\%03o', $i); |
| 13 | + } |
| 14 | + // \0 is a shortcut for \x00; as the majority of the input file is \0's, |
| 15 | + // we divide the generated file's size by nearly 2 (30 MB -> 16 MB). |
| 16 | + $map[chr(0)] = '\0'; |
| 17 | + $map["\n"] = '\n'; |
| 18 | + // Displayable ASCII can be output as is: strings for file types will appear readable. |
| 19 | + for ($i = ord(' '); $i < 0x7F; ++$i) { |
| 20 | + $map[chr($i)] = chr($i); |
| 21 | + } |
| 22 | + // … Except digits following a \0: \012 would be interpreted as octal 012, and not as \0 followed by 12. |
| 23 | + // In that case \0 must be written as the full, unambiguous 3-digit octal code \000. |
| 24 | + for ($i = ord('0'); $i <= ord('9'); ++$i) { |
| 25 | + $map[chr(0).chr($i)] = '\000'.chr($i); |
| 26 | + } |
| 27 | + // … Except " and \ because we enclose the result into quotes and escape with \. |
| 28 | + $map['"'] = '\"'; |
| 29 | + $map['\\'] = '\\\\'; |
| 30 | + |
| 31 | + /*--- File generation ---*/ |
| 32 | + |
| 33 | + // https://github.com/php/php-src/pull/10422 |
| 34 | + // Some compilers (GCC, clang) do not like long lists; some (MSVC) do not like long strings. |
| 35 | + // CHUNK_SIZE splitting our ~10 MB binary source should give a good compromise between both. |
| 36 | + const CHUNK_SIZE = 1024; |
| 37 | + |
5 | 38 | $dta = file_get_contents( $argv[1] );
|
6 |
| - $dta_l = strlen($dta); |
7 |
| - $j = 0; |
| 39 | + $chunks = str_split($dta, CHUNK_SIZE); |
| 40 | + $chunks[count($chunks) - 1] = str_pad($chunks[count($chunks) - 1], CHUNK_SIZE, chr(0)); |
8 | 41 |
|
9 |
| - echo "const unsigned char php_magic_database[$dta_l] = {\n"; |
10 |
| - for ($i = 0; $i < $dta_l; $i++) { |
11 |
| - printf("0x%02X, ", ord($dta[$i])); |
12 |
| - if ($j % 16 == 15) { |
13 |
| - echo "\n"; |
14 |
| - } |
15 |
| - $j++; |
| 42 | + echo 'const unsigned char php_magic_database[' . count($chunks) . '][' . CHUNK_SIZE . "] = {\n"; |
| 43 | + foreach ($chunks as $chunk) { |
| 44 | + echo '"' . strtr($chunk, $map) . '",' . "\n"; |
16 | 45 | }
|
17 | 46 | echo "};\n";
|
18 | 47 | ?>
|
0 commit comments