Skip to content

Commit fa96c7e

Browse files
committed
fileinfo: Fixed bug #78987 High memory usage during encoding detection
Encoding detection now examines at most the first 64 KiB of the input buffer instead of the whole buffer.
Signed-off-by: Anatol Belski <ab@php.net>
1 parent fac3fbc commit fa96c7e

File tree

4 files changed

+71
-22
lines changed

4 files changed

+71
-22
lines changed

ext/fileinfo/libmagic.patch

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
diff -u libmagic.orig/apprentice.c libmagic/apprentice.c
22
--- libmagic.orig/apprentice.c 2019-02-20 03:35:27.000000000 +0100
3-
+++ libmagic/apprentice.c 2020-11-19 11:50:32.412674100 +0100
3+
+++ libmagic/apprentice.c 2021-09-17 21:27:42.796508107 +0200
44
@@ -29,6 +29,8 @@
55
* apprentice - make one pass through /etc/magic, learning its secrets.
66
*/
@@ -974,7 +974,7 @@ diff -u libmagic.orig/apprentice.c libmagic/apprentice.c
974974
}
975975
diff -u libmagic.orig/ascmagic.c libmagic/ascmagic.c
976976
--- libmagic.orig/ascmagic.c 2019-05-07 04:27:11.000000000 +0200
977-
+++ libmagic/ascmagic.c 2020-09-07 00:42:14.447562400 +0200
977+
+++ libmagic/ascmagic.c 2021-09-17 21:27:42.796508107 +0200
978978
@@ -96,7 +96,7 @@
979979
rv = file_ascmagic_with_encoding(ms, &bb,
980980
ubuf, ulen, code, type, text);
@@ -1005,7 +1005,7 @@ diff -u libmagic.orig/ascmagic.c libmagic/ascmagic.c
10051005
}
10061006
diff -u libmagic.orig/buffer.c libmagic/buffer.c
10071007
--- libmagic.orig/buffer.c 2019-05-07 04:27:11.000000000 +0200
1008-
+++ libmagic/buffer.c 2020-09-07 00:42:14.447562400 +0200
1008+
+++ libmagic/buffer.c 2021-09-17 21:27:42.796508107 +0200
10091009
@@ -31,19 +31,23 @@
10101010
#endif /* lint */
10111011

@@ -1062,7 +1062,7 @@ diff -u libmagic.orig/buffer.c libmagic/buffer.c
10621062

10631063
diff -u libmagic.orig/cdf.c libmagic/cdf.c
10641064
--- libmagic.orig/cdf.c 2019-02-20 03:35:27.000000000 +0100
1065-
+++ libmagic/cdf.c 2020-09-07 00:42:14.447562400 +0200
1065+
+++ libmagic/cdf.c 2021-09-17 21:27:42.796508107 +0200
10661066
@@ -43,7 +43,17 @@
10671067
#include <err.h>
10681068
#endif
@@ -1341,7 +1341,7 @@ diff -u libmagic.orig/cdf.c libmagic/cdf.c
13411341
#endif
13421342
diff -u libmagic.orig/cdf.h libmagic/cdf.h
13431343
--- libmagic.orig/cdf.h 2019-02-20 02:24:19.000000000 +0100
1344-
+++ libmagic/cdf.h 2020-09-07 00:42:14.447562400 +0200
1344+
+++ libmagic/cdf.h 2021-09-17 21:22:51.278410517 +0200
13451345
@@ -35,10 +35,10 @@
13461346
#ifndef _H_CDF_
13471347
#define _H_CDF_
@@ -1366,7 +1366,7 @@ diff -u libmagic.orig/cdf.h libmagic/cdf.h
13661366
#define CDF_SECID_FREE -1
13671367
diff -u libmagic.orig/cdf_time.c libmagic/cdf_time.c
13681368
--- libmagic.orig/cdf_time.c 2019-03-12 21:43:05.000000000 +0100
1369-
+++ libmagic/cdf_time.c 2020-09-07 00:42:14.447562400 +0200
1369+
+++ libmagic/cdf_time.c 2021-09-17 21:22:51.278410517 +0200
13701370
@@ -23,6 +23,7 @@
13711371
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
13721372
* POSSIBILITY OF SUCH DAMAGE.
@@ -1395,7 +1395,7 @@ diff -u libmagic.orig/cdf_time.c libmagic/cdf_time.c
13951395
(void)snprintf(buf, 26, "*Bad* %#16.16" INT64_T_FORMAT "x\n",
13961396
diff -u libmagic.orig/compress.c libmagic/compress.c
13971397
--- libmagic.orig/compress.c 2019-05-07 04:27:11.000000000 +0200
1398-
+++ libmagic/compress.c 2020-09-07 00:42:14.447562400 +0200
1398+
+++ libmagic/compress.c 2021-09-17 21:27:42.796508107 +0200
13991399
@@ -45,13 +45,11 @@
14001400
#endif
14011401
#include <string.h>
@@ -1545,7 +1545,7 @@ diff -u libmagic.orig/compress.c libmagic/compress.c
15451545
+#endif
15461546
diff -u libmagic.orig/der.c libmagic/der.c
15471547
--- libmagic.orig/der.c 2019-02-20 03:35:27.000000000 +0100
1548-
+++ libmagic/der.c 2020-09-07 00:42:14.447562400 +0200
1548+
+++ libmagic/der.c 2021-09-17 21:27:42.796508107 +0200
15491549
@@ -51,7 +51,9 @@
15501550
#include "magic.h"
15511551
#include "der.h"
@@ -1575,7 +1575,7 @@ diff -u libmagic.orig/der.c libmagic/der.c
15751575
snprintf(buf + z, blen - z, "%.2x", d[i]);
15761576
diff -u libmagic.orig/elfclass.h libmagic/elfclass.h
15771577
--- libmagic.orig/elfclass.h 2019-02-20 02:30:19.000000000 +0100
1578-
+++ libmagic/elfclass.h 2020-09-07 00:42:14.447562400 +0200
1578+
+++ libmagic/elfclass.h 2021-09-17 21:22:51.278410517 +0200
15791579
@@ -41,7 +41,7 @@
15801580
return toomany(ms, "program headers", phnum);
15811581
flags |= FLAGS_IS_CORE;
@@ -1605,7 +1605,16 @@ diff -u libmagic.orig/elfclass.h libmagic/elfclass.h
16051605
CAST(int, elf_getu16(swap, elfhdr.e_shstrndx)),
16061606
diff -u libmagic.orig/encoding.c libmagic/encoding.c
16071607
--- libmagic.orig/encoding.c 2019-04-15 18:48:41.000000000 +0200
1608-
+++ libmagic/encoding.c 2020-09-07 00:42:14.447562400 +0200
1608+
+++ libmagic/encoding.c 2021-09-17 22:21:17.103362836 +0200
1609+
@@ -71,7 +71,7 @@
1610+
size_t *ulen, const char **code, const char **code_mime, const char **type)
1611+
{
1612+
const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
1613+
- size_t nbytes = b->flen;
1614+
+ size_t nbytes = b->flen > 64*1024 ? 64*1024 : b->flen;
1615+
size_t mlen;
1616+
int rv = 1, ucs_type;
1617+
unsigned char *nbuf = NULL;
16091618
@@ -89,13 +89,13 @@
16101619
*code_mime = "binary";
16111620

@@ -1636,7 +1645,7 @@ diff -u libmagic.orig/encoding.c libmagic/encoding.c
16361645
}
16371646
diff -u libmagic.orig/file.h libmagic/file.h
16381647
--- libmagic.orig/file.h 2019-05-07 04:27:11.000000000 +0200
1639-
+++ libmagic/file.h 2020-11-23 17:11:36.234964700 +0100
1648+
+++ libmagic/file.h 2021-09-17 21:27:42.796508107 +0200
16401649
@@ -33,18 +33,9 @@
16411650
#ifndef __file_h__
16421651
#define __file_h__
@@ -1930,7 +1939,7 @@ diff -u libmagic.orig/file.h libmagic/file.h
19301939
#endif
19311940
diff -u libmagic.orig/fsmagic.c libmagic/fsmagic.c
19321941
--- libmagic.orig/fsmagic.c 2019-05-07 04:26:48.000000000 +0200
1933-
+++ libmagic/fsmagic.c 2020-09-07 00:42:14.447562400 +0200
1942+
+++ libmagic/fsmagic.c 2021-09-17 21:27:42.796508107 +0200
19341943
@@ -66,26 +66,10 @@
19351944
# define minor(dev) ((dev) & 0xff)
19361945
#endif
@@ -2223,7 +2232,7 @@ diff -u libmagic.orig/fsmagic.c libmagic/fsmagic.c
22232232
case S_IFSOCK:
22242233
diff -u libmagic.orig/funcs.c libmagic/funcs.c
22252234
--- libmagic.orig/funcs.c 2019-05-07 04:27:11.000000000 +0200
2226-
+++ libmagic/funcs.c 2020-09-07 00:42:14.447562400 +0200
2235+
+++ libmagic/funcs.c 2021-09-17 21:27:42.796508107 +0200
22272236
@@ -31,7 +31,6 @@
22282237
#endif /* lint */
22292238

@@ -2579,7 +2588,7 @@ diff -u libmagic.orig/funcs.c libmagic/funcs.c
25792588

25802589
diff -u libmagic.orig/magic.c libmagic/magic.c
25812590
--- libmagic.orig/magic.c 2019-05-07 04:27:11.000000000 +0200
2582-
+++ libmagic/magic.c 2020-09-07 00:42:14.447562400 +0200
2591+
+++ libmagic/magic.c 2021-09-17 21:27:42.796508107 +0200
25832592
@@ -25,11 +25,6 @@
25842593
* SUCH DAMAGE.
25852594
*/
@@ -3043,8 +3052,8 @@ diff -u libmagic.orig/magic.c libmagic/magic.c
30433052
public const char *
30443053
magic_error(struct magic_set *ms)
30453054
diff -u libmagic.orig/magic.h libmagic/magic.h
3046-
--- libmagic.orig/magic.h 2020-11-23 17:12:12.776465800 +0100
3047-
+++ libmagic/magic.h 2020-09-07 00:42:14.447562400 +0200
3055+
--- libmagic.orig/magic.h 2021-09-18 20:39:34.813932433 +0200
3056+
+++ libmagic/magic.h 2021-09-17 21:27:42.796508107 +0200
30483057
@@ -124,6 +124,7 @@
30493058

30503059
const char *magic_getpath(const char *, int);
@@ -3055,7 +3064,7 @@ diff -u libmagic.orig/magic.h libmagic/magic.h
30553064

30563065
diff -u libmagic.orig/print.c libmagic/print.c
30573066
--- libmagic.orig/print.c 2019-03-12 21:43:05.000000000 +0100
3058-
+++ libmagic/print.c 2020-09-07 00:42:14.447562400 +0200
3067+
+++ libmagic/print.c 2021-09-17 21:27:42.796508107 +0200
30593068
@@ -28,6 +28,7 @@
30603069
/*
30613070
* print.c - debugging printout routines
@@ -3129,7 +3138,7 @@ diff -u libmagic.orig/print.c libmagic/print.c
31293138
goto out;
31303139
diff -u libmagic.orig/readcdf.c libmagic/readcdf.c
31313140
--- libmagic.orig/readcdf.c 2019-03-12 21:43:05.000000000 +0100
3132-
+++ libmagic/readcdf.c 2020-09-07 00:42:14.463191200 +0200
3141+
+++ libmagic/readcdf.c 2021-09-17 21:27:42.796508107 +0200
31333142
@@ -31,7 +31,11 @@
31343143

31353144
#include <assert.h>
@@ -3248,7 +3257,7 @@ diff -u libmagic.orig/readcdf.c libmagic/readcdf.c
32483257
if (i != -1)
32493258
diff -u libmagic.orig/softmagic.c libmagic/softmagic.c
32503259
--- libmagic.orig/softmagic.c 2019-05-17 04:24:59.000000000 +0200
3251-
+++ libmagic/softmagic.c 2020-09-07 00:42:14.463191200 +0200
3260+
+++ libmagic/softmagic.c 2021-09-17 21:27:42.796508107 +0200
32523261
@@ -43,6 +43,10 @@
32533262
#include <time.h>
32543263
#include "der.h"
@@ -3615,7 +3624,7 @@ diff -u libmagic.orig/softmagic.c libmagic/softmagic.c
36153624
case FILE_INDIRECT:
36163625
diff -u libmagic.orig/strcasestr.c libmagic/strcasestr.c
36173626
--- libmagic.orig/strcasestr.c 2014-09-11 17:05:33.000000000 +0200
3618-
+++ libmagic/strcasestr.c 2020-08-05 15:01:55.644887300 +0200
3627+
+++ libmagic/strcasestr.c 2021-09-17 21:22:51.282410490 +0200
36193628
@@ -39,6 +39,8 @@
36203629

36213630
#include "file.h"

ext/fileinfo/libmagic/encoding.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ file_encoding(struct magic_set *ms, const struct buffer *b, unichar **ubuf,
7171
size_t *ulen, const char **code, const char **code_mime, const char **type)
7272
{
7373
const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
74-
size_t nbytes = b->flen;
74+
size_t nbytes = b->flen > 64*1024 ? 64*1024 : b->flen;
7575
size_t mlen;
7676
int rv = 1, ucs_type;
7777
unsigned char *nbuf = NULL;

ext/fileinfo/tests/bug78987.phpt

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
--TEST--
2+
Bug #78987 High memory usage during encoding detection
3+
--SKIPIF--
4+
<?php require_once(__DIR__ . '/skipif.inc'); ?>
5+
--INI--
6+
memory_limit=512M
7+
--FILE--
8+
<?php
9+
$finfo = new finfo(FILEINFO_MIME_TYPE);
10+
$minSize = 128 * 1024;
11+
$maxSize = 16 * 1024 * 1024;
12+
13+
$map = array(
14+
131072 => 2097152,
15+
262144 => 2097152,
16+
524288 => 2097152,
17+
1048576 => 4194304,
18+
2097152 => 6295552,
19+
4194304 => 10493952,
20+
8388608 => 16785408,
21+
16777216 => 29368320,
22+
);
23+
for($size = $minSize; $size <= $maxSize; $size *= 2) {
24+
$content = str_repeat('0', $size);
25+
26+
$finfo->buffer($content);
27+
28+
$m = memory_get_peak_usage(true);
29+
printf("%-8d => %s\n", $size, $m <= $map[$size] ? "ok" : "$m");
30+
}
31+
?>
32+
--EXPECT--
33+
131072 => ok
34+
262144 => ok
35+
524288 => ok
36+
1048576 => ok
37+
2097152 => ok
38+
4194304 => ok
39+
8388608 => ok
40+
16777216 => ok

ext/fileinfo/tests/finfo_file_basic.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ var_dump( finfo_file( $finfo, $magicFile.chr(0).$magicFile) );
2727
*** Testing finfo_file() : basic functionality ***
2828
string(28) "text/x-php; charset=us-ascii"
2929
string(%d) "PHP script, ASCII text%A"
30-
string(32) "text/plain; charset=unknown-8bit"
30+
string(28) "text/plain; charset=us-ascii"
3131

3232
Warning: finfo_file(): Invalid path in %s%efinfo_file_basic.php on line %d
3333
bool(false)

0 commit comments

Comments
 (0)