Skip to content

Commit 5aaf7b4

Browse files
committed
Fix zlib support for large files
gzread() and gzwrite() effectively have a 4GiB limit at the moment because the APIs of the zlib library use unsigned ints. For example, this means that the count argument of gzread(), gzwrite() & co is effectively taken modulo 2**32.

Fix this by adding a loop to handle all bytes.

As for automated testing, I didn't find an easy way to write a phpt for this that wouldn't use a lot of memory or require a large file. For instance, the gzread() test that I manually ran requires a 4MiB input file (and I can't shrink it because zlib has a max window size).

Here are the testing instructions, run on 64-bit:

To test for gzwrite():

```php
$f = gzopen("out.txt.gz", "w");
gzwrite($f, str_repeat('a', 4*1024*1024*1024+64)); // 4GiB + 64 bytes
```

Then use `zcat out.txt.gz|wc -c` to check that all bytes were written (should be 4294967360).

To test for gzread(): create a file that is 4GiB + 64 bytes in size, containing, for example, only a's. Then compress it into out.txt.gz using the gzip command. Then run:

```php
$f = gzopen("out.txt.gz", "r");
$str = gzread($f, 4*1024*1024*1024+64);
var_dump(strlen($str)); // 4294967360
var_dump(substr($str, -3)); // string (3) "aaa"
```

Closes GH-17775.
1 parent 678ecff commit 5aaf7b4

File tree

2 files changed

+41
-9
lines changed

2 files changed

+41
-9
lines changed

NEWS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ PHP NEWS
5050
. Fixed bug GH-17745 (zlib extension incorrectly handles object arguments).
5151
(nielsdos)
5252
. Fix memory leak when encoding check fails. (nielsdos)
53+
. Fix zlib support for large files. (nielsdos)
5354

5455
13 Feb 2025, PHP 8.3.17
5556

ext/zlib/zlib_fopen_wrapper.c

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,24 +33,55 @@ struct php_gz_stream_data_t {
3333
static ssize_t php_gziop_read(php_stream *stream, char *buf, size_t count)
3434
{
3535
struct php_gz_stream_data_t *self = (struct php_gz_stream_data_t *) stream->abstract;
36-
int read;
36+
ssize_t total_read = 0;
37+
38+
/* Despite the count argument of gzread() being "unsigned int",
39+
* the return value is "int". Error returns are values < 0, otherwise the count is returned.
40+
* To properly distinguish error values from success value, we therefore need to cap at INT_MAX.
41+
*/
42+
do {
43+
unsigned int chunk_size = MIN(count, INT_MAX);
44+
int read = gzread(self->gz_file, buf, chunk_size);
45+
count -= chunk_size;
46+
47+
if (gzeof(self->gz_file)) {
48+
stream->eof = 1;
49+
}
3750

38-
/* XXX this needs to be looped for the case count > UINT_MAX */
39-
read = gzread(self->gz_file, buf, count);
51+
if (UNEXPECTED(read < 0)) {
52+
return read;
53+
}
4054

41-
if (gzeof(self->gz_file)) {
42-
stream->eof = 1;
43-
}
55+
total_read += read;
56+
buf += read;
57+
} while (count > 0 && !stream->eof);
4458

45-
return read;
59+
return total_read;
4660
}
4761

4862
static ssize_t php_gziop_write(php_stream *stream, const char *buf, size_t count)
4963
{
5064
struct php_gz_stream_data_t *self = (struct php_gz_stream_data_t *) stream->abstract;
65+
ssize_t total_written = 0;
66+
67+
/* Despite the count argument of gzwrite() being "unsigned int",
68+
* the return value is "int". Error returns are values < 0, otherwise the count is returned.
69+
* To properly distinguish error values from success value, we therefore need to cap at INT_MAX.
70+
*/
71+
do {
72+
unsigned int chunk_size = MIN(count, INT_MAX);
73+
int written = gzwrite(self->gz_file, buf, chunk_size);
74+
count -= chunk_size;
75+
76+
if (UNEXPECTED(written < 0)) {
77+
return written;
78+
}
79+
80+
total_written += written;
81+
buf += written;
82+
} while (count > 0);
5183

52-
/* XXX this needs to be looped for the case count > UINT_MAX */
53-
return gzwrite(self->gz_file, (char *) buf, count);
84+
return total_written;
5485
}
5586

5687
static int php_gziop_seek(php_stream *stream, zend_off_t offset, int whence, zend_off_t *newoffs)

0 commit comments

Comments
 (0)