Skip to content

Commit c124c58

Browse files
authored
Fix ISIZE check to handle 32bit overflows
1 parent 8c3b567 commit c124c58

File tree

3 files changed

+32
-2
lines changed

3 files changed

+32
-2
lines changed

CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ Changelog
77
.. This document is user facing. Please word the changes in such a way
88
.. that users understand how the changes affect the new version.
99
10+
version 0.5.0-dev
11+
-----------------
12+
+ Fix a bug where files larger than 4GB could not be decompressed.
13+
1014
version 0.4.2
1115
-----------------
1216
+ Fix a reference counting error that happened on module initialization and

src/zlib_ng/zlib_ngmodule.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2590,8 +2590,9 @@ GzipReader_read_into_buffer(GzipReader *self, uint8_t *out_buffer, size_t out_bu
25902590
return -1;
25912591
}
25922592
uint32_t length = load_u32_le(current_pos);
2593-
current_pos += 4;
2594-
if (length != self->zst.total_out) {
2593+
current_pos += 4;
2594+
// ISIZE is the length of the original data modulo 2^32
2595+
if (length != (0xFFFFFFFFUL & self->zst.total_out)) {
25952596
Py_BLOCK_THREADS;
25962597
PyErr_SetString(BadGzipFile, "Incorrect length of data produced");
25972598
return -1;

tests/test_gzip_ng.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,31 @@ def test_decompress_incorrect_length():
292292
error.match("Incorrect length of data produced")
293293

294294

295+
def test_decompress_on_long_input():
296+
# Ensure that a compressed payload with length bigger than 2**32 (ISIZE is
297+
# overflowed) can be decompressed. To avoid writing the whole uncompressed payload
298+
# into memory, the test writes the compressed data in chunks. The payload consists
299+
# almost exclusively of zeros to achieve an extremely efficient compression rate,
300+
# so that the compressed data also fits in memory.
301+
302+
buffered_stream = io.BytesIO()
303+
n = 20
304+
block_size = 2**n
305+
iterations = 2**(32 - n)
306+
zeros_block = bytes(block_size)
307+
308+
# To avoid holding the whole uncompressed payload in memory, write it to the gzip stream one block at a time.
309+
with gzip_ng.open(buffered_stream, "wb") as gz:
310+
for _ in range(iterations):
311+
gz.write(zeros_block)
312+
gz.write(b"\x01" * 123)
313+
buffered_stream.seek(0)
314+
with gzip_ng.open(buffered_stream, "rb") as gz:
315+
for _ in range(iterations):
316+
assert zeros_block == gz.read(block_size)
317+
assert gz.read() == b"\x01" * 123
318+
319+
295320
def test_decompress_incorrect_checksum():
296321
# Create a wrong checksum by using a non-default seed.
297322
wrong_checksum = zlib.crc32(DATA, 50)

0 commit comments

Comments
 (0)