Skip to content

Commit 737b4ba

Browse files
authored
gh-134635: add zlib.{adler32,crc32}_combine to combine checksums (#134650)
1 parent 8704d6b commit 737b4ba

File tree

6 files changed

+356
-1
lines changed

6 files changed

+356
-1
lines changed

Doc/library/zlib.rst

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,20 @@ The available exception and functions in this module are:
4444
.. versionchanged:: 3.0
4545
The result is always unsigned.
4646

47+
.. function:: adler32_combine(adler1, adler2, len2, /)
48+
49+
Combine two Adler-32 checksums into one.
50+
51+
Given the Adler-32 checksum *adler1* of a sequence ``A`` and the
52+
Adler-32 checksum *adler2* of a sequence ``B`` of length *len2*,
53+
return the Adler-32 checksum of ``A`` and ``B`` concatenated.
54+
55+
This function is typically useful to combine Adler-32 checksums
56+
that were concurrently computed. To compute checksums sequentially, use
57+
:func:`adler32` with the running checksum as the ``value`` argument.
58+
59+
.. versionadded:: next
60+
4761
.. function:: compress(data, /, level=-1, wbits=MAX_WBITS)
4862

4963
Compresses the bytes in *data*, returning a bytes object containing compressed data.
@@ -136,6 +150,20 @@ The available exception and functions in this module are:
136150
.. versionchanged:: 3.0
137151
The result is always unsigned.
138152

153+
.. function:: crc32_combine(crc1, crc2, len2, /)
154+
155+
Combine two CRC-32 checksums into one.
156+
157+
Given the CRC-32 checksum *crc1* of a sequence ``A`` and the
158+
CRC-32 checksum *crc2* of a sequence ``B`` of length *len2*,
159+
return the CRC-32 checksum of ``A`` and ``B`` concatenated.
160+
161+
This function is typically useful to combine CRC-32 checksums
162+
that were concurrently computed. To compute checksums sequentially, use
163+
:func:`crc32` with the running checksum as the ``value`` argument.
164+
165+
.. versionadded:: next
166+
139167
.. function:: decompress(data, /, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE)
140168

141169
Decompresses the bytes in *data*, returning a bytes object containing the

Doc/whatsnew/3.15.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,16 @@ ssl
9797
(Contributed by Will Childs-Klein in :gh:`133624`.)
9898

9999

100+
zlib
101+
----
102+
103+
* Allow combining two Adler-32 checksums via :func:`~zlib.adler32_combine`.
104+
(Contributed by Callum Attryde and Bénédikt Tran in :gh:`134635`.)
105+
106+
* Allow combining two CRC-32 checksums via :func:`~zlib.crc32_combine`.
107+
(Contributed by Bénédikt Tran in :gh:`134635`.)
108+
109+
100110
.. Add improved modules above alphabetically, not here at the end.
101111
102112
Optimizations

Lib/test/test_zlib.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,114 @@ def test_same_as_binascii_crc32(self):
119119
self.assertEqual(binascii.crc32(b'spam'), zlib.crc32(b'spam'))
120120

121121

122+
class ChecksumCombineMixin:
    """Mixin class for testing checksum combination.

    Concrete subclasses set ``default_iv`` and implement ``_checksum()``
    and ``combine()``.  The ``test_*`` methods call ``unittest.TestCase``
    assertion helpers, so the mixin has to be combined with
    ``unittest.TestCase``.
    """

    # Number of random samples drawn by each randomized test.
    N = 1000
    # Initial value used when a test does not supply one explicitly.
    default_iv: int

    def parse_iv(self, iv):
        """Parse an IV value.

        - The default IV is returned if *iv* is None.
        - A random IV is returned if *iv* is -1.
        - Otherwise, *iv* is returned as is.
        """
        if iv is None:
            return self.default_iv
        if iv == -1:
            return random.randint(1, 0x80000000)
        return iv

    def checksum(self, data, init=None):
        """Compute the checksum of data with a given initial value.

        The *init* value is parsed by ``parse_iv``.
        """
        iv = self.parse_iv(init)
        return self._checksum(data, iv)

    def _checksum(self, data, init):
        """Compute the checksum of *data* starting from *init*.

        Must be overridden by concrete subclasses.
        """
        raise NotImplementedError

    def combine(self, a, b, blen):
        """Combine two checksums together.

        Must be overridden by concrete subclasses.
        """
        raise NotImplementedError

    def get_random_data(self, data_len, *, iv=None):
        """Get a triplet (data, iv, checksum).

        *data* is *data_len* random bytes, *iv* is the parsed initial
        value and *checksum* is the checksum of *data* computed with it.
        """
        data = random.randbytes(data_len)
        init = self.parse_iv(iv)
        checksum = self.checksum(data, init)
        return data, init, checksum

    def test_combine_empty(self):
        # Combining an IV with the default-IV checksum of some data must
        # give the checksum of that data computed with that IV.
        for _ in range(self.N):
            a, iv, checksum = self.get_random_data(32, iv=-1)
            res = self.combine(iv, self.checksum(a), len(a))
            # The inputs are random: report them so a failure is reproducible.
            self.assertEqual(res, checksum, msg=f"data={a!r}, iv={iv:#x}")

    def test_combine_no_iv(self):
        # Combining the checksums of a and b must give the checksum of a + b.
        for _ in range(self.N):
            a, _, chk_a = self.get_random_data(32)
            b, _, chk_b = self.get_random_data(64)
            res = self.combine(chk_a, chk_b, len(b))
            self.assertEqual(res, self.checksum(a + b),
                             msg=f"a={a!r}, b={b!r}")

    def test_combine_no_iv_invalid_length(self):
        # Passing a length other than len(b) must not yield the checksum
        # of a + b.
        a, _, chk_a = self.get_random_data(32)
        b, _, chk_b = self.get_random_data(64)
        checksum = self.checksum(a + b)
        for invalid_len in [1, len(a), 48, len(b) + 1, 191]:
            with self.subTest(invalid_len=invalid_len):
                invalid_res = self.combine(chk_a, chk_b, invalid_len)
                self.assertNotEqual(invalid_res, checksum,
                                    msg=f"a={a!r}, b={b!r}")

        self.assertRaises(TypeError, self.combine, 0, 0, "len")

    def test_combine_with_iv(self):
        for _ in range(self.N):
            a, iv_a, chk_a_with_iv = self.get_random_data(32, iv=-1)
            chk_a_no_iv = self.checksum(a)
            b, iv_b, chk_b_with_iv = self.get_random_data(64, iv=-1)
            chk_b_no_iv = self.checksum(b)

            # We can represent c = COMBINE(CHK(a, iv_a), CHK(b, iv_b)) as:
            #
            #   c = CHK(CHK(b'', iv_a) + CHK(a) + CHK(b'', iv_b) + CHK(b))
            #     = COMBINE(
            #           COMBINE(CHK(b'', iv_a), CHK(a)),
            #           COMBINE(CHK(b'', iv_b), CHK(b)),
            #       )
            #     = COMBINE(COMBINE(iv_a, CHK(a)), COMBINE(iv_b, CHK(b)))
            tmp0 = self.combine(iv_a, chk_a_no_iv, len(a))
            tmp1 = self.combine(iv_b, chk_b_no_iv, len(b))
            expected = self.combine(tmp0, tmp1, len(b))
            checksum = self.combine(chk_a_with_iv, chk_b_with_iv, len(b))
            self.assertEqual(
                checksum, expected,
                msg=f"a={a!r}, iv_a={iv_a:#x}, b={b!r}, iv_b={iv_b:#x}",
            )
206+
207+
208+
class CRC32CombineTestCase(ChecksumCombineMixin, unittest.TestCase):
    """Run the checksum-combination tests against the CRC-32 functions."""

    # Initial value passed to zlib.crc32() when a test supplies none.
    default_iv = 0

    def _checksum(self, data, init):
        """Return ``zlib.crc32(data, init)``."""
        return zlib.crc32(data, init)

    def combine(self, a, b, blen):
        """Return ``zlib.crc32_combine(a, b, blen)``."""
        return zlib.crc32_combine(a, b, blen)
217+
218+
219+
class Adler32CombineTestCase(ChecksumCombineMixin, unittest.TestCase):
    """Run the checksum-combination tests against the Adler-32 functions."""

    # Initial value passed to zlib.adler32() when a test supplies none.
    default_iv = 1

    def _checksum(self, data, init):
        """Return ``zlib.adler32(data, init)``."""
        return zlib.adler32(data, init)

    def combine(self, a, b, blen):
        """Return ``zlib.adler32_combine(a, b, blen)``."""
        return zlib.adler32_combine(a, b, blen)
228+
229+
122230
# Issue #10276 - check that inputs >=4 GiB are handled correctly.
123231
class ChecksumBigBufferTestCase(unittest.TestCase):
124232

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:mod:`zlib`: Allow combining Adler-32 and CRC-32 checksums via
2+
:func:`~zlib.adler32_combine` and :func:`~zlib.crc32_combine`. Patch by
3+
Callum Attryde and Bénédikt Tran.

Modules/clinic/zlibmodule.c.h

Lines changed: 119 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)