From d44dd9b8f26d97797a361a11277446b0a7158ff3 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sun, 11 May 2025 20:13:13 +0100 Subject: [PATCH 01/10] Split _zstd_set_c_parameters --- Lib/test/test_zstd.py | 9 +- Modules/_zstd/clinic/compressor.c.h | 11 +- Modules/_zstd/compressor.c | 165 ++++++++++++++-------------- 3 files changed, 98 insertions(+), 87 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index 541db4441b035c..e1a27a638d1fa2 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -196,8 +196,11 @@ def test_simple_compress_bad_args(self): self.assertRaises(TypeError, ZstdCompressor, zstd_dict=b"abcd1234") self.assertRaises(TypeError, ZstdCompressor, zstd_dict={1: 2, 3: 4}) + # valid compression level range is [-(1<<17), 22] with self.assertRaises(ValueError): ZstdCompressor(2**31) + with self.assertRaises(ValueError): + ZstdCompressor(level=-(2**31)) with self.assertRaises(ValueError): ZstdCompressor(options={2**31: 100}) @@ -261,8 +264,10 @@ def test_compress_parameters(self): # clamp compressionLevel level_min, level_max = CompressionParameter.compression_level.bounds() - compress(b'', level_max+1) - compress(b'', level_min-1) + with self.assertRaises(ValueError): + compress(b'', level_max+1) + with self.assertRaises(ValueError): + compress(b'', level_min-1) compress(b'', options={CompressionParameter.compression_level:level_max+1}) compress(b'', options={CompressionParameter.compression_level:level_min-1}) diff --git a/Modules/_zstd/clinic/compressor.c.h b/Modules/_zstd/clinic/compressor.c.h index f69161b590e5b7..4ead29a7bfa541 100644 --- a/Modules/_zstd/clinic/compressor.c.h +++ b/Modules/_zstd/clinic/compressor.c.h @@ -6,6 +6,7 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif +#include "pycore_abstract.h" // _Py_convert_optional_to_ssize_t() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(_zstd_ZstdCompressor_new__doc__, @@ -25,7 +26,7 @@ PyDoc_STRVAR(_zstd_ZstdCompressor_new__doc__, "function instead."); static PyObject * -_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, Py_ssize_t level, PyObject *options, PyObject *zstd_dict); static PyObject * @@ -63,7 +64,7 @@ _zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; - PyObject *level = Py_None; + Py_ssize_t level = PY_SSIZE_T_MIN; PyObject *options = Py_None; PyObject *zstd_dict = Py_None; @@ -76,7 +77,9 @@ _zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto skip_optional_pos; } if (fastargs[0]) { - level = fastargs[0]; + if (!_Py_convert_optional_to_ssize_t(fastargs[0], &level)) { + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -252,4 +255,4 @@ _zstd_ZstdCompressor_flush(PyObject *self, PyObject *const *args, Py_ssize_t nar exit: return return_value; } -/*[clinic end generated code: output=ee2d1dc298de790c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=95b86cea725001df input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index f794acbc19cfc7..b554a3148abaa3 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -45,98 +45,101 @@ typedef struct { #include "clinic/compressor.c.h" static int -_zstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, - const char *arg_name, const char* arg_type) +_zstd_set_c_level(ZstdCompressor *self, const Py_ssize_t level) { - size_t zstd_ret; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { + /* Set integer compression level */ + const int min_level = ZSTD_minCLevel(); + const int max_level = ZSTD_maxCLevel(); + if (level < min_level || level > max_level) { + PyErr_Format(PyExc_ValueError, + "compression level %zd not in valid range %d <= level <= %d.", + level, min_level, max_level); return -1; } - /* Integer compression level */ - if (PyLong_Check(level_or_options)) { - int level = PyLong_AsInt(level_or_options); - if (level == -1 && PyErr_Occurred()) { - PyErr_Format(PyExc_ValueError, - "Compression level should be an int value between %d and %d.", - ZSTD_minCLevel(), ZSTD_maxCLevel()); - return -1; - } - - /* Save for generating ZSTD_CDICT */ - self->compression_level = level; + /* Save for generating ZSTD_CDICT */ + self->compression_level = (int)level; - /* Set compressionLevel to compression context */ - zstd_ret = ZSTD_CCtx_setParameter(self->cctx, - ZSTD_c_compressionLevel, - level); + /* Set compressionLevel to compression context */ + const size_t zstd_ret = ZSTD_CCtx_setParameter( + self->cctx, ZSTD_c_compressionLevel, (int)level); - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret); + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + const _zstd_state* const st = PyType_GetModuleState(Py_TYPE(self)); + if (st == NULL) { return -1; } - return 0; + set_zstd_error(st, ERR_SET_C_LEVEL, zstd_ret); + return -1; } + return 0; +} - /* Options dict */ - if (PyDict_Check(level_or_options)) { - PyObject *key, *value; - Py_ssize_t pos = 0; +static int +_zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) +{ + /* Set options dict */ + _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { + return -1; + } - while (PyDict_Next(level_or_options, &pos, &key, &value)) { - /* Check key type */ - if (Py_TYPE(key) == mod_state->DParameter_type) { - PyErr_SetString(PyExc_TypeError, - "Key of compression option dict should " - "NOT be DecompressionParameter."); - return -1; - } + if (!PyDict_Check(options)) { + PyErr_Format(PyExc_TypeError, "invalid type for options, expected dict"); + return -1; + } - int key_v = PyLong_AsInt(key); - if (key_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Key of options dict should be a CompressionParameter attribute."); - return -1; - } + Py_ssize_t pos = 0; + PyObject *key, *value; + while (PyDict_Next(options, &pos, &key, &value)) { + /* Check key type */ + if (Py_TYPE(key) == mod_state->DParameter_type) { + PyErr_SetString(PyExc_TypeError, + "key should NOT be DecompressionParameter."); + return -1; + } - // TODO(emmatyping): check bounds when there is a value error here for better - // error message? - int value_v = PyLong_AsInt(value); - if (value_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Value of option dict should be an int."); - return -1; - } + const int key_v = PyLong_AsInt(key); + if (key_v == -1 && PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "key should be a CompressionParameter attribute."); + return -1; + } - if (key_v == ZSTD_c_compressionLevel) { - /* Save for generating ZSTD_CDICT */ - self->compression_level = value_v; - } - else if (key_v == ZSTD_c_nbWorkers) { - /* From the zstd library docs: - 1. When nbWorkers >= 1, triggers asynchronous mode when - used with ZSTD_compressStream2(). - 2, Default value is `0`, aka "single-threaded mode" : no - worker is spawned, compression is performed inside - caller's thread, all invocations are blocking. */ - if (value_v != 0) { - self->use_multithread = 1; - } - } + // TODO(emmatyping): check bounds when there is a value error here for better + // error message? + int value_v = PyLong_AsInt(value); + if (value_v == -1 && PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "options dict value should be an int."); + return -1; + } - /* Set parameter to compression context */ - zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); - if (ZSTD_isError(zstd_ret)) { - set_parameter_error(mod_state, 1, key_v, value_v); - return -1; + if (key_v == ZSTD_c_compressionLevel) { + /* Save for generating ZSTD_CDICT */ + self->compression_level = value_v; + } + else if (key_v == ZSTD_c_nbWorkers) { + /* From the zstd library docs: + 1. When nbWorkers >= 1, triggers asynchronous mode when + used with ZSTD_compressStream2(). + 2, Default value is `0`, aka "single-threaded mode" : no + worker is spawned, compression is performed inside + caller's thread, all invocations are blocking. */ + if (value_v != 0) { + self->use_multithread = 1; } } - return 0; + + /* Set parameter to compression context */ + const size_t zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); + if (ZSTD_isError(zstd_ret)) { + set_parameter_error(mod_state, 1, key_v, value_v); + return -1; + } } - PyErr_Format(PyExc_TypeError, "Invalid type for %s. Expected %s", arg_name, arg_type); - return -1; + return 0; } static void @@ -314,7 +317,7 @@ _zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) /*[clinic input] @classmethod _zstd.ZstdCompressor.__new__ as _zstd_ZstdCompressor_new - level: object = None + level: Py_ssize_t(c_default='PY_SSIZE_T_MIN', accept={int, NoneType}) = None The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT. options: object = None A dict object that contains advanced compression parameters. @@ -328,9 +331,9 @@ function instead. [clinic start generated code]*/ static PyObject * -_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, Py_ssize_t level, PyObject *options, PyObject *zstd_dict) -/*[clinic end generated code: output=cdef61eafecac3d7 input=92de0211ae20ffdc]*/ +/*[clinic end generated code: output=a857ec0dc29fc5e2 input=9899740b24d11319]*/ { ZstdCompressor* self = PyObject_GC_New(ZstdCompressor, type); if (self == NULL) { @@ -353,20 +356,20 @@ _zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, /* Last mode */ self->last_mode = ZSTD_e_end; - if (level != Py_None && options != Py_None) { + if (level != PY_SSIZE_T_MIN && options != Py_None) { PyErr_SetString(PyExc_RuntimeError, "Only one of level or options should be used."); goto error; } /* Set compressLevel/options to compression context */ - if (level != Py_None) { - if (_zstd_set_c_parameters(self, level, "level", "int") < 0) { + if (level != PY_SSIZE_T_MIN) { + if (_zstd_set_c_level(self, level) < 0) { goto error; } } if (options != Py_None) { - if (_zstd_set_c_parameters(self, options, "options", "dict") < 0) { + if (_zstd_set_c_parameters(self, options) < 0) { goto error; } } From f40f0084d31b2db139997e8428452d6c41b78ab3 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sat, 24 May 2025 05:26:46 +0100 Subject: [PATCH 02/10] Respond to review comments --- Lib/compression/zstd/_zstdfile.py | 3 +- Lib/test/test_zstd.py | 59 +++++++++++++++---- Modules/_zstd/clinic/compressor.c.h | 11 ++-- Modules/_zstd/compressor.c | 89 ++++++++++++++++++----------- Modules/_zstd/decompressor.c | 33 +++++------ 5 files changed, 125 insertions(+), 70 deletions(-) diff --git a/Lib/compression/zstd/_zstdfile.py b/Lib/compression/zstd/_zstdfile.py index 8770e576f509f4..d709f5efc658fa 100644 --- a/Lib/compression/zstd/_zstdfile.py +++ b/Lib/compression/zstd/_zstdfile.py @@ -1,7 +1,6 @@ import io from os import PathLike -from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdError, - ZSTD_DStreamOutSize) +from _zstd import ZstdCompressor, ZstdDecompressor, ZSTD_DStreamOutSize from compression._common import _streams __all__ = ('ZstdFile', 'open') diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index 5d49cb64257d3c..1003371f6b1666 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -197,12 +197,16 @@ def test_simple_compress_bad_args(self): self.assertRaises(TypeError, ZstdCompressor, zstd_dict={1: 2, 3: 4}) # valid compression level range is [-(1<<17), 22] + with self.assertRaises(ValueError): + ZstdCompressor(23) + with self.assertRaises(ValueError): + ZstdCompressor(-(1<<17)-1) with self.assertRaises(ValueError): ZstdCompressor(2**31) with self.assertRaises(ValueError): - ZstdCompressor(level=-(2**31)) + ZstdCompressor(level=-(2**1000)) with self.assertRaises(ValueError): - ZstdCompressor(options={2**31: 100}) + ZstdCompressor(level=(2**1000)) with self.assertRaises(ZstdError): ZstdCompressor(options={CompressionParameter.window_log: 100}) @@ -262,15 +266,22 @@ def test_compress_parameters(self): d1[CompressionParameter.ldm_bucket_size_log] = 2**31 self.assertRaises(ValueError, ZstdCompressor, options=d1) - # clamp compressionLevel + # out of bounds compression level level_min, level_max = CompressionParameter.compression_level.bounds() with self.assertRaises(ValueError): compress(b'', level_max+1) with self.assertRaises(ValueError): compress(b'', level_min-1) - - compress(b'', options={CompressionParameter.compression_level:level_max+1}) - compress(b'', options={CompressionParameter.compression_level:level_min-1}) + with self.assertRaises(ValueError): + compress(b'', 2**1000) + with self.assertRaises(ValueError): + compress(b'', -(2**1000)) + with self.assertRaises(ValueError): + compress(b'', options={ + CompressionParameter.compression_level: level_max+1}) + with self.assertRaises(ValueError): + compress(b'', options={ + CompressionParameter.compression_level: level_min-1}) # zstd lib doesn't support MT compression if not SUPPORT_MULTITHREADING: @@ -390,12 +401,22 @@ def test_simple_decompress_bad_args(self): self.assertRaises(TypeError, ZstdDecompressor, options=b'abc') with self.assertRaises(ValueError): - ZstdDecompressor(options={2**31 : 100}) + ZstdDecompressor(options={2**31: 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={2**1000: 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={-(2**31)-1: 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={-(2**1000): 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={0: 2**32}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={0: -(2**1000)}) with self.assertRaises(ZstdError): - ZstdDecompressor(options={DecompressionParameter.window_log_max:100}) + ZstdDecompressor(options={DecompressionParameter.window_log_max: 100}) with self.assertRaises(ZstdError): - ZstdDecompressor(options={3333 : 100}) + ZstdDecompressor(options={3333: 100}) empty = compress(b'') lzd = ZstdDecompressor() @@ -421,6 +442,20 @@ def test_decompress_parameters(self): r'\((?:32|64)-bit build\)')): decompress(b'', options=options) + # out of bounds deecompression parameter + options[DecompressionParameter.window_log_max] = 2**31 + with self.assertRaises(ValueError): + decompress(b'', options=options) + options[DecompressionParameter.window_log_max] = -(2**32)-1 + with self.assertRaises(ValueError): + decompress(b'', options=options) + options[DecompressionParameter.window_log_max] = 2**1000 + with self.assertRaises(ValueError): + decompress(b'', options=options) + options[DecompressionParameter.window_log_max] = -(2**1000) + with self.assertRaises(ValueError): + decompress(b'', options=options) + def test_unknown_decompression_parameter(self): KEY = 100001234 options = {DecompressionParameter.window_log_max: DecompressionParameter.window_log_max.bounds()[1], @@ -1430,11 +1465,11 @@ def test_init_bad_mode(self): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), "rw") with self.assertRaisesRegex(TypeError, - r"NOT be a CompressionParameter"): + r"not be a CompressionParameter"): ZstdFile(io.BytesIO(), 'rb', options={CompressionParameter.compression_level:5}) with self.assertRaisesRegex(TypeError, - r"NOT be a DecompressionParameter"): + r"not be a DecompressionParameter"): ZstdFile(io.BytesIO(), 'wb', options={DecompressionParameter.window_log_max:21}) @@ -1473,7 +1508,7 @@ def test_init_close_fp(self): tmp_f.write(DAT_130K_C) filename = tmp_f.name - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): ZstdFile(filename, options={'a':'b'}) # for PyPy diff --git a/Modules/_zstd/clinic/compressor.c.h b/Modules/_zstd/clinic/compressor.c.h index 4ead29a7bfa541..f69161b590e5b7 100644 --- a/Modules/_zstd/clinic/compressor.c.h +++ b/Modules/_zstd/clinic/compressor.c.h @@ -6,7 +6,6 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif -#include "pycore_abstract.h" // _Py_convert_optional_to_ssize_t() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(_zstd_ZstdCompressor_new__doc__, @@ -26,7 +25,7 @@ PyDoc_STRVAR(_zstd_ZstdCompressor_new__doc__, "function instead."); static PyObject * -_zstd_ZstdCompressor_new_impl(PyTypeObject *type, Py_ssize_t level, +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, PyObject *options, PyObject *zstd_dict); static PyObject * @@ -64,7 +63,7 @@ _zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; - Py_ssize_t level = PY_SSIZE_T_MIN; + PyObject *level = Py_None; PyObject *options = Py_None; PyObject *zstd_dict = Py_None; @@ -77,9 +76,7 @@ _zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto skip_optional_pos; } if (fastargs[0]) { - if (!_Py_convert_optional_to_ssize_t(fastargs[0], &level)) { - goto exit; - } + level = fastargs[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -255,4 +252,4 @@ _zstd_ZstdCompressor_flush(PyObject *self, PyObject *const *args, Py_ssize_t nar exit: return return_value; } -/*[clinic end generated code: output=95b86cea725001df input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ee2d1dc298de790c input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index ee63e4028d1136..a0d1cfd8af8354 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -49,32 +49,32 @@ typedef struct { #include "clinic/compressor.c.h" static int -_zstd_set_c_level(ZstdCompressor *self, const Py_ssize_t level) +_zstd_set_c_level(ZstdCompressor *self, const int level) { /* Set integer compression level */ - const int min_level = ZSTD_minCLevel(); - const int max_level = ZSTD_maxCLevel(); + int min_level = ZSTD_minCLevel(); + int max_level = ZSTD_maxCLevel(); if (level < min_level || level > max_level) { PyErr_Format(PyExc_ValueError, - "compression level %zd not in valid range %d <= level <= %d.", + "%zd not in valid range %d <= compression level <= %d.", level, min_level, max_level); return -1; } /* Save for generating ZSTD_CDICT */ - self->compression_level = (int)level; + self->compression_level = level; /* Set compressionLevel to compression context */ - const size_t zstd_ret = ZSTD_CCtx_setParameter( - self->cctx, ZSTD_c_compressionLevel, (int)level); + size_t zstd_ret = ZSTD_CCtx_setParameter( + self->cctx, ZSTD_c_compressionLevel, level); /* Check error */ if (ZSTD_isError(zstd_ret)) { - const _zstd_state* const st = PyType_GetModuleState(Py_TYPE(self)); - if (st == NULL) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { return -1; } - set_zstd_error(st, ERR_SET_C_LEVEL, zstd_ret); + set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret); return -1; } return 0; @@ -83,14 +83,14 @@ _zstd_set_c_level(ZstdCompressor *self, const Py_ssize_t level) static int _zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) { - /* Set options dict */ - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state == NULL) { return -1; } if (!PyDict_Check(options)) { - PyErr_Format(PyExc_TypeError, "invalid type for options, expected dict"); + PyErr_Format(PyExc_TypeError, + "invalid type for options, expected dict"); return -1; } @@ -100,32 +100,38 @@ _zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) /* Check key type */ if (Py_TYPE(key) == mod_state->DParameter_type) { PyErr_SetString(PyExc_TypeError, - "key should NOT be DecompressionParameter."); + "compression options dictionary key must not be a " + "DecompressionParameter attribute"); return -1; } - const int key_v = PyLong_AsInt(key); + Py_INCREF(key); + int key_v = PyLong_AsInt(key); if (key_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "key should be either a " - "CompressionParameter attribute or an int."); + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_SetString(PyExc_ValueError, + "dictionary key must be less than 2**31"); + } return -1; } - // TODO(emmatyping): check bounds when there is a value error here for better - // error message? + Py_INCREF(value); int value_v = PyLong_AsInt(value); if (value_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "options dict value should be an int."); + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_SetString(PyExc_ValueError, + "dictionary value must be less than 2**31"); + } return -1; } if (key_v == ZSTD_c_compressionLevel) { - /* Save for generating ZSTD_CDICT */ - self->compression_level = value_v; + if (_zstd_set_c_level(self, value_v) < 0) { + return -1; + } + continue; } - else if (key_v == ZSTD_c_nbWorkers) { + if (key_v == ZSTD_c_nbWorkers) { /* From the zstd library docs: 1. When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream2(). @@ -138,7 +144,7 @@ _zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) } /* Set parameter to compression context */ - const size_t zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); + size_t zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); if (ZSTD_isError(zstd_ret)) { set_parameter_error(mod_state, 1, key_v, value_v); return -1; @@ -323,7 +329,7 @@ _zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) /*[clinic input] @classmethod _zstd.ZstdCompressor.__new__ as _zstd_ZstdCompressor_new - level: Py_ssize_t(c_default='PY_SSIZE_T_MIN', accept={int, NoneType}) = None + level: object = None The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT. options: object = None A dict object that contains advanced compression parameters. @@ -337,9 +343,9 @@ function instead. [clinic start generated code]*/ static PyObject * -_zstd_ZstdCompressor_new_impl(PyTypeObject *type, Py_ssize_t level, +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, PyObject *options, PyObject *zstd_dict) -/*[clinic end generated code: output=a857ec0dc29fc5e2 input=9899740b24d11319]*/ +/*[clinic end generated code: output=cdef61eafecac3d7 input=92de0211ae20ffdc]*/ { ZstdCompressor* self = PyObject_GC_New(ZstdCompressor, type); if (self == NULL) { @@ -364,19 +370,34 @@ _zstd_ZstdCompressor_new_impl(PyTypeObject *type, Py_ssize_t level, /* Last mode */ self->last_mode = ZSTD_e_end; - if (level != PY_SSIZE_T_MIN && options != Py_None) { + if (level != Py_None && options != Py_None) { PyErr_SetString(PyExc_RuntimeError, "Only one of level or options should be used."); goto error; } - /* Set compressLevel/options to compression context */ - if (level != PY_SSIZE_T_MIN) { - if (_zstd_set_c_level(self, level) < 0) { + /* Set compression level */ + if (level != Py_None) { + if (!PyLong_Check(level)) { + PyErr_SetString(PyExc_TypeError, + "invalid type for level, expected int"); + goto error; + } + int level_v = PyLong_AsInt(level); + if (level_v == -1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Format(PyExc_ValueError, + "%zd not in valid range %d <= compression level <= %d.", + level, ZSTD_minCLevel(), ZSTD_maxCLevel()); + } + goto error; + } + if (_zstd_set_c_level(self, level_v) < 0) { goto error; } } + /* Set options dictionary */ if (options != Py_None) { if (_zstd_set_c_parameters(self, options) < 0) { goto error; @@ -693,6 +714,8 @@ PyDoc_STRVAR(ZstdCompressor_last_mode_doc, static PyMemberDef ZstdCompressor_members[] = { {"last_mode", Py_T_INT, offsetof(ZstdCompressor, last_mode), Py_READONLY, ZstdCompressor_last_mode_doc}, + {"compression_level", Py_T_INT, offsetof(ZstdCompressor, compression_level), + Py_READONLY, NULL}, {NULL} }; diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index d084f0847c72dd..72dbad701ac92e 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -88,13 +88,9 @@ _get_DDict(ZstdDict *self) return self->d_dict; } -/* Set decompression parameters to decompression context */ static int _zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) { - size_t zstd_ret; - PyObject *key, *value; - Py_ssize_t pos; _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state == NULL) { return -1; @@ -102,38 +98,43 @@ _zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) if (!PyDict_Check(options)) { PyErr_SetString(PyExc_TypeError, - "options argument should be dict object."); + "invalid type for options, expected dict"); return -1; } - pos = 0; + Py_ssize_t pos = 0; + PyObject *key, *value; while (PyDict_Next(options, &pos, &key, &value)) { /* Check key type */ if (Py_TYPE(key) == mod_state->CParameter_type) { PyErr_SetString(PyExc_TypeError, - "Key of decompression options dict should " - "NOT be a CompressionParameter attribute."); + "compression options dictionary key must not be a " + "CompressionParameter attribute"); return -1; } - /* Both key & value should be 32-bit signed int */ + Py_INCREF(key); int key_v = PyLong_AsInt(key); if (key_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Key of options dict should be either a " - "DecompressionParameter attribute or an int."); + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_SetString(PyExc_ValueError, + "dictionary key must be less than 2**31"); + } return -1; } + Py_INCREF(value); int value_v = PyLong_AsInt(value); if (value_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Value of options dict should be an int."); + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_SetString(PyExc_ValueError, + "dictionary value must be less than 2**31"); + } return -1; } /* Set parameter to compression context */ - zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); + size_t zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); /* Check error */ if (ZSTD_isError(zstd_ret)) { @@ -583,7 +584,7 @@ _zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, self->dict = zstd_dict; } - /* Set option to decompression context */ + /* Set options dictionary */ if (options != Py_None) { if (_zstd_set_d_parameters(self, options) < 0) { goto error; From 1fb521a4661e2630082e5384851cf8461359ff60 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sat, 24 May 2025 08:05:38 +0100 Subject: [PATCH 03/10] fixup! Respond to review comments --- Modules/_zstd/compressor.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index a0d1cfd8af8354..44cf0b06dfb1a7 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -714,8 +714,6 @@ PyDoc_STRVAR(ZstdCompressor_last_mode_doc, static PyMemberDef ZstdCompressor_members[] = { {"last_mode", Py_T_INT, offsetof(ZstdCompressor, last_mode), Py_READONLY, ZstdCompressor_last_mode_doc}, - {"compression_level", Py_T_INT, offsetof(ZstdCompressor, compression_level), - Py_READONLY, NULL}, {NULL} }; From 5b851a201e62f9f2caba612d8485ae2fa13f1209 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Mon, 26 May 2025 09:45:44 +0100 Subject: [PATCH 04/10] Serhiy's review --- Lib/test/test_zstd.py | 69 +++++++++++++++++++++++++----------- Modules/_zstd/compressor.c | 27 +++++++------- Modules/_zstd/decompressor.c | 17 ++++----- 3 files changed, 66 insertions(+), 47 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index 34944c01690fe8..a1ebb3e58d2e59 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -196,14 +196,31 @@ def test_simple_compress_bad_args(self): self.assertRaises(TypeError, ZstdCompressor, zstd_dict={1: 2, 3: 4}) # valid compression level range is [-(1<<17), 22] - with self.assertRaises(ValueError): + with self.assertRaises(ValueError) as cm: ZstdCompressor(23) - with self.assertRaises(ValueError): + self.assertEqual( + str(cm.exception), + '23 not in valid range -131072 <= compression level <= 22.', + ) + with self.assertRaises(ValueError) as cm: ZstdCompressor(-(1<<17)-1) - with self.assertRaises(ValueError): + self.assertEqual(-(1<<17)-1, -131073) + self.assertEqual( + str(cm.exception), + '-131073 not in valid range -131072 <= compression level <= 22.', + ) + with self.assertRaises(ValueError) as cm: ZstdCompressor(2**31) - with self.assertRaises(ValueError): + self.assertEqual( + str(cm.exception), + 'compression level not in valid range -131072 <= level <= 22.', + ) + with self.assertRaises(ValueError) as cm: ZstdCompressor(level=-(2**1000)) + self.assertEqual( + str(cm.exception), + 'compression level not in valid range -131072 <= level <= 22.', + ) with self.assertRaises(ValueError): ZstdCompressor(level=(2**1000)) @@ -260,10 +277,15 @@ def test_compress_parameters(self): } ZstdCompressor(options=d) - # larger than signed int, ValueError d1 = d.copy() + # larger than signed int d1[CompressionParameter.ldm_bucket_size_log] = 2**31 - self.assertRaises(ValueError, ZstdCompressor, options=d1) + with self.assertRaises(OverflowError): + ZstdCompressor(options=d1) + # smaller than signed int + d1[CompressionParameter.ldm_bucket_size_log] = -(2**31)-1 + with self.assertRaises(OverflowError): + ZstdCompressor(options=d1) # out of bounds compression level level_min, level_max = CompressionParameter.compression_level.bounds() @@ -399,17 +421,17 @@ def test_simple_decompress_bad_args(self): self.assertRaises(TypeError, ZstdDecompressor, options='abc') self.assertRaises(TypeError, ZstdDecompressor, options=b'abc') - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): ZstdDecompressor(options={2**31: 100}) - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): ZstdDecompressor(options={2**1000: 100}) - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): ZstdDecompressor(options={-(2**31)-1: 100}) - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): ZstdDecompressor(options={-(2**1000): 100}) - with self.assertRaises(ValueError): - ZstdDecompressor(options={0: 2**32}) - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): + ZstdDecompressor(options={0: 2**31}) + with self.assertRaises(OverflowError): ZstdDecompressor(options={0: -(2**1000)}) with self.assertRaises(ZstdError): @@ -428,10 +450,15 @@ def test_decompress_parameters(self): d = {DecompressionParameter.window_log_max : 15} ZstdDecompressor(options=d) - # larger than signed int, ValueError d1 = d.copy() + # larger than signed int d1[DecompressionParameter.window_log_max] = 2**31 - self.assertRaises(ValueError, ZstdDecompressor, None, d1) + with self.assertRaises(OverflowError): + ZstdDecompressor(None, d1) + # smaller than signed int + d1[DecompressionParameter.window_log_max] = -(2**31)-1 + with self.assertRaises(OverflowError): + ZstdDecompressor(None, d1) # out of bounds error msg options = {DecompressionParameter.window_log_max:100} @@ -443,16 +470,16 @@ def test_decompress_parameters(self): # out of bounds deecompression parameter options[DecompressionParameter.window_log_max] = 2**31 - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): decompress(b'', options=options) - options[DecompressionParameter.window_log_max] = -(2**32)-1 - with self.assertRaises(ValueError): + options[DecompressionParameter.window_log_max] = -(2**31)-1 + with self.assertRaises(OverflowError): decompress(b'', options=options) options[DecompressionParameter.window_log_max] = 2**1000 - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): decompress(b'', options=options) options[DecompressionParameter.window_log_max] = -(2**1000) - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): decompress(b'', options=options) def test_unknown_decompression_parameter(self): @@ -1487,7 +1514,7 @@ def test_init_bad_check(self): with self.assertRaises(TypeError): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), "r", options=33) - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), options={DecompressionParameter.window_log_max:2**31}) diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index 44cf0b06dfb1a7..2737d0e668c5f4 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -49,14 +49,14 @@ typedef struct { #include "clinic/compressor.c.h" static int -_zstd_set_c_level(ZstdCompressor *self, const int level) +_zstd_set_c_level(ZstdCompressor *self, int level) { /* Set integer compression level */ int min_level = ZSTD_minCLevel(); int max_level = ZSTD_maxCLevel(); if (level < min_level || level > max_level) { PyErr_Format(PyExc_ValueError, - "%zd not in valid range %d <= compression level <= %d.", + "%d not in valid range %d <= compression level <= %d.", level, min_level, max_level); return -1; } @@ -90,7 +90,8 @@ _zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) if (!PyDict_Check(options)) { PyErr_Format(PyExc_TypeError, - "invalid type for options, expected dict"); + "ZstdCompressor() argument 'options' must be dict, not %T", + options); return -1; } @@ -106,22 +107,16 @@ _zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) } Py_INCREF(key); + Py_INCREF(value); int key_v = PyLong_AsInt(key); + Py_DECREF(key); if (key_v == -1 && PyErr_Occurred()) { - if (PyErr_ExceptionMatches(PyExc_OverflowError)) { - PyErr_SetString(PyExc_ValueError, - "dictionary key must be less than 2**31"); - } return -1; } - Py_INCREF(value); int value_v = PyLong_AsInt(value); + Py_DECREF(value); if (value_v == -1 && PyErr_Occurred()) { - if (PyErr_ExceptionMatches(PyExc_OverflowError)) { - PyErr_SetString(PyExc_ValueError, - "dictionary value must be less than 2**31"); - } return -1; } @@ -145,6 +140,8 @@ _zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) /* Set parameter to compression context */ size_t zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); + + /* Check error */ if (ZSTD_isError(zstd_ret)) { set_parameter_error(mod_state, 1, key_v, value_v); return -1; @@ -371,7 +368,7 @@ _zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, self->last_mode = ZSTD_e_end; if (level != Py_None && options != Py_None) { - PyErr_SetString(PyExc_RuntimeError, + PyErr_SetString(PyExc_TypeError, "Only one of level or options should be used."); goto error; } @@ -387,8 +384,8 @@ _zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, if (level_v == -1 && PyErr_Occurred()) { if (PyErr_ExceptionMatches(PyExc_OverflowError)) { PyErr_Format(PyExc_ValueError, - "%zd not in valid range %d <= compression level <= %d.", - level, ZSTD_minCLevel(), ZSTD_maxCLevel()); + "compression level not in valid range %d <= level <= %d.", + ZSTD_minCLevel(), ZSTD_maxCLevel()); } goto error; } diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index 72dbad701ac92e..a1183c74c1408d 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -97,8 +97,9 @@ _zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) } if (!PyDict_Check(options)) { - PyErr_SetString(PyExc_TypeError, - "invalid type for options, expected dict"); + PyErr_Format(PyExc_TypeError, + "ZstdDecompressor() argument 'options' must be dict, not %T", + options); return -1; } @@ -114,22 +115,16 @@ _zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) } Py_INCREF(key); + Py_INCREF(value); int key_v = PyLong_AsInt(key); + Py_DECREF(key); if (key_v == -1 && PyErr_Occurred()) { - if (PyErr_ExceptionMatches(PyExc_OverflowError)) { - PyErr_SetString(PyExc_ValueError, - "dictionary key must be less than 2**31"); - } return -1; } - Py_INCREF(value); int value_v = PyLong_AsInt(value); + Py_DECREF(value); if (value_v == -1 && PyErr_Occurred()) { - if (PyErr_ExceptionMatches(PyExc_OverflowError)) { - PyErr_SetString(PyExc_ValueError, - "dictionary value must be less than 2**31"); - } return -1; } From 36a45171b29bcf5c93f133783c47235a1aa9cf5f Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Mon, 26 May 2025 16:09:49 +0100 Subject: [PATCH 05/10] Serhiy's review --- Lib/test/test_zstd.py | 117 ++++++++++++++++------------------- Modules/_zstd/_zstdmodule.c | 24 +++---- Modules/_zstd/_zstdmodule.h | 3 +- Modules/_zstd/compressor.c | 7 ++- Modules/_zstd/decompressor.c | 2 +- 5 files changed, 66 insertions(+), 87 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index a1ebb3e58d2e59..fbab7661075fc5 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -64,6 +64,10 @@ SUPPORT_MULTITHREADING = False +C_INT_MIN = -(2**31) +C_INT_MAX = (2**31) - 1 + + def setUpModule(): global SUPPORT_MULTITHREADING SUPPORT_MULTITHREADING = CompressionParameter.nb_workers.bounds() != (0, 0) @@ -195,38 +199,21 @@ def test_simple_compress_bad_args(self): self.assertRaises(TypeError, ZstdCompressor, zstd_dict=b"abcd1234") self.assertRaises(TypeError, ZstdCompressor, zstd_dict={1: 2, 3: 4}) - # valid compression level range is [-(1<<17), 22] - with self.assertRaises(ValueError) as cm: - ZstdCompressor(23) - self.assertEqual( - str(cm.exception), - '23 not in valid range -131072 <= compression level <= 22.', - ) - with self.assertRaises(ValueError) as cm: - ZstdCompressor(-(1<<17)-1) - self.assertEqual(-(1<<17)-1, -131073) - self.assertEqual( - str(cm.exception), - '-131073 not in valid range -131072 <= compression level <= 22.', - ) - with self.assertRaises(ValueError) as cm: - ZstdCompressor(2**31) - self.assertEqual( - str(cm.exception), - 'compression level not in valid range -131072 <= level <= 22.', - ) - with self.assertRaises(ValueError) as cm: + # valid range for compression level is [-(1<<17), 22] + msg = '{} not in valid range -131072 <= compression level <= 22' + with self.assertRaisesRegex(ValueError, msg.format(C_INT_MAX)): + ZstdCompressor(C_INT_MAX) + with self.assertRaisesRegex(ValueError, msg.format(C_INT_MIN)): + ZstdCompressor(C_INT_MIN) + msg = 'compression level not in valid range -131072 <= level <= 22' + with self.assertRaisesRegex(ValueError, msg): ZstdCompressor(level=-(2**1000)) - self.assertEqual( - str(cm.exception), - 'compression level not in valid range -131072 <= level <= 22.', - ) - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, msg): ZstdCompressor(level=(2**1000)) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor(options={CompressionParameter.window_log: 100}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor(options={3333: 100}) # Method bad arguments @@ -279,12 +266,12 @@ def test_compress_parameters(self): d1 = d.copy() # larger than signed int - d1[CompressionParameter.ldm_bucket_size_log] = 2**31 - with self.assertRaises(OverflowError): + d1[CompressionParameter.ldm_bucket_size_log] = C_INT_MAX + with self.assertRaises(ValueError): ZstdCompressor(options=d1) # smaller than signed int - d1[CompressionParameter.ldm_bucket_size_log] = -(2**31)-1 - with self.assertRaises(OverflowError): + d1[CompressionParameter.ldm_bucket_size_log] = C_INT_MIN + with self.assertRaises(ValueError): ZstdCompressor(options=d1) # out of bounds compression level @@ -315,19 +302,19 @@ def test_compress_parameters(self): # out of bounds error msg option = {CompressionParameter.window_log:100} - with self.assertRaisesRegex(ZstdError, - (r'Error when setting zstd compression parameter "window_log", ' - r'it should \d+ <= value <= \d+, provided value is 100\. ' - r'\((?:32|64)-bit build\)')): + with self.assertRaisesRegex( + ValueError, + r"100 not in valid range \d+ <= value <= \d+ for compression " + r"parameter 'window_log'", + ): compress(b'', options=option) def test_unknown_compression_parameter(self): KEY = 100001234 option = {CompressionParameter.compression_level: 10, KEY: 200000000} - pattern = (r'Invalid zstd compression parameter.*?' - fr'"unknown parameter \(key {KEY}\)"') - with self.assertRaisesRegex(ZstdError, pattern): + pattern = rf"invalid compression parameter 'unknown parameter \(key {KEY}\)'" + with self.assertRaisesRegex(ValueError, pattern): ZstdCompressor(options=option) @unittest.skipIf(not SUPPORT_MULTITHREADING, @@ -421,22 +408,22 @@ def test_simple_decompress_bad_args(self): self.assertRaises(TypeError, ZstdDecompressor, options='abc') self.assertRaises(TypeError, ZstdDecompressor, options=b'abc') - with self.assertRaises(OverflowError): - ZstdDecompressor(options={2**31: 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={C_INT_MAX: 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={C_INT_MIN: 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={0: C_INT_MAX}) with self.assertRaises(OverflowError): ZstdDecompressor(options={2**1000: 100}) - with self.assertRaises(OverflowError): - ZstdDecompressor(options={-(2**31)-1: 100}) with self.assertRaises(OverflowError): ZstdDecompressor(options={-(2**1000): 100}) - with self.assertRaises(OverflowError): - ZstdDecompressor(options={0: 2**31}) with self.assertRaises(OverflowError): ZstdDecompressor(options={0: -(2**1000)}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdDecompressor(options={DecompressionParameter.window_log_max: 100}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdDecompressor(options={3333: 100}) empty = compress(b'') @@ -452,28 +439,29 @@ def test_decompress_parameters(self): d1 = d.copy() # larger than signed int - d1[DecompressionParameter.window_log_max] = 2**31 - with self.assertRaises(OverflowError): + d1[DecompressionParameter.window_log_max] = C_INT_MAX + with self.assertRaises(ValueError): ZstdDecompressor(None, d1) # smaller than signed int - d1[DecompressionParameter.window_log_max] = -(2**31)-1 - with self.assertRaises(OverflowError): + d1[DecompressionParameter.window_log_max] = C_INT_MIN + with self.assertRaises(ValueError): ZstdDecompressor(None, d1) # out of bounds error msg options = {DecompressionParameter.window_log_max:100} - with self.assertRaisesRegex(ZstdError, - (r'Error when setting zstd decompression parameter "window_log_max", ' - r'it should \d+ <= value <= \d+, provided value is 100\. ' - r'\((?:32|64)-bit build\)')): + with self.assertRaisesRegex( + ValueError, + r"100 not in valid range \d+ <= value <= \d+ for decompression " + r"parameter 'window_log_max'", + ): decompress(b'', options=options) # out of bounds deecompression parameter - options[DecompressionParameter.window_log_max] = 2**31 - with self.assertRaises(OverflowError): + options[DecompressionParameter.window_log_max] = C_INT_MAX + with self.assertRaises(ValueError): decompress(b'', options=options) - options[DecompressionParameter.window_log_max] = -(2**31)-1 - with self.assertRaises(OverflowError): + options[DecompressionParameter.window_log_max] = C_INT_MIN + with self.assertRaises(ValueError): decompress(b'', options=options) options[DecompressionParameter.window_log_max] = 2**1000 with self.assertRaises(OverflowError): @@ -486,9 +474,8 @@ def test_unknown_decompression_parameter(self): KEY = 100001234 options = {DecompressionParameter.window_log_max: DecompressionParameter.window_log_max.bounds()[1], KEY: 200000000} - pattern = (r'Invalid zstd decompression parameter.*?' - fr'"unknown parameter \(key {KEY}\)"') - with self.assertRaisesRegex(ZstdError, pattern): + pattern = rf"invalid decompression parameter 'unknown parameter \(key {KEY}\)'" + with self.assertRaisesRegex(ValueError, pattern): ZstdDecompressor(options=options) def test_decompress_epilogue_flags(self): @@ -1506,9 +1493,9 @@ def test_init_bad_check(self): with self.assertRaises(TypeError): ZstdFile(io.BytesIO(), "w", level='asd') # CHECK_UNKNOWN and anything above CHECK_ID_MAX should be invalid. - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdFile(io.BytesIO(), "w", options={999:9999}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdFile(io.BytesIO(), "w", options={CompressionParameter.window_log:99}) with self.assertRaises(TypeError): @@ -1518,7 +1505,7 @@ def test_init_bad_check(self): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), options={DecompressionParameter.window_log_max:2**31}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), options={444:333}) diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index 56ad999e5cd4e4..aed0ed6b8258c9 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -103,16 +103,13 @@ static const ParameterInfo dp_list[] = { }; void -set_parameter_error(const _zstd_state* const state, int is_compress, - int key_v, int value_v) +set_parameter_error(int is_compress, int key_v, int value_v) { ParameterInfo const *list; int list_size; - char const *name; char *type; ZSTD_bounds bounds; - int i; - char pos_msg[128]; + char pos_msg[64]; if (is_compress) { list = cp_list; @@ -126,8 +123,8 @@ set_parameter_error(const _zstd_state* const state, int is_compress, } /* Find parameter's name */ - name = NULL; - for (i = 0; i < list_size; i++) { + char const *name = NULL; + for (int i = 0; i < list_size; i++) { if (key_v == (list+i)->parameter) { name = (list+i)->parameter_name; break; @@ -149,20 +146,15 @@ set_parameter_error(const _zstd_state* const state, int is_compress, bounds = ZSTD_dParam_getBounds(key_v); } if (ZSTD_isError(bounds.error)) { - PyErr_Format(state->ZstdError, - "Invalid zstd %s parameter \"%s\".", + PyErr_Format(PyExc_ValueError, "invalid %s parameter '%s'", type, name); return; } /* Error message */ - PyErr_Format(state->ZstdError, - "Error when setting zstd %s parameter \"%s\", it " - "should %d <= value <= %d, provided value is %d. " - "(%d-bit build)", - type, name, - bounds.lowerBound, bounds.upperBound, value_v, - 8*(int)sizeof(Py_ssize_t)); + PyErr_Format(PyExc_ValueError, + "%d not in valid range %d <= value <= %d for %s parameter '%s'", + value_v, bounds.lowerBound, bounds.upperBound, type, name); } static inline _zstd_state* diff --git a/Modules/_zstd/_zstdmodule.h b/Modules/_zstd/_zstdmodule.h index b36486442c6567..1f4160f474f0b0 100644 --- a/Modules/_zstd/_zstdmodule.h +++ b/Modules/_zstd/_zstdmodule.h @@ -49,7 +49,6 @@ set_zstd_error(const _zstd_state* const state, const error_type type, size_t zstd_ret); extern void -set_parameter_error(const _zstd_state* const state, int is_compress, - int key_v, int value_v); +set_parameter_error(int is_compress, int key_v, int value_v); #endif // !ZSTD_MODULE_H diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index 563142f7b89760..a9163fbeaa53f0 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -56,7 +56,7 @@ _zstd_set_c_level(ZstdCompressor *self, int level) int max_level = ZSTD_maxCLevel(); if (level < min_level || level > max_level) { PyErr_Format(PyExc_ValueError, - "%d not in valid range %d <= compression level <= %d.", + "%d not in valid range %d <= compression level <= %d", level, min_level, max_level); return -1; } @@ -111,6 +111,7 @@ _zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) int key_v = PyLong_AsInt(key); Py_DECREF(key); if (key_v == -1 && PyErr_Occurred()) { + Py_DECREF(value); return -1; } @@ -143,7 +144,7 @@ _zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) /* Check error */ if (ZSTD_isError(zstd_ret)) { - set_parameter_error(mod_state, 1, key_v, value_v); + set_parameter_error(1, key_v, value_v); return -1; } } @@ -377,7 +378,7 @@ _zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, if (level_v == -1 && PyErr_Occurred()) { if (PyErr_ExceptionMatches(PyExc_OverflowError)) { PyErr_Format(PyExc_ValueError, - "compression level not in valid range %d <= level <= %d.", + "compression level not in valid range %d <= level <= %d", ZSTD_minCLevel(), ZSTD_maxCLevel()); } goto error; diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index 1f6050233b5e62..26e568cf433308 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -131,7 +131,7 @@ _zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) /* Check error */ if (ZSTD_isError(zstd_ret)) { - set_parameter_error(mod_state, 0, key_v, value_v); + set_parameter_error(0, key_v, value_v); return -1; } } From 4b2b44206468c98eb8442f1ae37fe409c40a5c33 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Tue, 27 May 2025 02:32:58 +0100 Subject: [PATCH 06/10] error messages --- Lib/test/test_zstd.py | 10 +++++----- Modules/_zstd/_zstdmodule.c | 5 +++-- Modules/_zstd/compressor.c | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index fbab7661075fc5..e9ed31c18606b2 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -200,7 +200,7 @@ def test_simple_compress_bad_args(self): self.assertRaises(TypeError, ZstdCompressor, zstd_dict={1: 2, 3: 4}) # valid range for compression level is [-(1<<17), 22] - msg = '{} not in valid range -131072 <= compression level <= 22' + msg = 'compression level {} not in valid range -131072 <= level <= 22' with self.assertRaisesRegex(ValueError, msg.format(C_INT_MAX)): ZstdCompressor(C_INT_MAX) with self.assertRaisesRegex(ValueError, msg.format(C_INT_MIN)): @@ -304,8 +304,8 @@ def test_compress_parameters(self): option = {CompressionParameter.window_log:100} with self.assertRaisesRegex( ValueError, - r"100 not in valid range \d+ <= value <= \d+ for compression " - r"parameter 'window_log'", + "compression parameter 'window_log' received an illegal value 100; " + r'the valid range is \d+ <= value <= \d+', ): compress(b'', options=option) @@ -451,8 +451,8 @@ def test_decompress_parameters(self): options = {DecompressionParameter.window_log_max:100} with self.assertRaisesRegex( ValueError, - r"100 not in valid range \d+ <= value <= \d+ for decompression " - r"parameter 'window_log_max'", + "decompression parameter 'window_log_max' received an illegal value 100; " + r'the valid range is \d+ <= value <= \d+', ): decompress(b'', options=options) diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index aed0ed6b8258c9..68ca958dcf4f1f 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -153,8 +153,9 @@ set_parameter_error(int is_compress, int key_v, int value_v) /* Error message */ PyErr_Format(PyExc_ValueError, - "%d not in valid range %d <= value <= %d for %s parameter '%s'", - value_v, bounds.lowerBound, bounds.upperBound, type, name); + "%s parameter '%s' received an illegal value %d; " + "the valid range is %d <= value <= %d", + type, name, value_v, bounds.lowerBound, bounds.upperBound); } static inline _zstd_state* diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index a9163fbeaa53f0..b8e36b0737ca86 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -56,7 +56,7 @@ _zstd_set_c_level(ZstdCompressor *self, int level) int max_level = ZSTD_maxCLevel(); if (level < min_level || level > max_level) { PyErr_Format(PyExc_ValueError, - "%d not in valid range %d <= compression level <= %d", + "compression level %d not in valid range %d <= level <= %d", level, min_level, max_level); return -1; } From 62d085e50b7d12f181cb6d3b982149dc29ae2422 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 28 May 2025 13:36:04 +0100 Subject: [PATCH 07/10] restore out-of-bounds tests & improve range notation --- Lib/test/test_zstd.py | 19 +++++++++++++------ Modules/_zstd/_zstdmodule.c | 2 +- Modules/_zstd/compressor.c | 4 ++-- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index e9ed31c18606b2..7c570639807c89 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -200,16 +200,16 @@ def test_simple_compress_bad_args(self): self.assertRaises(TypeError, ZstdCompressor, zstd_dict={1: 2, 3: 4}) # valid range for compression level is [-(1<<17), 22] - msg = 'compression level {} not in valid range -131072 <= level <= 22' + msg = 'illegal compression level {}; the valid range is [-131072, 22]' with self.assertRaisesRegex(ValueError, msg.format(C_INT_MAX)): ZstdCompressor(C_INT_MAX) with self.assertRaisesRegex(ValueError, msg.format(C_INT_MIN)): ZstdCompressor(C_INT_MIN) - msg = 'compression level not in valid range -131072 <= level <= 22' + msg = 'illegal compression level; the valid range is [-131072, 22]' with self.assertRaisesRegex(ValueError, msg): ZstdCompressor(level=-(2**1000)) with self.assertRaisesRegex(ValueError, msg): - ZstdCompressor(level=(2**1000)) + ZstdCompressor(level=2**1000) with self.assertRaises(ValueError): ZstdCompressor(options={CompressionParameter.window_log: 100}) @@ -305,7 +305,7 @@ def test_compress_parameters(self): with self.assertRaisesRegex( ValueError, "compression parameter 'window_log' received an illegal value 100; " - r'the valid range is \d+ <= value <= \d+', + r'the valid range is \[\d+, \d+\]', ): compress(b'', options=option) @@ -439,10 +439,17 @@ def test_decompress_parameters(self): d1 = d.copy() # larger than signed int + d1[DecompressionParameter.window_log_max] = 2**1000 + with self.assertRaises(OverflowError): + ZstdDecompressor(None, d1) + # smaller than signed int + d1[DecompressionParameter.window_log_max] = -(2**1000) + with self.assertRaises(OverflowError): + ZstdDecompressor(None, d1) + d1[DecompressionParameter.window_log_max] = C_INT_MAX with self.assertRaises(ValueError): ZstdDecompressor(None, d1) - # smaller than signed int d1[DecompressionParameter.window_log_max] = C_INT_MIN with self.assertRaises(ValueError): ZstdDecompressor(None, d1) @@ -452,7 +459,7 @@ def test_decompress_parameters(self): with self.assertRaisesRegex( ValueError, "decompression parameter 'window_log_max' received an illegal value 100; " - r'the valid range is \d+ <= value <= \d+', + r'the valid range is \[\d+, \d+\]', ): decompress(b'', options=options) diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index 68ca958dcf4f1f..5ad697d2b83dd6 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -154,7 +154,7 @@ set_parameter_error(int is_compress, int key_v, int value_v) /* Error message */ PyErr_Format(PyExc_ValueError, "%s parameter '%s' received an illegal value %d; " - "the valid range is %d <= value <= %d", + "the valid range is [%d, %d]", type, name, value_v, bounds.lowerBound, bounds.upperBound); } diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index b8e36b0737ca86..0fc3d7d36c68fe 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -56,7 +56,7 @@ _zstd_set_c_level(ZstdCompressor *self, int level) int max_level = ZSTD_maxCLevel(); if (level < min_level || level > max_level) { PyErr_Format(PyExc_ValueError, - "compression level %d not in valid range %d <= level <= %d", + "illegal compression level %d; the valid range is [%d, %d]", level, min_level, max_level); return -1; } @@ -378,7 +378,7 @@ _zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, if (level_v == -1 && PyErr_Occurred()) { if (PyErr_ExceptionMatches(PyExc_OverflowError)) { PyErr_Format(PyExc_ValueError, - "compression level not in valid range %d <= level <= %d", + "illegal compression level; the valid range is [%d, %d]", ZSTD_minCLevel(), ZSTD_maxCLevel()); } goto error; From 1e62fd2ed93da152adeb1caa42b2d6cfa73eace9 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 28 May 2025 14:12:21 +0100 Subject: [PATCH 08/10] fix bad pattern --- Lib/test/test_zstd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index 7c570639807c89..9e30e122762b60 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -200,12 +200,12 @@ def test_simple_compress_bad_args(self): self.assertRaises(TypeError, ZstdCompressor, zstd_dict={1: 2, 3: 4}) # valid range for compression level is [-(1<<17), 22] - msg = 'illegal compression level {}; the valid range is [-131072, 22]' + msg = r'illegal compression level {}; the valid range is \[-131072, 22\]' with self.assertRaisesRegex(ValueError, msg.format(C_INT_MAX)): ZstdCompressor(C_INT_MAX) with self.assertRaisesRegex(ValueError, msg.format(C_INT_MIN)): ZstdCompressor(C_INT_MIN) - msg = 'illegal compression level; the valid range is [-131072, 22]' + msg = r'illegal compression level; the valid range is \[-131072, 22\]' with self.assertRaisesRegex(ValueError, msg): ZstdCompressor(level=-(2**1000)) with self.assertRaisesRegex(ValueError, msg): From 0b3745492b6e5e8d987a71a185cb162360380107 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 28 May 2025 14:13:39 +0100 Subject: [PATCH 09/10] Apply suggestion Co-authored-by: Serhiy Storchaka --- Lib/test/test_zstd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index 9e30e122762b60..8a91a182e0db4c 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -200,12 +200,12 @@ def test_simple_compress_bad_args(self): self.assertRaises(TypeError, ZstdCompressor, zstd_dict={1: 2, 3: 4}) # valid range for compression level is [-(1<<17), 22] - msg = r'illegal compression level {}; the valid range is \[-131072, 22\]' + msg = r'illegal compression level {}; the valid range is \[\d+, \d+\]' with self.assertRaisesRegex(ValueError, msg.format(C_INT_MAX)): ZstdCompressor(C_INT_MAX) with self.assertRaisesRegex(ValueError, msg.format(C_INT_MIN)): ZstdCompressor(C_INT_MIN) - msg = r'illegal compression level; the valid range is \[-131072, 22\]' + msg = r'illegal compression level; the valid range is \[\d+, \d+\]' with self.assertRaisesRegex(ValueError, msg): ZstdCompressor(level=-(2**1000)) with self.assertRaisesRegex(ValueError, msg): From c877a4fcd1d913625d704b076845f4f516acfd70 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 28 May 2025 15:18:51 +0100 Subject: [PATCH 10/10] negative --- Lib/test/test_zstd.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index 8a91a182e0db4c..014634e450e449 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -200,12 +200,12 @@ def test_simple_compress_bad_args(self): self.assertRaises(TypeError, ZstdCompressor, zstd_dict={1: 2, 3: 4}) # valid range for compression level is [-(1<<17), 22] - msg = r'illegal compression level {}; the valid range is \[\d+, \d+\]' + msg = r'illegal compression level {}; the valid range is \[-?\d+, -?\d+\]' with self.assertRaisesRegex(ValueError, msg.format(C_INT_MAX)): ZstdCompressor(C_INT_MAX) with self.assertRaisesRegex(ValueError, msg.format(C_INT_MIN)): ZstdCompressor(C_INT_MIN) - msg = r'illegal compression level; the valid range is \[\d+, \d+\]' + msg = r'illegal compression level; the valid range is \[-?\d+, -?\d+\]' with self.assertRaisesRegex(ValueError, msg): ZstdCompressor(level=-(2**1000)) with self.assertRaisesRegex(ValueError, msg): @@ -305,7 +305,7 @@ def test_compress_parameters(self): with self.assertRaisesRegex( ValueError, "compression parameter 'window_log' received an illegal value 100; " - r'the valid range is \[\d+, \d+\]', + r'the valid range is \[-?\d+, -?\d+\]', ): compress(b'', options=option) @@ -459,7 +459,7 @@ def test_decompress_parameters(self): with self.assertRaisesRegex( ValueError, "decompression parameter 'window_log_max' received an illegal value 100; " - r'the valid range is \[\d+, \d+\]', + r'the valid range is \[-?\d+, -?\d+\]', ): decompress(b'', options=options)