From 887d6e57cc5dc0d215a8aa9727651ef225b03675 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Fri, 24 Feb 2023 15:14:03 -0700 Subject: [PATCH 1/7] Handle possible NULL pointers in the array buffer --- stringdtype/stringdtype/src/casts.c | 22 ++++++++++++++++----- stringdtype/stringdtype/src/dtype.c | 16 +++++++++++++-- stringdtype/stringdtype/src/static_string.c | 19 +++++++++++++++++- stringdtype/stringdtype/src/static_string.h | 9 ++++++++- stringdtype/stringdtype/src/umath.c | 13 ++++++++++-- stringdtype/tests/test_stringdtype.py | 13 ++++++++++++ 6 files changed, 81 insertions(+), 11 deletions(-) diff --git a/stringdtype/stringdtype/src/casts.c b/stringdtype/stringdtype/src/casts.c index 8ad331ce..21900574 100644 --- a/stringdtype/stringdtype/src/casts.c +++ b/stringdtype/stringdtype/src/casts.c @@ -53,11 +53,17 @@ string_to_string(PyArrayMethod_Context *context, char *const data[], npy_intp out_stride = strides[1] / context->descriptors[1]->elsize; while (N--) { - out[0] = ssdup(in[0]); + ss *s = empty_if_null(in); + out[0] = ssdup(s); if (out[0] == NULL) { gil_error(PyExc_MemoryError, "ssdup failed"); return -1; } + + if (*in == NULL) { + free(s); + } + in += in_stride; out += out_stride; } @@ -217,9 +223,9 @@ unicode_to_string(PyArrayMethod_Context *context, char *const data[], gil_error(PyExc_TypeError, "Invalid unicode code point found"); return -1; } - ss *out_ss = ssnewempty(out_num_bytes); + ss *out_ss = ssnewemptylen(out_num_bytes); if (out_ss == NULL) { - gil_error(PyExc_MemoryError, "ssnewempty failed"); + gil_error(PyExc_MemoryError, "ssnewemptylen failed"); return -1; } char *out_buf = out_ss->buf; @@ -340,8 +346,9 @@ string_to_unicode(PyArrayMethod_Context *context, char *const data[], long max_out_size = (context->descriptors[1]->elsize) / 4; while (N--) { - unsigned char *this_string = (unsigned char *)((*in)->buf); - size_t n_bytes = (*in)->len; + ss *s = empty_if_null(in); + unsigned char *this_string = (unsigned char *)(s->buf); + size_t n_bytes = s->len; size_t tot_n_bytes = 0; for (int i = 0; i < max_out_size; i++) { @@ -363,6 +370,11 @@ string_to_unicode(PyArrayMethod_Context *context, char *const data[], break; } } + + if (*in == NULL) { + free(s); + } + in += in_stride; out += out_stride; } diff --git a/stringdtype/stringdtype/src/dtype.c b/stringdtype/stringdtype/src/dtype.c index ff7aafc1..9174d8f3 100644 --- a/stringdtype/stringdtype/src/dtype.c +++ b/stringdtype/stringdtype/src/dtype.c @@ -136,9 +136,21 @@ stringdtype_setitem(StringDTypeObject *NPY_UNUSED(descr), PyObject *obj, } static PyObject * -stringdtype_getitem(StringDTypeObject *descr, char **dataptr) +stringdtype_getitem(StringDTypeObject *NPY_UNUSED(descr), char **dataptr) { - PyObject *val_obj = PyUnicode_FromString(((ss *)*dataptr)->buf); + char *data; + + // in the future this could represent missing data too, but we'd + // need to make it so np.empty and np.zeros take their initial value + // from an API hook that doesn't exist yet + if (*dataptr == NULL) { + data = "\0"; + } + else { + data = ((ss *)*dataptr)->buf; + } + + PyObject *val_obj = PyUnicode_FromString(data); if (val_obj == NULL) { return NULL; diff --git a/stringdtype/stringdtype/src/static_string.c b/stringdtype/stringdtype/src/static_string.c index 0defdfea..17c9a5bf 100644 --- a/stringdtype/stringdtype/src/static_string.c +++ b/stringdtype/stringdtype/src/static_string.c @@ -33,9 +33,26 @@ ssdup(const ss *s) // does not do any initialization, the caller must // initialize and null-terminate the string ss * -ssnewempty(size_t len) +ssnewemptylen(size_t len) { ss *ret = (ss *)malloc(sizeof(ss) + sizeof(char) * (len + 1)); ret->len = len; return ret; } + +ss * +ssempty(void) +{ + return ssnewlen("", 0); +} + +ss * +empty_if_null(ss **data) +{ + if (*data == NULL) { + return ssempty(); + } + else { + return *data; + } +} diff --git a/stringdtype/stringdtype/src/static_string.h b/stringdtype/stringdtype/src/static_string.h index ee4fc4fa..76af3875 100644 --- a/stringdtype/stringdtype/src/static_string.h +++ b/stringdtype/stringdtype/src/static_string.h @@ -21,6 +21,13 @@ ssdup(const ss *s); // does not do any initialization, the caller must // initialize and null-terminate the string ss * -ssnewempty(size_t len); +ssnewemptylen(size_t len); + +// returns an new heap-allocated empty string +ss * +ssnewempty(void); + +ss * +empty_if_null(ss **data); #endif /*_NPY_STATIC_STRING_H */ diff --git a/stringdtype/stringdtype/src/umath.c b/stringdtype/stringdtype/src/umath.c index 7ef8dc12..48a42508 100644 --- a/stringdtype/stringdtype/src/umath.c +++ b/stringdtype/stringdtype/src/umath.c @@ -30,8 +30,8 @@ string_equal_strided_loop(PyArrayMethod_Context *context, char *const data[], npy_intp out_stride = strides[2]; while (N--) { - ss *s1 = *in1; - ss *s2 = *in2; + ss *s1 = empty_if_null(in1); + ss *s2 = empty_if_null(in2); if (s1->len == s2->len && strncmp(s1->buf, s2->buf, s1->len) == 0) { *out = (npy_bool)1; @@ -39,6 +39,15 @@ string_equal_strided_loop(PyArrayMethod_Context *context, char *const data[], else { *out = (npy_bool)0; } + + if (*in1 == NULL) { + free(s1); + } + + if (*in2 == NULL) { + free(s2); + } + in1 += in1_stride; in2 += in2_stride; out += out_stride; diff --git a/stringdtype/tests/test_stringdtype.py b/stringdtype/tests/test_stringdtype.py index ccdfe403..82b82456 100644 --- a/stringdtype/tests/test_stringdtype.py +++ b/stringdtype/tests/test_stringdtype.py @@ -181,3 +181,16 @@ def test_sort(strings): np.random.default_rng().shuffle(arr) arr.sort() np.testing.assert_array_equal(arr, arr_sorted) + + +def test_creation_functions(): + np.testing.assert_array_equal( + np.zeros(3, dtype=StringDType()), ["", "", ""] + ) + + np.testing.assert_array_equal( + np.empty(3, dtype=StringDType()), ["", "", ""] + ) + + # make sure getitem works too + assert np.empty(3, dtype=StringDType())[0] == "" From ec0c8e61328fa4d25e66dd532af3ad7f618e8224 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 28 Feb 2023 12:33:57 -0700 Subject: [PATCH 2/7] refactor so contents of ss struct is in array buffer --- stringdtype/stringdtype/src/casts.c | 59 ++++++---------- stringdtype/stringdtype/src/dtype.c | 35 +++++---- stringdtype/stringdtype/src/main.c | 7 +- stringdtype/stringdtype/src/static_string.c | 78 +++++++++++++-------- stringdtype/stringdtype/src/static_string.h | 54 ++++++++------ stringdtype/stringdtype/src/umath.c | 28 +++----- 6 files changed, 133 insertions(+), 128 deletions(-) diff --git a/stringdtype/stringdtype/src/casts.c b/stringdtype/stringdtype/src/casts.c index 21900574..b83de64e 100644 --- a/stringdtype/stringdtype/src/casts.c +++ b/stringdtype/stringdtype/src/casts.c @@ -40,30 +40,26 @@ string_to_string_resolve_descriptors(PyObject *NPY_UNUSED(self), } static int -string_to_string(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *NPY_UNUSED(auxdata)) +string_to_string(PyArrayMethod_Context *NPY_UNUSED(context), + char *const data[], npy_intp const dimensions[], + npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata)) { npy_intp N = dimensions[0]; - ss **in = (ss **)data[0]; - ss **out = (ss **)data[1]; - // strides are in bytes but pointer offsets are in pointer widths, so - // divide by the element size (one pointer width) to get the pointer offset - npy_intp in_stride = strides[0] / context->descriptors[0]->elsize; - npy_intp out_stride = strides[1] / context->descriptors[1]->elsize; + char *in = data[0]; + char *out = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + ss *s = NULL, *os = NULL; while (N--) { - ss *s = empty_if_null(in); - out[0] = ssdup(s); - if (out[0] == NULL) { + load_string(in, &s); + load_string(out, &os); + if (ssdup(s, os) == -1) { gil_error(PyExc_MemoryError, "ssdup failed"); return -1; } - if (*in == NULL) { - free(s); - } - in += in_stride; out += out_stride; } @@ -120,7 +116,7 @@ unicode_to_string_resolve_descriptors(PyObject *NPY_UNUSED(self), // is the number of codepoints that are not trailing null codepoints. Returns // 0 on success and -1 when an invalid code point is found. static int -utf8_size(Py_UCS4 *codepoints, long max_length, size_t *num_codepoints, +utf8_size(const Py_UCS4 *codepoints, long max_length, size_t *num_codepoints, size_t *utf8_bytes) { size_t ucs4len = max_length; @@ -132,7 +128,7 @@ utf8_size(Py_UCS4 *codepoints, long max_length, size_t *num_codepoints, size_t num_bytes = 0; - for (int i = 0; i < ucs4len; i++) { + for (size_t i = 0; i < ucs4len; i++) { Py_UCS4 code = codepoints[i]; if (code <= 0x7F) { @@ -207,13 +203,11 @@ unicode_to_string(PyArrayMethod_Context *context, char *const data[], npy_intp N = dimensions[0]; Py_UCS4 *in = (Py_UCS4 *)data[0]; - ss **out = (ss **)data[1]; + char *out = data[1]; // 4 bytes per UCS4 character npy_intp in_stride = strides[0] / 4; - // strides are in bytes but pointer offsets are in pointer widths, so - // divide by the element size (one pointer width) to get the pointer offset - npy_intp out_stride = strides[1] / context->descriptors[1]->elsize; + npy_intp out_stride = strides[1]; while (N--) { size_t out_num_bytes = 0; @@ -223,8 +217,8 @@ unicode_to_string(PyArrayMethod_Context *context, char *const data[], gil_error(PyExc_TypeError, "Invalid unicode code point found"); return -1; } - ss *out_ss = ssnewemptylen(out_num_bytes); - if (out_ss == NULL) { + ss *out_ss = (ss *)out; + if (ssnewemptylen(out_num_bytes, out_ss) == -1) { gil_error(PyExc_MemoryError, "ssnewemptylen failed"); return -1; } @@ -253,9 +247,6 @@ unicode_to_string(PyArrayMethod_Context *context, char *const data[], // pad string with null character out_buf[out_num_bytes] = '\0'; - // set out to the address of the beginning of the string - out[0] = out_ss; - in += in_stride; out += out_stride; } @@ -335,18 +326,18 @@ string_to_unicode(PyArrayMethod_Context *context, char *const data[], NpyAuxData *NPY_UNUSED(auxdata)) { npy_intp N = dimensions[0]; - ss **in = (ss **)data[0]; + char *in = data[0]; Py_UCS4 *out = (Py_UCS4 *)data[1]; - // strides are in bytes but pointer offsets are in pointer widths, so - // divide by the element size (one pointer width) to get the pointer offset - npy_intp in_stride = strides[0] / context->descriptors[0]->elsize; + npy_intp in_stride = strides[0]; // 4 bytes per UCS4 character npy_intp out_stride = strides[1] / 4; // max number of 4 byte UCS4 characters that can fit in the output long max_out_size = (context->descriptors[1]->elsize) / 4; + ss *s = NULL; + while (N--) { - ss *s = empty_if_null(in); + load_string(in, &s); unsigned char *this_string = (unsigned char *)(s->buf); size_t n_bytes = s->len; size_t tot_n_bytes = 0; @@ -371,10 +362,6 @@ string_to_unicode(PyArrayMethod_Context *context, char *const data[], } } - if (*in == NULL) { - free(s); - } - in += in_stride; out += out_stride; } diff --git a/stringdtype/stringdtype/src/dtype.c b/stringdtype/stringdtype/src/dtype.c index 9174d8f3..20205078 100644 --- a/stringdtype/stringdtype/src/dtype.c +++ b/stringdtype/stringdtype/src/dtype.c @@ -16,8 +16,8 @@ new_stringdtype_instance(void) if (new == NULL) { return NULL; } - new->base.elsize = sizeof(ss *); - new->base.alignment = _Alignof(ss *); + new->base.elsize = sizeof(ss); + new->base.alignment = _Alignof(ss); new->base.flags |= NPY_NEEDS_INIT; new->base.flags |= NPY_LIST_PICKLE; new->base.flags |= NPY_ITEM_REFCOUNT; @@ -119,18 +119,18 @@ stringdtype_setitem(StringDTypeObject *NPY_UNUSED(descr), PyObject *obj, return -1; } - ss *str_val = ssnewlen(val, length); - if (str_val == NULL) { + // free if dataptr holds preexisting string data, + // ssfree does a NULL check + ssfree((ss *)dataptr); + + // copies contents of val into item_val->buf + if (ssnewlen(val, length, (ss *)dataptr) == -1) { PyErr_SetString(PyExc_MemoryError, "ssnewlen failed"); return -1; } - // the dtype instance has the NPY_NEEDS_INIT flag set, - // so if *dataptr is NULL, that means we're initializing - // the array and don't need to free an existing string - if (*dataptr != NULL) { - free((ss *)*dataptr); - } - *dataptr = (char *)str_val; + + // val_obj must stay alive until here to ensure *val* doesn't get + // deallocated Py_DECREF(val_obj); return 0; } @@ -140,14 +140,11 @@ stringdtype_getitem(StringDTypeObject *NPY_UNUSED(descr), char **dataptr) { char *data; - // in the future this could represent missing data too, but we'd - // need to make it so np.empty and np.zeros take their initial value - // from an API hook that doesn't exist yet if (*dataptr == NULL) { data = "\0"; } else { - data = ((ss *)*dataptr)->buf; + data = ((ss *)dataptr)->buf; } PyObject *val_obj = PyUnicode_FromString(data); @@ -173,8 +170,8 @@ stringdtype_getitem(StringDTypeObject *NPY_UNUSED(descr), char **dataptr) int compare_strings(char **a, char **b, PyArrayObject *NPY_UNUSED(arr)) { - ss *ss_a = (ss *)*a; - ss *ss_b = (ss *)*b; + ss *ss_a = (ss *)a; + ss *ss_b = (ss *)b; return strcmp(ss_a->buf, ss_b->buf); } @@ -193,8 +190,8 @@ stringdtype_clear_loop(void *NPY_UNUSED(traverse_context), { while (size--) { if (data != NULL) { - free(*(ss **)data); - *(ss **)data = NULL; + ssfree((ss *)data); + memset(data, 0, sizeof(ss)); } data += stride; } diff --git a/stringdtype/stringdtype/src/main.c b/stringdtype/stringdtype/src/main.c index 24648a08..f5691bb0 100644 --- a/stringdtype/stringdtype/src/main.c +++ b/stringdtype/stringdtype/src/main.c @@ -50,16 +50,15 @@ _memory_usage(PyObject *NPY_UNUSED(self), PyObject *obj) // initialize with the size of the internal buffer size_t memory_usage = PyArray_NBYTES(arr); - size_t struct_size = sizeof(ss); do { - ss **in = (ss **)*dataptr; - npy_intp stride = *strideptr / descr->elsize; + char *in = dataptr[0]; + npy_intp stride = *strideptr; npy_intp count = *innersizeptr; while (count--) { // +1 byte for the null terminator - memory_usage += (*in)->len + struct_size + 1; + memory_usage += ((ss *)in)->len + 1; in += stride; } diff --git a/stringdtype/stringdtype/src/static_string.c b/stringdtype/stringdtype/src/static_string.c index 17c9a5bf..ab7c670f 100644 --- a/stringdtype/stringdtype/src/static_string.c +++ b/stringdtype/stringdtype/src/static_string.c @@ -1,58 +1,76 @@ #include "static_string.h" -// allocates a new ss string of length len, filling with the contents of init -ss * -ssnewlen(const char *init, size_t len) +int +ssnewlen(const char *init, size_t len, ss *to_init) { + if ((to_init->buf != NULL) || (to_init->len != 0)) { + return -1; + } + // one extra byte for null terminator - ss *ret = (ss *)malloc(sizeof(ss) + sizeof(char) * (len + 1)); + char *ret_buf = (char *)malloc(sizeof(char) * (len + 1)); - if (ret == NULL) { - return NULL; + if (ret_buf == NULL) { + return -1; } - ret->len = len; + to_init->len = len; if (len > 0) { - memcpy(ret->buf, init, len); + memcpy(ret_buf, init, len); } - ret->buf[len] = '\0'; + ret_buf[len] = '\0'; + + to_init->buf = ret_buf; - return ret; + return 0; } -// returns a new heap-allocated copy of input string *s* -ss * -ssdup(const ss *s) +void +ssfree(ss *str) { - return ssnewlen(s->buf, s->len); + if (str->buf != NULL) { + free(str->buf); + str->buf = NULL; + } + str->len = 0; } -// returns a new, empty string of length len -// does not do any initialization, the caller must -// initialize and null-terminate the string -ss * -ssnewemptylen(size_t len) +int +ssdup(ss *in, ss *out) { - ss *ret = (ss *)malloc(sizeof(ss) + sizeof(char) * (len + 1)); - ret->len = len; - return ret; + return ssnewlen(in->buf, in->len, out); } -ss * -ssempty(void) +int +ssnewemptylen(size_t num_bytes, ss *out) { - return ssnewlen("", 0); + if (out->len != 0 || out->buf != NULL) { + return -1; + } + + char *buf = (char *)malloc(sizeof(char) * (num_bytes + 1)); + + if (buf == NULL) { + return -1; + } + + out->buf = buf; + out->len = num_bytes; + + return 0; } -ss * -empty_if_null(ss **data) +static ss EMPTY = {0, "\0"}; + +void +load_string(char *data, ss **out) { - if (*data == NULL) { - return ssempty(); + if (data == NULL) { + *out = &EMPTY; } else { - return *data; + *out = (ss *)data; } } diff --git a/stringdtype/stringdtype/src/static_string.h b/stringdtype/stringdtype/src/static_string.h index 76af3875..b4421e8a 100644 --- a/stringdtype/stringdtype/src/static_string.h +++ b/stringdtype/stringdtype/src/static_string.h @@ -6,28 +6,40 @@ typedef struct ss { size_t len; - char buf[]; + char *buf; } ss; -// allocates a new ss string of length len, filling with the contents of init -ss * -ssnewlen(const char *init, size_t len); - -// returns a new heap-allocated copy of input string *s* -ss * -ssdup(const ss *s); - -// returns a new, empty string of length len -// does not do any initialization, the caller must -// initialize and null-terminate the string -ss * -ssnewemptylen(size_t len); - -// returns an new heap-allocated empty string -ss * -ssnewempty(void); - -ss * -empty_if_null(ss **data); +// Allocates a new buffer for *to_init*, filling with the copied contents of +// *init* and sets *to_init->len* to *len*. +int +ssnewlen(const char *init, size_t len, ss *to_init); + +// Frees the internal string buffer held by *str*. If str->buf is NULL, does +// nothing. +void +ssfree(ss *str); + +// copies the contents out *in* into *out*. Allocates a new string buffer for +// *out*, checks that *out->buf* is NULL and *out->len* is zero and errors +// otherwise. Also errors if malloc fails. +int +ssdup(ss *in, ss *out); + +// Allocates a new string buffer for *out* with enough capacity to store +// *num_bytes* of text. The actual allocation is num_bytes + 1 bytes, to +// account for the null terminator. Does not do any initialization, the caller +// must initialize and null-terminate the string buffer. Errors if *out* +// already contains data or if malloc fails. +int +ssnewemptylen(size_t num_bytes, ss *out); + +// Interpret the contents of buffer *data* as an ss struct and set *out* to +// that struct. If *data* is NULL, set *out* to point to a statically +// allocated, empty SS struct. Since this function may set *out* to point to +// statically allocated data, do not ever free memory owned by an output of +// this function. That means this function is most useful for read-only +// applications. +void +load_string(char *data, ss **out); #endif /*_NPY_STATIC_STRING_H */ diff --git a/stringdtype/stringdtype/src/umath.c b/stringdtype/stringdtype/src/umath.c index 48a42508..52da9ee0 100644 --- a/stringdtype/stringdtype/src/umath.c +++ b/stringdtype/stringdtype/src/umath.c @@ -14,24 +14,24 @@ #include "umath.h" static int -string_equal_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], +string_equal_strided_loop(PyArrayMethod_Context *NPY_UNUSED(context), + char *const data[], npy_intp const dimensions[], npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata)) { npy_intp N = dimensions[0]; - ss **in1 = (ss **)data[0]; - ss **in2 = (ss **)data[1]; + char *in1 = data[0]; + char *in2 = data[1]; npy_bool *out = (npy_bool *)data[2]; - // strides are in bytes but pointer offsets are in pointer widths, so - // divide by the element size (one pointer width) to get the pointer offset - npy_intp in1_stride = strides[0] / context->descriptors[0]->elsize; - npy_intp in2_stride = strides[1] / context->descriptors[1]->elsize; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; npy_intp out_stride = strides[2]; + ss *s1 = NULL, *s2 = NULL; + while (N--) { - ss *s1 = empty_if_null(in1); - ss *s2 = empty_if_null(in2); + load_string(in1, &s1); + load_string(in2, &s2); if (s1->len == s2->len && strncmp(s1->buf, s2->buf, s1->len) == 0) { *out = (npy_bool)1; @@ -40,14 +40,6 @@ string_equal_strided_loop(PyArrayMethod_Context *context, char *const data[], *out = (npy_bool)0; } - if (*in1 == NULL) { - free(s1); - } - - if (*in2 == NULL) { - free(s2); - } - in1 += in1_stride; in2 += in2_stride; out += out_stride; From c7866703fef333f8f4d3c7b22edbfe586289278f Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 28 Feb 2023 14:03:56 -0700 Subject: [PATCH 3/7] respond to review comments; clarify integer return codes --- stringdtype/stringdtype/src/dtype.c | 18 ++++++++++-------- stringdtype/stringdtype/src/static_string.c | 4 ++-- stringdtype/stringdtype/src/static_string.h | 15 ++++++++------- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/stringdtype/stringdtype/src/dtype.c b/stringdtype/stringdtype/src/dtype.c index 20205078..0754a2a8 100644 --- a/stringdtype/stringdtype/src/dtype.c +++ b/stringdtype/stringdtype/src/dtype.c @@ -124,13 +124,19 @@ stringdtype_setitem(StringDTypeObject *NPY_UNUSED(descr), PyObject *obj, ssfree((ss *)dataptr); // copies contents of val into item_val->buf - if (ssnewlen(val, length, (ss *)dataptr) == -1) { - PyErr_SetString(PyExc_MemoryError, "ssnewlen failed"); + int res = ssnewlen(val, length, (ss *)dataptr); + // val_obj must stay alive until here to ensure *val* doesn't get + // deallocated + Py_DECREF(val_obj); + if (res == -1) { + PyErr_NoMemory(); return -1; } + else if (res == -2) { + // this should never happen + assert(0); + } - // val_obj must stay alive until here to ensure *val* doesn't get - // deallocated Py_DECREF(val_obj); return 0; } @@ -156,10 +162,6 @@ stringdtype_getitem(StringDTypeObject *NPY_UNUSED(descr), char **dataptr) PyObject *res = PyObject_CallFunctionObjArgs((PyObject *)StringScalar_Type, val_obj, NULL); - if (res == NULL) { - return NULL; - } - Py_DECREF(val_obj); return res; diff --git a/stringdtype/stringdtype/src/static_string.c b/stringdtype/stringdtype/src/static_string.c index ab7c670f..1148e8d2 100644 --- a/stringdtype/stringdtype/src/static_string.c +++ b/stringdtype/stringdtype/src/static_string.c @@ -4,7 +4,7 @@ int ssnewlen(const char *init, size_t len, ss *to_init) { if ((to_init->buf != NULL) || (to_init->len != 0)) { - return -1; + return -2; } // one extra byte for null terminator @@ -47,7 +47,7 @@ int ssnewemptylen(size_t num_bytes, ss *out) { if (out->len != 0 || out->buf != NULL) { - return -1; + return -2; } char *buf = (char *)malloc(sizeof(char) * (num_bytes + 1)); diff --git a/stringdtype/stringdtype/src/static_string.h b/stringdtype/stringdtype/src/static_string.h index b4421e8a..c4a956d2 100644 --- a/stringdtype/stringdtype/src/static_string.h +++ b/stringdtype/stringdtype/src/static_string.h @@ -10,26 +10,27 @@ typedef struct ss { } ss; // Allocates a new buffer for *to_init*, filling with the copied contents of -// *init* and sets *to_init->len* to *len*. +// *init* and sets *to_init->len* to *len*. Returns -1 if malloc fails and -2 +// if *to_init* is not empty. Returns 0 on success. int ssnewlen(const char *init, size_t len, ss *to_init); -// Frees the internal string buffer held by *str*. If str->buf is NULL, does -// nothing. +// Sets len to 0 and if str->buf is not already NULL, frees it and sets it to +// NULL. Cannot fail. void ssfree(ss *str); // copies the contents out *in* into *out*. Allocates a new string buffer for -// *out*, checks that *out->buf* is NULL and *out->len* is zero and errors -// otherwise. Also errors if malloc fails. +// *out*. Returns -1 if malloc fails and -2 if *out* is not empty. Returns 0 on +// success. int ssdup(ss *in, ss *out); // Allocates a new string buffer for *out* with enough capacity to store // *num_bytes* of text. The actual allocation is num_bytes + 1 bytes, to // account for the null terminator. Does not do any initialization, the caller -// must initialize and null-terminate the string buffer. Errors if *out* -// already contains data or if malloc fails. +// must initialize and null-terminate the string buffer. Returns -1 if malloc +// fails and -2 if *out* is not empty. Returns 0 on success. int ssnewemptylen(size_t num_bytes, ss *out); From 78f67715cd44134e7262b99a98066c128eb46b10 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 28 Feb 2023 14:07:07 -0700 Subject: [PATCH 4/7] don't use load_string with output parameter --- stringdtype/stringdtype/src/casts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stringdtype/stringdtype/src/casts.c b/stringdtype/stringdtype/src/casts.c index b83de64e..f1ee85da 100644 --- a/stringdtype/stringdtype/src/casts.c +++ b/stringdtype/stringdtype/src/casts.c @@ -54,7 +54,7 @@ string_to_string(PyArrayMethod_Context *NPY_UNUSED(context), while (N--) { load_string(in, &s); - load_string(out, &os); + os = (ss *)out; if (ssdup(s, os) == -1) { gil_error(PyExc_MemoryError, "ssdup failed"); return -1; From f1b2489d25bb2acf81daa937b0defe3b63ef42cc Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 28 Feb 2023 14:11:00 -0700 Subject: [PATCH 5/7] Fix null check in load_string --- stringdtype/stringdtype/src/static_string.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/stringdtype/stringdtype/src/static_string.c b/stringdtype/stringdtype/src/static_string.c index 1148e8d2..143d6eaa 100644 --- a/stringdtype/stringdtype/src/static_string.c +++ b/stringdtype/stringdtype/src/static_string.c @@ -67,10 +67,11 @@ static ss EMPTY = {0, "\0"}; void load_string(char *data, ss **out) { - if (data == NULL) { + ss *ss_d = (ss *)data; + if (ss_d->len == 0) { *out = &EMPTY; } else { - *out = (ss *)data; + *out = ss_d; } } From ff238ae7c0e4e6ed1ab4b5e7308095c8df0e2723 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 28 Feb 2023 14:14:44 -0700 Subject: [PATCH 6/7] Fix error handling in casts --- stringdtype/stringdtype/src/casts.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stringdtype/stringdtype/src/casts.c b/stringdtype/stringdtype/src/casts.c index f1ee85da..7b951183 100644 --- a/stringdtype/stringdtype/src/casts.c +++ b/stringdtype/stringdtype/src/casts.c @@ -55,7 +55,7 @@ string_to_string(PyArrayMethod_Context *NPY_UNUSED(context), while (N--) { load_string(in, &s); os = (ss *)out; - if (ssdup(s, os) == -1) { + if (ssdup(s, os) < 0) { gil_error(PyExc_MemoryError, "ssdup failed"); return -1; } @@ -218,7 +218,7 @@ unicode_to_string(PyArrayMethod_Context *context, char *const data[], return -1; } ss *out_ss = (ss *)out; - if (ssnewemptylen(out_num_bytes, out_ss) == -1) { + if (ssnewemptylen(out_num_bytes, out_ss) < 0) { gil_error(PyExc_MemoryError, "ssnewemptylen failed"); return -1; } From 01f13a92eb0cec51d687ecb40f43fbb57b1a7174 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 28 Feb 2023 14:20:11 -0700 Subject: [PATCH 7/7] fix reference counting error --- stringdtype/stringdtype/src/dtype.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stringdtype/stringdtype/src/dtype.c b/stringdtype/stringdtype/src/dtype.c index 0754a2a8..44fc328d 100644 --- a/stringdtype/stringdtype/src/dtype.c +++ b/stringdtype/stringdtype/src/dtype.c @@ -125,9 +125,11 @@ stringdtype_setitem(StringDTypeObject *NPY_UNUSED(descr), PyObject *obj, // copies contents of val into item_val->buf int res = ssnewlen(val, length, (ss *)dataptr); + // val_obj must stay alive until here to ensure *val* doesn't get // deallocated Py_DECREF(val_obj); + if (res == -1) { PyErr_NoMemory(); return -1; @@ -137,7 +139,6 @@ stringdtype_setitem(StringDTypeObject *NPY_UNUSED(descr), PyObject *obj, assert(0); } - Py_DECREF(val_obj); return 0; }