diff --git a/stringdtype/README.md b/stringdtype/README.md index 0799deb0..d2eae298 100644 --- a/stringdtype/README.md +++ b/stringdtype/README.md @@ -1,16 +1,17 @@ # A dtype that stores pointers to strings -This is a simple proof-of-concept dtype using the (as of early 2023) experimental -[new dtype -implementation](https://numpy.org/neps/nep-0041-improved-dtype-support.html) in -NumPy. +This is the prototype implementation of the variable-width UTF-8 string DType +described in [NEP 55](https://numpy.org/neps/nep-0055-string_dtype.html). + +See the NEP for implementation details and usage examples. Full +documentation will be written before this code is merged into NumPy. ## Building Ensure Meson and NumPy are installed in the python environment you would like to use: ``` -$ python3 -m pip install meson meson-python build patchelf +$ python3 -m pip install meson meson-python ``` It is important to have the latest development version of numpy installed. @@ -20,16 +21,35 @@ Nightly wheels work well for this purpose, and can be installed easily: $ pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy ``` -Build with meson, create a wheel, and install it. +You can install with `pip` directly, taking care to disable build isolation so +the numpy nightly gets picked up at build time: + +```bash +$ pip install -v . --no-build-isolation +``` + +If you want to work on the `stringdtype` code, you can build with meson, +create a wheel, and install it. ```bash $ rm -r dist/ $ meson build $ python -m build --wheel -Cbuilddir=build +$ python -m pip install dist/path-to-wheel-file.whl ``` -Or simply install directly, taking care to install without build isolation: +## Usage + +The dtype will not import unless you run the python executable with +the `NUMPY_EXPERIMENTAL_DTYPE_API` environment variable set: ```bash -$ pip install -v . 
--no-build-isolation +$ NUMPY_EXPERIMENTAL_DTYPE_API=1 python +Python 3.11.3 (main, May 2 2023, 11:36:22) [GCC 11.3.0] on linux +Type "help", "copyright", "credits" or "license" for more information. +>>> from stringdtype import StringDType +>>> import numpy as np +>>> arr = np.array(["hello", "world"], dtype=StringDType()) +>>> arr +array(['hello', 'world'], dtype=StringDType()) ``` diff --git a/stringdtype/stringdtype/src/casts.c b/stringdtype/stringdtype/src/casts.c index 29626072..1f0f3506 100644 --- a/stringdtype/stringdtype/src/casts.c +++ b/stringdtype/stringdtype/src/casts.c @@ -82,19 +82,20 @@ string_to_string(PyArrayMethod_Context *context, char *const data[], npy_intp in_stride = strides[0]; npy_intp out_stride = strides[1]; - NPY_STRING_ACQUIRE_ALLOCATOR2(odescr, idescr); + npy_string_allocator *iallocator = NULL; + npy_string_allocator *oallocator = NULL; + NpyString_acquire_allocator2(idescr, odescr, &iallocator, &oallocator); while (N--) { const npy_packed_static_string *s = (npy_packed_static_string *)in; npy_packed_static_string *os = (npy_packed_static_string *)out; if (in != out) { - if (in_hasnull && !out_hasnull && npy_string_isnull(s)) { + if (in_hasnull && !out_hasnull && NpyString_isnull(s)) { // lossy but this is an unsafe cast so this is OK - npy_string_free(os, odescr->allocator); - if (npy_string_newsize(in_na_name->buf, in_na_name->size, os, - odescr->allocator) < 0) { + if (NpyString_pack(odescr->allocator, os, in_na_name->buf, + in_na_name->size) < 0) { gil_error(PyExc_MemoryError, - "Failed to allocate string in string to string " + "Failed to pack string in string to string " "cast."); goto fail; } @@ -109,13 +110,13 @@ string_to_string(PyArrayMethod_Context *context, char *const data[], out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR2(odescr, idescr); + NpyString_release_allocator2(odescr, idescr); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR2(odescr, idescr); + NpyString_release_allocator2(odescr, idescr); return -1; } @@ 
-221,8 +222,7 @@ unicode_to_string(PyArrayMethod_Context *context, char *const data[], PyArray_Descr **descrs = context->descriptors; StringDTypeObject *sdescr = (StringDTypeObject *)descrs[1]; - NPY_STRING_ACQUIRE_ALLOCATOR(sdescr); - npy_string_allocator *allocator = sdescr->allocator; + npy_string_allocator *allocator = NpyString_acquire_allocator(sdescr); long max_in_size = (descrs[0]->elsize) / 4; @@ -243,18 +243,18 @@ unicode_to_string(PyArrayMethod_Context *context, char *const data[], goto fail; } npy_packed_static_string *out_pss = (npy_packed_static_string *)out; - if (npy_string_free(out_pss, allocator) < 0) { + if (NpyString_free(out_pss, allocator) < 0) { gil_error(PyExc_MemoryError, "Failed to deallocate string in unicode to string cast"); goto fail; } - if (npy_string_newemptysize(out_num_bytes, out_pss, allocator) < 0) { + if (NpyString_newemptysize(out_num_bytes, out_pss, allocator) < 0) { gil_error(PyExc_MemoryError, "Failed to allocate string in unicode to string cast"); goto fail; } npy_static_string out_ss = {0, NULL}; - int is_null = npy_string_load(allocator, out_pss, &out_ss); + int is_null = NpyString_load(allocator, out_pss, &out_ss); if (is_null == -1) { gil_error(PyExc_MemoryError, "Failed to load string in unicode to string cast"); @@ -287,13 +287,13 @@ unicode_to_string(PyArrayMethod_Context *context, char *const data[], out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR(sdescr); + NpyString_release_allocator(sdescr); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR(sdescr); + NpyString_release_allocator(sdescr); return -1; } @@ -376,8 +376,7 @@ string_to_unicode(PyArrayMethod_Context *context, char *const data[], NpyAuxData *NPY_UNUSED(auxdata)) { StringDTypeObject *descr = (StringDTypeObject *)context->descriptors[0]; - NPY_STRING_ACQUIRE_ALLOCATOR(descr); - npy_string_allocator *allocator = descr->allocator; + npy_string_allocator *allocator = NpyString_acquire_allocator(descr); int has_null = descr->na_object != NULL; int 
has_string_na = descr->has_string_na; const npy_static_string *default_string = &descr->default_string; @@ -397,7 +396,7 @@ string_to_unicode(PyArrayMethod_Context *context, char *const data[], npy_static_string name = {0, NULL}; unsigned char *this_string = NULL; size_t n_bytes; - int is_null = npy_string_load(allocator, ps, &s); + int is_null = NpyString_load(allocator, ps, &s); if (is_null == -1) { gil_error(PyExc_MemoryError, "Failed to load string in unicode to string cast"); @@ -444,12 +443,12 @@ string_to_unicode(PyArrayMethod_Context *context, char *const data[], out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return -1; } @@ -490,8 +489,7 @@ string_to_bool(PyArrayMethod_Context *context, char *const data[], NpyAuxData *NPY_UNUSED(auxdata)) { StringDTypeObject *descr = (StringDTypeObject *)context->descriptors[0]; - NPY_STRING_ACQUIRE_ALLOCATOR(descr); - npy_string_allocator *allocator = descr->allocator; + npy_string_allocator *allocator = NpyString_acquire_allocator(descr); int has_null = descr->na_object != NULL; int has_string_na = descr->has_string_na; const npy_static_string *default_string = &descr->default_string; @@ -506,7 +504,7 @@ string_to_bool(PyArrayMethod_Context *context, char *const data[], while (N--) { const npy_packed_static_string *ps = (npy_packed_static_string *)in; npy_static_string s = {0, NULL}; - int is_null = npy_string_load(allocator, ps, &s); + int is_null = NpyString_load(allocator, ps, &s); if (is_null == -1) { gil_error(PyExc_MemoryError, "Failed to load string in unicode to string cast"); @@ -532,13 +530,13 @@ string_to_bool(PyArrayMethod_Context *context, char *const data[], out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return -1; } @@ 
-565,16 +563,10 @@ bool_to_string(PyArrayMethod_Context *context, char *const data[], npy_intp out_stride = strides[1]; StringDTypeObject *descr = (StringDTypeObject *)context->descriptors[1]; - NPY_STRING_ACQUIRE_ALLOCATOR(descr); - npy_string_allocator *allocator = descr->allocator; + npy_string_allocator *allocator = NpyString_acquire_allocator(descr); while (N--) { npy_packed_static_string *out_pss = (npy_packed_static_string *)out; - if (npy_string_free(out_pss, allocator) < 0) { - gil_error(PyExc_MemoryError, - "Failed to deallocate string in bool to string cast"); - goto fail; - } char *ret_val = NULL; size_t size = 0; if ((npy_bool)(*in) == 1) { @@ -590,24 +582,22 @@ bool_to_string(PyArrayMethod_Context *context, char *const data[], "invalid value encountered in bool to string cast"); goto fail; } - if (npy_string_newsize(ret_val, size, out_pss, allocator) < 0) { - // execution should never get here because this will be a small - // string on all platforms + if (NpyString_pack(allocator, out_pss, ret_val, size) < 0) { gil_error(PyExc_MemoryError, - "Failed to allocate string in bool to string cast"); + "Failed to pack string in bool to string cast"); goto fail; } in += in_stride; out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return -1; } @@ -628,7 +618,7 @@ string_to_pylong(char *in, int hasnull, { const npy_packed_static_string *ps = (npy_packed_static_string *)in; npy_static_string s = {0, NULL}; - int isnull = npy_string_load(allocator, ps, &s); + int isnull = NpyString_load(allocator, ps, &s); if (isnull == -1) { PyErr_SetString(PyExc_MemoryError, "Failed to load string converting string to int"); @@ -706,19 +696,14 @@ pyobj_to_string(PyObject *obj, char *out, npy_string_allocator *allocator) Py_ssize_t length; const char *cstr_val = PyUnicode_AsUTF8AndSize(pystr_val, &length); if (cstr_val == NULL) { + 
Py_DECREF(pystr_val); return -1; } npy_packed_static_string *out_ss = (npy_packed_static_string *)out; - if (npy_string_free(out_ss, allocator) < 0) { + if (NpyString_pack(allocator, out_ss, cstr_val, length) < 0) { gil_error(PyExc_MemoryError, - "Failed to deallocate string when converting from python " + "Failed to pack string while converting from python " "string"); - return -1; - } - if (npy_string_newsize(cstr_val, length, out_ss, allocator) < 0) { - PyErr_SetString(PyExc_MemoryError, - "Failed to allocate numpy string when converting from " - "python string."); Py_DECREF(pystr_val); return -1; } @@ -742,125 +727,123 @@ uint_to_string(unsigned long long in, char *out, return pyobj_to_string(pylong_val, out, allocator); } -#define STRING_INT_CASTS(typename, typekind, shortname, numpy_tag, \ - printf_code, npy_longtype, longtype) \ - static NPY_CASTING string_to_##typename##_resolve_descriptors( \ - PyObject *NPY_UNUSED(self), \ - PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), \ - PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], \ - npy_intp *NPY_UNUSED(view_offset)) \ - { \ - if (given_descrs[1] == NULL) { \ - loop_descrs[1] = PyArray_DescrNewFromType(numpy_tag); \ - } \ - else { \ - Py_INCREF(given_descrs[1]); \ - loop_descrs[1] = given_descrs[1]; \ - } \ - \ - Py_INCREF(given_descrs[0]); \ - loop_descrs[0] = given_descrs[0]; \ - \ - return NPY_UNSAFE_CASTING; \ - } \ - \ - static int string_to_## \ - typename(PyArrayMethod_Context * context, char *const data[], \ - npy_intp const dimensions[], npy_intp const strides[], \ - NpyAuxData *NPY_UNUSED(auxdata)) \ - { \ - StringDTypeObject *descr = \ - ((StringDTypeObject *)context->descriptors[0]); \ - NPY_STRING_ACQUIRE_ALLOCATOR(descr); \ - npy_string_allocator *allocator = descr->allocator; \ - int hasnull = descr->na_object != NULL; \ - const npy_static_string *default_string = &descr->default_string; \ - \ - npy_intp N = dimensions[0]; \ - char *in = data[0]; \ - npy_##typename *out = (npy_##typename 
*)data[1]; \ - \ - npy_intp in_stride = strides[0]; \ - npy_intp out_stride = strides[1] / sizeof(npy_##typename); \ - \ - while (N--) { \ - npy_longtype value; \ - if (string_to_##typekind(in, &value, hasnull, default_string, \ - allocator) != 0) { \ - goto fail; \ - } \ - *out = (npy_##typename)value; \ - if (*out != value) { \ - /* out of bounds, raise error following NEP 50 behavior */ \ - char message[200]; \ - snprintf(message, sizeof(message), \ - "Integer %" #printf_code \ - " is out of bounds " \ - "for " #typename, \ - value); \ - gil_error(PyExc_OverflowError, message); \ - goto fail; \ - } \ - in += in_stride; \ - out += out_stride; \ - } \ - \ - NPY_STRING_RELEASE_ALLOCATOR(descr); \ - return 0; \ - \ - fail: \ - NPY_STRING_RELEASE_ALLOCATOR(descr); \ - return -1; \ - } \ - \ - static PyType_Slot s2##shortname##_slots[] = { \ - {NPY_METH_resolve_descriptors, \ - &string_to_##typename##_resolve_descriptors}, \ - {NPY_METH_strided_loop, &string_to_##typename}, \ - {0, NULL}}; \ - \ - static char *s2##shortname##_name = "cast_StringDType_to_" #typename; \ - \ - static int typename##_to_string( \ - PyArrayMethod_Context *context, char *const data[], \ - npy_intp const dimensions[], npy_intp const strides[], \ - NpyAuxData *NPY_UNUSED(auxdata)) \ - { \ - npy_intp N = dimensions[0]; \ - npy_##typename *in = (npy_##typename *)data[0]; \ - char *out = data[1]; \ - \ - npy_intp in_stride = strides[0] / sizeof(npy_##typename); \ - npy_intp out_stride = strides[1]; \ - \ - StringDTypeObject *descr = \ - (StringDTypeObject *)context->descriptors[1]; \ - NPY_STRING_ACQUIRE_ALLOCATOR(descr); \ - npy_string_allocator *allocator = descr->allocator; \ - \ - while (N--) { \ - if (typekind##_to_string((longtype)*in, out, allocator) != 0) { \ - goto fail; \ - } \ - \ - in += in_stride; \ - out += out_stride; \ - } \ - \ - NPY_STRING_RELEASE_ALLOCATOR(descr); \ - return 0; \ - \ - fail: \ - NPY_STRING_RELEASE_ALLOCATOR(descr); \ - return -1; \ - } \ - \ - static 
PyType_Slot shortname##2s_slots [] = { \ - {NPY_METH_resolve_descriptors, \ - &any_to_string_UNSAFE_resolve_descriptors}, \ - {NPY_METH_strided_loop, &typename##_to_string}, \ - {0, NULL}}; \ - \ +#define STRING_INT_CASTS(typename, typekind, shortname, numpy_tag, \ + printf_code, npy_longtype, longtype) \ + static NPY_CASTING string_to_##typename##_resolve_descriptors( \ + PyObject *NPY_UNUSED(self), \ + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), \ + PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], \ + npy_intp *NPY_UNUSED(view_offset)) \ + { \ + if (given_descrs[1] == NULL) { \ + loop_descrs[1] = PyArray_DescrNewFromType(numpy_tag); \ + } \ + else { \ + Py_INCREF(given_descrs[1]); \ + loop_descrs[1] = given_descrs[1]; \ + } \ + \ + Py_INCREF(given_descrs[0]); \ + loop_descrs[0] = given_descrs[0]; \ + \ + return NPY_UNSAFE_CASTING; \ + } \ + \ + static int string_to_## \ + typename(PyArrayMethod_Context * context, char *const data[], \ + npy_intp const dimensions[], npy_intp const strides[], \ + NpyAuxData *NPY_UNUSED(auxdata)) \ + { \ + StringDTypeObject *descr = \ + ((StringDTypeObject *)context->descriptors[0]); \ + npy_string_allocator *allocator = NpyString_acquire_allocator(descr); \ + int hasnull = descr->na_object != NULL; \ + const npy_static_string *default_string = &descr->default_string; \ + \ + npy_intp N = dimensions[0]; \ + char *in = data[0]; \ + npy_##typename *out = (npy_##typename *)data[1]; \ + \ + npy_intp in_stride = strides[0]; \ + npy_intp out_stride = strides[1] / sizeof(npy_##typename); \ + \ + while (N--) { \ + npy_longtype value; \ + if (string_to_##typekind(in, &value, hasnull, default_string, \ + allocator) != 0) { \ + goto fail; \ + } \ + *out = (npy_##typename)value; \ + if (*out != value) { \ + /* out of bounds, raise error following NEP 50 behavior */ \ + char message[200]; \ + snprintf(message, sizeof(message), \ + "Integer %" #printf_code \ + " is out of bounds " \ + "for " #typename, \ + value); \ + 
gil_error(PyExc_OverflowError, message); \ + goto fail; \ + } \ + in += in_stride; \ + out += out_stride; \ + } \ + \ + NpyString_release_allocator(descr); \ + return 0; \ + \ + fail: \ + NpyString_release_allocator(descr); \ + return -1; \ + } \ + \ + static PyType_Slot s2##shortname##_slots[] = { \ + {NPY_METH_resolve_descriptors, \ + &string_to_##typename##_resolve_descriptors}, \ + {NPY_METH_strided_loop, &string_to_##typename}, \ + {0, NULL}}; \ + \ + static char *s2##shortname##_name = "cast_StringDType_to_" #typename; \ + \ + static int typename##_to_string( \ + PyArrayMethod_Context *context, char *const data[], \ + npy_intp const dimensions[], npy_intp const strides[], \ + NpyAuxData *NPY_UNUSED(auxdata)) \ + { \ + npy_intp N = dimensions[0]; \ + npy_##typename *in = (npy_##typename *)data[0]; \ + char *out = data[1]; \ + \ + npy_intp in_stride = strides[0] / sizeof(npy_##typename); \ + npy_intp out_stride = strides[1]; \ + \ + StringDTypeObject *descr = \ + (StringDTypeObject *)context->descriptors[1]; \ + npy_string_allocator *allocator = NpyString_acquire_allocator(descr); \ + \ + while (N--) { \ + if (typekind##_to_string((longtype)*in, out, allocator) != 0) { \ + goto fail; \ + } \ + \ + in += in_stride; \ + out += out_stride; \ + } \ + \ + NpyString_release_allocator(descr); \ + return 0; \ + \ + fail: \ + NpyString_release_allocator(descr); \ + return -1; \ + } \ + \ + static PyType_Slot shortname##2s_slots [] = { \ + {NPY_METH_resolve_descriptors, \ + &any_to_string_UNSAFE_resolve_descriptors}, \ + {NPY_METH_strided_loop, &typename##_to_string}, \ + {0, NULL}}; \ + \ static char *shortname##2s_name = "cast_" #typename "_to_StringDType"; #define DTYPES_AND_CAST_SPEC(shortname, typename) \ @@ -925,7 +908,7 @@ string_to_pyfloat(char *in, int hasnull, { const npy_packed_static_string *ps = (npy_packed_static_string *)in; npy_static_string s = {0, NULL}; - int isnull = npy_string_load(allocator, ps, &s); + int isnull = NpyString_load(allocator, ps, &s); 
if (isnull == -1) { PyErr_SetString( PyExc_MemoryError, @@ -950,62 +933,61 @@ string_to_pyfloat(char *in, int hasnull, return pyfloat_value; } -#define STRING_TO_FLOAT_CAST(typename, shortname, isinf_name, \ - double_to_float) \ - static int string_to_## \ - typename(PyArrayMethod_Context * context, char *const data[], \ - npy_intp const dimensions[], npy_intp const strides[], \ - NpyAuxData *NPY_UNUSED(auxdata)) \ - { \ - StringDTypeObject *descr = \ - (StringDTypeObject *)context->descriptors[0]; \ - NPY_STRING_ACQUIRE_ALLOCATOR(descr); \ - npy_string_allocator *allocator = descr->allocator; \ - int hasnull = (descr->na_object != NULL); \ - const npy_static_string *default_string = &descr->default_string; \ - \ - npy_intp N = dimensions[0]; \ - char *in = data[0]; \ - npy_##typename *out = (npy_##typename *)data[1]; \ - \ - npy_intp in_stride = strides[0]; \ - npy_intp out_stride = strides[1] / sizeof(npy_##typename); \ - \ - while (N--) { \ - PyObject *pyfloat_value = string_to_pyfloat( \ - in, hasnull, default_string, allocator); \ - if (pyfloat_value == NULL) { \ - goto fail; \ - } \ - double dval = PyFloat_AS_DOUBLE(pyfloat_value); \ - npy_##typename fval = (double_to_float)(dval); \ - \ - if (NPY_UNLIKELY(isinf_name(fval) && !(npy_isinf(dval)))) { \ - if (PyUFunc_GiveFloatingpointErrors("cast", \ - NPY_FPE_OVERFLOW) < 0) { \ - goto fail; \ - } \ - } \ - \ - *out = fval; \ - \ - in += in_stride; \ - out += out_stride; \ - } \ - \ - NPY_STRING_RELEASE_ALLOCATOR(descr); \ - return 0; \ - fail: \ - NPY_STRING_RELEASE_ALLOCATOR(descr); \ - return -1; \ - } \ - \ - static PyType_Slot s2##shortname##_slots[] = { \ - {NPY_METH_resolve_descriptors, \ - &string_to_##typename##_resolve_descriptors}, \ - {NPY_METH_strided_loop, &string_to_##typename}, \ - {0, NULL}}; \ - \ +#define STRING_TO_FLOAT_CAST(typename, shortname, isinf_name, \ + double_to_float) \ + static int string_to_## \ + typename(PyArrayMethod_Context * context, char *const data[], \ + npy_intp const 
dimensions[], npy_intp const strides[], \ + NpyAuxData *NPY_UNUSED(auxdata)) \ + { \ + StringDTypeObject *descr = \ + (StringDTypeObject *)context->descriptors[0]; \ + npy_string_allocator *allocator = NpyString_acquire_allocator(descr); \ + int hasnull = (descr->na_object != NULL); \ + const npy_static_string *default_string = &descr->default_string; \ + \ + npy_intp N = dimensions[0]; \ + char *in = data[0]; \ + npy_##typename *out = (npy_##typename *)data[1]; \ + \ + npy_intp in_stride = strides[0]; \ + npy_intp out_stride = strides[1] / sizeof(npy_##typename); \ + \ + while (N--) { \ + PyObject *pyfloat_value = string_to_pyfloat( \ + in, hasnull, default_string, allocator); \ + if (pyfloat_value == NULL) { \ + goto fail; \ + } \ + double dval = PyFloat_AS_DOUBLE(pyfloat_value); \ + npy_##typename fval = (double_to_float)(dval); \ + \ + if (NPY_UNLIKELY(isinf_name(fval) && !(npy_isinf(dval)))) { \ + if (PyUFunc_GiveFloatingpointErrors("cast", \ + NPY_FPE_OVERFLOW) < 0) { \ + goto fail; \ + } \ + } \ + \ + *out = fval; \ + \ + in += in_stride; \ + out += out_stride; \ + } \ + \ + NpyString_release_allocator(descr); \ + return 0; \ + fail: \ + NpyString_release_allocator(descr); \ + return -1; \ + } \ + \ + static PyType_Slot s2##shortname##_slots[] = { \ + {NPY_METH_resolve_descriptors, \ + &string_to_##typename##_resolve_descriptors}, \ + {NPY_METH_strided_loop, &string_to_##typename}, \ + {0, NULL}}; \ + \ static char *s2##shortname##_name = "cast_StringDType_to_" #typename; #define STRING_TO_FLOAT_RESOLVE_DESCRIPTORS(typename, npy_typename) \ @@ -1029,48 +1011,47 @@ string_to_pyfloat(char *in, int hasnull, return NPY_UNSAFE_CASTING; \ } -#define FLOAT_TO_STRING_CAST(typename, shortname, float_to_double) \ - static int typename##_to_string( \ - PyArrayMethod_Context *context, char *const data[], \ - npy_intp const dimensions[], npy_intp const strides[], \ - NpyAuxData *NPY_UNUSED(auxdata)) \ - { \ - npy_intp N = dimensions[0]; \ - npy_##typename *in = 
(npy_##typename *)data[0]; \ - char *out = data[1]; \ - PyArray_Descr *float_descr = context->descriptors[0]; \ - \ - npy_intp in_stride = strides[0] / sizeof(npy_##typename); \ - npy_intp out_stride = strides[1]; \ - \ - StringDTypeObject *descr = \ - (StringDTypeObject *)context->descriptors[1]; \ - NPY_STRING_ACQUIRE_ALLOCATOR(descr); \ - npy_string_allocator *allocator = descr->allocator; \ - \ - while (N--) { \ - PyObject *scalar_val = PyArray_Scalar(in, float_descr, NULL); \ - if (pyobj_to_string(scalar_val, out, allocator) == -1) { \ - goto fail; \ - } \ - \ - in += in_stride; \ - out += out_stride; \ - } \ - \ - NPY_STRING_RELEASE_ALLOCATOR(descr); \ - return 0; \ - fail: \ - NPY_STRING_RELEASE_ALLOCATOR(descr); \ - return -1; \ - } \ - \ - static PyType_Slot shortname##2s_slots [] = { \ - {NPY_METH_resolve_descriptors, \ - &any_to_string_UNSAFE_resolve_descriptors}, \ - {NPY_METH_strided_loop, &typename##_to_string}, \ - {0, NULL}}; \ - \ +#define FLOAT_TO_STRING_CAST(typename, shortname, float_to_double) \ + static int typename##_to_string( \ + PyArrayMethod_Context *context, char *const data[], \ + npy_intp const dimensions[], npy_intp const strides[], \ + NpyAuxData *NPY_UNUSED(auxdata)) \ + { \ + npy_intp N = dimensions[0]; \ + npy_##typename *in = (npy_##typename *)data[0]; \ + char *out = data[1]; \ + PyArray_Descr *float_descr = context->descriptors[0]; \ + \ + npy_intp in_stride = strides[0] / sizeof(npy_##typename); \ + npy_intp out_stride = strides[1]; \ + \ + StringDTypeObject *descr = \ + (StringDTypeObject *)context->descriptors[1]; \ + npy_string_allocator *allocator = NpyString_acquire_allocator(descr); \ + \ + while (N--) { \ + PyObject *scalar_val = PyArray_Scalar(in, float_descr, NULL); \ + if (pyobj_to_string(scalar_val, out, allocator) == -1) { \ + goto fail; \ + } \ + \ + in += in_stride; \ + out += out_stride; \ + } \ + \ + NpyString_release_allocator(descr); \ + return 0; \ + fail: \ + NpyString_release_allocator(descr); \ + return 
-1; \ + } \ + \ + static PyType_Slot shortname##2s_slots [] = { \ + {NPY_METH_resolve_descriptors, \ + &any_to_string_UNSAFE_resolve_descriptors}, \ + {NPY_METH_strided_loop, &typename##_to_string}, \ + {0, NULL}}; \ + \ static char *shortname##2s_name = "cast_" #typename "_to_StringDType"; STRING_TO_FLOAT_RESOLVE_DESCRIPTORS(float64, DOUBLE) @@ -1081,8 +1062,7 @@ string_to_float64(PyArrayMethod_Context *context, char *const data[], NpyAuxData *NPY_UNUSED(auxdata)) { StringDTypeObject *descr = (StringDTypeObject *)context->descriptors[0]; - NPY_STRING_ACQUIRE_ALLOCATOR(descr); - npy_string_allocator *allocator = descr->allocator; + npy_string_allocator *allocator = NpyString_acquire_allocator(descr); int hasnull = descr->na_object != NULL; const npy_static_string *default_string = &descr->default_string; npy_intp N = dimensions[0]; @@ -1105,11 +1085,11 @@ string_to_float64(PyArrayMethod_Context *context, char *const data[], out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return -1; } @@ -1161,8 +1141,7 @@ string_to_datetime(PyArrayMethod_Context *context, char *const data[], NpyAuxData *NPY_UNUSED(auxdata)) { StringDTypeObject *descr = (StringDTypeObject *)context->descriptors[0]; - NPY_STRING_ACQUIRE_ALLOCATOR(descr); - npy_string_allocator *allocator = descr->allocator; + npy_string_allocator *allocator = NpyString_acquire_allocator(descr); int has_null = descr->na_object != NULL; int has_string_na = descr->has_string_na; const npy_static_string *default_string = &descr->default_string; @@ -1186,7 +1165,7 @@ string_to_datetime(PyArrayMethod_Context *context, char *const data[], while (N--) { const npy_packed_static_string *ps = (npy_packed_static_string *)in; npy_static_string s = {0, NULL}; - int is_null = npy_string_load(allocator, ps, &s); + int is_null = NpyString_load(allocator, ps, &s); if (is_null == -1) { // do we 
hold the gil in this cast? error handling below seems to // think we do @@ -1217,11 +1196,11 @@ string_to_datetime(PyArrayMethod_Context *context, char *const data[], out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return -1; } @@ -1255,20 +1234,17 @@ datetime_to_string(PyArrayMethod_Context *context, char *const data[], char datetime_buf[NPY_DATETIME_MAX_ISO8601_STRLEN]; StringDTypeObject *sdescr = (StringDTypeObject *)context->descriptors[1]; - NPY_STRING_ACQUIRE_ALLOCATOR(sdescr); - npy_string_allocator *allocator = sdescr->allocator; + npy_string_allocator *allocator = NpyString_acquire_allocator(sdescr); while (N--) { npy_packed_static_string *out_pss = (npy_packed_static_string *)out; - if (npy_string_free(out_pss, allocator) < 0) { + if (*in == NPY_DATETIME_NAT && + NpyString_pack_null(allocator, out_pss) < 0) { gil_error( PyExc_MemoryError, "Failed to deallocate string in datetime to string cast"); goto fail; } - if (*in == NPY_DATETIME_NAT) { - *out_pss = *NPY_NULL_STRING; - } else { if (NpyDatetime_ConvertDatetime64ToDatetimeStruct(dt_meta, *in, &dts) < 0) { @@ -1284,8 +1260,8 @@ datetime_to_string(PyArrayMethod_Context *context, char *const data[], goto fail; } - if (npy_string_newsize(datetime_buf, strlen(datetime_buf), out_pss, - allocator) < 0) { + if (NpyString_pack(allocator, out_pss, datetime_buf, + strlen(datetime_buf)) < 0) { PyErr_SetString(PyExc_MemoryError, "Failed to allocate string when converting " "from a datetime."); @@ -1297,11 +1273,11 @@ datetime_to_string(PyArrayMethod_Context *context, char *const data[], out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR(sdescr); + NpyString_release_allocator(sdescr); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR(sdescr); + NpyString_release_allocator(sdescr); return -1; } diff --git a/stringdtype/stringdtype/src/dtype.c 
b/stringdtype/stringdtype/src/dtype.c index eac27009..29a963da 100644 --- a/stringdtype/stringdtype/src/dtype.c +++ b/stringdtype/stringdtype/src/dtype.c @@ -11,11 +11,6 @@ PyTypeObject *StringScalar_Type = NULL; PyObject * new_stringdtype_instance(PyObject *na_object, int coerce) { - npy_string_allocator *allocator = NULL; - PyThread_type_lock *allocator_lock = NULL; - npy_packed_static_string packed_na_name = *NPY_EMPTY_STRING; - npy_packed_static_string packed_default_string = *NPY_EMPTY_STRING; - PyObject *new = PyArrayDescr_Type.tp_new((PyTypeObject *)&StringDType, NULL, NULL); @@ -23,9 +18,14 @@ new_stringdtype_instance(PyObject *na_object, int coerce) return NULL; } - allocator = npy_string_new_allocator(PyMem_RawMalloc, PyMem_RawFree, - PyMem_RawRealloc); + npy_string_allocator *allocator = NULL; + PyThread_type_lock *allocator_lock = NULL; + + char *default_string_buf = NULL; + char *na_name_buf = NULL; + allocator = NpyString_new_allocator(PyMem_RawMalloc, PyMem_RawFree, + PyMem_RawRealloc); if (allocator == NULL) { PyErr_SetString(PyExc_MemoryError, "Failed to create string allocator"); @@ -38,6 +38,9 @@ new_stringdtype_instance(PyObject *na_object, int coerce) goto fail; } + npy_static_string default_string = {0, NULL}; + npy_static_string na_name = {0, NULL}; + Py_XINCREF(na_object); ((StringDTypeObject *)new)->na_object = na_object; int hasnull = na_object != NULL; @@ -49,13 +52,9 @@ new_stringdtype_instance(PyObject *na_object, int coerce) has_string_na = 1; Py_ssize_t size = 0; const char *buf = PyUnicode_AsUTF8AndSize(na_object, &size); - if (npy_string_newsize(buf, (size_t)size, &packed_default_string, - allocator) < 0) { - PyErr_SetString(PyExc_MemoryError, - "Failed to allocate string while creating " - "StringDType instance."); - goto fail; - } + default_string.buf = PyMem_RawMalloc(size); + memcpy((char *)default_string.buf, buf, size); + default_string.size = size; } else { // treat as nan-like if != comparison returns a object whose truth @@ 
-86,14 +85,9 @@ new_stringdtype_instance(PyObject *na_object, int coerce) Py_DECREF(na_pystr); goto fail; } - if (npy_string_newsize(utf8_ptr, (size_t)size, &packed_na_name, - allocator) < 0) { - PyErr_SetString(PyExc_MemoryError, - "Failed to allocate string while creating " - "StringDType instance."); - Py_DECREF(na_pystr); - goto fail; - } + na_name.buf = PyMem_RawMalloc(size); + memcpy((char *)na_name.buf, utf8_ptr, size); + na_name.size = size; Py_DECREF(na_pystr); } @@ -101,39 +95,16 @@ new_stringdtype_instance(PyObject *na_object, int coerce) snew->has_nan_na = has_nan_na; snew->has_string_na = has_string_na; - snew->packed_default_string = packed_default_string; - snew->packed_na_name = packed_na_name; snew->coerce = coerce; snew->allocator_lock = allocator_lock; snew->allocator = allocator; snew->array_owned = 0; - - npy_static_string default_string = {0, NULL}; - if (npy_string_load(allocator, &snew->packed_default_string, - &default_string) == -1) { - PyErr_SetString(PyExc_MemoryError, - "Failed to load packed string while " - "creating StringDType instance."); - Py_DECREF(snew); - return NULL; - } - - npy_static_string na_name = {0, NULL}; - if (npy_string_load(allocator, &snew->packed_na_name, &na_name) == -1) { - PyErr_SetString(PyExc_MemoryError, - "Failed to load packed string while " - "creating StringDType instance."); - - Py_DECREF(snew); - return NULL; - } - snew->na_name = na_name; snew->default_string = default_string; PyArray_Descr *base = (PyArray_Descr *)new; - base->elsize = sizeof(npy_static_string); - base->alignment = _Alignof(npy_static_string); + base->elsize = SIZEOF_NPY_PACKED_STATIC_STRING; + base->alignment = ALIGNOF_NPY_PACKED_STATIC_STRING; base->flags |= NPY_NEEDS_INIT; base->flags |= NPY_LIST_PICKLE; base->flags |= NPY_ITEM_REFCOUNT; @@ -149,10 +120,14 @@ new_stringdtype_instance(PyObject *na_object, int coerce) fail: // this only makes sense if the allocator isn't attached to new yet Py_DECREF(new); + if (default_string_buf 
!= NULL) { + PyMem_RawFree(default_string_buf); + } + if (na_name_buf != NULL) { + PyMem_RawFree(na_name_buf); + } if (allocator != NULL) { - npy_string_free(&packed_na_name, allocator); - npy_string_free(&packed_default_string, allocator); - npy_string_free_allocator(allocator); + NpyString_free_allocator(allocator); } if (allocator_lock != NULL) { PyThread_free_lock(allocator_lock); @@ -236,10 +211,7 @@ common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) // `scalar`. If scalar is not already a string and // coerce is nonzero, __str__ is called to convert it // to a string. If coerce is zero, raises an error for -// non-string or non-NA input. If the scalar is the -// na_object for the dtype class, return a new -// reference to the na_object. - +// non-string or non-NA input. static PyObject * get_value(PyObject *scalar, int coerce) { @@ -260,9 +232,11 @@ get_value(PyObject *scalar, int coerce) } } } + else { + Py_INCREF(scalar); + } - // attempt to decode as UTF8 - return PyUnicode_AsUTF8String(scalar); + return scalar; } static PyArray_Descr * @@ -274,6 +248,8 @@ string_discover_descriptor_from_pyobject(PyTypeObject *NPY_UNUSED(cls), return NULL; } + Py_DECREF(val); + PyArray_Descr *ret = (PyArray_Descr *)new_stringdtype_instance(NULL, 1); return ret; @@ -286,15 +262,7 @@ stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr) { npy_packed_static_string *sdata = (npy_packed_static_string *)dataptr; - NPY_STRING_ACQUIRE_ALLOCATOR(descr); - - // free if dataptr holds preexisting string data, - // npy_string_free does a NULL check and checks for small strings - if (npy_string_free(sdata, descr->allocator) < 0) { - PyErr_SetString(PyExc_MemoryError, - "String deallocation failed in StringDType setitem"); - goto fail; - } + npy_string_allocator *allocator = NpyString_acquire_allocator(descr); // borrow reference PyObject *na_object = descr->na_object; @@ -302,7 +270,12 @@ stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, 
char **dataptr) // setting NA *must* check pointer equality since NA types might not // allow equality if (na_object != NULL && obj == na_object) { - *sdata = *NPY_NULL_STRING; + if (NpyString_pack_null(allocator, sdata) < 0) { + PyErr_SetString(PyExc_MemoryError, + "Failed to pack null string during StringDType " + "setitem"); + goto fail; + } } else { PyObject *val_obj = get_value(obj, descr->coerce); @@ -311,28 +284,29 @@ stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr) goto fail; } - char *val = NULL; Py_ssize_t length = 0; - if (PyBytes_AsStringAndSize(val_obj, &val, &length) == -1) { + const char *val = PyUnicode_AsUTF8AndSize(val_obj, &length); + if (val == NULL) { Py_DECREF(val_obj); goto fail; } - if (npy_string_newsize(val, length, sdata, descr->allocator) < 0) { + if (NpyString_pack(allocator, sdata, val, length) < 0) { PyErr_SetString(PyExc_MemoryError, - "Failed to allocate string during StringDType " + "Failed to pack string during StringDType " "setitem"); Py_DECREF(val_obj); goto fail; } + Py_DECREF(val_obj); } - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return -1; } @@ -344,8 +318,8 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr) npy_packed_static_string *psdata = (npy_packed_static_string *)dataptr; npy_static_string sdata = {0, NULL}; int hasnull = descr->na_object != NULL; - NPY_STRING_ACQUIRE_ALLOCATOR(descr); - int is_null = npy_string_load(descr->allocator, psdata, &sdata); + npy_string_allocator *allocator = NpyString_acquire_allocator(descr); + int is_null = NpyString_load(allocator, psdata, &sdata); if (is_null < 0) { PyErr_SetString(PyExc_MemoryError, @@ -370,7 +344,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr) } } - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); /* * In principle we should return a StringScalar instance 
here, but @@ -387,7 +361,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr) fail: - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return NULL; } @@ -397,7 +371,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr) npy_bool nonzero(void *data, void *NPY_UNUSED(arr)) { - return npy_string_size((npy_packed_static_string *)data) != 0; + return NpyString_size((npy_packed_static_string *)data) != 0; } // Implementation of PyArray_CompareFunc. @@ -406,9 +380,11 @@ int compare(void *a, void *b, void *arr) { StringDTypeObject *descr = (StringDTypeObject *)PyArray_DESCR(arr); - NPY_STRING_ACQUIRE_ALLOCATOR(descr); + // ignore the allocator returned by this function + // since _compare needs the descr anyway + NpyString_acquire_allocator(descr); int ret = _compare(a, b, descr, descr); - NPY_STRING_RELEASE_ALLOCATOR(descr); + NpyString_release_allocator(descr); return ret; } @@ -427,10 +403,10 @@ _compare(void *a, void *b, StringDTypeObject *descr_a, npy_static_string *default_string = &descr_a->default_string; const npy_packed_static_string *ps_a = (npy_packed_static_string *)a; npy_static_string s_a = {0, NULL}; - int a_is_null = npy_string_load(allocator_a, ps_a, &s_a); + int a_is_null = NpyString_load(allocator_a, ps_a, &s_a); const npy_packed_static_string *ps_b = (npy_packed_static_string *)b; npy_static_string s_b = {0, NULL}; - int b_is_null = npy_string_load(allocator_b, ps_b, &s_b); + int b_is_null = NpyString_load(allocator_b, ps_b, &s_b); if (NPY_UNLIKELY(a_is_null == -1 || b_is_null == -1)) { char *msg = "Failed to load string in string comparison"; if (hasnull && !(has_string_na && has_nan_na)) { @@ -474,18 +450,19 @@ _compare(void *a, void *b, StringDTypeObject *descr_a, } } } - return npy_string_cmp(&s_a, &s_b); + return NpyString_cmp(&s_a, &s_b); } // PyArray_ArgFunc // The max element is the one with the highest unicode code point. 
int -argmax(void *data, npy_intp n, npy_intp *max_ind, void *arr) +argmax(char *data, npy_intp n, npy_intp *max_ind, void *arr) { - npy_packed_static_string *dptr = (npy_packed_static_string *)data; + PyArray_Descr *descr = PyArray_DESCR(arr); + npy_intp elsize = descr->elsize; *max_ind = 0; for (int i = 1; i < n; i++) { - if (compare(&dptr[i], &dptr[*max_ind], arr) > 0) { + if (compare(data + i * elsize, data + (*max_ind) * elsize, arr) > 0) { *max_ind = i; } } @@ -495,12 +472,13 @@ argmax(void *data, npy_intp n, npy_intp *max_ind, void *arr) // PyArray_ArgFunc // The min element is the one with the lowest unicode code point. int -argmin(void *data, npy_intp n, npy_intp *min_ind, void *arr) +argmin(char *data, npy_intp n, npy_intp *min_ind, void *arr) { - npy_packed_static_string *dptr = (npy_packed_static_string *)data; + PyArray_Descr *descr = PyArray_DESCR(arr); + npy_intp elsize = descr->elsize; *min_ind = 0; for (int i = 1; i < n; i++) { - if (compare(&dptr[i], &dptr[*min_ind], arr) < 0) { + if (compare(data + i * elsize, data + (*min_ind) * elsize, arr) < 0) { *min_ind = i; } } @@ -520,24 +498,21 @@ stringdtype_clear_loop(void *NPY_UNUSED(traverse_context), npy_intp stride, NpyAuxData *NPY_UNUSED(auxdata)) { StringDTypeObject *sdescr = (StringDTypeObject *)descr; - NPY_STRING_ACQUIRE_ALLOCATOR(sdescr); + npy_string_allocator *allocator = NpyString_acquire_allocator(sdescr); while (size--) { npy_packed_static_string *sdata = (npy_packed_static_string *)data; - if (data != NULL) { - if (npy_string_free(sdata, sdescr->allocator) < 0) { - gil_error(PyExc_MemoryError, - "String deallocation failed in clear loop"); - goto fail; - } - memset(data, 0, sizeof(npy_packed_static_string)); + if (data != NULL && NpyString_free(sdata, allocator) < 0) { + gil_error(PyExc_MemoryError, + "String deallocation failed in clear loop"); + goto fail; } data += stride; } - NPY_STRING_RELEASE_ALLOCATOR(sdescr); + NpyString_release_allocator(sdescr); return 0; fail: - 
NPY_STRING_RELEASE_ALLOCATOR(sdescr); + NpyString_release_allocator(sdescr); return -1; } @@ -701,10 +676,11 @@ stringdtype_dealloc(StringDTypeObject *self) if (self->allocator != NULL) { // can we assume the destructor for an instance will only get called // inside of one C thread? - npy_string_free(&self->packed_default_string, self->allocator); - npy_string_free(&self->packed_na_name, self->allocator); - npy_string_free_allocator(self->allocator); + NpyString_free_allocator(self->allocator); + PyThread_free_lock(self->allocator_lock); } + PyMem_RawFree((char *)self->na_name.buf); + PyMem_RawFree((char *)self->default_string.buf); PyArrayDescr_Type.tp_dealloc((PyObject *)self); } @@ -955,14 +931,14 @@ free_and_copy(npy_string_allocator *in_allocator, const npy_packed_static_string *in, npy_packed_static_string *out, const char *location) { - if (npy_string_free(out, out_allocator) < 0) { + if (NpyString_free(out, out_allocator) < 0) { char message[200]; snprintf(message, sizeof(message), "Failed to deallocate string in %s", location); gil_error(PyExc_MemoryError, message); return -1; } - if (npy_string_dup(in, out, in_allocator, out_allocator) < 0) { + if (NpyString_dup(in, out, in_allocator, out_allocator) < 0) { char message[200]; snprintf(message, sizeof(message), "Failed to allocate string in %s", location); diff --git a/stringdtype/stringdtype/src/dtype.h b/stringdtype/stringdtype/src/dtype.h index 836d99b2..809c55a0 100644 --- a/stringdtype/stringdtype/src/dtype.h +++ b/stringdtype/stringdtype/src/dtype.h @@ -19,41 +19,13 @@ #include "numpy/npy_math.h" #include "numpy/ufuncobject.h" -#define NPY_STRING_ACQUIRE_ALLOCATOR(descr) \ - if (!PyThread_acquire_lock(descr->allocator_lock, NOWAIT_LOCK)) { \ - PyThread_acquire_lock(descr->allocator_lock, WAIT_LOCK); \ - } - -#define NPY_STRING_ACQUIRE_ALLOCATOR2(descr1, descr2) \ - NPY_STRING_ACQUIRE_ALLOCATOR(descr1) \ - if (descr1 != descr2) { \ - NPY_STRING_ACQUIRE_ALLOCATOR(descr2) \ - } - -#define 
NPY_STRING_ACQUIRE_ALLOCATOR3(descr1, descr2, descr3) \ - NPY_STRING_ACQUIRE_ALLOCATOR(descr1) \ - if (descr1 != descr2) { \ - NPY_STRING_ACQUIRE_ALLOCATOR(descr2) \ - } \ - if (descr1 != descr3 && descr2 != descr3) { \ - NPY_STRING_ACQUIRE_ALLOCATOR(descr3) \ - } - -#define NPY_STRING_RELEASE_ALLOCATOR(descr) \ - PyThread_release_lock(descr->allocator_lock); -#define NPY_STRING_RELEASE_ALLOCATOR2(descr1, descr2) \ - NPY_STRING_RELEASE_ALLOCATOR(descr1); \ - if (descr1 != descr2) { \ - NPY_STRING_RELEASE_ALLOCATOR(descr2); \ - } -#define NPY_STRING_RELEASE_ALLOCATOR3(descr1, descr2, descr3) \ - NPY_STRING_RELEASE_ALLOCATOR(descr1); \ - if (descr1 != descr2) { \ - NPY_STRING_RELEASE_ALLOCATOR(descr2); \ - } \ - if (descr1 != descr3 && descr2 != descr3) { \ - NPY_STRING_RELEASE_ALLOCATOR(descr3); \ - } +// not publicly exposed by the static string library so we need to define +// this here so we can define `elsize` and `alignment` on the descr +// +// if the layout of npy_packed_static_string ever changes in the future +// this may need to be updated. 
+#define SIZEOF_NPY_PACKED_STATIC_STRING 2 * sizeof(size_t) +#define ALIGNOF_NPY_PACKED_STATIC_STRING _Alignof(size_t) typedef struct { PyArray_Descr base; @@ -63,9 +35,7 @@ typedef struct { int has_string_na; int array_owned; npy_static_string default_string; - npy_packed_static_string packed_default_string; npy_static_string na_name; - npy_packed_static_string packed_na_name; PyThread_type_lock *allocator_lock; // the allocator should only be directly accessed after // acquiring the allocator_lock and the lock should @@ -81,6 +51,74 @@ typedef struct { extern StringDType_type StringDType; extern PyTypeObject *StringScalar_Type; +static inline npy_string_allocator * +NpyString_acquire_allocator(StringDTypeObject *descr) +{ + if (!PyThread_acquire_lock(descr->allocator_lock, NOWAIT_LOCK)) { + PyThread_acquire_lock(descr->allocator_lock, WAIT_LOCK); + } + return descr->allocator; +} + +static inline void +NpyString_acquire_allocator2(StringDTypeObject *descr1, + StringDTypeObject *descr2, + npy_string_allocator **allocator1, + npy_string_allocator **allocator2) +{ + *allocator1 = NpyString_acquire_allocator(descr1); + if (descr1 != descr2) { + *allocator2 = NpyString_acquire_allocator(descr2); + } + else { + *allocator2 = *allocator1; + } +} + +static inline void +NpyString_acquire_allocator3(StringDTypeObject *descr1, + StringDTypeObject *descr2, + StringDTypeObject *descr3, + npy_string_allocator **allocator1, + npy_string_allocator **allocator2, + npy_string_allocator **allocator3) +{ + NpyString_acquire_allocator2(descr1, descr2, allocator1, allocator2); + if (descr1 != descr3 && descr2 != descr3) { + *allocator3 = NpyString_acquire_allocator(descr3); + } + else { + *allocator3 = descr3->allocator; + } +} + +static inline void +NpyString_release_allocator(StringDTypeObject *descr) +{ + PyThread_release_lock(descr->allocator_lock); +} + +static inline void +NpyString_release_allocator2(StringDTypeObject *descr1, + StringDTypeObject *descr2) +{ + 
NpyString_release_allocator(descr1); + if (descr1 != descr2) { + NpyString_release_allocator(descr2); + } +} + +static inline void +NpyString_release_allocator3(StringDTypeObject *descr1, + StringDTypeObject *descr2, + StringDTypeObject *descr3) +{ + NpyString_release_allocator2(descr1, descr2); + if (descr1 != descr3 && descr2 != descr3) { + NpyString_release_allocator(descr3); + } +} + PyObject * new_stringdtype_instance(PyObject *na_object, int coerce); diff --git a/stringdtype/stringdtype/src/main.c b/stringdtype/stringdtype/src/main.c index 18ead38c..502cf4b8 100644 --- a/stringdtype/stringdtype/src/main.c +++ b/stringdtype/stringdtype/src/main.c @@ -58,7 +58,7 @@ _memory_usage(PyObject *NPY_UNUSED(self), PyObject *obj) npy_intp count = *innersizeptr; while (count--) { - size_t size = npy_string_size(((npy_packed_static_string *)in)); + size_t size = NpyString_size(((npy_packed_static_string *)in)); // FIXME: add a way for a string to report its heap size usage if (size > (sizeof(npy_static_string) - 1)) { memory_usage += size; diff --git a/stringdtype/stringdtype/src/static_string.c b/stringdtype/stringdtype/src/static_string.c index 713df1e7..bdba5d12 100644 --- a/stringdtype/stringdtype/src/static_string.c +++ b/stringdtype/stringdtype/src/static_string.c @@ -8,14 +8,14 @@ // the high byte in vstring.size is reserved for flags // SSSS SSSF -typedef struct _npy_static_string_t { +typedef struct _npy_static_vstring_t { size_t offset; - size_t size; -} _npy_static_string_t; + size_t size_and_flags; +} _npy_static_vstring_t; typedef struct _short_string_buffer { - char buf[sizeof(_npy_static_string_t) - 1]; - unsigned char flags_and_size; + char buf[sizeof(_npy_static_vstring_t) - 1]; + unsigned char size_and_flags; } _short_string_buffer; #elif NPY_BYTE_ORDER == NPY_BIG_ENDIAN @@ -23,20 +23,20 @@ typedef struct _short_string_buffer { // the high byte in vstring.size is reserved for flags // FSSS SSSS -typedef struct _npy_static_string_t { +typedef struct 
_npy_static_vstring_t { size_t size; size_t offset; -} _npy_static_string_t; +} _npy_static_vstring_t; typedef struct _short_string_buffer { - unsigned char flags_and_size; - char buf[sizeof(npy_static_string_t) - 1]; + unsigned char size_and_flags; + char buf[sizeof(_npy_static_vstring_t) - 1]; } _short_string_buffer; #endif typedef union _npy_static_string_u { - _npy_static_string_t vstring; + _npy_static_vstring_t vstring; _short_string_buffer direct_buffer; } _npy_static_string_u; @@ -59,19 +59,12 @@ typedef union _npy_static_string_u { // of this choice is a calloc'd array buffer (e.g. from np.empty) is filled // with empty elements for free const _npy_static_string_u empty_string_u = { - .direct_buffer = {.flags_and_size = 0, .buf = {0}}}; -const npy_packed_static_string *NPY_EMPTY_STRING = - (npy_packed_static_string *)&empty_string_u; -// zero-filled, but with the NULL flag set to distinguish from empty string -const _npy_static_string_u null_string_u = { - .direct_buffer = {.flags_and_size = NPY_STRING_MISSING, .buf = {0}}}; -const npy_packed_static_string *NPY_NULL_STRING = - (npy_packed_static_string *)&null_string_u; + .direct_buffer = {.size_and_flags = 0, .buf = {0}}}; #define VSTRING_FLAGS(string) \ - string->direct_buffer.flags_and_size & ~NPY_SHORT_STRING_SIZE_MASK; + string->direct_buffer.size_and_flags & ~NPY_SHORT_STRING_SIZE_MASK; #define HIGH_BYTE_MASK ((size_t)0XFF << 8 * (sizeof(size_t) - 1)) -#define VSTRING_SIZE(string) (string->vstring.size & ~HIGH_BYTE_MASK) +#define VSTRING_SIZE(string) (string->vstring.size_and_flags & ~HIGH_BYTE_MASK) typedef struct npy_string_arena { size_t cursor; @@ -89,9 +82,9 @@ struct npy_string_allocator { void set_vstring_size(_npy_static_string_u *str, size_t size) { - unsigned char current_flags = str->direct_buffer.flags_and_size; - str->vstring.size = size; - str->direct_buffer.flags_and_size = current_flags; + unsigned char current_flags = str->direct_buffer.size_and_flags; + str->vstring.size_and_flags = 
size; + str->direct_buffer.size_and_flags = current_flags; } char * @@ -107,9 +100,10 @@ vstring_buffer(npy_string_arena *arena, _npy_static_string_u *string) return (char *)((size_t)arena->buffer + string->vstring.offset); } +#define ARENA_EXPAND_FACTOR 1.25 + char * -npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r, - size_t size) +arena_malloc(npy_string_arena *arena, npy_string_realloc_func r, size_t size) { // one extra size_t to store the size of the allocation size_t string_storage_size; @@ -126,15 +120,16 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r, if (arena->size == 0) { newsize = string_storage_size; } - else if (((2 * arena->size) - arena->cursor) > string_storage_size) { - newsize = 2 * arena->size; + else if (((ARENA_EXPAND_FACTOR * arena->size) - arena->cursor) > + string_storage_size) { + newsize = ARENA_EXPAND_FACTOR * arena->size; } else { newsize = arena->size + string_storage_size; } if ((arena->cursor + size) >= newsize) { - // doubling the current size isn't enough - newsize = 2 * (arena->cursor + size); + // need extra room beyond the expansion factor, leave some padding + newsize = ARENA_EXPAND_FACTOR * (arena->cursor + size); } // passing a NULL buffer to realloc is the same as malloc char *newbuf = r(arena->buffer, newsize); @@ -162,7 +157,7 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r, } int -npy_string_arena_free(npy_string_arena *arena, _npy_static_string_u *str) +arena_free(npy_string_arena *arena, _npy_static_string_u *str) { if (arena->size == 0 && arena->cursor == 0 && arena->buffer == NULL) { // empty arena, nothing to do @@ -190,8 +185,8 @@ npy_string_arena_free(npy_string_arena *arena, _npy_static_string_u *str) static npy_string_arena NEW_ARENA = {0, 0, NULL}; npy_string_allocator * -npy_string_new_allocator(npy_string_malloc_func m, npy_string_free_func f, - npy_string_realloc_func r) +NpyString_new_allocator(npy_string_malloc_func m, 
npy_string_free_func f, + npy_string_realloc_func r) { npy_string_allocator *allocator = m(sizeof(npy_string_allocator)); if (allocator == NULL) { @@ -200,13 +195,13 @@ npy_string_new_allocator(npy_string_malloc_func m, npy_string_free_func f, allocator->malloc = m; allocator->free = f; allocator->realloc = r; - // arena buffer gets allocated in npy_string_arena_malloc + // arena buffer gets allocated in arena_malloc allocator->arena = NEW_ARENA; return allocator; } void -npy_string_free_allocator(npy_string_allocator *allocator) +NpyString_free_allocator(npy_string_allocator *allocator) { npy_string_free_func f = allocator->free; @@ -221,7 +216,7 @@ int is_short_string(const npy_packed_static_string *s) { unsigned char high_byte = - ((_npy_static_string_u *)s)->direct_buffer.flags_and_size; + ((_npy_static_string_u *)s)->direct_buffer.size_and_flags; int has_short_flag = (high_byte & NPY_STRING_SHORT); int has_on_heap_flag = (high_byte & NPY_STRING_ON_HEAP); return has_short_flag && !has_on_heap_flag; @@ -230,24 +225,24 @@ is_short_string(const npy_packed_static_string *s) int is_medium_string(const _npy_static_string_u *s) { - unsigned char high_byte = s->direct_buffer.flags_and_size; + unsigned char high_byte = s->direct_buffer.size_and_flags; int has_short_flag = (high_byte & NPY_STRING_SHORT); int has_medium_flag = (high_byte & NPY_STRING_MEDIUM); return (!has_short_flag && has_medium_flag); } int -npy_string_isnull(const npy_packed_static_string *s) +NpyString_isnull(const npy_packed_static_string *s) { unsigned char high_byte = - ((_npy_static_string_u *)s)->direct_buffer.flags_and_size; + ((_npy_static_string_u *)s)->direct_buffer.size_and_flags; return (high_byte & NPY_STRING_MISSING) == NPY_STRING_MISSING; } int is_not_a_vstring(const npy_packed_static_string *s) { - return is_short_string(s) || npy_string_isnull(s); + return is_short_string(s) || NpyString_isnull(s); } int @@ -257,11 +252,11 @@ is_a_vstring(const npy_packed_static_string *s) } int 
-npy_string_load(npy_string_allocator *allocator, - const npy_packed_static_string *packed_string, - npy_static_string *unpacked_string) +NpyString_load(npy_string_allocator *allocator, + const npy_packed_static_string *packed_string, + npy_static_string *unpacked_string) { - if (npy_string_isnull(packed_string)) { + if (NpyString_isnull(packed_string)) { unpacked_string->size = 0; unpacked_string->buf = NULL; return 1; @@ -270,7 +265,7 @@ npy_string_load(npy_string_allocator *allocator, _npy_static_string_u *string_u = (_npy_static_string_u *)packed_string; if (is_short_string(packed_string)) { - unsigned char high_byte = string_u->direct_buffer.flags_and_size; + unsigned char high_byte = string_u->direct_buffer.size_and_flags; unpacked_string->size = high_byte & NPY_SHORT_STRING_SIZE_MASK; unpacked_string->buf = string_u->direct_buffer.buf; } @@ -300,7 +295,7 @@ heap_or_arena_allocate(npy_string_allocator *allocator, _npy_static_string_u *to_init_u, size_t size, int *on_heap) { - unsigned char *flags = &to_init_u->direct_buffer.flags_and_size; + unsigned char *flags = &to_init_u->direct_buffer.size_and_flags; if (*flags & NPY_STRING_SHORT) { // Have to heap allocate since there isn't a preexisting // allocation. 
This leaves the NPY_STRING_SHORT flag set to indicate @@ -345,8 +340,7 @@ heap_or_arena_allocate(npy_string_allocator *allocator, } } // string isn't previously allocated, so add to existing arena allocation - char *ret = npy_string_arena_malloc(arena, allocator->realloc, - sizeof(char) * size); + char *ret = arena_malloc(arena, allocator->realloc, sizeof(char) * size); if (size < NPY_MEDIUM_STRING_MAX_SIZE) { *flags |= NPY_STRING_MEDIUM; } @@ -357,7 +351,7 @@ int heap_or_arena_deallocate(npy_string_allocator *allocator, _npy_static_string_u *str_u) { - unsigned char *flags = &str_u->direct_buffer.flags_and_size; + unsigned char *flags = &str_u->direct_buffer.size_and_flags; if (*flags & NPY_STRING_ON_HEAP) { // It's a heap string (not in the arena buffer) so it needs to be // deallocated with free(). For heap strings the offset is a raw @@ -375,22 +369,22 @@ heap_or_arena_deallocate(npy_string_allocator *allocator, if (arena == NULL) { return -1; } - if (npy_string_arena_free(arena, str_u) < 0) { + if (arena_free(arena, str_u) < 0) { return -1; } if (arena->buffer != NULL) { - str_u->direct_buffer.flags_and_size |= NPY_STRING_ARENA_FREED; + str_u->direct_buffer.size_and_flags |= NPY_STRING_ARENA_FREED; } } return 0; } int -npy_string_newsize(const char *init, size_t size, - npy_packed_static_string *to_init, - npy_string_allocator *allocator) +NpyString_newsize(const char *init, size_t size, + npy_packed_static_string *to_init, + npy_string_allocator *allocator) { - if (npy_string_newemptysize(size, to_init, allocator) < 0) { + if (NpyString_newemptysize(size, to_init, allocator) < 0) { return -1; } @@ -415,8 +409,8 @@ npy_string_newsize(const char *init, size_t size, } int -npy_string_newemptysize(size_t size, npy_packed_static_string *out, - npy_string_allocator *allocator) +NpyString_newemptysize(size_t size, npy_packed_static_string *out, + npy_string_allocator *allocator) { if (size > NPY_MAX_STRING_SIZE) { return -1; @@ -425,11 +419,11 @@ 
npy_string_newemptysize(size_t size, npy_packed_static_string *out, _npy_static_string_u *out_u = (_npy_static_string_u *)out; unsigned char flags = - out_u->direct_buffer.flags_and_size & ~NPY_SHORT_STRING_SIZE_MASK; + out_u->direct_buffer.size_and_flags & ~NPY_SHORT_STRING_SIZE_MASK; if (size == 0) { - *out = *NPY_EMPTY_STRING; - out_u->direct_buffer.flags_and_size |= flags; + memcpy(out_u, &empty_string_u, sizeof(_npy_static_string_u)); + out_u->direct_buffer.size_and_flags |= flags; return 0; } @@ -458,21 +452,21 @@ npy_string_newemptysize(size_t size, npy_packed_static_string *out, // In either case, the size data is in at most the least significant 4 // bits of the byte so it's safe to | with one of 0x10, 0x20, 0x40, or // 0x80. - out_u->direct_buffer.flags_and_size = NPY_STRING_SHORT | flags | size; + out_u->direct_buffer.size_and_flags = NPY_STRING_SHORT | flags | size; } return 0; } int -npy_string_free(npy_packed_static_string *str, npy_string_allocator *allocator) +NpyString_free(npy_packed_static_string *str, npy_string_allocator *allocator) { _npy_static_string_u *str_u = (_npy_static_string_u *)str; if (is_not_a_vstring(str)) { // zero out, keeping flags - unsigned char *flags = &str_u->direct_buffer.flags_and_size; + unsigned char *flags = &str_u->direct_buffer.size_and_flags; unsigned char current_flags = *flags & ~NPY_SHORT_STRING_SIZE_MASK; - memcpy(str, NPY_EMPTY_STRING, sizeof(npy_packed_static_string)); + memcpy(str_u, &empty_string_u, sizeof(_npy_static_string_u)); *flags |= current_flags; } else { @@ -488,27 +482,26 @@ npy_string_free(npy_packed_static_string *str, npy_string_allocator *allocator) } int -npy_string_dup(const npy_packed_static_string *in, - npy_packed_static_string *out, - npy_string_allocator *in_allocator, - npy_string_allocator *out_allocator) +NpyString_dup(const npy_packed_static_string *in, + npy_packed_static_string *out, + npy_string_allocator *in_allocator, + npy_string_allocator *out_allocator) { - if 
(npy_string_isnull(in)) { - *out = *NPY_NULL_STRING; - return 0; + if (NpyString_isnull(in)) { + return NpyString_pack_null(out_allocator, out); } if (is_short_string(in)) { - memcpy(out, in, sizeof(npy_packed_static_string)); + memcpy(out, in, sizeof(_npy_static_string_u)); return 0; } _npy_static_string_u *in_u = (_npy_static_string_u *)in; size_t size = VSTRING_SIZE(in_u); if (size == 0) { _npy_static_string_u *out_u = (_npy_static_string_u *)out; - unsigned char flags = out_u->direct_buffer.flags_and_size & + unsigned char flags = out_u->direct_buffer.size_and_flags & ~NPY_SHORT_STRING_SIZE_MASK; - *out = *NPY_EMPTY_STRING; - out_u->direct_buffer.flags_and_size |= flags; + memcpy(out_u, &empty_string_u, sizeof(_npy_static_string_u)); + out_u->direct_buffer.size_and_flags |= flags; return 0; } char *in_buf = NULL; @@ -526,7 +519,7 @@ npy_string_dup(const npy_packed_static_string *in, in_buf = vstring_buffer(arena, in_u); } int ret = - npy_string_newsize(in_buf, VSTRING_SIZE(in_u), out, out_allocator); + NpyString_newsize(in_buf, VSTRING_SIZE(in_u), out, out_allocator); if (used_malloc) { in_allocator->free(in_buf); } @@ -534,7 +527,7 @@ npy_string_dup(const npy_packed_static_string *in, } int -npy_string_cmp(const npy_static_string *s1, const npy_static_string *s2) +NpyString_cmp(const npy_static_string *s1, const npy_static_string *s2) { size_t minsize = s1->size < s2->size ? 
s1->size : s2->size; @@ -557,18 +550,44 @@ npy_string_cmp(const npy_static_string *s1, const npy_static_string *s2) } size_t -npy_string_size(const npy_packed_static_string *packed_string) +NpyString_size(const npy_packed_static_string *packed_string) { - if (npy_string_isnull(packed_string)) { + if (NpyString_isnull(packed_string)) { return 0; } _npy_static_string_u *string_u = (_npy_static_string_u *)packed_string; if (is_short_string(packed_string)) { - return string_u->direct_buffer.flags_and_size & + return string_u->direct_buffer.size_and_flags & NPY_SHORT_STRING_SIZE_MASK; } return VSTRING_SIZE(string_u); } + +int +NpyString_pack(npy_string_allocator *allocator, + npy_packed_static_string *packed_string, const char *buf, + size_t size) +{ + if (NpyString_free(packed_string, allocator) < 0) { + return -1; + } + return NpyString_newsize(buf, size, packed_string, allocator); +} + +int +NpyString_pack_null(npy_string_allocator *allocator, + npy_packed_static_string *packed_string) +{ + _npy_static_string_u *str_u = (_npy_static_string_u *)packed_string; + unsigned char *flags = &str_u->direct_buffer.size_and_flags; + unsigned char current_flags = *flags & ~NPY_SHORT_STRING_SIZE_MASK; + if (NpyString_free(packed_string, allocator) < 0) { + return -1; + } + memcpy(str_u, &empty_string_u, sizeof(_npy_static_string_u)); + *flags = current_flags | NPY_STRING_MISSING; + return 0; +} diff --git a/stringdtype/stringdtype/src/static_string.h b/stringdtype/stringdtype/src/static_string.h index b5967fa1..31b93f50 100644 --- a/stringdtype/stringdtype/src/static_string.h +++ b/stringdtype/stringdtype/src/static_string.h @@ -4,23 +4,13 @@ #include "stdint.h" #include "stdlib.h" -typedef struct npy_packed_static_string { - char packed_buffer[sizeof(char *) + sizeof(size_t)]; -} npy_packed_static_string; +typedef struct npy_packed_static_string npy_packed_static_string; -typedef struct npy_static_string { +typedef struct npy_unpacked_static_string { size_t size; const char 
*buf; } npy_static_string; -// Represents the empty string. The unpacked string can be passed safely to -// npy_static_string API functions. -extern const npy_packed_static_string *NPY_EMPTY_STRING; -// Represents a sentinel value, use npy_string_isnull or the return value of -// npy_string_load to check if a value is null before working with the unpacked -// representation. -extern const npy_packed_static_string *NPY_NULL_STRING; - // one byte in size is reserved for flags and small string optimization #define NPY_MAX_STRING_SIZE ((int64_t)1 << 8 * (sizeof(size_t) - 1)) - 1 @@ -36,17 +26,17 @@ typedef void *(*npy_string_realloc_func)(void *ptr, size_t size); // users won't use these directly and will use an allocator already // attached to a dtype instance npy_string_allocator * -npy_string_new_allocator(npy_string_malloc_func m, npy_string_free_func f, - npy_string_realloc_func r); +NpyString_new_allocator(npy_string_malloc_func m, npy_string_free_func f, + npy_string_realloc_func r); // Deallocates the internal buffer and the allocator itself. void -npy_string_free_allocator(npy_string_allocator *allocator); +NpyString_free_allocator(npy_string_allocator *allocator); // Allocates a new buffer for *to_init*, which must be set to NULL before // calling this function, filling the newly allocated buffer with the copied // contents of the first *size* entries in *init*, which must be valid and -// initialized beforehand. Calling npy_string_free on *to_init* before calling +// initialized beforehand. Calling NpyString_free on *to_init* before calling // this function on an existing string or copying the contents of // NPY_EMPTY_STRING into *to_init* is sufficient to initialize it. Does not // check if *to_init* is NULL or if the internal buffer is non-NULL, undefined @@ -55,31 +45,30 @@ npy_string_free_allocator(npy_string_allocator *allocator); // string. Returns -1 if allocating the string would exceed the maximum // allowed string size or exhaust available memory. 
Returns 0 on success. int -npy_string_newsize(const char *init, size_t size, - npy_packed_static_string *to_init, - npy_string_allocator *allocator); +NpyString_newsize(const char *init, size_t size, + npy_packed_static_string *to_init, + npy_string_allocator *allocator); // Zeroes out the packed string and frees any heap allocated data. For // arena-allocated data, checks if the data are inside the arena and // will return -1 if not. Returns 0 on success. int -npy_string_free(npy_packed_static_string *str, - npy_string_allocator *allocator); +NpyString_free(npy_packed_static_string *str, npy_string_allocator *allocator); // Copies the contents of *in* into *out*. Allocates a new string buffer for -// *out*, npy_string_free *must* be called before this is called if *out* +// *out*, NpyString_free *must* be called before this is called if *out* // points to an existing string. Returns -1 if malloc fails. Returns 0 on // success. int -npy_string_dup(const npy_packed_static_string *in, - npy_packed_static_string *out, - npy_string_allocator *in_allocator, - npy_string_allocator *out_allocator); +NpyString_dup(const npy_packed_static_string *in, + npy_packed_static_string *out, + npy_string_allocator *in_allocator, + npy_string_allocator *out_allocator); // Allocates a new string buffer for *out* with enough capacity to store // *size* bytes of text. Does not do any initialization, the caller must // initialize the string buffer after this function returns. Calling -// npy_string_free on *to_init* before calling this function on an existing +// NpyString_free on *to_init* before calling this function on an existing // string or initializing a new string with the contents of NPY_EMPTY_STRING // is sufficient to initialize it. 
Does not check if *to_init* has already // been initialized or if the internal buffer is non-NULL, undefined behavior @@ -88,19 +77,32 @@ npy_string_dup(const npy_packed_static_string *in, // allocating the string would exceed the maximum allowed string size or // exhaust available memory. Returns 0 on success. int -npy_string_newemptysize(size_t size, npy_packed_static_string *out, - npy_string_allocator *allocator); +NpyString_newemptysize(size_t size, npy_packed_static_string *out, + npy_string_allocator *allocator); // Determine if *in* corresponds to a null string (e.g. an NA object). Returns // -1 if *in* cannot be unpacked. Returns 1 if *in* is a null string and // zero otherwise. int -npy_string_isnull(const npy_packed_static_string *in); +NpyString_isnull(const npy_packed_static_string *in); // Compare two strings. Has the same semantics as if strcmp were passed // null-terminated C strings with the contents of *s1* and *s2*. int -npy_string_cmp(const npy_static_string *s1, const npy_static_string *s2); +NpyString_cmp(const npy_static_string *s1, const npy_static_string *s2); + +// Copy and pack the first *size* entries of the buffer pointed to by *buf* +// into the *packed_string*. Returns 0 on success and -1 on failure. +int +NpyString_pack(npy_string_allocator *allocator, + npy_packed_static_string *packed_string, const char *buf, + size_t size); + +// Pack the null string into the *packed_string*. Returns 0 on success and -1 +// on failure. +int +NpyString_pack_null(npy_string_allocator *allocator, + npy_packed_static_string *packed_string); // Extract the packed contents of *packed_string* into *unpacked_string*. A // useful pattern is to define a stack-allocated npy_static_string instance @@ -113,14 +115,14 @@ npy_string_cmp(const npy_static_string *s1, const npy_static_string *s2); // string, and returns 0 otherwise. This function can be used to // simultaneously unpack a string and determine if it is a null string. 
int -npy_string_load(npy_string_allocator *allocator, - const npy_packed_static_string *packed_string, - npy_static_string *unpacked_string); +NpyString_load(npy_string_allocator *allocator, + const npy_packed_static_string *packed_string, + npy_static_string *unpacked_string); // Returns the size of the string data in the packed string. Useful in // situations where only the string size is needed and determining if it is a // null or unpacking the string is unnecessary. size_t -npy_string_size(const npy_packed_static_string *packed_string); +NpyString_size(const npy_packed_static_string *packed_string); #endif /*_NPY_STATIC_STRING_H */ diff --git a/stringdtype/stringdtype/src/umath.c b/stringdtype/stringdtype/src/umath.c index 803dab67..737250ef 100644 --- a/stringdtype/stringdtype/src/umath.c +++ b/stringdtype/stringdtype/src/umath.c @@ -52,13 +52,16 @@ multiply_resolve_descriptors( const npy_static_string *default_string, \ StringDTypeObject *idescr, StringDTypeObject *odescr) \ { \ - NPY_STRING_ACQUIRE_ALLOCATOR2(idescr, odescr); \ + npy_string_allocator *iallocator = NULL; \ + npy_string_allocator *oallocator = NULL; \ + NpyString_acquire_allocator2(idescr, odescr, &iallocator, \ + &oallocator); \ while (N--) { \ const npy_packed_static_string *ips = \ (npy_packed_static_string *)sin; \ npy_static_string is = {0, NULL}; \ npy_packed_static_string *ops = (npy_packed_static_string *)out; \ - int is_isnull = npy_string_load(idescr->allocator, ips, &is); \ + int is_isnull = NpyString_load(iallocator, ips, &is); \ if (is_isnull == -1) { \ gil_error(PyExc_MemoryError, \ "Failed to load string in multiply"); \ @@ -66,13 +69,12 @@ multiply_resolve_descriptors( } \ else if (is_isnull) { \ if (has_nan_na) { \ - if (npy_string_free(ops, odescr->allocator) < 0) { \ + if (NpyString_pack_null(oallocator, ops) < 0) { \ gil_error(PyExc_MemoryError, \ "Failed to deallocate string in multiply"); \ goto fail; \ } \ \ - *ops = *NPY_NULL_STRING; \ sin += s_stride; \ iin += 
i_stride; \ out += o_stride; \ @@ -109,18 +111,17 @@ multiply_resolve_descriptors( } \ } \ else { \ - if (npy_string_free(ops, odescr->allocator) < 0) { \ + if (NpyString_free(ops, oallocator) < 0) { \ gil_error(PyExc_MemoryError, \ "Failed to deallocate string in multiply"); \ goto fail; \ } \ - if (npy_string_newemptysize(newsize, ops, \ - odescr->allocator) < 0) { \ + if (NpyString_newemptysize(newsize, ops, oallocator) < 0) { \ gil_error(PyExc_MemoryError, \ "Failed to allocate string in multiply"); \ goto fail; \ } \ - if (npy_string_load(odescr->allocator, ops, &os) < 0) { \ + if (NpyString_load(oallocator, ops, &os) < 0) { \ gil_error(PyExc_MemoryError, \ "Failed to load string in multiply"); \ goto fail; \ @@ -136,16 +137,9 @@ multiply_resolve_descriptors( } \ \ if (idescr == odescr) { \ - if (npy_string_free(ops, odescr->allocator) < 0) { \ + if (NpyString_pack(oallocator, ops, buf, newsize) < 0) { \ gil_error(PyExc_MemoryError, \ - "Failed to deallocate string in multiply"); \ - goto fail; \ - } \ - \ - if (npy_string_newsize(buf, newsize, ops, \ - odescr->allocator) < 0) { \ - gil_error(PyExc_MemoryError, \ - "Failed to allocate string in multiply"); \ + "Failed to pack string in multiply"); \ goto fail; \ } \ \ @@ -156,11 +150,11 @@ multiply_resolve_descriptors( iin += i_stride; \ out += o_stride; \ } \ - NPY_STRING_RELEASE_ALLOCATOR2(idescr, odescr); \ + NpyString_release_allocator2(idescr, odescr); \ return 0; \ \ fail: \ - NPY_STRING_RELEASE_ALLOCATOR2(idescr, odescr); \ + NpyString_release_allocator2(idescr, odescr); \ return -1; \ } \ \ @@ -314,15 +308,19 @@ add_strided_loop(PyArrayMethod_Context *context, char *const data[], npy_intp in2_stride = strides[1]; npy_intp out_stride = strides[2]; - NPY_STRING_ACQUIRE_ALLOCATOR3(s1descr, s2descr, odescr); + npy_string_allocator *s1allocator = NULL; + npy_string_allocator *s2allocator = NULL; + npy_string_allocator *oallocator = NULL; + NpyString_acquire_allocator3(s1descr, s2descr, odescr, &s1allocator, 
+ &s2allocator, &oallocator); while (N--) { const npy_packed_static_string *ps1 = (npy_packed_static_string *)in1; npy_static_string s1 = {0, NULL}; - int s1_isnull = npy_string_load(s1descr->allocator, ps1, &s1); + int s1_isnull = NpyString_load(s1allocator, ps1, &s1); const npy_packed_static_string *ps2 = (npy_packed_static_string *)in2; npy_static_string s2 = {0, NULL}; - int s2_isnull = npy_string_load(s2descr->allocator, ps2, &s2); + int s2_isnull = NpyString_load(s2allocator, ps2, &s2); if (s1_isnull == -1 || s2_isnull == -1) { gil_error(PyExc_MemoryError, "Failed to load string in add"); goto fail; @@ -330,12 +328,11 @@ add_strided_loop(PyArrayMethod_Context *context, char *const data[], npy_packed_static_string *ops = (npy_packed_static_string *)out; if (NPY_UNLIKELY(s1_isnull || s2_isnull)) { if (has_nan_na) { - if (npy_string_free(ops, odescr->allocator) < 0) { + if (NpyString_pack_null(oallocator, ops) < 0) { gil_error(PyExc_MemoryError, "Failed to deallocate string in add"); goto fail; } - *ops = *NPY_NULL_STRING; goto next_step; } else if (has_string_na || !has_null) { @@ -372,17 +369,17 @@ add_strided_loop(PyArrayMethod_Context *context, char *const data[], } } else { - if (npy_string_free(ops, odescr->allocator) < 0) { + if (NpyString_free(ops, oallocator) < 0) { gil_error(PyExc_MemoryError, "Failed to deallocate string in add"); goto fail; } - if (npy_string_newemptysize(newsize, ops, odescr->allocator) < 0) { + if (NpyString_newemptysize(newsize, ops, oallocator) < 0) { gil_error(PyExc_MemoryError, "Failed to allocate string in add"); goto fail; } - if (npy_string_load(odescr->allocator, ops, &os) < 0) { + if (NpyString_load(oallocator, ops, &os) < 0) { gil_error(PyExc_MemoryError, "Failed to load string in add"); goto fail; } @@ -394,15 +391,9 @@ add_strided_loop(PyArrayMethod_Context *context, char *const data[], memcpy(buf + s1.size, s2.buf, s2.size); if (s1descr == odescr || s2descr == odescr) { - if (npy_string_free(ops, odescr->allocator) < 
0) { + if (NpyString_pack(oallocator, ops, buf, newsize) < 0) { gil_error(PyExc_MemoryError, - "Failed to deallocate string in add"); - goto fail; - } - - if (npy_string_newsize(buf, newsize, ops, odescr->allocator) < 0) { - gil_error(PyExc_MemoryError, - "Failed to allocate string in add"); + "Failed to pack output string in add"); goto fail; } @@ -414,11 +405,11 @@ add_strided_loop(PyArrayMethod_Context *context, char *const data[], in2 += in2_stride; out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR3(s1descr, s2descr, odescr); + NpyString_release_allocator3(s1descr, s2descr, odescr); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR3(s1descr, s2descr, odescr); + NpyString_release_allocator3(s1descr, s2descr, odescr); return -1; } @@ -441,7 +432,12 @@ maximum_strided_loop(PyArrayMethod_Context *context, char *const data[], npy_intp in2_stride = strides[1]; npy_intp out_stride = strides[2]; - NPY_STRING_ACQUIRE_ALLOCATOR3(in1_descr, in2_descr, out_descr); + npy_string_allocator *in1_allocator = NULL; + npy_string_allocator *in2_allocator = NULL; + npy_string_allocator *out_allocator = NULL; + NpyString_acquire_allocator3(in1_descr, in2_descr, out_descr, + &in1_allocator, &in2_allocator, + &out_allocator); while (N--) { const npy_packed_static_string *sin1 = (npy_packed_static_string *)in1; @@ -451,16 +447,16 @@ maximum_strided_loop(PyArrayMethod_Context *context, char *const data[], // if in and out are the same address, do nothing to avoid a // use-after-free if (in1 != out) { - if (free_and_copy(in1_descr->allocator, out_descr->allocator, - sin1, sout, "maximum") == -1) { + if (free_and_copy(in1_allocator, out_allocator, sin1, sout, + "maximum") == -1) { goto fail; } } } else { if (in2 != out) { - if (free_and_copy(in2_descr->allocator, out_descr->allocator, - sin2, sout, "maximum") == -1) { + if (free_and_copy(in2_allocator, out_allocator, sin2, sout, + "maximum") == -1) { goto fail; } } @@ -470,11 +466,11 @@ maximum_strided_loop(PyArrayMethod_Context *context, 
char *const data[], out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR3(in1_descr, in2_descr, out_descr); + NpyString_release_allocator3(in1_descr, in2_descr, out_descr); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR3(in1_descr, in2_descr, out_descr); + NpyString_release_allocator3(in1_descr, in2_descr, out_descr); return -1; } @@ -497,7 +493,12 @@ minimum_strided_loop(PyArrayMethod_Context *context, char *const data[], npy_intp in2_stride = strides[1]; npy_intp out_stride = strides[2]; - NPY_STRING_ACQUIRE_ALLOCATOR3(in1_descr, in2_descr, out_descr); + npy_string_allocator *in1_allocator = NULL; + npy_string_allocator *in2_allocator = NULL; + npy_string_allocator *out_allocator = NULL; + NpyString_acquire_allocator3(in1_descr, in2_descr, out_descr, + &in1_allocator, &in2_allocator, + &out_allocator); while (N--) { const npy_packed_static_string *sin1 = (npy_packed_static_string *)in1; @@ -507,16 +508,16 @@ minimum_strided_loop(PyArrayMethod_Context *context, char *const data[], // if in and out are the same address, do nothing to avoid a // use-after-free if (in1 != out) { - if (free_and_copy(in1_descr->allocator, out_descr->allocator, - sin1, sout, "minimum") == -1) { + if (free_and_copy(in1_allocator, out_allocator, sin1, sout, + "minimum") == -1) { goto fail; } } } else { if (in2 != out) { - if (free_and_copy(in2_descr->allocator, out_descr->allocator, - sin2, sout, "minimum") == -1) { + if (free_and_copy(in2_allocator, out_allocator, sin2, sout, + "minimum") == -1) { goto fail; } } @@ -526,11 +527,11 @@ minimum_strided_loop(PyArrayMethod_Context *context, char *const data[], out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR3(in1_descr, in2_descr, out_descr); + NpyString_release_allocator3(in1_descr, in2_descr, out_descr); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR3(in1_descr, in2_descr, out_descr); + NpyString_release_allocator3(in1_descr, in2_descr, out_descr); return -1; } @@ -554,15 +555,17 @@ string_equal_strided_loop(PyArrayMethod_Context 
*context, char *const data[], npy_intp in2_stride = strides[1]; npy_intp out_stride = strides[2]; - NPY_STRING_ACQUIRE_ALLOCATOR2(descr1, descr2); + npy_string_allocator *allocator1 = NULL; + npy_string_allocator *allocator2 = NULL; + NpyString_acquire_allocator2(descr1, descr2, &allocator1, &allocator2); while (N--) { const npy_packed_static_string *ps1 = (npy_packed_static_string *)in1; npy_static_string s1 = {0, NULL}; - int s1_isnull = npy_string_load(descr1->allocator, ps1, &s1); + int s1_isnull = NpyString_load(allocator1, ps1, &s1); const npy_packed_static_string *ps2 = (npy_packed_static_string *)in2; npy_static_string s2 = {0, NULL}; - int s2_isnull = npy_string_load(descr2->allocator, ps2, &s2); + int s2_isnull = NpyString_load(allocator2, ps2, &s2); if (NPY_UNLIKELY(s1_isnull < 0 || s2_isnull < 0)) { gil_error(PyExc_MemoryError, "Failed to load string in equal"); goto fail; @@ -604,12 +607,12 @@ string_equal_strided_loop(PyArrayMethod_Context *context, char *const data[], out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR2(descr1, descr2); + NpyString_release_allocator2(descr1, descr2); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR2(descr1, descr2); + NpyString_release_allocator2(descr1, descr2); return -1; } @@ -634,15 +637,17 @@ string_not_equal_strided_loop(PyArrayMethod_Context *context, npy_intp in2_stride = strides[1]; npy_intp out_stride = strides[2]; - NPY_STRING_ACQUIRE_ALLOCATOR2(descr1, descr2); + npy_string_allocator *allocator1 = NULL; + npy_string_allocator *allocator2 = NULL; + NpyString_acquire_allocator2(descr1, descr2, &allocator1, &allocator2); while (N--) { const npy_packed_static_string *ps1 = (npy_packed_static_string *)in1; npy_static_string s1 = {0, NULL}; - int s1_isnull = npy_string_load(descr1->allocator, ps1, &s1); + int s1_isnull = NpyString_load(allocator1, ps1, &s1); const npy_packed_static_string *ps2 = (npy_packed_static_string *)in2; npy_static_string s2 = {0, NULL}; - int s2_isnull = npy_string_load(descr2->allocator, 
ps2, &s2); + int s2_isnull = NpyString_load(allocator2, ps2, &s2); if (NPY_UNLIKELY(s1_isnull < 0 || s2_isnull < 0)) { gil_error(PyExc_MemoryError, "Failed to load string in not equal"); goto fail; @@ -684,12 +689,12 @@ string_not_equal_strided_loop(PyArrayMethod_Context *context, out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR2(descr1, descr2); + NpyString_release_allocator2(descr1, descr2); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR2(descr1, descr2); + NpyString_release_allocator2(descr1, descr2); return -1; } @@ -714,15 +719,17 @@ string_greater_strided_loop(PyArrayMethod_Context *context, char *const data[], npy_intp in2_stride = strides[1]; npy_intp out_stride = strides[2]; - NPY_STRING_ACQUIRE_ALLOCATOR2(descr1, descr2); + npy_string_allocator *allocator1 = NULL; + npy_string_allocator *allocator2 = NULL; + NpyString_acquire_allocator2(descr1, descr2, &allocator1, &allocator2); while (N--) { const npy_packed_static_string *ps1 = (npy_packed_static_string *)in1; npy_static_string s1 = {0, NULL}; - int s1_isnull = npy_string_load(descr1->allocator, ps1, &s1); + int s1_isnull = NpyString_load(allocator1, ps1, &s1); const npy_packed_static_string *ps2 = (npy_packed_static_string *)in2; npy_static_string s2 = {0, NULL}; - int s2_isnull = npy_string_load(descr2->allocator, ps2, &s2); + int s2_isnull = NpyString_load(allocator2, ps2, &s2); if (NPY_UNLIKELY(s1_isnull < 0 || s2_isnull < 0)) { gil_error(PyExc_MemoryError, "Failed to load string in greater"); goto fail; @@ -748,7 +755,7 @@ string_greater_strided_loop(PyArrayMethod_Context *context, char *const data[], } } } - if (npy_string_cmp(&s1, &s2) > 0) { + if (NpyString_cmp(&s1, &s2) > 0) { *out = (npy_bool)1; } else { @@ -761,12 +768,12 @@ string_greater_strided_loop(PyArrayMethod_Context *context, char *const data[], out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR2(descr1, descr2); + NpyString_release_allocator2(descr1, descr2); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR2(descr1, descr2); + 
NpyString_release_allocator2(descr1, descr2); return -1; } @@ -792,15 +799,17 @@ string_greater_equal_strided_loop(PyArrayMethod_Context *context, npy_intp in2_stride = strides[1]; npy_intp out_stride = strides[2]; - NPY_STRING_ACQUIRE_ALLOCATOR2(descr1, descr2); + npy_string_allocator *allocator1 = NULL; + npy_string_allocator *allocator2 = NULL; + NpyString_acquire_allocator2(descr1, descr2, &allocator1, &allocator2); while (N--) { const npy_packed_static_string *ps1 = (npy_packed_static_string *)in1; npy_static_string s1 = {0, NULL}; - int s1_isnull = npy_string_load(descr1->allocator, ps1, &s1); + int s1_isnull = NpyString_load(allocator1, ps1, &s1); const npy_packed_static_string *ps2 = (npy_packed_static_string *)in2; npy_static_string s2 = {0, NULL}; - int s2_isnull = npy_string_load(descr2->allocator, ps2, &s2); + int s2_isnull = NpyString_load(allocator2, ps2, &s2); if (NPY_UNLIKELY(s1_isnull < 0 || s2_isnull < 0)) { gil_error(PyExc_MemoryError, "Failed to load string in greater equal"); @@ -827,7 +836,7 @@ string_greater_equal_strided_loop(PyArrayMethod_Context *context, } } } - if (npy_string_cmp(&s1, &s2) >= 0) { + if (NpyString_cmp(&s1, &s2) >= 0) { *out = (npy_bool)1; } else { @@ -840,12 +849,12 @@ string_greater_equal_strided_loop(PyArrayMethod_Context *context, out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR2(descr1, descr2); + NpyString_release_allocator2(descr1, descr2); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR2(descr1, descr2); + NpyString_release_allocator2(descr1, descr2); return -1; } @@ -869,15 +878,17 @@ string_less_strided_loop(PyArrayMethod_Context *context, char *const data[], npy_intp in2_stride = strides[1]; npy_intp out_stride = strides[2]; - NPY_STRING_ACQUIRE_ALLOCATOR2(descr1, descr2); + npy_string_allocator *allocator1 = NULL; + npy_string_allocator *allocator2 = NULL; + NpyString_acquire_allocator2(descr1, descr2, &allocator1, &allocator2); while (N--) { const npy_packed_static_string *ps1 = (npy_packed_static_string 
*)in1; npy_static_string s1 = {0, NULL}; - int s1_isnull = npy_string_load(descr1->allocator, ps1, &s1); + int s1_isnull = NpyString_load(allocator1, ps1, &s1); const npy_packed_static_string *ps2 = (npy_packed_static_string *)in2; npy_static_string s2 = {0, NULL}; - int s2_isnull = npy_string_load(descr2->allocator, ps2, &s2); + int s2_isnull = NpyString_load(allocator2, ps2, &s2); if (NPY_UNLIKELY(s1_isnull < 0 || s2_isnull < 0)) { gil_error(PyExc_MemoryError, "Failed to load string in less"); goto fail; @@ -903,7 +914,7 @@ string_less_strided_loop(PyArrayMethod_Context *context, char *const data[], } } } - if (npy_string_cmp(&s1, &s2) < 0) { + if (NpyString_cmp(&s1, &s2) < 0) { *out = (npy_bool)1; } else { @@ -916,12 +927,12 @@ string_less_strided_loop(PyArrayMethod_Context *context, char *const data[], out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR2(descr1, descr2); + NpyString_release_allocator2(descr1, descr2); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR2(descr1, descr2); + NpyString_release_allocator2(descr1, descr2); return -1; } @@ -946,15 +957,17 @@ string_less_equal_strided_loop(PyArrayMethod_Context *context, npy_intp in2_stride = strides[1]; npy_intp out_stride = strides[2]; - NPY_STRING_ACQUIRE_ALLOCATOR2(descr1, descr2); + npy_string_allocator *allocator1 = NULL; + npy_string_allocator *allocator2 = NULL; + NpyString_acquire_allocator2(descr1, descr2, &allocator1, &allocator2); while (N--) { const npy_packed_static_string *ps1 = (npy_packed_static_string *)in1; npy_static_string s1 = {0, NULL}; - int s1_isnull = npy_string_load(descr1->allocator, ps1, &s1); + int s1_isnull = NpyString_load(allocator1, ps1, &s1); const npy_packed_static_string *ps2 = (npy_packed_static_string *)in2; npy_static_string s2 = {0, NULL}; - int s2_isnull = npy_string_load(descr2->allocator, ps2, &s2); + int s2_isnull = NpyString_load(allocator2, ps2, &s2); if (NPY_UNLIKELY(s1_isnull < 0 || s2_isnull < 0)) { gil_error(PyExc_MemoryError, "Failed to load string in less 
equal"); @@ -981,7 +994,7 @@ string_less_equal_strided_loop(PyArrayMethod_Context *context, } } } - if (npy_string_cmp(&s1, &s2) <= 0) { + if (NpyString_cmp(&s1, &s2) <= 0) { *out = (npy_bool)1; } else { @@ -994,12 +1007,12 @@ string_less_equal_strided_loop(PyArrayMethod_Context *context, out += out_stride; } - NPY_STRING_RELEASE_ALLOCATOR2(descr1, descr2); + NpyString_release_allocator2(descr1, descr2); return 0; fail: - NPY_STRING_RELEASE_ALLOCATOR2(descr1, descr2); + NpyString_release_allocator2(descr1, descr2); return -1; } @@ -1037,7 +1050,7 @@ string_isnan_strided_loop(PyArrayMethod_Context *context, char *const data[], while (N--) { const npy_packed_static_string *s = (npy_packed_static_string *)in; - if (has_nan_na && npy_string_isnull(s)) { + if (has_nan_na && NpyString_isnull(s)) { *out = (npy_bool)1; } else { diff --git a/stringdtype/tests/test_char.py b/stringdtype/tests/test_char.py index f2373e36..74c5b289 100644 --- a/stringdtype/tests/test_char.py +++ b/stringdtype/tests/test_char.py @@ -23,14 +23,14 @@ def unicode_array(): UNARY_FUNCTIONS = [ - "str_len", + # "str_len", "capitalize", "expandtabs", "isalnum", - # "isalpha", (10-23-23) skipped temporarily since it is now a ufunc - "isdigit", + # "isalpha", + # "isdigit", "islower", - "isspace", + # "isspace", "istitle", "isupper", "lower", @@ -38,8 +38,8 @@ def unicode_array(): "swapcase", "title", "upper", - "isnumeric", - "isdecimal", + # "isnumeric", + # "isdecimal", ] @@ -59,21 +59,21 @@ def test_unary(string_array, unicode_array, function_name): ("multiply", (None, 2)), ("mod", ("format: %s", None)), ("center", (None, 25)), - ("count", (None, "A")), + # ("count", (None, "A")), ("encode", (None, "UTF-8")), - ("endswith", (None, "lo")), - # ("find", (None, "A")), # 11-6-2023 skipped temporarily + # ("endswith", (None, "lo")), + # ("find", (None, "A")), ("index", (None, "e")), ("join", ("-", None)), ("ljust", (None, 12)), ("partition", (None, "A")), ("replace", (None, "A", "B")), - # ("rfind", 
(None, "A")), # 11-6-2023 skipped temporarily + # ("rfind", (None, "A")), ("rindex", (None, "e")), ("rjust", (None, 12)), ("rpartition", (None, "A")), ("split", (None, "A")), - ("startswith", (None, "A")), + # ("startswith", (None, "A")), ("zfill", (None, 12)), ] diff --git a/stringdtype/tests/test_stringdtype.py b/stringdtype/tests/test_stringdtype.py index 2630ed31..e1cc27a0 100644 --- a/stringdtype/tests/test_stringdtype.py +++ b/stringdtype/tests/test_stringdtype.py @@ -811,17 +811,25 @@ def func(arr): rnd = rng.random() # either write to random locations in the array, compute a ufunc, or # re-initialize the array - if rnd < 0.3333: + if rnd < 0.25: num = np.random.randint(0, arr.size) arr[num] = arr[num] + "hello" - elif rnd < 0.6666: - np.add(arr, arr) + elif rnd < 0.5: + if rnd < 0.375: + np.add(arr, arr) + else: + np.add(arr, arr, out=arr) + elif rnd < 0.75: + if rnd < 0.625: + np.multiply(arr, np.int64(2)) + else: + np.multiply(arr, np.int64(2), out=arr) else: arr[:] = random_string_list with concurrent.futures.ThreadPoolExecutor(max_workers=8) as tpe: arr = np.array(random_string_list, dtype=dtype) - futures = [tpe.submit(func, arr) for _ in range(100)] + futures = [tpe.submit(func, arr) for _ in range(500)] for f in futures: f.result()