diff --git a/asciidtype/asciidtype/src/asciidtype_main.c b/asciidtype/asciidtype/src/asciidtype_main.c index 3d54fe90..4bf027ad 100644 --- a/asciidtype/asciidtype/src/asciidtype_main.c +++ b/asciidtype/asciidtype/src/asciidtype_main.c @@ -22,7 +22,7 @@ PyInit__asciidtype_main(void) return NULL; } - if (import_experimental_dtype_api(7) < 0) { + if (import_experimental_dtype_api(8) < 0) { return NULL; } diff --git a/metadatadtype/metadatadtype/src/metadatadtype_main.c b/metadatadtype/metadatadtype/src/metadatadtype_main.c index b33c13c9..bdb21c66 100644 --- a/metadatadtype/metadatadtype/src/metadatadtype_main.c +++ b/metadatadtype/metadatadtype/src/metadatadtype_main.c @@ -21,7 +21,7 @@ PyInit__metadatadtype_main(void) if (_import_array() < 0) { return NULL; } - if (import_experimental_dtype_api(7) < 0) { + if (import_experimental_dtype_api(8) < 0) { return NULL; } diff --git a/mpfdtype/mpfdtype/src/mpfdtype_main.c b/mpfdtype/mpfdtype/src/mpfdtype_main.c index 52d51c26..44ade5da 100644 --- a/mpfdtype/mpfdtype/src/mpfdtype_main.c +++ b/mpfdtype/mpfdtype/src/mpfdtype_main.c @@ -22,7 +22,7 @@ PyInit__mpfdtype_main(void) if (_import_array() < 0) { return NULL; } - if (import_experimental_dtype_api(7) < 0) { + if (import_experimental_dtype_api(8) < 0) { return NULL; } diff --git a/quaddtype/quaddtype/src/quaddtype_main.c b/quaddtype/quaddtype/src/quaddtype_main.c index 680c9921..855ef9ab 100644 --- a/quaddtype/quaddtype/src/quaddtype_main.c +++ b/quaddtype/quaddtype/src/quaddtype_main.c @@ -23,7 +23,7 @@ PyInit__quaddtype_main(void) return NULL; - // Fail to init if the experimental DType API version 5 isn't supported - if (import_experimental_dtype_api(7) < 0) { + // Fail to init if the experimental DType API version 8 isn't supported + if (import_experimental_dtype_api(8) < 0) { PyErr_SetString(PyExc_ImportError, "Error encountered importing the experimental dtype API."); return NULL; diff --git a/stringdtype/README.md b/stringdtype/README.md index c5d4375d..fe8550bf 100644 --- a/stringdtype/README.md +++ b/stringdtype/README.md @@ -10,18 
+10,27 @@ NumPy. Ensure Meson and NumPy are installed in the python environment you would like to use: ``` -$ python3 -m pip install meson meson-python numpy build patchelf +$ python3 -m pip install meson meson-python build patchelf ``` -Build with meson, create a wheel, and install it +It is important to have the latest development version of NumPy installed. +Nightly wheels work well for this purpose, and can be installed easily: +```bash +$ pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy ``` + +Build with meson, create a wheel, and install it. + +```bash $ rm -r dist/ $ meson build $ python -m build --wheel -Cbuilddir=build -$ python -m pip install dist/asciidtype*.whl +$ python -m pip install dist/stringdtype*.whl ``` -The `mesonpy` build backend for pip [does not currently support editable -installs](https://github.com/mesonbuild/meson-python/issues/47), so `pip install --e .` will not work. +Or simply install directly, taking care to install without build isolation: + +```bash +$ pip install -v . --no-build-isolation +``` diff --git a/stringdtype/stringdtype/__init__.py b/stringdtype/stringdtype/__init__.py index d9520291..6fc58f31 100644 --- a/stringdtype/stringdtype/__init__.py +++ b/stringdtype/stringdtype/__init__.py @@ -5,7 +5,6 @@ from .scalar import StringScalar # isort: skip from ._main import StringDType, _memory_usage - __all__ = [ "StringDType", "StringScalar", diff --git a/stringdtype/stringdtype/src/casts.c b/stringdtype/stringdtype/src/casts.c index 1d014f4e..ea75e4fa 100644 --- a/stringdtype/stringdtype/src/casts.c +++ b/stringdtype/stringdtype/src/casts.c @@ -295,7 +295,7 @@ string_to_unicode_resolve_descriptors(PyObject *NPY_UNUSED(self), // codepoint for the next character, returning the size of the character in // bytes. 
Does not do any validation or error checking: assumes *c* is valid // utf-8 -static size_t +size_t utf8_char_to_ucs4_code(unsigned char *c, Py_UCS4 *code) { if (c[0] <= 0x7F) { diff --git a/stringdtype/stringdtype/src/casts.h b/stringdtype/stringdtype/src/casts.h index 344bea3a..b8ac06bf 100644 --- a/stringdtype/stringdtype/src/casts.h +++ b/stringdtype/stringdtype/src/casts.h @@ -13,4 +13,7 @@ PyArrayMethod_Spec ** get_casts(void); +size_t +utf8_char_to_ucs4_code(unsigned char *, Py_UCS4 *); + #endif /* _NPY_CASTS_H */ diff --git a/stringdtype/stringdtype/src/dtype.c b/stringdtype/stringdtype/src/dtype.c index 4832e1ac..1a8b492a 100644 --- a/stringdtype/stringdtype/src/dtype.c +++ b/stringdtype/stringdtype/src/dtype.c @@ -155,6 +155,16 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr) return res; } +// Implementation of PyArray_CompareFunc. +// Orders strings by code point: strcmp on UTF-8 bytes preserves that order. +int +compare_strings(char **a, char **b, PyArrayObject *NPY_UNUSED(arr)) +{ + ss *ss_a = (ss *)*a; + ss *ss_b = (ss *)*b; + return strcmp(ss_a->buf, ss_b->buf); +} + static StringDTypeObject * stringdtype_ensure_canonical(StringDTypeObject *self) { @@ -170,6 +180,7 @@ static PyType_Slot StringDType_Slots[] = { {NPY_DT_setitem, &stringdtype_setitem}, {NPY_DT_getitem, &stringdtype_getitem}, {NPY_DT_ensure_canonical, &stringdtype_ensure_canonical}, + {NPY_DT_PyArray_ArrFuncs_compare, &compare_strings}, {0, NULL}}; static PyObject * diff --git a/stringdtype/stringdtype/src/main.c b/stringdtype/stringdtype/src/main.c index a4e63772..27666c35 100644 --- a/stringdtype/stringdtype/src/main.c +++ b/stringdtype/stringdtype/src/main.c @@ -90,7 +90,7 @@ PyInit__main(void) if (_import_array() < 0) { return NULL; } - if (import_experimental_dtype_api(7) < 0) { + if (import_experimental_dtype_api(8) < 0) { return NULL; } diff --git a/stringdtype/tests/test_stringdtype.py b/stringdtype/tests/test_stringdtype.py index 617e3387..ccdfe403 100644 --- 
a/stringdtype/tests/test_stringdtype.py +++ b/stringdtype/tests/test_stringdtype.py @@ -161,3 +161,23 @@ def test_pickle(string_list): assert res[1] == dtype os.remove(f.name) + + +@pytest.mark.parametrize( + "strings", + [ + ["left", "right", "leftovers", "righty", "up", "down"], + ["🤣🤣", "🤣", "📵", "😰"], + ["🚜", "🙃", "😾"], + ["😹", "🚠", "🚌"], + ["A¢☃€ 😊", " A☃€¢😊", "☃€😊 A¢", "😊☃A¢ €"], + ], +) +def test_sort(strings): + """Test that sorting matches python's internal sorting.""" + arr = np.array(strings, dtype=StringDType()) + arr_sorted = np.array(sorted(strings), dtype=StringDType()) + + np.random.default_rng().shuffle(arr) + arr.sort() + np.testing.assert_array_equal(arr, arr_sorted) diff --git a/unytdtype/unytdtype/src/unytdtype_main.c b/unytdtype/unytdtype/src/unytdtype_main.c index 063665bb..81a0e0ce 100644 --- a/unytdtype/unytdtype/src/unytdtype_main.c +++ b/unytdtype/unytdtype/src/unytdtype_main.c @@ -21,7 +21,7 @@ PyInit__unytdtype_main(void) if (_import_array() < 0) { return NULL; } - if (import_experimental_dtype_api(7) < 0) { + if (import_experimental_dtype_api(8) < 0) { return NULL; }