Skip to content

Commit aef38e9

Browse files
committed
Add string comparison support so that x.sort() works
1 parent aa1d862 commit aef38e9

File tree

12 files changed

+55
-14
lines changed

12 files changed

+55
-14
lines changed

asciidtype/asciidtype/src/asciidtype_main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ PyInit__asciidtype_main(void)
2222
return NULL;
2323
}
2424

25-
if (import_experimental_dtype_api(7) < 0) {
25+
if (import_experimental_dtype_api(8) < 0) {
2626
return NULL;
2727
}
2828

metadatadtype/metadatadtype/src/metadatadtype_main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ PyInit__metadatadtype_main(void)
2121
if (_import_array() < 0) {
2222
return NULL;
2323
}
24-
if (import_experimental_dtype_api(7) < 0) {
24+
if (import_experimental_dtype_api(8) < 0) {
2525
return NULL;
2626
}
2727

mpfdtype/mpfdtype/src/mpfdtype_main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ PyInit__mpfdtype_main(void)
2222
if (_import_array() < 0) {
2323
return NULL;
2424
}
25-
if (import_experimental_dtype_api(7) < 0) {
25+
if (import_experimental_dtype_api(8) < 0) {
2626
return NULL;
2727
}
2828

quaddtype/quaddtype/src/quaddtype_main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ PyInit__quaddtype_main(void)
2323
return NULL;
2424

2525
// Fail to init if the required experimental DType API version isn't supported
26-
if (import_experimental_dtype_api(7) < 0) {
26+
if (import_experimental_dtype_api(8) < 0) {
2727
PyErr_SetString(PyExc_ImportError,
2828
"Error encountered importing the experimental dtype API.");
2929
return NULL;

stringdtype/README.md

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,26 @@ NumPy.
1010
Ensure Meson and the other build tools are installed in the Python environment you would like to use (NumPy itself is installed separately, as described below):
1111

1212
```
13-
$ python3 -m pip install meson meson-python numpy build patchelf
13+
$ python3 -m pip install meson meson-python build patchelf
1414
```
1515

16-
Build with meson, create a wheel, and install it
16+
It is important to have the latest development version of numpy installed.
17+
Nightly wheels work well for this purpose, and can be installed easily:
1718

19+
```bash
20+
$ pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
1821
```
22+
23+
Build with meson and create a wheel; the wheel is written to `dist/` and can then be installed with pip.
24+
25+
```bash
1926
$ rm -r dist/
2027
$ meson build
2128
$ python -m build --wheel -Cbuilddir=build
22-
$ python -m pip install dist/asciidtype*.whl
2329
```
2430

25-
The `mesonpy` build backend for pip [does not currently support editable
26-
installs](https://github.com/mesonbuild/meson-python/issues/47), so `pip install
27-
-e .` will not work.
31+
Or simply install directly, taking care to install without build isolation:
32+
33+
```bash
34+
$ pip install -v . --no-build-isolation
35+
```

stringdtype/stringdtype/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from .scalar import StringScalar # isort: skip
66
from ._main import StringDType, _memory_usage
77

8-
98
__all__ = [
109
"StringDType",
1110
"StringScalar",

stringdtype/stringdtype/src/casts.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ string_to_unicode_resolve_descriptors(PyObject *NPY_UNUSED(self),
295295
// codepoint for the next character, returning the size of the character in
296296
// bytes. Does not do any validation or error checking: assumes *c* is valid
297297
// utf-8
298-
static size_t
298+
size_t
299299
utf8_char_to_ucs4_code(unsigned char *c, Py_UCS4 *code)
300300
{
301301
if (c[0] <= 0x7F) {

stringdtype/stringdtype/src/casts.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,7 @@
1313
PyArrayMethod_Spec **
1414
get_casts(void);
1515

16+
size_t
17+
utf8_char_to_ucs4_code(unsigned char *, Py_UCS4 *);
18+
1619
#endif /* _NPY_CASTS_H */

stringdtype/stringdtype/src/dtype.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,16 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
155155
return res;
156156
}
157157

158+
// Implementation of PyArray_CompareFunc.
159+
// Compares UTF-8 encoded strings byte-wise with strcmp, which orders them by code point.
160+
int
161+
compare_strings(char **a, char **b, PyArrayObject *NPY_UNUSED(arr))
162+
{
163+
ss *ss_a = (ss *)*a;
164+
ss *ss_b = (ss *)*b;
165+
return strcmp(ss_a->buf, ss_b->buf);
166+
}
167+
158168
static StringDTypeObject *
159169
stringdtype_ensure_canonical(StringDTypeObject *self)
160170
{
@@ -170,6 +180,7 @@ static PyType_Slot StringDType_Slots[] = {
170180
{NPY_DT_setitem, &stringdtype_setitem},
171181
{NPY_DT_getitem, &stringdtype_getitem},
172182
{NPY_DT_ensure_canonical, &stringdtype_ensure_canonical},
183+
{NPY_DT_PyArray_ArrFuncs_compare, &compare_strings},
173184
{0, NULL}};
174185

175186
static PyObject *

stringdtype/stringdtype/src/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ PyInit__main(void)
9090
if (_import_array() < 0) {
9191
return NULL;
9292
}
93-
if (import_experimental_dtype_api(7) < 0) {
93+
if (import_experimental_dtype_api(8) < 0) {
9494
return NULL;
9595
}
9696

stringdtype/tests/test_stringdtype.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,23 @@ def test_pickle(string_list):
161161
assert res[1] == dtype
162162

163163
os.remove(f.name)
164+
165+
166+
@pytest.mark.parametrize(
167+
"strings",
168+
[
169+
["left", "right", "leftovers", "righty", "up", "down"],
170+
["🤣🤣", "🤣", "📵", "😰"],
171+
["🚜", "🙃", "😾"],
172+
["😹", "🚠", "🚌"],
173+
["A¢☃€ 😊", " A☃€¢😊", "☃€😊 A¢", "😊☃A¢ €"],
174+
],
175+
)
176+
def test_sort(strings):
177+
"""Test that sorting matches python's internal sorting."""
178+
arr = np.array(strings, dtype=StringDType())
179+
arr_sorted = np.array(sorted(strings), dtype=StringDType())
180+
181+
np.random.default_rng().shuffle(arr)
182+
arr.sort()
183+
np.testing.assert_array_equal(arr, arr_sorted)

unytdtype/unytdtype/src/unytdtype_main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ PyInit__unytdtype_main(void)
2121
if (_import_array() < 0) {
2222
return NULL;
2323
}
24-
if (import_experimental_dtype_api(7) < 0) {
24+
if (import_experimental_dtype_api(8) < 0) {
2525
return NULL;
2626
}
2727

0 commit comments

Comments
 (0)