From be0e0bd636f54f7990606197cb29c925e4a31f2d Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Mon, 2 Dec 2024 16:58:51 +0000 Subject: [PATCH 1/7] [mypyc] Add primitive and specializations for ord --- mypyc/irbuild/specialize.py | 11 +++++++++++ mypyc/lib-rt/bytes_ops.c | 11 +++++++++++ mypyc/lib-rt/str_ops.c | 12 ++++++++++++ mypyc/lower/str_ops.py | 8 ++++++++ mypyc/primitives/bytes_ops.py | 8 ++++++++ mypyc/primitives/registry.py | 2 +- mypyc/primitives/str_ops.py | 9 +++++++++ mypyc/test-data/irbuild-str.test | 30 ++++++++++++++++++++++++++++++ 8 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 mypyc/lower/str_ops.py diff --git a/mypyc/irbuild/specialize.py b/mypyc/irbuild/specialize.py index cb69852af1ce..f652449f5289 100644 --- a/mypyc/irbuild/specialize.py +++ b/mypyc/irbuild/specialize.py @@ -19,6 +19,7 @@ from mypy.nodes import ( ARG_NAMED, ARG_POS, + BytesExpr, CallExpr, DictExpr, Expression, @@ -877,3 +878,13 @@ def translate_float(builder: IRBuilder, expr: CallExpr, callee: RefExpr) -> Valu # No-op float conversion. return builder.accept(arg) return None + + +@specialize_function("builtins.ord") +def translate_ord(builder: IRBuilder, expr: CallExpr, callee: RefExpr) -> Value | None: + if len(expr.args) != 1 or expr.arg_kinds[0] != ARG_POS: + return None + arg = expr.args[0] + if isinstance(arg, (StrExpr, BytesExpr)) and len(arg.value) == 1: + return Integer(ord(arg.value)) + return None diff --git a/mypyc/lib-rt/bytes_ops.c b/mypyc/lib-rt/bytes_ops.c index 0cb2f300d507..85878b987899 100644 --- a/mypyc/lib-rt/bytes_ops.c +++ b/mypyc/lib-rt/bytes_ops.c @@ -141,3 +141,14 @@ PyObject *CPyBytes_Build(Py_ssize_t len, ...) { return (PyObject *)ret; } + + +CPyTagged CPyBytes_Ord(PyObject *obj) { + Py_ssize_t s = PyBytes_GET_SIZE(obj); + if (s == 1) { + return PyBytes_AS_STRING(obj)[0] << 1; + } + // TODO: bytearray + PyErr_SetString(PyExc_TypeError, "ord() expects a character"); + return CPY_INT_TAG; +} diff --git a/mypyc/lib-rt/str_ops.c b/mypyc/lib-rt/str_ops.c index 4ba181bcce85..f99d76c9e740 100644 --- a/mypyc/lib-rt/str_ops.c +++ b/mypyc/lib-rt/str_ops.c @@ -243,3 +243,15 @@ PyObject *CPy_Encode(PyObject *obj, PyObject *encoding, PyObject *errors) { return NULL; } } + + +CPyTagged CPyStr_Ord(PyObject *obj) { + Py_ssize_t s = PyUnicode_GET_LENGTH(obj); + if (s == 1) { + int kind = PyUnicode_KIND(obj); + return PyUnicode_READ(kind, PyUnicode_DATA(obj), 0) << 1; + } + PyErr_Format( + PyExc_TypeError, "ord() expected a character, but a string of length %d found", (int)s); + return CPY_INT_TAG; +} diff --git a/mypyc/lower/str_ops.py b/mypyc/lower/str_ops.py new file mode 100644 index 000000000000..382b32a31d42 --- /dev/null +++ b/mypyc/lower/str_ops.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from mypyc.ir.ops import GetElementPtr, LoadMem, Value, LoadLiteral, Integer +from mypyc.ir.rtypes import PyVarObject, c_pyssize_t_rprimitive +from mypyc.irbuild.ll_builder import LowLevelIRBuilder +from mypyc.lower.registry import lower_primitive_op + + diff --git a/mypyc/primitives/bytes_ops.py b/mypyc/primitives/bytes_ops.py index d7a7f3e2f59b..1afd196cff84 100644 --- a/mypyc/primitives/bytes_ops.py +++ b/mypyc/primitives/bytes_ops.py @@ -99,3 +99,11 @@ error_kind=ERR_MAGIC, var_arg_type=bytes_rprimitive, ) + +function_op( + name="builtins.ord", + arg_types=[bytes_rprimitive], + return_type=int_rprimitive, + c_function_name="CPyBytes_Ord", + error_kind=ERR_MAGIC, +) diff --git a/mypyc/primitives/registry.py b/mypyc/primitives/registry.py index 5e7ecb70f55d..45669b0be987 100644 --- a/mypyc/primitives/registry.py +++ b/mypyc/primitives/registry.py @@ -152,7 +152,7 @@ def function_op( name: str, arg_types: list[RType], return_type: RType, - c_function_name: str, + c_function_name: str | None, error_kind: int, var_arg_type: RType | None = None, truncated_type: RType | None = None, diff --git a/mypyc/primitives/str_ops.py b/mypyc/primitives/str_ops.py index 3a5495e21c1b..685483f3c443 100644 --- a/mypyc/primitives/str_ops.py +++ b/mypyc/primitives/str_ops.py @@ -23,6 +23,7 @@ function_op, load_address_op, method_op, + custom_primitive_op, ) # Get the 'str' type object. @@ -251,3 +252,11 @@ c_function_name="CPy_Encode", error_kind=ERR_MAGIC, ) + +function_op( + name="builtins.ord", + arg_types=[str_rprimitive], + return_type=int_rprimitive, + c_function_name="CPyStr_Ord", + error_kind=ERR_MAGIC, +) diff --git a/mypyc/test-data/irbuild-str.test b/mypyc/test-data/irbuild-str.test index 61e5a42cf3ef..3d02e6f4fa1b 100644 --- a/mypyc/test-data/irbuild-str.test +++ b/mypyc/test-data/irbuild-str.test @@ -383,3 +383,33 @@ L0: r37 = 'latin2' r38 = CPy_Encode(s, r37, 0) return 1 + +[case testOrd] +def str_ord(x: str) -> int: + return ord(x) +def str_ord_literal() -> int: + return ord("a") +def bytes_ord(x: bytes) -> int: + return ord(x) +def bytes_ord_literal() -> int: + return ord(b"a") +[out] +def str_ord(x): + x :: str + r0 :: int +L0: + r0 = CPyStr_Ord(x) + return r0 +def str_ord_literal(): +L0: + return 194 +def bytes_ord(x): + x :: bytes + r0 :: int +L0: + r0 = CPyBytes_Ord(x) + return r0 +def bytes_ord_literal(): +L0: + return 194 + From ba56a1e1068f59ddaa1d09f452b41e487e044730 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Mon, 2 Dec 2024 17:01:26 +0000 Subject: [PATCH 2/7] Update docs --- mypyc/doc/str_operations.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mypyc/doc/str_operations.rst b/mypyc/doc/str_operations.rst index 5420c8af7d31..f9fb225a1dbf 100644 --- a/mypyc/doc/str_operations.rst +++ b/mypyc/doc/str_operations.rst @@ -33,3 +33,9 @@ Methods * ``s.split(sep: str)`` * ``s.split(sep: str, maxsplit: int)`` * ``s1.startswith(s2: str)`` + +Other +----- + + * ``len(s: str)`` + * ``ord(s: str)`` From cd4e3fc7c76dd8ba61125f286017da0ae8696d11 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Mon, 2 Dec 2024 17:14:48 +0000 Subject: [PATCH 3/7] Fixes + more tests --- mypyc/lib-rt/bytes_ops.c | 14 ++++++++++---- mypyc/test-data/run-bytes.test | 19 +++++++++++++++++++ mypyc/test-data/run-strings.test | 9 ++++++++- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/mypyc/lib-rt/bytes_ops.c b/mypyc/lib-rt/bytes_ops.c index 85878b987899..5ddf3528211f 100644 --- a/mypyc/lib-rt/bytes_ops.c +++ b/mypyc/lib-rt/bytes_ops.c @@ -144,11 +144,17 @@ PyObject *CPyBytes_Build(Py_ssize_t len, ...) { CPyTagged CPyBytes_Ord(PyObject *obj) { - Py_ssize_t s = PyBytes_GET_SIZE(obj); - if (s == 1) { - return PyBytes_AS_STRING(obj)[0] << 1; + if (PyBytes_Check(obj)) { + Py_ssize_t s = PyBytes_GET_SIZE(obj); + if (s == 1) { + return (unsigned char)(PyBytes_AS_STRING(obj)[0]) << 1; + } + } else if (PyByteArray_Check(obj)) { + Py_ssize_t s = PyByteArray_GET_SIZE(obj); + if (s == 1) { + return (unsigned char)(PyByteArray_AS_STRING(obj)[0]) << 1; + } } - // TODO: bytearray PyErr_SetString(PyExc_TypeError, "ord() expects a character"); return CPY_INT_TAG; } diff --git a/mypyc/test-data/run-bytes.test b/mypyc/test-data/run-bytes.test index aaf541194ac6..885f9129c591 100644 --- a/mypyc/test-data/run-bytes.test +++ b/mypyc/test-data/run-bytes.test @@ -111,6 +111,25 @@ def test_len() -> None: assert len(b) == 3 assert len(bytes()) == 0 +def test_ord() -> None: + assert ord(b'a') == ord('a') + assert ord(b'\x00') == 0 + assert ord(b'\x00' + bytes()) == 0 + assert ord(b'\xfe') == 254 + assert ord(b'\xfe' + bytes()) == 254 + assert ord(b'a' + bytes()) == ord('a') + + with assertRaises(TypeError): + ord(b'aa') + +def test_ord_bytesarray() -> None: + assert ord(bytearray(b'a')) == ord('a') + assert ord(bytearray(b'\x00')) == 0 + assert ord(bytearray(b'\xfe')) == 254 + + with assertRaises(TypeError): + ord(bytearray(b'aa')) + [case testBytesSlicing] def test_bytes_slicing() -> None: b = b'abcdefg' diff --git a/mypyc/test-data/run-strings.test b/mypyc/test-data/run-strings.test index 2becae848f7c..d5a52539c890 100644 --- a/mypyc/test-data/run-strings.test +++ b/mypyc/test-data/run-strings.test @@ -567,6 +567,7 @@ def test_chr() -> None: [case testOrd] def test_ord() -> None: assert ord('\ue000') == 57344 + assert ord('\ue000' + str()) == 57344 s = "a\xac\u1234\u20ac\U00008000" # ^^^^ two-digit hex escape # ^^^^^^ four-digit Unicode escape @@ -577,13 +578,19 @@ def test_ord() -> None: assert ord(u[-1]) == 233 assert ord(b'a') == 97 assert ord(b'a' + bytes()) == 97 - u2 = '\U0010ffff' + u2 = '\U0010ffff' + str() assert ord(u2) == 1114111 + assert ord('\U0010ffff') == 1114111 try: ord('aa') assert False except TypeError: pass + try: + ord('') + assert False + except TypeError: + pass [case testDecode] def test_decode() -> None: From 67d9a7f03e423b39eca1d428ca86c248b629369e Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Tue, 3 Dec 2024 09:57:57 +0000 Subject: [PATCH 4/7] Add function declarations --- mypyc/lib-rt/CPy.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index 833b1bd2e76a..d3637cde49ff 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -730,6 +730,7 @@ bool CPyStr_IsTrue(PyObject *obj); Py_ssize_t CPyStr_Size_size_t(PyObject *str); PyObject *CPy_Decode(PyObject *obj, PyObject *encoding, PyObject *errors); PyObject *CPy_Encode(PyObject *obj, PyObject *encoding, PyObject *errors); +CPyTagged CPyStr_Ord(PyObject *obj); // Bytes operations @@ -740,6 +741,7 @@ PyObject *CPyBytes_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end); CPyTagged CPyBytes_GetItem(PyObject *o, CPyTagged index); PyObject *CPyBytes_Concat(PyObject *a, PyObject *b); PyObject *CPyBytes_Join(PyObject *sep, PyObject *iter); +CPyTagged CPyBytes_Ord(PyObject *obj); int CPyBytes_Compare(PyObject *left, PyObject *right); From 88efef4d7ca81ad1ba847e3fc10c8be23c9cef11 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Wed, 4 Dec 2024 11:16:10 +0000 Subject: [PATCH 5/7] Various minor updates --- mypyc/doc/str_operations.rst | 4 ++-- mypyc/lib-rt/str_ops.c | 2 +- mypyc/primitives/registry.py | 2 +- mypyc/test-data/irbuild-str.test | 15 ++++++++++++++- mypyc/test-data/run-bytes.test | 6 +++++- mypyc/test-data/run-strings.test | 26 +++++++++++++------------- 6 files changed, 36 insertions(+), 19 deletions(-) diff --git a/mypyc/doc/str_operations.rst b/mypyc/doc/str_operations.rst index f9fb225a1dbf..a8f2cf43a991 100644 --- a/mypyc/doc/str_operations.rst +++ b/mypyc/doc/str_operations.rst @@ -34,8 +34,8 @@ Methods * ``s.split(sep: str, maxsplit: int)`` * ``s1.startswith(s2: str)`` -Other ------ +Functions +--------- * ``len(s: str)`` * ``ord(s: str)`` diff --git a/mypyc/lib-rt/str_ops.c b/mypyc/lib-rt/str_ops.c index f99d76c9e740..68026037502d 100644 --- a/mypyc/lib-rt/str_ops.c +++ b/mypyc/lib-rt/str_ops.c @@ -252,6 +252,6 @@ CPyTagged CPyStr_Ord(PyObject *obj) { return PyUnicode_READ(kind, PyUnicode_DATA(obj), 0) << 1; } PyErr_Format( - PyExc_TypeError, "ord() expected a character, but a string of length %d found", (int)s); + PyExc_TypeError, "ord() expected a character, but a string of length %zd found", s); return CPY_INT_TAG; } diff --git a/mypyc/primitives/registry.py b/mypyc/primitives/registry.py index 45669b0be987..5e7ecb70f55d 100644 --- a/mypyc/primitives/registry.py +++ b/mypyc/primitives/registry.py @@ -152,7 +152,7 @@ def function_op( name: str, arg_types: list[RType], return_type: RType, - c_function_name: str | None, + c_function_name: str, error_kind: int, var_arg_type: RType | None = None, truncated_type: RType | None = None, diff --git a/mypyc/test-data/irbuild-str.test b/mypyc/test-data/irbuild-str.test index 3d02e6f4fa1b..d17c66bba22f 100644 --- a/mypyc/test-data/irbuild-str.test +++ b/mypyc/test-data/irbuild-str.test @@ -393,6 +393,8 @@ def bytes_ord(x: bytes) -> int: return ord(x) def bytes_ord_literal() -> int: return ord(b"a") +def any_ord(x) -> int: + return ord(x) [out] def str_ord(x): x :: str @@ -412,4 +414,15 @@ L0: def bytes_ord_literal(): L0: return 194 - +def any_ord(x): + x, r0 :: object + r1 :: str + r2, r3 :: object + r4 :: int +L0: + r0 = builtins :: module + r1 = 'ord' + r2 = CPyObject_GetAttr(r0, r1) + r3 = PyObject_CallFunctionObjArgs(r2, x, 0) + r4 = unbox(int, r3) + return r4 diff --git a/mypyc/test-data/run-bytes.test b/mypyc/test-data/run-bytes.test index 885f9129c591..fa63c46a6798 100644 --- a/mypyc/test-data/run-bytes.test +++ b/mypyc/test-data/run-bytes.test @@ -113,14 +113,16 @@ def test_len() -> None: def test_ord() -> None: assert ord(b'a') == ord('a') + assert ord(b'a' + bytes()) == ord('a') assert ord(b'\x00') == 0 assert ord(b'\x00' + bytes()) == 0 assert ord(b'\xfe') == 254 assert ord(b'\xfe' + bytes()) == 254 - assert ord(b'a' + bytes()) == ord('a') with assertRaises(TypeError): ord(b'aa') + with assertRaises(TypeError): + ord(b'') def test_ord_bytesarray() -> None: assert ord(bytearray(b'a')) == ord('a') @@ -129,6 +131,8 @@ def test_ord_bytesarray() -> None: with assertRaises(TypeError): ord(bytearray(b'aa')) + with assertRaises(TypeError): + ord(bytearray(b'')) [case testBytesSlicing] def test_bytes_slicing() -> None: diff --git a/mypyc/test-data/run-strings.test b/mypyc/test-data/run-strings.test index d5a52539c890..1caddce9848d 100644 --- a/mypyc/test-data/run-strings.test +++ b/mypyc/test-data/run-strings.test @@ -565,15 +565,21 @@ def test_chr() -> None: assert try_invalid(1114112) [case testOrd] +from testutil import assertRaises + def test_ord() -> None: + assert ord(' ') == 32 + assert ord(' ' + str()) == 32 + assert ord('\x00') == 0 + assert ord('\x00' + str()) == 0 assert ord('\ue000') == 57344 assert ord('\ue000' + str()) == 57344 - s = "a\xac\u1234\u20ac\U00008000" - # ^^^^ two-digit hex escape - # ^^^^^^ four-digit Unicode escape - # ^^^^^^^^^^ eight-digit Unicode escape + s = "a\xac\u1234\u20ac\U00010000" + # ^^^^ two-digit hex escape + # ^^^^^^ four-digit Unicode escape + # ^^^^^^^^^^ eight-digit Unicode escape l1 = [ord(c) for c in s] - assert l1 == [97, 172, 4660, 8364, 32768] + assert l1 == [97, 172, 4660, 8364, 65536] u = 'abcdé' assert ord(u[-1]) == 233 assert ord(b'a') == 97 @@ -581,16 +587,10 @@ def test_ord() -> None: u2 = '\U0010ffff' + str() assert ord(u2) == 1114111 assert ord('\U0010ffff') == 1114111 - try: + with assertRaises(TypeError, "ord() expected a character, but a string of length 2 found"): ord('aa') - assert False - except TypeError: - pass - try: + with assertRaises(TypeError): ord('') - assert False - except TypeError: - pass [case testDecode] def test_decode() -> None: From cdba80ade3f7bf00b4214dfa18218e37d988c772 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Wed, 4 Dec 2024 11:23:19 +0000 Subject: [PATCH 6/7] Remove unused file --- mypyc/lower/str_ops.py | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 mypyc/lower/str_ops.py diff --git a/mypyc/lower/str_ops.py b/mypyc/lower/str_ops.py deleted file mode 100644 index 382b32a31d42..000000000000 --- a/mypyc/lower/str_ops.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import annotations - -from mypyc.ir.ops import GetElementPtr, LoadMem, Value, LoadLiteral, Integer -from mypyc.ir.rtypes import PyVarObject, c_pyssize_t_rprimitive -from mypyc.irbuild.ll_builder import LowLevelIRBuilder -from mypyc.lower.registry import lower_primitive_op - - From 775ef3740dd19432ed4b4cbfed684c89fdf13ef0 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Wed, 4 Dec 2024 11:25:54 +0000 Subject: [PATCH 7/7] Lint --- mypyc/primitives/str_ops.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mypyc/primitives/str_ops.py b/mypyc/primitives/str_ops.py index 685483f3c443..0accffd86a17 100644 --- a/mypyc/primitives/str_ops.py +++ b/mypyc/primitives/str_ops.py @@ -23,7 +23,6 @@ function_op, load_address_op, method_op, - custom_primitive_op, ) # Get the 'str' type object.