Open
Description
Crash report
What happened?
The pure Python code in the sqlglot package manages to trigger an assertion in CPython:
python: Objects/unicodeobject.c:10387: _PyUnicode_JoinArray: Assertion `res_data == PyUnicode_1BYTE_DATA(res) + kind * PyUnicode_GET_LENGTH(res)' failed.
Aborted (core dumped)
Unfortunately, due to limited time, this is as far as I've been able to reduce it:
from sqlglot import parse_one
parse_one("SELECT * FROM taxi ORDER BY 1 OFFSET 0 ROWS FETCH NEXT 3 ROWS ONLY").sql()
I can reproduce with 3.14.0b2 and 4109a9c, built with --with-assertions (but for some reason, it doesn't happen if I build --with-pydebug), against sqlglot 26.23.0, i.e.:
CFLAGS='-O0 -g' ./configure -C --with-assertions
make -j$(nproc)
./python -m venv .venv
.venv/bin/pip install sqlglot
.venv/bin/python -c 'from sqlglot import parse_one; parse_one("SELECT * FROM taxi ORDER BY 1 OFFSET 0 ROWS FETCH NEXT 3 ROWS ONLY").sql()'
(gdb) bt
#0 0x00007feb24e84dbc in ?? () from /usr/lib64/libc.so.6
#1 0x00007feb24e2c8e6 in raise () from /usr/lib64/libc.so.6
#2 0x00007feb24e1434b in abort () from /usr/lib64/libc.so.6
#3 0x00007feb24e142b5 in ?? () from /usr/lib64/libc.so.6
#4 0x000055e03a36ecf3 in _PyUnicode_JoinArray (separator=0x55e03a899bc8 <_PyRuntime+35496>, items=0x7ffeef0e9278, seqlen=4)
at Objects/unicodeobject.c:10387
#5 0x000055e03a423f4a in _PyEval_EvalFrameDefault (tstate=0x55e03a8de070 <_PyRuntime+315216>, frame=0x7feb250e9690, throwflag=0)
at Python/generated_cases.c.h:1414
#6 0x000055e03a41a1f2 in _PyEval_EvalFrame (tstate=0x55e03a8de070 <_PyRuntime+315216>, frame=0x7feb250e9120, throwflag=0)
at ./Include/internal/pycore_ceval.h:119
#7 0x000055e03a45c7d0 in _PyEval_Vector (tstate=0x55e03a8de070 <_PyRuntime+315216>, func=0x7feb237f54e0, locals=0x0,
args=0x7ffeef0e95f0, argcount=2, kwnames=0x0) at Python/ceval.c:1975
#8 0x000055e03a21d64d in _PyFunction_Vectorcall (func=0x7feb237f54e0, stack=0x7ffeef0e95f0, nargsf=2, kwnames=0x0)
at Objects/call.c:413
#9 0x000055e03a2213da in _PyObject_VectorcallTstate (tstate=0x55e03a8de070 <_PyRuntime+315216>, callable=0x7feb237f54e0,
args=0x7ffeef0e95f0, nargsf=2, kwnames=0x0) at ./Include/internal/pycore_call.h:169
#10 0x000055e03a221f4e in method_vectorcall (method=0x7feb237a7c80, args=0x7feb240d5220, nargsf=1, kwnames=0x0)
at Objects/classobject.c:94
#11 0x000055e03a21cfc4 in _PyVectorcall_Call (tstate=0x55e03a8de070 <_PyRuntime+315216>, func=0x55e03a221c5a <method_vectorcall>,
callable=0x7feb237a7c80, tuple=0x7feb240d5200, kwargs=0x7feb23810ac0) at Objects/call.c:273
#12 0x000055e03a21d36b in _PyObject_Call (tstate=0x55e03a8de070 <_PyRuntime+315216>, callable=0x7feb237a7c80, args=0x7feb240d5200,
kwargs=0x7feb23810ac0) at Objects/call.c:348
#13 0x000055e03a21d446 in PyObject_Call (callable=0x7feb237a7c80, args=0x7feb240d5200, kwargs=0x7feb23810ac0) at Objects/call.c:373
#14 0x000055e03a42a262 in _PyEval_EvalFrameDefault (tstate=0x55e03a8de070 <_PyRuntime+315216>, frame=0x7feb250e9088, throwflag=0)
at Python/generated_cases.c.h:2654
#15 0x000055e03a41a1f2 in _PyEval_EvalFrame (tstate=0x55e03a8de070 <_PyRuntime+315216>, frame=0x7feb250e9020, throwflag=0)
at ./Include/internal/pycore_ceval.h:119
#16 0x000055e03a45c7d0 in _PyEval_Vector (tstate=0x55e03a8de070 <_PyRuntime+315216>, func=0x7feb240e73d0, locals=0x7feb240f0300,
args=0x0, argcount=0, kwnames=0x0) at Python/ceval.c:1975
#17 0x000055e03a41d431 in PyEval_EvalCode (co=0x7feb24112780, globals=0x7feb240f0300, locals=0x7feb240f0300) at Python/ceval.c:866
#18 0x000055e03a51b066 in run_eval_code_obj (tstate=0x55e03a8de070 <_PyRuntime+315216>, co=0x7feb24112780, globals=0x7feb240f0300,
locals=0x7feb240f0300) at Python/pythonrun.c:1365
#19 0x000055e03a51b5dc in run_mod (mod=0x55e05c7c3728, filename=0x7feb240f0370, globals=0x7feb240f0300, locals=0x7feb240f0300,
flags=0x7ffeef0ed160, arena=0x7feb24d07cb0, interactive_src=0x7feb241259d0, generate_new_source=0) at Python/pythonrun.c:1436
#20 0x000055e03a51ac55 in _PyRun_StringFlagsWithName (
str=0x7feb24125a90 "from sqlglot import parse_one; parse_one(\"SELECT * FROM taxi ORDER BY 1 OFFSET 0 ROWS FETCH NEXT 3 ROWS ONLY\").sql()\n", name=0x7feb240f0370, start=257, globals=0x7feb240f0300, locals=0x7feb240f0300, flags=0x7ffeef0ed160,
generate_new_source=0) at Python/pythonrun.c:1259
#21 0x000055e03a518cf4 in _PyRun_SimpleStringFlagsWithName (
command=0x7feb24125a90 "from sqlglot import parse_one; parse_one(\"SELECT * FROM taxi ORDER BY 1 OFFSET 0 ROWS FETCH NEXT 3 ROWS ONLY\").sql()\n", name=0x55e03a6ce96e "<string>", flags=0x7ffeef0ed160) at Python/pythonrun.c:578
#22 0x000055e03a55dec7 in pymain_run_command (
command=0x55e05c6afa60 L"from sqlglot import parse_one; parse_one(\"SELECT * FROM taxi ORDER BY 1 OFFSET 0 ROWS FETCH NEXT 3 ROWS ONLY\").sql()\n") at Modules/main.c:261
#23 0x000055e03a55f2ad in pymain_run_python (exitcode=0x7ffeef0ed254) at Modules/main.c:682
#24 0x000055e03a55f4ae in Py_RunMain () at Modules/main.c:772
#25 0x000055e03a55f569 in pymain_main (args=0x7ffeef0ed2d0) at Modules/main.c:802
#26 0x000055e03a55f631 in Py_BytesMain (argc=3, argv=0x7ffeef0ed438) at Modules/main.c:826
#27 0x000055e03a1849bd in main (argc=3, argv=0x7ffeef0ed438) at ./Programs/python.c:15
(gdb) up 4
#4 0x000055e03a36ecf3 in _PyUnicode_JoinArray (separator=0x55e03a899bc8 <_PyRuntime+35496>, items=0x7ffeef0e9278, seqlen=4)
at Objects/unicodeobject.c:10387
10387 assert(res_data == PyUnicode_1BYTE_DATA(res)
(gdb) p res_data
$1 = (unsigned char *) 0x7feb2381b97c "\340U"
(gdb) p * (PyASCIIObject*) res
$5 = {ob_base = {{ob_refcnt_full = 1, {ob_refcnt = 1, ob_overflow = 0, ob_flags = 0}}, ob_type = 0x55e03a872040 <PyUnicode_Type>},
length = 23, hash = -1, state = {interned = 0, kind = 1, compact = 1, ascii = 1, statically_allocated = 0}}
(gdb) p * (PyUnicodeObject*) res
$6 = {_base = {_base = {ob_base = {{ob_refcnt_full = 1, {ob_refcnt = 1, ob_overflow = 0, ob_flags = 0}},
ob_type = 0x55e03a872040 <PyUnicode_Type>}, length = 23, hash = -1, state = {interned = 0, kind = 1, compact = 1, ascii = 1,
statically_allocated = 0}}, utf8_length = 5629578988226954784,
utf8 = 0x4546203320545845 <error: Cannot access memory at address 0x4546203320545845>}, data = {any = 0x5458454e20484354,
latin1 = 0x5458454e20484354 <error: Cannot access memory at address 0x5458454e20484354>, ucs2 = 0x5458454e20484354,
ucs4 = 0x5458454e20484354}}
(gdb) p * (PyCompactUnicodeObject*) res
$7 = {_base = {ob_base = {{ob_refcnt_full = 1, {ob_refcnt = 1, ob_overflow = 0, ob_flags = 0}},
ob_type = 0x55e03a872040 <PyUnicode_Type>}, length = 23, hash = -1, state = {interned = 0, kind = 1, compact = 1, ascii = 1,
statically_allocated = 0}}, utf8_length = 5629578988226954784,
utf8 = 0x4546203320545845 <error: Cannot access memory at address 0x4546203320545845>}
(that's just my guesswork of what to print)
CPython versions tested on:
3.14, CPython main branch
Operating systems tested on:
Linux
Output from running 'python -VV' on the command line:
Python 3.15.0a0 (heads/main:51910dc5620, May 29 2025, 16:12:29) [GCC 14.3.0]