diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index 1d56ccd71cf962..d4b4f60be980a5 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -9,12 +9,11 @@
from io import BytesIO
from test import support
from test.support import os_helper
-
+from test.support import sortdict
+from unittest import mock
from xml.parsers import expat
from xml.parsers.expat import errors
-from test.support import sortdict
-
class SetAttributeTest(unittest.TestCase):
def setUp(self):
@@ -436,6 +435,19 @@ def test7(self):
"", "4", "", "5", ""],
"buffered text not properly split")
+ def test_change_character_data_handler_in_callback(self):
+ # Test that xmlparse_handler_setter() properly handles the special
+ # case "parser.CharacterDataHandler = None" when that assignment is
+ # made from inside the character data callback itself.
+ def handler(*args):
+ # Uninstall the character data handler while it is being called.
+ parser.CharacterDataHandler = None
+
+ # The Mock forwards each call to handler() and records it.
+ handler_wrapper = mock.Mock(wraps=handler)
+ parser = expat.ParserCreate()
+ parser.CharacterDataHandler = handler_wrapper
+ parser.Parse(b"12345 ", True)
+ # Once the handler has removed itself it must not be invoked again.
+ handler_wrapper.assert_called_once()
+ self.assertIsNone(parser.CharacterDataHandler)
+
# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
@@ -595,7 +607,7 @@ def test_unchanged_size(self):
def test_disabling_buffer(self):
xml1 = b"" + b'a' * 512
xml2 = b'b' * 1024
- xml3 = b'c' * 1024 + b'';
+ xml3 = b'c' * 1024 + b''
parser = expat.ParserCreate()
parser.CharacterDataHandler = self.counting_handler
parser.buffer_text = 1
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index fa153d86543e99..c1b13c04dd8edb 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -98,26 +98,102 @@ typedef struct {
#define CHARACTER_DATA_BUFFER_SIZE 8192
-typedef const void *xmlhandler;
+// Union of the C callback signatures that are stored in handler_info[]
+// and handed to the XML_Set*Handler() setters. The comment above each
+// member lists the Expat handler type(s) sharing that signature.
+typedef union xmlhandler_union {
+ // - XML_StartCdataSectionHandler
+ // - XML_EndCdataSectionHandler
+ // - XML_EndDoctypeDeclHandler
+ void (*parser_only)(
+ void *
+ );
+ // - XML_NotStandaloneHandler
+ int (*not_standalone)(
+ void *
+ );
+ // - XML_EndElementHandler
+ // - XML_EndNamespaceDeclHandler
+ // - XML_CommentHandler
+ void (*parser_and_data)(
+ void *, const XML_Char *
+ );
+ // - XML_CharacterDataHandler
+ // - XML_DefaultHandler
+ // - XML_DefaultHandler (as installed by XML_SetDefaultHandlerExpand)
+ // - XML_SkippedEntityHandler
+ // - noop_character_data_handler
+ void (*parser_and_data_and_int)(
+ void *, const XML_Char *, int
+ );
+ // - XML_ProcessingInstructionHandler
+ // - XML_StartNamespaceDeclHandler
+ void (*parser_and_data_and_data)(
+ void *, const XML_Char *, const XML_Char *
+ );
+ // - XML_StartElementHandler
+ void (*start_element)(
+ void *, const XML_Char *, const XML_Char **
+ );
+ // - XML_ElementDeclHandler
+ void (*element_decl)(
+ void *, const XML_Char *, XML_Content *
+ );
+ // - XML_XmlDeclHandler
+ void (*xml_decl)(
+ void *, const XML_Char *, const XML_Char *, int
+ );
+ // - XML_StartDoctypeDeclHandler
+ void (*start_doctype_decl)(
+ void *,
+ const XML_Char *, const XML_Char *, const XML_Char *,
+ int
+ );
+ // - XML_NotationDeclHandler
+ void (*notation_decl)(
+ void *,
+ const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *
+ );
+ // - XML_AttlistDeclHandler
+ void (*attlist_decl)(
+ void *,
+ const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *,
+ int
+ );
+ // - XML_UnparsedEntityDeclHandler
+ void (*unparsed_entity_decl)(
+ void *,
+ const XML_Char *, const XML_Char *,
+ const XML_Char *, const XML_Char *, const XML_Char *
+ );
+ // - XML_EntityDeclHandler
+ void (*entity_decl)(
+ void *,
+ const XML_Char *, int,
+ const XML_Char *, int,
+ const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *
+ );
+ // - XML_ExternalEntityRefHandler (the only handler whose first
+ //   parameter is an XML_Parser rather than a 'void *')
+ int (*external_entity_ref)(
+ XML_Parser,
+ const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *
+ );
+} xmlhandler_union;
+
+typedef xmlhandler_union *xmlhandler;
typedef void (*xmlhandlersetter)(XML_Parser self, xmlhandler handler);
struct HandlerInfo {
const char *name;
xmlhandlersetter setter;
- xmlhandler handler;
+ xmlhandler_union handler;
PyGetSetDef getset;
};
static struct HandlerInfo handler_info[64];
-// gh-111178: Use _Py_NO_SANITIZE_UNDEFINED, rather than using the exact
-// handler API for each handler.
-static inline void _Py_NO_SANITIZE_UNDEFINED
+static inline void
CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info,
XML_Parser xml_parser, xmlhandler xml_handler)
{
- xmlhandlersetter setter = (xmlhandlersetter)handler_info->setter;
- setter(xml_parser, xml_handler);
+ handler_info->setter(xml_parser, xml_handler);
}
/* Set an integer attribute on the error object; return true on success,
@@ -1063,7 +1139,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
if (handler != NULL) {
new_parser->handlers[i] = Py_NewRef(handler);
struct HandlerInfo info = handler_info[i];
- CALL_XML_HANDLER_SETTER(&info, new_parser->itself, info.handler);
+ CALL_XML_HANDLER_SETTER(&info, new_parser->itself, &info.handler);
}
}
@@ -1315,15 +1391,23 @@ xmlparse_dealloc(PyObject *op)
Py_DECREF(tp);
}
+// Return the index in handler_info[] of the entry that 'closure' points to.
+//
+// 'closure' is interpreted as a 'struct HandlerInfo *' and the index is
+// obtained by pointer subtraction from the start of handler_info[]. The
+// assertions check that the result lies inside the table.
+static Py_ssize_t
+xmlparse_handler_get_index(void *closure)
+{
+ struct HandlerInfo *info = (struct HandlerInfo *)closure;
+ Py_ssize_t ind = (Py_ssize_t)(info - handler_info);
+ assert(ind >= 0);
+ assert(ind < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
+ // Sanity check: the entry found through the computed index must be
+ // the very entry that 'closure' designated.
+ assert(info->name == handler_info[ind].name);
+ return ind;
+}
static PyObject *
xmlparse_handler_getter(PyObject *op, void *closure)
{
xmlparseobject *self = xmlparseobject_CAST(op);
- struct HandlerInfo *hi = (struct HandlerInfo *)closure;
- assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
- int handlernum = (int)(hi - handler_info);
- PyObject *result = self->handlers[handlernum];
+ Py_ssize_t ind = xmlparse_handler_get_index(closure);
+ PyObject *result = self->handlers[ind];
if (result == NULL) {
result = Py_None;
}
@@ -1334,9 +1418,7 @@ static int
xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
{
xmlparseobject *self = xmlparseobject_CAST(op);
- struct HandlerInfo *hi = (struct HandlerInfo *)closure;
- assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
- int handlernum = (int)(hi - handler_info);
+ Py_ssize_t handlernum = xmlparse_handler_get_index(closure);
if (v == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
return -1;
@@ -1351,7 +1433,8 @@ xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
return -1;
}
- xmlhandler c_handler = NULL;
+ int used = 0;
+ xmlhandler_union c_handler;
if (v == Py_None) {
/* If this is the character data handler, and a character
data handler is already active, we need to be more
@@ -1365,16 +1448,24 @@ xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
elaborate system of handlers and state could remove the
C handler more effectively. */
if (handlernum == CharacterData && self->in_callback) {
- c_handler = noop_character_data_handler;
+ // '(xmlhandler_union){...}' is NOT a cast in the regular sense but
+ // a compound literal: it builds an unnamed union object from the
+ // designated initializer. It is valid as per ISO C11, §6.5.2.5.
+ c_handler = (xmlhandler_union){
+ .parser_and_data_and_int = noop_character_data_handler
+ };
+ used = 1;
}
v = NULL;
}
else if (v != NULL) {
Py_INCREF(v);
c_handler = handler_info[handlernum].handler;
+ used = 1;
}
Py_XSETREF(self->handlers[handlernum], v);
- CALL_XML_HANDLER_SETTER(&handler_info[handlernum], self->itself, c_handler);
+ xmlhandler p_handler = used ? &c_handler : NULL;
+ CALL_XML_HANDLER_SETTER(&handler_info[handlernum], self->itself, p_handler);
return 0;
}
@@ -2222,40 +2313,69 @@ clear_handlers(xmlparseobject *self, int initial)
}
}
-static struct HandlerInfo handler_info[] = {
-
- // The cast to `xmlhandlersetter` is needed as the signature of XML
- // handler functions is not compatible with `xmlhandlersetter` since
- // their second parameter is narrower than a `const void *`.
-#define HANDLER_INFO(name) \
- {#name, (xmlhandlersetter)XML_Set##name, my_##name},
-
- HANDLER_INFO(StartElementHandler)
- HANDLER_INFO(EndElementHandler)
- HANDLER_INFO(ProcessingInstructionHandler)
- HANDLER_INFO(CharacterDataHandler)
- HANDLER_INFO(UnparsedEntityDeclHandler)
- HANDLER_INFO(NotationDeclHandler)
- HANDLER_INFO(StartNamespaceDeclHandler)
- HANDLER_INFO(EndNamespaceDeclHandler)
- HANDLER_INFO(CommentHandler)
- HANDLER_INFO(StartCdataSectionHandler)
- HANDLER_INFO(EndCdataSectionHandler)
- HANDLER_INFO(DefaultHandler)
- HANDLER_INFO(DefaultHandlerExpand)
- HANDLER_INFO(NotStandaloneHandler)
- HANDLER_INFO(ExternalEntityRefHandler)
- HANDLER_INFO(StartDoctypeDeclHandler)
- HANDLER_INFO(EndDoctypeDeclHandler)
- HANDLER_INFO(EntityDeclHandler)
- HANDLER_INFO(XmlDeclHandler)
- HANDLER_INFO(ElementDeclHandler)
- HANDLER_INFO(AttlistDeclHandler)
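+/*
+ * Define my_Set<NAME>(), a wrapper around XML_Set<NAME>() that takes a
+ * generic 'xmlhandler'. Selecting the union member MEMBER gives the callback
+ * its exact function-pointer type, which is useful to detect issues at
+ * compile time.
+ */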
+#define SETTER_WRAPPER(NAME, MEMBER) \
+ static inline void \
+ my_Set ## NAME (XML_Parser parser, xmlhandler handle) \
+ { \
+ (void)XML_Set ## NAME (parser, handle ? handle->MEMBER : NULL); \
+ }
+SETTER_WRAPPER(StartElementHandler, start_element)
+SETTER_WRAPPER(EndElementHandler, parser_and_data)
+SETTER_WRAPPER(ProcessingInstructionHandler, parser_and_data_and_data)
+SETTER_WRAPPER(CharacterDataHandler, parser_and_data_and_int)
+SETTER_WRAPPER(UnparsedEntityDeclHandler, unparsed_entity_decl)
+SETTER_WRAPPER(NotationDeclHandler, notation_decl)
+SETTER_WRAPPER(StartNamespaceDeclHandler, parser_and_data_and_data)
+SETTER_WRAPPER(EndNamespaceDeclHandler, parser_and_data)
+SETTER_WRAPPER(CommentHandler, parser_and_data)
+SETTER_WRAPPER(StartCdataSectionHandler, parser_only)
+SETTER_WRAPPER(EndCdataSectionHandler, parser_only)
+SETTER_WRAPPER(DefaultHandler, parser_and_data_and_int)
+SETTER_WRAPPER(DefaultHandlerExpand, parser_and_data_and_int)
+SETTER_WRAPPER(NotStandaloneHandler, not_standalone)
+SETTER_WRAPPER(ExternalEntityRefHandler, external_entity_ref)
+SETTER_WRAPPER(StartDoctypeDeclHandler, start_doctype_decl)
+SETTER_WRAPPER(EndDoctypeDeclHandler, parser_only)
+SETTER_WRAPPER(EntityDeclHandler, entity_decl)
+SETTER_WRAPPER(XmlDeclHandler, xml_decl)
+SETTER_WRAPPER(ElementDeclHandler, element_decl)
+SETTER_WRAPPER(AttlistDeclHandler, attlist_decl)
#if XML_COMBINED_VERSION >= 19504
- HANDLER_INFO(SkippedEntityHandler)
+SETTER_WRAPPER(SkippedEntityHandler, parser_and_data_and_int)
#endif
+#undef SETTER_WRAPPER
+static struct HandlerInfo handler_info[] = {
+#define HANDLER_INFO(IND, NAME, MEMBER) \
+ [IND] = {#NAME, my_Set##NAME, {.MEMBER = my_##NAME}},
+
+ HANDLER_INFO(StartElement, StartElementHandler, start_element)
+ HANDLER_INFO(EndElement, EndElementHandler, parser_and_data)
+ HANDLER_INFO(ProcessingInstruction, ProcessingInstructionHandler, parser_and_data_and_data)
+ HANDLER_INFO(CharacterData, CharacterDataHandler, parser_and_data_and_int)
+ HANDLER_INFO(UnparsedEntityDecl, UnparsedEntityDeclHandler, unparsed_entity_decl)
+ HANDLER_INFO(NotationDecl, NotationDeclHandler, notation_decl)
+ HANDLER_INFO(StartNamespaceDecl, StartNamespaceDeclHandler, parser_and_data_and_data)
+ HANDLER_INFO(EndNamespaceDecl, EndNamespaceDeclHandler, parser_and_data)
+ HANDLER_INFO(Comment, CommentHandler, parser_and_data)
+ HANDLER_INFO(StartCdataSection, StartCdataSectionHandler, parser_only)
+ HANDLER_INFO(EndCdataSection, EndCdataSectionHandler, parser_only)
+ HANDLER_INFO(Default, DefaultHandler, parser_and_data_and_int)
+ HANDLER_INFO(DefaultHandlerExpand, DefaultHandlerExpand, parser_and_data_and_int)
+ HANDLER_INFO(NotStandalone, NotStandaloneHandler, not_standalone)
+ HANDLER_INFO(ExternalEntityRef, ExternalEntityRefHandler, external_entity_ref)
+ HANDLER_INFO(StartDoctypeDecl, StartDoctypeDeclHandler, start_doctype_decl)
+ HANDLER_INFO(EndDoctypeDecl, EndDoctypeDeclHandler, parser_only)
+ HANDLER_INFO(EntityDecl, EntityDeclHandler, entity_decl)
+ HANDLER_INFO(XmlDecl, XmlDeclHandler, xml_decl)
+ HANDLER_INFO(ElementDecl, ElementDeclHandler, element_decl)
+ HANDLER_INFO(AttlistDecl, AttlistDeclHandler, attlist_decl)
+#if XML_COMBINED_VERSION >= 19504
+ HANDLER_INFO(SkippedEntity, SkippedEntityHandler, parser_and_data_and_int)
+#endif
#undef HANDLER_INFO
-
- {NULL, NULL, NULL} /* sentinel */
+ {NULL, NULL, {NULL}} /* sentinel */
};