Skip to content

gh-133157: remove usage of _Py_NO_SANITIZE_UNDEFINED in pyexpat #134050

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Lib/test/test_pyexpat.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,14 @@ def test7(self):
"<!--abc-->", "4", "<!--def-->", "5", "</a>"],
"buffered text not properly split")

# Regression test for gh-133157: the character data handler clears
# parser.CharacterDataHandler while it is itself being called, and the
# document still contains more character data after that point.
# The test passes if Parse() completes without raising.
def test_change_character_data_handler_in_callback(self):
def handler(*args):
parser.CharacterDataHandler = None

parser = expat.ParserCreate()
parser.CharacterDataHandler = handler
parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", True)


# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
Expand Down
220 changes: 170 additions & 50 deletions Modules/pyexpat.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,26 +98,102 @@ typedef struct {

#define CHARACTER_DATA_BUFFER_SIZE 8192

typedef const void *xmlhandler;
/*
 * Typed storage for a single Expat callback.
 *
 * Each member is a function pointer with the exact signature shared by one
 * family of handlers; the comment above a member lists the handler types
 * that use that signature. As with any union, only the member that was
 * last stored may be read back, so a value must always be accessed through
 * the member that matches the handler it was created for.
 */
typedef union xmlhandler_union {
// - XML_StartCdataSectionHandler
// - XML_EndCdataSectionHandler
// - XML_EndDoctypeDeclHandler
void (*parser_only)(
void *
);
// XML_NotStandaloneHandler
int (*not_standalone)(
void *
);
// - XML_EndElementHandler
// - XML_EndNamespaceDeclHandler
// - XML_CommentHandler
void (*parser_and_data)(
void *, const XML_Char *
);
// - XML_CharacterDataHandler
// - XML_DefaultHandler
// - XML_DefaultHandlerExpand
// - XML_SkippedEntityHandler
// - noop_character_data_handler
void (*parser_and_data_and_int)(
void *, const XML_Char *, int
);
// - XML_ProcessingInstructionHandler
// - XML_StartNamespaceDeclHandler
void (*parser_and_data_and_data)(
void *, const XML_Char *, const XML_Char *
);
// XML_StartElementHandler
void (*start_element)(
void *, const XML_Char *, const XML_Char **
);
// XML_ElementDeclHandler
void (*element_decl)(
void *, const XML_Char *, XML_Content *
);
// XML_XmlDeclHandler
void (*xml_decl)(
void *, const XML_Char *, const XML_Char *, int
);
// XML_StartDoctypeDeclHandler
void (*start_doctype_decl)(
void *,
const XML_Char *, const XML_Char *, const XML_Char *,
int
);
// XML_NotationDeclHandler
void (*notation_decl)(
void *,
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *
);
// XML_AttlistDeclHandler
void (*attlist_decl)(
void *,
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *,
int
);
// XML_UnparsedEntityDeclHandler
void (*unparsed_entity_decl)(
void *,
const XML_Char *, const XML_Char *,
const XML_Char *, const XML_Char *, const XML_Char *
);
// XML_EntityDeclHandler
void (*entity_decl)(
void *,
const XML_Char *, int,
const XML_Char *, int,
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *
);
// XML_ExternalEntityRefHandler
int (*external_entity_ref)(
XML_Parser,
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *
);
} xmlhandler_union;

typedef xmlhandler_union *xmlhandler;
typedef void (*xmlhandlersetter)(XML_Parser self, xmlhandler handler);

struct HandlerInfo {
const char *name;
xmlhandlersetter setter;
xmlhandler handler;
xmlhandler_union handler;
PyGetSetDef getset;
};

static struct HandlerInfo handler_info[64];

// gh-111178: Use _Py_NO_SANITIZE_UNDEFINED, rather than using the exact
// handler API for each handler.
static inline void _Py_NO_SANITIZE_UNDEFINED
static inline void
CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info,
XML_Parser xml_parser, xmlhandler xml_handler)
{
xmlhandlersetter setter = (xmlhandlersetter)handler_info->setter;
setter(xml_parser, xml_handler);
handler_info->setter(xml_parser, xml_handler);
}

/* Set an integer attribute on the error object; return true on success,
Expand Down Expand Up @@ -1063,7 +1139,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
if (handler != NULL) {
new_parser->handlers[i] = Py_NewRef(handler);
struct HandlerInfo info = handler_info[i];
CALL_XML_HANDLER_SETTER(&info, new_parser->itself, info.handler);
CALL_XML_HANDLER_SETTER(&info, new_parser->itself, &info.handler);
}
}

Expand Down Expand Up @@ -1315,15 +1391,23 @@ xmlparse_dealloc(PyObject *op)
Py_DECREF(tp);
}

static Py_ssize_t
xmlparse_handler_get_index(void *closure)
{
struct HandlerInfo *info = (struct HandlerInfo *)closure;
Py_ssize_t ind = (Py_ssize_t)(info - handler_info);
assert(ind >= 0);
assert(ind < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
assert(info->name == handler_info[ind].name);
return ind;
}

static PyObject *
xmlparse_handler_getter(PyObject *op, void *closure)
{
xmlparseobject *self = xmlparseobject_CAST(op);
struct HandlerInfo *hi = (struct HandlerInfo *)closure;
assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
int handlernum = (int)(hi - handler_info);
PyObject *result = self->handlers[handlernum];
Py_ssize_t ind = xmlparse_handler_get_index(closure);
PyObject *result = self->handlers[ind];
if (result == NULL) {
result = Py_None;
}
Expand All @@ -1334,9 +1418,7 @@ static int
xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
{
xmlparseobject *self = xmlparseobject_CAST(op);
struct HandlerInfo *hi = (struct HandlerInfo *)closure;
assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
int handlernum = (int)(hi - handler_info);
Py_ssize_t handlernum = xmlparse_handler_get_index(closure);
if (v == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
return -1;
Expand All @@ -1351,7 +1433,8 @@ xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
return -1;
}

xmlhandler c_handler = NULL;
int used = 0;
xmlhandler_union c_handler;
if (v == Py_None) {
/* If this is the character data handler, and a character
data handler is already active, we need to be more
Expand All @@ -1365,16 +1448,24 @@ xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
elaborate system of handlers and state could remove the
C handler more effectively. */
if (handlernum == CharacterData && self->in_callback) {
c_handler = noop_character_data_handler;
// This is a compound literal (ISO C11, §6.5.2.5), not a cast: it
// creates an unnamed 'xmlhandler_union' object whose designated member
// is initialized, and that object is then copied into 'c_handler'.
c_handler = (xmlhandler_union){
.parser_and_data_and_int = noop_character_data_handler
};
used = 1;
}
v = NULL;
}
else if (v != NULL) {
Py_INCREF(v);
c_handler = handler_info[handlernum].handler;
used = 1;
}
Py_XSETREF(self->handlers[handlernum], v);
CALL_XML_HANDLER_SETTER(&handler_info[handlernum], self->itself, c_handler);
xmlhandler p_handler = used ? &c_handler : NULL;
CALL_XML_HANDLER_SETTER(&handler_info[handlernum], self->itself, p_handler);
return 0;
}

Expand Down Expand Up @@ -2222,40 +2313,69 @@ clear_handlers(xmlparseobject *self, int initial)
}
}

static struct HandlerInfo handler_info[] = {

// The cast to `xmlhandlersetter` is needed as the signature of XML
// handler functions is not compatible with `xmlhandlersetter` since
// their second parameter is narrower than a `const void *`.
#define HANDLER_INFO(name) \
{#name, (xmlhandlersetter)XML_Set##name, my_##name},

HANDLER_INFO(StartElementHandler)
HANDLER_INFO(EndElementHandler)
HANDLER_INFO(ProcessingInstructionHandler)
HANDLER_INFO(CharacterDataHandler)
HANDLER_INFO(UnparsedEntityDeclHandler)
HANDLER_INFO(NotationDeclHandler)
HANDLER_INFO(StartNamespaceDeclHandler)
HANDLER_INFO(EndNamespaceDeclHandler)
HANDLER_INFO(CommentHandler)
HANDLER_INFO(StartCdataSectionHandler)
HANDLER_INFO(EndCdataSectionHandler)
HANDLER_INFO(DefaultHandler)
HANDLER_INFO(DefaultHandlerExpand)
HANDLER_INFO(NotStandaloneHandler)
HANDLER_INFO(ExternalEntityRefHandler)
HANDLER_INFO(StartDoctypeDeclHandler)
HANDLER_INFO(EndDoctypeDeclHandler)
HANDLER_INFO(EntityDeclHandler)
HANDLER_INFO(XmlDeclHandler)
HANDLER_INFO(ElementDeclHandler)
HANDLER_INFO(AttlistDeclHandler)
/*
 * Define my_Set<NAME>(), a wrapper around Expat's XML_Set<NAME>() that takes
 * the handler as an 'xmlhandler' (pointer to xmlhandler_union).
 *
 * MEMBER names the union member holding the callback for this handler; a
 * NULL 'handle' is forwarded as a NULL callback. Because the member is
 * passed with its own function-pointer type, a MEMBER whose signature does
 * not match what XML_Set<NAME>() expects can be diagnosed at compile time.
 */
#define SETTER_WRAPPER(NAME, MEMBER) \
static inline void \
my_Set ## NAME (XML_Parser parser, xmlhandler handle) \
{ \
(void)XML_Set ## NAME (parser, handle ? handle->MEMBER : NULL); \
}
SETTER_WRAPPER(StartElementHandler, start_element)
SETTER_WRAPPER(EndElementHandler, parser_and_data)
SETTER_WRAPPER(ProcessingInstructionHandler, parser_and_data_and_data)
SETTER_WRAPPER(CharacterDataHandler, parser_and_data_and_int)
SETTER_WRAPPER(UnparsedEntityDeclHandler, unparsed_entity_decl)
SETTER_WRAPPER(NotationDeclHandler, notation_decl)
SETTER_WRAPPER(StartNamespaceDeclHandler, parser_and_data_and_data)
SETTER_WRAPPER(EndNamespaceDeclHandler, parser_and_data)
SETTER_WRAPPER(CommentHandler, parser_and_data)
SETTER_WRAPPER(StartCdataSectionHandler, parser_only)
SETTER_WRAPPER(EndCdataSectionHandler, parser_only)
SETTER_WRAPPER(DefaultHandler, parser_and_data_and_int)
SETTER_WRAPPER(DefaultHandlerExpand, parser_and_data_and_int)
SETTER_WRAPPER(NotStandaloneHandler, not_standalone)
SETTER_WRAPPER(ExternalEntityRefHandler, external_entity_ref)
SETTER_WRAPPER(StartDoctypeDeclHandler, start_doctype_decl)
SETTER_WRAPPER(EndDoctypeDeclHandler, parser_only)
SETTER_WRAPPER(EntityDeclHandler, entity_decl)
SETTER_WRAPPER(XmlDeclHandler, xml_decl)
SETTER_WRAPPER(ElementDeclHandler, element_decl)
SETTER_WRAPPER(AttlistDeclHandler, attlist_decl)
#if XML_COMBINED_VERSION >= 19504
HANDLER_INFO(SkippedEntityHandler)
SETTER_WRAPPER(SkippedEntityHandler, parser_and_data_and_int)
#endif
#undef SETTER_WRAPPER

static struct HandlerInfo handler_info[] = {
#define HANDLER_INFO(IND, NAME, MEMBER) \
[IND] = {#NAME, my_Set##NAME, {.MEMBER = my_##NAME}},

HANDLER_INFO(StartElement, StartElementHandler, start_element)
HANDLER_INFO(EndElement, EndElementHandler, parser_and_data)
HANDLER_INFO(ProcessingInstruction, ProcessingInstructionHandler, parser_and_data_and_data)
HANDLER_INFO(CharacterData, CharacterDataHandler, parser_and_data_and_int)
HANDLER_INFO(UnparsedEntityDecl, UnparsedEntityDeclHandler, unparsed_entity_decl)
HANDLER_INFO(NotationDecl, NotationDeclHandler, notation_decl)
HANDLER_INFO(StartNamespaceDecl, StartNamespaceDeclHandler, parser_and_data_and_data)
HANDLER_INFO(EndNamespaceDecl, EndNamespaceDeclHandler, parser_and_data)
HANDLER_INFO(Comment, CommentHandler, parser_and_data)
HANDLER_INFO(StartCdataSection, StartCdataSectionHandler, parser_only)
HANDLER_INFO(EndCdataSection, EndCdataSectionHandler, parser_only)
HANDLER_INFO(Default, DefaultHandler, parser_and_data_and_int)
HANDLER_INFO(DefaultHandlerExpand, DefaultHandlerExpand, parser_and_data_and_int)
HANDLER_INFO(NotStandalone, NotStandaloneHandler, not_standalone)
HANDLER_INFO(ExternalEntityRef, ExternalEntityRefHandler, external_entity_ref)
HANDLER_INFO(StartDoctypeDecl, StartDoctypeDeclHandler, start_doctype_decl)
HANDLER_INFO(EndDoctypeDecl, EndDoctypeDeclHandler, parser_only)
HANDLER_INFO(EntityDecl, EntityDeclHandler, entity_decl)
HANDLER_INFO(XmlDecl, XmlDeclHandler, xml_decl)
HANDLER_INFO(ElementDecl, ElementDeclHandler, element_decl)
HANDLER_INFO(AttlistDecl, AttlistDeclHandler, attlist_decl)
#if XML_COMBINED_VERSION >= 19504
HANDLER_INFO(SkippedEntity, SkippedEntityHandler, parser_and_data_and_int)
#endif
#undef HANDLER_INFO

{NULL, NULL, NULL} /* sentinel */
{NULL, NULL, {NULL}} /* sentinel */
};
Loading