@@ -74,6 +74,26 @@ static const xmlChar *get_libxml_namespace_href(uintptr_t lexbor_namespace)
74
74
}
75
75
}
76
76
77
+ static xmlNodePtr lexbor_libxml2_bridge_new_text_node_fast (xmlDocPtr lxml_doc , const lxb_char_t * data , size_t data_length , bool compact_text_nodes )
78
+ {
79
+ if (compact_text_nodes && data_length < LXML_INTERNED_STRINGS_SIZE ) {
80
+ /* See xmlSAX2TextNode() in libxml2 */
81
+ xmlNodePtr lxml_text = xmlMalloc (sizeof (* lxml_text ));
82
+ if (UNEXPECTED (lxml_text == NULL )) {
83
+ return NULL ;
84
+ }
85
+ memset (lxml_text , 0 , sizeof (* lxml_text ));
86
+ lxml_text -> name = xmlStringText ;
87
+ lxml_text -> type = XML_TEXT_NODE ;
88
+ lxml_text -> doc = lxml_doc ;
89
+ lxml_text -> content = (xmlChar * ) & lxml_text -> properties ;
90
+ memcpy (lxml_text -> content , data , data_length );
91
+ return lxml_text ;
92
+ } else {
93
+ return xmlNewDocTextLen (lxml_doc , (const xmlChar * ) data , data_length );
94
+ }
95
+ }
96
+
77
97
static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert (
78
98
lxb_dom_node_t * start_node ,
79
99
xmlDocPtr lxml_doc ,
@@ -130,14 +150,52 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
130
150
);
131
151
}
132
152
133
- for (lxb_dom_attr_t * attr = element -> last_attr ; attr != NULL ; attr = attr -> prev ) {
134
- lexbor_libxml2_bridge_work_list_item_push (
135
- & work_list ,
136
- (lxb_dom_node_t * ) attr ,
137
- entering_namespace ,
138
- lxml_element ,
139
- current_lxml_ns
140
- );
153
+ xmlAttrPtr last_added_attr = NULL ;
154
+ for (lxb_dom_attr_t * attr = element -> first_attr ; attr != NULL ; attr = attr -> next ) {
155
+ /* Same namespace remark as for elements */
156
+ size_t local_name_length , value_length ;
157
+ const lxb_char_t * local_name = lxb_dom_attr_local_name (attr , & local_name_length );
158
+ const lxb_char_t * value = lxb_dom_attr_value (attr , & value_length );
159
+
160
+ if (UNEXPECTED (local_name_length >= INT_MAX || value_length >= INT_MAX )) {
161
+ retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OVERFLOW ;
162
+ goto out ;
163
+ }
164
+
165
+ xmlAttrPtr lxml_attr = xmlMalloc (sizeof (xmlAttr ));
166
+ if (UNEXPECTED (lxml_attr == NULL )) {
167
+ retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM ;
168
+ goto out ;
169
+ }
170
+
171
+ memset (lxml_attr , 0 , sizeof (xmlAttr ));
172
+ lxml_attr -> type = XML_ATTRIBUTE_NODE ;
173
+ lxml_attr -> parent = lxml_element ;
174
+ lxml_attr -> name = xmlDictLookup (lxml_doc -> dict , local_name , local_name_length );
175
+ lxml_attr -> doc = lxml_doc ;
176
+ xmlNodePtr lxml_text = lexbor_libxml2_bridge_new_text_node_fast (lxml_doc , value , value_length , true /* Always true for optimization purposes */ );
177
+ if (UNEXPECTED (lxml_text == NULL )) {
178
+ xmlFreeProp (lxml_attr );
179
+ retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM ;
180
+ goto out ;
181
+ }
182
+
183
+ lxml_attr -> children = lxml_attr -> last = lxml_text ;
184
+
185
+ if (last_added_attr == NULL ) {
186
+ lxml_element -> properties = lxml_attr ;
187
+ } else {
188
+ last_added_attr -> next = lxml_attr ;
189
+ lxml_attr -> prev = last_added_attr ;
190
+ }
191
+ last_added_attr = lxml_attr ;
192
+
193
+ /* xmlIsID does some other stuff too that is irrelevant here. */
194
+ if (local_name_length == 2 && local_name [0 ] == 'i' && local_name [1 ] == 'd' ) {
195
+ xmlAddID (NULL , lxml_doc , value , lxml_attr );
196
+ }
197
+
198
+ /* libxml2 doesn't support line numbers on this anyway, it derives them instead, so don't bother */
141
199
}
142
200
} else if (node -> type == LXB_DOM_NODE_TYPE_TEXT ) {
143
201
lxb_dom_text_t * text = lxb_dom_interface_text (node );
@@ -147,26 +205,10 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
147
205
retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OVERFLOW ;
148
206
goto out ;
149
207
}
150
- xmlNodePtr lxml_text ;
151
- if (compact_text_nodes && data_length < LXML_INTERNED_STRINGS_SIZE ) {
152
- /* See xmlSAX2TextNode() in libxml2 */
153
- lxml_text = xmlMalloc (sizeof (* lxml_text ));
154
- if (UNEXPECTED (lxml_text == NULL )) {
155
- retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM ;
156
- goto out ;
157
- }
158
- memset (lxml_text , 0 , sizeof (* lxml_text ));
159
- lxml_text -> name = xmlStringText ;
160
- lxml_text -> type = XML_TEXT_NODE ;
161
- lxml_text -> doc = lxml_doc ;
162
- lxml_text -> content = (xmlChar * ) & lxml_text -> properties ;
163
- memcpy (lxml_text -> content , data , data_length + 1 /* include '\0' */ );
164
- } else {
165
- lxml_text = xmlNewDocTextLen (lxml_doc , (const xmlChar * ) data , data_length );
166
- if (UNEXPECTED (lxml_text == NULL )) {
167
- retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM ;
168
- goto out ;
169
- }
208
+ xmlNodePtr lxml_text = lexbor_libxml2_bridge_new_text_node_fast (lxml_doc , data , data_length , compact_text_nodes );
209
+ if (UNEXPECTED (lxml_text == NULL )) {
210
+ retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM ;
211
+ goto out ;
170
212
}
171
213
xmlAddChild (lxml_parent , lxml_text );
172
214
if (node -> line >= USHRT_MAX ) {
@@ -192,20 +234,6 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
192
234
goto out ;
193
235
}
194
236
/* libxml2 doesn't support line numbers on this anyway, it returns -1 instead, so don't bother */
195
- } else if (node -> type == LXB_DOM_NODE_TYPE_ATTRIBUTE ) {
196
- lxb_dom_attr_t * attr = lxb_dom_interface_attr (node );
197
- do {
198
- /* Same namespace remark as for elements */
199
- const lxb_char_t * local_name = lxb_dom_attr_local_name (attr , NULL );
200
- const lxb_char_t * value = lxb_dom_attr_value (attr , NULL );
201
- xmlAttrPtr lxml_attr = xmlSetNsProp (lxml_parent , NULL , local_name , value );
202
- if (UNEXPECTED (lxml_attr == NULL )) {
203
- retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM ;
204
- goto out ;
205
- }
206
- attr = attr -> next ;
207
- /* libxml2 doesn't support line numbers on this anyway, it derives them instead, so don't bother */
208
- } while (attr );
209
237
} else if (node -> type == LXB_DOM_NODE_TYPE_COMMENT ) {
210
238
lxb_dom_comment_t * comment = lxb_dom_interface_comment (node );
211
239
xmlNodePtr lxml_comment = xmlNewDocComment (lxml_doc , comment -> char_data .data .data );
@@ -247,15 +275,19 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
247
275
{
248
276
#ifdef LIBXML_HTML_ENABLED
249
277
xmlDocPtr lxml_doc = htmlNewDocNoDtD (NULL , NULL );
278
+ if (UNEXPECTED (!lxml_doc )) {
279
+ return LEXBOR_LIBXML2_BRIDGE_STATUS_OOM ;
280
+ }
250
281
#else
251
282
/* If HTML support is not enabled, then htmlNewDocNoDtD() is not available.
252
283
* This code mimics the behaviour. */
253
284
xmlDocPtr lxml_doc = xmlNewDoc ((const xmlChar * ) "1.0" );
254
- lxml_doc -> type = XML_HTML_DOCUMENT_NODE ;
255
- #endif
256
- if (!lxml_doc ) {
285
+ if (UNEXPECTED (!lxml_doc )) {
257
286
return LEXBOR_LIBXML2_BRIDGE_STATUS_OOM ;
258
287
}
288
+ lxml_doc -> type = XML_HTML_DOCUMENT_NODE ;
289
+ #endif
290
+ lxml_doc -> dict = xmlDictCreate ();
259
291
lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert (
260
292
lxb_dom_interface_node (document )-> last_child ,
261
293
lxml_doc ,
0 commit comments