Skip to content

Commit 43b43ed

Browse files
committed
Implement HTML5Document
1 parent 527a355 commit 43b43ed

File tree

102 files changed

+5412
-793
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

102 files changed

+5412
-793
lines changed

ext/dom/config.m4

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,21 @@ if test "$PHP_DOM" != "no"; then
1212

1313
PHP_SETUP_LIBXML(DOM_SHARED_LIBADD, [
1414
AC_DEFINE(HAVE_DOM,1,[ ])
15+
PHP_LEXBOR_CFLAGS="-I@ext_srcdir@/lexbor -DLEXBOR_STATIC"
16+
LEXBOR_DIR="lexbor/lexbor"
17+
LEXBOR_SOURCES="$LEXBOR_DIR/ports/posix/lexbor/core/memory.c \
18+
$LEXBOR_DIR/core/array_obj.c $LEXBOR_DIR/core/array.c $LEXBOR_DIR/core/avl.c $LEXBOR_DIR/core/bst.c $LEXBOR_DIR/core/diyfp.c $LEXBOR_DIR/core/conv.c $LEXBOR_DIR/core/dobject.c $LEXBOR_DIR/core/dtoa.c $LEXBOR_DIR/core/hash.c $LEXBOR_DIR/core/mem.c $LEXBOR_DIR/core/mraw.c $LEXBOR_DIR/core/print.c $LEXBOR_DIR/core/serialize.c $LEXBOR_DIR/core/shs.c $LEXBOR_DIR/core/str.c $LEXBOR_DIR/core/strtod.c \
19+
$LEXBOR_DIR/dom/interface.c $LEXBOR_DIR/dom/interfaces/attr.c $LEXBOR_DIR/dom/interfaces/cdata_section.c $LEXBOR_DIR/dom/interfaces/character_data.c $LEXBOR_DIR/dom/interfaces/comment.c $LEXBOR_DIR/dom/interfaces/document.c $LEXBOR_DIR/dom/interfaces/document_fragment.c $LEXBOR_DIR/dom/interfaces/document_type.c $LEXBOR_DIR/dom/interfaces/element.c $LEXBOR_DIR/dom/interfaces/node.c $LEXBOR_DIR/dom/interfaces/processing_instruction.c $LEXBOR_DIR/dom/interfaces/shadow_root.c $LEXBOR_DIR/dom/interfaces/text.c \
20+
$LEXBOR_DIR/html/tokenizer/error.c $LEXBOR_DIR/html/tokenizer/state_comment.c $LEXBOR_DIR/html/tokenizer/state_doctype.c $LEXBOR_DIR/html/tokenizer/state_rawtext.c $LEXBOR_DIR/html/tokenizer/state_rcdata.c $LEXBOR_DIR/html/tokenizer/state_script.c $LEXBOR_DIR/html/tokenizer/state.c \
21+
$LEXBOR_DIR/html/tree/active_formatting.c $LEXBOR_DIR/html/tree/error.c $LEXBOR_DIR/html/tree/insertion_mode/after_after_body.c $LEXBOR_DIR/html/tree/insertion_mode/after_after_frameset.c $LEXBOR_DIR/html/tree/insertion_mode/after_body.c $LEXBOR_DIR/html/tree/insertion_mode/after_frameset.c $LEXBOR_DIR/html/tree/insertion_mode/after_head.c $LEXBOR_DIR/html/tree/insertion_mode/before_head.c $LEXBOR_DIR/html/tree/insertion_mode/before_html.c $LEXBOR_DIR/html/tree/insertion_mode/foreign_content.c $LEXBOR_DIR/html/tree/insertion_mode/in_body.c $LEXBOR_DIR/html/tree/insertion_mode/in_caption.c $LEXBOR_DIR/html/tree/insertion_mode/in_cell.c $LEXBOR_DIR/html/tree/insertion_mode/in_column_group.c $LEXBOR_DIR/html/tree/insertion_mode/in_frameset.c $LEXBOR_DIR/html/tree/insertion_mode/in_head.c $LEXBOR_DIR/html/tree/insertion_mode/in_head_noscript.c $LEXBOR_DIR/html/tree/insertion_mode/initial.c $LEXBOR_DIR/html/tree/insertion_mode/in_row.c $LEXBOR_DIR/html/tree/insertion_mode/in_select.c $LEXBOR_DIR/html/tree/insertion_mode/in_select_in_table.c $LEXBOR_DIR/html/tree/insertion_mode/in_table_body.c $LEXBOR_DIR/html/tree/insertion_mode/in_table.c $LEXBOR_DIR/html/tree/insertion_mode/in_table_text.c $LEXBOR_DIR/html/tree/insertion_mode/in_template.c $LEXBOR_DIR/html/tree/insertion_mode/text.c $LEXBOR_DIR/html/tree/open_elements.c \
22+
$LEXBOR_DIR/encoding/big5.c $LEXBOR_DIR/encoding/decode.c $LEXBOR_DIR/encoding/encode.c $LEXBOR_DIR/encoding/encoding.c $LEXBOR_DIR/encoding/euc_kr.c $LEXBOR_DIR/encoding/gb18030.c $LEXBOR_DIR/encoding/iso_2022_jp_katakana.c $LEXBOR_DIR/encoding/jis0208.c $LEXBOR_DIR/encoding/jis0212.c $LEXBOR_DIR/encoding/range.c $LEXBOR_DIR/encoding/res.c $LEXBOR_DIR/encoding/single.c \
23+
$LEXBOR_DIR/html/encoding.c $LEXBOR_DIR/html/interface.c $LEXBOR_DIR/html/parser.c $LEXBOR_DIR/html/token.c $LEXBOR_DIR/html/token_attr.c $LEXBOR_DIR/html/tokenizer.c $LEXBOR_DIR/html/tree.c \
24+
$LEXBOR_DIR/html/interfaces/anchor_element.c $LEXBOR_DIR/html/interfaces/area_element.c $LEXBOR_DIR/html/interfaces/audio_element.c $LEXBOR_DIR/html/interfaces/base_element.c $LEXBOR_DIR/html/interfaces/body_element.c $LEXBOR_DIR/html/interfaces/br_element.c $LEXBOR_DIR/html/interfaces/button_element.c $LEXBOR_DIR/html/interfaces/canvas_element.c $LEXBOR_DIR/html/interfaces/data_element.c $LEXBOR_DIR/html/interfaces/data_list_element.c $LEXBOR_DIR/html/interfaces/details_element.c $LEXBOR_DIR/html/interfaces/dialog_element.c $LEXBOR_DIR/html/interfaces/directory_element.c $LEXBOR_DIR/html/interfaces/div_element.c $LEXBOR_DIR/html/interfaces/d_list_element.c $LEXBOR_DIR/html/interfaces/document.c $LEXBOR_DIR/html/interfaces/element.c $LEXBOR_DIR/html/interfaces/embed_element.c $LEXBOR_DIR/html/interfaces/field_set_element.c $LEXBOR_DIR/html/interfaces/font_element.c $LEXBOR_DIR/html/interfaces/form_element.c $LEXBOR_DIR/html/interfaces/frame_element.c $LEXBOR_DIR/html/interfaces/frame_set_element.c $LEXBOR_DIR/html/interfaces/head_element.c $LEXBOR_DIR/html/interfaces/heading_element.c $LEXBOR_DIR/html/interfaces/hr_element.c $LEXBOR_DIR/html/interfaces/html_element.c $LEXBOR_DIR/html/interfaces/iframe_element.c $LEXBOR_DIR/html/interfaces/image_element.c $LEXBOR_DIR/html/interfaces/input_element.c $LEXBOR_DIR/html/interfaces/label_element.c $LEXBOR_DIR/html/interfaces/legend_element.c $LEXBOR_DIR/html/interfaces/li_element.c $LEXBOR_DIR/html/interfaces/link_element.c $LEXBOR_DIR/html/interfaces/map_element.c $LEXBOR_DIR/html/interfaces/marquee_element.c $LEXBOR_DIR/html/interfaces/media_element.c $LEXBOR_DIR/html/interfaces/menu_element.c $LEXBOR_DIR/html/interfaces/meta_element.c $LEXBOR_DIR/html/interfaces/meter_element.c $LEXBOR_DIR/html/interfaces/mod_element.c $LEXBOR_DIR/html/interfaces/object_element.c $LEXBOR_DIR/html/interfaces/o_list_element.c $LEXBOR_DIR/html/interfaces/opt_group_element.c $LEXBOR_DIR/html/interfaces/option_element.c 
$LEXBOR_DIR/html/interfaces/output_element.c $LEXBOR_DIR/html/interfaces/paragraph_element.c $LEXBOR_DIR/html/interfaces/param_element.c $LEXBOR_DIR/html/interfaces/picture_element.c $LEXBOR_DIR/html/interfaces/pre_element.c $LEXBOR_DIR/html/interfaces/progress_element.c $LEXBOR_DIR/html/interfaces/quote_element.c $LEXBOR_DIR/html/interfaces/script_element.c $LEXBOR_DIR/html/interfaces/select_element.c $LEXBOR_DIR/html/interfaces/slot_element.c $LEXBOR_DIR/html/interfaces/source_element.c $LEXBOR_DIR/html/interfaces/span_element.c $LEXBOR_DIR/html/interfaces/style_element.c $LEXBOR_DIR/html/interfaces/table_caption_element.c $LEXBOR_DIR/html/interfaces/table_cell_element.c $LEXBOR_DIR/html/interfaces/table_col_element.c $LEXBOR_DIR/html/interfaces/table_element.c $LEXBOR_DIR/html/interfaces/table_row_element.c $LEXBOR_DIR/html/interfaces/table_section_element.c $LEXBOR_DIR/html/interfaces/template_element.c $LEXBOR_DIR/html/interfaces/text_area_element.c $LEXBOR_DIR/html/interfaces/time_element.c $LEXBOR_DIR/html/interfaces/title_element.c $LEXBOR_DIR/html/interfaces/track_element.c $LEXBOR_DIR/html/interfaces/u_list_element.c $LEXBOR_DIR/html/interfaces/unknown_element.c $LEXBOR_DIR/html/interfaces/video_element.c $LEXBOR_DIR/html/interfaces/window.c \
25+
$LEXBOR_DIR/selectors/selectors.c \
26+
$LEXBOR_DIR/ns/ns.c \
27+
$LEXBOR_DIR/tag/tag.c"
1528
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c \
29+
html5_document.c html5_serializer.c html5_parser.c namespace_compat.c \
1630
domexception.c parentnode.c \
1731
processinginstruction.c cdatasection.c \
1832
documentfragment.c domimplementation.c \
@@ -21,8 +35,9 @@ if test "$PHP_DOM" != "no"; then
2135
nodelist.c text.c comment.c \
2236
entityreference.c \
2337
notation.c xpath.c dom_iterators.c \
24-
namednodemap.c],
25-
$ext_shared)
38+
namednodemap.c \
39+
$LEXBOR_SOURCES],
40+
$ext_shared,,$PHP_LEXBOR_CFLAGS)
2641
PHP_SUBST(DOM_SHARED_LIBADD)
2742
PHP_INSTALL_HEADERS([ext/dom/xml_common.h])
2843
PHP_ADD_EXTENSION_DEP(dom, libxml)

ext/dom/config.w32

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,29 @@ if (PHP_DOM == "yes") {
88
CHECK_HEADER_ADD_INCLUDE("libxml/parser.h", "CFLAGS_DOM", PHP_PHP_BUILD + "\\include\\libxml2")
99
) {
1010
EXTENSION("dom", "php_dom.c attr.c document.c \
11+
html5_document.c html5_serializer.c html5_parser.c namespace_compat.c \
1112
domexception.c parentnode.c processinginstruction.c \
1213
cdatasection.c documentfragment.c domimplementation.c element.c \
1314
node.c characterdata.c documenttype.c \
1415
entity.c nodelist.c text.c comment.c \
1516
entityreference.c \
1617
notation.c xpath.c dom_iterators.c \
17-
namednodemap.c");
18+
namednodemap.c", null, "-Iext/dom/lexbor");
19+
20+
ADD_SOURCES("ext/dom/lexbor/lexbor/ports/windows_nt/lexbor/core", "memory.c", "dom");
21+
ADD_SOURCES("ext/dom/lexbor/lexbor/core", "array_obj.c array.c avl.c bst.c diyfp.c conv.c dobject.c dtoa.c hash.c mem.c mraw.c print.c serialize.c shs.c str.c strtod.c", "dom");
22+
ADD_SOURCES("ext/dom/lexbor/lexbor/dom", "interface.c", "dom");
23+
ADD_SOURCES("ext/dom/lexbor/lexbor/dom/interfaces", "attr.c cdata_section.c character_data.c comment.c document.c document_fragment.c document_type.c element.c node.c processing_instruction.c shadow_root.c text.c", "dom");
24+
ADD_SOURCES("ext/dom/lexbor/lexbor/html/tokenizer", "error.c state_comment.c state_doctype.c state_rawtext.c state_rcdata.c state_script.c state.c", "dom");
25+
ADD_SOURCES("ext/dom/lexbor/lexbor/html/tree", "active_formatting.c open_elements.c error.c", "dom");
26+
ADD_SOURCES("ext/dom/lexbor/lexbor/html/tree/insertion_mode", "after_after_body.c after_after_frameset.c after_body.c after_frameset.c after_head.c before_head.c before_html.c foreign_content.c in_body.c in_caption.c in_cell.c in_column_group.c in_frameset.c in_head.c in_head_noscript.c initial.c in_row.c in_select.c in_select_in_table.c in_table_body.c in_table.c in_table_text.c in_template.c text.c", "dom");
27+
ADD_SOURCES("ext/dom/lexbor/lexbor/html", "encoding.c interface.c parser.c token.c token_attr.c tokenizer.c tree.c", "dom");
28+
ADD_SOURCES("ext/dom/lexbor/lexbor/encoding", "big5.c decode.c encode.c encoding.c euc_kr.c gb18030.c iso_2022_jp_katakana.c jis0208.c jis0212.c range.c res.c single.c", "dom");
29+
ADD_SOURCES("ext/dom/lexbor/lexbor/html/interfaces", "anchor_element.c area_element.c audio_element.c base_element.c body_element.c br_element.c button_element.c canvas_element.c data_element.c data_list_element.c details_element.c dialog_element.c directory_element.c div_element.c d_list_element.c document.c element.c embed_element.c field_set_element.c font_element.c form_element.c frame_element.c frame_set_element.c head_element.c heading_element.c hr_element.c html_element.c iframe_element.c image_element.c input_element.c label_element.c legend_element.c li_element.c link_element.c map_element.c marquee_element.c media_element.c menu_element.c meta_element.c meter_element.c mod_element.c object_element.c o_list_element.c opt_group_element.c option_element.c output_element.c paragraph_element.c param_element.c picture_element.c pre_element.c progress_element.c quote_element.c script_element.c select_element.c slot_element.c source_element.c span_element.c style_element.c table_caption_element.c table_cell_element.c table_col_element.c table_element.c table_row_element.c table_section_element.c template_element.c text_area_element.c time_element.c title_element.c track_element.c u_list_element.c unknown_element.c video_element.c window.c", "dom");
30+
ADD_SOURCES("ext/dom/lexbor/lexbor/selectors", "selectors.c", "dom");
31+
ADD_SOURCES("ext/dom/lexbor/lexbor/ns", "ns.c", "dom");
32+
ADD_SOURCES("ext/dom/lexbor/lexbor/tag", "tag.c", "dom");
33+
ADD_FLAG("CFLAGS_DOM", "/D LEXBOR_STATIC ");
1834

1935
AC_DEFINE("HAVE_DOM", 1, "DOM support");
2036

ext/dom/document.c

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,6 @@ struct _idsIterator {
3535
xmlNode *element;
3636
};
3737

38-
#define DOM_LOAD_STRING 0
39-
#define DOM_LOAD_FILE 1
40-
4138
/*
4239
* class DOMDocument extends DOMNode
4340
*
@@ -794,7 +791,7 @@ PHP_METHOD(DOMDocument, importNode)
794791
if (nodep->doc == docp) {
795792
retnodep = nodep;
796793
} else {
797-
retnodep = dom_clone_node(nodep, docp, recursive);
794+
retnodep = dom_clone_node(nodep, docp, intern, recursive);
798795
if (!retnodep) {
799796
RETURN_FALSE;
800797
}
@@ -1101,8 +1098,7 @@ PHP_METHOD(DOMDocument, normalizeDocument)
11011098
}
11021099
/* }}} end dom_document_normalize_document */
11031100

1104-
/* {{{ */
1105-
PHP_METHOD(DOMDocument, __construct)
1101+
void php_dom_document_constructor(INTERNAL_FUNCTION_PARAMETERS)
11061102
{
11071103
xmlDoc *docp = NULL, *olddoc;
11081104
dom_object *intern;
@@ -1141,6 +1137,12 @@ PHP_METHOD(DOMDocument, __construct)
11411137
}
11421138
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)docp, (void *)intern);
11431139
}
1140+
1141+
/* {{{ */
1142+
PHP_METHOD(DOMDocument, __construct)
1143+
{
1144+
php_dom_document_constructor(INTERNAL_FUNCTION_PARAM_PASSTHRU);
1145+
}
11441146
/* }}} end DOMDocument::__construct */
11451147

11461148
char *_dom_get_valid_file_path(char *source, char *resolved_path, int resolved_path_len ) /* {{{ */
@@ -1310,14 +1312,15 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so
13101312
}
13111313
/* }}} */
13121314

1313-
static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPtr newdoc)
1315+
void php_dom_finish_loading_document(zval *this, zval *return_value, xmlDocPtr newdoc)
13141316
{
13151317
if (!newdoc)
13161318
RETURN_FALSE;
13171319

13181320
dom_object *intern = Z_DOMOBJ_P(this);
13191321
size_t old_modification_nr = 0;
13201322
if (intern != NULL) {
1323+
bool is_html5_class = intern->document->is_html5_class;
13211324
xmlDocPtr docp = (xmlDocPtr) dom_object_get_node(intern);
13221325
dom_doc_propsptr doc_prop = NULL;
13231326
if (docp != NULL) {
@@ -1337,6 +1340,7 @@ static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPt
13371340
RETURN_FALSE;
13381341
}
13391342
intern->document->doc_props = doc_prop;
1343+
intern->document->is_html5_class = is_html5_class;
13401344
}
13411345

13421346
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
@@ -1350,8 +1354,7 @@ static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPt
13501354
RETURN_TRUE;
13511355
}
13521356

1353-
/* {{{ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) */
1354-
static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {
1357+
void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode, xmlDocPtr *doc_out) {
13551358
char *source;
13561359
size_t source_len;
13571360
zend_long options = 0;
@@ -1374,17 +1377,18 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {
13741377
}
13751378

13761379
xmlDocPtr newdoc = dom_document_parser(ZEND_THIS, mode, source, source_len, options);
1380+
*doc_out = newdoc;
13771381

1378-
dom_finish_loading_document(ZEND_THIS, return_value, newdoc);
1382+
php_dom_finish_loading_document(ZEND_THIS, return_value, newdoc);
13791383
}
1380-
/* }}} end dom_parser_document */
13811384

13821385
/* {{{ URL: http://www.w3.org/TR/DOM-Level-3-LS/load-save.html#LS-DocumentLS-load
13831386
Since: DOM Level 3
13841387
*/
13851388
PHP_METHOD(DOMDocument, load)
13861389
{
1387-
dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
1390+
xmlDocPtr unused;
1391+
dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE, &unused);
13881392
}
13891393
/* }}} end dom_document_load */
13901394

@@ -1393,7 +1397,8 @@ Since: DOM Level 3
13931397
*/
13941398
PHP_METHOD(DOMDocument, loadXML)
13951399
{
1396-
dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
1400+
xmlDocPtr unused;
1401+
dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING, &unused);
13971402
}
13981403
/* }}} end dom_document_loadxml */
13991404

@@ -1915,7 +1920,7 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
19151920
xmlDocPtr newdoc = ctxt->myDoc;
19161921
htmlFreeParserCtxt(ctxt);
19171922

1918-
dom_finish_loading_document(ZEND_THIS, return_value, newdoc);
1923+
php_dom_finish_loading_document(ZEND_THIS, return_value, newdoc);
19191924
}
19201925
/* }}} */
19211926

ext/dom/dom_ce.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ extern PHP_DOM_EXPORT zend_class_entry *dom_domexception_class_entry;
2323
extern PHP_DOM_EXPORT zend_class_entry *dom_domimplementation_class_entry;
2424
extern PHP_DOM_EXPORT zend_class_entry *dom_documentfragment_class_entry;
2525
extern PHP_DOM_EXPORT zend_class_entry *dom_document_class_entry;
26+
extern PHP_DOM_EXPORT zend_class_entry *dom_html5_document_class_entry;
2627
extern PHP_DOM_EXPORT zend_class_entry *dom_nodelist_class_entry;
2728
extern PHP_DOM_EXPORT zend_class_entry *dom_namednodemap_class_entry;
2829
extern PHP_DOM_EXPORT zend_class_entry *dom_characterdata_class_entry;

ext/dom/dom_properties.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ zend_result dom_document_recover_write(dom_object *obj, zval *newval);
6161
zend_result dom_document_substitue_entities_read(dom_object *obj, zval *retval);
6262
zend_result dom_document_substitue_entities_write(dom_object *obj, zval *newval);
6363

64+
/* html5 document properties */
65+
/* Write handler for the HTML5 document "encoding" property (going by its name).
 * The zval argument is named newval to match the other *_write handlers declared
 * in this header; presumably it carries the value being assigned, so confirm
 * against the definition. */
zend_result dom_html5_document_encoding_write(dom_object *obj, zval *newval);
66+
6467
/* documenttype properties */
6568
zend_result dom_documenttype_name_read(dom_object *obj, zval *retval);
6669
zend_result dom_documenttype_entities_read(dom_object *obj, zval *retval);

0 commit comments

Comments
 (0)