Skip to content

Commit f3ece81

Browse files
authored
Optimize size of ext/dom (#12830)
* Remove unused upper case tag static data
* Shrink size of static binary search tree
  This also makes it more efficient on the data cache.
* Update patches
1 parent 37a1e19 commit f3ece81

File tree

6 files changed: 4741 additions (+) and 4729 deletions (-).

6 files changed

+4741
-4729
lines changed

ext/dom/lexbor/lexbor/core/sbst.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@ extern "C" {
1919
typedef struct {
2020
lxb_char_t key;
2121

22-
void *value;
23-
size_t value_len;
22+
lxb_char_t value[6];
23+
unsigned char value_len;
2424

25-
size_t left;
26-
size_t right;
27-
size_t next;
25+
unsigned short left;
26+
unsigned short right;
27+
unsigned short next;
2828
}
2929
lexbor_sbst_entry_static_t;
3030

ext/dom/lexbor/lexbor/html/tokenizer/res.h

Lines changed: 4721 additions & 4721 deletions
Large diffs are not rendered by default.

ext/dom/lexbor/lexbor/html/tokenizer/state.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1815,7 +1815,7 @@ lxb_html_tokenizer_state_char_ref_named(lxb_html_tokenizer_t *tkz,
18151815
goto done;
18161816
}
18171817

1818-
if (entry->value != NULL) {
1818+
if (entry->value[0] != 0) {
18191819
tkz->entity_end = (tkz->pos + (data - begin)) - tkz->start;
18201820
tkz->entity_match = entry;
18211821
}

ext/dom/lexbor/lexbor/tag/res.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ static const lxb_tag_data_t lxb_tag_res_data_default[LXB_TAG__LAST_ENTRY] =
224224
{{.u.short_str = "xmp", .length = 3, .next = NULL}, LXB_TAG_XMP, 1, true}
225225
};
226226

227+
#if 0
227228
static const lxb_tag_data_t lxb_tag_res_data_upper_default[LXB_TAG__LAST_ENTRY] =
228229
{
229230
{{.u.short_str = "#UNDEF", .length = 6, .next = NULL}, LXB_TAG__UNDEF, 1, true},
@@ -423,6 +424,7 @@ static const lxb_tag_data_t lxb_tag_res_data_upper_default[LXB_TAG__LAST_ENTRY]
423424
{{.u.short_str = "WBR", .length = 3, .next = NULL}, LXB_TAG_WBR, 1, true},
424425
{{.u.short_str = "XMP", .length = 3, .next = NULL}, LXB_TAG_XMP, 1, true}
425426
};
427+
#endif
426428

427429
static const lexbor_shs_entry_t lxb_tag_res_shs_data_default[] =
428430
{

ext/dom/lexbor/lexbor/tag/tag.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ lxb_tag_data_by_name(lexbor_hash_t *hash, const lxb_char_t *name, size_t len)
9292
lexbor_hash_search_lower, name, len);
9393
}
9494

95+
#if 0
9596
const lxb_tag_data_t *
9697
lxb_tag_data_by_name_upper(lexbor_hash_t *hash,
9798
const lxb_char_t *name, size_t len)
@@ -114,6 +115,7 @@ lxb_tag_data_by_name_upper(lexbor_hash_t *hash,
114115
return (const lxb_tag_data_t *) lexbor_hash_search(hash,
115116
lexbor_hash_search_upper, name, len);
116117
}
118+
#endif
117119

118120
/*
119121
* No inline functions for ABI.

ext/dom/lexbor/patches/README.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,23 @@ This contains the following patch files in mailbox format.
1717
A PHP specific patch to patch utilities and data structure to be able to generate smaller lookup tables.
1818
This patch won't be upstreamed because it breaks generality of those data structures, i.e. it only works
1919
because we only use it for character encoding.
20+
* 0001-Remove-unused-upper-case-tag-static-data.patch
21+
A PHP specific patch to remove unused upper case tag static data. This shrinks the static data size.
22+
* 0001-Shrink-size-of-static-binary-search-tree.patch
23+
A PHP specific patch to shrink the size of the static binary search tree for entities.
24+
This shrinks the static data size and reduces data cache pressure.
2025

2126
**Note** for this patch the utilities to generate the tables are also patched.
2227
Make sure to apply on a fresh Lexbor clone and run (in `lexbor/utils/encoding`): `python3 single-byte.py` and `python3 multi-byte.py` to generate the tables.
28+
Also run `python3 tokenizer_entities_bst.py` to generate the static binary search tree for entities.
2329

2430
## How to apply
2531

26-
* cd into `ext/dom/lexbor_bridge/lexbor`
32+
* cd into `ext/dom/lexbor/lexbor`
2733
* `git am -3 ../patches/0001-Expose-line-and-column-information-for-use-in-PHP.patch`
2834
* `git am -3 ../patches/0001-Track-implied-added-nodes-for-options-use-in-PHP.patch`
2935
* `git am -3 ../patches/0001-Patch-out-CSS-parser.patch`
3036
* `git am -3 ../patches/0001-Patch-utilities-and-data-structure-to-be-able-to-gen.patch`
31-
* `git reset HEAD~4` # 4 is the number of commits created by the above commands
37+
* `git am -3 ../patches/0001-Remove-unused-upper-case-tag-static-data.patch`
38+
* `git am -3 ../patches/0001-Shrink-size-of-static-binary-search-tree.patch`
39+
* `git reset HEAD~6` # 6 is the number of commits created by the above commands

0 commit comments

Comments
 (0)