16
16
* Normalized input.
17
17
*
18
18
* @typedef Schema
19
- * Sanitization configuration.
19
+ * Schema that defines what nodes and properties are allowed.
20
+ *
21
+ * The default schema is `defaultSchema`, which follows how GitHub sanitizes HTML.
22
+ * If any top-level key is missing in the given schema, the corresponding
23
+ * value of the default schema is used.
24
+ *
25
+ * To extend the standard schema with a few changes, clone `defaultSchema`
26
+ * like so:
27
+ *
28
+ * ```js
29
+ * import {h} from 'hastscript'
30
+ * import deepmerge from 'deepmerge' // You can use `structuredClone` in modern JS.
31
+ * import {sanitize, defaultSchema} from 'hast-util-sanitize'
32
+ *
33
+ * const schema = deepmerge(defaultSchema, {attributes: {'*': ['className']}})
34
+ *
35
+ * const tree = sanitize(h('div', {className: ['foo']}), schema)
36
+ *
37
+ * // `tree` still has `className`.
38
+ * console.log(tree)
39
+ * // {
40
+ * // type: 'element',
41
+ * // tagName: 'div',
42
+ * // properties: {className: ['foo']},
43
+ * // children: []
44
+ * // }
45
+ * ```
20
46
* @property {Attributes | null | undefined } [attributes]
21
- * Map of tag names to allowed properties.
47
+ * Map of tag names to allowed *property names*.
48
+ *
49
+ * The special key `'*'` as a tag name defines property names allowed on all
50
+ * elements.
51
+ *
52
+ * The special value `'data*'` as a property name can be used to allow all
53
+ * `data` properties.
54
+ *
55
+ * For example:
56
+ *
57
+ * ```js
58
+ * attributes: {
59
+ * a: ['href'],
60
+ * img: ['src', 'longDesc'],
61
+ * // …
62
+ * '*': [
63
+ * 'abbr',
64
+ * 'accept',
65
+ * 'acceptCharset',
66
+ * // …
67
+ * 'vSpace',
68
+ * 'width',
69
+ * 'itemProp'
70
+ * ]
71
+ * }
72
+ * ```
73
+ *
74
+ * Instead of a single string, which allows any *property value* of that
75
+ * property name, it’s also possible to provide an array to allow several
76
+ * values.
77
+ * For example, `input: ['type']` allows the `type` attribute set to any
78
+ * value on inputs.
79
+ * But `input: [['type', 'checkbox', 'radio']]` allows `type` only when set
80
+ * to one of the allowed values (`'checkbox'` or `'radio'`).
81
+ *
82
+ * You can also use regexes, so for example `span: [['className', /^hljs-/]]`
83
+ * allows any class that starts with `hljs-` on `span` elements.
84
+ *
85
+ * This is how the default GitHub schema allows only disabled checkbox
86
+ * inputs:
22
87
*
23
- * The special `'*'` key defines property names allowed on all elements.
88
+ * ```js
89
+ * attributes: {
90
+ * // …
91
+ * input: [
92
+ * ['type', 'checkbox'],
93
+ * ['disabled', true]
94
+ * ]
95
+ * // …
96
+ * }
97
+ * ```
98
+ *
99
+ * Attributes also play well with properties that accept space- or
100
+ * comma-separated values, such as `class`.
101
+ * Say you wanted to allow certain classes on `span` elements for syntax
102
+ * highlighting, that can be done like this:
103
+ *
104
+ * ```js
105
+ * // …
106
+ * span: [
107
+ * ['className', 'token', 'number', 'operator']
108
+ * ]
109
+ * // …
110
+ * ```
24
111
* @property {Record<string, Record<string, PropertyValue>> | null | undefined } [required]
25
- * Map of tag names to required property names and their default property value.
112
+ * Map of tag names to required *property names* and their default *property
113
+ * value*.
114
+ *
115
+ * If the defined keys do not exist in an element’s properties, they are added
116
+ * and set to the specified value.
117
+ *
118
+ * Note that properties are first checked based on the schema at `attributes`,
119
+ * so properties could be removed by that step and then added again through
120
+ * `required`.
121
+ *
122
+ * For example:
123
+ *
124
+ * ```js
125
+ * required: {
126
+ * input: {type: 'checkbox', disabled: true}
127
+ * }
128
+ * ```
26
129
* @property {Array<string> | null | undefined } [tagNames]
27
130
* List of allowed tag names.
131
+ *
132
+ * For example:
133
+ *
134
+ * ```js
135
+ * tagNames: [
136
+ * 'h1',
137
+ * 'h2',
138
+ * 'h3',
139
+ * // …
140
+ * 'strike',
141
+ * 'summary',
142
+ * 'details'
143
+ * ]
144
+ * ```
28
145
* @property {Record<string, Array<string>> | null | undefined } [protocols]
29
- * Map of protocols to allow in property values.
146
+ * Map of *property names* to allowed protocols.
147
+ *
148
+ * The listed property names can be set to URLs that are local (relative to
149
+ * the current website, such as `this`, `#this`, `/this`, or `?this`) or
150
+ * remote (such as `https://example.com`), in which case they must have a
151
+ * protocol that is allowed here.
152
+ *
153
+ * For example:
154
+ *
155
+ * ```js
156
+ * protocols: {
157
+ * href: ['http', 'https', 'mailto'],
158
+ * // …
159
+ * longDesc: ['http', 'https']
160
+ * }
161
+ * ```
30
162
* @property {Record<string, Array<string>> | null | undefined } [ancestors]
31
- * Map of tag names to their required ancestor elements.
163
+ * Map of tag names to a list of tag names which are required ancestors.
164
+ *
165
+ * Elements with these tag names will be ignored if they occur outside of one
166
+ * of their allowed parents.
167
+ *
168
+ * For example:
169
+ *
170
+ * ```js
171
+ * ancestors: {
172
+ * li: ['ol', 'ul'],
173
+ * // …
174
+ * tr: ['table']
175
+ * }
176
+ * ```
32
177
* @property {Array<string> | null | undefined } [clobber]
33
- * List of allowed property names which can clobber.
178
+ * List of *property names* that clobber (`Array<string>`).
179
+ *
180
+ * For example:
181
+ *
182
+ * ```js
183
+ * clobber: ['name', 'id']
184
+ * ```
34
185
* @property {string | null | undefined } [clobberPrefix]
35
- * Prefix to use before potentially clobbering property names.
186
+ * Prefix to use before clobbering properties.
187
+ *
188
+ * For example:
189
+ *
190
+ * ```js
191
+ * clobberPrefix: 'user-content-'
192
+ * ```
36
193
* @property {Array<string> | null | undefined } [strip]
37
- * Names of elements to strip from the tree.
38
- * @property {boolean | null | undefined } [allowComments]
39
- * Whether to allow comments.
40
- * @property {boolean | null | undefined } [allowDoctypes]
41
- * Whether to allow doctypes.
194
+ * List of tag names to strip from the tree.
195
+ *
196
+ * By default, unsafe elements are replaced by their children.
197
+ * Some elements should however be entirely stripped from the tree.
198
+ *
199
+ * For example:
200
+ *
201
+ * ```js
202
+ * strip: ['script']
203
+ * ```
204
+ * @property {boolean | null | undefined } [allowComments=false]
205
+ * Whether to allow comment nodes.
206
+ *
207
+ * For example:
208
+ *
209
+ * ```js
210
+ * allowComments: true
211
+ * ```
212
+ * @property {boolean | null | undefined } [allowDoctypes=false]
213
+ * Whether to allow doctype nodes.
214
+ *
215
+ * ```js
216
+ * allowDoctypes: true
217
+ * ```
42
218
*
43
219
* @typedef {(schema: Schema, value: any, node: any, stack: Array<string>) => unknown } Handler
44
220
* @typedef {Record<string, Handler> } NodeDefinition
@@ -65,12 +241,14 @@ const nodeSchema = {
65
241
}
66
242
67
243
/**
68
- * Utility to sanitize a tree
244
+ * Sanitize a tree.
69
245
*
70
246
* @param {Node } node
71
- * Hast tree to sanitize
247
+ * Tree to clean.
72
248
* @param {Schema | null | undefined } [schema]
73
- * Schema defining how to sanitize - defaults to Github style sanitation
249
+ * Schema defining how to sanitize.
250
+ * @returns {Node }
251
+ * New, sanitized, tree.
74
252
*/
75
253
export function sanitize ( node , schema ) {
76
254
/** @type {Node } */
@@ -420,19 +598,23 @@ function handlePropertyValue(schema, value, prop, definition) {
420
598
* @returns {boolean }
421
599
*/
422
600
function safeProtocol ( schema , value , prop ) {
601
+ const protocols =
602
+ schema . protocols && own . call ( schema . protocols , prop )
603
+ ? schema . protocols [ prop ] . concat ( )
604
+ : [ ]
605
+
606
+ // Not listed.
607
+ if ( protocols . length === 0 ) {
608
+ return true
609
+ }
610
+
423
611
const url = String ( value )
424
612
const colon = url . indexOf ( ':' )
425
613
const questionMark = url . indexOf ( '?' )
426
614
const numberSign = url . indexOf ( '#' )
427
615
const slash = url . indexOf ( '/' )
428
- const protocols =
429
- schema . protocols && own . call ( schema . protocols , prop )
430
- ? schema . protocols [ prop ] . concat ( )
431
- : [ ]
432
- let index = - 1
433
616
434
617
if (
435
- protocols . length === 0 ||
436
618
colon < 0 ||
437
619
// If the first colon is after a `?`, `#`, or `/`, it’s not a protocol.
438
620
( slash > - 1 && colon > slash ) ||
@@ -442,6 +624,8 @@ function safeProtocol(schema, value, prop) {
442
624
return true
443
625
}
444
626
627
+ let index = - 1
628
+
445
629
while ( ++ index < protocols . length ) {
446
630
if (
447
631
colon === protocols [ index ] . length &&
0 commit comments