From cfde96351de57fc255fd7de76cfaf8f130cc014d Mon Sep 17 00:00:00 2001 From: Remco Haszing Date: Wed, 15 Mar 2023 14:17:27 +0100 Subject: [PATCH 01/11] Add implementation --- .npmrc | 2 + .prettierignore | 2 + index.js | 5 + lib/browser.js | 36 ++++++ lib/index.js | 18 +++ package.json | 98 ++++++++++++++++ readme.md | 303 ++++++++++++++++++++++++++++++++++++++++++++++++ test/index.js | 95 +++++++++++++++ tsconfig.json | 17 +++ 9 files changed, 576 insertions(+) create mode 100644 .npmrc create mode 100644 .prettierignore create mode 100644 index.js create mode 100644 lib/browser.js create mode 100644 lib/index.js create mode 100644 package.json create mode 100644 readme.md create mode 100644 test/index.js create mode 100644 tsconfig.json diff --git a/.npmrc b/.npmrc new file mode 100644 index 0000000..9951b11 --- /dev/null +++ b/.npmrc @@ -0,0 +1,2 @@ +package-lock=false +ignore-scripts=true diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000..cebe81f --- /dev/null +++ b/.prettierignore @@ -0,0 +1,2 @@ +coverage/ +*.md diff --git a/index.js b/index.js new file mode 100644 index 0000000..2513390 --- /dev/null +++ b/index.js @@ -0,0 +1,5 @@ +/** + * @typedef {import('./lib/index.js').Options} Options + */ + +export {fromHtml} from './lib/index.js' diff --git a/lib/browser.js b/lib/browser.js new file mode 100644 index 0000000..5bd74c5 --- /dev/null +++ b/lib/browser.js @@ -0,0 +1,36 @@ +/** + * @typedef {typeof import('./index.js').fromHtml} FromHtml + * @typedef {import('hast').RootContent} RootContent + */ + +import {fromDom} from 'hast-util-from-dom' + +const template = document.createElement('template') +const parser = new DOMParser() + +/** + * @param {string} value + */ +function parseFragment(value) { + template.innerHTML = value + return template.content +} + +/** @type {FromHtml} */ +export function fromHtml(value, options) { + /** @type {RootContent[]} */ + const children = [] + const node = options?.fragment + ? 
parseFragment(value) + : parser.parseFromString(value, 'text/html') + + while (node.firstChild) { + children.push(/** @type {RootContent} */ (fromDom(node.firstChild))) + node.firstChild.remove() + } + + return { + type: 'root', + children + } +} diff --git a/lib/index.js b/lib/index.js new file mode 100644 index 0000000..728290f --- /dev/null +++ b/lib/index.js @@ -0,0 +1,18 @@ +/** + * @typedef {import('hast').Root} Root + * @typedef {Pick} Options + */ + +import {fromHtml as fromHtmlNode} from 'hast-util-from-html' + +/** + * @param {string} value + * Serialized HTML to parse. + * @param {Options} [options] + * Configuration (optional). + * @returns {import('hast').Root} + * Tree + */ +export function fromHtml(value, options) { + return fromHtmlNode(value, options) +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..7eec3b5 --- /dev/null +++ b/package.json @@ -0,0 +1,98 @@ +{ + "name": "hast-util-from-html-isomorphic", + "version": "0.0.0", + "description": "hast utility that turns HTML into a syntax tree", + "license": "MIT", + "keywords": [ + "unist", + "hast", + "hast-util", + "util", + "utility", + "html", + "parse", + "dom" + ], + "repository": "syntax-tree/hast-util-from-html-isomorphic", + "bugs": "https://github.com/syntax-tree/hast-util-from-html-isomorphic/issues", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + }, + "author": "Remco Haszing ", + "contributors": [ + "Remco Haszing " + ], + "sideEffects": false, + "type": "module", + "exports": { + ".": { + "worker": "./index.js", + "browser": "./lib/browser.js", + "default": "./index.js" + } + }, + "files": [ + "lib/", + "index.d.ts", + "index.js" + ], + "dependencies": { + "@types/hast": "^2.0.0", + "hast-util-from-html": "^1.0.0", + "hast-util-from-dom": "^4.0.0" + }, + "devDependencies": { + "@types/jsdom": "^21.0.0", + "@types/node": "^18.0.0", + "c8": "^7.0.0", + "jsdom": "^21.0.0", + "prettier": "^2.0.0", + "remark-cli": 
"^11.0.0", + "remark-preset-wooorm": "^9.0.0", + "type-coverage": "^2.0.0", + "typescript": "^4.0.0", + "unist-util-remove-position": "^4.0.0", + "xo": "^0.53.0" + }, + "scripts": { + "prepack": "npm run build && npm run format", + "build": "tsc --build --clean && tsc --build && type-coverage", + "format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix", + "test-browser": "node --conditions development --conditions browser --test", + "test-node": "node --conditions development --test", + "test-worker": "node --conditions development --conditions worker --test", + "test-api": "npm run test-browser && npm run test-node && npm run test-worker", + "test-coverage": "c8 --check-coverage --100 --reporter lcov npm run test-api", + "test": "npm run build && npm run format && npm run test-coverage" + }, + "prettier": { + "tabWidth": 2, + "useTabs": false, + "singleQuote": true, + "bracketSpacing": false, + "semi": false, + "trailingComma": "none" + }, + "xo": { + "prettier": true, + "env": [ + "es2021", + "browser" + ] + }, + "remarkConfig": { + "plugins": [ + "preset-wooorm", + [ + "remark-lint-no-html", + false + ] + ] + }, + "typeCoverage": { + "atLeast": 100, + "detail": true, + "strict": true + } +} diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..1b838ca --- /dev/null +++ b/readme.md @@ -0,0 +1,303 @@ +# hast-util-from-html-isomorphic + +[![Build][build-badge]][build] +[![Coverage][coverage-badge]][coverage] +[![Downloads][downloads-badge]][downloads] +[![Size][size-badge]][size] +[![Sponsors][sponsors-badge]][collective] +[![Backers][backers-badge]][collective] +[![Chat][chat-badge]][chat] + +[hast][] utility that turns HTML into a syntax tree. 
+ +## Contents + +* [What is this?](#what-is-this) +* [When should I use this?](#when-should-i-use-this) +* [Install](#install) +* [Use](#use) +* [API](#api) + * [`fromHtml(value[, options])`](#fromhtmlvalue-options) + * [`Options`](#options) + * [`OnError`](#onerror) + * [`ErrorCode`](#errorcode) + * [`ErrorSeverity`](#errorseverity) +* [Examples](#examples) + * [Example: fragment versus document](#example-fragment-versus-document) + * [Example: whitespace around and inside ``](#example-whitespace-around-and-inside-html) + * [Example: parse errors](#example-parse-errors) +* [Syntax](#syntax) +* [Types](#types-2) +* [Compatibility](#compatibility) +* [Security](#security) +* [Related](#related) +* [Contribute](#contribute) +* [License](#license) + +## What is this? + +This package is a utility that takes HTML input and turns it into a hast syntax +tree. + +When running in a browser, this uses [`hast-util-from-dom`][hast-util-from-dom], +otherwise this uses [`hast-util-from-html`][hast-util-from-html]. + +## When should I use this? + +If you want to handle syntax trees manually in isomorphic code and care about +bundle size, use this. + +If you want to use positional information, use +[`hast-util-from-html`][hast-util-from-html]. + +If you don’t care about positional information, and your code will only run in +the browser, use [`hast-util-from-dom`][hast-util-from-dom]. + +Finally you can use the utility [`hast-util-to-html`][hast-util-to-html] to do +the inverse of this utility. +It turns hast into HTML. + +## Install + +This package is [ESM only][esm]. +In Node.js (version 14.14+ and 16.0+), install with [npm][]: + +```sh +npm install hast-util-from-html-isomorphic +``` + +In Deno with [`esm.sh`][esmsh]: + +```js +import {fromHtml} from "https://esm.sh/hast-util-from-html-isomorphic@1" +``` + +In browsers with [`esm.sh`][esmsh]: + +```html + +``` + +## Use + +```js +import {fromHtml} from 'hast-util-from-html-isomorphic' + +const tree = fromHtml('

Hello, world!

', {fragment: true}) + +console.log(tree) +``` + +Yields (positional info and data omitted for brevity): + +```js +{ + type: 'root', + children: [ + { + type: 'element', + tagName: 'h1', + properties: {}, + children: [Array], + position: [Object] + } + ] +} +``` + +## API + +This package exports the identifier [`fromHtml`][fromhtml]. +There is no default export. + +### `fromHtml(value[, options])` + +Turn serialized HTML into a hast tree. + +###### Parameters + + + +* `value` ([`Compatible`][compatible]) + — serialized HTML to parse +* `options` ([`Options`][options], optional) + — configuration + +###### Returns + +Tree ([`Root`][root]). + +### `Options` + +Configuration (TypeScript type). + +##### Fields + +###### `options.fragment` + +Whether to parse as a fragment (`boolean`, default: `false`). +The default is to expect a whole document. +In document mode, unopened `html`, `head`, and `body` elements are opened. + +## Examples + +### Example: fragment versus document + +The following example shows the difference between parsing as a document and +parsing as a fragment: + +```js +import {fromHtml} from 'hast-util-from-html-isomorphic' + +const doc = 'Hi!

Hello!

' + +console.log(fromHtml(doc)) + +console.log(fromHtml(doc, {fragment: true})) +``` + +…yields (positional info and data omitted for brevity): + +```js +{ + type: 'root', + children: [ + {type: 'element', tagName: 'html', properties: {}, children: [Array]} + ] +} +``` + +```js +{ + type: 'root', + children: [ + {type: 'element', tagName: 'title', properties: {}, children: [Array]}, + {type: 'element', tagName: 'h1', properties: {}, children: [Array]} + ] +} +``` + +> 👉 **Note**: observe that when a whole document is expected (first example), +> missing elements are opened and closed. + +## Syntax + +HTML is parsed according to WHATWG HTML (the living standard), which is also +followed by browsers such as Chrome and Firefox. + +## Types + +This package is fully typed with [TypeScript][]. +It exports the additional type [`Options`][options]. + +## Compatibility + +Projects maintained by the unified collective are compatible with all maintained +versions of Node.js. +As of now, that is Node.js 14.14+ and 16.0+. +Our projects sometimes work with older versions, but this is not guaranteed. + +## Security + +Parsing HTML is safe but using user-provided content can open you up to a +[cross-site scripting (XSS)][xss] attack. +Use [`hast-util-sanitize`][hast-util-sanitize] to make the hast tree safe. + +## Related + +* [`hast-util-from-html`](https://github.com/syntax-tree/hast-util-from-html) + — parse html into hast +* [`hast-util-from-dom`](https://github.com/syntax-tree/hast-util-from-dom) + — parse a DOM tree into hast +* [`hast-util-to-html`](https://github.com/syntax-tree/hast-util-to-html) + — serialize hast +* [`hast-util-sanitize`](https://github.com/syntax-tree/hast-util-sanitize) + — sanitize hast +* [`xast-util-from-xml`][xast-util-from-xml] + — parse XML + +## Contribute + +See [`contributing.md`][contributing] in [`syntax-tree/.github`][health] for +ways to get started. +See [`support.md`][support] for ways to get help. 
+ +This project has a [code of conduct][coc]. +By interacting with this repository, organization, or community you agree to +abide by its terms. + +## License + +[MIT][license] © [Remco Haszing][author] + + + +[build-badge]: https://github.com/syntax-tree/hast-util-from-html-isomorphic/workflows/main/badge.svg + +[build]: https://github.com/syntax-tree/hast-util-from-html-isomorphic/actions + +[coverage-badge]: https://img.shields.io/codecov/c/github/syntax-tree/hast-util-from-html-isomorphic.svg + +[coverage]: https://codecov.io/github/syntax-tree/hast-util-from-html-isomorphic + +[downloads-badge]: https://img.shields.io/npm/dm/hast-util-from-html-isomorphic.svg + +[downloads]: https://www.npmjs.com/package/hast-util-from-html-isomorphic + +[size-badge]: https://img.shields.io/bundlephobia/minzip/hast-util-from-html-isomorphic.svg + +[size]: https://bundlephobia.com/result?p=hast-util-from-html-isomorphic + +[sponsors-badge]: https://opencollective.com/unified/sponsors/badge.svg + +[backers-badge]: https://opencollective.com/unified/backers/badge.svg + +[collective]: https://opencollective.com/unified + +[chat-badge]: https://img.shields.io/badge/chat-discussions-success.svg + +[chat]: https://github.com/syntax-tree/unist/discussions + +[npm]: https://docs.npmjs.com/cli/install + +[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c + +[esmsh]: https://esm.sh + +[typescript]: https://www.typescriptlang.org + +[license]: license + +[author]: https://github.com/remcohaszing + +[health]: https://github.com/syntax-tree/.github + +[contributing]: https://github.com/syntax-tree/.github/blob/main/contributing.md + +[support]: https://github.com/syntax-tree/.github/blob/main/support.md + +[coc]: https://github.com/syntax-tree/.github/blob/main/code-of-conduct.md + +[xss]: https://en.wikipedia.org/wiki/Cross-site_scripting + +[hast]: https://github.com/syntax-tree/hast + +[root]: https://github.com/syntax-tree/hast#root + +[hast-util-sanitize]: 
https://github.com/syntax-tree/hast-util-sanitize + +[hast-util-from-dom]: https://github.com/syntax-tree/hast-util-from-dom + +[hast-util-from-html]: https://github.com/syntax-tree/hast-util-from-html + +[hast-util-to-html]: https://github.com/syntax-tree/hast-util-to-html + +[xast-util-from-xml]: https://github.com/syntax-tree/xast-util-from-xml + +[fromhtml]: #fromhtmlvalue-options + +[options]: #options + +[compatible]: https://github.com/vfile/vfile/blob/03efac7/lib/index.js#L16 diff --git a/test/index.js b/test/index.js new file mode 100644 index 0000000..c687cd6 --- /dev/null +++ b/test/index.js @@ -0,0 +1,95 @@ +import assert from 'node:assert/strict' +import {test} from 'node:test' + +import {JSDOM} from 'jsdom' +import {removePosition} from 'unist-util-remove-position' + +const jsdom = new JSDOM() +globalThis.document = jsdom.window.document +globalThis.DOMParser = jsdom.window.DOMParser + +// We use a dynamic import, so we can configure jsdom before we load the module. +const {fromHtml} = await import('hast-util-from-html-isomorphic') + +test('parse document', () => { + const html = '' + const tree = fromHtml(html) + removePosition(tree, {force: true}) + delete tree.data + + assert.deepEqual(tree, { + children: [ + { + children: [ + { + children: [], + properties: {}, + tagName: 'head', + type: 'element' + }, + { + children: [], + properties: {}, + tagName: 'body', + type: 'element' + } + ], + properties: {}, + tagName: 'html', + type: 'element' + } + ], + type: 'root' + }) +}) + +test('parse single element fragment', () => { + const html = '

' + const tree = fromHtml(html, {fragment: true}) + removePosition(tree, {force: true}) + delete tree.data + + assert.deepEqual(tree, { + children: [ + { + children: [ + { + children: [], + properties: {}, + tagName: 'p', + type: 'element' + } + ], + properties: {}, + tagName: 'div', + type: 'element' + } + ], + type: 'root' + }) +}) + +test('parse multi element fragment', () => { + const html = '

' + const tree = fromHtml(html, {fragment: true}) + removePosition(tree, {force: true}) + delete tree.data + + assert.deepEqual(tree, { + children: [ + { + children: [], + properties: {}, + tagName: 'p', + type: 'element' + }, + { + children: [], + properties: {}, + tagName: 'div', + type: 'element' + } + ], + type: 'root' + }) +}) diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..51d2f34 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,17 @@ +{ + "include": ["**/**.js"], + "exclude": ["coverage/", "node_modules/"], + "compilerOptions": { + "checkJs": true, + "declaration": true, + "emitDeclarationOnly": true, + "exactOptionalPropertyTypes": true, + "forceConsistentCasingInFileNames": true, + "lib": ["dom", "es2020"], + "module": "node16", + "newLine": "lf", + "skipLibCheck": true, + "strict": true, + "target": "es2020" + } +} From b76e50833c4dfa99be984400d984c33b782238b9 Mon Sep 17 00:00:00 2001 From: Remco Haszing Date: Thu, 16 Mar 2023 13:46:30 +0100 Subject: [PATCH 02/11] Update readme --- readme.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/readme.md b/readme.md index 1b838ca..d66e945 100644 --- a/readme.md +++ b/readme.md @@ -19,15 +19,10 @@ * [API](#api) * [`fromHtml(value[, options])`](#fromhtmlvalue-options) * [`Options`](#options) - * [`OnError`](#onerror) - * [`ErrorCode`](#errorcode) - * [`ErrorSeverity`](#errorseverity) * [Examples](#examples) * [Example: fragment versus document](#example-fragment-versus-document) - * [Example: whitespace around and inside ``](#example-whitespace-around-and-inside-html) - * [Example: parse errors](#example-parse-errors) * [Syntax](#syntax) -* [Types](#types-2) +* [Types](#types) * [Compatibility](#compatibility) * [Security](#security) * [Related](#related) From 757a67aedc1bd8e213d73235136ee57216b18588 Mon Sep 17 00:00:00 2001 From: Remco Haszing Date: Thu, 16 Mar 2023 13:51:03 +0100 Subject: [PATCH 03/11] Avoid node --test flag See 
https://github.com/bcoe/c8/issues/432 --- package.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index 7eec3b5..249b380 100644 --- a/package.json +++ b/package.json @@ -59,9 +59,9 @@ "prepack": "npm run build && npm run format", "build": "tsc --build --clean && tsc --build && type-coverage", "format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix", - "test-browser": "node --conditions development --conditions browser --test", - "test-node": "node --conditions development --test", - "test-worker": "node --conditions development --conditions worker --test", + "test-browser": "node --conditions development --conditions browser test/index.js", + "test-node": "node --conditions development test/index.js", + "test-worker": "node --conditions development --conditions worker test/index.js", "test-api": "npm run test-browser && npm run test-node && npm run test-worker", "test-coverage": "c8 --check-coverage --100 --reporter lcov npm run test-api", "test": "npm run build && npm run format && npm run test-coverage" From 4ed4810518c9b3b1533886454ad189204a7bca5e Mon Sep 17 00:00:00 2001 From: Remco Haszing Date: Thu, 16 Mar 2023 14:38:53 +0100 Subject: [PATCH 04/11] Rename export to fromHtmlIsomorphic --- index.js | 2 +- lib/browser.js | 6 +++--- lib/index.js | 6 +++--- readme.md | 16 ++++++++-------- test/index.js | 8 ++++---- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/index.js b/index.js index 2513390..2f05f17 100644 --- a/index.js +++ b/index.js @@ -2,4 +2,4 @@ * @typedef {import('./lib/index.js').Options} Options */ -export {fromHtml} from './lib/index.js' +export {fromHtmlIsomorphic} from './lib/index.js' diff --git a/lib/browser.js b/lib/browser.js index 5bd74c5..7e049ce 100644 --- a/lib/browser.js +++ b/lib/browser.js @@ -1,5 +1,5 @@ /** - * @typedef {typeof import('./index.js').fromHtml} FromHtml + * @typedef {typeof import('./index.js').fromHtmlIsomorphic} FromHtmlIsomorphic * 
@typedef {import('hast').RootContent} RootContent */ @@ -16,8 +16,8 @@ function parseFragment(value) { return template.content } -/** @type {FromHtml} */ -export function fromHtml(value, options) { +/** @type {FromHtmlIsomorphic} */ +export function fromHtmlIsomorphic(value, options) { /** @type {RootContent[]} */ const children = [] const node = options?.fragment diff --git a/lib/index.js b/lib/index.js index 728290f..4a5b314 100644 --- a/lib/index.js +++ b/lib/index.js @@ -3,7 +3,7 @@ * @typedef {Pick} Options */ -import {fromHtml as fromHtmlNode} from 'hast-util-from-html' +import {fromHtml} from 'hast-util-from-html' /** * @param {string} value @@ -13,6 +13,6 @@ import {fromHtml as fromHtmlNode} from 'hast-util-from-html' * @returns {import('hast').Root} * Tree */ -export function fromHtml(value, options) { - return fromHtmlNode(value, options) +export function fromHtmlIsomorphic(value, options) { + return fromHtml(value, options) } diff --git a/readme.md b/readme.md index d66e945..ad6fb9a 100644 --- a/readme.md +++ b/readme.md @@ -17,7 +17,7 @@ * [Install](#install) * [Use](#use) * [API](#api) - * [`fromHtml(value[, options])`](#fromhtmlvalue-options) + * [`fromHtmlIsomorphic(value[, options])`](#fromhtmlisomorphicvalue-options) * [`Options`](#options) * [Examples](#examples) * [Example: fragment versus document](#example-fragment-versus-document) @@ -64,23 +64,23 @@ npm install hast-util-from-html-isomorphic In Deno with [`esm.sh`][esmsh]: ```js -import {fromHtml} from "https://esm.sh/hast-util-from-html-isomorphic@1" +import {fromHtmlIsomorphic} from "https://esm.sh/hast-util-from-html-isomorphic@1" ``` In browsers with [`esm.sh`][esmsh]: ```html ``` ## Use ```js -import {fromHtml} from 'hast-util-from-html-isomorphic' +import {fromHtmlIsomorphic} from 'hast-util-from-html-isomorphic' -const tree = fromHtml('

Hello, world!

', {fragment: true}) +const tree = fromHtmlIsomorphic('

Hello, world!

', {fragment: true}) console.log(tree) ``` @@ -104,10 +104,10 @@ Yields (positional info and data omitted for brevity): ## API -This package exports the identifier [`fromHtml`][fromhtml]. +This package exports the identifier [`fromHtmlIsomorphic`][fromhtmlisomorphic]. There is no default export. -### `fromHtml(value[, options])` +### `fromHtmlIsomorphic(value[, options])` Turn serialized HTML into a hast tree. @@ -291,7 +291,7 @@ abide by its terms. [xast-util-from-xml]: https://github.com/syntax-tree/xast-util-from-xml -[fromhtml]: #fromhtmlvalue-options +[fromhtmlisomorphic]: #fromhtmlisomorphicvalue-options [options]: #options diff --git a/test/index.js b/test/index.js index c687cd6..d41b5eb 100644 --- a/test/index.js +++ b/test/index.js @@ -9,11 +9,11 @@ globalThis.document = jsdom.window.document globalThis.DOMParser = jsdom.window.DOMParser // We use a dynamic import, so we can configure jsdom before we load the module. -const {fromHtml} = await import('hast-util-from-html-isomorphic') +const {fromHtmlIsomorphic} = await import('hast-util-from-html-isomorphic') test('parse document', () => { const html = '' - const tree = fromHtml(html) + const tree = fromHtmlIsomorphic(html) removePosition(tree, {force: true}) delete tree.data @@ -45,7 +45,7 @@ test('parse document', () => { test('parse single element fragment', () => { const html = '

' - const tree = fromHtml(html, {fragment: true}) + const tree = fromHtmlIsomorphic(html, {fragment: true}) removePosition(tree, {force: true}) delete tree.data @@ -71,7 +71,7 @@ test('parse single element fragment', () => { test('parse multi element fragment', () => { const html = '

' - const tree = fromHtml(html, {fragment: true}) + const tree = fromHtmlIsomorphic(html, {fragment: true}) removePosition(tree, {force: true}) delete tree.data From e8f3c2e1652712dd7beb14dc7073ffa146dd7860 Mon Sep 17 00:00:00 2001 From: Remco Haszing Date: Thu, 16 Mar 2023 14:43:19 +0100 Subject: [PATCH 05/11] Update readme.md Co-authored-by: Titus Signed-off-by: Remco Haszing --- readme.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/readme.md b/readme.md index ad6fb9a..2ac0626 100644 --- a/readme.md +++ b/readme.md @@ -8,7 +8,8 @@ [![Backers][backers-badge]][collective] [![Chat][chat-badge]][chat] -[hast][] utility that turns HTML into a syntax tree. +[hast][] utility that turns HTML into a syntax tree, using browser APIs +when available, so it has a smaller bundle size there. ## Contents From aa3ed85da4a1c51cb7b1ce72051ea4eb94c66241 Mon Sep 17 00:00:00 2001 From: Remco Haszing Date: Thu, 16 Mar 2023 14:48:10 +0100 Subject: [PATCH 06/11] Apply suggestions from code review Co-authored-by: Titus Signed-off-by: Remco Haszing --- readme.md | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/readme.md b/readme.md index 2ac0626..3b449bd 100644 --- a/readme.md +++ b/readme.md @@ -35,28 +35,29 @@ when available, so it has a smaller bundle size there. This package is a utility that takes HTML input and turns it into a hast syntax tree. -When running in a browser, this uses [`hast-util-from-dom`][hast-util-from-dom], -otherwise this uses [`hast-util-from-html`][hast-util-from-html]. +In a browser, this uses [`hast-util-from-dom`][hast-util-from-dom], +otherwise it uses [`hast-util-from-html`][hast-util-from-html]. ## When should I use this? -If you want to handle syntax trees manually in isomorphic code and care about -bundle size, use this. +Use this if you want a syntax tree without positional info and your code should +be isomorphic (it could run anywhere), as it results in a smaller bundle size. 
-If you want to use positional information, use +If you need positional information, use [`hast-util-from-html`][hast-util-from-html]. -If you don’t care about positional information, and your code will only run in -the browser, use [`hast-util-from-dom`][hast-util-from-dom]. +If you don’t care about positional info and your code only runs in browsers, +use [`hast-util-from-dom`][hast-util-from-dom]. -Finally you can use the utility [`hast-util-to-html`][hast-util-to-html] to do -the inverse of this utility. -It turns hast into HTML. +Finally you can use the utility [`hast-util-to-html`][hast-util-to-html], +or [`hast-util-to-dom`][hast-util-to-dom] with `.outerHTML`, to do the inverse +of this utility. +That turns hast into HTML. ## Install This package is [ESM only][esm]. -In Node.js (version 14.14+ and 16.0+), install with [npm][]: +In Node.js (version 16.0+), install with [npm][]: ```sh npm install hast-util-from-html-isomorphic @@ -97,7 +98,6 @@ Yields (positional info and data omitted for brevity): tagName: 'h1', properties: {}, children: [Array], - position: [Object] } ] } @@ -114,8 +114,6 @@ Turn serialized HTML into a hast tree. ###### Parameters - - * `value` ([`Compatible`][compatible]) — serialized HTML to parse * `options` ([`Options`][options], optional) @@ -131,7 +129,7 @@ Configuration (TypeScript type). ##### Fields -###### `options.fragment` +###### `fragment` Whether to parse as a fragment (`boolean`, default: `false`). The default is to expect a whole document. @@ -192,7 +190,7 @@ It exports the additional type [`Options`][options]. Projects maintained by the unified collective are compatible with all maintained versions of Node.js. -As of now, that is Node.js 14.14+ and 16.0+. +As of now, that is Node.js 16.0+. Our projects sometimes work with older versions, but this is not guaranteed. 
## Security From 0265512caa26026c117887d65b3eecdbb5059fb1 Mon Sep 17 00:00:00 2001 From: Remco Haszing Date: Thu, 16 Mar 2023 14:59:30 +0100 Subject: [PATCH 07/11] Simplify browser implementation --- lib/browser.js | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/lib/browser.js b/lib/browser.js index 7e049ce..b9e1a48 100644 --- a/lib/browser.js +++ b/lib/browser.js @@ -1,36 +1,26 @@ /** * @typedef {typeof import('./index.js').fromHtmlIsomorphic} FromHtmlIsomorphic - * @typedef {import('hast').RootContent} RootContent + * @typedef {import('hast').Root} Root */ import {fromDom} from 'hast-util-from-dom' -const template = document.createElement('template') const parser = new DOMParser() /** * @param {string} value */ function parseFragment(value) { + const template = document.createElement('template') template.innerHTML = value return template.content } /** @type {FromHtmlIsomorphic} */ export function fromHtmlIsomorphic(value, options) { - /** @type {RootContent[]} */ - const children = [] const node = options?.fragment ? parseFragment(value) : parser.parseFromString(value, 'text/html') - while (node.firstChild) { - children.push(/** @type {RootContent} */ (fromDom(node.firstChild))) - node.firstChild.remove() - } - - return { - type: 'root', - children - } + return /** @type {Root} */ (fromDom(node)) } From 92c48445fed4c95d17c8c4a37a024291f18dd8cd Mon Sep 17 00:00:00 2001 From: Remco Haszing Date: Thu, 16 Mar 2023 15:01:54 +0100 Subject: [PATCH 08/11] Fix lint issue --- readme.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/readme.md b/readme.md index 3b449bd..0a112ea 100644 --- a/readme.md +++ b/readme.md @@ -286,6 +286,8 @@ abide by its terms. 
[hast-util-from-html]: https://github.com/syntax-tree/hast-util-from-html +[hast-util-to-dom]: https://github.com/syntax-tree/hast-util-to-dom + [hast-util-to-html]: https://github.com/syntax-tree/hast-util-to-html [xast-util-from-xml]: https://github.com/syntax-tree/xast-util-from-xml From ce48dac25e45733eff4d21523d2196c247685a66 Mon Sep 17 00:00:00 2001 From: Remco Haszing Date: Thu, 16 Mar 2023 15:40:06 +0100 Subject: [PATCH 09/11] Remove positional info This provides a more consistent interface between Node.js and the browser. --- lib/index.js | 6 +++++- package.json | 4 ++-- test/index.js | 7 ------- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/lib/index.js b/lib/index.js index 4a5b314..e5b5e47 100644 --- a/lib/index.js +++ b/lib/index.js @@ -4,6 +4,7 @@ */ import {fromHtml} from 'hast-util-from-html' +import {removePosition} from 'unist-util-remove-position' /** * @param {string} value @@ -14,5 +15,8 @@ import {fromHtml} from 'hast-util-from-html' * Tree */ export function fromHtmlIsomorphic(value, options) { - return fromHtml(value, options) + const tree = fromHtml(value, options) + removePosition(tree, {force: true}) + delete tree.data + return tree } diff --git a/package.json b/package.json index 249b380..dd308a9 100644 --- a/package.json +++ b/package.json @@ -40,7 +40,8 @@ "dependencies": { "@types/hast": "^2.0.0", "hast-util-from-html": "^1.0.0", - "hast-util-from-dom": "^4.0.0" + "hast-util-from-dom": "^4.0.0", + "unist-util-remove-position": "^4.0.0" }, "devDependencies": { "@types/jsdom": "^21.0.0", @@ -52,7 +53,6 @@ "remark-preset-wooorm": "^9.0.0", "type-coverage": "^2.0.0", "typescript": "^4.0.0", - "unist-util-remove-position": "^4.0.0", "xo": "^0.53.0" }, "scripts": { diff --git a/test/index.js b/test/index.js index d41b5eb..835d279 100644 --- a/test/index.js +++ b/test/index.js @@ -2,7 +2,6 @@ import assert from 'node:assert/strict' import {test} from 'node:test' import {JSDOM} from 'jsdom' -import {removePosition} from 
'unist-util-remove-position' const jsdom = new JSDOM() globalThis.document = jsdom.window.document @@ -14,8 +13,6 @@ const {fromHtmlIsomorphic} = await import('hast-util-from-html-isomorphic') test('parse document', () => { const html = '' const tree = fromHtmlIsomorphic(html) - removePosition(tree, {force: true}) - delete tree.data assert.deepEqual(tree, { children: [ @@ -46,8 +43,6 @@ test('parse document', () => { test('parse single element fragment', () => { const html = '

' const tree = fromHtmlIsomorphic(html, {fragment: true}) - removePosition(tree, {force: true}) - delete tree.data assert.deepEqual(tree, { children: [ @@ -72,8 +67,6 @@ test('parse single element fragment', () => { test('parse multi element fragment', () => { const html = '

' const tree = fromHtmlIsomorphic(html, {fragment: true}) - removePosition(tree, {force: true}) - delete tree.data assert.deepEqual(tree, { children: [ From 2fc4a77d7d1197b9dcc5b319a981f6b9b92588ad Mon Sep 17 00:00:00 2001 From: Remco Haszing Date: Thu, 16 Mar 2023 15:42:13 +0100 Subject: [PATCH 10/11] Add deno and react-native exports --- package.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/package.json b/package.json index dd308a9..78f26a2 100644 --- a/package.json +++ b/package.json @@ -27,6 +27,8 @@ "type": "module", "exports": { ".": { + "deno": "./index.js", + "react-native": "./index.js", "worker": "./index.js", "browser": "./lib/browser.js", "default": "./index.js" From e50890295af67b8c3d78ccd5915321761627fa3f Mon Sep 17 00:00:00 2001 From: Remco Haszing Date: Thu, 16 Mar 2023 15:55:00 +0100 Subject: [PATCH 11/11] Use node require preload to configure jsdom --- package.json | 2 +- test/dom.cjs | 5 +++++ test/index.js | 9 +-------- 3 files changed, 7 insertions(+), 9 deletions(-) create mode 100644 test/dom.cjs diff --git a/package.json b/package.json index 78f26a2..9e68b52 100644 --- a/package.json +++ b/package.json @@ -61,7 +61,7 @@ "prepack": "npm run build && npm run format", "build": "tsc --build --clean && tsc --build && type-coverage", "format": "remark . -qfo && prettier . 
-w --loglevel warn && xo --fix", - "test-browser": "node --conditions development --conditions browser test/index.js", + "test-browser": "node --conditions development --conditions browser -r ./test/dom.cjs test/index.js", "test-node": "node --conditions development test/index.js", "test-worker": "node --conditions development --conditions worker test/index.js", "test-api": "npm run test-browser && npm run test-node && npm run test-worker", diff --git a/test/dom.cjs b/test/dom.cjs new file mode 100644 index 0000000..090f1ca --- /dev/null +++ b/test/dom.cjs @@ -0,0 +1,5 @@ +const {JSDOM} = require('jsdom') + +const jsdom = new JSDOM() +globalThis.document = jsdom.window.document +globalThis.DOMParser = jsdom.window.DOMParser diff --git a/test/index.js b/test/index.js index 835d279..768b99b 100644 --- a/test/index.js +++ b/test/index.js @@ -1,14 +1,7 @@ import assert from 'node:assert/strict' import {test} from 'node:test' -import {JSDOM} from 'jsdom' - -const jsdom = new JSDOM() -globalThis.document = jsdom.window.document -globalThis.DOMParser = jsdom.window.DOMParser - -// We use a dynamic import, so we can configure jsdom before we load the module. -const {fromHtmlIsomorphic} = await import('hast-util-from-html-isomorphic') +import {fromHtmlIsomorphic} from 'hast-util-from-html-isomorphic' test('parse document', () => { const html = ''