perf: replace parse5 with ultrahtml (#336)

This commit is contained in:
Daniel Roe 2022-12-04 22:10:10 +00:00 committed by GitHub
parent 04615e443e
commit 1c0f8b0147
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 94 additions and 143 deletions

View file

@ -68,7 +68,7 @@ const deleteAndRedraft = async () => {
} }
const { text } = await useMasto().statuses.remove(status.id) const { text } = await useMasto().statuses.remove(status.id)
openPublishDialog('dialog', getDraftFromStatus(status, text), true) openPublishDialog('dialog', await getDraftFromStatus(status, text), true)
} }
const reply = () => { const reply = () => {
@ -81,9 +81,9 @@ const reply = () => {
} }
} }
function editStatus() { async function editStatus() {
openPublishDialog(`edit-${status.id}`, { openPublishDialog(`edit-${status.id}`, {
...getDraftFromStatus(status), ...await getDraftFromStatus(status),
editingStatus: status, editingStatus: status,
}) })
} }

View file

@ -1,48 +1,45 @@
import type { Emoji } from 'masto' import type { Emoji } from 'masto'
import type { DefaultTreeAdapterMap } from 'parse5' import type { Node } from 'ultrahtml'
import { parseFragment, serialize } from 'parse5' import { TEXT_NODE, parse, render, walkSync } from 'ultrahtml'
import type { VNode } from 'vue' import type { VNode } from 'vue'
import { Fragment, h, isVNode } from 'vue' import { Fragment, h, isVNode } from 'vue'
import { RouterLink } from 'vue-router' import { RouterLink } from 'vue-router'
import ContentCode from '~/components/content/ContentCode.vue' import ContentCode from '~/components/content/ContentCode.vue'
import AccountHoverWrapper from '~/components/account/AccountHoverWrapper.vue' import AccountHoverWrapper from '~/components/account/AccountHoverWrapper.vue'
type Node = DefaultTreeAdapterMap['childNode'] function handleMention(el: Node) {
type Element = DefaultTreeAdapterMap['element']
function handleMention(el: Element) {
// Redirect mentions to the user page // Redirect mentions to the user page
if (el.tagName === 'a' && el.attrs.find(i => i.name === 'class' && i.value.includes('mention'))) { if (el.name === 'a' && el.attributes.class?.includes('mention')) {
const href = el.attrs.find(i => i.name === 'href') const href = el.attributes.href
if (href) { if (href) {
const matchUser = href.value.match(UserLinkRE) const matchUser = href.match(UserLinkRE)
if (matchUser) { if (matchUser) {
const [, server, username] = matchUser const [, server, username] = matchUser
const handle = `@${username}@${server.replace(/(.+\.)(.+\..+)/, '$2')}` const handle = `@${username}@${server.replace(/(.+\.)(.+\..+)/, '$2')}`
href.value = `/${server}/@${username}` el.attributes.href = `/${server}/@${username}`
return h(AccountHoverWrapper, { handle, class: 'inline-block' }, () => nodeToVNode(el)) return h(AccountHoverWrapper, { handle, class: 'inline-block' }, () => nodeToVNode(el))
} }
const matchTag = href.value.match(TagLinkRE) const matchTag = href.match(TagLinkRE)
if (matchTag) { if (matchTag) {
const [, , name] = matchTag const [, , name] = matchTag
href.value = `/${currentServer.value}/tags/${name}` el.attributes.href = `/${currentServer.value}/tags/${name}`
} }
} }
} }
return undefined return undefined
} }
function handleCodeBlock(el: Element) { function handleCodeBlock(el: Node) {
if (el.tagName === 'pre' && el.childNodes[0]?.nodeName === 'code') { if (el.name === 'pre' && el.children[0]?.name === 'code') {
const codeEl = el.childNodes[0] as Element const codeEl = el.children[0] as Node
const classes = codeEl.attrs.find(i => i.name === 'class')?.value const classes = codeEl.attributes.class as string
const lang = classes?.split(/\s/g).find(i => i.startsWith('language-'))?.replace('language-', '') const lang = classes?.split(/\s/g).find(i => i.startsWith('language-'))?.replace('language-', '')
const code = codeEl.childNodes[0] ? treeToText(codeEl.childNodes[0]) : '' const code = codeEl.children[0] ? treeToText(codeEl.children[0]) : ''
return h(ContentCode, { lang, code: encodeURIComponent(code) }) return h(ContentCode, { lang, code: encodeURIComponent(code) })
} }
} }
function handleNode(el: Element) { function handleNode(el: Node) {
return handleCodeBlock(el) || handleMention(el) || el return handleCodeBlock(el) || handleMention(el) || el
} }
@ -51,7 +48,7 @@ function handleNode(el: Element) {
* with interop of custom emojis and inline Markdown syntax * with interop of custom emojis and inline Markdown syntax
*/ */
export function parseMastodonHTML(html: string, customEmojis: Record<string, Emoji> = {}) { export function parseMastodonHTML(html: string, customEmojis: Record<string, Emoji> = {}) {
const processed = html let processed = html
// custom emojis // custom emojis
.replace(/:([\w-]+?):/g, (_, name) => { .replace(/:([\w-]+?):/g, (_, name) => {
const emoji = customEmojis[name] const emoji = customEmojis[name]
@ -66,36 +63,36 @@ export function parseMastodonHTML(html: string, customEmojis: Record<string, Emo
return `><pre><code${classes}>${code}</code></pre>` return `><pre><code${classes}>${code}</code></pre>`
}) })
const tree = parseFragment(processed) walkSync(parse(processed), (node) => {
if (node.type !== TEXT_NODE)
function walk(node: Node) { return
if ('childNodes' in node) const replacements = [
node.childNodes = node.childNodes.flatMap(n => walk(n)) [/\*\*\*(.*?)\*\*\*/g, '<b><em>$1</em></b>'],
[/\*\*(.*?)\*\*/g, '<b>$1</b>'],
if (node.nodeName === '#text') { [/\*(.*?)\*/g, '<em>$1</em>'],
// @ts-expect-error casing [/~~(.*?)~~/g, '<del>$1</del>'],
const text = node.value as string [/`([^`]+?)`/g, '<code>$1</code>'],
const converted = text ] as const
.replace(/\*\*\*(.*?)\*\*\*/g, '<b><em>$1</em></b>') for (const [re, replacement] of replacements) {
.replace(/\*\*(.*?)\*\*/g, '<b>$1</b>') for (const match of node.value.matchAll(re)) {
.replace(/\*(.*?)\*/g, '<em>$1</em>') if (node.loc) {
.replace(/~~(.*?)~~/g, '<del>$1</del>') const start = match.index! + node.loc[0].start
.replace(/`([^`]+?)`/g, '<code>$1</code>') const end = start + match[0].length + node.loc[0].start
processed = processed.slice(0, start) + match[0].replace(re, replacement) + processed.slice(end)
if (converted !== text)
return parseFragment(converted).childNodes
} }
return [node] else {
processed = processed.replace(match[0], match[0].replace(re, replacement))
}
}
}
})
return parse(processed)
} }
tree.childNodes = tree.childNodes.flatMap(n => walk(n)) export async function convertMastodonHTML(html: string, customEmojis: Record<string, Emoji> = {}) {
return tree
}
export function convertMastodonHTML(html: string, customEmojis: Record<string, Emoji> = {}) {
const tree = parseMastodonHTML(html, customEmojis) const tree = parseMastodonHTML(html, customEmojis)
return serialize(tree) return await render(tree)
} }
/** /**
@ -106,31 +103,28 @@ export function contentToVNode(
customEmojis: Record<string, Emoji> = {}, customEmojis: Record<string, Emoji> = {},
): VNode { ): VNode {
const tree = parseMastodonHTML(content, customEmojis) const tree = parseMastodonHTML(content, customEmojis)
return h(Fragment, tree.childNodes.map(n => treeToVNode(n))) return h(Fragment, (tree.children as Node[]).map(n => treeToVNode(n)))
} }
function nodeToVNode(node: Node): VNode | string | null { function nodeToVNode(node: Node): VNode | string | null {
if (node.nodeName === '#text') { if (node.type === TEXT_NODE)
// @ts-expect-error casing return node.value
return input.value as string
}
if ('childNodes' in node) { if ('children' in node) {
const attrs = Object.fromEntries(node.attrs.map(i => [i.name, i.value])) if (node.name === 'a' && (node.attributes.href?.startsWith('/') || node.attributes.href?.startsWith('.'))) {
if (node.nodeName === 'a' && (attrs.href?.startsWith('/') || attrs.href?.startsWith('.'))) { node.attributes.to = node.attributes.href
attrs.to = attrs.href delete node.attributes.href
delete attrs.href delete node.attributes.target
delete attrs.target
return h( return h(
RouterLink as any, RouterLink as any,
attrs, node.attributes,
() => node.childNodes.map(treeToVNode), () => node.children.map(treeToVNode),
) )
} }
return h( return h(
node.nodeName, node.name,
attrs, node.attributes,
node.childNodes.map(treeToVNode), node.children.map(treeToVNode),
) )
} }
return null return null
@ -139,12 +133,10 @@ function nodeToVNode(node: Node): VNode | string | null {
function treeToVNode( function treeToVNode(
input: Node, input: Node,
): VNode | string | null { ): VNode | string | null {
if (input.nodeName === '#text') { if (input.type === TEXT_NODE)
// @ts-expect-error casing
return input.value as string return input.value as string
}
if ('childNodes' in input) { if ('children' in input) {
const node = handleNode(input) const node = handleNode(input)
if (node == null) if (node == null)
return null return null
@ -156,8 +148,8 @@ function treeToVNode(
} }
export function htmlToText(html: string) { export function htmlToText(html: string) {
const tree = parseFragment(html) const tree = parse(html)
return tree.childNodes.map(n => treeToText(n)).join('').trim() return (tree.children as Node[]).map(n => treeToText(n)).join('').trim()
} }
export function treeToText(input: Node): string { export function treeToText(input: Node): string {
@ -165,20 +157,18 @@ export function treeToText(input: Node): string {
let body = '' let body = ''
let post = '' let post = ''
if (input.nodeName === '#text') if (input.type === TEXT_NODE)
// @ts-expect-error casing
return input.value return input.value
if (input.nodeName === 'br') if (input.name === 'br')
return '\n' return '\n'
if (['p', 'pre'].includes(input.nodeName)) if (['p', 'pre'].includes(input.name))
pre = '\n' pre = '\n'
if (input.nodeName === 'code') { if (input.name === 'code') {
if (input.parentNode?.nodeName === 'pre') { if (input.parent?.name === 'pre') {
const clz = input.attrs.find(attr => attr.name === 'class') const lang = input.attributes.class?.replace('language-', '')
const lang = clz?.value.replace('language-', '')
pre = `\`\`\`${lang || ''}\n` pre = `\`\`\`${lang || ''}\n`
post = '\n```' post = '\n```'
@ -188,24 +178,24 @@ export function treeToText(input: Node): string {
post = '`' post = '`'
} }
} }
else if (input.nodeName === 'b' || input.nodeName === 'strong') { else if (input.name === 'b' || input.name === 'strong') {
pre = '**' pre = '**'
post = '**' post = '**'
} }
else if (input.nodeName === 'i' || input.nodeName === 'em') { else if (input.name === 'i' || input.name === 'em') {
pre = '*' pre = '*'
post = '*' post = '*'
} }
else if (input.nodeName === 'del') { else if (input.name === 'del') {
pre = '~~' pre = '~~'
post = '~~' post = '~~'
} }
if ('childNodes' in input) if ('children' in input)
body = input.childNodes.map(n => treeToText(n)).join('') body = (input.children as Node[]).map(n => treeToText(n)).join('')
if (input.nodeName === 'img' && input.attrs.some(attr => attr.name === 'class' && attr.value.includes('custom-emoji'))) if (input.name === 'img' && input.attributes.class?.includes('custom-emoji'))
return `:${input.attrs.find(attr => attr.name === 'data-emoji-id')?.value}:` return `:${input.attributes['data-emoji-id']}:`
return pre + body + post return pre + body + post
} }

View file

@ -40,9 +40,9 @@ export function getDefaultDraft(options: Partial<Draft['params'] & Omit<Draft, '
} }
} }
export function getDraftFromStatus(status: Status, text?: null | string): Draft { export async function getDraftFromStatus(status: Status, text?: null | string): Promise<Draft> {
return getDefaultDraft({ return getDefaultDraft({
status: text || convertMastodonHTML(status.content), status: text || await convertMastodonHTML(status.content),
mediaIds: status.mediaAttachments.map(att => att.id), mediaIds: status.mediaAttachments.map(att => att.id),
visibility: status.visibility, visibility: status.visibility,
attachments: status.mediaAttachments, attachments: status.mediaAttachments,

View file

@ -61,7 +61,6 @@
"lru-cache": "^7.14.1", "lru-cache": "^7.14.1",
"masto": "^4.7.5", "masto": "^4.7.5",
"nuxt": "^3.0.0", "nuxt": "^3.0.0",
"parse5": "^7.1.2",
"pinia": "^2.0.27", "pinia": "^2.0.27",
"postcss-nested": "^6.0.0", "postcss-nested": "^6.0.0",
"prettier": "^2.8.0", "prettier": "^2.8.0",
@ -74,6 +73,7 @@
"tippy.js": "^6.3.7", "tippy.js": "^6.3.7",
"typescript": "^4.9.3", "typescript": "^4.9.3",
"ufo": "^1.0.1", "ufo": "^1.0.1",
"ultrahtml": "^1.0.4",
"unplugin-auto-import": "^0.12.0", "unplugin-auto-import": "^0.12.0",
"vite-plugin-inspect": "^0.7.9", "vite-plugin-inspect": "^0.7.9",
"vitest": "^0.25.3", "vitest": "^0.25.3",

View file

@ -44,7 +44,6 @@ specifiers:
lru-cache: ^7.14.1 lru-cache: ^7.14.1
masto: ^4.7.5 masto: ^4.7.5
nuxt: ^3.0.0 nuxt: ^3.0.0
parse5: ^7.1.2
pinia: ^2.0.27 pinia: ^2.0.27
postcss-nested: ^6.0.0 postcss-nested: ^6.0.0
prettier: ^2.8.0 prettier: ^2.8.0
@ -57,6 +56,7 @@ specifiers:
tippy.js: ^6.3.7 tippy.js: ^6.3.7
typescript: ^4.9.3 typescript: ^4.9.3
ufo: ^1.0.1 ufo: ^1.0.1
ultrahtml: ^1.0.4
unplugin-auto-import: ^0.12.0 unplugin-auto-import: ^0.12.0
vite-plugin-inspect: ^0.7.9 vite-plugin-inspect: ^0.7.9
vitest: ^0.25.3 vitest: ^0.25.3
@ -108,7 +108,6 @@ devDependencies:
lru-cache: 7.14.1 lru-cache: 7.14.1
masto: 4.7.5 masto: 4.7.5
nuxt: 3.0.0_s5ps7njkmjlaqajutnox5ntcla nuxt: 3.0.0_s5ps7njkmjlaqajutnox5ntcla
parse5: 7.1.2
pinia: 2.0.27_typescript@4.9.3 pinia: 2.0.27_typescript@4.9.3
postcss-nested: 6.0.0 postcss-nested: 6.0.0
prettier: 2.8.0 prettier: 2.8.0
@ -121,6 +120,7 @@ devDependencies:
tippy.js: 6.3.7 tippy.js: 6.3.7
typescript: 4.9.3 typescript: 4.9.3
ufo: 1.0.1 ufo: 1.0.1
ultrahtml: 1.0.4
unplugin-auto-import: 0.12.0 unplugin-auto-import: 0.12.0
vite-plugin-inspect: 0.7.9 vite-plugin-inspect: 0.7.9
vitest: 0.25.3 vitest: 0.25.3
@ -6367,12 +6367,6 @@ packages:
parse-path: 7.0.0 parse-path: 7.0.0
dev: true dev: true
/parse5/7.1.2:
resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==}
dependencies:
entities: 4.4.0
dev: true
/parseurl/1.3.3: /parseurl/1.3.3:
resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==} resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==}
engines: {node: '>= 0.8'} engines: {node: '>= 0.8'}

View file

@ -1,45 +1,25 @@
// Vitest Snapshot v1 // Vitest Snapshot v1
exports[`content-rich > code frame 1`] = ` exports[`content-rich > code frame 1`] = `
"<p>Testing code block</p> "<p>Testing code block</p><p><pre lang=\\"ts\\">import { useMouse, usePreferredDark } from &#39;@vueuse/core&#39;
<p></p>
<pre lang=\\"ts\\">
import { useMouse, usePreferredDark } from &#39;@vueuse/core&#39;
// tracks mouse position // tracks mouse position
const { x, y } = useMouse() const { x, y } = useMouse()
// is the user prefers dark theme // is the user prefers dark theme
const isDark = usePreferredDark()</pre const isDark = usePreferredDark()</pre></p>"
>
<p></p>
"
`; `;
exports[`content-rich > code frame 2 1`] = ` exports[`content-rich > code frame 2 1`] = `
"<p> "<p>
<span class=\\"h-card\\"><a class=\\"u-url mention\\" to=\\"/@antfu@mas.to\\"></a></span> <span class=\\"h-card\\"><a class=\\"u-url mention\\" to=\\"/mas.to/@antfu\\"></a></span>
Testing<br /> Testing<br />
</p>
<pre lang=\\"ts\\">const a = hello</pre> <pre lang=\\"ts\\">const a = hello</pre>
<p></p> </p>
" "
`; `;
exports[`content-rich > code frame empty 1`] = ` exports[`content-rich > code frame empty 1`] = `"<p><pre></pre><br></p>"`;
"<p></p>
<pre></pre>
<br />
<p></p>
"
`;
exports[`content-rich > code frame no lang 1`] = ` exports[`content-rich > code frame no lang 1`] = `"<p><pre>hello world</pre><br>no lang</p>"`;
"<p></p>
<pre>hello world</pre>
<br />no lang
<p></p>
"
`;
exports[`content-rich > custom emoji 1`] = ` exports[`content-rich > custom emoji 1`] = `
"Daniel Roe "Daniel Roe
@ -61,7 +41,7 @@ exports[`content-rich > link + mention 1`] = `
><a ><a
class=\\"u-url mention\\" class=\\"u-url mention\\"
rel=\\"nofollow noopener noreferrer\\" rel=\\"nofollow noopener noreferrer\\"
to=\\"/@vitest@mas.to\\" to=\\"/mas.to/@vitest\\"
></a ></a
></span> ></span>
(migrated from chai+mocha) (migrated from chai+mocha)

View file

@ -1,24 +1,18 @@
// Vitest Snapshot v1 // Vitest Snapshot v1
exports[`html-parse > code frame > html 1`] = ` exports[`html-parse > code frame > html 1`] = `
"<p>Testing code block</p> "<p>Testing code block</p><p><pre><code class=\\"language-ts\\">import { useMouse, usePreferredDark } from &#39;@vueuse/core&#39;
<p></p>
<pre><code class=\\"language-ts\\">import { useMouse, usePreferredDark } from '@vueuse/core'
// tracks mouse position // tracks mouse position
const { x, y } = useMouse() const { x, y } = useMouse()
// is the user prefers dark theme // is the user prefers dark theme
const isDark = usePreferredDark()</code></pre> const isDark = usePreferredDark()</code></pre></p>"
<p></p>
"
`; `;
exports[`html-parse > code frame > text 1`] = ` exports[`html-parse > code frame > text 1`] = `
"Testing code block "Testing code block
\`\`\`ts \`\`\`ts
import { useMouse, usePreferredDark } from '@vueuse/core' import { useMouse, usePreferredDark } from &#39;@vueuse/core&#39;
// tracks mouse position // tracks mouse position
const { x, y } = useMouse() const { x, y } = useMouse()
// is the user prefers dark theme // is the user prefers dark theme
@ -34,9 +28,8 @@ exports[`html-parse > code frame 2 > html 1`] = `
></span ></span
> >
Testing<br /> Testing<br />
</p>
<pre><code class=\\"language-ts\\">const a = hello</code></pre> <pre><code class=\\"language-ts\\">const a = hello</code></pre>
<p></p> </p>
" "
`; `;
@ -65,13 +58,7 @@ exports[`html-parse > empty > html 1`] = `""`;
exports[`html-parse > empty > text 1`] = `""`; exports[`html-parse > empty > text 1`] = `""`;
exports[`html-parse > inline markdown > html 1`] = ` exports[`html-parse > inline markdown > html 1`] = `"<p>text <code>code</code> <b>bold</b> <em>italic</em> <del>del</del></p><p><pre><code class=\\"language-js\\">code block</code></pre></p>"`;
"<p>text <code>code</code> <b>bold</b> <em>italic</em> <del>del</del></p>
<p></p>
<pre><code class=\\"language-js\\">code block</code></pre>
<p></p>
"
`;
exports[`html-parse > inline markdown > text 1`] = ` exports[`html-parse > inline markdown > text 1`] = `
"text \`code\` **bold** *italic* ~~del~~ "text \`code\` **bold** *italic* ~~del~~

View file

@ -1,7 +1,7 @@
import type { Emoji } from 'masto' import type { Emoji } from 'masto'
import { describe, expect, it } from 'vitest' import { describe, expect, it } from 'vitest'
import { format } from 'prettier' import { format } from 'prettier'
import { serialize } from 'parse5' import { render as renderTree } from 'ultrahtml'
import { parseMastodonHTML, treeToText } from '~/composables/content' import { parseMastodonHTML, treeToText } from '~/composables/content'
describe('html-parse', () => { describe('html-parse', () => {
@ -53,9 +53,9 @@ describe('html-parse', () => {
async function render(input: string, emojis?: Record<string, Emoji>) { async function render(input: string, emojis?: Record<string, Emoji>) {
const tree = parseMastodonHTML(input, emojis) const tree = parseMastodonHTML(input, emojis)
const html = serialize(tree) const html = await renderTree(tree)
let formatted = '' let formatted = ''
const serializedText = tree.childNodes.map(n => treeToText(n)).join('').trim() const serializedText = tree.children.map(n => treeToText(n)).join('').trim()
try { try {
formatted = format(html, { formatted = format(html, {