refactor: html parsing
This commit is contained in:
parent
d76e4bfaa5
commit
6944a74653
|
@ -4,27 +4,23 @@ defineOptions({
|
||||||
name: 'ContentRich',
|
name: 'ContentRich',
|
||||||
})
|
})
|
||||||
|
|
||||||
const { content, emojis, markdown = true } = defineProps<{
|
const {
|
||||||
|
content,
|
||||||
|
emojis,
|
||||||
|
markdown = true,
|
||||||
|
} = defineProps<{
|
||||||
content: string
|
content: string
|
||||||
markdown?: boolean
|
|
||||||
emojis?: Emoji[]
|
emojis?: Emoji[]
|
||||||
|
markdown?: boolean
|
||||||
}>()
|
}>()
|
||||||
|
|
||||||
const useEmojis = computed(() => {
|
const emojisObject = useEmojisFallback(() => emojis)
|
||||||
const result: Emoji[] = []
|
|
||||||
if (emojis)
|
|
||||||
result.push(...emojis)
|
|
||||||
|
|
||||||
result.push(...currentCustomEmojis.value.emojis)
|
|
||||||
|
|
||||||
return emojisArrayToObject(result)
|
|
||||||
})
|
|
||||||
|
|
||||||
export default () => h(
|
export default () => h(
|
||||||
'span',
|
'span',
|
||||||
{ class: 'content-rich', dir: 'auto' },
|
{ class: 'content-rich', dir: 'auto' },
|
||||||
contentToVNode(content, {
|
contentToVNode(content, {
|
||||||
emojis: useEmojis.value,
|
emojis: emojisObject.value,
|
||||||
markdown,
|
markdown,
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,23 +1,37 @@
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import type { Status, StatusEdit } from 'masto'
|
import type { Status, StatusEdit } from 'masto'
|
||||||
|
|
||||||
const { status, withAction = true } = defineProps<{
|
const {
|
||||||
|
status,
|
||||||
|
withAction = true,
|
||||||
|
} = defineProps<{
|
||||||
status: Status | StatusEdit
|
status: Status | StatusEdit
|
||||||
withAction?: boolean
|
withAction?: boolean
|
||||||
}>()
|
}>()
|
||||||
|
|
||||||
const { translation } = useTranslation(status)
|
const { translation } = useTranslation(status)
|
||||||
|
|
||||||
|
const emojisObject = useEmojisFallback(() => status.emojis)
|
||||||
|
const vnode = $computed(() => {
|
||||||
|
if (!status.content)
|
||||||
|
return null
|
||||||
|
const vnode = contentToVNode(status.content, {
|
||||||
|
emojis: emojisObject.value,
|
||||||
|
markdown: true,
|
||||||
|
})
|
||||||
|
return vnode
|
||||||
|
})
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<template>
|
<template>
|
||||||
<div class="status-body" whitespace-pre-wrap break-words :class="{ 'with-action': withAction }">
|
<div class="status-body" whitespace-pre-wrap break-words :class="{ 'with-action': withAction }">
|
||||||
<ContentRich
|
<span
|
||||||
v-if="status.content"
|
v-if="status.content"
|
||||||
class="line-compact"
|
class="content-rich line-compact" dir="auto"
|
||||||
:content="status.content"
|
:lang="('language' in status && status.language) || undefined"
|
||||||
:emojis="status.emojis"
|
>
|
||||||
:lang="'language' in status && status.language"
|
<component :is="vnode" />
|
||||||
/>
|
</span>
|
||||||
<div v-else />
|
<div v-else />
|
||||||
<template v-if="translation.visible">
|
<template v-if="translation.visible">
|
||||||
<div my2 h-px border="b base" bg-base />
|
<div my2 h-px border="b base" bg-base />
|
||||||
|
|
|
@ -5,6 +5,34 @@ import { ELEMENT_NODE, TEXT_NODE, h, parse, render } from 'ultrahtml'
|
||||||
import { findAndReplaceEmojisInText } from '@iconify/utils'
|
import { findAndReplaceEmojisInText } from '@iconify/utils'
|
||||||
import { emojiRegEx, getEmojiAttributes } from '../config/emojis'
|
import { emojiRegEx, getEmojiAttributes } from '../config/emojis'
|
||||||
|
|
||||||
|
export interface ContentParseOptions {
|
||||||
|
emojis?: Record<string, Emoji>
|
||||||
|
markdown?: boolean
|
||||||
|
replaceUnicodeEmoji?: boolean
|
||||||
|
astTransforms?: Transform[]
|
||||||
|
}
|
||||||
|
|
||||||
|
const sanitizerBasicClasses = filterClasses(/^(h-\S*|p-\S*|u-\S*|dt-\S*|e-\S*|mention|hashtag|ellipsis|invisible)$/u)
|
||||||
|
const sanitizer = sanitize({
|
||||||
|
// Allow basic elements as seen in https://github.com/mastodon/mastodon/blob/17f79082b098e05b68d6f0d38fabb3ac121879a9/lib/sanitize_ext/sanitize_config.rb
|
||||||
|
br: {},
|
||||||
|
p: {},
|
||||||
|
a: {
|
||||||
|
href: filterHref(),
|
||||||
|
class: sanitizerBasicClasses,
|
||||||
|
rel: set('nofollow noopener noreferrer'),
|
||||||
|
target: set('_blank'),
|
||||||
|
},
|
||||||
|
span: {
|
||||||
|
class: sanitizerBasicClasses,
|
||||||
|
},
|
||||||
|
// Allow elements potentially created for Markdown code blocks above
|
||||||
|
pre: {},
|
||||||
|
code: {
|
||||||
|
class: filterClasses(/^language-\w+$/),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
const decoder = process.client ? document.createElement('textarea') : null
|
const decoder = process.client ? document.createElement('textarea') : null
|
||||||
export function decodeHtml(text: string) {
|
export function decodeHtml(text: string) {
|
||||||
if (!decoder)
|
if (!decoder)
|
||||||
|
@ -18,11 +46,19 @@ export function decodeHtml(text: string) {
|
||||||
* Parse raw HTML form Mastodon server to AST,
|
* Parse raw HTML form Mastodon server to AST,
|
||||||
* with interop of custom emojis and inline Markdown syntax
|
* with interop of custom emojis and inline Markdown syntax
|
||||||
*/
|
*/
|
||||||
export function parseMastodonHTML(html: string, customEmojis: Record<string, Emoji> = {}, markdown = true, forTiptap = false) {
|
export function parseMastodonHTML(
|
||||||
|
html: string,
|
||||||
|
options: ContentParseOptions = {},
|
||||||
|
) {
|
||||||
|
const {
|
||||||
|
markdown = true,
|
||||||
|
replaceUnicodeEmoji = true,
|
||||||
|
} = options
|
||||||
|
|
||||||
if (markdown) {
|
if (markdown) {
|
||||||
// Handle code blocks
|
// Handle code blocks
|
||||||
html = html
|
html = html
|
||||||
.replace(/>(```|~~~)(\w*)([\s\S]+?)\1/g, (_1, _2, lang, raw) => {
|
.replace(/>(```|~~~)(\w*)([\s\S]+?)\1/g, (_1, _2, lang: string, raw: string) => {
|
||||||
const code = htmlToText(raw)
|
const code = htmlToText(raw)
|
||||||
const classes = lang ? ` class="language-${lang}"` : ''
|
const classes = lang ? ` class="language-${lang}"` : ''
|
||||||
return `><pre><code${classes}>${code}</code></pre>`
|
return `><pre><code${classes}>${code}</code></pre>`
|
||||||
|
@ -30,39 +66,31 @@ export function parseMastodonHTML(html: string, customEmojis: Record<string, Emo
|
||||||
}
|
}
|
||||||
|
|
||||||
// Always sanitize the raw HTML data *after* it has been modified
|
// Always sanitize the raw HTML data *after* it has been modified
|
||||||
const basicClasses = filterClasses(/^(h-\S*|p-\S*|u-\S*|dt-\S*|e-\S*|mention|hashtag|ellipsis|invisible)$/u)
|
const transforms: Transform[] = [
|
||||||
return transformSync(parse(html), [
|
sanitizer,
|
||||||
sanitize({
|
...options.astTransforms || [],
|
||||||
// Allow basic elements as seen in https://github.com/mastodon/mastodon/blob/17f79082b098e05b68d6f0d38fabb3ac121879a9/lib/sanitize_ext/sanitize_config.rb
|
]
|
||||||
br: {},
|
|
||||||
p: {},
|
if (replaceUnicodeEmoji)
|
||||||
a: {
|
transforms.push(transformUnicodeEmoji)
|
||||||
href: filterHref(),
|
|
||||||
class: basicClasses,
|
if (markdown)
|
||||||
rel: set('nofollow noopener noreferrer'),
|
transforms.push(transformMarkdown)
|
||||||
target: set('_blank'),
|
|
||||||
},
|
transforms.push(replaceCustomEmoji(options.emojis || {}))
|
||||||
span: {
|
|
||||||
class: basicClasses,
|
return transformSync(parse(html), transforms)
|
||||||
},
|
|
||||||
// Allow elements potentially created for Markdown code blocks above
|
|
||||||
pre: {},
|
|
||||||
code: {
|
|
||||||
class: filterClasses(/^language-\w+$/),
|
|
||||||
},
|
|
||||||
}),
|
|
||||||
// Unicode emojis to images, but only if not converting HTML for Tiptap
|
|
||||||
!forTiptap ? replaceUnicodeEmoji() : noopTransform(),
|
|
||||||
markdown ? formatMarkdown() : noopTransform(),
|
|
||||||
replaceCustomEmoji(customEmojis),
|
|
||||||
])
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts raw HTML form Mastodon server to HTML for Tiptap editor
|
* Converts raw HTML form Mastodon server to HTML for Tiptap editor
|
||||||
*/
|
*/
|
||||||
export function convertMastodonHTML(html: string, customEmojis: Record<string, Emoji> = {}) {
|
export function convertMastodonHTML(html: string, customEmojis: Record<string, Emoji> = {}) {
|
||||||
const tree = parseMastodonHTML(html, customEmojis, true, true)
|
const tree = parseMastodonHTML(html, {
|
||||||
|
emojis: customEmojis,
|
||||||
|
markdown: true,
|
||||||
|
replaceUnicodeEmoji: false,
|
||||||
|
})
|
||||||
return render(tree)
|
return render(tree)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -162,11 +190,6 @@ function transformSync(doc: Node, transforms: Transform[]) {
|
||||||
return doc
|
return doc
|
||||||
}
|
}
|
||||||
|
|
||||||
// A transformation that does nothing. Useful for conditional transform chains.
|
|
||||||
function noopTransform(): Transform {
|
|
||||||
return node => node
|
|
||||||
}
|
|
||||||
|
|
||||||
// A tree transform for sanitizing elements & their attributes.
|
// A tree transform for sanitizing elements & their attributes.
|
||||||
type AttrSanitizers = Record<string, (value: string | undefined) => string | undefined>
|
type AttrSanitizers = Record<string, (value: string | undefined) => string | undefined>
|
||||||
function sanitize(allowedElements: Record<string, AttrSanitizers>): Transform {
|
function sanitize(allowedElements: Record<string, AttrSanitizers>): Transform {
|
||||||
|
@ -241,27 +264,25 @@ function filterHref() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function replaceUnicodeEmoji(): Transform {
|
function transformUnicodeEmoji(node: Node) {
|
||||||
return (node) => {
|
if (node.type !== TEXT_NODE)
|
||||||
if (node.type !== TEXT_NODE)
|
return node
|
||||||
return node
|
|
||||||
|
|
||||||
let start = 0
|
let start = 0
|
||||||
|
|
||||||
const matches = [] as (string | Node)[]
|
const matches = [] as (string | Node)[]
|
||||||
findAndReplaceEmojisInText(emojiRegEx, node.value, (match, result) => {
|
findAndReplaceEmojisInText(emojiRegEx, node.value, (match, result) => {
|
||||||
const attrs = getEmojiAttributes(match)
|
const attrs = getEmojiAttributes(match)
|
||||||
matches.push(result.slice(start))
|
matches.push(result.slice(start))
|
||||||
matches.push(h('img', { src: attrs.src, alt: attrs.alt, class: attrs.class }))
|
matches.push(h('img', { src: attrs.src, alt: attrs.alt, class: attrs.class }))
|
||||||
start = result.length + match.match.length
|
start = result.length + match.match.length
|
||||||
return undefined
|
return undefined
|
||||||
})
|
})
|
||||||
if (matches.length === 0)
|
if (matches.length === 0)
|
||||||
return node
|
return node
|
||||||
|
|
||||||
matches.push(node.value.slice(start))
|
matches.push(node.value.slice(start))
|
||||||
return matches.filter(Boolean)
|
return matches.filter(Boolean)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function replaceCustomEmoji(customEmojis: Record<string, Emoji>): Transform {
|
function replaceCustomEmoji(customEmojis: Record<string, Emoji>): Transform {
|
||||||
|
@ -286,47 +307,45 @@ function replaceCustomEmoji(customEmojis: Record<string, Emoji>): Transform {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function formatMarkdown(): Transform {
|
const _markdownReplacements: [RegExp, (c: (string | Node)[]) => Node][] = [
|
||||||
const replacements: [RegExp, (c: (string | Node)[]) => Node][] = [
|
[/\*\*\*(.*?)\*\*\*/g, c => h('b', null, [h('em', null, c)])],
|
||||||
[/\*\*\*(.*?)\*\*\*/g, c => h('b', null, [h('em', null, c)])],
|
[/\*\*(.*?)\*\*/g, c => h('b', null, c)],
|
||||||
[/\*\*(.*?)\*\*/g, c => h('b', null, c)],
|
[/\*(.*?)\*/g, c => h('em', null, c)],
|
||||||
[/\*(.*?)\*/g, c => h('em', null, c)],
|
[/~~(.*?)~~/g, c => h('del', null, c)],
|
||||||
[/~~(.*?)~~/g, c => h('del', null, c)],
|
[/`([^`]+?)`/g, c => h('code', null, c)],
|
||||||
[/`([^`]+?)`/g, c => h('code', null, c)],
|
]
|
||||||
]
|
|
||||||
|
|
||||||
function process(value: string) {
|
function _markdownProcess(value: string) {
|
||||||
const results = [] as (string | Node)[]
|
const results = [] as (string | Node)[]
|
||||||
|
|
||||||
let start = 0
|
let start = 0
|
||||||
while (true) {
|
while (true) {
|
||||||
let found: { match: RegExpMatchArray; replacer: (c: (string | Node)[]) => Node } | undefined
|
let found: { match: RegExpMatchArray; replacer: (c: (string | Node)[]) => Node } | undefined
|
||||||
|
|
||||||
for (const [re, replacer] of replacements) {
|
for (const [re, replacer] of _markdownReplacements) {
|
||||||
re.lastIndex = start
|
re.lastIndex = start
|
||||||
|
|
||||||
const match = re.exec(value)
|
const match = re.exec(value)
|
||||||
if (match) {
|
if (match) {
|
||||||
if (!found || match.index < found.match.index!)
|
if (!found || match.index < found.match.index!)
|
||||||
found = { match, replacer }
|
found = { match, replacer }
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!found)
|
|
||||||
break
|
|
||||||
|
|
||||||
results.push(value.slice(start, found.match.index))
|
|
||||||
results.push(found.replacer(process(found.match[1])))
|
|
||||||
start = found.match.index! + found.match[0].length
|
|
||||||
}
|
}
|
||||||
|
|
||||||
results.push(value.slice(start))
|
if (!found)
|
||||||
return results.filter(Boolean)
|
break
|
||||||
|
|
||||||
|
results.push(value.slice(start, found.match.index))
|
||||||
|
results.push(found.replacer(_markdownProcess(found.match[1])))
|
||||||
|
start = found.match.index! + found.match[0].length
|
||||||
}
|
}
|
||||||
|
|
||||||
return (node) => {
|
results.push(value.slice(start))
|
||||||
if (node.type !== TEXT_NODE)
|
return results.filter(Boolean)
|
||||||
return node
|
}
|
||||||
return process(node.value)
|
|
||||||
}
|
function transformMarkdown(node: Node) {
|
||||||
|
if (node.type !== TEXT_NODE)
|
||||||
|
return node
|
||||||
|
return _markdownProcess(node.value)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
import type { Emoji } from 'masto'
|
|
||||||
import { TEXT_NODE } from 'ultrahtml'
|
import { TEXT_NODE } from 'ultrahtml'
|
||||||
import type { Node } from 'ultrahtml'
|
import type { Node } from 'ultrahtml'
|
||||||
import { Fragment, h, isVNode } from 'vue'
|
import { Fragment, h, isVNode } from 'vue'
|
||||||
import type { VNode } from 'vue'
|
import type { VNode } from 'vue'
|
||||||
import { RouterLink } from 'vue-router'
|
import { RouterLink } from 'vue-router'
|
||||||
|
import type { ContentParseOptions } from './content-parse'
|
||||||
import { decodeHtml, parseMastodonHTML } from './content-parse'
|
import { decodeHtml, parseMastodonHTML } from './content-parse'
|
||||||
import ContentCode from '~/components/content/ContentCode.vue'
|
import ContentCode from '~/components/content/ContentCode.vue'
|
||||||
import AccountHoverWrapper from '~/components/account/AccountHoverWrapper.vue'
|
import AccountHoverWrapper from '~/components/account/AccountHoverWrapper.vue'
|
||||||
|
@ -13,12 +13,9 @@ import AccountHoverWrapper from '~/components/account/AccountHoverWrapper.vue'
|
||||||
*/
|
*/
|
||||||
export function contentToVNode(
|
export function contentToVNode(
|
||||||
content: string,
|
content: string,
|
||||||
{ emojis = {}, markdown = true }: {
|
options?: ContentParseOptions,
|
||||||
emojis?: Record<string, Emoji>
|
|
||||||
markdown?: boolean
|
|
||||||
} = {},
|
|
||||||
): VNode {
|
): VNode {
|
||||||
const tree = parseMastodonHTML(content, emojis, markdown)
|
const tree = parseMastodonHTML(content, options)
|
||||||
return h(Fragment, (tree.children as Node[]).map(n => treeToVNode(n)))
|
return h(Fragment, (tree.children as Node[]).map(n => treeToVNode(n)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -51,3 +51,16 @@ export const customEmojisData = computed(() => currentCustomEmojis.value.emojis.
|
||||||
emojis: transformEmojiData(currentCustomEmojis.value.emojis),
|
emojis: transformEmojiData(currentCustomEmojis.value.emojis),
|
||||||
}]
|
}]
|
||||||
: undefined)
|
: undefined)
|
||||||
|
|
||||||
|
export function useEmojisFallback(emojisGetter: () => Emoji[] | undefined) {
|
||||||
|
return computed(() => {
|
||||||
|
const result: Emoji[] = []
|
||||||
|
const emojis = emojisGetter()
|
||||||
|
if (emojis)
|
||||||
|
result.push(...emojis)
|
||||||
|
|
||||||
|
result.push(...currentCustomEmojis.value.emojis)
|
||||||
|
|
||||||
|
return emojisArrayToObject(result)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
// @unimport-disabled
|
||||||
import { emojiFilename, emojiPrefix, emojiRegEx } from '@iconify-emoji/twemoji'
|
import { emojiFilename, emojiPrefix, emojiRegEx } from '@iconify-emoji/twemoji'
|
||||||
import type { EmojiRegexMatch } from '@iconify/utils/lib/emoji/replace/find'
|
import type { EmojiRegexMatch } from '@iconify/utils/lib/emoji/replace/find'
|
||||||
import { getEmojiMatchesInText } from '@iconify/utils/lib/emoji/replace/find'
|
import { getEmojiMatchesInText } from '@iconify/utils/lib/emoji/replace/find'
|
||||||
|
|
|
@ -1,12 +1,5 @@
|
||||||
// Vitest Snapshot v1
|
// Vitest Snapshot v1
|
||||||
|
|
||||||
exports[`content-rich > JavaScript hrefs get removed 1`] = `
|
|
||||||
"<p>
|
|
||||||
<a href=\\"#\\" rel=\\"nofollow noopener noreferrer\\" target=\\"_blank\\">click me</a>
|
|
||||||
</p>
|
|
||||||
"
|
|
||||||
`;
|
|
||||||
|
|
||||||
exports[`content-rich > code frame 1`] = `
|
exports[`content-rich > code frame 1`] = `
|
||||||
"<p>Testing code block</p><p><pre lang=\\"ts\\">import { useMouse, usePreferredDark } from '@vueuse/core'
|
"<p>Testing code block</p><p><pre lang=\\"ts\\">import { useMouse, usePreferredDark } from '@vueuse/core'
|
||||||
// tracks mouse position
|
// tracks mouse position
|
||||||
|
@ -75,8 +68,3 @@ exports[`content-rich > link + mention 1`] = `
|
||||||
</p>
|
</p>
|
||||||
"
|
"
|
||||||
`;
|
`;
|
||||||
|
|
||||||
exports[`content-rich > script tags get removed 1`] = `
|
|
||||||
"<p></p>
|
|
||||||
"
|
|
||||||
`;
|
|
||||||
|
|
|
@ -67,7 +67,7 @@ describe('html-parse', () => {
|
||||||
})
|
})
|
||||||
|
|
||||||
async function render(input: string, emojis?: Record<string, Emoji>) {
|
async function render(input: string, emojis?: Record<string, Emoji>) {
|
||||||
const tree = parseMastodonHTML(input, emojis)
|
const tree = parseMastodonHTML(input, { emojis })
|
||||||
const html = await renderTree(tree)
|
const html = await renderTree(tree)
|
||||||
let formatted = ''
|
let formatted = ''
|
||||||
const serializedText = treeToText(tree).trim()
|
const serializedText = treeToText(tree).trim()
|
||||||
|
|
Loading…
Reference in a new issue