diff --git a/composables/content-parse.ts b/composables/content-parse.ts
index 92211e90..9cc4bf15 100644
--- a/composables/content-parse.ts
+++ b/composables/content-parse.ts
@@ -1,7 +1,7 @@
// @unimport-disable
import type { Emoji } from 'masto'
import type { Node } from 'ultrahtml'
-import { TEXT_NODE, parse, render, walkSync } from 'ultrahtml'
+import { ELEMENT_NODE, TEXT_NODE, h, parse, render } from 'ultrahtml'
import { findAndReplaceEmojisInText } from '@iconify/utils'
import { emojiRegEx, getEmojiAttributes } from '../config/emojis'
@@ -19,53 +19,43 @@ export function decodeHtml(text: string) {
* with interop of custom emojis and inline Markdown syntax
*/
export function parseMastodonHTML(html: string, customEmojis: Record
+ click me
+ Testing code block
`
})
-
- walkSync(parse(processed), (node) => {
- if (node.type !== TEXT_NODE)
- return
- const replacements = [
- [/\*\*\*(.*?)\*\*\*/g, '$1'],
- [/\*\*(.*?)\*\*/g, '$1'],
- [/\*(.*?)\*/g, '$1'],
- [/~~(.*?)~~/g, '${code}
$1'],
- [/`([^`]+?)`/g, '$1
'],
- ] as any
-
- for (const [re, replacement] of replacements) {
- for (const match of node.value.matchAll(re)) {
- if (node.loc) {
- const start = match.index! + node.loc[0].start
- const end = start + match[0].length + node.loc[0].start
- processed = processed.slice(0, start) + match[0].replace(re, replacement) + processed.slice(end)
- }
- else {
- processed = processed.replace(match[0], match[0].replace(re, replacement))
- }
- }
- }
- })
}
- return parse(processed)
+ // Always sanitize the raw HTML data *after* it has been modified
+ const basicClasses = filterClasses(/^(h-\S*|p-\S*|u-\S*|dt-\S*|e-\S*|mention|hashtag|ellipsis|invisible)$/u)
+ return transformSync(parse(html), [
+ sanitize({
+ // Allow basic elements as seen in https://github.com/mastodon/mastodon/blob/17f79082b098e05b68d6f0d38fabb3ac121879a9/lib/sanitize_ext/sanitize_config.rb
+ br: {},
+ p: {},
+ a: {
+ href: filterHref(),
+ class: basicClasses,
+ rel: set('nofollow noopener noreferrer'),
+ target: set('_blank'),
+ },
+ span: {
+ class: basicClasses,
+ },
+ // Allow elements potentially created for Markdown code blocks above
+ pre: {},
+ code: {
+ class: filterClasses(/^language-\w+$/),
+ },
+ }),
+ // Unicode emojis to images, but only if not converting HTML for Tiptap
+ !forTiptap ? replaceUnicodeEmoji() : noopTransform(),
+ markdown ? formatMarkdown() : noopTransform(),
+ replaceCustomEmoji(customEmojis),
+ ])
}
/**
@@ -133,12 +123,210 @@ export function treeToText(input: Node): string {
return pre + body + post
}
-/**
- * Replace unicode emojis with locally hosted images
- */
-export function replaceUnicodeEmoji(html: string) {
- return findAndReplaceEmojisInText(emojiRegEx, html, (match) => {
- const attrs = getEmojiAttributes(match)
- return ``
- }) || html
+// A tree transform function takes an ultrahtml Node object and returns the
+// content that replaces that node in the tree: a node, a string, or an array of both.
+// Returning null removes the node from the tree.
+// Strings get converted to text nodes.
+// The input node's children have already been transformed by the time the
+// node itself is passed to the transform.
+type Transform = (node: Node) => (Node | string)[] | Node | string | null
+
+// Transforms (filters, modifies, ...) a parsed HTML tree in place by running
+// each transform of the given chain, in order, over the whole tree; returns doc.
+function transformSync(doc: Node, transforms: Transform[]) {
+ function visit(node: Node, transform: Transform, isRoot = false) { // post-order: children first, then the node itself
+ if (Array.isArray(node.children)) {
+ const children = [] as (Node | string)[]
+ for (let i = 0; i < node.children.length; i++) {
+ const result = visit(node.children[i], transform)
+ if (Array.isArray(result)) // an array result splices several replacements in place of one child
+ children.push(...result)
+
+ else if (result) // falsy results (null, but also '') drop the child
+ children.push(result)
+ }
+
+ node.children = children.map((value) => {
+ if (typeof value === 'string') // bare strings become text nodes owned by this node
+ return { type: TEXT_NODE, value, parent: node }
+ value.parent = node // re-parent replacement nodes to this node
+ return value
+ })
+ }
+ return isRoot ? node : transform(node) // the root itself is never passed to a transform
+ }
+
+ for (const transform of transforms) // one full tree pass per transform
+ doc = visit(doc, transform, true) as Node
+
+ return doc
+}
+
+// A transform that returns every node unchanged. Useful as a placeholder in conditional transform chains.
+function noopTransform(): Transform {
+ return node => node // identity: each node replaces itself
+}
+
+// A tree transform for sanitizing elements & their attributes.
+type AttrSanitizers = Recordimport { useMouse, usePreferredDark } from '@vueuse/core'
// tracks mouse position
@@ -10,7 +17,13 @@ const isDark = usePreferredDark()
const a = hello
@@ -62,3 +75,8 @@ exports[`content-rich > link + mention 1`] = `
"
`;
+
+exports[`content-rich > script tags get removed 1`] = `
+"
+"
+`;
diff --git a/tests/__snapshots__/html-parse.test.ts.snap b/tests/__snapshots__/html-parse.test.ts.snap
index 863cd499..6cd2fdcd 100644
--- a/tests/__snapshots__/html-parse.test.ts.snap
+++ b/tests/__snapshots__/html-parse.test.ts.snap
@@ -23,7 +23,11 @@ const isDark = usePreferredDark()
exports[`html-parse > code frame 2 > html 1`] = `
"