schlechtenburg/packages/rich-text/lib/create.ts
2024-10-09 14:43:40 +02:00

560 lines
16 KiB
TypeScript

/**
* Internal dependencies
*/
import { useFormatTypes } from './use-format-types';
import { createElement } from './create-element';
import { mergePair } from './concat';
import { OBJECT_REPLACEMENT_CHARACTER, ZWNBSP } from './special-characters';
import { RichTextFormat, RichTextValue, RichTextFormatType, SimpleRange } from './types';
/**
 * Builds a rich text value that contains no text, no formats and no
 * replacements.
 *
 * Every call produces a new object with new arrays, so the result can be
 * mutated by the caller without affecting other values.
 *
 * @return {RichTextValue} A fresh, empty rich text value.
 */
function createEmptyValue(): RichTextValue {
	const emptyValue: RichTextValue = {
		formats: [],
		replacements: [],
		text: '',
	};
	return emptyValue;
}
/**
 * Converts a tag name and its attributes into a rich text format object.
 *
 * A format type is looked up through the helpers returned by
 * `useFormatTypes()`: first by the element's `class` attribute (when it has
 * one), then by the bare tag name. When no format type is found, the tag name
 * itself is used as the format `type`.
 *
 * Note that `attributes` is modified in place: when a format type is matched
 * by class name, that class name is removed from `attributes.class` (and the
 * `class` key is deleted when nothing else remains).
 *
 * @param {Object} $1            Named arguments.
 * @param {string} $1.tagName    Lower-cased tag name of the element.
 * @param {Object} $1.attributes Attributes of the element.
 *
 * @return {RichTextFormat} The format describing the element.
 */
function toFormat( { tagName, attributes }: { tagName: string, attributes: Record<string,any> } ): RichTextFormat {
	const { getFormatTypeForClassName, getFormatTypeForBareElement } = useFormatTypes();

	let formatType: RichTextFormatType|undefined;

	if ( attributes && attributes.class ) {
		formatType = getFormatTypeForClassName( attributes.class );

		if ( formatType ) {
			// Drop the class that identified the format type, but preserve
			// any additional classes.
			const remainingClasses = ` ${ attributes.class } `
				.replace( ` ${ formatType.className } `, ' ' )
				.trim();

			if ( remainingClasses ) {
				attributes.class = remainingClasses;
			} else {
				delete attributes.class;
			}
		}
	}

	if ( ! formatType ) {
		formatType = getFormatTypeForBareElement( tagName );
	}

	// Unknown element: fall back to a format named after the tag.
	if ( ! formatType ) {
		return attributes ? { type: tagName, attributes } : { type: tagName };
	}

	if ( ! attributes ) {
		return { formatType, type: formatType.name, tagName };
	}

	// Split the element attributes into the ones the format type declares
	// (stored under the format type's own key) and everything else.
	const remaining = { ...attributes };
	const registered: Record<string, any> = {};

	for ( const key in formatType.attributes ) {
		const attributeName = formatType.attributes[ key ];
		const attributeValue = remaining[ attributeName ];

		// Whatever is left in `remaining` afterwards is unregistered.
		delete remaining[ attributeName ];

		if ( typeof attributeValue !== 'undefined' ) {
			registered[ key ] = attributeValue;
		}
	}

	const unregistered: Record<string, any> = { ...remaining };

	if ( formatType.contentEditable === false ) {
		delete unregistered.contenteditable;
	}

	return {
		formatType,
		type: formatType.name,
		tagName,
		attributes: registered,
		unregisteredAttributes: unregistered,
	};
}
/**
 * Creates a rich text value from a plain text string by delegating to
 * `create`.
 *
 * @param {string} text Plain text to create the value from.
 */
export const fromPlainText = ( text: string ) => {
	return create( { text } );
};
/**
 * Creates a rich text value from an HTML string by delegating to `create`.
 *
 * @param {string} html HTML to create the value from.
 */
export const fromHTMLString = ( html: string ) => {
	return create( { html } );
};
/**
 * Creates a rich text value from a DOM element.
 *
 * Unless `preserveWhiteSpace` is set, the value is created from a copy of the
 * element produced by `collapseWhiteSpace`. The `innerHTML` of the element
 * that was passed in is attached to the result as an `originalHTML` property
 * (defined with default descriptor flags, so it is non-enumerable and
 * read-only).
 *
 * @param {Element} htmlElement                  Element to create the value from.
 * @param {Object}  [options]                    Optional settings.
 * @param {boolean} [options.preserveWhiteSpace] Whether to skip white space collapsing.
 */
export const fromHTMLElement = (htmlElement: Element, options: { preserveWhiteSpace?: boolean } = {}) => {
	let element = htmlElement;
	if ( ! options.preserveWhiteSpace ) {
		element = collapseWhiteSpace( htmlElement );
	}

	const richTextValue = create( { element } );
	const originalHTML = htmlElement.innerHTML;
	Object.defineProperty( richTextValue, 'originalHTML', { value: originalHTML } );

	return richTextValue;
};
/**
 * Create a RichText value from an `Element` tree (DOM), an HTML string or a
 * plain text string, optionally with a range to set the selection. When called
 * without any usable input, an empty value is created.
 *
 * The inputs are considered in this order: a non-empty `text` wins and is
 * returned as-is without formats; otherwise a non-empty `html` string is
 * parsed into an element; otherwise `element` is used.
 *
 * A value has the following shape, which you are strongly encouraged not to
 * modify without the use of helper functions:
 *
 * ```js
 * {
 *   text: string,
 *   formats: Array,
 *   replacements: Array,
 *   ?start: number,
 *   ?end: number,
 * }
 * ```
 *
 * Text and formatting are separated. `text` holds the text, including any
 * replacement characters for objects. `formats` and `replacements` are sparse
 * arrays of the same length as `text` and hold the formatting information for
 * the matching text indices. `start` and `end` state which text indices are
 * selected; they are only set when a range was given and could be resolved.
 *
 * @param {Object}  [$1]                Optional named arguments.
 * @param {Element} [$1.element]        Element to create value from.
 * @param {string}  [$1.text]           Text to create value from.
 * @param {string}  [$1.html]           HTML to create value from.
 * @param {Range}   [$1.range]          Range to create value from.
 * @param {boolean} [$1.isEditableTree] Passed through to the element conversion.
 * @return {RichTextValue} A rich text value.
 */
export function create({
	element,
	text,
	html,
	range,
	isEditableTree = false,
}: {
	element?: Element|Node,
	text?: string,
	html?: string,
	range?: SimpleRange,
	isEditableTree?: boolean,
} = {} ): RichTextValue {
	// Plain text needs no parsing: one empty slot per character is enough.
	if ( typeof text === 'string' && text.length > 0 ) {
		const { length } = text;
		return {
			formats: Array( length ),
			replacements: Array( length ),
			text,
		};
	}

	// It does not matter which document is used here, it only serves to
	// parse the HTML string.
	const hasHtml = typeof html === 'string' && html.length > 0;
	const source = hasHtml ? createElement( document, html ) : element;

	if ( typeof source !== 'object' ) {
		return createEmptyValue();
	}

	return createFromElement( { element: source, range, isEditableTree } );
}
/**
 * Helper to accumulate the value's selection start and end from the current
 * node and range.
 *
 * Offsets are computed relative to the text already collected in
 * `accumulator`, so this must be called before the text of `node` is appended.
 * `accumulator.start` / `accumulator.end` are only written when the node is
 * related to the matching range boundary; otherwise they are left untouched.
 * Nothing happens when either `range` or `value` is missing.
 *
 * @param {Object} accumulator Object to accumulate into.
 * @param {Node}   node        Node to create value with.
 * @param {Range}  range       Range to create value with.
 * @param {Object} value       Value that is being accumulated.
 */
function accumulateSelection(
	accumulator: RichTextValue,
	node: Node,
	range?: SimpleRange,
	value?: RichTextValue,
) {
	if ( ! ( range && value ) ) {
		return;
	}

	const nodeValue: RichTextValue = value;
	const { startContainer, startOffset, endContainer, endOffset } = range;
	const parent = node.parentNode;
	const base = accumulator.text.length;
	const isTextNode = (): boolean => node.nodeType === node.TEXT_NODE;

	const resolveStart = (): number | undefined => {
		// Selection can be extracted from value.
		if ( nodeValue.start !== undefined ) {
			return base + nodeValue.start;
		}
		// Range indicates that the current node has selection.
		if ( node === startContainer && isTextNode() ) {
			return base + startOffset;
		}
		// Range indicates that the current node is selected.
		if ( parent === startContainer && node === startContainer.childNodes[ startOffset ] ) {
			return base;
		}
		// Range indicates that the selection is after the current node.
		if ( parent === startContainer && node === startContainer.childNodes[ startOffset - 1 ] ) {
			return base + nodeValue.text.length;
		}
		// Fallback if no child inside handled the selection.
		if ( node === startContainer ) {
			return base;
		}
		return undefined;
	};

	const resolveEnd = (): number | undefined => {
		// Selection can be extracted from value.
		if ( nodeValue.end !== undefined ) {
			return base + nodeValue.end;
		}
		// Range indicates that the current node has selection.
		if ( node === endContainer && isTextNode() ) {
			return base + endOffset;
		}
		// Range indicates that the current node is selected.
		if ( parent === endContainer && node === endContainer.childNodes[ endOffset - 1 ] ) {
			return base + nodeValue.text.length;
		}
		// Range indicates that the selection is before the current node.
		if ( parent === endContainer && node === endContainer.childNodes[ endOffset ] ) {
			return base;
		}
		// Fallback if no child inside handled the selection.
		// NOTE(review): unlike the start fallback this adds `endOffset`, which
		// for a non-text container is a child index rather than a text
		// offset - confirm this asymmetry is intended.
		if ( node === endContainer ) {
			return base + endOffset;
		}
		return undefined;
	};

	const start = resolveStart();
	if ( start !== undefined ) {
		accumulator.start = start;
	}

	const end = resolveEnd();
	if ( end !== undefined ) {
		accumulator.end = end;
	}
}
/**
 * Adjusts the start and end offsets from a range based on a text filter.
 *
 * When `node` is the start (or end) container of the range, the matching
 * offset is recomputed as the length of the filtered text that precedes it,
 * so that the offset keeps pointing at the same position once the filter has
 * removed characters from the node's text. Offsets of containers other than
 * `node` are copied unchanged.
 *
 * @param {Node}     node   Node of which the text should be filtered.
 * @param {Range}    range  The range to filter.
 * @param {Function} filter Function to use to filter the text. When omitted
 *                          there is nothing to adjust and `range` is returned
 *                          as is.
 *
 * @return {Object|undefined} Object containing range properties, or
 *                            `undefined` when no range was given.
 */
function filterRange(node: Node, range?: SimpleRange, filter?: ( text: string ) => string): SimpleRange|undefined {
	if ( ! range ) {
		return undefined;
	}

	// Without a filter no character can have been removed, so the offsets are
	// still valid. Returning `undefined` here would silently drop the
	// caller's selection.
	if ( ! filter ) {
		return range;
	}

	const { startContainer, endContainer } = range;
	let { startOffset, endOffset } = range;
	const value = node.nodeValue || '';

	if ( node === startContainer ) {
		startOffset = filter( value.slice( 0, startOffset ) ).length;
	}

	if ( node === endContainer ) {
		endOffset = filter( value.slice( 0, endOffset ) ).length;
	}

	return { startContainer, startOffset, endContainer, endOffset };
}
/**
 * Collapse any whitespace used for HTML formatting to one space character,
 * because it will also be displayed as such by the browser.
 *
 * We need to strip it from the content because we use white-space: pre-wrap for
 * displaying editable rich text. Without using white-space: pre-wrap, the
 * browser will litter the content with non breaking spaces, among other issues.
 * See packages/rich-text/src/component/use-default-style.js.
 *
 * The element that is passed in is never modified: a deep clone is made once,
 * normalised, and then rewritten in place (including all nested elements).
 *
 * @see
 * https://developer.mozilla.org/en-US/docs/Web/CSS/white-space-collapse#collapsing_of_white_space
 *
 * @param {HTMLElement} element Element to collapse the whitespace of.
 * @param {boolean}     isRoot  Whether `element` is the root of the content,
 *                              in which case a space at the very start or
 *                              end of its content is removed as well.
 *
 * @return {HTMLElement} New element with collapsed whitespace.
 */
function collapseWhiteSpace<T extends Node = HTMLElement>(element: T, isRoot: boolean = true): T {
	const clone = element.cloneNode( true ) as T;
	// Merges adjacent text nodes in the whole cloned subtree.
	clone.normalize();
	collapseChildWhiteSpace( clone, isRoot );
	return clone;
}

/**
 * Collapses the whitespace of all text nodes below `parent`, in place.
 *
 * @param {Node}    parent Node whose descendants are rewritten.
 * @param {boolean} isRoot Whether `parent` is the root of the content.
 */
function collapseChildWhiteSpace( parent: Node, isRoot: boolean ): void {
	const children = Array.from( parent.childNodes );
	const lastIndex = children.length - 1;

	children.forEach( ( child, index ) => {
		if ( child.nodeType === child.ELEMENT_NODE ) {
			// Recurse on the node itself. Recursing on a clone of it (and
			// dropping the result) would leave nested text untouched.
			collapseChildWhiteSpace( child, false );
			return;
		}

		if ( child.nodeType !== child.TEXT_NODE ) {
			return;
		}

		let text = ( child.nodeValue || '' )
			.replace( /[\n\t\r\f]+/g, ' ' )
			.replace( / {2,}/g, ' ' );

		// Only trim at the edges of the root. Inside a nested element a
		// leading or trailing space may be the only separator from the
		// neighbouring text, so removing it would glue words together.
		if ( isRoot && index === 0 && text.startsWith( ' ' ) ) {
			text = text.slice( 1 );
		} else if ( isRoot && index === lastIndex && text.endsWith( ' ' ) ) {
			text = text.slice( 0, -1 );
		}

		child.nodeValue = text;
	} );
}
/**
 * Carriage return character, treated as a reserved character and stripped by
 * `removeReservedCharacters`.
 *
 * We need to normalise line breaks to `\n` so they are consistent across
 * platforms and serialised properly. Not removing \r would cause it to
 * linger and result in double line breaks when whitespace is preserved.
 */
const CARRIAGE_RETURN = '\r';
/**
 * Removes reserved characters used by rich-text (zero width non breaking spaces
 * added by `toTree`, object replacement characters) as well as carriage
 * returns.
 *
 * @param {string} string Text to clean.
 *
 * @return {string} The text without any reserved character.
 */
export function removeReservedCharacters( string: string ): string {
	const reservedCharacters = [
		ZWNBSP,
		OBJECT_REPLACEMENT_CHARACTER,
		CARRIAGE_RETURN,
	].join( '' );

	// The pattern carries the global flag; a new instance is built on every
	// call so that no `lastIndex` state is shared between calls.
	const reservedPattern = new RegExp( `[${ reservedCharacters }]`, 'gu' );

	return string.replace( reservedPattern, '' );
}
/**
 * Creates a Rich Text value from a DOM element and range.
 *
 * The children of `element` are walked in document order and merged into one
 * value:
 *
 * - text nodes contribute their text (minus reserved characters);
 * - `br` elements contribute a `\n`;
 * - `script` elements and empty elements contribute one object replacement
 *   character with the element stored in `replacements`;
 * - any other element is converted recursively and its format is prepended to
 *   the formats of every character it contains.
 *
 * The selection described by `range` is translated into `start` / `end`
 * indices of the resulting text while walking.
 *
 * @param {Object}  $1                  Named arguments.
 * @param {Element} [$1.element]        Element to create value from.
 * @param {Range}   [$1.range]          Range to create value from.
 * @param {boolean} [$1.isEditableTree] When true, `br` elements without a
 *                                      `data-rich-text-line-break` attribute
 *                                      are ignored.
 *
 * @return {RichTextValue} A rich text value.
 */
function createFromElement(
	{
		element,
		range,
		isEditableTree,
	}:
	{
		element?:Element|Node,
		range?:SimpleRange,
		isEditableTree?: boolean,
	}
): RichTextValue {
	const accumulator = createEmptyValue();

	if ( ! element ) {
		return accumulator;
	}

	if ( ! element.hasChildNodes() ) {
		accumulateSelection( accumulator, element, range, createEmptyValue() );
		return accumulator;
	}

	const length = element.childNodes.length;
	let newRange = range;

	// Optimise for speed.
	for ( let index = 0; index < length; index++ ) {
		const node = element.childNodes[ index ];
		const tagName = node.nodeName.toLowerCase();

		if ( node.nodeType === node.TEXT_NODE ) {
			const text = removeReservedCharacters( node.nodeValue || '' );
			// Removing characters shifts the offsets inside this node.
			newRange = filterRange( node, newRange, removeReservedCharacters );
			accumulateSelection( accumulator, node, newRange, { text, formats: [], replacements: [] } );
			// Create a sparse array of the same length as `text`, in which
			// formats can be added.
			accumulator.formats.length += text.length;
			accumulator.replacements.length += text.length;
			accumulator.text += text;
			continue;
		}

		if ( node.nodeType !== node.ELEMENT_NODE ) {
			continue;
		}

		if (
			isEditableTree &&
			// Ignore any line breaks that are not inserted by us.
			tagName === 'br' &&
			! (node as HTMLElement).getAttribute( 'data-rich-text-line-break' )
		) {
			accumulateSelection( accumulator, node, newRange, createEmptyValue() );
			continue;
		}

		if ( tagName === 'script' ) {
			const value = {
				// Exactly one (empty) formats slot for the single replacement
				// character, so `formats` stays aligned with `text`.
				formats: Array( 1 ),
				replacements: [
					{
						type: tagName,
						attributes: {
							'data-rich-text-script':
								(node as HTMLElement).getAttribute( 'data-rich-text-script' ) ||
								encodeURIComponent( (node as HTMLElement).innerHTML ),
						},
					},
				],
				text: OBJECT_REPLACEMENT_CHARACTER,
				// No `start` / `end` here: a selection on the value would make
				// `accumulateSelection` move the selection to every script
				// element, wherever the range actually is.
			};
			accumulateSelection( accumulator, node, newRange, value );
			mergePair( accumulator, value );
			continue;
		}

		if ( tagName === 'br' ) {
			accumulateSelection( accumulator, node, newRange, createEmptyValue() );
			mergePair( accumulator, create( { text: '\n' } ) );
			continue;
		}

		const format = toFormat( {
			tagName,
			attributes: getAttributes( { element: node as HTMLElement } ),
		} );

		// When a format type is declared as not editable, replace it with an
		// object replacement character and preserve the inner HTML.
		// if ( format?.formatType?.contentEditable === false ) {
		// 	delete format.formatType;
		// 	accumulateSelection( accumulator, node, newRange, createEmptyValue() );
		// 	mergePair( accumulator, {
		// 		formats: Array( 1 ),
		// 		replacements: [
		// 			{
		// 				...format,
		// 				innerHTML: (node as HTMLElement).innerHTML,
		// 			},
		// 		],
		// 		text: OBJECT_REPLACEMENT_CHARACTER,
		// 	} );
		// 	continue;
		// }

		if ( format ) {
			delete format.formatType;
		}

		const value = createFromElement( {
			element: node as HTMLElement,
			range: newRange,
			isEditableTree,
		} );

		accumulateSelection( accumulator, node, newRange, value );

		// Ignore any placeholders, but keep their content since the browser
		// might insert text inside them when the editable element is flex.
		if ( ! format || (node as HTMLElement).getAttribute( 'data-rich-text-placeholder' ) ) {
			mergePair( accumulator, value );
		} else if ( value.text.length === 0 ) {
			// NOTE(review): `getAttributes` in this file always returns an
			// object, so `format.attributes` looks always truthy here and
			// every empty element is kept as an object - confirm intended.
			if ( format.attributes ) {
				mergePair( accumulator, {
					// One (empty) formats slot for the one replacement
					// character; the selection was already accumulated above.
					formats: Array( 1 ),
					replacements: [ format ],
					text: OBJECT_REPLACEMENT_CHARACTER,
				} );
			}
		} else {
			// Indices should share a reference to the same formats array.
			// Only create a new reference if `formats` changes. Since the
			// formats at an index can be `undefined` (sparse array), the
			// cache starts out with the result for `undefined`.
			let lastFormats: RichTextFormat[] | undefined;
			let lastMerged: RichTextFormat[] = [ format ];
			const mergeFormats = ( formats?: RichTextFormat[] ): RichTextFormat[] => {
				if ( formats !== lastFormats ) {
					lastFormats = formats;
					lastMerged = formats ? [ format, ...formats ] : [ format ];
				}
				return lastMerged;
			};

			mergePair( accumulator, {
				...value,
				formats: Array.from( value.formats, mergeFormats ),
			} );
		}
	}

	return accumulator;
}
/**
 * Gets the attributes of an element in object shape.
 *
 * Attributes whose name starts with `data-rich-text-` are skipped. Attributes
 * whose name starts with `on` (case-insensitive) are stored under a
 * `data-disable-rich-text-` prefixed name instead of their own name.
 *
 * @param {Object}  $1         Named arguments.
 * @param {Element} $1.element Element to get attributes from.
 *
 * @return {Object} Attribute object; empty when the element has no attributes
 *                  or none of them is kept.
 */
function getAttributes({ element }: { element: Element }): Record<string, any>{
	const collected: Record<string, any> = {};

	if ( ! element.hasAttributes() ) {
		return collected;
	}

	for ( const { name, value } of Array.from( element.attributes ) ) {
		if ( name.startsWith( 'data-rich-text-' ) ) {
			continue;
		}

		const isEventHandler = /^on/i.test( name );
		const storedName = isEventHandler
			? `data-disable-rich-text-${ name }`
			: name;

		collected[ storedName ] = value;
	}

	return collected;
}