schlechtenburg/packages/rich-text/lib/create.ts

import { FormatTypeStore } from './use-format-types';
import { createElement } from './create-element';
import { mergePair } from './concat';
import { OBJECT_REPLACEMENT_CHARACTER, ZWNBSP } from './special-characters';
import { RichTextFormat, RichTextValue, RichTextFormatType, SimpleRange } from './types';

/**
 * Produces a brand-new rich text value that holds no text, no formats and no
 * replacements. Every call returns fresh arrays, so the result can be mutated
 * freely by the caller.
 */
function createEmptyValue(): RichTextValue {
	const emptyValue: RichTextValue = {
		formats: [],
		replacements: [],
		text: '',
	};

	return emptyValue;
}

/**
 * Converts an element description (tag name plus attributes) into a rich text
 * format object.
 *
 * The format type is looked up by class name first and by bare tag name
 * second. When a type is found through its class, that class is left out of
 * the resulting attributes while any other classes are preserved.
 *
 * The `attributes` object passed in is never modified.
 *
 * @param element            Description of the element.
 * @param element.tagName    Lower-case tag name of the element.
 * @param element.attributes Attributes of the element, keyed by name.
 * @param store              Store used to look up registered format types.
 *
 * @returns For an unknown element: `{ type: tagName, attributes? }`. For a
 *          registered format type: the type, its name, the tag name and the
 *          attributes split into `attributes` (declared by the format type,
 *          keyed by the format type's own keys) and `unregisteredAttributes`
 *          (everything else).
 *
 * @throws {TypeError} When no store is given.
 */
function toFormat(
	{ tagName, attributes }: { tagName: string, attributes: Record<string,any> },
	store: FormatTypeStore,
): RichTextFormat {
	if ( ! store ) {
		// Fail with a descriptive message instead of a destructuring error.
		throw new TypeError( 'toFormat() requires a format type store.' );
	}

	const { getFormatTypeForClassName, getFormatTypeForBareElement } = store;
	let formatType: RichTextFormatType|undefined;
	// Attributes that remain once the format type's own class is stripped.
	let remainingAttributes = attributes;

	if ( attributes && attributes.class ) {
		formatType = getFormatTypeForClassName( attributes.class );

		if ( formatType ) {
			// Preserve any additional classes. Work on a copy so the caller's
			// object keeps its original `class` value.
			const otherClasses = ` ${ attributes.class } `
				.replace( ` ${ formatType.className } `, ' ' )
				.trim();

			remainingAttributes = { ...attributes };

			if ( otherClasses ) {
				remainingAttributes.class = otherClasses;
			} else {
				delete remainingAttributes.class;
			}
		}
	}

	if ( ! formatType ) {
		formatType = getFormatTypeForBareElement( tagName );
	}

	if ( ! formatType ) {
		return attributes ? { type: tagName, attributes } : { type: tagName };
	}

	if ( ! attributes ) {
		return { formatType, type: formatType.name, tagName };
	}

	const registeredAttributes: Record<string, any> = {};
	// Starts as a copy of everything; registered attributes are removed below
	// and what is left is considered to be unregistered.
	const unregisteredAttributes: Record<string, any> = { ...remainingAttributes };

	for ( const key in formatType.attributes ) {
		const name = formatType.attributes[ key ];
		const attributeValue = unregisteredAttributes[ name ];

		delete unregisteredAttributes[ name ];

		if ( typeof attributeValue !== 'undefined' ) {
			registeredAttributes[ key ] = attributeValue;
		}
	}

	if ( formatType.contentEditable === false ) {
		delete unregisteredAttributes.contenteditable;
	}

	return {
		formatType,
		type: formatType.name,
		tagName,
		attributes: registeredAttributes,
		unregisteredAttributes,
	};
}

/**
 * Builds a rich text value from a plain text string by delegating to
 * `create` with only `text` set.
 */
export const fromPlainText = (text: string, store: FormatTypeStore): RichTextValue => {
  return create({ text }, store);
};
/**
 * Builds a rich text value from an HTML string by delegating to `create`
 * with only `html` set.
 */
export const fromHTMLString = (html: string, store: FormatTypeStore): RichTextValue => {
  return create({ html }, store);
};
/**
 * Builds a rich text value from a DOM element.
 *
 * Unless `options.preserveWhiteSpace` is truthy, the value is created from a
 * whitespace-collapsed copy of the element. The untouched `innerHTML` of the
 * element that was passed in is attached to the result as `originalHTML`; it
 * is defined through `Object.defineProperty` with only a `value`, so it is
 * neither enumerable nor writable.
 */
export const fromHTMLElement = (htmlElement: Element, options: { preserveWhiteSpace?: boolean } = {}, store: FormatTypeStore) => {
  const sourceElement = options.preserveWhiteSpace
    ? htmlElement
    : collapseWhiteSpace( htmlElement );
  const result = create({ element: sourceElement }, store);

  Object.defineProperty( result, 'originalHTML', { value: htmlElement.innerHTML } );

  return result;
};

/**
 * Create a RichText value from an `Element` tree (DOM), an HTML string or a
 * plain text string, optionally with a range describing the selection. When
 * none of the inputs is usable, an empty value is returned.
 *
 * Inputs are considered in this order: a non-empty `text` wins and is
 * returned as-is without any formatting (the range is not consulted); a
 * non-empty `html` string is parsed into an element; otherwise `element` is
 * used.
 *
 * A value has the following shape, which you are strongly encouraged not to
 * modify without the use of helper functions:
 *
 * ```js
 * {
 *   text: string,
 *   formats: Array,
 *   replacements: Array,
 *   ?start: number,
 *   ?end: number,
 * }
 * ```
 *
 * Text and formatting are kept apart. `text` holds the characters, including
 * any replacement characters for objects. `formats` and `replacements` are
 * sparse arrays of the same length as `text` that hold the formatting
 * information for the matching text index. `start` and `end` state which text
 * indices are selected; they are only provided if a range was given.
 */
export function create(
	{
		element,
		text,
		html,
		range,
		isEditableTree = false,
	}: {
		element?: Element|Node,
		text?: string,
		html?: string,
		range?: SimpleRange,
		isEditableTree?: boolean,
	} = {},
	store: FormatTypeStore,
): RichTextValue {
	if ( typeof text === 'string' && text.length > 0 ) {
		const size = text.length;

		return {
			formats: new Array( size ),
			replacements: new Array( size ),
			text,
		};
	}

	// Which document does the parsing is irrelevant; it only serves as a
	// factory for the parsed tree.
	const root = typeof html === 'string' && html.length > 0
		? createElement( document, html )
		: element;

	return typeof root === 'object'
		? createFromElement( { element: root, range, isEditableTree }, store )
		: createEmptyValue();
}

/**
 * Helper to accumulate the value's selection start and end from the current
 * node and range.
 *
 * `value` is the rich text value created for `node`; `accumulator` is the
 * value built so far from the siblings that precede `node`. Nothing happens
 * when either `range` or `value` is missing. `accumulator.start` and
 * `accumulator.end` are only written when the corresponding boundary can be
 * located relative to `node`.
 */
function accumulateSelection(
	accumulator: RichTextValue,
	node: Node,
	range?: SimpleRange,
	value?: RichTextValue,
) {
	if ( ! value || ! range ) {
		return;
	}

	// Offsets found for `node` are relative to the text collected so far.
	const base = accumulator.text.length;

	const start = locateSelectionStart( node, base, range, value );
	if ( start !== undefined ) {
		accumulator.start = start;
	}

	const end = locateSelectionEnd( node, base, range, value );
	if ( end !== undefined ) {
		accumulator.end = end;
	}
}

/**
 * Resolves the selection start contributed by `node`, or `undefined` when the
 * range start is unrelated to `node`.
 */
function locateSelectionStart(
	node: Node,
	base: number,
	range: SimpleRange,
	value: RichTextValue,
): number | undefined {
	const { startContainer, startOffset } = range;
	const isContainer = node === startContainer;

	// Selection can be extracted from value.
	if ( value.start !== undefined ) {
		return base + value.start;
	}

	// Range indicates that the current (text) node has selection.
	if ( isContainer && node.nodeType === node.TEXT_NODE ) {
		return base + startOffset;
	}

	if ( node.parentNode === startContainer ) {
		// Range indicates that the current node is selected.
		if ( node === startContainer.childNodes[ startOffset ] ) {
			return base;
		}

		// Range indicates that the selection is after the current node.
		if ( node === startContainer.childNodes[ startOffset - 1 ] ) {
			return base + value.text.length;
		}
	}

	// Fallback if no child inside handled the selection.
	return isContainer ? base : undefined;
}

/**
 * Resolves the selection end contributed by `node`, or `undefined` when the
 * range end is unrelated to `node`.
 */
function locateSelectionEnd(
	node: Node,
	base: number,
	range: SimpleRange,
	value: RichTextValue,
): number | undefined {
	const { endContainer, endOffset } = range;
	const isContainer = node === endContainer;

	// Selection can be extracted from value.
	if ( value.end !== undefined ) {
		return base + value.end;
	}

	// Range indicates that the current (text) node has selection.
	if ( isContainer && node.nodeType === node.TEXT_NODE ) {
		return base + endOffset;
	}

	if ( node.parentNode === endContainer ) {
		// Range indicates that the current node is selected.
		if ( node === endContainer.childNodes[ endOffset - 1 ] ) {
			return base + value.text.length;
		}

		// Range indicates that the selection is before the current node.
		if ( node === endContainer.childNodes[ endOffset ] ) {
			return base;
		}
	}

	// Fallback if no child inside handled the selection.
	return isContainer ? base + endOffset : undefined;
}

/**
 * Adjusts the start and end offsets from a range based on a text filter.
 *
 * When `node` is the start (or end) container of the range, the matching
 * offset is recomputed as the length of the filtered text that precedes it,
 * so the offset stays correct once the filter has removed characters from
 * the node's text.
 *
 * @param node   Node whose text is being filtered.
 * @param range  Range to adjust. When omitted, `undefined` is returned.
 * @param filter Function that removes characters from a string. When omitted
 *               there is nothing to adjust and the offsets are returned
 *               unchanged, rather than dropping the range.
 *
 * @returns A new range object, or `undefined` when no range was given.
 */
function filterRange(
	node: Node,
	range?: SimpleRange,
	filter?: ( text: string ) => string,
): SimpleRange|undefined {
	if ( ! range ) {
		return;
	}

	const { startContainer, endContainer } = range;
	let { startOffset, endOffset } = range;

	if ( filter ) {
		// `nodeValue` is `null` for nodes that carry no text.
		const value = node.nodeValue || '';

		if ( node === startContainer ) {
			startOffset = filter( value.slice( 0, startOffset ) ).length;
		}

		if ( node === endContainer ) {
			endOffset = filter( value.slice( 0, endOffset ) ).length;
		}
	}

	return { startContainer, startOffset, endContainer, endOffset };
}

/**
 * Collapse any whitespace used for HTML formatting to one space character,
 * because it will also be displayed as such by the browser.
 *
 * We need to strip it from the content because we use white-space: pre-wrap for
 * displaying editable rich text. Without using white-space: pre-wrap, the
 * browser will litter the content with non breaking spaces, among other issues.
 * See packages/rich-text/src/component/use-default-style.js.
 *
 * The element passed in is left untouched: a deep clone is made once, and the
 * whole cloned tree, nested elements included, is then processed in place.
 *
 * @see
 * https://developer.mozilla.org/en-US/docs/Web/CSS/white-space-collapse#collapsing_of_white_space
 *
 * @param element Element whose content should be collapsed.
 * @param isRoot  Whether `element` is the root of the content. Only at the
 *                root is a trailing space of the last text node removed.
 *
 * @returns A whitespace-collapsed deep clone of `element`.
 */
function collapseWhiteSpace(element: HTMLElement, isRoot: boolean = true): HTMLElement {
	const clone = element.cloneNode( true ) as HTMLElement;
	// Merges adjacent text nodes throughout the cloned subtree, so every run
	// of text is seen as a single node below.
	clone.normalize();
	collapseChildWhiteSpace( clone, isRoot );
	return clone;
}

/**
 * Collapses whitespace in the text nodes below `parent`, modifying them in
 * place and descending into child elements.
 */
function collapseChildWhiteSpace( parent: Node, isRoot: boolean ): void {
	Array.from( parent.childNodes ).forEach( ( node, i, nodes ) => {
		if ( node.nodeType === node.TEXT_NODE ) {
			let newNodeValue = node.nodeValue || '';

			if ( /[\n\t\r\f]/.test( newNodeValue ) ) {
				newNodeValue = newNodeValue.replace( /[\n\t\r\f]+/g, ' ' );
			}

			if ( newNodeValue.indexOf( '  ' ) !== -1 ) {
				newNodeValue = newNodeValue.replace( / {2,}/g, ' ' );
			}

			if ( i === 0 && newNodeValue.startsWith( ' ' ) ) {
				// A leading space of the first child is dropped.
				newNodeValue = newNodeValue.slice( 1 );
			} else if (
				isRoot &&
				i === nodes.length - 1 &&
				newNodeValue.endsWith( ' ' )
			) {
				// A trailing space of the last child is dropped, at the root
				// only.
				newNodeValue = newNodeValue.slice( 0, -1 );
			}

			node.nodeValue = newNodeValue;
		} else if ( node.nodeType === node.ELEMENT_NODE ) {
			// The subtree is already part of the clone, so it can be
			// processed directly. Cloning it again would leave the result
			// detached from the tree that is returned.
			collapseChildWhiteSpace( node, false );
		}
	} );
}

/**
 * Line breaks are normalised to `\n` so they are consistent across platforms
 * and serialised properly. A `\r` that is left in place would linger and
 * result in double line breaks when whitespace is preserved.
 */
const CARRIAGE_RETURN = '\r';

/**
 * Removes reserved characters used by rich-text (zero width non breaking spaces
 * added by `toTree` and object replacement characters), along with carriage
 * returns.
 *
 * @param string Text to clean.
 *
 * @returns The text without any of the reserved characters.
 */
export function removeReservedCharacters( string: string ): string {
	const reservedCharacters = [
		ZWNBSP,
		OBJECT_REPLACEMENT_CHARACTER,
		CARRIAGE_RETURN,
	].join( '' );

	// The regex is global, so a new instance is built on every call rather
	// than sharing one whose `lastIndex` state would have to be reset.
	const reservedPattern = new RegExp( `[${ reservedCharacters }]`, 'gu' );

	return string.replace( reservedPattern, '' );
}

/**
 * Creates a Rich Text value from a DOM element and range.
 *
 * The direct children of `element` are visited in order and appended to an
 * accumulator value:
 *
 * - text nodes contribute their text, minus reserved characters;
 * - `script` elements and empty elements that carry attributes become a
 *   single object replacement character with a matching `replacements` entry;
 * - `br` elements become `\n` (in an editable tree only those marked with
 *   `data-rich-text-line-break`);
 * - any other element is converted recursively and its format is prepended
 *   to the formats of every character it contains.
 *
 * `formats` and `replacements` always stay index-aligned with `text`: each
 * appended value holds exactly one slot per character.
 *
 * @param $1                Named arguments.
 * @param $1.element        Element (or node) to create the value from.
 * @param $1.range          Range used to derive `start` and `end`.
 * @param $1.isEditableTree Whether the tree is the editable DOM of a rich
 *                          text instance.
 * @param store             Store used to look up registered format types.
 *
 * @returns The accumulated rich text value.
 */
function createFromElement(
	{
		element,
		range,
		isEditableTree,
	}: {
		element?:Element|Node,
		range?:SimpleRange,
		isEditableTree?: boolean,
	},
	store: FormatTypeStore,
): RichTextValue {
	const accumulator = createEmptyValue();

	if ( ! element ) {
		return accumulator;
	}

	if ( ! element.hasChildNodes() ) {
		accumulateSelection( accumulator, element, range, createEmptyValue() );
		return accumulator;
	}

	const length = element.childNodes.length;
	let newRange = range;

	// Optimise for speed.
	for ( let index = 0; index < length; index++ ) {
		const node = element.childNodes[ index ];
		const tagName = node.nodeName.toLowerCase();

		if ( node.nodeType === node.TEXT_NODE ) {
			const text = removeReservedCharacters( node.nodeValue || '' );
			newRange = filterRange( node, newRange, removeReservedCharacters );
			accumulateSelection( accumulator, node, newRange, { text, formats: [], replacements: [] } );
			// Create a sparse array of the same length as `text`, in which
			// formats can be added.
			accumulator.formats.length += text.length;
			accumulator.replacements.length += text.length;
			accumulator.text += text;
			continue;
		}

		if ( node.nodeType !== node.ELEMENT_NODE ) {
			continue;
		}

		const elementNode = node as HTMLElement;

		if (
			isEditableTree &&
			// Ignore any line breaks that are not inserted by us.
			tagName === 'br' &&
			! elementNode.getAttribute( 'data-rich-text-line-break' )
		) {
			accumulateSelection( accumulator, node, newRange, createEmptyValue() );
			continue;
		}

		if ( tagName === 'script' ) {
			const value = {
				// A single empty slot for the single replacement character.
				// No `start`/`end` here: `accumulateSelection` treats a
				// defined `value.start`/`value.end` as a selection found
				// inside the value and would overwrite the real one.
				formats: Array( 1 ),
				replacements: [
					{
						type: tagName,
						attributes: {
							'data-rich-text-script':
								elementNode.getAttribute( 'data-rich-text-script' ) ||
								encodeURIComponent( elementNode.innerHTML ),
						},
					},
				],
				text: OBJECT_REPLACEMENT_CHARACTER,
			};
			accumulateSelection( accumulator, node, newRange, value );
			mergePair( accumulator, value );
			continue;
		}

		if ( tagName === 'br' ) {
			accumulateSelection( accumulator, node, newRange, createEmptyValue() );
			mergePair( accumulator, create( { text: '\n' }, store ) );
			continue;
		}

		const format = toFormat( {
			tagName,
			attributes: getAttributes( { element: elementNode } ),
		}, store );

		// When a format type is declared as not editable, replace it with an
		// object replacement character and preserve the inner HTML.
		// (Currently disabled.)
		// if ( format?.formatType?.contentEditable === false ) {
		// 	delete format.formatType;
		// 	accumulateSelection( accumulator, node, newRange, createEmptyValue() );
		// 	mergePair( accumulator, {
		// 		formats: Array( 1 ),
		// 		replacements: [
		// 			{
		// 				...format,
		// 				innerHTML: elementNode.innerHTML,
		// 			},
		// 		],
		// 		text: OBJECT_REPLACEMENT_CHARACTER,
		// 	} );
		// 	continue;
		// }

		if ( format ) {
			delete format.formatType;
		}

		const value = createFromElement( {
			element: elementNode,
			range: newRange,
			isEditableTree,
		}, store );

		accumulateSelection( accumulator, node, newRange, value );

		// Ignore any placeholders, but keep their content since the browser
		// might insert text inside them when the editable element is flex.
		if ( ! format || elementNode.getAttribute( 'data-rich-text-placeholder' ) ) {
			mergePair( accumulator, value );
		} else if ( value.text.length === 0 ) {
			if ( format.attributes ) {
				mergePair( accumulator, {
					// A single empty slot for the single replacement
					// character keeps `formats` aligned with `text`.
					formats: Array( 1 ),
					replacements: [ format ],
					text: OBJECT_REPLACEMENT_CHARACTER,
				} );
			}
		} else {
			// Indices should share a reference to the same formats array.
			// Only create a new reference if `formats` changes. Since the
			// formats at an index can be `undefined`, the cache starts out
			// holding the result for `undefined`.
			let cachedInput: RichTextFormat[] | undefined;
			let cachedOutput: RichTextFormat[] = [ format ];

			const withFormat = ( formats?: RichTextFormat[] ): RichTextFormat[] => {
				if ( cachedInput === formats ) {
					return cachedOutput;
				}

				cachedInput = formats;
				cachedOutput = formats ? [ format, ...formats ] : [ format ];

				return cachedOutput;
			};

			mergePair( accumulator, {
				...value,
				// `Array.from` also visits the holes of the sparse array.
				formats: Array.from( value.formats, withFormat ),
			} );
		}
	}

	return accumulator;
}

/**
 * Collects the attributes of an element into a plain object keyed by
 * attribute name.
 *
 * Attributes whose name starts with `data-rich-text-` are skipped. Attributes
 * whose name starts with `on` (in any letter case) are stored under a
 * `data-disable-rich-text-` prefixed name instead of their own.
 *
 * @returns The collected attributes; an empty object when there are none.
 */
function getAttributes({ element }: { element: Element }): Record<string, any>{
	const collected: Record<string, any> = {};

	if ( element.hasAttributes() ) {
		for ( const { name, value } of Array.from( element.attributes ) ) {
			if ( name.startsWith( 'data-rich-text-' ) ) {
				continue;
			}

			const key = /^on/i.test( name )
				? `data-disable-rich-text-${ name }`
				: name;

			collected[ key ] = value;
		}
	}

	return collected;
}