diff --git a/ext/js/dom/dom-text-scanner.js b/ext/js/dom/dom-text-scanner.js index 83b16028..7bed94a9 100644 --- a/ext/js/dom/dom-text-scanner.js +++ b/ext/js/dom/dom-text-scanner.js @@ -145,6 +145,44 @@ class DOMTextScanner { // Private + /** + * Reads a code point in a string in the forward direction. + * @param {string} text The text to read the code point from. + * @param {number} position The index of the first character to read. + * @returns {string} The code point from the string. + */ + _readCodePointForward(text, position) { + let char = text[position]; + const charCode = char.charCodeAt(0); + if (charCode >= 0xd800 && charCode < 0xdc00 && ++position < text.length) { + const char2 = text[position]; + const charCode2 = char2.charCodeAt(0); + if (charCode2 >= 0xdc00 && charCode2 < 0xe000) { + char += char2; + } + } + return char; + } + + /** + * Reads a code point in a string in the backward direction. + * @param {string} text The text to read the code point from. + * @param {number} position The index of the first character to read. + * @returns {string} The code point from the string. + */ + _readCodePointBackward(text, position) { + let char = text[position]; + const charCode = char.charCodeAt(0); + if (charCode >= 0xdc00 && charCode < 0xe000 && position > 0) { + const char2 = text[position - 1]; + const charCode2 = char2.charCodeAt(0); + if (charCode2 >= 0xd800 && charCode2 < 0xdc00) { + char = char2 + char; + } + } + return char; + } + /** * Seeks forward in a text node. * @param {Text} textNode The text node to use. @@ -164,9 +202,9 @@ class DOMTextScanner { let newlines = this._newlines; while (offset < nodeValueLength) { - const char = nodeValue[offset]; + const char = this._readCodePointForward(nodeValue, offset); + offset += char.length; const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace); - ++offset; if (charAttributes === 0) { // Character should be ignored @@ -188,7 +226,7 @@ class DOMTextScanner { lineHasContent = false; lineHasWhitespace = false; if (remainder <= 0) { - --offset; // Revert character offset + offset -= char.length; // Revert character offset break; } } @@ -200,7 +238,7 @@ class DOMTextScanner { content += ' '; lineHasWhitespace = false; if (--remainder <= 0) { - --offset; // Revert character offset + offset -= char.length; // Revert character offset break; } } else { @@ -250,8 +288,8 @@ class DOMTextScanner { let newlines = this._newlines; while (offset > 0) { - --offset; - const char = nodeValue[offset]; + const char = this._readCodePointBackward(nodeValue, offset - 1); + offset -= char.length; const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace); if (charAttributes === 0) { @@ -274,7 +312,7 @@ class DOMTextScanner { lineHasContent = false; lineHasWhitespace = false; if (remainder <= 0) { - ++offset; // Revert character offset + offset += char.length; // Revert character offset break; } } @@ -286,7 +324,7 @@ class DOMTextScanner { content = ' ' + content; lineHasWhitespace = false; if (--remainder <= 0) { - ++offset; // Revert character offset + offset += char.length; // Revert character offset break; } } else {