Update DOMTextScanner to support UTF-16 surrogate pairs (#2213)
This commit is contained in:
parent
5c267f4bb7
commit
c5c5308ff2
@ -145,6 +145,44 @@ class DOMTextScanner {
|
|||||||
|
|
||||||
// Private
|
// Private
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a code point in a string in the forward direction.
|
||||||
|
* @param {string} text The text to read the code point from.
|
||||||
|
* @param {number} position The index of the first character to read.
|
||||||
|
* @returns {string} The code point from the string.
|
||||||
|
*/
|
||||||
|
_readCodePointForward(text, position) {
|
||||||
|
let char = text[position];
|
||||||
|
const charCode = char.charCodeAt(0);
|
||||||
|
if (charCode >= 0xd800 && charCode < 0xdc00 && ++position < text.length) {
|
||||||
|
const char2 = text[position];
|
||||||
|
const charCode2 = char2.charCodeAt(0);
|
||||||
|
if (charCode2 >= 0xdc00 && charCode2 < 0xe000) {
|
||||||
|
char += char2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return char;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a code point in a string in the backward direction.
|
||||||
|
* @param {string} text The text to read the code point from.
|
||||||
|
* @param {number} position The index of the first character to read.
|
||||||
|
* @returns {string} The code point from the string.
|
||||||
|
*/
|
||||||
|
_readCodePointBackward(text, position) {
|
||||||
|
let char = text[position];
|
||||||
|
const charCode = char.charCodeAt(0);
|
||||||
|
if (charCode >= 0xdc00 && charCode < 0xe000 && position > 0) {
|
||||||
|
const char2 = text[position - 1];
|
||||||
|
const charCode2 = char2.charCodeAt(0);
|
||||||
|
if (charCode2 >= 0xd800 && charCode2 < 0xdc00) {
|
||||||
|
char = char2 + char;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return char;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Seeks forward in a text node.
|
* Seeks forward in a text node.
|
||||||
* @param {Text} textNode The text node to use.
|
* @param {Text} textNode The text node to use.
|
||||||
@ -164,9 +202,9 @@ class DOMTextScanner {
|
|||||||
let newlines = this._newlines;
|
let newlines = this._newlines;
|
||||||
|
|
||||||
while (offset < nodeValueLength) {
|
while (offset < nodeValueLength) {
|
||||||
const char = nodeValue[offset];
|
const char = this._readCodePointForward(nodeValue, offset);
|
||||||
|
offset += char.length;
|
||||||
const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);
|
const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);
|
||||||
++offset;
|
|
||||||
|
|
||||||
if (charAttributes === 0) {
|
if (charAttributes === 0) {
|
||||||
// Character should be ignored
|
// Character should be ignored
|
||||||
@ -188,7 +226,7 @@ class DOMTextScanner {
|
|||||||
lineHasContent = false;
|
lineHasContent = false;
|
||||||
lineHasWhitespace = false;
|
lineHasWhitespace = false;
|
||||||
if (remainder <= 0) {
|
if (remainder <= 0) {
|
||||||
--offset; // Revert character offset
|
offset -= char.length; // Revert character offset
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -200,7 +238,7 @@ class DOMTextScanner {
|
|||||||
content += ' ';
|
content += ' ';
|
||||||
lineHasWhitespace = false;
|
lineHasWhitespace = false;
|
||||||
if (--remainder <= 0) {
|
if (--remainder <= 0) {
|
||||||
--offset; // Revert character offset
|
offset -= char.length; // Revert character offset
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -250,8 +288,8 @@ class DOMTextScanner {
|
|||||||
let newlines = this._newlines;
|
let newlines = this._newlines;
|
||||||
|
|
||||||
while (offset > 0) {
|
while (offset > 0) {
|
||||||
--offset;
|
const char = this._readCodePointBackward(nodeValue, offset - 1);
|
||||||
const char = nodeValue[offset];
|
offset -= char.length;
|
||||||
const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);
|
const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);
|
||||||
|
|
||||||
if (charAttributes === 0) {
|
if (charAttributes === 0) {
|
||||||
@ -274,7 +312,7 @@ class DOMTextScanner {
|
|||||||
lineHasContent = false;
|
lineHasContent = false;
|
||||||
lineHasWhitespace = false;
|
lineHasWhitespace = false;
|
||||||
if (remainder <= 0) {
|
if (remainder <= 0) {
|
||||||
++offset; // Revert character offset
|
offset += char.length; // Revert character offset
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -286,7 +324,7 @@ class DOMTextScanner {
|
|||||||
content = ' ' + content;
|
content = ' ' + content;
|
||||||
lineHasWhitespace = false;
|
lineHasWhitespace = false;
|
||||||
if (--remainder <= 0) {
|
if (--remainder <= 0) {
|
||||||
++offset; // Revert character offset
|
offset += char.length; // Revert character offset
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
Reference in New Issue
Block a user