DOMTextScanner (#458)

* Create new class for scanning text in a document

* Update test styles

* Add tests
This commit is contained in:
toasted-nutbread 2020-05-02 13:05:43 -04:00 committed by GitHub
parent d581bffa15
commit d4ae9aa501
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 1121 additions and 3 deletions

View File

@ -0,0 +1,538 @@
/*
* Copyright (C) 2020 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * A class used to scan text in a document.
 * Text is accumulated by walking DOM nodes forward or backward from a starting position.
 * Whitespace is collapsed based on the computed CSS white-space value of each text node's
 * parent element, and newlines may be generated where element styles imply a layout break.
 */
class DOMTextScanner {
    /**
     * Creates a new instance of a DOMTextScanner.
     * If the node is inside an <rt> element whose parent is a <ruby> element,
     * scanning starts at that <ruby> element instead.
     * @param node The DOM Node to start at.
     * @param offset The character offset to start at when node is a text node.
     *   Use 0 for non-text nodes.
     * @param forcePreserveWhitespace When true, whitespace and newline characters in text nodes
     *   are always preserved instead of consulting the computed white-space style.
     * @param generateLayoutContent When true, newlines are generated for elements whose
     *   computed style indicates a layout change.
     */
    constructor(node, offset, forcePreserveWhitespace=false, generateLayoutContent=true) {
        const ruby = DOMTextScanner.getParentRubyElement(node);
        const resetOffset = (ruby !== null);
        if (resetOffset) { node = ruby; }

        this._node = node;
        this._offset = offset;
        this._content = '';
        this._remainder = 0;
        this._resetOffset = resetOffset;
        this._newlines = 0;
        this._lineHasWhitespace = false;
        this._lineHasContent = false;
        this._forcePreserveWhitespace = forcePreserveWhitespace;
        this._generateLayoutContent = generateLayoutContent;
    }

    /**
     * Gets the current node being scanned.
     * After a seek that runs past the last node in its direction, this is the last
     * text or element node that was visited.
     * @returns A DOM Node.
     */
    get node() {
        return this._node;
    }

    /**
     * Gets the current offset corresponding to the node being scanned.
     * This value is only applicable for text nodes.
     * @returns An integer.
     */
    get offset() {
        return this._offset;
    }

    /**
     * Gets the accumulated content string resulting from calls to seek().
     * @returns A string.
     */
    get content() {
        return this._content;
    }

    /**
     * Seeks a given length in the document and accumulates the text content.
     * @param length A positive or negative integer corresponding to how many characters
     *   should be added to content. Content is only added to the accumulation string,
     *   never removed, so mixing seek calls with differently signed length values
     *   may give unexpected results.
     * @returns this
     */
    seek(length) {
        const forward = (length >= 0);
        this._remainder = (forward ? length : -length);
        if (length === 0) { return this; }

        const TEXT_NODE = Node.TEXT_NODE;
        const ELEMENT_NODE = Node.ELEMENT_NODE;

        const generateLayoutContent = this._generateLayoutContent;
        let node = this._node;
        // Last text or element node visited. It is stored as the scanner position so that
        // the position is not lost (set to null) when the traversal runs out of nodes.
        let lastNode = node;
        let resetOffset = this._resetOffset;
        // Reset flag to store together with lastNode for the next seek() call.
        let lastResetOffset = resetOffset;
        let newlines = 0;
        while (node !== null) {
            let enterable = false;
            const nodeType = node.nodeType;

            if (nodeType === TEXT_NODE) {
                lastNode = node;
                const lengthRemaining = (
                    forward ?
                    this._seekTextNodeForward(node, resetOffset) :
                    this._seekTextNodeBackward(node, resetOffset)
                );
                // this._offset now refers to this text node, so a later seek() resuming
                // from it must continue at that offset rather than rescanning the node.
                lastResetOffset = false;
                if (!lengthRemaining) {
                    // Length reached
                    break;
                }
            } else if (nodeType === ELEMENT_NODE) {
                lastNode = node;
                lastResetOffset = false;
                // Offsets only apply to text nodes; keep (node, offset) coherent.
                this._offset = 0;
                [enterable, newlines] = DOMTextScanner.getElementSeekInfo(node);
                if (newlines > this._newlines && generateLayoutContent) {
                    this._newlines = newlines;
                }
            }

            const exitedNodes = [];
            node = DOMTextScanner.getNextNode(node, forward, enterable, exitedNodes);

            // Leaving an element can imply a layout break just like entering one.
            for (const exitedNode of exitedNodes) {
                if (exitedNode.nodeType !== ELEMENT_NODE) { continue; }
                newlines = DOMTextScanner.getElementSeekInfo(exitedNode)[1];
                if (newlines > this._newlines && generateLayoutContent) {
                    this._newlines = newlines;
                }
            }

            // Any node reached by traversal is scanned from its start (or end when backward).
            resetOffset = true;
        }

        this._node = lastNode;
        this._resetOffset = lastResetOffset;

        return this;
    }

    // Private

    /**
     * Seeks forward in a text node.
     * @param textNode The text node to use.
     * @param resetOffset Whether or not the text offset should be reset.
     * @returns true if scanning should continue, or false if the scan length has been reached.
     */
    _seekTextNodeForward(textNode, resetOffset) {
        const nodeValue = textNode.nodeValue;
        const nodeValueLength = nodeValue.length;
        const [preserveNewlines, preserveWhitespace] = (
            this._forcePreserveWhitespace ?
            [true, true] :
            DOMTextScanner.getWhitespaceSettings(textNode)
        );

        let lineHasWhitespace = this._lineHasWhitespace;
        let lineHasContent = this._lineHasContent;
        let content = this._content;
        let offset = resetOffset ? 0 : this._offset;
        let remainder = this._remainder;
        let newlines = this._newlines;

        while (offset < nodeValueLength) {
            const char = nodeValue[offset];
            const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);
            ++offset;

            if (charAttributes === 0) {
                // Character should be ignored
                continue;
            } else if (charAttributes === 1) {
                // Character is collapsable whitespace
                lineHasWhitespace = true;
            } else {
                // Character should be added to the content
                if (newlines > 0) {
                    // Pending layout newlines are only emitted between content,
                    // never before the first character.
                    if (content.length > 0) {
                        const useNewlineCount = Math.min(remainder, newlines);
                        content += '\n'.repeat(useNewlineCount);
                        remainder -= useNewlineCount;
                        newlines -= useNewlineCount;
                    } else {
                        newlines = 0;
                    }
                    lineHasContent = false;
                    lineHasWhitespace = false;
                    if (remainder <= 0) {
                        --offset; // Revert character offset
                        break;
                    }
                }

                lineHasContent = (charAttributes === 2); // 3 = character is a newline

                if (lineHasWhitespace) {
                    if (lineHasContent) {
                        // A run of collapsed whitespace becomes a single space.
                        content += ' ';
                        lineHasWhitespace = false;
                        if (--remainder <= 0) {
                            --offset; // Revert character offset
                            break;
                        }
                    } else {
                        lineHasWhitespace = false;
                    }
                }

                content += char;

                if (--remainder <= 0) { break; }
            }
        }

        this._lineHasWhitespace = lineHasWhitespace;
        this._lineHasContent = lineHasContent;
        this._content = content;
        this._offset = offset;
        this._remainder = remainder;
        this._newlines = newlines;

        return (remainder > 0);
    }

    /**
     * Seeks backward in a text node.
     * This function is nearly the same as _seekTextNodeForward, with the following differences:
     * - Iteration condition is reversed to check if offset is greater than 0.
     * - offset is reset to nodeValueLength instead of 0.
     * - offset is decremented instead of incremented.
     * - offset is decremented before getting the character.
     * - offset is reverted by incrementing instead of decrementing.
     * - content string is prepended instead of appended.
     * @param textNode The text node to use.
     * @param resetOffset Whether or not the text offset should be reset.
     * @returns true if scanning should continue, or false if the scan length has been reached.
     */
    _seekTextNodeBackward(textNode, resetOffset) {
        const nodeValue = textNode.nodeValue;
        const nodeValueLength = nodeValue.length;
        const [preserveNewlines, preserveWhitespace] = (
            this._forcePreserveWhitespace ?
            [true, true] :
            DOMTextScanner.getWhitespaceSettings(textNode)
        );

        let lineHasWhitespace = this._lineHasWhitespace;
        let lineHasContent = this._lineHasContent;
        let content = this._content;
        let offset = resetOffset ? nodeValueLength : this._offset;
        let remainder = this._remainder;
        let newlines = this._newlines;

        while (offset > 0) {
            --offset;
            const char = nodeValue[offset];
            const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);

            if (charAttributes === 0) {
                // Character should be ignored
                continue;
            } else if (charAttributes === 1) {
                // Character is collapsable whitespace
                lineHasWhitespace = true;
            } else {
                // Character should be added to the content
                if (newlines > 0) {
                    if (content.length > 0) {
                        const useNewlineCount = Math.min(remainder, newlines);
                        content = '\n'.repeat(useNewlineCount) + content;
                        remainder -= useNewlineCount;
                        newlines -= useNewlineCount;
                    } else {
                        newlines = 0;
                    }
                    lineHasContent = false;
                    lineHasWhitespace = false;
                    if (remainder <= 0) {
                        ++offset; // Revert character offset
                        break;
                    }
                }

                lineHasContent = (charAttributes === 2); // 3 = character is a newline

                if (lineHasWhitespace) {
                    if (lineHasContent) {
                        content = ' ' + content;
                        lineHasWhitespace = false;
                        if (--remainder <= 0) {
                            ++offset; // Revert character offset
                            break;
                        }
                    } else {
                        lineHasWhitespace = false;
                    }
                }

                content = char + content;

                if (--remainder <= 0) { break; }
            }
        }

        this._lineHasWhitespace = lineHasWhitespace;
        this._lineHasContent = lineHasContent;
        this._content = content;
        this._offset = offset;
        this._remainder = remainder;
        this._newlines = newlines;

        return (remainder > 0);
    }

    // Static helpers

    /**
     * Gets the next node in the document for a specified scanning direction.
     * @param node The current DOM Node.
     * @param forward Whether to scan forward in the document or backward.
     * @param visitChildren Whether the children of the current node should be visited.
     * @param exitedNodes An array which stores nodes which were exited.
     * @returns The next node in the document, or null if there is no next node.
     */
    static getNextNode(node, forward, visitChildren, exitedNodes) {
        let next = visitChildren ? (forward ? node.firstChild : node.lastChild) : null;
        if (next === null) {
            // No child to enter: move to the sibling, climbing through ancestors until one has a sibling.
            while (true) {
                exitedNodes.push(node);

                next = (forward ? node.nextSibling : node.previousSibling);
                if (next !== null) { break; }

                next = node.parentNode;
                if (next === null) { break; }

                node = next;
            }
        }
        return next;
    }

    /**
     * Gets the parent element of a given Node.
     * If the node is itself an element, it is returned as-is.
     * @param node The node to check.
     * @returns The parent element if one exists, otherwise null.
     */
    static getParentElement(node) {
        while (node !== null && node.nodeType !== Node.ELEMENT_NODE) {
            node = node.parentNode;
        }
        return node;
    }

    /**
     * Gets the parent <ruby> element of a given node, if one exists. For efficiency purposes,
     * this only checks the immediate parent elements and does not check all ancestors, so
     * there are cases where the node may be in a ruby element but it is not returned.
     * @param node The node to check.
     * @returns A <ruby> node if the input node is contained in one, otherwise null.
     */
    static getParentRubyElement(node) {
        node = DOMTextScanner.getParentElement(node);
        if (node !== null && node.nodeName.toUpperCase() === 'RT') {
            node = node.parentNode;
            if (node !== null && node.nodeName.toUpperCase() === 'RUBY') {
                return node;
            }
        }
        return null;
    }

    /**
     * Gets seek information for an element based on its tag name and computed style.
     * @param element The element to check.
     * @returns [enterable: boolean, newlines: integer]
     *   The enterable value indicates whether the content of this node should be entered.
     *   The newlines value corresponds to the number of newline characters that should be added.
     *     1 newline corresponds to a simple new line in the layout.
     *     2 newlines corresponds to a significant visual distinction since the previous content.
     */
    static getElementSeekInfo(element) {
        let enterable = true;
        switch (element.nodeName.toUpperCase()) {
            case 'HEAD':
            case 'RT':
            case 'SCRIPT':
            case 'STYLE':
                return [false, 0];
            case 'BR':
                return [false, 1];
            case 'TEXTAREA':
            case 'INPUT':
            case 'BUTTON':
                // Content is not entered, but the element's style can still generate newlines.
                enterable = false;
                break;
        }

        const style = window.getComputedStyle(element);
        const display = style.display;

        const visible = (display !== 'none' && DOMTextScanner.isStyleVisible(style));
        let newlines = 0;

        if (!visible) {
            enterable = false;
        } else {
            switch (style.position) {
                case 'absolute':
                case 'fixed':
                case 'sticky':
                    newlines = 2;
                    break;
            }
            if (newlines === 0 && DOMTextScanner.doesCSSDisplayChangeLayout(display)) {
                newlines = 1;
            }
        }

        return [enterable, newlines];
    }

    /**
     * Gets information about how whitespace characters are treated.
     * @param textNode The Text node to check.
     * @returns [preserveNewlines: boolean, preserveWhitespace: boolean]
     *   The value of preserveNewlines indicates whether or not newline characters are treated as line breaks.
     *   The value of preserveWhitespace indicates whether or not sequences of whitespace characters are collapsed.
     */
    static getWhitespaceSettings(textNode) {
        const element = DOMTextScanner.getParentElement(textNode);
        if (element !== null) {
            const style = window.getComputedStyle(element);
            switch (style.whiteSpace) {
                case 'pre':
                case 'pre-wrap':
                case 'break-spaces':
                    return [true, true];
                case 'pre-line':
                    return [true, false];
            }
        }
        return [false, false];
    }

    /**
     * Gets attributes for the specified character.
     * @param character A string containing a single character.
     * @param preserveNewlines Whether line feed characters are treated as line breaks.
     * @param preserveWhitespace Whether other whitespace characters are kept instead of collapsed.
     * @returns An integer representing the attributes of the character.
     *   0: Character should be ignored.
     *   1: Character is collapsable whitespace.
     *   2: Character should be added to the content.
     *   3: Character should be added to the content and is a newline.
     */
    static getCharacterAttributes(character, preserveNewlines, preserveWhitespace) {
        switch (character.charCodeAt(0)) {
            case 0x09: // Tab ('\t')
            case 0x0c: // Form feed ('\f')
            case 0x0d: // Carriage return ('\r')
            case 0x20: // Space (' ')
                return preserveWhitespace ? 2 : 1;
            case 0x0a: // Line feed ('\n')
                return preserveNewlines ? 3 : 1;
            case 0x200c: // Zero-width non-joiner ('\u200c')
                return 0;
            default: // Other
                return 2;
        }
    }

    /**
     * Checks whether a given style is visible or not.
     * This function does not check style.display === 'none'.
     * @param style An object implementing the CSSStyleDeclaration interface.
     * @returns true if the style should result in an element being visible, otherwise false.
     */
    static isStyleVisible(style) {
        return !(
            style.visibility === 'hidden' ||
            parseFloat(style.opacity) <= 0 ||
            parseFloat(style.fontSize) <= 0 ||
            (
                // Transparent text only counts as hidden when it also cannot be selected.
                !DOMTextScanner.isStyleSelectable(style) &&
                (
                    DOMTextScanner.isCSSColorTransparent(style.color) ||
                    DOMTextScanner.isCSSColorTransparent(style.webkitTextFillColor)
                )
            )
        );
    }

    /**
     * Checks whether a given style is selectable or not.
     * @param style An object implementing the CSSStyleDeclaration interface.
     * @returns true if the style is selectable, otherwise false.
     */
    static isStyleSelectable(style) {
        return !(
            style.userSelect === 'none' ||
            style.webkitUserSelect === 'none' ||
            style.MozUserSelect === 'none' ||
            style.msUserSelect === 'none'
        );
    }

    /**
     * Checks whether a CSS color is transparent or not.
     * @param cssColor A CSS color string, expected to be encoded in rgb(a) form.
     * @returns true if the color is transparent, otherwise false.
     */
    static isCSSColorTransparent(cssColor) {
        // NOTE(review): the '.' in the pattern is unescaped, so it matches any single
        // character rather than only a decimal point - confirm whether this is intended.
        return (
            typeof cssColor === 'string' &&
            cssColor.startsWith('rgba(') &&
            /,\s*0.?0*\)$/.test(cssColor)
        );
    }

    /**
     * Checks whether a CSS display value will cause a layout change for text.
     * @param cssDisplay A CSS string corresponding to the value of the display property.
     * @returns true if the layout is changed by this value, otherwise false.
     */
    static doesCSSDisplayChangeLayout(cssDisplay) {
        let pos = cssDisplay.indexOf(' ');
        if (pos >= 0) {
            // Truncate to <display-outside> part
            cssDisplay = cssDisplay.substring(0, pos);
        }

        pos = cssDisplay.indexOf('-');
        if (pos >= 0) {
            // Truncate to first part of kebab-case value
            cssDisplay = cssDisplay.substring(0, pos);
        }

        switch (cssDisplay) {
            case 'block':
            case 'flex':
            case 'grid':
            case 'list': // list-item
            case 'table': // table, table-*
                return true;
            case 'ruby': // ruby-*
                // Plain 'ruby' does not change layout; only the kebab-case ruby-* values do.
                return (pos >= 0);
            default:
                return false;
        }
    }
}

View File

@ -0,0 +1,393 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title>Yomichan DOMTextScanner Tests</title>
<link rel="icon" type="image/gif" href="data:image/gif;base64,R0lGODlhEAAQAKEBAAAAAP///////////yH5BAEKAAIALAAAAAAQABAAAAImFI6Zpt0B4YkS0TCpq07xbmEgcGVRUpLaI46ZG7ppalY0jDCwUAAAOw==" />
<link rel="stylesheet" href="test-stylesheet.css" />
</head>
<body>
<h1>Yomichan DOMTextScanner Tests</h1>
<y-test
data-test-data='{
"node": "div:nth-of-type(1)",
"offset": 0,
"length": 15,
"expected": {
"node": "div:nth-of-type(2)>div::text",
"offset": 3,
"content": "小ぢん\nまり1\n小ぢん\nまり2"
}
}'
>
<y-description>Layout newlines expected due to entering and exiting display:block nodes.</y-description>
<div><div>小ぢん</div>まり1</div>
<div>小ぢん<div>まり2</div></div>
</y-test>
<y-test
data-test-data='{
"node": "div:nth-of-type(1)::text",
"offset": 0,
"length": 13,
"expected": {
"node": "div:nth-of-type(2)::text",
"offset": 6,
"content": "小ぢんまり1\n小ぢんまり2"
}
}'
>
<y-description>Layout newline expected due to sequential display:block elements.</y-description>
<div>小ぢんまり1</div><div>小ぢんまり2</div>
</y-test>
<y-test
data-test-data='{
"node": "div:nth-of-type(1)::text",
"offset": 0,
"length": 13,
"expected": {
"node": "div:nth-of-type(2)::text",
"offset": 6,
"content": "小ぢんまり1\n小ぢんまり2"
}
}'
>
<y-description>Layout newline expected due to sequential display:block elements separated by a newline.</y-description>
<div>小ぢんまり1</div>
<div>小ぢんまり2</div>
</y-test>
<y-test
data-test-data='{
"node": "span:nth-of-type(1)::text",
"offset": 0,
"length": 12,
"expected": {
"node": "span:nth-of-type(2)::text",
"offset": 6,
"content": "小ぢんまり1小ぢんまり2"
}
}'
>
<y-description>No newlines expected due to display:inline.</y-description>
<span>小ぢんまり1</span><span>小ぢんまり2</span>
</y-test>
<y-test
data-test-data='{
"node": "span:nth-of-type(1)::text",
"offset": 0,
"length": 13,
"expected": {
"node": "span:nth-of-type(2)::text",
"offset": 6,
"content": "小ぢんまり1 小ぢんまり2"
}
}'
>
<y-description>No newlines expected due to white-space:normal.</y-description>
<span>小ぢんまり1</span>
<span>小ぢんまり2</span>
</y-test>
<y-test
data-test-data='{
"node": "span:nth-of-type(1)::text",
"offset": 0,
"length": 13,
"expected": {
"node": "span:nth-of-type(2)::text",
"offset": 6,
"content": "小ぢんまり1\n小ぢんまり2"
}
}'
>
<y-description>Newline expected due to white-space:pre.</y-description>
<pre>
<span>小ぢんまり1</span>
<span>小ぢんまり2</span>
</pre>
</y-test>
<y-test
data-test-data='{
"node": "span:nth-of-type(1)::text",
"offset": 0,
"length": 12,
"expected": {
"node": "span:nth-of-type(2)::text",
"offset": 6,
"content": "小ぢんまり1小ぢんまり2"
}
}'
>
<y-description>No newlines expected due to display:inline-block. Actual layout flow cannot be determined by DOM/CSS alone.</y-description>
<span style="display: inline-block;">小ぢんまり1</span><span style="display: inline-block;">小ぢんまり2</span>
</y-test>
<y-test
style="position: relative;"
data-test-data='{
"node": "div:nth-of-type(1)::text",
"offset": 0,
"length": 13,
"expected": {
"node": "div:nth-of-type(2)::text",
"offset": 6,
"content": "小ぢんまり1\n小ぢんまり2"
}
}'
>
<y-description>Single newline expected due to display:block layout.</y-description>
<div>小ぢんまり1</div><div style="position: relative;">小ぢんまり2</div>
</y-test>
<y-test
style="position: relative; overflow: hidden;"
data-test-data='{
"node": "div:nth-of-type(1)::text",
"offset": 0,
"length": 14,
"expected": {
"node": "div:nth-of-type(2)::text",
"offset": 6,
"content": "小ぢんまり1\n\n小ぢんまり2"
}
}'
>
<y-description>Two newlines expected due to position:absolute causing a significant layout change.</y-description>
<div>小ぢんまり1</div><div style="position: absolute;">小ぢんまり2</div>
</y-test>
<y-test
style="position: relative; overflow: hidden;"
data-test-data='{
"node": "div:nth-of-type(1)::text",
"offset": 0,
"length": 14,
"expected": {
"node": "div:nth-of-type(2)::text",
"offset": 6,
"content": "小ぢんまり1\n\n小ぢんまり2"
}
}'
>
<y-description>Two newlines expected due to position:fixed causing a significant layout change.</y-description>
<div>小ぢんまり1</div><div style="position: fixed;">小ぢんまり2</div>
</y-test>
<y-test
style="position: relative;"
data-test-data='{
"node": "div:nth-of-type(1)::text",
"offset": 0,
"length": 14,
"expected": {
"node": "div:nth-of-type(2)::text",
"offset": 6,
"content": "小ぢんまり1\n\n小ぢんまり2"
}
}'
>
<y-description>Two newlines expected due to position:sticky being able to cause a significant layout change.</y-description>
<div>小ぢんまり1</div><div style="position: sticky;">小ぢんまり2</div>
</y-test>
<y-test
data-test-data='{
"node": "rt",
"offset": 0,
"length": 6,
"expected": {
"node": "div::text",
"offset": 5,
"content": "小ぢんまり1"
}
}'
>
<y-description>Scanning text starting in an &lt;rt&gt; element. Should start scanning at the start of the &lt;ruby&gt; tag instead.</y-description>
<div><ruby>小<rp>(</rp><rt>こ</rt><rp>)</rp></ruby>ぢんまり1</div>
</y-test>
<y-test
data-test-data='{
"node": "div",
"offset": 0,
"length": 6,
"expected": {
"node": "div::nth-text(2)",
"offset": 3,
"content": "小ぢんまり1"
}
}'
>
<y-description>Skip &lt;script&gt; content.</y-description>
<div>小ぢん<script>/*comment*/</script>まり1</div>
</y-test>
<y-test
data-test-data='{
"node": "div",
"offset": 0,
"length": 6,
"expected": {
"node": "div::nth-text(2)",
"offset": 3,
"content": "小ぢんまり1"
}
}'
>
<y-description>Skip &lt;style&gt; content.</y-description>
<div>小ぢん<style>/*comment*/</style>まり1</div>
</y-test>
<y-test
data-test-data='{
"node": "div",
"offset": 0,
"length": 6,
"expected": {
"node": "div::nth-text(2)",
"offset": 3,
"content": "小ぢんまり1"
}
}'
>
<y-description>Skip &lt;textarea&gt; content.</y-description>
<div>小ぢん<textarea>textarea content</textarea>まり1</div>
</y-test>
<y-test
data-test-data='{
"node": "div",
"offset": 0,
"length": 6,
"expected": {
"node": "div::nth-text(2)",
"offset": 3,
"content": "小ぢんまり1"
}
}'
>
<y-description>Skip &lt;input&gt; content.</y-description>
<div>小ぢん<input value="content" />まり1</div>
</y-test>
<y-test
data-test-data='{
"node": "div",
"offset": 0,
"length": 6,
"expected": {
"node": "div::nth-text(2)",
"offset": 3,
"content": "小ぢんまり1"
}
}'
>
<y-description>Skip &lt;button&gt; content.</y-description>
<div>小ぢん<button>content</button>まり1</div>
</y-test>
<y-test
data-test-data='{
"node": "div",
"offset": 0,
"length": 6,
"expected": {
"node": "div::nth-text(2)",
"offset": 3,
"content": "小ぢんまり1"
}
}'
>
<y-description>Skip content with font-size:0.</y-description>
<div>小ぢん<span style="font-size: 0;">content</span>まり1</div>
</y-test>
<y-test
data-test-data='{
"node": "div",
"offset": 0,
"length": 6,
"expected": {
"node": "div::nth-text(2)",
"offset": 3,
"content": "小ぢんまり1"
}
}'
>
<y-description>Skip content with opacity:0.</y-description>
<div>小ぢん<span style="opacity: 0;">content</span>まり1</div>
</y-test>
<y-test
data-test-data='{
"node": "div",
"offset": 0,
"length": 6,
"expected": {
"node": "div::nth-text(2)",
"offset": 3,
"content": "小ぢんまり1"
}
}'
>
<y-description>Skip content with visibility:hidden.</y-description>
<div>小ぢん<span style="visibility: hidden;">content</span>まり1</div>
</y-test>
<y-test
data-test-data='{
"node": "div",
"offset": 0,
"length": 6,
"expected": {
"node": "div::nth-text(2)",
"offset": 3,
"content": "小ぢんまり1"
}
}'
>
<y-description>Skip content with display:none.</y-description>
<div>小ぢん<span style="display: none;">content</span>まり1</div>
</y-test>
<y-test
data-test-data='{
"node": "div",
"offset": 0,
"length": 6,
"expected": {
"node": "div::nth-text(2)",
"offset": 3,
"content": "小ぢんまり1"
}
}'
>
<y-description>Don't skip content with user-select:none.</y-description>
<div>小ぢ<span style="user-select: none;">ん</span>まり1</div>
</y-test>
<y-test
data-test-data='{
"node": "div",
"offset": 0,
"length": 6,
"expected": {
"node": "div::nth-text(2)",
"offset": 3,
"content": "小ぢんまり1"
}
}'
>
<y-description>Skip content with user-select:none <em>and</em> a transparent color.</y-description>
<div>小ぢん<span style="user-select: none; color: rgba(0, 0, 0, 0);">content</span>まり1</div>
</y-test>
</body>
</html>

View File

@ -28,7 +28,9 @@ a, a:visited {
text-decoration: underline;
}
.test {
.test,
y-test {
display: block;
background-color: #ffffff;
margin: 1em 0;
padding: 0.5em;
@ -36,7 +38,8 @@ a, a:visited {
border-radius: 4px;
}
.test:before {
.test:before,
y-test:before {
content: "Test " counter(test-id);
display: block;
counter-increment: test-id;
@ -45,7 +48,10 @@ a, a:visited {
font-weight: bold;
}
.description {
.description,
y-description {
color: #444444;
font-style: italic;
display: block;
padding-bottom: 0.5em;
}

View File

@ -0,0 +1,181 @@
/*
* Copyright (C) 2020 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
const fs = require('fs');
const path = require('path');
const assert = require('assert');
const {JSDOM} = require('jsdom');
const {VM} = require('./yomichan-vm');
/**
 * Builds a JSDOM instance from an HTML file on disk.
 * @param fileName Path of the HTML file, read synchronously as UTF-8.
 * @returns A new JSDOM object constructed from the file's contents.
 */
function createJSDOM(fileName) {
    return new JSDOM(fs.readFileSync(fileName, {encoding: 'utf8'}));
}
/**
 * Resolves a selector which may end in a text-node pseudo selector.
 * A trailing "::text" selects the first direct child text node of the matched element,
 * and a trailing "::nth-text(n)" selects the n-th (1-based) direct child text node.
 * Without such a suffix, the result of element.querySelector(selector) is returned.
 * @param element The element to run the query on.
 * @param selector The CSS selector, optionally followed by "::text" or "::nth-text(n)".
 * @returns The matched element or text node, or null if nothing matches.
 */
function querySelectorTextNode(element, selector) {
    const match = /::text$|::nth-text\((\d+)\)$/.exec(selector);
    if (match === null) {
        return element.querySelector(selector);
    }

    // Convert the 1-based ::nth-text index to a 0-based one; ::text means the first text node.
    let textIndex = (match[1] ? parseInt(match[1], 10) - 1 : 0);
    const elementSelector = selector.substring(0, selector.length - match[0].length);

    const result = element.querySelector(elementSelector);
    if (result === null) {
        // No element matched, so there is no text node to return.
        return null;
    }

    for (let n = result.firstChild; n !== null; n = n.nextSibling) {
        if (n.nodeType === n.constructor.TEXT_NODE) {
            if (textIndex === 0) {
                return n;
            }
            --textIndex;
        }
    }
    return null;
}
/**
 * Finds a font size in pixels for a node by walking up through its ancestors.
 * The first element (starting with the node itself) whose computed fontSize ends in "px" wins.
 * @param window The window object, used for its Node.ELEMENT_NODE constant.
 * @param getComputedStyle A function returning the computed style of an element.
 * @param element The node to start from; may be null.
 * @returns The parsed pixel value, or 14 if no element provides a pixel font size.
 */
function getComputedFontSizeInPixels(window, getComputedStyle, element) {
    let current = element;
    while (current !== null) {
        if (current.nodeType === window.Node.ELEMENT_NODE) {
            const {fontSize} = getComputedStyle(current);
            if (fontSize.endsWith('px')) {
                // Strip the trailing "px" unit before parsing.
                return parseFloat(fontSize.slice(0, -2));
            }
        }
        current = current.parentNode;
    }

    // Fallback used when no ancestor reports a pixel font size.
    return 14;
}
/**
 * Creates a replacement for window.getComputedStyle which converts em units to px units.
 * The returned function wraps each computed style in a Proxy; string property values
 * have every "<number>em" occurrence replaced with the equivalent "<number>px" value,
 * using the font size returned by getComputedFontSizeInPixels for the queried element.
 * @param window The window whose getComputedStyle function should be wrapped.
 * @returns A function with the same call signature as window.getComputedStyle.
 */
function createAbsoluteGetComputedStyle(window) {
    // Wrapper to convert em units to px units
    const getComputedStyleOld = window.getComputedStyle.bind(window);
    return (element, ...args) => {
        const style = getComputedStyleOld(element, ...args);
        return new Proxy(style, {
            get: (target, property) => {
                let result = target[property];
                if (typeof result === 'string') {
                    // The number must be matched in full (multiple integer and fractional
                    // digits); otherwise only the last digit before "em" would be converted.
                    result = result.replace(/([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)em/g, (g0, g1) => {
                        const fontSize = getComputedFontSizeInPixels(window, getComputedStyleOld, element);
                        return `${parseFloat(g1) * fontSize}px`;
                    });
                }
                return result;
            }
        });
    };
}
/**
 * Runs every <y-test> case found in a document against a DOMTextScanner implementation.
 * Each element's data-test-data attribute holds a JSON object (or array of objects) giving
 * the start node selector, offset, seek length, scanner options, and the expected result.
 * For each case this checks the full forward scan, every shorter forward scan, and,
 * unless "reversible" is false, the same scans performed backward from the expected end.
 * @param dom The JSDOM object containing the test document.
 * @param DOMTextScanner The scanner class under test.
 */
async function testDomTextScanner(dom, {DOMTextScanner}) {
    const testElements = dom.window.document.querySelectorAll('y-test');
    for (const testElement of testElements) {
        const parsedData = JSON.parse(testElement.dataset.testData);
        const testItems = (Array.isArray(parsedData) ? parsedData : [parsedData]);

        for (const testItem of testItems) {
            const {
                offset: startOffset,
                length,
                forcePreserveWhitespace,
                generateLayoutContent,
                reversible,
                expected: {
                    offset: expectedOffset,
                    content: expectedContent
                }
            } = testItem;
            const startNode = querySelectorTextNode(testElement, testItem.node);
            const expectedNode = querySelectorTextNode(testElement, testItem.expected.node);

            // Creates a scanner at the given position and seeks by the given amount.
            const scan = (fromNode, fromOffset, amount) => {
                const scanner = new DOMTextScanner(fromNode, fromOffset, forcePreserveWhitespace, generateLayoutContent);
                scanner.seek(amount);
                return scanner;
            };

            // Standard test
            const {node: actualNode, offset: actualOffset, content: actualContent} = scan(startNode, startOffset, length);
            assert.strictEqual(actualContent, expectedContent);
            assert.strictEqual(actualOffset, expectedOffset);
            assert.strictEqual(actualNode, expectedNode);

            // Substring tests
            for (let cut = 1; cut <= length; ++cut) {
                const {content} = scan(startNode, startOffset, length - cut);
                assert.strictEqual(content, expectedContent.substring(0, expectedContent.length - cut));
            }

            if (reversible === false) { continue; }

            // Reversed test
            {
                const {content} = scan(expectedNode, expectedOffset, -length);
                assert.strictEqual(content, expectedContent);
            }

            // Reversed substring tests
            for (let cut = 1; cut <= length; ++cut) {
                const {content} = scan(expectedNode, expectedOffset, -(length - cut));
                assert.strictEqual(content, expectedContent.substring(cut));
            }
        }
    }
}
/**
 * Loads the DOMTextScanner HTML test document, executes the scanner source inside a VM
 * that exposes the JSDOM globals, and runs all test cases in the document.
 * The JSDOM window is always closed afterwards, even when a test fails.
 */
async function testDocument1() {
    const htmlPath = path.join(__dirname, 'data', 'html', 'test-dom-text-scanner.html');
    const dom = createJSDOM(htmlPath);
    const {window} = dom;
    try {
        const {document, Node, Range} = window;

        // Replace getComputedStyle so that em units are reported as px units.
        window.getComputedStyle = createAbsoluteGetComputedStyle(window);

        const vm = new VM({document, window, Range, Node});
        vm.execute('fg/js/dom-text-scanner.js');

        await testDomTextScanner(dom, {DOMTextScanner: vm.get('DOMTextScanner')});
    } finally {
        window.close();
    }
}
/**
 * Runs every test in this file.
 */
async function main() {
    await testDocument1();
}

if (require.main === module) {
    // Handle the rejection explicitly so a failed assertion always produces a non-zero
    // exit code, instead of depending on the runtime's unhandled-rejection behavior.
    main().catch((error) => {
        console.error(error);
        process.exitCode = 1;
    });
}