Use DOMTextScanner (#536)

* Use DOMTextScanner instead of TextSourceRange.seek*

* Move getNodesInRange to dom.js

* Move anyNodeMatchesSelector to dom.js

* Remove unused functions

* Update tests

* Add layoutAwareScan option

* Use layoutAwareScan for source and sentence scanning

* Remove unused IGNORE_TEXT_PATTERN
This commit is contained in:
toasted-nutbread 2020-06-21 16:07:51 -04:00 committed by GitHub
parent 4ebee3e17c
commit e23504613f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 102 additions and 247 deletions

View File

@ -321,7 +321,8 @@
"enablePopupSearch", "enablePopupSearch",
"enableOnPopupExpressions", "enableOnPopupExpressions",
"enableOnSearchPage", "enableOnSearchPage",
"enableSearchTags" "enableSearchTags",
"layoutAwareScan"
], ],
"properties": { "properties": {
"middleMouse": { "middleMouse": {
@ -383,6 +384,10 @@
"enableSearchTags": { "enableSearchTags": {
"type": "boolean", "type": "boolean",
"default": false "default": false
},
"layoutAwareScan": {
"type": "boolean",
"default": false
} }
} }
}, },

View File

@ -203,7 +203,8 @@ function profileOptionsCreateDefaults() {
enablePopupSearch: false, enablePopupSearch: false,
enableOnPopupExpressions: false, enableOnPopupExpressions: false,
enableOnSearchPage: true, enableOnSearchPage: true,
enableSearchTags: false enableSearchTags: false,
layoutAwareScan: false
}, },
translation: { translation: {

View File

@ -75,15 +75,17 @@ class QueryParser {
async _search(textSource, cause) { async _search(textSource, cause) {
if (textSource === null) { return null; } if (textSource === null) { return null; }
const searchText = this._textScanner.getTextSourceContent(textSource, this._options.scanning.length); const {length: scanLength, layoutAwareScan} = this._options.scanning;
const searchText = this._textScanner.getTextSourceContent(textSource, scanLength, layoutAwareScan);
if (searchText.length === 0) { return null; } if (searchText.length === 0) { return null; }
const {definitions, length} = await api.termsFind(searchText, {}, this._getOptionsContext()); const {definitions, length} = await api.termsFind(searchText, {}, this._getOptionsContext());
if (definitions.length === 0) { return null; } if (definitions.length === 0) { return null; }
const sentence = docSentenceExtract(textSource, this._options.anki.sentenceExt); const sentenceExtent = this._options.anki.sentenceExt;
const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan);
textSource.setEndOffset(length); textSource.setEndOffset(length, layoutAwareScan);
this._setContent('terms', {definitions, context: { this._setContent('terms', {definitions, context: {
focus: false, focus: false,

View File

@ -79,6 +79,7 @@
<script src="/bg/js/dictionary.js"></script> <script src="/bg/js/dictionary.js"></script>
<script src="/bg/js/handlebars.js"></script> <script src="/bg/js/handlebars.js"></script>
<script src="/fg/js/document.js"></script> <script src="/fg/js/document.js"></script>
<script src="/fg/js/dom-text-scanner.js"></script>
<script src="/fg/js/source.js"></script> <script src="/fg/js/source.js"></script>
<script src="/mixed/js/audio-system.js"></script> <script src="/mixed/js/audio-system.js"></script>
<script src="/mixed/js/display-context.js"></script> <script src="/mixed/js/display-context.js"></script>

View File

@ -126,6 +126,7 @@
<script src="/mixed/js/text-scanner.js"></script> <script src="/mixed/js/text-scanner.js"></script>
<script src="/fg/js/document.js"></script> <script src="/fg/js/document.js"></script>
<script src="/fg/js/dom-text-scanner.js"></script>
<script src="/fg/js/popup.js"></script> <script src="/fg/js/popup.js"></script>
<script src="/fg/js/source.js"></script> <script src="/fg/js/source.js"></script>
<script src="/fg/js/popup-factory.js"></script> <script src="/fg/js/popup-factory.js"></script>

View File

@ -400,6 +400,10 @@
<label><input type="checkbox" id="auto-hide-results" data-setting="scanning.autoHideResults"> Automatically hide results</label> <label><input type="checkbox" id="auto-hide-results" data-setting="scanning.autoHideResults"> Automatically hide results</label>
</div> </div>
<div class="checkbox">
<label><input type="checkbox" id="layout-aware-scan" data-setting="scanning.layoutAwareScan"> Layout-aware scan</label>
</div>
<div class="checkbox options-advanced"> <div class="checkbox options-advanced">
<label><input type="checkbox" id="deep-dom-scan" data-setting="scanning.deepDomScan"> Deep DOM scan</label> <label><input type="checkbox" id="deep-dom-scan" data-setting="scanning.deepDomScan"> Deep DOM scan</label>
</div> </div>

View File

@ -46,6 +46,7 @@
<script src="/mixed/js/japanese.js"></script> <script src="/mixed/js/japanese.js"></script>
<script src="/fg/js/document.js"></script> <script src="/fg/js/document.js"></script>
<script src="/fg/js/dom-text-scanner.js"></script>
<script src="/fg/js/source.js"></script> <script src="/fg/js/source.js"></script>
<script src="/mixed/js/audio-system.js"></script> <script src="/mixed/js/audio-system.js"></script>
<script src="/mixed/js/display-context.js"></script> <script src="/mixed/js/display-context.js"></script>

View File

@ -17,6 +17,7 @@
/* global /* global
* DOM * DOM
* DOMTextScanner
* TextSourceElement * TextSourceElement
* TextSourceRange * TextSourceRange
*/ */
@ -152,14 +153,14 @@ function docRangeFromPoint(x, y, deepDomScan) {
} }
} }
function docSentenceExtract(source, extent) { function docSentenceExtract(source, extent, layoutAwareScan) {
const quotesFwd = {'「': '」', '『': '』', "'": "'", '"': '"'}; const quotesFwd = {'「': '」', '『': '』', "'": "'", '"': '"'};
const quotesBwd = {'」': '「', '』': '『', "'": "'", '"': '"'}; const quotesBwd = {'」': '「', '』': '『', "'": "'", '"': '"'};
const terminators = '…。..?!'; const terminators = '…。..?!';
const sourceLocal = source.clone(); const sourceLocal = source.clone();
const position = sourceLocal.setStartOffset(extent); const position = sourceLocal.setStartOffset(extent, layoutAwareScan);
sourceLocal.setEndOffset(extent * 2 - position, true); sourceLocal.setEndOffset(extent * 2 - position, layoutAwareScan, true);
const content = sourceLocal.text(); const content = sourceLocal.text();
let quoteStack = []; let quoteStack = [];
@ -232,7 +233,7 @@ function isPointInRange(x, y, range) {
const nodePre = range.endContainer; const nodePre = range.endContainer;
const offsetPre = range.endOffset; const offsetPre = range.endOffset;
try { try {
const {node, offset, content} = TextSourceRange.seekForward(range.endContainer, range.endOffset, 1); const {node, offset, content} = new DOMTextScanner(range.endContainer, range.endOffset, true, false).seek(1);
range.setEnd(node, offset); range.setEnd(node, offset);
if (!isWhitespace(content) && DOM.isPointInAnyRect(x, y, range.getClientRects())) { if (!isWhitespace(content) && DOM.isPointInAnyRect(x, y, range.getClientRects())) {
@ -243,7 +244,7 @@ function isPointInRange(x, y, range) {
} }
// Scan backward // Scan backward
const {node, offset, content} = TextSourceRange.seekBackward(range.startContainer, range.startOffset, 1); const {node, offset, content} = new DOMTextScanner(range.startContainer, range.startOffset, true, false).seek(-1);
range.setStart(node, offset); range.setStart(node, offset);
if (!isWhitespace(content) && DOM.isPointInAnyRect(x, y, range.getClientRects())) { if (!isWhitespace(content) && DOM.isPointInAnyRect(x, y, range.getClientRects())) {

View File

@ -258,32 +258,36 @@ class Frontend {
} }
async _findTerms(textSource, optionsContext) { async _findTerms(textSource, optionsContext) {
const searchText = this._textScanner.getTextSourceContent(textSource, this._options.scanning.length); const {length: scanLength, layoutAwareScan} = this._options.scanning;
const searchText = this._textScanner.getTextSourceContent(textSource, scanLength, layoutAwareScan);
if (searchText.length === 0) { return null; } if (searchText.length === 0) { return null; }
const {definitions, length} = await api.termsFind(searchText, {}, optionsContext); const {definitions, length} = await api.termsFind(searchText, {}, optionsContext);
if (definitions.length === 0) { return null; } if (definitions.length === 0) { return null; }
textSource.setEndOffset(length); textSource.setEndOffset(length, layoutAwareScan);
return {definitions, type: 'terms'}; return {definitions, type: 'terms'};
} }
async _findKanji(textSource, optionsContext) { async _findKanji(textSource, optionsContext) {
const searchText = this._textScanner.getTextSourceContent(textSource, 1); const layoutAwareScan = this._options.scanning.layoutAwareScan;
const searchText = this._textScanner.getTextSourceContent(textSource, 1, layoutAwareScan);
if (searchText.length === 0) { return null; } if (searchText.length === 0) { return null; }
const definitions = await api.kanjiFind(searchText, optionsContext); const definitions = await api.kanjiFind(searchText, optionsContext);
if (definitions.length === 0) { return null; } if (definitions.length === 0) { return null; }
textSource.setEndOffset(1); textSource.setEndOffset(1, layoutAwareScan);
return {definitions, type: 'kanji'}; return {definitions, type: 'kanji'};
} }
_showContent(textSource, focus, definitions, type, optionsContext) { _showContent(textSource, focus, definitions, type, optionsContext) {
const {url} = optionsContext; const {url} = optionsContext;
const sentence = docSentenceExtract(textSource, this._options.anki.sentenceExt); const sentenceExtent = this._options.anki.sentenceExt;
const layoutAwareScan = this._options.scanning.layoutAwareScan;
const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan);
this._showPopupContent( this._showPopupContent(
textSource, textSource,
optionsContext, optionsContext,

View File

@ -15,9 +15,9 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>. * along with this program. If not, see <https://www.gnu.org/licenses/>.
*/ */
// \u200c (Zero-width non-joiner) appears on Google Docs from Chrome 76 onwards /* global
const IGNORE_TEXT_PATTERN = /\u200c/; * DOMTextScanner
*/
/* /*
* TextSourceRange * TextSourceRange
@ -46,19 +46,19 @@ class TextSourceRange {
return this.content; return this.content;
} }
setEndOffset(length, fromEnd=false) { setEndOffset(length, layoutAwareScan, fromEnd=false) {
const state = ( const state = (
fromEnd ? fromEnd ?
TextSourceRange.seekForward(this.range.endContainer, this.range.endOffset, length) : new DOMTextScanner(this.range.endContainer, this.range.endOffset, !layoutAwareScan, layoutAwareScan).seek(length) :
TextSourceRange.seekForward(this.range.startContainer, this.range.startOffset, length) new DOMTextScanner(this.range.startContainer, this.range.startOffset, !layoutAwareScan, layoutAwareScan).seek(length)
); );
this.range.setEnd(state.node, state.offset); this.range.setEnd(state.node, state.offset);
this.content = (fromEnd ? this.content + state.content : state.content); this.content = (fromEnd ? this.content + state.content : state.content);
return length - state.remainder; return length - state.remainder;
} }
setStartOffset(length) { setStartOffset(length, layoutAwareScan) {
const state = TextSourceRange.seekBackward(this.range.startContainer, this.range.startOffset, length); const state = new DOMTextScanner(this.range.startContainer, this.range.startOffset, !layoutAwareScan, layoutAwareScan).seek(-length);
this.range.setStart(state.node, state.offset); this.range.setStart(state.node, state.offset);
this.rangeStartOffset = this.range.startOffset; this.rangeStartOffset = this.range.startOffset;
this.content = state.content + this.content; this.content = state.content + this.content;
@ -110,154 +110,6 @@ class TextSourceRange {
} }
} }
static shouldEnter(node) {
switch (node.nodeName.toUpperCase()) {
case 'RT':
case 'SCRIPT':
case 'STYLE':
return false;
}
const style = window.getComputedStyle(node);
return !(
style.visibility === 'hidden' ||
style.display === 'none' ||
parseFloat(style.fontSize) === 0
);
}
static getRubyElement(node) {
node = TextSourceRange.getParentElement(node);
if (node !== null && node.nodeName.toUpperCase() === 'RT') {
node = node.parentNode;
return (node !== null && node.nodeName.toUpperCase() === 'RUBY') ? node : null;
}
return null;
}
static seekForward(node, offset, length) {
const state = {node, offset, remainder: length, content: ''};
if (length <= 0) {
return state;
}
const TEXT_NODE = Node.TEXT_NODE;
const ELEMENT_NODE = Node.ELEMENT_NODE;
let resetOffset = false;
const ruby = TextSourceRange.getRubyElement(node);
if (ruby !== null) {
node = ruby;
resetOffset = true;
}
while (node !== null) {
let visitChildren = true;
const nodeType = node.nodeType;
if (nodeType === TEXT_NODE) {
state.node = node;
if (TextSourceRange.seekForwardTextNode(state, resetOffset)) {
break;
}
resetOffset = true;
} else if (nodeType === ELEMENT_NODE) {
visitChildren = TextSourceRange.shouldEnter(node);
}
node = TextSourceRange.getNextNode(node, visitChildren);
}
return state;
}
static seekForwardTextNode(state, resetOffset) {
const nodeValue = state.node.nodeValue;
const nodeValueLength = nodeValue.length;
let content = state.content;
let offset = resetOffset ? 0 : state.offset;
let remainder = state.remainder;
let result = false;
for (; offset < nodeValueLength; ++offset) {
const c = nodeValue[offset];
if (!IGNORE_TEXT_PATTERN.test(c)) {
content += c;
if (--remainder <= 0) {
result = true;
++offset;
break;
}
}
}
state.offset = offset;
state.content = content;
state.remainder = remainder;
return result;
}
static seekBackward(node, offset, length) {
const state = {node, offset, remainder: length, content: ''};
if (length <= 0) {
return state;
}
const TEXT_NODE = Node.TEXT_NODE;
const ELEMENT_NODE = Node.ELEMENT_NODE;
let resetOffset = false;
const ruby = TextSourceRange.getRubyElement(node);
if (ruby !== null) {
node = ruby;
resetOffset = true;
}
while (node !== null) {
let visitChildren = true;
const nodeType = node.nodeType;
if (nodeType === TEXT_NODE) {
state.node = node;
if (TextSourceRange.seekBackwardTextNode(state, resetOffset)) {
break;
}
resetOffset = true;
} else if (nodeType === ELEMENT_NODE) {
visitChildren = TextSourceRange.shouldEnter(node);
}
node = TextSourceRange.getPreviousNode(node, visitChildren);
}
return state;
}
static seekBackwardTextNode(state, resetOffset) {
const nodeValue = state.node.nodeValue;
let content = state.content;
let offset = resetOffset ? nodeValue.length : state.offset;
let remainder = state.remainder;
let result = false;
for (; offset > 0; --offset) {
const c = nodeValue[offset - 1];
if (!IGNORE_TEXT_PATTERN.test(c)) {
content = c + content;
if (--remainder <= 0) {
result = true;
--offset;
break;
}
}
}
state.offset = offset;
state.content = content;
state.remainder = remainder;
return result;
}
static getParentElement(node) { static getParentElement(node) {
while (node !== null && node.nodeType !== Node.ELEMENT_NODE) { while (node !== null && node.nodeType !== Node.ELEMENT_NODE) {
node = node.parentNode; node = node.parentNode;
@ -290,66 +142,6 @@ class TextSourceRange {
return writingMode; return writingMode;
} }
} }
static getNodesInRange(range) {
const end = range.endContainer;
const nodes = [];
for (let node = range.startContainer; node !== null; node = TextSourceRange.getNextNode(node, true)) {
nodes.push(node);
if (node === end) { break; }
}
return nodes;
}
static getNextNode(node, visitChildren) {
let next = visitChildren ? node.firstChild : null;
if (next === null) {
while (true) {
next = node.nextSibling;
if (next !== null) { break; }
next = node.parentNode;
if (next === null) { break; }
node = next;
}
}
return next;
}
static getPreviousNode(node, visitChildren) {
let next = visitChildren ? node.lastChild : null;
if (next === null) {
while (true) {
next = node.previousSibling;
if (next !== null) { break; }
next = node.parentNode;
if (next === null) { break; }
node = next;
}
}
return next;
}
static anyNodeMatchesSelector(nodeList, selector) {
for (const node of nodeList) {
if (TextSourceRange.nodeMatchesSelector(node, selector)) {
return true;
}
}
return false;
}
static nodeMatchesSelector(node, selector) {
for (; node !== null; node = node.parentNode) {
if (node.nodeType === Node.ELEMENT_NODE) {
return node.matches(selector);
}
}
return false;
}
} }

View File

@ -42,6 +42,7 @@
"mixed/js/dynamic-loader.js", "mixed/js/dynamic-loader.js",
"mixed/js/text-scanner.js", "mixed/js/text-scanner.js",
"fg/js/document.js", "fg/js/document.js",
"fg/js/dom-text-scanner.js",
"fg/js/popup.js", "fg/js/popup.js",
"fg/js/source.js", "fg/js/source.js",
"fg/js/popup-factory.js", "fg/js/popup-factory.js",

View File

@ -236,7 +236,9 @@ class Display {
const {textSource, definitions} = termLookupResults; const {textSource, definitions} = termLookupResults;
const scannedElement = e.target; const scannedElement = e.target;
const sentence = docSentenceExtract(textSource, this.options.anki.sentenceExt); const sentenceExtent = this.options.anki.sentenceExt;
const layoutAwareScan = this.options.scanning.layoutAwareScan;
const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan);
this.context.update({ this.context.update({
index: this.entryIndexFind(scannedElement), index: this.entryIndexFind(scannedElement),
@ -273,21 +275,22 @@ class Display {
try { try {
e.preventDefault(); e.preventDefault();
const textSource = docRangeFromPoint(e.clientX, e.clientY, this.options.scanning.deepDomScan); const {length: scanLength, deepDomScan: deepScan, layoutAwareScan} = this.options.scanning;
const textSource = docRangeFromPoint(e.clientX, e.clientY, deepScan);
if (textSource === null) { if (textSource === null) {
return false; return false;
} }
let definitions, length; let definitions, length;
try { try {
textSource.setEndOffset(this.options.scanning.length); textSource.setEndOffset(scanLength, layoutAwareScan);
({definitions, length} = await api.termsFind(textSource.text(), {}, this.getOptionsContext())); ({definitions, length} = await api.termsFind(textSource.text(), {}, this.getOptionsContext()));
if (definitions.length === 0) { if (definitions.length === 0) {
return false; return false;
} }
textSource.setEndOffset(length); textSource.setEndOffset(length, layoutAwareScan);
} finally { } finally {
textSource.cleanup(); textSource.cleanup();
} }

View File

@ -86,4 +86,42 @@ class DOM {
null null
); );
} }
static getNodesInRange(range) {
const end = range.endContainer;
const nodes = [];
for (let node = range.startContainer; node !== null; node = DOM.getNextNode(node)) {
nodes.push(node);
if (node === end) { break; }
}
return nodes;
}
static getNextNode(node) {
let next = node.firstChild;
if (next === null) {
while (true) {
next = node.nextSibling;
if (next !== null) { break; }
next = node.parentNode;
if (next === null) { break; }
node = next;
}
}
return next;
}
static anyNodeMatchesSelector(nodes, selector) {
const ELEMENT_NODE = Node.ELEMENT_NODE;
for (let node of nodes) {
for (; node !== null; node = node.parentNode) {
if (node.nodeType !== ELEMENT_NODE) { continue; }
if (node.matches(selector)) { return true; }
break;
}
}
return false;
}
} }

View File

@ -17,7 +17,6 @@
/* global /* global
* DOM * DOM
* TextSourceRange
* docRangeFromPoint * docRangeFromPoint
*/ */
@ -119,20 +118,20 @@ class TextScanner extends EventDispatcher {
} }
} }
getTextSourceContent(textSource, length) { getTextSourceContent(textSource, length, layoutAwareScan) {
const clonedTextSource = textSource.clone(); const clonedTextSource = textSource.clone();
clonedTextSource.setEndOffset(length); clonedTextSource.setEndOffset(length, layoutAwareScan);
if (this._ignoreNodes !== null && clonedTextSource.range) { if (this._ignoreNodes !== null && clonedTextSource.range) {
length = clonedTextSource.text().length; length = clonedTextSource.text().length;
while (clonedTextSource.range && length > 0) { while (clonedTextSource.range && length > 0) {
const nodes = TextSourceRange.getNodesInRange(clonedTextSource.range); const nodes = DOM.getNodesInRange(clonedTextSource.range);
if (!TextSourceRange.anyNodeMatchesSelector(nodes, this._ignoreNodes)) { if (!DOM.anyNodeMatchesSelector(nodes, this._ignoreNodes)) {
break; break;
} }
--length; --length;
clonedTextSource.setEndOffset(length); clonedTextSource.setEndOffset(length, layoutAwareScan);
} }
} }

View File

@ -94,10 +94,12 @@ async function testDocument1() {
const vm = new VM({document, window, Range, Node}); const vm = new VM({document, window, Range, Node});
vm.execute([ vm.execute([
'mixed/js/dom.js', 'mixed/js/dom.js',
'fg/js/dom-text-scanner.js',
'fg/js/source.js', 'fg/js/source.js',
'fg/js/document.js' 'fg/js/document.js'
]); ]);
const [TextSourceRange, TextSourceElement, docRangeFromPoint, docSentenceExtract] = vm.get([ const [DOMTextScanner, TextSourceRange, TextSourceElement, docRangeFromPoint, docSentenceExtract] = vm.get([
'DOMTextScanner',
'TextSourceRange', 'TextSourceRange',
'TextSourceElement', 'TextSourceElement',
'docRangeFromPoint', 'docRangeFromPoint',
@ -106,7 +108,7 @@ async function testDocument1() {
try { try {
await testDocumentTextScanningFunctions(dom, {docRangeFromPoint, docSentenceExtract, TextSourceRange, TextSourceElement}); await testDocumentTextScanningFunctions(dom, {docRangeFromPoint, docSentenceExtract, TextSourceRange, TextSourceElement});
await testTextSourceRangeSeekFunctions(dom, {TextSourceRange}); await testTextSourceRangeSeekFunctions(dom, {DOMTextScanner});
} finally { } finally {
window.close(); window.close();
} }
@ -179,7 +181,7 @@ async function testDocumentTextScanningFunctions(dom, {docRangeFromPoint, docSen
if (source === null) { continue; } if (source === null) { continue; }
// Test docSentenceExtract // Test docSentenceExtract
const sentenceActual = docSentenceExtract(source, sentenceExtent).text; const sentenceActual = docSentenceExtract(source, sentenceExtent, false).text;
assert.strictEqual(sentenceActual, sentence); assert.strictEqual(sentenceActual, sentence);
// Clean // Clean
@ -187,7 +189,7 @@ async function testDocumentTextScanningFunctions(dom, {docRangeFromPoint, docSen
} }
} }
async function testTextSourceRangeSeekFunctions(dom, {TextSourceRange}) { async function testTextSourceRangeSeekFunctions(dom, {DOMTextScanner}) {
const document = dom.window.document; const document = dom.window.document;
for (const testElement of document.querySelectorAll('.test[data-test-type=text-source-range-seek]')) { for (const testElement of document.querySelectorAll('.test[data-test-type=text-source-range-seek]')) {
@ -220,8 +222,8 @@ async function testTextSourceRangeSeekFunctions(dom, {TextSourceRange}) {
const {node, offset, content} = ( const {node, offset, content} = (
seekDirection === 'forward' ? seekDirection === 'forward' ?
TextSourceRange.seekForward(seekNode, seekOffset, seekLength) : new DOMTextScanner(seekNode, seekOffset, true, false).seek(seekLength) :
TextSourceRange.seekBackward(seekNode, seekOffset, seekLength) new DOMTextScanner(seekNode, seekOffset, true, false).seek(-seekLength)
); );
assert.strictEqual(node, expectedResultNode); assert.strictEqual(node, expectedResultNode);