Use DOMTextScanner (#536)

* Use DOMTextScanner instead of TextSourceRange.seek*

* Move getNodesInRange to dom.js

* Move anyNodeMatchesSelector to dom.js

* Remove unused functions

* Update tests

* Add layoutAwareScan option

* Use layoutAwareScan for source and sentence scanning

* Remove unused IGNORE_TEXT_PATTERN
This commit is contained in:
toasted-nutbread 2020-06-21 16:07:51 -04:00 committed by GitHub
parent 4ebee3e17c
commit e23504613f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 102 additions and 247 deletions

View File

@ -321,7 +321,8 @@
"enablePopupSearch",
"enableOnPopupExpressions",
"enableOnSearchPage",
"enableSearchTags"
"enableSearchTags",
"layoutAwareScan"
],
"properties": {
"middleMouse": {
@ -383,6 +384,10 @@
"enableSearchTags": {
"type": "boolean",
"default": false
},
"layoutAwareScan": {
"type": "boolean",
"default": false
}
}
},

View File

@ -203,7 +203,8 @@ function profileOptionsCreateDefaults() {
enablePopupSearch: false,
enableOnPopupExpressions: false,
enableOnSearchPage: true,
enableSearchTags: false
enableSearchTags: false,
layoutAwareScan: false
},
translation: {

View File

@ -75,15 +75,17 @@ class QueryParser {
async _search(textSource, cause) {
if (textSource === null) { return null; }
const searchText = this._textScanner.getTextSourceContent(textSource, this._options.scanning.length);
const {length: scanLength, layoutAwareScan} = this._options.scanning;
const searchText = this._textScanner.getTextSourceContent(textSource, scanLength, layoutAwareScan);
if (searchText.length === 0) { return null; }
const {definitions, length} = await api.termsFind(searchText, {}, this._getOptionsContext());
if (definitions.length === 0) { return null; }
const sentence = docSentenceExtract(textSource, this._options.anki.sentenceExt);
const sentenceExtent = this._options.anki.sentenceExt;
const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan);
textSource.setEndOffset(length);
textSource.setEndOffset(length, layoutAwareScan);
this._setContent('terms', {definitions, context: {
focus: false,

View File

@ -79,6 +79,7 @@
<script src="/bg/js/dictionary.js"></script>
<script src="/bg/js/handlebars.js"></script>
<script src="/fg/js/document.js"></script>
<script src="/fg/js/dom-text-scanner.js"></script>
<script src="/fg/js/source.js"></script>
<script src="/mixed/js/audio-system.js"></script>
<script src="/mixed/js/display-context.js"></script>

View File

@ -126,6 +126,7 @@
<script src="/mixed/js/text-scanner.js"></script>
<script src="/fg/js/document.js"></script>
<script src="/fg/js/dom-text-scanner.js"></script>
<script src="/fg/js/popup.js"></script>
<script src="/fg/js/source.js"></script>
<script src="/fg/js/popup-factory.js"></script>

View File

@ -400,6 +400,10 @@
<label><input type="checkbox" id="auto-hide-results" data-setting="scanning.autoHideResults"> Automatically hide results</label>
</div>
<div class="checkbox">
<label><input type="checkbox" id="layout-aware-scan" data-setting="scanning.layoutAwareScan"> Layout-aware scan</label>
</div>
<div class="checkbox options-advanced">
<label><input type="checkbox" id="deep-dom-scan" data-setting="scanning.deepDomScan"> Deep DOM scan</label>
</div>

View File

@ -46,6 +46,7 @@
<script src="/mixed/js/japanese.js"></script>
<script src="/fg/js/document.js"></script>
<script src="/fg/js/dom-text-scanner.js"></script>
<script src="/fg/js/source.js"></script>
<script src="/mixed/js/audio-system.js"></script>
<script src="/mixed/js/display-context.js"></script>

View File

@ -17,6 +17,7 @@
/* global
* DOM
* DOMTextScanner
* TextSourceElement
* TextSourceRange
*/
@ -152,14 +153,14 @@ function docRangeFromPoint(x, y, deepDomScan) {
}
}
function docSentenceExtract(source, extent) {
function docSentenceExtract(source, extent, layoutAwareScan) {
const quotesFwd = {'「': '」', '『': '』', "'": "'", '"': '"'};
const quotesBwd = {'」': '「', '』': '『', "'": "'", '"': '"'};
const terminators = '…。..?!';
const sourceLocal = source.clone();
const position = sourceLocal.setStartOffset(extent);
sourceLocal.setEndOffset(extent * 2 - position, true);
const position = sourceLocal.setStartOffset(extent, layoutAwareScan);
sourceLocal.setEndOffset(extent * 2 - position, layoutAwareScan, true);
const content = sourceLocal.text();
let quoteStack = [];
@ -232,7 +233,7 @@ function isPointInRange(x, y, range) {
const nodePre = range.endContainer;
const offsetPre = range.endOffset;
try {
const {node, offset, content} = TextSourceRange.seekForward(range.endContainer, range.endOffset, 1);
const {node, offset, content} = new DOMTextScanner(range.endContainer, range.endOffset, true, false).seek(1);
range.setEnd(node, offset);
if (!isWhitespace(content) && DOM.isPointInAnyRect(x, y, range.getClientRects())) {
@ -243,7 +244,7 @@ function isPointInRange(x, y, range) {
}
// Scan backward
const {node, offset, content} = TextSourceRange.seekBackward(range.startContainer, range.startOffset, 1);
const {node, offset, content} = new DOMTextScanner(range.startContainer, range.startOffset, true, false).seek(-1);
range.setStart(node, offset);
if (!isWhitespace(content) && DOM.isPointInAnyRect(x, y, range.getClientRects())) {

View File

@ -258,32 +258,36 @@ class Frontend {
}
async _findTerms(textSource, optionsContext) {
const searchText = this._textScanner.getTextSourceContent(textSource, this._options.scanning.length);
const {length: scanLength, layoutAwareScan} = this._options.scanning;
const searchText = this._textScanner.getTextSourceContent(textSource, scanLength, layoutAwareScan);
if (searchText.length === 0) { return null; }
const {definitions, length} = await api.termsFind(searchText, {}, optionsContext);
if (definitions.length === 0) { return null; }
textSource.setEndOffset(length);
textSource.setEndOffset(length, layoutAwareScan);
return {definitions, type: 'terms'};
}
async _findKanji(textSource, optionsContext) {
const searchText = this._textScanner.getTextSourceContent(textSource, 1);
const layoutAwareScan = this._options.scanning.layoutAwareScan;
const searchText = this._textScanner.getTextSourceContent(textSource, 1, layoutAwareScan);
if (searchText.length === 0) { return null; }
const definitions = await api.kanjiFind(searchText, optionsContext);
if (definitions.length === 0) { return null; }
textSource.setEndOffset(1);
textSource.setEndOffset(1, layoutAwareScan);
return {definitions, type: 'kanji'};
}
_showContent(textSource, focus, definitions, type, optionsContext) {
const {url} = optionsContext;
const sentence = docSentenceExtract(textSource, this._options.anki.sentenceExt);
const sentenceExtent = this._options.anki.sentenceExt;
const layoutAwareScan = this._options.scanning.layoutAwareScan;
const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan);
this._showPopupContent(
textSource,
optionsContext,

View File

@ -15,9 +15,9 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// \u200c (Zero-width non-joiner) appears on Google Docs from Chrome 76 onwards
const IGNORE_TEXT_PATTERN = /\u200c/;
/* global
* DOMTextScanner
*/
/*
* TextSourceRange
@ -46,19 +46,19 @@ class TextSourceRange {
return this.content;
}
setEndOffset(length, fromEnd=false) {
setEndOffset(length, layoutAwareScan, fromEnd=false) {
const state = (
fromEnd ?
TextSourceRange.seekForward(this.range.endContainer, this.range.endOffset, length) :
TextSourceRange.seekForward(this.range.startContainer, this.range.startOffset, length)
new DOMTextScanner(this.range.endContainer, this.range.endOffset, !layoutAwareScan, layoutAwareScan).seek(length) :
new DOMTextScanner(this.range.startContainer, this.range.startOffset, !layoutAwareScan, layoutAwareScan).seek(length)
);
this.range.setEnd(state.node, state.offset);
this.content = (fromEnd ? this.content + state.content : state.content);
return length - state.remainder;
}
setStartOffset(length) {
const state = TextSourceRange.seekBackward(this.range.startContainer, this.range.startOffset, length);
setStartOffset(length, layoutAwareScan) {
const state = new DOMTextScanner(this.range.startContainer, this.range.startOffset, !layoutAwareScan, layoutAwareScan).seek(-length);
this.range.setStart(state.node, state.offset);
this.rangeStartOffset = this.range.startOffset;
this.content = state.content + this.content;
@ -110,154 +110,6 @@ class TextSourceRange {
}
}
static shouldEnter(node) {
switch (node.nodeName.toUpperCase()) {
case 'RT':
case 'SCRIPT':
case 'STYLE':
return false;
}
const style = window.getComputedStyle(node);
return !(
style.visibility === 'hidden' ||
style.display === 'none' ||
parseFloat(style.fontSize) === 0
);
}
static getRubyElement(node) {
node = TextSourceRange.getParentElement(node);
if (node !== null && node.nodeName.toUpperCase() === 'RT') {
node = node.parentNode;
return (node !== null && node.nodeName.toUpperCase() === 'RUBY') ? node : null;
}
return null;
}
static seekForward(node, offset, length) {
const state = {node, offset, remainder: length, content: ''};
if (length <= 0) {
return state;
}
const TEXT_NODE = Node.TEXT_NODE;
const ELEMENT_NODE = Node.ELEMENT_NODE;
let resetOffset = false;
const ruby = TextSourceRange.getRubyElement(node);
if (ruby !== null) {
node = ruby;
resetOffset = true;
}
while (node !== null) {
let visitChildren = true;
const nodeType = node.nodeType;
if (nodeType === TEXT_NODE) {
state.node = node;
if (TextSourceRange.seekForwardTextNode(state, resetOffset)) {
break;
}
resetOffset = true;
} else if (nodeType === ELEMENT_NODE) {
visitChildren = TextSourceRange.shouldEnter(node);
}
node = TextSourceRange.getNextNode(node, visitChildren);
}
return state;
}
static seekForwardTextNode(state, resetOffset) {
const nodeValue = state.node.nodeValue;
const nodeValueLength = nodeValue.length;
let content = state.content;
let offset = resetOffset ? 0 : state.offset;
let remainder = state.remainder;
let result = false;
for (; offset < nodeValueLength; ++offset) {
const c = nodeValue[offset];
if (!IGNORE_TEXT_PATTERN.test(c)) {
content += c;
if (--remainder <= 0) {
result = true;
++offset;
break;
}
}
}
state.offset = offset;
state.content = content;
state.remainder = remainder;
return result;
}
static seekBackward(node, offset, length) {
const state = {node, offset, remainder: length, content: ''};
if (length <= 0) {
return state;
}
const TEXT_NODE = Node.TEXT_NODE;
const ELEMENT_NODE = Node.ELEMENT_NODE;
let resetOffset = false;
const ruby = TextSourceRange.getRubyElement(node);
if (ruby !== null) {
node = ruby;
resetOffset = true;
}
while (node !== null) {
let visitChildren = true;
const nodeType = node.nodeType;
if (nodeType === TEXT_NODE) {
state.node = node;
if (TextSourceRange.seekBackwardTextNode(state, resetOffset)) {
break;
}
resetOffset = true;
} else if (nodeType === ELEMENT_NODE) {
visitChildren = TextSourceRange.shouldEnter(node);
}
node = TextSourceRange.getPreviousNode(node, visitChildren);
}
return state;
}
static seekBackwardTextNode(state, resetOffset) {
const nodeValue = state.node.nodeValue;
let content = state.content;
let offset = resetOffset ? nodeValue.length : state.offset;
let remainder = state.remainder;
let result = false;
for (; offset > 0; --offset) {
const c = nodeValue[offset - 1];
if (!IGNORE_TEXT_PATTERN.test(c)) {
content = c + content;
if (--remainder <= 0) {
result = true;
--offset;
break;
}
}
}
state.offset = offset;
state.content = content;
state.remainder = remainder;
return result;
}
static getParentElement(node) {
while (node !== null && node.nodeType !== Node.ELEMENT_NODE) {
node = node.parentNode;
@ -290,66 +142,6 @@ class TextSourceRange {
return writingMode;
}
}
static getNodesInRange(range) {
const end = range.endContainer;
const nodes = [];
for (let node = range.startContainer; node !== null; node = TextSourceRange.getNextNode(node, true)) {
nodes.push(node);
if (node === end) { break; }
}
return nodes;
}
static getNextNode(node, visitChildren) {
let next = visitChildren ? node.firstChild : null;
if (next === null) {
while (true) {
next = node.nextSibling;
if (next !== null) { break; }
next = node.parentNode;
if (next === null) { break; }
node = next;
}
}
return next;
}
static getPreviousNode(node, visitChildren) {
let next = visitChildren ? node.lastChild : null;
if (next === null) {
while (true) {
next = node.previousSibling;
if (next !== null) { break; }
next = node.parentNode;
if (next === null) { break; }
node = next;
}
}
return next;
}
static anyNodeMatchesSelector(nodeList, selector) {
for (const node of nodeList) {
if (TextSourceRange.nodeMatchesSelector(node, selector)) {
return true;
}
}
return false;
}
static nodeMatchesSelector(node, selector) {
for (; node !== null; node = node.parentNode) {
if (node.nodeType === Node.ELEMENT_NODE) {
return node.matches(selector);
}
}
return false;
}
}

View File

@ -42,6 +42,7 @@
"mixed/js/dynamic-loader.js",
"mixed/js/text-scanner.js",
"fg/js/document.js",
"fg/js/dom-text-scanner.js",
"fg/js/popup.js",
"fg/js/source.js",
"fg/js/popup-factory.js",

View File

@ -236,7 +236,9 @@ class Display {
const {textSource, definitions} = termLookupResults;
const scannedElement = e.target;
const sentence = docSentenceExtract(textSource, this.options.anki.sentenceExt);
const sentenceExtent = this.options.anki.sentenceExt;
const layoutAwareScan = this.options.scanning.layoutAwareScan;
const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan);
this.context.update({
index: this.entryIndexFind(scannedElement),
@ -273,21 +275,22 @@ class Display {
try {
e.preventDefault();
const textSource = docRangeFromPoint(e.clientX, e.clientY, this.options.scanning.deepDomScan);
const {length: scanLength, deepDomScan: deepScan, layoutAwareScan} = this.options.scanning;
const textSource = docRangeFromPoint(e.clientX, e.clientY, deepScan);
if (textSource === null) {
return false;
}
let definitions, length;
try {
textSource.setEndOffset(this.options.scanning.length);
textSource.setEndOffset(scanLength, layoutAwareScan);
({definitions, length} = await api.termsFind(textSource.text(), {}, this.getOptionsContext()));
if (definitions.length === 0) {
return false;
}
textSource.setEndOffset(length);
textSource.setEndOffset(length, layoutAwareScan);
} finally {
textSource.cleanup();
}

View File

@ -86,4 +86,42 @@ class DOM {
null
);
}
static getNodesInRange(range) {
const end = range.endContainer;
const nodes = [];
for (let node = range.startContainer; node !== null; node = DOM.getNextNode(node)) {
nodes.push(node);
if (node === end) { break; }
}
return nodes;
}
static getNextNode(node) {
let next = node.firstChild;
if (next === null) {
while (true) {
next = node.nextSibling;
if (next !== null) { break; }
next = node.parentNode;
if (next === null) { break; }
node = next;
}
}
return next;
}
static anyNodeMatchesSelector(nodes, selector) {
const ELEMENT_NODE = Node.ELEMENT_NODE;
for (let node of nodes) {
for (; node !== null; node = node.parentNode) {
if (node.nodeType !== ELEMENT_NODE) { continue; }
if (node.matches(selector)) { return true; }
break;
}
}
return false;
}
}

View File

@ -17,7 +17,6 @@
/* global
* DOM
* TextSourceRange
* docRangeFromPoint
*/
@ -119,20 +118,20 @@ class TextScanner extends EventDispatcher {
}
}
getTextSourceContent(textSource, length) {
getTextSourceContent(textSource, length, layoutAwareScan) {
const clonedTextSource = textSource.clone();
clonedTextSource.setEndOffset(length);
clonedTextSource.setEndOffset(length, layoutAwareScan);
if (this._ignoreNodes !== null && clonedTextSource.range) {
length = clonedTextSource.text().length;
while (clonedTextSource.range && length > 0) {
const nodes = TextSourceRange.getNodesInRange(clonedTextSource.range);
if (!TextSourceRange.anyNodeMatchesSelector(nodes, this._ignoreNodes)) {
const nodes = DOM.getNodesInRange(clonedTextSource.range);
if (!DOM.anyNodeMatchesSelector(nodes, this._ignoreNodes)) {
break;
}
--length;
clonedTextSource.setEndOffset(length);
clonedTextSource.setEndOffset(length, layoutAwareScan);
}
}

View File

@ -94,10 +94,12 @@ async function testDocument1() {
const vm = new VM({document, window, Range, Node});
vm.execute([
'mixed/js/dom.js',
'fg/js/dom-text-scanner.js',
'fg/js/source.js',
'fg/js/document.js'
]);
const [TextSourceRange, TextSourceElement, docRangeFromPoint, docSentenceExtract] = vm.get([
const [DOMTextScanner, TextSourceRange, TextSourceElement, docRangeFromPoint, docSentenceExtract] = vm.get([
'DOMTextScanner',
'TextSourceRange',
'TextSourceElement',
'docRangeFromPoint',
@ -106,7 +108,7 @@ async function testDocument1() {
try {
await testDocumentTextScanningFunctions(dom, {docRangeFromPoint, docSentenceExtract, TextSourceRange, TextSourceElement});
await testTextSourceRangeSeekFunctions(dom, {TextSourceRange});
await testTextSourceRangeSeekFunctions(dom, {DOMTextScanner});
} finally {
window.close();
}
@ -179,7 +181,7 @@ async function testDocumentTextScanningFunctions(dom, {docRangeFromPoint, docSen
if (source === null) { continue; }
// Test docSentenceExtract
const sentenceActual = docSentenceExtract(source, sentenceExtent).text;
const sentenceActual = docSentenceExtract(source, sentenceExtent, false).text;
assert.strictEqual(sentenceActual, sentence);
// Clean
@ -187,7 +189,7 @@ async function testDocumentTextScanningFunctions(dom, {docRangeFromPoint, docSen
}
}
async function testTextSourceRangeSeekFunctions(dom, {TextSourceRange}) {
async function testTextSourceRangeSeekFunctions(dom, {DOMTextScanner}) {
const document = dom.window.document;
for (const testElement of document.querySelectorAll('.test[data-test-type=text-source-range-seek]')) {
@ -220,8 +222,8 @@ async function testTextSourceRangeSeekFunctions(dom, {TextSourceRange}) {
const {node, offset, content} = (
seekDirection === 'forward' ?
TextSourceRange.seekForward(seekNode, seekOffset, seekLength) :
TextSourceRange.seekBackward(seekNode, seekOffset, seekLength)
new DOMTextScanner(seekNode, seekOffset, true, false).seek(seekLength) :
new DOMTextScanner(seekNode, seekOffset, true, false).seek(-seekLength)
);
assert.strictEqual(node, expectedResultNode);