From e23504613f8526b90a497512c086ed48e66cde95 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 21 Jun 2020 16:07:51 -0400 Subject: [PATCH] Use DOMTextScanner (#536) * Use DOMTextScanner instead of TextSourceRange.seek* * Move getNodesInRange to dom.js * Move anyNodeMatchesSelector to dom.js * Remove unused functions * Update tests * Add layoutAwareScan option * Use layoutAwareScan for source and sentence scanning * Remove unused IGNORE_TEXT_PATTERN --- ext/bg/data/options-schema.json | 7 +- ext/bg/js/options.js | 3 +- ext/bg/js/search-query-parser.js | 8 +- ext/bg/search.html | 1 + ext/bg/settings-popup-preview.html | 1 + ext/bg/settings.html | 4 + ext/fg/float.html | 1 + ext/fg/js/document.js | 11 +- ext/fg/js/frontend.js | 14 +- ext/fg/js/source.js | 224 ++--------------------------- ext/manifest.json | 1 + ext/mixed/js/display.js | 11 +- ext/mixed/js/dom.js | 38 +++++ ext/mixed/js/text-scanner.js | 11 +- test/test-document.js | 14 +- 15 files changed, 102 insertions(+), 247 deletions(-) diff --git a/ext/bg/data/options-schema.json b/ext/bg/data/options-schema.json index 0379fa75..5885e036 100644 --- a/ext/bg/data/options-schema.json +++ b/ext/bg/data/options-schema.json @@ -321,7 +321,8 @@ "enablePopupSearch", "enableOnPopupExpressions", "enableOnSearchPage", - "enableSearchTags" + "enableSearchTags", + "layoutAwareScan" ], "properties": { "middleMouse": { @@ -383,6 +384,10 @@ "enableSearchTags": { "type": "boolean", "default": false + }, + "layoutAwareScan": { + "type": "boolean", + "default": false } } }, diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index 97368a0b..170e4799 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -203,7 +203,8 @@ function profileOptionsCreateDefaults() { enablePopupSearch: false, enableOnPopupExpressions: false, enableOnSearchPage: true, - enableSearchTags: false + enableSearchTags: false, + layoutAwareScan: false }, translation: { diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index addfc686..97e98b40 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -75,15 +75,17 @@ class QueryParser { async _search(textSource, cause) { if (textSource === null) { return null; } - const searchText = this._textScanner.getTextSourceContent(textSource, this._options.scanning.length); + const {length: scanLength, layoutAwareScan} = this._options.scanning; + const searchText = this._textScanner.getTextSourceContent(textSource, scanLength, layoutAwareScan); if (searchText.length === 0) { return null; } const {definitions, length} = await api.termsFind(searchText, {}, this._getOptionsContext()); if (definitions.length === 0) { return null; } - const sentence = docSentenceExtract(textSource, this._options.anki.sentenceExt); + const sentenceExtent = this._options.anki.sentenceExt; + const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan); - textSource.setEndOffset(length); + textSource.setEndOffset(length, layoutAwareScan); this._setContent('terms', {definitions, context: { focus: false, diff --git a/ext/bg/search.html b/ext/bg/search.html index de08cdae..4a28dd88 100644 --- a/ext/bg/search.html +++ b/ext/bg/search.html @@ -79,6 +79,7 @@ + diff --git a/ext/bg/settings-popup-preview.html b/ext/bg/settings-popup-preview.html index fe92f24f..5eecd005 100644 --- a/ext/bg/settings-popup-preview.html +++ b/ext/bg/settings-popup-preview.html @@ -126,6 +126,7 @@ + diff --git a/ext/bg/settings.html b/ext/bg/settings.html index 118a13b9..77b61aef 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -400,6 +400,10 @@ +
+ +
+
diff --git a/ext/fg/float.html b/ext/fg/float.html index 17dbcc6d..3e41cde5 100644 --- a/ext/fg/float.html +++ b/ext/fg/float.html @@ -46,6 +46,7 @@ + diff --git a/ext/fg/js/document.js b/ext/fg/js/document.js index d639bc86..c288502c 100644 --- a/ext/fg/js/document.js +++ b/ext/fg/js/document.js @@ -17,6 +17,7 @@ /* global * DOM + * DOMTextScanner * TextSourceElement * TextSourceRange */ @@ -152,14 +153,14 @@ function docRangeFromPoint(x, y, deepDomScan) { } } -function docSentenceExtract(source, extent) { +function docSentenceExtract(source, extent, layoutAwareScan) { const quotesFwd = {'「': '」', '『': '』', "'": "'", '"': '"'}; const quotesBwd = {'」': '「', '』': '『', "'": "'", '"': '"'}; const terminators = '…。..??!!'; const sourceLocal = source.clone(); - const position = sourceLocal.setStartOffset(extent); - sourceLocal.setEndOffset(extent * 2 - position, true); + const position = sourceLocal.setStartOffset(extent, layoutAwareScan); + sourceLocal.setEndOffset(extent * 2 - position, layoutAwareScan, true); const content = sourceLocal.text(); let quoteStack = []; @@ -232,7 +233,7 @@ function isPointInRange(x, y, range) { const nodePre = range.endContainer; const offsetPre = range.endOffset; try { - const {node, offset, content} = TextSourceRange.seekForward(range.endContainer, range.endOffset, 1); + const {node, offset, content} = new DOMTextScanner(range.endContainer, range.endOffset, true, false).seek(1); range.setEnd(node, offset); if (!isWhitespace(content) && DOM.isPointInAnyRect(x, y, range.getClientRects())) { @@ -243,7 +244,7 @@ function isPointInRange(x, y, range) { } // Scan backward - const {node, offset, content} = TextSourceRange.seekBackward(range.startContainer, range.startOffset, 1); + const {node, offset, content} = new DOMTextScanner(range.startContainer, range.startOffset, true, false).seek(-1); range.setStart(node, offset); if (!isWhitespace(content) && DOM.isPointInAnyRect(x, y, range.getClientRects())) { diff --git a/ext/fg/js/frontend.js b/ext/fg/js/frontend.js index 70bd8a48..ab455c09 100644 --- a/ext/fg/js/frontend.js +++ b/ext/fg/js/frontend.js @@ -258,32 +258,36 @@ class Frontend { } async _findTerms(textSource, optionsContext) { - const searchText = this._textScanner.getTextSourceContent(textSource, this._options.scanning.length); + const {length: scanLength, layoutAwareScan} = this._options.scanning; + const searchText = this._textScanner.getTextSourceContent(textSource, scanLength, layoutAwareScan); if (searchText.length === 0) { return null; } const {definitions, length} = await api.termsFind(searchText, {}, optionsContext); if (definitions.length === 0) { return null; } - textSource.setEndOffset(length); + textSource.setEndOffset(length, layoutAwareScan); return {definitions, type: 'terms'}; } async _findKanji(textSource, optionsContext) { - const searchText = this._textScanner.getTextSourceContent(textSource, 1); + const layoutAwareScan = this._options.scanning.layoutAwareScan; + const searchText = this._textScanner.getTextSourceContent(textSource, 1, layoutAwareScan); if (searchText.length === 0) { return null; } const definitions = await api.kanjiFind(searchText, optionsContext); if (definitions.length === 0) { return null; } - textSource.setEndOffset(1); + textSource.setEndOffset(1, layoutAwareScan); return {definitions, type: 'kanji'}; } _showContent(textSource, focus, definitions, type, optionsContext) { const {url} = optionsContext; - const sentence = docSentenceExtract(textSource, this._options.anki.sentenceExt); + const sentenceExtent = this._options.anki.sentenceExt; + const layoutAwareScan = this._options.scanning.layoutAwareScan; + const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan); this._showPopupContent( textSource, optionsContext, diff --git a/ext/fg/js/source.js b/ext/fg/js/source.js index fa4706f2..38810f07 100644 --- a/ext/fg/js/source.js +++ b/ext/fg/js/source.js @@ -15,9 +15,9 @@ * along with this program. If not, see . */ -// \u200c (Zero-width non-joiner) appears on Google Docs from Chrome 76 onwards -const IGNORE_TEXT_PATTERN = /\u200c/; - +/* global + * DOMTextScanner + */ /* * TextSourceRange @@ -46,19 +46,19 @@ class TextSourceRange { return this.content; } - setEndOffset(length, fromEnd=false) { + setEndOffset(length, layoutAwareScan, fromEnd=false) { const state = ( fromEnd ? - TextSourceRange.seekForward(this.range.endContainer, this.range.endOffset, length) : - TextSourceRange.seekForward(this.range.startContainer, this.range.startOffset, length) + new DOMTextScanner(this.range.endContainer, this.range.endOffset, !layoutAwareScan, layoutAwareScan).seek(length) : + new DOMTextScanner(this.range.startContainer, this.range.startOffset, !layoutAwareScan, layoutAwareScan).seek(length) ); this.range.setEnd(state.node, state.offset); this.content = (fromEnd ? this.content + state.content : state.content); return length - state.remainder; } - setStartOffset(length) { - const state = TextSourceRange.seekBackward(this.range.startContainer, this.range.startOffset, length); + setStartOffset(length, layoutAwareScan) { + const state = new DOMTextScanner(this.range.startContainer, this.range.startOffset, !layoutAwareScan, layoutAwareScan).seek(-length); this.range.setStart(state.node, state.offset); this.rangeStartOffset = this.range.startOffset; this.content = state.content + this.content; @@ -110,154 +110,6 @@ class TextSourceRange { } } - static shouldEnter(node) { - switch (node.nodeName.toUpperCase()) { - case 'RT': - case 'SCRIPT': - case 'STYLE': - return false; - } - - const style = window.getComputedStyle(node); - return !( - style.visibility === 'hidden' || - style.display === 'none' || - parseFloat(style.fontSize) === 0 - ); - } - - static getRubyElement(node) { - node = TextSourceRange.getParentElement(node); - if (node !== null && node.nodeName.toUpperCase() === 'RT') { - node = node.parentNode; - return (node !== null && node.nodeName.toUpperCase() === 'RUBY') ? node : null; - } - return null; - } - - static seekForward(node, offset, length) { - const state = {node, offset, remainder: length, content: ''}; - if (length <= 0) { - return state; - } - - const TEXT_NODE = Node.TEXT_NODE; - const ELEMENT_NODE = Node.ELEMENT_NODE; - let resetOffset = false; - - const ruby = TextSourceRange.getRubyElement(node); - if (ruby !== null) { - node = ruby; - resetOffset = true; - } - - while (node !== null) { - let visitChildren = true; - const nodeType = node.nodeType; - - if (nodeType === TEXT_NODE) { - state.node = node; - if (TextSourceRange.seekForwardTextNode(state, resetOffset)) { - break; - } - resetOffset = true; - } else if (nodeType === ELEMENT_NODE) { - visitChildren = TextSourceRange.shouldEnter(node); - } - - node = TextSourceRange.getNextNode(node, visitChildren); - } - - return state; - } - - static seekForwardTextNode(state, resetOffset) { - const nodeValue = state.node.nodeValue; - const nodeValueLength = nodeValue.length; - let content = state.content; - let offset = resetOffset ? 0 : state.offset; - let remainder = state.remainder; - let result = false; - - for (; offset < nodeValueLength; ++offset) { - const c = nodeValue[offset]; - if (!IGNORE_TEXT_PATTERN.test(c)) { - content += c; - if (--remainder <= 0) { - result = true; - ++offset; - break; - } - } - } - - state.offset = offset; - state.content = content; - state.remainder = remainder; - return result; - } - - static seekBackward(node, offset, length) { - const state = {node, offset, remainder: length, content: ''}; - if (length <= 0) { - return state; - } - - const TEXT_NODE = Node.TEXT_NODE; - const ELEMENT_NODE = Node.ELEMENT_NODE; - let resetOffset = false; - - const ruby = TextSourceRange.getRubyElement(node); - if (ruby !== null) { - node = ruby; - resetOffset = true; - } - - while (node !== null) { - let visitChildren = true; - const nodeType = node.nodeType; - - if (nodeType === TEXT_NODE) { - state.node = node; - if (TextSourceRange.seekBackwardTextNode(state, resetOffset)) { - break; - } - resetOffset = true; - } else if (nodeType === ELEMENT_NODE) { - visitChildren = TextSourceRange.shouldEnter(node); - } - - node = TextSourceRange.getPreviousNode(node, visitChildren); - } - - return state; - } - - static seekBackwardTextNode(state, resetOffset) { - const nodeValue = state.node.nodeValue; - let content = state.content; - let offset = resetOffset ? nodeValue.length : state.offset; - let remainder = state.remainder; - let result = false; - - for (; offset > 0; --offset) { - const c = nodeValue[offset - 1]; - if (!IGNORE_TEXT_PATTERN.test(c)) { - content = c + content; - if (--remainder <= 0) { - result = true; - --offset; - break; - } - } - } - - state.offset = offset; - state.content = content; - state.remainder = remainder; - return result; - } - static getParentElement(node) { while (node !== null && node.nodeType !== Node.ELEMENT_NODE) { node = node.parentNode; @@ -290,66 +142,6 @@ class TextSourceRange { return writingMode; } } - - static getNodesInRange(range) { - const end = range.endContainer; - const nodes = []; - for (let node = range.startContainer; node !== null; node = TextSourceRange.getNextNode(node, true)) { - nodes.push(node); - if (node === end) { break; } - } - return nodes; - } - - static getNextNode(node, visitChildren) { - let next = visitChildren ? node.firstChild : null; - if (next === null) { - while (true) { - next = node.nextSibling; - if (next !== null) { break; } - - next = node.parentNode; - if (next === null) { break; } - - node = next; - } - } - return next; - } - - static getPreviousNode(node, visitChildren) { - let next = visitChildren ? node.lastChild : null; - if (next === null) { - while (true) { - next = node.previousSibling; - if (next !== null) { break; } - - next = node.parentNode; - if (next === null) { break; } - - node = next; - } - } - return next; - } - - static anyNodeMatchesSelector(nodeList, selector) { - for (const node of nodeList) { - if (TextSourceRange.nodeMatchesSelector(node, selector)) { - return true; - } - } - return false; - } - - static nodeMatchesSelector(node, selector) { - for (; node !== null; node = node.parentNode) { - if (node.nodeType === Node.ELEMENT_NODE) { - return node.matches(selector); - } - } - return false; - } } diff --git a/ext/manifest.json b/ext/manifest.json index 75334675..4d4f0c06 100644 --- a/ext/manifest.json +++ b/ext/manifest.json @@ -42,6 +42,7 @@ "mixed/js/dynamic-loader.js", "mixed/js/text-scanner.js", "fg/js/document.js", + "fg/js/dom-text-scanner.js", "fg/js/popup.js", "fg/js/source.js", "fg/js/popup-factory.js", diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js index 90fd1037..1d699706 100644 --- a/ext/mixed/js/display.js +++ b/ext/mixed/js/display.js @@ -236,7 +236,9 @@ class Display { const {textSource, definitions} = termLookupResults; const scannedElement = e.target; - const sentence = docSentenceExtract(textSource, this.options.anki.sentenceExt); + const sentenceExtent = this.options.anki.sentenceExt; + const layoutAwareScan = this.options.scanning.layoutAwareScan; + const sentence = docSentenceExtract(textSource, sentenceExtent, layoutAwareScan); this.context.update({ index: this.entryIndexFind(scannedElement), @@ -273,21 +275,22 @@ class Display { try { e.preventDefault(); - const textSource = docRangeFromPoint(e.clientX, e.clientY, this.options.scanning.deepDomScan); + const {length: scanLength, deepDomScan: deepScan, layoutAwareScan} = this.options.scanning; + const textSource = docRangeFromPoint(e.clientX, e.clientY, deepScan); if (textSource === null) { return false; } let definitions, length; try { - textSource.setEndOffset(this.options.scanning.length); + textSource.setEndOffset(scanLength, layoutAwareScan); ({definitions, length} = await api.termsFind(textSource.text(), {}, this.getOptionsContext())); if (definitions.length === 0) { return false; } - textSource.setEndOffset(length); + textSource.setEndOffset(length, layoutAwareScan); } finally { textSource.cleanup(); } diff --git a/ext/mixed/js/dom.js b/ext/mixed/js/dom.js index 0e8f4462..05764443 100644 --- a/ext/mixed/js/dom.js +++ b/ext/mixed/js/dom.js @@ -86,4 +86,42 @@ class DOM { null ); } + + static getNodesInRange(range) { + const end = range.endContainer; + const nodes = []; + for (let node = range.startContainer; node !== null; node = DOM.getNextNode(node)) { + nodes.push(node); + if (node === end) { break; } + } + return nodes; + } + + static getNextNode(node) { + let next = node.firstChild; + if (next === null) { + while (true) { + next = node.nextSibling; + if (next !== null) { break; } + + next = node.parentNode; + if (next === null) { break; } + + node = next; + } + } + return next; + } + + static anyNodeMatchesSelector(nodes, selector) { + const ELEMENT_NODE = Node.ELEMENT_NODE; + for (let node of nodes) { + for (; node !== null; node = node.parentNode) { + if (node.nodeType !== ELEMENT_NODE) { continue; } + if (node.matches(selector)) { return true; } + break; + } + } + return false; + } } diff --git a/ext/mixed/js/text-scanner.js b/ext/mixed/js/text-scanner.js index b8688b08..fb275452 100644 --- a/ext/mixed/js/text-scanner.js +++ b/ext/mixed/js/text-scanner.js @@ -17,7 +17,6 @@ /* global * DOM - * TextSourceRange * docRangeFromPoint */ @@ -119,20 +118,20 @@ class TextScanner extends EventDispatcher { } } - getTextSourceContent(textSource, length) { + getTextSourceContent(textSource, length, layoutAwareScan) { const clonedTextSource = textSource.clone(); - clonedTextSource.setEndOffset(length); + clonedTextSource.setEndOffset(length, layoutAwareScan); if (this._ignoreNodes !== null && clonedTextSource.range) { length = clonedTextSource.text().length; while (clonedTextSource.range && length > 0) { - const nodes = TextSourceRange.getNodesInRange(clonedTextSource.range); - if (!TextSourceRange.anyNodeMatchesSelector(nodes, this._ignoreNodes)) { + const nodes = DOM.getNodesInRange(clonedTextSource.range); + if (!DOM.anyNodeMatchesSelector(nodes, this._ignoreNodes)) { break; } --length; - clonedTextSource.setEndOffset(length); + clonedTextSource.setEndOffset(length, layoutAwareScan); } } diff --git a/test/test-document.js b/test/test-document.js index 0d9026db..ba7acc49 100644 --- a/test/test-document.js +++ b/test/test-document.js @@ -94,10 +94,12 @@ async function testDocument1() { const vm = new VM({document, window, Range, Node}); vm.execute([ 'mixed/js/dom.js', + 'fg/js/dom-text-scanner.js', 'fg/js/source.js', 'fg/js/document.js' ]); - const [TextSourceRange, TextSourceElement, docRangeFromPoint, docSentenceExtract] = vm.get([ + const [DOMTextScanner, TextSourceRange, TextSourceElement, docRangeFromPoint, docSentenceExtract] = vm.get([ + 'DOMTextScanner', 'TextSourceRange', 'TextSourceElement', 'docRangeFromPoint', @@ -106,7 +108,7 @@ async function testDocument1() { try { await testDocumentTextScanningFunctions(dom, {docRangeFromPoint, docSentenceExtract, TextSourceRange, TextSourceElement}); - await testTextSourceRangeSeekFunctions(dom, {TextSourceRange}); + await testTextSourceRangeSeekFunctions(dom, {DOMTextScanner}); } finally { window.close(); } @@ -179,7 +181,7 @@ async function testDocumentTextScanningFunctions(dom, {docRangeFromPoint, docSen if (source === null) { continue; } // Test docSentenceExtract - const sentenceActual = docSentenceExtract(source, sentenceExtent).text; + const sentenceActual = docSentenceExtract(source, sentenceExtent, false).text; assert.strictEqual(sentenceActual, sentence); // Clean @@ -187,7 +189,7 @@ async function testDocumentTextScanningFunctions(dom, {docRangeFromPoint, docSen } } -async function testTextSourceRangeSeekFunctions(dom, {TextSourceRange}) { +async function testTextSourceRangeSeekFunctions(dom, {DOMTextScanner}) { const document = dom.window.document; for (const testElement of document.querySelectorAll('.test[data-test-type=text-source-range-seek]')) { @@ -220,8 +222,8 @@ async function testTextSourceRangeSeekFunctions(dom, {TextSourceRange}) { const {node, offset, content} = ( seekDirection === 'forward' ? - TextSourceRange.seekForward(seekNode, seekOffset, seekLength) : - TextSourceRange.seekBackward(seekNode, seekOffset, seekLength) + new DOMTextScanner(seekNode, seekOffset, true, false).seek(seekLength) : + new DOMTextScanner(seekNode, seekOffset, true, false).seek(-seekLength) ); assert.strictEqual(node, expectedResultNode);