Refactor sentence parsing (#1215)
* Rename sentenceExtent to sentenceScanExtent
* Update TextScanner.setOptions
* Change function argument order
* Rename quote map variables
* Fix edge case quote handling
* Update terminator maps to support character inclusion
This commit is contained in:
parent
da1e1e5c5b
commit
083da93142
@ -312,7 +312,7 @@ class Frontend {
|
||||
async _updateOptionsInternal() {
|
||||
const optionsContext = await this._getOptionsContext();
|
||||
const options = await api.optionsGet(optionsContext);
|
||||
const scanningOptions = options.scanning;
|
||||
const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options;
|
||||
this._options = options;
|
||||
|
||||
await this._updatePopup();
|
||||
@ -326,7 +326,7 @@ class Frontend {
|
||||
touchInputEnabled: scanningOptions.touchInputEnabled,
|
||||
pointerEventsEnabled: scanningOptions.pointerEventsEnabled,
|
||||
scanLength: scanningOptions.length,
|
||||
sentenceExtent: options.sentenceParsing.scanExtent,
|
||||
sentenceScanExtent: sentenceParsingOptions.scanExtent,
|
||||
layoutAwareScan: scanningOptions.layoutAwareScan,
|
||||
preventMiddleMouse
|
||||
});
|
||||
|
@ -309,7 +309,7 @@ class Display extends EventDispatcher {
|
||||
|
||||
async updateOptions() {
|
||||
const options = await api.optionsGet(this.getOptionsContext());
|
||||
const scanning = options.scanning;
|
||||
const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options;
|
||||
this._options = options;
|
||||
|
||||
this._updateDocumentOptions(options);
|
||||
@ -320,16 +320,16 @@ class Display extends EventDispatcher {
|
||||
selectedParser: options.parsing.selectedParser,
|
||||
termSpacing: options.parsing.termSpacing,
|
||||
scanning: {
|
||||
inputs: scanning.inputs,
|
||||
deepContentScan: scanning.deepDomScan,
|
||||
selectText: scanning.selectText,
|
||||
delay: scanning.delay,
|
||||
touchInputEnabled: scanning.touchInputEnabled,
|
||||
pointerEventsEnabled: scanning.pointerEventsEnabled,
|
||||
scanLength: scanning.length,
|
||||
sentenceExtent: options.sentenceParsing.scanExtent,
|
||||
layoutAwareScan: scanning.layoutAwareScan,
|
||||
preventMiddleMouse: scanning.preventMiddleMouse.onSearchQuery
|
||||
inputs: scanningOptions.inputs,
|
||||
deepContentScan: scanningOptions.deepDomScan,
|
||||
selectText: scanningOptions.selectText,
|
||||
delay: scanningOptions.delay,
|
||||
touchInputEnabled: scanningOptions.touchInputEnabled,
|
||||
pointerEventsEnabled: scanningOptions.pointerEventsEnabled,
|
||||
scanLength: scanningOptions.length,
|
||||
sentenceScanExtent: sentenceParsingOptions.scanExtent,
|
||||
layoutAwareScan: scanningOptions.layoutAwareScan,
|
||||
preventMiddleMouse: scanningOptions.preventMiddleMouse.onSearchQuery
|
||||
}
|
||||
});
|
||||
|
||||
@ -1810,7 +1810,7 @@ class Display extends EventDispatcher {
|
||||
this._definitionTextScanner.on('searched', this._onDefinitionTextScannerSearched.bind(this));
|
||||
}
|
||||
|
||||
const scanningOptions = options.scanning;
|
||||
const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options;
|
||||
this._definitionTextScanner.setOptions({
|
||||
inputs: [{
|
||||
include: 'mouse0',
|
||||
@ -1832,7 +1832,7 @@ class Display extends EventDispatcher {
|
||||
touchInputEnabled: false,
|
||||
pointerEventsEnabled: false,
|
||||
scanLength: scanningOptions.length,
|
||||
sentenceExtent: options.sentenceParsing.scanExtent,
|
||||
sentenceScanExtent: sentenceParsingOptions.scanExtent,
|
||||
layoutAwareScan: scanningOptions.layoutAwareScan,
|
||||
preventMiddleMouse: false
|
||||
});
|
||||
|
@ -31,12 +31,16 @@ class DocumentUtil {
|
||||
['\'', '\''],
|
||||
['"', '"']
|
||||
];
|
||||
this._terminatorSet = new Set(['…', '。', '.', '.', '?', '?', '!', '!']);
|
||||
this._startQuoteMap = new Map();
|
||||
this._endQuoteMap = new Map();
|
||||
const terminatorString = '…。..??!!';
|
||||
this._terminatorMap = new Map();
|
||||
for (const char of terminatorString) {
|
||||
this._terminatorMap.set(char, [false, true]);
|
||||
}
|
||||
this._forwardQuoteMap = new Map();
|
||||
this._backwardQuoteMap = new Map();
|
||||
for (const [char1, char2] of quoteArray) {
|
||||
this._startQuoteMap.set(char1, char2);
|
||||
this._endQuoteMap.set(char2, char1);
|
||||
this._forwardQuoteMap.set(char1, [char2, false]);
|
||||
this._backwardQuoteMap.set(char2, [char1, false]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -77,10 +81,10 @@ class DocumentUtil {
|
||||
}
|
||||
}
|
||||
|
||||
extractSentence(source, extent, layoutAwareScan) {
|
||||
const terminatorSet = this._terminatorSet;
|
||||
const startQuoteMap = this._startQuoteMap;
|
||||
const endQuoteMap = this._endQuoteMap;
|
||||
extractSentence(source, layoutAwareScan, extent) {
|
||||
const terminatorMap = this._terminatorMap;
|
||||
const forwardQuoteMap = this._forwardQuoteMap;
|
||||
const backwardQuoteMap = this._backwardQuoteMap;
|
||||
|
||||
// Scan text
|
||||
source = source.clone();
|
||||
@ -98,22 +102,28 @@ class DocumentUtil {
|
||||
const c = text[pos1 - 1];
|
||||
if (c === '\n') { break; }
|
||||
|
||||
if (quoteStack.length === 0 && terminatorSet.has(c)) {
|
||||
break;
|
||||
if (quoteStack.length === 0) {
|
||||
const terminatorInfo = terminatorMap.get(c);
|
||||
if (typeof terminatorInfo !== 'undefined') {
|
||||
if (terminatorInfo[0]) { --pos1; }
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let otherQuote = startQuoteMap.get(c);
|
||||
if (typeof otherQuote !== 'undefined') {
|
||||
let quoteInfo = forwardQuoteMap.get(c);
|
||||
if (typeof quoteInfo !== 'undefined') {
|
||||
if (quoteStack.length === 0) {
|
||||
if (quoteInfo[1]) { --pos1; }
|
||||
break;
|
||||
} else if (quoteStack[0] === c) {
|
||||
quoteStack.pop();
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
otherQuote = endQuoteMap.get(c);
|
||||
if (typeof otherQuote !== 'undefined') {
|
||||
quoteStack.unshift(otherQuote);
|
||||
}
|
||||
}
|
||||
|
||||
quoteInfo = backwardQuoteMap.get(c);
|
||||
if (typeof quoteInfo !== 'undefined') {
|
||||
quoteStack.unshift(quoteInfo[0]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -123,23 +133,28 @@ class DocumentUtil {
|
||||
const c = text[pos2];
|
||||
if (c === '\n') { break; }
|
||||
|
||||
if (quoteStack.length === 0 && terminatorSet.has(c)) {
|
||||
++pos2;
|
||||
break;
|
||||
if (quoteStack.length === 0) {
|
||||
const terminatorInfo = terminatorMap.get(c);
|
||||
if (typeof terminatorInfo !== 'undefined') {
|
||||
if (terminatorInfo[1]) { ++pos2; }
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let otherQuote = endQuoteMap.get(c);
|
||||
if (typeof otherQuote !== 'undefined') {
|
||||
let quoteInfo = backwardQuoteMap.get(c);
|
||||
if (typeof quoteInfo !== 'undefined') {
|
||||
if (quoteStack.length === 0) {
|
||||
if (quoteInfo[1]) { ++pos2; }
|
||||
break;
|
||||
} else if (quoteStack[0] === c) {
|
||||
quoteStack.pop();
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
otherQuote = startQuoteMap.get(c);
|
||||
if (typeof otherQuote !== 'undefined') {
|
||||
quoteStack.unshift(otherQuote);
|
||||
}
|
||||
}
|
||||
|
||||
quoteInfo = forwardQuoteMap.get(c);
|
||||
if (typeof quoteInfo !== 'undefined') {
|
||||
quoteStack.unshift(quoteInfo[0]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -59,7 +59,7 @@ class TextScanner extends EventDispatcher {
|
||||
this._touchInputEnabled = false;
|
||||
this._pointerEventsEnabled = false;
|
||||
this._scanLength = 1;
|
||||
this._sentenceExtent = 1;
|
||||
this._sentenceScanExtent = 1;
|
||||
this._layoutAwareScan = false;
|
||||
this._preventMiddleMouse = false;
|
||||
this._inputs = [];
|
||||
@ -134,7 +134,18 @@ class TextScanner extends EventDispatcher {
|
||||
}
|
||||
}
|
||||
|
||||
setOptions({inputs, deepContentScan, selectText, delay, touchInputEnabled, pointerEventsEnabled, scanLength, sentenceExtent, layoutAwareScan, preventMiddleMouse}) {
|
||||
setOptions({
|
||||
inputs,
|
||||
deepContentScan,
|
||||
selectText,
|
||||
delay,
|
||||
touchInputEnabled,
|
||||
pointerEventsEnabled,
|
||||
scanLength,
|
||||
sentenceScanExtent,
|
||||
layoutAwareScan,
|
||||
preventMiddleMouse
|
||||
}) {
|
||||
if (Array.isArray(inputs)) {
|
||||
this._inputs = inputs.map(({
|
||||
include,
|
||||
@ -182,8 +193,8 @@ class TextScanner extends EventDispatcher {
|
||||
if (typeof scanLength === 'number') {
|
||||
this._scanLength = scanLength;
|
||||
}
|
||||
if (typeof sentenceExtent === 'number') {
|
||||
this._sentenceExtent = sentenceExtent;
|
||||
if (typeof sentenceScanExtent === 'number') {
|
||||
this._sentenceScanExtent = sentenceScanExtent;
|
||||
}
|
||||
if (typeof layoutAwareScan === 'boolean') {
|
||||
this._layoutAwareScan = layoutAwareScan;
|
||||
@ -711,7 +722,7 @@ class TextScanner extends EventDispatcher {
|
||||
|
||||
async _findTerms(textSource, optionsContext) {
|
||||
const scanLength = this._scanLength;
|
||||
const sentenceExtent = this._sentenceExtent;
|
||||
const sentenceScanExtent = this._sentenceScanExtent;
|
||||
const layoutAwareScan = this._layoutAwareScan;
|
||||
const searchText = this.getTextSourceContent(textSource, scanLength, layoutAwareScan);
|
||||
if (searchText.length === 0) { return null; }
|
||||
@ -720,13 +731,13 @@ class TextScanner extends EventDispatcher {
|
||||
if (definitions.length === 0) { return null; }
|
||||
|
||||
textSource.setEndOffset(length, layoutAwareScan);
|
||||
const sentence = this._documentUtil.extractSentence(textSource, sentenceExtent, layoutAwareScan);
|
||||
const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent);
|
||||
|
||||
return {definitions, sentence, type: 'terms'};
|
||||
}
|
||||
|
||||
async _findKanji(textSource, optionsContext) {
|
||||
const sentenceExtent = this._sentenceExtent;
|
||||
const sentenceScanExtent = this._sentenceScanExtent;
|
||||
const layoutAwareScan = this._layoutAwareScan;
|
||||
const searchText = this.getTextSourceContent(textSource, 1, layoutAwareScan);
|
||||
if (searchText.length === 0) { return null; }
|
||||
@ -735,7 +746,7 @@ class TextScanner extends EventDispatcher {
|
||||
if (definitions.length === 0) { return null; }
|
||||
|
||||
textSource.setEndOffset(1, layoutAwareScan);
|
||||
const sentence = this._documentUtil.extractSentence(textSource, sentenceExtent, layoutAwareScan);
|
||||
const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent);
|
||||
|
||||
return {definitions, sentence, type: 'kanji'};
|
||||
}
|
||||
|
@ -21,7 +21,7 @@
|
||||
data-end-node-selector="span"
|
||||
data-end-offset="0"
|
||||
data-result-type="TextSourceRange",
|
||||
data-sentence-extent="100"
|
||||
data-sentence-scan-extent="100"
|
||||
data-sentence="真白「心配してくださって、ありがとございます」"
|
||||
>
|
||||
<span>真白「心配してくださって、ありがとございます」</span>
|
||||
@ -37,7 +37,7 @@
|
||||
data-end-node-selector="span"
|
||||
data-end-offset="5"
|
||||
data-result-type="TextSourceRange",
|
||||
data-sentence-extent="100"
|
||||
data-sentence-scan-extent="100"
|
||||
data-sentence="心配してくださって、ありがとございます"
|
||||
>
|
||||
<span>真白「心配してくださって、ありがとございます」</span>
|
||||
@ -53,7 +53,7 @@
|
||||
data-end-node-selector="span"
|
||||
data-end-offset="16"
|
||||
data-result-type="TextSourceRange",
|
||||
data-sentence-extent="100"
|
||||
data-sentence-scan-extent="100"
|
||||
data-sentence="心配して「くださって」、ありがと「ございます」"
|
||||
>
|
||||
<span>真白「心配して「くださって」、ありがと「ございます」」</span>
|
||||
@ -69,7 +69,7 @@
|
||||
data-end-node-selector="span"
|
||||
data-end-offset="4"
|
||||
data-result-type="TextSourceRange",
|
||||
data-sentence-extent="100"
|
||||
data-sentence-scan-extent="100"
|
||||
data-sentence="ありがとございます。"
|
||||
>
|
||||
<span>ありがとございます。ありがとございます。</span>
|
||||
@ -85,7 +85,7 @@
|
||||
data-end-node-selector="span"
|
||||
data-end-offset="14"
|
||||
data-result-type="TextSourceRange",
|
||||
data-sentence-extent="100"
|
||||
data-sentence-scan-extent="100"
|
||||
data-sentence="ありがとございます。"
|
||||
>
|
||||
<span>ありがとございます。ありがとございます。</span>
|
||||
@ -101,7 +101,7 @@
|
||||
data-end-node-selector="input"
|
||||
data-end-offset="0"
|
||||
data-result-type="TextSourceRange",
|
||||
data-sentence-extent="100"
|
||||
data-sentence-scan-extent="100"
|
||||
data-sentence="真白「心配してくださって、ありがとございます」"
|
||||
data-has-imposter="true"
|
||||
>
|
||||
@ -118,7 +118,7 @@
|
||||
data-end-node-selector="textarea"
|
||||
data-end-offset="0"
|
||||
data-result-type="TextSourceRange",
|
||||
data-sentence-extent="100"
|
||||
data-sentence-scan-extent="100"
|
||||
data-sentence="真白「心配してくださって、ありがとございます」"
|
||||
data-has-imposter="true"
|
||||
>
|
||||
@ -135,7 +135,7 @@
|
||||
data-end-node-selector="button"
|
||||
data-end-offset="0"
|
||||
data-result-type="TextSourceElement",
|
||||
data-sentence-extent="100"
|
||||
data-sentence-scan-extent="100"
|
||||
data-sentence="よみちゃん"
|
||||
>
|
||||
<button style="width: 100%; box-sizing: border-box; font-family: inherit; font-size: inherit; border: 1px solid #d8d8d8; background-color: #f0f0f0; padding: 0.2em;">よみちゃん</button>
|
||||
@ -151,7 +151,7 @@
|
||||
data-end-node-selector="img"
|
||||
data-end-offset="0"
|
||||
data-result-type="TextSourceElement"
|
||||
data-sentence-extent="100"
|
||||
data-sentence-scan-extent="100"
|
||||
data-sentence="よみちゃん"
|
||||
>
|
||||
<img src="" alt="よみちゃん" title="よみちゃん" style="width: 70px; height: 70px; image-rendering: crisp-edges; image-rendering: pixelated; display: block;" />
|
||||
|
@ -127,7 +127,7 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
|
||||
endNodeSelector,
|
||||
endOffset,
|
||||
resultType,
|
||||
sentenceExtent,
|
||||
sentenceScanExtent,
|
||||
sentence,
|
||||
hasImposter
|
||||
} = testElement.dataset;
|
||||
@ -139,7 +139,7 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
|
||||
|
||||
startOffset = parseInt(startOffset, 10);
|
||||
endOffset = parseInt(endOffset, 10);
|
||||
sentenceExtent = parseInt(sentenceExtent, 10);
|
||||
sentenceScanExtent = parseInt(sentenceScanExtent, 10);
|
||||
|
||||
assert.notStrictEqual(elementFromPointValue, null);
|
||||
assert.notStrictEqual(caretRangeFromPointValue, null);
|
||||
@ -182,7 +182,7 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
|
||||
if (source === null) { continue; }
|
||||
|
||||
// Test docSentenceExtract
|
||||
const sentenceActual = documentUtil.extractSentence(source, sentenceExtent, false).text;
|
||||
const sentenceActual = documentUtil.extractSentence(source, false, sentenceScanExtent).text;
|
||||
assert.strictEqual(sentenceActual, sentence);
|
||||
|
||||
// Clean
|
||||
|
Loading…
Reference in New Issue
Block a user