From 66d048832f2dc30e11e6be4c68beab23c7d8adef Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 16 May 2021 15:24:38 -0400 Subject: [PATCH] Sentence termination character mode (#1682) * Change enableTerminationCharacters to terminationCharacterMode * Update settings * Update sentence extraction * Update tests * Add tests --- ext/css/settings.css | 3 ++ ext/data/schemas/options-schema.json | 7 +++-- ext/js/data/options-util.js | 14 +++++++++- ext/js/dom/document-util.js | 7 +++-- ext/js/language/text-scanner.js | 17 ++++++++--- ext/settings.html | 32 ++++++++++++++------- test/data/html/test-document1.html | 42 ++++++++++++++++++++++++++++ test/test-document-util.js | 5 +++- test/test-options-util.js | 4 +-- 9 files changed, 107 insertions(+), 24 deletions(-) diff --git a/ext/css/settings.css b/ext/css/settings.css index 2b0d5e8e..1bc2d1a7 100644 --- a/ext/css/settings.css +++ b/ext/css/settings.css @@ -2249,6 +2249,9 @@ input[type=number].dictionary-priority { align-content: flex-start; justify-content: flex-start; } +.horizontal-flex.horizontal-flex-nowrap { + flex-wrap: nowrap; +} .horizontal-flex>* { margin-left: 0.375em; } diff --git a/ext/data/schemas/options-schema.json b/ext/data/schemas/options-schema.json index 1f27c330..3f5bd0c7 100644 --- a/ext/data/schemas/options-schema.json +++ b/ext/data/schemas/options-schema.json @@ -925,7 +925,7 @@ "type": "object", "required": [ "scanExtent", - "enableTerminationCharacters", + "terminationCharacterMode", "terminationCharacters" ], "properties": { @@ -934,8 +934,9 @@ "minimum": 0, "default": 200 }, - "enableTerminationCharacters": { - "type": "boolean", + "terminationCharacterMode": { + "type": "string", + "enum": ["custom", "custom-no-newlines", "newlines", "none"], "default": true }, "terminationCharacters": { diff --git a/ext/js/data/options-util.js b/ext/js/data/options-util.js index cb7946f7..cb58206f 100644 --- a/ext/js/data/options-util.js +++ b/ext/js/data/options-util.js @@ -460,7 +460,8 @@ class OptionsUtil { {async: true, update: this._updateVersion8.bind(this)}, {async: false, update: this._updateVersion9.bind(this)}, {async: true, update: this._updateVersion10.bind(this)}, - {async: true, update: this._updateVersion11.bind(this)} + {async: false, update: this._updateVersion11.bind(this)}, + {async: false, update: this._updateVersion12.bind(this)} ]; } @@ -811,4 +812,15 @@ class OptionsUtil { } return options; } + + _updateVersion12(options) { + // Version 12 changes: + // Changed sentenceParsing.enableTerminationCharacters to sentenceParsing.terminationCharacterMode. + for (const profile of options.profiles) { + const {sentenceParsing} = profile.options; + sentenceParsing.terminationCharacterMode = sentenceParsing.enableTerminationCharacters ? 'custom' : 'newlines'; + delete sentenceParsing.enableTerminationCharacters; + } + return options; + } } diff --git a/ext/js/dom/document-util.js b/ext/js/dom/document-util.js index 8284ffa5..da4d3e61 100644 --- a/ext/js/dom/document-util.js +++ b/ext/js/dom/document-util.js @@ -69,6 +69,7 @@ class DocumentUtil { * @param source The text source object, either `TextSourceRange` or `TextSourceElement`. * @param layoutAwareScan Whether or not layout-aware scan mode should be used. * @param extent The length of the sentence to extract. + * @param terminateAtNewlines Whether or not a sentence should be terminated at newline characters. * @param terminatorMap A mapping of characters that terminate a sentence. * Format: * ```js @@ -87,7 +88,7 @@ class DocumentUtil { * ``` * @returns The sentence and the offset to the original source: `{sentence: string, offset: integer}`. */ - extractSentence(source, layoutAwareScan, extent, terminatorMap, forwardQuoteMap, backwardQuoteMap) { + extractSentence(source, layoutAwareScan, extent, terminateAtNewlines, terminatorMap, forwardQuoteMap, backwardQuoteMap) { // Scan text source = source.clone(); const startLength = source.setStartOffset(extent, layoutAwareScan); @@ -102,7 +103,7 @@ class DocumentUtil { let quoteStack = []; for (; pos1 > 0; --pos1) { const c = text[pos1 - 1]; - if (c === '\n') { break; } + if (c === '\n' && terminateAtNewlines) { break; } if (quoteStack.length === 0) { const terminatorInfo = terminatorMap.get(c); @@ -133,7 +134,7 @@ class DocumentUtil { quoteStack = []; for (; pos2 < textLength; ++pos2) { const c = text[pos2]; - if (c === '\n') { break; } + if (c === '\n' && terminateAtNewlines) { break; } if (quoteStack.length === 0) { const terminatorInfo = terminatorMap.get(c); diff --git a/ext/js/language/text-scanner.js b/ext/js/language/text-scanner.js index a49627f8..2ebf26e2 100644 --- a/ext/js/language/text-scanner.js +++ b/ext/js/language/text-scanner.js @@ -63,6 +63,7 @@ class TextScanner extends EventDispatcher { this._layoutAwareScan = false; this._preventMiddleMouse = false; this._sentenceScanExtent = 0; + this._sentenceTerminateAtNewlines = true; this._sentenceTerminatorMap = new Map(); this._sentenceForwardQuoteMap = new Map(); this._sentenceBackwardQuoteMap = new Map(); @@ -209,19 +210,23 @@ class TextScanner extends EventDispatcher { this._preventMiddleMouse = preventMiddleMouse; } if (typeof sentenceParsingOptions === 'object' && sentenceParsingOptions !== null) { - const {scanExtent, enableTerminationCharacters, terminationCharacters} = sentenceParsingOptions; - const hasTerminationCharacters = (typeof terminationCharacters === 'object' && Array.isArray(terminationCharacters)); + const {scanExtent, terminationCharacterMode, terminationCharacters} = sentenceParsingOptions; if (typeof scanExtent === 'number') { this._sentenceScanExtent = sentenceParsingOptions.scanExtent; } - if (typeof enableTerminationCharacters === 'boolean' || hasTerminationCharacters) { + if (typeof terminationCharacterMode === 'string') { + this._sentenceTerminateAtNewlines = (terminationCharacterMode === 'custom' || terminationCharacterMode === 'newlines'); const sentenceTerminatorMap = this._sentenceTerminatorMap; const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap; const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap; sentenceTerminatorMap.clear(); sentenceForwardQuoteMap.clear(); sentenceBackwardQuoteMap.clear(); - if (enableTerminationCharacters !== false && hasTerminationCharacters) { + if ( + typeof terminationCharacters === 'object' && + Array.isArray(terminationCharacters) && + (terminationCharacterMode === 'custom' || terminationCharacterMode === 'custom-no-newlines') + ) { for (const {enabled, character1, character2, includeCharacterAtStart, includeCharacterAtEnd} of terminationCharacters) { if (!enabled) { continue; } if (character2 === null) { @@ -841,6 +846,7 @@ class TextScanner extends EventDispatcher { async _findTermDictionaryEntries(textSource, optionsContext) { const scanLength = this._scanLength; const sentenceScanExtent = this._sentenceScanExtent; + const sentenceTerminateAtNewlines = this._sentenceTerminateAtNewlines; const sentenceTerminatorMap = this._sentenceTerminatorMap; const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap; const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap; @@ -856,6 +862,7 @@ class TextScanner extends EventDispatcher { textSource, layoutAwareScan, sentenceScanExtent, + sentenceTerminateAtNewlines, sentenceTerminatorMap, sentenceForwardQuoteMap, sentenceBackwardQuoteMap @@ -866,6 +873,7 @@ class TextScanner extends EventDispatcher { async _findKanjiDictionaryEntries(textSource, optionsContext) { const sentenceScanExtent = this._sentenceScanExtent; + const sentenceTerminateAtNewlines = this._sentenceTerminateAtNewlines; const sentenceTerminatorMap = this._sentenceTerminatorMap; const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap; const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap; @@ -881,6 +889,7 @@ class TextScanner extends EventDispatcher { textSource, layoutAwareScan, sentenceScanExtent, + sentenceTerminateAtNewlines, sentenceTerminatorMap, sentenceForwardQuoteMap, sentenceBackwardQuoteMap diff --git a/ext/settings.html b/ext/settings.html index 4b1ccae7..1b167e22 100644 --- a/ext/settings.html +++ b/ext/settings.html @@ -1257,18 +1257,30 @@
-
Enable sentence termination characters
+
Sentence termination characters
- -
-
-
-
-
Configure sentence termination characters…
-
-
- +
+ + +
diff --git a/test/data/html/test-document1.html b/test/data/html/test-document1.html index af38540f..78d862c4 100644 --- a/test/data/html/test-document1.html +++ b/test/data/html/test-document1.html @@ -157,6 +157,48 @@ よみちゃん +
+ありがとございます1 +ありがとございます2 +ありがとございます3 +ありがとございます4 +ありがとございます5 +
+ +
+ありがとございます1 +ありがとございます2 +ありがとございます3 +ありがとございます4 +ありがとございます5 +
+