Sentence termination character mode (#1682)

* Change enableTerminationCharacters to terminationCharacterMode

* Update settings

* Update sentence extraction

* Update tests

* Add tests
This commit is contained in:
toasted-nutbread 2021-05-16 15:24:38 -04:00 committed by GitHub
parent 41ee167dfd
commit 66d048832f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 107 additions and 24 deletions

View File

@ -2249,6 +2249,9 @@ input[type=number].dictionary-priority {
align-content: flex-start;
justify-content: flex-start;
}
.horizontal-flex.horizontal-flex-nowrap {
flex-wrap: nowrap;
}
.horizontal-flex>* {
margin-left: 0.375em;
}

View File

@ -925,7 +925,7 @@
"type": "object",
"required": [
"scanExtent",
"enableTerminationCharacters",
"terminationCharacterMode",
"terminationCharacters"
],
"properties": {
@ -934,8 +934,9 @@
"minimum": 0,
"default": 200
},
"enableTerminationCharacters": {
"type": "boolean",
"terminationCharacterMode": {
"type": "string",
"enum": ["custom", "custom-no-newlines", "newlines", "none"],
"default": "custom"
},
"terminationCharacters": {

View File

@ -460,7 +460,8 @@ class OptionsUtil {
{async: true, update: this._updateVersion8.bind(this)},
{async: false, update: this._updateVersion9.bind(this)},
{async: true, update: this._updateVersion10.bind(this)},
{async: true, update: this._updateVersion11.bind(this)}
{async: false, update: this._updateVersion11.bind(this)},
{async: false, update: this._updateVersion12.bind(this)}
];
}
@ -811,4 +812,15 @@ class OptionsUtil {
}
return options;
}
_updateVersion12(options) {
// Version 12 changes:
// Changed sentenceParsing.enableTerminationCharacters to sentenceParsing.terminationCharacterMode.
for (const profile of options.profiles) {
const {sentenceParsing} = profile.options;
sentenceParsing.terminationCharacterMode = sentenceParsing.enableTerminationCharacters ? 'custom' : 'newlines';
delete sentenceParsing.enableTerminationCharacters;
}
return options;
}
}

View File

@ -69,6 +69,7 @@ class DocumentUtil {
* @param source The text source object, either `TextSourceRange` or `TextSourceElement`.
* @param layoutAwareScan Whether or not layout-aware scan mode should be used.
* @param extent The length of the sentence to extract.
* @param terminateAtNewlines Whether or not a sentence should be terminated at newline characters.
* @param terminatorMap A mapping of characters that terminate a sentence.
* Format:
* ```js
@ -87,7 +88,7 @@ class DocumentUtil {
* ```
* @returns The sentence and the offset to the original source: `{sentence: string, offset: integer}`.
*/
extractSentence(source, layoutAwareScan, extent, terminatorMap, forwardQuoteMap, backwardQuoteMap) {
extractSentence(source, layoutAwareScan, extent, terminateAtNewlines, terminatorMap, forwardQuoteMap, backwardQuoteMap) {
// Scan text
source = source.clone();
const startLength = source.setStartOffset(extent, layoutAwareScan);
@ -102,7 +103,7 @@ class DocumentUtil {
let quoteStack = [];
for (; pos1 > 0; --pos1) {
const c = text[pos1 - 1];
if (c === '\n') { break; }
if (c === '\n' && terminateAtNewlines) { break; }
if (quoteStack.length === 0) {
const terminatorInfo = terminatorMap.get(c);
@ -133,7 +134,7 @@ class DocumentUtil {
quoteStack = [];
for (; pos2 < textLength; ++pos2) {
const c = text[pos2];
if (c === '\n') { break; }
if (c === '\n' && terminateAtNewlines) { break; }
if (quoteStack.length === 0) {
const terminatorInfo = terminatorMap.get(c);

View File

@ -63,6 +63,7 @@ class TextScanner extends EventDispatcher {
this._layoutAwareScan = false;
this._preventMiddleMouse = false;
this._sentenceScanExtent = 0;
this._sentenceTerminateAtNewlines = true;
this._sentenceTerminatorMap = new Map();
this._sentenceForwardQuoteMap = new Map();
this._sentenceBackwardQuoteMap = new Map();
@ -209,19 +210,23 @@ class TextScanner extends EventDispatcher {
this._preventMiddleMouse = preventMiddleMouse;
}
if (typeof sentenceParsingOptions === 'object' && sentenceParsingOptions !== null) {
const {scanExtent, enableTerminationCharacters, terminationCharacters} = sentenceParsingOptions;
const hasTerminationCharacters = (typeof terminationCharacters === 'object' && Array.isArray(terminationCharacters));
const {scanExtent, terminationCharacterMode, terminationCharacters} = sentenceParsingOptions;
if (typeof scanExtent === 'number') {
this._sentenceScanExtent = sentenceParsingOptions.scanExtent;
}
if (typeof enableTerminationCharacters === 'boolean' || hasTerminationCharacters) {
if (typeof terminationCharacterMode === 'string') {
this._sentenceTerminateAtNewlines = (terminationCharacterMode === 'custom' || terminationCharacterMode === 'newlines');
const sentenceTerminatorMap = this._sentenceTerminatorMap;
const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
sentenceTerminatorMap.clear();
sentenceForwardQuoteMap.clear();
sentenceBackwardQuoteMap.clear();
if (enableTerminationCharacters !== false && hasTerminationCharacters) {
if (
typeof terminationCharacters === 'object' &&
Array.isArray(terminationCharacters) &&
(terminationCharacterMode === 'custom' || terminationCharacterMode === 'custom-no-newlines')
) {
for (const {enabled, character1, character2, includeCharacterAtStart, includeCharacterAtEnd} of terminationCharacters) {
if (!enabled) { continue; }
if (character2 === null) {
@ -841,6 +846,7 @@ class TextScanner extends EventDispatcher {
async _findTermDictionaryEntries(textSource, optionsContext) {
const scanLength = this._scanLength;
const sentenceScanExtent = this._sentenceScanExtent;
const sentenceTerminateAtNewlines = this._sentenceTerminateAtNewlines;
const sentenceTerminatorMap = this._sentenceTerminatorMap;
const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
@ -856,6 +862,7 @@ class TextScanner extends EventDispatcher {
textSource,
layoutAwareScan,
sentenceScanExtent,
sentenceTerminateAtNewlines,
sentenceTerminatorMap,
sentenceForwardQuoteMap,
sentenceBackwardQuoteMap
@ -866,6 +873,7 @@ class TextScanner extends EventDispatcher {
async _findKanjiDictionaryEntries(textSource, optionsContext) {
const sentenceScanExtent = this._sentenceScanExtent;
const sentenceTerminateAtNewlines = this._sentenceTerminateAtNewlines;
const sentenceTerminatorMap = this._sentenceTerminatorMap;
const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
@ -881,6 +889,7 @@ class TextScanner extends EventDispatcher {
textSource,
layoutAwareScan,
sentenceScanExtent,
sentenceTerminateAtNewlines,
sentenceTerminatorMap,
sentenceForwardQuoteMap,
sentenceBackwardQuoteMap

View File

@ -1257,18 +1257,30 @@
</div></div>
<div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable">
<div class="settings-item-left">
<div class="settings-item-label">Enable sentence termination characters</div>
<div class="settings-item-label">Sentence termination characters</div>
</div>
<div class="settings-item-right">
<label class="toggle"><input type="checkbox" data-setting="sentenceParsing.enableTerminationCharacters"><span class="toggle-body"><span class="toggle-track"></span><span class="toggle-knob"></span></span></label>
</div>
</div></div>
<div class="settings-item settings-item-button" data-modal-action="show,sentence-termination-characters"><div class="settings-item-inner">
<div class="settings-item-left">
<div class="settings-item-label">Configure sentence termination characters&hellip;</div>
</div>
<div class="settings-item-right open-panel-button-container">
<button class="icon-button"><span class="icon-button-inner"><span class="icon" data-icon="material-right-arrow"></span></span></button>
<div class="horizontal-flex horizontal-flex-nowrap">
<button class="low-emphasis" data-modal-action="show,sentence-termination-characters" id="configure-sentence-termination-characters-button" hidden>Configure&hellip;</button>
<select data-setting="sentenceParsing.terminationCharacterMode"
data-transform='{
"type": "setVisibility",
"selector": "#configure-sentence-termination-characters-button",
"condition": {
"op": "||",
"value": [
{"op": "===", "value": "custom"},
{"op": "===", "value": "custom-no-newlines"}
]
}
}'
>
<option value="custom">Custom</option>
<option value="custom-no-newlines">Custom, no newlines</option>
<option value="newlines">Newlines only</option>
<option value="none">None</option>
</select>
</div>
</div>
</div></div>
</div>

View File

@ -157,6 +157,48 @@
<img src="data:image/gif;base64,R0lGODdhBwAHAIABAAAAAP///ywAAAAABwAHAAACDIRvEaC32FpCbEkKCgA7" alt="よみちゃん" title="よみちゃん" style="width: 70px; height: 70px; image-rendering: crisp-edges; image-rendering: pixelated; display: block;">
</div>
<div
class="test"
data-test-type="scan"
data-element-from-point-selector="span:nth-of-type(3)"
data-caret-range-from-point-selector="span:nth-of-type(3)"
data-start-node-selector="span:nth-of-type(3)"
data-start-offset="0"
data-end-node-selector="span:nth-of-type(3)"
data-end-offset="0"
data-result-type="TextSourceRange"
data-sentence-scan-extent="22"
data-sentence="ありがとございます3"
data-terminate-at-newlines="true"
>
<span>ありがとございます1</span>
<span>ありがとございます2</span>
<span>ありがとございます3</span>
<span>ありがとございます4</span>
<span>ありがとございます5</span>
</div>
<div
class="test"
data-test-type="scan"
data-element-from-point-selector="span:nth-of-type(3)"
data-caret-range-from-point-selector="span:nth-of-type(3)"
data-start-node-selector="span:nth-of-type(3)"
data-start-offset="0"
data-end-node-selector="span:nth-of-type(3)"
data-end-offset="0"
data-result-type="TextSourceRange"
data-sentence-scan-extent="22"
data-sentence="ありがとございます1&#10;ありがとございます2&#10;ありがとございます3&#10;ありがとございます4"
data-terminate-at-newlines="false"
>
<span>ありがとございます1</span>
<span>ありがとございます2</span>
<span>ありがとございます3</span>
<span>ありがとございます4</span>
<span>ありがとございます5</span>
</div>
<div
class="test"
data-test-type="text-source-range-seek"

View File

@ -129,7 +129,8 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
resultType,
sentenceScanExtent,
sentence,
hasImposter
hasImposter,
terminateAtNewlines
} = testElement.dataset;
const elementFromPointValue = querySelectorChildOrSelf(testElement, elementFromPointSelector);
@ -140,6 +141,7 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
startOffset = parseInt(startOffset, 10);
endOffset = parseInt(endOffset, 10);
sentenceScanExtent = parseInt(sentenceScanExtent, 10);
terminateAtNewlines = (terminateAtNewlines !== 'false');
assert.notStrictEqual(elementFromPointValue, null);
assert.notStrictEqual(caretRangeFromPointValue, null);
@ -200,6 +202,7 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
source,
false,
sentenceScanExtent,
terminateAtNewlines,
terminatorMap,
forwardQuoteMap,
backwardQuoteMap

View File

@ -438,7 +438,7 @@ function createProfileOptionsUpdatedTestData1() {
},
sentenceParsing: {
scanExtent: 200,
enableTerminationCharacters: true,
terminationCharacterMode: 'custom',
terminationCharacters: [
{enabled: true, character1: '「', character2: '」', includeCharacterAtStart: false, includeCharacterAtEnd: false},
{enabled: true, character1: '『', character2: '』', includeCharacterAtStart: false, includeCharacterAtEnd: false},
@ -576,7 +576,7 @@ function createOptionsUpdatedTestData1() {
}
],
profileCurrent: 0,
version: 11,
version: 12,
global: {
database: {
prefixWildcardsSupported: false