Sentence termination character mode (#1682)
* Change enableTerminationCharacters to terminationCharacterMode
* Update settings
* Update sentence extraction
* Update tests
* Add tests
This commit is contained in:
parent
41ee167dfd
commit
66d048832f
@ -2249,6 +2249,9 @@ input[type=number].dictionary-priority {
|
|||||||
align-content: flex-start;
|
align-content: flex-start;
|
||||||
justify-content: flex-start;
|
justify-content: flex-start;
|
||||||
}
|
}
|
||||||
|
.horizontal-flex.horizontal-flex-nowrap {
|
||||||
|
flex-wrap: nowrap;
|
||||||
|
}
|
||||||
.horizontal-flex>* {
|
.horizontal-flex>* {
|
||||||
margin-left: 0.375em;
|
margin-left: 0.375em;
|
||||||
}
|
}
|
||||||
|
@ -925,7 +925,7 @@
|
|||||||
"type": "object",
|
"type": "object",
|
||||||
"required": [
|
"required": [
|
||||||
"scanExtent",
|
"scanExtent",
|
||||||
"enableTerminationCharacters",
|
"terminationCharacterMode",
|
||||||
"terminationCharacters"
|
"terminationCharacters"
|
||||||
],
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
@ -934,8 +934,9 @@
|
|||||||
"minimum": 0,
|
"minimum": 0,
|
||||||
"default": 200
|
"default": 200
|
||||||
},
|
},
|
||||||
"enableTerminationCharacters": {
|
"terminationCharacterMode": {
|
||||||
"type": "boolean",
|
"type": "string",
|
||||||
|
"enum": ["custom", "custom-no-newlines", "newlines", "none"],
|
||||||
"default": true
|
"default": "custom"
|
||||||
},
|
},
|
||||||
"terminationCharacters": {
|
"terminationCharacters": {
|
||||||
|
@ -460,7 +460,8 @@ class OptionsUtil {
|
|||||||
{async: true, update: this._updateVersion8.bind(this)},
|
{async: true, update: this._updateVersion8.bind(this)},
|
||||||
{async: false, update: this._updateVersion9.bind(this)},
|
{async: false, update: this._updateVersion9.bind(this)},
|
||||||
{async: true, update: this._updateVersion10.bind(this)},
|
{async: true, update: this._updateVersion10.bind(this)},
|
||||||
{async: true, update: this._updateVersion11.bind(this)}
|
{async: false, update: this._updateVersion11.bind(this)},
|
||||||
|
{async: false, update: this._updateVersion12.bind(this)}
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -811,4 +812,15 @@ class OptionsUtil {
|
|||||||
}
|
}
|
||||||
return options;
|
return options;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_updateVersion12(options) {
|
||||||
|
// Version 12 changes:
|
||||||
|
// Changed sentenceParsing.enableTerminationCharacters to sentenceParsing.terminationCharacterMode.
|
||||||
|
for (const profile of options.profiles) {
|
||||||
|
const {sentenceParsing} = profile.options;
|
||||||
|
sentenceParsing.terminationCharacterMode = sentenceParsing.enableTerminationCharacters ? 'custom' : 'newlines';
|
||||||
|
delete sentenceParsing.enableTerminationCharacters;
|
||||||
|
}
|
||||||
|
return options;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -69,6 +69,7 @@ class DocumentUtil {
|
|||||||
* @param source The text source object, either `TextSourceRange` or `TextSourceElement`.
|
* @param source The text source object, either `TextSourceRange` or `TextSourceElement`.
|
||||||
* @param layoutAwareScan Whether or not layout-aware scan mode should be used.
|
* @param layoutAwareScan Whether or not layout-aware scan mode should be used.
|
||||||
* @param extent The length of the sentence to extract.
|
* @param extent The length of the sentence to extract.
|
||||||
|
* @param terminateAtNewlines Whether or not a sentence should be terminated at newline characters.
|
||||||
* @param terminatorMap A mapping of characters that terminate a sentence.
|
* @param terminatorMap A mapping of characters that terminate a sentence.
|
||||||
* Format:
|
* Format:
|
||||||
* ```js
|
* ```js
|
||||||
@ -87,7 +88,7 @@ class DocumentUtil {
|
|||||||
* ```
|
* ```
|
||||||
* @returns The sentence and the offset to the original source: `{sentence: string, offset: integer}`.
|
* @returns The sentence and the offset to the original source: `{sentence: string, offset: integer}`.
|
||||||
*/
|
*/
|
||||||
extractSentence(source, layoutAwareScan, extent, terminatorMap, forwardQuoteMap, backwardQuoteMap) {
|
extractSentence(source, layoutAwareScan, extent, terminateAtNewlines, terminatorMap, forwardQuoteMap, backwardQuoteMap) {
|
||||||
// Scan text
|
// Scan text
|
||||||
source = source.clone();
|
source = source.clone();
|
||||||
const startLength = source.setStartOffset(extent, layoutAwareScan);
|
const startLength = source.setStartOffset(extent, layoutAwareScan);
|
||||||
@ -102,7 +103,7 @@ class DocumentUtil {
|
|||||||
let quoteStack = [];
|
let quoteStack = [];
|
||||||
for (; pos1 > 0; --pos1) {
|
for (; pos1 > 0; --pos1) {
|
||||||
const c = text[pos1 - 1];
|
const c = text[pos1 - 1];
|
||||||
if (c === '\n') { break; }
|
if (c === '\n' && terminateAtNewlines) { break; }
|
||||||
|
|
||||||
if (quoteStack.length === 0) {
|
if (quoteStack.length === 0) {
|
||||||
const terminatorInfo = terminatorMap.get(c);
|
const terminatorInfo = terminatorMap.get(c);
|
||||||
@ -133,7 +134,7 @@ class DocumentUtil {
|
|||||||
quoteStack = [];
|
quoteStack = [];
|
||||||
for (; pos2 < textLength; ++pos2) {
|
for (; pos2 < textLength; ++pos2) {
|
||||||
const c = text[pos2];
|
const c = text[pos2];
|
||||||
if (c === '\n') { break; }
|
if (c === '\n' && terminateAtNewlines) { break; }
|
||||||
|
|
||||||
if (quoteStack.length === 0) {
|
if (quoteStack.length === 0) {
|
||||||
const terminatorInfo = terminatorMap.get(c);
|
const terminatorInfo = terminatorMap.get(c);
|
||||||
|
@ -63,6 +63,7 @@ class TextScanner extends EventDispatcher {
|
|||||||
this._layoutAwareScan = false;
|
this._layoutAwareScan = false;
|
||||||
this._preventMiddleMouse = false;
|
this._preventMiddleMouse = false;
|
||||||
this._sentenceScanExtent = 0;
|
this._sentenceScanExtent = 0;
|
||||||
|
this._sentenceTerminateAtNewlines = true;
|
||||||
this._sentenceTerminatorMap = new Map();
|
this._sentenceTerminatorMap = new Map();
|
||||||
this._sentenceForwardQuoteMap = new Map();
|
this._sentenceForwardQuoteMap = new Map();
|
||||||
this._sentenceBackwardQuoteMap = new Map();
|
this._sentenceBackwardQuoteMap = new Map();
|
||||||
@ -209,19 +210,23 @@ class TextScanner extends EventDispatcher {
|
|||||||
this._preventMiddleMouse = preventMiddleMouse;
|
this._preventMiddleMouse = preventMiddleMouse;
|
||||||
}
|
}
|
||||||
if (typeof sentenceParsingOptions === 'object' && sentenceParsingOptions !== null) {
|
if (typeof sentenceParsingOptions === 'object' && sentenceParsingOptions !== null) {
|
||||||
const {scanExtent, enableTerminationCharacters, terminationCharacters} = sentenceParsingOptions;
|
const {scanExtent, terminationCharacterMode, terminationCharacters} = sentenceParsingOptions;
|
||||||
const hasTerminationCharacters = (typeof terminationCharacters === 'object' && Array.isArray(terminationCharacters));
|
|
||||||
if (typeof scanExtent === 'number') {
|
if (typeof scanExtent === 'number') {
|
||||||
this._sentenceScanExtent = sentenceParsingOptions.scanExtent;
|
this._sentenceScanExtent = sentenceParsingOptions.scanExtent;
|
||||||
}
|
}
|
||||||
if (typeof enableTerminationCharacters === 'boolean' || hasTerminationCharacters) {
|
if (typeof terminationCharacterMode === 'string') {
|
||||||
|
this._sentenceTerminateAtNewlines = (terminationCharacterMode === 'custom' || terminationCharacterMode === 'newlines');
|
||||||
const sentenceTerminatorMap = this._sentenceTerminatorMap;
|
const sentenceTerminatorMap = this._sentenceTerminatorMap;
|
||||||
const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
|
const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
|
||||||
const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
|
const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
|
||||||
sentenceTerminatorMap.clear();
|
sentenceTerminatorMap.clear();
|
||||||
sentenceForwardQuoteMap.clear();
|
sentenceForwardQuoteMap.clear();
|
||||||
sentenceBackwardQuoteMap.clear();
|
sentenceBackwardQuoteMap.clear();
|
||||||
if (enableTerminationCharacters !== false && hasTerminationCharacters) {
|
if (
|
||||||
|
typeof terminationCharacters === 'object' &&
|
||||||
|
Array.isArray(terminationCharacters) &&
|
||||||
|
(terminationCharacterMode === 'custom' || terminationCharacterMode === 'custom-no-newlines')
|
||||||
|
) {
|
||||||
for (const {enabled, character1, character2, includeCharacterAtStart, includeCharacterAtEnd} of terminationCharacters) {
|
for (const {enabled, character1, character2, includeCharacterAtStart, includeCharacterAtEnd} of terminationCharacters) {
|
||||||
if (!enabled) { continue; }
|
if (!enabled) { continue; }
|
||||||
if (character2 === null) {
|
if (character2 === null) {
|
||||||
@ -841,6 +846,7 @@ class TextScanner extends EventDispatcher {
|
|||||||
async _findTermDictionaryEntries(textSource, optionsContext) {
|
async _findTermDictionaryEntries(textSource, optionsContext) {
|
||||||
const scanLength = this._scanLength;
|
const scanLength = this._scanLength;
|
||||||
const sentenceScanExtent = this._sentenceScanExtent;
|
const sentenceScanExtent = this._sentenceScanExtent;
|
||||||
|
const sentenceTerminateAtNewlines = this._sentenceTerminateAtNewlines;
|
||||||
const sentenceTerminatorMap = this._sentenceTerminatorMap;
|
const sentenceTerminatorMap = this._sentenceTerminatorMap;
|
||||||
const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
|
const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
|
||||||
const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
|
const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
|
||||||
@ -856,6 +862,7 @@ class TextScanner extends EventDispatcher {
|
|||||||
textSource,
|
textSource,
|
||||||
layoutAwareScan,
|
layoutAwareScan,
|
||||||
sentenceScanExtent,
|
sentenceScanExtent,
|
||||||
|
sentenceTerminateAtNewlines,
|
||||||
sentenceTerminatorMap,
|
sentenceTerminatorMap,
|
||||||
sentenceForwardQuoteMap,
|
sentenceForwardQuoteMap,
|
||||||
sentenceBackwardQuoteMap
|
sentenceBackwardQuoteMap
|
||||||
@ -866,6 +873,7 @@ class TextScanner extends EventDispatcher {
|
|||||||
|
|
||||||
async _findKanjiDictionaryEntries(textSource, optionsContext) {
|
async _findKanjiDictionaryEntries(textSource, optionsContext) {
|
||||||
const sentenceScanExtent = this._sentenceScanExtent;
|
const sentenceScanExtent = this._sentenceScanExtent;
|
||||||
|
const sentenceTerminateAtNewlines = this._sentenceTerminateAtNewlines;
|
||||||
const sentenceTerminatorMap = this._sentenceTerminatorMap;
|
const sentenceTerminatorMap = this._sentenceTerminatorMap;
|
||||||
const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
|
const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
|
||||||
const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
|
const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
|
||||||
@ -881,6 +889,7 @@ class TextScanner extends EventDispatcher {
|
|||||||
textSource,
|
textSource,
|
||||||
layoutAwareScan,
|
layoutAwareScan,
|
||||||
sentenceScanExtent,
|
sentenceScanExtent,
|
||||||
|
sentenceTerminateAtNewlines,
|
||||||
sentenceTerminatorMap,
|
sentenceTerminatorMap,
|
||||||
sentenceForwardQuoteMap,
|
sentenceForwardQuoteMap,
|
||||||
sentenceBackwardQuoteMap
|
sentenceBackwardQuoteMap
|
||||||
|
@ -1257,18 +1257,30 @@
|
|||||||
</div></div>
|
</div></div>
|
||||||
<div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable">
|
<div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable">
|
||||||
<div class="settings-item-left">
|
<div class="settings-item-left">
|
||||||
<div class="settings-item-label">Enable sentence termination characters</div>
|
<div class="settings-item-label">Sentence termination characters</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="settings-item-right">
|
<div class="settings-item-right">
|
||||||
<label class="toggle"><input type="checkbox" data-setting="sentenceParsing.enableTerminationCharacters"><span class="toggle-body"><span class="toggle-track"></span><span class="toggle-knob"></span></span></label>
|
<div class="horizontal-flex horizontal-flex-nowrap">
|
||||||
</div>
|
<button class="low-emphasis" data-modal-action="show,sentence-termination-characters" id="configure-sentence-termination-characters-button" hidden>Configure…</button>
|
||||||
</div></div>
|
<select data-setting="sentenceParsing.terminationCharacterMode"
|
||||||
<div class="settings-item settings-item-button" data-modal-action="show,sentence-termination-characters"><div class="settings-item-inner">
|
data-transform='{
|
||||||
<div class="settings-item-left">
|
"type": "setVisibility",
|
||||||
<div class="settings-item-label">Configure sentence termination characters…</div>
|
"selector": "#configure-sentence-termination-characters-button",
|
||||||
</div>
|
"condition": {
|
||||||
<div class="settings-item-right open-panel-button-container">
|
"op": "||",
|
||||||
<button class="icon-button"><span class="icon-button-inner"><span class="icon" data-icon="material-right-arrow"></span></span></button>
|
"value": [
|
||||||
|
{"op": "===", "value": "custom"},
|
||||||
|
{"op": "===", "value": "custom-no-newlines"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
>
|
||||||
|
<option value="custom">Custom</option>
|
||||||
|
<option value="custom-no-newlines">Custom, no newlines</option>
|
||||||
|
<option value="newlines">Newlines only</option>
|
||||||
|
<option value="none">None</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div></div>
|
</div></div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -157,6 +157,48 @@
|
|||||||
<img src="" alt="よみちゃん" title="よみちゃん" style="width: 70px; height: 70px; image-rendering: crisp-edges; image-rendering: pixelated; display: block;">
|
<img src="" alt="よみちゃん" title="よみちゃん" style="width: 70px; height: 70px; image-rendering: crisp-edges; image-rendering: pixelated; display: block;">
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div
|
||||||
|
class="test"
|
||||||
|
data-test-type="scan"
|
||||||
|
data-element-from-point-selector="span:nth-of-type(3)"
|
||||||
|
data-caret-range-from-point-selector="span:nth-of-type(3)"
|
||||||
|
data-start-node-selector="span:nth-of-type(3)"
|
||||||
|
data-start-offset="0"
|
||||||
|
data-end-node-selector="span:nth-of-type(3)"
|
||||||
|
data-end-offset="0"
|
||||||
|
data-result-type="TextSourceRange"
|
||||||
|
data-sentence-scan-extent="22"
|
||||||
|
data-sentence="ありがとございます3"
|
||||||
|
data-terminate-at-newlines="true"
|
||||||
|
>
|
||||||
|
<span>ありがとございます1</span>
|
||||||
|
<span>ありがとございます2</span>
|
||||||
|
<span>ありがとございます3</span>
|
||||||
|
<span>ありがとございます4</span>
|
||||||
|
<span>ありがとございます5</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div
|
||||||
|
class="test"
|
||||||
|
data-test-type="scan"
|
||||||
|
data-element-from-point-selector="span:nth-of-type(3)"
|
||||||
|
data-caret-range-from-point-selector="span:nth-of-type(3)"
|
||||||
|
data-start-node-selector="span:nth-of-type(3)"
|
||||||
|
data-start-offset="0"
|
||||||
|
data-end-node-selector="span:nth-of-type(3)"
|
||||||
|
data-end-offset="0"
|
||||||
|
data-result-type="TextSourceRange"
|
||||||
|
data-sentence-scan-extent="22"
|
||||||
|
data-sentence="ありがとございます1 ありがとございます2 ありがとございます3 ありがとございます4"
|
||||||
|
data-terminate-at-newlines="false"
|
||||||
|
>
|
||||||
|
<span>ありがとございます1</span>
|
||||||
|
<span>ありがとございます2</span>
|
||||||
|
<span>ありがとございます3</span>
|
||||||
|
<span>ありがとございます4</span>
|
||||||
|
<span>ありがとございます5</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div
|
<div
|
||||||
class="test"
|
class="test"
|
||||||
data-test-type="text-source-range-seek"
|
data-test-type="text-source-range-seek"
|
||||||
|
@ -129,7 +129,8 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
|
|||||||
resultType,
|
resultType,
|
||||||
sentenceScanExtent,
|
sentenceScanExtent,
|
||||||
sentence,
|
sentence,
|
||||||
hasImposter
|
hasImposter,
|
||||||
|
terminateAtNewlines
|
||||||
} = testElement.dataset;
|
} = testElement.dataset;
|
||||||
|
|
||||||
const elementFromPointValue = querySelectorChildOrSelf(testElement, elementFromPointSelector);
|
const elementFromPointValue = querySelectorChildOrSelf(testElement, elementFromPointSelector);
|
||||||
@ -140,6 +141,7 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
|
|||||||
startOffset = parseInt(startOffset, 10);
|
startOffset = parseInt(startOffset, 10);
|
||||||
endOffset = parseInt(endOffset, 10);
|
endOffset = parseInt(endOffset, 10);
|
||||||
sentenceScanExtent = parseInt(sentenceScanExtent, 10);
|
sentenceScanExtent = parseInt(sentenceScanExtent, 10);
|
||||||
|
terminateAtNewlines = (terminateAtNewlines !== 'false');
|
||||||
|
|
||||||
assert.notStrictEqual(elementFromPointValue, null);
|
assert.notStrictEqual(elementFromPointValue, null);
|
||||||
assert.notStrictEqual(caretRangeFromPointValue, null);
|
assert.notStrictEqual(caretRangeFromPointValue, null);
|
||||||
@ -200,6 +202,7 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
|
|||||||
source,
|
source,
|
||||||
false,
|
false,
|
||||||
sentenceScanExtent,
|
sentenceScanExtent,
|
||||||
|
terminateAtNewlines,
|
||||||
terminatorMap,
|
terminatorMap,
|
||||||
forwardQuoteMap,
|
forwardQuoteMap,
|
||||||
backwardQuoteMap
|
backwardQuoteMap
|
||||||
|
@ -438,7 +438,7 @@ function createProfileOptionsUpdatedTestData1() {
|
|||||||
},
|
},
|
||||||
sentenceParsing: {
|
sentenceParsing: {
|
||||||
scanExtent: 200,
|
scanExtent: 200,
|
||||||
enableTerminationCharacters: true,
|
terminationCharacterMode: 'custom',
|
||||||
terminationCharacters: [
|
terminationCharacters: [
|
||||||
{enabled: true, character1: '「', character2: '」', includeCharacterAtStart: false, includeCharacterAtEnd: false},
|
{enabled: true, character1: '「', character2: '」', includeCharacterAtStart: false, includeCharacterAtEnd: false},
|
||||||
{enabled: true, character1: '『', character2: '』', includeCharacterAtStart: false, includeCharacterAtEnd: false},
|
{enabled: true, character1: '『', character2: '』', includeCharacterAtStart: false, includeCharacterAtEnd: false},
|
||||||
@ -576,7 +576,7 @@ function createOptionsUpdatedTestData1() {
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
profileCurrent: 0,
|
profileCurrent: 0,
|
||||||
version: 11,
|
version: 12,
|
||||||
global: {
|
global: {
|
||||||
database: {
|
database: {
|
||||||
prefixWildcardsSupported: false
|
prefixWildcardsSupported: false
|
||||||
|
Loading…
Reference in New Issue
Block a user