Customizable sentence parsing (#1217)

* Add new sentenceParsing options

* Update TextScanner.setOptions

* Assign terminator/quote maps

* Pass sentence parsing info to extractSentence

* Simplify setting

* Add setting for enableTerminationCharacters

* Create new settings for sentence termination characters
This commit is contained in:
toasted-nutbread 2021-01-10 14:43:06 -05:00 committed by GitHub
parent 083da93142
commit f6a38f40dc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 638 additions and 62 deletions

View File

@ -1856,6 +1856,81 @@ input.translation-text-replacement-test-output {
display: none;
}
.sentence-termination-character-list-table {
width: 100%;
table-layout: fixed;
border-spacing: 0.25em;
margin-top: 0.5em;
min-width: 400px;
}
.sentence-termination-character-list-table thead td {
white-space: nowrap;
font-size: var(--font-size-small);
line-height: 1;
text-align: left;
vertical-align: bottom;
font-weight: normal;
}
.sentence-termination-character-list-table td {
vertical-align: middle;
padding: 0;
}
.sentence-termination-character-list-table td:nth-child(1) {
width: 2em;
}
.sentence-termination-character-list-table td:nth-child(2) {
width: 4em;
}
.sentence-termination-character-list-table td:nth-child(3) {
width: 25%;
}
.sentence-termination-character-list-table td:nth-child(4) {
width: 18.5%;
}
.sentence-termination-character-list-table td:nth-child(5) {
width: 18.5%;
}
.sentence-termination-character-list-table td:nth-child(6) {
width: 40%;
}
.sentence-termination-character-list-table td:nth-child(7) {
width: 3em;
}
select.sentence-termination-character-type,
input.sentence-termination-character-input1,
input.sentence-termination-character-input2 {
width: 100%;
}
.sentence-termination-character-input2-alt {
text-align: center;
}
.sentence-termination-character-entry:not([data-type=quote]) .sentence-termination-character-input2 {
display: none;
}
.sentence-termination-character-entry[data-type=quote] .sentence-termination-character-input2-alt {
display: none;
}
.sentence-termination-character-include-list {
display: flex;
flex-flow: row nowrap;
}
.sentence-termination-character-include {
display: flex;
flex-flow: row nowrap;
white-space: nowrap;
align-items: center;
}
.sentence-termination-character-include>:first-child {
margin-right: 0.375em;
}
.sentence-termination-character-include+.sentence-termination-character-include {
margin-left: 1em;
}
#sentence-termination-character-list-empty {
flex: 0 1 auto;
margin-top: 0.5em;
}
/* Generic layouts */
.margin-above {

View File

@ -846,13 +846,72 @@
"sentenceParsing": {
"type": "object",
"required": [
"scanExtent"
"scanExtent",
"enableTerminationCharacters",
"terminationCharacters"
],
"properties": {
"scanExtent": {
"type": "integer",
"minimum": 0,
"default": 200
},
"enableTerminationCharacters": {
"type": "boolean",
"default": true
},
"terminationCharacters": {
"type": "array",
"items": {
"type": "object",
"required": [
"enabled",
"character1",
"character2",
"includeCharacterAtStart",
"includeCharacterAtEnd"
],
"properties": {
"enabled": {
"type": "boolean",
"default": true
},
"character1": {
"type": "string",
"default": "\"",
"minLength": 1,
"maxLength": 1
},
"character2": {
"type": ["string", "null"],
"default": "\"",
"minLength": 1,
"maxLength": 1
},
"includeCharacterAtStart": {
"type": "boolean",
"default": false
},
"includeCharacterAtEnd": {
"type": "boolean",
"default": false
}
}
},
"default": [
{"enabled": true, "character1": "「", "character2": "」", "includeCharacterAtStart": false, "includeCharacterAtEnd": false},
{"enabled": true, "character1": "『", "character2": "』", "includeCharacterAtStart": false, "includeCharacterAtEnd": false},
{"enabled": true, "character1": "\"", "character2": "\"", "includeCharacterAtStart": false, "includeCharacterAtEnd": false},
{"enabled": true, "character1": "'", "character2": "'", "includeCharacterAtStart": false, "includeCharacterAtEnd": false},
{"enabled": true, "character1": ".", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true},
{"enabled": true, "character1": "!", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true},
{"enabled": true, "character1": "?", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true},
{"enabled": true, "character1": ".", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true},
{"enabled": true, "character1": "。", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true},
{"enabled": true, "character1": "!", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true},
{"enabled": true, "character1": "?", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true},
{"enabled": true, "character1": "…", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}
]
}
}
}

View File

@ -684,13 +684,30 @@ class OptionsUtil {
// Version 8 changes:
// Added translation.textReplacements.
// Moved anki.sentenceExt to sentenceParsing.scanExtent.
// Added sentenceParsing.enableTerminationCharacters.
// Added sentenceParsing.terminationCharacters.
for (const profile of options.profiles) {
profile.options.translation.textReplacements = {
searchOriginal: true,
groups: []
};
profile.options.sentenceParsing = {
scanExtent: profile.options.anki.sentenceExt
scanExtent: profile.options.anki.sentenceExt,
enableTerminationCharacters: true,
terminationCharacters: [
{enabled: true, character1: '「', character2: '」', includeCharacterAtStart: false, includeCharacterAtEnd: false},
{enabled: true, character1: '『', character2: '』', includeCharacterAtStart: false, includeCharacterAtEnd: false},
{enabled: true, character1: '"', character2: '"', includeCharacterAtStart: false, includeCharacterAtEnd: false},
{enabled: true, character1: '\'', character2: '\'', includeCharacterAtStart: false, includeCharacterAtEnd: false},
{enabled: true, character1: '.', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '!', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '?', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '。', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '…', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}
]
};
delete profile.options.anki.sentenceExt;
}

View File

@ -0,0 +1,255 @@
/*
* Copyright (C) 2021 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/* global
* OptionsUtil
*/
/**
 * Controller for the "Sentence Termination Characters" list on the settings page.
 * It renders one `SentenceTerminationCharacterEntry` per item of the profile option
 * `sentenceParsing.terminationCharacters` and handles the Add and Reset buttons.
 */
class SentenceTerminationCharactersController {
    /**
     * @param settingsController The settings controller used to read and modify profile options.
     */
    constructor(settingsController) {
        this._settingsController = settingsController;
        this._entries = [];
        this._addButton = null;
        this._resetButton = null;
        this._listTable = null;
        this._listContainer = null;
        this._emptyIndicator = null;
    }

    /** The settings controller this instance was created with. */
    get settingsController() {
        return this._settingsController;
    }

    /**
     * Looks up the DOM nodes, attaches the event handlers and renders the current list.
     */
    async prepare() {
        const find = (selector) => document.querySelector(selector);
        this._addButton = find('#sentence-termination-character-list-add');
        this._resetButton = find('#sentence-termination-character-list-reset');
        this._listTable = find('#sentence-termination-character-list-table');
        this._listContainer = find('#sentence-termination-character-list');
        this._emptyIndicator = find('#sentence-termination-character-list-empty');

        this._addButton.addEventListener('click', (e) => this._onAddClick(e));
        this._resetButton.addEventListener('click', (e) => this._onResetClick(e));
        this._settingsController.on('optionsChanged', (details) => this._onOptionsChanged(details));

        await this._updateOptions();
    }

    /**
     * Appends an entry to the end of the stored list and re-renders.
     * @param terminationCharacterEntry The entry object to append.
     */
    async addEntry(terminationCharacterEntry) {
        const {sentenceParsing} = await this._settingsController.getOptions();
        const insertionIndex = sentenceParsing.terminationCharacters.length;
        const spliceTarget = {
            action: 'splice',
            path: 'sentenceParsing.terminationCharacters',
            start: insertionIndex,
            deleteCount: 0,
            items: [terminationCharacterEntry]
        };
        await this._settingsController.modifyProfileSettings([spliceTarget]);
        await this._updateOptions();
    }

    /**
     * Removes the entry at `index` from the stored list and re-renders.
     * @param index The position of the entry to remove.
     * @returns `false` if the index is outside the current list, otherwise `true`.
     */
    async deleteEntry(index) {
        const {sentenceParsing} = await this._settingsController.getOptions();
        const count = sentenceParsing.terminationCharacters.length;
        const outOfRange = (index < 0 || index >= count);
        if (outOfRange) {
            return false;
        }
        const spliceTarget = {
            action: 'splice',
            path: 'sentenceParsing.terminationCharacters',
            start: index,
            deleteCount: 1,
            items: []
        };
        await this._settingsController.modifyProfileSettings([spliceTarget]);
        await this._updateOptions();
        return true;
    }

    /**
     * Forwards a list of modifications to the settings controller.
     * @param targets The modification descriptors.
     * @returns Whatever the settings controller resolves with.
     */
    async modifyProfileSettings(targets) {
        const results = await this._settingsController.modifyProfileSettings(targets);
        return results;
    }

    // Private

    /**
     * Discards every rendered row and rebuilds the table from `options`.
     */
    _onOptionsChanged({options}) {
        const staleEntries = this._entries;
        for (const stale of staleEntries) {
            stale.cleanup();
        }
        this._entries = [];

        const characters = options.sentenceParsing.terminationCharacters;
        for (const [position, characterData] of characters.entries()) {
            const rowNode = this._settingsController.instantiateTemplate('sentence-termination-character-entry');
            this._listContainer.appendChild(rowNode);
            const rowEntry = new SentenceTerminationCharacterEntry(this, characterData, position, rowNode);
            this._entries.push(rowEntry);
            rowEntry.prepare();
        }

        // Show either the table or the "empty" notice, never both.
        const listIsEmpty = (characters.length === 0);
        this._listTable.hidden = listIsEmpty;
        this._emptyIndicator.hidden = !listIsEmpty;
    }

    _onAddClick(e) {
        e.preventDefault();
        this._addNewEntry();
    }

    _onResetClick(e) {
        e.preventDefault();
        this._reset();
    }

    /**
     * Appends a new enabled quote entry using `"` for both characters.
     */
    async _addNewEntry() {
        const result = await this.addEntry({
            enabled: true,
            character1: '"',
            character2: '"',
            includeCharacterAtStart: false,
            includeCharacterAtEnd: false
        });
        return result;
    }

    /**
     * Re-reads the current options and re-renders the list from them.
     */
    async _updateOptions() {
        const currentOptions = await this._settingsController.getOptions();
        this._onOptionsChanged({options: currentOptions});
    }

    /**
     * Replaces the stored list with the list from the first profile of the default options.
     */
    async _reset() {
        const {profiles} = await this._getDefaultOptions();
        const defaultCharacters = profiles[0].options.sentenceParsing.terminationCharacters;
        await this._settingsController.setProfileSetting('sentenceParsing.terminationCharacters', defaultCharacters);
        await this._updateOptions();
    }

    /**
     * Builds the default options object using a fresh `OptionsUtil`.
     */
    async _getDefaultOptions() {
        const util = new OptionsUtil();
        await util.prepare();
        return util.getDefault();
    }
}
/**
 * A single row of the sentence termination character table.
 * An entry whose `character2` is `null` is a "terminator"; any other entry is a "quote".
 */
class SentenceTerminationCharacterEntry {
    /**
     * @param parent The owning controller. This class uses its `settingsController`
     *   property and its `deleteEntry(index)` method.
     * @param data The option entry: `{enabled, character1, character2, includeCharacterAtStart, includeCharacterAtEnd}`.
     * @param index The position of `data` within `sentenceParsing.terminationCharacters`.
     * @param node The row element instantiated from the entry template.
     */
    constructor(parent, data, index, node) {
        this._parent = parent;
        this._data = data;
        this._index = index;
        this._node = node;
        this._eventListeners = new EventListenerCollection();
        this._character1Input = null;
        this._character2Input = null;
        this._basePath = `sentenceParsing.terminationCharacters[${this._index}]`;
    }

    /**
     * Fills the row's controls from the entry data and attaches the event listeners.
     */
    prepare() {
        const {enabled, character1, character2, includeCharacterAtStart, includeCharacterAtEnd} = this._data;
        const node = this._node;

        const enabledToggle = node.querySelector('.sentence-termination-character-enabled');
        const typeSelect = node.querySelector('.sentence-termination-character-type');
        const character1Input = node.querySelector('.sentence-termination-character-input1');
        const character2Input = node.querySelector('.sentence-termination-character-input2');
        const includeAtStartCheckbox = node.querySelector('.sentence-termination-character-include-at-start');
        const includeAtEndCheckbox = node.querySelector('.sentence-termination-character-include-at-end');
        const menuButton = node.querySelector('.sentence-termination-character-entry-button');

        this._character1Input = character1Input;
        this._character2Input = character2Input;

        const type = (character2 === null ? 'terminator' : 'quote');
        node.dataset.type = type;

        enabledToggle.checked = enabled;
        typeSelect.value = type;
        character1Input.value = character1;
        character2Input.value = (character2 !== null ? character2 : '');
        includeAtStartCheckbox.checked = includeCharacterAtStart;
        includeAtEndCheckbox.checked = includeCharacterAtEnd;

        // NOTE(review): presumably a generic settings binder picks up `data-setting`
        // and persists these three controls; that binder is not part of this class.
        enabledToggle.dataset.setting = `${this._basePath}.enabled`;
        includeAtStartCheckbox.dataset.setting = `${this._basePath}.includeCharacterAtStart`;
        includeAtEndCheckbox.dataset.setting = `${this._basePath}.includeCharacterAtEnd`;

        this._eventListeners.addEventListener(typeSelect, 'change', this._onTypeSelectChange.bind(this), false);
        this._eventListeners.addEventListener(character1Input, 'change', this._onCharacterChange.bind(this, 1), false);
        this._eventListeners.addEventListener(character2Input, 'change', this._onCharacterChange.bind(this, 2), false);
        this._eventListeners.addEventListener(menuButton, 'menuClosed', this._onMenuClosed.bind(this), false);
    }

    /**
     * Removes all event listeners and detaches the row from its parent node, if any.
     */
    cleanup() {
        this._eventListeners.removeAllEventListeners();
        if (this._node.parentNode !== null) {
            this._node.parentNode.removeChild(this._node);
        }
    }

    // Private

    _onTypeSelectChange(e) {
        this._setHasCharacter2(e.currentTarget.value === 'quote');
    }

    /**
     * Handles a change of one of the two character inputs.
     * @param characterNumber `1` or `2`, selecting `character1` or `character2`.
     * @param e The change event; `e.currentTarget` is the input that changed.
     */
    _onCharacterChange(characterNumber, e) {
        const node = e.currentTarget;
        if (characterNumber === 2 && this._data.character2 === null) {
            // A terminator has no second character. Clear the input and stop here:
            // writing '' to character2 would turn the entry into a quote with an
            // empty character.
            node.value = '';
            return;
        }

        const value = node.value.substring(0, 1);
        if (value.length === 0) {
            // A stored character must be exactly one character long, so an emptied
            // input is reverted to the stored value instead of being saved.
            const previous = this._data[`character${characterNumber}`];
            node.value = (previous !== null ? previous : '');
            return;
        }

        this._setCharacterValue(node, characterNumber, value);
    }

    _onMenuClosed(e) {
        const {detail: {action}} = e;
        switch (action) {
            case 'delete':
                this._delete();
                break;
        }
    }

    /**
     * Asks the parent controller to delete this entry and waits for it to finish.
     */
    async _delete() {
        await this._parent.deleteEntry(this._index);
    }

    /**
     * Switches the entry between quote (`has === true`) and terminator (`has === false`).
     * When switching to a quote, `character2` starts out as a copy of `character1`.
     * The row's `data-type` is only updated if the setting was stored successfully.
     */
    async _setHasCharacter2(has) {
        const okay = await this._setCharacterValue(this._character2Input, 2, has ? this._data.character1 : null);
        if (okay) {
            const type = (!has ? 'terminator' : 'quote');
            this._node.dataset.type = type;
        }
    }

    /**
     * Stores a character value in the profile settings and mirrors the outcome in the input.
     * @param inputNode The input element to update afterwards.
     * @param characterNumber `1` or `2`.
     * @param value The new value: a string, or `null` for "no second character".
     * @returns `true` if the setting was stored. On failure the input is reset to the
     *   previously stored value and `false` is returned.
     */
    async _setCharacterValue(inputNode, characterNumber, value) {
        const pathEnd = `character${characterNumber}`;
        const r = await this._parent.settingsController.setProfileSetting(`${this._basePath}.${pathEnd}`, value);
        const okay = !r[0].error;
        if (okay) {
            this._data[pathEnd] = value;
        } else {
            value = this._data[pathEnd];
        }
        inputNode.value = (value !== null ? value : '');
        return okay;
    }
}

View File

@ -32,6 +32,7 @@
* ScanInputsController
* ScanInputsSimpleController
* SecondarySearchDictionaryController
* SentenceTerminationCharactersController
* SettingsController
* SettingsDisplayController
* StatusFooter
@ -124,6 +125,9 @@ async function setupGenericSettingsController(genericSettingController) {
const translationTextReplacementsController = new TranslationTextReplacementsController(settingsController);
translationTextReplacementsController.prepare();
const sentenceTerminationCharactersController = new SentenceTerminationCharactersController(settingsController);
sentenceTerminationCharactersController.prepare();
await Promise.all(preparePromises);
document.documentElement.dataset.loaded = 'true';

View File

@ -1125,29 +1125,31 @@
</div>
</div>
<div class="settings-group advanced-only">
<div class="settings-item">
<div class="settings-item-inner settings-item-inner-wrappable">
<div class="settings-item-left">
<div class="settings-item-label">
Sentence scanning extent
<a class="more-toggle more-only" data-parent-distance="4">(?)</a>
</div>
</div>
<div class="settings-item-right">
<input type="number" data-setting="sentenceParsing.scanExtent" min="0" step="1">
</div>
<div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable">
<div class="settings-item-left">
<div class="settings-item-label">Sentence scanning extent</div>
<div class="settings-item-description">Adjust how many characters are bidirectionally scanned to form a sentence.</div>
</div>
<div class="settings-item-children more" hidden>
<p>
This option controls the maximum scanning distance used to determine the bounds of a sentence,
in number of characters.
Sentence scanning is bidirectional and begins from both the start and end of the source term.
</p>
<p>
<a class="more-toggle" data-parent-distance="3">Less&hellip;</a>
</p>
<div class="settings-item-right">
<input type="number" data-setting="sentenceParsing.scanExtent" min="0" step="1">
</div>
</div>
</div></div>
<div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable">
<div class="settings-item-left">
<div class="settings-item-label">Enable sentence termination characters</div>
</div>
<div class="settings-item-right">
<label class="toggle"><input type="checkbox" data-setting="sentenceParsing.enableTerminationCharacters"><span class="toggle-body"><span class="toggle-track"></span><span class="toggle-knob"></span></span></label>
</div>
</div></div>
<div class="settings-item settings-item-button" data-modal-action="show,sentence-termination-characters"><div class="settings-item-inner">
<div class="settings-item-left">
<div class="settings-item-label">Configure sentence termination characters&hellip;</div>
</div>
<div class="settings-item-right open-panel-button-container">
<button class="icon-button"><span class="icon-button-inner"><span class="icon" data-icon="material-right-arrow"></span></span></button>
</div>
</div></div>
</div>
<!-- Translation -->
@ -2725,6 +2727,86 @@
</div></div></template>
<!-- Sentence parsing modal -->
<div id="sentence-termination-characters" class="modal-container" tabindex="-1" role="dialog" hidden><div class="modal-content">
<div class="modal-header">
<div class="modal-title">Sentence Termination Characters</div>
<div class="modal-header-button-container">
<div class="modal-header-button-group">
<button class="icon-button modal-header-button" data-modal-action="expand"><span class="icon-button-inner"><span class="icon" data-icon="expand"></span></span></button>
<button class="icon-button modal-header-button" data-modal-action="collapse"><span class="icon-button-inner"><span class="icon" data-icon="collapse"></span></span></button>
</div>
</div>
</div>
<div class="modal-body">
<p>
Sentences are terminated by punctuation and quotation marks, which can both be configured below.
</p>
<table class="sentence-termination-character-list-table" id="sentence-termination-character-list-table" hidden>
<thead><tr>
<td>#</td>
<td>Enabled</td>
<td>Type</td>
<td>Character 1</td>
<td>Character 2</td>
<td>Include character in sentence</td>
<td></td>
</tr></thead>
<tbody class="sentence-termination-character-list generic-list" id="sentence-termination-character-list"></tbody>
</table>
<div id="sentence-termination-character-list-empty" hidden>
No terminators defined.
</div>
</div>
<div class="modal-footer">
<button class="low-emphasis danger" id="sentence-termination-character-list-reset">Reset</button>
<button class="low-emphasis" id="sentence-termination-character-list-add">Add</button>
<button data-modal-action="hide">Close</button>
</div>
</div></div>
<!-- Sentence parsing templates -->
<template id="sentence-termination-character-entry-template"><tr class="sentence-termination-character-entry">
<td class="generic-list-index-prefix"></td>
<td>
<label class="toggle"><input type="checkbox" class="sentence-termination-character-enabled"><span class="toggle-body"><span class="toggle-track"></span><span class="toggle-knob"></span></span></label>
</td>
<td>
<select class="sentence-termination-character-type">
<option value="terminator">Terminator</option>
<option value="quote">Quote</option>
</select>
</td>
<td>
<input type="text" class="sentence-termination-character-input1" maxlength="1">
</td>
<td>
<input type="text" class="sentence-termination-character-input2" maxlength="1">
<div class="sentence-termination-character-input2-alt">&mdash;</div>
</td>
<td>
<div class="sentence-termination-character-include-list">
<label class="sentence-termination-character-include">
<label class="checkbox"><input type="checkbox" class="sentence-termination-character-include-at-start"><span class="checkbox-body"><span class="checkbox-fill"></span><span class="checkbox-border"></span><span class="checkbox-check"></span></span></label>
<span>At start</span>
</label>
<label class="sentence-termination-character-include">
<label class="checkbox"><input type="checkbox" class="sentence-termination-character-include-at-end"><span class="checkbox-body"><span class="checkbox-fill"></span><span class="checkbox-border"></span><span class="checkbox-check"></span></span></label>
<span>At end</span>
</label>
</div>
</td>
<td>
<button class="icon-button sentence-termination-character-entry-button" data-menu="sentence-termination-character-entry-menu" data-menu-position="below,left"><span class="icon-button-inner"><span class="icon" data-icon="kebab-menu"></span></span></button>
</td>
</tr></template>
<template id="sentence-termination-character-entry-menu-template"><div class="popup-menu-container" tabindex="-1" role="dialog"><div class="popup-menu">
<button class="popup-menu-item" data-menu-action="delete">Delete</button>
</div></div></template>
<!-- Scripts -->
<script src="/mixed/lib/jszip.min.js"></script>
<script src="/mixed/lib/wanakana.min.js"></script>
@ -2783,6 +2865,7 @@
<script src="/bg/js/settings2/nested-popups-controller.js"></script>
<script src="/bg/js/settings2/secondary-search-dictionary-controller.js"></script>
<script src="/bg/js/settings2/sentence-termination-characters-controller.js"></script>
<script src="/bg/js/settings2/settings-display-controller.js"></script>
<script src="/bg/js/settings2/translation-text-replacements-controller.js"></script>

View File

@ -326,9 +326,9 @@ class Frontend {
touchInputEnabled: scanningOptions.touchInputEnabled,
pointerEventsEnabled: scanningOptions.pointerEventsEnabled,
scanLength: scanningOptions.length,
sentenceScanExtent: sentenceParsingOptions.scanExtent,
layoutAwareScan: scanningOptions.layoutAwareScan,
preventMiddleMouse
preventMiddleMouse,
sentenceParsingOptions
});
this._updateTextScannerEnabled();

View File

@ -327,9 +327,9 @@ class Display extends EventDispatcher {
touchInputEnabled: scanningOptions.touchInputEnabled,
pointerEventsEnabled: scanningOptions.pointerEventsEnabled,
scanLength: scanningOptions.length,
sentenceScanExtent: sentenceParsingOptions.scanExtent,
layoutAwareScan: scanningOptions.layoutAwareScan,
preventMiddleMouse: scanningOptions.preventMiddleMouse.onSearchQuery
preventMiddleMouse: scanningOptions.preventMiddleMouse.onSearchQuery,
sentenceParsingOptions
}
});
@ -1832,9 +1832,9 @@ class Display extends EventDispatcher {
touchInputEnabled: false,
pointerEventsEnabled: false,
scanLength: scanningOptions.length,
sentenceScanExtent: sentenceParsingOptions.scanExtent,
layoutAwareScan: scanningOptions.layoutAwareScan,
preventMiddleMouse: false
preventMiddleMouse: false,
sentenceParsingOptions
});
this._definitionTextScanner.setEnabled(true);

View File

@ -24,24 +24,6 @@
class DocumentUtil {
constructor() {
this._transparentColorPattern = /rgba\s*\([^)]*,\s*0(?:\.0+)?\s*\)/;
const quoteArray = [
['「', '」'],
['『', '』'],
['\'', '\''],
['"', '"']
];
const terminatorString = '…。..?!';
this._terminatorMap = new Map();
for (const char of terminatorString) {
this._terminatorMap.set(char, [false, true]);
}
this._forwardQuoteMap = new Map();
this._backwardQuoteMap = new Map();
for (const [char1, char2] of quoteArray) {
this._forwardQuoteMap.set(char1, [char2, false]);
this._backwardQuoteMap.set(char2, [char1, false]);
}
}
getRangeFromPoint(x, y, deepContentScan) {
@ -81,11 +63,30 @@ class DocumentUtil {
}
}
extractSentence(source, layoutAwareScan, extent) {
const terminatorMap = this._terminatorMap;
const forwardQuoteMap = this._forwardQuoteMap;
const backwardQuoteMap = this._backwardQuoteMap;
/**
* Extract a sentence from a document.
* @param source The text source object, either `TextSourceRange` or `TextSourceElement`.
* @param layoutAwareScan Whether or not layout-aware scan mode should be used.
* @param extent The length of the sentence to extract.
* @param terminatorMap A mapping of characters that terminate a sentence.
* Format:
* ```js
* new Map([ [character: string, [includeCharacterAtStart: boolean, includeCharacterAtEnd: boolean]], ... ])
* ```
* @param forwardQuoteMap A mapping of quote characters that delimit a sentence.
* Format:
* ```js
* new Map([ [character: string, [otherCharacter: string, includeCharacterAtStart: boolean]], ... ])
* ```
* @param backwardQuoteMap A mapping of quote characters that delimit a sentence,
* which is the inverse of forwardQuoteMap.
* Format:
* ```js
* new Map([ [character: string, [otherCharacter: string, includeCharacterAtEnd: boolean]], ... ])
* ```
* @returns The sentence and the offset to the original source: `{sentence: string, offset: integer}`.
*/
extractSentence(source, layoutAwareScan, extent, terminatorMap, forwardQuoteMap, backwardQuoteMap) {
// Scan text
source = source.clone();
const startLength = source.setStartOffset(extent, layoutAwareScan);

View File

@ -59,9 +59,12 @@ class TextScanner extends EventDispatcher {
this._touchInputEnabled = false;
this._pointerEventsEnabled = false;
this._scanLength = 1;
this._sentenceScanExtent = 1;
this._layoutAwareScan = false;
this._preventMiddleMouse = false;
this._sentenceScanExtent = 0;
this._sentenceTerminatorMap = new Map();
this._sentenceForwardQuoteMap = new Map();
this._sentenceBackwardQuoteMap = new Map();
this._inputs = [];
this._enabled = false;
@ -142,9 +145,9 @@ class TextScanner extends EventDispatcher {
touchInputEnabled,
pointerEventsEnabled,
scanLength,
sentenceScanExtent,
layoutAwareScan,
preventMiddleMouse
preventMiddleMouse,
sentenceParsingOptions
}) {
if (Array.isArray(inputs)) {
this._inputs = inputs.map(({
@ -193,15 +196,38 @@ class TextScanner extends EventDispatcher {
if (typeof scanLength === 'number') {
this._scanLength = scanLength;
}
if (typeof sentenceScanExtent === 'number') {
this._sentenceScanExtent = sentenceScanExtent;
}
if (typeof layoutAwareScan === 'boolean') {
this._layoutAwareScan = layoutAwareScan;
}
if (typeof preventMiddleMouse === 'boolean') {
this._preventMiddleMouse = preventMiddleMouse;
}
if (typeof sentenceParsingOptions === 'object' && sentenceParsingOptions !== null) {
const {scanExtent, enableTerminationCharacters, terminationCharacters} = sentenceParsingOptions;
const hasTerminationCharacters = (typeof terminationCharacters === 'object' && Array.isArray(terminationCharacters));
if (typeof scanExtent === 'number') {
this._sentenceScanExtent = sentenceParsingOptions.scanExtent;
}
if (typeof enableTerminationCharacters === 'boolean' || hasTerminationCharacters) {
const sentenceTerminatorMap = this._sentenceTerminatorMap;
const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
sentenceTerminatorMap.clear();
sentenceForwardQuoteMap.clear();
sentenceBackwardQuoteMap.clear();
if (enableTerminationCharacters !== false && hasTerminationCharacters) {
for (const {enabled, character1, character2, includeCharacterAtStart, includeCharacterAtEnd} of terminationCharacters) {
if (!enabled) { continue; }
if (character2 === null) {
sentenceTerminatorMap.set(character1, [includeCharacterAtStart, includeCharacterAtEnd]);
} else {
sentenceForwardQuoteMap.set(character1, [character2, includeCharacterAtStart]);
sentenceBackwardQuoteMap.set(character2, [character1, includeCharacterAtEnd]);
}
}
}
}
}
}
getTextSourceContent(textSource, length, layoutAwareScan) {
@ -723,6 +749,9 @@ class TextScanner extends EventDispatcher {
async _findTerms(textSource, optionsContext) {
const scanLength = this._scanLength;
const sentenceScanExtent = this._sentenceScanExtent;
const sentenceTerminatorMap = this._sentenceTerminatorMap;
const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
const layoutAwareScan = this._layoutAwareScan;
const searchText = this.getTextSourceContent(textSource, scanLength, layoutAwareScan);
if (searchText.length === 0) { return null; }
@ -731,13 +760,23 @@ class TextScanner extends EventDispatcher {
if (definitions.length === 0) { return null; }
textSource.setEndOffset(length, layoutAwareScan);
const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent);
const sentence = this._documentUtil.extractSentence(
textSource,
layoutAwareScan,
sentenceScanExtent,
sentenceTerminatorMap,
sentenceForwardQuoteMap,
sentenceBackwardQuoteMap
);
return {definitions, sentence, type: 'terms'};
}
async _findKanji(textSource, optionsContext) {
const sentenceScanExtent = this._sentenceScanExtent;
const sentenceTerminatorMap = this._sentenceTerminatorMap;
const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap;
const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap;
const layoutAwareScan = this._layoutAwareScan;
const searchText = this.getTextSourceContent(textSource, 1, layoutAwareScan);
if (searchText.length === 0) { return null; }
@ -746,7 +785,14 @@ class TextScanner extends EventDispatcher {
if (definitions.length === 0) { return null; }
textSource.setEndOffset(1, layoutAwareScan);
const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent);
const sentence = this._documentUtil.extractSentence(
textSource,
layoutAwareScan,
sentenceScanExtent,
sentenceTerminatorMap,
sentenceForwardQuoteMap,
sentenceBackwardQuoteMap
);
return {definitions, sentence, type: 'kanji'};
}

View File

@ -181,8 +181,29 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR
}
if (source === null) { continue; }
// Sentence info
const terminatorString = '…。..?!';
const terminatorMap = new Map();
for (const char of terminatorString) {
terminatorMap.set(char, [false, true]);
}
const quoteArray = [['「', '」'], ['『', '』'], ['\'', '\''], ['"', '"']];
const forwardQuoteMap = new Map();
const backwardQuoteMap = new Map();
for (const [char1, char2] of quoteArray) {
forwardQuoteMap.set(char1, [char2, false]);
backwardQuoteMap.set(char2, [char1, false]);
}
// Test docSentenceExtract
const sentenceActual = documentUtil.extractSentence(source, false, sentenceScanExtent).text;
const sentenceActual = documentUtil.extractSentence(
source,
false,
sentenceScanExtent,
terminatorMap,
forwardQuoteMap,
backwardQuoteMap
).text;
assert.strictEqual(sentenceActual, sentence);
// Clean

View File

@ -420,7 +420,22 @@ function createProfileOptionsUpdatedTestData1() {
fieldTemplates: null
},
sentenceParsing: {
scanExtent: 200
scanExtent: 200,
enableTerminationCharacters: true,
terminationCharacters: [
{enabled: true, character1: '「', character2: '」', includeCharacterAtStart: false, includeCharacterAtEnd: false},
{enabled: true, character1: '『', character2: '』', includeCharacterAtStart: false, includeCharacterAtEnd: false},
{enabled: true, character1: '"', character2: '"', includeCharacterAtStart: false, includeCharacterAtEnd: false},
{enabled: true, character1: '\'', character2: '\'', includeCharacterAtStart: false, includeCharacterAtEnd: false},
{enabled: true, character1: '.', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '!', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '?', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '。', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true},
{enabled: true, character1: '…', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}
]
}
};
}