Translator regex replacements (#1199)

* Add support for regex replacements during the translation process

* Allow assignment of textReplacements

* Rename

* Set up test data

* Write expected data

* Set up options

* Prevent infinite loop if regex matches empty string

* Implement setting controller

* Add support for testing pattern replacements
This commit is contained in:
toasted-nutbread 2021-01-03 12:12:55 -05:00 committed by GitHub
parent eda8534e19
commit 8e304b83c6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 4472 additions and 5 deletions

View File

@ -1732,6 +1732,113 @@ code.anki-field-marker {
height: calc(0.425em * 4 + 1em * var(--line-height-default) * 3);
}
/* Placeholder text for the replacement list; hidden unless the rule below applies. */
#translation-text-replacement-list-empty {
display: none;
}
/* When the list element has no children at all, reveal the placeholder that immediately follows it. */
#translation-text-replacement-list:empty+#translation-text-replacement-list-empty {
display: block;
}
/*
 * One replacement entry laid out as a grid with four columns (index, label, field, menu button).
 * The named areas define three rows: pattern, replacement and test. The "button" area spans the
 * first two rows; the test row leaves the first and last columns empty.
 */
.translation-text-replacement-entry {
display: grid;
grid-template-columns: auto auto 1fr auto;
grid-template-rows: auto;
grid-template-areas:
"index pattern-label pattern button"
". replacement-label replacement button"
". test-label test .";
column-gap: 0.25em;
row-gap: 0.25em;
align-items: stretch;
justify-items: stretch;
}
/* Vertical spacing between consecutive entries (not applied before the first one). */
.translation-text-replacement-entry+.translation-text-replacement-entry {
margin-top: 0.5em;
}
/* Entry index cell, placed in the "index" grid area. */
.translation-text-replacement-index {
grid-area: index;
align-self: center;
justify-self: start;
padding-right: 0.5em;
}
/* Label cell for the pattern row. */
.translation-text-replacement-pattern-label {
grid-area: pattern-label;
align-self: center;
justify-self: start;
padding-right: 0.5em;
}
/* Label cell for the replacement row. */
.translation-text-replacement-replacement-label {
grid-area: replacement-label;
align-self: center;
justify-self: start;
padding-right: 0.5em;
}
/* Field cell of the pattern row; a horizontal flex row holding the pattern input and its checkbox. */
.translation-text-replacement-pattern-container {
grid-area: pattern;
align-self: stretch;
justify-self: stretch;
display: flex;
flex-flow: row nowrap;
align-items: stretch;
}
/* Field cell of the replacement row; a horizontal flex row holding the replacement input. */
.translation-text-replacement-replacement-container {
grid-area: replacement;
align-self: stretch;
justify-self: stretch;
display: flex;
flex-flow: row nowrap;
align-items: stretch;
}
/* Let the text inputs grow and shrink to fill the remaining width of their flex row. */
input.translation-text-replacement-pattern,
input.translation-text-replacement-replacement {
flex: 1 1 auto;
width: auto;
}
/* Checkbox plus its caption, kept on a single line next to the pattern input. */
.translation-text-replacement-checkbox-container {
white-space: nowrap;
display: flex;
flex-flow: row nowrap;
align-items: center;
padding-left: 0.5em;
}
/* Gap between the checkbox and its caption text. */
.translation-text-replacement-checkbox-label {
padding-left: 0.5em;
}
/* Entry menu button, placed in the "button" grid area and vertically centered within it. */
.translation-text-replacement-button {
grid-area: button;
align-self: center;
justify-self: start;
}
/* Label cell for the test row. */
.translation-text-replacement-test-label {
grid-area: test-label;
align-self: center;
justify-self: start;
padding-right: 0.5em;
}
/* Field cell of the test row; a horizontal flex row holding the test input, a caption and the test output. */
.translation-text-replacement-test-container {
grid-area: test;
align-self: stretch;
justify-self: stretch;
display: flex;
flex-flow: row nowrap;
align-items: stretch;
}
/* Both test fields share the remaining width of the flex row. */
input.translation-text-replacement-test-input,
input.translation-text-replacement-test-output {
flex: 1 1 auto;
width: auto;
}
/*
 * Caption shown between the test input and the test output.
 * This element is a flex item of .translation-text-replacement-test-container, not a grid item,
 * so it carries only flex-related properties (grid-area/justify-self have no effect on it).
 */
.translation-text-replacement-test-label-inner {
    align-self: center;
    flex: 0 0 auto;
    padding: 0 0.5em;
    white-space: nowrap;
}
/* Test-row nodes stay hidden unless the entry element has data-test-visible="true". */
.translation-text-replacement-entry:not([data-test-visible=true]) .translation-text-replacement-test-node {
display: none;
}
/* Generic layouts */
.margin-above {

View File

@ -592,7 +592,8 @@
"convertAlphabeticCharacters",
"convertHiraganaToKatakana",
"convertKatakanaToHiragana",
"collapseEmphaticSequences"
"collapseEmphaticSequences",
"textReplacements"
],
"properties": {
"convertHalfWidthCharacters": {
@ -624,6 +625,46 @@
"type": "string",
"enum": ["false", "true", "full"],
"default": "false"
},
"textReplacements": {
"type": "object",
"required": [
"searchOriginal",
"groups"
],
"properties": {
"searchOriginal": {
"type": "boolean",
"default": true
},
"groups": {
"type": "array",
"items": {
"type": "array",
"items": {
"required": [
"pattern",
"ignoreCase",
"replacement"
],
"properties": {
"pattern": {
"type": "string",
"default": ""
},
"ignoreCase": {
"type": "boolean",
"default": false
},
"replacement": {
"type": "string",
"default": ""
}
}
}
}
}
}
}
}
},

View File

@ -1655,9 +1655,11 @@ class Backend {
convertAlphabeticCharacters,
convertHiraganaToKatakana,
convertKatakanaToHiragana,
collapseEmphaticSequences
collapseEmphaticSequences,
textReplacements: textReplacementsOptions
}
} = options;
const textReplacements = this._getTranslatorTextReplacements(textReplacementsOptions);
return {
wildcard,
mainDictionary,
@ -1668,6 +1670,7 @@ class Backend {
convertHiraganaToKatakana,
convertKatakanaToHiragana,
collapseEmphaticSequences,
textReplacements,
enabledDictionaryMap
};
}
@ -1686,6 +1689,29 @@ class Backend {
return enabledDictionaryMap;
}
_getTranslatorTextReplacements(textReplacementsOptions) {
const textReplacements = [];
for (const group of textReplacementsOptions.groups) {
const textReplacementsEntries = [];
for (let {pattern, ignoreCase, replacement} of group) {
try {
pattern = new RegExp(pattern, ignoreCase ? 'gi' : 'g');
} catch (e) {
// Invalid pattern
continue;
}
textReplacementsEntries.push({pattern, replacement});
}
if (textReplacementsEntries.length > 0) {
textReplacements.push(textReplacementsEntries);
}
}
if (textReplacements.length === 0 || textReplacementsOptions.searchOriginal) {
textReplacements.unshift(null);
}
return textReplacements;
}
async _openWelcomeGuidePage() {
await this._createTab(chrome.runtime.getURL('/bg/welcome.html'));
}

View File

@ -485,6 +485,10 @@ class OptionsUtil {
{
async: false,
update: this._updateVersion7.bind(this)
},
{
async: false,
update: this._updateVersion8.bind(this)
}
];
}
@ -675,4 +679,16 @@ class OptionsUtil {
}
return options;
}
_updateVersion8(options) {
// Version 8 changes:
// Added translation.textReplacements.
for (const profile of options.profiles) {
profile.options.translation.textReplacements = {
searchOriginal: true,
groups: []
};
}
return options;
}
}

View File

@ -36,6 +36,7 @@
* SettingsDisplayController
* StatusFooter
* StorageController
* TranslationTextReplacementsController
* api
*/
@ -120,6 +121,9 @@ async function setupGenericSettingsController(genericSettingController) {
const secondarySearchDictionaryController = new SecondarySearchDictionaryController(settingsController);
secondarySearchDictionaryController.prepare();
const translationTextReplacementsController = new TranslationTextReplacementsController(settingsController);
translationTextReplacementsController.prepare();
await Promise.all(preparePromises);
document.documentElement.dataset.loaded = 'true';

View File

@ -0,0 +1,241 @@
/*
* Copyright (C) 2021 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Settings-page controller for the list of custom text replacement patterns.
 * Only the first replacement group (`groups[0]`) is displayed and edited; each of its
 * entries is rendered through a TranslationTextReplacementsEntry.
 */
class TranslationTextReplacementsController {
    /**
     * @param {object} settingsController Provides getOptions, modifyProfileSettings,
     *   instantiateTemplate and the 'optionsChanged' event.
     */
    constructor(settingsController) {
        this._settingsController = settingsController;
        this._entryContainer = null;
        this._entries = [];
    }

    /**
     * Looks up the list container and the add button, registers the listeners and
     * renders the entries for the current options.
     */
    async prepare() {
        this._entryContainer = document.querySelector('#translation-text-replacement-list');
        const addButton = document.querySelector('#translation-text-replacement-add');

        addButton.addEventListener('click', () => this._onAdd(), false);
        this._settingsController.on('optionsChanged', (details) => this._onOptionsChanged(details));

        await this._updateOptions();
    }

    /**
     * Appends a new blank entry to the first group, creating that group when none exists,
     * then re-renders the list.
     */
    async addGroup() {
        const {translation: {textReplacements: {groups}}} = await this._settingsController.getOptions();
        const newEntry = this._createNewEntry();
        const hasGroup = (groups.length !== 0);

        const target = (
            hasGroup ?
            this._createSpliceTarget('translation.textReplacements.groups[0]', groups[0].length, 0, [newEntry]) :
            this._createSpliceTarget('translation.textReplacements.groups', 0, 0, [[newEntry]])
        );

        await this._settingsController.modifyProfileSettings([target]);
        await this._updateOptions();
    }

    /**
     * Removes the entry at `index` from the first group; when it is the only entry,
     * the group itself is removed. The list is re-rendered afterwards.
     * @param {number} index Position of the entry inside the first group.
     * @returns {Promise<boolean>} `false` when there is no group or the index is out of range,
     *   `true` once the removal has been submitted.
     */
    async deleteGroup(index) {
        const {translation: {textReplacements: {groups}}} = await this._settingsController.getOptions();
        if (groups.length === 0) { return false; }

        const firstGroup = groups[0];
        const entryCount = firstGroup.length;
        if (index < 0 || index >= entryCount) { return false; }

        const target = (
            (entryCount > 1) ?
            this._createSpliceTarget('translation.textReplacements.groups[0]', index, 1, []) :
            this._createSpliceTarget('translation.textReplacements.groups', 0, entryCount, [])
        );

        await this._settingsController.modifyProfileSettings([target]);
        await this._updateOptions();
        return true;
    }

    // Private

    /**
     * Discards every rendered entry and rebuilds the list from the first group of `options`.
     */
    _onOptionsChanged({options}) {
        const previousEntries = this._entries;
        for (const previousEntry of previousEntries) {
            previousEntry.cleanup();
        }
        this._entries = [];

        const {groups} = options.translation.textReplacements;
        if (groups.length === 0) { return; }

        for (const [index, data] of groups[0].entries()) {
            const node = this._settingsController.instantiateTemplate('translation-text-replacement-entry');
            this._entryContainer.appendChild(node);
            const entry = new TranslationTextReplacementsEntry(this, node, index, data);
            this._entries.push(entry);
            entry.prepare();
        }
    }

    _onAdd() {
        this.addGroup();
    }

    /** Fetches the current options and re-renders the list from them. */
    async _updateOptions() {
        const options = await this._settingsController.getOptions();
        this._onOptionsChanged({options});
    }

    /** @returns {{pattern: string, ignoreCase: boolean, replacement: string}} A blank entry. */
    _createNewEntry() {
        return {pattern: '', ignoreCase: false, replacement: ''};
    }

    /** Builds a single 'splice' modification target for modifyProfileSettings. */
    _createSpliceTarget(path, start, deleteCount, items) {
        return {action: 'splice', path, start, deleteCount, items};
    }
}
/**
 * Wires up the DOM node of a single text replacement entry: binds its inputs to the
 * corresponding settings paths, handles its menu actions and drives the optional
 * test row that previews the replacement.
 */
class TranslationTextReplacementsEntry {
    /**
     * @param {object} parent Owner whose `deleteGroup(index)` is called for the "remove" action.
     * @param {Element} node Root element of the entry.
     * @param {number} index Position of the entry inside the first replacement group.
     */
    constructor(parent, node, index) {
        this._parent = parent;
        this._node = node;
        this._index = index;
        this._eventListeners = new EventListenerCollection();
        this._patternInput = null;
        this._replacementInput = null;
        this._ignoreCaseToggle = null;
        this._testInput = null;
        this._testOutput = null;
    }

    /** Locates the entry's controls, assigns their setting paths and registers listeners. */
    prepare() {
        const find = (selector) => this._node.querySelector(selector);
        this._patternInput = find('.translation-text-replacement-pattern');
        this._replacementInput = find('.translation-text-replacement-replacement');
        this._ignoreCaseToggle = find('.translation-text-replacement-pattern-ignore-case');
        const menuButton = find('.translation-text-replacement-button');
        this._testInput = find('.translation-text-replacement-test-input');
        this._testOutput = find('.translation-text-replacement-test-output');

        // The data-setting paths address this entry inside the first group.
        const pathBase = `translation.textReplacements.groups[0][${this._index}]`;
        this._patternInput.dataset.setting = `${pathBase}.pattern`;
        this._replacementInput.dataset.setting = `${pathBase}.replacement`;
        this._ignoreCaseToggle.dataset.setting = `${pathBase}.ignoreCase`;

        const refreshTest = () => this._updateTestInput();
        const listeners = this._eventListeners;
        listeners.addEventListener(menuButton, 'menuOpened', (e) => this._onMenuOpened(e), false);
        listeners.addEventListener(menuButton, 'menuClosed', (e) => this._onMenuClosed(e), false);
        listeners.addEventListener(this._patternInput, 'settingChanged', (e) => this._onPatternChanged(e), false);
        listeners.addEventListener(this._ignoreCaseToggle, 'settingChanged', refreshTest, false);
        listeners.addEventListener(this._replacementInput, 'settingChanged', refreshTest, false);
        listeners.addEventListener(this._testInput, 'input', refreshTest, false);
    }

    /** Removes all registered listeners and detaches the entry's node from its parent, if any. */
    cleanup() {
        this._eventListeners.removeAllEventListeners();
        const {parentNode} = this._node;
        if (parentNode !== null) {
            parentNode.removeChild(this._node);
        }
    }

    // Private

    /** Shows only the menu item that toggles the test row into its other state. */
    _onMenuOpened({detail: {menu}}) {
        const testVisible = this._isTestVisible();
        const showItem = menu.querySelector('[data-menu-action=showTest]');
        showItem.hidden = testVisible;
        const hideItem = menu.querySelector('[data-menu-action=hideTest]');
        hideItem.hidden = !testVisible;
    }

    /** Dispatches the action chosen in the entry menu. */
    _onMenuClosed({detail: {action}}) {
        if (action === 'remove') {
            this._parent.deleteGroup(this._index);
        } else if (action === 'showTest') {
            this._setTestVisible(true);
        } else if (action === 'hideTest') {
            this._setTestVisible(false);
        }
    }

    _onPatternChanged({detail: {value}}) {
        this._validatePattern(value);
        this._updateTestInput();
    }

    /** Records on the pattern input (data-invalid) whether `value` compiles as a RegExp. */
    _validatePattern(value) {
        let invalid = true;
        try {
            new RegExp(value, 'g');
            invalid = false;
        } catch (e) {
            // NOP
        }
        this._patternInput.dataset.invalid = `${invalid}`;
    }

    _isTestVisible() {
        return this._node.dataset.testVisible === 'true';
    }

    _setTestVisible(visible) {
        this._node.dataset.testVisible = `${visible}`;
        this._updateTestInput();
    }

    /**
     * Recomputes the test output from the current pattern, flags, replacement and test input.
     * Does nothing while the test row is hidden or while the pattern does not compile.
     */
    _updateTestInput() {
        if (!this._isTestVisible()) { return; }

        const flags = (this._ignoreCaseToggle.checked ? 'gi' : 'g');
        let regex;
        try {
            regex = new RegExp(this._patternInput.value, flags);
        } catch (e) {
            return;
        }

        const replacement = this._replacementInput.value;
        this._testOutput.value = this._testInput.value.replace(regex, replacement);
    }
}

View File

@ -68,6 +68,13 @@ class Translator {
* convertHiraganaToKatakana: (enum: 'false', 'true', 'variant'),
* convertKatakanaToHiragana: (enum: 'false', 'true', 'variant'),
* collapseEmphaticSequences: (enum: 'false', 'true', 'full'),
* textReplacements: [
* (null or [
* {pattern: (RegExp), replacement: (string)}
* ...
* ])
* ...
* ],
* enabledDictionaryMap: (Map of [
* (string),
* {
@ -302,6 +309,7 @@ class Translator {
_getAllDeinflections(text, options) {
const textOptionVariantArray = [
this._getTextReplacementsVariants(options),
this._getTextOptionEntryVariants(options.convertHalfWidthCharacters),
this._getTextOptionEntryVariants(options.convertNumericCharacters),
this._getTextOptionEntryVariants(options.convertAlphabeticCharacters),
@ -313,9 +321,12 @@ class Translator {
const jp = this._japaneseUtil;
const deinflections = [];
const used = new Set();
for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) {
for (const [textReplacements, halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) {
let text2 = text;
const sourceMap = new TextSourceMap(text2);
if (textReplacements !== null) {
text2 = this._applyTextReplacements(text2, sourceMap, textReplacements);
}
if (halfWidth) {
text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap);
}
@ -879,6 +890,10 @@ class Translator {
return collapseEmphaticOptions;
}
_getTextReplacementsVariants(options) {
return options.textReplacements;
}
_getSecondarySearchDictionaryMap(enabledDictionaryMap) {
const secondarySearchDictionaryMap = new Map();
for (const [dictionary, details] of enabledDictionaryMap.entries()) {
@ -1304,4 +1319,64 @@ class Translator {
return stringComparer.compare(v1.notes, v2.notes);
});
}
// Regex functions
_applyTextReplacements(text, sourceMap, replacements) {
for (const {pattern, replacement} of replacements) {
text = this._applyTextReplacement(text, sourceMap, pattern, replacement);
}
return text;
}
/**
 * Replaces matches of `pattern` in `text` with the expansion of `replacement`, updating
 * `sourceMap` for every substitution. A global pattern replaces every match; a non-global
 * pattern replaces at most one.
 * @param {string} text Text to transform.
 * @param {object} sourceMap Object exposing `combine` and `insert`; their exact semantics are
 *   defined elsewhere (presumably TextSourceMap — confirm against that class).
 * @param {RegExp} pattern Pattern to search for; its `lastIndex` is modified.
 * @param {string} replacement Replacement string, expanded by `_applyMatchReplacement`.
 * @returns {string} The transformed text.
 */
_applyTextReplacement(text, sourceMap, pattern, replacement) {
const isGlobal = pattern.global;
// exec() on a global RegExp resumes from lastIndex, so always start at the beginning.
if (isGlobal) { pattern.lastIndex = 0; }
// Runs exactly once for a non-global pattern; repeats until no match is left for a global one.
for (let loop = true; loop; loop = isGlobal) {
const match = pattern.exec(text);
if (match === null) { break; }
const matchText = match[0];
const index = match.index;
const actualReplacement = this._applyMatchReplacement(replacement, match);
const actualReplacementLength = actualReplacement.length;
// Shift lastIndex by the length difference so scanning resumes right after the inserted text.
// An empty match counts as length -1, which moves lastIndex one extra step forward and
// prevents matching the same empty position forever.
const delta = actualReplacementLength - (matchText.length > 0 ? matchText.length : -1);
text = `${text.substring(0, index)}${actualReplacement}${text.substring(index + matchText.length)}`;
pattern.lastIndex += delta;
// Keep the source map in step with the edited text.
// NOTE(review): the combine/insert calls below depend on the source map implementation,
// which is not visible here — confirm the index arithmetic against it before changing.
if (actualReplacementLength > 0) {
sourceMap.combine(Math.max(0, index - 1), matchText.length);
sourceMap.insert(index, ...(new Array(actualReplacementLength).fill(0)));
} else {
sourceMap.combine(index, matchText.length);
}
}
return text;
}
_applyMatchReplacement(replacement, match) {
const pattern = /\$(?:\$|&|`|'|(\d\d?)|<([^>]*)>)/g;
return replacement.replace(pattern, (g0, g1, g2) => {
if (typeof g1 !== 'undefined') {
const matchIndex = Number.parseInt(g1, 10);
if (matchIndex >= 1 && matchIndex <= match.length) {
return match[matchIndex];
}
} else if (typeof g2 !== 'undefined') {
const {groups} = match;
if (typeof groups === 'object' && groups !== null && Object.prototype.hasOwnProperty.call(groups, g2)) {
return groups[g2];
}
} else {
switch (g0) {
case '$': return '$';
case '&': return match[0];
case '`': return replacement.substring(0, match.index);
case '\'': return replacement.substring(match.index + g0.length);
}
}
return g0;
});
}
}

View File

@ -1144,6 +1144,14 @@
</div>
</div>
<div class="settings-group">
<!-- Settings row whose modal action shows the #translation-text-replacement-patterns modal; carries the advanced-only class. -->
<div class="settings-item settings-item-button advanced-only" data-modal-action="show,translation-text-replacement-patterns"><div class="settings-item-inner">
<div class="settings-item-left">
<div class="settings-item-label">Configure custom text replacement patterns&hellip;</div>
</div>
<div class="settings-item-right open-panel-button-container">
<button class="icon-button"><span class="icon-button-inner"><span class="icon" data-icon="material-right-arrow"></span></span></button>
</div>
</div></div>
<div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable">
<div class="settings-item-left">
<div class="settings-item-label">Convert half width characters to full width</div>
@ -2612,6 +2620,91 @@
</div></div>
<!-- Translation modals -->
<!--
    Modal for editing custom text replacement patterns.
    Contains an explanatory note, the "search original text" toggle bound to
    translation.textReplacements.searchOriginal, and the entry list
    (#translation-text-replacement-list) with its add button (#translation-text-replacement-add),
    both of which are looked up by TranslationTextReplacementsController.
-->
<div id="translation-text-replacement-patterns" class="modal-container" tabindex="-1" role="dialog" hidden><div class="modal-content">
<div class="modal-header">
<div class="modal-title">Custom Text Replacement Patterns</div>
<div class="modal-header-button-container">
<div class="modal-header-button-group">
<button class="icon-button modal-header-button" data-modal-action="expand"><span class="icon-button-inner"><span class="icon" data-icon="expand"></span></span></button>
<button class="icon-button modal-header-button" data-modal-action="collapse"><span class="icon-button-inner"><span class="icon" data-icon="collapse"></span></span></button>
</div>
</div>
</div>
<div class="modal-body">
<div class="settings-item"><div class="settings-item-inner"><div class="settings-item-left"><div class="settings-item-label">
Text replacement patterns are used to modify or remove text that matches certain patterns.
Patterns are defined using
<a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions" target="_blank" rel="noreferrer noopener">regular expression syntax</a>,
and the replacement text can use certain
<a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#Specifying_a_string_as_a_parameter" target="_blank" rel="noreferrer noopener">special replacement patterns</a>.
</div></div></div></div>
<div class="settings-item"><div class="settings-item-inner">
<div class="settings-item-left">
<div class="settings-item-label">
Search original text
</div>
<div class="settings-item-description">
The original unmodified text will also be searched for definitions.
</div>
</div>
<div class="settings-item-right">
<label class="toggle"><input type="checkbox" data-setting="translation.textReplacements.searchOriginal"><span class="toggle-body"><span class="toggle-track"></span><span class="toggle-knob"></span></span></label>
</div>
</div></div>
<div class="settings-item">
<div class="settings-item-inner">
<div class="settings-item-left">
<div class="settings-item-label">Text replacement patterns</div>
</div>
<div class="settings-item-right">
<button id="translation-text-replacement-add" class="low-emphasis">Add</button>
</div>
</div>
<div class="settings-item-children">
<div id="translation-text-replacement-list" class="generic-list"></div>
<div id="translation-text-replacement-list-empty"><em>None defined</em></div>
</div>
</div>
</div>
<div class="modal-footer">
<button data-modal-action="hide">Close</button>
</div>
</div></div>
<!-- Translation templates -->
<!--
    Markup for one replacement entry; the class names are the selectors queried by
    TranslationTextReplacementsEntry.prepare(). Elements with the
    translation-text-replacement-test-node class form the test row, which the stylesheet hides
    unless the entry has data-test-visible="true".
    NOTE(review): the checkbox's <label class="checkbox"> is nested inside another <label>;
    the HTML specification does not allow label elements inside a label. Confirm whether the
    outer element should use a different tag.
-->
<template id="translation-text-replacement-entry-template"><div class="translation-text-replacement-entry">
<div class="translation-text-replacement-index generic-list-index-prefix"></div>
<div class="translation-text-replacement-pattern-label">Pattern:</div>
<div class="translation-text-replacement-replacement-label">Replacement:</div>
<div class="translation-text-replacement-pattern-container">
<input type="text" class="translation-text-replacement-pattern code">
<label class="translation-text-replacement-checkbox-container">
<label class="checkbox"><input type="checkbox" class="translation-text-replacement-checkbox translation-text-replacement-pattern-ignore-case"><span class="checkbox-body"><span class="checkbox-fill"></span><span class="checkbox-border"></span><span class="checkbox-check"></span></span></label>
<span class="translation-text-replacement-checkbox-label">Ignore case</span>
</label>
</div>
<div class="translation-text-replacement-replacement-container">
<input type="text" class="translation-text-replacement-replacement code">
</div>
<button class="icon-button translation-text-replacement-button" data-menu="translation-text-replacement-entry-menu" data-menu-position="below,left"><span class="icon-button-inner"><span class="icon" data-icon="kebab-menu"></span></span></button>
<div class="translation-text-replacement-test-label translation-text-replacement-test-node">Test Input:</div>
<div class="translation-text-replacement-test-container translation-text-replacement-test-node">
<input type="text" class="translation-text-replacement-test-input">
<div class="translation-text-replacement-test-label-inner">Output:</div>
<input type="text" class="translation-text-replacement-test-output" readonly>
</div>
</div></template>
<!--
    Popup menu of a replacement entry. The data-menu-action values (showTest, hideTest, remove)
    are the actions handled by TranslationTextReplacementsEntry; showTest and hideTest are
    hidden alternately depending on whether the test row is currently visible.
-->
<template id="translation-text-replacement-entry-menu-template"><div class="popup-menu-container" tabindex="-1" role="dialog"><div class="popup-menu">
<button class="popup-menu-item" data-menu-action="showTest">Test</button>
<button class="popup-menu-item" data-menu-action="hideTest">Hide test</button>
<button class="popup-menu-item" data-menu-action="remove">Remove</button>
</div></div></template>
<!-- Scripts -->
<script src="/mixed/lib/jszip.min.js"></script>
<script src="/mixed/lib/wanakana.min.js"></script>
@ -2671,6 +2764,7 @@
<script src="/bg/js/settings2/nested-popups-controller.js"></script>
<script src="/bg/js/settings2/secondary-search-dictionary-controller.js"></script>
<script src="/bg/js/settings2/settings-display-controller.js"></script>
<script src="/bg/js/settings2/translation-text-replacements-controller.js"></script>
<script src="/bg/js/settings2/settings-main.js"></script>

View File

@ -617,6 +617,10 @@ input[type=number].input-with-suffix-button {
border-right-style: none;
z-index: 1;
}
/* Text and number inputs marked with the "code" class use a monospace font. */
input[type=text].code,
input[type=number].code {
font-family: 'Courier New', Courier, monospace;
}
/* Material design button */

File diff suppressed because it is too large Load Diff

View File

@ -394,7 +394,11 @@ function createProfileOptionsUpdatedTestData1() {
convertAlphabeticCharacters: 'false',
convertHiraganaToKatakana: 'false',
convertKatakanaToHiragana: 'variant',
collapseEmphaticSequences: 'false'
collapseEmphaticSequences: 'false',
textReplacements: {
searchOriginal: true,
groups: []
}
},
dictionaries: {},
parsing: {
@ -502,7 +506,7 @@ function createOptionsUpdatedTestData1() {
}
],
profileCurrent: 0,
version: 7,
version: 8,
global: {
database: {
prefixWildcardsSupported: false

View File

@ -107,6 +107,16 @@ function buildOptions(optionsPresets, optionsArray, dictionaryTitle) {
}
}
// Construct regex
if (Array.isArray(options.textReplacements)) {
options.textReplacements = options.textReplacements.map((value) => {
if (Array.isArray(value)) {
value = value.map(({pattern, flags, replacement}) => ({pattern: new RegExp(pattern, flags), replacement}));
}
return value;
});
}
// Update structure
const placeholder = '${title}';
if (options.mainDictionary === placeholder) {