Merge pull request #423 from toasted-nutbread/text-source-map
Text source map
This commit is contained in:
commit
058f626efd
@ -38,6 +38,7 @@
|
||||
<script src="/bg/js/options.js"></script>
|
||||
<script src="/bg/js/profile-conditions.js"></script>
|
||||
<script src="/bg/js/request.js"></script>
|
||||
<script src="/bg/js/text-source-map.js"></script>
|
||||
<script src="/bg/js/translator.js"></script>
|
||||
<script src="/bg/js/util.js"></script>
|
||||
<script src="/mixed/js/audio-system.js"></script>
|
||||
|
@ -158,9 +158,8 @@
|
||||
return result;
|
||||
}
|
||||
|
||||
function convertHalfWidthKanaToFullWidth(text, sourceMapping) {
|
||||
function convertHalfWidthKanaToFullWidth(text, sourceMap=null) {
|
||||
let result = '';
|
||||
const hasSourceMapping = Array.isArray(sourceMapping);
|
||||
|
||||
// This function is safe to use charCodeAt instead of codePointAt, since all
|
||||
// the relevant characters are represented with a single UTF-16 character code.
|
||||
@ -192,10 +191,8 @@
|
||||
}
|
||||
}
|
||||
|
||||
if (hasSourceMapping && index > 0) {
|
||||
index = result.length;
|
||||
const v = sourceMapping.splice(index + 1, 1)[0];
|
||||
sourceMapping[index] += v;
|
||||
if (sourceMap !== null && index > 0) {
|
||||
sourceMap.combine(result.length, 1);
|
||||
}
|
||||
result += c2;
|
||||
}
|
||||
@ -203,7 +200,7 @@
|
||||
return result;
|
||||
}
|
||||
|
||||
function convertAlphabeticToKana(text, sourceMapping) {
|
||||
function convertAlphabeticToKana(text, sourceMap=null) {
|
||||
let part = '';
|
||||
let result = '';
|
||||
|
||||
@ -222,7 +219,7 @@
|
||||
c = 0x2d; // '-'
|
||||
} else {
|
||||
if (part.length > 0) {
|
||||
result += convertAlphabeticPartToKana(part, sourceMapping, result.length);
|
||||
result += convertAlphabeticPartToKana(part, sourceMap, result.length);
|
||||
part = '';
|
||||
}
|
||||
result += char;
|
||||
@ -232,17 +229,16 @@
|
||||
}
|
||||
|
||||
if (part.length > 0) {
|
||||
result += convertAlphabeticPartToKana(part, sourceMapping, result.length);
|
||||
result += convertAlphabeticPartToKana(part, sourceMap, result.length);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
function convertAlphabeticPartToKana(text, sourceMapping, sourceMappingStart) {
|
||||
function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) {
|
||||
const result = wanakana.toHiragana(text);
|
||||
|
||||
// Generate source mapping
|
||||
if (Array.isArray(sourceMapping)) {
|
||||
if (typeof sourceMappingStart !== 'number') { sourceMappingStart = 0; }
|
||||
if (sourceMap !== null) {
|
||||
let i = 0;
|
||||
let resultPos = 0;
|
||||
const ii = text.length;
|
||||
@ -262,18 +258,15 @@
|
||||
// Merge characters
|
||||
const removals = iNext - i - 1;
|
||||
if (removals > 0) {
|
||||
let sum = 0;
|
||||
const vs = sourceMapping.splice(sourceMappingStart + 1, removals);
|
||||
for (const v of vs) { sum += v; }
|
||||
sourceMapping[sourceMappingStart] += sum;
|
||||
sourceMap.combine(sourceMapStart, removals);
|
||||
}
|
||||
++sourceMappingStart;
|
||||
++sourceMapStart;
|
||||
|
||||
// Empty elements
|
||||
const additions = resultPosNext - resultPos - 1;
|
||||
for (let j = 0; j < additions; ++j) {
|
||||
sourceMapping.splice(sourceMappingStart, 0, 0);
|
||||
++sourceMappingStart;
|
||||
sourceMap.insert(sourceMapStart, 0);
|
||||
++sourceMapStart;
|
||||
}
|
||||
|
||||
i = iNext;
|
||||
|
115
ext/bg/js/text-source-map.js
Normal file
115
ext/bg/js/text-source-map.js
Normal file
@ -0,0 +1,115 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Alex Yatskov <alex@foosoft.net>
|
||||
* Author: Alex Yatskov <alex@foosoft.net>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
 * Tracks how the characters of a transformed string map back onto the
 * original source text.
 *
 * The mapping is an array with one entry per character of the transformed
 * text; each entry is the number of source characters that the transformed
 * character was produced from. A `null` mapping is treated as the identity
 * mapping (every entry is 1, one entry per source character) and is only
 * materialized when the map is first modified or compared.
 */
class TextSourceMap {
    /**
     * @param {string} source The original, untransformed text.
     * @param {?number[]} mapping Optional initial mapping. Non-array values
     *   are treated as "no mapping" (identity). Array values are copied and
     *   normalized (see `_normalizeMapping`).
     */
    constructor(source, mapping=null) {
        this._source = source;
        this._mapping = (Array.isArray(mapping) ? TextSourceMap._normalizeMapping(mapping) : null);
    }

    /**
     * The original source text this map was created for.
     * @returns {string}
     */
    get source() {
        return this._source;
    }

    /**
     * Checks whether another map has the same source text and an equivalent
     * mapping. A `null` mapping compares equal to an explicit identity mapping.
     * @param {*} other The value to compare against.
     * @returns {boolean} `true` if `other` is an equivalent TextSourceMap.
     */
    equals(other) {
        if (this === other) {
            return true;
        }

        const source = this._source;
        if (!(other instanceof TextSourceMap && source === other._source)) {
            return false;
        }

        let mapping = this._mapping;
        let otherMapping = other._mapping;
        if (mapping === null) {
            if (otherMapping === null) {
                return true;
            }
            // Compare against the implied identity mapping without storing it.
            mapping = TextSourceMap._createMapping(source);
        } else if (otherMapping === null) {
            otherMapping = TextSourceMap._createMapping(source);
        }

        const mappingLength = mapping.length;
        if (mappingLength !== otherMapping.length) {
            return false;
        }

        for (let i = 0; i < mappingLength; ++i) {
            if (mapping[i] !== otherMapping[i]) {
                return false;
            }
        }

        return true;
    }

    /**
     * Converts a length measured in the transformed text into the
     * corresponding length in the source text.
     * @param {number} finalLength Number of leading transformed characters.
     * @returns {number} The sum of the first `finalLength` mapping entries,
     *   or `finalLength` itself when no mapping has been materialized.
     */
    getSourceLength(finalLength) {
        const mapping = this._mapping;
        if (mapping === null) {
            return finalLength;
        }

        // Clamp to the mapping length: reading past the end would add
        // `undefined` to the total and turn the result into NaN.
        const end = Math.min(finalLength, mapping.length);
        let sourceLength = 0;
        for (let i = 0; i < end; ++i) {
            sourceLength += mapping[i];
        }
        return sourceLength;
    }

    /**
     * Merges the `count` entries following `index` into the entry at `index`,
     * i.e. records that several source characters collapsed into one
     * transformed character.
     *
     * Does nothing when `count <= 0` or when `index` does not refer to an
     * existing entry; previously an out-of-range index stored NaN in the
     * mapping and could remove unrelated entries.
     * @param {number} index Index of the entry that absorbs the others.
     * @param {number} count Number of following entries to absorb.
     */
    combine(index, count) {
        if (count <= 0) { return; }

        const length = (this._mapping === null ? this._source.length : this._mapping.length);
        if (!Number.isInteger(index) || index < 0 || index >= length) { return; }

        if (this._mapping === null) {
            this._mapping = TextSourceMap._createMapping(this._source);
        }

        let sum = this._mapping[index];
        const parts = this._mapping.splice(index + 1, count);
        for (const part of parts) {
            sum += part;
        }
        this._mapping[index] = sum;
    }

    /**
     * Inserts new mapping entries at `index`, shifting later entries right.
     * @param {number} index Position at which to insert.
     * @param {...number} items Entries to insert (e.g. 0 for a transformed
     *   character that consumes no source characters).
     */
    insert(index, ...items) {
        if (this._mapping === null) {
            this._mapping = TextSourceMap._createMapping(this._source);
        }

        this._mapping.splice(index, 0, ...items);
    }

    /**
     * Builds the identity mapping for a text: one entry of 1 per character.
     * @param {string} text
     * @returns {number[]}
     */
    static _createMapping(text) {
        return new Array(text.length).fill(1);
    }

    /**
     * Copies a mapping, flooring finite numbers and replacing every other
     * value with 0.
     * @param {Array} mapping
     * @returns {number[]}
     */
    static _normalizeMapping(mapping) {
        const result = [];
        for (const value of mapping) {
            result.push(
                (typeof value === 'number' && Number.isFinite(value)) ?
                Math.floor(value) :
                0
            );
        }
        return result;
    }
}
|
@ -19,6 +19,7 @@
|
||||
/* global
|
||||
* Database
|
||||
* Deinflector
|
||||
* TextSourceMap
|
||||
* dictEnabledSet
|
||||
* dictTagBuildSource
|
||||
* dictTagSanitize
|
||||
@ -367,17 +368,15 @@ class Translator {
|
||||
const used = new Set();
|
||||
for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) {
|
||||
let text2 = text;
|
||||
let sourceMapping = null;
|
||||
const sourceMap = new TextSourceMap(text2);
|
||||
if (halfWidth) {
|
||||
if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); }
|
||||
text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMapping);
|
||||
text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap);
|
||||
}
|
||||
if (numeric) {
|
||||
text2 = jp.convertNumericToFullWidth(text2);
|
||||
}
|
||||
if (alphabetic) {
|
||||
if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); }
|
||||
text2 = jp.convertAlphabeticToKana(text2, sourceMapping);
|
||||
text2 = jp.convertAlphabeticToKana(text2, sourceMap);
|
||||
}
|
||||
if (katakana) {
|
||||
text2 = jp.convertHiraganaToKatakana(text2);
|
||||
@ -391,7 +390,7 @@ class Translator {
|
||||
if (used.has(text2Substring)) { break; }
|
||||
used.add(text2Substring);
|
||||
for (const deinflection of this.deinflector.deinflect(text2Substring)) {
|
||||
deinflection.rawSource = Translator.getDeinflectionRawSource(text, i, sourceMapping);
|
||||
deinflection.rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
|
||||
deinflections.push(deinflection);
|
||||
}
|
||||
}
|
||||
@ -407,25 +406,6 @@ class Translator {
|
||||
}
|
||||
}
|
||||
|
||||
static getDeinflectionRawSource(source, length, sourceMapping) {
|
||||
if (sourceMapping === null) {
|
||||
return source.substring(0, length);
|
||||
}
|
||||
|
||||
let result = '';
|
||||
let index = 0;
|
||||
for (let i = 0; i < length; ++i) {
|
||||
const c = sourceMapping[i];
|
||||
result += source.substring(index, index + c);
|
||||
index += c;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static createTextSourceMapping(text) {
|
||||
return new Array(text.length).fill(1);
|
||||
}
|
||||
|
||||
async findKanji(text, options) {
|
||||
const dictionaries = dictEnabledSet(options);
|
||||
const kanjiUnique = new Set();
|
||||
|
@ -8,7 +8,7 @@
|
||||
"scripts": {
|
||||
"test": "npm run test-lint && npm run test-code",
|
||||
"test-lint": "eslint . && node ./test/lint/global-declarations.js",
|
||||
"test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js && node ./test/test-japanese.js"
|
||||
"test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js && node ./test/test-japanese.js && node ./test/test-text-source-map.js"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
|
@ -23,9 +23,11 @@ const vm = new VM();
|
||||
vm.execute([
|
||||
'mixed/lib/wanakana.min.js',
|
||||
'mixed/js/japanese.js',
|
||||
'bg/js/text-source-map.js',
|
||||
'bg/js/japanese.js'
|
||||
]);
|
||||
const jp = vm.get('jp');
|
||||
const TextSourceMap = vm.get('TextSourceMap');
|
||||
|
||||
|
||||
function testIsCodePointKanji() {
|
||||
@ -262,13 +264,13 @@ function testConvertHalfWidthKanaToFullWidth() {
|
||||
];
|
||||
|
||||
for (const [string, expected, expectedSourceMapping] of data) {
|
||||
const sourceMapping = new Array(string.length).fill(1);
|
||||
const sourceMap = new TextSourceMap(string);
|
||||
const actual1 = jp.convertHalfWidthKanaToFullWidth(string, null);
|
||||
const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMapping);
|
||||
const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMap);
|
||||
assert.strictEqual(actual1, expected);
|
||||
assert.strictEqual(actual2, expected);
|
||||
if (Array.isArray(expectedSourceMapping)) {
|
||||
vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping);
|
||||
if (typeof expectedSourceMapping !== 'undefined') {
|
||||
assert.ok(sourceMap.equals(new TextSourceMap(string, expectedSourceMapping)));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -285,13 +287,13 @@ function testConvertAlphabeticToKana() {
|
||||
];
|
||||
|
||||
for (const [string, expected, expectedSourceMapping] of data) {
|
||||
const sourceMapping = new Array(string.length).fill(1);
|
||||
const sourceMap = new TextSourceMap(string);
|
||||
const actual1 = jp.convertAlphabeticToKana(string, null);
|
||||
const actual2 = jp.convertAlphabeticToKana(string, sourceMapping);
|
||||
const actual2 = jp.convertAlphabeticToKana(string, sourceMap);
|
||||
assert.strictEqual(actual1, expected);
|
||||
assert.strictEqual(actual2, expected);
|
||||
if (Array.isArray(expectedSourceMapping)) {
|
||||
vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping);
|
||||
if (typeof expectedSourceMapping !== 'undefined') {
|
||||
assert.ok(sourceMap.equals(new TextSourceMap(string, expectedSourceMapping)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
234
test/test-text-source-map.js
Normal file
234
test/test-text-source-map.js
Normal file
@ -0,0 +1,234 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Alex Yatskov <alex@foosoft.net>
|
||||
* Author: Alex Yatskov <alex@foosoft.net>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
const assert = require('assert');
|
||||
const {VM} = require('./yomichan-vm');
|
||||
|
||||
const vm = new VM();
|
||||
vm.execute(['bg/js/text-source-map.js']);
|
||||
const TextSourceMap = vm.get('TextSourceMap');
|
||||
|
||||
|
||||
/**
 * Verifies that the `source` getter returns the exact string that was
 * passed to the TextSourceMap constructor.
 */
function testSource() {
    const cases = [['source1'], ['source2'], ['source3']];

    cases.forEach(([text]) => {
        const map = new TextSourceMap(text);
        assert.strictEqual(text, map.source);
    });
}
|
||||
|
||||
/**
 * Verifies TextSourceMap.equals: every map equals itself, a null mapping
 * equals an explicit all-ones mapping, and maps differ when either the
 * mapping values or the source text differ.
 */
function testEquals() {
    const ones = [1, 1, 1, 1, 1, 1, 1];
    const mappingA = [1, 2, 1, 3];
    const mappingB = [1, 3, 1, 2];
    const sources = ['source1', 'source2', 'source3'];
    const mismatchedSources = ['source4', 'source5', 'source6'];

    // Builds both maps, checks reflexivity, then checks the pairwise result.
    const check = (source1, mapping1, source2, mapping2, expectedEquals) => {
        const first = new TextSourceMap(source1, mapping1);
        const second = new TextSourceMap(source2, mapping2);
        assert.ok(first.equals(first));
        assert.ok(second.equals(second));
        assert.strictEqual(first.equals(second), expectedEquals);
    };

    // Each group is applied to every source, with the same source on both sides.
    const sameSourceGroups = [
        [null, null, true],
        [ones, null, true],
        [null, ones, true],
        [ones, ones, true],
        [mappingA, mappingA, true],
        [mappingB, mappingA, false]
    ];

    for (const [mapping1, mapping2, expectedEquals] of sameSourceGroups) {
        for (const source of sources) {
            check(source, mapping1, source, mapping2, expectedEquals);
        }
    }

    // Identical mappings but different source text must not compare equal.
    sources.forEach((source, index) => {
        check(source, ones, mismatchedSources[index], ones, false);
    });
}
|
||||
|
||||
/**
 * Verifies TextSourceMap.getSourceLength for several explicit mappings:
 * the result is the sum of the first `finalLength` mapping entries.
 */
function testGetSourceLength() {
    const text = 'source';
    const ones = [1, 1, 1, 1, 1, 1];
    const twos = [2, 2, 2];
    const threes = [3, 3];
    const sixes = [6, 6];

    // [mapping, finalLength, expected source length]
    const cases = [
        [ones, 1, 1],
        [ones, 2, 2],
        [ones, 3, 3],
        [ones, 4, 4],
        [ones, 5, 5],
        [ones, 6, 6],

        [twos, 1, 2],
        [twos, 2, 4],
        [twos, 3, 6],

        [threes, 1, 3],
        [threes, 2, 6],

        [sixes, 1, 6]
    ];

    cases.forEach(([mapping, finalLength, expectedValue]) => {
        const map = new TextSourceMap(text, mapping);
        const actualValue = map.getSourceLength(finalLength);
        assert.strictEqual(actualValue, expectedValue);
    });
}
|
||||
|
||||
/**
 * Verifies TextSourceMap.combine and TextSourceMap.insert by applying a
 * sequence of operations to a fresh map and comparing the result against
 * a map built from the expected mapping.
 */
function testCombineInsert() {
    const scenarios = [
        // No operations
        {
            initial: ['source', null],
            expected: ['source', [1, 1, 1, 1, 1, 1]],
            steps: []
        },

        // Combine
        {
            initial: ['source', null],
            expected: ['source', [3, 1, 1, 1]],
            steps: [['combine', 0, 2]]
        },
        {
            initial: ['source', null],
            expected: ['source', [1, 1, 1, 3]],
            steps: [['combine', 3, 2]]
        },
        {
            initial: ['source', null],
            expected: ['source', [3, 3]],
            steps: [['combine', 0, 2], ['combine', 1, 2]]
        },
        {
            initial: ['source', null],
            expected: ['source', [3, 3]],
            steps: [['combine', 3, 2], ['combine', 0, 2]]
        },

        // Insert
        {
            initial: ['source', null],
            expected: ['source', [0, 1, 1, 1, 1, 1, 1]],
            steps: [['insert', 0, 0]]
        },
        {
            initial: ['source', null],
            expected: ['source', [1, 1, 1, 1, 1, 1, 0]],
            steps: [['insert', 6, 0]]
        },
        {
            initial: ['source', null],
            expected: ['source', [0, 1, 1, 1, 1, 1, 1, 0]],
            steps: [['insert', 0, 0], ['insert', 7, 0]]
        },
        {
            initial: ['source', null],
            expected: ['source', [0, 1, 1, 1, 1, 1, 1, 0]],
            steps: [['insert', 6, 0], ['insert', 0, 0]]
        },

        // Mixed
        {
            initial: ['source', null],
            expected: ['source', [3, 0, 3]],
            steps: [['combine', 0, 2], ['insert', 1, 0], ['combine', 2, 2]]
        },
        {
            initial: ['source', null],
            expected: ['source', [3, 0, 3]],
            steps: [['combine', 0, 2], ['combine', 1, 2], ['insert', 1, 0]]
        },
        {
            initial: ['source', null],
            expected: ['source', [3, 0, 3]],
            steps: [['insert', 3, 0], ['combine', 0, 2], ['combine', 2, 2]]
        }
    ];

    for (const {initial, expected, steps} of scenarios) {
        const [source, mapping] = initial;
        const [expectedSource, expectedMapping] = expected;
        const actualMap = new TextSourceMap(source, mapping);
        const expectedMap = new TextSourceMap(expectedSource, expectedMapping);

        steps.forEach(([name, ...params]) => {
            // Unrecognized operation names are ignored.
            if (name === 'combine') {
                actualMap.combine(...params);
            } else if (name === 'insert') {
                actualMap.insert(...params);
            }
        });

        assert.ok(actualMap.equals(expectedMap));
    }
}
|
||||
|
||||
|
||||
/**
 * Runs every TextSourceMap test in order; the first failing assertion
 * throws and aborts the run.
 */
function main() {
    const tests = [
        testSource,
        testEquals,
        testGetSourceLength,
        testCombineInsert
    ];

    for (const runTest of tests) {
        runTest();
    }
}
|
||||
|
||||
|
||||
if (require.main === module) { main(); }
|
Loading…
Reference in New Issue
Block a user