Create simplified source map class

This commit is contained in:
toasted-nutbread 2020-03-28 17:51:58 -04:00
parent a0c4ce779d
commit ae84d13757
5 changed files with 143 additions and 52 deletions

View File

@ -38,6 +38,7 @@
<script src="/bg/js/options.js"></script> <script src="/bg/js/options.js"></script>
<script src="/bg/js/profile-conditions.js"></script> <script src="/bg/js/profile-conditions.js"></script>
<script src="/bg/js/request.js"></script> <script src="/bg/js/request.js"></script>
<script src="/bg/js/text-source-map.js"></script>
<script src="/bg/js/translator.js"></script> <script src="/bg/js/translator.js"></script>
<script src="/bg/js/util.js"></script> <script src="/bg/js/util.js"></script>
<script src="/mixed/js/audio-system.js"></script> <script src="/mixed/js/audio-system.js"></script>

View File

@ -158,9 +158,8 @@
return result; return result;
} }
function convertHalfWidthKanaToFullWidth(text, sourceMapping) { function convertHalfWidthKanaToFullWidth(text, sourceMap=null) {
let result = ''; let result = '';
const hasSourceMapping = Array.isArray(sourceMapping);
// This function is safe to use charCodeAt instead of codePointAt, since all // This function is safe to use charCodeAt instead of codePointAt, since all
// the relevant characters are represented with a single UTF-16 character code. // the relevant characters are represented with a single UTF-16 character code.
@ -192,10 +191,8 @@
} }
} }
if (hasSourceMapping && index > 0) { if (sourceMap !== null && index > 0) {
index = result.length; sourceMap.combine(result.length, 1);
const v = sourceMapping.splice(index + 1, 1)[0];
sourceMapping[index] += v;
} }
result += c2; result += c2;
} }
@ -203,7 +200,7 @@
return result; return result;
} }
function convertAlphabeticToKana(text, sourceMapping) { function convertAlphabeticToKana(text, sourceMap=null) {
let part = ''; let part = '';
let result = ''; let result = '';
@ -222,7 +219,7 @@
c = 0x2d; // '-' c = 0x2d; // '-'
} else { } else {
if (part.length > 0) { if (part.length > 0) {
result += convertAlphabeticPartToKana(part, sourceMapping, result.length); result += convertAlphabeticPartToKana(part, sourceMap, result.length);
part = ''; part = '';
} }
result += char; result += char;
@ -232,17 +229,16 @@
} }
if (part.length > 0) { if (part.length > 0) {
result += convertAlphabeticPartToKana(part, sourceMapping, result.length); result += convertAlphabeticPartToKana(part, sourceMap, result.length);
} }
return result; return result;
} }
function convertAlphabeticPartToKana(text, sourceMapping, sourceMappingStart) { function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) {
const result = wanakana.toHiragana(text); const result = wanakana.toHiragana(text);
// Generate source mapping // Generate source mapping
if (Array.isArray(sourceMapping)) { if (sourceMap !== null) {
if (typeof sourceMappingStart !== 'number') { sourceMappingStart = 0; }
let i = 0; let i = 0;
let resultPos = 0; let resultPos = 0;
const ii = text.length; const ii = text.length;
@ -262,18 +258,15 @@
// Merge characters // Merge characters
const removals = iNext - i - 1; const removals = iNext - i - 1;
if (removals > 0) { if (removals > 0) {
let sum = 0; sourceMap.combine(sourceMapStart, removals);
const vs = sourceMapping.splice(sourceMappingStart + 1, removals);
for (const v of vs) { sum += v; }
sourceMapping[sourceMappingStart] += sum;
} }
++sourceMappingStart; ++sourceMapStart;
// Empty elements // Empty elements
const additions = resultPosNext - resultPos - 1; const additions = resultPosNext - resultPos - 1;
for (let j = 0; j < additions; ++j) { for (let j = 0; j < additions; ++j) {
sourceMapping.splice(sourceMappingStart, 0, 0); sourceMap.insert(sourceMapStart, 0);
++sourceMappingStart; ++sourceMapStart;
} }
i = iNext; i = iNext;

View File

@ -0,0 +1,115 @@
/*
* Copyright (C) 2020 Alex Yatskov <alex@foosoft.net>
* Author: Alex Yatskov <alex@foosoft.net>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Records how the characters of a transformed string relate to the characters
 * of the original source string.
 *
 * The mapping is an array with one entry per character of the transformed
 * text; each entry is the number of source characters that the transformed
 * character accounts for. A `null` mapping is the identity mapping (every
 * entry is 1) and is only materialized into an array when it is first edited.
 */
class TextSourceMap {
    /**
     * @param {string} source The original, untransformed text.
     * @param {?number[]} [mapping=null] Optional initial mapping. Arrays are
     *   copied and normalized (finite numbers are floored, anything else
     *   becomes 0); any non-array value means "identity mapping".
     */
    constructor(source, mapping=null) {
        this._source = source;
        this._mapping = (Array.isArray(mapping) ? TextSourceMap._normalizeMapping(mapping) : null);
    }

    /**
     * The original source text this map was created with.
     * @returns {string}
     */
    get source() {
        return this._source;
    }

    /**
     * Checks whether another object is a TextSourceMap with the same source
     * and an equivalent mapping. An unmaterialized (null) mapping compares
     * equal to an explicit all-ones mapping of the source's length.
     * @param {*} other The value to compare against.
     * @returns {boolean}
     */
    equals(other) {
        if (this === other) {
            return true;
        }

        const source = this._source;
        if (!(other instanceof TextSourceMap && source === other._source)) {
            return false;
        }

        let mapping = this._mapping;
        let otherMapping = other._mapping;
        if (mapping === null) {
            if (otherMapping === null) {
                return true;
            }
            // Expand the implicit identity mapping so both sides can be compared element-wise.
            mapping = TextSourceMap._createMapping(source);
        } else if (otherMapping === null) {
            otherMapping = TextSourceMap._createMapping(source);
        }

        const mappingLength = mapping.length;
        if (mappingLength !== otherMapping.length) {
            return false;
        }

        for (let i = 0; i < mappingLength; ++i) {
            if (mapping[i] !== otherMapping[i]) {
                return false;
            }
        }

        return true;
    }

    /**
     * Computes how many source characters correspond to the first
     * `finalLength` characters of the transformed text.
     * @param {number} finalLength Length of the transformed-text prefix.
     * @returns {number} The summed mapping entries for that prefix. With an
     *   identity (null) mapping, `finalLength` is returned unchanged.
     */
    getSourceLength(finalLength) {
        const mapping = this._mapping;
        if (mapping === null) {
            return finalLength;
        }

        // Clamp to the mapping length: reading past the end would add
        // `undefined` and turn the whole result into NaN.
        const end = Math.min(finalLength, mapping.length);
        let sourceLength = 0;
        for (let i = 0; i < end; ++i) {
            sourceLength += mapping[i];
        }
        return sourceLength;
    }

    /**
     * Merges the `count` entries following `index` into the entry at `index`,
     * i.e. the transformed character at `index` now accounts for the source
     * characters of all merged entries. The merged entries are removed.
     * Calls with a non-positive count or an out-of-range index do nothing.
     * @param {number} index Index of the entry that absorbs its successors.
     * @param {number} count Number of following entries to merge.
     */
    combine(index, count) {
        if (count <= 0) { return; }

        if (this._mapping === null) {
            this._mapping = TextSourceMap._createMapping(this._source);
        }

        const mapping = this._mapping;
        // An out-of-range index has no entry to merge into; writing to it
        // would store undefined/NaN and could leave holes in the array.
        if (!(index >= 0 && index < mapping.length)) { return; }

        let sum = mapping[index];
        const parts = mapping.splice(index + 1, count);
        for (const part of parts) {
            sum += part;
        }
        mapping[index] = sum;
    }

    /**
     * Inserts new mapping entries before the entry at `index`.
     * @param {number} index Position at which to insert.
     * @param {...number} items Entries to insert (e.g. 0 for a transformed
     *   character that accounts for no source characters).
     */
    insert(index, ...items) {
        if (this._mapping === null) {
            this._mapping = TextSourceMap._createMapping(this._source);
        }

        this._mapping.splice(index, 0, ...items);
    }

    /**
     * Builds the identity mapping for a text: one entry of 1 per character.
     * @param {string} text
     * @returns {number[]}
     */
    static _createMapping(text) {
        return new Array(text.length).fill(1);
    }

    /**
     * Returns a sanitized copy of a mapping: finite numbers are floored and
     * every other value is replaced with 0.
     * @param {Array} mapping
     * @returns {number[]}
     */
    static _normalizeMapping(mapping) {
        const result = [];
        for (const value of mapping) {
            result.push(
                (typeof value === 'number' && Number.isFinite(value)) ?
                Math.floor(value) :
                0
            );
        }
        return result;
    }
}

View File

@ -19,6 +19,7 @@
/* global /* global
* Database * Database
* Deinflector * Deinflector
* TextSourceMap
* dictEnabledSet * dictEnabledSet
* dictTagBuildSource * dictTagBuildSource
* dictTagSanitize * dictTagSanitize
@ -367,17 +368,15 @@ class Translator {
const used = new Set(); const used = new Set();
for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) { for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) {
let text2 = text; let text2 = text;
let sourceMapping = null; const sourceMap = new TextSourceMap(text2);
if (halfWidth) { if (halfWidth) {
if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap);
text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMapping);
} }
if (numeric) { if (numeric) {
text2 = jp.convertNumericToFullWidth(text2); text2 = jp.convertNumericToFullWidth(text2);
} }
if (alphabetic) { if (alphabetic) {
if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } text2 = jp.convertAlphabeticToKana(text2, sourceMap);
text2 = jp.convertAlphabeticToKana(text2, sourceMapping);
} }
if (katakana) { if (katakana) {
text2 = jp.convertHiraganaToKatakana(text2); text2 = jp.convertHiraganaToKatakana(text2);
@ -391,7 +390,7 @@ class Translator {
if (used.has(text2Substring)) { break; } if (used.has(text2Substring)) { break; }
used.add(text2Substring); used.add(text2Substring);
for (const deinflection of this.deinflector.deinflect(text2Substring)) { for (const deinflection of this.deinflector.deinflect(text2Substring)) {
deinflection.rawSource = Translator.getDeinflectionRawSource(text, i, sourceMapping); deinflection.rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
deinflections.push(deinflection); deinflections.push(deinflection);
} }
} }
@ -407,25 +406,6 @@ class Translator {
} }
} }
static getDeinflectionRawSource(source, length, sourceMapping) {
if (sourceMapping === null) {
return source.substring(0, length);
}
let result = '';
let index = 0;
for (let i = 0; i < length; ++i) {
const c = sourceMapping[i];
result += source.substring(index, index + c);
index += c;
}
return result;
}
static createTextSourceMapping(text) {
return new Array(text.length).fill(1);
}
async findKanji(text, options) { async findKanji(text, options) {
const dictionaries = dictEnabledSet(options); const dictionaries = dictEnabledSet(options);
const kanjiUnique = new Set(); const kanjiUnique = new Set();

View File

@ -23,9 +23,11 @@ const vm = new VM();
vm.execute([ vm.execute([
'mixed/lib/wanakana.min.js', 'mixed/lib/wanakana.min.js',
'mixed/js/japanese.js', 'mixed/js/japanese.js',
'bg/js/text-source-map.js',
'bg/js/japanese.js' 'bg/js/japanese.js'
]); ]);
const jp = vm.get('jp'); const jp = vm.get('jp');
const TextSourceMap = vm.get('TextSourceMap');
function testIsCodePointKanji() { function testIsCodePointKanji() {
@ -262,13 +264,13 @@ function testConvertHalfWidthKanaToFullWidth() {
]; ];
for (const [string, expected, expectedSourceMapping] of data) { for (const [string, expected, expectedSourceMapping] of data) {
const sourceMapping = new Array(string.length).fill(1); const sourceMap = new TextSourceMap(string);
const actual1 = jp.convertHalfWidthKanaToFullWidth(string, null); const actual1 = jp.convertHalfWidthKanaToFullWidth(string, null);
const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMapping); const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMap);
assert.strictEqual(actual1, expected); assert.strictEqual(actual1, expected);
assert.strictEqual(actual2, expected); assert.strictEqual(actual2, expected);
if (Array.isArray(expectedSourceMapping)) { if (typeof expectedSourceMapping !== 'undefined') {
vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping); assert.ok(sourceMap.equals(new TextSourceMap(string, expectedSourceMapping)));
} }
} }
} }
@ -285,13 +287,13 @@ function testConvertAlphabeticToKana() {
]; ];
for (const [string, expected, expectedSourceMapping] of data) { for (const [string, expected, expectedSourceMapping] of data) {
const sourceMapping = new Array(string.length).fill(1); const sourceMap = new TextSourceMap(string);
const actual1 = jp.convertAlphabeticToKana(string, null); const actual1 = jp.convertAlphabeticToKana(string, null);
const actual2 = jp.convertAlphabeticToKana(string, sourceMapping); const actual2 = jp.convertAlphabeticToKana(string, sourceMap);
assert.strictEqual(actual1, expected); assert.strictEqual(actual1, expected);
assert.strictEqual(actual2, expected); assert.strictEqual(actual2, expected);
if (Array.isArray(expectedSourceMapping)) { if (typeof expectedSourceMapping !== 'undefined') {
vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping); assert.ok(sourceMap.equals(new TextSourceMap(string, expectedSourceMapping)));
} }
} }
} }