From ae84d13757a98e640c8d62f8d856cecbd84dd66f Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 28 Mar 2020 17:51:58 -0400 Subject: [PATCH 1/2] Create simplified source map class --- ext/bg/background.html | 1 + ext/bg/js/japanese.js | 31 ++++------ ext/bg/js/text-source-map.js | 115 +++++++++++++++++++++++++++++++++++ ext/bg/js/translator.js | 30 ++------- test/test-japanese.js | 18 +++--- 5 files changed, 143 insertions(+), 52 deletions(-) create mode 100644 ext/bg/js/text-source-map.js diff --git a/ext/bg/background.html b/ext/bg/background.html index f7cf6e55..e456717e 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -38,6 +38,7 @@ + diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index c5873cf1..2a2b39fd 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -158,9 +158,8 @@ return result; } - function convertHalfWidthKanaToFullWidth(text, sourceMapping) { + function convertHalfWidthKanaToFullWidth(text, sourceMap=null) { let result = ''; - const hasSourceMapping = Array.isArray(sourceMapping); // This function is safe to use charCodeAt instead of codePointAt, since all // the relevant characters are represented with a single UTF-16 character code. @@ -192,10 +191,8 @@ } } - if (hasSourceMapping && index > 0) { - index = result.length; - const v = sourceMapping.splice(index + 1, 1)[0]; - sourceMapping[index] += v; + if (sourceMap !== null && index > 0) { + sourceMap.combine(result.length, 1); } result += c2; } @@ -203,7 +200,7 @@ return result; } - function convertAlphabeticToKana(text, sourceMapping) { + function convertAlphabeticToKana(text, sourceMap=null) { let part = ''; let result = ''; @@ -222,7 +219,7 @@ c = 0x2d; // '-' } else { if (part.length > 0) { - result += convertAlphabeticPartToKana(part, sourceMapping, result.length); + result += convertAlphabeticPartToKana(part, sourceMap, result.length); part = ''; } result += char; @@ -232,17 +229,16 @@ } if (part.length > 0) { - result += convertAlphabeticPartToKana(part, sourceMapping, result.length); + result += convertAlphabeticPartToKana(part, sourceMap, result.length); } return result; } - function convertAlphabeticPartToKana(text, sourceMapping, sourceMappingStart) { + function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) { const result = wanakana.toHiragana(text); // Generate source mapping - if (Array.isArray(sourceMapping)) { - if (typeof sourceMappingStart !== 'number') { sourceMappingStart = 0; } + if (sourceMap !== null) { let i = 0; let resultPos = 0; const ii = text.length; @@ -262,18 +258,15 @@ // Merge characters const removals = iNext - i - 1; if (removals > 0) { - let sum = 0; - const vs = sourceMapping.splice(sourceMappingStart + 1, removals); - for (const v of vs) { sum += v; } - sourceMapping[sourceMappingStart] += sum; + sourceMap.combine(sourceMapStart, removals); } - ++sourceMappingStart; + ++sourceMapStart; // Empty elements const additions = resultPosNext - resultPos - 1; for (let j = 0; j < additions; ++j) { - sourceMapping.splice(sourceMappingStart, 0, 0); - ++sourceMappingStart; + sourceMap.insert(sourceMapStart, 0); + ++sourceMapStart; } i = iNext; diff --git a/ext/bg/js/text-source-map.js b/ext/bg/js/text-source-map.js new file mode 100644 index 00000000..24970978 --- /dev/null +++ b/ext/bg/js/text-source-map.js @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2020 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +class TextSourceMap { + constructor(source, mapping=null) { + this._source = source; + this._mapping = (Array.isArray(mapping) ? TextSourceMap._normalizeMapping(mapping) : null); + } + + get source() { + return this._source; + } + + equals(other) { + if (this === other) { + return true; + } + + const source = this._source; + if (!(other instanceof TextSourceMap && source === other._source)) { + return false; + } + + let mapping = this._mapping; + let otherMapping = other._mapping; + if (mapping === null) { + if (otherMapping === null) { + return true; + } + mapping = TextSourceMap._createMapping(source); + } else if (otherMapping === null) { + otherMapping = TextSourceMap._createMapping(source); + } + + const mappingLength = mapping.length; + if (mappingLength !== otherMapping.length) { + return false; + } + + for (let i = 0; i < mappingLength; ++i) { + if (mapping[i] !== otherMapping[i]) { + return false; + } + } + + return true; + } + + getSourceLength(finalLength) { + const mapping = this._mapping; + if (mapping === null) { + return finalLength; + } + + let sourceLength = 0; + for (let i = 0; i < finalLength; ++i) { + sourceLength += mapping[i]; + } + return sourceLength; + } + + combine(index, count) { + if (count <= 0) { return; } + + if (this._mapping === null) { + this._mapping = TextSourceMap._createMapping(this._source); + } + + let sum = this._mapping[index]; + const parts = this._mapping.splice(index + 1, count); + for (const part of parts) { + sum += part; + } + this._mapping[index] = sum; + } + + insert(index, ...items) { + if (this._mapping === null) { + this._mapping = TextSourceMap._createMapping(this._source); + } + + this._mapping.splice(index, 0, ...items); + } + + static _createMapping(text) { + return new Array(text.length).fill(1); + } + + static _normalizeMapping(mapping) { + const result = []; + for (const value of mapping) { + result.push( + (typeof value === 'number' && Number.isFinite(value)) ? + Math.floor(value) : + 0 + ); + } + return result; + } +} diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 6f43f7b0..584da02c 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -19,6 +19,7 @@ /* global * Database * Deinflector + * TextSourceMap * dictEnabledSet * dictTagBuildSource * dictTagSanitize @@ -367,17 +368,15 @@ class Translator { const used = new Set(); for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) { let text2 = text; - let sourceMapping = null; + const sourceMap = new TextSourceMap(text2); if (halfWidth) { - if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } - text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMapping); + text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap); } if (numeric) { text2 = jp.convertNumericToFullWidth(text2); } if (alphabetic) { - if (sourceMapping === null) { sourceMapping = Translator.createTextSourceMapping(text2); } - text2 = jp.convertAlphabeticToKana(text2, sourceMapping); + text2 = jp.convertAlphabeticToKana(text2, sourceMap); } if (katakana) { text2 = jp.convertHiraganaToKatakana(text2); @@ -391,7 +390,7 @@ class Translator { if (used.has(text2Substring)) { break; } used.add(text2Substring); for (const deinflection of this.deinflector.deinflect(text2Substring)) { - deinflection.rawSource = Translator.getDeinflectionRawSource(text, i, sourceMapping); + deinflection.rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i)); deinflections.push(deinflection); } } @@ -407,25 +406,6 @@ class Translator { } } - static getDeinflectionRawSource(source, length, sourceMapping) { - if (sourceMapping === null) { - return source.substring(0, length); - } - - let result = ''; - let index = 0; - for (let i = 0; i < length; ++i) { - const c = sourceMapping[i]; - result += source.substring(index, index + c); - index += c; - } - return result; - } - - static createTextSourceMapping(text) { - return new Array(text.length).fill(1); - } - async findKanji(text, options) { const dictionaries = dictEnabledSet(options); const kanjiUnique = new Set(); diff --git a/test/test-japanese.js b/test/test-japanese.js index c5d220e7..a16a73b7 100644 --- a/test/test-japanese.js +++ b/test/test-japanese.js @@ -23,9 +23,11 @@ const vm = new VM(); vm.execute([ 'mixed/lib/wanakana.min.js', 'mixed/js/japanese.js', + 'bg/js/text-source-map.js', 'bg/js/japanese.js' ]); const jp = vm.get('jp'); +const TextSourceMap = vm.get('TextSourceMap'); function testIsCodePointKanji() { @@ -262,13 +264,13 @@ function testConvertHalfWidthKanaToFullWidth() { ]; for (const [string, expected, expectedSourceMapping] of data) { - const sourceMapping = new Array(string.length).fill(1); + const sourceMap = new TextSourceMap(string); const actual1 = jp.convertHalfWidthKanaToFullWidth(string, null); - const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMapping); + const actual2 = jp.convertHalfWidthKanaToFullWidth(string, sourceMap); assert.strictEqual(actual1, expected); assert.strictEqual(actual2, expected); - if (Array.isArray(expectedSourceMapping)) { - vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping); + if (typeof expectedSourceMapping !== 'undefined') { + assert.ok(sourceMap.equals(new TextSourceMap(string, expectedSourceMapping))); } } } @@ -285,13 +287,13 @@ function testConvertAlphabeticToKana() { ]; for (const [string, expected, expectedSourceMapping] of data) { - const sourceMapping = new Array(string.length).fill(1); + const sourceMap = new TextSourceMap(string); const actual1 = jp.convertAlphabeticToKana(string, null); - const actual2 = jp.convertAlphabeticToKana(string, sourceMapping); + const actual2 = jp.convertAlphabeticToKana(string, sourceMap); assert.strictEqual(actual1, expected); assert.strictEqual(actual2, expected); - if (Array.isArray(expectedSourceMapping)) { - vm.assert.deepStrictEqual(sourceMapping, expectedSourceMapping); + if (typeof expectedSourceMapping !== 'undefined') { + assert.ok(sourceMap.equals(new TextSourceMap(string, expectedSourceMapping))); } } } From 7225201fb6776664d7a820e45e85c3500e83c80f Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 28 Mar 2020 18:24:51 -0400 Subject: [PATCH 2/2] Add tests --- package.json | 2 +- test/test-text-source-map.js | 234 +++++++++++++++++++++++++++++++++++ 2 files changed, 235 insertions(+), 1 deletion(-) create mode 100644 test/test-text-source-map.js diff --git a/package.json b/package.json index 8ae103a0..b02ec179 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ "scripts": { "test": "npm run test-lint && npm run test-code", "test-lint": "eslint . && node ./test/lint/global-declarations.js", - "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js && node ./test/test-japanese.js" + "test-code": "node ./test/test-schema.js && node ./test/test-dictionary.js && node ./test/test-database.js && node ./test/test-document.js && node ./test/test-object-property-accessor.js && node ./test/test-japanese.js && node ./test/test-text-source-map.js" }, "repository": { "type": "git", diff --git a/test/test-text-source-map.js b/test/test-text-source-map.js new file mode 100644 index 00000000..25bd8fc2 --- /dev/null +++ b/test/test-text-source-map.js @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2020 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +const assert = require('assert'); +const {VM} = require('./yomichan-vm'); + +const vm = new VM(); +vm.execute(['bg/js/text-source-map.js']); +const TextSourceMap = vm.get('TextSourceMap'); + + +function testSource() { + const data = [ + ['source1'], + ['source2'], + ['source3'] + ]; + + for (const [source] of data) { + const sourceMap = new TextSourceMap(source); + assert.strictEqual(source, sourceMap.source); + } +} + +function testEquals() { + const data = [ + [['source1', null], ['source1', null], true], + [['source2', null], ['source2', null], true], + [['source3', null], ['source3', null], true], + + [['source1', [1, 1, 1, 1, 1, 1, 1]], ['source1', null], true], + [['source2', [1, 1, 1, 1, 1, 1, 1]], ['source2', null], true], + [['source3', [1, 1, 1, 1, 1, 1, 1]], ['source3', null], true], + + [['source1', null], ['source1', [1, 1, 1, 1, 1, 1, 1]], true], + [['source2', null], ['source2', [1, 1, 1, 1, 1, 1, 1]], true], + [['source3', null], ['source3', [1, 1, 1, 1, 1, 1, 1]], true], + + [['source1', [1, 1, 1, 1, 1, 1, 1]], ['source1', [1, 1, 1, 1, 1, 1, 1]], true], + [['source2', [1, 1, 1, 1, 1, 1, 1]], ['source2', [1, 1, 1, 1, 1, 1, 1]], true], + [['source3', [1, 1, 1, 1, 1, 1, 1]], ['source3', [1, 1, 1, 1, 1, 1, 1]], true], + + [['source1', [1, 2, 1, 3]], ['source1', [1, 2, 1, 3]], true], + [['source2', [1, 2, 1, 3]], ['source2', [1, 2, 1, 3]], true], + [['source3', [1, 2, 1, 3]], ['source3', [1, 2, 1, 3]], true], + + [['source1', [1, 3, 1, 2]], ['source1', [1, 2, 1, 3]], false], + [['source2', [1, 3, 1, 2]], ['source2', [1, 2, 1, 3]], false], + [['source3', [1, 3, 1, 2]], ['source3', [1, 2, 1, 3]], false], + + [['source1', [1, 1, 1, 1, 1, 1, 1]], ['source4', [1, 1, 1, 1, 1, 1, 1]], false], + [['source2', [1, 1, 1, 1, 1, 1, 1]], ['source5', [1, 1, 1, 1, 1, 1, 1]], false], + [['source3', [1, 1, 1, 1, 1, 1, 1]], ['source6', [1, 1, 1, 1, 1, 1, 1]], false] + ]; + + for (const [[source1, mapping1], [source2, mapping2], expectedEquals] of data) { + const sourceMap1 = new TextSourceMap(source1, mapping1); + const sourceMap2 = new TextSourceMap(source2, mapping2); + assert.ok(sourceMap1.equals(sourceMap1)); + assert.ok(sourceMap2.equals(sourceMap2)); + assert.strictEqual(sourceMap1.equals(sourceMap2), expectedEquals); + } +} + +function testGetSourceLength() { + const data = [ + [['source', [1, 1, 1, 1, 1, 1]], 1, 1], + [['source', [1, 1, 1, 1, 1, 1]], 2, 2], + [['source', [1, 1, 1, 1, 1, 1]], 3, 3], + [['source', [1, 1, 1, 1, 1, 1]], 4, 4], + [['source', [1, 1, 1, 1, 1, 1]], 5, 5], + [['source', [1, 1, 1, 1, 1, 1]], 6, 6], + + [['source', [2, 2, 2]], 1, 2], + [['source', [2, 2, 2]], 2, 4], + [['source', [2, 2, 2]], 3, 6], + + [['source', [3, 3]], 1, 3], + [['source', [3, 3]], 2, 6], + + [['source', [6, 6]], 1, 6] + ]; + + for (const [[source, mapping], finalLength, expectedValue] of data) { + const sourceMap = new TextSourceMap(source, mapping); + assert.strictEqual(sourceMap.getSourceLength(finalLength), expectedValue); + } +} + +function testCombineInsert() { + const data = [ + // No operations + [ + ['source', null], + ['source', [1, 1, 1, 1, 1, 1]], + [] + ], + + // Combine + [ + ['source', null], + ['source', [3, 1, 1, 1]], + [ + ['combine', 0, 2] + ] + ], + [ + ['source', null], + ['source', [1, 1, 1, 3]], + [ + ['combine', 3, 2] + ] + ], + [ + ['source', null], + ['source', [3, 3]], + [ + ['combine', 0, 2], + ['combine', 1, 2] + ] + ], + [ + ['source', null], + ['source', [3, 3]], + [ + ['combine', 3, 2], + ['combine', 0, 2] + ] + ], + + // Insert + [ + ['source', null], + ['source', [0, 1, 1, 1, 1, 1, 1]], + [ + ['insert', 0, 0] + ] + ], + [ + ['source', null], + ['source', [1, 1, 1, 1, 1, 1, 0]], + [ + ['insert', 6, 0] + ] + ], + [ + ['source', null], + ['source', [0, 1, 1, 1, 1, 1, 1, 0]], + [ + ['insert', 0, 0], + ['insert', 7, 0] + ] + ], + [ + ['source', null], + ['source', [0, 1, 1, 1, 1, 1, 1, 0]], + [ + ['insert', 6, 0], + ['insert', 0, 0] + ] + ], + + // Mixed + [ + ['source', null], + ['source', [3, 0, 3]], + [ + ['combine', 0, 2], + ['insert', 1, 0], + ['combine', 2, 2] + ] + ], + [ + ['source', null], + ['source', [3, 0, 3]], + [ + ['combine', 0, 2], + ['combine', 1, 2], + ['insert', 1, 0] + ] + ], + [ + ['source', null], + ['source', [3, 0, 3]], + [ + ['insert', 3, 0], + ['combine', 0, 2], + ['combine', 2, 2] + ] + ] + ]; + + for (const [[source, mapping], [expectedSource, expectedMapping], operations] of data) { + const sourceMap = new TextSourceMap(source, mapping); + const expectedSourceMap = new TextSourceMap(expectedSource, expectedMapping); + for (const [operation, ...args] of operations) { + switch (operation) { + case 'combine': + sourceMap.combine(...args); + break; + case 'insert': + sourceMap.insert(...args); + break; + } + } + assert.ok(sourceMap.equals(expectedSourceMap)); + } +} + + +function main() { + testSource(); + testEquals(); + testGetSourceLength(); + testCombineInsert(); +} + + +if (require.main === module) { main(); }