Improve translator test data (#1531)

* Organize files

* Split translator test data into two files

* Add test for AnkiNoteData
This commit is contained in:
toasted-nutbread 2021-03-15 20:55:26 -04:00 committed by GitHub
parent 9ad9961ca8
commit cba45b5e30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 55053 additions and 24190 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,295 @@
{
"optionsPresets": {
"kanji": {
"enabledDictionaryMap": [
[
"${title}",
{
"index": 0,
"priority": 0
}
]
]
},
"default": {
"wildcard": null,
"mainDictionary": "${title}",
"alphanumeric": false,
"convertHalfWidthCharacters": false,
"convertNumericCharacters": false,
"convertAlphabeticCharacters": false,
"convertHiraganaToKatakana": false,
"convertKatakanaToHiragana": false,
"collapseEmphaticSequences": false,
"textReplacements": [
null
],
"enabledDictionaryMap": [
[
"${title}",
{
"index": 0,
"priority": 0,
"allowSecondarySearches": false
}
]
]
}
},
"tests": [
{
"name": "Basic kanji test 1",
"func": "findKanji",
"text": "打",
"options": "kanji"
},
{
"name": "Basic kanji test 2",
"func": "findKanji",
"text": "込",
"options": "kanji"
},
{
"name": "Missing kanji test",
"func": "findKanji",
"text": "画",
"options": "kanji"
},
{
"name": "Find term using expression text 1",
"func": "findTerms",
"mode": "split",
"text": "打",
"options": "default"
},
{
"name": "Find term using expression text 2",
"func": "findTerms",
"mode": "split",
"text": "打つ",
"options": "default"
},
{
"name": "Find term using expression text 3",
"func": "findTerms",
"mode": "split",
"text": "打ち込む",
"options": "default"
},
{
"name": "Find term using expression text 4",
"func": "findTerms",
"mode": "split",
"text": "画像",
"options": "default"
},
{
"name": "Find term using reading 1",
"func": "findTerms",
"mode": "split",
"text": "だ",
"options": "default"
},
{
"name": "Find term using reading 2",
"func": "findTerms",
"mode": "split",
"text": "ダース",
"options": "default"
},
{
"name": "Find term using reading 3",
"func": "findTerms",
"mode": "split",
"text": "うつ",
"options": "default"
},
{
"name": "Find term using reading 4",
"func": "findTerms",
"mode": "split",
"text": "ぶつ",
"options": "default"
},
{
"name": "Find term using reading 5",
"func": "findTerms",
"mode": "split",
"text": "うちこむ",
"options": "default"
},
{
"name": "Find term using reading 6",
"func": "findTerms",
"mode": "split",
"text": "ぶちこむ",
"options": "default"
},
{
"name": "Find term using reading 7",
"func": "findTerms",
"mode": "split",
"text": "がぞう",
"options": "default"
},
{
"name": "Missing term 1",
"func": "findTerms",
"mode": "split",
"text": "為る",
"options": "default"
},
{
"name": "Missing term 2",
"func": "findTerms",
"mode": "split",
"text": "する",
"options": "default"
},
{
"name": "Search using different modes",
"func": "findTerms",
"mode": "simple",
"text": "打ち込む",
"options": "default"
},
{
"name": "Search using different modes",
"func": "findTerms",
"mode": "group",
"text": "打ち込む",
"options": "default"
},
{
"name": "Search using different modes",
"func": "findTerms",
"mode": "merge",
"text": "打ち込む",
"options": "default"
},
{
"name": "Search inflected term",
"func": "findTerms",
"mode": "split",
"text": "打ち込んでいませんでした",
"options": "default"
},
{
"name": "Ignore text inside parentheses",
"func": "findTerms",
"mode": "split",
"text": "打(う)ち込(こ)む",
"options": [
"default",
{
"alphanumeric": true,
"textReplacements": [
null,
[
{
"pattern": "\\(([^)]*)(?:\\)|$)",
"flags": "g",
"replacement": ""
}
]
]
}
]
},
{
"name": "Remove parentheses around text",
"func": "findTerms",
"mode": "split",
"text": "(打)(ち)(込)(む)",
"options": [
"default",
{
"alphanumeric": true,
"textReplacements": [
null,
[
{
"pattern": "\\(([^)]*)(?:\\)|$)",
"flags": "g",
"replacement": "$1"
}
]
]
}
]
},
{
"name": "Test non-empty replacement",
"func": "findTerms",
"mode": "split",
"text": "test",
"options": [
"default",
{
"alphanumeric": true,
"textReplacements": [
null,
[
{
"pattern": "test",
"flags": "g",
"replacement": "よみ"
}
]
]
}
]
},
{
"name": "Test non-empty replacement at end",
"func": "findTerms",
"mode": "split",
"text": "つtest",
"options": [
"default",
{
"alphanumeric": true,
"textReplacements": [
null,
[
{
"pattern": "test",
"flags": "g",
"replacement": "よみ"
}
]
]
}
]
},
{
"name": "Test non-empty replacement at start",
"func": "findTerms",
"mode": "split",
"text": "testました",
"options": [
"default",
{
"alphanumeric": true,
"textReplacements": [
null,
[
{
"pattern": "test",
"flags": "g",
"replacement": "よみ"
}
]
]
}
]
},
{
"name": "Search merged mode with non-primary definitions",
"func": "findTerms",
"mode": "merge",
"text": "うちこむ",
"options": "default"
}
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -37,28 +37,32 @@ async function createVM() {
const vm = new DatabaseVM();
vm.execute([
'js/core.js',
'js/general/cache-map.js',
'js/language/japanese-util.js',
'js/data/json-schema.js',
'js/media/media-util.js',
'js/language/dictionary-importer.js',
'js/data/anki-note-data.js',
'js/data/database.js',
'js/language/dictionary-database.js',
'js/data/json-schema.js',
'js/general/cache-map.js',
'js/general/regex-util.js',
'js/general/text-source-map.js',
'js/language/deinflector.js',
'js/language/translator.js'
'js/language/dictionary-data-util.js',
'js/language/dictionary-importer.js',
'js/language/dictionary-database.js',
'js/language/japanese-util.js',
'js/language/translator.js',
'js/media/media-util.js'
]);
const [
DictionaryImporter,
DictionaryDatabase,
JapaneseUtil,
Translator
Translator,
AnkiNoteData
] = vm.get([
'DictionaryImporter',
'DictionaryDatabase',
'JapaneseUtil',
'Translator'
'Translator',
'AnkiNoteData'
]);
// Dictionary
@ -85,8 +89,11 @@ async function createVM() {
const deinflectionReasions = JSON.parse(fs.readFileSync(path.join(__dirname, '..', 'ext', 'data/deinflect.json')));
translator.prepare(deinflectionReasions);
// Note data creation
const createPublicAnkiNoteData = (marker, data) => new AnkiNoteData(japaneseUtil, marker, data).createPublic();
// Done
return {vm, translator, dictionary: result};
return {vm, translator, dictionary: result, createPublicAnkiNoteData};
}
function buildOptions(optionsPresets, optionsArray, dictionaryTitle) {
@ -140,37 +147,64 @@ function buildOptions(optionsPresets, optionsArray, dictionaryTitle) {
async function main() {
const write = (process.argv[2] === '--write');
const {translator, dictionary: {title}} = await createVM();
const {translator, dictionary: {title}, createPublicAnkiNoteData} = await createVM();
const dataFilePath = path.join(__dirname, 'data', 'test-translator-data.json');
const data = JSON.parse(fs.readFileSync(dataFilePath, {encoding: 'utf8'}));
const {optionsPresets, tests} = data;
for (const test of tests) {
const createTestAnkiNoteData = (definition, mode) => createPublicAnkiNoteData('{marker}', {
definition,
resultOutputMode: mode,
mode: 'mode',
glossaryLayoutMode: 'default',
compactTags: false,
context: {
url: 'url:',
sentence: {text: '', offset: 0},
documentTitle: 'title'
},
injectedMedia: null
});
const testInputsFilePath = path.join(__dirname, 'data', 'translator-test-inputs.json');
const {optionsPresets, tests} = JSON.parse(fs.readFileSync(testInputsFilePath, {encoding: 'utf8'}));
const testResults1FilePath = path.join(__dirname, 'data', 'translator-test-results.json');
const expectedResults1 = JSON.parse(fs.readFileSync(testResults1FilePath, {encoding: 'utf8'}));
const actualResults1 = [];
const testResults2FilePath = path.join(__dirname, 'data', 'translator-test-results-note-data1.json');
const expectedResults2 = JSON.parse(fs.readFileSync(testResults2FilePath, {encoding: 'utf8'}));
const actualResults2 = [];
for (let i = 0, ii = tests.length; i < ii; ++i) {
const test = tests[i];
const expected1 = expectedResults1[i];
const expected2 = expectedResults2[i];
switch (test.func) {
case 'findTerms':
{
const {mode, text} = test;
const {name, mode, text} = test;
const options = buildOptions(optionsPresets, test.options, title);
const [definitions, length] = clone(await translator.findTerms(mode, text, options));
if (write) {
test.expected = {length, definitions};
} else {
const {expected} = test;
assert.deepStrictEqual(length, expected.length);
assert.deepStrictEqual(definitions, expected.definitions);
const noteDataList = clone(definitions.map((definition) => createTestAnkiNoteData(clone(definition), null)));
actualResults1.push({name, length, definitions});
actualResults2.push({name, noteDataList});
if (!write) {
assert.deepStrictEqual(length, expected1.length);
assert.deepStrictEqual(definitions, expected1.definitions);
assert.deepStrictEqual(noteDataList, expected2.noteDataList);
}
}
break;
case 'findKanji':
{
const {text} = test;
const {name, text} = test;
const options = buildOptions(optionsPresets, test.options, title);
const definitions = clone(await translator.findKanji(text, options));
if (write) {
test.expected = {definitions};
} else {
const {expected} = test;
assert.deepStrictEqual(definitions, expected.definitions);
const noteDataList = clone(definitions.map((definition) => createTestAnkiNoteData(clone(definition), null)));
actualResults1.push({name, definitions});
actualResults2.push({name, noteDataList});
if (!write) {
assert.deepStrictEqual(definitions, expected1.definitions);
assert.deepStrictEqual(noteDataList, expected2.noteDataList);
}
}
break;
@ -178,7 +212,9 @@ async function main() {
}
if (write) {
fs.writeFileSync(dataFilePath, JSON.stringify(data, null, 4), {encoding: 'utf8'});
// Use a 2-space indent instead of 4 to reduce the size of the generated files
fs.writeFileSync(testResults1FilePath, JSON.stringify(actualResults1, null, 2), {encoding: 'utf8'});
fs.writeFileSync(testResults2FilePath, JSON.stringify(actualResults2, null, 2), {encoding: 'utf8'});
}
}