Merge pull request #369 from toasted-nutbread/dictionary-validation

Dictionary validation
This commit is contained in:
toasted-nutbread 2020-02-22 14:34:20 -05:00 committed by GitHub
commit f3c4b0e1e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 589 additions and 371 deletions

View File

@ -22,11 +22,40 @@
}, },
"format": { "format": {
"type": "integer", "type": "integer",
"description": "Format of data found in the JSON data files." "description": "Format of data found in the JSON data files.",
"enum": [1, 2, 3]
}, },
"version": { "version": {
"type": "integer", "type": "integer",
"description": "Alias for format." "description": "Alias for format.",
"enum": [1, 2, 3]
},
"tagMeta": {
"type": "object",
"description": "Tag information for terms and kanji. This object is obsolete and individual tag files should be used instead.",
"additionalProperties": {
"type": "object",
"description": "Information about a single tag. The object key is the name of the tag.",
"properties": {
"category": {
"type": "string",
"description": "Category for the tag."
},
"order": {
"type": "number",
"description": "Sorting order for the tag."
},
"notes": {
"type": "string",
"description": "Notes for the tag."
},
"score": {
"type": "number",
"description": "Score used to determine popularity. Negative values are more rare and positive values are more frequent. This score is also used to sort search results."
}
},
"additionalProperties": false
}
} }
}, },
"anyOf": [ "anyOf": [

View File

@ -16,21 +16,24 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>. * along with this program. If not, see <https://www.gnu.org/licenses/>.
*/ */
/*global dictFieldSplit, dictTagSanitize, JSZip*/ /*global dictFieldSplit, requestJson, JsonSchema, JSZip*/
class Database { class Database {
constructor() { constructor() {
this.db = null; this.db = null;
this._schemas = new Map();
} }
// Public
async prepare() { async prepare() {
if (this.db !== null) { if (this.db !== null) {
throw new Error('Database already initialized'); throw new Error('Database already initialized');
} }
try { try {
this.db = await Database.open('dict', 5, (db, transaction, oldVersion) => { this.db = await Database._open('dict', 5, (db, transaction, oldVersion) => {
Database.upgrade(db, transaction, oldVersion, [ Database._upgrade(db, transaction, oldVersion, [
{ {
version: 2, version: 2,
stores: { stores: {
@ -97,23 +100,23 @@ class Database {
} }
async close() { async close() {
this.validate(); this._validate();
this.db.close(); this.db.close();
this.db = null; this.db = null;
} }
async purge() { async purge() {
this.validate(); this._validate();
this.db.close(); this.db.close();
await Database.deleteDatabase(this.db.name); await Database._deleteDatabase(this.db.name);
this.db = null; this.db = null;
await this.prepare(); await this.prepare();
} }
async deleteDictionary(dictionaryName, onProgress, progressSettings) { async deleteDictionary(dictionaryName, onProgress, progressSettings) {
this.validate(); this._validate();
const targets = [ const targets = [
['dictionaries', 'title'], ['dictionaries', 'title'],
@ -140,14 +143,14 @@ class Database {
const dbObjectStore = dbTransaction.objectStore(objectStoreName); const dbObjectStore = dbTransaction.objectStore(objectStoreName);
const dbIndex = dbObjectStore.index(index); const dbIndex = dbObjectStore.index(index);
const only = IDBKeyRange.only(dictionaryName); const only = IDBKeyRange.only(dictionaryName);
promises.push(Database.deleteValues(dbObjectStore, dbIndex, only, onProgress, progressData, progressRate)); promises.push(Database._deleteValues(dbObjectStore, dbIndex, only, onProgress, progressData, progressRate));
} }
await Promise.all(promises); await Promise.all(promises);
} }
async findTermsBulk(termList, titles, wildcard) { async findTermsBulk(termList, titles, wildcard) {
this.validate(); this._validate();
const promises = []; const promises = [];
const visited = {}; const visited = {};
@ -155,7 +158,7 @@ class Database {
const processRow = (row, index) => { const processRow = (row, index) => {
if (titles.includes(row.dictionary) && !hasOwn(visited, row.id)) { if (titles.includes(row.dictionary) && !hasOwn(visited, row.id)) {
visited[row.id] = true; visited[row.id] = true;
results.push(Database.createTerm(row, index)); results.push(Database._createTerm(row, index));
} }
}; };
@ -171,8 +174,8 @@ class Database {
const term = prefixWildcard ? stringReverse(termList[i]) : termList[i]; const term = prefixWildcard ? stringReverse(termList[i]) : termList[i];
const query = useWildcard ? IDBKeyRange.bound(term, `${term}\uffff`, false, false) : IDBKeyRange.only(term); const query = useWildcard ? IDBKeyRange.bound(term, `${term}\uffff`, false, false) : IDBKeyRange.only(term);
promises.push( promises.push(
Database.getAll(dbIndex1, query, i, processRow), Database._getAll(dbIndex1, query, i, processRow),
Database.getAll(dbIndex2, query, i, processRow) Database._getAll(dbIndex2, query, i, processRow)
); );
} }
@ -182,13 +185,13 @@ class Database {
} }
async findTermsExactBulk(termList, readingList, titles) { async findTermsExactBulk(termList, readingList, titles) {
this.validate(); this._validate();
const promises = []; const promises = [];
const results = []; const results = [];
const processRow = (row, index) => { const processRow = (row, index) => {
if (row.reading === readingList[index] && titles.includes(row.dictionary)) { if (row.reading === readingList[index] && titles.includes(row.dictionary)) {
results.push(Database.createTerm(row, index)); results.push(Database._createTerm(row, index));
} }
}; };
@ -198,7 +201,7 @@ class Database {
for (let i = 0; i < termList.length; ++i) { for (let i = 0; i < termList.length; ++i) {
const only = IDBKeyRange.only(termList[i]); const only = IDBKeyRange.only(termList[i]);
promises.push(Database.getAll(dbIndex, only, i, processRow)); promises.push(Database._getAll(dbIndex, only, i, processRow));
} }
await Promise.all(promises); await Promise.all(promises);
@ -207,13 +210,13 @@ class Database {
} }
async findTermsBySequenceBulk(sequenceList, mainDictionary) { async findTermsBySequenceBulk(sequenceList, mainDictionary) {
this.validate(); this._validate();
const promises = []; const promises = [];
const results = []; const results = [];
const processRow = (row, index) => { const processRow = (row, index) => {
if (row.dictionary === mainDictionary) { if (row.dictionary === mainDictionary) {
results.push(Database.createTerm(row, index)); results.push(Database._createTerm(row, index));
} }
}; };
@ -223,7 +226,7 @@ class Database {
for (let i = 0; i < sequenceList.length; ++i) { for (let i = 0; i < sequenceList.length; ++i) {
const only = IDBKeyRange.only(sequenceList[i]); const only = IDBKeyRange.only(sequenceList[i]);
promises.push(Database.getAll(dbIndex, only, i, processRow)); promises.push(Database._getAll(dbIndex, only, i, processRow));
} }
await Promise.all(promises); await Promise.all(promises);
@ -232,51 +235,26 @@ class Database {
} }
async findTermMetaBulk(termList, titles) { async findTermMetaBulk(termList, titles) {
return this.findGenericBulk('termMeta', 'expression', termList, titles, Database.createTermMeta); return this._findGenericBulk('termMeta', 'expression', termList, titles, Database._createTermMeta);
} }
async findKanjiBulk(kanjiList, titles) { async findKanjiBulk(kanjiList, titles) {
return this.findGenericBulk('kanji', 'character', kanjiList, titles, Database.createKanji); return this._findGenericBulk('kanji', 'character', kanjiList, titles, Database._createKanji);
} }
async findKanjiMetaBulk(kanjiList, titles) { async findKanjiMetaBulk(kanjiList, titles) {
return this.findGenericBulk('kanjiMeta', 'character', kanjiList, titles, Database.createKanjiMeta); return this._findGenericBulk('kanjiMeta', 'character', kanjiList, titles, Database._createKanjiMeta);
}
async findGenericBulk(tableName, indexName, indexValueList, titles, createResult) {
this.validate();
const promises = [];
const results = [];
const processRow = (row, index) => {
if (titles.includes(row.dictionary)) {
results.push(createResult(row, index));
}
};
const dbTransaction = this.db.transaction([tableName], 'readonly');
const dbTerms = dbTransaction.objectStore(tableName);
const dbIndex = dbTerms.index(indexName);
for (let i = 0; i < indexValueList.length; ++i) {
const only = IDBKeyRange.only(indexValueList[i]);
promises.push(Database.getAll(dbIndex, only, i, processRow));
}
await Promise.all(promises);
return results;
} }
async findTagForTitle(name, title) { async findTagForTitle(name, title) {
this.validate(); this._validate();
let result = null; let result = null;
const dbTransaction = this.db.transaction(['tagMeta'], 'readonly'); const dbTransaction = this.db.transaction(['tagMeta'], 'readonly');
const dbTerms = dbTransaction.objectStore('tagMeta'); const dbTerms = dbTransaction.objectStore('tagMeta');
const dbIndex = dbTerms.index('name'); const dbIndex = dbTerms.index('name');
const only = IDBKeyRange.only(name); const only = IDBKeyRange.only(name);
await Database.getAll(dbIndex, only, null, (row) => { await Database._getAll(dbIndex, only, null, (row) => {
if (title === row.dictionary) { if (title === row.dictionary) {
result = row; result = row;
} }
@ -286,19 +264,19 @@ class Database {
} }
async getDictionaryInfo() { async getDictionaryInfo() {
this.validate(); this._validate();
const results = []; const results = [];
const dbTransaction = this.db.transaction(['dictionaries'], 'readonly'); const dbTransaction = this.db.transaction(['dictionaries'], 'readonly');
const dbDictionaries = dbTransaction.objectStore('dictionaries'); const dbDictionaries = dbTransaction.objectStore('dictionaries');
await Database.getAll(dbDictionaries, null, null, (info) => results.push(info)); await Database._getAll(dbDictionaries, null, null, (info) => results.push(info));
return results; return results;
} }
async getDictionaryCounts(dictionaryNames, getTotal) { async getDictionaryCounts(dictionaryNames, getTotal) {
this.validate(); this._validate();
const objectStoreNames = [ const objectStoreNames = [
'kanji', 'kanji',
@ -319,7 +297,7 @@ class Database {
// Query is required for Edge, otherwise index.count throws an exception. // Query is required for Edge, otherwise index.count throws an exception.
const query1 = IDBKeyRange.lowerBound('', false); const query1 = IDBKeyRange.lowerBound('', false);
const totalPromise = getTotal ? Database.getCounts(targets, query1) : null; const totalPromise = getTotal ? Database._getCounts(targets, query1) : null;
const counts = []; const counts = [];
const countPromises = []; const countPromises = [];
@ -327,7 +305,7 @@ class Database {
counts.push(null); counts.push(null);
const index = i; const index = i;
const query2 = IDBKeyRange.only(dictionaryNames[i]); const query2 = IDBKeyRange.only(dictionaryNames[i]);
const countPromise = Database.getCounts(targets, query2).then((v) => counts[index] = v); const countPromise = Database._getCounts(targets, query2).then((v) => counts[index] = v);
countPromises.push(countPromise); countPromises.push(countPromise);
} }
await Promise.all(countPromises); await Promise.all(countPromises);
@ -339,278 +317,287 @@ class Database {
return result; return result;
} }
async importDictionary(archive, progressCallback, details) { async importDictionary(archiveSource, onProgress, details) {
this.validate(); this._validate();
const db = this.db;
const hasOnProgress = (typeof onProgress === 'function');
const errors = []; // Read archive
const prefixWildcardsSupported = details.prefixWildcardsSupported; const archive = await JSZip.loadAsync(archiveSource);
const maxTransactionLength = 1000; // Read and validate index
const bulkAdd = async (objectStoreName, items, total, current) => { const indexFileName = 'index.json';
const db = this.db; const indexFile = archive.files[indexFileName];
for (let i = 0; i < items.length; i += maxTransactionLength) {
if (progressCallback) {
progressCallback(total, current + i / items.length);
}
try {
const count = Math.min(maxTransactionLength, items.length - i);
const transaction = db.transaction([objectStoreName], 'readwrite');
const objectStore = transaction.objectStore(objectStoreName);
await Database.bulkAdd(objectStore, items, i, count);
} catch (e) {
errors.push(e);
}
}
};
const indexDataLoaded = async (summary) => {
if (summary.version > 3) {
throw new Error('Unsupported dictionary version');
}
const db = this.db;
const dbCountTransaction = db.transaction(['dictionaries'], 'readonly');
const dbIndex = dbCountTransaction.objectStore('dictionaries').index('title');
const only = IDBKeyRange.only(summary.title);
const count = await Database.getCount(dbIndex, only);
if (count > 0) {
throw new Error('Dictionary is already imported');
}
const transaction = db.transaction(['dictionaries'], 'readwrite');
const objectStore = transaction.objectStore('dictionaries');
await Database.bulkAdd(objectStore, [summary], 0, 1);
};
const termDataLoaded = async (summary, entries, total, current) => {
const rows = [];
if (summary.version === 1) {
for (const [expression, reading, definitionTags, rules, score, ...glossary] of entries) {
rows.push({
expression,
reading,
definitionTags,
rules,
score,
glossary,
dictionary: summary.title
});
}
} else {
for (const [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] of entries) {
rows.push({
expression,
reading,
definitionTags,
rules,
score,
glossary,
sequence,
termTags,
dictionary: summary.title
});
}
}
if (prefixWildcardsSupported) {
for (const row of rows) {
row.expressionReverse = stringReverse(row.expression);
row.readingReverse = stringReverse(row.reading);
}
}
await bulkAdd('terms', rows, total, current);
};
const termMetaDataLoaded = async (summary, entries, total, current) => {
const rows = [];
for (const [expression, mode, data] of entries) {
rows.push({
expression,
mode,
data,
dictionary: summary.title
});
}
await bulkAdd('termMeta', rows, total, current);
};
const kanjiDataLoaded = async (summary, entries, total, current) => {
const rows = [];
if (summary.version === 1) {
for (const [character, onyomi, kunyomi, tags, ...meanings] of entries) {
rows.push({
character,
onyomi,
kunyomi,
tags,
meanings,
dictionary: summary.title
});
}
} else {
for (const [character, onyomi, kunyomi, tags, meanings, stats] of entries) {
rows.push({
character,
onyomi,
kunyomi,
tags,
meanings,
stats,
dictionary: summary.title
});
}
}
await bulkAdd('kanji', rows, total, current);
};
const kanjiMetaDataLoaded = async (summary, entries, total, current) => {
const rows = [];
for (const [character, mode, data] of entries) {
rows.push({
character,
mode,
data,
dictionary: summary.title
});
}
await bulkAdd('kanjiMeta', rows, total, current);
};
const tagDataLoaded = async (summary, entries, total, current) => {
const rows = [];
for (const [name, category, order, notes, score] of entries) {
const row = dictTagSanitize({
name,
category,
order,
notes,
score,
dictionary: summary.title
});
rows.push(row);
}
await bulkAdd('tagMeta', rows, total, current);
};
const result = await Database.importDictionaryZip(
archive,
indexDataLoaded,
termDataLoaded,
termMetaDataLoaded,
kanjiDataLoaded,
kanjiMetaDataLoaded,
tagDataLoaded,
details
);
return {result, errors};
}
validate() {
if (this.db === null) {
throw new Error('Database not initialized');
}
}
static async importDictionaryZip(
archive,
indexDataLoaded,
termDataLoaded,
termMetaDataLoaded,
kanjiDataLoaded,
kanjiMetaDataLoaded,
tagDataLoaded,
details
) {
const zip = await JSZip.loadAsync(archive);
const indexFile = zip.files['index.json'];
if (!indexFile) { if (!indexFile) {
throw new Error('No dictionary index found in archive'); throw new Error('No dictionary index found in archive');
} }
const index = JSON.parse(await indexFile.async('string')); const index = JSON.parse(await indexFile.async('string'));
if (!index.title || !index.revision) {
const indexSchema = await this._getSchema('/bg/data/dictionary-index-schema.json');
Database._validateJsonSchema(index, indexSchema, indexFileName);
const dictionaryTitle = index.title;
const version = index.format || index.version;
if (!dictionaryTitle || !index.revision) {
throw new Error('Unrecognized dictionary format'); throw new Error('Unrecognized dictionary format');
} }
const summary = { // Verify database is not already imported
title: index.title, if (await this._dictionaryExists(dictionaryTitle)) {
revision: index.revision, throw new Error('Dictionary is already imported');
sequenced: index.sequenced,
version: index.format || index.version,
prefixWildcardsSupported: !!details.prefixWildcardsSupported
};
await indexDataLoaded(summary);
const buildTermBankName = (index) => `term_bank_${index + 1}.json`;
const buildTermMetaBankName = (index) => `term_meta_bank_${index + 1}.json`;
const buildKanjiBankName = (index) => `kanji_bank_${index + 1}.json`;
const buildKanjiMetaBankName = (index) => `kanji_meta_bank_${index + 1}.json`;
const buildTagBankName = (index) => `tag_bank_${index + 1}.json`;
const countBanks = (namer) => {
let count = 0;
while (zip.files[namer(count)]) {
++count;
}
return count;
};
const termBankCount = countBanks(buildTermBankName);
const termMetaBankCount = countBanks(buildTermMetaBankName);
const kanjiBankCount = countBanks(buildKanjiBankName);
const kanjiMetaBankCount = countBanks(buildKanjiMetaBankName);
const tagBankCount = countBanks(buildTagBankName);
let bankLoadedCount = 0;
let bankTotalCount =
termBankCount +
termMetaBankCount +
kanjiBankCount +
kanjiMetaBankCount +
tagBankCount;
if (tagDataLoaded && index.tagMeta) {
const bank = [];
for (const name in index.tagMeta) {
const tag = index.tagMeta[name];
bank.push([name, tag.category, tag.order, tag.notes, tag.score]);
}
tagDataLoaded(summary, bank, ++bankTotalCount, bankLoadedCount++);
} }
const loadBank = async (summary, namer, count, callback) => { // Data format converters
if (callback) { const convertTermBankEntry = (entry) => {
for (let i = 0; i < count; ++i) { if (version === 1) {
const bankFile = zip.files[namer(i)]; const [expression, reading, definitionTags, rules, score, ...glossary] = entry;
const bank = JSON.parse(await bankFile.async('string')); return {expression, reading, definitionTags, rules, score, glossary};
await callback(summary, bank, bankTotalCount, bankLoadedCount++); } else {
const [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry;
return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags};
}
};
const convertTermMetaBankEntry = (entry) => {
const [expression, mode, data] = entry;
return {expression, mode, data};
};
const convertKanjiBankEntry = (entry) => {
if (version === 1) {
const [character, onyomi, kunyomi, tags, ...meanings] = entry;
return {character, onyomi, kunyomi, tags, meanings};
} else {
const [character, onyomi, kunyomi, tags, meanings, stats] = entry;
return {character, onyomi, kunyomi, tags, meanings, stats};
}
};
const convertKanjiMetaBankEntry = (entry) => {
const [character, mode, data] = entry;
return {character, mode, data};
};
const convertTagBankEntry = (entry) => {
const [name, category, order, notes, score] = entry;
return {name, category, order, notes, score};
};
// Archive file reading
const readFileSequence = async (fileNameFormat, convertEntry, schema) => {
const results = [];
for (let i = 1; true; ++i) {
const fileName = fileNameFormat.replace(/\?/, `${i}`);
const file = archive.files[fileName];
if (!file) { break; }
const entries = JSON.parse(await file.async('string'));
Database._validateJsonSchema(entries, schema, fileName);
for (let entry of entries) {
entry = convertEntry(entry);
entry.dictionary = dictionaryTitle;
results.push(entry);
}
}
return results;
};
// Load schemas
const dataBankSchemaPaths = this.constructor._getDataBankSchemaPaths(version);
const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path)));
// Load data
const termList = await readFileSequence('term_bank_?.json', convertTermBankEntry, dataBankSchemas[0]);
const termMetaList = await readFileSequence('term_meta_bank_?.json', convertTermMetaBankEntry, dataBankSchemas[1]);
const kanjiList = await readFileSequence('kanji_bank_?.json', convertKanjiBankEntry, dataBankSchemas[2]);
const kanjiMetaList = await readFileSequence('kanji_meta_bank_?.json', convertKanjiMetaBankEntry, dataBankSchemas[3]);
const tagList = await readFileSequence('tag_bank_?.json', convertTagBankEntry, dataBankSchemas[4]);
// Old tags
const indexTagMeta = index.tagMeta;
if (typeof indexTagMeta === 'object' && indexTagMeta !== null) {
for (const name of Object.keys(indexTagMeta)) {
const {category, order, notes, score} = indexTagMeta[name];
tagList.push({name, category, order, notes, score});
}
}
// Prefix wildcard support
const prefixWildcardsSupported = !!details.prefixWildcardsSupported;
if (prefixWildcardsSupported) {
for (const entry of termList) {
entry.expressionReverse = stringReverse(entry.expression);
entry.readingReverse = stringReverse(entry.reading);
}
}
// Add dictionary
const summary = {
title: dictionaryTitle,
revision: index.revision,
sequenced: index.sequenced,
version,
prefixWildcardsSupported
};
{
const transaction = db.transaction(['dictionaries'], 'readwrite');
const objectStore = transaction.objectStore('dictionaries');
await Database._bulkAdd(objectStore, [summary], 0, 1);
}
// Add data
const errors = [];
const total = (
termList.length +
termMetaList.length +
kanjiList.length +
kanjiMetaList.length +
tagList.length
);
let loadedCount = 0;
const maxTransactionLength = 1000;
const bulkAdd = async (objectStoreName, entries) => {
const ii = entries.length;
for (let i = 0; i < ii; i += maxTransactionLength) {
const count = Math.min(maxTransactionLength, ii - i);
try {
const transaction = db.transaction([objectStoreName], 'readwrite');
const objectStore = transaction.objectStore(objectStoreName);
await Database._bulkAdd(objectStore, entries, i, count);
} catch (e) {
errors.push(e);
}
loadedCount += count;
if (hasOnProgress) {
onProgress(total, loadedCount);
} }
} }
}; };
await loadBank(summary, buildTermBankName, termBankCount, termDataLoaded); await bulkAdd('terms', termList);
await loadBank(summary, buildTermMetaBankName, termMetaBankCount, termMetaDataLoaded); await bulkAdd('termMeta', termMetaList);
await loadBank(summary, buildKanjiBankName, kanjiBankCount, kanjiDataLoaded); await bulkAdd('kanji', kanjiList);
await loadBank(summary, buildKanjiMetaBankName, kanjiMetaBankCount, kanjiMetaDataLoaded); await bulkAdd('kanjiMeta', kanjiMetaList);
await loadBank(summary, buildTagBankName, tagBankCount, tagDataLoaded); await bulkAdd('tagMeta', tagList);
return summary; return {result: summary, errors};
} }
static createTerm(row, index) { // Private
_validate() {
if (this.db === null) {
throw new Error('Database not initialized');
}
}
async _getSchema(fileName) {
let schemaPromise = this._schemas.get(fileName);
if (typeof schemaPromise !== 'undefined') {
return schemaPromise;
}
schemaPromise = requestJson(chrome.runtime.getURL(fileName), 'GET');
this._schemas.set(fileName, schemaPromise);
return schemaPromise;
}
static _validateJsonSchema(value, schema, fileName) {
try {
JsonSchema.validate(value, schema);
} catch (e) {
throw Database._formatSchemaError(e, fileName);
}
}
static _formatSchemaError(e, fileName) {
const valuePathString = Database._getSchemaErrorPathString(e.info.valuePath, 'dictionary');
const schemaPathString = Database._getSchemaErrorPathString(e.info.schemaPath, 'schema');
const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`);
e2.data = e;
return e2;
}
static _getSchemaErrorPathString(infoList, base='') {
let result = base;
for (const [part] of infoList) {
switch (typeof part) {
case 'string':
if (result.length > 0) {
result += '.';
}
result += part;
break;
case 'number':
result += `[${part}]`;
break;
}
}
return result;
}
static _getDataBankSchemaPaths(version) {
const termBank = (
version === 1 ?
'/bg/data/dictionary-term-bank-v1-schema.json' :
'/bg/data/dictionary-term-bank-v3-schema.json'
);
const termMetaBank = '/bg/data/dictionary-term-meta-bank-v3-schema.json';
const kanjiBank = (
version === 1 ?
'/bg/data/dictionary-kanji-bank-v1-schema.json' :
'/bg/data/dictionary-kanji-bank-v3-schema.json'
);
const kanjiMetaBank = '/bg/data/dictionary-kanji-meta-bank-v3-schema.json';
const tagBank = '/bg/data/dictionary-tag-bank-v3-schema.json';
return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank];
}
async _dictionaryExists(title) {
const db = this.db;
const dbCountTransaction = db.transaction(['dictionaries'], 'readonly');
const dbIndex = dbCountTransaction.objectStore('dictionaries').index('title');
const only = IDBKeyRange.only(title);
const count = await Database._getCount(dbIndex, only);
return count > 0;
}
async _findGenericBulk(tableName, indexName, indexValueList, titles, createResult) {
this._validate();
const promises = [];
const results = [];
const processRow = (row, index) => {
if (titles.includes(row.dictionary)) {
results.push(createResult(row, index));
}
};
const dbTransaction = this.db.transaction([tableName], 'readonly');
const dbTerms = dbTransaction.objectStore(tableName);
const dbIndex = dbTerms.index(indexName);
for (let i = 0; i < indexValueList.length; ++i) {
const only = IDBKeyRange.only(indexValueList[i]);
promises.push(Database._getAll(dbIndex, only, i, processRow));
}
await Promise.all(promises);
return results;
}
static _createTerm(row, index) {
return { return {
index, index,
expression: row.expression, expression: row.expression,
@ -626,7 +613,7 @@ class Database {
}; };
} }
static createKanji(row, index) { static _createKanji(row, index) {
return { return {
index, index,
character: row.character, character: row.character,
@ -639,20 +626,20 @@ class Database {
}; };
} }
static createTermMeta({expression, mode, data, dictionary}, index) { static _createTermMeta({expression, mode, data, dictionary}, index) {
return {expression, mode, data, dictionary, index}; return {expression, mode, data, dictionary, index};
} }
static createKanjiMeta({character, mode, data, dictionary}, index) { static _createKanjiMeta({character, mode, data, dictionary}, index) {
return {character, mode, data, dictionary, index}; return {character, mode, data, dictionary, index};
} }
static getAll(dbIndex, query, context, processRow) { static _getAll(dbIndex, query, context, processRow) {
const fn = typeof dbIndex.getAll === 'function' ? Database.getAllFast : Database.getAllUsingCursor; const fn = typeof dbIndex.getAll === 'function' ? Database._getAllFast : Database._getAllUsingCursor;
return fn(dbIndex, query, context, processRow); return fn(dbIndex, query, context, processRow);
} }
static getAllFast(dbIndex, query, context, processRow) { static _getAllFast(dbIndex, query, context, processRow) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const request = dbIndex.getAll(query); const request = dbIndex.getAll(query);
request.onerror = (e) => reject(e); request.onerror = (e) => reject(e);
@ -665,7 +652,7 @@ class Database {
}); });
} }
static getAllUsingCursor(dbIndex, query, context, processRow) { static _getAllUsingCursor(dbIndex, query, context, processRow) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const request = dbIndex.openCursor(query, 'next'); const request = dbIndex.openCursor(query, 'next');
request.onerror = (e) => reject(e); request.onerror = (e) => reject(e);
@ -681,18 +668,18 @@ class Database {
}); });
} }
static getCounts(targets, query) { static _getCounts(targets, query) {
const countPromises = []; const countPromises = [];
const counts = {}; const counts = {};
for (const [objectStoreName, index] of targets) { for (const [objectStoreName, index] of targets) {
const n = objectStoreName; const n = objectStoreName;
const countPromise = Database.getCount(index, query).then((count) => counts[n] = count); const countPromise = Database._getCount(index, query).then((count) => counts[n] = count);
countPromises.push(countPromise); countPromises.push(countPromise);
} }
return Promise.all(countPromises).then(() => counts); return Promise.all(countPromises).then(() => counts);
} }
static getCount(dbIndex, query) { static _getCount(dbIndex, query) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const request = dbIndex.count(query); const request = dbIndex.count(query);
request.onerror = (e) => reject(e); request.onerror = (e) => reject(e);
@ -700,12 +687,12 @@ class Database {
}); });
} }
static getAllKeys(dbIndex, query) { static _getAllKeys(dbIndex, query) {
const fn = typeof dbIndex.getAllKeys === 'function' ? Database.getAllKeysFast : Database.getAllKeysUsingCursor; const fn = typeof dbIndex.getAllKeys === 'function' ? Database._getAllKeysFast : Database._getAllKeysUsingCursor;
return fn(dbIndex, query); return fn(dbIndex, query);
} }
static getAllKeysFast(dbIndex, query) { static _getAllKeysFast(dbIndex, query) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const request = dbIndex.getAllKeys(query); const request = dbIndex.getAllKeys(query);
request.onerror = (e) => reject(e); request.onerror = (e) => reject(e);
@ -713,7 +700,7 @@ class Database {
}); });
} }
static getAllKeysUsingCursor(dbIndex, query) { static _getAllKeysUsingCursor(dbIndex, query) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const primaryKeys = []; const primaryKeys = [];
const request = dbIndex.openKeyCursor(query, 'next'); const request = dbIndex.openKeyCursor(query, 'next');
@ -730,9 +717,9 @@ class Database {
}); });
} }
static async deleteValues(dbObjectStore, dbIndex, query, onProgress, progressData, progressRate) { static async _deleteValues(dbObjectStore, dbIndex, query, onProgress, progressData, progressRate) {
const hasProgress = (typeof onProgress === 'function'); const hasProgress = (typeof onProgress === 'function');
const count = await Database.getCount(dbIndex, query); const count = await Database._getCount(dbIndex, query);
++progressData.storesProcesed; ++progressData.storesProcesed;
progressData.count += count; progressData.count += count;
if (hasProgress) { if (hasProgress) {
@ -751,16 +738,16 @@ class Database {
); );
const promises = []; const promises = [];
const primaryKeys = await Database.getAllKeys(dbIndex, query); const primaryKeys = await Database._getAllKeys(dbIndex, query);
for (const key of primaryKeys) { for (const key of primaryKeys) {
const promise = Database.deleteValue(dbObjectStore, key).then(onValueDeleted); const promise = Database._deleteValue(dbObjectStore, key).then(onValueDeleted);
promises.push(promise); promises.push(promise);
} }
await Promise.all(promises); await Promise.all(promises);
} }
static deleteValue(dbObjectStore, key) { static _deleteValue(dbObjectStore, key) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const request = dbObjectStore.delete(key); const request = dbObjectStore.delete(key);
request.onerror = (e) => reject(e); request.onerror = (e) => reject(e);
@ -768,7 +755,7 @@ class Database {
}); });
} }
static bulkAdd(objectStore, items, start, count) { static _bulkAdd(objectStore, items, start, count) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
if (start + count > items.length) { if (start + count > items.length) {
count = items.length - start; count = items.length - start;
@ -796,7 +783,7 @@ class Database {
}); });
} }
static open(name, version, onUpgradeNeeded) { static _open(name, version, onUpgradeNeeded) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const request = window.indexedDB.open(name, version * 10); const request = window.indexedDB.open(name, version * 10);
@ -814,7 +801,7 @@ class Database {
}); });
} }
static upgrade(db, transaction, oldVersion, upgrades) { static _upgrade(db, transaction, oldVersion, upgrades) {
for (const {version, stores} of upgrades) { for (const {version, stores} of upgrades) {
if (oldVersion >= version) { continue; } if (oldVersion >= version) { continue; }
@ -824,13 +811,13 @@ class Database {
const objectStoreNames = transaction.objectStoreNames || db.objectStoreNames; const objectStoreNames = transaction.objectStoreNames || db.objectStoreNames;
const objectStore = ( const objectStore = (
Database.listContains(objectStoreNames, objectStoreName) ? Database._listContains(objectStoreNames, objectStoreName) ?
transaction.objectStore(objectStoreName) : transaction.objectStore(objectStoreName) :
db.createObjectStore(objectStoreName, primaryKey) db.createObjectStore(objectStoreName, primaryKey)
); );
for (const indexName of indices) { for (const indexName of indices) {
if (Database.listContains(objectStore.indexNames, indexName)) { continue; } if (Database._listContains(objectStore.indexNames, indexName)) { continue; }
objectStore.createIndex(indexName, indexName, {}); objectStore.createIndex(indexName, indexName, {});
} }
@ -838,7 +825,7 @@ class Database {
} }
} }
static deleteDatabase(dbName) { static _deleteDatabase(dbName) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const request = indexedDB.deleteDatabase(dbName); const request = indexedDB.deleteDatabase(dbName);
request.onerror = (e) => reject(e); request.onerror = (e) => reject(e);
@ -846,7 +833,7 @@ class Database {
}); });
} }
static listContains(list, value) { static _listContains(list, value) {
for (let i = 0, ii = list.length; i < ii; ++i) { for (let i = 0, ii = list.length; i < ii; ++i) {
if (list[i] === value) { return true; } if (list[i] === value) { return true; }
} }

View File

@ -0,0 +1,7 @@
{
"title": "Invalid Dictionary 1",
"format": 0,
"revision": "test",
"sequenced": true,
"description": "Invalid format number"
}

View File

@ -0,0 +1,7 @@
{
"title": "Invalid Dictionary 2",
"format": 3,
"revision": "test",
"sequenced": true,
"description": "Empty entry in kanji bank"
}

View File

@ -0,0 +1,3 @@
[
[]
]

View File

@ -0,0 +1,7 @@
{
"title": "Invalid Dictionary 3",
"format": 3,
"revision": "test",
"sequenced": true,
"description": "Invalid type entry in kanji meta bank"
}

View File

@ -0,0 +1,7 @@
{
"title": "Invalid Dictionary 4",
"format": 3,
"revision": "test",
"sequenced": true,
"description": "Invalid value as part of a tag bank entry"
}

View File

@ -0,0 +1,3 @@
[
[{"invalid": true}, "category1", 0, "tag1 notes", 0]
]

View File

@ -0,0 +1,7 @@
{
"title": "Invalid Dictionary 5",
"format": 3,
"revision": "test",
"sequenced": true,
"description": "Invalid type as part of a term bank entry"
}

View File

@ -0,0 +1,3 @@
[
["打", "だ", "tag1 tag2", "", 2, false, 1, "tag3 tag4 tag5"]
]

View File

@ -0,0 +1,7 @@
{
"title": "Invalid Dictionary 6",
"format": 3,
"revision": "test",
"sequenced": true,
"description": "Invalid root type for term meta bank"
}

View File

@ -0,0 +1 @@
false

View File

@ -1,3 +1,6 @@
const fs = require('fs');
const url = require('url');
const path = require('path');
const assert = require('assert'); const assert = require('assert');
const yomichanTest = require('./yomichan-test'); const yomichanTest = require('./yomichan-test');
require('fake-indexeddb/auto'); require('fake-indexeddb/auto');
@ -5,21 +8,86 @@ require('fake-indexeddb/auto');
const chrome = { const chrome = {
runtime: { runtime: {
onMessage: { onMessage: {
addListener: () => { /* NOP */ } addListener() { /* NOP */ }
},
getURL(path2) {
return url.pathToFileURL(path.join(__dirname, '..', 'ext', path2.replace(/^\//, '')));
} }
} }
}; };
/**
 * Minimal stand-in for the browser XMLHttpRequest that serves requests by
 * reading the file referenced by a file: URL from the local file system.
 * Only overrideMimeType, addEventListener, open, send and responseText
 * are implemented.
 */
class XMLHttpRequest {
    constructor() {
        // Map of event name -> array of callbacks registered for that event.
        this._eventCallbacks = new Map();
        this._url = '';
        this._responseText = null;
    }

    /**
     * Accepted for API compatibility; has no effect.
     */
    overrideMimeType() {
        // NOP
    }

    /**
     * Registers a callback for the given event name.
     * send() triggers either 'load' or 'error'.
     * @param {string} eventName Name of the event.
     * @param {Function} callback Function invoked when the event is triggered.
     */
    addEventListener(eventName, callback) {
        let callbacks = this._eventCallbacks.get(eventName);
        if (typeof callbacks === 'undefined') {
            callbacks = [];
            this._eventCallbacks.set(eventName, callbacks);
        }
        callbacks.push(callback);
    }

    /**
     * Stores the request target. The action (HTTP method) is ignored.
     * @param {string} action Request method; unused.
     * @param {string|URL} requestUrl file: URL of the file to read.
     */
    open(action, requestUrl) {
        // Parameter is not named "url" so that it does not shadow the url module.
        this._url = requestUrl;
    }

    /**
     * Reads the file asynchronously (on a later microtask) and triggers
     * 'load' on success or 'error' on any failure, including a URL that
     * cannot be converted to a file path.
     */
    send() {
        Promise.resolve()
            .then(() => {
                let source;
                try {
                    // The URL conversion is inside the try block so that an invalid
                    // URL is reported through the 'error' event rather than being
                    // thrown synchronously from send().
                    const filePath = url.fileURLToPath(this._url);
                    source = fs.readFileSync(filePath, {encoding: 'utf8'});
                } catch (e) {
                    this._trigger('error');
                    return;
                }
                this._responseText = source;
                this._trigger('load');
            });
    }

    /**
     * Text of the file that was read, or null if no load has completed.
     */
    get responseText() {
        return this._responseText;
    }

    /**
     * Invokes every callback registered for eventName, in registration order.
     * @param {string} eventName Name of the event to trigger.
     * @param {...*} args Arguments forwarded to each callback.
     */
    _trigger(eventName, ...args) {
        const callbacks = this._eventCallbacks.get(eventName);
        if (typeof callbacks === 'undefined') { return; }

        for (let i = 0, ii = callbacks.length; i < ii; ++i) {
            callbacks[i](...args);
        }
    }
}
const {JsonSchema} = yomichanTest.requireScript('ext/bg/js/json-schema.js', ['JsonSchema']);
const {dictFieldSplit, dictTagSanitize} = yomichanTest.requireScript('ext/bg/js/dictionary.js', ['dictFieldSplit', 'dictTagSanitize']); const {dictFieldSplit, dictTagSanitize} = yomichanTest.requireScript('ext/bg/js/dictionary.js', ['dictFieldSplit', 'dictTagSanitize']);
const {stringReverse, hasOwn} = yomichanTest.requireScript('ext/mixed/js/core.js', ['stringReverse', 'hasOwn'], {chrome}); const {stringReverse, hasOwn} = yomichanTest.requireScript('ext/mixed/js/core.js', ['stringReverse', 'hasOwn'], {chrome});
const {requestJson} = yomichanTest.requireScript('ext/bg/js/request.js', ['requestJson'], {XMLHttpRequest});
global.window = global; const databaseGlobals = {
global.JSZip = yomichanTest.JSZip; chrome,
global.dictFieldSplit = dictFieldSplit; JsonSchema,
global.dictTagSanitize = dictTagSanitize; requestJson,
global.stringReverse = stringReverse; stringReverse,
global.hasOwn = hasOwn; hasOwn,
dictFieldSplit,
dictTagSanitize,
indexedDB: global.indexedDB,
JSZip: yomichanTest.JSZip
};
databaseGlobals.window = databaseGlobals;
const {Database} = yomichanTest.requireScript('ext/bg/js/database.js', ['Database'], databaseGlobals);
function countTermsWithExpression(terms, expression) { function countTermsWithExpression(terms, expression) {
@ -39,21 +107,31 @@ function countKanjiWithCharacter(kanji, character) {
} }
/**
 * Deletes every database reported by global.indexedDB.databases().
 * @param {number} timeout Maximum time in milliseconds to wait before rejecting.
 * @returns {Promise<void>} Resolves once all databases have been deleted.
 *   Rejects with the underlying error/event if listing or deleting fails,
 *   or with a timeout Error if the work does not finish within `timeout` ms.
 */
function clearDatabase(timeout) {
    return new Promise((resolve, reject) => {
        const timer = setTimeout(() => {
            reject(new Error(`clearDatabase failed to resolve after ${timeout}ms`));
        }, timeout);

        (async () => {
            const indexedDB = global.indexedDB;
            for (const {name} of await indexedDB.databases()) {
                await new Promise((resolve2, reject2) => {
                    const request = indexedDB.deleteDatabase(name);
                    request.onerror = (e) => reject2(e);
                    request.onsuccess = () => resolve2();
                });
            }
        })().then(
            () => {
                clearTimeout(timer);
                resolve();
            },
            (e) => {
                // Forward failures right away instead of leaving an unhandled
                // rejection and making the caller wait for the timeout.
                clearTimeout(timer);
                reject(e);
            }
        );
    });
}
async function testDatabase1() { async function testDatabase1() {
// Load dictionary data // Load dictionary data
const testDictionary = yomichanTest.createTestDictionaryArchive(); const testDictionary = yomichanTest.createTestDictionaryArchive('valid-dictionary1');
const testDictionarySource = await testDictionary.generateAsync({type: 'string'}); const testDictionarySource = await testDictionary.generateAsync({type: 'string'});
const testDictionaryIndex = JSON.parse(await testDictionary.files['index.json'].async('string')); const testDictionaryIndex = JSON.parse(await testDictionary.files['index.json'].async('string'));
@ -732,7 +810,7 @@ async function testFindTagForTitle1(database, title) {
async function testDatabase2() { async function testDatabase2() {
// Load dictionary data // Load dictionary data
const testDictionary = yomichanTest.createTestDictionaryArchive(); const testDictionary = yomichanTest.createTestDictionaryArchive('valid-dictionary1');
const testDictionarySource = await testDictionary.generateAsync({type: 'string'}); const testDictionarySource = await testDictionary.generateAsync({type: 'string'});
const testDictionaryIndex = JSON.parse(await testDictionary.files['index.json'].async('string')); const testDictionaryIndex = JSON.parse(await testDictionary.files['index.json'].async('string'));
@ -771,12 +849,61 @@ async function testDatabase2() {
} }
/**
 * Verifies that importing each of the invalid test dictionaries fails with
 * an error whose message starts with 'Dictionary has invalid data'.
 */
async function testDatabase3() {
    const invalidDictionaries = [
        'invalid-dictionary1',
        'invalid-dictionary2',
        'invalid-dictionary3',
        'invalid-dictionary4',
        'invalid-dictionary5',
        'invalid-dictionary6'
    ];

    // Setup database
    const database = new Database();
    await database.prepare();

    for (const invalidDictionary of invalidDictionaries) {
        const testDictionary = yomichanTest.createTestDictionaryArchive(invalidDictionary);
        const testDictionarySource = await testDictionary.generateAsync({type: 'string'});

        let error = null;
        try {
            await database.importDictionary(testDictionarySource, () => {}, {});
        } catch (e) {
            error = e;
        }

        if (error === null) {
            assert.fail(`Expected an error while importing ${invalidDictionary}`);
        } else {
            const prefix = 'Dictionary has invalid data';
            const message = error.message;
            // strictEqual actually compares the type; assert.ok(typeof message, 'string')
            // would always pass because typeof yields a non-empty string.
            assert.strictEqual(typeof message, 'string');
            assert.ok(message.startsWith(prefix), `Expected error message to start with '${prefix}': ${message}`);
        }
    }

    await database.close();
}
/**
 * Runs the database tests in sequence, clearing all databases after each one.
 * On any failure the error is logged and the process exits with code -1.
 */
async function main() {
    // Named so that it does not shadow the global clearTimeout() function.
    const clearDatabaseTimeout = 5000;
    try {
        await testDatabase1();
        await clearDatabase(clearDatabaseTimeout);

        await testDatabase2();
        await clearDatabase(clearDatabaseTimeout);

        await testDatabase3();
        await clearDatabase(clearDatabaseTimeout);
    } catch (e) {
        console.log(e);
        // process.exit does not return, so nothing after it would run.
        process.exit(-1);
    }
}

View File

@ -3,9 +3,38 @@ const dictionaryValidate = require('./dictionary-validate');
/**
 * Validates each test dictionary archive against the dictionary schemas and
 * checks that the outcome matches the expectation for that dictionary.
 * Throws the validation error if a dictionary expected to be valid fails,
 * and throws a new Error if a dictionary expected to be invalid passes.
 */
async function main() {
    const dictionaries = [
        {name: 'valid-dictionary1', valid: true},
        {name: 'invalid-dictionary1', valid: false},
        {name: 'invalid-dictionary2', valid: false},
        {name: 'invalid-dictionary3', valid: false},
        {name: 'invalid-dictionary4', valid: false},
        {name: 'invalid-dictionary5', valid: false},
        {name: 'invalid-dictionary6', valid: false}
    ];

    const schemas = dictionaryValidate.getSchemas();

    for (const {name, valid} of dictionaries) {
        const archive = yomichanTest.createTestDictionaryArchive(name);

        // Capture the validation outcome: null means validation succeeded.
        let failure = null;
        try {
            await dictionaryValidate.validateDictionary(archive, schemas);
        } catch (e) {
            failure = e;
        }

        // A dictionary expected to be valid must not produce an error.
        if (valid && failure !== null) {
            throw failure;
        }

        // A dictionary expected to be invalid must produce an error.
        if (!valid && failure === null) {
            throw new Error(`Expected dictionary ${name} to be invalid`);
        }
    }
}

View File

@ -26,21 +26,14 @@ function getJSZip() {
return JSZip; return JSZip;
} }
function createTestDictionaryArchive(dictionaryName) { function createTestDictionaryArchive(dictionary, dictionaryName) {
const fileNames = [ const dictionaryDirectory = path.join(__dirname, 'data', 'dictionaries', dictionary);
'index.json', const fileNames = fs.readdirSync(dictionaryDirectory);
'tag_bank_1.json',
'tag_bank_2.json',
'term_bank_1.json',
'kanji_bank_1.json',
'term_meta_bank_1.json',
'kanji_meta_bank_1.json'
];
const archive = new (getJSZip())(); const archive = new (getJSZip())();
for (const fileName of fileNames) { for (const fileName of fileNames) {
const source = fs.readFileSync(path.join(__dirname, 'test-dictionary-data', fileName), {encoding: 'utf8'}); const source = fs.readFileSync(path.join(dictionaryDirectory, fileName), {encoding: 'utf8'});
const json = JSON.parse(source); const json = JSON.parse(source);
if (fileName === 'index.json' && typeof dictionaryName === 'string') { if (fileName === 'index.json' && typeof dictionaryName === 'string') {
json.title = dictionaryName; json.title = dictionaryName;