Merge pull request #259 from toasted-nutbread/database-optimizations2

Database optimizations part 2
Alex Yatskov 2019-10-20 11:04:17 -07:00 committed by GitHub
commit 6592323855
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 279 additions and 240 deletions

View File

@@ -20,7 +20,6 @@
 class Database {
     constructor() {
         this.db = null;
-        this.tagCache = {};
     }
 
     async prepare() {
@@ -53,33 +52,20 @@ class Database {
         this.db.close();
         await this.db.delete();
         this.db = null;
-        this.tagCache = {};
 
         await this.prepare();
     }
 
-    async findTerms(term, titles) {
+    async findTermsBulk(termList, titles) {
         this.validate();
 
-        const results = [];
-        await this.db.terms.where('expression').equals(term).or('reading').equals(term).each(row => {
-            if (titles.includes(row.dictionary)) {
-                results.push(Database.createTerm(row));
-            }
-        });
-
-        return results;
-    }
-
-    async findTermsBulk(terms, titles) {
         const promises = [];
         const visited = {};
         const results = [];
-        const createResult = Database.createTerm;
        const processRow = (row, index) => {
             if (titles.includes(row.dictionary) && !visited.hasOwnProperty(row.id)) {
                 visited[row.id] = true;
-                results.push(createResult(row, index));
+                results.push(Database.createTerm(row, index));
             }
         };
@@ -89,8 +75,8 @@ class Database {
         const dbIndex1 = dbTerms.index('expression');
         const dbIndex2 = dbTerms.index('reading');
-        for (let i = 0; i < terms.length; ++i) {
-            const only = IDBKeyRange.only(terms[i]);
+        for (let i = 0; i < termList.length; ++i) {
+            const only = IDBKeyRange.only(termList[i]);
             promises.push(
                 Database.getAll(dbIndex1, only, i, processRow),
                 Database.getAll(dbIndex2, only, i, processRow)
@@ -102,66 +88,24 @@ class Database {
         return results;
     }
 
-    async findTermsExact(term, reading, titles) {
+    async findTermsExactBulk(termList, readingList, titles) {
         this.validate();
 
-        const results = [];
-        await this.db.terms.where('expression').equals(term).each(row => {
-            if (row.reading === reading && titles.includes(row.dictionary)) {
-                results.push(Database.createTerm(row));
-            }
-        });
-
-        return results;
-    }
-
-    async findTermsBySequence(sequence, mainDictionary) {
-        this.validate();
-
-        const results = [];
-        await this.db.terms.where('sequence').equals(sequence).each(row => {
-            if (row.dictionary === mainDictionary) {
-                results.push(Database.createTerm(row));
-            }
-        });
-
-        return results;
-    }
-
-    async findTermMeta(term, titles) {
-        this.validate();
-
-        const results = [];
-        await this.db.termMeta.where('expression').equals(term).each(row => {
-            if (titles.includes(row.dictionary)) {
-                results.push({
-                    mode: row.mode,
-                    data: row.data,
-                    dictionary: row.dictionary
-                });
-            }
-        });
-
-        return results;
-    }
-
-    async findTermMetaBulk(terms, titles) {
         const promises = [];
         const results = [];
-        const createResult = Database.createTermMeta;
         const processRow = (row, index) => {
-            if (titles.includes(row.dictionary)) {
-                results.push(createResult(row, index));
+            if (row.reading === readingList[index] && titles.includes(row.dictionary)) {
+                results.push(Database.createTerm(row, index));
             }
         };
 
         const db = this.db.backendDB();
-        const dbTransaction = db.transaction(['termMeta'], 'readonly');
-        const dbTerms = dbTransaction.objectStore('termMeta');
+        const dbTransaction = db.transaction(['terms'], 'readonly');
+        const dbTerms = dbTransaction.objectStore('terms');
         const dbIndex = dbTerms.index('expression');
-        for (let i = 0; i < terms.length; ++i) {
-            const only = IDBKeyRange.only(terms[i]);
+        for (let i = 0; i < termList.length; ++i) {
+            const only = IDBKeyRange.only(termList[i]);
             promises.push(Database.getAll(dbIndex, only, i, processRow));
         }
@@ -170,67 +114,85 @@ class Database {
         return results;
     }
 
-    async findKanji(kanji, titles) {
+    async findTermsBySequenceBulk(sequenceList, mainDictionary) {
         this.validate();
 
+        const promises = [];
         const results = [];
-        await this.db.kanji.where('character').equals(kanji).each(row => {
-            if (titles.includes(row.dictionary)) {
-                results.push({
-                    character: row.character,
-                    onyomi: dictFieldSplit(row.onyomi),
-                    kunyomi: dictFieldSplit(row.kunyomi),
-                    tags: dictFieldSplit(row.tags),
-                    glossary: row.meanings,
-                    stats: row.stats,
-                    dictionary: row.dictionary
-                });
+        const processRow = (row, index) => {
+            if (row.dictionary === mainDictionary) {
+                results.push(Database.createTerm(row, index));
             }
-        });
-
-        return results;
-    }
-
-    async findKanjiMeta(kanji, titles) {
-        this.validate();
-
-        const results = [];
-        await this.db.kanjiMeta.where('character').equals(kanji).each(row => {
-            if (titles.includes(row.dictionary)) {
-                results.push({
-                    mode: row.mode,
-                    data: row.data,
-                    dictionary: row.dictionary
-                });
-            }
-        });
-
-        return results;
-    }
-
-    findTagForTitleCached(name, title) {
-        if (this.tagCache.hasOwnProperty(title)) {
-            const cache = this.tagCache[title];
-            if (cache.hasOwnProperty(name)) {
-                return cache[name];
-            }
+        };
+
+        const db = this.db.backendDB();
+        const dbTransaction = db.transaction(['terms'], 'readonly');
+        const dbTerms = dbTransaction.objectStore('terms');
+        const dbIndex = dbTerms.index('sequence');
+        for (let i = 0; i < sequenceList.length; ++i) {
+            const only = IDBKeyRange.only(sequenceList[i]);
+            promises.push(Database.getAll(dbIndex, only, i, processRow));
         }
+
+        await Promise.all(promises);
+        return results;
+    }
+
+    async findTermMetaBulk(termList, titles) {
+        return this.findGenericBulk('termMeta', 'expression', termList, titles, Database.createMeta);
+    }
+
+    async findKanjiBulk(kanjiList, titles) {
+        return this.findGenericBulk('kanji', 'character', kanjiList, titles, Database.createKanji);
+    }
+
+    async findKanjiMetaBulk(kanjiList, titles) {
+        return this.findGenericBulk('kanjiMeta', 'character', kanjiList, titles, Database.createMeta);
+    }
+
+    async findGenericBulk(tableName, indexName, indexValueList, titles, createResult) {
+        this.validate();
+
+        const promises = [];
+        const results = [];
+        const processRow = (row, index) => {
+            if (titles.includes(row.dictionary)) {
+                results.push(createResult(row, index));
+            }
+        };
+
+        const db = this.db.backendDB();
+        const dbTransaction = db.transaction([tableName], 'readonly');
+        const dbTerms = dbTransaction.objectStore(tableName);
+        const dbIndex = dbTerms.index(indexName);
+        for (let i = 0; i < indexValueList.length; ++i) {
+            const only = IDBKeyRange.only(indexValueList[i]);
+            promises.push(Database.getAll(dbIndex, only, i, processRow));
+        }
+
+        await Promise.all(promises);
+        return results;
     }
 
     async findTagForTitle(name, title) {
         this.validate();
 
-        const cache = (this.tagCache.hasOwnProperty(title) ? this.tagCache[title] : (this.tagCache[title] = {}));
-
         let result = null;
-        await this.db.tagMeta.where('name').equals(name).each(row => {
+        const db = this.db.backendDB();
+        const dbTransaction = db.transaction(['tagMeta'], 'readonly');
+        const dbTerms = dbTransaction.objectStore('tagMeta');
+        const dbIndex = dbTerms.index('name');
+        const only = IDBKeyRange.only(name);
+        await Database.getAll(dbIndex, only, null, row => {
             if (title === row.dictionary) {
                 result = row;
             }
         });
-        cache[name] = result;
 
         return result;
     }
@@ -522,7 +484,20 @@ class Database {
         };
     }
 
-    static createTermMeta(row, index) {
+    static createKanji(row, index) {
+        return {
+            index,
+            character: row.character,
+            onyomi: dictFieldSplit(row.onyomi),
+            kunyomi: dictFieldSplit(row.kunyomi),
+            tags: dictFieldSplit(row.tags),
+            glossary: row.meanings,
+            stats: row.stats,
+            dictionary: row.dictionary
+        };
+    }
+
+    static createMeta(row, index) {
         return {
             index,
             mode: row.mode,
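All of the new *Bulk methods above share one shape: a single read-only IndexedDB transaction, one Database.getAll request per input value, and result objects tagged with the index of the input that produced them. A minimal sketch of how a caller might consume findTermsBulk; the lookupTerms helper and its names are illustrative, not part of this patch.

// Sketch (illustrative): look up many terms in one call and regroup the
// returned rows by the position of the term that produced them.
async function lookupTerms(database, termList, titles) {
    const rowsByTerm = termList.map(() => []);
    for (const row of await database.findTermsBulk(termList, titles)) {
        // row.index is the position in termList whose query matched this row
        rowsByTerm[row.index].push(row);
    }
    return rowsByTerm;
}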

View File

@@ -21,6 +21,7 @@ class Translator {
     constructor() {
         this.database = null;
         this.deinflector = null;
+        this.tagCache = {};
     }
 
     async prepare() {
@@ -36,6 +37,11 @@
         }
     }
 
+    async purgeDatabase() {
+        this.tagCache = {};
+        await this.database.purge();
+    }
+
     async findTermsGrouped(text, dictionaries, alphanumeric, options) {
         const titles = Object.keys(dictionaries);
         const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
@@ -52,94 +58,121 @@ class Translator {
         return {length, definitions: definitionsGrouped};
     }
 
+    async getSequencedDefinitions(definitions, mainDictionary) {
+        const definitionsBySequence = dictTermsMergeBySequence(definitions, mainDictionary);
+        const defaultDefinitions = definitionsBySequence['-1'];
+
+        const sequenceList = Object.keys(definitionsBySequence).map(v => Number(v)).filter(v => v >= 0);
+        const sequencedDefinitions = sequenceList.map((key) => ({
+            definitions: definitionsBySequence[key],
+            rawDefinitions: []
+        }));
+
+        for (const definition of await this.database.findTermsBySequenceBulk(sequenceList, mainDictionary)) {
+            sequencedDefinitions[definition.index].rawDefinitions.push(definition);
+        }
+
+        return {sequencedDefinitions, defaultDefinitions};
+    }
+
+    async getMergedSecondarySearchResults(text, expressionsMap, secondarySearchTitles) {
+        if (secondarySearchTitles.length === 0) {
+            return [];
+        }
+
+        const expressionList = [];
+        const readingList = [];
+        for (const expression of expressionsMap.keys()) {
+            if (expression === text) { continue; }
+            for (const reading of expressionsMap.get(expression).keys()) {
+                expressionList.push(expression);
+                readingList.push(reading);
+            }
+        }
+
+        const definitions = await this.database.findTermsExactBulk(expressionList, readingList, secondarySearchTitles);
+        for (const definition of definitions) {
+            const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
+            definitionTags.push(dictTagBuildSource(definition.dictionary));
+            definition.definitionTags = definitionTags;
+            const termTags = await this.expandTags(definition.termTags, definition.dictionary);
+            definition.termTags = termTags;
+        }
+
+        if (definitions.length > 1) {
+            definitions.sort((a, b) => a.index - b.index);
+        }
+
+        return definitions;
+    }
+
+    async getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchTitles, mergedByTermIndices) {
+        const result = sequencedDefinition.definitions;
+        const rawDefinitionsBySequence = sequencedDefinition.rawDefinitions;
+
+        for (const definition of rawDefinitionsBySequence) {
+            const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
+            definitionTags.push(dictTagBuildSource(definition.dictionary));
+            definition.definitionTags = definitionTags;
+            const termTags = await this.expandTags(definition.termTags, definition.dictionary);
+            definition.termTags = termTags;
+        }
+
+        const definitionsByGloss = dictTermsMergeByGloss(result, rawDefinitionsBySequence);
+        const secondarySearchResults = await this.getMergedSecondarySearchResults(text, result.expressions, secondarySearchTitles);
+
+        dictTermsMergeByGloss(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices);
+
+        for (const gloss in definitionsByGloss) {
+            const definition = definitionsByGloss[gloss];
+            dictTagsSort(definition.definitionTags);
+            result.definitions.push(definition);
+        }
+
+        dictTermsSort(result.definitions, dictionaries);
+
+        const expressions = [];
+        for (const expression of result.expressions.keys()) {
+            for (const reading of result.expressions.get(expression).keys()) {
+                const termTags = result.expressions.get(expression).get(reading);
+                const score = termTags.map(tag => tag.score).reduce((p, v) => p + v, 0);
+                expressions.push({
+                    expression: expression,
+                    reading: reading,
+                    termTags: dictTagsSort(termTags),
+                    termFrequency: Translator.scoreToTermFrequency(score)
+                });
+            }
+        }
+
+        result.expressions = expressions;
+
+        result.expression = Array.from(result.expression);
+        result.reading = Array.from(result.reading);
+
+        return result;
+    }
+
     async findTermsMerged(text, dictionaries, alphanumeric, options) {
         const secondarySearchTitles = Object.keys(options.dictionaries).filter(dict => options.dictionaries[dict].allowSecondarySearches);
         const titles = Object.keys(dictionaries);
         const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
+        const {sequencedDefinitions, defaultDefinitions} = await this.getSequencedDefinitions(definitions, options.general.mainDictionary);
 
-        const definitionsBySequence = dictTermsMergeBySequence(definitions, options.general.mainDictionary);
         const definitionsMerged = [];
         const mergedByTermIndices = new Set();
-        for (const sequence in definitionsBySequence) {
-            if (sequence < 0) {
-                continue;
-            }
-
-            const result = definitionsBySequence[sequence];
-            const rawDefinitionsBySequence = await this.database.findTermsBySequence(Number(sequence), options.general.mainDictionary);
-            for (const definition of rawDefinitionsBySequence) {
-                const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
-                definitionTags.push(dictTagBuildSource(definition.dictionary));
-                definition.definitionTags = definitionTags;
-                const termTags = await this.expandTags(definition.termTags, definition.dictionary);
-                definition.termTags = termTags;
-            }
-
-            const definitionsByGloss = dictTermsMergeByGloss(result, rawDefinitionsBySequence);
-
-            const secondarySearchResults = [];
-            if (secondarySearchTitles.length > 0) {
-                for (const expression of result.expressions.keys()) {
-                    if (expression === text) {
-                        continue;
-                    }
-
-                    for (const reading of result.expressions.get(expression).keys()) {
-                        for (const definition of await this.database.findTermsExact(expression, reading, secondarySearchTitles)) {
-                            const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
-                            definitionTags.push(dictTagBuildSource(definition.dictionary));
-                            definition.definitionTags = definitionTags;
-                            const termTags = await this.expandTags(definition.termTags, definition.dictionary);
-                            definition.termTags = termTags;
-                            secondarySearchResults.push(definition);
-                        }
-                    }
-                }
-            }
-
-            dictTermsMergeByGloss(result, definitionsBySequence['-1'].concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices);
-
-            for (const gloss in definitionsByGloss) {
-                const definition = definitionsByGloss[gloss];
-                dictTagsSort(definition.definitionTags);
-                result.definitions.push(definition);
-            }
-
-            dictTermsSort(result.definitions, dictionaries);
-
-            const expressions = [];
-            for (const expression of result.expressions.keys()) {
-                for (const reading of result.expressions.get(expression).keys()) {
-                    const termTags = result.expressions.get(expression).get(reading);
-                    expressions.push({
-                        expression: expression,
-                        reading: reading,
-                        termTags: dictTagsSort(termTags),
-                        termFrequency: (score => {
-                            if (score > 0) {
-                                return 'popular';
-                            } else if (score < 0) {
-                                return 'rare';
-                            } else {
-                                return 'normal';
-                            }
-                        })(termTags.map(tag => tag.score).reduce((p, v) => p + v, 0))
-                    });
-                }
-            }
-
-            result.expressions = expressions;
-
-            result.expression = Array.from(result.expression);
-            result.reading = Array.from(result.reading);
-
+        for (const sequencedDefinition of sequencedDefinitions) {
+            const result = await this.getMergedDefinition(
+                text,
+                dictionaries,
+                sequencedDefinition,
+                defaultDefinitions,
+                secondarySearchTitles,
+                mergedByTermIndices
+            );
             definitionsMerged.push(result);
         }
 
-        const strayDefinitions = definitionsBySequence['-1'].filter((definition, index) => !mergedByTermIndices.has(index));
+        const strayDefinitions = defaultDefinitions.filter((definition, index) => !mergedByTermIndices.has(index));
         for (const groupedDefinition of dictTermsGroup(strayDefinitions, dictionaries)) {
             groupedDefinition.expressions = [{expression: groupedDefinition.expression, reading: groupedDefinition.reading}];
             definitionsMerged.push(groupedDefinition);
@@ -277,33 +310,44 @@ class Translator {
     }
 
     async findKanji(text, dictionaries) {
-        let definitions = [];
-        const processed = {};
         const titles = Object.keys(dictionaries);
+        const kanjiUnique = {};
+        const kanjiList = [];
         for (const c of text) {
-            if (!processed[c]) {
-                definitions.push(...await this.database.findKanji(c, titles));
-                processed[c] = true;
+            if (!kanjiUnique.hasOwnProperty(c)) {
+                kanjiList.push(c);
+                kanjiUnique[c] = true;
             }
         }
 
+        const definitions = await this.database.findKanjiBulk(kanjiList, titles);
+        if (definitions.length === 0) {
+            return definitions;
+        }
+
+        if (definitions.length > 1) {
+            definitions.sort((a, b) => a.index - b.index);
+        }
+
+        const kanjiList2 = [];
         for (const definition of definitions) {
+            kanjiList2.push(definition.character);
+
             const tags = await this.expandTags(definition.tags, definition.dictionary);
             tags.push(dictTagBuildSource(definition.dictionary));
             definition.tags = dictTagsSort(tags);
             definition.stats = await this.expandStats(definition.stats, definition.dictionary);
             definition.frequencies = [];
-            for (const meta of await this.database.findKanjiMeta(definition.character, titles)) {
-                if (meta.mode === 'freq') {
-                    definition.frequencies.push({
-                        character: meta.character,
-                        frequency: meta.data,
-                        dictionary: meta.dictionary
-                    });
-                }
-            }
+        }
+
+        for (const meta of await this.database.findKanjiMetaBulk(kanjiList2, titles)) {
+            if (meta.mode !== 'freq') { continue; }
+            definitions[meta.index].frequencies.push({
+                character: meta.character,
+                frequency: meta.data,
+                dictionary: meta.dictionary
+            });
         }
 
         return definitions;
@@ -359,56 +403,76 @@ class Translator {
     }
 
     async expandTags(names, title) {
-        const tags = [];
-        for (const name of names) {
-            const base = Translator.getNameBase(name);
-            let meta = this.database.findTagForTitleCached(base, title);
-            if (typeof meta === 'undefined') {
-                meta = await this.database.findTagForTitle(base, title);
-            }
-
-            const tag = Object.assign({}, meta !== null ? meta : {}, {name});
-            tags.push(dictTagSanitize(tag));
-        }
-
-        return tags;
+        const tagMetaList = await this.getTagMetaList(names, title);
+        return tagMetaList.map((meta, index) => {
+            const name = names[index];
+            const tag = dictTagSanitize(Object.assign({}, meta !== null ? meta : {}, {name}));
+            return dictTagSanitize(tag);
+        });
     }
 
     async expandStats(items, title) {
-        const stats = {};
-        for (const name in items) {
-            const base = Translator.getNameBase(name);
-            let meta = this.database.findTagForTitleCached(base, title);
-            if (typeof meta === 'undefined') {
-                meta = await this.database.findTagForTitle(base, title);
-                if (meta === null) {
-                    continue;
-                }
-            }
+        const names = Object.keys(items);
+        const tagMetaList = await this.getTagMetaList(names, title);
 
-            const group = stats[meta.category] = stats[meta.category] || [];
+        const stats = {};
+        for (let i = 0; i < names.length; ++i) {
+            const name = names[i];
+            const meta = tagMetaList[i];
+            if (meta === null) { continue; }
+
+            const category = meta.category;
+            const group = (
+                stats.hasOwnProperty(category) ?
+                stats[category] :
+                (stats[category] = [])
+            );
 
             const stat = Object.assign({}, meta, {name, value: items[name]});
             group.push(dictTagSanitize(stat));
         }
 
+        const sortCompare = (a, b) => a.notes - b.notes;
         for (const category in stats) {
-            stats[category].sort((a, b) => {
-                if (a.notes < b.notes) {
-                    return -1;
-                } else if (a.notes > b.notes) {
-                    return 1;
-                } else {
-                    return 0;
-                }
-            });
+            stats[category].sort(sortCompare);
         }
 
         return stats;
     }
 
+    async getTagMetaList(names, title) {
+        const tagMetaList = [];
+        const cache = (
+            this.tagCache.hasOwnProperty(title) ?
+            this.tagCache[title] :
+            (this.tagCache[title] = {})
+        );
+        for (const name of names) {
+            const base = Translator.getNameBase(name);
+            if (cache.hasOwnProperty(base)) {
+                tagMetaList.push(cache[base]);
+            } else {
+                const tagMeta = await this.database.findTagForTitle(base, title);
+                cache[base] = tagMeta;
+                tagMetaList.push(tagMeta);
+            }
+        }
+        return tagMetaList;
+    }
+
+    static scoreToTermFrequency(score) {
+        if (score > 0) {
+            return 'popular';
+        } else if (score < 0) {
+            return 'rare';
+        } else {
+            return 'normal';
+        }
+    }
+
     static getNameBase(name) {
         const pos = name.indexOf(':');
         return (pos >= 0 ? name.substr(0, pos) : name);
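The per-title tag cache removed from Database now lives on Translator: getTagMetaList consults this.tagCache before falling back to database.findTagForTitle, and purgeDatabase resets the cache together with the database. A rough usage sketch follows; the dictionary title and tag names are placeholders, not values taken from this patch.

// Sketch (illustrative): the second call is served from translator.tagCache
// instead of issuing further IndexedDB queries through findTagForTitle.
async function demoTagCache(translator) {
    const title = 'Example Dictionary'; // placeholder dictionary title
    const first = await translator.getTagMetaList(['n', 'vs'], title);  // queries the database
    const second = await translator.getTagMetaList(['n', 'vs'], title); // served from the cache
    return [first, second];
}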

View File

@@ -89,7 +89,7 @@ function utilAnkiGetModelFieldNames(modelName) {
 }
 
 function utilDatabasePurge() {
-    return utilBackend().translator.database.purge();
+    return utilBackend().translator.purgeDatabase();
 }
 
 async function utilDatabaseImport(data, progress, exceptions) {