Merge pull request #229 from toasted-nutbread/database-optimizations

Database optimizations
This commit is contained in:
Alex Yatskov 2019-10-05 09:18:09 -07:00 committed by GitHub
commit a369f8d0a4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 305 additions and 192 deletions

View File

@ -68,24 +68,39 @@ class Database {
const results = []; const results = [];
await this.db.terms.where('expression').equals(term).or('reading').equals(term).each(row => { await this.db.terms.where('expression').equals(term).or('reading').equals(term).each(row => {
if (titles.includes(row.dictionary)) { if (titles.includes(row.dictionary)) {
results.push({ results.push(Database.createTerm(row));
expression: row.expression,
reading: row.reading,
definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''),
termTags: dictFieldSplit(row.termTags || ''),
rules: dictFieldSplit(row.rules),
glossary: row.glossary,
score: row.score,
dictionary: row.dictionary,
id: row.id,
sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence
});
} }
}); });
return results; return results;
} }
async findTermsBulk(terms, titles) {
const promises = [];
const visited = {};
const results = [];
const createResult = Database.createTerm;
const filter = (row) => titles.includes(row.dictionary);
const db = this.db.backendDB();
const dbTransaction = db.transaction(['terms'], 'readonly');
const dbTerms = dbTransaction.objectStore('terms');
const dbIndex1 = dbTerms.index('expression');
const dbIndex2 = dbTerms.index('reading');
for (let i = 0; i < terms.length; ++i) {
const only = IDBKeyRange.only(terms[i]);
promises.push(
Database.getAll(dbIndex1, only, i, visited, filter, createResult, results),
Database.getAll(dbIndex2, only, i, visited, filter, createResult, results)
);
}
await Promise.all(promises);
return results;
}
async findTermsExact(term, reading, titles) { async findTermsExact(term, reading, titles) {
if (!this.db) { if (!this.db) {
throw 'Database not initialized'; throw 'Database not initialized';
@ -94,18 +109,7 @@ class Database {
const results = []; const results = [];
await this.db.terms.where('expression').equals(term).each(row => { await this.db.terms.where('expression').equals(term).each(row => {
if (row.reading === reading && titles.includes(row.dictionary)) { if (row.reading === reading && titles.includes(row.dictionary)) {
results.push({ results.push(Database.createTerm(row));
expression: row.expression,
reading: row.reading,
definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''),
termTags: dictFieldSplit(row.termTags || ''),
rules: dictFieldSplit(row.rules),
glossary: row.glossary,
score: row.score,
dictionary: row.dictionary,
id: row.id,
sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence
});
} }
}); });
@ -120,18 +124,7 @@ class Database {
const results = []; const results = [];
await this.db.terms.where('sequence').equals(sequence).each(row => { await this.db.terms.where('sequence').equals(sequence).each(row => {
if (row.dictionary === mainDictionary) { if (row.dictionary === mainDictionary) {
results.push({ results.push(Database.createTerm(row));
expression: row.expression,
reading: row.reading,
definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''),
termTags: dictFieldSplit(row.termTags || ''),
rules: dictFieldSplit(row.rules),
glossary: row.glossary,
score: row.score,
dictionary: row.dictionary,
id: row.id,
sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence
});
} }
}); });
@ -157,6 +150,28 @@ class Database {
return results; return results;
} }
async findTermMetaBulk(terms, titles) {
const promises = [];
const visited = {};
const results = [];
const createResult = Database.createTermMeta;
const filter = (row) => titles.includes(row.dictionary);
const db = this.db.backendDB();
const dbTransaction = db.transaction(['termMeta'], 'readonly');
const dbTerms = dbTransaction.objectStore('termMeta');
const dbIndex = dbTerms.index('expression');
for (let i = 0; i < terms.length; ++i) {
const only = IDBKeyRange.only(terms[i]);
promises.push(Database.getAll(dbIndex, only, i, visited, filter, createResult, results));
}
await Promise.all(promises);
return results;
}
async findKanji(kanji, titles) { async findKanji(kanji, titles) {
if (!this.db) { if (!this.db) {
throw 'Database not initialized'; throw 'Database not initialized';
@ -199,23 +214,30 @@ class Database {
return results; return results;
} }
findTagForTitleCached(name, title) {
if (this.tagCache.hasOwnProperty(title)) {
const cache = this.tagCache[title];
if (cache.hasOwnProperty(name)) {
return cache[name];
}
}
}
async findTagForTitle(name, title) { async findTagForTitle(name, title) {
if (!this.db) { if (!this.db) {
throw 'Database not initialized'; throw 'Database not initialized';
} }
this.tagCache[title] = this.tagCache[title] || {}; const cache = (this.tagCache.hasOwnProperty(title) ? this.tagCache[title] : (this.tagCache[title] = {}));
let result = this.tagCache[title][name]; let result = null;
if (!result) {
await this.db.tagMeta.where('name').equals(name).each(row => { await this.db.tagMeta.where('name').equals(name).each(row => {
if (title === row.dictionary) { if (title === row.dictionary) {
result = row; result = row;
} }
}); });
this.tagCache[title][name] = result; cache[name] = result;
}
return result; return result;
} }
@ -489,4 +511,70 @@ class Database {
return summary; return summary;
} }
static createTerm(row, index) {
return {
index,
expression: row.expression,
reading: row.reading,
definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''),
termTags: dictFieldSplit(row.termTags || ''),
rules: dictFieldSplit(row.rules),
glossary: row.glossary,
score: row.score,
dictionary: row.dictionary,
id: row.id,
sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence
};
}
static createTermMeta(row, index) {
return {
index,
mode: row.mode,
data: row.data,
dictionary: row.dictionary
};
}
static getAll(dbIndex, query, index, visited, filter, createResult, results) {
const fn = typeof dbIndex.getAll === 'function' ? Database.getAllFast : Database.getAllUsingCursor;
return fn(dbIndex, query, index, visited, filter, createResult, results);
}
static getAllFast(dbIndex, query, index, visited, filter, createResult, results) {
return new Promise((resolve, reject) => {
const request = dbIndex.getAll(query);
request.onerror = (e) => reject(e);
request.onsuccess = (e) => {
for (const row of e.target.result) {
if (filter(row, index) && !visited.hasOwnProperty(row.id)) {
visited[row.id] = true;
results.push(createResult(row, index));
}
}
resolve();
};
});
}
static getAllUsingCursor(dbIndex, query, index, visited, filter, createResult, results) {
return new Promise((resolve, reject) => {
const request = dbIndex.openCursor(query, 'next');
request.onerror = (e) => reject(e);
request.onsuccess = (e) => {
const cursor = e.target.result;
if (cursor) {
const row = cursor.value;
if (filter(row, index) && !visited.hasOwnProperty(row.id)) {
visited[row.id] = true;
results.push(createResult(row, index));
}
cursor.continue();
} else {
resolve();
}
};
});
}
} }

View File

@ -17,21 +17,27 @@
*/ */
class Deinflection { class Deinflector {
constructor(term, {rules=[], definitions=[], reason=''} = {}) { constructor(reasons) {
this.term = term; this.reasons = reasons;
this.rules = rules;
this.definitions = definitions;
this.reason = reason;
this.children = [];
} }
async deinflect(definer, reasons) { deinflect(source) {
for (const reason in reasons) { const results = [{
for (const variant of reasons[reason]) { source,
let accept = this.rules.length === 0; term: source,
rules: [],
definitions: [],
reasons: []
}];
for (let i = 0; i < results.length; ++i) {
const entry = results[i];
for (const reason in this.reasons) {
for (const variant of this.reasons[reason]) {
let accept = entry.rules.length === 0;
if (!accept) { if (!accept) {
for (const rule of this.rules) { for (const rule of entry.rules) {
if (variant.rulesIn.includes(rule)) { if (variant.rulesIn.includes(rule)) {
accept = true; accept = true;
break; break;
@ -39,81 +45,25 @@ class Deinflection {
} }
} }
if (!accept || !this.term.endsWith(variant.kanaIn)) { if (!accept || !entry.term.endsWith(variant.kanaIn)) {
continue; continue;
} }
const term = this.term.slice(0, -variant.kanaIn.length) + variant.kanaOut; const term = entry.term.slice(0, -variant.kanaIn.length) + variant.kanaOut;
if (term.length === 0) { if (term.length === 0) {
continue; continue;
} }
const child = new Deinflection(term, {reason, rules: variant.rulesOut}); results.push({
if (await child.deinflect(definer, reasons)) { source,
this.children.push(child); term,
rules: variant.rulesOut,
definitions: [],
reasons: [reason, ...entry.reasons]
});
} }
} }
} }
const definitions = await definer(this.term);
if (this.rules.length === 0) {
this.definitions = definitions;
} else {
for (const rule of this.rules) {
for (const definition of definitions) {
if (definition.rules.includes(rule)) {
this.definitions.push(definition);
}
}
}
}
if (this.definitions.length > 0 && this.children.length > 0) {
const child = new Deinflection(this.term, {rules: this.rules, definitions: this.definitions});
this.children.push(child);
}
return this.definitions.length > 0 || this.children.length > 0;
}
gather() {
if (this.children.length === 0) {
return [{
source: this.term,
rules: this.rules,
definitions: this.definitions,
reasons: this.reason.length > 0 ? [this.reason] : []
}];
}
const results = [];
for (const child of this.children) {
for (const result of child.gather()) {
if (this.reason.length > 0) {
result.reasons.push(this.reason);
}
result.source = this.term;
results.push(result);
}
}
return results; return results;
} }
} }
class Deinflector {
constructor(reasons) {
this.reasons = reasons;
}
async deinflect(term, definer) {
const node = new Deinflection(term);
if (await node.deinflect(definer, this.reasons)) {
return node.gather();
} else {
return [];
}
}
}

View File

@ -41,9 +41,7 @@ class Translator {
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
const definitionsGrouped = dictTermsGroup(definitions, dictionaries); const definitionsGrouped = dictTermsGroup(definitions, dictionaries);
for (const definition of definitionsGrouped) { await this.buildTermFrequencies(definitionsGrouped, titles);
await this.buildTermFrequencies(definition, titles);
}
if (options.general.compactTags) { if (options.general.compactTags) {
for (const definition of definitionsGrouped) { for (const definition of definitionsGrouped) {
@ -147,9 +145,7 @@ class Translator {
definitionsMerged.push(groupedDefinition); definitionsMerged.push(groupedDefinition);
} }
for (const definition of definitionsMerged) { await this.buildTermFrequencies(definitionsMerged, titles);
await this.buildTermFrequencies(definition, titles);
}
if (options.general.compactTags) { if (options.general.compactTags) {
for (const definition of definitionsMerged) { for (const definition of definitionsMerged) {
@ -164,9 +160,7 @@ class Translator {
const titles = Object.keys(dictionaries); const titles = Object.keys(dictionaries);
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
for (const definition of definitions) { await this.buildTermFrequencies(definitions, titles);
await this.buildTermFrequencies(definition, titles);
}
return {length, definitions}; return {length, definitions};
} }
@ -179,13 +173,9 @@ class Translator {
} }
} }
const cache = {};
const titles = Object.keys(dictionaries);
let deinflections = await this.findTermDeinflections(text, titles, cache);
const textHiragana = jpKatakanaToHiragana(text); const textHiragana = jpKatakanaToHiragana(text);
if (text !== textHiragana) { const titles = Object.keys(dictionaries);
deinflections.push(...await this.findTermDeinflections(textHiragana, titles, cache)); const deinflections = await this.findTermDeinflections(text, textHiragana, titles);
}
let definitions = []; let definitions = [];
for (const deinflection of deinflections) { for (const deinflection of deinflections) {
@ -221,19 +211,77 @@ class Translator {
return {length, definitions}; return {length, definitions};
} }
async findTermDeinflections(text, titles, cache) { async findTermDeinflections(text, text2, titles) {
const definer = async term => { const deinflections = (text === text2 ? this.getDeinflections(text) : this.getDeinflections2(text, text2));
if (cache.hasOwnProperty(term)) {
return cache[term]; if (deinflections.length === 0) {
} else { return [];
return cache[term] = await this.database.findTerms(term, titles); }
}
}; const uniqueDeinflectionTerms = [];
const uniqueDeinflectionArrays = [];
const uniqueDeinflectionsMap = {};
for (const deinflection of deinflections) {
const term = deinflection.term;
let deinflectionArray;
if (uniqueDeinflectionsMap.hasOwnProperty(term)) {
deinflectionArray = uniqueDeinflectionsMap[term];
} else {
deinflectionArray = [];
uniqueDeinflectionTerms.push(term);
uniqueDeinflectionArrays.push(deinflectionArray);
uniqueDeinflectionsMap[term] = deinflectionArray;
}
deinflectionArray.push(deinflection);
}
const definitions = await this.database.findTermsBulk(uniqueDeinflectionTerms, titles);
for (const definition of definitions) {
for (const deinflection of uniqueDeinflectionArrays[definition.index]) {
if (Translator.definitionContainsAnyRule(definition, deinflection.rules)) {
deinflection.definitions.push(definition);
}
}
}
return deinflections.filter(e => e.definitions.length > 0);
}
static definitionContainsAnyRule(definition, rules) {
if (rules.length === 0) {
return true;
}
const definitionRules = definition.rules;
for (const rule of rules) {
if (definitionRules.includes(rule)) {
return true;
}
}
return false;
}
getDeinflections(text) {
const deinflections = [];
let deinflections = [];
for (let i = text.length; i > 0; --i) { for (let i = text.length; i > 0; --i) {
const textSlice = text.slice(0, i); const textSlice = text.slice(0, i);
deinflections.push(...await this.deinflector.deinflect(textSlice, definer)); deinflections.push(...this.deinflector.deinflect(textSlice));
}
return deinflections;
}
getDeinflections2(text, text2) {
const deinflections = [];
for (let i = text.length; i > 0; --i) {
const textSlice = text.slice(0, i);
const text2Slice = text2.slice(0, i);
deinflections.push(...this.deinflector.deinflect(textSlice));
if (textSlice !== text2Slice) {
deinflections.push(...this.deinflector.deinflect(text2Slice));
}
} }
return deinflections; return deinflections;
@ -272,18 +320,46 @@ class Translator {
return definitions; return definitions;
} }
async buildTermFrequencies(definition, titles) { async buildTermFrequencies(definitions, titles) {
let terms = []; const terms = [];
for (const definition of definitions) {
if (definition.expressions) { if (definition.expressions) {
terms.push(...definition.expressions); terms.push(...definition.expressions);
} else { } else {
terms.push(definition); terms.push(definition);
} }
}
for (const term of terms) { if (terms.length === 0) {
return;
}
// Create mapping of unique terms
const expressionsUnique = [];
const termsUnique = [];
const termsUniqueMap = {};
for (let i = 0, ii = terms.length; i < ii; ++i) {
const term = terms[i];
const expression = term.expression;
term.frequencies = []; term.frequencies = [];
for (const meta of await this.database.findTermMeta(term.expression, titles)) {
if (meta.mode === 'freq') { if (termsUniqueMap.hasOwnProperty(expression)) {
termsUniqueMap[expression].push(term);
} else {
const termList = [term];
expressionsUnique.push(expression);
termsUnique.push(termList);
termsUniqueMap[expression] = termList;
}
}
const metas = await this.database.findTermMetaBulk(expressionsUnique, titles);
for (const meta of metas) {
if (meta.mode !== 'freq') {
continue;
}
for (const term of termsUnique[meta.index]) {
term.frequencies.push({ term.frequencies.push({
expression: meta.expression, expression: meta.expression,
frequency: meta.data, frequency: meta.data,
@ -292,20 +368,17 @@ class Translator {
} }
} }
} }
}
async expandTags(names, title) { async expandTags(names, title) {
const tags = []; const tags = [];
for (const name of names) { for (const name of names) {
const base = Translator.getNameBase(name); const base = Translator.getNameBase(name);
const meta = await this.database.findTagForTitle(base, title); let meta = this.database.findTagForTitleCached(base, title);
if (typeof meta === 'undefined') {
meta = await this.database.findTagForTitle(base, title);
}
const tag = {name}; const tag = Object.assign({}, meta !== null ? meta : {}, {name});
for (const prop in meta || {}) {
if (prop !== 'name') {
tag[prop] = meta[prop];
}
}
tags.push(dictTagSanitize(tag)); tags.push(dictTagSanitize(tag));
} }
@ -317,15 +390,17 @@ class Translator {
const stats = {}; const stats = {};
for (const name in items) { for (const name in items) {
const base = Translator.getNameBase(name); const base = Translator.getNameBase(name);
const meta = await this.database.findTagForTitle(base, title); let meta = this.database.findTagForTitleCached(base, title);
if (typeof meta === 'undefined') {
meta = await this.database.findTagForTitle(base, title);
if (meta === null) {
continue;
}
}
const group = stats[meta.category] = stats[meta.category] || []; const group = stats[meta.category] = stats[meta.category] || [];
const stat = {name, value: items[name]}; const stat = Object.assign({}, meta, {name, value: items[name]});
for (const prop in meta || {}) {
if (prop !== 'name') {
stat[prop] = meta[prop];
}
}
group.push(dictTagSanitize(stat)); group.push(dictTagSanitize(stat));
} }