Use bulk database searches
This commit is contained in:
parent
664a318d7f
commit
1286b5115f
@ -75,6 +75,32 @@ class Database {
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async findTermsBulk(terms, titles) {
|
||||||
|
const promises = [];
|
||||||
|
const visited = {};
|
||||||
|
const results = [];
|
||||||
|
const createResult = Database.createTerm;
|
||||||
|
const filter = (row) => titles.includes(row.dictionary);
|
||||||
|
|
||||||
|
const db = this.db.backendDB();
|
||||||
|
const dbTransaction = db.transaction(['terms'], 'readonly');
|
||||||
|
const dbTerms = dbTransaction.objectStore('terms');
|
||||||
|
const dbIndex1 = dbTerms.index('expression');
|
||||||
|
const dbIndex2 = dbTerms.index('reading');
|
||||||
|
|
||||||
|
for (let i = 0; i < terms.length; ++i) {
|
||||||
|
const only = IDBKeyRange.only(terms[i]);
|
||||||
|
promises.push(
|
||||||
|
Database.getAll(dbIndex1, only, i, visited, filter, createResult, results),
|
||||||
|
Database.getAll(dbIndex2, only, i, visited, filter, createResult, results)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
await Promise.all(promises);
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
async findTermsExact(term, reading, titles) {
|
async findTermsExact(term, reading, titles) {
|
||||||
if (!this.db) {
|
if (!this.db) {
|
||||||
throw 'Database not initialized';
|
throw 'Database not initialized';
|
||||||
@ -124,6 +150,28 @@ class Database {
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async findTermMetaBulk(terms, titles) {
|
||||||
|
const promises = [];
|
||||||
|
const visited = {};
|
||||||
|
const results = [];
|
||||||
|
const createResult = Database.createTermMeta;
|
||||||
|
const filter = (row) => titles.includes(row.dictionary);
|
||||||
|
|
||||||
|
const db = this.db.backendDB();
|
||||||
|
const dbTransaction = db.transaction(['termMeta'], 'readonly');
|
||||||
|
const dbTerms = dbTransaction.objectStore('termMeta');
|
||||||
|
const dbIndex = dbTerms.index('expression');
|
||||||
|
|
||||||
|
for (let i = 0; i < terms.length; ++i) {
|
||||||
|
const only = IDBKeyRange.only(terms[i]);
|
||||||
|
promises.push(Database.getAll(dbIndex, only, i, visited, filter, createResult, results));
|
||||||
|
}
|
||||||
|
|
||||||
|
await Promise.all(promises);
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
async findKanji(kanji, titles) {
|
async findKanji(kanji, titles) {
|
||||||
if (!this.db) {
|
if (!this.db) {
|
||||||
throw 'Database not initialized';
|
throw 'Database not initialized';
|
||||||
@ -464,8 +512,9 @@ class Database {
|
|||||||
return summary;
|
return summary;
|
||||||
}
|
}
|
||||||
|
|
||||||
static createTerm(row) {
|
static createTerm(row, index) {
|
||||||
return {
|
return {
|
||||||
|
index,
|
||||||
expression: row.expression,
|
expression: row.expression,
|
||||||
reading: row.reading,
|
reading: row.reading,
|
||||||
definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''),
|
definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''),
|
||||||
@ -478,4 +527,54 @@ class Database {
|
|||||||
sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence
|
sequence: typeof row.sequence === 'undefined' ? -1 : row.sequence
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static createTermMeta(row, index) {
|
||||||
|
return {
|
||||||
|
index,
|
||||||
|
mode: row.mode,
|
||||||
|
data: row.data,
|
||||||
|
dictionary: row.dictionary
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
static getAll(dbIndex, query, index, visited, filter, createResult, results) {
|
||||||
|
const fn = typeof dbIndex.getAll === 'function' ? Database.getAllFast : Database.getAllUsingCursor;
|
||||||
|
return fn(dbIndex, query, index, visited, filter, createResult, results);
|
||||||
|
}
|
||||||
|
|
||||||
|
static getAllFast(dbIndex, query, index, visited, filter, createResult, results) {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const request = dbIndex.getAll(query);
|
||||||
|
request.onerror = (e) => reject(e);
|
||||||
|
request.onsuccess = (e) => {
|
||||||
|
for (const row of e.target.result) {
|
||||||
|
if (filter(row, index) && !visited.hasOwnProperty(row.id)) {
|
||||||
|
visited[row.id] = true;
|
||||||
|
results.push(createResult(row, index));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resolve();
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
static getAllUsingCursor(dbIndex, query, index, visited, filter, createResult, results) {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const request = dbIndex.openCursor(query, 'next');
|
||||||
|
request.onerror = (e) => reject(e);
|
||||||
|
request.onsuccess = (e) => {
|
||||||
|
const cursor = e.target.result;
|
||||||
|
if (cursor) {
|
||||||
|
const row = cursor.value;
|
||||||
|
if (filter(row, index) && !visited.hasOwnProperty(row.id)) {
|
||||||
|
visited[row.id] = true;
|
||||||
|
results.push(createResult(row, index));
|
||||||
|
}
|
||||||
|
cursor.continue();
|
||||||
|
} else {
|
||||||
|
resolve();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,21 +17,27 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
class Deinflection {
|
class Deinflector {
|
||||||
constructor(term, {rules=[], definitions=[], reason=''} = {}) {
|
constructor(reasons) {
|
||||||
this.term = term;
|
this.reasons = reasons;
|
||||||
this.rules = rules;
|
|
||||||
this.definitions = definitions;
|
|
||||||
this.reason = reason;
|
|
||||||
this.children = [];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async deinflect(definer, reasons) {
|
deinflect(source) {
|
||||||
for (const reason in reasons) {
|
const results = [{
|
||||||
for (const variant of reasons[reason]) {
|
source,
|
||||||
let accept = this.rules.length === 0;
|
term: source,
|
||||||
|
rules: [],
|
||||||
|
definitions: [],
|
||||||
|
reasons: []
|
||||||
|
}];
|
||||||
|
for (let i = 0; i < results.length; ++i) {
|
||||||
|
const entry = results[i];
|
||||||
|
|
||||||
|
for (const reason in this.reasons) {
|
||||||
|
for (const variant of this.reasons[reason]) {
|
||||||
|
let accept = entry.rules.length === 0;
|
||||||
if (!accept) {
|
if (!accept) {
|
||||||
for (const rule of this.rules) {
|
for (const rule of entry.rules) {
|
||||||
if (variant.rulesIn.includes(rule)) {
|
if (variant.rulesIn.includes(rule)) {
|
||||||
accept = true;
|
accept = true;
|
||||||
break;
|
break;
|
||||||
@ -39,81 +45,25 @@ class Deinflection {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!accept || !this.term.endsWith(variant.kanaIn)) {
|
if (!accept || !entry.term.endsWith(variant.kanaIn)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const term = this.term.slice(0, -variant.kanaIn.length) + variant.kanaOut;
|
const term = entry.term.slice(0, -variant.kanaIn.length) + variant.kanaOut;
|
||||||
if (term.length === 0) {
|
if (term.length === 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const child = new Deinflection(term, {reason, rules: variant.rulesOut});
|
results.push({
|
||||||
if (await child.deinflect(definer, reasons)) {
|
source,
|
||||||
this.children.push(child);
|
term,
|
||||||
|
rules: variant.rulesOut,
|
||||||
|
definitions: [],
|
||||||
|
reasons: [reason, ...entry.reasons]
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const definitions = await definer(this.term);
|
|
||||||
if (this.rules.length === 0) {
|
|
||||||
this.definitions = definitions;
|
|
||||||
} else {
|
|
||||||
for (const rule of this.rules) {
|
|
||||||
for (const definition of definitions) {
|
|
||||||
if (definition.rules.includes(rule)) {
|
|
||||||
this.definitions.push(definition);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.definitions.length > 0 && this.children.length > 0) {
|
|
||||||
const child = new Deinflection(this.term, {rules: this.rules, definitions: this.definitions});
|
|
||||||
this.children.push(child);
|
|
||||||
}
|
|
||||||
|
|
||||||
return this.definitions.length > 0 || this.children.length > 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
gather() {
|
|
||||||
if (this.children.length === 0) {
|
|
||||||
return [{
|
|
||||||
source: this.term,
|
|
||||||
rules: this.rules,
|
|
||||||
definitions: this.definitions,
|
|
||||||
reasons: this.reason.length > 0 ? [this.reason] : []
|
|
||||||
}];
|
|
||||||
}
|
|
||||||
|
|
||||||
const results = [];
|
|
||||||
for (const child of this.children) {
|
|
||||||
for (const result of child.gather()) {
|
|
||||||
if (this.reason.length > 0) {
|
|
||||||
result.reasons.push(this.reason);
|
|
||||||
}
|
|
||||||
|
|
||||||
result.source = this.term;
|
|
||||||
results.push(result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class Deinflector {
|
|
||||||
constructor(reasons) {
|
|
||||||
this.reasons = reasons;
|
|
||||||
}
|
|
||||||
|
|
||||||
async deinflect(term, definer) {
|
|
||||||
const node = new Deinflection(term);
|
|
||||||
if (await node.deinflect(definer, this.reasons)) {
|
|
||||||
return node.gather();
|
|
||||||
} else {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -41,9 +41,7 @@ class Translator {
|
|||||||
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
|
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
|
||||||
|
|
||||||
const definitionsGrouped = dictTermsGroup(definitions, dictionaries);
|
const definitionsGrouped = dictTermsGroup(definitions, dictionaries);
|
||||||
for (const definition of definitionsGrouped) {
|
await this.buildTermFrequencies(definitionsGrouped, titles);
|
||||||
await this.buildTermFrequencies(definition, titles);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (options.general.compactTags) {
|
if (options.general.compactTags) {
|
||||||
for (const definition of definitionsGrouped) {
|
for (const definition of definitionsGrouped) {
|
||||||
@ -147,9 +145,7 @@ class Translator {
|
|||||||
definitionsMerged.push(groupedDefinition);
|
definitionsMerged.push(groupedDefinition);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const definition of definitionsMerged) {
|
await this.buildTermFrequencies(definitionsMerged, titles);
|
||||||
await this.buildTermFrequencies(definition, titles);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (options.general.compactTags) {
|
if (options.general.compactTags) {
|
||||||
for (const definition of definitionsMerged) {
|
for (const definition of definitionsMerged) {
|
||||||
@ -164,9 +160,7 @@ class Translator {
|
|||||||
const titles = Object.keys(dictionaries);
|
const titles = Object.keys(dictionaries);
|
||||||
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
|
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
|
||||||
|
|
||||||
for (const definition of definitions) {
|
await this.buildTermFrequencies(definitions, titles);
|
||||||
await this.buildTermFrequencies(definition, titles);
|
|
||||||
}
|
|
||||||
|
|
||||||
return {length, definitions};
|
return {length, definitions};
|
||||||
}
|
}
|
||||||
@ -179,13 +173,9 @@ class Translator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const cache = {};
|
|
||||||
const titles = Object.keys(dictionaries);
|
|
||||||
let deinflections = await this.findTermDeinflections(text, titles, cache);
|
|
||||||
const textHiragana = jpKatakanaToHiragana(text);
|
const textHiragana = jpKatakanaToHiragana(text);
|
||||||
if (text !== textHiragana) {
|
const titles = Object.keys(dictionaries);
|
||||||
deinflections.push(...await this.findTermDeinflections(textHiragana, titles, cache));
|
const deinflections = await this.findTermDeinflections(text, textHiragana, titles);
|
||||||
}
|
|
||||||
|
|
||||||
let definitions = [];
|
let definitions = [];
|
||||||
for (const deinflection of deinflections) {
|
for (const deinflection of deinflections) {
|
||||||
@ -221,24 +211,60 @@ class Translator {
|
|||||||
return {length, definitions};
|
return {length, definitions};
|
||||||
}
|
}
|
||||||
|
|
||||||
async findTermDeinflections(text, titles, cache) {
|
async findTermDeinflections(text, text2, titles) {
|
||||||
const definer = async term => {
|
const deinflections = (text === text2 ? this.getDeinflections(text) : this.getDeinflections2(text, text2));
|
||||||
if (cache.hasOwnProperty(term)) {
|
|
||||||
return cache[term];
|
if (deinflections.length === 0) {
|
||||||
} else {
|
return [];
|
||||||
return cache[term] = await this.database.findTerms(term, titles);
|
}
|
||||||
}
|
|
||||||
};
|
const definitions = await this.database.findTermsBulk(deinflections.map(e => e.term), titles);
|
||||||
|
|
||||||
|
for (const d of definitions) {
|
||||||
|
deinflections[d.index].definitions.push(d);
|
||||||
|
}
|
||||||
|
|
||||||
|
return deinflections.filter(e => e.definitions.length > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
getDeinflections(text) {
|
||||||
|
const deinflections = [];
|
||||||
|
const deinflectionsKeys = {};
|
||||||
|
|
||||||
let deinflections = [];
|
|
||||||
for (let i = text.length; i > 0; --i) {
|
for (let i = text.length; i > 0; --i) {
|
||||||
const textSlice = text.slice(0, i);
|
const textSlice = text.slice(0, i);
|
||||||
deinflections.push(...await this.deinflector.deinflect(textSlice, definer));
|
Translator.addUniqueDeinflections(this.deinflector.deinflect(textSlice), deinflections, deinflectionsKeys);
|
||||||
}
|
}
|
||||||
|
|
||||||
return deinflections;
|
return deinflections;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
getDeinflections2(text, text2) {
|
||||||
|
const deinflections = [];
|
||||||
|
const deinflectionsKeys = {};
|
||||||
|
|
||||||
|
for (let i = text.length; i > 0; --i) {
|
||||||
|
const textSlice = text.slice(0, i);
|
||||||
|
const text2Slice = text2.slice(0, i);
|
||||||
|
Translator.addUniqueDeinflections(this.deinflector.deinflect(textSlice), deinflections, deinflectionsKeys);
|
||||||
|
if (textSlice !== text2Slice) {
|
||||||
|
Translator.addUniqueDeinflections(this.deinflector.deinflect(text2Slice), deinflections, deinflectionsKeys);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return deinflections;
|
||||||
|
}
|
||||||
|
|
||||||
|
static addUniqueDeinflections(newValues, deinflections, deinflectionsKeys) {
|
||||||
|
for (const value of newValues) {
|
||||||
|
const key = value.term;
|
||||||
|
if (!deinflectionsKeys.hasOwnProperty(key)) {
|
||||||
|
deinflections.push(value);
|
||||||
|
deinflectionsKeys[key] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async findKanji(text, dictionaries) {
|
async findKanji(text, dictionaries) {
|
||||||
let definitions = [];
|
let definitions = [];
|
||||||
const processed = {};
|
const processed = {};
|
||||||
@ -272,18 +298,46 @@ class Translator {
|
|||||||
return definitions;
|
return definitions;
|
||||||
}
|
}
|
||||||
|
|
||||||
async buildTermFrequencies(definition, titles) {
|
async buildTermFrequencies(definitions, titles) {
|
||||||
let terms = [];
|
const terms = [];
|
||||||
|
for (const definition of definitions) {
|
||||||
if (definition.expressions) {
|
if (definition.expressions) {
|
||||||
terms.push(...definition.expressions);
|
terms.push(...definition.expressions);
|
||||||
} else {
|
} else {
|
||||||
terms.push(definition);
|
terms.push(definition);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (const term of terms) {
|
if (terms.length === 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create mapping of unique terms
|
||||||
|
const expressionsUnique = [];
|
||||||
|
const termsUnique = [];
|
||||||
|
const termsUniqueMap = {};
|
||||||
|
for (let i = 0, ii = terms.length; i < ii; ++i) {
|
||||||
|
const term = terms[i];
|
||||||
|
const expression = term.expression;
|
||||||
term.frequencies = [];
|
term.frequencies = [];
|
||||||
for (const meta of await this.database.findTermMeta(term.expression, titles)) {
|
|
||||||
if (meta.mode === 'freq') {
|
if (termsUniqueMap.hasOwnProperty(expression)) {
|
||||||
|
termsUniqueMap[expression].push(term);
|
||||||
|
} else {
|
||||||
|
const termList = [term];
|
||||||
|
expressionsUnique.push(expression);
|
||||||
|
termsUnique.push(termList);
|
||||||
|
termsUniqueMap[expression] = termList;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const metas = await this.database.findTermMetaBulk(expressionsUnique, titles);
|
||||||
|
for (const meta of metas) {
|
||||||
|
if (meta.mode !== 'freq') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const term of termsUnique[meta.index]) {
|
||||||
term.frequencies.push({
|
term.frequencies.push({
|
||||||
expression: meta.expression,
|
expression: meta.expression,
|
||||||
frequency: meta.data,
|
frequency: meta.data,
|
||||||
@ -292,7 +346,6 @@ class Translator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
async expandTags(names, title) {
|
async expandTags(names, title) {
|
||||||
const tags = [];
|
const tags = [];
|
||||||
|
Loading…
Reference in New Issue
Block a user