Merge pull request #274 from toasted-nutbread/lookup-wildcards

Lookup wildcards
This commit is contained in:
Alex Yatskov 2019-11-13 18:51:50 -08:00 committed by GitHub
commit b1659522b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 103 additions and 90 deletions

View File

@@ -72,33 +72,18 @@ async function apiOptionsSave(source) {
backend.onOptionsUpdated(source); backend.onOptionsUpdated(source);
} }
async function apiTermsFind(text, optionsContext) { async function apiTermsFind(text, details, optionsContext) {
const options = await apiOptionsGet(optionsContext); const options = await apiOptionsGet(optionsContext);
const translator = utilBackend().translator; const [definitions, length] = await utilBackend().translator.findTerms(text, details, options);
definitions.splice(options.general.maxResults);
const searcher = { return {length, definitions};
'merge': translator.findTermsMerged,
'split': translator.findTermsSplit,
'group': translator.findTermsGrouped
}[options.general.resultOutputMode].bind(translator);
const {definitions, length} = await searcher(
text,
dictEnabledSet(options),
options.scanning.alphanumeric,
options
);
return {
length,
definitions: definitions.slice(0, options.general.maxResults)
};
} }
async function apiKanjiFind(text, optionsContext) { async function apiKanjiFind(text, optionsContext) {
const options = await apiOptionsGet(optionsContext); const options = await apiOptionsGet(optionsContext);
const definitions = await utilBackend().translator.findKanji(text, dictEnabledSet(options)); const definitions = await utilBackend().translator.findKanji(text, options);
return definitions.slice(0, options.general.maxResults); definitions.splice(options.general.maxResults);
return definitions;
} }
async function apiDefinitionAdd(definition, mode, context, optionsContext) { async function apiDefinitionAdd(definition, mode, context, optionsContext) {

View File

@@ -179,7 +179,7 @@ Backend.messageHandlers = {
optionsGet: ({optionsContext}) => apiOptionsGet(optionsContext), optionsGet: ({optionsContext}) => apiOptionsGet(optionsContext),
optionsSet: ({changedOptions, optionsContext, source}) => apiOptionsSet(changedOptions, optionsContext, source), optionsSet: ({changedOptions, optionsContext, source}) => apiOptionsSet(changedOptions, optionsContext, source),
kanjiFind: ({text, optionsContext}) => apiKanjiFind(text, optionsContext), kanjiFind: ({text, optionsContext}) => apiKanjiFind(text, optionsContext),
termsFind: ({text, optionsContext}) => apiTermsFind(text, optionsContext), termsFind: ({text, details, optionsContext}) => apiTermsFind(text, details, optionsContext),
definitionAdd: ({definition, mode, context, optionsContext}) => apiDefinitionAdd(definition, mode, context, optionsContext), definitionAdd: ({definition, mode, context, optionsContext}) => apiDefinitionAdd(definition, mode, context, optionsContext),
definitionsAddable: ({definitions, modes, optionsContext}) => apiDefinitionsAddable(definitions, modes, optionsContext), definitionsAddable: ({definitions, modes, optionsContext}) => apiDefinitionsAddable(definitions, modes, optionsContext),
noteView: ({noteId}) => apiNoteView(noteId), noteView: ({noteId}) => apiNoteView(noteId),

View File

@@ -130,7 +130,7 @@ class Database {
await Promise.all(promises); await Promise.all(promises);
} }
async findTermsBulk(termList, titles) { async findTermsBulk(termList, titles, wildcard) {
this.validate(); this.validate();
const promises = []; const promises = [];
@@ -149,10 +149,11 @@ class Database {
const dbIndex2 = dbTerms.index('reading'); const dbIndex2 = dbTerms.index('reading');
for (let i = 0; i < termList.length; ++i) { for (let i = 0; i < termList.length; ++i) {
const only = IDBKeyRange.only(termList[i]); const term = termList[i];
const query = wildcard ? IDBKeyRange.bound(term, `${term}\uffff`, false, false) : IDBKeyRange.only(term);
promises.push( promises.push(
Database.getAll(dbIndex1, only, i, processRow), Database.getAll(dbIndex1, query, i, processRow),
Database.getAll(dbIndex2, only, i, processRow) Database.getAll(dbIndex2, query, i, processRow)
); );
} }

View File

@@ -55,39 +55,23 @@ function dictRowsSort(rows, options) {
function dictTermsSort(definitions, dictionaries=null) { function dictTermsSort(definitions, dictionaries=null) {
return definitions.sort((v1, v2) => { return definitions.sort((v1, v2) => {
let i;
if (dictionaries !== null) { if (dictionaries !== null) {
const p1 = (dictionaries[v1.dictionary] || {}).priority || 0; i = (
const p2 = (dictionaries[v2.dictionary] || {}).priority || 0; ((dictionaries[v2.dictionary] || {}).priority || 0) -
if (p1 > p2) { ((dictionaries[v1.dictionary] || {}).priority || 0)
return -1; );
} else if (p1 < p2) { if (i !== 0) { return i; }
return 1;
}
} }
const sl1 = v1.source.length; i = v2.source.length - v1.source.length;
const sl2 = v2.source.length; if (i !== 0) { return i; }
if (sl1 > sl2) {
return -1;
} else if (sl1 < sl2) {
return 1;
}
const rl1 = v1.reasons.length; i = v2.reasons.length - v1.reasons.length;
const rl2 = v2.reasons.length; if (i !== 0) { return i; }
if (rl1 < rl2) {
return -1;
} else if (rl1 > rl2) {
return 1;
}
const s1 = v1.score; i = v2.score - v1.score;
const s2 = v2.score; if (i !== 0) { return i; }
if (s1 > s2) {
return -1;
} else if (s1 < s2) {
return 1;
}
return v2.expression.toString().localeCompare(v1.expression.toString()); return v2.expression.toString().localeCompare(v1.expression.toString());
}); });

View File

@@ -203,11 +203,18 @@ class DisplaySearch extends Display {
async onSearchQueryUpdated(query, animate) { async onSearchQueryUpdated(query, animate) {
try { try {
const details = {};
const match = /[\*\uff0a]+$/.exec(query);
if (match !== null) {
details.wildcard = true;
query = query.substr(0, query.length - match[0].length);
}
const valid = (query.length > 0); const valid = (query.length > 0);
this.setIntroVisible(!valid, animate); this.setIntroVisible(!valid, animate);
this.updateSearchButton(); this.updateSearchButton();
if (valid) { if (valid) {
const {definitions} = await apiTermsFind(query, this.optionsContext); const {definitions} = await apiTermsFind(query, details, this.optionsContext);
this.setContentTerms(definitions, { this.setContentTerms(definitions, {
focus: false, focus: false,
sentence: null, sentence: null,

View File

@@ -47,22 +47,6 @@ class Translator {
await this.database.deleteDictionary(dictionaryName); await this.database.deleteDictionary(dictionaryName);
} }
async findTermsGrouped(text, dictionaries, alphanumeric, options) {
const titles = Object.keys(dictionaries);
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric);
const definitionsGrouped = dictTermsGroup(definitions, dictionaries);
await this.buildTermFrequencies(definitionsGrouped, titles);
if (options.general.compactTags) {
for (const definition of definitionsGrouped) {
dictTermsCompressTags(definition.definitions);
}
}
return {length, definitions: definitionsGrouped};
}
async getSequencedDefinitions(definitions, mainDictionary) { async getSequencedDefinitions(definitions, mainDictionary) {
const definitionsBySequence = dictTermsMergeBySequence(definitions, mainDictionary); const definitionsBySequence = dictTermsMergeBySequence(definitions, mainDictionary);
const defaultDefinitions = definitionsBySequence['-1']; const defaultDefinitions = definitionsBySequence['-1'];
@@ -157,10 +141,41 @@ class Translator {
return result; return result;
} }
async findTermsMerged(text, dictionaries, alphanumeric, options) { async findTerms(text, details, options) {
switch (options.general.resultOutputMode) {
case 'group':
return await this.findTermsGrouped(text, details, options);
case 'merge':
return await this.findTermsMerged(text, details, options);
case 'split':
return await this.findTermsSplit(text, details, options);
default:
return [[], 0];
}
}
async findTermsGrouped(text, details, options) {
const dictionaries = dictEnabledSet(options);
const titles = Object.keys(dictionaries);
const [definitions, length] = await this.findTermsInternal(text, dictionaries, options.scanning.alphanumeric, details);
const definitionsGrouped = dictTermsGroup(definitions, dictionaries);
await this.buildTermFrequencies(definitionsGrouped, titles);
if (options.general.compactTags) {
for (const definition of definitionsGrouped) {
dictTermsCompressTags(definition.definitions);
}
}
return [definitionsGrouped, length];
}
async findTermsMerged(text, details, options) {
const dictionaries = dictEnabledSet(options);
const secondarySearchTitles = Object.keys(options.dictionaries).filter(dict => options.dictionaries[dict].allowSecondarySearches); const secondarySearchTitles = Object.keys(options.dictionaries).filter(dict => options.dictionaries[dict].allowSecondarySearches);
const titles = Object.keys(dictionaries); const titles = Object.keys(dictionaries);
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); const [definitions, length] = await this.findTermsInternal(text, dictionaries, options.scanning.alphanumeric, details);
const {sequencedDefinitions, defaultDefinitions} = await this.getSequencedDefinitions(definitions, options.general.mainDictionary); const {sequencedDefinitions, defaultDefinitions} = await this.getSequencedDefinitions(definitions, options.general.mainDictionary);
const definitionsMerged = []; const definitionsMerged = [];
const mergedByTermIndices = new Set(); const mergedByTermIndices = new Set();
@@ -191,29 +206,33 @@ class Translator {
} }
} }
return {length, definitions: dictTermsSort(definitionsMerged)}; return [dictTermsSort(definitionsMerged), length];
} }
async findTermsSplit(text, dictionaries, alphanumeric) { async findTermsSplit(text, details, options) {
const dictionaries = dictEnabledSet(options);
const titles = Object.keys(dictionaries); const titles = Object.keys(dictionaries);
const {length, definitions} = await this.findTerms(text, dictionaries, alphanumeric); const [definitions, length] = await this.findTermsInternal(text, dictionaries, options.scanning.alphanumeric, details);
await this.buildTermFrequencies(definitions, titles); await this.buildTermFrequencies(definitions, titles);
return {length, definitions}; return [definitions, length];
} }
async findTerms(text, dictionaries, alphanumeric) { async findTermsInternal(text, dictionaries, alphanumeric, details) {
if (!alphanumeric && text.length > 0) { if (!alphanumeric && text.length > 0) {
const c = text[0]; const c = text[0];
if (!jpIsKana(c) && !jpIsKanji(c)) { if (!jpIsKana(c) && !jpIsKanji(c)) {
return {length: 0, definitions: []}; return [[], 0];
} }
} }
const textHiragana = jpKatakanaToHiragana(text);
const titles = Object.keys(dictionaries); const titles = Object.keys(dictionaries);
const deinflections = await this.findTermDeinflections(text, textHiragana, titles); const deinflections = (
details.wildcard ?
await this.findTermWildcard(text, titles) :
await this.findTermDeinflections(text, titles)
);
let definitions = []; let definitions = [];
for (const deinflection of deinflections) { for (const deinflection of deinflections) {
@@ -246,10 +265,26 @@ class Translator {
length = Math.max(length, definition.source.length); length = Math.max(length, definition.source.length);
} }
return {length, definitions}; return [definitions, length];
} }
async findTermDeinflections(text, text2, titles) { async findTermWildcard(text, titles) {
const definitions = await this.database.findTermsBulk([text], titles, true);
if (definitions.length === 0) {
return [];
}
return [{
source: text,
term: text,
rules: 0,
definitions,
reasons: []
}];
}
async findTermDeinflections(text, titles) {
const text2 = jpKatakanaToHiragana(text);
const deinflections = (text === text2 ? this.getDeinflections(text) : this.getDeinflections2(text, text2)); const deinflections = (text === text2 ? this.getDeinflections(text) : this.getDeinflections2(text, text2));
if (deinflections.length === 0) { if (deinflections.length === 0) {
@@ -273,7 +308,7 @@ class Translator {
deinflectionArray.push(deinflection); deinflectionArray.push(deinflection);
} }
const definitions = await this.database.findTermsBulk(uniqueDeinflectionTerms, titles); const definitions = await this.database.findTermsBulk(uniqueDeinflectionTerms, titles, false);
for (const definition of definitions) { for (const definition of definitions) {
const definitionRules = Deinflector.rulesToRuleFlags(definition.rules); const definitionRules = Deinflector.rulesToRuleFlags(definition.rules);
@@ -314,7 +349,8 @@ class Translator {
return deinflections; return deinflections;
} }
async findKanji(text, dictionaries) { async findKanji(text, options) {
const dictionaries = dictEnabledSet(options);
const titles = Object.keys(dictionaries); const titles = Object.keys(dictionaries);
const kanjiUnique = {}; const kanjiUnique = {};
const kanjiList = []; const kanjiList = [];

View File

@@ -25,8 +25,8 @@ function apiOptionsSet(changedOptions, optionsContext, source) {
return utilInvoke('optionsSet', {changedOptions, optionsContext, source}); return utilInvoke('optionsSet', {changedOptions, optionsContext, source});
} }
function apiTermsFind(text, optionsContext) { function apiTermsFind(text, details, optionsContext) {
return utilInvoke('termsFind', {text, optionsContext}); return utilInvoke('termsFind', {text, details, optionsContext});
} }
function apiKanjiFind(text, optionsContext) { function apiKanjiFind(text, optionsContext) {

View File

@@ -413,7 +413,7 @@ class Frontend {
const searchText = textSource.text(); const searchText = textSource.text();
if (searchText.length === 0) { return null; } if (searchText.length === 0) { return null; }
const {definitions, length} = await apiTermsFind(searchText, this.getOptionsContext()); const {definitions, length} = await apiTermsFind(searchText, {}, this.getOptionsContext());
if (definitions.length === 0) { return null; } if (definitions.length === 0) { return null; }
textSource.setEndOffset(length); textSource.setEndOffset(length);

View File

@@ -112,7 +112,7 @@ class Display {
try { try {
textSource.setEndOffset(this.options.scanning.length); textSource.setEndOffset(this.options.scanning.length);
({definitions, length} = await apiTermsFind(textSource.text(), this.getOptionsContext())); ({definitions, length} = await apiTermsFind(textSource.text(), {}, this.getOptionsContext()));
if (definitions.length === 0) { if (definitions.length === 0) {
return false; return false;
} }