Improve dictionary importer (#1859)

* Optimize dictionary data parsing during import

* Use ArrayBuffer instead of string

* Add missing function for testing
This commit is contained in:
toasted-nutbread 2021-07-31 10:53:35 -04:00 committed by GitHub
parent b99850ed54
commit 2d57d69b9e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 73 additions and 27 deletions

View File

@ -68,6 +68,10 @@ class Image {
} }
} }
/**
 * No-op `removeAttribute` for this `Image` class: it takes no arguments,
 * does nothing and returns `undefined`.
 * NOTE(review): presumably present so code that calls `removeAttribute` on an
 * image element can run against this class in tests - confirm against callers.
 */
removeAttribute() {
// NOP
}
async _delayTriggerLoad() { async _delayTriggerLoad() {
await Promise.resolve(); await Promise.resolve();
for (const callback of this._loadCallbacks) { for (const callback of this._loadCallbacks) {

View File

@ -72,7 +72,7 @@ class TranslatorVM extends DatabaseVM {
// Dictionary // Dictionary
this._dictionaryName = dictionaryName; this._dictionaryName = dictionaryName;
const testDictionary = createDictionaryArchive(dictionaryDirectory, dictionaryName); const testDictionary = createDictionaryArchive(dictionaryDirectory, dictionaryName);
const testDictionaryContent = await testDictionary.generateAsync({type: 'string'}); const testDictionaryContent = await testDictionary.generateAsync({type: 'arraybuffer'});
// Setup database // Setup database
const dictionaryImporter = new DictionaryImporter(); const dictionaryImporter = new DictionaryImporter();

View File

@ -149,20 +149,18 @@ class DictionaryImporter {
} }
// Extended data support // Extended data support
const extendedDataContext = { const requirements = [];
archive,
media: new Map()
};
for (const entry of termList) { for (const entry of termList) {
const glossaryList = entry.glossary; const glossaryList = entry.glossary;
for (let i = 0, ii = glossaryList.length; i < ii; ++i) { for (let i = 0, ii = glossaryList.length; i < ii; ++i) {
const glossary = glossaryList[i]; const glossary = glossaryList[i];
if (typeof glossary !== 'object' || glossary === null) { continue; } if (typeof glossary !== 'object' || glossary === null) { continue; }
glossaryList[i] = await this._formatDictionaryTermGlossaryObject(glossary, extendedDataContext, entry); glossaryList[i] = this._formatDictionaryTermGlossaryObject(glossary, entry, requirements);
} }
} }
const media = [...extendedDataContext.media.values()]; // Async requirements
const {media} = await this._resolveAsyncRequirements(requirements, archive);
// Add dictionary // Add dictionary
const summary = this._createSummary(dictionaryTitle, version, index, {prefixWildcardsSupported}); const summary = this._createSummary(dictionaryTitle, version, index, {prefixWildcardsSupported});
@ -305,62 +303,106 @@ class DictionaryImporter {
return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank];
} }
async _formatDictionaryTermGlossaryObject(data, context, entry) { _formatDictionaryTermGlossaryObject(data, entry, requirements) {
switch (data.type) { switch (data.type) {
case 'text': case 'text':
return data.text; return data.text;
case 'image': case 'image':
return await this._formatDictionaryTermGlossaryImage(data, context, entry); return this._formatDictionaryTermGlossaryImage(data, entry, requirements);
case 'structured-content': case 'structured-content':
return await this._formatStructuredContent(data, context, entry); return this._formatStructuredContent(data, entry, requirements);
default: default:
throw new Error(`Unhandled data type: ${data.type}`); throw new Error(`Unhandled data type: ${data.type}`);
} }
} }
async _formatDictionaryTermGlossaryImage(data, context, entry) { _formatDictionaryTermGlossaryImage(data, entry, requirements) {
return await this._createImageData(data, context, entry, {type: 'image'}); const target = {};
requirements.push({type: 'image', target, args: [data, entry]});
return target;
} }
async _formatStructuredContent(data, context, entry) { _formatStructuredContent(data, entry, requirements) {
const content = await this._prepareStructuredContent(data.content, context, entry); const content = this._prepareStructuredContent(data.content, entry, requirements);
return { return {
type: 'structured-content', type: 'structured-content',
content content
}; };
} }
async _prepareStructuredContent(content, context, entry) { _prepareStructuredContent(content, entry, requirements) {
if (typeof content === 'string' || !(typeof content === 'object' && content !== null)) { if (typeof content === 'string' || !(typeof content === 'object' && content !== null)) {
return content; return content;
} }
if (Array.isArray(content)) { if (Array.isArray(content)) {
for (let i = 0, ii = content.length; i < ii; ++i) { for (let i = 0, ii = content.length; i < ii; ++i) {
content[i] = await this._prepareStructuredContent(content[i], context, entry); content[i] = this._prepareStructuredContent(content[i], entry, requirements);
} }
return content; return content;
} }
const {tag} = content; const {tag} = content;
switch (tag) { switch (tag) {
case 'img': case 'img':
return await this._prepareStructuredContentImage(content, context, entry); return this._prepareStructuredContentImage(content, entry, requirements);
} }
const childContent = content.content; const childContent = content.content;
if (typeof childContent !== 'undefined') { if (typeof childContent !== 'undefined') {
content.content = await this._prepareStructuredContent(childContent, context, entry); content.content = this._prepareStructuredContent(childContent, entry, requirements);
} }
return content; return content;
} }
async _prepareStructuredContentImage(content, context, entry) { _prepareStructuredContentImage(content, entry, requirements) {
const target = {};
requirements.push({type: 'structured-content-image', target, args: [content, entry]});
return target;
}
async _resolveAsyncRequirements(requirements, archive) {
const media = new Map();
const context = {archive, media};
const promises = [];
for (const requirement of requirements) {
promises.push(this._resolveAsyncRequirement(context, requirement));
}
await Promise.all(promises);
return {
media: [...media.values()]
};
}
async _resolveAsyncRequirement(context, requirement) {
const {type, target, args} = requirement;
let result;
switch (type) {
case 'image':
result = await this._resolveDictionaryTermGlossaryImage(context, ...args);
break;
case 'structured-content-image':
result = await this._resolveStructuredContentImage(context, ...args);
break;
default:
return;
}
Object.assign(target, result);
}
async _resolveDictionaryTermGlossaryImage(context, data, entry) {
return await this._createImageData(context, data, entry, {type: 'image'});
}
async _resolveStructuredContentImage(context, content, entry) {
const {verticalAlign, sizeUnits} = content; const {verticalAlign, sizeUnits} = content;
const result = await this._createImageData(content, context, entry, {tag: 'img'}); const result = await this._createImageData(context, content, entry, {tag: 'img'});
if (typeof verticalAlign === 'string') { result.verticalAlign = verticalAlign; } if (typeof verticalAlign === 'string') { result.verticalAlign = verticalAlign; }
if (typeof sizeUnits === 'string') { result.sizeUnits = sizeUnits; } if (typeof sizeUnits === 'string') { result.sizeUnits = sizeUnits; }
return result; return result;
} }
async _createImageData(data, context, entry, attributes) { async _createImageData(context, data, entry, attributes) {
const { const {
path, path,
width: preferredWidth, width: preferredWidth,
@ -374,7 +416,7 @@ class DictionaryImporter {
collapsed, collapsed,
collapsible collapsible
} = data; } = data;
const {width, height} = await this._getImageMedia(path, context, entry); const {width, height} = await this._getImageMedia(context, path, entry);
const newData = Object.assign({}, attributes, {path, width, height}); const newData = Object.assign({}, attributes, {path, width, height});
if (typeof preferredWidth === 'number') { newData.preferredWidth = preferredWidth; } if (typeof preferredWidth === 'number') { newData.preferredWidth = preferredWidth; }
if (typeof preferredHeight === 'number') { newData.preferredHeight = preferredHeight; } if (typeof preferredHeight === 'number') { newData.preferredHeight = preferredHeight; }
@ -389,7 +431,7 @@ class DictionaryImporter {
return newData; return newData;
} }
async _getImageMedia(path, context, entry) { async _getImageMedia(context, path, entry) {
const {media} = context; const {media} = context;
const {dictionary, reading} = entry; const {dictionary, reading} = entry;

View File

@ -281,7 +281,7 @@ class DictionaryImportController {
const reader = new FileReader(); const reader = new FileReader();
reader.onload = () => resolve(reader.result); reader.onload = () => resolve(reader.result);
reader.onerror = () => reject(reader.error); reader.onerror = () => reject(reader.error);
reader.readAsBinaryString(file); reader.readAsArrayBuffer(file);
}); });
} }

View File

@ -86,7 +86,7 @@ function clearDatabase(timeout) {
async function testDatabase1() { async function testDatabase1() {
// Load dictionary data // Load dictionary data
const testDictionary = createTestDictionaryArchive('valid-dictionary1'); const testDictionary = createTestDictionaryArchive('valid-dictionary1');
const testDictionarySource = await testDictionary.generateAsync({type: 'string'}); const testDictionarySource = await testDictionary.generateAsync({type: 'arraybuffer'});
const testDictionaryIndex = JSON.parse(await testDictionary.files['index.json'].async('string')); const testDictionaryIndex = JSON.parse(await testDictionary.files['index.json'].async('string'));
const title = testDictionaryIndex.title; const title = testDictionaryIndex.title;
@ -766,7 +766,7 @@ async function testFindTagForTitle1(database, title) {
async function testDatabase2() { async function testDatabase2() {
// Load dictionary data // Load dictionary data
const testDictionary = createTestDictionaryArchive('valid-dictionary1'); const testDictionary = createTestDictionaryArchive('valid-dictionary1');
const testDictionarySource = await testDictionary.generateAsync({type: 'string'}); const testDictionarySource = await testDictionary.generateAsync({type: 'arraybuffer'});
const testDictionaryIndex = JSON.parse(await testDictionary.files['index.json'].async('string')); const testDictionaryIndex = JSON.parse(await testDictionary.files['index.json'].async('string'));
const title = testDictionaryIndex.title; const title = testDictionaryIndex.title;
@ -823,7 +823,7 @@ async function testDatabase3() {
for (const invalidDictionary of invalidDictionaries) { for (const invalidDictionary of invalidDictionaries) {
const testDictionary = createTestDictionaryArchive(invalidDictionary); const testDictionary = createTestDictionaryArchive(invalidDictionary);
const testDictionarySource = await testDictionary.generateAsync({type: 'string'}); const testDictionarySource = await testDictionary.generateAsync({type: 'arraybuffer'});
let error = null; let error = null;
try { try {