/*
 * Copyright (C) 2020-2021  Yomichan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

/* global
 * JSZip
 * JsonSchema
 * MediaUtil
 */

/**
 * Reads a zipped dictionary archive, validates its contents against JSON
 * schemas, normalizes the entries and writes them into a dictionary database.
 */
class DictionaryImporter {
    constructor() {
        // Cache: schema file name => Promise resolving to a JsonSchema instance.
        this._schemas = new Map();
    }

    /**
     * Imports a dictionary archive into the given database.
     * @param dictionaryDatabase The target database; must expose isPrepared(),
     *   dictionaryExists(title) and bulkAdd(storeName, entries, start, count).
     * @param archiveSource The archive data, passed directly to JSZip.loadAsync.
     * @param details An object whose prefixWildcardsSupported property controls
     *   whether reversed expression/reading fields are generated for terms.
     * @param onProgress Optional callback invoked as onProgress(total, loadedCount)
     *   after every batch written to the database.
     * @returns An object {result, errors}, where result is the dictionary summary
     *   and errors is the list of errors raised by individual data batches.
     * @throws Error if the database is invalid or not ready, the archive has no
     *   valid index, the dictionary already exists, validation fails, or the
     *   dictionary summary cannot be written.
     */
    async importDictionary(dictionaryDatabase, archiveSource, details, onProgress) {
        if (!dictionaryDatabase) {
            throw new Error('Invalid database');
        }
        if (!dictionaryDatabase.isPrepared()) {
            throw new Error('Database is not ready');
        }

        const hasOnProgress = (typeof onProgress === 'function');

        // Read archive
        const archive = await JSZip.loadAsync(archiveSource);

        // Read and validate index
        const indexFileName = 'index.json';
        const indexFile = archive.files[indexFileName];
        if (!indexFile) {
            throw new Error('No dictionary index found in archive');
        }

        const index = JSON.parse(await indexFile.async('string'));

        const indexSchema = await this._getSchema('/data/schemas/dictionary-index-schema.json');
        this._validateJsonSchema(index, indexSchema, indexFileName);

        const dictionaryTitle = index.title;
        const version = index.format || index.version;

        if (!dictionaryTitle || !index.revision) {
            throw new Error('Unrecognized dictionary format');
        }

        // Verify database is not already imported
        if (await dictionaryDatabase.dictionaryExists(dictionaryTitle)) {
            throw new Error('Dictionary is already imported');
        }

        // Data format converters: each maps a positional array entry to a named object.
        const convertTermBankEntry = (entry) => {
            if (version === 1) {
                const [expression, reading, definitionTags, rules, score, ...glossary] = entry;
                return {expression, reading, definitionTags, rules, score, glossary};
            } else {
                const [expression, reading, definitionTags, rules, score, glossary, sequence, termTags] = entry;
                return {expression, reading, definitionTags, rules, score, glossary, sequence, termTags};
            }
        };

        const convertTermMetaBankEntry = (entry) => {
            const [expression, mode, data] = entry;
            return {expression, mode, data};
        };

        const convertKanjiBankEntry = (entry) => {
            if (version === 1) {
                const [character, onyomi, kunyomi, tags, ...meanings] = entry;
                return {character, onyomi, kunyomi, tags, meanings};
            } else {
                const [character, onyomi, kunyomi, tags, meanings, stats] = entry;
                return {character, onyomi, kunyomi, tags, meanings, stats};
            }
        };

        const convertKanjiMetaBankEntry = (entry) => {
            const [character, mode, data] = entry;
            return {character, mode, data};
        };

        const convertTagBankEntry = (entry) => {
            const [name, category, order, notes, score] = entry;
            return {name, category, order, notes, score};
        };

        // Archive file reading: files are numbered from 1 and read until the first gap.
        const readFileSequence = async (fileNameFormat, convertEntry, schema) => {
            const results = [];
            for (let i = 1; true; ++i) {
                const fileName = fileNameFormat.replace(/\?/, `${i}`);
                const file = archive.files[fileName];
                if (!file) { break; }

                const entries = JSON.parse(await file.async('string'));
                this._validateJsonSchema(entries, schema, fileName);

                for (let entry of entries) {
                    entry = convertEntry(entry);
                    entry.dictionary = dictionaryTitle;
                    results.push(entry);
                }
            }
            return results;
        };

        // Load schemas
        const dataBankSchemaPaths = this._getDataBankSchemaPaths(version);
        const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path)));

        // Load data
        const termList = await readFileSequence('term_bank_?.json', convertTermBankEntry, dataBankSchemas[0]);
        const termMetaList = await readFileSequence('term_meta_bank_?.json', convertTermMetaBankEntry, dataBankSchemas[1]);
        const kanjiList = await readFileSequence('kanji_bank_?.json', convertKanjiBankEntry, dataBankSchemas[2]);
        const kanjiMetaList = await readFileSequence('kanji_meta_bank_?.json', convertKanjiMetaBankEntry, dataBankSchemas[3]);
        const tagList = await readFileSequence('tag_bank_?.json', convertTagBankEntry, dataBankSchemas[4]);

        // Old tags: tag metadata stored directly in the index
        const indexTagMeta = index.tagMeta;
        if (typeof indexTagMeta === 'object' && indexTagMeta !== null) {
            for (const name of Object.keys(indexTagMeta)) {
                const {category, order, notes, score} = indexTagMeta[name];
                tagList.push({name, category, order, notes, score});
            }
        }

        // Prefix wildcard support
        const prefixWildcardsSupported = !!details.prefixWildcardsSupported;
        if (prefixWildcardsSupported) {
            for (const entry of termList) {
                entry.expressionReverse = stringReverse(entry.expression);
                entry.readingReverse = stringReverse(entry.reading);
            }
        }

        // Extended data support: object glossary items are converted in place,
        // and any referenced media is collected into the context's media map.
        const extendedDataContext = {
            archive,
            media: new Map()
        };
        for (const entry of termList) {
            const glossaryList = entry.glossary;
            for (let i = 0, ii = glossaryList.length; i < ii; ++i) {
                const glossary = glossaryList[i];
                if (typeof glossary !== 'object' || glossary === null) { continue; }
                glossaryList[i] = await this._formatDictionaryTermGlossaryObject(glossary, extendedDataContext, entry);
            }
        }

        const media = [...extendedDataContext.media.values()];

        // Add dictionary
        const summary = this._createSummary(dictionaryTitle, version, index, {prefixWildcardsSupported});

        // Awaited so that a failure to store the summary rejects the import
        // instead of becoming an unhandled rejection.
        await dictionaryDatabase.bulkAdd('dictionaries', [summary], 0, 1);

        // Add data
        const errors = [];
        // Media is included so that loadedCount never exceeds the reported total.
        const total = (
            termList.length +
            termMetaList.length +
            kanjiList.length +
            kanjiMetaList.length +
            tagList.length +
            media.length
        );
        let loadedCount = 0;
        const maxTransactionLength = 1000;

        const bulkAdd = async (objectStoreName, entries) => {
            const ii = entries.length;
            for (let i = 0; i < ii; i += maxTransactionLength) {
                const count = Math.min(maxTransactionLength, ii - i);

                try {
                    await dictionaryDatabase.bulkAdd(objectStoreName, entries, i, count);
                } catch (e) {
                    // A failed batch is recorded and the import continues.
                    errors.push(e);
                }

                loadedCount += count;
                if (hasOnProgress) {
                    onProgress(total, loadedCount);
                }
            }
        };

        await bulkAdd('terms', termList);
        await bulkAdd('termMeta', termMetaList);
        await bulkAdd('kanji', kanjiList);
        await bulkAdd('kanjiMeta', kanjiMetaList);
        await bulkAdd('tagMeta', tagList);
        await bulkAdd('media', media);

        return {result: summary, errors};
    }

    /**
     * Builds the summary object stored for a dictionary.
     * @param dictionaryTitle The dictionary title.
     * @param version The dictionary format version.
     * @param index The parsed index object; revision and sequenced are always copied,
     *   while author, url, description and attribution are copied only if they are strings.
     * @param details Extra properties assigned onto the summary last.
     * @returns The summary object.
     */
    _createSummary(dictionaryTitle, version, index, details) {
        const summary = {
            title: dictionaryTitle,
            revision: index.revision,
            sequenced: index.sequenced,
            version
        };

        const {author, url, description, attribution} = index;
        if (typeof author === 'string') { summary.author = author; }
        if (typeof url === 'string') { summary.url = url; }
        if (typeof description === 'string') { summary.description = description; }
        if (typeof attribution === 'string') { summary.attribution = attribution; }

        Object.assign(summary, details);

        return summary;
    }

    /**
     * Gets the schema for a file name, creating and caching it on first use.
     * The promise itself is cached, so concurrent requests share one load.
     * @param fileName The schema asset path.
     * @returns A Promise resolving to a JsonSchema instance.
     */
    async _getSchema(fileName) {
        let schemaPromise = this._schemas.get(fileName);
        if (typeof schemaPromise !== 'undefined') {
            return schemaPromise;
        }

        schemaPromise = this._createSchema(fileName);
        this._schemas.set(fileName, schemaPromise);
        return schemaPromise;
    }

    /**
     * Fetches a schema asset and wraps it in a JsonSchema.
     * @param fileName The schema asset path.
     * @returns A Promise resolving to a new JsonSchema instance.
     */
    async _createSchema(fileName) {
        const schema = await this._fetchJsonAsset(fileName);
        return new JsonSchema(schema);
    }

    /**
     * Validates a value against a schema.
     * @param value The value to validate.
     * @param schema An object exposing validate(value), which throws on failure.
     * @param fileName The archive file name, used in the error message.
     * @throws Error A formatted error wrapping the original validation error.
     */
    _validateJsonSchema(value, schema, fileName) {
        try {
            schema.validate(value);
        } catch (e) {
            throw this._formatSchemaError(e, fileName);
        }
    }

    /**
     * Creates a descriptive error for a validation failure.
     * @param e The original error; its valueStack and schemaStack, when present,
     *   are used to describe where validation failed.
     * @param fileName The archive file name being validated.
     * @returns A new Error whose data property holds the original error.
     */
    _formatSchemaError(e, fileName) {
        const valuePathString = this._getSchemaErrorPathString(e.valueStack, 'dictionary');
        const schemaPathString = this._getSchemaErrorPathString(e.schemaStack, 'schema');

        const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`);
        e2.data = e;

        return e2;
    }

    /**
     * Converts a stack of path information into a readable path string.
     * String parts are joined with '.', number parts are appended as '[n]',
     * and a null part resets the result back to the base.
     * @param infoList A list of objects with a path property (a single part or
     *   an array of parts). A missing list yields the base unchanged.
     * @param base The prefix of the resulting string.
     * @returns The path string.
     */
    _getSchemaErrorPathString(infoList, base='') {
        let result = base;
        // Errors that did not originate from schema validation carry no stack;
        // returning the base keeps the original error message visible.
        if (infoList === null || typeof infoList === 'undefined') {
            return result;
        }
        for (const {path} of infoList) {
            const pathArray = Array.isArray(path) ? path : [path];
            for (const pathPart of pathArray) {
                if (pathPart === null) {
                    result = base;
                } else {
                    switch (typeof pathPart) {
                        case 'string':
                            if (result.length > 0) {
                                result += '.';
                            }
                            result += pathPart;
                            break;
                        case 'number':
                            result += `[${pathPart}]`;
                            break;
                    }
                }
            }
        }
        return result;
    }

    /**
     * Gets the schema paths for the data bank files of a format version.
     * Only the term and kanji bank schemas differ for version 1.
     * @param version The dictionary format version.
     * @returns An array ordered as [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank].
     */
    _getDataBankSchemaPaths(version) {
        const termBank = (
            version === 1 ?
            '/data/schemas/dictionary-term-bank-v1-schema.json' :
            '/data/schemas/dictionary-term-bank-v3-schema.json'
        );
        const termMetaBank = '/data/schemas/dictionary-term-meta-bank-v3-schema.json';
        const kanjiBank = (
            version === 1 ?
            '/data/schemas/dictionary-kanji-bank-v1-schema.json' :
            '/data/schemas/dictionary-kanji-bank-v3-schema.json'
        );
        const kanjiMetaBank = '/data/schemas/dictionary-kanji-meta-bank-v3-schema.json';
        const tagBank = '/data/schemas/dictionary-tag-bank-v3-schema.json';

        return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank];
    }

    /**
     * Converts an object glossary item into its stored form.
     * @param data The glossary object; its type selects the conversion.
     * @param context The extended data context ({archive, media}).
     * @param entry The term entry that owns the glossary item.
     * @returns The text for 'text' items, otherwise the converted object.
     * @throws Error if the type is not handled.
     */
    async _formatDictionaryTermGlossaryObject(data, context, entry) {
        switch (data.type) {
            case 'text':
                return data.text;
            case 'image':
                return await this._formatDictionaryTermGlossaryImage(data, context, entry);
            case 'structured-content':
                return await this._formatStructuredContent(data, context, entry);
            default:
                throw new Error(`Unhandled data type: ${data.type}`);
        }
    }

    /**
     * Converts an 'image' glossary item.
     * @returns The image data object tagged with type 'image'.
     */
    async _formatDictionaryTermGlossaryImage(data, context, entry) {
        return await this._createImageData(data, context, entry, {type: 'image'});
    }

    /**
     * Converts a 'structured-content' glossary item.
     * @returns A new object {type: 'structured-content', content}.
     */
    async _formatStructuredContent(data, context, entry) {
        const content = await this._prepareStructuredContent(data.content, context, entry);
        return {
            type: 'structured-content',
            content
        };
    }

    /**
     * Recursively prepares structured content, replacing 'img' nodes with
     * image data. Arrays and non-image nodes are updated in place.
     * @param content A string, array, node object or other value.
     * @returns The prepared content; non-object values are returned unchanged.
     */
    async _prepareStructuredContent(content, context, entry) {
        if (typeof content === 'string' || !(typeof content === 'object' && content !== null)) {
            return content;
        }
        if (Array.isArray(content)) {
            for (let i = 0, ii = content.length; i < ii; ++i) {
                content[i] = await this._prepareStructuredContent(content[i], context, entry);
            }
            return content;
        }
        const {tag} = content;
        switch (tag) {
            case 'img':
                return await this._prepareStructuredContentImage(content, context, entry);
        }
        const childContent = content.content;
        if (typeof childContent !== 'undefined') {
            content.content = await this._prepareStructuredContent(childContent, context, entry);
        }
        return content;
    }

    /**
     * Converts a structured content 'img' node into image data, additionally
     * carrying over verticalAlign and sizeUnits when they are strings.
     */
    async _prepareStructuredContentImage(content, context, entry) {
        const {verticalAlign, sizeUnits} = content;
        const result = await this._createImageData(content, context, entry, {tag: 'img'});
        if (typeof verticalAlign === 'string') { result.verticalAlign = verticalAlign; }
        if (typeof sizeUnits === 'string') { result.sizeUnits = sizeUnits; }
        return result;
    }

    /**
     * Creates the stored representation of an image reference.
     * The image's own dimensions become width/height, while the dimensions
     * given in the data become preferredWidth/preferredHeight.
     * @param data The source object describing the image.
     * @param context The extended data context ({archive, media}).
     * @param entry The term entry that references the image.
     * @param attributes Base properties copied onto the result first.
     * @returns The new image data object; optional fields are only copied
     *   when they have the expected primitive type.
     */
    async _createImageData(data, context, entry, attributes) {
        const {
            path,
            width: preferredWidth,
            height: preferredHeight,
            title,
            description,
            pixelated,
            imageRendering,
            appearance,
            background,
            collapsed,
            collapsible
        } = data;
        const {width, height} = await this._getImageMedia(path, context, entry);
        const newData = Object.assign({}, attributes, {path, width, height});
        if (typeof preferredWidth === 'number') { newData.preferredWidth = preferredWidth; }
        if (typeof preferredHeight === 'number') { newData.preferredHeight = preferredHeight; }
        if (typeof title === 'string') { newData.title = title; }
        if (typeof description === 'string') { newData.description = description; }
        if (typeof pixelated === 'boolean') { newData.pixelated = pixelated; }
        if (typeof imageRendering === 'string') { newData.imageRendering = imageRendering; }
        if (typeof appearance === 'string') { newData.appearance = appearance; }
        if (typeof background === 'boolean') { newData.background = background; }
        if (typeof collapsed === 'boolean') { newData.collapsed = collapsed; }
        if (typeof collapsible === 'boolean') { newData.collapsible = collapsible; }
        return newData;
    }

    /**
     * Gets the media record for an image in the archive, loading it and adding
     * it to context.media the first time the path is seen.
     * @param path The path of the image inside the archive.
     * @param context The extended data context ({archive, media}).
     * @param entry The term entry referencing the image; used for error messages.
     * @returns The media record {dictionary, path, mediaType, width, height, content}.
     * @throws Error if the file is missing, is not a recognized image type,
     *   or fails to load.
     */
    async _getImageMedia(path, context, entry) {
        const {media} = context;
        const {dictionary, reading} = entry;

        let errorSource = entry.expression;
        if (reading.length > 0) {
            errorSource += ` (${reading})`;
        }
        // Separator added so the dictionary name does not run into the term.
        errorSource += ` in ${dictionary}`;

        const createError = (message) => new Error(`${message} at path ${JSON.stringify(path)} for ${errorSource}`);

        // Check if already added
        let mediaData = media.get(path);
        if (typeof mediaData !== 'undefined') {
            if (MediaUtil.getFileExtensionFromImageMediaType(mediaData.mediaType) === null) {
                throw createError('Media file is not a valid image');
            }
            return mediaData;
        }

        // Find file in archive
        const file = context.archive.file(path);
        if (file === null) {
            throw createError('Could not find image');
        }

        // Load file content
        const content = await file.async('base64');
        const mediaType = MediaUtil.getImageMediaTypeFromFileName(path);
        if (mediaType === null) {
            throw createError('Could not determine media type for image');
        }

        // Load image data
        let image;
        try {
            image = await this._loadImageBase64(mediaType, content);
        } catch (e) {
            throw createError('Could not load image');
        }

        // Create image data
        mediaData = {
            dictionary,
            path,
            mediaType,
            width: image.naturalWidth,
            height: image.naturalHeight,
            content
        };
        media.set(path, mediaData);

        return mediaData;
    }

    /**
     * Fetches a JSON asset bundled with the extension.
     * @param url The asset path, resolved with chrome.runtime.getURL.
     * @returns The parsed JSON content.
     * @throws Error if the response is not ok.
     */
    async _fetchJsonAsset(url) {
        const response = await fetch(chrome.runtime.getURL(url), {
            method: 'GET',
            mode: 'no-cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });
        if (!response.ok) {
            throw new Error(`Failed to fetch ${url}: ${response.status}`);
        }
        return await response.json();
    }

    /**
     * Attempts to load an image using a base64 encoded content and a media type.
     * The temporary object URL is revoked once the load settles, so importing
     * many images does not keep their blobs alive.
     * @param mediaType The media type for the image content.
     * @param content The binary content for the image, encoded in base64.
     * @returns A Promise which resolves with an HTMLImageElement instance on
     *   successful load, otherwise an error is thrown.
     */
    _loadImageBase64(mediaType, content) {
        return new Promise((resolve, reject) => {
            const image = new Image();
            const eventListeners = new EventListenerCollection();
            const blob = MediaUtil.createBlobFromBase64Content(content, mediaType);
            const url = URL.createObjectURL(blob);
            const cleanup = () => {
                eventListeners.removeAllEventListeners();
                URL.revokeObjectURL(url);
            };
            eventListeners.addEventListener(image, 'load', () => {
                cleanup();
                resolve(image);
            }, false);
            eventListeners.addEventListener(image, 'error', () => {
                cleanup();
                reject(new Error('Image failed to load'));
            }, false);
            image.src = url;
        });
    }
}