Merge pull request #346 from toasted-nutbread/dictionary-schemas
Dictionary schemas
This commit is contained in:
commit
9c5ad3ea67
40
ext/bg/data/dictionary-index-schema.json
Normal file
40
ext/bg/data/dictionary-index-schema.json
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "object",
|
||||||
|
"description": "Index file containing information about the data contained in the dictionary.",
|
||||||
|
"required": [
|
||||||
|
"title",
|
||||||
|
"revision"
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"title": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Title of the dictionary."
|
||||||
|
},
|
||||||
|
"revision": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Revision of the dictionary. This value is only used for displaying information."
|
||||||
|
},
|
||||||
|
"sequenced": {
|
||||||
|
"type": "boolean",
|
||||||
|
"default": false,
|
||||||
|
"description": "Whether or not this dictionary can be used as the primary dictionary. Primary dictionaries typically contain term/expression definitions."
|
||||||
|
},
|
||||||
|
"format": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Format of data found in the JSON data files."
|
||||||
|
},
|
||||||
|
"version": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Alias for format."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"anyOf": [
|
||||||
|
{
|
||||||
|
"required": ["format"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"required": ["version"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
33
ext/bg/data/dictionary-kanji-bank-v1-schema.json
Normal file
33
ext/bg/data/dictionary-kanji-bank-v1-schema.json
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "array",
|
||||||
|
"description": "Data file containing kanji information.",
|
||||||
|
"additionalItems": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Information about a single kanji character.",
|
||||||
|
"minItems": 4,
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Kanji character.",
|
||||||
|
"minLength": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "String of space-separated onyomi readings for the kanji character. An empty string is treated as no readings."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "String of space-separated kunyomi readings for the kanji character. An empty string is treated as no readings."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "String of space-separated tags for the kanji character. An empty string is treated as no tags."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"additionalItems": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "A meaning for the kanji character."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
44
ext/bg/data/dictionary-kanji-bank-v3-schema.json
Normal file
44
ext/bg/data/dictionary-kanji-bank-v3-schema.json
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "array",
|
||||||
|
"description": "Data file containing kanji information.",
|
||||||
|
"additionalItems": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Information about a single kanji character.",
|
||||||
|
"minItems": 6,
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Kanji character.",
|
||||||
|
"minLength": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "String of space-separated onyomi readings for the kanji character. An empty string is treated as no readings."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "String of space-separated kunyomi readings for the kanji character. An empty string is treated as no readings."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "String of space-separated tags for the kanji character. An empty string is treated as no tags."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "array",
|
||||||
|
"description": "Array of meanings for the kanji character.",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "A meaning for the kanji character."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"description": "Various stats for the kanji character.",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
25
ext/bg/data/dictionary-kanji-meta-bank-v3-schema.json
Normal file
25
ext/bg/data/dictionary-kanji-meta-bank-v3-schema.json
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "array",
|
||||||
|
"description": "Custom metadata for kanji characters.",
|
||||||
|
"additionalItems": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Metadata about a single kanji character.",
|
||||||
|
"minItems": 3,
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["freq"],
|
||||||
|
"description": "Type of data. \"freq\" corresponds to frequency information."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": ["string", "number"],
|
||||||
|
"description": "Data for the character."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
32
ext/bg/data/dictionary-tag-bank-v3-schema.json
Normal file
32
ext/bg/data/dictionary-tag-bank-v3-schema.json
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "array",
|
||||||
|
"description": "Data file containing tag information for terms and kanji.",
|
||||||
|
"additionalItems": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Information about a single tag.",
|
||||||
|
"minItems": 5,
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Tag name."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Category for the tag."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number",
|
||||||
|
"description": "Sorting order for the tag."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Notes for the tag."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number",
|
||||||
|
"description": "Score used to determine popularity. Negative values are more rare and positive values are more frequent. This score is also used to sort search results."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
36
ext/bg/data/dictionary-term-bank-v1-schema.json
Normal file
36
ext/bg/data/dictionary-term-bank-v1-schema.json
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "array",
|
||||||
|
"description": "Data file containing term and expression information.",
|
||||||
|
"additionalItems": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Information about a single term/expression.",
|
||||||
|
"minItems": 5,
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Term or expression."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Reading of the term/expression, or an empty string if the reading is the same as the term/expression."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"description": "String of space-separated tags for the definition. An empty string is treated as no tags."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "String of space-separated rule identifiers for the definition which is used to validate deinflection. Valid rule identifiers are: v1: ichidan verb; v5: godan verb; vs: suru verb; vk: kuru verb; adj-i: i-adjective. An empty string corresponds to words which aren't inflected, such as nouns."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number",
|
||||||
|
"description": "Score used to determine popularity. Negative values are more rare and positive values are more frequent. This score is also used to sort search results."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"additionalItems": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Single definition for the term/expression."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
48
ext/bg/data/dictionary-term-bank-v3-schema.json
Normal file
48
ext/bg/data/dictionary-term-bank-v3-schema.json
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "array",
|
||||||
|
"description": "Data file containing term and expression information.",
|
||||||
|
"additionalItems": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Information about a single term/expression.",
|
||||||
|
"minItems": 8,
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Term or expression."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Reading of the term/expression, or an empty string if the reading is the same as the term/expression."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"description": "String of space-separated tags for the definition. An empty string is treated as no tags."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "String of space-separated rule identifiers for the definition which is used to validate deinflection. Valid rule identifiers are: v1: ichidan verb; v5: godan verb; vs: suru verb; vk: kuru verb; adj-i: i-adjective. An empty string corresponds to words which aren't inflected, such as nouns."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number",
|
||||||
|
"description": "Score used to determine popularity. Negative values are more rare and positive values are more frequent. This score is also used to sort search results."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "array",
|
||||||
|
"description": "Array of definitions for the term/expression.",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Single definition for the term/expression."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Sequence number for the term/expression. Terms/expressions with the same sequence number can be shown together when the \"resultOutputMode\" option is set to \"merge\"."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "String of space-separated tags for the term/expression. An empty string is treated as no tags."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
25
ext/bg/data/dictionary-term-meta-bank-v3-schema.json
Normal file
25
ext/bg/data/dictionary-term-meta-bank-v3-schema.json
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "array",
|
||||||
|
"description": "Custom metadata for terms/expressions.",
|
||||||
|
"additionalItems": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Metadata about a single term/expression.",
|
||||||
|
"minItems": 3,
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Term or expression."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["freq"],
|
||||||
|
"description": "Type of data. \"freq\" corresponds to frequency information."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": ["string", "number"],
|
||||||
|
"description": "Data for the term/expression."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
90
test/dictionary-validate.js
Normal file
90
test/dictionary-validate.js
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
|
||||||
|
// Loading the bundled JSZip build emits a deprecation warning, so warnings are
// silenced only while it is being required. The previous setting is restored
// afterwards instead of being forced to false, so a caller-provided
// --no-deprecation flag is not clobbered.
const previousNoDeprecation = process.noDeprecation;
process.noDeprecation = true; // Suppress a warning about JSZip
const JSZip = require(path.join(__dirname, '../ext/mixed/lib/jszip.min.js'));
process.noDeprecation = previousNoDeprecation;

// The extension's JSON schema implementation is read as plain text and
// evaluated inside a strict-mode function body which returns the JsonSchema
// binding that the script declares.
// NOTE(review): presumably done this way because json-schema.js is a browser
// script with no module exports - confirm.
const jsonSchemaFileName = path.join(__dirname, '../ext/bg/js/json-schema.js');
const jsonSchemaFileSource = fs.readFileSync(jsonSchemaFileName, {encoding: 'utf8'});
const JsonSchema = Function(`'use strict';${jsonSchemaFileSource};return JsonSchema;`)();
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Loads and parses a JSON schema file.
 *
 * @param {string} relativeFileName - Path of the schema file, resolved relative to this script's directory.
 * @returns {*} The parsed JSON content of the file.
 * @throws {Error} If the file cannot be read or does not contain valid JSON.
 */
function readSchema(relativeFileName) {
    const fileName = path.join(__dirname, relativeFileName);
    const source = fs.readFileSync(fileName, {encoding: 'utf8'});
    return JSON.parse(source);
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Validates every numbered bank file of one kind found in a dictionary archive.
 *
 * Bank files are looked up by substituting 1, 2, 3, ... for "%s" in
 * fileNameFormat; the scan stops at the first number with no matching entry.
 *
 * @param {object} zip - Loaded archive exposing a `files` map keyed by entry name.
 * @param {string} fileNameFormat - Entry name pattern containing a "%s" placeholder for the bank number.
 * @param {object} schema - Schema passed to JsonSchema.validate for each bank's parsed content.
 * @throws {Error} If a bank is not valid JSON or JsonSchema.validate throws;
 *     the message is prefixed with the name of the offending bank file.
 */
async function validateDictionaryBanks(zip, fileNameFormat, schema) {
    for (let index = 1; ; ++index) {
        const fileName = fileNameFormat.replace(/%s/, String(index));

        const file = zip.files[fileName];
        if (!file) { break; }

        try {
            const data = JSON.parse(await file.async('string'));
            JsonSchema.validate(data, schema);
        } catch (e) {
            // Identify the failing bank; without this the error gives no hint
            // about which of the numbered files is invalid.
            if (e instanceof Error) {
                e.message = `${fileName}: ${e.message}`;
            }
            throw e;
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Validates a single dictionary archive: its index.json and every bank file.
 *
 * @param {string} fileName - Path of the dictionary archive on disk.
 * @param {object} schemas - Schemas keyed as index, termBankV1, termBankV3,
 *     termMetaBankV3, kanjiBankV1, kanjiBankV3, kanjiMetaBankV3 and tagBankV3.
 * @throws {Error} If the archive has no index.json, or if reading, parsing or
 *     validating any part of it throws.
 */
async function validateDictionary(fileName, schemas) {
    const source = fs.readFileSync(fileName);
    const zip = await JSZip.loadAsync(source);

    const indexFile = zip.files['index.json'];
    if (!indexFile) {
        throw new Error('No dictionary index found in archive');
    }

    const index = JSON.parse(await indexFile.async('string'));

    // Validate the index before reading any of its properties, so a malformed
    // index (for example `null`) is reported by the schema check rather than
    // as a TypeError from the property access below.
    JsonSchema.validate(index, schemas.index);

    // "version" is accepted as an alias of "format".
    const version = index.format || index.version;

    // Only term and kanji banks have a version 1 layout; every other bank
    // kind is always checked against its version 3 schema.
    const termBankSchema = (version === 1 ? schemas.termBankV1 : schemas.termBankV3);
    const kanjiBankSchema = (version === 1 ? schemas.kanjiBankV1 : schemas.kanjiBankV3);

    await validateDictionaryBanks(zip, 'term_bank_%s.json', termBankSchema);
    await validateDictionaryBanks(zip, 'term_meta_bank_%s.json', schemas.termMetaBankV3);
    await validateDictionaryBanks(zip, 'kanji_bank_%s.json', kanjiBankSchema);
    await validateDictionaryBanks(zip, 'kanji_meta_bank_%s.json', schemas.kanjiMetaBankV3);
    await validateDictionaryBanks(zip, 'tag_bank_%s.json', schemas.tagBankV3);
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Command-line entry point.
 *
 * Treats every argument after the script name as the path of a dictionary
 * archive and validates each one in turn. With no arguments, usage
 * information is printed and nothing is validated.
 *
 * A failure in one dictionary is logged and does not stop the remaining ones
 * from being checked, but it sets the process exit code to 1 so that automated
 * runs can detect it.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const dictionaryFileNames = process.argv.slice(2);
    if (dictionaryFileNames.length === 0) {
        console.log([
            'Usage:',
            ' node dictionary-validate <dictionary-file-names>...'
        ].join('\n'));
        return;
    }

    // Schemas are loaded once and shared by every dictionary being validated.
    const schemas = {
        index: readSchema('../ext/bg/data/dictionary-index-schema.json'),
        kanjiBankV1: readSchema('../ext/bg/data/dictionary-kanji-bank-v1-schema.json'),
        kanjiBankV3: readSchema('../ext/bg/data/dictionary-kanji-bank-v3-schema.json'),
        kanjiMetaBankV3: readSchema('../ext/bg/data/dictionary-kanji-meta-bank-v3-schema.json'),
        tagBankV3: readSchema('../ext/bg/data/dictionary-tag-bank-v3-schema.json'),
        termBankV1: readSchema('../ext/bg/data/dictionary-term-bank-v1-schema.json'),
        termBankV3: readSchema('../ext/bg/data/dictionary-term-bank-v3-schema.json'),
        termMetaBankV3: readSchema('../ext/bg/data/dictionary-term-meta-bank-v3-schema.json')
    };

    for (const dictionaryFileName of dictionaryFileNames) {
        try {
            console.log(`Validating ${dictionaryFileName}...`);
            await validateDictionary(dictionaryFileName, schemas);
            console.log('No issues found');
        } catch (e) {
            console.warn(e);
            // Keep going, but make sure the run is reported as failed.
            process.exitCode = 1;
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
main();
|
Loading…
Reference in New Issue
Block a user