Devoice and nasal pronunciation info (#1832)

* Update schema to support information about nasal and devoiced mora

* Expose nasalPositions and devoicePositions in dictionary entry data

* Expose nasalPositions, devoicePositions in grouped pitch info

* Update display generator

* Update test dictionary data

* Update test data
This commit is contained in:
toasted-nutbread 2021-07-15 22:39:33 -04:00 committed by GitHub
parent 25d74140ce
commit 41fc76d6fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 257 additions and 11 deletions

View File

@ -354,6 +354,14 @@ namespace Translation {
* Position of the downstep, as a number of mora.
*/
position: number;
/**
* Positions of morae that have a nasal sound.
*/
nasalPositions: number[];
/**
* Positions of morae that have a devoiced sound.
*/
devoicePositions: number[];
/**
* Tags for the pitch accent.
*/

View File

@ -1512,6 +1512,31 @@ button.definition-item-expansion-button:focus:focus-visible+.definition-item-con
padding-right: 0.1em;
margin-right: 0.1em;
}
/*
 * Marker for a devoiced mora: a dotted circle (1.125em in diameter) whose
 * center is placed at the center of its containing block.
 * NOTE(review): presumably the containing block is the positioned
 * .pitch-accent-character element that the indicator is appended to - confirm.
 */
.pitch-accent-character-devoice-indicator {
display: block;
position: absolute;
/* left/top at 50% combined with the -50% translate below centers the circle. */
left: 50%;
top: 50%;
width: 1.125em;
height: 1.125em;
/* NOTE(review): --font-size-no-units is presumably the font size in px without a unit, making the border a fixed ~1.5px - confirm. */
border: calc(1.5em / var(--font-size-no-units)) dotted var(--danger-color);
border-radius: 50%;
box-sizing: border-box;
z-index: 1;
transform: translate(-50%, -50%);
}
/*
 * Marker for a nasal mora: a small solid-bordered circle (0.375em in diameter)
 * anchored near the top-right corner of its containing block.
 * NOTE(review): presumably the containing block is the positioned
 * .pitch-accent-character element that the indicator is appended to - confirm.
 */
.pitch-accent-character-nasal-indicator {
display: block;
position: absolute;
/* A negative right offset lets the circle extend slightly past the right edge. */
right: -0.125em;
top: 0.125em;
width: 0.375em;
height: 0.375em;
/* NOTE(review): --font-size-no-units is presumably the font size in px without a unit, making the border a fixed ~1.5px - confirm. */
border: calc(1.5em / var(--font-size-no-units)) solid var(--danger-color);
border-radius: 50%;
box-sizing: border-box;
z-index: 1;
}
/* Inserts a space and an opening square bracket before the content of the pitch accent position element. */
.pitch-accent-position::before {
content: ' [';
}

View File

@ -85,6 +85,40 @@
"description": "Mora position of the pitch accent downstep. A value of 0 indicates that the word does not have a downstep (heiban).",
"minimum": 0
},
"nasal": {
"oneOf": [
{
"type": "integer",
"description": "Position of a mora with nasal sound.",
"minimum": 0
},
{
"type": "array",
"description": "Positions of morae with nasal sound.",
"items": {
"type": "integer",
"minimum": 0
}
}
]
},
"devoice": {
"oneOf": [
{
"type": "integer",
"description": "Position of a mora with devoiced sound.",
"minimum": 0
},
{
"type": "array",
"description": "Positions of morae with devoiced sound.",
"items": {
"type": "integer",
"minimum": 0
}
}
]
},
"tags": {
"type": "array",
"description": "List of tags for this pitch accent.",

View File

@ -468,12 +468,17 @@ class DisplayGenerator {
_createPitch(details) {
const jp = this._japaneseUtil;
const {reading, position, tags, exclusiveTerms, exclusiveReadings} = details;
const {reading, position, nasalPositions, devoicePositions, tags, exclusiveTerms, exclusiveReadings} = details;
const morae = jp.getKanaMorae(reading);
const nasalPositionsSet = nasalPositions.length > 0 ? new Set(nasalPositions) : null;
const devoicePositionsSet = devoicePositions.length > 0 ? new Set(devoicePositions) : null;
const node = this._templates.instantiate('pitch-accent');
node.dataset.pitchAccentPosition = `${position}`;
if (nasalPositions.length > 0) { node.dataset.nasalMoraPosition = nasalPositions.join(' '); }
if (devoicePositions.length > 0) { node.dataset.devoiceMoraPosition = devoicePositions.join(' '); }
node.dataset.tagCount = `${tags.length}`;
let n = node.querySelector('.pitch-accent-position');
@ -487,18 +492,39 @@ class DisplayGenerator {
n = node.querySelector('.pitch-accent-characters');
for (let i = 0, ii = morae.length; i < ii; ++i) {
const i1 = i + 1;
const mora = morae[i];
const highPitch = jp.isMoraPitchHigh(i, position);
const highPitchNext = jp.isMoraPitchHigh(i + 1, position);
const highPitchNext = jp.isMoraPitchHigh(i1, position);
const nasal = nasalPositionsSet !== null && nasalPositionsSet.has(i1);
const devoice = devoicePositionsSet !== null && devoicePositionsSet.has(i1);
const n1 = this._templates.instantiate('pitch-accent-character');
const n2 = n1.querySelector('.pitch-accent-character-inner');
const n1 = document.createElement('span');
n1.className = 'pitch-accent-character';
const n2 = document.createElement('span');
n2.className = 'pitch-accent-character-inner';
n1.appendChild(n2);
n1.dataset.position = `${i}`;
n1.dataset.pitch = highPitch ? 'high' : 'low';
n1.dataset.pitchNext = highPitchNext ? 'high' : 'low';
this._setTextContent(n2, mora, 'ja');
if (devoice) {
n1.dataset.devoice = 'true';
const n3 = document.createElement('span');
n3.className = 'pitch-accent-character-devoice-indicator';
n1.appendChild(n3);
}
if (nasal) {
n1.dataset.nasal = 'true';
const n3 = document.createElement('span');
n3.className = 'pitch-accent-character-nasal-indicator';
n1.appendChild(n3);
}
n.appendChild(n1);
}

View File

@ -108,13 +108,15 @@ class DictionaryDataUtil {
dictionaryPitchAccentInfoList = [];
pitchAccentInfoMap.set(dictionary, dictionaryPitchAccentInfoList);
}
for (const {position, tags} of pitches) {
let pitchAccentInfo = this._findExistingPitchAccentInfo(reading, position, tags, dictionaryPitchAccentInfoList);
for (const {position, nasalPositions, devoicePositions, tags} of pitches) {
let pitchAccentInfo = this._findExistingPitchAccentInfo(reading, position, nasalPositions, devoicePositions, tags, dictionaryPitchAccentInfoList);
if (pitchAccentInfo === null) {
pitchAccentInfo = {
terms: new Set(),
reading,
position,
nasalPositions,
devoicePositions,
tags,
exclusiveTerms: [],
exclusiveReadings: []
@ -228,11 +230,13 @@ class DictionaryDataUtil {
return results;
}
static _findExistingPitchAccentInfo(reading, position, tags, pitchAccentInfoList) {
static _findExistingPitchAccentInfo(reading, position, nasalPositions, devoicePositions, tags, pitchAccentInfoList) {
for (const pitchInfo of pitchAccentInfoList) {
if (
pitchInfo.reading === reading &&
pitchInfo.position === position &&
this._areArraysEqual(pitchInfo.nasalPositions, nasalPositions) &&
this._areArraysEqual(pitchInfo.devoicePositions, devoicePositions) &&
this._areTagListsEqual(pitchInfo.tags, tags)
) {
return pitchInfo;
@ -241,6 +245,15 @@ class DictionaryDataUtil {
return null;
}
static _areArraysEqual(array1, array2) {
const ii = array1.length;
if (ii !== array2.length) { return false; }
for (let i = 0; i < ii; ++i) {
if (array1[i] !== array2[i]) { return false; }
}
return true;
}
static _areTagListsEqual(tagList1, tagList2) {
const ii = tagList1.length;
if (tagList2.length !== ii) { return false; }

View File

@ -828,12 +828,14 @@ class Translator {
{
if (data.reading !== reading) { continue; }
const pitches = [];
for (const {position, tags} of data.pitches) {
for (const {position, tags, nasal, devoice} of data.pitches) {
const tags2 = [];
if (Array.isArray(tags) && tags.length > 0) {
tags2.push(this._createTagGroup(dictionary, tags));
}
pitches.push({position, tags: tags2});
const nasalPositions = this._toNumberArray(nasal);
const devoicePositions = this._toNumberArray(devoice);
pitches.push({position, nasalPositions, devoicePositions, tags: tags2});
}
for (const {pronunciations, headwordIndex} of targets) {
pronunciations.push(this._createTermPronunciation(
@ -968,6 +970,10 @@ class Translator {
return JSON.stringify(array);
}
_toNumberArray(value) {
return Array.isArray(value) ? value : (typeof value === 'number' ? [value] : []);
}
// Kanji data
_createKanjiStat(name, value, databaseInfo, dictionary) {

View File

@ -41,5 +41,35 @@
{"position": 0, "tags": ["P2"]}
]
}
],
[
"番号",
"pitch",
{
"reading": "ばんごう",
"pitches": [
{"position": 3, "nasal": 3}
]
}
],
[
"中腰",
"pitch",
{
"reading": "ちゅうごし",
"pitches": [
{"position": 0, "nasal": 3}
]
}
],
[
"土木工事",
"pitch",
{
"reading": "どぼくこうじ",
"pitches": [
{"position": 4, "devoice": 3}
]
}
]
]

View File

@ -1235,10 +1235,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -1366,10 +1370,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -1497,10 +1505,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -1628,10 +1640,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -3236,10 +3252,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -3367,10 +3387,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -3734,10 +3758,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -3865,10 +3893,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -4942,10 +4974,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -5120,10 +5156,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -5966,10 +6006,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -5983,10 +6027,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -6660,10 +6708,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -6795,10 +6847,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -6930,10 +6986,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -7065,10 +7125,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -7875,10 +7939,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -8006,10 +8074,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -8137,10 +8209,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -8268,10 +8344,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -9078,10 +9158,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -9209,10 +9293,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -9340,10 +9428,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -9471,10 +9563,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -10754,10 +10850,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]
@ -10771,10 +10871,14 @@
"pitches": [
{
"position": 0,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
},
{
"position": 3,
"nasalPositions": [],
"devoicePositions": [],
"tags": []
}
]

View File

@ -162,8 +162,8 @@ async function testDatabase1() {
true
);
vm.assert.deepStrictEqual(counts, {
counts: [{kanji: 2, kanjiMeta: 2, terms: 15, termMeta: 12, tagMeta: 15, media: 2}],
total: {kanji: 2, kanjiMeta: 2, terms: 15, termMeta: 12, tagMeta: 15, media: 2}
counts: [{kanji: 2, kanjiMeta: 2, terms: 15, termMeta: 15, tagMeta: 15, media: 2}],
total: {kanji: 2, kanjiMeta: 2, terms: 15, termMeta: 15, tagMeta: 15, media: 2}
});
// Test find* functions