Devoice and nasal pronunciation info (#1832)
* Update schema to support information about nasal and devoiced mora * Expose nasalPositions and devoicePositions in dictionary entry data * Expose nasalPositions, devoicePositions in grouped pitch info * Update display generator * Update test dictionary data * Update test data
This commit is contained in:
parent
25d74140ce
commit
41fc76d6fd
@ -354,6 +354,14 @@ namespace Translation {
|
||||
* Position of the downstep, as a number of mora.
|
||||
*/
|
||||
position: number;
|
||||
/**
|
||||
* Positions of morae with a nasal sound.
|
||||
*/
|
||||
nasalPositions: number[];
|
||||
/**
|
||||
* Positions of morae with a devoiced sound.
|
||||
*/
|
||||
devoicePositions: number[];
|
||||
/**
|
||||
* Tags for the pitch accent.
|
||||
*/
|
||||
|
@ -1512,6 +1512,31 @@ button.definition-item-expansion-button:focus:focus-visible+.definition-item-con
|
||||
padding-right: 0.1em;
|
||||
margin-right: 0.1em;
|
||||
}
|
||||
.pitch-accent-character-devoice-indicator {
|
||||
display: block;
|
||||
position: absolute;
|
||||
left: 50%;
|
||||
top: 50%;
|
||||
width: 1.125em;
|
||||
height: 1.125em;
|
||||
border: calc(1.5em / var(--font-size-no-units)) dotted var(--danger-color);
|
||||
border-radius: 50%;
|
||||
box-sizing: border-box;
|
||||
z-index: 1;
|
||||
transform: translate(-50%, -50%);
|
||||
}
|
||||
.pitch-accent-character-nasal-indicator {
|
||||
display: block;
|
||||
position: absolute;
|
||||
right: -0.125em;
|
||||
top: 0.125em;
|
||||
width: 0.375em;
|
||||
height: 0.375em;
|
||||
border: calc(1.5em / var(--font-size-no-units)) solid var(--danger-color);
|
||||
border-radius: 50%;
|
||||
box-sizing: border-box;
|
||||
z-index: 1;
|
||||
}
|
||||
.pitch-accent-position::before {
|
||||
content: ' [';
|
||||
}
|
||||
|
@ -85,6 +85,40 @@
|
||||
"description": "Mora position of the pitch accent downstep. A value of 0 indicates that the word does not have a downstep (heiban).",
|
||||
"minimum": 0
|
||||
},
|
||||
"nasal": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "integer",
|
||||
"description": "Position of a mora with nasal sound.",
|
||||
"minimum": 0
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"description": "Positions of morae with nasal sound.",
|
||||
"items": {
|
||||
"type": "integer",
|
||||
"minimum": 0
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"devoice": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "integer",
|
||||
"description": "Position of a mora with devoiced sound.",
|
||||
"minimum": 0
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"description": "Positions of morae with devoiced sound.",
|
||||
"items": {
|
||||
"type": "integer",
|
||||
"minimum": 0
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"tags": {
|
||||
"type": "array",
|
||||
"description": "List of tags for this pitch accent.",
|
||||
|
@ -468,12 +468,17 @@ class DisplayGenerator {
|
||||
|
||||
_createPitch(details) {
|
||||
const jp = this._japaneseUtil;
|
||||
const {reading, position, tags, exclusiveTerms, exclusiveReadings} = details;
|
||||
const {reading, position, nasalPositions, devoicePositions, tags, exclusiveTerms, exclusiveReadings} = details;
|
||||
const morae = jp.getKanaMorae(reading);
|
||||
|
||||
const nasalPositionsSet = nasalPositions.length > 0 ? new Set(nasalPositions) : null;
|
||||
const devoicePositionsSet = devoicePositions.length > 0 ? new Set(devoicePositions) : null;
|
||||
|
||||
const node = this._templates.instantiate('pitch-accent');
|
||||
|
||||
node.dataset.pitchAccentPosition = `${position}`;
|
||||
if (nasalPositions.length > 0) { node.dataset.nasalMoraPosition = nasalPositions.join(' '); }
|
||||
if (devoicePositions.length > 0) { node.dataset.devoiceMoraPosition = devoicePositions.join(' '); }
|
||||
node.dataset.tagCount = `${tags.length}`;
|
||||
|
||||
let n = node.querySelector('.pitch-accent-position');
|
||||
@ -487,18 +492,39 @@ class DisplayGenerator {
|
||||
|
||||
n = node.querySelector('.pitch-accent-characters');
|
||||
for (let i = 0, ii = morae.length; i < ii; ++i) {
|
||||
const i1 = i + 1;
|
||||
const mora = morae[i];
|
||||
const highPitch = jp.isMoraPitchHigh(i, position);
|
||||
const highPitchNext = jp.isMoraPitchHigh(i + 1, position);
|
||||
const highPitchNext = jp.isMoraPitchHigh(i1, position);
|
||||
const nasal = nasalPositionsSet !== null && nasalPositionsSet.has(i1);
|
||||
const devoice = devoicePositionsSet !== null && devoicePositionsSet.has(i1);
|
||||
|
||||
const n1 = this._templates.instantiate('pitch-accent-character');
|
||||
const n2 = n1.querySelector('.pitch-accent-character-inner');
|
||||
const n1 = document.createElement('span');
|
||||
n1.className = 'pitch-accent-character';
|
||||
|
||||
const n2 = document.createElement('span');
|
||||
n2.className = 'pitch-accent-character-inner';
|
||||
|
||||
n1.appendChild(n2);
|
||||
|
||||
n1.dataset.position = `${i}`;
|
||||
n1.dataset.pitch = highPitch ? 'high' : 'low';
|
||||
n1.dataset.pitchNext = highPitchNext ? 'high' : 'low';
|
||||
this._setTextContent(n2, mora, 'ja');
|
||||
|
||||
if (devoice) {
|
||||
n1.dataset.devoice = 'true';
|
||||
const n3 = document.createElement('span');
|
||||
n3.className = 'pitch-accent-character-devoice-indicator';
|
||||
n1.appendChild(n3);
|
||||
}
|
||||
if (nasal) {
|
||||
n1.dataset.nasal = 'true';
|
||||
const n3 = document.createElement('span');
|
||||
n3.className = 'pitch-accent-character-nasal-indicator';
|
||||
n1.appendChild(n3);
|
||||
}
|
||||
|
||||
n.appendChild(n1);
|
||||
}
|
||||
|
||||
|
@ -108,13 +108,15 @@ class DictionaryDataUtil {
|
||||
dictionaryPitchAccentInfoList = [];
|
||||
pitchAccentInfoMap.set(dictionary, dictionaryPitchAccentInfoList);
|
||||
}
|
||||
for (const {position, tags} of pitches) {
|
||||
let pitchAccentInfo = this._findExistingPitchAccentInfo(reading, position, tags, dictionaryPitchAccentInfoList);
|
||||
for (const {position, nasalPositions, devoicePositions, tags} of pitches) {
|
||||
let pitchAccentInfo = this._findExistingPitchAccentInfo(reading, position, nasalPositions, devoicePositions, tags, dictionaryPitchAccentInfoList);
|
||||
if (pitchAccentInfo === null) {
|
||||
pitchAccentInfo = {
|
||||
terms: new Set(),
|
||||
reading,
|
||||
position,
|
||||
nasalPositions,
|
||||
devoicePositions,
|
||||
tags,
|
||||
exclusiveTerms: [],
|
||||
exclusiveReadings: []
|
||||
@ -228,11 +230,13 @@ class DictionaryDataUtil {
|
||||
return results;
|
||||
}
|
||||
|
||||
static _findExistingPitchAccentInfo(reading, position, tags, pitchAccentInfoList) {
|
||||
static _findExistingPitchAccentInfo(reading, position, nasalPositions, devoicePositions, tags, pitchAccentInfoList) {
|
||||
for (const pitchInfo of pitchAccentInfoList) {
|
||||
if (
|
||||
pitchInfo.reading === reading &&
|
||||
pitchInfo.position === position &&
|
||||
this._areArraysEqual(pitchInfo.nasalPositions, nasalPositions) &&
|
||||
this._areArraysEqual(pitchInfo.devoicePositions, devoicePositions) &&
|
||||
this._areTagListsEqual(pitchInfo.tags, tags)
|
||||
) {
|
||||
return pitchInfo;
|
||||
@ -241,6 +245,15 @@ class DictionaryDataUtil {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Checks whether two arrays have the same length and strictly equal (`===`)
 * elements at every index. The comparison is order-sensitive and shallow.
 * @param {Array} array1 The first array.
 * @param {Array} array2 The second array.
 * @returns {boolean} `true` if both arrays match element-by-element, `false` otherwise.
 */
static _areArraysEqual(array1, array2) {
|
||||
const ii = array1.length;
|
||||
if (ii !== array2.length) { return false; }
|
||||
for (let i = 0; i < ii; ++i) {
|
||||
if (array1[i] !== array2[i]) { return false; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static _areTagListsEqual(tagList1, tagList2) {
|
||||
const ii = tagList1.length;
|
||||
if (tagList2.length !== ii) { return false; }
|
||||
|
@ -828,12 +828,14 @@ class Translator {
|
||||
{
|
||||
if (data.reading !== reading) { continue; }
|
||||
const pitches = [];
|
||||
for (const {position, tags} of data.pitches) {
|
||||
for (const {position, tags, nasal, devoice} of data.pitches) {
|
||||
const tags2 = [];
|
||||
if (Array.isArray(tags) && tags.length > 0) {
|
||||
tags2.push(this._createTagGroup(dictionary, tags));
|
||||
}
|
||||
pitches.push({position, tags: tags2});
|
||||
const nasalPositions = this._toNumberArray(nasal);
|
||||
const devoicePositions = this._toNumberArray(devoice);
|
||||
pitches.push({position, nasalPositions, devoicePositions, tags: tags2});
|
||||
}
|
||||
for (const {pronunciations, headwordIndex} of targets) {
|
||||
pronunciations.push(this._createTermPronunciation(
|
||||
@ -968,6 +970,10 @@ class Translator {
|
||||
return JSON.stringify(array);
|
||||
}
|
||||
|
||||
/**
 * Normalizes a value that may be a single number, an array, or absent into an array.
 * An array is returned as-is (same reference; its elements are not checked),
 * a number is wrapped in a one-element array, and anything else yields an empty array.
 * @param {*} value The value to normalize.
 * @returns {Array} The resulting array.
 */
_toNumberArray(value) {
|
||||
return Array.isArray(value) ? value : (typeof value === 'number' ? [value] : []);
|
||||
}
|
||||
|
||||
// Kanji data
|
||||
|
||||
_createKanjiStat(name, value, databaseInfo, dictionary) {
|
||||
|
@ -41,5 +41,35 @@
|
||||
{"position": 0, "tags": ["P2"]}
|
||||
]
|
||||
}
|
||||
],
|
||||
[
|
||||
"番号",
|
||||
"pitch",
|
||||
{
|
||||
"reading": "ばんごう",
|
||||
"pitches": [
|
||||
{"position": 3, "nasal": 3}
|
||||
]
|
||||
}
|
||||
],
|
||||
[
|
||||
"中腰",
|
||||
"pitch",
|
||||
{
|
||||
"reading": "ちゅうごし",
|
||||
"pitches": [
|
||||
{"position": 0, "nasal": 3}
|
||||
]
|
||||
}
|
||||
],
|
||||
[
|
||||
"土木工事",
|
||||
"pitch",
|
||||
{
|
||||
"reading": "どぼくこうじ",
|
||||
"pitches": [
|
||||
{"position": 4, "devoice": 3}
|
||||
]
|
||||
}
|
||||
]
|
||||
]
|
@ -1235,10 +1235,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -1366,10 +1370,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -1497,10 +1505,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -1628,10 +1640,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -3236,10 +3252,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -3367,10 +3387,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -3734,10 +3758,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -3865,10 +3893,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -4942,10 +4974,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -5120,10 +5156,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -5966,10 +6006,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -5983,10 +6027,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -6660,10 +6708,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -6795,10 +6847,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -6930,10 +6986,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -7065,10 +7125,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -7875,10 +7939,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -8006,10 +8074,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -8137,10 +8209,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -8268,10 +8344,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -9078,10 +9158,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -9209,10 +9293,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -9340,10 +9428,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -9471,10 +9563,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -10754,10 +10850,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
@ -10771,10 +10871,14 @@
|
||||
"pitches": [
|
||||
{
|
||||
"position": 0,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"position": 3,
|
||||
"nasalPositions": [],
|
||||
"devoicePositions": [],
|
||||
"tags": []
|
||||
}
|
||||
]
|
||||
|
@ -162,8 +162,8 @@ async function testDatabase1() {
|
||||
true
|
||||
);
|
||||
vm.assert.deepStrictEqual(counts, {
|
||||
counts: [{kanji: 2, kanjiMeta: 2, terms: 15, termMeta: 12, tagMeta: 15, media: 2}],
|
||||
total: {kanji: 2, kanjiMeta: 2, terms: 15, termMeta: 12, tagMeta: 15, media: 2}
|
||||
counts: [{kanji: 2, kanjiMeta: 2, terms: 15, termMeta: 15, tagMeta: 15, media: 2}],
|
||||
total: {kanji: 2, kanjiMeta: 2, terms: 15, termMeta: 15, tagMeta: 15, media: 2}
|
||||
});
|
||||
|
||||
// Test find* functions
|
||||
|
Loading…
Reference in New Issue
Block a user