Get categorization of pitch accents (#1462)

This commit is contained in:
toasted-nutbread 2021-02-28 13:26:34 -05:00 committed by GitHub
parent fce2c51709
commit 445f87ebdb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 46 additions and 1 deletions

View File

@ -229,7 +229,7 @@ class DisplayGenerator {
// Private // Private
_createTermExpression(details) { _createTermExpression(details) {
const {termFrequency, furiganaSegments, expression, reading, termTags} = details; const {termFrequency, furiganaSegments, expression, reading, termTags, pitches} = details;
const searchQueries = []; const searchQueries = [];
if (expression) { searchQueries.push(expression); } if (expression) { searchQueries.push(expression); }
@ -243,6 +243,11 @@ class DisplayGenerator {
node.dataset.readingIsSame = `${!reading || reading === expression}`; node.dataset.readingIsSame = `${!reading || reading === expression}`;
node.dataset.frequency = termFrequency; node.dataset.frequency = termFrequency;
const pitchAccentCategories = this._getPitchAccentCategories(pitches);
if (pitchAccentCategories !== null) {
node.dataset.pitchAccentCategories = pitchAccentCategories;
}
this._setTextContent(node.querySelector('.expression-reading'), reading.length > 0 ? reading : expression); this._setTextContent(node.querySelector('.expression-reading'), reading.length > 0 ? reading : expression);
this._appendFurigana(expressionContainer, furiganaSegments, this._appendKanjiLinks.bind(this)); this._appendFurigana(expressionContainer, furiganaSegments, this._appendKanjiLinks.bind(this));
@ -716,4 +721,18 @@ class DisplayGenerator {
node.lang = 'ja'; node.lang = 'ja';
} }
} }
_getPitchAccentCategories(pitches) {
if (pitches.length === 0) { return null; }
const categories = [];
for (const {reading, pitches: pitches2} of pitches) {
for (const {position} of pitches2) {
const category = this._japaneseUtil.getPitchCategory(reading, position, false);
if (category !== null) {
categories.push(category);
}
}
}
return categories.length > 0 ? categories.join(' ') : null;
}
} }

View File

@ -232,6 +232,22 @@ const JapaneseUtil = (() => {
} }
} }
getPitchCategory(text, pitchAccentPosition, isVerbOrAdjective) {
if (pitchAccentPosition === 0) {
return 'heiban';
}
if (isVerbOrAdjective) {
return pitchAccentPosition > 0 ? 'kifuku' : null;
}
if (pitchAccentPosition === 1) {
return 'atamadaka';
}
if (pitchAccentPosition > 1) {
return pitchAccentPosition >= this.getKanaMoraCount(text) ? 'odaka' : 'nakadaka';
}
return null;
}
getKanaMorae(text) { getKanaMorae(text) {
const morae = []; const morae = [];
let i; let i;
@ -245,6 +261,16 @@ const JapaneseUtil = (() => {
return morae; return morae;
} }
getKanaMoraCount(text) {
let moraCount = 0;
for (const c of text) {
if (!(SMALL_KANA_SET.has(c) && moraCount > 0)) {
++moraCount;
}
}
return moraCount;
}
// Conversion functions // Conversion functions
convertToKana(text) { convertToKana(text) {