Replace furigana with reading (#1614)

* Use "reading" instead of "furigana" for reading distribution

* Update tests
This commit is contained in:
toasted-nutbread 2021-04-13 20:32:24 -04:00 committed by GitHub
parent bbf47865ff
commit 609d4fe334
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 189 additions and 185 deletions

View File

@ -1066,9 +1066,9 @@ class Backend {
const {headwords: [{term, reading}]} = dictionaryEntries[0];
const source = text.substring(i, i + originalTextLength);
const textSegments = [];
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(term, reading, source)) {
const reading2 = jp.convertReading(text2, furigana, readingMode);
textSegments.push({text: text2, reading: reading2});
for (const {text: text2, reading: reading2} of jp.distributeFuriganaInflected(term, reading, source)) {
const reading3 = jp.convertReading(text2, reading2, readingMode);
textSegments.push({text: text2, reading: reading3});
}
results.push(textSegments);
i += originalTextLength;
@ -1102,13 +1102,13 @@ class Backend {
for (const line of lines) {
for (const {term, reading, source} of line) {
const termParts = [];
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(
for (const {text: text2, reading: reading2} of jp.distributeFuriganaInflected(
term.length > 0 ? term : source,
jp.convertKatakanaToHiragana(reading),
source
)) {
const reading2 = jp.convertReading(text2, furigana, readingMode);
termParts.push({text: text2, reading: reading2});
const reading3 = jp.convertReading(text2, reading2, readingMode);
termParts.push({text: text2, reading: reading3});
}
result.push(termParts);
}

View File

@ -596,6 +596,10 @@ class AnkiNoteDataCreator {
}
_getTermHeadwordFuriganaSegments(term, reading) {
return this._japaneseUtil.distributeFurigana(term, reading);
const result = [];
for (const {text, reading: reading2} of this._japaneseUtil.distributeFurigana(term, reading)) {
result.push({text, furigana: reading2});
}
return result;
}
}

View File

@ -715,13 +715,13 @@ class DisplayGenerator {
_appendFurigana(container, term, reading, addText) {
container.lang = 'ja';
const segments = this._japaneseUtil.distributeFurigana(term, reading);
for (const {text, furigana} of segments) {
if (furigana) {
for (const {text, reading: reading2} of segments) {
if (reading2) {
const ruby = document.createElement('ruby');
const rt = document.createElement('rt');
addText(ruby, text);
ruby.appendChild(rt);
rt.appendChild(document.createTextNode(furigana));
rt.appendChild(document.createTextNode(reading2));
container.appendChild(ruby);
} else {
addText(container, text);

View File

@ -507,7 +507,7 @@ const JapaneseUtil = (() => {
if (stemLength < source.length) {
const remainder = source.substring(stemLength);
const segmentCount = segments.length;
if (segmentCount > 0 && segments[segmentCount - 1].furigana.length === 0) {
if (segmentCount > 0 && segments[segmentCount - 1].reading.length === 0) {
// Append to the last segment if it has an empty reading
segments[segmentCount - 1].text += remainder;
} else {
@ -554,8 +554,8 @@ const JapaneseUtil = (() => {
// Private
_createFuriganaSegment(text, furigana) {
return {text, furigana};
_createFuriganaSegment(text, reading) {
return {text, reading};
}
_segmentizeFurigana(reading, readingNormalized, groups, groupsStart) {
@ -600,8 +600,8 @@ const JapaneseUtil = (() => {
// More than one way to segmentize the tail; mark as ambiguous
return null;
}
const furigana = reading.substring(0, i);
segments.unshift(this._createFuriganaSegment(text, furigana));
const segmentReading = reading.substring(0, i);
segments.unshift(this._createFuriganaSegment(text, segmentReading));
result = segments;
}
// There is only one way to segmentize the last non-kana group

View File

@ -177,11 +177,11 @@ class TemplateRenderer {
const segs = this._japaneseUtil.distributeFurigana(expression, reading);
let result = '';
for (const seg of segs) {
if (seg.furigana.length > 0) {
result += `<ruby>${seg.text}<rt>${seg.furigana}</rt></ruby>`;
for (const {text, reading: reading2} of segs) {
if (reading2.length > 0) {
result += `<ruby>${text}<rt>${reading2}</rt></ruby>`;
} else {
result += seg.text;
result += text;
}
}
@ -193,12 +193,12 @@ class TemplateRenderer {
const segs = this._japaneseUtil.distributeFurigana(expression, reading);
let result = '';
for (const seg of segs) {
if (seg.furigana.length > 0) {
for (const {text, reading: reading2} of segs) {
if (reading2.length > 0) {
if (result.length > 0) { result += ' '; }
result += `${seg.text}[${seg.furigana}]`;
result += `${text}[${reading2}]`;
} else {
result += seg.text;
result += text;
}
}

View File

@ -278,441 +278,441 @@ function testDistributeFurigana() {
[
['有り難う', 'ありがとう'],
[
{text: '有', furigana: 'あ'},
{text: 'り', furigana: ''},
{text: '難', furigana: 'がと'},
{text: 'う', furigana: ''}
{text: '有', reading: 'あ'},
{text: 'り', reading: ''},
{text: '難', reading: 'がと'},
{text: 'う', reading: ''}
]
],
[
['方々', 'かたがた'],
[
{text: '方々', furigana: 'かたがた'}
{text: '方々', reading: 'かたがた'}
]
],
[
['お祝い', 'おいわい'],
[
{text: 'お', furigana: ''},
{text: '祝', furigana: 'いわ'},
{text: 'い', furigana: ''}
{text: 'お', reading: ''},
{text: '祝', reading: 'いわ'},
{text: 'い', reading: ''}
]
],
[
['美味しい', 'おいしい'],
[
{text: '美味', furigana: 'おい'},
{text: 'しい', furigana: ''}
{text: '美味', reading: 'おい'},
{text: 'しい', reading: ''}
]
],
[
['食べ物', 'たべもの'],
[
{text: '食', furigana: 'た'},
{text: 'べ', furigana: ''},
{text: '物', furigana: 'もの'}
{text: '食', reading: 'た'},
{text: 'べ', reading: ''},
{text: '物', reading: 'もの'}
]
],
[
['試し切り', 'ためしぎり'],
[
{text: '試', furigana: 'ため'},
{text: 'し', furigana: ''},
{text: '切', furigana: 'ぎ'},
{text: 'り', furigana: ''}
{text: '試', reading: 'ため'},
{text: 'し', reading: ''},
{text: '切', reading: 'ぎ'},
{text: 'り', reading: ''}
]
],
// Ambiguous
[
['飼い犬', 'かいいぬ'],
[
{text: '飼い犬', furigana: 'かいいぬ'}
{text: '飼い犬', reading: 'かいいぬ'}
]
],
[
['長い間', 'ながいあいだ'],
[
{text: '長い間', furigana: 'ながいあいだ'}
{text: '長い間', reading: 'ながいあいだ'}
]
],
// Same/empty reading
[
['飼い犬', ''],
[
{text: '飼い犬', furigana: ''}
{text: '飼い犬', reading: ''}
]
],
[
['かいいぬ', 'かいいぬ'],
[
{text: 'かいいぬ', furigana: ''}
{text: 'かいいぬ', reading: ''}
]
],
[
['かいぬ', 'かいぬ'],
[
{text: 'かいぬ', furigana: ''}
{text: 'かいぬ', reading: ''}
]
],
// Misc
[
['月', 'か'],
[
{text: '月', furigana: 'か'}
{text: '月', reading: 'か'}
]
],
[
['月', 'カ'],
[
{text: '月', furigana: 'カ'}
{text: '月', reading: 'カ'}
]
],
// Mismatched kana readings
[
['有り難う', 'アリガトウ'],
[
{text: '有', furigana: 'ア'},
{text: 'り', furigana: 'リ'},
{text: '難', furigana: 'ガト'},
{text: 'う', furigana: 'ウ'}
{text: '有', reading: 'ア'},
{text: 'り', reading: 'リ'},
{text: '難', reading: 'ガト'},
{text: 'う', reading: 'ウ'}
]
],
[
['ありがとう', 'アリガトウ'],
[
{text: 'ありがとう', furigana: 'アリガトウ'}
{text: 'ありがとう', reading: 'アリガトウ'}
]
],
// Mismatched kana readings (real examples)
[
['カ月', 'かげつ'],
[
{text: 'カ', furigana: 'か'},
{text: '月', furigana: 'げつ'}
{text: 'カ', reading: 'か'},
{text: '月', reading: 'げつ'}
]
],
[
['序ノ口', 'じょのくち'],
[
{text: '序', furigana: 'じょ'},
{text: 'ノ', furigana: 'の'},
{text: '口', furigana: 'くち'}
{text: '序', reading: 'じょ'},
{text: 'ノ', reading: 'の'},
{text: '口', reading: 'くち'}
]
],
[
['スズメの涙', 'すずめのなみだ'],
[
{text: 'スズメ', furigana: 'すずめ'},
{text: 'の', furigana: ''},
{text: '涙', furigana: 'なみだ'}
{text: 'スズメ', reading: 'すずめ'},
{text: 'の', reading: ''},
{text: '涙', reading: 'なみだ'}
]
],
[
['二カ所', 'にかしょ'],
[
{text: '二', furigana: 'に'},
{text: 'カ', furigana: 'か'},
{text: '所', furigana: 'しょ'}
{text: '二', reading: 'に'},
{text: 'カ', reading: 'か'},
{text: '所', reading: 'しょ'}
]
],
[
['八ツ橋', 'やつはし'],
[
{text: '八', furigana: 'や'},
{text: 'ツ', furigana: 'つ'},
{text: '橋', furigana: 'はし'}
{text: '八', reading: 'や'},
{text: 'ツ', reading: 'つ'},
{text: '橋', reading: 'はし'}
]
],
[
['八ツ橋', 'やつはし'],
[
{text: '八', furigana: 'や'},
{text: 'ツ', furigana: 'つ'},
{text: '橋', furigana: 'はし'}
{text: '八', reading: 'や'},
{text: 'ツ', reading: 'つ'},
{text: '橋', reading: 'はし'}
]
],
[
['一カ月', 'いっかげつ'],
[
{text: '一', furigana: 'いっ'},
{text: 'カ', furigana: 'か'},
{text: '月', furigana: 'げつ'}
{text: '一', reading: 'いっ'},
{text: 'カ', reading: 'か'},
{text: '月', reading: 'げつ'}
]
],
[
['一カ所', 'いっかしょ'],
[
{text: '一', furigana: 'いっ'},
{text: 'カ', furigana: 'か'},
{text: '所', furigana: 'しょ'}
{text: '一', reading: 'いっ'},
{text: 'カ', reading: 'か'},
{text: '所', reading: 'しょ'}
]
],
[
['カ所', 'かしょ'],
[
{text: 'カ', furigana: 'か'},
{text: '所', furigana: 'しょ'}
{text: 'カ', reading: 'か'},
{text: '所', reading: 'しょ'}
]
],
[
['数カ月', 'すうかげつ'],
[
{text: '数', furigana: 'すう'},
{text: 'カ', furigana: 'か'},
{text: '月', furigana: 'げつ'}
{text: '数', reading: 'すう'},
{text: 'カ', reading: 'か'},
{text: '月', reading: 'げつ'}
]
],
[
['くノ一', 'くのいち'],
[
{text: 'く', furigana: ''},
{text: 'ノ', furigana: 'の'},
{text: '一', furigana: 'いち'}
{text: 'く', reading: ''},
{text: 'ノ', reading: 'の'},
{text: '一', reading: 'いち'}
]
],
[
['くノ一', 'くのいち'],
[
{text: 'く', furigana: ''},
{text: 'ノ', furigana: 'の'},
{text: '一', furigana: 'いち'}
{text: 'く', reading: ''},
{text: 'ノ', reading: 'の'},
{text: '一', reading: 'いち'}
]
],
[
['数カ国', 'すうかこく'],
[
{text: '数', furigana: 'すう'},
{text: 'カ', furigana: 'か'},
{text: '国', furigana: 'こく'}
{text: '数', reading: 'すう'},
{text: 'カ', reading: 'か'},
{text: '国', reading: 'こく'}
]
],
[
['数カ所', 'すうかしょ'],
[
{text: '数', furigana: 'すう'},
{text: 'カ', furigana: 'か'},
{text: '所', furigana: 'しょ'}
{text: '数', reading: 'すう'},
{text: 'カ', reading: 'か'},
{text: '所', reading: 'しょ'}
]
],
[
['壇ノ浦の戦い', 'だんのうらのたたかい'],
[
{text: '壇', furigana: 'だん'},
{text: 'ノ', furigana: 'の'},
{text: '浦', furigana: 'うら'},
{text: 'の', furigana: ''},
{text: '戦', furigana: 'たたか'},
{text: 'い', furigana: ''}
{text: '壇', reading: 'だん'},
{text: 'ノ', reading: 'の'},
{text: '浦', reading: 'うら'},
{text: 'の', reading: ''},
{text: '戦', reading: 'たたか'},
{text: 'い', reading: ''}
]
],
[
['壇ノ浦の戦', 'だんのうらのたたかい'],
[
{text: '壇', furigana: 'だん'},
{text: 'ノ', furigana: 'の'},
{text: '浦', furigana: 'うら'},
{text: 'の', furigana: ''},
{text: '戦', furigana: 'たたかい'}
{text: '壇', reading: 'だん'},
{text: 'ノ', reading: 'の'},
{text: '浦', reading: 'うら'},
{text: 'の', reading: ''},
{text: '戦', reading: 'たたかい'}
]
],
[
['序ノ口格', 'じょのくちかく'],
[
{text: '序', furigana: 'じょ'},
{text: 'ノ', furigana: 'の'},
{text: '口格', furigana: 'くちかく'}
{text: '序', reading: 'じょ'},
{text: 'ノ', reading: 'の'},
{text: '口格', reading: 'くちかく'}
]
],
[
['二カ国語', 'にかこくご'],
[
{text: '二', furigana: 'に'},
{text: 'カ', furigana: 'か'},
{text: '国語', furigana: 'こくご'}
{text: '二', reading: 'に'},
{text: 'カ', reading: 'か'},
{text: '国語', reading: 'こくご'}
]
],
[
['カ国', 'かこく'],
[
{text: 'カ', furigana: 'か'},
{text: '国', furigana: 'こく'}
{text: 'カ', reading: 'か'},
{text: '国', reading: 'こく'}
]
],
[
['カ国語', 'かこくご'],
[
{text: 'カ', furigana: 'か'},
{text: '国語', furigana: 'こくご'}
{text: 'カ', reading: 'か'},
{text: '国語', reading: 'こくご'}
]
],
[
['壇ノ浦の合戦', 'だんのうらのかっせん'],
[
{text: '壇', furigana: 'だん'},
{text: 'ノ', furigana: 'の'},
{text: '浦', furigana: 'うら'},
{text: 'の', furigana: ''},
{text: '合戦', furigana: 'かっせん'}
{text: '壇', reading: 'だん'},
{text: 'ノ', reading: 'の'},
{text: '浦', reading: 'うら'},
{text: 'の', reading: ''},
{text: '合戦', reading: 'かっせん'}
]
],
[
['一タ偏', 'いちたへん'],
[
{text: '一', furigana: 'いち'},
{text: 'タ', furigana: 'た'},
{text: '偏', furigana: 'へん'}
{text: '一', reading: 'いち'},
{text: 'タ', reading: 'た'},
{text: '偏', reading: 'へん'}
]
],
[
['ル又', 'るまた'],
[
{text: 'ル', furigana: 'る'},
{text: '又', furigana: 'また'}
{text: 'ル', reading: 'る'},
{text: '又', reading: 'また'}
]
],
[
['ノ木偏', 'のぎへん'],
[
{text: 'ノ', furigana: 'の'},
{text: '木偏', furigana: 'ぎへん'}
{text: 'ノ', reading: 'の'},
{text: '木偏', reading: 'ぎへん'}
]
],
[
['一ノ貝', 'いちのかい'],
[
{text: '一', furigana: 'いち'},
{text: 'ノ', furigana: 'の'},
{text: '貝', furigana: 'かい'}
{text: '一', reading: 'いち'},
{text: 'ノ', reading: 'の'},
{text: '貝', reading: 'かい'}
]
],
[
['虎ノ門事件', 'とらのもんじけん'],
[
{text: '虎', furigana: 'とら'},
{text: 'ノ', furigana: 'の'},
{text: '門事件', furigana: 'もんじけん'}
{text: '虎', reading: 'とら'},
{text: 'ノ', reading: 'の'},
{text: '門事件', reading: 'もんじけん'}
]
],
[
['教育ニ関スル勅語', 'きょういくにかんするちょくご'],
[
{text: '教育', furigana: 'きょういく'},
{text: 'ニ', furigana: 'に'},
{text: '関', furigana: 'かん'},
{text: 'スル', furigana: 'する'},
{text: '勅語', furigana: 'ちょくご'}
{text: '教育', reading: 'きょういく'},
{text: 'ニ', reading: 'に'},
{text: '関', reading: 'かん'},
{text: 'スル', reading: 'する'},
{text: '勅語', reading: 'ちょくご'}
]
],
[
['二カ年', 'にかねん'],
[
{text: '二', furigana: 'に'},
{text: 'カ', furigana: 'か'},
{text: '年', furigana: 'ねん'}
{text: '二', reading: 'に'},
{text: 'カ', reading: 'か'},
{text: '年', reading: 'ねん'}
]
],
[
['三カ年', 'さんかねん'],
[
{text: '三', furigana: 'さん'},
{text: 'カ', furigana: 'か'},
{text: '年', furigana: 'ねん'}
{text: '三', reading: 'さん'},
{text: 'カ', reading: 'か'},
{text: '年', reading: 'ねん'}
]
],
[
['四カ年', 'よんかねん'],
[
{text: '四', furigana: 'よん'},
{text: 'カ', furigana: 'か'},
{text: '年', furigana: 'ねん'}
{text: '四', reading: 'よん'},
{text: 'カ', reading: 'か'},
{text: '年', reading: 'ねん'}
]
],
[
['五カ年', 'ごかねん'],
[
{text: '五', furigana: 'ご'},
{text: 'カ', furigana: 'か'},
{text: '年', furigana: 'ねん'}
{text: '五', reading: 'ご'},
{text: 'カ', reading: 'か'},
{text: '年', reading: 'ねん'}
]
],
[
['六カ年', 'ろっかねん'],
[
{text: '六', furigana: 'ろっ'},
{text: 'カ', furigana: 'か'},
{text: '年', furigana: 'ねん'}
{text: '六', reading: 'ろっ'},
{text: 'カ', reading: 'か'},
{text: '年', reading: 'ねん'}
]
],
[
['七カ年', 'ななかねん'],
[
{text: '七', furigana: 'なな'},
{text: 'カ', furigana: 'か'},
{text: '年', furigana: 'ねん'}
{text: '七', reading: 'なな'},
{text: 'カ', reading: 'か'},
{text: '年', reading: 'ねん'}
]
],
[
['八カ年', 'はちかねん'],
[
{text: '八', furigana: 'はち'},
{text: 'カ', furigana: 'か'},
{text: '年', furigana: 'ねん'}
{text: '八', reading: 'はち'},
{text: 'カ', reading: 'か'},
{text: '年', reading: 'ねん'}
]
],
[
['九カ年', 'きゅうかねん'],
[
{text: '九', furigana: 'きゅう'},
{text: 'カ', furigana: 'か'},
{text: '年', furigana: 'ねん'}
{text: '九', reading: 'きゅう'},
{text: 'カ', reading: 'か'},
{text: '年', reading: 'ねん'}
]
],
[
['十カ年', 'じゅうかねん'],
[
{text: '十', furigana: 'じゅう'},
{text: 'カ', furigana: 'か'},
{text: '年', furigana: 'ねん'}
{text: '十', reading: 'じゅう'},
{text: 'カ', reading: 'か'},
{text: '年', reading: 'ねん'}
]
],
[
['鏡ノ間', 'かがみのま'],
[
{text: '鏡', furigana: 'かがみ'},
{text: 'ノ', furigana: 'の'},
{text: '間', furigana: 'ま'}
{text: '鏡', reading: 'かがみ'},
{text: 'ノ', reading: 'の'},
{text: '間', reading: 'ま'}
]
],
[
['鏡ノ間', 'かがみのま'],
[
{text: '鏡', furigana: 'かがみ'},
{text: 'ノ', furigana: 'の'},
{text: '間', furigana: 'ま'}
{text: '鏡', reading: 'かがみ'},
{text: 'ノ', reading: 'の'},
{text: '間', reading: 'ま'}
]
],
[
['ページ違反', 'ぺーじいはん'],
[
{text: 'ペ', furigana: 'ぺ'},
{text: 'ー', furigana: ''},
{text: 'ジ', furigana: 'じ'},
{text: '違反', furigana: 'いはん'}
{text: 'ペ', reading: 'ぺ'},
{text: 'ー', reading: ''},
{text: 'ジ', reading: 'じ'},
{text: '違反', reading: 'いはん'}
]
],
// Mismatched kana
[
['サボる', 'サボル'],
[
{text: 'サボ', furigana: ''},
{text: 'る', furigana: 'ル'}
{text: 'サボ', reading: ''},
{text: 'る', reading: 'ル'}
]
],
// Reading starts with term, but has remainder characters
[
['シック', 'シック・ビルしょうこうぐん'],
[
{text: 'シック', furigana: 'シック・ビルしょうこうぐん'}
{text: 'シック', reading: 'シック・ビルしょうこうぐん'}
]
]
];
@ -728,40 +728,40 @@ function testDistributeFuriganaInflected() {
[
['美味しい', 'おいしい', '美味しかた'],
[
{text: '美味', furigana: 'おい'},
{text: 'しかた', furigana: ''}
{text: '美味', reading: 'おい'},
{text: 'しかた', reading: ''}
]
],
[
['食べる', 'たべる', '食べた'],
[
{text: '食', furigana: 'た'},
{text: 'べた', furigana: ''}
{text: '食', reading: 'た'},
{text: 'べた', reading: ''}
]
],
[
['迄に', 'までに', 'までに'],
[
{text: 'までに', furigana: ''}
{text: 'までに', reading: ''}
]
],
[
['行う', 'おこなう', 'おこなわなかった'],
[
{text: 'おこなわなかった', furigana: ''}
{text: 'おこなわなかった', reading: ''}
]
],
[
['いい', 'いい', 'イイ'],
[
{text: 'イイ', furigana: ''}
{text: 'イイ', reading: ''}
]
],
[
['否か', 'いなか', '否カ'],
[
{text: '否', furigana: 'いな'},
{text: 'カ', furigana: 'か'}
{text: '否', reading: 'いな'},
{text: 'カ', reading: 'か'}
]
]
];