Improve convertKatakanaToHiragana and convertHiraganaToKatakana (#916)

This commit is contained in:
toasted-nutbread 2020-10-13 21:48:21 -04:00 committed by GitHub
parent 3e9db71120
commit 4ca6101273
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -19,10 +19,16 @@ const jp = (() => {
// Individual code points that receive special handling.
const ITERATION_MARK_CODE_POINT = 0x3005; // 々
const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; // っ
const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; // ッ
const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5; // ヵ
const KATAKANA_SMALL_KE_CODE_POINT = 0x30f6; // ヶ
const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc; // ー

// Inclusive [first, last] code point pairs.
const HIRAGANA_RANGE = [0x3040, 0x309f];
const KATAKANA_RANGE = [0x30a0, 0x30ff];
// Sub-ranges (ぁ..ゖ and ァ..ヶ) whose members line up one-to-one at a
// constant offset, so a character can be converted by shifting its code point.
const HIRAGANA_CONVERSION_RANGE = [0x3041, 0x3096];
const KATAKANA_CONVERSION_RANGE = [0x30a1, 0x30f6];
const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE];
const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff];
@ -129,6 +135,29 @@ const jp = (() => {
['ン', 'ン--'] ['ン', 'ン--']
]); ]);
// Groups kana by the vowel sound they end in. Each value lists the hiragana
// first, followed by the corresponding katakana. The empty-string key holds
// kana that carry no vowel (the moraic nasal), so a prolonged sound mark that
// follows them cannot be expanded into a vowel.
// Every character must appear in exactly one row: the reverse lookup built
// from this table keeps only the last row in which a character is listed.
const VOWEL_TO_KANA_MAPPING = new Map([
['a', 'ぁあかがさざただなはばぱまゃやらゎわゕァアカガサザタダナハバパマャヤラヮワヵヷ'],
['i', 'ぃいきぎしじちぢにひびぴみりゐィイキギシジチヂニヒビピミリヰヸ'],
['u', 'ぅうくぐすずっつづぬふぶぷむゅゆるゥウクグスズッツヅヌフブプムュユルヴ'],
['e', 'ぇえけげせぜてでねへべぺめれゑゖェエケゲセゼテデネヘベペメレヱヶヹ'],
['o', 'ぉおこごそぞとどのほぼぽもょよろをォオコゴソゾトドノホボポモョヨロヲヺ'],
['', 'んン']
]);
// Reverse lookup of VOWEL_TO_KANA_MAPPING: maps each individual kana character
// to the key of the row it is listed under. Rows are processed in table order,
// so a character listed in more than one row ends up with the later row's key.
const KANA_TO_VOWEL_MAPPING = (() => {
    const vowelByCharacter = new Map();
    VOWEL_TO_KANA_MAPPING.forEach((kanaList, vowelKey) => {
        // Array.from splits the string by code point, like for...of does.
        Array.from(kanaList).forEach((kana) => {
            vowelByCharacter.set(kana, vowelKey);
        });
    });
    return vowelByCharacter;
})();
/**
 * Checks whether a code point lies inside an inclusive range.
 * @param {number} codePoint The code point to test.
 * @param {number[]} range A two-element array `[first, last]`; both ends are
 *   part of the range.
 * @returns {boolean} `true` if `first <= codePoint <= last`, otherwise `false`.
 */
function isCodePointInRange(codePoint, [lowerBound, upperBound]) {
    if (codePoint >= lowerBound) {
        return codePoint <= upperBound;
    }
    return false;
}
function isCodePointInRanges(codePoint, ranges) { function isCodePointInRanges(codePoint, ranges) {
for (const [min, max] of ranges) { for (const [min, max] of ranges) {
@ -139,6 +168,17 @@ const jp = (() => {
return false; return false;
} }
/**
 * Picks the hiragana vowel that a prolonged sound mark stands for, based on
 * the character that precedes the mark.
 * @param {string} previousCharacter The single kana character before the mark.
 * @returns {?string} The hiragana vowel to use in place of the mark, or `null`
 *   when the preceding character has no entry (or no vowel) in
 *   KANA_TO_VOWEL_MAPPING.
 */
function getProlongedHiragana(previousCharacter) {
    const vowel = KANA_TO_VOWEL_MAPPING.get(previousCharacter);
    if (vowel === 'a') { return 'あ'; }
    if (vowel === 'i') { return 'い'; }
    // Both "u" and "o" sounds are lengthened with う.
    if (vowel === 'u' || vowel === 'o') { return 'う'; }
    if (vowel === 'e') { return 'え'; }
    return null;
}
function getWanakana() { function getWanakana() {
try { try {
if (typeof wanakana !== 'undefined') { if (typeof wanakana !== 'undefined') {
@ -219,30 +259,35 @@ const jp = (() => {
// Conversion functions // Conversion functions
convertKatakanaToHiragana(text) { convertKatakanaToHiragana(text) {
const wanakana = this._getWanakana();
let result = ''; let result = '';
for (const c of text) { const offset = (HIRAGANA_CONVERSION_RANGE[0] - KATAKANA_CONVERSION_RANGE[0]);
if (wanakana.isKatakana(c)) { for (let char of text) {
result += wanakana.toHiragana(c); const codePoint = char.codePointAt(0);
} else { if (codePoint === KATAKANA_SMALL_KA_CODE_POINT || codePoint === KATAKANA_SMALL_KE_CODE_POINT) {
result += c; // No change
} else if (codePoint === KANA_PROLONGED_SOUND_MARK_CODE_POINT) {
if (result.length > 0) {
const char2 = getProlongedHiragana(result[result.length - 1]);
if (char2 !== null) { char = char2; }
}
} else if (isCodePointInRange(codePoint, KATAKANA_CONVERSION_RANGE)) {
char = String.fromCodePoint(codePoint + offset);
} }
result += char;
} }
return result; return result;
} }
convertHiraganaToKatakana(text) { convertHiraganaToKatakana(text) {
const wanakana = this._getWanakana();
let result = ''; let result = '';
for (const c of text) { const offset = (KATAKANA_CONVERSION_RANGE[0] - HIRAGANA_CONVERSION_RANGE[0]);
if (wanakana.isHiragana(c)) { for (let char of text) {
result += wanakana.toKatakana(c); const codePoint = char.codePointAt(0);
} else { if (isCodePointInRange(codePoint, HIRAGANA_CONVERSION_RANGE)) {
result += c; char = String.fromCodePoint(codePoint + offset);
} }
result += char;
} }
return result; return result;
} }