yomichan/ext/js/language/sandbox/japanese-util.js

/*
 * Copyright (C) 2020-2021  Yomichan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

/**
 * Utilities for classifying, converting and segmenting Japanese text.
 *
 * The module is wrapped in an IIFE so that the lookup tables and helper
 * functions below stay private; only the JapaneseUtil class is exposed.
 */
const JapaneseUtil = (() => {
    // Code points of individual kana which receive special handling.
    const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
    const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
    const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5;
    const KATAKANA_SMALL_KE_CODE_POINT = 0x30f6;
    const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc;

    // All ranges are [min, max] pairs of code points, inclusive on both ends.
    const HIRAGANA_RANGE = [0x3040, 0x309f];
    const KATAKANA_RANGE = [0x30a0, 0x30ff];

    // Sub-ranges whose characters are converted between hiragana and katakana
    // by adding a fixed code point offset.
    const HIRAGANA_CONVERSION_RANGE = [0x3041, 0x3096];
    const KATAKANA_CONVERSION_RANGE = [0x30a1, 0x30f6];

    const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE];

    const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff];
    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf];
    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df];
    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f];
    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f];
    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf];
    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef];
    const CJK_COMPATIBILITY_IDEOGRAPHS_RANGE = [0xf900, 0xfaff];
    const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f];
    const CJK_IDEOGRAPH_RANGES = [
        CJK_UNIFIED_IDEOGRAPHS_RANGE,
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE,
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE,
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE,
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE,
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE,
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE,
        CJK_COMPATIBILITY_IDEOGRAPHS_RANGE,
        CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE
    ];

    // Japanese character ranges, roughly ordered in order of expected frequency
    const JAPANESE_RANGES = [
        HIRAGANA_RANGE,
        KATAKANA_RANGE,

        ...CJK_IDEOGRAPH_RANGES,

        [0xff66, 0xff9f], // Halfwidth katakana

        [0x30fb, 0x30fc], // Katakana punctuation
        [0xff61, 0xff65], // Kana punctuation
        [0x3000, 0x303f], // CJK punctuation

        [0xff10, 0xff19], // Fullwidth numbers
        [0xff21, 0xff3a], // Fullwidth upper case Latin letters
        [0xff41, 0xff5a], // Fullwidth lower case Latin letters

        [0xff01, 0xff0f], // Fullwidth punctuation 1
        [0xff1a, 0xff1f], // Fullwidth punctuation 2
        [0xff3b, 0xff3f], // Fullwidth punctuation 3
        [0xff5b, 0xff60], // Fullwidth punctuation 4
        [0xffe0, 0xffee]  // Currency markers
    ];

    // Small kana which are merged into the preceding mora when counting morae.
    const SMALL_KANA_SET = new Set(Array.from('ぁぃぅぇぉゃゅょゎァィゥェォャュョヮ'));

    // Maps a halfwidth katakana character to a 3-character string holding its
    // fullwidth forms: [0] plain, [1] with dakuten, [2] with handakuten.
    // A '-' placeholder means that the corresponding voiced form does not exist.
    const HALFWIDTH_KATAKANA_MAPPING = new Map([
        ['ｦ', 'ヲヺ-'],
        ['ｧ', 'ァ--'],
        ['ｨ', 'ィ--'],
        ['ｩ', 'ゥ--'],
        ['ｪ', 'ェ--'],
        ['ｫ', 'ォ--'],
        ['ｬ', 'ャ--'],
        ['ｭ', 'ュ--'],
        ['ｮ', 'ョ--'],
        ['ｯ', 'ッ--'],
        ['ｰ', 'ー--'],
        ['ｱ', 'ア--'],
        ['ｲ', 'イ--'],
        ['ｳ', 'ウヴ-'],
        ['ｴ', 'エ--'],
        ['ｵ', 'オ--'],
        ['ｶ', 'カガ-'],
        ['ｷ', 'キギ-'],
        ['ｸ', 'クグ-'],
        ['ｹ', 'ケゲ-'],
        ['ｺ', 'コゴ-'],
        ['ｻ', 'サザ-'],
        ['ｼ', 'シジ-'],
        ['ｽ', 'スズ-'],
        ['ｾ', 'セゼ-'],
        ['ｿ', 'ソゾ-'],
        ['ﾀ', 'タダ-'],
        ['ﾁ', 'チヂ-'],
        ['ﾂ', 'ツヅ-'],
        ['ﾃ', 'テデ-'],
        ['ﾄ', 'トド-'],
        ['ﾅ', 'ナ--'],
        ['ﾆ', 'ニ--'],
        ['ﾇ', 'ヌ--'],
        ['ﾈ', 'ネ--'],
        ['ﾉ', 'ノ--'],
        ['ﾊ', 'ハバパ'],
        ['ﾋ', 'ヒビピ'],
        ['ﾌ', 'フブプ'],
        ['ﾍ', 'ヘベペ'],
        ['ﾎ', 'ホボポ'],
        ['ﾏ', 'マ--'],
        ['ﾐ', 'ミ--'],
        ['ﾑ', 'ム--'],
        ['ﾒ', 'メ--'],
        ['ﾓ', 'モ--'],
        ['ﾔ', 'ヤ--'],
        ['ﾕ', 'ユ--'],
        ['ﾖ', 'ヨ--'],
        ['ﾗ', 'ラ--'],
        ['ﾘ', 'リ--'],
        ['ﾙ', 'ル--'],
        ['ﾚ', 'レ--'],
        ['ﾛ', 'ロ--'],
        ['ﾜ', 'ワ--'],
        ['ﾝ', 'ン--']
    ]);

    // Maps a vowel to every kana whose sound ends in that vowel.
    // The empty-string key lists the kana which carry no vowel (ん/ン).
    // Entries are applied in order when the inverse map is built, so a kana
    // must appear under exactly one key; listing の/ノ under '' would override
    // their 'o' entry and stop a following ー from being expanded.
    const VOWEL_TO_KANA_MAPPING = new Map([
        ['a', 'ぁあかがさざただなはばぱまゃやらゎわヵァアカガサザタダナハバパマャヤラヮワヵヷ'],
        ['i', 'ぃいきぎしじちぢにひびぴみりゐィイキギシジチヂニヒビピミリヰヸ'],
        ['u', 'ぅうくぐすずっつづぬふぶぷむゅゆるゥウクグスズッツヅヌフブプムュユルヴ'],
        ['e', 'ぇえけげせぜてでねへべぺめれゑヶェエケゲセゼテデネヘベペメレヱヶヹ'],
        ['o', 'ぉおこごそぞとどのほぼぽもょよろをォオコゴソゾトドノホボポモョヨロヲヺ'],
        ['', 'んン']
    ]);

    // Inverse of VOWEL_TO_KANA_MAPPING: kana character => vowel.
    const KANA_TO_VOWEL_MAPPING = (() => {
        const map = new Map();
        for (const [vowel, characters] of VOWEL_TO_KANA_MAPPING) {
            for (const character of characters) {
                map.set(character, vowel);
            }
        }
        return map;
    })();

    // Maps a voiced kana to {character, type}, where character is the base kana
    // and type is 'dakuten' or 'handakuten'. The source string is a sequence of
    // triples (base, dakuten form, handakuten form); '-' marks a missing
    // handakuten form.
    const DIACRITIC_MAPPING = (() => {
        const kana = 'うゔ-かが-きぎ-くぐ-けげ-こご-さざ-しじ-すず-せぜ-そぞ-ただ-ちぢ-つづ-てで-とど-はばぱひびぴふぶぷへべぺほぼぽワヷ-ヰヸ-ウヴ-ヱヹ-ヲヺ-カガ-キギ-クグ-ケゲ-コゴ-サザ-シジ-スズ-セゼ-ソゾ-タダ-チヂ-ツヅ-テデ-トド-ハバパヒビピフブプヘベペホボポ';
        const map = new Map();
        for (let i = 0, ii = kana.length; i < ii; i += 3) {
            const character = kana[i];
            const dakuten = kana[i + 1];
            const handakuten = kana[i + 2];
            map.set(dakuten, {character, type: 'dakuten'});
            if (handakuten !== '-') {
                map.set(handakuten, {character, type: 'handakuten'});
            }
        }
        return map;
    })();


    /**
     * Tests whether a code point lies within a single inclusive range.
     * @param {number} codePoint The code point to test.
     * @param {number[]} range A [min, max] pair.
     * @returns {boolean} `true` if min <= codePoint <= max.
     */
    function isCodePointInRange(codePoint, [min, max]) {
        return (codePoint >= min && codePoint <= max);
    }

    /**
     * Tests whether a code point lies within any of several inclusive ranges.
     * @param {number} codePoint The code point to test.
     * @param {number[][]} ranges An iterable of [min, max] pairs.
     * @returns {boolean} `true` if at least one range contains the code point.
     */
    function isCodePointInRanges(codePoint, ranges) {
        for (const [min, max] of ranges) {
            if (codePoint >= min && codePoint <= max) {
                return true;
            }
        }
        return false;
    }

    /**
     * Gets the hiragana which a prolonged sound mark (ー) stands for when it
     * follows the given kana.
     * @param {string} previousCharacter The kana preceding the mark.
     * @returns {?string} The replacement hiragana, or `null` if the preceding
     *   character has no vowel or is not a known kana.
     */
    function getProlongedHiragana(previousCharacter) {
        switch (KANA_TO_VOWEL_MAPPING.get(previousCharacter)) {
            case 'a': return 'あ';
            case 'i': return 'い';
            case 'u': return 'う';
            case 'e': return 'え';
            case 'o': return 'う'; // A long "o" is written with う
            default: return null;
        }
    }


    /**
     * Collection of Japanese text helpers. Functions which rely on WanaKana
     * only work when a WanaKana object is passed to the constructor.
     */
    // eslint-disable-next-line no-shadow
    class JapaneseUtil {
        /**
         * @param {?object} wanakana Optional object providing `toKana`,
         *   `toRomaji` and `toHiragana`; `null` disables those conversions.
         */
        constructor(wanakana=null) {
            this._wanakana = wanakana;
        }

        // Character code testing functions

        /**
         * @param {number} codePoint The code point to test.
         * @returns {boolean} `true` if the code point is in a CJK ideograph range.
         */
        isCodePointKanji(codePoint) {
            return isCodePointInRanges(codePoint, CJK_IDEOGRAPH_RANGES);
        }

        /**
         * @param {number} codePoint The code point to test.
         * @returns {boolean} `true` if the code point is in the hiragana or katakana range.
         */
        isCodePointKana(codePoint) {
            return isCodePointInRanges(codePoint, KANA_RANGES);
        }

        /**
         * @param {number} codePoint The code point to test.
         * @returns {boolean} `true` if the code point is in any of JAPANESE_RANGES.
         */
        isCodePointJapanese(codePoint) {
            return isCodePointInRanges(codePoint, JAPANESE_RANGES);
        }

        // String testing functions

        /**
         * @param {string} str The string to test.
         * @returns {boolean} `true` if the string is non-empty and every
         *   character is kana; `false` otherwise.
         */
        isStringEntirelyKana(str) {
            if (str.length === 0) { return false; }
            for (const c of str) {
                if (!isCodePointInRanges(c.codePointAt(0), KANA_RANGES)) {
                    return false;
                }
            }
            return true;
        }

        /**
         * @param {string} str The string to test.
         * @returns {boolean} `true` if at least one character is in JAPANESE_RANGES.
         */
        isStringPartiallyJapanese(str) {
            if (str.length === 0) { return false; }
            for (const c of str) {
                if (isCodePointInRanges(c.codePointAt(0), JAPANESE_RANGES)) {
                    return true;
                }
            }
            return false;
        }

        // Mora functions

        /**
         * Determines whether a mora is pronounced with high pitch.
         * @param {number} moraIndex Zero-based index of the mora.
         * @param {number} pitchAccentDownstepPosition Downstep position; 0 means no downstep.
         * @returns {boolean} For position 0, every mora but the first is high;
         *   for position 1, only the first mora is high; otherwise the morae
         *   after the first and before the downstep position are high.
         */
        isMoraPitchHigh(moraIndex, pitchAccentDownstepPosition) {
            switch (pitchAccentDownstepPosition) {
                case 0: return (moraIndex > 0);
                case 1: return (moraIndex < 1);
                default: return (moraIndex > 0 && moraIndex < pitchAccentDownstepPosition);
            }
        }

        /**
         * Classifies a pitch accent pattern.
         * @param {string} text The kana reading, used to count morae.
         * @param {number} pitchAccentDownstepPosition Downstep position.
         * @param {boolean} isVerbOrAdjective Whether the term is a verb or adjective.
         * @returns {?string} 'heiban', 'kifuku', 'atamadaka', 'odaka' or
         *   'nakadaka'; `null` if the position is not a non-negative number.
         */
        getPitchCategory(text, pitchAccentDownstepPosition, isVerbOrAdjective) {
            if (pitchAccentDownstepPosition === 0) {
                return 'heiban';
            }
            if (isVerbOrAdjective) {
                return pitchAccentDownstepPosition > 0 ? 'kifuku' : null;
            }
            if (pitchAccentDownstepPosition === 1) {
                return 'atamadaka';
            }
            if (pitchAccentDownstepPosition > 1) {
                return pitchAccentDownstepPosition >= this.getKanaMoraCount(text) ? 'odaka' : 'nakadaka';
            }
            return null;
        }

        /**
         * Splits kana text into morae. A small kana from SMALL_KANA_SET is
         * appended to the preceding mora, unless it is the first character.
         * @param {string} text The kana text.
         * @returns {string[]} The morae, in order.
         */
        getKanaMorae(text) {
            const morae = [];
            for (const c of text) {
                const count = morae.length;
                if (count > 0 && SMALL_KANA_SET.has(c)) {
                    morae[count - 1] += c;
                } else {
                    morae.push(c);
                }
            }
            return morae;
        }

        /**
         * Counts morae using the same rule as getKanaMorae, without allocating the list.
         * @param {string} text The kana text.
         * @returns {number} The number of morae.
         */
        getKanaMoraCount(text) {
            let moraCount = 0;
            for (const c of text) {
                if (!(SMALL_KANA_SET.has(c) && moraCount > 0)) {
                    ++moraCount;
                }
            }
            return moraCount;
        }

        // Conversion functions

        /**
         * Converts text to kana using WanaKana's `toKana`.
         * @param {string} text The text to convert.
         * @returns {string} The converted text.
         * @throws {Error} If no WanaKana object was provided.
         */
        convertToKana(text) {
            return this._getWanakana().toKana(text);
        }

        /**
         * @returns {boolean} `true` if convertToKana can be used.
         */
        convertToKanaSupported() {
            return this._wanakana !== null;
        }

        /**
         * Converts katakana to hiragana. ヵ and ヶ are left unchanged.
         * @param {string} text The text to convert.
         * @param {boolean} [keepProlongedSoundMarks=false] When `false`, a ー
         *   following a kana with a known vowel is replaced by the matching
         *   hiragana vowel; when `true`, ー is always kept.
         * @returns {string} The converted text.
         */
        convertKatakanaToHiragana(text, keepProlongedSoundMarks=false) {
            let result = '';
            const offset = (HIRAGANA_CONVERSION_RANGE[0] - KATAKANA_CONVERSION_RANGE[0]);
            for (let char of text) {
                const codePoint = char.codePointAt(0);
                switch (codePoint) {
                    case KATAKANA_SMALL_KA_CODE_POINT:
                    case KATAKANA_SMALL_KE_CODE_POINT:
                        // No change
                        break;
                    case KANA_PROLONGED_SOUND_MARK_CODE_POINT:
                        if (!keepProlongedSoundMarks && result.length > 0) {
                            // The lookup uses the already-converted previous character
                            const char2 = getProlongedHiragana(result[result.length - 1]);
                            if (char2 !== null) { char = char2; }
                        }
                        break;
                    default:
                        if (isCodePointInRange(codePoint, KATAKANA_CONVERSION_RANGE)) {
                            char = String.fromCodePoint(codePoint + offset);
                        }
                        break;
                }
                result += char;
            }
            return result;
        }

        /**
         * Converts hiragana to katakana; other characters are kept as-is.
         * @param {string} text The text to convert.
         * @returns {string} The converted text.
         */
        convertHiraganaToKatakana(text) {
            let result = '';
            const offset = (KATAKANA_CONVERSION_RANGE[0] - HIRAGANA_CONVERSION_RANGE[0]);
            for (let char of text) {
                const codePoint = char.codePointAt(0);
                if (isCodePointInRange(codePoint, HIRAGANA_CONVERSION_RANGE)) {
                    char = String.fromCodePoint(codePoint + offset);
                }
                result += char;
            }
            return result;
        }

        /**
         * Converts text to romaji using WanaKana's `toRomaji`.
         * @param {string} text The text to convert.
         * @returns {string} The converted text.
         * @throws {Error} If no WanaKana object was provided.
         */
        convertToRomaji(text) {
            const wanakana = this._getWanakana();
            return wanakana.toRomaji(text);
        }

        /**
         * @returns {boolean} `true` if convertToRomaji can be used.
         */
        convertToRomajiSupported() {
            return this._wanakana !== null;
        }

        /**
         * Replaces ASCII digits with their fullwidth counterparts.
         * @param {string} text The text to convert.
         * @returns {string} The converted text.
         */
        convertNumericToFullWidth(text) {
            let result = '';
            for (const char of text) {
                let c = char.codePointAt(0);
                if (c >= 0x30 && c <= 0x39) { // ['0', '9']
                    c += 0xff10 - 0x30; // 0xff10 = '0' full width
                    result += String.fromCodePoint(c);
                } else {
                    result += char;
                }
            }
            return result;
        }

        /**
         * Replaces halfwidth katakana with fullwidth katakana, merging a
         * following halfwidth (han)dakuten into the character when a voiced
         * form exists.
         * @param {string} text The text to convert.
         * @param {?object} [sourceMap=null] Optional object with a
         *   `combine(index, count)` method, called once for every merged mark.
         * @returns {string} The converted text.
         */
        convertHalfWidthKanaToFullWidth(text, sourceMap=null) {
            let result = '';

            // This function is safe to use charCodeAt instead of codePointAt, since all
            // the relevant characters are represented with a single UTF-16 character code.
            for (let i = 0, ii = text.length; i < ii; ++i) {
                const c = text[i];
                const mapping = HALFWIDTH_KATAKANA_MAPPING.get(c);
                if (typeof mapping !== 'string') {
                    result += c;
                    continue;
                }

                let index = 0;
                switch (text.charCodeAt(i + 1)) {
                    case 0xff9e: // dakuten
                        index = 1;
                        break;
                    case 0xff9f: // handakuten
                        index = 2;
                        break;
                }

                let c2 = mapping[index];
                if (index > 0) {
                    if (c2 === '-') { // invalid
                        // No voiced form; leave the mark for the next iteration
                        index = 0;
                        c2 = mapping[0];
                    } else {
                        // Consume the mark together with the base character
                        ++i;
                    }
                }

                if (sourceMap !== null && index > 0) {
                    sourceMap.combine(result.length, 1);
                }
                result += c2;
            }

            return result;
        }

        /**
         * Converts runs of Latin letters and dashes (ASCII or fullwidth) to
         * hiragana; all other characters are copied unchanged.
         * @param {string} text The text to convert.
         * @param {?object} [sourceMap=null] Optional object with
         *   `combine(index, count)` and `insert(index, value)` methods which
         *   is updated to reflect the length changes.
         * @returns {string} The converted text.
         * @throws {Error} If a run must be converted and no WanaKana object was provided.
         */
        convertAlphabeticToKana(text, sourceMap=null) {
            let part = '';
            let result = '';

            for (const char of text) {
                // Note: 0x61 is the character code for 'a'
                let c = char.codePointAt(0);
                if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z']
                    c += (0x61 - 0x41);
                } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z']
                    // NOP; c += (0x61 - 0x61);
                } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] fullwidth
                    c += (0x61 - 0xff21);
                } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] fullwidth
                    c += (0x61 - 0xff41);
                } else if (c === 0x2d || c === 0xff0d) { // '-' or fullwidth dash
                    c = 0x2d; // '-'
                } else {
                    // Not convertible: flush the pending run, then copy the character
                    if (part.length > 0) {
                        result += this._convertAlphabeticPartToKana(part, sourceMap, result.length);
                        part = '';
                    }
                    result += char;
                    continue;
                }
                part += String.fromCodePoint(c);
            }

            if (part.length > 0) {
                result += this._convertAlphabeticPartToKana(part, sourceMap, result.length);
            }
            return result;
        }

        /**
         * @returns {boolean} `true` if convertAlphabeticToKana can be used.
         */
        convertAlphabeticToKanaSupported() {
            return this._wanakana !== null;
        }

        /**
         * Looks up the base kana and diacritic type of a voiced kana.
         * @param {string} character A single kana character.
         * @returns {?{character: string, type: string}} A fresh object holding
         *   the base kana and 'dakuten' or 'handakuten', or `null` if the
         *   character is not in DIACRITIC_MAPPING.
         */
        getKanaDiacriticInfo(character) {
            const info = DIACRITIC_MAPPING.get(character);
            return typeof info !== 'undefined' ? {character: info.character, type: info.type} : null;
        }

        // Furigana distribution

        /**
         * Splits a term into segments and assigns each non-kana segment the
         * part of the reading which belongs to it.
         * @param {string} term The term as written.
         * @param {string} reading The reading of the term.
         * @returns {{text: string, reading: string}[]} The segments; a single
         *   segment covering the whole term is returned when the reading
         *   cannot be distributed unambiguously.
         */
        distributeFurigana(term, reading) {
            if (reading === term) {
                // Same
                return [this._createFuriganaSegment(term, '')];
            }

            // Group consecutive characters by whether they are kana
            const groups = [];
            let groupPre = null;
            let isKanaPre = null;
            for (const c of term) {
                const codePoint = c.codePointAt(0);
                const isKana = this.isCodePointKana(codePoint);
                if (isKana === isKanaPre) {
                    groupPre.text += c;
                } else {
                    groupPre = {isKana, text: c, textNormalized: null};
                    groups.push(groupPre);
                    isKanaPre = isKana;
                }
            }
            for (const group of groups) {
                if (group.isKana) {
                    group.textNormalized = this.convertKatakanaToHiragana(group.text);
                }
            }

            const readingNormalized = this.convertKatakanaToHiragana(reading);
            const segments = this._segmentizeFurigana(reading, readingNormalized, groups, 0);
            if (segments !== null) {
                return segments;
            }

            // Fallback
            return [this._createFuriganaSegment(term, reading)];
        }

        /**
         * Distributes furigana over an inflected form of a term.
         * @param {string} term The dictionary form as written.
         * @param {string} reading The reading of the dictionary form.
         * @param {string} source The inflected text.
         * @returns {{text: string, reading: string}[]} Segments whose texts
         *   concatenate to cover `source`; only the part shared with the term
         *   (or with the reading) receives readings.
         */
        distributeFuriganaInflected(term, reading, source) {
            const termNormalized = this.convertKatakanaToHiragana(term);
            const readingNormalized = this.convertKatakanaToHiragana(reading);
            const sourceNormalized = this.convertKatakanaToHiragana(source);

            let mainText = term;
            let stemLength = this._getStemLength(termNormalized, sourceNormalized);

            // Check if source is derived from the reading instead of the term
            const readingStemLength = this._getStemLength(readingNormalized, sourceNormalized);
            if (readingStemLength > 0 && readingStemLength >= stemLength) {
                mainText = reading;
                stemLength = readingStemLength;
                reading = `${source.substring(0, stemLength)}${reading.substring(stemLength)}`;
            }

            const segments = [];
            if (stemLength > 0) {
                mainText = `${source.substring(0, stemLength)}${mainText.substring(stemLength)}`;
                const segments2 = this.distributeFurigana(mainText, reading);
                let consumed = 0;
                // Keep only the segments which lie within the stem
                for (const segment of segments2) {
                    const {text} = segment;
                    const start = consumed;
                    consumed += text.length;
                    if (consumed < stemLength) {
                        segments.push(segment);
                    } else if (consumed === stemLength) {
                        segments.push(segment);
                        break;
                    } else {
                        // Segment crosses the stem boundary: keep its text up
                        // to the boundary, without a reading
                        if (start < stemLength) {
                            segments.push(this._createFuriganaSegment(mainText.substring(start, stemLength), ''));
                        }
                        break;
                    }
                }
            }

            if (stemLength < source.length) {
                const remainder = source.substring(stemLength);
                const segmentCount = segments.length;
                if (segmentCount > 0 && segments[segmentCount - 1].reading.length === 0) {
                    // Append to the last segment if it has an empty reading
                    segments[segmentCount - 1].text += remainder;
                } else {
                    // Otherwise, create a new segment
                    segments.push(this._createFuriganaSegment(remainder, ''));
                }
            }

            return segments;
        }

        // Miscellaneous

        /**
         * Collapses runs of identical emphatic characters (っ, ッ or ー).
         * @param {string} text The text to process.
         * @param {boolean} fullCollapse When `false`, the first character of
         *   each run is kept; when `true`, the entire run is removed.
         * @param {?object} [sourceMap=null] Optional object with a
         *   `combine(index, count)` method, called once per removed character.
         * @returns {string} The collapsed text.
         */
        collapseEmphaticSequences(text, fullCollapse, sourceMap=null) {
            let result = '';
            let collapseCodePoint = -1;
            const hasSourceMap = (sourceMap !== null);
            for (const char of text) {
                const c = char.codePointAt(0);
                if (
                    c === HIRAGANA_SMALL_TSU_CODE_POINT ||
                    c === KATAKANA_SMALL_TSU_CODE_POINT ||
                    c === KANA_PROLONGED_SOUND_MARK_CODE_POINT
                ) {
                    if (collapseCodePoint !== c) {
                        // First character of a new run
                        collapseCodePoint = c;
                        if (!fullCollapse) {
                            result += char;
                            continue;
                        }
                    }
                } else {
                    collapseCodePoint = -1;
                    result += char;
                    continue;
                }

                // Only reached when the current character was dropped
                if (hasSourceMap) {
                    sourceMap.combine(Math.max(0, result.length - 1), 1);
                }
            }
            return result;
        }

        // Private

        /**
         * @param {string} text The segment text.
         * @param {string} reading The segment reading; empty if none.
         * @returns {{text: string, reading: string}} A new segment object.
         */
        _createFuriganaSegment(text, reading) {
            return {text, reading};
        }

        /**
         * Recursively matches the reading against the groups starting at
         * `groupsStart`.
         * @param {string} reading The remaining reading.
         * @param {string} readingNormalized The remaining reading, converted to hiragana.
         * @param {object[]} groups Groups of `{isKana, text, textNormalized}`.
         * @param {number} groupsStart Index of the first group to match.
         * @returns {?object[]} The segments for the remaining groups, or
         *   `null` if there is no match or the match is ambiguous.
         */
        _segmentizeFurigana(reading, readingNormalized, groups, groupsStart) {
            const groupCount = groups.length - groupsStart;
            if (groupCount <= 0) {
                // All groups consumed; succeed only if the reading is consumed too
                return reading.length === 0 ? [] : null;
            }

            const group = groups[groupsStart];
            const {isKana, text} = group;
            const textLength = text.length;
            if (isKana) {
                // A kana group must match the start of the reading exactly
                const {textNormalized} = group;
                if (readingNormalized.startsWith(textNormalized)) {
                    const segments = this._segmentizeFurigana(
                        reading.substring(textLength),
                        readingNormalized.substring(textLength),
                        groups,
                        groupsStart + 1
                    );
                    if (segments !== null) {
                        if (reading.startsWith(text)) {
                            segments.unshift(this._createFuriganaSegment(text, ''));
                        } else {
                            // Text and reading differ only in kana type
                            segments.unshift(...this._getFuriganaKanaSegments(text, reading));
                        }
                        return segments;
                    }
                }
                return null;
            } else {
                // Try every reading length for the non-kana group, longest first
                let result = null;
                for (let i = reading.length; i >= textLength; --i) {
                    const segments = this._segmentizeFurigana(
                        reading.substring(i),
                        readingNormalized.substring(i),
                        groups,
                        groupsStart + 1
                    );
                    if (segments !== null) {
                        if (result !== null) {
                            // More than one way to segmentize the tail; mark as ambiguous
                            return null;
                        }
                        const segmentReading = reading.substring(0, i);
                        segments.unshift(this._createFuriganaSegment(text, segmentReading));
                        result = segments;
                    }
                    // There is only one way to segmentize the last non-kana group
                    if (groupCount === 1) {
                        break;
                    }
                }
                return result;
            }
        }

        /**
         * Splits a kana group into runs, based on whether each character is
         * identical to the character at the same index of the reading.
         * @param {string} text The kana text.
         * @param {string} reading The reading; its start corresponds to `text`.
         * @returns {object[]} Segments with an empty reading for identical
         *   runs and the matching part of the reading for differing runs.
         */
        _getFuriganaKanaSegments(text, reading) {
            const textLength = text.length;
            const newSegments = [];
            let start = 0;
            let state = (reading[0] === text[0]);
            for (let i = 1; i < textLength; ++i) {
                const newState = (reading[i] === text[i]);
                if (state === newState) { continue; }
                newSegments.push(this._createFuriganaSegment(text.substring(start, i), state ? '' : reading.substring(start, i)));
                state = newState;
                start = i;
            }
            newSegments.push(this._createFuriganaSegment(text.substring(start, textLength), state ? '' : reading.substring(start, textLength)));
            return newSegments;
        }

        /**
         * @returns {object} The WanaKana object passed to the constructor.
         * @throws {Error} If no WanaKana object was provided.
         */
        _getWanakana() {
            const wanakana = this._wanakana;
            if (wanakana === null) { throw new Error('Functions which use WanaKana are not supported in this context'); }
            return wanakana;
        }

        /**
         * Converts one run of lower case Latin letters and dashes to hiragana
         * and updates the source map accordingly.
         * @param {string} text The run to convert.
         * @param {?object} sourceMap Object with `combine` and `insert` methods, or `null`.
         * @param {number} sourceMapStart Index within the source map at which the run starts.
         * @returns {string} The converted run.
         * @throws {Error} If no WanaKana object was provided.
         */
        _convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) {
            const wanakana = this._getWanakana();
            const result = wanakana.toHiragana(text);

            // Generate source mapping
            if (sourceMap !== null) {
                let i = 0;
                let resultPos = 0;
                const ii = text.length;
                while (i < ii) {
                    // Find smallest matching substring
                    let iNext = i + 1;
                    let resultPosNext = result.length;
                    while (iNext < ii) {
                        const t = wanakana.toHiragana(text.substring(0, iNext));
                        if (t === result.substring(0, t.length)) {
                            resultPosNext = t.length;
                            break;
                        }
                        ++iNext;
                    }

                    // Merge characters
                    const removals = iNext - i - 1;
                    if (removals > 0) {
                        sourceMap.combine(sourceMapStart, removals);
                    }
                    ++sourceMapStart;

                    // Empty elements
                    const additions = resultPosNext - resultPos - 1;
                    for (let j = 0; j < additions; ++j) {
                        sourceMap.insert(sourceMapStart, 0);
                        ++sourceMapStart;
                    }

                    i = iNext;
                    resultPos = resultPosNext;
                }
            }

            return result;
        }

        /**
         * Computes the length, in UTF-16 code units, of the common prefix of
         * two strings, compared code point by code point.
         * @param {string} text1 The first string.
         * @param {string} text2 The second string.
         * @returns {number} The common prefix length.
         */
        _getStemLength(text1, text2) {
            const minLength = Math.min(text1.length, text2.length);
            if (minLength === 0) { return 0; }

            let i = 0;
            while (true) {
                const char1 = text1.codePointAt(i);
                const char2 = text2.codePointAt(i);
                if (char1 !== char2) { break; }
                const charLength = String.fromCodePoint(char1).length;
                i += charLength;
                if (i >= minLength) {
                    if (i > minLength) {
                        i -= charLength; // Don't consume partial UTF16 surrogate characters
                    }
                    break;
                }
            }
            return i;
        }
    }


    return JapaneseUtil;
})();
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
+								/*
-												Update copyright year (#1194)

* Update copyright year

* Remove redundant Author info
											
										
										
											2021-01-01 19:50:41 +00:00
+								 * Copyright (C) 2020-2021  Yomichan Authors
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
+								 *
 								 * This program is free software: you can redistribute it and/or modify
 								 * it under the terms of the GNU General Public License as published by
 								 * the Free Software Foundation, either version 3 of the License, or
 								 * (at your option) any later version.
 								 *
 								 * This program is distributed in the hope that it will be useful,
 								 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 								 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 								 * GNU General Public License for more details.
 								 *
 								 * You should have received a copy of the GNU General Public License
 								 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 								 */
-												JapaneseUtil normalization (#1076)

* Use JapaneseUtil as a class which is manually instantiated

* Use alias function for toKana
											
										
										
											2020-11-29 18:09:02 +00:00
+								const JapaneseUtil = (() => {
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								    const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
 								    const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
-												Improve convertKatakanaToHiragana and convertHiraganaToKatakana (#916)


											
										
										
											2020-10-14 01:48:21 +00:00
+								    const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5;
 								    const KATAKANA_SMALL_KE_CODE_POINT = 0x30f6;
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								    const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc;
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
+								    const HIRAGANA_RANGE = [0x3040, 0x309f];
 								    const KATAKANA_RANGE = [0x30a0, 0x30ff];
-												Improve convertKatakanaToHiragana and convertHiraganaToKatakana (#916)


											
										
										
											2020-10-14 01:48:21 +00:00
 								    const HIRAGANA_CONVERSION_RANGE = [0x3041, 0x3096];
 								    const KATAKANA_CONVERSION_RANGE = [0x30a1, 0x30f6];
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
+								    const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE];
 								    const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff];
 								    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf];
 								    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df];
 								    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f];
 								    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f];
 								    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf];
 								    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef];
-												Fix japanese codepoint range issues (#1960)

* Add CJK_COMPATIBILITY_IDEOGRAPHS_RANGE

* Rename CJK_UNIFIED_IDEOGRAPHS_RANGES => CJK_IDEOGRAPH_RANGES

* Simplify isKana check

* Update tests
											
										
										
											2021-09-26 17:29:55 +00:00
+								    const CJK_COMPATIBILITY_IDEOGRAPHS_RANGE = [0xf900, 0xfaff];
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
+								    const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f];
-												Fix japanese codepoint range issues (#1960)

* Add CJK_COMPATIBILITY_IDEOGRAPHS_RANGE

* Rename CJK_UNIFIED_IDEOGRAPHS_RANGES => CJK_IDEOGRAPH_RANGES

* Simplify isKana check

* Update tests
											
										
										
											2021-09-26 17:29:55 +00:00
+								    const CJK_IDEOGRAPH_RANGES = [
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
+								        CJK_UNIFIED_IDEOGRAPHS_RANGE,
 								        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE,
 								        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE,
 								        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE,
 								        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE,
 								        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE,
 								        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE,
-												Fix japanese codepoint range issues (#1960)

* Add CJK_COMPATIBILITY_IDEOGRAPHS_RANGE

* Rename CJK_UNIFIED_IDEOGRAPHS_RANGES => CJK_IDEOGRAPH_RANGES

* Simplify isKana check

* Update tests
											
										
										
											2021-09-26 17:29:55 +00:00
+								        CJK_COMPATIBILITY_IDEOGRAPHS_RANGE,
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
+								        CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE
 								    ];
 								    // Japanese character ranges, roughly ordered in order of expected frequency
 								    const JAPANESE_RANGES = [
 								        HIRAGANA_RANGE,
 								        KATAKANA_RANGE,
-												Fix japanese codepoint range issues (#1960)

* Add CJK_COMPATIBILITY_IDEOGRAPHS_RANGE

* Rename CJK_UNIFIED_IDEOGRAPHS_RANGES => CJK_IDEOGRAPH_RANGES

* Simplify isKana check

* Update tests
											
										
										
											2021-09-26 17:29:55 +00:00
+								        ...CJK_IDEOGRAPH_RANGES,
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
 								        [0xff66, 0xff9f], // Halfwidth katakana
 								        [0x30fb, 0x30fc], // Katakana punctuation
 								        [0xff61, 0xff65], // Kana punctuation
 								        [0x3000, 0x303f], // CJK punctuation
 								        [0xff10, 0xff19], // Fullwidth numbers
 								        [0xff21, 0xff3a], // Fullwidth upper case Latin letters
 								        [0xff41, 0xff5a], // Fullwidth lower case Latin letters
 								        [0xff01, 0xff0f], // Fullwidth punctuation 1
 								        [0xff1a, 0xff1f], // Fullwidth punctuation 2
 								        [0xff3b, 0xff3f], // Fullwidth punctuation 3
 								        [0xff5b, 0xff60], // Fullwidth punctuation 4
 								        [0xffe0, 0xffee]  // Currency markers
 								    ];
-												Move Japanese utility functions out of display-generator.js

											
										
										
											2020-03-28 14:47:02 +00:00
+								    const SMALL_KANA_SET = new Set(Array.from('ぁぃぅぇぉゃゅょゎァィゥェォャュョヮ'));
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								    const HALFWIDTH_KATAKANA_MAPPING = new Map([
 								        // Each key is a halfwidth katakana character. Each value is a three-character
 								        // string indexed by the mark that follows the key in the input:
 								        //   [0] plain fullwidth form, [1] form with dakuten, [2] form with handakuten.
 								        // A '-' placeholder means that combination does not exist.
 								        ['ｦ', 'ヲヺ-'],
 								        ['ｧ', 'ァ--'],
 								        ['ｨ', 'ィ--'],
 								        ['ｩ', 'ゥ--'],
 								        ['ｪ', 'ェ--'],
 								        ['ｫ', 'ォ--'],
 								        ['ｬ', 'ャ--'],
 								        ['ｭ', 'ュ--'],
 								        ['ｮ', 'ョ--'],
 								        ['ｯ', 'ッ--'],
 								        ['ｰ', 'ー--'],
 								        ['ｱ', 'ア--'],
 								        ['ｲ', 'イ--'],
 								        ['ｳ', 'ウヴ-'],
 								        ['ｴ', 'エ--'],
 								        ['ｵ', 'オ--'],
 								        ['ｶ', 'カガ-'],
 								        ['ｷ', 'キギ-'],
 								        ['ｸ', 'クグ-'],
 								        ['ｹ', 'ケゲ-'],
 								        ['ｺ', 'コゴ-'],
 								        ['ｻ', 'サザ-'],
 								        ['ｼ', 'シジ-'],
 								        ['ｽ', 'スズ-'],
 								        ['ｾ', 'セゼ-'],
 								        ['ｿ', 'ソゾ-'],
 								        ['ﾀ', 'タダ-'],
 								        ['ﾁ', 'チヂ-'],
 								        ['ﾂ', 'ツヅ-'],
 								        ['ﾃ', 'テデ-'],
 								        ['ﾄ', 'トド-'],
 								        ['ﾅ', 'ナ--'],
 								        ['ﾆ', 'ニ--'],
 								        ['ﾇ', 'ヌ--'],
 								        ['ﾈ', 'ネ--'],
 								        ['ﾉ', 'ノ--'],
 								        ['ﾊ', 'ハバパ'],
 								        ['ﾋ', 'ヒビピ'],
 								        ['ﾌ', 'フブプ'],
 								        ['ﾍ', 'ヘベペ'],
 								        ['ﾎ', 'ホボポ'],
 								        ['ﾏ', 'マ--'],
 								        ['ﾐ', 'ミ--'],
 								        ['ﾑ', 'ム--'],
 								        ['ﾒ', 'メ--'],
 								        ['ﾓ', 'モ--'],
 								        ['ﾔ', 'ヤ--'],
 								        ['ﾕ', 'ユ--'],
 								        ['ﾖ', 'ヨ--'],
 								        ['ﾗ', 'ラ--'],
 								        ['ﾘ', 'リ--'],
 								        ['ﾙ', 'ル--'],
 								        ['ﾚ', 'レ--'],
 								        ['ﾛ', 'ロ--'],
 								        ['ﾜ', 'ワ--'],
 								        ['ﾝ', 'ン--']
 								    ]);
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
-												Improve convertKatakanaToHiragana and convertHiraganaToKatakana (#916)


											
										
										
											2020-10-14 01:48:21 +00:00
+								    const VOWEL_TO_KANA_MAPPING = new Map([
 								        ['a', 'ぁあかがさざただなはばぱまゃやらゎわヵァアカガサザタダナハバパマャヤラヮワヵヷ'],
 								        ['i', 'ぃいきぎしじちぢにひびぴみりゐィイキギシジチヂニヒビピミリヰヸ'],
 								        ['u', 'ぅうくぐすずっつづぬふぶぷむゅゆるゥウクグスズッツヅヌフブプムュユルヴ'],
 								        ['e', 'ぇえけげせぜてでねへべぺめれゑヶェエケゲセゼテデネヘベペメレヱヶヹ'],
 								        ['o', 'ぉおこごそぞとどのほぼぽもょよろをォオコゴソゾトドノホボポモョヨロヲヺ'],
 								        ['', 'のノ']
 								    ]);
 								    const KANA_TO_VOWEL_MAPPING = (() => {
 								        const map = new Map();
 								        for (const [vowel, characters] of VOWEL_TO_KANA_MAPPING) {
 								            for (const character of characters) {
 								                map.set(character, vowel);
 								            }
 								        }
 								        return map;
 								    })();
-												Pronunciation nasal improvement (#1834)

* Organize

* Add utility to get diacritic information about a character

* Show mora without diacritic

* Add a hidden handakuten for copy-paste purposes
											
										
										
											2021-07-17 16:20:11 +00:00
+								    const DIACRITIC_MAPPING = (() => {
 								        // Builds a lookup from a kana carrying a dakuten or handakuten to its base
 								        // character and the kind of mark it carries.
 								        // The table below is read in groups of three characters: base character,
 								        // dakuten form, handakuten form ('-' when no handakuten form exists).
 								        const kana = 'うゔ-かが-きぎ-くぐ-けげ-こご-さざ-しじ-すず-せぜ-そぞ-ただ-ちぢ-つづ-てで-とど-はばぱひびぴふぶぷへべぺほぼぽワヷ-ヰヸ-ウヴ-ヱヹ-ヲヺ-カガ-キギ-クグ-ケゲ-コゴ-サザ-シジ-スズ-セゼ-ソゾ-タダ-チヂ-ツヅ-テデ-トド-ハバパヒビピフブプヘベペホボポ';
 								        const map = new Map();
 								        for (let i = 0, ii = kana.length; i < ii; i += 3) {
 								            const character = kana[i];
 								            const dakuten = kana[i + 1];
 								            const handakuten = kana[i + 2];
 								            // Keyed by the marked form so a lookup resolves back to the base character.
 								            map.set(dakuten, {character, type: 'dakuten'});
 								            if (handakuten !== '-') {
 								                map.set(handakuten, {character, type: 'handakuten'});
 								            }
 								        }
 								        return map;
 								    })();
-												Improve convertKatakanaToHiragana and convertHiraganaToKatakana (#916)


											
										
										
											2020-10-14 01:48:21 +00:00
 								    // Tests whether codePoint falls inside the inclusive [min, max] interval.
 								    function isCodePointInRange(codePoint, [min, max]) {
 								        if (!(codePoint >= min)) { return false; }
 								        return codePoint <= max;
 								    }
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
 								    // Tests whether codePoint falls inside any of the inclusive [min, max] pairs
 								    // in ranges; stops at the first pair that contains it.
 								    function isCodePointInRanges(codePoint, ranges) {
 								        for (const range of ranges) {
 								            const [lowest, highest] = range;
 								            const isOutside = !(codePoint >= lowest) || !(codePoint <= highest);
 								            if (!isOutside) { return true; }
 								        }
 								        return false;
 								    }
-												Improve convertKatakanaToHiragana and convertHiraganaToKatakana (#916)


											
										
										
											2020-10-14 01:48:21 +00:00
+								    function getProlongedHiragana(previousCharacter) {
 								        // Picks the hiragana vowel used in place of a prolonged sound mark that
 								        // follows previousCharacter, based on that character's vowel in
 								        // KANA_TO_VOWEL_MAPPING. An 'o' vowel is extended with 'う', not 'お'.
 								        // Returns null when the character has no entry or maps to no vowel.
 								        switch (KANA_TO_VOWEL_MAPPING.get(previousCharacter)) {
 								            case 'a': return 'あ';
 								            case 'i': return 'い';
 								            case 'u': return 'う';
 								            case 'e': return 'え';
 								            case 'o': return 'う';
 								            default: return null;
 								        }
 								    }
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
-												JapaneseUtil normalization (#1076)

* Use JapaneseUtil as a class which is manually instantiated

* Use alias function for toKana
											
										
										
											2020-11-29 18:09:02 +00:00
+								    // eslint-disable-next-line no-shadow
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								    class JapaneseUtil {
 								        constructor(wanakana=null) {
 								            // Optional wanakana dependency; null (the default) means none was supplied,
 								            // which is what the *Supported() methods check for.
 								            this._wanakana = wanakana;
 								        }
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								        // Character code testing functions
 								        isCodePointKanji(codePoint) {
-												Fix japanese codepoint range issues (#1960)

* Add CJK_COMPATIBILITY_IDEOGRAPHS_RANGE

* Rename CJK_UNIFIED_IDEOGRAPHS_RANGES => CJK_IDEOGRAPH_RANGES

* Simplify isKana check

* Update tests
											
										
										
											2021-09-26 17:29:55 +00:00
+								            return isCodePointInRanges(codePoint, CJK_IDEOGRAPH_RANGES);
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								        }
 								        // Returns true when codePoint lies within one of KANA_RANGES
 								        // (the hiragana and katakana ranges).
 								        isCodePointKana(codePoint) {
 								            return isCodePointInRanges(codePoint, KANA_RANGES);
 								        }
 								        // Returns true when codePoint lies within one of JAPANESE_RANGES, which covers
 								        // kana, CJK ideographs, halfwidth katakana, and the listed punctuation,
 								        // fullwidth and currency ranges.
 								        isCodePointJapanese(codePoint) {
 								            return isCodePointInRanges(codePoint, JAPANESE_RANGES);
 								        }
 								        // String testing functions
 								        isStringEntirelyKana(str) {
 								            if (str.length === 0) { return false; }
 								            for (const c of str) {
 								                if (!isCodePointInRanges(c.codePointAt(0), KANA_RANGES)) {
 								                    return false;
 								                }
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
+								            }
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								            return true;
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
+								        }
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								        isStringPartiallyJapanese(str) {
 								            if (str.length === 0) { return false; }
 								            for (const c of str) {
 								                if (isCodePointInRanges(c.codePointAt(0), JAPANESE_RANGES)) {
 								                    return true;
 								                }
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
+								            }
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								            return false;
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
+								        }
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								        // Mora functions
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
-												Rename pitchAccent in source code (#1852)

* Rename pitchAccentPosition to pitchAccentDownstepPosition

* Rename function

* Rename

* Rename

* pitches => pronunciations
											
										
										
											2021-07-26 23:45:30 +00:00
+								        isMoraPitchHigh(moraIndex, pitchAccentDownstepPosition) {
 								            switch (pitchAccentDownstepPosition) {
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								                case 0: return (moraIndex > 0);
 								                case 1: return (moraIndex < 1);
-												Rename pitchAccent in source code (#1852)

* Rename pitchAccentPosition to pitchAccentDownstepPosition

* Rename function

* Rename

* Rename

* pitches => pronunciations
											
										
										
											2021-07-26 23:45:30 +00:00
+								                default: return (moraIndex > 0 && moraIndex < pitchAccentDownstepPosition);
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								            }
 								        }
-												Move Japanese utility functions out of display-generator.js

											
										
										
											2020-03-28 14:47:02 +00:00
-												Rename pitchAccent in source code (#1852)

* Rename pitchAccentPosition to pitchAccentDownstepPosition

* Rename function

* Rename

* Rename

* pitches => pronunciations
											
										
										
											2021-07-26 23:45:30 +00:00
+								        getPitchCategory(text, pitchAccentDownstepPosition, isVerbOrAdjective) {
 								            if (pitchAccentDownstepPosition === 0) {
-												Get categorization of pitch accents (#1462)


											
										
										
											2021-02-28 18:26:34 +00:00
+								                return 'heiban';
 								            }
 								            if (isVerbOrAdjective) {
-												Rename pitchAccent in source code (#1852)

* Rename pitchAccentPosition to pitchAccentDownstepPosition

* Rename function

* Rename

* Rename

* pitches => pronunciations
											
										
										
											2021-07-26 23:45:30 +00:00
+								                return pitchAccentDownstepPosition > 0 ? 'kifuku' : null;
-												Get categorization of pitch accents (#1462)


											
										
										
											2021-02-28 18:26:34 +00:00
+								            }
-												Rename pitchAccent in source code (#1852)

* Rename pitchAccentPosition to pitchAccentDownstepPosition

* Rename function

* Rename

* Rename

* pitches => pronunciations
											
										
										
											2021-07-26 23:45:30 +00:00
+								            if (pitchAccentDownstepPosition === 1) {
-												Get categorization of pitch accents (#1462)


											
										
										
											2021-02-28 18:26:34 +00:00
+								                return 'atamadaka';
 								            }
-												Rename pitchAccent in source code (#1852)

* Rename pitchAccentPosition to pitchAccentDownstepPosition

* Rename function

* Rename

* Rename

* pitches => pronunciations
											
										
										
											2021-07-26 23:45:30 +00:00
+								            if (pitchAccentDownstepPosition > 1) {
 								                return pitchAccentDownstepPosition >= this.getKanaMoraCount(text) ? 'odaka' : 'nakadaka';
-												Get categorization of pitch accents (#1462)


											
										
										
											2021-02-28 18:26:34 +00:00
+								            }
 								            return null;
 								        }
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								        getKanaMorae(text) {
 								            const morae = [];
 								            let i;
 								            for (const c of text) {
 								                if (SMALL_KANA_SET.has(c) && (i = morae.length) > 0) {
 								                    morae[i - 1] += c;
 								                } else {
 								                    morae.push(c);
 								                }
 								            }
 								            return morae;
-												Fix high pitch calculation

											
										
										
											2020-04-23 01:04:18 +00:00
+								        }
-												Move Japanese utility functions out of display-generator.js

											
										
										
											2020-03-28 14:47:02 +00:00
-												Get categorization of pitch accents (#1462)


											
										
										
											2021-02-28 18:26:34 +00:00
+								        getKanaMoraCount(text) {
 								            // Counts the morae in text. Every character adds one mora, except a
 								            // character from SMALL_KANA_SET that follows at least one counted mora:
 								            // it is treated as part of the preceding mora and adds nothing.
 								            let moraCount = 0;
 								            for (const c of text) {
 								                // A small kana at the very start still counts, since there is
 								                // no preceding mora for it to attach to.
 								                if (!(SMALL_KANA_SET.has(c) && moraCount > 0)) {
 								                    ++moraCount;
 								                }
 								            }
 								            return moraCount;
 								        }
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								        // Conversion functions
-												JapaneseUtil normalization (#1076)

* Use JapaneseUtil as a class which is manually instantiated

* Use alias function for toKana
											
										
										
											2020-11-29 18:09:02 +00:00
+								        convertToKana(text) {
 								            // Delegates to toKana() on the object returned by this._getWanakana().
 								            // NOTE(review): _getWanakana is defined outside this view; presumably it
 								            // fails when no wanakana instance was supplied - confirm, and see
 								            // convertToKanaSupported().
 								            return this._getWanakana().toKana(text);
 								        }
-												JapaneseUtil updates (#1813)

* Rename test file

* Use shorthand

* Add support checking functions

* Remove convertReading from JapaneseUtil
											
										
										
											2021-07-09 21:31:16 +00:00
+								        convertToKanaSupported() {
 								            // True when a wanakana instance was passed to the constructor.
 								            return this._wanakana !== null;
 								        }
-												Katakana to hiragana conversion options (#1965)

* Refactor convertKatakanaToHiragana

* Add keepProlongedSoundMarks option

* Test keepProlongedSoundMarks option

* Refactor

* Add keepProlongedSoundMarks option to hiragana handlebars helper

* Update documentation
											
										
										
											2021-09-27 22:19:53 +00:00
+								        convertKatakanaToHiragana(text, keepProlongedSoundMarks=false) {
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								            let result = '';
-												Improve convertKatakanaToHiragana and convertHiraganaToKatakana (#916)


											
										
										
											2020-10-14 01:48:21 +00:00
+								            const offset = (HIRAGANA_CONVERSION_RANGE[0] - KATAKANA_CONVERSION_RANGE[0]);
 								            for (let char of text) {
 								                const codePoint = char.codePointAt(0);
-												Katakana to hiragana conversion options (#1965)

* Refactor convertKatakanaToHiragana

* Add keepProlongedSoundMarks option

* Test keepProlongedSoundMarks option

* Refactor

* Add keepProlongedSoundMarks option to hiragana handlebars helper

* Update documentation
											
										
										
											2021-09-27 22:19:53 +00:00
+								                switch (codePoint) {
 								                    case KATAKANA_SMALL_KA_CODE_POINT:
 								                    case KATAKANA_SMALL_KE_CODE_POINT:
 								                        // No change
 								                        break;
 								                    case KANA_PROLONGED_SOUND_MARK_CODE_POINT:
 								                        if (!keepProlongedSoundMarks && result.length > 0) {
 								                            const char2 = getProlongedHiragana(result[result.length - 1]);
 								                            if (char2 !== null) { char = char2; }
 								                        }
 								                        break;
 								                    default:
 								                        if (isCodePointInRange(codePoint, KATAKANA_CONVERSION_RANGE)) {
 								                            char = String.fromCodePoint(codePoint + offset);
 								                        }
 								                        break;
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								                }
-												Improve convertKatakanaToHiragana and convertHiraganaToKatakana (#916)


											
										
										
											2020-10-14 01:48:21 +00:00
+								                result += char;
-												Move Japanese utility functions out of display-generator.js

											
										
										
											2020-03-28 14:47:02 +00:00
+								            }
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								            return result;
-												Move Japanese utility functions out of display-generator.js

											
										
										
											2020-03-28 14:47:02 +00:00
+								        }
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								        convertHiraganaToKatakana(text) {
 								            let result = '';
-												Improve convertKatakanaToHiragana and convertHiraganaToKatakana (#916)


											
										
										
											2020-10-14 01:48:21 +00:00
+								            const offset = (KATAKANA_CONVERSION_RANGE[0] - HIRAGANA_CONVERSION_RANGE[0]);
 								            for (let char of text) {
 								                const codePoint = char.codePointAt(0);
 								                if (isCodePointInRange(codePoint, HIRAGANA_CONVERSION_RANGE)) {
 								                    char = String.fromCodePoint(codePoint + offset);
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								                }
-												Improve convertKatakanaToHiragana and convertHiraganaToKatakana (#916)


											
										
										
											2020-10-14 01:48:21 +00:00
+								                result += char;
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								            }
 								            return result;
 								        }
 								        convertToRomaji(text) {
 								            const wanakana = this._getWanakana();
 								            return wanakana.toRomaji(text);
 								        }
-												JapaneseUtil updates (#1813)

* Rename test file

* Use shorthand

* Add support checking functions

* Remove convertReading from JapaneseUtil
											
										
										
											2021-07-09 21:31:16 +00:00
+								        convertToRomajiSupported() {
 								            return this._wanakana !== null;
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								        }
 								        convertNumericToFullWidth(text) {
 								            let result = '';
 								            for (const char of text) {
 								                let c = char.codePointAt(0);
 								                if (c >= 0x30 && c <= 0x39) { // ['0', '9']
 								                    c += 0xff10 - 0x30; // 0xff10 = '0' full width
 								                    result += String.fromCodePoint(c);
 								                } else {
 								                    result += char;
 								                }
 								            }
 								            return result;
 								        }
 								        convertHalfWidthKanaToFullWidth(text, sourceMap=null) {
 								            let result = '';
 								            // This function is safe to use charCodeAt instead of codePointAt, since all
 								            // the relevant characters are represented with a single UTF-16 character code.
 								            for (let i = 0, ii = text.length; i < ii; ++i) {
 								                const c = text[i];
 								                const mapping = HALFWIDTH_KATAKANA_MAPPING.get(c);
 								                if (typeof mapping !== 'string') {
 								                    result += c;
 								                    continue;
 								                }
 								                let index = 0;
 								                switch (text.charCodeAt(i + 1)) {
 								                    case 0xff9e: // dakuten
 								                        index = 1;
 								                        break;
 								                    case 0xff9f: // handakuten
 								                        index = 2;
 								                        break;
 								                }
 								                let c2 = mapping[index];
 								                if (index > 0) {
 								                    if (c2 === '-') { // invalid
 								                        index = 0;
 								                        c2 = mapping[0];
 								                    } else {
 								                        ++i;
 								                    }
 								                }
 								                if (sourceMap !== null && index > 0) {
 								                    sourceMap.combine(result.length, 1);
 								                }
 								                result += c2;
 								            }
 								            return result;
 								        }
 								        convertAlphabeticToKana(text, sourceMap=null) {
 								            let part = '';
 								            let result = '';
 								            for (const char of text) {
 								                // Note: 0x61 is the character code for 'a'
 								                let c = char.codePointAt(0);
 								                if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z']
 								                    c += (0x61 - 0x41);
 								                } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z']
 								                    // NOP; c += (0x61 - 0x61);
 								                } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] fullwidth
 								                    c += (0x61 - 0xff21);
 								                } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] fullwidth
 								                    c += (0x61 - 0xff41);
 								                } else if (c === 0x2d || c === 0xff0d) { // '-' or fullwidth dash
 								                    c = 0x2d; // '-'
 								                } else {
 								                    if (part.length > 0) {
 								                        result += this._convertAlphabeticPartToKana(part, sourceMap, result.length);
 								                        part = '';
 								                    }
 								                    result += char;
 								                    continue;
 								                }
 								                part += String.fromCodePoint(c);
 								            }
 								            if (part.length > 0) {
 								                result += this._convertAlphabeticPartToKana(part, sourceMap, result.length);
 								            }
 								            return result;
 								        }
-												JapaneseUtil updates (#1813)

* Rename test file

* Use shorthand

* Add support checking functions

* Remove convertReading from JapaneseUtil
											
										
										
											2021-07-09 21:31:16 +00:00
+								        convertAlphabeticToKanaSupported() {
 								            // True when a wanakana instance was passed to the constructor.
 								            return this._wanakana !== null;
 								        }
-												Pronunciation nasal improvement (#1834)

* Organize

* Add utility to get diacritic information about a character

* Show mora without diacritic

* Add a hidden handakuten for copy-paste purposes
											
										
										
											2021-07-17 16:20:11 +00:00
+								        getKanaDiacriticInfo(character) {
 								            // Looks up character in DIACRITIC_MAPPING. For a kana carrying a dakuten or
 								            // handakuten, returns a new {character, type} object where character is
 								            // the base kana and type is 'dakuten' or 'handakuten'; otherwise null.
 								            const info = DIACRITIC_MAPPING.get(character);
 								            // Return a copy so callers cannot mutate the shared table entry.
 								            return typeof info !== 'undefined' ? {character: info.character, type: info.type} : null;
 								        }
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								        // Furigana distribution
-												Update dictionary entry terminology (#1592)

* Update terminology

* Update terminology in display.js

* Update terminology in display-audio.js

* Update terminology in text-scanner.js

* Update terminology in backend.js

* Update terminology in mecab.js

* Update terminology in audio-downloader.js

* Update terminology in translator-vm.js

* Update terminology in dictionary-data-util.js

* Update terminology in dictionary-database.js

* Update terminology in japanese-util.js

* Change/upgrade {expression} to {term}

* Update terminology in test-japanese.js

* Update terminology in test-database.js

* Update terminology in anki-templates-controller.js

* Update terminology in anki-note-builder.js

* Update terminology in backend.js

* Update terminology in text-scanner.js

* Update terminology in display.js

* Update terminology in display.js
											
										
										
											2021-04-04 20:22:35 +00:00
+								        distributeFurigana(term, reading) {
 								            if (reading === term) {
-												Frequencies marker (#1074)

* Update japanese.js tests

* Simplify fallback/early exit

* Add overloads to furigana and furiganaPlain handlebars helper functions

* Expose unique expression/reading arrays (and subsequently counts)

* Add {frequencies} marker
											
										
										
											2020-11-28 19:30:50 +00:00
+								                // Same
-												Update dictionary entry terminology (#1592)

* Update terminology

* Update terminology in display.js

* Update terminology in display-audio.js

* Update terminology in text-scanner.js

* Update terminology in backend.js

* Update terminology in mecab.js

* Update terminology in audio-downloader.js

* Update terminology in translator-vm.js

* Update terminology in dictionary-data-util.js

* Update terminology in dictionary-database.js

* Update terminology in japanese-util.js

* Change/upgrade {expression} to {term}

* Update terminology in test-japanese.js

* Update terminology in test-database.js

* Update terminology in anki-templates-controller.js

* Update terminology in anki-note-builder.js

* Update terminology in backend.js

* Update terminology in text-scanner.js

* Update terminology in display.js

* Update terminology in display.js
											
										
										
											2021-04-04 20:22:35 +00:00
+								                return [this._createFuriganaSegment(term, '')];
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								            }
 								            const groups = [];
-												Furigana distribution improvements (#1157)

* Improve furigana when reading kana is not an exact match

* Simplify group structure

* Return consistent type

* Add more tests

* Remove redundant isAmbiguous assignment

* Simplify group usage

* Add helper function

* Optimize returned arrays

* Use variable

* Remove s(p)lice calls

* Reduce number of convertKatakanaToHiragana calls

* Optimize text length access

* Optimize reading substring

* Move segmentize to a separate function

* Use var

* Use _createFuriganaSegment

* Optimize distributeFuriganaInflected
											
										
										
											2020-12-22 16:02:19 +00:00
+								            let groupPre = null;
 								            let isKanaPre = null;
-												Update dictionary entry terminology (#1592)

* Update terminology

* Update terminology in display.js

* Update terminology in display-audio.js

* Update terminology in text-scanner.js

* Update terminology in backend.js

* Update terminology in mecab.js

* Update terminology in audio-downloader.js

* Update terminology in translator-vm.js

* Update terminology in dictionary-data-util.js

* Update terminology in dictionary-database.js

* Update terminology in japanese-util.js

* Change/upgrade {expression} to {term}

* Update terminology in test-japanese.js

* Update terminology in test-database.js

* Update terminology in anki-templates-controller.js

* Update terminology in anki-note-builder.js

* Update terminology in backend.js

* Update terminology in text-scanner.js

* Update terminology in display.js

* Update terminology in display.js
											
										
										
											2021-04-04 20:22:35 +00:00
+								            for (const c of term) {
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								                const codePoint = c.codePointAt(0);
-												Fix japanese codepoint range issues (#1960)

* Add CJK_COMPATIBILITY_IDEOGRAPHS_RANGE

* Rename CJK_UNIFIED_IDEOGRAPHS_RANGES => CJK_IDEOGRAPH_RANGES

* Simplify isKana check

* Update tests
											
										
										
											2021-09-26 17:29:55 +00:00
+								                const isKana = this.isCodePointKana(codePoint);
-												Furigana distribution improvements (#1157)

* Improve furigana when reading kana is not an exact match

* Simplify group structure

* Return consistent type

* Add more tests

* Remove redundant isAmbiguous assignment

* Simplify group usage

* Add helper function

* Optimize returned arrays

* Use variable

* Remove s(p)lice calls

* Reduce number of convertKatakanaToHiragana calls

* Optimize text length access

* Optimize reading substring

* Move segmentize to a separate function

* Use var

* Use _createFuriganaSegment

* Optimize distributeFuriganaInflected
											
										
										
											2020-12-22 16:02:19 +00:00
+								                if (isKana === isKanaPre) {
 								                    groupPre.text += c;
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								                } else {
-												Furigana distribution improvements (#1157)

* Improve furigana when reading kana is not an exact match

* Simplify group structure

* Return consistent type

* Add more tests

* Remove redundant isAmbiguous assignment

* Simplify group usage

* Add helper function

* Optimize returned arrays

* Use variable

* Remove s(p)lice calls

* Reduce number of convertKatakanaToHiragana calls

* Optimize text length access

* Optimize reading substring

* Move segmentize to a separate function

* Use var

* Use _createFuriganaSegment

* Optimize distributeFuriganaInflected
											
										
										
											2020-12-22 16:02:19 +00:00
+								                    groupPre = {isKana, text: c, textNormalized: null};
 								                    groups.push(groupPre);
 								                    isKanaPre = isKana;
 								                }
 								            }
 								            for (const group of groups) {
 								                if (group.isKana) {
 								                    group.textNormalized = this.convertKatakanaToHiragana(group.text);
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								                }
 								            }
-												Furigana distribution improvements (#1157)

* Improve furigana when reading kana is not an exact match

* Simplify group structure

* Return consistent type

* Add more tests

* Remove redundant isAmbiguous assignment

* Simplify group usage

* Add helper function

* Optimize returned arrays

* Use variable

* Remove s(p)lice calls

* Reduce number of convertKatakanaToHiragana calls

* Optimize text length access

* Optimize reading substring

* Move segmentize to a separate function

* Use var

* Use _createFuriganaSegment

* Optimize distributeFuriganaInflected
											
										
										
											2020-12-22 16:02:19 +00:00
+								            const readingNormalized = this.convertKatakanaToHiragana(reading);
 								            const segments = this._segmentizeFurigana(reading, readingNormalized, groups, 0);
 								            if (segments !== null) {
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								                return segments;
 								            }
-												Frequencies marker (#1074)

* Update japanese.js tests

* Simplify fallback/early exit

* Add overloads to furigana and furiganaPlain handlebars helper functions

* Expose unique expression/reading arrays (and subsequently counts)

* Add {frequencies} marker
											
										
										
											2020-11-28 19:30:50 +00:00
 								            // Fallback
-												Update dictionary entry terminology (#1592)

* Update terminology

* Update terminology in display.js

* Update terminology in display-audio.js

* Update terminology in text-scanner.js

* Update terminology in backend.js

* Update terminology in mecab.js

* Update terminology in audio-downloader.js

* Update terminology in translator-vm.js

* Update terminology in dictionary-data-util.js

* Update terminology in dictionary-database.js

* Update terminology in japanese-util.js

* Change/upgrade {expression} to {term}

* Update terminology in test-japanese.js

* Update terminology in test-database.js

* Update terminology in anki-templates-controller.js

* Update terminology in anki-note-builder.js

* Update terminology in backend.js

* Update terminology in text-scanner.js

* Update terminology in display.js

* Update terminology in display.js
											
										
										
											2021-04-04 20:22:35 +00:00
+								            return [this._createFuriganaSegment(term, reading)];
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								        }
-												Update dictionary entry terminology (#1592)

* Update terminology

* Update terminology in display.js

* Update terminology in display-audio.js

* Update terminology in text-scanner.js

* Update terminology in backend.js

* Update terminology in mecab.js

* Update terminology in audio-downloader.js

* Update terminology in translator-vm.js

* Update terminology in dictionary-data-util.js

* Update terminology in dictionary-database.js

* Update terminology in japanese-util.js

* Change/upgrade {expression} to {term}

* Update terminology in test-japanese.js

* Update terminology in test-database.js

* Update terminology in anki-templates-controller.js

* Update terminology in anki-note-builder.js

* Update terminology in backend.js

* Update terminology in text-scanner.js

* Update terminology in display.js

* Update terminology in display.js
											
										
										
											2021-04-04 20:22:35 +00:00
+								        distributeFuriganaInflected(term, reading, source) {
 								            const termNormalized = this.convertKatakanaToHiragana(term);
-												Fix incorrect furigana distribution (#1514)

* Improve distributeFuriganaInflected implementation

* Update tests
											
										
										
											2021-03-11 01:27:01 +00:00
+								            const readingNormalized = this.convertKatakanaToHiragana(reading);
 								            const sourceNormalized = this.convertKatakanaToHiragana(source);
-												Update dictionary entry terminology (#1592)

* Update terminology

* Update terminology in display.js

* Update terminology in display-audio.js

* Update terminology in text-scanner.js

* Update terminology in backend.js

* Update terminology in mecab.js

* Update terminology in audio-downloader.js

* Update terminology in translator-vm.js

* Update terminology in dictionary-data-util.js

* Update terminology in dictionary-database.js

* Update terminology in japanese-util.js

* Change/upgrade {expression} to {term}

* Update terminology in test-japanese.js

* Update terminology in test-database.js

* Update terminology in anki-templates-controller.js

* Update terminology in anki-note-builder.js

* Update terminology in backend.js

* Update terminology in text-scanner.js

* Update terminology in display.js

* Update terminology in display.js
											
										
										
											2021-04-04 20:22:35 +00:00
+								            let mainText = term;
 								            let stemLength = this._getStemLength(termNormalized, sourceNormalized);
-												Fix incorrect furigana distribution (#1514)

* Improve distributeFuriganaInflected implementation

* Update tests
											
										
										
											2021-03-11 01:27:01 +00:00
-												Update dictionary entry terminology (#1592)

* Update terminology

* Update terminology in display.js

* Update terminology in display-audio.js

* Update terminology in text-scanner.js

* Update terminology in backend.js

* Update terminology in mecab.js

* Update terminology in audio-downloader.js

* Update terminology in translator-vm.js

* Update terminology in dictionary-data-util.js

* Update terminology in dictionary-database.js

* Update terminology in japanese-util.js

* Change/upgrade {expression} to {term}

* Update terminology in test-japanese.js

* Update terminology in test-database.js

* Update terminology in anki-templates-controller.js

* Update terminology in anki-note-builder.js

* Update terminology in backend.js

* Update terminology in text-scanner.js

* Update terminology in display.js

* Update terminology in display.js
											
										
										
											2021-04-04 20:22:35 +00:00
+								            // Check if source is derived from the reading instead of the term
-												Fix incorrect furigana distribution (#1514)

* Improve distributeFuriganaInflected implementation

* Update tests
											
										
										
											2021-03-11 01:27:01 +00:00
+								            const readingStemLength = this._getStemLength(readingNormalized, sourceNormalized);
-												Fix furigana distribution when source/expression kana differs (#1532)

* Fix furigana distribution when source/expression kana differs

* Add an additional test
											
										
										
											2021-03-16 03:02:38 +00:00
+								            if (readingStemLength > 0 && readingStemLength >= stemLength) {
-												Fix incorrect furigana distribution (#1514)

* Improve distributeFuriganaInflected implementation

* Update tests
											
										
										
											2021-03-11 01:27:01 +00:00
+								                mainText = reading;
 								                stemLength = readingStemLength;
-												Fix furigana distribution when source/expression kana differs (#1532)

* Fix furigana distribution when source/expression kana differs

* Add an additional test
											
										
										
											2021-03-16 03:02:38 +00:00
+								                reading = `${source.substring(0, stemLength)}${reading.substring(stemLength)}`;
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								            }
-												Fix incorrect furigana distribution (#1514)

* Improve distributeFuriganaInflected implementation

* Update tests
											
										
										
											2021-03-11 01:27:01 +00:00
+								            const segments = [];
 								            if (stemLength > 0) {
-												Fix furigana distribution when source/expression kana differs (#1532)

* Fix furigana distribution when source/expression kana differs

* Add an additional test
											
										
										
											2021-03-16 03:02:38 +00:00
+								                mainText = `${source.substring(0, stemLength)}${mainText.substring(stemLength)}`;
-												Fix incorrect furigana distribution (#1514)

* Improve distributeFuriganaInflected implementation

* Update tests
											
										
										
											2021-03-11 01:27:01 +00:00
+								                const segments2 = this.distributeFurigana(mainText, reading);
 								                let consumed = 0;
 								                for (const segment of segments2) {
 								                    const {text} = segment;
 								                    const start = consumed;
 								                    consumed += text.length;
 								                    if (consumed < stemLength) {
 								                        segments.push(segment);
 								                    } else if (consumed === stemLength) {
 								                        segments.push(segment);
 								                        break;
 								                    } else {
 								                        if (start < stemLength) {
 								                            segments.push(this._createFuriganaSegment(mainText.substring(start, stemLength), ''));
 								                        }
 								                        break;
 								                    }
 								                }
 								            }
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
-												Fix incorrect furigana distribution (#1514)

* Improve distributeFuriganaInflected implementation

* Update tests
											
										
										
											2021-03-11 01:27:01 +00:00
+								            if (stemLength < source.length) {
 								                const remainder = source.substring(stemLength);
 								                const segmentCount = segments.length;
-												Replace furigana with reading (#1614)

* Use "reading" instead of "furigana" for reading distribution

* Update tests
											
										
										
											2021-04-14 00:32:24 +00:00
+								                if (segmentCount > 0 && segments[segmentCount - 1].reading.length === 0) {
-												Fix incorrect furigana distribution (#1514)

* Improve distributeFuriganaInflected implementation

* Update tests
											
										
										
											2021-03-11 01:27:01 +00:00
+								                    // Append to the last segment if it has an empty reading
 								                    segments[segmentCount - 1].text += remainder;
 								                } else {
 								                    // Otherwise, create a new segment
 								                    segments.push(this._createFuriganaSegment(remainder, ''));
 								                }
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								            }
-												Fix incorrect furigana distribution (#1514)

* Improve distributeFuriganaInflected implementation

* Update tests
											
										
										
											2021-03-11 01:27:01 +00:00
+								            return segments;
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								        }
 								        // Miscellaneous
 								        collapseEmphaticSequences(text, fullCollapse, sourceMap=null) {
 								            let result = '';
 								            let collapseCodePoint = -1;
 								            const hasSourceMap = (sourceMap !== null);
 								            for (const char of text) {
 								                const c = char.codePointAt(0);
 								                if (
 								                    c === HIRAGANA_SMALL_TSU_CODE_POINT ||
 								                    c === KATAKANA_SMALL_TSU_CODE_POINT ||
 								                    c === KANA_PROLONGED_SOUND_MARK_CODE_POINT
 								                ) {
 								                    if (collapseCodePoint !== c) {
 								                        collapseCodePoint = c;
 								                        if (!fullCollapse) {
 								                            result += char;
 								                            continue;
 								                        }
 								                    }
 								                } else {
 								                    collapseCodePoint = -1;
 								                    result += char;
 								                    continue;
 								                }
 								                if (hasSourceMap) {
 								                    sourceMap.combine(Math.max(0, result.length - 1), 1);
 								                }
 								            }
 								            return result;
 								        }
 								        // Private
-												Replace furigana with reading (#1614)

* Use "reading" instead of "furigana" for reading distribution

* Update tests
											
										
										
											2021-04-14 00:32:24 +00:00
+								        _createFuriganaSegment(text, reading) {
 								            return {text, reading};
-												Furigana distribution improvements (#1157)

* Improve furigana when reading kana is not an exact match

* Simplify group structure

* Return consistent type

* Add more tests

* Remove redundant isAmbiguous assignment

* Simplify group usage

* Add helper function

* Optimize returned arrays

* Use variable

* Remove s(p)lice calls

* Reduce number of convertKatakanaToHiragana calls

* Optimize text length access

* Optimize reading substring

* Move segmentize to a separate function

* Use var

* Use _createFuriganaSegment

* Optimize distributeFuriganaInflected
											
										
										
											2020-12-22 16:02:19 +00:00
+								        }
 								        _segmentizeFurigana(reading, readingNormalized, groups, groupsStart) {
 								            const groupCount = groups.length - groupsStart;
 								            if (groupCount <= 0) {
-												Fix furigana distribution when reading starts with expression, but has remainder characters (#1496)


											
										
										
											2021-03-06 20:49:07 +00:00
+								                return reading.length === 0 ? [] : null;
-												Furigana distribution improvements (#1157)

* Improve furigana when reading kana is not an exact match

* Simplify group structure

* Return consistent type

* Add more tests

* Remove redundant isAmbiguous assignment

* Simplify group usage

* Add helper function

* Optimize returned arrays

* Use variable

* Remove s(p)lice calls

* Reduce number of convertKatakanaToHiragana calls

* Optimize text length access

* Optimize reading substring

* Move segmentize to a separate function

* Use var

* Use _createFuriganaSegment

* Optimize distributeFuriganaInflected
											
										
										
											2020-12-22 16:02:19 +00:00
+								            }
 								            const group = groups[groupsStart];
 								            const {isKana, text} = group;
 								            const textLength = text.length;
 								            if (isKana) {
 								                const {textNormalized} = group;
 								                if (readingNormalized.startsWith(textNormalized)) {
 								                    const segments = this._segmentizeFurigana(
 								                        reading.substring(textLength),
 								                        readingNormalized.substring(textLength),
 								                        groups,
 								                        groupsStart + 1
 								                    );
 								                    if (segments !== null) {
-												Improve kana segmentation (#1446)

* Improve edge case furigana distribution for mixed hiragana/katakana

* Update/add tests
											
										
										
											2021-02-27 04:23:16 +00:00
+								                        if (reading.startsWith(text)) {
 								                            segments.unshift(this._createFuriganaSegment(text, ''));
 								                        } else {
 								                            segments.unshift(...this._getFuriganaKanaSegments(text, reading));
 								                        }
-												Furigana distribution improvements (#1157)

* Improve furigana when reading kana is not an exact match

* Simplify group structure

* Return consistent type

* Add more tests

* Remove redundant isAmbiguous assignment

* Simplify group usage

* Add helper function

* Optimize returned arrays

* Use variable

* Remove s(p)lice calls

* Reduce number of convertKatakanaToHiragana calls

* Optimize text length access

* Optimize reading substring

* Move segmentize to a separate function

* Use var

* Use _createFuriganaSegment

* Optimize distributeFuriganaInflected
											
										
										
											2020-12-22 16:02:19 +00:00
+								                        return segments;
 								                    }
 								                }
 								                return null;
 								            } else {
 								                let result = null;
 								                for (let i = reading.length; i >= textLength; --i) {
 								                    const segments = this._segmentizeFurigana(
 								                        reading.substring(i),
 								                        readingNormalized.substring(i),
 								                        groups,
 								                        groupsStart + 1
 								                    );
 								                    if (segments !== null) {
 								                        if (result !== null) {
 								                            // More than one way to segmentize the tail; mark as ambiguous
 								                            return null;
 								                        }
-												Replace furigana with reading (#1614)

* Use "reading" instead of "furigana" for reading distribution

* Update tests
											
										
										
											2021-04-14 00:32:24 +00:00
+								                        const segmentReading = reading.substring(0, i);
 								                        segments.unshift(this._createFuriganaSegment(text, segmentReading));
-												Furigana distribution improvements (#1157)

* Improve furigana when reading kana is not an exact match

* Simplify group structure

* Return consistent type

* Add more tests

* Remove redundant isAmbiguous assignment

* Simplify group usage

* Add helper function

* Optimize returned arrays

* Use variable

* Remove s(p)lice calls

* Reduce number of convertKatakanaToHiragana calls

* Optimize text length access

* Optimize reading substring

* Move segmentize to a separate function

* Use var

* Use _createFuriganaSegment

* Optimize distributeFuriganaInflected
											
										
										
											2020-12-22 16:02:19 +00:00
+								                        result = segments;
 								                    }
 								                    // There is only one way to segmentize the last non-kana group
 								                    if (groupCount === 1) {
 								                        break;
 								                    }
 								                }
 								                return result;
 								            }
 								        }
-												Improve kana segmentation (#1446)

* Improve edge case furigana distribution for mixed hiragana/katakana

* Update/add tests
											
										
										
											2021-02-27 04:23:16 +00:00
+								        _getFuriganaKanaSegments(text, reading) {
 								            const textLength = text.length;
 								            const newSegments = [];
 								            let start = 0;
 								            let state = (reading[0] === text[0]);
 								            for (let i = 1; i < textLength; ++i) {
 								                const newState = (reading[i] === text[i]);
 								                if (state === newState) { continue; }
 								                newSegments.push(this._createFuriganaSegment(text.substring(start, i), state ? '' : reading.substring(start, i)));
 								                state = newState;
 								                start = i;
 								            }
 								            newSegments.push(this._createFuriganaSegment(text.substring(start, textLength), state ? '' : reading.substring(start, textLength)));
 								            return newSegments;
 								        }
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								        _getWanakana() {
 								            const wanakana = this._wanakana;
 								            if (wanakana === null) { throw new Error('Functions which use WanaKana are not supported in this context'); }
 								            return wanakana;
 								        }
 								        _convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) {
 								            const wanakana = this._getWanakana();
 								            const result = wanakana.toHiragana(text);
 								            // Generate source mapping
 								            if (sourceMap !== null) {
 								                let i = 0;
 								                let resultPos = 0;
 								                const ii = text.length;
 								                while (i < ii) {
 								                    // Find smallest matching substring
 								                    let iNext = i + 1;
 								                    let resultPosNext = result.length;
 								                    while (iNext < ii) {
 								                        const t = wanakana.toHiragana(text.substring(0, iNext));
 								                        if (t === result.substring(0, t.length)) {
 								                            resultPosNext = t.length;
 								                            break;
 								                        }
 								                        ++iNext;
 								                    }
 								                    // Merge characters
 								                    const removals = iNext - i - 1;
 								                    if (removals > 0) {
 								                        sourceMap.combine(sourceMapStart, removals);
 								                    }
 								                    ++sourceMapStart;
 								                    // Empty elements
 								                    const additions = resultPosNext - resultPos - 1;
 								                    for (let j = 0; j < additions; ++j) {
 								                        sourceMap.insert(sourceMapStart, 0);
 								                        ++sourceMapStart;
 								                    }
 								                    i = iNext;
 								                    resultPos = resultPosNext;
 								                }
 								            }
 								            return result;
 								        }
-												Fix incorrect furigana distribution (#1514)

* Improve distributeFuriganaInflected implementation

* Update tests
											
										
										
											2021-03-11 01:27:01 +00:00
 								        _getStemLength(text1, text2) {
 								            const minLength = Math.min(text1.length, text2.length);
 								            if (minLength === 0) { return 0; }
 								            let i = 0;
 								            while (true) {
 								                const char1 = text1.codePointAt(i);
 								                const char2 = text2.codePointAt(i);
 								                if (char1 !== char2) { break; }
 								                const charLength = String.fromCodePoint(char1).length;
 								                i += charLength;
 								                if (i >= minLength) {
 								                    if (i > minLength) {
 								                        i -= charLength; // Don't consume partial UTF16 surrogate characters
 								                    }
 								                    break;
 								                }
 								            }
 								            return i;
 								        }
-												Japanese util refactor (#510)

* Convert mixed japanese.js to utility class

* Copy functions from bg/js/japanese.js into mixed/js/japanese.js

* Remove bg/js/japanese.js

* Make wanakana dependency optional

* Update tests
											
										
										
											2020-05-06 23:37:36 +00:00
+								    }
-												Move Japanese utility functions out of display-generator.js

											
										
										
											2020-03-28 14:47:02 +00:00
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
-												JapaneseUtil normalization (#1076)

* Use JapaneseUtil as a class which is manually instantiated

* Use alias function for toKana
											
										
										
											2020-11-29 18:09:02 +00:00
+								    return JapaneseUtil;
-												Move basic string/character testing functions into a mixed/js/japanese.js

											
										
										
											2020-03-21 17:18:34 +00:00
+								})();