Add character range definitions

This commit is contained in:
toasted-nutbread 2020-01-22 21:41:32 -05:00
parent 3c17388ff8
commit 9b509d50a9

View File

@ -76,6 +76,41 @@ const JP_HALFWIDTH_KATAKANA_MAPPING = new Map([
['ン', 'ン--'] ['ン', 'ン--']
]); ]);
// Character ranges are [first, last] pairs of UTF-16 code unit values.
// NOTE(review): bounds look inclusive (fullwidth digits span 0xff10..0xff19) -
// confirm against whichever helper consumes these ranges.

// Kana blocks
const JP_HIRAGANA_RANGE = [0x3040, 0x309f];
const JP_KATAKANA_RANGE = [0x30a0, 0x30ff];
const JP_KANA_RANGES = [JP_HIRAGANA_RANGE, JP_KATAKANA_RANGE];

// CJK ideograph blocks
const JP_CJK_COMMON_RANGE = [0x4e00, 0x9fff];
const JP_CJK_RARE_RANGE = [0x3400, 0x4dbf];
const JP_CJK_RANGES = [JP_CJK_COMMON_RANGE, JP_CJK_RARE_RANGE];

// Code of the iteration mark character (U+3005)
const JP_ITERATION_MARK_CHAR_CODE = 0x3005;

// Every range treated as Japanese text, listed roughly from most to least
// frequently expected. The kana and CJK entries are the very same array
// objects declared above, in the same order as their group arrays.
const JP_JAPANESE_RANGES = [
    ...JP_KANA_RANGES,
    ...JP_CJK_RANGES,

    // Halfwidth katakana
    [0xff66, 0xff9f],

    // Katakana punctuation
    [0x30fb, 0x30fc],

    // Kana punctuation
    [0xff61, 0xff65],

    // CJK punctuation
    [0x3000, 0x303f],

    // Fullwidth numbers
    [0xff10, 0xff19],

    // Fullwidth Latin letters: upper case, then lower case
    [0xff21, 0xff3a],
    [0xff41, 0xff5a],

    // Fullwidth punctuation, groups 1 through 4
    [0xff01, 0xff0f],
    [0xff1a, 0xff1f],
    [0xff3b, 0xff3f],
    [0xff5b, 0xff60],

    // Currency markers
    [0xffe0, 0xffee]
];
function jpIsKanji(c) { function jpIsKanji(c) {
const code = c.charCodeAt(0); const code = c.charCodeAt(0);