Move basic string/character testing functions into mixed/js/japanese.js
This commit is contained in:
parent
487d4b239b
commit
77a2cc60e9
@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
<script src="/mixed/js/core.js"></script>
|
<script src="/mixed/js/core.js"></script>
|
||||||
<script src="/mixed/js/dom.js"></script>
|
<script src="/mixed/js/dom.js"></script>
|
||||||
|
<script src="/mixed/js/japanese.js"></script>
|
||||||
|
|
||||||
<script src="/bg/js/anki.js"></script>
|
<script src="/bg/js/anki.js"></script>
|
||||||
<script src="/bg/js/anki-note-builder.js"></script>
|
<script src="/bg/js/anki-note-builder.js"></script>
|
||||||
|
@ -17,10 +17,11 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* global
|
/* global
|
||||||
|
* jp
|
||||||
* wanakana
|
* wanakana
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const jp = (() => {
|
(() => {
|
||||||
const HALFWIDTH_KATAKANA_MAPPING = new Map([
|
const HALFWIDTH_KATAKANA_MAPPING = new Map([
|
||||||
['ヲ', 'ヲヺ-'],
|
['ヲ', 'ヲヺ-'],
|
||||||
['ァ', 'ァ--'],
|
['ァ', 'ァ--'],
|
||||||
@ -80,101 +81,13 @@ const jp = (() => {
|
|||||||
['ン', 'ン--']
|
['ン', 'ン--']
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const HIRAGANA_RANGE = [0x3040, 0x309f];
|
|
||||||
const KATAKANA_RANGE = [0x30a0, 0x30ff];
|
|
||||||
const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE];
|
|
||||||
|
|
||||||
const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff];
|
|
||||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf];
|
|
||||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df];
|
|
||||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f];
|
|
||||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f];
|
|
||||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf];
|
|
||||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef];
|
|
||||||
const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f];
|
|
||||||
const CJK_UNIFIED_IDEOGRAPHS_RANGES = [
|
|
||||||
CJK_UNIFIED_IDEOGRAPHS_RANGE,
|
|
||||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE,
|
|
||||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE,
|
|
||||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE,
|
|
||||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE,
|
|
||||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE,
|
|
||||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE,
|
|
||||||
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE
|
|
||||||
];
|
|
||||||
|
|
||||||
const ITERATION_MARK_CODE_POINT = 0x3005;
|
const ITERATION_MARK_CODE_POINT = 0x3005;
|
||||||
|
|
||||||
// Japanese character ranges, roughly ordered in order of expected frequency
|
|
||||||
const JAPANESE_RANGES = [
|
|
||||||
HIRAGANA_RANGE,
|
|
||||||
KATAKANA_RANGE,
|
|
||||||
|
|
||||||
...CJK_UNIFIED_IDEOGRAPHS_RANGES,
|
// Existing functions
|
||||||
|
|
||||||
[0xff66, 0xff9f], // Halfwidth katakana
|
const isCodePointKanji = jp.isCodePointKanji;
|
||||||
|
const isStringEntirelyKana = jp.isStringEntirelyKana;
|
||||||
[0x30fb, 0x30fc], // Katakana punctuation
|
|
||||||
[0xff61, 0xff65], // Kana punctuation
|
|
||||||
[0x3000, 0x303f], // CJK punctuation
|
|
||||||
|
|
||||||
[0xff10, 0xff19], // Fullwidth numbers
|
|
||||||
[0xff21, 0xff3a], // Fullwidth upper case Latin letters
|
|
||||||
[0xff41, 0xff5a], // Fullwidth lower case Latin letters
|
|
||||||
|
|
||||||
[0xff01, 0xff0f], // Fullwidth punctuation 1
|
|
||||||
[0xff1a, 0xff1f], // Fullwidth punctuation 2
|
|
||||||
[0xff3b, 0xff3f], // Fullwidth punctuation 3
|
|
||||||
[0xff5b, 0xff60], // Fullwidth punctuation 4
|
|
||||||
[0xffe0, 0xffee] // Currency markers
|
|
||||||
];
|
|
||||||
|
|
||||||
|
|
||||||
// Character code testing functions
|
|
||||||
|
|
||||||
function isCodePointKanji(codePoint) {
|
|
||||||
return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES);
|
|
||||||
}
|
|
||||||
|
|
||||||
function isCodePointKana(codePoint) {
|
|
||||||
return isCodePointInRanges(codePoint, KANA_RANGES);
|
|
||||||
}
|
|
||||||
|
|
||||||
function isCodePointJapanese(codePoint) {
|
|
||||||
return isCodePointInRanges(codePoint, JAPANESE_RANGES);
|
|
||||||
}
|
|
||||||
|
|
||||||
function isCodePointInRanges(codePoint, ranges) {
|
|
||||||
for (const [min, max] of ranges) {
|
|
||||||
if (codePoint >= min && codePoint <= max) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// String testing functions
|
|
||||||
|
|
||||||
function isStringEntirelyKana(str) {
|
|
||||||
if (str.length === 0) { return false; }
|
|
||||||
for (const c of str) {
|
|
||||||
if (!isCodePointKana(c.codePointAt(0))) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
function isStringPartiallyJapanese(str) {
|
|
||||||
if (str.length === 0) { return false; }
|
|
||||||
for (const c of str) {
|
|
||||||
if (isCodePointJapanese(c.codePointAt(0))) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Conversion functions
|
// Conversion functions
|
||||||
@ -469,12 +382,7 @@ const jp = (() => {
|
|||||||
|
|
||||||
// Exports
|
// Exports
|
||||||
|
|
||||||
return {
|
Object.assign(jp, {
|
||||||
isCodePointKanji,
|
|
||||||
isCodePointKana,
|
|
||||||
isCodePointJapanese,
|
|
||||||
isStringEntirelyKana,
|
|
||||||
isStringPartiallyJapanese,
|
|
||||||
convertKatakanaToHiragana,
|
convertKatakanaToHiragana,
|
||||||
convertHiraganaToKatakana,
|
convertHiraganaToKatakana,
|
||||||
convertToRomaji,
|
convertToRomaji,
|
||||||
@ -484,5 +392,5 @@ const jp = (() => {
|
|||||||
convertAlphabeticToKana,
|
convertAlphabeticToKana,
|
||||||
distributeFurigana,
|
distributeFurigana,
|
||||||
distributeFuriganaInflected
|
distributeFuriganaInflected
|
||||||
};
|
});
|
||||||
})();
|
})();
|
||||||
|
@ -74,6 +74,7 @@
|
|||||||
<script src="/mixed/js/core.js"></script>
|
<script src="/mixed/js/core.js"></script>
|
||||||
<script src="/mixed/js/dom.js"></script>
|
<script src="/mixed/js/dom.js"></script>
|
||||||
<script src="/mixed/js/api.js"></script>
|
<script src="/mixed/js/api.js"></script>
|
||||||
|
<script src="/mixed/js/japanese.js"></script>
|
||||||
|
|
||||||
<script src="/bg/js/dictionary.js"></script>
|
<script src="/bg/js/dictionary.js"></script>
|
||||||
<script src="/bg/js/handlebars.js"></script>
|
<script src="/bg/js/handlebars.js"></script>
|
||||||
|
@ -1088,6 +1088,7 @@
|
|||||||
<script src="/mixed/js/core.js"></script>
|
<script src="/mixed/js/core.js"></script>
|
||||||
<script src="/mixed/js/dom.js"></script>
|
<script src="/mixed/js/dom.js"></script>
|
||||||
<script src="/mixed/js/api.js"></script>
|
<script src="/mixed/js/api.js"></script>
|
||||||
|
<script src="/mixed/js/japanese.js"></script>
|
||||||
|
|
||||||
<script src="/bg/js/anki.js"></script>
|
<script src="/bg/js/anki.js"></script>
|
||||||
<script src="/bg/js/anki-note-builder.js"></script>
|
<script src="/bg/js/anki-note-builder.js"></script>
|
||||||
|
124
ext/mixed/js/japanese.js
Normal file
124
ext/mixed/js/japanese.js
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2020 Alex Yatskov <alex@foosoft.net>
|
||||||
|
* Author: Alex Yatskov <alex@foosoft.net>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
const jp = (() => {
|
||||||
|
const HIRAGANA_RANGE = [0x3040, 0x309f];
|
||||||
|
const KATAKANA_RANGE = [0x30a0, 0x30ff];
|
||||||
|
const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE];
|
||||||
|
|
||||||
|
const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff];
|
||||||
|
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf];
|
||||||
|
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df];
|
||||||
|
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f];
|
||||||
|
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f];
|
||||||
|
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf];
|
||||||
|
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef];
|
||||||
|
const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f];
|
||||||
|
const CJK_UNIFIED_IDEOGRAPHS_RANGES = [
|
||||||
|
CJK_UNIFIED_IDEOGRAPHS_RANGE,
|
||||||
|
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE,
|
||||||
|
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE,
|
||||||
|
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE,
|
||||||
|
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE,
|
||||||
|
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE,
|
||||||
|
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE,
|
||||||
|
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE
|
||||||
|
];
|
||||||
|
|
||||||
|
// Japanese character ranges, roughly ordered in order of expected frequency
|
||||||
|
const JAPANESE_RANGES = [
|
||||||
|
HIRAGANA_RANGE,
|
||||||
|
KATAKANA_RANGE,
|
||||||
|
|
||||||
|
...CJK_UNIFIED_IDEOGRAPHS_RANGES,
|
||||||
|
|
||||||
|
[0xff66, 0xff9f], // Halfwidth katakana
|
||||||
|
|
||||||
|
[0x30fb, 0x30fc], // Katakana punctuation
|
||||||
|
[0xff61, 0xff65], // Kana punctuation
|
||||||
|
[0x3000, 0x303f], // CJK punctuation
|
||||||
|
|
||||||
|
[0xff10, 0xff19], // Fullwidth numbers
|
||||||
|
[0xff21, 0xff3a], // Fullwidth upper case Latin letters
|
||||||
|
[0xff41, 0xff5a], // Fullwidth lower case Latin letters
|
||||||
|
|
||||||
|
[0xff01, 0xff0f], // Fullwidth punctuation 1
|
||||||
|
[0xff1a, 0xff1f], // Fullwidth punctuation 2
|
||||||
|
[0xff3b, 0xff3f], // Fullwidth punctuation 3
|
||||||
|
[0xff5b, 0xff60], // Fullwidth punctuation 4
|
||||||
|
[0xffe0, 0xffee] // Currency markers
|
||||||
|
];
|
||||||
|
|
||||||
|
|
||||||
|
// Character code testing functions
|
||||||
|
|
||||||
|
function isCodePointKanji(codePoint) {
|
||||||
|
return isCodePointInRanges(codePoint, CJK_UNIFIED_IDEOGRAPHS_RANGES);
|
||||||
|
}
|
||||||
|
|
||||||
|
function isCodePointKana(codePoint) {
|
||||||
|
return isCodePointInRanges(codePoint, KANA_RANGES);
|
||||||
|
}
|
||||||
|
|
||||||
|
function isCodePointJapanese(codePoint) {
|
||||||
|
return isCodePointInRanges(codePoint, JAPANESE_RANGES);
|
||||||
|
}
|
||||||
|
|
||||||
|
function isCodePointInRanges(codePoint, ranges) {
|
||||||
|
for (const [min, max] of ranges) {
|
||||||
|
if (codePoint >= min && codePoint <= max) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// String testing functions
|
||||||
|
|
||||||
|
function isStringEntirelyKana(str) {
|
||||||
|
if (str.length === 0) { return false; }
|
||||||
|
for (const c of str) {
|
||||||
|
if (!isCodePointKana(c.codePointAt(0))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function isStringPartiallyJapanese(str) {
|
||||||
|
if (str.length === 0) { return false; }
|
||||||
|
for (const c of str) {
|
||||||
|
if (isCodePointJapanese(c.codePointAt(0))) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Exports
|
||||||
|
|
||||||
|
return {
|
||||||
|
isCodePointKanji,
|
||||||
|
isCodePointKana,
|
||||||
|
isCodePointJapanese,
|
||||||
|
isStringEntirelyKana,
|
||||||
|
isStringPartiallyJapanese
|
||||||
|
};
|
||||||
|
})();
|
@ -22,6 +22,7 @@ const {VM} = require('./yomichan-vm');
|
|||||||
const vm = new VM();
|
const vm = new VM();
|
||||||
vm.execute([
|
vm.execute([
|
||||||
'mixed/lib/wanakana.min.js',
|
'mixed/lib/wanakana.min.js',
|
||||||
|
'mixed/js/japanese.js',
|
||||||
'bg/js/japanese.js'
|
'bg/js/japanese.js'
|
||||||
]);
|
]);
|
||||||
const jp = vm.get('jp');
|
const jp = vm.get('jp');
|
||||||
|
Loading…
Reference in New Issue
Block a user