Add support for collapsing emphatic character sequences
This commit is contained in:
parent
f177e3699a
commit
42a2917bf7
@ -388,7 +388,8 @@
|
||||
"convertNumericCharacters",
|
||||
"convertAlphabeticCharacters",
|
||||
"convertHiraganaToKatakana",
|
||||
"convertKatakanaToHiragana"
|
||||
"convertKatakanaToHiragana",
|
||||
"collapseEmphaticSequences"
|
||||
],
|
||||
"properties": {
|
||||
"convertHalfWidthCharacters": {
|
||||
@ -415,6 +416,11 @@
|
||||
"type": "string",
|
||||
"enum": ["false", "true", "variant"],
|
||||
"default": "variant"
|
||||
},
|
||||
"collapseEmphaticSequences": {
|
||||
"type": "string",
|
||||
"enum": ["false", "true", "full"],
|
||||
"default": "false"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -83,6 +83,8 @@
|
||||
|
||||
const ITERATION_MARK_CODE_POINT = 0x3005;
|
||||
|
||||
const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
|
||||
const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
|
||||
|
||||
// Existing functions
|
||||
|
||||
@ -373,6 +375,39 @@
|
||||
}
|
||||
|
||||
|
||||
// Miscellaneous
|
||||
|
||||
/**
 * Collapses runs of the emphatic small tsu character (hiragana `っ` or
 * katakana `ッ`) in a string, e.g. `かっっっこいい` -> `かっこいい`.
 *
 * A run is a sequence of the *same* small tsu code point; a hiragana small
 * tsu directly followed by a katakana one starts a new run.
 *
 * @param {string} sourceText The text to process.
 * @param {boolean} fullCollapse When false, each run is reduced to a single
 *   character. When true, every character of the run is removed.
 * @param {?object} [sourceMap=null] Optional object exposing
 *   `combine(index, count)`. It is called once for every character dropped
 *   from the output. NOTE(review): this is assumed to merge `count` entries
 *   following `index` into the entry at `index` so the map stays aligned
 *   with the shortened text - confirm against the source map implementation.
 * @returns {string} The text with emphatic sequences collapsed.
 */
function collapseEmphaticSequences(sourceText, fullCollapse, sourceMap=null) {
    let result = '';
    // Code point of the run currently being collapsed, or -1 outside a run.
    let collapseCodePoint = -1;
    const hasSourceMap = (sourceMap !== null);
    for (const char of sourceText) {
        const c = char.codePointAt(0);

        if (c !== HIRAGANA_SMALL_TSU_CODE_POINT && c !== KATAKANA_SMALL_TSU_CODE_POINT) {
            // Ordinary character: ends any active run and is always kept.
            collapseCodePoint = -1;
            result += char;
            continue;
        }

        if (collapseCodePoint !== c) {
            // First character of a new run; it survives unless fully collapsing.
            collapseCodePoint = c;
            if (!fullCollapse) {
                result += char;
                continue;
            }
        }

        // Reaching this point means the character is dropped from the output.
        if (hasSourceMap) {
            // Fold the dropped character into the last emitted one. When nothing
            // has been emitted yet (a fully collapsed run at the very start of
            // the text) there is no previous character, so clamp to index 0;
            // skipping the update here would leave the map one entry longer
            // than the output for every leading character removed.
            sourceMap.combine(Math.max(0, result.length - 1), 1);
        }
    }
    return result;
}
|
||||
|
||||
|
||||
// Exports
|
||||
|
||||
Object.assign(jp, {
|
||||
@ -384,6 +419,7 @@
|
||||
convertHalfWidthKanaToFullWidth,
|
||||
convertAlphabeticToKana,
|
||||
distributeFurigana,
|
||||
distributeFuriganaInflected
|
||||
distributeFuriganaInflected,
|
||||
collapseEmphaticSequences
|
||||
});
|
||||
})();
|
||||
|
@ -171,7 +171,8 @@ function profileOptionsCreateDefaults() {
|
||||
convertNumericCharacters: 'false',
|
||||
convertAlphabeticCharacters: 'false',
|
||||
convertHiraganaToKatakana: 'false',
|
||||
convertKatakanaToHiragana: 'variant'
|
||||
convertKatakanaToHiragana: 'variant',
|
||||
collapseEmphaticSequences: 'false'
|
||||
},
|
||||
|
||||
dictionaries: {},
|
||||
|
@ -119,6 +119,7 @@ async function formRead(options) {
|
||||
options.translation.convertAlphabeticCharacters = $('#translation-convert-alphabetic-characters').val();
|
||||
options.translation.convertHiraganaToKatakana = $('#translation-convert-hiragana-to-katakana').val();
|
||||
options.translation.convertKatakanaToHiragana = $('#translation-convert-katakana-to-hiragana').val();
|
||||
options.translation.collapseEmphaticSequences = $('#translation-collapse-emphatic-sequences').val();
|
||||
|
||||
options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked');
|
||||
options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked');
|
||||
@ -200,6 +201,7 @@ async function formWrite(options) {
|
||||
$('#translation-convert-alphabetic-characters').val(options.translation.convertAlphabeticCharacters);
|
||||
$('#translation-convert-hiragana-to-katakana').val(options.translation.convertHiraganaToKatakana);
|
||||
$('#translation-convert-katakana-to-hiragana').val(options.translation.convertKatakanaToHiragana);
|
||||
$('#translation-collapse-emphatic-sequences').val(options.translation.collapseEmphaticSequences);
|
||||
|
||||
$('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser);
|
||||
$('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser);
|
||||
|
@ -348,17 +348,27 @@ class Translator {
|
||||
|
||||
getAllDeinflections(text, options) {
|
||||
const translationOptions = options.translation;
|
||||
const collapseEmphaticOptions = [[false, false]];
|
||||
switch (translationOptions.collapseEmphaticSequences) {
|
||||
case 'true':
|
||||
collapseEmphaticOptions.push([true, false]);
|
||||
break;
|
||||
case 'full':
|
||||
collapseEmphaticOptions.push([true, true]);
|
||||
break;
|
||||
}
|
||||
const textOptionVariantArray = [
|
||||
Translator.getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters),
|
||||
Translator.getTextOptionEntryVariants(translationOptions.convertNumericCharacters),
|
||||
Translator.getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters),
|
||||
Translator.getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana),
|
||||
Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana)
|
||||
Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana),
|
||||
collapseEmphaticOptions
|
||||
];
|
||||
|
||||
const deinflections = [];
|
||||
const used = new Set();
|
||||
for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) {
|
||||
for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of Translator.getArrayVariants(textOptionVariantArray)) {
|
||||
let text2 = text;
|
||||
const sourceMap = new TextSourceMap(text2);
|
||||
if (halfWidth) {
|
||||
@ -376,6 +386,9 @@ class Translator {
|
||||
if (hiragana) {
|
||||
text2 = jp.convertKatakanaToHiragana(text2);
|
||||
}
|
||||
if (collapseEmphatic) {
|
||||
text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap);
|
||||
}
|
||||
|
||||
for (let i = text2.length; i > 0; --i) {
|
||||
const text2Substring = text2.substring(0, i);
|
||||
|
@ -427,7 +427,7 @@
|
||||
|
||||
<p class="help-block">
|
||||
The conversion options below are listed in the order that the conversions are applied to the input text.
|
||||
Each conversion has three possible values:
|
||||
Conversions commonly have three possible values:
|
||||
</p>
|
||||
|
||||
<ul class="help-block">
|
||||
@ -490,6 +490,15 @@
|
||||
<option value="variant">Use both variants</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="translation-collapse-emphatic-sequences">Collapse emphatic character sequences <span class="label-light">(かっっっこいい → かっこいい)</span></label>
|
||||
<select class="form-control" id="translation-collapse-emphatic-sequences">
|
||||
<option value="false">Disabled</option>
|
||||
<option value="true">Collapse into single character</option>
|
||||
<option value="full">Remove all characters</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="popup-content-scanning">
|
||||
|
Loading…
x
Reference in New Issue
Block a user