Add support for collapsing emphatic character sequences

This commit is contained in:
toasted-nutbread 2020-04-10 11:56:18 -04:00
parent f177e3699a
commit 42a2917bf7
6 changed files with 73 additions and 6 deletions

View File

@ -388,7 +388,8 @@
"convertNumericCharacters", "convertNumericCharacters",
"convertAlphabeticCharacters", "convertAlphabeticCharacters",
"convertHiraganaToKatakana", "convertHiraganaToKatakana",
"convertKatakanaToHiragana" "convertKatakanaToHiragana",
"collapseEmphaticSequences"
], ],
"properties": { "properties": {
"convertHalfWidthCharacters": { "convertHalfWidthCharacters": {
@ -415,6 +416,11 @@
"type": "string", "type": "string",
"enum": ["false", "true", "variant"], "enum": ["false", "true", "variant"],
"default": "variant" "default": "variant"
},
"collapseEmphaticSequences": {
"type": "string",
"enum": ["false", "true", "full"],
"default": "false"
} }
} }
}, },

View File

@ -83,6 +83,8 @@
const ITERATION_MARK_CODE_POINT = 0x3005; const ITERATION_MARK_CODE_POINT = 0x3005;
// U+3063 HIRAGANA LETTER SMALL TU (っ)
const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
// U+30C3 KATAKANA LETTER SMALL TU (ッ)
const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
// Existing functions // Existing functions
@ -373,6 +375,39 @@
} }
// Miscellaneous
/**
 * Collapses emphatic runs of the small tsu character (っ or ッ).
 *
 * A run is a sequence of consecutive, identical small tsu characters; a
 * hiragana run directly followed by a katakana run counts as two runs.
 *
 * @param {string} sourceText The text to process.
 * @param {boolean} fullCollapse When false, each run is reduced to a single
 *   character (かっっっこいい -> かっこいい). When true, every character of
 *   the run is removed (かっっっこいい -> かこいい).
 * @param {?object} sourceMap Optional object exposing `combine(index, count)`.
 *   It is called once per removed character so the map can stay aligned with
 *   the returned text. NOTE(review): assumed to merge the entry at `index`
 *   with the following `count` entries - confirm against TextSourceMap.
 * @returns {string} The text with emphatic runs collapsed.
 */
function collapseEmphaticSequences(sourceText, fullCollapse, sourceMap=null) {
    let result = '';
    // Code point of the run currently being collapsed, or -1 when not in a run.
    let collapseCodePoint = -1;
    const hasSourceMap = (sourceMap !== null);
    for (const char of sourceText) {
        const c = char.codePointAt(0);
        if (c === HIRAGANA_SMALL_TSU_CODE_POINT || c === KATAKANA_SMALL_TSU_CODE_POINT) {
            if (collapseCodePoint !== c) {
                // First character of a new run: kept unless fully collapsing.
                collapseCodePoint = c;
                if (!fullCollapse) {
                    result += char;
                    continue;
                }
            }
        } else {
            collapseCodePoint = -1;
            result += char;
            continue;
        }

        // The current character is dropped from the output.
        if (hasSourceMap) {
            // Fold the dropped character into the previously emitted one.
            // When nothing has been emitted yet (a leading run removed by a
            // full collapse) the index is clamped to 0, so the dropped
            // character is merged with whatever comes next instead of being
            // left behind as a stale entry.
            sourceMap.combine(Math.max(0, result.length - 1), 1);
        }
    }
    return result;
}
// Exports // Exports
Object.assign(jp, { Object.assign(jp, {
@ -384,6 +419,7 @@
convertHalfWidthKanaToFullWidth, convertHalfWidthKanaToFullWidth,
convertAlphabeticToKana, convertAlphabeticToKana,
distributeFurigana, distributeFurigana,
distributeFuriganaInflected distributeFuriganaInflected,
collapseEmphaticSequences
}); });
})(); })();

View File

@ -171,7 +171,8 @@ function profileOptionsCreateDefaults() {
convertNumericCharacters: 'false', convertNumericCharacters: 'false',
convertAlphabeticCharacters: 'false', convertAlphabeticCharacters: 'false',
convertHiraganaToKatakana: 'false', convertHiraganaToKatakana: 'false',
convertKatakanaToHiragana: 'variant' convertKatakanaToHiragana: 'variant',
collapseEmphaticSequences: 'false'
}, },
dictionaries: {}, dictionaries: {},

View File

@ -119,6 +119,7 @@ async function formRead(options) {
options.translation.convertAlphabeticCharacters = $('#translation-convert-alphabetic-characters').val(); options.translation.convertAlphabeticCharacters = $('#translation-convert-alphabetic-characters').val();
options.translation.convertHiraganaToKatakana = $('#translation-convert-hiragana-to-katakana').val(); options.translation.convertHiraganaToKatakana = $('#translation-convert-hiragana-to-katakana').val();
options.translation.convertKatakanaToHiragana = $('#translation-convert-katakana-to-hiragana').val(); options.translation.convertKatakanaToHiragana = $('#translation-convert-katakana-to-hiragana').val();
options.translation.collapseEmphaticSequences = $('#translation-collapse-emphatic-sequences').val();
options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked'); options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked');
options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked'); options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked');
@ -200,6 +201,7 @@ async function formWrite(options) {
$('#translation-convert-alphabetic-characters').val(options.translation.convertAlphabeticCharacters); $('#translation-convert-alphabetic-characters').val(options.translation.convertAlphabeticCharacters);
$('#translation-convert-hiragana-to-katakana').val(options.translation.convertHiraganaToKatakana); $('#translation-convert-hiragana-to-katakana').val(options.translation.convertHiraganaToKatakana);
$('#translation-convert-katakana-to-hiragana').val(options.translation.convertKatakanaToHiragana); $('#translation-convert-katakana-to-hiragana').val(options.translation.convertKatakanaToHiragana);
$('#translation-collapse-emphatic-sequences').val(options.translation.collapseEmphaticSequences);
$('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser); $('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser);
$('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser); $('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser);

View File

@ -348,17 +348,27 @@ class Translator {
getAllDeinflections(text, options) { getAllDeinflections(text, options) {
const translationOptions = options.translation; const translationOptions = options.translation;
const collapseEmphaticOptions = [[false, false]];
switch (translationOptions.collapseEmphaticSequences) {
case 'true':
collapseEmphaticOptions.push([true, false]);
break;
case 'full':
collapseEmphaticOptions.push([true, true]);
break;
}
const textOptionVariantArray = [ const textOptionVariantArray = [
Translator.getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters), Translator.getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters),
Translator.getTextOptionEntryVariants(translationOptions.convertNumericCharacters), Translator.getTextOptionEntryVariants(translationOptions.convertNumericCharacters),
Translator.getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters), Translator.getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters),
Translator.getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana), Translator.getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana),
Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana) Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana),
collapseEmphaticOptions
]; ];
const deinflections = []; const deinflections = [];
const used = new Set(); const used = new Set();
for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) { for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of Translator.getArrayVariants(textOptionVariantArray)) {
let text2 = text; let text2 = text;
const sourceMap = new TextSourceMap(text2); const sourceMap = new TextSourceMap(text2);
if (halfWidth) { if (halfWidth) {
@ -376,6 +386,9 @@ class Translator {
if (hiragana) { if (hiragana) {
text2 = jp.convertKatakanaToHiragana(text2); text2 = jp.convertKatakanaToHiragana(text2);
} }
if (collapseEmphatic) {
text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap);
}
for (let i = text2.length; i > 0; --i) { for (let i = text2.length; i > 0; --i) {
const text2Substring = text2.substring(0, i); const text2Substring = text2.substring(0, i);

View File

@ -427,7 +427,7 @@
<p class="help-block"> <p class="help-block">
The conversion options below are listed in the order that the conversions are applied to the input text. The conversion options below are listed in the order that the conversions are applied to the input text.
Each conversion has three possible values: Conversions commonly have three possible values:
</p> </p>
<ul class="help-block"> <ul class="help-block">
@ -490,6 +490,15 @@
<option value="variant">Use both variants</option> <option value="variant">Use both variants</option>
</select> </select>
</div> </div>
<div class="form-group">
<label for="translation-collapse-emphatic-sequences">Collapse emphatic character sequences <span class="label-light">(かっっっこいい &rarr; かっこいい)</span></label>
<select class="form-control" id="translation-collapse-emphatic-sequences">
<option value="false">Disabled</option>
<option value="true">Collapse into single character</option>
<option value="full">Remove all characters</option>
</select>
</div>
</div> </div>
<div id="popup-content-scanning"> <div id="popup-content-scanning">