Assume and propagate strings (empty string instead of null/undefined) for readings and furigana in text parsing

This commit is contained in:
siikamiika 2020-04-13 22:55:33 +03:00
parent 25bc86c3ce
commit f93dc85710
5 changed files with 36 additions and 64 deletions

View File

@ -334,7 +334,7 @@ class Backend {
}
text = text.substring(source.length);
} else {
const reading = jp.convertReading(text[0], null, options.parsing.readingMode);
const reading = jp.convertReading(text[0], '', options.parsing.readingMode);
term.push({text: text[0], reading});
text = text.substring(1);
}
@ -349,24 +349,20 @@ class Backend {
for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
const result = [];
for (const parsedLine of parsedLines) {
for (const {expression, reading, source} of parsedLine) {
for (let {expression, reading, source} of parsedLine) {
const term = [];
if (expression !== null && reading !== null) {
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(
expression,
jp.convertKatakanaToHiragana(reading),
source
)) {
const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
term.push({text: text2, reading: reading2});
}
} else {
const reading2 = jp.convertReading(source, null, options.parsing.readingMode);
term.push({text: source, reading: reading2});
if (expression === '') { expression = source; }
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(
expression,
jp.convertKatakanaToHiragana(reading),
source
)) {
const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
term.push({text: text2, reading: reading2});
}
result.push(term);
}
result.push([{text: '\n'}]);
result.push([{text: '\n', reading: ''}]);
}
results.push([mecabName, result]);
}

View File

@ -127,9 +127,9 @@
function convertReading(expressionFragment, readingFragment, readingMode) {
switch (readingMode) {
case 'hiragana':
return convertKatakanaToHiragana(readingFragment || '');
return convertKatakanaToHiragana(readingFragment);
case 'katakana':
return convertHiraganaToKatakana(readingFragment || '');
return convertHiraganaToKatakana(readingFragment);
case 'romaji':
if (readingFragment) {
return convertToRomaji(readingFragment);
@ -140,7 +140,7 @@
}
return readingFragment;
case 'none':
return null;
return '';
default:
return readingFragment;
}
@ -300,7 +300,7 @@
const readingLeft = reading2.substring(group.text.length);
const segs = segmentize(readingLeft, groups.splice(1));
if (segs) {
return [{text: group.text}].concat(segs);
return [{text: group.text, furigana: ''}].concat(segs);
}
}
} else {
@ -368,7 +368,7 @@
}
if (stemLength !== source.length) {
output.push({text: source.substring(stemLength)});
output.push({text: source.substring(stemLength), furigana: ''});
}
return output;

View File

@ -36,7 +36,7 @@ class QueryParserGenerator {
const termContainer = this._templateHandler.instantiate(preview ? 'term-preview' : 'term');
for (const segment of term) {
if (!segment.text.trim()) { continue; }
if (!segment.reading || !segment.reading.trim()) {
if (!segment.reading.trim()) {
termContainer.appendChild(this.createSegmentText(segment.text));
} else {
termContainer.appendChild(this.createSegment(segment));

View File

@ -140,7 +140,7 @@ class QueryParser extends TextScanner {
const previewTerms = [];
for (let i = 0, ii = text.length; i < ii; i += 2) {
const tempText = text.substring(i, i + 2);
previewTerms.push([{text: tempText}]);
previewTerms.push([{text: tempText, reading: ''}]);
}
this.queryParser.textContent = '';
this.queryParser.appendChild(this.queryParserGenerator.createParseResult(previewTerms, true));

View File

@ -176,19 +176,19 @@ function testConvertReading() {
[['アリガトウ', 'アリガトウ', 'hiragana'], 'ありがとう'],
[['アリガトウ', 'アリガトウ', 'katakana'], 'アリガトウ'],
[['アリガトウ', 'アリガトウ', 'romaji'], 'arigatou'],
[['アリガトウ', 'アリガトウ', 'none'], null],
[['アリガトウ', 'アリガトウ', 'none'], ''],
[['アリガトウ', 'アリガトウ', 'default'], 'アリガトウ'],
[['ありがとう', 'ありがとう', 'hiragana'], 'ありがとう'],
[['ありがとう', 'ありがとう', 'katakana'], 'アリガトウ'],
[['ありがとう', 'ありがとう', 'romaji'], 'arigatou'],
[['ありがとう', 'ありがとう', 'none'], null],
[['ありがとう', 'ありがとう', 'none'], ''],
[['ありがとう', 'ありがとう', 'default'], 'ありがとう'],
[['有り難う', 'ありがとう', 'hiragana'], 'ありがとう'],
[['有り難う', 'ありがとう', 'katakana'], 'アリガトウ'],
[['有り難う', 'ありがとう', 'romaji'], 'arigatou'],
[['有り難う', 'ありがとう', 'none'], null],
[['有り難う', 'ありがとう', 'none'], ''],
[['有り難う', 'ありがとう', 'default'], 'ありがとう'],
// Cases with falsy readings
@ -196,40 +196,16 @@ function testConvertReading() {
[['ありがとう', '', 'hiragana'], ''],
[['ありがとう', '', 'katakana'], ''],
[['ありがとう', '', 'romaji'], 'arigatou'],
[['ありがとう', '', 'none'], null],
[['ありがとう', '', 'none'], ''],
[['ありがとう', '', 'default'], ''],
[['ありがとう', null, 'hiragana'], ''],
[['ありがとう', null, 'katakana'], ''],
[['ありがとう', null, 'romaji'], 'arigatou'],
[['ありがとう', null, 'none'], null],
[['ありがとう', null, 'default'], null],
[['ありがとう', void 0, 'hiragana'], ''],
[['ありがとう', void 0, 'katakana'], ''],
[['ありがとう', void 0, 'romaji'], 'arigatou'],
[['ありがとう', void 0, 'none'], null],
[['ありがとう', void 0, 'default'], void 0],
// Cases with falsy readings and kanji expressions
[['有り難う', '', 'hiragana'], ''],
[['有り難う', '', 'katakana'], ''],
[['有り難う', '', 'romaji'], ''],
[['有り難う', '', 'none'], null],
[['有り難う', '', 'default'], ''],
[['有り難う', null, 'hiragana'], ''],
[['有り難う', null, 'katakana'], ''],
[['有り難う', null, 'romaji'], null],
[['有り難う', null, 'none'], null],
[['有り難う', null, 'default'], null],
[['有り難う', void 0, 'hiragana'], ''],
[['有り難う', void 0, 'katakana'], ''],
[['有り難う', void 0, 'romaji'], void 0],
[['有り難う', void 0, 'none'], null],
[['有り難う', void 0, 'default'], void 0]
[['有り難う', '', 'none'], ''],
[['有り難う', '', 'default'], '']
];
for (const [[expressionFragment, readingFragment, readingMode], expected] of data) {
@ -303,9 +279,9 @@ function testDistributeFurigana() {
['有り難う', 'ありがとう'],
[
{text: '有', furigana: 'あ'},
{text: 'り'},
{text: 'り', furigana: ''},
{text: '難', furigana: 'がと'},
{text: 'う'}
{text: 'う', furigana: ''}
]
],
[
@ -317,23 +293,23 @@ function testDistributeFurigana() {
[
['お祝い', 'おいわい'],
[
{text: 'お'},
{text: 'お', furigana: ''},
{text: '祝', furigana: 'いわ'},
{text: 'い'}
{text: 'い', furigana: ''}
]
],
[
['美味しい', 'おいしい'],
[
{text: '美味', furigana: 'おい'},
{text: 'しい'}
{text: 'しい', furigana: ''}
]
],
[
['食べ物', 'たべもの'],
[
{text: '食', furigana: 'た'},
{text: 'べ'},
{text: 'べ', furigana: ''},
{text: '物', furigana: 'もの'}
]
],
@ -341,9 +317,9 @@ function testDistributeFurigana() {
['試し切り', 'ためしぎり'],
[
{text: '試', furigana: 'ため'},
{text: 'し'},
{text: 'し', furigana: ''},
{text: '切', furigana: 'ぎ'},
{text: 'り'}
{text: 'り', furigana: ''}
]
],
// Ambiguous
@ -373,16 +349,16 @@ function testDistributeFuriganaInflected() {
['美味しい', 'おいしい', '美味しかた'],
[
{text: '美味', furigana: 'おい'},
{text: 'し'},
{text: 'かた'}
{text: 'し', furigana: ''},
{text: 'かた', furigana: ''}
]
],
[
['食べる', 'たべる', '食べた'],
[
{text: '食', furigana: 'た'},
{text: 'べ'},
{text: 'た'}
{text: 'べ', furigana: ''},
{text: 'た', furigana: ''}
]
]
];