assume and propagate strings for text parsing
This commit is contained in:
parent
25bc86c3ce
commit
f93dc85710
@ -334,7 +334,7 @@ class Backend {
|
||||
}
|
||||
text = text.substring(source.length);
|
||||
} else {
|
||||
const reading = jp.convertReading(text[0], null, options.parsing.readingMode);
|
||||
const reading = jp.convertReading(text[0], '', options.parsing.readingMode);
|
||||
term.push({text: text[0], reading});
|
||||
text = text.substring(1);
|
||||
}
|
||||
@ -349,24 +349,20 @@ class Backend {
|
||||
for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
|
||||
const result = [];
|
||||
for (const parsedLine of parsedLines) {
|
||||
for (const {expression, reading, source} of parsedLine) {
|
||||
for (let {expression, reading, source} of parsedLine) {
|
||||
const term = [];
|
||||
if (expression !== null && reading !== null) {
|
||||
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(
|
||||
expression,
|
||||
jp.convertKatakanaToHiragana(reading),
|
||||
source
|
||||
)) {
|
||||
const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
|
||||
term.push({text: text2, reading: reading2});
|
||||
}
|
||||
} else {
|
||||
const reading2 = jp.convertReading(source, null, options.parsing.readingMode);
|
||||
term.push({text: source, reading: reading2});
|
||||
if (expression === '') { expression = source; }
|
||||
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(
|
||||
expression,
|
||||
jp.convertKatakanaToHiragana(reading),
|
||||
source
|
||||
)) {
|
||||
const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
|
||||
term.push({text: text2, reading: reading2});
|
||||
}
|
||||
result.push(term);
|
||||
}
|
||||
result.push([{text: '\n'}]);
|
||||
result.push([{text: '\n', reading: ''}]);
|
||||
}
|
||||
results.push([mecabName, result]);
|
||||
}
|
||||
|
@ -127,9 +127,9 @@
|
||||
function convertReading(expressionFragment, readingFragment, readingMode) {
|
||||
switch (readingMode) {
|
||||
case 'hiragana':
|
||||
return convertKatakanaToHiragana(readingFragment || '');
|
||||
return convertKatakanaToHiragana(readingFragment);
|
||||
case 'katakana':
|
||||
return convertHiraganaToKatakana(readingFragment || '');
|
||||
return convertHiraganaToKatakana(readingFragment);
|
||||
case 'romaji':
|
||||
if (readingFragment) {
|
||||
return convertToRomaji(readingFragment);
|
||||
@ -140,7 +140,7 @@
|
||||
}
|
||||
return readingFragment;
|
||||
case 'none':
|
||||
return null;
|
||||
return '';
|
||||
default:
|
||||
return readingFragment;
|
||||
}
|
||||
@ -300,7 +300,7 @@
|
||||
const readingLeft = reading2.substring(group.text.length);
|
||||
const segs = segmentize(readingLeft, groups.splice(1));
|
||||
if (segs) {
|
||||
return [{text: group.text}].concat(segs);
|
||||
return [{text: group.text, furigana: ''}].concat(segs);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -368,7 +368,7 @@
|
||||
}
|
||||
|
||||
if (stemLength !== source.length) {
|
||||
output.push({text: source.substring(stemLength)});
|
||||
output.push({text: source.substring(stemLength), furigana: ''});
|
||||
}
|
||||
|
||||
return output;
|
||||
|
@ -36,7 +36,7 @@ class QueryParserGenerator {
|
||||
const termContainer = this._templateHandler.instantiate(preview ? 'term-preview' : 'term');
|
||||
for (const segment of term) {
|
||||
if (!segment.text.trim()) { continue; }
|
||||
if (!segment.reading || !segment.reading.trim()) {
|
||||
if (!segment.reading.trim()) {
|
||||
termContainer.appendChild(this.createSegmentText(segment.text));
|
||||
} else {
|
||||
termContainer.appendChild(this.createSegment(segment));
|
||||
|
@ -140,7 +140,7 @@ class QueryParser extends TextScanner {
|
||||
const previewTerms = [];
|
||||
for (let i = 0, ii = text.length; i < ii; i += 2) {
|
||||
const tempText = text.substring(i, i + 2);
|
||||
previewTerms.push([{text: tempText}]);
|
||||
previewTerms.push([{text: tempText, reading: ''}]);
|
||||
}
|
||||
this.queryParser.textContent = '';
|
||||
this.queryParser.appendChild(this.queryParserGenerator.createParseResult(previewTerms, true));
|
||||
|
@ -176,19 +176,19 @@ function testConvertReading() {
|
||||
[['アリガトウ', 'アリガトウ', 'hiragana'], 'ありがとう'],
|
||||
[['アリガトウ', 'アリガトウ', 'katakana'], 'アリガトウ'],
|
||||
[['アリガトウ', 'アリガトウ', 'romaji'], 'arigatou'],
|
||||
[['アリガトウ', 'アリガトウ', 'none'], null],
|
||||
[['アリガトウ', 'アリガトウ', 'none'], ''],
|
||||
[['アリガトウ', 'アリガトウ', 'default'], 'アリガトウ'],
|
||||
|
||||
[['ありがとう', 'ありがとう', 'hiragana'], 'ありがとう'],
|
||||
[['ありがとう', 'ありがとう', 'katakana'], 'アリガトウ'],
|
||||
[['ありがとう', 'ありがとう', 'romaji'], 'arigatou'],
|
||||
[['ありがとう', 'ありがとう', 'none'], null],
|
||||
[['ありがとう', 'ありがとう', 'none'], ''],
|
||||
[['ありがとう', 'ありがとう', 'default'], 'ありがとう'],
|
||||
|
||||
[['有り難う', 'ありがとう', 'hiragana'], 'ありがとう'],
|
||||
[['有り難う', 'ありがとう', 'katakana'], 'アリガトウ'],
|
||||
[['有り難う', 'ありがとう', 'romaji'], 'arigatou'],
|
||||
[['有り難う', 'ありがとう', 'none'], null],
|
||||
[['有り難う', 'ありがとう', 'none'], ''],
|
||||
[['有り難う', 'ありがとう', 'default'], 'ありがとう'],
|
||||
|
||||
// Cases with falsy readings
|
||||
@ -196,40 +196,16 @@ function testConvertReading() {
|
||||
[['ありがとう', '', 'hiragana'], ''],
|
||||
[['ありがとう', '', 'katakana'], ''],
|
||||
[['ありがとう', '', 'romaji'], 'arigatou'],
|
||||
[['ありがとう', '', 'none'], null],
|
||||
[['ありがとう', '', 'none'], ''],
|
||||
[['ありがとう', '', 'default'], ''],
|
||||
|
||||
[['ありがとう', null, 'hiragana'], ''],
|
||||
[['ありがとう', null, 'katakana'], ''],
|
||||
[['ありがとう', null, 'romaji'], 'arigatou'],
|
||||
[['ありがとう', null, 'none'], null],
|
||||
[['ありがとう', null, 'default'], null],
|
||||
|
||||
[['ありがとう', void 0, 'hiragana'], ''],
|
||||
[['ありがとう', void 0, 'katakana'], ''],
|
||||
[['ありがとう', void 0, 'romaji'], 'arigatou'],
|
||||
[['ありがとう', void 0, 'none'], null],
|
||||
[['ありがとう', void 0, 'default'], void 0],
|
||||
|
||||
// Cases with falsy readings and kanji expressions
|
||||
|
||||
[['有り難う', '', 'hiragana'], ''],
|
||||
[['有り難う', '', 'katakana'], ''],
|
||||
[['有り難う', '', 'romaji'], ''],
|
||||
[['有り難う', '', 'none'], null],
|
||||
[['有り難う', '', 'default'], ''],
|
||||
|
||||
[['有り難う', null, 'hiragana'], ''],
|
||||
[['有り難う', null, 'katakana'], ''],
|
||||
[['有り難う', null, 'romaji'], null],
|
||||
[['有り難う', null, 'none'], null],
|
||||
[['有り難う', null, 'default'], null],
|
||||
|
||||
[['有り難う', void 0, 'hiragana'], ''],
|
||||
[['有り難う', void 0, 'katakana'], ''],
|
||||
[['有り難う', void 0, 'romaji'], void 0],
|
||||
[['有り難う', void 0, 'none'], null],
|
||||
[['有り難う', void 0, 'default'], void 0]
|
||||
[['有り難う', '', 'none'], ''],
|
||||
[['有り難う', '', 'default'], '']
|
||||
];
|
||||
|
||||
for (const [[expressionFragment, readingFragment, readingMode], expected] of data) {
|
||||
@ -303,9 +279,9 @@ function testDistributeFurigana() {
|
||||
['有り難う', 'ありがとう'],
|
||||
[
|
||||
{text: '有', furigana: 'あ'},
|
||||
{text: 'り'},
|
||||
{text: 'り', furigana: ''},
|
||||
{text: '難', furigana: 'がと'},
|
||||
{text: 'う'}
|
||||
{text: 'う', furigana: ''}
|
||||
]
|
||||
],
|
||||
[
|
||||
@ -317,23 +293,23 @@ function testDistributeFurigana() {
|
||||
[
|
||||
['お祝い', 'おいわい'],
|
||||
[
|
||||
{text: 'お'},
|
||||
{text: 'お', furigana: ''},
|
||||
{text: '祝', furigana: 'いわ'},
|
||||
{text: 'い'}
|
||||
{text: 'い', furigana: ''}
|
||||
]
|
||||
],
|
||||
[
|
||||
['美味しい', 'おいしい'],
|
||||
[
|
||||
{text: '美味', furigana: 'おい'},
|
||||
{text: 'しい'}
|
||||
{text: 'しい', furigana: ''}
|
||||
]
|
||||
],
|
||||
[
|
||||
['食べ物', 'たべもの'],
|
||||
[
|
||||
{text: '食', furigana: 'た'},
|
||||
{text: 'べ'},
|
||||
{text: 'べ', furigana: ''},
|
||||
{text: '物', furigana: 'もの'}
|
||||
]
|
||||
],
|
||||
@ -341,9 +317,9 @@ function testDistributeFurigana() {
|
||||
['試し切り', 'ためしぎり'],
|
||||
[
|
||||
{text: '試', furigana: 'ため'},
|
||||
{text: 'し'},
|
||||
{text: 'し', furigana: ''},
|
||||
{text: '切', furigana: 'ぎ'},
|
||||
{text: 'り'}
|
||||
{text: 'り', furigana: ''}
|
||||
]
|
||||
],
|
||||
// Ambiguous
|
||||
@ -373,16 +349,16 @@ function testDistributeFuriganaInflected() {
|
||||
['美味しい', 'おいしい', '美味しかた'],
|
||||
[
|
||||
{text: '美味', furigana: 'おい'},
|
||||
{text: 'し'},
|
||||
{text: 'かた'}
|
||||
{text: 'し', furigana: ''},
|
||||
{text: 'かた', furigana: ''}
|
||||
]
|
||||
],
|
||||
[
|
||||
['食べる', 'たべる', '食べた'],
|
||||
[
|
||||
{text: '食', furigana: 'た'},
|
||||
{text: 'べ'},
|
||||
{text: 'た'}
|
||||
{text: 'べ', furigana: ''},
|
||||
{text: 'た', furigana: ''}
|
||||
]
|
||||
]
|
||||
];
|
||||
|
Loading…
Reference in New Issue
Block a user