From f93dc857107e9b23ec06f1b568aad2c6f870ba4c Mon Sep 17 00:00:00 2001 From: siikamiika Date: Mon, 13 Apr 2020 22:55:33 +0300 Subject: [PATCH] assume and propagate strings for text parsing --- ext/bg/js/backend.js | 26 ++++------ ext/bg/js/japanese.js | 10 ++-- ext/bg/js/search-query-parser-generator.js | 2 +- ext/bg/js/search-query-parser.js | 2 +- test/test-japanese.js | 60 +++++++--------------- 5 files changed, 36 insertions(+), 64 deletions(-) diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index 65e00f28..d5086af2 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -334,7 +334,7 @@ class Backend { } text = text.substring(source.length); } else { - const reading = jp.convertReading(text[0], null, options.parsing.readingMode); + const reading = jp.convertReading(text[0], '', options.parsing.readingMode); term.push({text: text[0], reading}); text = text.substring(1); } @@ -349,24 +349,20 @@ class Backend { for (const [mecabName, parsedLines] of Object.entries(rawResults)) { const result = []; for (const parsedLine of parsedLines) { - for (const {expression, reading, source} of parsedLine) { + for (let {expression, reading, source} of parsedLine) { const term = []; - if (expression !== null && reading !== null) { - for (const {text: text2, furigana} of jp.distributeFuriganaInflected( - expression, - jp.convertKatakanaToHiragana(reading), - source - )) { - const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); - term.push({text: text2, reading: reading2}); - } - } else { - const reading2 = jp.convertReading(source, null, options.parsing.readingMode); - term.push({text: source, reading: reading2}); + if (expression === '') { expression = source; } + for (const {text: text2, furigana} of jp.distributeFuriganaInflected( + expression, + jp.convertKatakanaToHiragana(reading), + source + )) { + const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); + term.push({text: text2, reading: reading2}); } result.push(term); } - result.push([{text: '\n'}]); + result.push([{text: '\n', reading: ''}]); } results.push([mecabName, result]); } diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 5fef27a7..c74e4553 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -127,9 +127,9 @@ function convertReading(expressionFragment, readingFragment, readingMode) { switch (readingMode) { case 'hiragana': - return convertKatakanaToHiragana(readingFragment || ''); + return convertKatakanaToHiragana(readingFragment); case 'katakana': - return convertHiraganaToKatakana(readingFragment || ''); + return convertHiraganaToKatakana(readingFragment); case 'romaji': if (readingFragment) { return convertToRomaji(readingFragment); @@ -140,7 +140,7 @@ } return readingFragment; case 'none': - return null; + return ''; default: return readingFragment; } @@ -300,7 +300,7 @@ const readingLeft = reading2.substring(group.text.length); const segs = segmentize(readingLeft, groups.splice(1)); if (segs) { - return [{text: group.text}].concat(segs); + return [{text: group.text, furigana: ''}].concat(segs); } } } else { @@ -368,7 +368,7 @@ } if (stemLength !== source.length) { - output.push({text: source.substring(stemLength)}); + output.push({text: source.substring(stemLength), furigana: ''}); } return output; diff --git a/ext/bg/js/search-query-parser-generator.js b/ext/bg/js/search-query-parser-generator.js index d44829f7..527302ed 100644 --- a/ext/bg/js/search-query-parser-generator.js +++ b/ext/bg/js/search-query-parser-generator.js @@ -36,7 +36,7 @@ class QueryParserGenerator { const termContainer = this._templateHandler.instantiate(preview ? 'term-preview' : 'term'); for (const segment of term) { if (!segment.text.trim()) { continue; } - if (!segment.reading || !segment.reading.trim()) { + if (!segment.reading.trim()) { termContainer.appendChild(this.createSegmentText(segment.text)); } else { termContainer.appendChild(this.createSegment(segment)); diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index da61b045..692fb1a8 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -140,7 +140,7 @@ class QueryParser extends TextScanner { const previewTerms = []; for (let i = 0, ii = text.length; i < ii; i += 2) { const tempText = text.substring(i, i + 2); - previewTerms.push([{text: tempText}]); + previewTerms.push([{text: tempText, reading: ''}]); } this.queryParser.textContent = ''; this.queryParser.appendChild(this.queryParserGenerator.createParseResult(previewTerms, true)); diff --git a/test/test-japanese.js b/test/test-japanese.js index 89e41c36..7fd71ba8 100644 --- a/test/test-japanese.js +++ b/test/test-japanese.js @@ -176,19 +176,19 @@ function testConvertReading() { [['アリガトウ', 'アリガトウ', 'hiragana'], 'ありがとう'], [['アリガトウ', 'アリガトウ', 'katakana'], 'アリガトウ'], [['アリガトウ', 'アリガトウ', 'romaji'], 'arigatou'], - [['アリガトウ', 'アリガトウ', 'none'], null], + [['アリガトウ', 'アリガトウ', 'none'], ''], [['アリガトウ', 'アリガトウ', 'default'], 'アリガトウ'], [['ありがとう', 'ありがとう', 'hiragana'], 'ありがとう'], [['ありがとう', 'ありがとう', 'katakana'], 'アリガトウ'], [['ありがとう', 'ありがとう', 'romaji'], 'arigatou'], - [['ありがとう', 'ありがとう', 'none'], null], + [['ありがとう', 'ありがとう', 'none'], ''], [['ありがとう', 'ありがとう', 'default'], 'ありがとう'], [['有り難う', 'ありがとう', 'hiragana'], 'ありがとう'], [['有り難う', 'ありがとう', 'katakana'], 'アリガトウ'], [['有り難う', 'ありがとう', 'romaji'], 'arigatou'], - [['有り難う', 'ありがとう', 'none'], null], + [['有り難う', 'ありがとう', 'none'], ''], [['有り難う', 'ありがとう', 'default'], 'ありがとう'], // Cases with falsy readings @@ -196,40 +196,16 @@ function testConvertReading() { [['ありがとう', '', 'hiragana'], ''], [['ありがとう', '', 'katakana'], ''], [['ありがとう', '', 'romaji'], 'arigatou'], - [['ありがとう', '', 'none'], null], + [['ありがとう', '', 'none'], ''], [['ありがとう', '', 'default'], ''], - [['ありがとう', null, 'hiragana'], ''], - [['ありがとう', null, 'katakana'], ''], - [['ありがとう', null, 'romaji'], 'arigatou'], - [['ありがとう', null, 'none'], null], - [['ありがとう', null, 'default'], null], - - [['ありがとう', void 0, 'hiragana'], ''], - [['ありがとう', void 0, 'katakana'], ''], - [['ありがとう', void 0, 'romaji'], 'arigatou'], - [['ありがとう', void 0, 'none'], null], - [['ありがとう', void 0, 'default'], void 0], - // Cases with falsy readings and kanji expressions [['有り難う', '', 'hiragana'], ''], [['有り難う', '', 'katakana'], ''], [['有り難う', '', 'romaji'], ''], - [['有り難う', '', 'none'], null], - [['有り難う', '', 'default'], ''], - - [['有り難う', null, 'hiragana'], ''], - [['有り難う', null, 'katakana'], ''], - [['有り難う', null, 'romaji'], null], - [['有り難う', null, 'none'], null], - [['有り難う', null, 'default'], null], - - [['有り難う', void 0, 'hiragana'], ''], - [['有り難う', void 0, 'katakana'], ''], - [['有り難う', void 0, 'romaji'], void 0], - [['有り難う', void 0, 'none'], null], - [['有り難う', void 0, 'default'], void 0] + [['有り難う', '', 'none'], ''], + [['有り難う', '', 'default'], ''] ]; for (const [[expressionFragment, readingFragment, readingMode], expected] of data) { @@ -303,9 +279,9 @@ function testDistributeFurigana() { ['有り難う', 'ありがとう'], [ {text: '有', furigana: 'あ'}, - {text: 'り'}, + {text: 'り', furigana: ''}, {text: '難', furigana: 'がと'}, - {text: 'う'} + {text: 'う', furigana: ''} ] ], [ @@ -317,23 +293,23 @@ function testDistributeFurigana() { [ ['お祝い', 'おいわい'], [ - {text: 'お'}, + {text: 'お', furigana: ''}, {text: '祝', furigana: 'いわ'}, - {text: 'い'} + {text: 'い', furigana: ''} ] ], [ ['美味しい', 'おいしい'], [ {text: '美味', furigana: 'おい'}, - {text: 'しい'} + {text: 'しい', furigana: ''} ] ], [ ['食べ物', 'たべもの'], [ {text: '食', furigana: 'た'}, - {text: 'べ'}, + {text: 'べ', furigana: ''}, {text: '物', furigana: 'もの'} ] ], @@ -341,9 +317,9 @@ function testDistributeFurigana() { ['試し切り', 'ためしぎり'], [ {text: '試', furigana: 'ため'}, - {text: 'し'}, + {text: 'し', furigana: ''}, {text: '切', furigana: 'ぎ'}, - {text: 'り'} + {text: 'り', furigana: ''} ] ], // Ambiguous @@ -373,16 +349,16 @@ function testDistributeFuriganaInflected() { ['美味しい', 'おいしい', '美味しかた'], [ {text: '美味', furigana: 'おい'}, - {text: 'し'}, - {text: 'かた'} + {text: 'し', furigana: ''}, + {text: 'かた', furigana: ''} ] ], [ ['食べる', 'たべる', '食べた'], [ {text: '食', furigana: 'た'}, - {text: 'べ'}, - {text: 'た'} + {text: 'べ', furigana: ''}, + {text: 'た', furigana: ''} ] ] ];