Merge pull request #456 from siikamiika/parse-text-refactor
Parse text refactor
This commit is contained in:
commit
8c16a6e580
@ -84,7 +84,6 @@ class Backend {
|
||||
['kanjiFind', {handler: this._onApiKanjiFind.bind(this), async: true}],
|
||||
['termsFind', {handler: this._onApiTermsFind.bind(this), async: true}],
|
||||
['textParse', {handler: this._onApiTextParse.bind(this), async: true}],
|
||||
['textParseMecab', {handler: this._onApiTextParseMecab.bind(this), async: true}],
|
||||
['definitionAdd', {handler: this._onApiDefinitionAdd.bind(this), async: true}],
|
||||
['definitionsAddable', {handler: this._onApiDefinitionsAddable.bind(this), async: true}],
|
||||
['noteView', {handler: this._onApiNoteView.bind(this), async: true}],
|
||||
@ -314,6 +313,60 @@ class Backend {
|
||||
return await this.dictionaryImporter.import(this.database, archiveSource, onProgress, details);
|
||||
}
|
||||
|
||||
async _textParseScanning(text, options) {
|
||||
const results = [];
|
||||
while (text.length > 0) {
|
||||
const term = [];
|
||||
const [definitions, sourceLength] = await this.translator.findTerms(
|
||||
'simple',
|
||||
text.substring(0, options.scanning.length),
|
||||
{},
|
||||
options
|
||||
);
|
||||
if (definitions.length > 0 && sourceLength > 0) {
|
||||
dictTermsSort(definitions);
|
||||
const {expression, reading} = definitions[0];
|
||||
const source = text.substring(0, sourceLength);
|
||||
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
|
||||
const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
|
||||
term.push({text: text2, reading: reading2});
|
||||
}
|
||||
text = text.substring(source.length);
|
||||
} else {
|
||||
const reading = jp.convertReading(text[0], '', options.parsing.readingMode);
|
||||
term.push({text: text[0], reading});
|
||||
text = text.substring(1);
|
||||
}
|
||||
results.push(term);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
async _textParseMecab(text, options) {
|
||||
const results = [];
|
||||
const rawResults = await this.mecab.parseText(text);
|
||||
for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
|
||||
const result = [];
|
||||
for (const parsedLine of parsedLines) {
|
||||
for (const {expression, reading, source} of parsedLine) {
|
||||
const term = [];
|
||||
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(
|
||||
expression.length > 0 ? expression : source,
|
||||
jp.convertKatakanaToHiragana(reading),
|
||||
source
|
||||
)) {
|
||||
const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
|
||||
term.push({text: text2, reading: reading2});
|
||||
}
|
||||
result.push(term);
|
||||
}
|
||||
result.push([{text: '\n', reading: ''}]);
|
||||
}
|
||||
results.push([mecabName, result]);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
// Message handlers
|
||||
|
||||
_onApiYomichanCoreReady(_params, sender) {
|
||||
@ -405,61 +458,27 @@ class Backend {
|
||||
async _onApiTextParse({text, optionsContext}) {
|
||||
const options = this.getOptions(optionsContext);
|
||||
const results = [];
|
||||
while (text.length > 0) {
|
||||
const term = [];
|
||||
const [definitions, sourceLength] = await this.translator.findTerms(
|
||||
'simple',
|
||||
text.substring(0, options.scanning.length),
|
||||
{},
|
||||
options
|
||||
);
|
||||
if (definitions.length > 0) {
|
||||
dictTermsSort(definitions);
|
||||
const {expression, reading} = definitions[0];
|
||||
const source = text.substring(0, sourceLength);
|
||||
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
|
||||
const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
|
||||
term.push({text: text2, reading: reading2});
|
||||
}
|
||||
text = text.substring(source.length);
|
||||
} else {
|
||||
const reading = jp.convertReading(text[0], null, options.parsing.readingMode);
|
||||
term.push({text: text[0], reading});
|
||||
text = text.substring(1);
|
||||
}
|
||||
results.push(term);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
async _onApiTextParseMecab({text, optionsContext}) {
|
||||
const options = this.getOptions(optionsContext);
|
||||
const results = [];
|
||||
const rawResults = await this.mecab.parseText(text);
|
||||
for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
|
||||
const result = [];
|
||||
for (const parsedLine of parsedLines) {
|
||||
for (const {expression, reading, source} of parsedLine) {
|
||||
const term = [];
|
||||
if (expression !== null && reading !== null) {
|
||||
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(
|
||||
expression,
|
||||
jp.convertKatakanaToHiragana(reading),
|
||||
source
|
||||
)) {
|
||||
const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
|
||||
term.push({text: text2, reading: reading2});
|
||||
}
|
||||
} else {
|
||||
const reading2 = jp.convertReading(source, null, options.parsing.readingMode);
|
||||
term.push({text: source, reading: reading2});
|
||||
}
|
||||
result.push(term);
|
||||
}
|
||||
result.push([{text: '\n'}]);
|
||||
}
|
||||
results.push([mecabName, result]);
|
||||
if (options.parsing.enableScanningParser) {
|
||||
results.push({
|
||||
source: 'scanning-parser',
|
||||
id: 'scan',
|
||||
content: await this._textParseScanning(text, options)
|
||||
});
|
||||
}
|
||||
|
||||
if (options.parsing.enableMecabParser) {
|
||||
const mecabResults = await this._textParseMecab(text, options);
|
||||
for (const [mecabDictName, mecabDictResults] of mecabResults) {
|
||||
results.push({
|
||||
source: 'mecab',
|
||||
dictionary: mecabDictName,
|
||||
id: `mecab-${mecabDictName}`,
|
||||
content: mecabDictResults
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
|
@ -124,25 +124,25 @@
|
||||
return wanakana.toRomaji(text);
|
||||
}
|
||||
|
||||
function convertReading(expressionFragment, readingFragment, readingMode) {
|
||||
function convertReading(expression, reading, readingMode) {
|
||||
switch (readingMode) {
|
||||
case 'hiragana':
|
||||
return convertKatakanaToHiragana(readingFragment || '');
|
||||
return convertKatakanaToHiragana(reading);
|
||||
case 'katakana':
|
||||
return convertHiraganaToKatakana(readingFragment || '');
|
||||
return convertHiraganaToKatakana(reading);
|
||||
case 'romaji':
|
||||
if (readingFragment) {
|
||||
return convertToRomaji(readingFragment);
|
||||
if (reading) {
|
||||
return convertToRomaji(reading);
|
||||
} else {
|
||||
if (isStringEntirelyKana(expressionFragment)) {
|
||||
return convertToRomaji(expressionFragment);
|
||||
if (isStringEntirelyKana(expression)) {
|
||||
return convertToRomaji(expression);
|
||||
}
|
||||
}
|
||||
return readingFragment;
|
||||
return reading;
|
||||
case 'none':
|
||||
return null;
|
||||
return '';
|
||||
default:
|
||||
return readingFragment;
|
||||
return reading;
|
||||
}
|
||||
}
|
||||
|
||||
@ -300,7 +300,7 @@
|
||||
const readingLeft = reading2.substring(group.text.length);
|
||||
const segs = segmentize(readingLeft, groups.splice(1));
|
||||
if (segs) {
|
||||
return [{text: group.text}].concat(segs);
|
||||
return [{text: group.text, furigana: ''}].concat(segs);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -368,7 +368,7 @@
|
||||
}
|
||||
|
||||
if (stemLength !== source.length) {
|
||||
output.push({text: source.substring(stemLength)});
|
||||
output.push({text: source.substring(stemLength), furigana: ''});
|
||||
}
|
||||
|
||||
return output;
|
||||
|
@ -40,7 +40,36 @@ class Mecab {
|
||||
}
|
||||
|
||||
async parseText(text) {
|
||||
return await this.invoke('parse_text', {text});
|
||||
const rawResults = await this.invoke('parse_text', {text});
|
||||
// {
|
||||
// 'mecab-name': [
|
||||
// // line1
|
||||
// [
|
||||
// {str expression: 'expression', str reading: 'reading', str source: 'source'},
|
||||
// {str expression: 'expression2', str reading: 'reading2', str source: 'source2'}
|
||||
// ],
|
||||
// line2,
|
||||
// ...
|
||||
// ],
|
||||
// 'mecab-name2': [...]
|
||||
// }
|
||||
const results = {};
|
||||
for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
|
||||
const result = [];
|
||||
for (const parsedLine of parsedLines) {
|
||||
const line = [];
|
||||
for (const {expression, reading, source} of parsedLine) {
|
||||
line.push({
|
||||
expression: expression || '',
|
||||
reading: reading || '',
|
||||
source: source || ''
|
||||
});
|
||||
}
|
||||
result.push(line);
|
||||
}
|
||||
results[mecabName] = result;
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
startListener() {
|
||||
|
@ -36,7 +36,7 @@ class QueryParserGenerator {
|
||||
const termContainer = this._templateHandler.instantiate(preview ? 'term-preview' : 'term');
|
||||
for (const segment of term) {
|
||||
if (!segment.text.trim()) { continue; }
|
||||
if (!segment.reading || !segment.reading.trim()) {
|
||||
if (!segment.reading.trim()) {
|
||||
termContainer.appendChild(this.createSegmentText(segment.text));
|
||||
} else {
|
||||
termContainer.appendChild(this.createSegment(segment));
|
||||
@ -71,7 +71,17 @@ class QueryParserGenerator {
|
||||
for (const parseResult of parseResults) {
|
||||
const optionContainer = this._templateHandler.instantiate('select-option');
|
||||
optionContainer.value = parseResult.id;
|
||||
optionContainer.textContent = parseResult.name;
|
||||
switch (parseResult.source) {
|
||||
case 'scanning-parser':
|
||||
optionContainer.textContent = 'Scanning parser';
|
||||
break;
|
||||
case 'mecab':
|
||||
optionContainer.textContent = `MeCab: ${parseResult.dictionary}`;
|
||||
break;
|
||||
default:
|
||||
optionContainer.textContent = 'Unrecognized dictionary';
|
||||
break;
|
||||
}
|
||||
optionContainer.defaultSelected = selectedParser === parseResult.id;
|
||||
selectContainer.appendChild(optionContainer);
|
||||
}
|
||||
|
@ -21,7 +21,6 @@
|
||||
* apiOptionsSet
|
||||
* apiTermsFind
|
||||
* apiTextParse
|
||||
* apiTextParseMecab
|
||||
* docSentenceExtract
|
||||
*/
|
||||
|
||||
@ -128,7 +127,7 @@ class QueryParser extends TextScanner {
|
||||
|
||||
this.setPreview(text);
|
||||
|
||||
this.parseResults = await this.parseText(text);
|
||||
this.parseResults = await apiTextParse(text, this.getOptionsContext());
|
||||
this.refreshSelectedParser();
|
||||
|
||||
this.renderParserSelect();
|
||||
@ -137,33 +136,11 @@ class QueryParser extends TextScanner {
|
||||
this.setSpinnerVisible(false);
|
||||
}
|
||||
|
||||
async parseText(text) {
|
||||
const results = [];
|
||||
if (this.options.parsing.enableScanningParser) {
|
||||
results.push({
|
||||
name: 'Scanning parser',
|
||||
id: 'scan',
|
||||
parsedText: await apiTextParse(text, this.getOptionsContext())
|
||||
});
|
||||
}
|
||||
if (this.options.parsing.enableMecabParser) {
|
||||
const mecabResults = await apiTextParseMecab(text, this.getOptionsContext());
|
||||
for (const [mecabDictName, mecabDictResults] of mecabResults) {
|
||||
results.push({
|
||||
name: `MeCab: ${mecabDictName}`,
|
||||
id: `mecab-${mecabDictName}`,
|
||||
parsedText: mecabDictResults
|
||||
});
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
setPreview(text) {
|
||||
const previewTerms = [];
|
||||
for (let i = 0, ii = text.length; i < ii; i += 2) {
|
||||
const tempText = text.substring(i, i + 2);
|
||||
previewTerms.push([{text: tempText}]);
|
||||
previewTerms.push([{text: tempText, reading: ''}]);
|
||||
}
|
||||
this.queryParser.textContent = '';
|
||||
this.queryParser.appendChild(this.queryParserGenerator.createParseResult(previewTerms, true));
|
||||
@ -183,6 +160,6 @@ class QueryParser extends TextScanner {
|
||||
const parseResult = this.getParseResult();
|
||||
this.queryParser.textContent = '';
|
||||
if (!parseResult) { return; }
|
||||
this.queryParser.appendChild(this.queryParserGenerator.createParseResult(parseResult.parsedText));
|
||||
this.queryParser.appendChild(this.queryParserGenerator.createParseResult(parseResult.content));
|
||||
}
|
||||
}
|
||||
|
@ -44,10 +44,6 @@ function apiTextParse(text, optionsContext) {
|
||||
return _apiInvoke('textParse', {text, optionsContext});
|
||||
}
|
||||
|
||||
function apiTextParseMecab(text, optionsContext) {
|
||||
return _apiInvoke('textParseMecab', {text, optionsContext});
|
||||
}
|
||||
|
||||
/**
 * Sends a 'kanjiFind' request through `_apiInvoke`.
 *
 * @param {string} text The text passed along as the `text` parameter.
 * @param {object} optionsContext Passed along as the `optionsContext` parameter.
 * @returns {*} Whatever `_apiInvoke` returns for the request.
 */
function apiKanjiFind(text, optionsContext) {
    const params = {text, optionsContext};
    return _apiInvoke('kanjiFind', params);
}
|
||||
|
@ -176,19 +176,19 @@ function testConvertReading() {
|
||||
[['アリガトウ', 'アリガトウ', 'hiragana'], 'ありがとう'],
|
||||
[['アリガトウ', 'アリガトウ', 'katakana'], 'アリガトウ'],
|
||||
[['アリガトウ', 'アリガトウ', 'romaji'], 'arigatou'],
|
||||
[['アリガトウ', 'アリガトウ', 'none'], null],
|
||||
[['アリガトウ', 'アリガトウ', 'none'], ''],
|
||||
[['アリガトウ', 'アリガトウ', 'default'], 'アリガトウ'],
|
||||
|
||||
[['ありがとう', 'ありがとう', 'hiragana'], 'ありがとう'],
|
||||
[['ありがとう', 'ありがとう', 'katakana'], 'アリガトウ'],
|
||||
[['ありがとう', 'ありがとう', 'romaji'], 'arigatou'],
|
||||
[['ありがとう', 'ありがとう', 'none'], null],
|
||||
[['ありがとう', 'ありがとう', 'none'], ''],
|
||||
[['ありがとう', 'ありがとう', 'default'], 'ありがとう'],
|
||||
|
||||
[['有り難う', 'ありがとう', 'hiragana'], 'ありがとう'],
|
||||
[['有り難う', 'ありがとう', 'katakana'], 'アリガトウ'],
|
||||
[['有り難う', 'ありがとう', 'romaji'], 'arigatou'],
|
||||
[['有り難う', 'ありがとう', 'none'], null],
|
||||
[['有り難う', 'ありがとう', 'none'], ''],
|
||||
[['有り難う', 'ありがとう', 'default'], 'ありがとう'],
|
||||
|
||||
// Cases with falsy readings
|
||||
@ -196,44 +196,20 @@ function testConvertReading() {
|
||||
[['ありがとう', '', 'hiragana'], ''],
|
||||
[['ありがとう', '', 'katakana'], ''],
|
||||
[['ありがとう', '', 'romaji'], 'arigatou'],
|
||||
[['ありがとう', '', 'none'], null],
|
||||
[['ありがとう', '', 'none'], ''],
|
||||
[['ありがとう', '', 'default'], ''],
|
||||
|
||||
[['ありがとう', null, 'hiragana'], ''],
|
||||
[['ありがとう', null, 'katakana'], ''],
|
||||
[['ありがとう', null, 'romaji'], 'arigatou'],
|
||||
[['ありがとう', null, 'none'], null],
|
||||
[['ありがとう', null, 'default'], null],
|
||||
|
||||
[['ありがとう', void 0, 'hiragana'], ''],
|
||||
[['ありがとう', void 0, 'katakana'], ''],
|
||||
[['ありがとう', void 0, 'romaji'], 'arigatou'],
|
||||
[['ありがとう', void 0, 'none'], null],
|
||||
[['ありがとう', void 0, 'default'], void 0],
|
||||
|
||||
// Cases with falsy readings and kanji expressions
|
||||
|
||||
[['有り難う', '', 'hiragana'], ''],
|
||||
[['有り難う', '', 'katakana'], ''],
|
||||
[['有り難う', '', 'romaji'], ''],
|
||||
[['有り難う', '', 'none'], null],
|
||||
[['有り難う', '', 'default'], ''],
|
||||
|
||||
[['有り難う', null, 'hiragana'], ''],
|
||||
[['有り難う', null, 'katakana'], ''],
|
||||
[['有り難う', null, 'romaji'], null],
|
||||
[['有り難う', null, 'none'], null],
|
||||
[['有り難う', null, 'default'], null],
|
||||
|
||||
[['有り難う', void 0, 'hiragana'], ''],
|
||||
[['有り難う', void 0, 'katakana'], ''],
|
||||
[['有り難う', void 0, 'romaji'], void 0],
|
||||
[['有り難う', void 0, 'none'], null],
|
||||
[['有り難う', void 0, 'default'], void 0]
|
||||
[['有り難う', '', 'none'], ''],
|
||||
[['有り難う', '', 'default'], '']
|
||||
];
|
||||
|
||||
for (const [[expressionFragment, readingFragment, readingMode], expected] of data) {
|
||||
assert.strictEqual(jp.convertReading(expressionFragment, readingFragment, readingMode), expected);
|
||||
for (const [[expression, reading, readingMode], expected] of data) {
|
||||
assert.strictEqual(jp.convertReading(expression, reading, readingMode), expected);
|
||||
}
|
||||
}
|
||||
|
||||
@ -303,9 +279,9 @@ function testDistributeFurigana() {
|
||||
['有り難う', 'ありがとう'],
|
||||
[
|
||||
{text: '有', furigana: 'あ'},
|
||||
{text: 'り'},
|
||||
{text: 'り', furigana: ''},
|
||||
{text: '難', furigana: 'がと'},
|
||||
{text: 'う'}
|
||||
{text: 'う', furigana: ''}
|
||||
]
|
||||
],
|
||||
[
|
||||
@ -317,23 +293,23 @@ function testDistributeFurigana() {
|
||||
[
|
||||
['お祝い', 'おいわい'],
|
||||
[
|
||||
{text: 'お'},
|
||||
{text: 'お', furigana: ''},
|
||||
{text: '祝', furigana: 'いわ'},
|
||||
{text: 'い'}
|
||||
{text: 'い', furigana: ''}
|
||||
]
|
||||
],
|
||||
[
|
||||
['美味しい', 'おいしい'],
|
||||
[
|
||||
{text: '美味', furigana: 'おい'},
|
||||
{text: 'しい'}
|
||||
{text: 'しい', furigana: ''}
|
||||
]
|
||||
],
|
||||
[
|
||||
['食べ物', 'たべもの'],
|
||||
[
|
||||
{text: '食', furigana: 'た'},
|
||||
{text: 'べ'},
|
||||
{text: 'べ', furigana: ''},
|
||||
{text: '物', furigana: 'もの'}
|
||||
]
|
||||
],
|
||||
@ -341,9 +317,9 @@ function testDistributeFurigana() {
|
||||
['試し切り', 'ためしぎり'],
|
||||
[
|
||||
{text: '試', furigana: 'ため'},
|
||||
{text: 'し'},
|
||||
{text: 'し', furigana: ''},
|
||||
{text: '切', furigana: 'ぎ'},
|
||||
{text: 'り'}
|
||||
{text: 'り', furigana: ''}
|
||||
]
|
||||
],
|
||||
// Ambiguous
|
||||
@ -373,16 +349,16 @@ function testDistributeFuriganaInflected() {
|
||||
['美味しい', 'おいしい', '美味しかた'],
|
||||
[
|
||||
{text: '美味', furigana: 'おい'},
|
||||
{text: 'し'},
|
||||
{text: 'かた'}
|
||||
{text: 'し', furigana: ''},
|
||||
{text: 'かた', furigana: ''}
|
||||
]
|
||||
],
|
||||
[
|
||||
['食べる', 'たべる', '食べた'],
|
||||
[
|
||||
{text: '食', furigana: 'た'},
|
||||
{text: 'べ'},
|
||||
{text: 'た'}
|
||||
{text: 'べ', furigana: ''},
|
||||
{text: 'た', furigana: ''}
|
||||
]
|
||||
]
|
||||
];
|
||||
|
Loading…
x
Reference in New Issue
Block a user