add parser selection options

This commit is contained in:
siikamiika 2019-11-05 15:56:45 +02:00
parent 1700318988
commit 955e131f96
6 changed files with 86 additions and 31 deletions

View File

@ -109,25 +109,30 @@ async function apiTextParseMecab(text, optionsContext) {
const options = await apiOptionsGet(optionsContext);
const mecab = utilBackend().mecab;
const results = [];
for (const parsedLine of await mecab.parseText(text)) {
for (const {expression, reading, source} of parsedLine) {
const term = [];
if (expression && reading) {
for (const {text, furigana} of jpDistributeFuriganaInflected(
expression,
jpKatakanaToHiragana(reading),
source
)) {
// can't use 'furigana' in templates
term.push({text, reading: furigana});
const results = {};
const rawResults = await mecab.parseText(text);
for (const mecabName in rawResults) {
const result = [];
for (const parsedLine of rawResults[mecabName]) {
for (const {expression, reading, source} of parsedLine) {
const term = [];
if (expression && reading) {
for (const {text, furigana} of jpDistributeFuriganaInflected(
expression,
jpKatakanaToHiragana(reading),
source
)) {
// can't use 'furigana' in templates
term.push({text, reading: furigana});
}
} else {
term.push({text: source});
}
} else {
term.push({text: source});
result.push(term);
}
results.push(term);
result.push([{text: '\n'}]);
}
results.push([{text: '\n'}]);
results[mecabName] = result;
}
return results;
}

View File

@ -60,4 +60,4 @@ class Mecab {
}
}
Mecab.timeout = 1000;
Mecab.timeout = 5000;

View File

@ -311,6 +311,11 @@ function profileOptionsCreateDefaults() {
dictionaries: {},
parsing: {
enableScanningParser: true,
enableMecabParser: false
},
anki: {
enable: false,
server: 'http://127.0.0.1:8765',

View File

@ -86,22 +86,32 @@ class QueryParser {
this.search.setSpinnerVisible(true);
await this.setPreview(text);
// const results = await apiTextParse(text, this.search.getOptionsContext());
const results = await apiTextParseMecab(text, this.search.getOptionsContext());
const results = {};
if (this.search.options.parsing.enableScanningParser) {
results['scan'] = await apiTextParse(text, this.search.getOptionsContext());
}
if (this.search.options.parsing.enableMecabParser) {
let mecabResults = await apiTextParseMecab(text, this.search.getOptionsContext());
for (const mecabDictName in mecabResults) {
results[`mecab-${mecabDictName}`] = mecabResults[mecabDictName];
}
}
const content = await apiTemplateRender('query-parser.html', {
terms: results.map((term) => {
return term.filter(part => part.text.trim()).map((part) => {
return {
text: Array.from(part.text),
reading: part.reading,
raw: !part.reading || !part.reading.trim(),
};
});
})
});
const contents = await Promise.all(Object.values(results).map(async result => {
return await apiTemplateRender('query-parser.html', {
terms: result.map((term) => {
return term.filter(part => part.text.trim()).map((part) => {
return {
text: Array.from(part.text),
reading: part.reading,
raw: !part.reading || !part.reading.trim(),
};
});
})
});
}));
this.queryParser.innerHTML = content;
this.queryParser.innerHTML = contents.join('<hr>');
this.queryParser.querySelectorAll('.query-parser-char').forEach((charElement) => {
this.activateScanning(charElement);

View File

@ -64,6 +64,9 @@ async function formRead(options) {
options.scanning.modifier = $('#scan-modifier-key').val();
options.scanning.popupNestingMaxDepth = parseInt($('#popup-nesting-max-depth').val(), 10);
options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked');
options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked');
const optionsAnkiEnableOld = options.anki.enable;
options.anki.enable = $('#anki-enable').prop('checked');
options.anki.tags = utilBackgroundIsolate($('#card-tags').val().split(/[,; ]+/));
@ -126,6 +129,9 @@ async function formWrite(options) {
$('#scan-modifier-key').val(options.scanning.modifier);
$('#popup-nesting-max-depth').val(options.scanning.popupNestingMaxDepth);
$('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser);
$('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser);
$('#anki-enable').prop('checked', options.anki.enable);
$('#card-tags').val(options.anki.tags.join(' '));
$('#sentence-detection-extent').val(options.anki.sentenceExt);

View File

@ -587,6 +587,35 @@
</div></div>
</div>
<div id="text-parsing">
<h3>Text Parsing Options</h3>
<p class="help-block">
Yomichan can attempt to parse entire sentences or longer text blocks on the search page,
adding furigana above words and a small space between words.
</p>
<p class="help-block">
Two types of parsers are supported. The first one, enabled by default, uses the built-in
scanning functionality, automatically advancing through the sentence after each matching word.
</p>
<p class="help-block">
The second type is an external program called <a href="https://en.wikipedia.org/wiki/MeCab" target="_blank" rel="noopener">MeCab</a>
that uses its own dictionaries and a special parsing algorithm. To get it working, you must first
install it and <a href="https://github.com/siikamiika/yomichan-mecab-installer" target="_blank" rel="noopener">a native messaging component</a>
that acts as a bridge between the program and Yomichan.
</p>
<div class="checkbox">
<label><input type="checkbox" id="parsing-scan-enable"> Enable text parsing using installed dictionaries</label>
</div>
<div class="checkbox">
<label><input type="checkbox" id="parsing-mecab-enable"> Enable text parsing using MeCab</label>
</div>
</div>
<div>
<div>
<img src="/mixed/img/spinner.gif" class="pull-right" id="anki-spinner" alt>