add mecab support
This commit is contained in:
parent
3881457e4e
commit
41020289ab
@ -21,6 +21,7 @@
|
|||||||
<script src="/mixed/js/extension.js"></script>
|
<script src="/mixed/js/extension.js"></script>
|
||||||
|
|
||||||
<script src="/bg/js/anki.js"></script>
|
<script src="/bg/js/anki.js"></script>
|
||||||
|
<script src="/bg/js/mecab.js"></script>
|
||||||
<script src="/bg/js/api.js"></script>
|
<script src="/bg/js/api.js"></script>
|
||||||
<script src="/bg/js/audio.js"></script>
|
<script src="/bg/js/audio.js"></script>
|
||||||
<script src="/bg/js/backend-api-forwarder.js"></script>
|
<script src="/bg/js/backend-api-forwarder.js"></script>
|
||||||
|
@ -91,25 +91,10 @@ async function apiTextParse(text, optionsContext) {
|
|||||||
definitions = dictTermsSort(definitions);
|
definitions = dictTermsSort(definitions);
|
||||||
const {expression, reading} = definitions[0];
|
const {expression, reading} = definitions[0];
|
||||||
const source = text.slice(0, sourceLength);
|
const source = text.slice(0, sourceLength);
|
||||||
|
for (const {text, furigana} of jpDistributeFuriganaInflected(expression, reading, source)) {
|
||||||
let stemLength = 0;
|
// can't use 'furigana' in templates
|
||||||
const shortest = Math.min(source.length, expression.length);
|
term.push({text, reading: furigana});
|
||||||
while (stemLength < shortest && source[stemLength] === expression[stemLength]) {
|
|
||||||
++stemLength;
|
|
||||||
}
|
}
|
||||||
const offset = source.length - stemLength;
|
|
||||||
|
|
||||||
for (const {text, furigana} of jpDistributeFurigana(
|
|
||||||
source.slice(0, offset === 0 ? source.length : source.length - offset),
|
|
||||||
reading.slice(0, offset === 0 ? reading.length : reading.length - expression.length + stemLength)
|
|
||||||
)) {
|
|
||||||
term.push({text, reading: furigana || ''});
|
|
||||||
}
|
|
||||||
|
|
||||||
if (stemLength !== source.length) {
|
|
||||||
term.push({text: source.slice(stemLength)});
|
|
||||||
}
|
|
||||||
|
|
||||||
text = text.slice(source.length);
|
text = text.slice(source.length);
|
||||||
} else {
|
} else {
|
||||||
term.push({text: text[0]});
|
term.push({text: text[0]});
|
||||||
@ -120,6 +105,33 @@ async function apiTextParse(text, optionsContext) {
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Parses `text` with the backend's MeCab client and turns every parsed token
 * into a list of `{text, reading}` segments.
 *
 * Tokens that carry both an expression and a reading get their furigana
 * distributed by `jpDistributeFuriganaInflected` (the reading is converted
 * with `jpKatakanaToHiragana` first); any other token is emitted as a single
 * `{text: source}` segment.
 *
 * @param {string} text - Text handed to `mecab.parseText`.
 * @param {object} optionsContext - Kept so the signature matches the other
 *   api* functions; this function does not read it.
 * @returns {Promise<Array<Array<{text: string, reading?: string}>>>} One entry
 *   per token, followed by a `[{text: '\n'}]` entry after each parsed line.
 */
async function apiTextParseMecab(text, optionsContext) {
    const mecab = utilBackend().mecab;

    const results = [];
    for (const parsedLine of await mecab.parseText(text)) {
        for (const {expression, reading, source} of parsedLine) {
            const term = [];
            if (expression && reading) {
                const segments = jpDistributeFuriganaInflected(
                    expression,
                    jpKatakanaToHiragana(reading),
                    source
                );
                for (const segment of segments) {
                    // can't use 'furigana' in templates
                    term.push({text: segment.text, reading: segment.furigana});
                }
            } else {
                term.push({text: source});
            }
            results.push(term);
        }
        // Mark the end of every parsed line.
        results.push([{text: '\n'}]);
    }
    return results;
}
|
||||||
|
|
||||||
async function apiKanjiFind(text, optionsContext) {
|
async function apiKanjiFind(text, optionsContext) {
|
||||||
const options = await apiOptionsGet(optionsContext);
|
const options = await apiOptionsGet(optionsContext);
|
||||||
const definitions = await utilBackend().translator.findKanji(text, options);
|
const definitions = await utilBackend().translator.findKanji(text, options);
|
||||||
|
@ -21,6 +21,7 @@ class Backend {
|
|||||||
constructor() {
|
constructor() {
|
||||||
this.translator = new Translator();
|
this.translator = new Translator();
|
||||||
this.anki = new AnkiNull();
|
this.anki = new AnkiNull();
|
||||||
|
this.mecab = new Mecab();
|
||||||
this.options = null;
|
this.options = null;
|
||||||
this.optionsContext = {
|
this.optionsContext = {
|
||||||
depth: 0,
|
depth: 0,
|
||||||
@ -181,6 +182,7 @@ Backend.messageHandlers = {
|
|||||||
kanjiFind: ({text, optionsContext}) => apiKanjiFind(text, optionsContext),
|
kanjiFind: ({text, optionsContext}) => apiKanjiFind(text, optionsContext),
|
||||||
termsFind: ({text, details, optionsContext}) => apiTermsFind(text, details, optionsContext),
|
termsFind: ({text, details, optionsContext}) => apiTermsFind(text, details, optionsContext),
|
||||||
textParse: ({text, optionsContext}) => apiTextParse(text, optionsContext),
|
textParse: ({text, optionsContext}) => apiTextParse(text, optionsContext),
|
||||||
|
textParseMecab: ({text, optionsContext}) => apiTextParseMecab(text, optionsContext),
|
||||||
definitionAdd: ({definition, mode, context, optionsContext}) => apiDefinitionAdd(definition, mode, context, optionsContext),
|
definitionAdd: ({definition, mode, context, optionsContext}) => apiDefinitionAdd(definition, mode, context, optionsContext),
|
||||||
definitionsAddable: ({definitions, modes, optionsContext}) => apiDefinitionsAddable(definitions, modes, optionsContext),
|
definitionsAddable: ({definitions, modes, optionsContext}) => apiDefinitionsAddable(definitions, modes, optionsContext),
|
||||||
noteView: ({noteId}) => apiNoteView(noteId),
|
noteView: ({noteId}) => apiNoteView(noteId),
|
||||||
|
63
ext/bg/js/mecab.js
Normal file
63
ext/bg/js/mecab.js
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2019 Alex Yatskov <alex@foosoft.net>
|
||||||
|
* Author: Alex Yatskov <alex@foosoft.net>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Client for the native messaging host named 'mecab'.
 *
 * Every request is tagged with an increasing sequence number. A reply that
 * carries the same sequence number resolves the matching pending promise; a
 * request with no reply within `Mecab.timeout` milliseconds is rejected.
 */
class Mecab {
    constructor() {
        // Pending requests keyed by sequence number, each {callback, timer}.
        this.listeners = {};
        // Sequence number given to the next request.
        this.sequence = 0;
        this.startListener();
    }

    /**
     * Sends `text` to the host with the 'parse_text' action.
     *
     * @param {string} text - Text to parse.
     * @returns {Promise<*>} Resolves with the `data` field of the reply.
     */
    async parseText(text) {
        return await this.invoke('parse_text', {text});
    }

    /**
     * Opens the native port and routes each incoming message to the pending
     * request with the same sequence number.
     */
    startListener() {
        this.port = chrome.runtime.connectNative('mecab');
        this.port.onMessage.addListener((message) => {
            const {sequence, data} = message;
            // Own-property check so inherited keys such as 'constructor' are
            // never mistaken for a pending request.
            if (!Object.prototype.hasOwnProperty.call(this.listeners, sequence)) {
                // Unknown sequence, or the request already timed out.
                return;
            }

            const {callback, timer} = this.listeners[sequence];
            clearTimeout(timer);
            delete this.listeners[sequence];
            callback(data);
        });
    }

    /**
     * Posts `{action, params, sequence}` to the native port and waits for the
     * reply.
     *
     * @param {string} action - Action name understood by the host.
     * @param {object} params - Parameters for the action.
     * @returns {Promise<*>} Resolves with the reply's `data`. Rejects with a
     *   string message when no reply arrives within `Mecab.timeout` ms, or
     *   with whatever `postMessage` throws.
     */
    invoke(action, params) {
        return new Promise((resolve, reject) => {
            const sequence = this.sequence++;

            // Use the configurable timeout so the delay matches the message.
            const timer = setTimeout(() => {
                delete this.listeners[sequence];
                reject(`Mecab invoke timed out in ${Mecab.timeout} ms`);
            }, Mecab.timeout);

            this.listeners[sequence] = {callback: resolve, timer};

            try {
                this.port.postMessage({action, params, sequence});
            } catch (e) {
                // The request never left; do not keep its timer or entry.
                clearTimeout(timer);
                delete this.listeners[sequence];
                reject(e);
            }
        });
    }
}

// Milliseconds to wait for a reply before a request is rejected.
Mecab.timeout = 1000;
|
@ -74,7 +74,8 @@ class QueryParser {
|
|||||||
preview: true
|
preview: true
|
||||||
});
|
});
|
||||||
|
|
||||||
const results = await apiTextParse(text, this.search.getOptionsContext());
|
// const results = await apiTextParse(text, this.search.getOptionsContext());
|
||||||
|
const results = await apiTextParseMecab(text, this.search.getOptionsContext());
|
||||||
|
|
||||||
const content = await apiTemplateRender('query-parser.html', {
|
const content = await apiTemplateRender('query-parser.html', {
|
||||||
terms: results.map((term) => {
|
terms: results.map((term) => {
|
||||||
|
@ -33,6 +33,10 @@ function apiTextParse(text, optionsContext) {
|
|||||||
return utilInvoke('textParse', {text, optionsContext});
|
return utilInvoke('textParse', {text, optionsContext});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Forwards a MeCab text-parse request through `utilInvoke` using the
 * 'textParseMecab' action.
 *
 * @param {string} text - Text to parse.
 * @param {object} optionsContext - Passed through unchanged in the payload.
 * @returns {*} Whatever `utilInvoke` returns for this action.
 */
function apiTextParseMecab(text, optionsContext) {
|
||||||
|
return utilInvoke('textParseMecab', {text, optionsContext});
|
||||||
|
}
|
||||||
|
|
||||||
function apiKanjiFind(text, optionsContext) {
|
function apiKanjiFind(text, optionsContext) {
|
||||||
return utilInvoke('kanjiFind', {text, optionsContext});
|
return utilInvoke('kanjiFind', {text, optionsContext});
|
||||||
}
|
}
|
||||||
|
@ -42,7 +42,8 @@
|
|||||||
"<all_urls>",
|
"<all_urls>",
|
||||||
"storage",
|
"storage",
|
||||||
"clipboardWrite",
|
"clipboardWrite",
|
||||||
"unlimitedStorage"
|
"unlimitedStorage",
|
||||||
|
"nativeMessaging"
|
||||||
],
|
],
|
||||||
"optional_permissions": [
|
"optional_permissions": [
|
||||||
"clipboardRead"
|
"clipboardRead"
|
||||||
|
@ -61,12 +61,11 @@ function jpDistributeFurigana(expression, reading) {
|
|||||||
|
|
||||||
const group = groups[0];
|
const group = groups[0];
|
||||||
if (group.mode === 'kana') {
|
if (group.mode === 'kana') {
|
||||||
if (reading.startsWith(group.text)) {
|
if (jpKatakanaToHiragana(reading).startsWith(jpKatakanaToHiragana(group.text))) {
|
||||||
const readingUsed = reading.substring(0, group.text.length);
|
|
||||||
const readingLeft = reading.substring(group.text.length);
|
const readingLeft = reading.substring(group.text.length);
|
||||||
const segs = segmentize(readingLeft, groups.splice(1));
|
const segs = segmentize(readingLeft, groups.splice(1));
|
||||||
if (segs) {
|
if (segs) {
|
||||||
return [{text: readingUsed}].concat(segs);
|
return [{text: group.text}].concat(segs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -95,3 +94,33 @@ function jpDistributeFurigana(expression, reading) {
|
|||||||
|
|
||||||
return segmentize(reading, groups) || fallback;
|
return segmentize(reading, groups) || fallback;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Distributes furigana over `source`, an inflected or otherwise altered form
 * of `expression`, using the reading of `expression`.
 *
 * The stem is the longest prefix of `source` (capped at the shorter of the
 * two lengths) in which every character is either not kana, or equals the
 * expression's character at that index after both are converted with
 * `jpKatakanaToHiragana`. Furigana is distributed over the stem only; the
 * rest of `source` is appended as one segment without furigana.
 *
 * @param {string} expression - Form whose reading is known.
 * @param {string} reading - Reading of `expression`.
 * @param {string} source - Text as it actually appears.
 * @returns {Array<{text: string, furigana?: string}>} The segments from
 *   `jpDistributeFurigana` for the stem, then the unmatched tail if any.
 */
function jpDistributeFuriganaInflected(expression, reading, source) {
    const sourceHiragana = jpKatakanaToHiragana(source);
    const expressionHiragana = jpKatakanaToHiragana(expression);
    const shortest = Math.min(source.length, expression.length);

    let stemLength = 0;
    while (
        stemLength < shortest &&
        // sometimes an expression can use a kanji that's different from the source
        (!jpIsKana(source[stemLength]) || sourceHiragana[stemLength] === expressionHiragana[stemLength])
    ) {
        ++stemLength;
    }

    const coversWholeSource = stemLength === source.length;

    // When the stem does not cover the whole source, drop from the reading as
    // many trailing characters as the expression has beyond the stem.
    const stemReadingLength = coversWholeSource ?
        reading.length :
        reading.length - expression.length + stemLength;

    const output = [
        ...jpDistributeFurigana(source.slice(0, stemLength), reading.slice(0, stemReadingLength))
    ];

    if (!coversWholeSource) {
        output.push({text: source.slice(stemLength)});
    }

    return output;
}
|
||||||
|
Loading…
Reference in New Issue
Block a user