Mecab refactor (#1357)

* Use private variables

* Use local variables

* Remove onError

* Use private functions

* Move results parsing

* Set up disconnect observation

* Add _clearPort

* Update API

* Rename

* Fix iterator

* Update parseText invocation

* Update parseText format

* Reorganize

* Update implementation and docs

* Fix some port setup issues
This commit is contained in:
toasted-nutbread 2021-02-08 17:53:12 -05:00 committed by GitHub
parent 7b28c2c532
commit 69e8addb47
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 173 additions and 94 deletions

View File

@ -877,11 +877,7 @@ class Backend {
this._anki.server = options.anki.server; this._anki.server = options.anki.server;
this._anki.enabled = options.anki.enable; this._anki.enabled = options.anki.enable;
if (options.parsing.enableMecabParser) { this._mecab.setEnabled(options.parsing.enableMecabParser);
this._mecab.startListener();
} else {
this._mecab.stopListener();
}
if (options.clipboard.enableBackgroundMonitor) { if (options.clipboard.enableBackgroundMonitor) {
this._clipboardMonitor.start(); this._clipboardMonitor.start();
@ -988,12 +984,19 @@ class Backend {
async _textParseMecab(text, options) { async _textParseMecab(text, options) {
const jp = this._japaneseUtil; const jp = this._japaneseUtil;
const {parsing: {readingMode}} = options; const {parsing: {readingMode}} = options;
let parseTextResults;
try {
parseTextResults = await this._mecab.parseText(text);
} catch (e) {
return [];
}
const results = []; const results = [];
const rawResults = await this._mecab.parseText(text); for (const {name, lines} of parseTextResults) {
for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
const result = []; const result = [];
for (const parsedLine of parsedLines) { for (const line of lines) {
for (const {expression, reading, source} of parsedLine) { for (const {expression, reading, source} of line) {
const term = []; const term = [];
for (const {text: text2, furigana} of jp.distributeFuriganaInflected( for (const {text: text2, furigana} of jp.distributeFuriganaInflected(
expression.length > 0 ? expression : source, expression.length > 0 ? expression : source,
@ -1007,7 +1010,7 @@ class Backend {
} }
result.push([{text: '\n', reading: ''}]); result.push([{text: '\n', reading: ''}]);
} }
results.push([mecabName, result]); results.push([name, result]);
} }
return results; return results;
} }

View File

@ -15,107 +15,183 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>. * along with this program. If not, see <https://www.gnu.org/licenses/>.
*/ */
/**
* This class is used to connect Yomichan to a native component that is
* used to parse text into individual terms.
*/
class Mecab {
    /**
     * Creates a new instance of the class.
     * The instance starts disabled and without a connection; the native port is
     * opened lazily by the first call that needs it.
     */
    constructor() {
        // Native messaging port, or null while disconnected.
        this._port = null;
        // Next sequence number used to match responses to requests.
        this._sequence = 0;
        // Map of sequence number => {resolve, reject, timer} for in-flight requests.
        this._invocations = new Map();
        this._eventListeners = new EventListenerCollection();
        // Maximum time to wait for a response from the native component, in milliseconds.
        this._timeout = 5000;
        // Protocol version this class supports.
        this._version = 1;
        // Version most recently reported by the native component, or null if none was received.
        this._remoteVersion = null;
        this._enabled = false;
        // Promise for the connection currently being set up or already set up; null when there is none.
        this._setupPortPromise = null;
    }

    /**
     * Returns whether or not the component is enabled.
     * @returns `true` if the component is enabled, `false` otherwise.
     */
    isEnabled() {
        return this._enabled;
    }

    /**
     * Changes whether or not the component connection is enabled.
     * Disabling closes any open connection and rejects requests that are still pending.
     * @param enabled A boolean indicating whether or not the component should be enabled.
     */
    setEnabled(enabled) {
        this._enabled = !!enabled;
        if (!this._enabled && this._port !== null) {
            this._clearPort();
        }
    }

    /**
     * Gets the version of the MeCab component.
     * @returns The version of the MeCab component, or `null` if the component was not found.
     */
    async getVersion() {
        try {
            await this._setupPort();
        } catch (e) {
            // Deliberately ignored: callers only need the last version that was reported, if any.
        }
        return this._remoteVersion;
    }

    /**
     * Parses a string of Japanese text into arrays of lines and terms.
     *
     * Return value format:
     * ```js
     * [
     *     {
     *         name: (string),
     *         lines: [
     *             [
     *                 {expression: (string), reading: (string), source: (string)},
     *                 ...
     *             ],
     *             ...
     *         ]
     *     },
     *     ...
     * ]
     * ```
     * @param text The string to parse.
     * @returns A collection of parsing results of the text.
     * @throws An error if the component is disabled, cannot be connected to,
     *   reports an unsupported version, or does not respond in time.
     */
    async parseText(text) {
        await this._setupPort();
        const rawResults = await this._invoke('parse_text', {text});
        return this._convertParseTextResults(rawResults);
    }

    // Private

    /**
     * Handles a message from the native component by resolving the matching request.
     * Messages with an unknown sequence number are ignored.
     */
    _onMessage({sequence, data}) {
        const invocation = this._invocations.get(sequence);
        if (typeof invocation === 'undefined') { return; }

        const {resolve, timer} = invocation;
        clearTimeout(timer);
        resolve(data);
        this._invocations.delete(sequence);
    }

    /**
     * Handles the port being disconnected by the other side: every pending request
     * is rejected with the reported error and the connection state is reset.
     */
    _onDisconnect() {
        if (this._port === null) { return; }
        const e = chrome.runtime.lastError;
        const error = new Error(e ? e.message : 'MeCab disconnected');
        this._rejectPendingInvocations(error);
        this._clearPort();
    }

    /**
     * Sends a request to the native component and waits for the response.
     * @param action The name of the action to perform.
     * @param params The parameters of the action.
     * @returns A promise which resolves to the `data` of the matching response. It rejects
     *   if there is no port, the message cannot be posted, the port is closed, or no
     *   response arrives within the timeout.
     */
    _invoke(action, params) {
        return new Promise((resolve, reject) => {
            const port = this._port;
            if (port === null) {
                reject(new Error('Port disconnected'));
                // Stop here: without a port there is nothing to post to, and no timer
                // or map entry should be left behind.
                return;
            }

            const sequence = this._sequence++;

            const timer = setTimeout(() => {
                this._invocations.delete(sequence);
                reject(new Error(`MeCab invoke timed out after ${this._timeout}ms`));
            }, this._timeout);

            this._invocations.set(sequence, {resolve, reject, timer});

            try {
                port.postMessage({action, params, sequence});
            } catch (e) {
                // Undo the registration so a failed send does not leave a live timer behind.
                clearTimeout(timer);
                this._invocations.delete(sequence);
                reject(e);
            }
        });
    }

    /**
     * Converts the raw `parse_text` response, an object mapping a dictionary name to
     * an array of lines, into an array of `{name, lines}` entries. Any term field
     * which is not a string is replaced with an empty string.
     */
    _convertParseTextResults(rawResults) {
        const results = [];
        for (const [name, rawLines] of Object.entries(rawResults)) {
            const lines = [];
            for (const rawLine of rawLines) {
                const line = [];
                for (let {expression, reading, source} of rawLine) {
                    if (typeof expression !== 'string') { expression = ''; }
                    if (typeof reading !== 'string') { reading = ''; }
                    if (typeof source !== 'string') { source = ''; }
                    line.push({expression, reading, source});
                }
                lines.push(line);
            }
            results.push({name, lines});
        }
        return results;
    }

    /**
     * Ensures that a connection to the native component has been set up.
     * Concurrent callers share a single setup attempt.
     * @throws An error if the component is disabled or the setup fails.
     */
    async _setupPort() {
        if (!this._enabled) {
            throw new Error('MeCab not enabled');
        }
        if (this._setupPortPromise === null) {
            this._setupPortPromise = this._setupPort2();
        }
        try {
            await this._setupPortPromise;
        } catch (e) {
            // NOTE(review): presumably re-wrapped so each caller gets its own Error
            // instance rather than sharing the one stored in the promise - confirm.
            throw new Error(e.message);
        }
    }

    /**
     * Opens the native port, registers its listeners and verifies the remote version.
     * The port is closed again if the version check fails.
     */
    async _setupPort2() {
        const port = chrome.runtime.connectNative('yomichan_mecab');
        this._eventListeners.addListener(port.onMessage, this._onMessage.bind(this));
        this._eventListeners.addListener(port.onDisconnect, this._onDisconnect.bind(this));
        this._port = port;

        try {
            const {version} = await this._invoke('get_version', {});
            this._remoteVersion = version;
            if (version !== this._version) {
                throw new Error(`Unsupported MeCab native messenger version ${version}. Yomichan supports version ${this._version}.`);
            }
        } catch (e) {
            // Only tear down the port created by this attempt; it may already have been
            // cleared (or replaced) while the version request was in flight.
            if (this._port === port) {
                this._clearPort();
            }
            throw e;
        }
    }

    /**
     * Closes the current port, if any, and resets all connection state.
     * Requests that are still pending are rejected first: the sequence counter restarts
     * at 0, so a timer left running could otherwise remove a later request which
     * reuses the same sequence number.
     */
    _clearPort() {
        const port = this._port;
        this._port = null;
        if (port !== null) {
            port.disconnect();
        }
        this._rejectPendingInvocations(new Error('Port disconnected'));
        this._eventListeners.removeAllEventListeners();
        this._sequence = 0;
        this._setupPortPromise = null;
    }

    /**
     * Cancels the timeout of every pending request, rejects it with the given error,
     * and empties the collection of pending requests.
     */
    _rejectPendingInvocations(error) {
        for (const {reject, timer} of this._invocations.values()) {
            clearTimeout(timer);
            reject(error);
        }
        this._invocations.clear();
    }
}
Mecab.timeout = 5000;
Mecab.version = 1;