Mecab refactor (#1357)

* Use private variables

* Use local variables

* Remove onError

* Use private functions

* Move results parsing

* Set up disconnect observation

* Add _clearPort

* Update API

* Rename

* Fix iterator

* Update parseText invocation

* Update parseText format

* Reorganize

* Update implementation and docs

* Fix some port setup issues
This commit is contained in:
toasted-nutbread 2021-02-08 17:53:12 -05:00 committed by GitHub
parent 7b28c2c532
commit 69e8addb47
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 173 additions and 94 deletions

View File

@ -877,11 +877,7 @@ class Backend {
this._anki.server = options.anki.server;
this._anki.enabled = options.anki.enable;
if (options.parsing.enableMecabParser) {
this._mecab.startListener();
} else {
this._mecab.stopListener();
}
this._mecab.setEnabled(options.parsing.enableMecabParser);
if (options.clipboard.enableBackgroundMonitor) {
this._clipboardMonitor.start();
@ -988,12 +984,19 @@ class Backend {
async _textParseMecab(text, options) {
const jp = this._japaneseUtil;
const {parsing: {readingMode}} = options;
let parseTextResults;
try {
parseTextResults = await this._mecab.parseText(text);
} catch (e) {
return [];
}
const results = [];
const rawResults = await this._mecab.parseText(text);
for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
for (const {name, lines} of parseTextResults) {
const result = [];
for (const parsedLine of parsedLines) {
for (const {expression, reading, source} of parsedLine) {
for (const line of lines) {
for (const {expression, reading, source} of line) {
const term = [];
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(
expression.length > 0 ? expression : source,
@ -1007,7 +1010,7 @@ class Backend {
}
result.push([{text: '\n', reading: ''}]);
}
results.push([mecabName, result]);
results.push([name, result]);
}
return results;
}

View File

@ -15,107 +15,183 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
* This class is used to connect Yomichan to a native component that is
* used to parse text into individual terms.
*/
class Mecab {
/**
* Creates a new instance of the class.
*/
constructor() {
this.port = null;
this.listeners = new Map();
this.sequence = 0;
this._port = null;
this._sequence = 0;
this._invocations = new Map();
this._eventListeners = new EventListenerCollection();
this._timeout = 5000;
this._version = 1;
this._remoteVersion = null;
this._enabled = false;
this._setupPortPromise = null;
}
onError(error) {
yomichan.logError(error);
/**
* Returns whether or not the component is enabled.
*/
isEnabled() {
return this._enabled;
}
async checkVersion() {
/**
* Changes whether or not the component connection is enabled.
* @param enabled A boolean indicating whether or not the component should be enabled.
*/
setEnabled(enabled) {
this._enabled = !!enabled;
if (!this._enabled && this._port !== null) {
this._clearPort();
}
}
/**
* Gets the version of the MeCab component.
* @returns The version of the MeCab component, or `null` if the component was not found.
*/
async getVersion() {
try {
const {version} = await this.invoke('get_version', {});
if (version !== Mecab.version) {
this.stopListener();
throw new Error(`Unsupported MeCab native messenger version ${version}. Yomichan supports version ${Mecab.version}.`);
}
} catch (error) {
this.onError(error);
await this._setupPort();
} catch (e) {
// NOP
}
return this._remoteVersion;
}
/**
* Parses a string of Japanese text into arrays of lines and terms.
*
* Return value format:
* ```js
* [
* {
* name: (string),
* lines: [
*     [
*         {expression: (string), reading: (string), source: (string)},
*         ...
*     ],
*     ...
* ]
* },
* ...
* ]
* ```
* @param text The string to parse.
* @returns A collection of parsing results of the text.
*/
async parseText(text) {
const rawResults = await this.invoke('parse_text', {text});
// {
// 'mecab-name': [
// // line1
// [
// {str expression: 'expression', str reading: 'reading', str source: 'source'},
// {str expression: 'expression2', str reading: 'reading2', str source: 'source2'}
// ],
// line2,
// ...
// ],
// 'mecab-name2': [...]
// }
const results = {};
for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
const result = [];
for (const parsedLine of parsedLines) {
const line = [];
for (const {expression, reading, source} of parsedLine) {
line.push({
expression: expression || '',
reading: reading || '',
source: source || ''
await this._setupPort();
const rawResults = await this._invoke('parse_text', {text});
return this._convertParseTextResults(rawResults);
}
// Private
_onMessage({sequence, data}) {
const invocation = this._invocations.get(sequence);
if (typeof invocation === 'undefined') { return; }
const {resolve, timer} = invocation;
clearTimeout(timer);
resolve(data);
this._invocations.delete(sequence);
}
_onDisconnect() {
if (this._port === null) { return; }
const e = chrome.runtime.lastError;
const error = new Error(e ? e.message : 'MeCab disconnected');
for (const {reject, timer} of this._invocations.values()) {
clearTimeout(timer);
reject(error);
}
this._clearPort();
}
_invoke(action, params) {
return new Promise((resolve, reject) => {
if (this._port === null) {
reject(new Error('Port disconnected'));
}
const sequence = this._sequence++;
const timer = setTimeout(() => {
this._invocations.delete(sequence);
reject(new Error(`MeCab invoke timed out after ${this._timeout}ms`));
}, this._timeout);
this._invocations.set(sequence, {resolve, reject, timer}, this._timeout);
this._port.postMessage({action, params, sequence});
});
}
result.push(line);
_convertParseTextResults(rawResults) {
const results = [];
for (const [name, rawLines] of Object.entries(rawResults)) {
const lines = [];
for (const rawLine of rawLines) {
const line = [];
for (let {expression, reading, source} of rawLine) {
if (typeof expression !== 'string') { expression = ''; }
if (typeof reading !== 'string') { reading = ''; }
if (typeof source !== 'string') { source = ''; }
line.push({expression, reading, source});
}
results[mecabName] = result;
lines.push(line);
}
results.push({name, lines});
}
return results;
}
startListener() {
if (this.port !== null) { return; }
this.port = chrome.runtime.connectNative('yomichan_mecab');
this.port.onMessage.addListener(this.onNativeMessage.bind(this));
this.checkVersion();
async _setupPort() {
if (!this._enabled) {
throw new Error('MeCab not enabled');
}
if (this._setupPortPromise === null) {
this._setupPortPromise = this._setupPort2();
}
try {
await this._setupPortPromise;
} catch (e) {
throw new Error(e.message);
}
}
stopListener() {
if (this.port === null) { return; }
this.port.disconnect();
this.port = null;
this.listeners.clear();
this.sequence = 0;
async _setupPort2() {
const port = chrome.runtime.connectNative('yomichan_mecab');
this._eventListeners.addListener(port.onMessage, this._onMessage.bind(this));
this._eventListeners.addListener(port.onDisconnect, this._onDisconnect.bind(this));
this._port = port;
try {
const {version} = await this._invoke('get_version', {});
this._remoteVersion = version;
if (version !== this._version) {
throw new Error(`Unsupported MeCab native messenger version ${version}. Yomichan supports version ${this._version}.`);
}
} catch (e) {
if (this._port === port) {
this._clearPort();
}
throw e;
}
}
onNativeMessage({sequence, data}) {
const listener = this.listeners.get(sequence);
if (typeof listener === 'undefined') { return; }
const {callback, timer} = listener;
clearTimeout(timer);
callback(data);
this.listeners.delete(sequence);
}
invoke(action, params) {
if (this.port === null) {
return Promise.resolve({});
}
return new Promise((resolve, reject) => {
const sequence = this.sequence++;
this.listeners.set(sequence, {
callback: resolve,
timer: setTimeout(() => {
this.listeners.delete(sequence);
reject(new Error(`Mecab invoke timed out in ${Mecab.timeout} ms`));
}, Mecab.timeout)
});
this.port.postMessage({action, params, sequence});
});
_clearPort() {
this._port.disconnect();
this._port = null;
this._invocations.clear();
this._eventListeners.removeAllEventListeners();
this._sequence = 0;
this._setupPortPromise = null;
}
}
Mecab.timeout = 5000;
Mecab.version = 1;