Text parse updates (#1811)

* Expose more settings to QueryParser
* Rename textParse to parseText
* Refactor
* Convert reading inside QueryParser rather than Backend
* Remove use of readingMode from backend
* Rename
* Update parseText args
* Simplify
* Update results
* Move use of optionsContext
* Run mecab and internal parser in parallel
* Added "dictionary-reading" reading mode
* Update romaji conversion case
2021-07-09 16:05:57 -04:00 · 2021-07-09 16:05:57 -04:00 · 24ec22b2e1
commit 24ec22b2e1
parent 315dc425e4
7 changed files with 68 additions and 42 deletions
--- a/ext/data/schemas/options-schema.json
+++ b/ext/data/schemas/options-schema.json
@@ -797,7 +797,7 @@
                                    },
                                    "readingMode": {
                                        "type": "string",
-                                        "enum": ["hiragana", "katakana", "romaji", "none"],
+                                        "enum": ["hiragana", "katakana", "romaji", "dictionary-reading", "none"],
                                        "default": "hiragana"
                                    }
                                }
--- a/ext/js/background/backend.js
+++ b/ext/js/background/backend.js
@@ -90,7 +90,7 @@ class Backend {
            ['optionsGetFull',               {async: false, contentScript: true,  handler: this._onApiOptionsGetFull.bind(this)}],
            ['kanjiFind',                    {async: true,  contentScript: true,  handler: this._onApiKanjiFind.bind(this)}],
            ['termsFind',                    {async: true,  contentScript: true,  handler: this._onApiTermsFind.bind(this)}],
-            ['textParse',                    {async: true,  contentScript: true,  handler: this._onApiTextParse.bind(this)}],
+            ['parseText',                    {async: true,  contentScript: true,  handler: this._onApiParseText.bind(this)}],
            ['getAnkiConnectVersion',        {async: true,  contentScript: true,  handler: this._onApGetAnkiConnectVersion.bind(this)}],
            ['isAnkiConnected',              {async: true,  contentScript: true,  handler: this._onApiIsAnkiConnected.bind(this)}],
            ['addAnkiNote',                  {async: true,  contentScript: true,  handler: this._onApiAddAnkiNote.bind(this)}],
@@ -417,26 +417,30 @@ class Backend {
        return {dictionaryEntries, originalTextLength};
    }

-    async _onApiTextParse({text, optionsContext}) {
-        const options = this._getProfileOptions(optionsContext);
+    async _onApiParseText({text, optionsContext, scanLength, useInternalParser, useMecabParser}) {
+        const [internalResults, mecabResults] = await Promise.all([
+            (useInternalParser ? this._textParseScanning(text, scanLength, optionsContext) : null),
+            (useMecabParser ? this._textParseMecab(text) : null)
+        ]);
+
        const results = [];

-        if (options.parsing.enableScanningParser) {
+        if (internalResults !== null) {
            results.push({
-                source: 'scanning-parser',
                id: 'scan',
-                content: await this._textParseScanning(text, options)
+                source: 'scanning-parser',
+                dictionary: null,
+                content: internalResults
            });
        }

-        if (options.parsing.enableMecabParser) {
-            const mecabResults = await this._textParseMecab(text, options);
-            for (const [mecabDictName, mecabDictResults] of mecabResults) {
+        if (mecabResults !== null) {
+            for (const [dictionary, content] of mecabResults) {
                results.push({
+                    id: `mecab-${dictionary}`,
                    source: 'mecab',
-                    dictionary: mecabDictName,
-                    id: `mecab-${mecabDictName}`,
-                    content: mecabDictResults
+                    dictionary,
+                    content
                });
            }
        }
@@ -1042,10 +1046,10 @@ class Backend {
        return true;
    }

-    async _textParseScanning(text, options) {
+    async _textParseScanning(text, scanLength, optionsContext) {
        const jp = this._japaneseUtil;
-        const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
        const mode = 'simple';
+        const options = this._getProfileOptions(optionsContext);
        const findTermsOptions = this._getTranslatorFindTermsOptions(mode, {wildcard: null}, options);
        const results = [];
        let previousUngroupedSegment = null;
@@ -1054,7 +1058,7 @@ class Backend {
        while (i < ii) {
            const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(
                mode,
-                text.substring(i, i + scanningLength),
+                text.substring(i, i + scanLength),
                findTermsOptions
            );
            const codePoint = text.codePointAt(i);
@@ -1069,8 +1073,7 @@ class Backend {
                const source = text.substring(i, i + originalTextLength);
                const textSegments = [];
                for (const {text: text2, reading: reading2} of jp.distributeFuriganaInflected(term, reading, source)) {
-                    const reading3 = jp.convertReading(text2, reading2, readingMode);
-                    textSegments.push({text: text2, reading: reading3});
+                    textSegments.push({text: text2, reading: reading2});
                }
                results.push(textSegments);
                i += originalTextLength;
@@ -1087,9 +1090,8 @@ class Backend {
        return results;
    }

-    async _textParseMecab(text, options) {
+    async _textParseMecab(text) {
        const jp = this._japaneseUtil;
-        const {parsing: {readingMode}} = options;

        let parseTextResults;
        try {
@@ -1109,8 +1111,7 @@ class Backend {
                        jp.convertKatakanaToHiragana(reading),
                        source
                    )) {
-                        const reading3 = jp.convertReading(text2, reading2, readingMode);
-                        termParts.push({text: text2, reading: reading3});
+                        termParts.push({text: text2, reading: reading2});
                    }
                    result.push(termParts);
                }
--- a/ext/js/comm/api.js
+++ b/ext/js/comm/api.js
@@ -32,8 +32,8 @@ class API {
        return this._invoke('termsFind', {text, details, optionsContext});
    }

-    textParse(text, optionsContext) {
-        return this._invoke('textParse', {text, optionsContext});
+    parseText(text, optionsContext, scanLength, useInternalParser, useMecabParser) {
+        return this._invoke('parseText', {text, optionsContext, scanLength, useInternalParser, useMecabParser});
    }

    kanjiFind(text, optionsContext) {
--- a/ext/js/display/display.js
+++ b/ext/js/display/display.js
@@ -83,7 +83,8 @@ class Display extends EventDispatcher {
        this._queryParserContainer = document.querySelector('#query-parser-container');
        this._queryParser = new QueryParser({
            getSearchContext: this._getSearchContext.bind(this),
-            documentUtil: this._documentUtil
+            documentUtil: this._documentUtil,
+            japaneseUtil
        });
        this._contentScrollElement = document.querySelector('#content-scroll');
        this._contentScrollBodyElement = document.querySelector('#content-body');
@@ -312,6 +313,9 @@ class Display extends EventDispatcher {
        this._queryParser.setOptions({
            selectedParser: options.parsing.selectedParser,
            termSpacing: options.parsing.termSpacing,
+            readingMode: options.parsing.readingMode,
+            useInternalParser: options.parsing.enableScanningParser,
+            useMecabParser: options.parsing.enableMecabParser,
            scanning: {
                inputs: scanningOptions.inputs,
                deepContentScan: scanningOptions.deepDomScan,
--- a/ext/js/display/query-parser.js
+++ b/ext/js/display/query-parser.js
@@ -20,13 +20,18 @@
 */

 class QueryParser extends EventDispatcher {
-    constructor({getSearchContext, documentUtil}) {
+    constructor({getSearchContext, documentUtil, japaneseUtil}) {
        super();
        this._getSearchContext = getSearchContext;
        this._documentUtil = documentUtil;
+        this._japaneseUtil = japaneseUtil;
        this._text = '';
        this._setTextToken = null;
        this._selectedParser = null;
+        this._readingMode = 'none';
+        this._scanLength = 1;
+        this._useInternalParser = true;
+        this._useMecabParser = false;
        this._parseResults = [];
        this._queryParser = document.querySelector('#query-parser-content');
        this._queryParserModeContainer = document.querySelector('#query-parser-mode-container');
@@ -52,7 +57,7 @@ class QueryParser extends EventDispatcher {
        this._queryParserModeSelect.addEventListener('change', this._onParserChange.bind(this), false);
    }

-    setOptions({selectedParser, termSpacing, scanning}) {
+    setOptions({selectedParser, termSpacing, readingMode, useInternalParser, useMecabParser, scanning}) {
        let selectedParserChanged = false;
        if (selectedParser === null || typeof selectedParser === 'string') {
            selectedParserChanged = (this._selectedParser !== selectedParser);
@@ -61,7 +66,20 @@ class QueryParser extends EventDispatcher {
        if (typeof termSpacing === 'boolean') {
            this._queryParser.dataset.termSpacing = `${termSpacing}`;
        }
+        if (typeof readingMode === 'string') {
+            this._readingMode = readingMode;
+        }
+        if (typeof useInternalParser === 'boolean') {
+            this._useInternalParser = useInternalParser;
+        }
+        if (typeof useMecabParser === 'boolean') {
+            this._useMecabParser = useMecabParser;
+        }
        if (scanning !== null && typeof scanning === 'object') {
+            const {scanLength} = scanning;
+            if (typeof scanLength === 'number') {
+                this._scanLength = scanLength;
+            }
            this._textScanner.setOptions(scanning);
        }
        this._textScanner.setEnabled(true);
@@ -76,7 +94,7 @@ class QueryParser extends EventDispatcher {

        const token = {};
        this._setTextToken = token;
-        this._parseResults = await yomichan.api.textParse(text, this._getOptionsContext());
+        this._parseResults = await yomichan.api.parseText(text, this._getOptionsContext(), this._scanLength, this._useInternalParser, this._useMecabParser);
        if (this._setTextToken !== token) { return; }

        this._refreshSelectedParser();
@@ -189,16 +207,19 @@ class QueryParser extends EventDispatcher {
        select.selectedIndex = selectedIndex;
    }

-    _createParseResult(terms) {
+    _createParseResult(data) {
+        const jp = this._japaneseUtil;
+        const readingMode = this._readingMode;
        const fragment = document.createDocumentFragment();
-        for (const term of terms) {
+        for (const term of data) {
            const termNode = document.createElement('span');
            termNode.className = 'query-parser-term';
-            for (const segment of term) {
-                if (segment.reading.trim().length === 0) {
-                    termNode.appendChild(document.createTextNode(segment.text));
+            for (const {text, reading} of term) {
+                if (reading.length === 0) {
+                    termNode.appendChild(document.createTextNode(text));
                } else {
-                    termNode.appendChild(this._createSegment(segment));
+                    const reading2 = jp.convertReading(text, reading, readingMode);
+                    termNode.appendChild(this._createSegment(text, reading2));
                }
            }
            fragment.appendChild(termNode);
@@ -206,7 +227,7 @@ class QueryParser extends EventDispatcher {
        return fragment;
    }

-    _createSegment(segment) {
+    _createSegment(text, reading) {
        const segmentNode = document.createElement('ruby');
        segmentNode.className = 'query-parser-segment';

@@ -219,8 +240,8 @@ class QueryParser extends EventDispatcher {
        segmentNode.appendChild(textNode);
        segmentNode.appendChild(readingNode);

-        textNode.textContent = segment.text;
-        readingNode.textContent = segment.reading;
+        textNode.textContent = text;
+        readingNode.textContent = reading;

        return segmentNode;
    }
--- a/ext/js/language/japanese-util.js
+++ b/ext/js/language/japanese-util.js
@@ -322,14 +322,13 @@ const JapaneseUtil = (() => {
                case 'katakana':
                    return this.convertHiraganaToKatakana(reading);
                case 'romaji':
-                    if (reading) {
+                    if (reading.length > 0) {
                        return this.convertToRomaji(reading);
+                    } else if (this.isStringEntirelyKana(term)) {
+                        return this.convertToRomaji(term);
                    } else {
-                        if (this.isStringEntirelyKana(term)) {
-                            return this.convertToRomaji(term);
-                        }
+                        return reading;
                    }
-                    return reading;
                case 'none':
                    return '';
                default:
--- a/ext/settings.html
+++ b/ext/settings.html
@@ -1224,6 +1224,7 @@
                    <option value="hiragana">ひらがな</option>
                    <option value="katakana">カタカナ</option>
                    <option value="romaji">Romaji</option>
+                    <option value="dictionary-reading">Dictionary reading</option>
                </select>
            </div>
        </div></div>