From 24ec22b2e1095a18d2031f697f4aaffb5a0c0609 Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Fri, 9 Jul 2021 16:05:57 -0400 Subject: [PATCH] Text parse updates (#1811) * Expose more settings to QueryParser * Rename textParse to parseText * Refactor * Convert reading inside QueryParser rather than Backend * Remove use of readingMode from backend * Rename * Update parseText args * Simplify * Update results * Move use of optionsContext * Run mecab and internal parser in parallel * Added "dictionary-reading" reading mode * Update romaji conversion case --- ext/data/schemas/options-schema.json | 2 +- ext/js/background/backend.js | 43 +++++++++++++------------- ext/js/comm/api.js | 4 +-- ext/js/display/display.js | 6 +++- ext/js/display/query-parser.js | 45 ++++++++++++++++++++-------- ext/js/language/japanese-util.js | 9 +++--- ext/settings.html | 1 + 7 files changed, 68 insertions(+), 42 deletions(-) diff --git a/ext/data/schemas/options-schema.json b/ext/data/schemas/options-schema.json index 4b97342c..9bdf8747 100644 --- a/ext/data/schemas/options-schema.json +++ b/ext/data/schemas/options-schema.json @@ -797,7 +797,7 @@ }, "readingMode": { "type": "string", - "enum": ["hiragana", "katakana", "romaji", "none"], + "enum": ["hiragana", "katakana", "romaji", "dictionary-reading", "none"], "default": "hiragana" } } diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js index 7f9fe7f8..c797bbf5 100644 --- a/ext/js/background/backend.js +++ b/ext/js/background/backend.js @@ -90,7 +90,7 @@ class Backend { ['optionsGetFull', {async: false, contentScript: true, handler: this._onApiOptionsGetFull.bind(this)}], ['kanjiFind', {async: true, contentScript: true, handler: this._onApiKanjiFind.bind(this)}], ['termsFind', {async: true, contentScript: true, handler: this._onApiTermsFind.bind(this)}], - ['textParse', {async: true, contentScript: true, handler: this._onApiTextParse.bind(this)}], + ['parseText', {async: true, contentScript: true, handler: this._onApiParseText.bind(this)}], ['getAnkiConnectVersion', {async: true, contentScript: true, handler: this._onApGetAnkiConnectVersion.bind(this)}], ['isAnkiConnected', {async: true, contentScript: true, handler: this._onApiIsAnkiConnected.bind(this)}], ['addAnkiNote', {async: true, contentScript: true, handler: this._onApiAddAnkiNote.bind(this)}], @@ -417,26 +417,30 @@ class Backend { return {dictionaryEntries, originalTextLength}; } - async _onApiTextParse({text, optionsContext}) { - const options = this._getProfileOptions(optionsContext); + async _onApiParseText({text, optionsContext, scanLength, useInternalParser, useMecabParser}) { + const [internalResults, mecabResults] = await Promise.all([ + (useInternalParser ? this._textParseScanning(text, scanLength, optionsContext) : null), + (useMecabParser ? this._textParseMecab(text) : null) + ]); + const results = []; - if (options.parsing.enableScanningParser) { + if (internalResults !== null) { results.push({ - source: 'scanning-parser', id: 'scan', - content: await this._textParseScanning(text, options) + source: 'scanning-parser', + dictionary: null, + content: internalResults }); } - if (options.parsing.enableMecabParser) { - const mecabResults = await this._textParseMecab(text, options); - for (const [mecabDictName, mecabDictResults] of mecabResults) { + if (mecabResults !== null) { + for (const [dictionary, content] of mecabResults) { results.push({ + id: `mecab-${dictionary}`, source: 'mecab', - dictionary: mecabDictName, - id: `mecab-${mecabDictName}`, - content: mecabDictResults + dictionary, + content }); } } @@ -1042,10 +1046,10 @@ class Backend { return true; } - async _textParseScanning(text, options) { + async _textParseScanning(text, scanLength, optionsContext) { const jp = this._japaneseUtil; - const {scanning: {length: scanningLength}, parsing: {readingMode}} = options; const mode = 'simple'; + const options = this._getProfileOptions(optionsContext); const findTermsOptions = this._getTranslatorFindTermsOptions(mode, {wildcard: null}, options); const results = []; let previousUngroupedSegment = null; @@ -1054,7 +1058,7 @@ class Backend { while (i < ii) { const {dictionaryEntries, originalTextLength} = await this._translator.findTerms( mode, - text.substring(i, i + scanningLength), + text.substring(i, i + scanLength), findTermsOptions ); const codePoint = text.codePointAt(i); @@ -1069,8 +1073,7 @@ class Backend { const source = text.substring(i, i + originalTextLength); const textSegments = []; for (const {text: text2, reading: reading2} of jp.distributeFuriganaInflected(term, reading, source)) { - const reading3 = jp.convertReading(text2, reading2, readingMode); - textSegments.push({text: text2, reading: reading3}); + textSegments.push({text: text2, reading: reading2}); } results.push(textSegments); i += originalTextLength; @@ -1087,9 +1090,8 @@ class Backend { return results; } - async _textParseMecab(text, options) { + async _textParseMecab(text) { const jp = this._japaneseUtil; - const {parsing: {readingMode}} = options; let parseTextResults; try { @@ -1109,8 +1111,7 @@ class Backend { jp.convertKatakanaToHiragana(reading), source )) { - const reading3 = jp.convertReading(text2, reading2, readingMode); - termParts.push({text: text2, reading: reading3}); + termParts.push({text: text2, reading: reading2}); } result.push(termParts); } diff --git a/ext/js/comm/api.js b/ext/js/comm/api.js index 7c89f0c5..3ac1d3f7 100644 --- a/ext/js/comm/api.js +++ b/ext/js/comm/api.js @@ -32,8 +32,8 @@ class API { return this._invoke('termsFind', {text, details, optionsContext}); } - textParse(text, optionsContext) { - return this._invoke('textParse', {text, optionsContext}); + parseText(text, optionsContext, scanLength, useInternalParser, useMecabParser) { + return this._invoke('parseText', {text, optionsContext, scanLength, useInternalParser, useMecabParser}); } kanjiFind(text, optionsContext) { diff --git a/ext/js/display/display.js b/ext/js/display/display.js index 1f88258b..d79cc7e2 100644 --- a/ext/js/display/display.js +++ b/ext/js/display/display.js @@ -83,7 +83,8 @@ class Display extends EventDispatcher { this._queryParserContainer = document.querySelector('#query-parser-container'); this._queryParser = new QueryParser({ getSearchContext: this._getSearchContext.bind(this), - documentUtil: this._documentUtil + documentUtil: this._documentUtil, + japaneseUtil }); this._contentScrollElement = document.querySelector('#content-scroll'); this._contentScrollBodyElement = document.querySelector('#content-body'); @@ -312,6 +313,9 @@ class Display extends EventDispatcher { this._queryParser.setOptions({ selectedParser: options.parsing.selectedParser, termSpacing: options.parsing.termSpacing, + readingMode: options.parsing.readingMode, + useInternalParser: options.parsing.enableScanningParser, + useMecabParser: options.parsing.enableMecabParser, scanning: { inputs: scanningOptions.inputs, deepContentScan: scanningOptions.deepDomScan, diff --git a/ext/js/display/query-parser.js b/ext/js/display/query-parser.js index 39b09646..0acf6ec8 100644 --- a/ext/js/display/query-parser.js +++ b/ext/js/display/query-parser.js @@ -20,13 +20,18 @@ */ class QueryParser extends EventDispatcher { - constructor({getSearchContext, documentUtil}) { + constructor({getSearchContext, documentUtil, japaneseUtil}) { super(); this._getSearchContext = getSearchContext; this._documentUtil = documentUtil; + this._japaneseUtil = japaneseUtil; this._text = ''; this._setTextToken = null; this._selectedParser = null; + this._readingMode = 'none'; + this._scanLength = 1; + this._useInternalParser = true; + this._useMecabParser = false; this._parseResults = []; this._queryParser = document.querySelector('#query-parser-content'); this._queryParserModeContainer = document.querySelector('#query-parser-mode-container'); @@ -52,7 +57,7 @@ class QueryParser extends EventDispatcher { this._queryParserModeSelect.addEventListener('change', this._onParserChange.bind(this), false); } - setOptions({selectedParser, termSpacing, scanning}) { + setOptions({selectedParser, termSpacing, readingMode, useInternalParser, useMecabParser, scanning}) { let selectedParserChanged = false; if (selectedParser === null || typeof selectedParser === 'string') { selectedParserChanged = (this._selectedParser !== selectedParser); @@ -61,7 +66,20 @@ class QueryParser extends EventDispatcher { if (typeof termSpacing === 'boolean') { this._queryParser.dataset.termSpacing = `${termSpacing}`; } + if (typeof readingMode === 'string') { + this._readingMode = readingMode; + } + if (typeof useInternalParser === 'boolean') { + this._useInternalParser = useInternalParser; + } + if (typeof useMecabParser === 'boolean') { + this._useMecabParser = useMecabParser; + } if (scanning !== null && typeof scanning === 'object') { + const {scanLength} = scanning; + if (typeof scanLength === 'number') { + this._scanLength = scanLength; + } this._textScanner.setOptions(scanning); } this._textScanner.setEnabled(true); @@ -76,7 +94,7 @@ class QueryParser extends EventDispatcher { const token = {}; this._setTextToken = token; - this._parseResults = await yomichan.api.textParse(text, this._getOptionsContext()); + this._parseResults = await yomichan.api.parseText(text, this._getOptionsContext(), this._scanLength, this._useInternalParser, this._useMecabParser); if (this._setTextToken !== token) { return; } this._refreshSelectedParser(); @@ -189,16 +207,19 @@ class QueryParser extends EventDispatcher { select.selectedIndex = selectedIndex; } - _createParseResult(terms) { + _createParseResult(data) { + const jp = this._japaneseUtil; + const readingMode = this._readingMode; const fragment = document.createDocumentFragment(); - for (const term of terms) { + for (const term of data) { const termNode = document.createElement('span'); termNode.className = 'query-parser-term'; - for (const segment of term) { - if (segment.reading.trim().length === 0) { - termNode.appendChild(document.createTextNode(segment.text)); + for (const {text, reading} of term) { + if (reading.length === 0) { + termNode.appendChild(document.createTextNode(text)); } else { - termNode.appendChild(this._createSegment(segment)); + const reading2 = jp.convertReading(text, reading, readingMode); + termNode.appendChild(this._createSegment(text, reading2)); } } fragment.appendChild(termNode); @@ -206,7 +227,7 @@ class QueryParser extends EventDispatcher { return fragment; } - _createSegment(segment) { + _createSegment(text, reading) { const segmentNode = document.createElement('ruby'); segmentNode.className = 'query-parser-segment'; @@ -219,8 +240,8 @@ class QueryParser extends EventDispatcher { segmentNode.appendChild(textNode); segmentNode.appendChild(readingNode); - textNode.textContent = segment.text; - readingNode.textContent = segment.reading; + textNode.textContent = text; + readingNode.textContent = reading; return segmentNode; } diff --git a/ext/js/language/japanese-util.js b/ext/js/language/japanese-util.js index 8780c24c..7ee726b2 100644 --- a/ext/js/language/japanese-util.js +++ b/ext/js/language/japanese-util.js @@ -322,14 +322,13 @@ const JapaneseUtil = (() => { case 'katakana': return this.convertHiraganaToKatakana(reading); case 'romaji': - if (reading) { + if (reading.length > 0) { return this.convertToRomaji(reading); + } else if (this.isStringEntirelyKana(term)) { + return this.convertToRomaji(term); } else { - if (this.isStringEntirelyKana(term)) { - return this.convertToRomaji(term); - } + return reading; } - return reading; case 'none': return ''; default: diff --git a/ext/settings.html b/ext/settings.html index 49bcd722..a8c8149b 100644 --- a/ext/settings.html +++ b/ext/settings.html @@ -1224,6 +1224,7 @@ +