Text parse updates (#1811)

* Expose more settings to QueryParser

* Rename textParse to parseText

* Refactor

* Convert reading inside QueryParser rather than Backend

* Remove use of readingMode from backend

* Rename

* Update parseText args

* Simplify

* Update results

* Move use of optionsContext

* Run mecab and internal parser in parallel

* Added "dictionary-reading" reading mode

* Update romaji conversion case
This commit is contained in:
toasted-nutbread 2021-07-09 16:05:57 -04:00 committed by GitHub
parent 315dc425e4
commit 24ec22b2e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 68 additions and 42 deletions

View File

@@ -797,7 +797,7 @@
}, },
"readingMode": { "readingMode": {
"type": "string", "type": "string",
"enum": ["hiragana", "katakana", "romaji", "none"], "enum": ["hiragana", "katakana", "romaji", "dictionary-reading", "none"],
"default": "hiragana" "default": "hiragana"
} }
} }

View File

@@ -90,7 +90,7 @@ class Backend {
['optionsGetFull', {async: false, contentScript: true, handler: this._onApiOptionsGetFull.bind(this)}], ['optionsGetFull', {async: false, contentScript: true, handler: this._onApiOptionsGetFull.bind(this)}],
['kanjiFind', {async: true, contentScript: true, handler: this._onApiKanjiFind.bind(this)}], ['kanjiFind', {async: true, contentScript: true, handler: this._onApiKanjiFind.bind(this)}],
['termsFind', {async: true, contentScript: true, handler: this._onApiTermsFind.bind(this)}], ['termsFind', {async: true, contentScript: true, handler: this._onApiTermsFind.bind(this)}],
['textParse', {async: true, contentScript: true, handler: this._onApiTextParse.bind(this)}], ['parseText', {async: true, contentScript: true, handler: this._onApiParseText.bind(this)}],
['getAnkiConnectVersion', {async: true, contentScript: true, handler: this._onApGetAnkiConnectVersion.bind(this)}], ['getAnkiConnectVersion', {async: true, contentScript: true, handler: this._onApGetAnkiConnectVersion.bind(this)}],
['isAnkiConnected', {async: true, contentScript: true, handler: this._onApiIsAnkiConnected.bind(this)}], ['isAnkiConnected', {async: true, contentScript: true, handler: this._onApiIsAnkiConnected.bind(this)}],
['addAnkiNote', {async: true, contentScript: true, handler: this._onApiAddAnkiNote.bind(this)}], ['addAnkiNote', {async: true, contentScript: true, handler: this._onApiAddAnkiNote.bind(this)}],
@@ -417,26 +417,30 @@ class Backend {
return {dictionaryEntries, originalTextLength}; return {dictionaryEntries, originalTextLength};
} }
async _onApiTextParse({text, optionsContext}) { async _onApiParseText({text, optionsContext, scanLength, useInternalParser, useMecabParser}) {
const options = this._getProfileOptions(optionsContext); const [internalResults, mecabResults] = await Promise.all([
(useInternalParser ? this._textParseScanning(text, scanLength, optionsContext) : null),
(useMecabParser ? this._textParseMecab(text) : null)
]);
const results = []; const results = [];
if (options.parsing.enableScanningParser) { if (internalResults !== null) {
results.push({ results.push({
source: 'scanning-parser',
id: 'scan', id: 'scan',
content: await this._textParseScanning(text, options) source: 'scanning-parser',
dictionary: null,
content: internalResults
}); });
} }
if (options.parsing.enableMecabParser) { if (mecabResults !== null) {
const mecabResults = await this._textParseMecab(text, options); for (const [dictionary, content] of mecabResults) {
for (const [mecabDictName, mecabDictResults] of mecabResults) {
results.push({ results.push({
id: `mecab-${dictionary}`,
source: 'mecab', source: 'mecab',
dictionary: mecabDictName, dictionary,
id: `mecab-${mecabDictName}`, content
content: mecabDictResults
}); });
} }
} }
@@ -1042,10 +1046,10 @@ class Backend {
return true; return true;
} }
async _textParseScanning(text, options) { async _textParseScanning(text, scanLength, optionsContext) {
const jp = this._japaneseUtil; const jp = this._japaneseUtil;
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
const mode = 'simple'; const mode = 'simple';
const options = this._getProfileOptions(optionsContext);
const findTermsOptions = this._getTranslatorFindTermsOptions(mode, {wildcard: null}, options); const findTermsOptions = this._getTranslatorFindTermsOptions(mode, {wildcard: null}, options);
const results = []; const results = [];
let previousUngroupedSegment = null; let previousUngroupedSegment = null;
@@ -1054,7 +1058,7 @@ class Backend {
while (i < ii) { while (i < ii) {
const {dictionaryEntries, originalTextLength} = await this._translator.findTerms( const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(
mode, mode,
text.substring(i, i + scanningLength), text.substring(i, i + scanLength),
findTermsOptions findTermsOptions
); );
const codePoint = text.codePointAt(i); const codePoint = text.codePointAt(i);
@@ -1069,8 +1073,7 @@ class Backend {
const source = text.substring(i, i + originalTextLength); const source = text.substring(i, i + originalTextLength);
const textSegments = []; const textSegments = [];
for (const {text: text2, reading: reading2} of jp.distributeFuriganaInflected(term, reading, source)) { for (const {text: text2, reading: reading2} of jp.distributeFuriganaInflected(term, reading, source)) {
const reading3 = jp.convertReading(text2, reading2, readingMode); textSegments.push({text: text2, reading: reading2});
textSegments.push({text: text2, reading: reading3});
} }
results.push(textSegments); results.push(textSegments);
i += originalTextLength; i += originalTextLength;
@@ -1087,9 +1090,8 @@ class Backend {
return results; return results;
} }
async _textParseMecab(text, options) { async _textParseMecab(text) {
const jp = this._japaneseUtil; const jp = this._japaneseUtil;
const {parsing: {readingMode}} = options;
let parseTextResults; let parseTextResults;
try { try {
@@ -1109,8 +1111,7 @@ class Backend {
jp.convertKatakanaToHiragana(reading), jp.convertKatakanaToHiragana(reading),
source source
)) { )) {
const reading3 = jp.convertReading(text2, reading2, readingMode); termParts.push({text: text2, reading: reading2});
termParts.push({text: text2, reading: reading3});
} }
result.push(termParts); result.push(termParts);
} }

View File

@@ -32,8 +32,8 @@ class API {
return this._invoke('termsFind', {text, details, optionsContext}); return this._invoke('termsFind', {text, details, optionsContext});
} }
textParse(text, optionsContext) { parseText(text, optionsContext, scanLength, useInternalParser, useMecabParser) {
return this._invoke('textParse', {text, optionsContext}); return this._invoke('parseText', {text, optionsContext, scanLength, useInternalParser, useMecabParser});
} }
kanjiFind(text, optionsContext) { kanjiFind(text, optionsContext) {

View File

@@ -83,7 +83,8 @@ class Display extends EventDispatcher {
this._queryParserContainer = document.querySelector('#query-parser-container'); this._queryParserContainer = document.querySelector('#query-parser-container');
this._queryParser = new QueryParser({ this._queryParser = new QueryParser({
getSearchContext: this._getSearchContext.bind(this), getSearchContext: this._getSearchContext.bind(this),
documentUtil: this._documentUtil documentUtil: this._documentUtil,
japaneseUtil
}); });
this._contentScrollElement = document.querySelector('#content-scroll'); this._contentScrollElement = document.querySelector('#content-scroll');
this._contentScrollBodyElement = document.querySelector('#content-body'); this._contentScrollBodyElement = document.querySelector('#content-body');
@@ -312,6 +313,9 @@ class Display extends EventDispatcher {
this._queryParser.setOptions({ this._queryParser.setOptions({
selectedParser: options.parsing.selectedParser, selectedParser: options.parsing.selectedParser,
termSpacing: options.parsing.termSpacing, termSpacing: options.parsing.termSpacing,
readingMode: options.parsing.readingMode,
useInternalParser: options.parsing.enableScanningParser,
useMecabParser: options.parsing.enableMecabParser,
scanning: { scanning: {
inputs: scanningOptions.inputs, inputs: scanningOptions.inputs,
deepContentScan: scanningOptions.deepDomScan, deepContentScan: scanningOptions.deepDomScan,

View File

@@ -20,13 +20,18 @@
*/ */
class QueryParser extends EventDispatcher { class QueryParser extends EventDispatcher {
constructor({getSearchContext, documentUtil}) { constructor({getSearchContext, documentUtil, japaneseUtil}) {
super(); super();
this._getSearchContext = getSearchContext; this._getSearchContext = getSearchContext;
this._documentUtil = documentUtil; this._documentUtil = documentUtil;
this._japaneseUtil = japaneseUtil;
this._text = ''; this._text = '';
this._setTextToken = null; this._setTextToken = null;
this._selectedParser = null; this._selectedParser = null;
this._readingMode = 'none';
this._scanLength = 1;
this._useInternalParser = true;
this._useMecabParser = false;
this._parseResults = []; this._parseResults = [];
this._queryParser = document.querySelector('#query-parser-content'); this._queryParser = document.querySelector('#query-parser-content');
this._queryParserModeContainer = document.querySelector('#query-parser-mode-container'); this._queryParserModeContainer = document.querySelector('#query-parser-mode-container');
@@ -52,7 +57,7 @@ class QueryParser extends EventDispatcher {
this._queryParserModeSelect.addEventListener('change', this._onParserChange.bind(this), false); this._queryParserModeSelect.addEventListener('change', this._onParserChange.bind(this), false);
} }
setOptions({selectedParser, termSpacing, scanning}) { setOptions({selectedParser, termSpacing, readingMode, useInternalParser, useMecabParser, scanning}) {
let selectedParserChanged = false; let selectedParserChanged = false;
if (selectedParser === null || typeof selectedParser === 'string') { if (selectedParser === null || typeof selectedParser === 'string') {
selectedParserChanged = (this._selectedParser !== selectedParser); selectedParserChanged = (this._selectedParser !== selectedParser);
@@ -61,7 +66,20 @@ class QueryParser extends EventDispatcher {
if (typeof termSpacing === 'boolean') { if (typeof termSpacing === 'boolean') {
this._queryParser.dataset.termSpacing = `${termSpacing}`; this._queryParser.dataset.termSpacing = `${termSpacing}`;
} }
if (typeof readingMode === 'string') {
this._readingMode = readingMode;
}
if (typeof useInternalParser === 'boolean') {
this._useInternalParser = useInternalParser;
}
if (typeof useMecabParser === 'boolean') {
this._useMecabParser = useMecabParser;
}
if (scanning !== null && typeof scanning === 'object') { if (scanning !== null && typeof scanning === 'object') {
const {scanLength} = scanning;
if (typeof scanLength === 'number') {
this._scanLength = scanLength;
}
this._textScanner.setOptions(scanning); this._textScanner.setOptions(scanning);
} }
this._textScanner.setEnabled(true); this._textScanner.setEnabled(true);
@@ -76,7 +94,7 @@ class QueryParser extends EventDispatcher {
const token = {}; const token = {};
this._setTextToken = token; this._setTextToken = token;
this._parseResults = await yomichan.api.textParse(text, this._getOptionsContext()); this._parseResults = await yomichan.api.parseText(text, this._getOptionsContext(), this._scanLength, this._useInternalParser, this._useMecabParser);
if (this._setTextToken !== token) { return; } if (this._setTextToken !== token) { return; }
this._refreshSelectedParser(); this._refreshSelectedParser();
@@ -189,16 +207,19 @@ class QueryParser extends EventDispatcher {
select.selectedIndex = selectedIndex; select.selectedIndex = selectedIndex;
} }
_createParseResult(terms) { _createParseResult(data) {
const jp = this._japaneseUtil;
const readingMode = this._readingMode;
const fragment = document.createDocumentFragment(); const fragment = document.createDocumentFragment();
for (const term of terms) { for (const term of data) {
const termNode = document.createElement('span'); const termNode = document.createElement('span');
termNode.className = 'query-parser-term'; termNode.className = 'query-parser-term';
for (const segment of term) { for (const {text, reading} of term) {
if (segment.reading.trim().length === 0) { if (reading.length === 0) {
termNode.appendChild(document.createTextNode(segment.text)); termNode.appendChild(document.createTextNode(text));
} else { } else {
termNode.appendChild(this._createSegment(segment)); const reading2 = jp.convertReading(text, reading, readingMode);
termNode.appendChild(this._createSegment(text, reading2));
} }
} }
fragment.appendChild(termNode); fragment.appendChild(termNode);
@@ -206,7 +227,7 @@ class QueryParser extends EventDispatcher {
return fragment; return fragment;
} }
_createSegment(segment) { _createSegment(text, reading) {
const segmentNode = document.createElement('ruby'); const segmentNode = document.createElement('ruby');
segmentNode.className = 'query-parser-segment'; segmentNode.className = 'query-parser-segment';
@@ -219,8 +240,8 @@ class QueryParser extends EventDispatcher {
segmentNode.appendChild(textNode); segmentNode.appendChild(textNode);
segmentNode.appendChild(readingNode); segmentNode.appendChild(readingNode);
textNode.textContent = segment.text; textNode.textContent = text;
readingNode.textContent = segment.reading; readingNode.textContent = reading;
return segmentNode; return segmentNode;
} }

View File

@@ -322,14 +322,13 @@ const JapaneseUtil = (() => {
case 'katakana': case 'katakana':
return this.convertHiraganaToKatakana(reading); return this.convertHiraganaToKatakana(reading);
case 'romaji': case 'romaji':
if (reading) { if (reading.length > 0) {
return this.convertToRomaji(reading); return this.convertToRomaji(reading);
} else if (this.isStringEntirelyKana(term)) {
return this.convertToRomaji(term);
} else { } else {
if (this.isStringEntirelyKana(term)) { return reading;
return this.convertToRomaji(term);
}
} }
return reading;
case 'none': case 'none':
return ''; return '';
default: default:

View File

@@ -1224,6 +1224,7 @@
<option value="hiragana">ひらがな</option> <option value="hiragana">ひらがな</option>
<option value="katakana">カタカナ</option> <option value="katakana">カタカナ</option>
<option value="romaji">Romaji</option> <option value="romaji">Romaji</option>
<option value="dictionary-reading">Dictionary reading</option>
</select> </select>
</div> </div>
</div></div> </div></div>