Text parse updates (#1811)

* Expose more settings to QueryParser

* Rename textParse to parseText

* Refactor

* Convert reading inside QueryParser rather than Backend

* Remove use of readingMode from backend

* Rename

* Update parseText args

* Simplify

* Update results

* Move use of optionsContext

* Run mecab and internal parser in parallel

* Added "dictionary-reading" reading mode

* Update romaji conversion case
This commit is contained in:
toasted-nutbread 2021-07-09 16:05:57 -04:00 committed by GitHub
parent 315dc425e4
commit 24ec22b2e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 68 additions and 42 deletions

View File

@ -797,7 +797,7 @@
},
"readingMode": {
"type": "string",
"enum": ["hiragana", "katakana", "romaji", "none"],
"enum": ["hiragana", "katakana", "romaji", "dictionary-reading", "none"],
"default": "hiragana"
}
}

View File

@ -90,7 +90,7 @@ class Backend {
['optionsGetFull', {async: false, contentScript: true, handler: this._onApiOptionsGetFull.bind(this)}],
['kanjiFind', {async: true, contentScript: true, handler: this._onApiKanjiFind.bind(this)}],
['termsFind', {async: true, contentScript: true, handler: this._onApiTermsFind.bind(this)}],
['textParse', {async: true, contentScript: true, handler: this._onApiTextParse.bind(this)}],
['parseText', {async: true, contentScript: true, handler: this._onApiParseText.bind(this)}],
['getAnkiConnectVersion', {async: true, contentScript: true, handler: this._onApGetAnkiConnectVersion.bind(this)}],
['isAnkiConnected', {async: true, contentScript: true, handler: this._onApiIsAnkiConnected.bind(this)}],
['addAnkiNote', {async: true, contentScript: true, handler: this._onApiAddAnkiNote.bind(this)}],
@ -417,26 +417,30 @@ class Backend {
return {dictionaryEntries, originalTextLength};
}
async _onApiTextParse({text, optionsContext}) {
const options = this._getProfileOptions(optionsContext);
async _onApiParseText({text, optionsContext, scanLength, useInternalParser, useMecabParser}) {
const [internalResults, mecabResults] = await Promise.all([
(useInternalParser ? this._textParseScanning(text, scanLength, optionsContext) : null),
(useMecabParser ? this._textParseMecab(text) : null)
]);
const results = [];
if (options.parsing.enableScanningParser) {
if (internalResults !== null) {
results.push({
source: 'scanning-parser',
id: 'scan',
content: await this._textParseScanning(text, options)
source: 'scanning-parser',
dictionary: null,
content: internalResults
});
}
if (options.parsing.enableMecabParser) {
const mecabResults = await this._textParseMecab(text, options);
for (const [mecabDictName, mecabDictResults] of mecabResults) {
if (mecabResults !== null) {
for (const [dictionary, content] of mecabResults) {
results.push({
id: `mecab-${dictionary}`,
source: 'mecab',
dictionary: mecabDictName,
id: `mecab-${mecabDictName}`,
content: mecabDictResults
dictionary,
content
});
}
}
@ -1042,10 +1046,10 @@ class Backend {
return true;
}
async _textParseScanning(text, options) {
async _textParseScanning(text, scanLength, optionsContext) {
const jp = this._japaneseUtil;
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
const mode = 'simple';
const options = this._getProfileOptions(optionsContext);
const findTermsOptions = this._getTranslatorFindTermsOptions(mode, {wildcard: null}, options);
const results = [];
let previousUngroupedSegment = null;
@ -1054,7 +1058,7 @@ class Backend {
while (i < ii) {
const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(
mode,
text.substring(i, i + scanningLength),
text.substring(i, i + scanLength),
findTermsOptions
);
const codePoint = text.codePointAt(i);
@ -1069,8 +1073,7 @@ class Backend {
const source = text.substring(i, i + originalTextLength);
const textSegments = [];
for (const {text: text2, reading: reading2} of jp.distributeFuriganaInflected(term, reading, source)) {
const reading3 = jp.convertReading(text2, reading2, readingMode);
textSegments.push({text: text2, reading: reading3});
textSegments.push({text: text2, reading: reading2});
}
results.push(textSegments);
i += originalTextLength;
@ -1087,9 +1090,8 @@ class Backend {
return results;
}
async _textParseMecab(text, options) {
async _textParseMecab(text) {
const jp = this._japaneseUtil;
const {parsing: {readingMode}} = options;
let parseTextResults;
try {
@ -1109,8 +1111,7 @@ class Backend {
jp.convertKatakanaToHiragana(reading),
source
)) {
const reading3 = jp.convertReading(text2, reading2, readingMode);
termParts.push({text: text2, reading: reading3});
termParts.push({text: text2, reading: reading2});
}
result.push(termParts);
}

View File

@ -32,8 +32,8 @@ class API {
return this._invoke('termsFind', {text, details, optionsContext});
}
textParse(text, optionsContext) {
return this._invoke('textParse', {text, optionsContext});
parseText(text, optionsContext, scanLength, useInternalParser, useMecabParser) {
return this._invoke('parseText', {text, optionsContext, scanLength, useInternalParser, useMecabParser});
}
kanjiFind(text, optionsContext) {

View File

@ -83,7 +83,8 @@ class Display extends EventDispatcher {
this._queryParserContainer = document.querySelector('#query-parser-container');
this._queryParser = new QueryParser({
getSearchContext: this._getSearchContext.bind(this),
documentUtil: this._documentUtil
documentUtil: this._documentUtil,
japaneseUtil
});
this._contentScrollElement = document.querySelector('#content-scroll');
this._contentScrollBodyElement = document.querySelector('#content-body');
@ -312,6 +313,9 @@ class Display extends EventDispatcher {
this._queryParser.setOptions({
selectedParser: options.parsing.selectedParser,
termSpacing: options.parsing.termSpacing,
readingMode: options.parsing.readingMode,
useInternalParser: options.parsing.enableScanningParser,
useMecabParser: options.parsing.enableMecabParser,
scanning: {
inputs: scanningOptions.inputs,
deepContentScan: scanningOptions.deepDomScan,

View File

@ -20,13 +20,18 @@
*/
class QueryParser extends EventDispatcher {
constructor({getSearchContext, documentUtil}) {
constructor({getSearchContext, documentUtil, japaneseUtil}) {
super();
this._getSearchContext = getSearchContext;
this._documentUtil = documentUtil;
this._japaneseUtil = japaneseUtil;
this._text = '';
this._setTextToken = null;
this._selectedParser = null;
this._readingMode = 'none';
this._scanLength = 1;
this._useInternalParser = true;
this._useMecabParser = false;
this._parseResults = [];
this._queryParser = document.querySelector('#query-parser-content');
this._queryParserModeContainer = document.querySelector('#query-parser-mode-container');
@ -52,7 +57,7 @@ class QueryParser extends EventDispatcher {
this._queryParserModeSelect.addEventListener('change', this._onParserChange.bind(this), false);
}
setOptions({selectedParser, termSpacing, scanning}) {
setOptions({selectedParser, termSpacing, readingMode, useInternalParser, useMecabParser, scanning}) {
let selectedParserChanged = false;
if (selectedParser === null || typeof selectedParser === 'string') {
selectedParserChanged = (this._selectedParser !== selectedParser);
@ -61,7 +66,20 @@ class QueryParser extends EventDispatcher {
if (typeof termSpacing === 'boolean') {
this._queryParser.dataset.termSpacing = `${termSpacing}`;
}
if (typeof readingMode === 'string') {
this._readingMode = readingMode;
}
if (typeof useInternalParser === 'boolean') {
this._useInternalParser = useInternalParser;
}
if (typeof useMecabParser === 'boolean') {
this._useMecabParser = useMecabParser;
}
if (scanning !== null && typeof scanning === 'object') {
const {scanLength} = scanning;
if (typeof scanLength === 'number') {
this._scanLength = scanLength;
}
this._textScanner.setOptions(scanning);
}
this._textScanner.setEnabled(true);
@ -76,7 +94,7 @@ class QueryParser extends EventDispatcher {
const token = {};
this._setTextToken = token;
this._parseResults = await yomichan.api.textParse(text, this._getOptionsContext());
this._parseResults = await yomichan.api.parseText(text, this._getOptionsContext(), this._scanLength, this._useInternalParser, this._useMecabParser);
if (this._setTextToken !== token) { return; }
this._refreshSelectedParser();
@ -189,16 +207,19 @@ class QueryParser extends EventDispatcher {
select.selectedIndex = selectedIndex;
}
_createParseResult(terms) {
_createParseResult(data) {
const jp = this._japaneseUtil;
const readingMode = this._readingMode;
const fragment = document.createDocumentFragment();
for (const term of terms) {
for (const term of data) {
const termNode = document.createElement('span');
termNode.className = 'query-parser-term';
for (const segment of term) {
if (segment.reading.trim().length === 0) {
termNode.appendChild(document.createTextNode(segment.text));
for (const {text, reading} of term) {
if (reading.length === 0) {
termNode.appendChild(document.createTextNode(text));
} else {
termNode.appendChild(this._createSegment(segment));
const reading2 = jp.convertReading(text, reading, readingMode);
termNode.appendChild(this._createSegment(text, reading2));
}
}
fragment.appendChild(termNode);
@ -206,7 +227,7 @@ class QueryParser extends EventDispatcher {
return fragment;
}
_createSegment(segment) {
_createSegment(text, reading) {
const segmentNode = document.createElement('ruby');
segmentNode.className = 'query-parser-segment';
@ -219,8 +240,8 @@ class QueryParser extends EventDispatcher {
segmentNode.appendChild(textNode);
segmentNode.appendChild(readingNode);
textNode.textContent = segment.text;
readingNode.textContent = segment.reading;
textNode.textContent = text;
readingNode.textContent = reading;
return segmentNode;
}

View File

@ -322,14 +322,13 @@ const JapaneseUtil = (() => {
case 'katakana':
return this.convertHiraganaToKatakana(reading);
case 'romaji':
if (reading) {
if (reading.length > 0) {
return this.convertToRomaji(reading);
} else if (this.isStringEntirelyKana(term)) {
return this.convertToRomaji(term);
} else {
if (this.isStringEntirelyKana(term)) {
return this.convertToRomaji(term);
}
return reading;
}
return reading;
case 'none':
return '';
default:

View File

@ -1224,6 +1224,7 @@
<option value="hiragana">ひらがな</option>
<option value="katakana">カタカナ</option>
<option value="romaji">Romaji</option>
<option value="dictionary-reading">Dictionary reading</option>
</select>
</div>
</div></div>