Text parse updates (#1811)
* Expose more settings to QueryParser
* Rename textParse to parseText
* Refactor
* Convert reading inside QueryParser rather than Backend
* Remove use of readingMode from backend
* Rename
* Update parseText args
* Simplify
* Update results
* Move use of optionsContext
* Run mecab and internal parser in parallel
* Added "dictionary-reading" reading mode
* Update romaji conversion case
This commit is contained in:
parent 315dc425e4
commit 24ec22b2e1
@ -797,7 +797,7 @@
|
||||
},
|
||||
"readingMode": {
|
||||
"type": "string",
|
||||
"enum": ["hiragana", "katakana", "romaji", "none"],
|
||||
"enum": ["hiragana", "katakana", "romaji", "dictionary-reading", "none"],
|
||||
"default": "hiragana"
|
||||
}
|
||||
}
|
||||
|
@ -90,7 +90,7 @@ class Backend {
|
||||
['optionsGetFull', {async: false, contentScript: true, handler: this._onApiOptionsGetFull.bind(this)}],
|
||||
['kanjiFind', {async: true, contentScript: true, handler: this._onApiKanjiFind.bind(this)}],
|
||||
['termsFind', {async: true, contentScript: true, handler: this._onApiTermsFind.bind(this)}],
|
||||
['textParse', {async: true, contentScript: true, handler: this._onApiTextParse.bind(this)}],
|
||||
['parseText', {async: true, contentScript: true, handler: this._onApiParseText.bind(this)}],
|
||||
['getAnkiConnectVersion', {async: true, contentScript: true, handler: this._onApGetAnkiConnectVersion.bind(this)}],
|
||||
['isAnkiConnected', {async: true, contentScript: true, handler: this._onApiIsAnkiConnected.bind(this)}],
|
||||
['addAnkiNote', {async: true, contentScript: true, handler: this._onApiAddAnkiNote.bind(this)}],
|
||||
@ -417,26 +417,30 @@ class Backend {
|
||||
return {dictionaryEntries, originalTextLength};
|
||||
}
|
||||
|
||||
async _onApiTextParse({text, optionsContext}) {
|
||||
const options = this._getProfileOptions(optionsContext);
|
||||
async _onApiParseText({text, optionsContext, scanLength, useInternalParser, useMecabParser}) {
|
||||
const [internalResults, mecabResults] = await Promise.all([
|
||||
(useInternalParser ? this._textParseScanning(text, scanLength, optionsContext) : null),
|
||||
(useMecabParser ? this._textParseMecab(text) : null)
|
||||
]);
|
||||
|
||||
const results = [];
|
||||
|
||||
if (options.parsing.enableScanningParser) {
|
||||
if (internalResults !== null) {
|
||||
results.push({
|
||||
source: 'scanning-parser',
|
||||
id: 'scan',
|
||||
content: await this._textParseScanning(text, options)
|
||||
source: 'scanning-parser',
|
||||
dictionary: null,
|
||||
content: internalResults
|
||||
});
|
||||
}
|
||||
|
||||
if (options.parsing.enableMecabParser) {
|
||||
const mecabResults = await this._textParseMecab(text, options);
|
||||
for (const [mecabDictName, mecabDictResults] of mecabResults) {
|
||||
if (mecabResults !== null) {
|
||||
for (const [dictionary, content] of mecabResults) {
|
||||
results.push({
|
||||
id: `mecab-${dictionary}`,
|
||||
source: 'mecab',
|
||||
dictionary: mecabDictName,
|
||||
id: `mecab-${mecabDictName}`,
|
||||
content: mecabDictResults
|
||||
dictionary,
|
||||
content
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -1042,10 +1046,10 @@ class Backend {
|
||||
return true;
|
||||
}
|
||||
|
||||
async _textParseScanning(text, options) {
|
||||
async _textParseScanning(text, scanLength, optionsContext) {
|
||||
const jp = this._japaneseUtil;
|
||||
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
|
||||
const mode = 'simple';
|
||||
const options = this._getProfileOptions(optionsContext);
|
||||
const findTermsOptions = this._getTranslatorFindTermsOptions(mode, {wildcard: null}, options);
|
||||
const results = [];
|
||||
let previousUngroupedSegment = null;
|
||||
@ -1054,7 +1058,7 @@ class Backend {
|
||||
while (i < ii) {
|
||||
const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(
|
||||
mode,
|
||||
text.substring(i, i + scanningLength),
|
||||
text.substring(i, i + scanLength),
|
||||
findTermsOptions
|
||||
);
|
||||
const codePoint = text.codePointAt(i);
|
||||
@ -1069,8 +1073,7 @@ class Backend {
|
||||
const source = text.substring(i, i + originalTextLength);
|
||||
const textSegments = [];
|
||||
for (const {text: text2, reading: reading2} of jp.distributeFuriganaInflected(term, reading, source)) {
|
||||
const reading3 = jp.convertReading(text2, reading2, readingMode);
|
||||
textSegments.push({text: text2, reading: reading3});
|
||||
textSegments.push({text: text2, reading: reading2});
|
||||
}
|
||||
results.push(textSegments);
|
||||
i += originalTextLength;
|
||||
@ -1087,9 +1090,8 @@ class Backend {
|
||||
return results;
|
||||
}
|
||||
|
||||
async _textParseMecab(text, options) {
|
||||
async _textParseMecab(text) {
|
||||
const jp = this._japaneseUtil;
|
||||
const {parsing: {readingMode}} = options;
|
||||
|
||||
let parseTextResults;
|
||||
try {
|
||||
@ -1109,8 +1111,7 @@ class Backend {
|
||||
jp.convertKatakanaToHiragana(reading),
|
||||
source
|
||||
)) {
|
||||
const reading3 = jp.convertReading(text2, reading2, readingMode);
|
||||
termParts.push({text: text2, reading: reading3});
|
||||
termParts.push({text: text2, reading: reading2});
|
||||
}
|
||||
result.push(termParts);
|
||||
}
|
||||
|
@ -32,8 +32,8 @@ class API {
|
||||
return this._invoke('termsFind', {text, details, optionsContext});
|
||||
}
|
||||
|
||||
textParse(text, optionsContext) {
|
||||
return this._invoke('textParse', {text, optionsContext});
|
||||
parseText(text, optionsContext, scanLength, useInternalParser, useMecabParser) {
|
||||
return this._invoke('parseText', {text, optionsContext, scanLength, useInternalParser, useMecabParser});
|
||||
}
|
||||
|
||||
kanjiFind(text, optionsContext) {
|
||||
|
@ -83,7 +83,8 @@ class Display extends EventDispatcher {
|
||||
this._queryParserContainer = document.querySelector('#query-parser-container');
|
||||
this._queryParser = new QueryParser({
|
||||
getSearchContext: this._getSearchContext.bind(this),
|
||||
documentUtil: this._documentUtil
|
||||
documentUtil: this._documentUtil,
|
||||
japaneseUtil
|
||||
});
|
||||
this._contentScrollElement = document.querySelector('#content-scroll');
|
||||
this._contentScrollBodyElement = document.querySelector('#content-body');
|
||||
@ -312,6 +313,9 @@ class Display extends EventDispatcher {
|
||||
this._queryParser.setOptions({
|
||||
selectedParser: options.parsing.selectedParser,
|
||||
termSpacing: options.parsing.termSpacing,
|
||||
readingMode: options.parsing.readingMode,
|
||||
useInternalParser: options.parsing.enableScanningParser,
|
||||
useMecabParser: options.parsing.enableMecabParser,
|
||||
scanning: {
|
||||
inputs: scanningOptions.inputs,
|
||||
deepContentScan: scanningOptions.deepDomScan,
|
||||
|
@ -20,13 +20,18 @@
|
||||
*/
|
||||
|
||||
class QueryParser extends EventDispatcher {
|
||||
constructor({getSearchContext, documentUtil}) {
|
||||
constructor({getSearchContext, documentUtil, japaneseUtil}) {
|
||||
super();
|
||||
this._getSearchContext = getSearchContext;
|
||||
this._documentUtil = documentUtil;
|
||||
this._japaneseUtil = japaneseUtil;
|
||||
this._text = '';
|
||||
this._setTextToken = null;
|
||||
this._selectedParser = null;
|
||||
this._readingMode = 'none';
|
||||
this._scanLength = 1;
|
||||
this._useInternalParser = true;
|
||||
this._useMecabParser = false;
|
||||
this._parseResults = [];
|
||||
this._queryParser = document.querySelector('#query-parser-content');
|
||||
this._queryParserModeContainer = document.querySelector('#query-parser-mode-container');
|
||||
@ -52,7 +57,7 @@ class QueryParser extends EventDispatcher {
|
||||
this._queryParserModeSelect.addEventListener('change', this._onParserChange.bind(this), false);
|
||||
}
|
||||
|
||||
setOptions({selectedParser, termSpacing, scanning}) {
|
||||
setOptions({selectedParser, termSpacing, readingMode, useInternalParser, useMecabParser, scanning}) {
|
||||
let selectedParserChanged = false;
|
||||
if (selectedParser === null || typeof selectedParser === 'string') {
|
||||
selectedParserChanged = (this._selectedParser !== selectedParser);
|
||||
@ -61,7 +66,20 @@ class QueryParser extends EventDispatcher {
|
||||
if (typeof termSpacing === 'boolean') {
|
||||
this._queryParser.dataset.termSpacing = `${termSpacing}`;
|
||||
}
|
||||
if (typeof readingMode === 'string') {
|
||||
this._readingMode = readingMode;
|
||||
}
|
||||
if (typeof useInternalParser === 'boolean') {
|
||||
this._useInternalParser = useInternalParser;
|
||||
}
|
||||
if (typeof useMecabParser === 'boolean') {
|
||||
this._useMecabParser = useMecabParser;
|
||||
}
|
||||
if (scanning !== null && typeof scanning === 'object') {
|
||||
const {scanLength} = scanning;
|
||||
if (typeof scanLength === 'number') {
|
||||
this._scanLength = scanLength;
|
||||
}
|
||||
this._textScanner.setOptions(scanning);
|
||||
}
|
||||
this._textScanner.setEnabled(true);
|
||||
@ -76,7 +94,7 @@ class QueryParser extends EventDispatcher {
|
||||
|
||||
const token = {};
|
||||
this._setTextToken = token;
|
||||
this._parseResults = await yomichan.api.textParse(text, this._getOptionsContext());
|
||||
this._parseResults = await yomichan.api.parseText(text, this._getOptionsContext(), this._scanLength, this._useInternalParser, this._useMecabParser);
|
||||
if (this._setTextToken !== token) { return; }
|
||||
|
||||
this._refreshSelectedParser();
|
||||
@ -189,16 +207,19 @@ class QueryParser extends EventDispatcher {
|
||||
select.selectedIndex = selectedIndex;
|
||||
}
|
||||
|
||||
_createParseResult(terms) {
|
||||
_createParseResult(data) {
|
||||
const jp = this._japaneseUtil;
|
||||
const readingMode = this._readingMode;
|
||||
const fragment = document.createDocumentFragment();
|
||||
for (const term of terms) {
|
||||
for (const term of data) {
|
||||
const termNode = document.createElement('span');
|
||||
termNode.className = 'query-parser-term';
|
||||
for (const segment of term) {
|
||||
if (segment.reading.trim().length === 0) {
|
||||
termNode.appendChild(document.createTextNode(segment.text));
|
||||
for (const {text, reading} of term) {
|
||||
if (reading.length === 0) {
|
||||
termNode.appendChild(document.createTextNode(text));
|
||||
} else {
|
||||
termNode.appendChild(this._createSegment(segment));
|
||||
const reading2 = jp.convertReading(text, reading, readingMode);
|
||||
termNode.appendChild(this._createSegment(text, reading2));
|
||||
}
|
||||
}
|
||||
fragment.appendChild(termNode);
|
||||
@ -206,7 +227,7 @@ class QueryParser extends EventDispatcher {
|
||||
return fragment;
|
||||
}
|
||||
|
||||
_createSegment(segment) {
|
||||
_createSegment(text, reading) {
|
||||
const segmentNode = document.createElement('ruby');
|
||||
segmentNode.className = 'query-parser-segment';
|
||||
|
||||
@ -219,8 +240,8 @@ class QueryParser extends EventDispatcher {
|
||||
segmentNode.appendChild(textNode);
|
||||
segmentNode.appendChild(readingNode);
|
||||
|
||||
textNode.textContent = segment.text;
|
||||
readingNode.textContent = segment.reading;
|
||||
textNode.textContent = text;
|
||||
readingNode.textContent = reading;
|
||||
|
||||
return segmentNode;
|
||||
}
|
||||
|
@ -322,14 +322,13 @@ const JapaneseUtil = (() => {
|
||||
case 'katakana':
|
||||
return this.convertHiraganaToKatakana(reading);
|
||||
case 'romaji':
|
||||
if (reading) {
|
||||
if (reading.length > 0) {
|
||||
return this.convertToRomaji(reading);
|
||||
} else if (this.isStringEntirelyKana(term)) {
|
||||
return this.convertToRomaji(term);
|
||||
} else {
|
||||
if (this.isStringEntirelyKana(term)) {
|
||||
return this.convertToRomaji(term);
|
||||
}
|
||||
return reading;
|
||||
}
|
||||
return reading;
|
||||
case 'none':
|
||||
return '';
|
||||
default:
|
||||
|
@ -1224,6 +1224,7 @@
|
||||
<option value="hiragana">ひらがな</option>
|
||||
<option value="katakana">カタカナ</option>
|
||||
<option value="romaji">Romaji</option>
|
||||
<option value="dictionary-reading">Dictionary reading</option>
|
||||
</select>
|
||||
</div>
|
||||
</div></div>
|
||||
|
Loading…
Reference in New Issue
Block a user