Text parse updates (#1811)
* Expose more settings to QueryParser
* Rename textParse to parseText
* Refactor
* Convert reading inside QueryParser rather than Backend
* Remove use of readingMode from backend
* Rename
* Update parseText args
* Simplify
* Update results
* Move use of optionsContext
* Run mecab and internal parser in parallel
* Added "dictionary-reading" reading mode
* Update romaji conversion case
This commit is contained in:
parent
315dc425e4
commit
24ec22b2e1
@ -797,7 +797,7 @@
|
|||||||
},
|
},
|
||||||
"readingMode": {
|
"readingMode": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": ["hiragana", "katakana", "romaji", "none"],
|
"enum": ["hiragana", "katakana", "romaji", "dictionary-reading", "none"],
|
||||||
"default": "hiragana"
|
"default": "hiragana"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -90,7 +90,7 @@ class Backend {
|
|||||||
['optionsGetFull', {async: false, contentScript: true, handler: this._onApiOptionsGetFull.bind(this)}],
|
['optionsGetFull', {async: false, contentScript: true, handler: this._onApiOptionsGetFull.bind(this)}],
|
||||||
['kanjiFind', {async: true, contentScript: true, handler: this._onApiKanjiFind.bind(this)}],
|
['kanjiFind', {async: true, contentScript: true, handler: this._onApiKanjiFind.bind(this)}],
|
||||||
['termsFind', {async: true, contentScript: true, handler: this._onApiTermsFind.bind(this)}],
|
['termsFind', {async: true, contentScript: true, handler: this._onApiTermsFind.bind(this)}],
|
||||||
['textParse', {async: true, contentScript: true, handler: this._onApiTextParse.bind(this)}],
|
['parseText', {async: true, contentScript: true, handler: this._onApiParseText.bind(this)}],
|
||||||
['getAnkiConnectVersion', {async: true, contentScript: true, handler: this._onApGetAnkiConnectVersion.bind(this)}],
|
['getAnkiConnectVersion', {async: true, contentScript: true, handler: this._onApGetAnkiConnectVersion.bind(this)}],
|
||||||
['isAnkiConnected', {async: true, contentScript: true, handler: this._onApiIsAnkiConnected.bind(this)}],
|
['isAnkiConnected', {async: true, contentScript: true, handler: this._onApiIsAnkiConnected.bind(this)}],
|
||||||
['addAnkiNote', {async: true, contentScript: true, handler: this._onApiAddAnkiNote.bind(this)}],
|
['addAnkiNote', {async: true, contentScript: true, handler: this._onApiAddAnkiNote.bind(this)}],
|
||||||
@ -417,26 +417,30 @@ class Backend {
|
|||||||
return {dictionaryEntries, originalTextLength};
|
return {dictionaryEntries, originalTextLength};
|
||||||
}
|
}
|
||||||
|
|
||||||
async _onApiTextParse({text, optionsContext}) {
|
async _onApiParseText({text, optionsContext, scanLength, useInternalParser, useMecabParser}) {
|
||||||
const options = this._getProfileOptions(optionsContext);
|
const [internalResults, mecabResults] = await Promise.all([
|
||||||
|
(useInternalParser ? this._textParseScanning(text, scanLength, optionsContext) : null),
|
||||||
|
(useMecabParser ? this._textParseMecab(text) : null)
|
||||||
|
]);
|
||||||
|
|
||||||
const results = [];
|
const results = [];
|
||||||
|
|
||||||
if (options.parsing.enableScanningParser) {
|
if (internalResults !== null) {
|
||||||
results.push({
|
results.push({
|
||||||
source: 'scanning-parser',
|
|
||||||
id: 'scan',
|
id: 'scan',
|
||||||
content: await this._textParseScanning(text, options)
|
source: 'scanning-parser',
|
||||||
|
dictionary: null,
|
||||||
|
content: internalResults
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (options.parsing.enableMecabParser) {
|
if (mecabResults !== null) {
|
||||||
const mecabResults = await this._textParseMecab(text, options);
|
for (const [dictionary, content] of mecabResults) {
|
||||||
for (const [mecabDictName, mecabDictResults] of mecabResults) {
|
|
||||||
results.push({
|
results.push({
|
||||||
|
id: `mecab-${dictionary}`,
|
||||||
source: 'mecab',
|
source: 'mecab',
|
||||||
dictionary: mecabDictName,
|
dictionary,
|
||||||
id: `mecab-${mecabDictName}`,
|
content
|
||||||
content: mecabDictResults
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1042,10 +1046,10 @@ class Backend {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
async _textParseScanning(text, options) {
|
async _textParseScanning(text, scanLength, optionsContext) {
|
||||||
const jp = this._japaneseUtil;
|
const jp = this._japaneseUtil;
|
||||||
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
|
|
||||||
const mode = 'simple';
|
const mode = 'simple';
|
||||||
|
const options = this._getProfileOptions(optionsContext);
|
||||||
const findTermsOptions = this._getTranslatorFindTermsOptions(mode, {wildcard: null}, options);
|
const findTermsOptions = this._getTranslatorFindTermsOptions(mode, {wildcard: null}, options);
|
||||||
const results = [];
|
const results = [];
|
||||||
let previousUngroupedSegment = null;
|
let previousUngroupedSegment = null;
|
||||||
@ -1054,7 +1058,7 @@ class Backend {
|
|||||||
while (i < ii) {
|
while (i < ii) {
|
||||||
const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(
|
const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(
|
||||||
mode,
|
mode,
|
||||||
text.substring(i, i + scanningLength),
|
text.substring(i, i + scanLength),
|
||||||
findTermsOptions
|
findTermsOptions
|
||||||
);
|
);
|
||||||
const codePoint = text.codePointAt(i);
|
const codePoint = text.codePointAt(i);
|
||||||
@ -1069,8 +1073,7 @@ class Backend {
|
|||||||
const source = text.substring(i, i + originalTextLength);
|
const source = text.substring(i, i + originalTextLength);
|
||||||
const textSegments = [];
|
const textSegments = [];
|
||||||
for (const {text: text2, reading: reading2} of jp.distributeFuriganaInflected(term, reading, source)) {
|
for (const {text: text2, reading: reading2} of jp.distributeFuriganaInflected(term, reading, source)) {
|
||||||
const reading3 = jp.convertReading(text2, reading2, readingMode);
|
textSegments.push({text: text2, reading: reading2});
|
||||||
textSegments.push({text: text2, reading: reading3});
|
|
||||||
}
|
}
|
||||||
results.push(textSegments);
|
results.push(textSegments);
|
||||||
i += originalTextLength;
|
i += originalTextLength;
|
||||||
@ -1087,9 +1090,8 @@ class Backend {
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
async _textParseMecab(text, options) {
|
async _textParseMecab(text) {
|
||||||
const jp = this._japaneseUtil;
|
const jp = this._japaneseUtil;
|
||||||
const {parsing: {readingMode}} = options;
|
|
||||||
|
|
||||||
let parseTextResults;
|
let parseTextResults;
|
||||||
try {
|
try {
|
||||||
@ -1109,8 +1111,7 @@ class Backend {
|
|||||||
jp.convertKatakanaToHiragana(reading),
|
jp.convertKatakanaToHiragana(reading),
|
||||||
source
|
source
|
||||||
)) {
|
)) {
|
||||||
const reading3 = jp.convertReading(text2, reading2, readingMode);
|
termParts.push({text: text2, reading: reading2});
|
||||||
termParts.push({text: text2, reading: reading3});
|
|
||||||
}
|
}
|
||||||
result.push(termParts);
|
result.push(termParts);
|
||||||
}
|
}
|
||||||
|
@ -32,8 +32,8 @@ class API {
|
|||||||
return this._invoke('termsFind', {text, details, optionsContext});
|
return this._invoke('termsFind', {text, details, optionsContext});
|
||||||
}
|
}
|
||||||
|
|
||||||
textParse(text, optionsContext) {
|
parseText(text, optionsContext, scanLength, useInternalParser, useMecabParser) {
|
||||||
return this._invoke('textParse', {text, optionsContext});
|
return this._invoke('parseText', {text, optionsContext, scanLength, useInternalParser, useMecabParser});
|
||||||
}
|
}
|
||||||
|
|
||||||
kanjiFind(text, optionsContext) {
|
kanjiFind(text, optionsContext) {
|
||||||
|
@ -83,7 +83,8 @@ class Display extends EventDispatcher {
|
|||||||
this._queryParserContainer = document.querySelector('#query-parser-container');
|
this._queryParserContainer = document.querySelector('#query-parser-container');
|
||||||
this._queryParser = new QueryParser({
|
this._queryParser = new QueryParser({
|
||||||
getSearchContext: this._getSearchContext.bind(this),
|
getSearchContext: this._getSearchContext.bind(this),
|
||||||
documentUtil: this._documentUtil
|
documentUtil: this._documentUtil,
|
||||||
|
japaneseUtil
|
||||||
});
|
});
|
||||||
this._contentScrollElement = document.querySelector('#content-scroll');
|
this._contentScrollElement = document.querySelector('#content-scroll');
|
||||||
this._contentScrollBodyElement = document.querySelector('#content-body');
|
this._contentScrollBodyElement = document.querySelector('#content-body');
|
||||||
@ -312,6 +313,9 @@ class Display extends EventDispatcher {
|
|||||||
this._queryParser.setOptions({
|
this._queryParser.setOptions({
|
||||||
selectedParser: options.parsing.selectedParser,
|
selectedParser: options.parsing.selectedParser,
|
||||||
termSpacing: options.parsing.termSpacing,
|
termSpacing: options.parsing.termSpacing,
|
||||||
|
readingMode: options.parsing.readingMode,
|
||||||
|
useInternalParser: options.parsing.enableScanningParser,
|
||||||
|
useMecabParser: options.parsing.enableMecabParser,
|
||||||
scanning: {
|
scanning: {
|
||||||
inputs: scanningOptions.inputs,
|
inputs: scanningOptions.inputs,
|
||||||
deepContentScan: scanningOptions.deepDomScan,
|
deepContentScan: scanningOptions.deepDomScan,
|
||||||
|
@ -20,13 +20,18 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
class QueryParser extends EventDispatcher {
|
class QueryParser extends EventDispatcher {
|
||||||
constructor({getSearchContext, documentUtil}) {
|
constructor({getSearchContext, documentUtil, japaneseUtil}) {
|
||||||
super();
|
super();
|
||||||
this._getSearchContext = getSearchContext;
|
this._getSearchContext = getSearchContext;
|
||||||
this._documentUtil = documentUtil;
|
this._documentUtil = documentUtil;
|
||||||
|
this._japaneseUtil = japaneseUtil;
|
||||||
this._text = '';
|
this._text = '';
|
||||||
this._setTextToken = null;
|
this._setTextToken = null;
|
||||||
this._selectedParser = null;
|
this._selectedParser = null;
|
||||||
|
this._readingMode = 'none';
|
||||||
|
this._scanLength = 1;
|
||||||
|
this._useInternalParser = true;
|
||||||
|
this._useMecabParser = false;
|
||||||
this._parseResults = [];
|
this._parseResults = [];
|
||||||
this._queryParser = document.querySelector('#query-parser-content');
|
this._queryParser = document.querySelector('#query-parser-content');
|
||||||
this._queryParserModeContainer = document.querySelector('#query-parser-mode-container');
|
this._queryParserModeContainer = document.querySelector('#query-parser-mode-container');
|
||||||
@ -52,7 +57,7 @@ class QueryParser extends EventDispatcher {
|
|||||||
this._queryParserModeSelect.addEventListener('change', this._onParserChange.bind(this), false);
|
this._queryParserModeSelect.addEventListener('change', this._onParserChange.bind(this), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
setOptions({selectedParser, termSpacing, scanning}) {
|
setOptions({selectedParser, termSpacing, readingMode, useInternalParser, useMecabParser, scanning}) {
|
||||||
let selectedParserChanged = false;
|
let selectedParserChanged = false;
|
||||||
if (selectedParser === null || typeof selectedParser === 'string') {
|
if (selectedParser === null || typeof selectedParser === 'string') {
|
||||||
selectedParserChanged = (this._selectedParser !== selectedParser);
|
selectedParserChanged = (this._selectedParser !== selectedParser);
|
||||||
@ -61,7 +66,20 @@ class QueryParser extends EventDispatcher {
|
|||||||
if (typeof termSpacing === 'boolean') {
|
if (typeof termSpacing === 'boolean') {
|
||||||
this._queryParser.dataset.termSpacing = `${termSpacing}`;
|
this._queryParser.dataset.termSpacing = `${termSpacing}`;
|
||||||
}
|
}
|
||||||
|
if (typeof readingMode === 'string') {
|
||||||
|
this._readingMode = readingMode;
|
||||||
|
}
|
||||||
|
if (typeof useInternalParser === 'boolean') {
|
||||||
|
this._useInternalParser = useInternalParser;
|
||||||
|
}
|
||||||
|
if (typeof useMecabParser === 'boolean') {
|
||||||
|
this._useMecabParser = useMecabParser;
|
||||||
|
}
|
||||||
if (scanning !== null && typeof scanning === 'object') {
|
if (scanning !== null && typeof scanning === 'object') {
|
||||||
|
const {scanLength} = scanning;
|
||||||
|
if (typeof scanLength === 'number') {
|
||||||
|
this._scanLength = scanLength;
|
||||||
|
}
|
||||||
this._textScanner.setOptions(scanning);
|
this._textScanner.setOptions(scanning);
|
||||||
}
|
}
|
||||||
this._textScanner.setEnabled(true);
|
this._textScanner.setEnabled(true);
|
||||||
@ -76,7 +94,7 @@ class QueryParser extends EventDispatcher {
|
|||||||
|
|
||||||
const token = {};
|
const token = {};
|
||||||
this._setTextToken = token;
|
this._setTextToken = token;
|
||||||
this._parseResults = await yomichan.api.textParse(text, this._getOptionsContext());
|
this._parseResults = await yomichan.api.parseText(text, this._getOptionsContext(), this._scanLength, this._useInternalParser, this._useMecabParser);
|
||||||
if (this._setTextToken !== token) { return; }
|
if (this._setTextToken !== token) { return; }
|
||||||
|
|
||||||
this._refreshSelectedParser();
|
this._refreshSelectedParser();
|
||||||
@ -189,16 +207,19 @@ class QueryParser extends EventDispatcher {
|
|||||||
select.selectedIndex = selectedIndex;
|
select.selectedIndex = selectedIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
_createParseResult(terms) {
|
_createParseResult(data) {
|
||||||
|
const jp = this._japaneseUtil;
|
||||||
|
const readingMode = this._readingMode;
|
||||||
const fragment = document.createDocumentFragment();
|
const fragment = document.createDocumentFragment();
|
||||||
for (const term of terms) {
|
for (const term of data) {
|
||||||
const termNode = document.createElement('span');
|
const termNode = document.createElement('span');
|
||||||
termNode.className = 'query-parser-term';
|
termNode.className = 'query-parser-term';
|
||||||
for (const segment of term) {
|
for (const {text, reading} of term) {
|
||||||
if (segment.reading.trim().length === 0) {
|
if (reading.length === 0) {
|
||||||
termNode.appendChild(document.createTextNode(segment.text));
|
termNode.appendChild(document.createTextNode(text));
|
||||||
} else {
|
} else {
|
||||||
termNode.appendChild(this._createSegment(segment));
|
const reading2 = jp.convertReading(text, reading, readingMode);
|
||||||
|
termNode.appendChild(this._createSegment(text, reading2));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fragment.appendChild(termNode);
|
fragment.appendChild(termNode);
|
||||||
@ -206,7 +227,7 @@ class QueryParser extends EventDispatcher {
|
|||||||
return fragment;
|
return fragment;
|
||||||
}
|
}
|
||||||
|
|
||||||
_createSegment(segment) {
|
_createSegment(text, reading) {
|
||||||
const segmentNode = document.createElement('ruby');
|
const segmentNode = document.createElement('ruby');
|
||||||
segmentNode.className = 'query-parser-segment';
|
segmentNode.className = 'query-parser-segment';
|
||||||
|
|
||||||
@ -219,8 +240,8 @@ class QueryParser extends EventDispatcher {
|
|||||||
segmentNode.appendChild(textNode);
|
segmentNode.appendChild(textNode);
|
||||||
segmentNode.appendChild(readingNode);
|
segmentNode.appendChild(readingNode);
|
||||||
|
|
||||||
textNode.textContent = segment.text;
|
textNode.textContent = text;
|
||||||
readingNode.textContent = segment.reading;
|
readingNode.textContent = reading;
|
||||||
|
|
||||||
return segmentNode;
|
return segmentNode;
|
||||||
}
|
}
|
||||||
|
@ -322,14 +322,13 @@ const JapaneseUtil = (() => {
|
|||||||
case 'katakana':
|
case 'katakana':
|
||||||
return this.convertHiraganaToKatakana(reading);
|
return this.convertHiraganaToKatakana(reading);
|
||||||
case 'romaji':
|
case 'romaji':
|
||||||
if (reading) {
|
if (reading.length > 0) {
|
||||||
return this.convertToRomaji(reading);
|
return this.convertToRomaji(reading);
|
||||||
|
} else if (this.isStringEntirelyKana(term)) {
|
||||||
|
return this.convertToRomaji(term);
|
||||||
} else {
|
} else {
|
||||||
if (this.isStringEntirelyKana(term)) {
|
return reading;
|
||||||
return this.convertToRomaji(term);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return reading;
|
|
||||||
case 'none':
|
case 'none':
|
||||||
return '';
|
return '';
|
||||||
default:
|
default:
|
||||||
|
@ -1224,6 +1224,7 @@
|
|||||||
<option value="hiragana">ひらがな</option>
|
<option value="hiragana">ひらがな</option>
|
||||||
<option value="katakana">カタカナ</option>
|
<option value="katakana">カタカナ</option>
|
||||||
<option value="romaji">Romaji</option>
|
<option value="romaji">Romaji</option>
|
||||||
|
<option value="dictionary-reading">Dictionary reading</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
</div></div>
|
</div></div>
|
||||||
|
Loading…
Reference in New Issue
Block a user