Improve text parser (#1469)

* Merge ungrouped characters

* Update iteration

* Fix incorrect code point handling

* Simplify text

* Specify language

* Update how parsed status is represented
This commit is contained in:
toasted-nutbread 2021-02-28 16:38:01 -05:00 committed by GitHub
parent ec1a8380b5
commit 8f057c63fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 27 additions and 22 deletions

View File

@ -1038,31 +1038,41 @@ class Backend {
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options);
const results = [];
while (text.length > 0) {
const term = [];
let previousUngroupedSegment = null;
let i = 0;
const ii = text.length;
while (i < ii) {
const [definitions, sourceLength] = await this._translator.findTerms(
'simple',
text.substring(0, scanningLength),
text.substring(i, i + scanningLength),
findTermsOptions
);
const codePoint = text.codePointAt(i);
const character = String.fromCodePoint(codePoint);
if (
definitions.length > 0 &&
sourceLength > 0 &&
(sourceLength !== 1 || this._japaneseUtil.isCodePointJapanese(text[0]))
(sourceLength !== character.length || this._japaneseUtil.isCodePointJapanese(codePoint))
) {
previousUngroupedSegment = null;
const {expression, reading} = definitions[0];
const source = text.substring(0, sourceLength);
const source = text.substring(i, i + sourceLength);
const term = [];
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
const reading2 = jp.convertReading(text2, furigana, readingMode);
term.push({text: text2, reading: reading2});
}
text = text.substring(source.length);
results.push(term);
i += sourceLength;
} else {
const reading = jp.convertReading(text[0], '', readingMode);
term.push({text: text[0], reading});
text = text.substring(1);
if (previousUngroupedSegment === null) {
previousUngroupedSegment = {text: character, reading: ''};
results.push([previousUngroupedSegment]);
} else {
previousUngroupedSegment.text += character;
}
i += character.length;
}
results.push(term);
}
return results;
}

View File

@ -132,7 +132,8 @@ class QueryParser extends EventDispatcher {
_setPreview(text) {
const terms = [[{text, reading: ''}]];
this._queryParser.textContent = '';
this._queryParser.appendChild(this._createParseResult(terms, true));
this._queryParser.dataset.parsed = 'false';
this._queryParser.appendChild(this._createParseResult(terms));
}
_renderParserSelect() {
@ -146,6 +147,7 @@ class QueryParser extends EventDispatcher {
// Clears the query parser element, marks it as parsed, and appends the
// rendered parse result when one is available.
_renderParseResult() {
const parseResult = this._getParseResult();
this._queryParser.textContent = '';
// Set before the early return below, so the element is flagged as parsed
// even when there is no parse result to display.
this._queryParser.dataset.parsed = 'true';
if (!parseResult) { return; }
// NOTE(review): a one-parameter `_createParseResult(terms)` would ignore the
// trailing `false` passed here — confirm against the current signature and drop it.
this._queryParser.appendChild(this._createParseResult(parseResult.content, false));
}
@ -182,13 +184,11 @@ class QueryParser extends EventDispatcher {
select.selectedIndex = selectedIndex;
}
_createParseResult(terms, preview) {
const type = preview ? 'preview' : 'normal';
_createParseResult(terms) {
const fragment = document.createDocumentFragment();
for (const term of terms) {
const termNode = document.createElement('span');
termNode.className = 'query-parser-term';
termNode.dataset.type = type;
for (const segment of term) {
if (segment.reading.trim().length === 0) {
this._addSegmentText(segment.text, termNode);
@ -221,11 +221,6 @@ class QueryParser extends EventDispatcher {
}
_addSegmentText(text, container) {
for (const character of text) {
const node = document.createElement('span');
node.className = 'query-parser-char';
node.textContent = character;
container.appendChild(node);
}
container.textContent = text;
}
}

View File

@ -29,7 +29,7 @@
<div class="search-option" id="query-parser-mode-container" hidden>
<select id="query-parser-mode-select"></select>
</div>
<div id="query-parser-content"></div>
<div id="query-parser-content" lang="ja"></div>
</div>
<div id="definitions"></div>

View File

@ -52,7 +52,7 @@
</div>
<div class="scan-disable" id="query-parser-container" hidden>
<div id="query-parser-content"></div>
<div id="query-parser-content" lang="ja"></div>
</div>
<div id="definitions"></div>