Improve text parser (#1469)

* Merge ungrouped characters

* Update iteration

* Fix incorrect code point handling

* Simplify text

* Specify language

* Update how parsed status is represented
This commit is contained in:
toasted-nutbread 2021-02-28 16:38:01 -05:00 committed by GitHub
parent ec1a8380b5
commit 8f057c63fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 27 additions and 22 deletions

View File

@@ -1038,31 +1038,41 @@ class Backend {
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options; const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options); const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options);
const results = []; const results = [];
while (text.length > 0) { let previousUngroupedSegment = null;
const term = []; let i = 0;
const ii = text.length;
while (i < ii) {
const [definitions, sourceLength] = await this._translator.findTerms( const [definitions, sourceLength] = await this._translator.findTerms(
'simple', 'simple',
text.substring(0, scanningLength), text.substring(i, i + scanningLength),
findTermsOptions findTermsOptions
); );
const codePoint = text.codePointAt(i);
const character = String.fromCodePoint(codePoint);
if ( if (
definitions.length > 0 && definitions.length > 0 &&
sourceLength > 0 && sourceLength > 0 &&
(sourceLength !== 1 || this._japaneseUtil.isCodePointJapanese(text[0])) (sourceLength !== character.length || this._japaneseUtil.isCodePointJapanese(codePoint))
) { ) {
previousUngroupedSegment = null;
const {expression, reading} = definitions[0]; const {expression, reading} = definitions[0];
const source = text.substring(0, sourceLength); const source = text.substring(i, i + sourceLength);
const term = [];
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) { for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
const reading2 = jp.convertReading(text2, furigana, readingMode); const reading2 = jp.convertReading(text2, furigana, readingMode);
term.push({text: text2, reading: reading2}); term.push({text: text2, reading: reading2});
} }
text = text.substring(source.length); results.push(term);
i += sourceLength;
} else { } else {
const reading = jp.convertReading(text[0], '', readingMode); if (previousUngroupedSegment === null) {
term.push({text: text[0], reading}); previousUngroupedSegment = {text: character, reading: ''};
text = text.substring(1); results.push([previousUngroupedSegment]);
} else {
previousUngroupedSegment.text += character;
}
i += character.length;
} }
results.push(term);
} }
return results; return results;
} }

View File

@@ -132,7 +132,8 @@ class QueryParser extends EventDispatcher {
_setPreview(text) { _setPreview(text) {
const terms = [[{text, reading: ''}]]; const terms = [[{text, reading: ''}]];
this._queryParser.textContent = ''; this._queryParser.textContent = '';
this._queryParser.appendChild(this._createParseResult(terms, true)); this._queryParser.dataset.parsed = 'false';
this._queryParser.appendChild(this._createParseResult(terms));
} }
_renderParserSelect() { _renderParserSelect() {
@@ -146,6 +147,7 @@ class QueryParser extends EventDispatcher {
_renderParseResult() { _renderParseResult() {
const parseResult = this._getParseResult(); const parseResult = this._getParseResult();
this._queryParser.textContent = ''; this._queryParser.textContent = '';
this._queryParser.dataset.parsed = 'true';
if (!parseResult) { return; } if (!parseResult) { return; }
this._queryParser.appendChild(this._createParseResult(parseResult.content, false)); this._queryParser.appendChild(this._createParseResult(parseResult.content, false));
} }
@@ -182,13 +184,11 @@ class QueryParser extends EventDispatcher {
select.selectedIndex = selectedIndex; select.selectedIndex = selectedIndex;
} }
_createParseResult(terms, preview) { _createParseResult(terms) {
const type = preview ? 'preview' : 'normal';
const fragment = document.createDocumentFragment(); const fragment = document.createDocumentFragment();
for (const term of terms) { for (const term of terms) {
const termNode = document.createElement('span'); const termNode = document.createElement('span');
termNode.className = 'query-parser-term'; termNode.className = 'query-parser-term';
termNode.dataset.type = type;
for (const segment of term) { for (const segment of term) {
if (segment.reading.trim().length === 0) { if (segment.reading.trim().length === 0) {
this._addSegmentText(segment.text, termNode); this._addSegmentText(segment.text, termNode);
@@ -221,11 +221,6 @@ class QueryParser extends EventDispatcher {
} }
_addSegmentText(text, container) { _addSegmentText(text, container) {
for (const character of text) { container.textContent = text;
const node = document.createElement('span');
node.className = 'query-parser-char';
node.textContent = character;
container.appendChild(node);
}
} }
} }

View File

@@ -29,7 +29,7 @@
<div class="search-option" id="query-parser-mode-container" hidden> <div class="search-option" id="query-parser-mode-container" hidden>
<select id="query-parser-mode-select"></select> <select id="query-parser-mode-select"></select>
</div> </div>
<div id="query-parser-content"></div> <div id="query-parser-content" lang="ja"></div>
</div> </div>
<div id="definitions"></div> <div id="definitions"></div>

View File

@@ -52,7 +52,7 @@
</div> </div>
<div class="scan-disable" id="query-parser-container" hidden> <div class="scan-disable" id="query-parser-container" hidden>
<div id="query-parser-content"></div> <div id="query-parser-content" lang="ja"></div>
</div> </div>
<div id="definitions"></div> <div id="definitions"></div>