Improve text parser (#1469)
* Merge ungrouped characters
* Update iteration
* Fix incorrect code point handling
* Simplify text
* Specify language
* Update how parsed status is represented
This commit is contained in:
parent
ec1a8380b5
commit
8f057c63fe
@ -1038,31 +1038,41 @@ class Backend {
|
||||
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
|
||||
const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options);
|
||||
const results = [];
|
||||
while (text.length > 0) {
|
||||
const term = [];
|
||||
let previousUngroupedSegment = null;
|
||||
let i = 0;
|
||||
const ii = text.length;
|
||||
while (i < ii) {
|
||||
const [definitions, sourceLength] = await this._translator.findTerms(
|
||||
'simple',
|
||||
text.substring(0, scanningLength),
|
||||
text.substring(i, i + scanningLength),
|
||||
findTermsOptions
|
||||
);
|
||||
const codePoint = text.codePointAt(i);
|
||||
const character = String.fromCodePoint(codePoint);
|
||||
if (
|
||||
definitions.length > 0 &&
|
||||
sourceLength > 0 &&
|
||||
(sourceLength !== 1 || this._japaneseUtil.isCodePointJapanese(text[0]))
|
||||
(sourceLength !== character.length || this._japaneseUtil.isCodePointJapanese(codePoint))
|
||||
) {
|
||||
previousUngroupedSegment = null;
|
||||
const {expression, reading} = definitions[0];
|
||||
const source = text.substring(0, sourceLength);
|
||||
const source = text.substring(i, i + sourceLength);
|
||||
const term = [];
|
||||
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
|
||||
const reading2 = jp.convertReading(text2, furigana, readingMode);
|
||||
term.push({text: text2, reading: reading2});
|
||||
}
|
||||
text = text.substring(source.length);
|
||||
results.push(term);
|
||||
i += sourceLength;
|
||||
} else {
|
||||
const reading = jp.convertReading(text[0], '', readingMode);
|
||||
term.push({text: text[0], reading});
|
||||
text = text.substring(1);
|
||||
if (previousUngroupedSegment === null) {
|
||||
previousUngroupedSegment = {text: character, reading: ''};
|
||||
results.push([previousUngroupedSegment]);
|
||||
} else {
|
||||
previousUngroupedSegment.text += character;
|
||||
}
|
||||
i += character.length;
|
||||
}
|
||||
results.push(term);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
@ -132,7 +132,8 @@ class QueryParser extends EventDispatcher {
|
||||
/**
 * Renders `text` as a preview inside the `_queryParser` element.
 * The text is wrapped as a single term containing a single segment with an
 * empty reading, and the element's previous content is cleared first.
 *
 * NOTE(review): this is a rendered diff hunk without +/- markers, so the body
 * below mixes the old and the new version of the method. The `|` / `||||`
 * lines are cell residue from the diff view, not code.
 *
 * @param text The text to display (presumably a string - TODO confirm).
 */
_setPreview(text) {
|
||||
// One term holding one segment; the reading is left empty.
const terms = [[{text, reading: ''}]];
|
||||
// Remove whatever was rendered before.
this._queryParser.textContent = '';
|
||||
// NOTE(review): judging from the hunk header (7 old lines, 8 new lines) this
// appears to be the removed line, which passed a second `true` argument - confirm.
this._queryParser.appendChild(this._createParseResult(terms, true));
|
||||
// Flags the container as not parsed via its `data-parsed` attribute.
// NOTE(review): this and the next line appear to be the added lines - confirm.
this._queryParser.dataset.parsed = 'false';
|
||||
this._queryParser.appendChild(this._createParseResult(terms));
|
||||
}
|
||||
|
||||
_renderParserSelect() {
|
||||
@ -146,6 +147,7 @@ class QueryParser extends EventDispatcher {
|
||||
/**
 * Replaces the content of the `_queryParser` element with the current parse result.
 * The element is cleared and flagged as parsed (`data-parsed="true"`) before the
 * result is checked, so both happen even when there is no result to show.
 * The `|` / `||||` lines are cell residue from the diff view, not code.
 */
_renderParseResult() {
|
||||
const parseResult = this._getParseResult();
|
||||
this._queryParser.textContent = '';
|
||||
this._queryParser.dataset.parsed = 'true';
|
||||
// Nothing to render: leave the container empty.
if (!parseResult) { return; }
|
||||
// NOTE(review): another hunk on this page shows `_createParseResult(terms)` with a
// single parameter, so the trailing `false` argument looks unused - confirm.
this._queryParser.appendChild(this._createParseResult(parseResult.content, false));
|
||||
}
|
||||
@ -182,13 +184,11 @@ class QueryParser extends EventDispatcher {
|
||||
select.selectedIndex = selectedIndex;
|
||||
}
|
||||
|
||||
_createParseResult(terms, preview) {
|
||||
const type = preview ? 'preview' : 'normal';
|
||||
_createParseResult(terms) {
|
||||
const fragment = document.createDocumentFragment();
|
||||
for (const term of terms) {
|
||||
const termNode = document.createElement('span');
|
||||
termNode.className = 'query-parser-term';
|
||||
termNode.dataset.type = type;
|
||||
for (const segment of term) {
|
||||
if (segment.reading.trim().length === 0) {
|
||||
this._addSegmentText(segment.text, termNode);
|
||||
@ -221,11 +221,6 @@ class QueryParser extends EventDispatcher {
|
||||
}
|
||||
|
||||
/**
 * Adds the text of a segment to `container`.
 *
 * NOTE(review): this is a rendered diff hunk without +/- markers. Judging from the
 * hunk header (11 old lines, 6 new lines), the `for` loop appears to be the removed
 * implementation and the final `container.textContent = text;` its replacement -
 * confirm against the repository. The `|` / `||||` lines are cell residue from the
 * diff view, not code.
 *
 * @param text The segment text (presumably a string - TODO confirm).
 * @param container The node that receives the text; it is used with `appendChild`
 *   and `textContent`.
 */
_addSegmentText(text, container) {
|
||||
// Loop form: appends one `span.query-parser-char` per iterated character of `text`.
for (const character of text) {
|
||||
const node = document.createElement('span');
|
||||
node.className = 'query-parser-char';
|
||||
node.textContent = character;
|
||||
container.appendChild(node);
|
||||
}
|
||||
// Single-assignment form: sets the whole text on the container at once.
container.textContent = text;
|
||||
}
|
||||
}
|
||||
|
@ -29,7 +29,7 @@
|
||||
<div class="search-option" id="query-parser-mode-container" hidden>
|
||||
<select id="query-parser-mode-select"></select>
|
||||
</div>
|
||||
<div id="query-parser-content"></div>
|
||||
<div id="query-parser-content" lang="ja"></div>
|
||||
</div>
|
||||
|
||||
<div id="definitions"></div>
|
||||
|
@ -52,7 +52,7 @@
|
||||
</div>
|
||||
|
||||
<div class="scan-disable" id="query-parser-container" hidden>
|
||||
<div id="query-parser-content"></div>
|
||||
<div id="query-parser-content" lang="ja"></div>
|
||||
</div>
|
||||
|
||||
<div id="definitions"></div>
|
||||
|
Loading…
x
Reference in New Issue
Block a user