Improve text parser (#1469)
* Merge ungrouped characters
* Update iteration
* Fix incorrect code point handling
* Simplify text
* Specify language
* Update how parsed status is represented
This commit is contained in:
parent
ec1a8380b5
commit
8f057c63fe
@ -1038,31 +1038,41 @@ class Backend {
|
|||||||
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
|
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
|
||||||
const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options);
|
const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options);
|
||||||
const results = [];
|
const results = [];
|
||||||
while (text.length > 0) {
|
let previousUngroupedSegment = null;
|
||||||
const term = [];
|
let i = 0;
|
||||||
|
const ii = text.length;
|
||||||
|
while (i < ii) {
|
||||||
const [definitions, sourceLength] = await this._translator.findTerms(
|
const [definitions, sourceLength] = await this._translator.findTerms(
|
||||||
'simple',
|
'simple',
|
||||||
text.substring(0, scanningLength),
|
text.substring(i, i + scanningLength),
|
||||||
findTermsOptions
|
findTermsOptions
|
||||||
);
|
);
|
||||||
|
const codePoint = text.codePointAt(i);
|
||||||
|
const character = String.fromCodePoint(codePoint);
|
||||||
if (
|
if (
|
||||||
definitions.length > 0 &&
|
definitions.length > 0 &&
|
||||||
sourceLength > 0 &&
|
sourceLength > 0 &&
|
||||||
(sourceLength !== 1 || this._japaneseUtil.isCodePointJapanese(text[0]))
|
(sourceLength !== character.length || this._japaneseUtil.isCodePointJapanese(codePoint))
|
||||||
) {
|
) {
|
||||||
|
previousUngroupedSegment = null;
|
||||||
const {expression, reading} = definitions[0];
|
const {expression, reading} = definitions[0];
|
||||||
const source = text.substring(0, sourceLength);
|
const source = text.substring(i, i + sourceLength);
|
||||||
|
const term = [];
|
||||||
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
|
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
|
||||||
const reading2 = jp.convertReading(text2, furigana, readingMode);
|
const reading2 = jp.convertReading(text2, furigana, readingMode);
|
||||||
term.push({text: text2, reading: reading2});
|
term.push({text: text2, reading: reading2});
|
||||||
}
|
}
|
||||||
text = text.substring(source.length);
|
|
||||||
} else {
|
|
||||||
const reading = jp.convertReading(text[0], '', readingMode);
|
|
||||||
term.push({text: text[0], reading});
|
|
||||||
text = text.substring(1);
|
|
||||||
}
|
|
||||||
results.push(term);
|
results.push(term);
|
||||||
|
i += sourceLength;
|
||||||
|
} else {
|
||||||
|
if (previousUngroupedSegment === null) {
|
||||||
|
previousUngroupedSegment = {text: character, reading: ''};
|
||||||
|
results.push([previousUngroupedSegment]);
|
||||||
|
} else {
|
||||||
|
previousUngroupedSegment.text += character;
|
||||||
|
}
|
||||||
|
i += character.length;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
@ -132,7 +132,8 @@ class QueryParser extends EventDispatcher {
|
|||||||
_setPreview(text) {
|
_setPreview(text) {
|
||||||
const terms = [[{text, reading: ''}]];
|
const terms = [[{text, reading: ''}]];
|
||||||
this._queryParser.textContent = '';
|
this._queryParser.textContent = '';
|
||||||
this._queryParser.appendChild(this._createParseResult(terms, true));
|
this._queryParser.dataset.parsed = 'false';
|
||||||
|
this._queryParser.appendChild(this._createParseResult(terms));
|
||||||
}
|
}
|
||||||
|
|
||||||
_renderParserSelect() {
|
_renderParserSelect() {
|
||||||
@ -146,6 +147,7 @@ class QueryParser extends EventDispatcher {
|
|||||||
_renderParseResult() {
|
_renderParseResult() {
|
||||||
const parseResult = this._getParseResult();
|
const parseResult = this._getParseResult();
|
||||||
this._queryParser.textContent = '';
|
this._queryParser.textContent = '';
|
||||||
|
this._queryParser.dataset.parsed = 'true';
|
||||||
if (!parseResult) { return; }
|
if (!parseResult) { return; }
|
||||||
this._queryParser.appendChild(this._createParseResult(parseResult.content, false));
|
this._queryParser.appendChild(this._createParseResult(parseResult.content, false));
|
||||||
}
|
}
|
||||||
@ -182,13 +184,11 @@ class QueryParser extends EventDispatcher {
|
|||||||
select.selectedIndex = selectedIndex;
|
select.selectedIndex = selectedIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
_createParseResult(terms, preview) {
|
_createParseResult(terms) {
|
||||||
const type = preview ? 'preview' : 'normal';
|
|
||||||
const fragment = document.createDocumentFragment();
|
const fragment = document.createDocumentFragment();
|
||||||
for (const term of terms) {
|
for (const term of terms) {
|
||||||
const termNode = document.createElement('span');
|
const termNode = document.createElement('span');
|
||||||
termNode.className = 'query-parser-term';
|
termNode.className = 'query-parser-term';
|
||||||
termNode.dataset.type = type;
|
|
||||||
for (const segment of term) {
|
for (const segment of term) {
|
||||||
if (segment.reading.trim().length === 0) {
|
if (segment.reading.trim().length === 0) {
|
||||||
this._addSegmentText(segment.text, termNode);
|
this._addSegmentText(segment.text, termNode);
|
||||||
@ -221,11 +221,6 @@ class QueryParser extends EventDispatcher {
|
|||||||
}
|
}
|
||||||
|
|
||||||
_addSegmentText(text, container) {
|
_addSegmentText(text, container) {
|
||||||
for (const character of text) {
|
container.textContent = text;
|
||||||
const node = document.createElement('span');
|
|
||||||
node.className = 'query-parser-char';
|
|
||||||
node.textContent = character;
|
|
||||||
container.appendChild(node);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -29,7 +29,7 @@
|
|||||||
<div class="search-option" id="query-parser-mode-container" hidden>
|
<div class="search-option" id="query-parser-mode-container" hidden>
|
||||||
<select id="query-parser-mode-select"></select>
|
<select id="query-parser-mode-select"></select>
|
||||||
</div>
|
</div>
|
||||||
<div id="query-parser-content"></div>
|
<div id="query-parser-content" lang="ja"></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div id="definitions"></div>
|
<div id="definitions"></div>
|
||||||
|
@ -52,7 +52,7 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="scan-disable" id="query-parser-container" hidden>
|
<div class="scan-disable" id="query-parser-container" hidden>
|
||||||
<div id="query-parser-content"></div>
|
<div id="query-parser-content" lang="ja"></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div id="definitions"></div>
|
<div id="definitions"></div>
|
||||||
|
Loading…
Reference in New Issue
Block a user