Translator alphanumeric improvement (#1635)

* Simplify alphanumeric check

* Update option name for clarity
This commit is contained in:
toasted-nutbread 2021-04-28 21:17:05 -04:00 committed by GitHub
parent 763c6c76aa
commit 512391346b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 17 additions and 14 deletions

View File

@@ -1896,7 +1896,7 @@ class Backend {
return { return {
wildcard, wildcard,
mainDictionary, mainDictionary,
alphanumeric, removeNonJapaneseCharacters: !alphanumeric,
convertHalfWidthCharacters, convertHalfWidthCharacters,
convertNumericCharacters, convertNumericCharacters,
convertAlphabeticCharacters, convertAlphabeticCharacters,

View File

@@ -64,7 +64,7 @@ class Translator {
* { * {
* wildcard: (enum: null, 'prefix', 'suffix'), * wildcard: (enum: null, 'prefix', 'suffix'),
* mainDictionary: (string), * mainDictionary: (string),
* alphanumeric: (boolean), * removeNonJapaneseCharacters: (boolean),
* convertHalfWidthCharacters: (enum: 'false', 'true', 'variant'), * convertHalfWidthCharacters: (enum: 'false', 'true', 'variant'),
* convertNumericCharacters: (enum: 'false', 'true', 'variant'), * convertNumericCharacters: (enum: 'false', 'true', 'variant'),
* convertAlphabeticCharacters: (enum: 'false', 'true', 'variant'), * convertAlphabeticCharacters: (enum: 'false', 'true', 'variant'),
@@ -169,8 +169,10 @@ class Translator {
// Find terms internal implementation // Find terms internal implementation
async _findTermsInternal(text, enabledDictionaryMap, options) { async _findTermsInternal(text, enabledDictionaryMap, options) {
const {alphanumeric, wildcard} = options; const {wildcard} = options;
text = this._getSearchableText(text, alphanumeric); if (options.removeNonJapaneseCharacters) {
text = this._getJapaneseOnlyText(text);
}
if (text.length === 0) { if (text.length === 0) {
return {dictionaryEntries: [], originalTextLength: 0}; return {dictionaryEntries: [], originalTextLength: 0};
} }
@@ -302,15 +304,16 @@ class Translator {
return text; return text;
} }
_getSearchableText(text, allowAlphanumericCharacters) { _getJapaneseOnlyText(text) {
if (allowAlphanumericCharacters) { return text; }
const jp = this._japaneseUtil; const jp = this._japaneseUtil;
let length = 0; let length = 0;
for (const c of text) { for (const c of text) {
if (!jp.isCodePointJapanese(c.codePointAt(0))) { break; } if (!jp.isCodePointJapanese(c.codePointAt(0))) {
return text.substring(0, length);
}
length += c.length; length += c.length;
} }
return length >= text.length ? text : text.substring(0, length); return text;
} }
_getTextOptionEntryVariants(value) { _getTextOptionEntryVariants(value) {

View File

@@ -14,7 +14,7 @@
"default": { "default": {
"wildcard": null, "wildcard": null,
"mainDictionary": "${title}", "mainDictionary": "${title}",
"alphanumeric": false, "removeNonJapaneseCharacters": true,
"convertHalfWidthCharacters": false, "convertHalfWidthCharacters": false,
"convertNumericCharacters": false, "convertNumericCharacters": false,
"convertAlphabeticCharacters": false, "convertAlphabeticCharacters": false,
@@ -182,7 +182,7 @@
"options": [ "options": [
"default", "default",
{ {
"alphanumeric": true, "removeNonJapaneseCharacters": false,
"textReplacements": [ "textReplacements": [
null, null,
[ [
@@ -204,7 +204,7 @@
"options": [ "options": [
"default", "default",
{ {
"alphanumeric": true, "removeNonJapaneseCharacters": false,
"textReplacements": [ "textReplacements": [
null, null,
[ [
@@ -226,7 +226,7 @@
"options": [ "options": [
"default", "default",
{ {
"alphanumeric": true, "removeNonJapaneseCharacters": false,
"textReplacements": [ "textReplacements": [
null, null,
[ [
@@ -248,7 +248,7 @@
"options": [ "options": [
"default", "default",
{ {
"alphanumeric": true, "removeNonJapaneseCharacters": false,
"textReplacements": [ "textReplacements": [
null, null,
[ [
@@ -270,7 +270,7 @@
"options": [ "options": [
"default", "default",
{ {
"alphanumeric": true, "removeNonJapaneseCharacters": false,
"textReplacements": [ "textReplacements": [
null, null,
[ [