Fix translation regex replacements (#1423)

* Fix regex replacements being handled incorrectly when they occur at the start of the scanned text

* Fix test cases

* Add tests
This commit is contained in:
toasted-nutbread 2021-02-19 18:39:43 -05:00 committed by GitHub
parent 6a6d7ba84e
commit 1e927dd66e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 336 additions and 9 deletions

View File

@ -1361,8 +1361,8 @@ class Translator {
pattern.lastIndex += delta;
if (actualReplacementLength > 0) {
sourceMap.combine(Math.max(0, index - 1), matchText.length);
sourceMap.insert(index, ...(new Array(actualReplacementLength).fill(0)));
sourceMap.combine(index - 1 + actualReplacementLength, matchText.length);
} else {
sourceMap.combine(index, matchText.length);
}

View File

@ -3,5 +3,7 @@
["tag2", "category2", 0, "tag2 notes", 0],
["tag3", "category3", 0, "tag3 notes", 0],
["tag4", "category4", 0, "tag4 notes", 0],
["tag5", "category5", 0, "tag5 notes", 0]
["tag5", "category5", 0, "tag5 notes", 0],
["popular", "popular", 0, "popular term", 0],
["vt", "partOfSpeech", 0, "transitive verb", 0]
]

View File

@ -9,5 +9,7 @@
["打ち込む", "うちこむ", "tag1 tag2", "v5", 1, ["definition15", "definition16"], 4, "tag5 tag6 tag7"],
["打ち込む", "ぶちこむ", "tag1 tag2", "v5", 10, ["definition17", "definition18"], 4, "tag3 tag4 tag5"],
["打ち込む", "ぶちこむ", "tag1 tag2", "v5", 1, ["definition19", "definition20"], 4, "tag3 tag4 tag5"],
["画像", "がぞう", "tag1 tag2", "", 1, ["definition21", {"type": "image", "path": "image.gif", "width": 350, "height": 350, "description": "An image", "pixelated": true}], 5, "tag3 tag4 tag5"]
["画像", "がぞう", "tag1 tag2", "", 1, ["definition21", {"type": "image", "path": "image.gif", "width": 350, "height": 350, "description": "An image", "pixelated": true}], 5, "tag3 tag4 tag5"],
["読む", "よむ", "popular", "v5", 100, ["to read"], 6, "vt"],
["強み", "つよみ", "popular", "n", 90, ["strong point"], 7, ""]
]

View File

@ -18803,7 +18803,7 @@
"type": "term",
"id": 3,
"source": "打ち",
"rawSource": "(打)(ち)(込)",
"rawSource": "(打)(ち)",
"sourceTerm": "打つ",
"reasons": [
"masu stem"
@ -18967,7 +18967,7 @@
"type": "term",
"id": 5,
"source": "打ち",
"rawSource": "(打)(ち)(込)",
"rawSource": "(打)(ち)",
"sourceTerm": "打つ",
"reasons": [
"masu stem"
@ -19131,7 +19131,7 @@
"type": "term",
"id": 4,
"source": "打ち",
"rawSource": "(打)(ち)(込)",
"rawSource": "(打)(ち)",
"sourceTerm": "打つ",
"reasons": [
"masu stem"
@ -19295,7 +19295,7 @@
"type": "term",
"id": 6,
"source": "打ち",
"rawSource": "(打)(ち)(込)",
"rawSource": "(打)(ち)",
"sourceTerm": "打つ",
"reasons": [
"masu stem"
@ -19459,7 +19459,7 @@
"type": "term",
"id": 1,
"source": "打",
"rawSource": "(打)(ち)",
"rawSource": "(打)",
"sourceTerm": "打",
"reasons": [],
"score": 1,
@ -19613,7 +19613,7 @@
"type": "term",
"id": 2,
"source": "打",
"rawSource": "(打)(ち)",
"rawSource": "(打)",
"sourceTerm": "打",
"reasons": [],
"score": 1,
@ -19765,6 +19765,329 @@
}
]
}
},
{
"comment": "Test non-empty replacement",
"func": "findTerms",
"mode": "split",
"text": "test",
"options": [
"default",
{
"alphanumeric": true,
"textReplacements": [
null,
[
{
"pattern": "test",
"flags": "g",
"replacement": "よみ"
}
]
]
}
],
"expected": {
"length": 4,
"definitions": [
{
"type": "term",
"id": 12,
"source": "よみ",
"rawSource": "test",
"sourceTerm": "よむ",
"reasons": [
"masu stem"
],
"score": 100,
"sequence": 6,
"dictionary": "Test Dictionary 2",
"dictionaryPriority": 0,
"dictionaryNames": [
"Test Dictionary 2"
],
"expression": "読む",
"reading": "よむ",
"expressions": [
{
"sourceTerm": "よむ",
"expression": "読む",
"reading": "よむ",
"furiganaSegments": [
{
"text": "読",
"furigana": "よ"
},
{
"text": "む",
"furigana": ""
}
],
"termTags": [
{
"name": "vt",
"category": "partOfSpeech",
"notes": "transitive verb",
"order": 0,
"score": 0,
"dictionary": "Test Dictionary 2",
"redundant": false
}
],
"termFrequency": "normal",
"frequencies": [],
"pitches": []
}
],
"furiganaSegments": [
{
"text": "読",
"furigana": "よ"
},
{
"text": "む",
"furigana": ""
}
],
"glossary": [
"to read"
],
"definitionTags": [
{
"name": "popular",
"category": "popular",
"notes": "popular term",
"order": 0,
"score": 0,
"dictionary": "Test Dictionary 2",
"redundant": false
}
],
"termTags": [
{
"name": "vt",
"category": "partOfSpeech",
"notes": "transitive verb",
"order": 0,
"score": 0,
"dictionary": "Test Dictionary 2",
"redundant": false
}
],
"frequencies": [],
"pitches": [],
"sourceTermExactMatchCount": 0
}
]
}
},
{
"comment": "Test non-empty replacement at end",
"func": "findTerms",
"mode": "split",
"text": "つtest",
"options": [
"default",
{
"alphanumeric": true,
"textReplacements": [
null,
[
{
"pattern": "test",
"flags": "g",
"replacement": "よみ"
}
]
]
}
],
"expected": {
"length": 5,
"definitions": [
{
"type": "term",
"id": 13,
"source": "つよみ",
"rawSource": "つtest",
"sourceTerm": "つよみ",
"reasons": [],
"score": 90,
"sequence": 7,
"dictionary": "Test Dictionary 2",
"dictionaryPriority": 0,
"dictionaryNames": [
"Test Dictionary 2"
],
"expression": "強み",
"reading": "つよみ",
"expressions": [
{
"sourceTerm": "つよみ",
"expression": "強み",
"reading": "つよみ",
"furiganaSegments": [
{
"text": "強",
"furigana": "つよ"
},
{
"text": "み",
"furigana": ""
}
],
"termTags": [],
"termFrequency": "normal",
"frequencies": [],
"pitches": []
}
],
"furiganaSegments": [
{
"text": "強",
"furigana": "つよ"
},
{
"text": "み",
"furigana": ""
}
],
"glossary": [
"strong point"
],
"definitionTags": [
{
"name": "popular",
"category": "popular",
"notes": "popular term",
"order": 0,
"score": 0,
"dictionary": "Test Dictionary 2",
"redundant": false
}
],
"termTags": [],
"frequencies": [],
"pitches": [],
"sourceTermExactMatchCount": 0
}
]
}
},
{
"comment": "Test non-empty replacement at start",
"func": "findTerms",
"mode": "split",
"text": "testました",
"options": [
"default",
{
"alphanumeric": true,
"textReplacements": [
null,
[
{
"pattern": "test",
"flags": "g",
"replacement": "よみ"
}
]
]
}
],
"expected": {
"length": 7,
"definitions": [
{
"type": "term",
"id": 12,
"source": "よみました",
"rawSource": "testました",
"sourceTerm": "よむ",
"reasons": [
"polite past"
],
"score": 100,
"sequence": 6,
"dictionary": "Test Dictionary 2",
"dictionaryPriority": 0,
"dictionaryNames": [
"Test Dictionary 2"
],
"expression": "読む",
"reading": "よむ",
"expressions": [
{
"sourceTerm": "よむ",
"expression": "読む",
"reading": "よむ",
"furiganaSegments": [
{
"text": "読",
"furigana": "よ"
},
{
"text": "む",
"furigana": ""
}
],
"termTags": [
{
"name": "vt",
"category": "partOfSpeech",
"notes": "transitive verb",
"order": 0,
"score": 0,
"dictionary": "Test Dictionary 2",
"redundant": false
}
],
"termFrequency": "normal",
"frequencies": [],
"pitches": []
}
],
"furiganaSegments": [
{
"text": "読",
"furigana": "よ"
},
{
"text": "む",
"furigana": ""
}
],
"glossary": [
"to read"
],
"definitionTags": [
{
"name": "popular",
"category": "popular",
"notes": "popular term",
"order": 0,
"score": 0,
"dictionary": "Test Dictionary 2",
"redundant": false
}
],
"termTags": [
{
"name": "vt",
"category": "partOfSpeech",
"notes": "transitive verb",
"order": 0,
"score": 0,
"dictionary": "Test Dictionary 2",
"redundant": false
}
],
"frequencies": [],
"pitches": [],
"sourceTermExactMatchCount": 0
}
]
}
}
]
}