From 6224b4c21f159649099ebaee95050d6c426660c7 Mon Sep 17 00:00:00 2001 From: Randy Palamar Date: Thu, 17 Jun 2021 07:32:06 -0600 Subject: [PATCH] finish mapping most of daijirin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now you can search for totally useful everyday words like 瘟㾮日 and 多羅吒干𤚥 :^). The characters that remain either don't exist in Unicode or are very difficult to find. Also, a couple of terms seem unsearchable in qolibri so I couldn't check what the characters are supposed to be. Any questionable choice was marked with FIXME. This will make it easy to replace some characters with their images later, if it's something that we want to support. * The FIXMEs with the missing font symbol should all be the correct character (not commonly covered by fonts). * The くの字点 choices are to try and imitate the daijirin experience(TM). Probably the worst use of image fonts I've seen. Those characters should never appear in horizontal text. They should have just been replaced with the text that was supposed to be repeated. * The 漢文訓読 characters in '{}' are technically the Unicode-specified characters for those glyphs; however, they just look like their full-size variants. I surrounded them with '{}' so the examples that use them are still readable. * The other FIXMEs should be self-explanatory. Search the term in qolibri and look at what they used to see why they are questionable. 
--- daijirin.go | 242 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 236 insertions(+), 6 deletions(-) diff --git a/daijirin.go b/daijirin.go index 4811938..5983918 100644 --- a/daijirin.go +++ b/daijirin.go @@ -134,7 +134,7 @@ func (e *daijirinExtractor) exportRules(term *dbTerm, tags []string) { } func (*daijirinExtractor) getRevision() string { - return "daijirin1" + return "daijirin2" } func (*daijirinExtractor) getFontNarrow() map[int]string { @@ -184,10 +184,11 @@ func (*daijirinExtractor) getFontNarrow() map[int]string { 49483: "ỳ", 49484: "ɑ", 49485: "ə", + 49486: "ə", 49487: "ɛ", 49488: "θ", 49489: "ʌ", - 49490: "ɑ́", + 49490: "ɒ", 49491: "ə́", 49492: "ɔ́", 49493: "ɛ́", @@ -201,7 +202,7 @@ func (*daijirinExtractor) getFontNarrow() map[int]string { 49501: "ǽ", 49502: "æ̀", 49503: "Æ", - 49504: "ɑ̃", + 49504: "ɑ", 49505: "å", 49506: "˘", 49507: "ă", @@ -209,32 +210,89 @@ func (*daijirinExtractor) getFontNarrow() map[int]string { 49509: "ĭ", 49510: "V́", 49511: "T́", + 49512: "ɠ", 49513: "ɔ̃", + 49515: "ɚ", + 49516: "«", + 49517: "»", + 49519: "ŋ", + 49520: "m̥", + 49521: "ḿ̥", + 49522: "Ɂ", + 49523: "◌́◌̃", /* FIXME: should be acute ontop of tilde */ + 49524: "ã", + 49525: "æ", + 49526: "ɔ", 49527: "ć", + 49528: "ã́", + 49529: "ɛ̃́", 49531: "û", 49532: "Ý", + 49533: "ɔ", 49534: "Ḿ", + 49697: "ɛ̃", + 49698: "⁺", + 49699: "ˣ", 49700: "ō", 49701: "ğ", + 49702: "𝐴", + 49703: "𝐵", + 49704: "𝐷", 49705: "Ḍ", + 49706: "𝐸", + 49707: "𝐹", + 49708: "𝐺", + 49709: "𝐻", 49710: "Ḥ", + 49711: "𝐿", + 49712: "𝑀", + 49713: "𝑁", + 49714: "𝑃", + 49715: "𝑄", + 49716: "𝑅", 49717: "Ṛ", + 49718: "𝑆", 49719: "Ṣ", + 49720: "𝑇", + 49721: "𝑉", 49722: "Ẓ", + 49723: "𝑎", 49724: "ą", + 49725: "𝑏", + 49726: "𝑐", + 49727: "𝑑", 49728: "ḍ", + 49729: "𝑒", 49730: "ę", + 49731: "𝑓", + 49732: "𝑔", + 49733: "𝘩", 49734: "ḥ", + 49735: "𝒾", 49736: "ị", + 49737: "𝑘", + 49738: "𝑙", + 49739: "𝑚", 49740: "ṃ", + 49741: "𝑛", 49742: "ṇ", + 49743: "𝑜", + 49744: "𝑝", + 49745: "𝑞", + 
49746: "𝑟", 49747: "ṛ", - 49749: "ş", + 49748: "𝑠", + 49749: "ş", 49750: "ṣ", - 49752: "ṭ", + 49751: "𝑡", + 49752: "ṭ", + 49753: "𝑣", + 49754: "𝑥", + 49755: "𝑦", + 49756: "𝑧", 49757: "ẓ", 49758: "İ", - 49759: "ṁ", + 49759: "ṁ", 49760: "ṅ", 49761: "ż", 49762: "Ś", @@ -249,6 +307,7 @@ func (*daijirinExtractor) getFontNarrow() map[int]string { 49771: "Ü", 49772: "ÿ", 49773: "Â", + 49774: "ộ", 49775: "û", 49776: "Ā", 49777: "Ē", @@ -265,14 +324,68 @@ func (*daijirinExtractor) getFontNarrow() map[int]string { 49788: "ø", 49789: "ĩ", 49790: "õ", + 49954: "°R", 49955: "º", 49956: "½", + 49957: "⅓", 49958: "¹", 49959: "²", 49960: "¾", 49961: "³", + 49962: "⁴", + 49963: "⁵", + 49964: "⁶", + 49965: "⁷", + 49966: "⁸", + 49967: "⁹", + 49968: "ᴹ", + 49969: "𝑎/𝑏", + 49970: "ᵇ", + 49971: "(𝑎→𝑏)", /* FIXME: 定積分 should be 'b' above 'a' for integral */ 49972: "ɟ", + 49973: "ⁱ", + 49974: "ᵐ", + 49975: "ⁿ", + 49976: "ʳ", + 49977: "ᵗ", + 49978: "ˣ", + 49979: "(𝑎→𝑥)", /* FIXME: 不定積分 should be 'x' above 'a' for integral */ + 49980: "ʸ", + 49981: "⁺", + 49982: "⁻", + 49983: "±", + 49984: "ᶿ", + 49985: "₀", + 49986: "₁", + 49987: "₂", + 49988: "₃", + 49989: "₄", + 49990: "₅", + 49991: "₆", + 49992: "₇", + 49993: "₈", + 49994: "₉", + 49995: "ᴀ", + 49996: "ₐ", + 49997: "ᵦ", /* FIXME: subscript 'b' doesn't exist */ + 49998: "ᵢ", + 49999: "ₖ", + 50000: "ₘ", + 50001: "ₙ", + 50002: "ᵣ", + 50003: "ₓ", + 50004: "₋ㇾ", /* FIXME: 漢文訓読 */ + 50005: "₊", + 50006: "₋", 50010: "g̀", + 50012: "$", + 50016: "ㇾ", /* FIXME: 漢文訓読 */ + 50020: "₋", /* FIXME: 漢文訓読 */ + 50021: "{㆘}", /* FIXME: 漢文訓読 */ + 50022: "{㆔}", /* FIXME: 漢文訓読 */ + 50023: "{㆖}", /* FIXME: 漢文訓読 */ + 50025: "{㆗}", /* FIXME: 漢文訓読 */ + 50026: "₌", /* FIXME: 漢文訓読 */ 50027: "ĕ", 50028: "Č", 50029: "Š", @@ -287,11 +400,15 @@ func (*daijirinExtractor) getFontNarrow() map[int]string { 50038: "ヱ", 50039: "ɯ̈", 50040: "ɰ", + 50041: "ữ", 50042: "ʔ", 50043: "ɦ", 50044: "ß", + 50045: "ɪ", + 50046: "ɴ", 50209: "ɲ", 50210: "ː", + 50211: "ς", } } @@ -699,6 
+816,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 42299: "匇", 42300: "匃", 42301: "匜", + 42302: "㔺", 42303: "嗢", 42304: "囉", 42305: "唽", @@ -708,9 +826,11 @@ func (*daijirinExtractor) getFontWide() map[int]string { 42309: "嚞", 42310: "喁", 42311: "噞", + 42312: "𠵅", 42313: "哯", 42314: "嚩", 42315: "喈", + 42316: "𠺕", 42317: "晷", 42318: "叵", 42319: "嗩", @@ -718,11 +838,13 @@ func (*daijirinExtractor) getFontWide() map[int]string { 42321: "娭", 42322: "嫚", 42323: "嬗", + 42324: "𡝂", 42325: "娓", 42326: "姞", 42328: "孁", 42329: "堄", 42330: "埿", + 42331: "𡑮", 42332: "坍", 42333: "垸", 42334: "坅", @@ -739,6 +861,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 42345: "尰", 42346: "屟", 42347: "屣", + 42348: "𡱖", 42349: "异", 42351: "岺", 42352: "岏", @@ -748,12 +871,14 @@ func (*daijirinExtractor) getFontWide() map[int]string { 42356: "幉", 42357: "帒", 42358: "幞", + 42359: "㡜", 42360: "彇", 42361: "弣", 42362: "弶", 42363: "弽", 42364: "庪", 42365: "擌", + 42366: "𢷡", 42529: "擎", 42530: "挗", 42531: "擐", @@ -767,6 +892,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 42539: "撾", 42540: "摭", 42541: "熮", + 42542: "㸅", 42543: "烑", 42544: "灵", 42545: "煑", @@ -792,6 +918,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 42565: "涘", 42566: "湌", 42567: "灔", + 42568: "𤂖", 42569: "涔", 42570: "涬", 42571: "邾", @@ -874,6 +1001,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 42812: "槾", 42813: "楗", 42814: "棙", + 42815: "𣑊", 42816: "桄", 42817: "杴", 42818: "枒", @@ -891,9 +1019,12 @@ func (*daijirinExtractor) getFontWide() map[int]string { 42830: "殮", 42831: "槩", 42832: "櫲", + 42833: "𣏕", + 42834: "𬄚", /* FIXME: あて ⿰木惡 */ 42835: "穀", 42836: "蒁", 42837: "迱", + 42838: "𨗈", 42839: "适", 42840: "逈", 42841: "迍", @@ -928,6 +1059,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 42872: "矪", 42873: "矬", 42874: "穭", + 42875: "𧘱", 42876: "袽", 42877: "襅", 42878: "筯", @@ -939,6 +1071,8 @@ func (*daijirinExtractor) getFontWide() map[int]string 
{ 43046: "褲", 43047: "褙", 43048: "粿", + 43049: "𥻨", + 43050: "𦀌", 43051: "縬", 43052: "罇", 43053: "纆", @@ -959,16 +1093,20 @@ func (*daijirinExtractor) getFontWide() map[int]string { 43068: "蟟", 43069: "蛁", 43070: "蜞", + 43071: "𧏛", 43073: "蝯", + 43074: "𪆐", 43075: "鵒", 43076: "鴝", 43077: "鸜", 43078: "鸇", 43079: "鶖", + 43080: "𪃹", 43081: "鸍", 43082: "鵩", 43083: "鶡", 43084: "鷴", + 43085: "鴒", /* FIXME: 交わる see 広辞苑 {5} for this replacement. should be ⿰𩙿鳥 */ 43086: "鷧", 43087: "鏌", 43088: "鎁", @@ -982,8 +1120,10 @@ func (*daijirinExtractor) getFontWide() map[int]string { 43096: "鋂", 43097: "鋧", 43098: "鐴", + 43099: "𫒒", /* FIXME: むね【棟】character is ⿰釒丘 */ 43100: "鋐", 43101: "蹔", + 43102: "䟽", 43103: "踶", 43104: "詵", 43105: "諐", @@ -998,6 +1138,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 43114: "釻", 43115: "鎛", 43116: "鐧", + 43117: "䥫", 43118: "鉃", 43119: "纇", 43120: "熲", @@ -1046,6 +1187,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 43325: "梲", 43326: "橅", 43327: "檉", + 43328: "㮶", 43329: "櫧", 43330: "枻", 43331: "柃", @@ -1056,6 +1198,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 43336: "朳", 43337: "棭", 43338: "梂", + 43339: "𣜌", 43340: "榰", 43341: "柷", 43342: "槵", @@ -1098,6 +1241,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 43379: "袘", 43380: "襀", 43381: "裓", + 43382: "𧚄", 43383: "褘", 43384: "褹", 43385: "襢", @@ -1115,10 +1259,13 @@ func (*daijirinExtractor) getFontWide() map[int]string { 43559: "荇", 43560: "蓎", 43561: "笯", + 43562: "𥫱", 43563: "篅", 43564: "簳", 43565: "簹", 43566: "篔", + 43567: "䈇", + 43568: "䇮", 43569: "筲", 43570: "笭", 43571: "筎", @@ -1142,6 +1289,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 43589: "緦", 43590: "紞", 43591: "纍", + 43592: "𥿠", 43593: "羿", 43594: "翺", 43595: "翥", @@ -1154,6 +1302,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 43602: "螇", 43603: "蠁", 43604: "蜱", + 43605: "𧐐", 43606: "蛺", 43607: "虵", 43608: "蝱", @@ -1176,6 +1325,7 @@ func 
(*daijirinExtractor) getFontWide() map[int]string { 43625: "闋", 43627: "鏱", 43628: "鈼", + 43629: "𨫤", 43630: "鬌", 43631: "鞖", 43632: "靪", @@ -1235,6 +1385,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 43848: "嬥", 43849: "妤", 43850: "媞", + 43851: "縁", /* FIXME: 彐頭, Daijisen uses this char */ 43852: "廋", 43853: "庿", 43854: "愒", @@ -1249,6 +1400,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 43863: "幫", 43864: "帮", 43865: "毈", + 43866: "𢏳", /* FIXME: ペテン */ 43867: "彽", 43868: "徸", 43869: "鄯", @@ -1271,7 +1423,9 @@ func (*daijirinExtractor) getFontWide() map[int]string { 43886: "昉", 43887: "昰", 43888: "甗", + 43889: "𤭯", 43890: "瓫", + 43891: "𤚥", 43892: "敔", 43893: "忩", 43894: "毿", @@ -1288,6 +1442,7 @@ func (*daijirinExtractor) getFontWide() map[int]string { 44067: "睺", 44068: "毗", 44069: "翮", + 44070: "𥝱", 44071: "稭", 44072: "稹", 44073: "祆", @@ -1297,6 +1452,8 @@ func (*daijirinExtractor) getFontWide() map[int]string { 44077: "翃", 44078: "舢", 44079: "艠", + 44080: "𦨞", + 44081: "⿰舟若", /* FIXME: 抄物書き */ 44082: "趯", 44083: "醶", 44084: "跑", @@ -1320,14 +1477,17 @@ func (*daijirinExtractor) getFontWide() map[int]string { 44102: "頊", 44103: "骶", 44104: "髐", + 44105: "䯊", 44106: "鶍", 44107: "鴲", 44108: "鸕", 44109: "鵼", 44110: "鷀", + 44111: "䳑", 44112: "鼹", 44113: "鼷", 44114: "髖", + 44115: "𪀚", 44116: "鸊", 44117: "鷉", 44118: "鵟", @@ -1368,10 +1528,13 @@ func (*daijirinExtractor) getFontWide() map[int]string { 44153: "苕", 44154: "芡", 44155: "茺", + 44156: "薗", /* FIXME: 汝人, should be ⿱艹圃, see daijisen ex. sent. */ 44157: "蔤", + 44158: "芸", /* FIXME: Should be trad. chin. font */ 44321: "葈", 44322: "你", 44323: "儛", + 44324: "𦬇", /* FIXME: ささぼさつ */ 44325: "塼", 44326: "坼", 44327: "塌", @@ -1416,12 +1579,15 @@ func (*daijirinExtractor) getFontWide() map[int]string { 44366: "蚜", 44367: "蚉", 44368: "蛽", + 44369: "虻", /* FIXME: 蜻蛉, see ex sent. 
for 虻(アム) */ 44370: "螵", 44371: "蚇", 44372: "螓", 44373: "蜐", 44374: "瘀", + 44375: "㾮", 44376: "瘼", + 44377: "𤸎", 44378: "痱", 44379: "癯", 44380: "癁", @@ -1430,22 +1596,29 @@ func (*daijirinExtractor) getFontWide() map[int]string { 44383: "砉", 44384: "耷", 44385: "耼", + 44386: "𨏍", 44387: "軑", 44388: "轘", 44389: "輀", 44390: "魹", 44391: "韴", 44392: "鞲", + 44393: "𩊱", + 44394: "𩊠", 44395: "鮲", + 44396: "𫙧", /* FIXME: ⿰魚近(ちか) */ 44397: "鰘", + 44398: "𩸭", 44399: "鰙", 44400: "鯝", 44401: "鰣", 44402: "鯽", + 44403: "𩸽", 44404: "魶", 44405: "鰚", 44406: "鱲", 44407: "鱜", + 44408: "𩺊", 44409: "鱊", 44410: "鱐", 44411: "鱟", @@ -1462,7 +1635,10 @@ func (*daijirinExtractor) getFontWide() map[int]string { 44584: "騭", 44585: "麅", 44586: "麞", + 44587: "鹿子", /* FIXME: should be ⿰鹿子 */ + 44588: "亻", 44589: "乚", + 44590: "㔾", 44591: "氵", 44592: "艹", 44593: "艹", @@ -1471,30 +1647,69 @@ func (*daijirinExtractor) getFontWide() map[int]string { 44596: "犭", 44597: "阝", 44598: "刂", + 44599: "𠆢", 44600: "忄", + 44601: "㓁", 44602: "耂", 44603: "爫", + 44604: "爫", 44605: "灬", + 44606: "⺗", 44607: "氺", + 44608: "𤣩", 44609: "罒", 44610: "礻", 44611: "衤", 44612: "飠", + 44613: "𩙿", + 44619: "𤣥", /* FIXME: 欠画 */ + 44621: "⺩", + 44622: "⺏", /* FIXME: 尢, this but with with leg stretched as in 尩 */ + 44623: "भर", /* FIXME: 勃嚕唵/bhrūṃ in sanskrit */ + 44624: "㐂", + 44625: "𛀸", /* FIXME: 変体仮名 (こ) */ + 44626: "𛄋", /* FIXME: 異体文字 (は)*/ + 44627: "𛀆", /* FIXME: 変体仮名 (い) */ + 44628: "𛁟", /* FIXME: 変体仮名 (た) */ + 44629: "𛀙", /* FIXME: 変体仮名 (か) */ + 44630: "⁎", + 44631: "⁑", 44632: "©", 44633: "♮", + 44634: "𝄐", /* FIXME: pause (music) */ + 44635: "𝄑", /* FIXME: pause (music) */ + 44636: "𝅘𝅥𝅯", /* FIXME: semiquaver/16th note */ + 44637: "⁂", + 44638: "*", 44639: "㊙", 44640: "☞", + 44641: "˘", + 44644: "卐", /* FIXME: should be tilted 45 degrees, not in unicode */ + 44645: "卐", + 44646: "✓", + 44647: "ƿ", + 44648: "℅", 44649: "®", + 44650: "∛𝑎", 44651: "Æ", 44652: "æ", + 44653: "ffl", 44654: "fl", + 44655: "ⁿ√", 44656: "œ", 
44657: "∘", + 44658: "∓", + 44659: "^", /* FIXME: should be printed double width */ 44660: "℧", + 44661: "√2", + 44662: "√𝑎", 44663: "©", + 44664: "(公)", /* FIXME: 丸公 enclosed 公 */ 44665: "㊜", 44666: "〖", 44667: "〗", + 45106: "--", + 45107: "―", 45108: "☰", 45109: "☷", 45110: "☱", @@ -1502,13 +1717,22 @@ func (*daijirinExtractor) getFontWide() map[int]string { 45112: "☴", 45113: "☵", 45114: "☶", + 45115: "\", /* FIXME: くの字点 */ + 45116: "\゙", /* FIXME: くの字点 */ + 45117: "/", /* FIXME: くの字点 */ + 45118: "〻", + 45119: "ǂ", /* FIXME: 複十字, probably not the char they wanted */ 45120: "℉", 45121: "〽", 45122: "卍", 45123: "♨", 45124: "♠", 45125: "♥", + 45126: "𝄐", /* FIXME: フェルマータ */ + 45127: "℥", 45130: "♩", + 45131: "𝄉", /* FIXME: ダルセーニョ */ + 45132: "𝄪", /* FIXME: 重嬰記号 */ 45133: "❶", 45134: "❷", 45135: "❸", @@ -1528,8 +1752,14 @@ func (*daijirinExtractor) getFontWide() map[int]string { 45149: "⓱", 45150: "⓲", 45151: "⓳", + 45152: "ゑ", + 45153: "ヶ", /* FIXME: 交野, uses small ガ not in unicode */ + 45157: "ト", /* FIXME: 九秋, not sure what they are trying to indicate */ 45158: "ノ", + 45160: "ミ", /* FIXME: 青海苔, not sure what they are trying to indicate */ + 45162: "ㇿ", 45163: "ヰ", + 45164: "ン", /* FIXME: 捨て仮名, probably small 'ン' (not a char in unicode) */ 45175: "㏋", } }