1

finish mapping most of daijirin

Now you can search for totally useful everyday words like 瘟㾮日
and 多羅吒干𤚥 :^).

The characters that remain either don't exist in Unicode or are very
difficult to find. Also, a couple of terms seem unsearchable in qolibri, so
I couldn't check what the characters are supposed to be.

Any questionable choice was marked with FIXME. This will make it easy to
replace some characters with their images later, if that's something
we want to support in the future.

* The FIXMEs with the missing font symbol should all be the correct
  character (not commonly covered by fonts)

* The くの字点 choices are to try and imitate the daijirin
  experience(TM). Probably the worst use of image fonts I've seen. Those
  characters should never appear in horizontal text. They should have
  just been replaced with the text that was supposed to be repeated.

* The 漢文訓読 characters in '{}' are technically the Unicode-specified
  characters for those glyphs; however, they just look like their
  full-size variants. I surrounded them with '{}' so the examples that use
  them are still readable.

* The other FIXMEs should be self-explanatory. Search the term in qolibri
  and look at what they used to see why they are questionable.
This commit is contained in:
Randy Palamar 2021-06-17 07:32:06 -06:00
parent 83e3e44f46
commit 6224b4c21f

View File

@ -134,7 +134,7 @@ func (e *daijirinExtractor) exportRules(term *dbTerm, tags []string) {
}
func (*daijirinExtractor) getRevision() string {
return "daijirin1"
return "daijirin2"
}
func (*daijirinExtractor) getFontNarrow() map[int]string {
@ -184,10 +184,11 @@ func (*daijirinExtractor) getFontNarrow() map[int]string {
49483: "ỳ",
49484: "ɑ",
49485: "ə",
49486: "ə",
49487: "ɛ",
49488: "θ",
49489: "ʌ",
49490: "ɑ́",
49490: "ɒ",
49491: "ə́",
49492: "ɔ́",
49493: "ɛ́",
@ -201,7 +202,7 @@ func (*daijirinExtractor) getFontNarrow() map[int]string {
49501: "ǽ",
49502: "æ̀",
49503: "Æ",
49504: "ɑ̃",
49504: "ɑ",
49505: "å",
49506: "˘",
49507: "ă",
@ -209,32 +210,89 @@ func (*daijirinExtractor) getFontNarrow() map[int]string {
49509: "ĭ",
49510: "V́",
49511: "T́",
49512: "ɠ",
49513: "ɔ̃",
49515: "ɚ",
49516: "«",
49517: "»",
49519: "ŋ",
49520: "m̥",
49521: "ḿ̥",
49522: "Ɂ",
49523: "◌́◌̃", /* FIXME: should be acute ontop of tilde */
49524: "ã",
49525: "æ",
49526: "ɔ",
49527: "ć",
49528: "ã́",
49529: "ɛ̃́",
49531: "û",
49532: "Ý",
49533: "ɔ",
49534: "Ḿ",
49697: "ɛ̃",
49698: "⁺",
49699: "ˣ",
49700: "ō",
49701: "ğ",
49702: "𝐴",
49703: "𝐵",
49704: "𝐷",
49705: "Ḍ",
49706: "𝐸",
49707: "𝐹",
49708: "𝐺",
49709: "𝐻",
49710: "Ḥ",
49711: "𝐿",
49712: "𝑀",
49713: "𝑁",
49714: "𝑃",
49715: "𝑄",
49716: "𝑅",
49717: "Ṛ",
49718: "𝑆",
49719: "Ṣ",
49720: "𝑇",
49721: "𝑉",
49722: "Ẓ",
49723: "𝑎",
49724: "ą",
49725: "𝑏",
49726: "𝑐",
49727: "𝑑",
49728: "ḍ",
49729: "𝑒",
49730: "ę",
49731: "𝑓",
49732: "𝑔",
49733: "𝘩",
49734: "ḥ",
49735: "𝒾",
49736: "ị",
49737: "𝑘",
49738: "𝑙",
49739: "𝑚",
49740: "ṃ",
49741: "𝑛",
49742: "ṇ",
49743: "𝑜",
49744: "𝑝",
49745: "𝑞",
49746: "𝑟",
49747: "ṛ",
49749: "ş",
49748: "𝑠",
49749: "ş",
49750: "ṣ",
49752: "ṭ",
49751: "𝑡",
49752: "ṭ",
49753: "𝑣",
49754: "𝑥",
49755: "𝑦",
49756: "𝑧",
49757: "ẓ",
49758: "İ",
49759: "ṁ",
49759: "",
49760: "ṅ",
49761: "ż",
49762: "Ś",
@ -249,6 +307,7 @@ func (*daijirinExtractor) getFontNarrow() map[int]string {
49771: "Ü",
49772: "ÿ",
49773: "Â",
49774: "ộ",
49775: "û",
49776: "Ā",
49777: "Ē",
@ -265,14 +324,68 @@ func (*daijirinExtractor) getFontNarrow() map[int]string {
49788: "ø",
49789: "ĩ",
49790: "õ",
49954: "°R",
49955: "º",
49956: "½",
49957: "⅓",
49958: "¹",
49959: "²",
49960: "¾",
49961: "³",
49962: "⁴",
49963: "⁵",
49964: "⁶",
49965: "⁷",
49966: "⁸",
49967: "⁹",
49968: "ᴹ",
49969: "𝑎/𝑏",
49970: "ᵇ",
49971: "(𝑎→𝑏)", /* FIXME: 定積分 should be 'b' above 'a' for integral */
49972: "ɟ",
49973: "ⁱ",
49974: "ᵐ",
49975: "ⁿ",
49976: "ʳ",
49977: "ᵗ",
49978: "ˣ",
49979: "(𝑎→𝑥)", /* FIXME: 不定積分 should be 'x' above 'a' for integral */
49980: "ʸ",
49981: "⁺",
49982: "⁻",
49983: "±",
49984: "ᶿ",
49985: "₀",
49986: "₁",
49987: "₂",
49988: "₃",
49989: "₄",
49990: "₅",
49991: "₆",
49992: "₇",
49993: "₈",
49994: "₉",
49995: "ᴀ",
49996: "ₐ",
49997: "ᵦ", /* FIXME: subscript 'b' doesn't exist */
49998: "ᵢ",
49999: "ₖ",
50000: "ₘ",
50001: "ₙ",
50002: "ᵣ",
50003: "ₓ",
50004: "₋ㇾ", /* FIXME: 漢文訓読 */
50005: "₊",
50006: "₋",
50010: "g̀",
50012: "$",
50016: "ㇾ", /* FIXME: 漢文訓読 */
50020: "₋", /* FIXME: 漢文訓読 */
50021: "{㆘}", /* FIXME: 漢文訓読 */
50022: "{㆔}", /* FIXME: 漢文訓読 */
50023: "{㆖}", /* FIXME: 漢文訓読 */
50025: "{㆗}", /* FIXME: 漢文訓読 */
50026: "₌", /* FIXME: 漢文訓読 */
50027: "ĕ",
50028: "Č",
50029: "Š",
@ -287,11 +400,15 @@ func (*daijirinExtractor) getFontNarrow() map[int]string {
50038: "ヱ",
50039: "ɯ̈",
50040: "ɰ",
50041: "ữ",
50042: "ʔ",
50043: "ɦ",
50044: "ß",
50045: "ɪ",
50046: "ɴ",
50209: "ɲ",
50210: "ː",
50211: "ς",
}
}
@ -699,6 +816,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
42299: "匇",
42300: "匃",
42301: "匜",
42302: "㔺",
42303: "嗢",
42304: "囉",
42305: "唽",
@ -708,9 +826,11 @@ func (*daijirinExtractor) getFontWide() map[int]string {
42309: "嚞",
42310: "喁",
42311: "噞",
42312: "𠵅",
42313: "哯",
42314: "嚩",
42315: "喈",
42316: "𠺕",
42317: "晷",
42318: "叵",
42319: "嗩",
@ -718,11 +838,13 @@ func (*daijirinExtractor) getFontWide() map[int]string {
42321: "娭",
42322: "嫚",
42323: "嬗",
42324: "𡝂",
42325: "娓",
42326: "姞",
42328: "孁",
42329: "堄",
42330: "埿",
42331: "𡑮",
42332: "坍",
42333: "垸",
42334: "坅",
@ -739,6 +861,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
42345: "尰",
42346: "屟",
42347: "屣",
42348: "𡱖",
42349: "异",
42351: "岺",
42352: "岏",
@ -748,12 +871,14 @@ func (*daijirinExtractor) getFontWide() map[int]string {
42356: "幉",
42357: "帒",
42358: "幞",
42359: "㡜",
42360: "彇",
42361: "弣",
42362: "弶",
42363: "弽",
42364: "庪",
42365: "擌",
42366: "𢷡",
42529: "擎",
42530: "挗",
42531: "擐",
@ -767,6 +892,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
42539: "撾",
42540: "摭",
42541: "熮",
42542: "㸅",
42543: "烑",
42544: "灵",
42545: "煑",
@ -792,6 +918,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
42565: "涘",
42566: "湌",
42567: "灔",
42568: "𤂖",
42569: "涔",
42570: "涬",
42571: "邾",
@ -874,6 +1001,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
42812: "槾",
42813: "楗",
42814: "棙",
42815: "𣑊",
42816: "桄",
42817: "杴",
42818: "枒",
@ -891,9 +1019,12 @@ func (*daijirinExtractor) getFontWide() map[int]string {
42830: "殮",
42831: "槩",
42832: "櫲",
42833: "𣏕",
42834: "𬄚", /* FIXME: あて ⿰木惡 */
42835: "穀",
42836: "蒁",
42837: "迱",
42838: "𨗈",
42839: "适",
42840: "逈",
42841: "迍",
@ -928,6 +1059,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
42872: "矪",
42873: "矬",
42874: "穭",
42875: "𧘱",
42876: "袽",
42877: "襅",
42878: "筯",
@ -939,6 +1071,8 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43046: "褲",
43047: "褙",
43048: "粿",
43049: "𥻨",
43050: "𦀌",
43051: "縬",
43052: "罇",
43053: "纆",
@ -959,16 +1093,20 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43068: "蟟",
43069: "蛁",
43070: "蜞",
43071: "𧏛",
43073: "蝯",
43074: "𪆐",
43075: "鵒",
43076: "鴝",
43077: "鸜",
43078: "鸇",
43079: "鶖",
43080: "𪃹",
43081: "鸍",
43082: "鵩",
43083: "鶡",
43084: "鷴",
43085: "鴒", /* FIXME: 交わる see 広辞苑 {5} for this replacement. should be ⿰𩙿鳥 */
43086: "鷧",
43087: "鏌",
43088: "鎁",
@ -982,8 +1120,10 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43096: "鋂",
43097: "鋧",
43098: "鐴",
43099: "𫒒", /* FIXME: むね【棟】character is ⿰釒丘 */
43100: "鋐",
43101: "蹔",
43102: "䟽",
43103: "踶",
43104: "詵",
43105: "諐",
@ -998,6 +1138,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43114: "釻",
43115: "鎛",
43116: "鐧",
43117: "䥫",
43118: "鉃",
43119: "纇",
43120: "熲",
@ -1046,6 +1187,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43325: "梲",
43326: "橅",
43327: "檉",
43328: "㮶",
43329: "櫧",
43330: "枻",
43331: "柃",
@ -1056,6 +1198,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43336: "朳",
43337: "棭",
43338: "梂",
43339: "𣜌",
43340: "榰",
43341: "柷",
43342: "槵",
@ -1098,6 +1241,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43379: "袘",
43380: "襀",
43381: "裓",
43382: "𧚄",
43383: "褘",
43384: "褹",
43385: "襢",
@ -1115,10 +1259,13 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43559: "荇",
43560: "蓎",
43561: "笯",
43562: "𥫱",
43563: "篅",
43564: "簳",
43565: "簹",
43566: "篔",
43567: "䈇",
43568: "䇮",
43569: "筲",
43570: "笭",
43571: "筎",
@ -1142,6 +1289,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43589: "緦",
43590: "紞",
43591: "纍",
43592: "𥿠",
43593: "羿",
43594: "翺",
43595: "翥",
@ -1154,6 +1302,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43602: "螇",
43603: "蠁",
43604: "蜱",
43605: "𧐐",
43606: "蛺",
43607: "虵",
43608: "蝱",
@ -1176,6 +1325,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43625: "闋",
43627: "鏱",
43628: "鈼",
43629: "𨫤",
43630: "鬌",
43631: "鞖",
43632: "靪",
@ -1235,6 +1385,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43848: "嬥",
43849: "妤",
43850: "媞",
43851: "縁", /* FIXME: 彐頭, Daijisen uses this char */
43852: "廋",
43853: "庿",
43854: "愒",
@ -1249,6 +1400,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43863: "幫",
43864: "帮",
43865: "毈",
43866: "𢏳", /* FIXME: ペテン */
43867: "彽",
43868: "徸",
43869: "鄯",
@ -1271,7 +1423,9 @@ func (*daijirinExtractor) getFontWide() map[int]string {
43886: "昉",
43887: "昰",
43888: "甗",
43889: "𤭯",
43890: "瓫",
43891: "𤚥",
43892: "敔",
43893: "忩",
43894: "毿",
@ -1288,6 +1442,7 @@ func (*daijirinExtractor) getFontWide() map[int]string {
44067: "睺",
44068: "毗",
44069: "翮",
44070: "𥝱",
44071: "稭",
44072: "稹",
44073: "祆",
@ -1297,6 +1452,8 @@ func (*daijirinExtractor) getFontWide() map[int]string {
44077: "翃",
44078: "舢",
44079: "艠",
44080: "𦨞",
44081: "⿰舟若", /* FIXME: 抄物書き */
44082: "趯",
44083: "醶",
44084: "跑",
@ -1320,14 +1477,17 @@ func (*daijirinExtractor) getFontWide() map[int]string {
44102: "頊",
44103: "骶",
44104: "髐",
44105: "䯊",
44106: "鶍",
44107: "鴲",
44108: "鸕",
44109: "鵼",
44110: "鷀",
44111: "䳑",
44112: "鼹",
44113: "鼷",
44114: "髖",
44115: "𪀚",
44116: "鸊",
44117: "鷉",
44118: "鵟",
@ -1368,10 +1528,13 @@ func (*daijirinExtractor) getFontWide() map[int]string {
44153: "苕",
44154: "芡",
44155: "茺",
44156: "薗", /* FIXME: 汝人, should be ⿱艹圃, see daijisen ex. sent. */
44157: "蔤",
44158: "芸", /* FIXME: Should be trad. chin. font */
44321: "葈",
44322: "你",
44323: "儛",
44324: "𦬇", /* FIXME: ささぼさつ */
44325: "塼",
44326: "坼",
44327: "塌",
@ -1416,12 +1579,15 @@ func (*daijirinExtractor) getFontWide() map[int]string {
44366: "蚜",
44367: "蚉",
44368: "蛽",
44369: "虻", /* FIXME: 蜻蛉, see ex sent. for 虻(アム) */
44370: "螵",
44371: "蚇",
44372: "螓",
44373: "蜐",
44374: "瘀",
44375: "㾮",
44376: "瘼",
44377: "𤸎",
44378: "痱",
44379: "癯",
44380: "癁",
@ -1430,22 +1596,29 @@ func (*daijirinExtractor) getFontWide() map[int]string {
44383: "砉",
44384: "耷",
44385: "耼",
44386: "𨏍",
44387: "軑",
44388: "轘",
44389: "輀",
44390: "魹",
44391: "韴",
44392: "鞲",
44393: "𩊱",
44394: "𩊠",
44395: "鮲",
44396: "𫙧", /* FIXME: ⿰魚近(ちか) */
44397: "鰘",
44398: "𩸭",
44399: "鰙",
44400: "鯝",
44401: "鰣",
44402: "鯽",
44403: "𩸽",
44404: "魶",
44405: "鰚",
44406: "鱲",
44407: "鱜",
44408: "𩺊",
44409: "鱊",
44410: "鱐",
44411: "鱟",
@ -1462,7 +1635,10 @@ func (*daijirinExtractor) getFontWide() map[int]string {
44584: "騭",
44585: "麅",
44586: "麞",
44587: "鹿子", /* FIXME: should be ⿰鹿子 */
44588: "亻",
44589: "乚",
44590: "㔾",
44591: "氵",
44592: "艹",
44593: "艹",
@ -1471,30 +1647,69 @@ func (*daijirinExtractor) getFontWide() map[int]string {
44596: "犭",
44597: "阝",
44598: "刂",
44599: "𠆢",
44600: "忄",
44601: "㓁",
44602: "耂",
44603: "爫",
44604: "爫",
44605: "灬",
44606: "⺗",
44607: "氺",
44608: "𤣩",
44609: "罒",
44610: "礻",
44611: "衤",
44612: "飠",
44613: "𩙿",
44619: "𤣥", /* FIXME: 欠画 */
44621: "⺩",
44622: "⺏", /* FIXME: 尢, this but with with leg stretched as in 尩 */
44623: "भर", /* FIXME: 勃嚕唵/bhrūṃ in sanskrit */
44624: "㐂",
44625: "𛀸", /* FIXME: 変体仮名 (こ) */
44626: "𛄋", /* FIXME: 異体文字 (は)*/
44627: "𛀆", /* FIXME: 変体仮名 (い) */
44628: "𛁟", /* FIXME: 変体仮名 (た) */
44629: "𛀙", /* FIXME: 変体仮名 (か) */
44630: "",
44631: "⁑",
44632: "©",
44633: "♮",
44634: "𝄐", /* FIXME: pause (music) */
44635: "𝄑", /* FIXME: pause (music) */
44636: "𝅘𝅥𝅯", /* FIXME: semiquaver/16th note */
44637: "⁂",
44638: "",
44639: "㊙",
44640: "☞",
44641: "˘",
44644: "卐", /* FIXME: should be tilted 45 degrees, not in unicode */
44645: "卐",
44646: "✓",
44647: "ƿ",
44648: "℅",
44649: "®",
44650: "∛𝑎",
44651: "Æ",
44652: "æ",
44653: "ffl",
44654: "fl",
44655: "ⁿ√",
44656: "œ",
44657: "∘",
44658: "∓",
44659: "^", /* FIXME: should be printed double width */
44660: "℧",
44661: "√2",
44662: "√𝑎",
44663: "©",
44664: "(公)", /* FIXME: 丸公 enclosed 公 */
44665: "㊜",
44666: "〖",
44667: "〗",
45106: "--",
45107: "―",
45108: "☰",
45109: "☷",
45110: "☱",
@ -1502,13 +1717,22 @@ func (*daijirinExtractor) getFontWide() map[int]string {
45112: "☴",
45113: "☵",
45114: "☶",
45115: "", /* FIXME: くの字点 */
45116: "\゙", /* FIXME: くの字点 */
45117: "", /* FIXME: くの字点 */
45118: "〻",
45119: "ǂ", /* FIXME: 複十字, probably not the char they wanted */
45120: "℉",
45121: "〽",
45122: "卍",
45123: "♨",
45124: "♠",
45125: "♥",
45126: "𝄐", /* FIXME: フェルマータ */
45127: "℥",
45130: "♩",
45131: "𝄉", /* FIXME: ダルセーニョ */
45132: "𝄪", /* FIXME: 重嬰記号 */
45133: "❶",
45134: "❷",
45135: "❸",
@ -1528,8 +1752,14 @@ func (*daijirinExtractor) getFontWide() map[int]string {
45149: "⓱",
45150: "⓲",
45151: "⓳",
45152: "ゑ",
45153: "ヶ", /* FIXME: 交野, uses small ガ not in unicode */
45157: "ト", /* FIXME: 九秋, not sure what they are trying to indicate */
45158: "",
45160: "ミ", /* FIXME: 青海苔, not sure what they are trying to indicate */
45162: "ㇿ",
45163: "ヰ",
45164: "ン", /* FIXME: 捨て仮名, probably small 'ン' (not a char in unicode) */
45175: "㏋",
}
}