From 0e0e449e7ee4d6223791f52f14bc7909d24662e9 Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Sat, 6 Aug 2016 18:17:02 -0700 Subject: [PATCH] Cleanup --- common.go | 113 +++++++++++++++++ edict.go | 102 ++++++++++++++++ enamdict.go | 88 ++++++++++++++ jmdict.go | 340 ---------------------------------------------------- kanjidic.go | 117 ++++++++++++++++++ main.go | 2 +- 6 files changed, 421 insertions(+), 341 deletions(-) create mode 100644 common.go create mode 100644 edict.go create mode 100644 enamdict.go delete mode 100644 jmdict.go create mode 100644 kanjidic.go diff --git a/common.go b/common.go new file mode 100644 index 0000000..a578c48 --- /dev/null +++ b/common.go @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2016 Alex Yatskov + * Author: Alex Yatskov + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package main + +import ( + "encoding/json" + "io" + "strconv" + "strings" +) + +type vocabJson struct { + Indices map[string]string `json:"i"` + Entities map[string]string `json:"e"` + Defs [][]string `json:"d"` +} + +type vocabSource struct { + Expression string + Reading string + Tags []string + Glossary []string +} + +func (d *vocabSource) addTags(tags []string) { + for _, tag := range tags { + if !hasString(tag, d.Tags) { + d.Tags = append(d.Tags, tag) + } + } +} + +func buildVocabJson(entries []vocabSource, entities map[string]string) vocabJson { + dict := vocabJson{ + Indices: make(map[string]string), + Entities: entities, + } + + for i, e := range entries { + entry := []string{e.Expression, e.Reading, strings.Join(e.Tags, " ")} + entry = append(entry, e.Glossary...) + dict.Defs = append(dict.Defs, entry) + + appendStrIndex(dict.Indices, e.Expression, i) + if len(e.Reading) > 0 { + appendStrIndex(dict.Indices, e.Reading, i) + } + } + + return dict +} + +func outputVocabJson(writer io.Writer, entries []vocabSource, entities map[string]string, pretty bool) error { + dict := buildVocabJson(entries, entities) + + var ( + bytes []byte + err error + ) + + if pretty { + bytes, err = json.MarshalIndent(dict, "", " ") + } else { + bytes, err = json.Marshal(dict) + } + + if err != nil { + return err + } + + _, err = writer.Write(bytes) + return err +} + +func appendStrIndex(indices map[string]string, key string, value int) { + def, _ := indices[key] + if len(def) > 0 { + def += " " + } + + def += strconv.Itoa(value) + indices[key] = def +} + +func hasString(needle string, haystack []string) bool { + for _, value := range haystack { + if needle == value { + return true + } + } + + return false +} diff --git a/edict.go b/edict.go new file mode 100644 index 0000000..2307174 --- /dev/null +++ b/edict.go @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2016 Alex Yatskov + * Author: Alex Yatskov + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package main + +import ( + "io" + + "github.com/FooSoft/jmdict" +) + +func convertEdictEntry(edictEntry jmdict.JmdictEntry) []vocabSource { + var entries []vocabSource + + convert := func(reading jmdict.JmdictReading, kanji *jmdict.JmdictKanji) { + if kanji != nil && hasString(kanji.Expression, reading.Restrictions) { + return + } + + var entry vocabSource + if kanji == nil { + entry.Expression = reading.Reading + } else { + entry.Expression = kanji.Expression + entry.Reading = reading.Reading + + entry.addTags(kanji.Information) + entry.addTags(kanji.Priorities) + } + + entry.addTags(reading.Information) + entry.addTags(reading.Priorities) + + for _, sense := range edictEntry.Sense { + if hasString(reading.Reading, sense.RestrictedReadings) { + continue + } + + if kanji != nil && hasString(kanji.Expression, sense.RestrictedKanji) { + continue + } + + for _, glossary := range sense.Glossary { + entry.Glossary = append(entry.Glossary, glossary.Content) + } + + entry.addTags(sense.PartsOfSpeech) + entry.addTags(sense.Fields) + entry.addTags(sense.Misc) + entry.addTags(sense.Dialects) + } + + entries = append(entries, entry) + } + + if len(edictEntry.Kanji) > 0 { + for _, kanji := range edictEntry.Kanji { + for _, reading := range edictEntry.Readings { + convert(reading, &kanji) + } + } + } else { + for _, reading := range edictEntry.Readings { + convert(reading, nil) + } + } + + return entries +} + +func processEdict(writer io.Writer, reader io.Reader, flags int) error { + dict, entities, err := jmdict.LoadJmdictNoTransform(reader) + if err != nil { + return err + } + + var entries []vocabSource + for _, e := range dict.Entries { + entries = append(entries, convertEdictEntry(e)...) + } + + return outputVocabJson(writer, entries, entities, flags&flagPrettyJson == flagPrettyJson) +} diff --git a/enamdict.go b/enamdict.go new file mode 100644 index 0000000..4cfe389 --- /dev/null +++ b/enamdict.go @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016 Alex Yatskov + * Author: Alex Yatskov + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package main + +import ( + "io" + + "github.com/FooSoft/jmdict" +) + +func convertJmnedictEntry(enamdictEntry jmdict.JmnedictEntry) []vocabSource { + var entries []vocabSource + + convert := func(reading jmdict.JmnedictReading, kanji *jmdict.JmnedictKanji) { + if kanji != nil && hasString(kanji.Expression, reading.Restrictions) { + return + } + + var entry vocabSource + if kanji == nil { + entry.Expression = reading.Reading + } else { + entry.Expression = kanji.Expression + entry.Reading = reading.Reading + + entry.addTags(kanji.Information) + entry.addTags(kanji.Priorities) + } + + entry.addTags(reading.Information) + entry.addTags(reading.Priorities) + + for _, trans := range enamdictEntry.Translations { + entry.Glossary = append(entry.Glossary, trans.Translations...) + entry.addTags(trans.NameTypes) + } + + entries = append(entries, entry) + } + + if len(enamdictEntry.Kanji) > 0 { + for _, kanji := range enamdictEntry.Kanji { + for _, reading := range enamdictEntry.Readings { + convert(reading, &kanji) + } + } + } else { + for _, reading := range enamdictEntry.Readings { + convert(reading, nil) + } + } + + return entries +} + +func processJmnedict(writer io.Writer, reader io.Reader, flags int) error { + dict, entities, err := jmdict.LoadJmnedictNoTransform(reader) + if err != nil { + return err + } + + var entries []vocabSource + for _, e := range dict.Entries { + entries = append(entries, convertJmnedictEntry(e)...) + } + + return outputVocabJson(writer, entries, entities, flags&flagPrettyJson == flagPrettyJson) +} diff --git a/jmdict.go b/jmdict.go deleted file mode 100644 index 698463a..0000000 --- a/jmdict.go +++ /dev/null @@ -1,340 +0,0 @@ -/* - * Copyright (c) 2016 Alex Yatskov - * Author: Alex Yatskov - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS - * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR - * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER - * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package main - -import ( - "encoding/json" - "io" - "strconv" - "strings" - - "github.com/FooSoft/jmdict" -) - -// -// Edict and Enamdict processing -// - -type vocabDictJson struct { - Indices map[string]string `json:"i"` - Entities map[string]string `json:"e"` - Defs [][]string `json:"d"` -} - -type vocabDictSource struct { - Expression string - Reading string - Tags []string - Glossary []string -} - -func (d *vocabDictSource) addTags(tags []string) { - for _, tag := range tags { - if findString(tag, d.Tags) == -1 { - d.Tags = append(d.Tags, tag) - } - } -} - -func appendIndex(indices map[string]string, key string, value int) { - def, _ := indices[key] - if len(def) > 0 { - def += " " - } - def += strconv.Itoa(value) - indices[key] = def -} - -func buildVocabDictJson(entries []vocabDictSource, entities map[string]string) vocabDictJson { - dict := vocabDictJson{ - Indices: make(map[string]string), - Entities: entities, - } - - for i, e := range entries { - entry := []string{e.Expression, e.Reading, strings.Join(e.Tags, " ")} - entry = append(entry, e.Glossary...) - dict.Defs = append(dict.Defs, entry) - - appendIndex(dict.Indices, e.Expression, i) - if len(e.Reading) > 0 { - appendIndex(dict.Indices, e.Reading, i) - } - } - - return dict -} - -func outputVocabDictJson(writer io.Writer, entries []vocabDictSource, entities map[string]string, pretty bool) error { - dict := buildVocabDictJson(entries, entities) - - var ( - bytes []byte - err error - ) - - if pretty { - bytes, err = json.MarshalIndent(dict, "", " ") - } else { - bytes, err = json.Marshal(dict) - } - - if err != nil { - return err - } - - _, err = writer.Write(bytes) - return err -} - -func findString(needle string, haystack []string) int { - for index, value := range haystack { - if needle == value { - return index - } - } - - return -1 -} - -func convertEnamdictEntry(enamdictEntry jmdict.EnamdictEntry) []vocabDictSource { - var entries []vocabDictSource - - convert := func(reading jmdict.EnamdictReading, kanji *jmdict.EnamdictKanji) { - if kanji != nil && findString(kanji.Expression, reading.Restrictions) != -1 { - return - } - - var entry vocabDictSource - if kanji == nil { - entry.Expression = reading.Reading - } else { - entry.Expression = kanji.Expression - entry.Reading = reading.Reading - - entry.addTags(kanji.Information) - entry.addTags(kanji.Priorities) - } - - entry.addTags(reading.Information) - entry.addTags(reading.Priorities) - - for _, trans := range enamdictEntry.Translations { - entry.Glossary = append(entry.Glossary, trans.Translations...) - entry.addTags(trans.NameTypes) - } - - entries = append(entries, entry) - } - - if len(enamdictEntry.Kanji) > 0 { - for _, kanji := range enamdictEntry.Kanji { - for _, reading := range enamdictEntry.Readings { - convert(reading, &kanji) - } - } - } else { - for _, reading := range enamdictEntry.Readings { - convert(reading, nil) - } - } - - return entries -} - -func convertEdictEntry(edictEntry jmdict.EdictEntry) []vocabDictSource { - var entries []vocabDictSource - - convert := func(reading jmdict.EdictReading, kanji *jmdict.EdictKanji) { - if kanji != nil && findString(kanji.Expression, reading.Restrictions) != -1 { - return - } - - var entry vocabDictSource - if kanji == nil { - entry.Expression = reading.Reading - } else { - entry.Expression = kanji.Expression - entry.Reading = reading.Reading - - entry.addTags(kanji.Information) - entry.addTags(kanji.Priorities) - } - - entry.addTags(reading.Information) - entry.addTags(reading.Priorities) - - for _, sense := range edictEntry.Sense { - if findString(reading.Reading, sense.RestrictedReadings) != -1 { - continue - } - - if kanji != nil && findString(kanji.Expression, sense.RestrictedKanji) != -1 { - continue - } - - for _, glossary := range sense.Glossary { - entry.Glossary = append(entry.Glossary, glossary.Content) - } - - entry.addTags(sense.PartsOfSpeech) - entry.addTags(sense.Fields) - entry.addTags(sense.Misc) - entry.addTags(sense.Dialects) - } - - entries = append(entries, entry) - } - - if len(edictEntry.Kanji) > 0 { - for _, kanji := range edictEntry.Kanji { - for _, reading := range edictEntry.Readings { - convert(reading, &kanji) - } - } - } else { - for _, reading := range edictEntry.Readings { - convert(reading, nil) - } - } - - return entries -} - -func processEnamdict(writer io.Writer, reader io.Reader, flags int) error { - enamdictEntries, entities, err := jmdict.LoadEnamdict(reader, false) - if err != nil { - return err - } - - var entries []vocabDictSource - for _, enamdictEntry := range enamdictEntries { - entries = append(entries, convertEnamdictEntry(enamdictEntry)...) - } - - return outputVocabDictJson(writer, entries, entities, flags&flagPrettyJson == flagPrettyJson) -} - -func processEdict(writer io.Writer, reader io.Reader, flags int) error { - edictEntries, entities, err := jmdict.LoadEdict(reader, false) - if err != nil { - return err - } - - var entries []vocabDictSource - for _, edictEntry := range edictEntries { - entries = append(entries, convertEdictEntry(edictEntry)...) - } - - return outputVocabDictJson(writer, entries, entities, flags&flagPrettyJson == flagPrettyJson) -} - -// -// Kanjidic processing -// - -type characterDictJson struct { - Characters map[string][]string `json:"c"` -} - -type characterDictSource struct { - Character string - Kunyomi []string - Onyomi []string - Meanings []string -} - -func buildCharacterDictJson(characters []characterDictSource) characterDictJson { - dict := characterDictJson{make(map[string][]string)} - - for _, c := range characters { - var params []string - params = append(params, strings.Join(c.Onyomi, " ")) - params = append(params, strings.Join(c.Kunyomi, " ")) - params = append(params, c.Meanings...) - dict.Characters[c.Character] = params - } - - return dict -} - -func outputCharacterDictJson(writer io.Writer, characters []characterDictSource, pretty bool) error { - dict := buildCharacterDictJson(characters) - - var ( - bytes []byte - err error - ) - - if pretty { - bytes, err = json.MarshalIndent(dict, "", " ") - } else { - bytes, err = json.Marshal(dict) - } - - if err != nil { - return err - } - - _, err = writer.Write(bytes) - return err -} - -func convertKanjidicCharacter(kanjidicCharacter jmdict.KanjidicCharacter) characterDictSource { - character := characterDictSource{Character: kanjidicCharacter.Literal} - - if kanjidicCharacter.ReadingMeaning != nil { - for _, m := range kanjidicCharacter.ReadingMeaning.Meanings { - if m.Language == "en" || m.Language == "" { - character.Meanings = append(character.Meanings, m.Meaning) - } - } - - for _, r := range kanjidicCharacter.ReadingMeaning.Readings { - switch r.Type { - case "ja_on": - character.Onyomi = append(character.Onyomi, r.Value) - break - case "ja_kun": - character.Kunyomi = append(character.Kunyomi, r.Value) - break - } - } - } - - return character -} - -func processKanjidic(writer io.Writer, reader io.Reader, flags int) error { - kanjidic, err := jmdict.LoadKanjidic(reader) - if err != nil { - return err - } - - var characters []characterDictSource - for _, kanjidicCharacter := range kanjidic.Characters { - characters = append(characters, convertKanjidicCharacter(kanjidicCharacter)) - } - - return outputCharacterDictJson(writer, characters, flags&flagPrettyJson == flagPrettyJson) -} diff --git a/kanjidic.go b/kanjidic.go new file mode 100644 index 0000000..4f4d799 --- /dev/null +++ b/kanjidic.go @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016 Alex Yatskov + * Author: Alex Yatskov + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package main + +import ( + "encoding/json" + "io" + "strings" + + "github.com/FooSoft/jmdict" +) + +type kanjiJson struct { + Characters map[string][]string `json:"c"` +} + +type kanjiSource struct { + Character string + Kunyomi []string + Onyomi []string + Meanings []string +} + +func buildKanjiJson(kanji []kanjiSource) kanjiJson { + dict := kanjiJson{make(map[string][]string)} + + for _, k := range kanji { + var params []string + params = append(params, strings.Join(k.Onyomi, " ")) + params = append(params, strings.Join(k.Kunyomi, " ")) + params = append(params, k.Meanings...) + dict.Characters[k.Character] = params + } + + return dict +} + +func outputKanjiJson(writer io.Writer, kanji []kanjiSource, pretty bool) error { + dict := buildKanjiJson(kanji) + + var ( + bytes []byte + err error + ) + + if pretty { + bytes, err = json.MarshalIndent(dict, "", " ") + } else { + bytes, err = json.Marshal(dict) + } + + if err != nil { + return err + } + + _, err = writer.Write(bytes) + return err +} + +func convertKanjidicCharacter(kanjidicCharacter jmdict.KanjidicCharacter) kanjiSource { + character := kanjiSource{Character: kanjidicCharacter.Literal} + + if kanjidicCharacter.ReadingMeaning != nil { + for _, m := range kanjidicCharacter.ReadingMeaning.Meanings { + if m.Language == nil || *m.Language == "en" { + character.Meanings = append(character.Meanings, m.Meaning) + } + } + + for _, r := range kanjidicCharacter.ReadingMeaning.Readings { + switch r.Type { + case "ja_on": + character.Onyomi = append(character.Onyomi, r.Value) + break + case "ja_kun": + character.Kunyomi = append(character.Kunyomi, r.Value) + break + } + } + } + + return character +} + +func processKanjidic(writer io.Writer, reader io.Reader, flags int) error { + dict, err := jmdict.LoadKanjidic(reader) + if err != nil { + return err + } + + var kanji []kanjiSource + for _, kanjidicCharacter := range dict.Characters { + kanji = append(kanji, convertKanjidicCharacter(kanjidicCharacter)) + } + + return outputKanjiJson(writer, kanji, flags&flagPrettyJson == flagPrettyJson) +} diff --git a/main.go b/main.go index eaaf5cc..66634de 100644 --- a/main.go +++ b/main.go @@ -45,7 +45,7 @@ func usage() { func process(fileFormat, inputFile, outputFile string, flags int) error { handlers := map[string]func(io.Writer, io.Reader, int) error{ "edict": processEdict, - "enamdict": processEnamdict, + "enamdict": processJmnedict, "kanjidic": processKanjidic, }