From 8252612626c0cd3ac5e1ac298e789bf89afd9867 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Fri, 13 Oct 2017 02:48:58 +0300 Subject: [PATCH] add Sequence to other dictionary formats --- daijirin.go | 4 +++- daijisen.go | 4 +++- enamdict.go | 10 +++++----- epwing.go | 8 ++++++-- kotowaza.go | 3 ++- meikyou.go | 4 +++- rikai.go | 11 ++++++++--- wadai.go | 3 ++- 8 files changed, 32 insertions(+), 15 deletions(-) diff --git a/daijirin.go b/daijirin.go index 29e967a..47d2054 100644 --- a/daijirin.go +++ b/daijirin.go @@ -47,7 +47,7 @@ func makeDaijirinExtractor() epwingExtractor { } } -func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm { +func (e *daijirinExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm { matches := e.partsExp.FindStringSubmatch(entry.Heading) if matches == nil { return nil @@ -86,6 +86,7 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm { term := dbTerm{ Expression: reading, Glossary: []string{entry.Text}, + Sequence: sequence, } e.exportRules(&term, tags) @@ -99,6 +100,7 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm { Expression: expression, Reading: reading, Glossary: []string{entry.Text}, + Sequence: sequence, } e.exportRules(&term, tags) diff --git a/daijisen.go b/daijisen.go index 596fd46..44e1f32 100644 --- a/daijisen.go +++ b/daijisen.go @@ -49,7 +49,7 @@ func makeDaijisenExtractor() epwingExtractor { } } -func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm { +func (e *daijisenExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm { matches := e.partsExp.FindStringSubmatch(entry.Heading) if matches == nil { return nil @@ -88,6 +88,7 @@ func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm { term := dbTerm{ Expression: reading, Glossary: []string{entry.Text}, + Sequence: sequence, } e.exportRules(&term, tags) @@ -99,6 +100,7 @@ func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm { Expression: expression, Reading: reading, Glossary: []string{entry.Text}, + Sequence: sequence, } e.exportRules(&term, tags) diff --git a/enamdict.go b/enamdict.go index e003d0f..2442dab 100644 --- a/enamdict.go +++ b/enamdict.go @@ -57,26 +57,26 @@ func jmnedictExtractTerms(enamdictEntry jmdict.JmnedictEntry) []dbTerm { } var term dbTerm - term.addTags(reading.Information...) + term.Sequence = enamdictEntry.Sequence + term.addTermTags(reading.Information...) if kanji == nil { term.Expression = reading.Reading - term.addTags(reading.Information...) } else { term.Expression = kanji.Expression term.Reading = reading.Reading - term.addTags(kanji.Information...) + term.addTermTags(kanji.Information...) for _, priority := range kanji.Priorities { if hasString(priority, reading.Priorities) { - term.addTags(priority) + term.addTermTags(priority) } } } for _, trans := range enamdictEntry.Translations { term.Glossary = append(term.Glossary, trans.Translations...) - term.addTags(trans.NameTypes...) + term.addDefinitionTags(trans.NameTypes...) } terms = append(terms, term) diff --git a/epwing.go b/epwing.go index d0ec6f3..627dacb 100644 --- a/epwing.go +++ b/epwing.go @@ -55,7 +55,7 @@ type epwingBook struct { } type epwingExtractor interface { - extractTerms(entry epwingEntry) []dbTerm + extractTerms(entry epwingEntry, sequence int) []dbTerm extractKanji(entry epwingEntry) []dbKanji getFontNarrow() map[int]string getFontWide() map[int]string @@ -155,6 +155,8 @@ func epwingExportDb(inputPath, outputPath, language, title string, stride int, p log.Println("formatting dictionary data...") + var sequence int + for _, subbook := range book.Subbooks { if extractor, ok := epwingExtractors[subbook.Title]; ok { fontNarrow := extractor.getFontNarrow() @@ -185,8 +187,10 @@ func epwingExportDb(inputPath, outputPath, language, title string, stride int, p entry.Heading = translate(entry.Heading) entry.Text = translate(entry.Text) - terms = append(terms, extractor.extractTerms(entry)...) + terms = append(terms, extractor.extractTerms(entry, sequence)...) kanji = append(kanji, extractor.extractKanji(entry)...) + + sequence++ } revisions = append(revisions, extractor.getRevision()) diff --git a/kotowaza.go b/kotowaza.go index 0c85a2f..feddb8b 100644 --- a/kotowaza.go +++ b/kotowaza.go @@ -43,7 +43,7 @@ func makeKotowazaExtractor() epwingExtractor { } } -func (e *kotowazaExtractor) extractTerms(entry epwingEntry) []dbTerm { +func (e *kotowazaExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm { heading := entry.Heading queue := []string{heading} @@ -93,6 +93,7 @@ func (e *kotowazaExtractor) extractTerms(entry epwingEntry) []dbTerm { Expression: expression, Reading: reading, Glossary: []string{entry.Text}, + Sequence: sequence, } terms = append(terms, term) diff --git a/meikyou.go b/meikyou.go index e353576..ed0bf42 100644 --- a/meikyou.go +++ b/meikyou.go @@ -77,7 +77,7 @@ func makeMeikyouExtractor() epwingExtractor { } } -func (e *meikyouExtractor) extractTerms(entry epwingEntry) []dbTerm { +func (e *meikyouExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm { matches := e.partsExp.FindStringSubmatch(entry.Heading) if matches == nil { return nil @@ -127,6 +127,7 @@ func (e *meikyouExtractor) extractTerms(entry epwingEntry) []dbTerm { term := dbTerm{ Expression: reading, Glossary: []string{entry.Text}, + Sequence: sequence, } e.exportRules(&term, tags) @@ -140,6 +141,7 @@ func (e *meikyouExtractor) extractTerms(entry epwingEntry) []dbTerm { Expression: expression, Reading: reading, Glossary: []string{entry.Text}, + Sequence: sequence, } e.exportRules(&term, tags) diff --git a/rikai.go b/rikai.go index 1459268..b39ca36 100644 --- a/rikai.go +++ b/rikai.go @@ -39,7 +39,7 @@ type rikaiEntry struct { } func rikaiBuildRules(term *dbTerm) { - for _, tag := range term.Tags { + for _, tag := range term.DefinitionTags { switch tag { case "adj-i", "v1", "vk": term.addRules(tag) @@ -54,7 +54,7 @@ func rikaiBuildRules(term *dbTerm) { } func rikaiBuildScore(term *dbTerm) { - for _, tag := range term.Tags { + for _, tag := range term.DefinitionTags { switch tag { case "news", "ichi", "spec", "gai": term.Score++ @@ -73,6 +73,8 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) { readExp := regexp.MustCompile(`\[([^\]]+)\]`) tagExp := regexp.MustCompile(`[\s\(\),]`) + var sequence int + for rows.Next() { var ( kanji, kana, entry *string @@ -104,6 +106,7 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) { } var term dbTerm + term.Sequence = sequence if kana != nil { term.Expression = *kana term.Reading = *kana @@ -118,7 +121,7 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) { if dfnMatch := dfnExp.FindStringSubmatch(segment); dfnMatch != nil { for _, tag := range tagExp.Split(dfnMatch[1], -1) { if rikaiTagParsed(tag) { - term.addTags(tag) + term.addDefinitionTags(tag) } } @@ -132,6 +135,8 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) { rikaiBuildScore(&term) terms = append(terms, term) + + sequence++ } return terms, nil diff --git a/wadai.go b/wadai.go index 280e490..39a0246 100644 --- a/wadai.go +++ b/wadai.go @@ -45,7 +45,7 @@ func makeWadaiExtractor() epwingExtractor { } } -func (e *wadaiExtractor) extractTerms(entry epwingEntry) []dbTerm { +func (e *wadaiExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm { matches := e.partsExp.FindStringSubmatch(entry.Heading) if matches == nil { return nil @@ -90,6 +90,7 @@ func (e *wadaiExtractor) extractTerms(entry epwingEntry) []dbTerm { Expression: expression, Reading: reading, Glossary: []string{entry.Text}, + Sequence: sequence, } terms = append(terms, term)