add Sequence to other dictionary formats
This commit is contained in:
parent
26d01e0d56
commit
8252612626
@ -47,7 +47,7 @@ func makeDaijirinExtractor() epwingExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
func (e *daijirinExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||
if matches == nil {
|
||||
return nil
|
||||
@ -86,6 +86,7 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
term := dbTerm{
|
||||
Expression: reading,
|
||||
Glossary: []string{entry.Text},
|
||||
Sequence: sequence,
|
||||
}
|
||||
|
||||
e.exportRules(&term, tags)
|
||||
@ -99,6 +100,7 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
Expression: expression,
|
||||
Reading: reading,
|
||||
Glossary: []string{entry.Text},
|
||||
Sequence: sequence,
|
||||
}
|
||||
|
||||
e.exportRules(&term, tags)
|
||||
|
@ -49,7 +49,7 @@ func makeDaijisenExtractor() epwingExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
func (e *daijisenExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||
if matches == nil {
|
||||
return nil
|
||||
@ -88,6 +88,7 @@ func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
term := dbTerm{
|
||||
Expression: reading,
|
||||
Glossary: []string{entry.Text},
|
||||
Sequence: sequence,
|
||||
}
|
||||
|
||||
e.exportRules(&term, tags)
|
||||
@ -99,6 +100,7 @@ func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
Expression: expression,
|
||||
Reading: reading,
|
||||
Glossary: []string{entry.Text},
|
||||
Sequence: sequence,
|
||||
}
|
||||
|
||||
e.exportRules(&term, tags)
|
||||
|
10
enamdict.go
10
enamdict.go
@ -57,26 +57,26 @@ func jmnedictExtractTerms(enamdictEntry jmdict.JmnedictEntry) []dbTerm {
|
||||
}
|
||||
|
||||
var term dbTerm
|
||||
term.addTags(reading.Information...)
|
||||
term.Sequence = enamdictEntry.Sequence
|
||||
term.addTermTags(reading.Information...)
|
||||
|
||||
if kanji == nil {
|
||||
term.Expression = reading.Reading
|
||||
term.addTags(reading.Information...)
|
||||
} else {
|
||||
term.Expression = kanji.Expression
|
||||
term.Reading = reading.Reading
|
||||
term.addTags(kanji.Information...)
|
||||
term.addTermTags(kanji.Information...)
|
||||
|
||||
for _, priority := range kanji.Priorities {
|
||||
if hasString(priority, reading.Priorities) {
|
||||
term.addTags(priority)
|
||||
term.addTermTags(priority)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, trans := range enamdictEntry.Translations {
|
||||
term.Glossary = append(term.Glossary, trans.Translations...)
|
||||
term.addTags(trans.NameTypes...)
|
||||
term.addDefinitionTags(trans.NameTypes...)
|
||||
}
|
||||
|
||||
terms = append(terms, term)
|
||||
|
@ -55,7 +55,7 @@ type epwingBook struct {
|
||||
}
|
||||
|
||||
type epwingExtractor interface {
|
||||
extractTerms(entry epwingEntry) []dbTerm
|
||||
extractTerms(entry epwingEntry, sequence int) []dbTerm
|
||||
extractKanji(entry epwingEntry) []dbKanji
|
||||
getFontNarrow() map[int]string
|
||||
getFontWide() map[int]string
|
||||
@ -155,6 +155,8 @@ func epwingExportDb(inputPath, outputPath, language, title string, stride int, p
|
||||
|
||||
log.Println("formatting dictionary data...")
|
||||
|
||||
var sequence int
|
||||
|
||||
for _, subbook := range book.Subbooks {
|
||||
if extractor, ok := epwingExtractors[subbook.Title]; ok {
|
||||
fontNarrow := extractor.getFontNarrow()
|
||||
@ -185,8 +187,10 @@ func epwingExportDb(inputPath, outputPath, language, title string, stride int, p
|
||||
entry.Heading = translate(entry.Heading)
|
||||
entry.Text = translate(entry.Text)
|
||||
|
||||
terms = append(terms, extractor.extractTerms(entry)...)
|
||||
terms = append(terms, extractor.extractTerms(entry, sequence)...)
|
||||
kanji = append(kanji, extractor.extractKanji(entry)...)
|
||||
|
||||
sequence++
|
||||
}
|
||||
|
||||
revisions = append(revisions, extractor.getRevision())
|
||||
|
@ -43,7 +43,7 @@ func makeKotowazaExtractor() epwingExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
func (e *kotowazaExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
func (e *kotowazaExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||
heading := entry.Heading
|
||||
|
||||
queue := []string{heading}
|
||||
@ -93,6 +93,7 @@ func (e *kotowazaExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
Expression: expression,
|
||||
Reading: reading,
|
||||
Glossary: []string{entry.Text},
|
||||
Sequence: sequence,
|
||||
}
|
||||
|
||||
terms = append(terms, term)
|
||||
|
@ -77,7 +77,7 @@ func makeMeikyouExtractor() epwingExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
func (e *meikyouExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
func (e *meikyouExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||
if matches == nil {
|
||||
return nil
|
||||
@ -127,6 +127,7 @@ func (e *meikyouExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
term := dbTerm{
|
||||
Expression: reading,
|
||||
Glossary: []string{entry.Text},
|
||||
Sequence: sequence,
|
||||
}
|
||||
|
||||
e.exportRules(&term, tags)
|
||||
@ -140,6 +141,7 @@ func (e *meikyouExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
Expression: expression,
|
||||
Reading: reading,
|
||||
Glossary: []string{entry.Text},
|
||||
Sequence: sequence,
|
||||
}
|
||||
|
||||
e.exportRules(&term, tags)
|
||||
|
11
rikai.go
11
rikai.go
@ -39,7 +39,7 @@ type rikaiEntry struct {
|
||||
}
|
||||
|
||||
func rikaiBuildRules(term *dbTerm) {
|
||||
for _, tag := range term.Tags {
|
||||
for _, tag := range term.DefinitionTags {
|
||||
switch tag {
|
||||
case "adj-i", "v1", "vk":
|
||||
term.addRules(tag)
|
||||
@ -54,7 +54,7 @@ func rikaiBuildRules(term *dbTerm) {
|
||||
}
|
||||
|
||||
func rikaiBuildScore(term *dbTerm) {
|
||||
for _, tag := range term.Tags {
|
||||
for _, tag := range term.DefinitionTags {
|
||||
switch tag {
|
||||
case "news", "ichi", "spec", "gai":
|
||||
term.Score++
|
||||
@ -73,6 +73,8 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) {
|
||||
readExp := regexp.MustCompile(`\[([^\]]+)\]`)
|
||||
tagExp := regexp.MustCompile(`[\s\(\),]`)
|
||||
|
||||
var sequence int
|
||||
|
||||
for rows.Next() {
|
||||
var (
|
||||
kanji, kana, entry *string
|
||||
@ -104,6 +106,7 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) {
|
||||
}
|
||||
|
||||
var term dbTerm
|
||||
term.Sequence = sequence
|
||||
if kana != nil {
|
||||
term.Expression = *kana
|
||||
term.Reading = *kana
|
||||
@ -118,7 +121,7 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) {
|
||||
if dfnMatch := dfnExp.FindStringSubmatch(segment); dfnMatch != nil {
|
||||
for _, tag := range tagExp.Split(dfnMatch[1], -1) {
|
||||
if rikaiTagParsed(tag) {
|
||||
term.addTags(tag)
|
||||
term.addDefinitionTags(tag)
|
||||
}
|
||||
}
|
||||
|
||||
@ -132,6 +135,8 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) {
|
||||
rikaiBuildScore(&term)
|
||||
|
||||
terms = append(terms, term)
|
||||
|
||||
sequence++
|
||||
}
|
||||
|
||||
return terms, nil
|
||||
|
3
wadai.go
3
wadai.go
@ -45,7 +45,7 @@ func makeWadaiExtractor() epwingExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
func (e *wadaiExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
func (e *wadaiExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||
if matches == nil {
|
||||
return nil
|
||||
@ -90,6 +90,7 @@ func (e *wadaiExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
Expression: expression,
|
||||
Reading: reading,
|
||||
Glossary: []string{entry.Text},
|
||||
Sequence: sequence,
|
||||
}
|
||||
|
||||
terms = append(terms, term)
|
||||
|
Loading…
Reference in New Issue
Block a user