1

Add Sequence to the other dictionary formats (EPWING extractors, JMnedict, Rikai)

This commit is contained in:
siikamiika 2017-10-13 02:48:58 +03:00
parent 26d01e0d56
commit 8252612626
8 changed files with 32 additions and 15 deletions

View File

@ -47,7 +47,7 @@ func makeDaijirinExtractor() epwingExtractor {
}
}
func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm {
func (e *daijirinExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
matches := e.partsExp.FindStringSubmatch(entry.Heading)
if matches == nil {
return nil
@ -86,6 +86,7 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm {
term := dbTerm{
Expression: reading,
Glossary: []string{entry.Text},
Sequence: sequence,
}
e.exportRules(&term, tags)
@ -99,6 +100,7 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm {
Expression: expression,
Reading: reading,
Glossary: []string{entry.Text},
Sequence: sequence,
}
e.exportRules(&term, tags)

View File

@ -49,7 +49,7 @@ func makeDaijisenExtractor() epwingExtractor {
}
}
func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm {
func (e *daijisenExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
matches := e.partsExp.FindStringSubmatch(entry.Heading)
if matches == nil {
return nil
@ -88,6 +88,7 @@ func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm {
term := dbTerm{
Expression: reading,
Glossary: []string{entry.Text},
Sequence: sequence,
}
e.exportRules(&term, tags)
@ -99,6 +100,7 @@ func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm {
Expression: expression,
Reading: reading,
Glossary: []string{entry.Text},
Sequence: sequence,
}
e.exportRules(&term, tags)

View File

@ -57,26 +57,26 @@ func jmnedictExtractTerms(enamdictEntry jmdict.JmnedictEntry) []dbTerm {
}
var term dbTerm
term.addTags(reading.Information...)
term.Sequence = enamdictEntry.Sequence
term.addTermTags(reading.Information...)
if kanji == nil {
term.Expression = reading.Reading
term.addTags(reading.Information...)
} else {
term.Expression = kanji.Expression
term.Reading = reading.Reading
term.addTags(kanji.Information...)
term.addTermTags(kanji.Information...)
for _, priority := range kanji.Priorities {
if hasString(priority, reading.Priorities) {
term.addTags(priority)
term.addTermTags(priority)
}
}
}
for _, trans := range enamdictEntry.Translations {
term.Glossary = append(term.Glossary, trans.Translations...)
term.addTags(trans.NameTypes...)
term.addDefinitionTags(trans.NameTypes...)
}
terms = append(terms, term)

View File

@ -55,7 +55,7 @@ type epwingBook struct {
}
type epwingExtractor interface {
extractTerms(entry epwingEntry) []dbTerm
extractTerms(entry epwingEntry, sequence int) []dbTerm
extractKanji(entry epwingEntry) []dbKanji
getFontNarrow() map[int]string
getFontWide() map[int]string
@ -155,6 +155,8 @@ func epwingExportDb(inputPath, outputPath, language, title string, stride int, p
log.Println("formatting dictionary data...")
var sequence int
for _, subbook := range book.Subbooks {
if extractor, ok := epwingExtractors[subbook.Title]; ok {
fontNarrow := extractor.getFontNarrow()
@ -185,8 +187,10 @@ func epwingExportDb(inputPath, outputPath, language, title string, stride int, p
entry.Heading = translate(entry.Heading)
entry.Text = translate(entry.Text)
terms = append(terms, extractor.extractTerms(entry)...)
terms = append(terms, extractor.extractTerms(entry, sequence)...)
kanji = append(kanji, extractor.extractKanji(entry)...)
sequence++
}
revisions = append(revisions, extractor.getRevision())

View File

@ -43,7 +43,7 @@ func makeKotowazaExtractor() epwingExtractor {
}
}
func (e *kotowazaExtractor) extractTerms(entry epwingEntry) []dbTerm {
func (e *kotowazaExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
heading := entry.Heading
queue := []string{heading}
@ -93,6 +93,7 @@ func (e *kotowazaExtractor) extractTerms(entry epwingEntry) []dbTerm {
Expression: expression,
Reading: reading,
Glossary: []string{entry.Text},
Sequence: sequence,
}
terms = append(terms, term)

View File

@ -77,7 +77,7 @@ func makeMeikyouExtractor() epwingExtractor {
}
}
func (e *meikyouExtractor) extractTerms(entry epwingEntry) []dbTerm {
func (e *meikyouExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
matches := e.partsExp.FindStringSubmatch(entry.Heading)
if matches == nil {
return nil
@ -127,6 +127,7 @@ func (e *meikyouExtractor) extractTerms(entry epwingEntry) []dbTerm {
term := dbTerm{
Expression: reading,
Glossary: []string{entry.Text},
Sequence: sequence,
}
e.exportRules(&term, tags)
@ -140,6 +141,7 @@ func (e *meikyouExtractor) extractTerms(entry epwingEntry) []dbTerm {
Expression: expression,
Reading: reading,
Glossary: []string{entry.Text},
Sequence: sequence,
}
e.exportRules(&term, tags)

View File

@ -39,7 +39,7 @@ type rikaiEntry struct {
}
func rikaiBuildRules(term *dbTerm) {
for _, tag := range term.Tags {
for _, tag := range term.DefinitionTags {
switch tag {
case "adj-i", "v1", "vk":
term.addRules(tag)
@ -54,7 +54,7 @@ func rikaiBuildRules(term *dbTerm) {
}
func rikaiBuildScore(term *dbTerm) {
for _, tag := range term.Tags {
for _, tag := range term.DefinitionTags {
switch tag {
case "news", "ichi", "spec", "gai":
term.Score++
@ -73,6 +73,8 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) {
readExp := regexp.MustCompile(`\[([^\]]+)\]`)
tagExp := regexp.MustCompile(`[\s\(\),]`)
var sequence int
for rows.Next() {
var (
kanji, kana, entry *string
@ -104,6 +106,7 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) {
}
var term dbTerm
term.Sequence = sequence
if kana != nil {
term.Expression = *kana
term.Reading = *kana
@ -118,7 +121,7 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) {
if dfnMatch := dfnExp.FindStringSubmatch(segment); dfnMatch != nil {
for _, tag := range tagExp.Split(dfnMatch[1], -1) {
if rikaiTagParsed(tag) {
term.addTags(tag)
term.addDefinitionTags(tag)
}
}
@ -132,6 +135,8 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) {
rikaiBuildScore(&term)
terms = append(terms, term)
sequence++
}
return terms, nil

View File

@ -45,7 +45,7 @@ func makeWadaiExtractor() epwingExtractor {
}
}
func (e *wadaiExtractor) extractTerms(entry epwingEntry) []dbTerm {
func (e *wadaiExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
matches := e.partsExp.FindStringSubmatch(entry.Heading)
if matches == nil {
return nil
@ -90,6 +90,7 @@ func (e *wadaiExtractor) extractTerms(entry epwingEntry) []dbTerm {
Expression: expression,
Reading: reading,
Glossary: []string{entry.Text},
Sequence: sequence,
}
terms = append(terms, term)