add Sequence to other dictionary formats
This commit is contained in:
parent
26d01e0d56
commit
8252612626
@ -47,7 +47,7 @@ func makeDaijirinExtractor() epwingExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
func (e *daijirinExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||||
if matches == nil {
|
if matches == nil {
|
||||||
return nil
|
return nil
|
||||||
@ -86,6 +86,7 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
|||||||
term := dbTerm{
|
term := dbTerm{
|
||||||
Expression: reading,
|
Expression: reading,
|
||||||
Glossary: []string{entry.Text},
|
Glossary: []string{entry.Text},
|
||||||
|
Sequence: sequence,
|
||||||
}
|
}
|
||||||
|
|
||||||
e.exportRules(&term, tags)
|
e.exportRules(&term, tags)
|
||||||
@ -99,6 +100,7 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
|||||||
Expression: expression,
|
Expression: expression,
|
||||||
Reading: reading,
|
Reading: reading,
|
||||||
Glossary: []string{entry.Text},
|
Glossary: []string{entry.Text},
|
||||||
|
Sequence: sequence,
|
||||||
}
|
}
|
||||||
|
|
||||||
e.exportRules(&term, tags)
|
e.exportRules(&term, tags)
|
||||||
|
@ -49,7 +49,7 @@ func makeDaijisenExtractor() epwingExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
func (e *daijisenExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||||
if matches == nil {
|
if matches == nil {
|
||||||
return nil
|
return nil
|
||||||
@ -88,6 +88,7 @@ func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
|||||||
term := dbTerm{
|
term := dbTerm{
|
||||||
Expression: reading,
|
Expression: reading,
|
||||||
Glossary: []string{entry.Text},
|
Glossary: []string{entry.Text},
|
||||||
|
Sequence: sequence,
|
||||||
}
|
}
|
||||||
|
|
||||||
e.exportRules(&term, tags)
|
e.exportRules(&term, tags)
|
||||||
@ -99,6 +100,7 @@ func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
|||||||
Expression: expression,
|
Expression: expression,
|
||||||
Reading: reading,
|
Reading: reading,
|
||||||
Glossary: []string{entry.Text},
|
Glossary: []string{entry.Text},
|
||||||
|
Sequence: sequence,
|
||||||
}
|
}
|
||||||
|
|
||||||
e.exportRules(&term, tags)
|
e.exportRules(&term, tags)
|
||||||
|
10
enamdict.go
10
enamdict.go
@ -57,26 +57,26 @@ func jmnedictExtractTerms(enamdictEntry jmdict.JmnedictEntry) []dbTerm {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var term dbTerm
|
var term dbTerm
|
||||||
term.addTags(reading.Information...)
|
term.Sequence = enamdictEntry.Sequence
|
||||||
|
term.addTermTags(reading.Information...)
|
||||||
|
|
||||||
if kanji == nil {
|
if kanji == nil {
|
||||||
term.Expression = reading.Reading
|
term.Expression = reading.Reading
|
||||||
term.addTags(reading.Information...)
|
|
||||||
} else {
|
} else {
|
||||||
term.Expression = kanji.Expression
|
term.Expression = kanji.Expression
|
||||||
term.Reading = reading.Reading
|
term.Reading = reading.Reading
|
||||||
term.addTags(kanji.Information...)
|
term.addTermTags(kanji.Information...)
|
||||||
|
|
||||||
for _, priority := range kanji.Priorities {
|
for _, priority := range kanji.Priorities {
|
||||||
if hasString(priority, reading.Priorities) {
|
if hasString(priority, reading.Priorities) {
|
||||||
term.addTags(priority)
|
term.addTermTags(priority)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, trans := range enamdictEntry.Translations {
|
for _, trans := range enamdictEntry.Translations {
|
||||||
term.Glossary = append(term.Glossary, trans.Translations...)
|
term.Glossary = append(term.Glossary, trans.Translations...)
|
||||||
term.addTags(trans.NameTypes...)
|
term.addDefinitionTags(trans.NameTypes...)
|
||||||
}
|
}
|
||||||
|
|
||||||
terms = append(terms, term)
|
terms = append(terms, term)
|
||||||
|
@ -55,7 +55,7 @@ type epwingBook struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type epwingExtractor interface {
|
type epwingExtractor interface {
|
||||||
extractTerms(entry epwingEntry) []dbTerm
|
extractTerms(entry epwingEntry, sequence int) []dbTerm
|
||||||
extractKanji(entry epwingEntry) []dbKanji
|
extractKanji(entry epwingEntry) []dbKanji
|
||||||
getFontNarrow() map[int]string
|
getFontNarrow() map[int]string
|
||||||
getFontWide() map[int]string
|
getFontWide() map[int]string
|
||||||
@ -155,6 +155,8 @@ func epwingExportDb(inputPath, outputPath, language, title string, stride int, p
|
|||||||
|
|
||||||
log.Println("formatting dictionary data...")
|
log.Println("formatting dictionary data...")
|
||||||
|
|
||||||
|
var sequence int
|
||||||
|
|
||||||
for _, subbook := range book.Subbooks {
|
for _, subbook := range book.Subbooks {
|
||||||
if extractor, ok := epwingExtractors[subbook.Title]; ok {
|
if extractor, ok := epwingExtractors[subbook.Title]; ok {
|
||||||
fontNarrow := extractor.getFontNarrow()
|
fontNarrow := extractor.getFontNarrow()
|
||||||
@ -185,8 +187,10 @@ func epwingExportDb(inputPath, outputPath, language, title string, stride int, p
|
|||||||
entry.Heading = translate(entry.Heading)
|
entry.Heading = translate(entry.Heading)
|
||||||
entry.Text = translate(entry.Text)
|
entry.Text = translate(entry.Text)
|
||||||
|
|
||||||
terms = append(terms, extractor.extractTerms(entry)...)
|
terms = append(terms, extractor.extractTerms(entry, sequence)...)
|
||||||
kanji = append(kanji, extractor.extractKanji(entry)...)
|
kanji = append(kanji, extractor.extractKanji(entry)...)
|
||||||
|
|
||||||
|
sequence++
|
||||||
}
|
}
|
||||||
|
|
||||||
revisions = append(revisions, extractor.getRevision())
|
revisions = append(revisions, extractor.getRevision())
|
||||||
|
@ -43,7 +43,7 @@ func makeKotowazaExtractor() epwingExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *kotowazaExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
func (e *kotowazaExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||||
heading := entry.Heading
|
heading := entry.Heading
|
||||||
|
|
||||||
queue := []string{heading}
|
queue := []string{heading}
|
||||||
@ -93,6 +93,7 @@ func (e *kotowazaExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
|||||||
Expression: expression,
|
Expression: expression,
|
||||||
Reading: reading,
|
Reading: reading,
|
||||||
Glossary: []string{entry.Text},
|
Glossary: []string{entry.Text},
|
||||||
|
Sequence: sequence,
|
||||||
}
|
}
|
||||||
|
|
||||||
terms = append(terms, term)
|
terms = append(terms, term)
|
||||||
|
@ -77,7 +77,7 @@ func makeMeikyouExtractor() epwingExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *meikyouExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
func (e *meikyouExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||||
if matches == nil {
|
if matches == nil {
|
||||||
return nil
|
return nil
|
||||||
@ -127,6 +127,7 @@ func (e *meikyouExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
|||||||
term := dbTerm{
|
term := dbTerm{
|
||||||
Expression: reading,
|
Expression: reading,
|
||||||
Glossary: []string{entry.Text},
|
Glossary: []string{entry.Text},
|
||||||
|
Sequence: sequence,
|
||||||
}
|
}
|
||||||
|
|
||||||
e.exportRules(&term, tags)
|
e.exportRules(&term, tags)
|
||||||
@ -140,6 +141,7 @@ func (e *meikyouExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
|||||||
Expression: expression,
|
Expression: expression,
|
||||||
Reading: reading,
|
Reading: reading,
|
||||||
Glossary: []string{entry.Text},
|
Glossary: []string{entry.Text},
|
||||||
|
Sequence: sequence,
|
||||||
}
|
}
|
||||||
|
|
||||||
e.exportRules(&term, tags)
|
e.exportRules(&term, tags)
|
||||||
|
11
rikai.go
11
rikai.go
@ -39,7 +39,7 @@ type rikaiEntry struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func rikaiBuildRules(term *dbTerm) {
|
func rikaiBuildRules(term *dbTerm) {
|
||||||
for _, tag := range term.Tags {
|
for _, tag := range term.DefinitionTags {
|
||||||
switch tag {
|
switch tag {
|
||||||
case "adj-i", "v1", "vk":
|
case "adj-i", "v1", "vk":
|
||||||
term.addRules(tag)
|
term.addRules(tag)
|
||||||
@ -54,7 +54,7 @@ func rikaiBuildRules(term *dbTerm) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func rikaiBuildScore(term *dbTerm) {
|
func rikaiBuildScore(term *dbTerm) {
|
||||||
for _, tag := range term.Tags {
|
for _, tag := range term.DefinitionTags {
|
||||||
switch tag {
|
switch tag {
|
||||||
case "news", "ichi", "spec", "gai":
|
case "news", "ichi", "spec", "gai":
|
||||||
term.Score++
|
term.Score++
|
||||||
@ -73,6 +73,8 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) {
|
|||||||
readExp := regexp.MustCompile(`\[([^\]]+)\]`)
|
readExp := regexp.MustCompile(`\[([^\]]+)\]`)
|
||||||
tagExp := regexp.MustCompile(`[\s\(\),]`)
|
tagExp := regexp.MustCompile(`[\s\(\),]`)
|
||||||
|
|
||||||
|
var sequence int
|
||||||
|
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
var (
|
var (
|
||||||
kanji, kana, entry *string
|
kanji, kana, entry *string
|
||||||
@ -104,6 +106,7 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var term dbTerm
|
var term dbTerm
|
||||||
|
term.Sequence = sequence
|
||||||
if kana != nil {
|
if kana != nil {
|
||||||
term.Expression = *kana
|
term.Expression = *kana
|
||||||
term.Reading = *kana
|
term.Reading = *kana
|
||||||
@ -118,7 +121,7 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) {
|
|||||||
if dfnMatch := dfnExp.FindStringSubmatch(segment); dfnMatch != nil {
|
if dfnMatch := dfnExp.FindStringSubmatch(segment); dfnMatch != nil {
|
||||||
for _, tag := range tagExp.Split(dfnMatch[1], -1) {
|
for _, tag := range tagExp.Split(dfnMatch[1], -1) {
|
||||||
if rikaiTagParsed(tag) {
|
if rikaiTagParsed(tag) {
|
||||||
term.addTags(tag)
|
term.addDefinitionTags(tag)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -132,6 +135,8 @@ func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) {
|
|||||||
rikaiBuildScore(&term)
|
rikaiBuildScore(&term)
|
||||||
|
|
||||||
terms = append(terms, term)
|
terms = append(terms, term)
|
||||||
|
|
||||||
|
sequence++
|
||||||
}
|
}
|
||||||
|
|
||||||
return terms, nil
|
return terms, nil
|
||||||
|
3
wadai.go
3
wadai.go
@ -45,7 +45,7 @@ func makeWadaiExtractor() epwingExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *wadaiExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
func (e *wadaiExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||||
if matches == nil {
|
if matches == nil {
|
||||||
return nil
|
return nil
|
||||||
@ -90,6 +90,7 @@ func (e *wadaiExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
|||||||
Expression: expression,
|
Expression: expression,
|
||||||
Reading: reading,
|
Reading: reading,
|
||||||
Glossary: []string{entry.Text},
|
Glossary: []string{entry.Text},
|
||||||
|
Sequence: sequence,
|
||||||
}
|
}
|
||||||
|
|
||||||
terms = append(terms, term)
|
terms = append(terms, term)
|
||||||
|
Loading…
Reference in New Issue
Block a user