1

fix various scoring issues with jmdict and rikai

This commit is contained in:
Alex Yatskov 2017-08-23 20:03:06 -07:00
parent d2acb5d5e3
commit f0d72fefaa
2 changed files with 12 additions and 9 deletions

View File

@@ -29,7 +29,7 @@ import (
"github.com/FooSoft/jmdict"
)
const JMDICT_REVISION = "jmdict2"
const JMDICT_REVISION = "jmdict3"
func jmdictBuildRules(term *dbTerm) {
for _, tag := range term.Tags {
@@ -47,13 +47,14 @@ func jmdictBuildRules(term *dbTerm) {
}
// jmdictBuildScore adjusts term.Score in place according to the tags attached
// to the term. Every matching tag contributes independently:
//
//   - "news", "ichi", "spec", "gai": +100
//   - "P":                           +500
//   - "arch", "iK":                  -100
//
// Tags not listed above leave the score untouched.
//
// The adjustments are added on top of whatever Score the term already holds;
// the score is deliberately not reset here. jmdictExtractTerms seeds Score
// from the sense position (len(edictEntry.Sense) - index), and zeroing it in
// this function would discard that ordering.
// NOTE(review): presumably this runs after the seed is assigned — confirm the
// call order in jmdictExtractTerms.
func jmdictBuildScore(term *dbTerm) {
	for _, tag := range term.Tags {
		switch tag {
		case "news", "ichi", "spec", "gai":
			term.Score += 100
		case "P":
			term.Score += 500
		case "arch", "iK":
			term.Score -= 100
		}
	}
}
@@ -88,7 +89,7 @@ func jmdictBuildTagMeta(entities map[string]string) map[string]dbTagMeta {
tag.Order = -5
case "arch", "iK":
tag.Category = "archaism"
tag.Order = 5
tag.Order = -4
}
tags[name] = tag
@@ -136,6 +137,7 @@ func jmdictExtractTerms(edictEntry jmdict.JmdictEntry, language string) []dbTerm
term := dbTerm{
Reading: termBase.Reading,
Expression: termBase.Expression,
Score: len(edictEntry.Sense) - index,
}
for _, glossary := range sense.Glossary {

View File

@@ -30,7 +30,7 @@ import (
_ "github.com/mattn/go-sqlite3"
)
const RIKAI_REVISION = "rikai1"
const RIKAI_REVISION = "rikai2"
type rikaiEntry struct {
kanji string
@@ -54,9 +54,10 @@ func rikaiBuildRules(term *dbTerm) {
}
func rikaiBuildScore(term *dbTerm) {
term.Score = 0
for _, tag := range term.Tags {
switch tag {
case "news", "ichi", "spec", "gai":
term.Score += 1
case "P":
term.Score += 5
case "arch", "iK":
@@ -161,8 +162,8 @@ func rikaiExportDb(inputPath, outputPath, language, title string, stride int, pr
"P": {Category: "popular", Order: -10},
"exp": {Category: "expression", Order: -5},
"id": {Category: "expression", Order: -5},
"arch": {Category: "archaism", Order: 5},
"iK": {Category: "archaism", Order: 5},
"arch": {Category: "archaism", Order: -4},
"iK": {Category: "archaism", Order: -4},
}
return writeDb(