/* * Copyright (c) 2016 Alex Yatskov * Author: Alex Yatskov * * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of * the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package main import ( "bufio" "encoding/json" "fmt" "io/ioutil" "log" "os" "os/exec" "path/filepath" "regexp" "runtime" "strconv" "strings" ) type epwingEntry struct { Heading string `json:"heading"` Text string `json:"text"` } type epwingSubbook struct { Title string `json:"title"` Copyright string `json:"copyright"` Entries []epwingEntry `json:"entries"` } type epwingBook struct { CharCode string `json:"charCode"` DiscCode string `json:"discCode"` Subbooks []epwingSubbook `json:"subbooks"` } type epwingExtractor interface { extractTerms(entry epwingEntry) []dbTerm extractKanji(entry epwingEntry) []dbKanji getFontNarrow() map[int]string getFontWide() map[int]string getRevision() string } func epwingExportDb(inputPath, outputDir, language, title string, stride int, pretty bool) error { stat, err := os.Stat(inputPath) if err != nil { return err } var data []byte if stat.IsDir() { toolPath := filepath.Join("bin", runtime.GOOS, "zero-epwing") if runtime.GOOS == "windows" { toolPath += ".exe" } if toolPath, err = filepath.Abs(toolPath); err != nil { return err } if _, err = os.Stat(toolPath); err != nil { return fmt.Errorf("failed to find zero-epwing in '%s'", toolPath) } cmd := exec.Command(toolPath, "--entries", inputPath) stdout, err := cmd.StdoutPipe() if err != nil { return err } stderr, err := cmd.StderrPipe() if err != nil { return err } log.Printf("invoking zero-epwing from '%s'...\n", toolPath) if err := cmd.Start(); err != nil { return err } go func() { scanner := bufio.NewScanner(stderr) for scanner.Scan() { log.Printf("\t > %s\n", scanner.Text()) } }() if data, err = ioutil.ReadAll(stdout); err != nil { return err } if err := cmd.Wait(); err != nil { return err } log.Println("completed zero-epwing processing") } else { data, err = ioutil.ReadFile(inputPath) } if err != nil { return err } var book epwingBook if err := json.Unmarshal(data, &book); err != nil { return err } translateExp := regexp.MustCompile(`{{([nw])_(\d+)}}`) epwingExtractors := map[string]epwingExtractor{ "三省堂 スーパー大辞林": makeDaijirinExtractor(), "大辞泉": makeDaijisenExtractor(), "明鏡国語辞典": makeMeikyouExtractor(), "故事ことわざの辞典": makeKotowazaExtractor(), "研究社 新和英大辞典 第5版": makeWadaiExtractor(), } var ( terms dbTermList kanji dbKanjiList revisions []string titles []string ) log.Println("formatting dictionary data...") for _, subbook := range book.Subbooks { if extractor, ok := epwingExtractors[subbook.Title]; ok { fontNarrow := extractor.getFontNarrow() fontWide := extractor.getFontWide() translate := func(str string) string { for _, matches := range translateExp.FindAllStringSubmatch(str, -1) { var font map[int]string if matches[1] == "n" { font = fontNarrow } else { font = fontWide } code, _ := strconv.Atoi(matches[2]) replacement, ok := font[code] if !ok { replacement = "�" } str = strings.Replace(str, matches[0], replacement, -1) } return str } for _, entry := range subbook.Entries { entry.Heading = translate(entry.Heading) entry.Text = translate(entry.Text) terms = append(terms, extractor.extractTerms(entry)...) kanji = append(kanji, extractor.extractKanji(entry)...) } revisions = append(revisions, extractor.getRevision()) titles = append(titles, subbook.Title) } else { return fmt.Errorf("failed to find compatible extractor for '%s'", subbook.Title) } } if title == "" { title = strings.Join(titles, ", ") } return writeDb( outputDir, title, strings.Join(revisions, ";"), terms.crush(), kanji.crush(), nil, stride, pretty, ) }