Hash pages

This commit is contained in:
Alex Yatskov 2023-11-03 19:03:08 -07:00
parent a0e2581f6b
commit 2c12c2d6ea

View File

@@ -2,10 +2,12 @@ package mex
import ( import (
"bytes" "bytes"
"crypto/sha256"
_ "embed" _ "embed"
"errors" "errors"
"fmt" "fmt"
"html/template" "html/template"
"io"
"math" "math"
"os" "os"
"path/filepath" "path/filepath"
@@ -110,19 +112,9 @@ type Volume struct {
Book *Book Book *Book
Pages []*Page Pages []*Page
Index int Index int
}
func (self *Volume) AveragePageSize() int { avgSize int
if len(self.Pages) == 0 { hash []byte
return 0
}
var totalSize int
for _, page := range self.Pages {
totalSize += int(page.Node.Info.Size())
}
return totalSize / len(self.Pages)
} }
func (self *Volume) export(path string, config ExportConfig, allocator *TempDirAllocator) error { func (self *Volume) export(path string, config ExportConfig, allocator *TempDirAllocator) error {
@@ -164,35 +156,26 @@ func (self *Volume) export(path string, config ExportConfig, allocator *TempDirA
} }
func (self *Volume) compare(other *Volume) int { func (self *Volume) compare(other *Volume) int {
if len(self.Pages) == len(other.Pages) {
var different bool
for i := range self.Pages {
if self.Pages[i].Node.Info.Size() != other.Pages[i].Node.Info.Size() {
different = true
break
}
}
if !different {
return 0
}
}
if len(self.Pages) > len(other.Pages) { if len(self.Pages) > len(other.Pages) {
return 1 return 1
} } else if len(self.Pages) < len(other.Pages) {
if self.AveragePageSize() > other.AveragePageSize() {
return 1
}
return -1 return -1
}
if self.avgSize > other.avgSize {
return 1
} else if self.avgSize < other.avgSize {
return -1
}
return bytes.Compare(self.hash, other.hash)
} }
type Book struct { type Book struct {
Node *Node Node *Node
Volumes map[int]*Volume Volumes map[int]*Volume
VolumeCount int VolumeCount int
orphans []*Volume orphans []*Volume
} }
@@ -293,9 +276,9 @@ func (self *Book) addOrphan(newVolume *Volume) {
self.orphans = append(self.orphans, newVolume) self.orphans = append(self.orphans, newVolume)
} }
func (self *Book) parseVolumes(node *Node) { func (self *Book) parseVolumes(node *Node) error {
if !node.Info.IsDir() { if !node.Info.IsDir() {
return return nil
} }
volume := &Volume{ volume := &Volume{
@@ -306,25 +289,55 @@ func (self *Book) parseVolumes(node *Node) {
var pageIndex int var pageIndex int
for _, child := range node.Children { for _, child := range node.Children {
if child.Info.IsDir() { if child.Info.IsDir() {
self.parseVolumes(child) if err := self.parseVolumes(child); err != nil {
return err
}
} else if isImagePath(child.Name) { } else if isImagePath(child.Name) {
volume.Pages = append(volume.Pages, &Page{child, volume, pageIndex}) volume.Pages = append(volume.Pages, &Page{child, volume, pageIndex})
pageIndex++ pageIndex++
} }
} }
if len(volume.Pages) > 0 { if len(volume.Pages) == 0 {
return nil
}
sort.Slice(volume.Pages, func(i, j int) bool { sort.Slice(volume.Pages, func(i, j int) bool {
return strings.Compare(volume.Pages[i].Node.Name, volume.Pages[j].Node.Name) < 0 return strings.Compare(volume.Pages[i].Node.Name, volume.Pages[j].Node.Name) < 0
}) })
var (
hasher = sha256.New()
totalSize = 0
)
for _, page := range volume.Pages {
fp, err := os.Open(page.Node.Path)
if err != nil {
return err
}
size, err := io.Copy(hasher, fp)
fp.Close()
if err != nil {
return err
}
totalSize += int(size)
}
volume.avgSize = totalSize / len(volume.Pages)
volume.hash = hasher.Sum(nil)
if index := parseVolumeIndex(node.Name); index != nil { if index := parseVolumeIndex(node.Name); index != nil {
volume.Index = *index volume.Index = *index
self.addVolume(volume) self.addVolume(volume)
} else { } else {
self.addOrphan(volume) self.addOrphan(volume)
} }
}
return nil
} }
func ParseBook(node *Node) (*Book, error) { func ParseBook(node *Node) (*Book, error) {