Skip to content

Commit

Permalink
Optimize hash calculation (#3)
Browse files (browse the repository at this point in the history)
Signed-off-by: Igor Shishkin <[email protected]>
  • Loading branch information
teran committed Jun 30, 2024
1 parent cc3e3eb commit 295d887
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 128 deletions.
9 changes: 5 additions & 4 deletions database/legacy/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"time"

"github.com/fatih/color"
"github.com/pkg/errors"
)

// DataObject is a file object in JSON database
Expand Down Expand Up @@ -52,25 +53,25 @@ func NewDatabase(path string) (*Database, error) {
Data: make(map[string]*DataObject),
})
if err != nil {
return nil, fmt.Errorf("Error marshaling initial JSON: %s", err)
return nil, errors.Errorf("Error marshaling initial JSON: %s", err)
}

err = ioutil.WriteFile(path, js, 0644)
if err != nil {
return nil, fmt.Errorf("Error creating schema: %s", err)
return nil, errors.Errorf("Error creating schema: %s", err)
}
}

fp, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("Error opening file: %s", err)
return nil, errors.Errorf("Error opening file: %s", err)
}
defer fp.Close()

decoder := json.NewDecoder(fp)
err = decoder.Decode(&database.Schema)
if err != nil {
return nil, fmt.Errorf("Error decoding JSON data: %s", err)
return nil, errors.Errorf("Error decoding JSON data: %s", err)
}

return &database, nil
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ require (
github.com/cosiner/flag v0.5.2
github.com/fatih/color v1.17.0
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/pkg/errors v0.9.1
github.com/stretchr/testify v1.9.0
golang.org/x/sync v0.7.0
gopkg.in/cheggaaa/pb.v1 v1.0.28
)
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
Expand All @@ -27,6 +29,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
Expand Down
149 changes: 63 additions & 86 deletions main.go
Original file line number Diff line number Diff line change
@@ -1,27 +1,24 @@
package main

import (
"bytes"
"flag"
"fmt"
"log"
"os"
"path/filepath"
"regexp"
"sort"
"sync"
"sync/atomic"
"time"

"github.com/fatih/color"
"golang.org/x/sync/errgroup"
"gopkg.in/cheggaaa/pb.v1"

database "github.com/teran/checksum/database/legacy"
)

var (
wg sync.WaitGroup

appVersion = "No version specified(probably trunk build)"
buildTimestamp = "0000-00-00T00:00:00Z"

Expand Down Expand Up @@ -60,7 +57,6 @@ func main() {
}

if !cfg.GenerateChecksumOnly {
sem := make(chan bool, cfg.Concurrency)
var bar *pb.ProgressBar
if cfg.Progressbar {
bar = pb.New(db.Count())
Expand All @@ -80,94 +76,83 @@ func main() {
}
sort.Strings(keys)

for _, key := range keys {
sem <- true
wg.Add(1)
go func(file string, obj *database.DataObject) {
if cfg.Progressbar {
defer func() {
bar.Increment()
}()
}
defer func() {
<-sem
}()
defer wg.Done()

if _, err := os.Stat(file); os.IsNotExist(err) {
if !cfg.SkipMissed {
fmt.Printf("%s %s\n", color.RedString("[MISS]"), file)
}
wg := &errgroup.Group{}
wg.SetLimit(cfg.Concurrency)

if cfg.DeleteMissed {
fmt.Printf("%s DeleteMissed requested: deleting file `%s` from database\n", color.BlueString("[NOTE]"), file)
db.DeleteOne(file)
atomic.AddUint64(&cntDeleted, 1)
for _, key := range keys {
wg.Go(func(file string, obj *database.DataObject) func() error {
return func() error {
if cfg.Progressbar {
defer func() {
bar.Increment()
}()
}

atomic.AddUint64(&cntMissed, 1)
return
}
if _, err := os.Stat(file); os.IsNotExist(err) {
if !cfg.SkipMissed {
fmt.Printf("%s %s\n", color.RedString("[MISS]"), file)
}

isChanged := false
if cfg.DeleteMissed {
fmt.Printf("%s DeleteMissed requested: deleting file `%s` from database\n", color.BlueString("[NOTE]"), file)
db.DeleteOne(file)
atomic.AddUint64(&cntDeleted, 1)
}

if obj.Length == 0 {
obj.Length = flength(file)
isChanged = true
}
atomic.AddUint64(&cntMissed, 1)
return nil
}

data, err := readFile(file)
if err != nil {
log.Fatalf("error reading data: %s", err)
}
isChanged := false

if obj.SHA1 == "" {
obj.SHA1, err = SHA1(bytes.NewReader(data))
if err != nil {
log.Fatalf("error calculating SHA1: %s", err)
if obj.Length == 0 {
obj.Length = flength(file)
isChanged = true
}

isChanged = true
}
if obj.SHA1 == "" || obj.SHA256 == "" {
sha1, sha256, err := generateActualChecksum(file)
if err != nil {
return err
}

if obj.SHA256 == "" {
obj.SHA256, err = SHA256(bytes.NewReader(data))
if err != nil {
log.Fatalf("error calculating SHA256: %s", err)
}
obj.SHA1 = sha1
obj.SHA256 = sha256

isChanged = true
}
isChanged = true
}

res := verify(file, obj.Length, obj.SHA1, obj.SHA256)
res := verify(file, obj.Length, obj.SHA1, obj.SHA256)

if isChanged {
db.WriteOne(file, &database.DataObject{
Length: obj.Length,
SHA1: obj.SHA1,
SHA256: obj.SHA256,
Modified: time.Now().UTC(),
})
}
if isChanged {
db.WriteOne(file, &database.DataObject{
Length: obj.Length,
SHA1: obj.SHA1,
SHA256: obj.SHA256,
Modified: time.Now().UTC(),
})
}

if res {
if !cfg.SkipOk {
fmt.Printf("%s %s\n", color.GreenString("[ OK ]"), file)
if res {
if !cfg.SkipOk {
fmt.Printf("%s %s\n", color.GreenString("[ OK ]"), file)
}
atomic.AddUint64(&cntPassed, 1)
return nil
}
atomic.AddUint64(&cntPassed, 1)
return
}
if !cfg.SkipFailed {
fmt.Printf("%s %s\n", color.RedString("[FAIL]"), file)
if !cfg.SkipFailed {
fmt.Printf("%s %s\n", color.RedString("[FAIL]"), file)
}
atomic.AddUint64(&cntFailed, 1)
return nil
}
atomic.AddUint64(&cntFailed, 1)
}(key, objects[key])
}(key, objects[key]))
}

for i := 0; i < cap(sem); i++ {
sem <- true
err = wg.Wait()
if err != nil {
log.Fatalf("error handling threads")
}
wg.Wait()

if cfg.Progressbar {
bar.Finish()
Expand All @@ -179,24 +164,16 @@ func main() {
if cfg.DataDir != "" {
fmt.Printf("%s Checking for new files on %s\n", color.CyanString("[INFO]"), cfg.DataDir)

// TODO: check data dir for existence

err = filepath.Walk(cfg.DataDir, func(path string, info os.FileInfo, err error) error {
if info.IsDir() {
return nil
}
if isApplicable(path) {
data, err := readFile(path)
if err != nil {
log.Fatalf("error reading file: %s", err)
}

sha1, err := SHA1(bytes.NewReader(data))
if err != nil {
log.Fatalf("error calculating SHA1: %s", err)
}

sha256, err := SHA256(bytes.NewReader(data))
sha1, sha256, err := generateActualChecksum(path)
if err != nil {
log.Fatalf("error calculating SHA256: %s", err)
return err
}

db.WriteOne(path, &database.DataObject{
Expand Down
65 changes: 27 additions & 38 deletions operations.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
package main

import (
"bytes"
"context"
"crypto/sha1"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"runtime"
"strings"

"github.com/teran/checksum/utils/concurrent"
)

func completeArgs(word string) {
Expand All @@ -28,63 +30,50 @@ func completeArgs(word string) {
}, " "))
}

func readFile(fn string) ([]byte, error) {
fp, err := os.Open(fn)
func flength(filename string) int64 {
stat, err := os.Stat(filename)
if err != nil {
return nil, err
log.Fatal(err)
}
defer fp.Close()

return ioutil.ReadAll(fp)
return stat.Size()
}

// SHA256 ...
func SHA256(rd io.Reader) (string, error) {
h := sha256.New()
_, err := io.Copy(h, rd)
func generateActualChecksum(filename string) (sha1sum string, sha256sum string, err error) {
fi, err := os.Stat(filename)
if err != nil {
return "", err
return "", "", err
}

return fmt.Sprintf("%x", h.Sum(nil)), nil
}

// SHA1 ...
func SHA1(rd io.Reader) (string, error) {
h := sha1.New()
_, err := io.Copy(h, rd)
fp, err := os.Open(filename)
if err != nil {
return "", err
return "", "", err
}
defer fp.Close()

return fmt.Sprintf("%x", h.Sum(nil)), nil
}
sha1hasher := sha1.New()
sha256hasher := sha256.New()

func flength(filename string) int64 {
stat, err := os.Stat(filename)
w, err := concurrent.NewConcurrentMultiWriter(context.TODO(), sha1hasher, sha256hasher)
if err != nil {
log.Fatal(err)
return "", "", err
}

return stat.Size()
}

func verify(path string, length int64, sha1, sha256 string) bool {
data, err := readFile(path)
n, err := io.Copy(w, fp)
if err != nil {
log.Printf("error reading file: %s", err)
return false
return "", "", err
}

actSHA1, err := SHA1(bytes.NewReader(data))
if err != nil {
log.Printf("error calculating SHA1: %s", err)
return false
if n != fi.Size() {
return "", "", io.ErrShortWrite
}

actSHA256, err := SHA256(bytes.NewReader(data))
return hex.EncodeToString(sha1hasher.Sum(nil)), hex.EncodeToString(sha256hasher.Sum(nil)), nil
}

func verify(path string, length int64, sha1, sha256 string) bool {
actSHA1, actSHA256, err := generateActualChecksum(path)
if err != nil {
log.Printf("error calculating SHA256: %s", err)
return false
}

Expand Down
Loading

0 comments on commit 295d887

Please sign in to comment.