Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize hash calculation #3

Merged
merged 1 commit into from
Jun 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions database/legacy/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"time"

"github.com/fatih/color"
"github.com/pkg/errors"
)

// DataObject is a file object in JSON database
Expand Down Expand Up @@ -52,25 +53,25 @@ func NewDatabase(path string) (*Database, error) {
Data: make(map[string]*DataObject),
})
if err != nil {
return nil, fmt.Errorf("Error marshaling initial JSON: %s", err)
return nil, errors.Errorf("Error marshaling initial JSON: %s", err)
}

err = ioutil.WriteFile(path, js, 0644)
if err != nil {
return nil, fmt.Errorf("Error creating schema: %s", err)
return nil, errors.Errorf("Error creating schema: %s", err)
}
}

fp, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("Error opening file: %s", err)
return nil, errors.Errorf("Error opening file: %s", err)
}
defer fp.Close()

decoder := json.NewDecoder(fp)
err = decoder.Decode(&database.Schema)
if err != nil {
return nil, fmt.Errorf("Error decoding JSON data: %s", err)
return nil, errors.Errorf("Error decoding JSON data: %s", err)
}

return &database, nil
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ require (
github.com/cosiner/flag v0.5.2
github.com/fatih/color v1.17.0
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/pkg/errors v0.9.1
github.com/stretchr/testify v1.9.0
golang.org/x/sync v0.7.0
gopkg.in/cheggaaa/pb.v1 v1.0.28
)
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
Expand All @@ -27,6 +29,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
Expand Down
149 changes: 63 additions & 86 deletions main.go
Original file line number Diff line number Diff line change
@@ -1,27 +1,24 @@
package main

import (
"bytes"
"flag"
"fmt"
"log"
"os"
"path/filepath"
"regexp"
"sort"
"sync"
"sync/atomic"
"time"

"github.com/fatih/color"
"golang.org/x/sync/errgroup"
"gopkg.in/cheggaaa/pb.v1"

database "github.com/teran/checksum/database/legacy"
)

var (
wg sync.WaitGroup

appVersion = "No version specified(probably trunk build)"
buildTimestamp = "0000-00-00T00:00:00Z"

Expand Down Expand Up @@ -60,7 +57,6 @@ func main() {
}

if !cfg.GenerateChecksumOnly {
sem := make(chan bool, cfg.Concurrency)
var bar *pb.ProgressBar
if cfg.Progressbar {
bar = pb.New(db.Count())
Expand All @@ -80,94 +76,83 @@ func main() {
}
sort.Strings(keys)

for _, key := range keys {
sem <- true
wg.Add(1)
go func(file string, obj *database.DataObject) {
if cfg.Progressbar {
defer func() {
bar.Increment()
}()
}
defer func() {
<-sem
}()
defer wg.Done()

if _, err := os.Stat(file); os.IsNotExist(err) {
if !cfg.SkipMissed {
fmt.Printf("%s %s\n", color.RedString("[MISS]"), file)
}
wg := &errgroup.Group{}
wg.SetLimit(cfg.Concurrency)

if cfg.DeleteMissed {
fmt.Printf("%s DeleteMissed requested: deleting file `%s` from database\n", color.BlueString("[NOTE]"), file)
db.DeleteOne(file)
atomic.AddUint64(&cntDeleted, 1)
for _, key := range keys {
wg.Go(func(file string, obj *database.DataObject) func() error {
return func() error {
if cfg.Progressbar {
defer func() {
bar.Increment()
}()
}

atomic.AddUint64(&cntMissed, 1)
return
}
if _, err := os.Stat(file); os.IsNotExist(err) {
if !cfg.SkipMissed {
fmt.Printf("%s %s\n", color.RedString("[MISS]"), file)
}

isChanged := false
if cfg.DeleteMissed {
fmt.Printf("%s DeleteMissed requested: deleting file `%s` from database\n", color.BlueString("[NOTE]"), file)
db.DeleteOne(file)
atomic.AddUint64(&cntDeleted, 1)
}

if obj.Length == 0 {
obj.Length = flength(file)
isChanged = true
}
atomic.AddUint64(&cntMissed, 1)
return nil
}

data, err := readFile(file)
if err != nil {
log.Fatalf("error reading data: %s", err)
}
isChanged := false

if obj.SHA1 == "" {
obj.SHA1, err = SHA1(bytes.NewReader(data))
if err != nil {
log.Fatalf("error calculating SHA1: %s", err)
if obj.Length == 0 {
obj.Length = flength(file)
isChanged = true
}

isChanged = true
}
if obj.SHA1 == "" || obj.SHA256 == "" {
sha1, sha256, err := generateActualChecksum(file)
if err != nil {
return err
}

if obj.SHA256 == "" {
obj.SHA256, err = SHA256(bytes.NewReader(data))
if err != nil {
log.Fatalf("error calculating SHA256: %s", err)
}
obj.SHA1 = sha1
obj.SHA256 = sha256

isChanged = true
}
isChanged = true
}

res := verify(file, obj.Length, obj.SHA1, obj.SHA256)
res := verify(file, obj.Length, obj.SHA1, obj.SHA256)

if isChanged {
db.WriteOne(file, &database.DataObject{
Length: obj.Length,
SHA1: obj.SHA1,
SHA256: obj.SHA256,
Modified: time.Now().UTC(),
})
}
if isChanged {
db.WriteOne(file, &database.DataObject{
Length: obj.Length,
SHA1: obj.SHA1,
SHA256: obj.SHA256,
Modified: time.Now().UTC(),
})
}

if res {
if !cfg.SkipOk {
fmt.Printf("%s %s\n", color.GreenString("[ OK ]"), file)
if res {
if !cfg.SkipOk {
fmt.Printf("%s %s\n", color.GreenString("[ OK ]"), file)
}
atomic.AddUint64(&cntPassed, 1)
return nil
}
atomic.AddUint64(&cntPassed, 1)
return
}
if !cfg.SkipFailed {
fmt.Printf("%s %s\n", color.RedString("[FAIL]"), file)
if !cfg.SkipFailed {
fmt.Printf("%s %s\n", color.RedString("[FAIL]"), file)
}
atomic.AddUint64(&cntFailed, 1)
return nil
}
atomic.AddUint64(&cntFailed, 1)
}(key, objects[key])
}(key, objects[key]))
}

for i := 0; i < cap(sem); i++ {
sem <- true
err = wg.Wait()
if err != nil {
log.Fatalf("error handling threads")
}
wg.Wait()

if cfg.Progressbar {
bar.Finish()
Expand All @@ -179,24 +164,16 @@ func main() {
if cfg.DataDir != "" {
fmt.Printf("%s Checking for new files on %s\n", color.CyanString("[INFO]"), cfg.DataDir)

// TODO: check data dir for existence

err = filepath.Walk(cfg.DataDir, func(path string, info os.FileInfo, err error) error {
if info.IsDir() {
return nil
}
if isApplicable(path) {
data, err := readFile(path)
if err != nil {
log.Fatalf("error reading file: %s", err)
}

sha1, err := SHA1(bytes.NewReader(data))
if err != nil {
log.Fatalf("error calculating SHA1: %s", err)
}

sha256, err := SHA256(bytes.NewReader(data))
sha1, sha256, err := generateActualChecksum(path)
if err != nil {
log.Fatalf("error calculating SHA256: %s", err)
return err
}

db.WriteOne(path, &database.DataObject{
Expand Down
65 changes: 27 additions & 38 deletions operations.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
package main

import (
"bytes"
"context"
"crypto/sha1"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"runtime"
"strings"

"github.com/teran/checksum/utils/concurrent"
)

func completeArgs(word string) {
Expand All @@ -28,63 +30,50 @@ func completeArgs(word string) {
}, " "))
}

func readFile(fn string) ([]byte, error) {
fp, err := os.Open(fn)
func flength(filename string) int64 {
stat, err := os.Stat(filename)
if err != nil {
return nil, err
log.Fatal(err)
}
defer fp.Close()

return ioutil.ReadAll(fp)
return stat.Size()
}

// SHA256 ...
func SHA256(rd io.Reader) (string, error) {
h := sha256.New()
_, err := io.Copy(h, rd)
func generateActualChecksum(filename string) (sha1sum string, sha256sum string, err error) {
fi, err := os.Stat(filename)
if err != nil {
return "", err
return "", "", err
}

return fmt.Sprintf("%x", h.Sum(nil)), nil
}

// SHA1 ...
func SHA1(rd io.Reader) (string, error) {
h := sha1.New()
_, err := io.Copy(h, rd)
fp, err := os.Open(filename)
if err != nil {
return "", err
return "", "", err
}
defer fp.Close()

return fmt.Sprintf("%x", h.Sum(nil)), nil
}
sha1hasher := sha1.New()
sha256hasher := sha256.New()

func flength(filename string) int64 {
stat, err := os.Stat(filename)
w, err := concurrent.NewConcurrentMultiWriter(context.TODO(), sha1hasher, sha256hasher)
if err != nil {
log.Fatal(err)
return "", "", err
}

return stat.Size()
}

func verify(path string, length int64, sha1, sha256 string) bool {
data, err := readFile(path)
n, err := io.Copy(w, fp)
if err != nil {
log.Printf("error reading file: %s", err)
return false
return "", "", err
}

actSHA1, err := SHA1(bytes.NewReader(data))
if err != nil {
log.Printf("error calculating SHA1: %s", err)
return false
if n != fi.Size() {
return "", "", io.ErrShortWrite
}

actSHA256, err := SHA256(bytes.NewReader(data))
return hex.EncodeToString(sha1hasher.Sum(nil)), hex.EncodeToString(sha256hasher.Sum(nil)), nil
}

func verify(path string, length int64, sha1, sha256 string) bool {
actSHA1, actSHA256, err := generateActualChecksum(path)
if err != nil {
log.Printf("error calculating SHA256: %s", err)
return false
}

Expand Down
Loading
Loading