Skip to content

Commit

Permalink
Support SHA256 digest algorithm (#121)
Browse files Browse the repository at this point in the history
  • Loading branch information
folbricht authored Aug 21, 2019
1 parent 74e3261 commit d907313
Show file tree
Hide file tree
Showing 13 changed files with 71 additions and 22 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Among the distinguishing factors:
- Supported on MacOS, though there could be incompatibilities when exchanging catar-files between Linux and Mac for example since devices and filemodes differ slightly. \*BSD should work as well but hasn't been tested. Windows supports a limited subset of commands.
- Where the upstream command has chosen to optimize for storage efficiency (f/e, being able to use local files as "seeds", building temporary indexes into them), this command chooses to optimize for runtime performance (maintaining a local explicit chunk store, avoiding the need to reindex) at cost to storage efficiency.
- Where the upstream command has chosen to take full advantage of Linux platform features, this client chooses to implement a minimum featureset and, while high-value platform-specific features (such as support for btrfs reflinks into a decompressed local chunk cache) might be added in the future, the ability to build without them on other platforms will be maintained.
- SHA512/256 is currently the only supported hash function.
- Both SHA512/256 and SHA256 are supported hash functions.
- Only chunk stores using zstd compression as well as uncompressed are supported at this point.
- Supports local stores as well as remote stores (as client) over SSH, SFTP and HTTP
- Built-in HTTP(S) chunk server that can proxy multiple local or remote stores and also supports caching and deduplication for concurrent requests.
Expand Down
3 changes: 1 addition & 2 deletions assemble.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package desync

import (
"context"
"crypto/sha512"
"fmt"
"os"

Expand Down Expand Up @@ -128,7 +127,7 @@ func AssembleFile(ctx context.Context, name string, idx Index, s Store, seeds []
if _, err := f.ReadAt(b, int64(c.Start)); err != nil {
return err
}
sum := sha512.Sum512_256(b)
sum := Digest.Sum(b)
if sum == c.ID {
// Record this chunk's been written in the self-seed
ss.add(job.segment)
Expand Down
3 changes: 1 addition & 2 deletions chunk.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package desync

import (
"crypto/sha512"
"errors"
)

Expand Down Expand Up @@ -80,7 +79,7 @@ func (c *Chunk) ID() ChunkID {
if err != nil {
return ChunkID{}
}
c.id = sha512.Sum512_256(b)
c.id = Digest.Sum(b)
c.idCalculated = true
return c.id
}
3 changes: 1 addition & 2 deletions cmd/desync/chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package main

import (
"context"
"crypto/sha512"
"fmt"
"io"
"os"
Expand Down Expand Up @@ -77,7 +76,7 @@ func runChunk(ctx context.Context, opt chunkOptions, args []string) error {
if len(b) == 0 {
return nil
}
sum := sha512.Sum512_256(b)
sum := desync.Digest.Sum(b)
fmt.Printf("%d\t%d\t%x\n", start+opt.startPos, len(b), sum)
}
}
14 changes: 14 additions & 0 deletions cmd/desync/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,17 @@ func initConfig() {
die(errors.Wrap(err, "reading "+cfgFile))
}
}

// digestAlgorithm names the digest algorithm to be used by desync globally.
// It is bound to the persistent --digest flag of the root command and is
// translated into a desync.Digest implementation by setDigestAlgorithm, which
// accepts "", "sha512-256" and "sha256".
var digestAlgorithm string

// setDigestAlgorithm installs the library-wide digest implementation selected
// by digestAlgorithm. An empty value and "sha512-256" both select
// desync.SHA512256, "sha256" selects desync.SHA256, and any other value is
// reported through die as an invalid digest algorithm.
func setDigestAlgorithm() {
	if digestAlgorithm == "" || digestAlgorithm == "sha512-256" {
		desync.Digest = desync.SHA512256{}
		return
	}
	if digestAlgorithm == "sha256" {
		desync.Digest = desync.SHA256{}
		return
	}
	die(fmt.Errorf("invalid digest algorithm '%s'", digestAlgorithm))
}
2 changes: 1 addition & 1 deletion cmd/desync/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func main() {
}()

// Read config early
cobra.OnInitialize(initConfig)
cobra.OnInitialize(initConfig, setDigestAlgorithm)

// Register the sub-commands under root
rootCmd := newRootCommand()
Expand Down
1 change: 1 addition & 0 deletions cmd/desync/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ func newRootCommand() *cobra.Command {
Short: "Content-addressed binary distribution system.",
}
cmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default $HOME/.config/desync/config.json)")
cmd.PersistentFlags().StringVar(&digestAlgorithm, "digest", "sha512-256", "digest algorithm, sha512-256 or sha256")
return cmd
}
29 changes: 29 additions & 0 deletions digest.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package desync

import (
"crypto"
"crypto/sha256"
"crypto/sha512"
)

// Digest is the hash algorithm used globally for all chunk hashing. It
// defaults to SHA512256 and can be set to SHA256 instead.
// NOTE(review): this is mutable package-level state; presumably it should be
// assigned once at startup, before any chunk is hashed — confirm.
var Digest HashAlgorithm = SHA512256{}

// HashAlgorithm is a digest algorithm used to hash chunks. Every
// implementation produces a fixed 32-byte digest.
type HashAlgorithm interface {
	// Sum returns the 32-byte digest of data.
	Sum(data []byte) [32]byte

	// Algorithm reports the crypto.Hash identifier of the implementation.
	Algorithm() crypto.Hash
}

// SHA512256 implements the SHA512/256 hashing algorithm for Digest.
type SHA512256 struct{}

// Sum returns the SHA512/256 checksum of data.
func (SHA512256) Sum(data []byte) [32]byte {
	return sha512.Sum512_256(data)
}

// Algorithm identifies this implementation as crypto.SHA512_256.
func (SHA512256) Algorithm() crypto.Hash {
	return crypto.SHA512_256
}

// SHA256 implements the SHA256 hashing algorithm for Digest.
type SHA256 struct{}

// Sum returns the SHA256 checksum of data.
func (SHA256) Sum(data []byte) [32]byte {
	return sha256.Sum256(data)
}

// Algorithm identifies this implementation as crypto.SHA256.
func (SHA256) Algorithm() crypto.Hash {
	return crypto.SHA256
}
3 changes: 1 addition & 2 deletions fileseed.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package desync

import (
"crypto/sha512"
"fmt"
"io"
"os"
Expand Down Expand Up @@ -135,7 +134,7 @@ func (s *fileSeedSegment) validate(src *os.File) error {
if _, err := src.ReadAt(b, int64(c.Start)); err != nil {
return err
}
sum := sha512.Sum512_256(b)
sum := Digest.Sum(b)
if sum != c.ID {
return fmt.Errorf("seed index for %s doesn't match its data", s.file)
}
Expand Down
13 changes: 11 additions & 2 deletions index.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package desync
import (
"bufio"
"context"
"crypto"
"fmt"
"math"
"sync"
Expand Down Expand Up @@ -45,8 +46,16 @@ func IndexFromReader(r io.Reader) (c Index, err error) {
return c, errors.New("input is not an index file")
}

if c.Index.FeatureFlags&CaFormatSHA512256 == 0 {
return c, errors.New("only SHA512/256 is supported")
// Ensure the algorithm the library uses matches that of the index file
switch Digest.Algorithm() {
case crypto.SHA512_256:
if c.Index.FeatureFlags&CaFormatSHA512256 == 0 {
return c, errors.New("index file uses SHA256")
}
case crypto.SHA256:
if c.Index.FeatureFlags&CaFormatSHA512256 != 0 {
return c, errors.New("index file uses SHA512-256")
}
}

// Read the table
Expand Down
11 changes: 8 additions & 3 deletions make.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package desync

import (
"context"
"crypto/sha512"
"crypto"
"fmt"
"io"
"os"
Expand Down Expand Up @@ -31,9 +31,14 @@ func IndexFromFile(ctx context.Context,
ctx, cancel := context.WithCancel(ctx)
defer cancel()

var digestFlag uint64
if Digest.Algorithm() == crypto.SHA512_256 {
digestFlag = CaFormatSHA512256
}

index := Index{
Index: FormatIndex{
FeatureFlags: CaFormatExcludeNoDump | CaFormatSHA512256,
FeatureFlags: CaFormatExcludeNoDump | digestFlag,
ChunkSizeMin: min,
ChunkSizeAvg: avg,
ChunkSizeMax: max,
Expand Down Expand Up @@ -201,7 +206,7 @@ func (c *pChunker) start(ctx context.Context) {
return
}
// Calculate the chunk ID
id := sha512.Sum512_256(b)
id := Digest.Sum(b)

// Store it in our bucket
chunk := IndexChunk{Start: start, Size: uint64(len(b)), ID: id}
Expand Down
6 changes: 1 addition & 5 deletions nullchunk.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
package desync

import (
"crypto/sha512"
)

// NullChunk is used in places where it's common to see requests for chunks
// containing only 0-bytes. When a chunked file has large areas of 0-bytes,
// the chunking algorithm does not produce split boundaries, which results
Expand All @@ -22,6 +18,6 @@ func NewNullChunk(size uint64) *NullChunk {
b := make([]byte, int(size))
return &NullChunk{
Data: b,
ID: sha512.Sum512_256(b),
ID: Digest.Sum(b),
}
}
3 changes: 1 addition & 2 deletions verifyindex.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package desync

import (
"context"
"crypto/sha512"
"fmt"
"io"
"os"
Expand Down Expand Up @@ -59,7 +58,7 @@ func VerifyIndex(ctx context.Context, name string, idx Index, n int, pb Progress

// Calculate this chunks checksum and compare to what it's supposed to be
// according to the index
sum := sha512.Sum512_256(b)
sum := Digest.Sum(b)
if sum != c.ID {
return fmt.Errorf("checksum does not match chunk %s", c.ID)
}
Expand Down

0 comments on commit d907313

Please sign in to comment.