Skip to content

Commit df0b3ca

Browse files
authored
vetu pull and vetu clone sparsely (#28)
* Introduce sparseio.Copy() and write the disk layers sparsely
* vetu create: no need to use temporary.AtomicallyCopyThrough(), since we're dealing with a temporary VM directory anyway
* Put temporary.AtomicallyCopyThrough() on the sparse rails
* sparseio_test.go: add TestCopySmall test
* Fix sparseio.Copy() argument order and add TestAtomicallyCopyThrough
1 parent 82a729f commit df0b3ca

File tree

6 files changed

+242
-12
lines changed

6 files changed

+242
-12
lines changed

internal/command/create/create.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"github.com/cirruslabs/vetu/internal/storage/local"
88
"github.com/cirruslabs/vetu/internal/storage/temporary"
99
"github.com/cirruslabs/vetu/internal/vmconfig"
10+
cp "github.com/otiai10/copy"
1011
"github.com/spf13/cobra"
1112
"path/filepath"
1213
)
@@ -63,7 +64,7 @@ func runCreate(cmd *cobra.Command, args []string) error {
6364

6465
// Kernel
6566
if kernel != "" {
66-
if err := temporary.AtomicallyCopyThrough(kernel, vmDir.KernelPath()); err != nil {
67+
if err := cp.Copy(kernel, vmDir.KernelPath()); err != nil {
6768
return fmt.Errorf("failed to copy kernel to the VM's directory: %v", err)
6869
}
6970
} else {
@@ -72,7 +73,7 @@ func runCreate(cmd *cobra.Command, args []string) error {
7273

7374
// Initramfs
7475
if initramfs != "" {
75-
if err := temporary.AtomicallyCopyThrough(initramfs, vmDir.InitramfsPath()); err != nil {
76+
if err := cp.Copy(initramfs, vmDir.InitramfsPath()); err != nil {
7677
return fmt.Errorf("failed to copy initramfs to the VM's directory: %v", err)
7778
}
7879
}
@@ -86,7 +87,7 @@ func runCreate(cmd *cobra.Command, args []string) error {
8687
for _, disk := range disks {
8788
diskName := filepath.Base(disk)
8889

89-
if err := temporary.AtomicallyCopyThrough(disk, filepath.Join(vmDir.Path(), diskName)); err != nil {
90+
if err := cp.Copy(disk, filepath.Join(vmDir.Path(), diskName)); err != nil {
9091
return fmt.Errorf("failed to copy disk %q to the VM's directory: %v", diskName, err)
9192
}
9293

internal/oci/diskpuller/diskpuller.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package diskpuller
33
import (
44
"context"
55
"fmt"
6+
"github.com/cirruslabs/vetu/internal/sparseio"
67
"github.com/cirruslabs/vetu/internal/vmdirectory"
78
"github.com/dustin/go-humanize"
89
"github.com/regclient/regclient"
@@ -160,9 +161,8 @@ func (diskTask *diskTask) process(
160161
if err != nil {
161162
return err
162163
}
163-
if _, err := diskFile.Seek(diskTask.Offset, io.SeekStart); err != nil {
164-
return err
165-
}
164+
165+
diskFileAtOffset := io.NewOffsetWriter(diskFile, diskTask.Offset)
166166

167167
// Pull disk layer from the OCI registry
168168
blobReader, err := client.BlobGet(ctx, reference, diskTask.Desc)
@@ -175,7 +175,7 @@ func (diskTask *diskTask) process(
175175
progressBarReader := progressbar.NewReader(blobReader, progressBar)
176176
decompressor := initializeDecompressor(&progressBarReader)
177177

178-
if _, err := io.Copy(diskFile, decompressor); err != nil {
178+
if err := sparseio.Copy(diskFileAtOffset, decompressor); err != nil {
179179
return err
180180
}
181181

internal/sparseio/sparseio.go

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package sparseio
2+
3+
import (
4+
"bytes"
5+
"errors"
6+
"io"
7+
)
8+
9+
// blockSize is the size, in bytes, of the buffer that Copy reads into and
// the largest unit that is checked for being all zeroes.
const blockSize = 64 * 1024

// Copy reads src until EOF and writes what it reads to dst at the matching
// offsets, starting at offset 0. Chunks that consist solely of zero bytes are
// not written, which lets dst stay sparse on filesystems that support it.
//
// Because all-zero chunks are skipped, Copy on its own does not extend dst to
// the full length of src when src ends with zeroes; callers that need dst to
// have an exact size must size it themselves (for example, by truncating it
// to the desired length beforehand).
//
// Copy returns nil once src reports io.EOF, and otherwise the first error
// returned by either src.Read or dst.WriteAt.
func Copy(dst io.WriterAt, src io.Reader) error {
	chunk := make([]byte, blockSize)
	zeroedChunk := make([]byte, blockSize)

	var offset int64

	for {
		n, err := src.Read(chunk)

		// The io.Reader contract allows Read to return n > 0 together with
		// an error (including io.EOF), so the data has to be processed
		// before the error is looked at, otherwise the tail would be lost.
		if n > 0 {
			// Only write non-zero chunks
			if !bytes.Equal(chunk[:n], zeroedChunk[:n]) {
				if _, writeErr := dst.WriteAt(chunk[:n], offset); writeErr != nil {
					return writeErr
				}
			}

			offset += int64(n)
		}

		if err != nil {
			if errors.Is(err, io.EOF) {
				return nil
			}

			return err
		}
	}
}

internal/sparseio/sparseio_test.go

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
package sparseio_test
2+
3+
import (
4+
"github.com/cirruslabs/vetu/internal/sparseio"
5+
"github.com/dustin/go-humanize"
6+
"github.com/opencontainers/go-digest"
7+
"github.com/stretchr/testify/require"
8+
"math/rand"
9+
"os"
10+
"path/filepath"
11+
"testing"
12+
)
13+
14+
func TestCopySmall(t *testing.T) {
15+
// Create a small file
16+
originalFilePath := filepath.Join(t.TempDir(), "original.txt")
17+
err := os.WriteFile(originalFilePath, []byte("Hello, World!\n"), 0600)
18+
require.NoError(t, err)
19+
20+
// Sparsely copy it
21+
sparseFilePath := filepath.Join(t.TempDir(), "sparse.txt")
22+
copySparse(t, originalFilePath, sparseFilePath)
23+
24+
// Ensure that both files have identical contents
25+
require.Equal(t, fileDigest(t, originalFilePath), fileDigest(t, sparseFilePath))
26+
}
27+
28+
//nolint:gosec // we don't need cryptographically secure randomness here
29+
func TestCopyRandomized(t *testing.T) {
30+
// Create a sufficiently large file that contains
31+
// interleaved random-filled and zero-filled parts
32+
originalFilePath := filepath.Join(t.TempDir(), "original.bin")
33+
originalFile, err := os.Create(originalFilePath)
34+
require.NoError(t, err)
35+
36+
var wroteBytes int64
37+
38+
for wroteBytes < 1*humanize.GByte {
39+
chunk := randomlySizedChunk(1*humanize.KByte, 4*humanize.MByte)
40+
41+
// Randomize the contents of some chunks
42+
if rand.Intn(2) == 1 {
43+
//nolint:staticcheck // what's the alternative to the deprecated rand.Read() anyways?
44+
_, err = rand.Read(chunk)
45+
require.NoError(t, err)
46+
}
47+
48+
n, err := originalFile.Write(chunk)
49+
require.NoError(t, err)
50+
51+
wroteBytes += int64(n)
52+
}
53+
54+
require.NoError(t, originalFile.Close())
55+
56+
// Sparsely copy the original file
57+
sparseFilePath := filepath.Join(t.TempDir(), "sparse.bin")
58+
copySparse(t, originalFilePath, sparseFilePath)
59+
60+
// Ensure that the copied file has the same contents as the original file
61+
require.Equal(t, fileDigest(t, originalFilePath), fileDigest(t, sparseFilePath))
62+
}
63+
64+
func copySparse(t *testing.T, originalFilePath string, sparseFilePath string) {
65+
originalFile, err := os.Open(originalFilePath)
66+
require.NoError(t, err)
67+
68+
originalFileInfo, err := originalFile.Stat()
69+
require.NoError(t, err)
70+
71+
sparseFile, err := os.Create(sparseFilePath)
72+
require.NoError(t, err)
73+
74+
require.NoError(t, sparseFile.Truncate(originalFileInfo.Size()))
75+
require.NoError(t, sparseio.Copy(sparseFile, originalFile))
76+
77+
require.NoError(t, originalFile.Close())
78+
require.NoError(t, sparseFile.Close())
79+
}
80+
81+
// randomlySizedChunk returns a zero-filled byte slice whose length is chosen
// pseudo-randomly from the inclusive range [minBytes, maxBytes].
//
//nolint:gosec // we don't need cryptographically secure randomness here
func randomlySizedChunk(minBytes int, maxBytes int) []byte {
	span := maxBytes - minBytes + 1
	size := rand.Intn(span) + minBytes

	return make([]byte, size)
}
85+
86+
func fileDigest(t *testing.T, path string) digest.Digest {
87+
file, err := os.Open(path)
88+
require.NoError(t, err)
89+
90+
digest, err := digest.FromReader(file)
91+
require.NoError(t, err)
92+
93+
require.NoError(t, file.Close())
94+
95+
return digest
96+
}

internal/storage/temporary/temporary.go

+47-5
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,68 @@ package temporary
22

33
import (
44
"github.com/cirruslabs/vetu/internal/homedir"
5+
"github.com/cirruslabs/vetu/internal/sparseio"
56
"github.com/cirruslabs/vetu/internal/vmdirectory"
67
"github.com/google/uuid"
7-
cp "github.com/otiai10/copy"
88
"os"
99
"path/filepath"
1010
)
1111

12-
func AtomicallyCopyThrough(src string, dest string) error {
12+
func AtomicallyCopyThrough(srcDir string, dstDir string) error {
1313
baseDir, err := initialize()
1414
if err != nil {
1515
return err
1616
}
1717

18-
copyThroughPath := filepath.Join(baseDir, uuid.NewString())
18+
// Create an intermediate directory that we'll later
19+
// os.Rename() into dstDir to achieve the atomicity
20+
intermediateDir := filepath.Join(baseDir, uuid.NewString())
1921

20-
if err := cp.Copy(src, copyThroughPath); err != nil {
22+
if err := os.Mkdir(intermediateDir, 0755); err != nil {
2123
return err
2224
}
2325

24-
return os.Rename(copyThroughPath, dest)
26+
// Copy the files from the source directory
27+
// to the intermediate directory
28+
dirEntries, err := os.ReadDir(srcDir)
29+
if err != nil {
30+
return err
31+
}
32+
33+
for _, dirEntry := range dirEntries {
34+
srcFile, err := os.Open(filepath.Join(srcDir, dirEntry.Name()))
35+
if err != nil {
36+
return err
37+
}
38+
39+
srcFileInfo, err := srcFile.Stat()
40+
if err != nil {
41+
return err
42+
}
43+
44+
dstFile, err := os.Create(filepath.Join(intermediateDir, dirEntry.Name()))
45+
if err != nil {
46+
return err
47+
}
48+
49+
if err := dstFile.Truncate(srcFileInfo.Size()); err != nil {
50+
return err
51+
}
52+
53+
if err := sparseio.Copy(dstFile, srcFile); err != nil {
54+
return err
55+
}
56+
57+
if err := srcFile.Close(); err != nil {
58+
return err
59+
}
60+
61+
if err := dstFile.Close(); err != nil {
62+
return err
63+
}
64+
}
65+
66+
return os.Rename(intermediateDir, dstDir)
2567
}
2668

2769
func Create() (*vmdirectory.VMDirectory, error) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
package temporary_test
2+
3+
import (
4+
cryptorand "crypto/rand"
5+
"github.com/cirruslabs/vetu/internal/storage/temporary"
6+
"github.com/dustin/go-humanize"
7+
"github.com/opencontainers/go-digest"
8+
"github.com/stretchr/testify/require"
9+
"os"
10+
"path/filepath"
11+
"testing"
12+
)
13+
14+
func TestAtomicallyCopyThrough(t *testing.T) {
15+
t.Setenv("VETU_HOME", filepath.Join(t.TempDir(), ".vetu"))
16+
17+
tmpDir := t.TempDir()
18+
19+
// Create a source directory
20+
srcDir := filepath.Join(tmpDir, "src")
21+
require.NoError(t, os.Mkdir(srcDir, 0700))
22+
23+
// Add a small-sized text file to it
24+
err := os.WriteFile(filepath.Join(srcDir, "text.txt"), []byte("Hello, World!\n"), 0600)
25+
require.NoError(t, err)
26+
27+
// Add a medium-sized binary file to it
28+
buf := make([]byte, 64*humanize.MByte)
29+
_, err = cryptorand.Read(buf)
30+
require.NoError(t, err)
31+
32+
err = os.WriteFile(filepath.Join(srcDir, "binary.bin"), buf, 0600)
33+
require.NoError(t, err)
34+
35+
// Copy source directory contents to destination directory
36+
dstDir := filepath.Join(tmpDir, "dst")
37+
require.NoError(t, temporary.AtomicallyCopyThrough(srcDir, dstDir))
38+
39+
// Ensure that the files copied are identical
40+
// to the ones in the source directory
41+
require.Equal(t, fileDigest(t, filepath.Join(dstDir, "text.txt")), digest.FromString("Hello, World!\n"))
42+
require.Equal(t, fileDigest(t, filepath.Join(dstDir, "binary.bin")), digest.FromBytes(buf))
43+
}
44+
45+
func fileDigest(t *testing.T, path string) digest.Digest {
46+
file, err := os.Open(path)
47+
require.NoError(t, err)
48+
49+
digest, err := digest.FromReader(file)
50+
require.NoError(t, err)
51+
52+
require.NoError(t, file.Close())
53+
54+
return digest
55+
}

0 commit comments

Comments
 (0)