diff --git a/.gitignore b/.gitignore index 95ab7b95..d802af87 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,9 @@ /go.work /go.work.sum +# tools +.mise.toml + # default data directories /vunnel /bin diff --git a/cmd/grype-db/cli/commands/cache_restore.go b/cmd/grype-db/cli/commands/cache_restore.go index 318fcfe9..b69456fd 100644 --- a/cmd/grype-db/cli/commands/cache_restore.go +++ b/cmd/grype-db/cli/commands/cache_restore.go @@ -11,6 +11,7 @@ import ( "strings" "github.com/scylladb/go-set/strset" + "github.com/spf13/afero" "github.com/spf13/cobra" "github.com/spf13/pflag" "github.com/spf13/viper" @@ -200,6 +201,18 @@ func extractTarGz(reader io.Reader, selectedProviders *strset.Set) error { tr := tar.NewReader(gr) + rootPath, err := os.Getwd() + if err != nil { + return fmt.Errorf("failed to get current working directory: %w", err) + } + + rootPath, err = filepath.Abs(rootPath) + if err != nil { + return fmt.Errorf("failed to get absolute path: %w", err) + } + + var restoredAny bool + fs := afero.NewOsFs() for { header, err := tr.Next() @@ -217,36 +230,131 @@ func extractTarGz(reader io.Reader, selectedProviders *strset.Set) error { continue } - log.WithFields("path", header.Name).Trace("extracting file") + restoredAny = true - switch header.Typeflag { - case tar.TypeDir: - if err := os.Mkdir(header.Name, 0755); err != nil { - return fmt.Errorf("failed to create directory: %w", err) - } - case tar.TypeReg: - parentPath := filepath.Dir(header.Name) - if parentPath != "" { - if err := os.MkdirAll(parentPath, 0755); err != nil { - return fmt.Errorf("failed to create parent directory %q for file %q: %w", parentPath, header.Name, err) - } - } + if err := processTarHeader(fs, rootPath, header, tr); err != nil { + return err + } + } - outFile, err := os.Create(header.Name) - if err != nil { - return fmt.Errorf("failed to create file: %w", err) - } - if err := safeCopy(outFile, tr); err != nil { - return fmt.Errorf("failed to copy file: %w", err) - } - if err := outFile.Close(); err != nil { - return fmt.Errorf("failed to close file: %w", err) - } + if !restoredAny { + return fmt.Errorf("no provider data was restored") + } + return nil +} + +func processTarHeader(fs afero.Fs, rootPath string, header *tar.Header, reader io.Reader) error { + // clean the path to avoid traversal (removes "..", ".", etc.) + cleanedPath := cleanPathRelativeToRoot(rootPath, header.Name) + + if err := detectPathTraversal(rootPath, cleanedPath); err != nil { + return err + } + + log.WithFields("path", cleanedPath).Trace("extracting file") + + switch header.Typeflag { + case tar.TypeDir: + if err := fs.Mkdir(cleanedPath, 0755); err != nil { + return fmt.Errorf("failed to create directory: %w", err) + } + case tar.TypeSymlink: + if err := handleSymlink(fs, rootPath, cleanedPath, header.Linkname); err != nil { + return fmt.Errorf("failed to create symlink: %w", err) + } + case tar.TypeReg: + if err := handleFile(fs, cleanedPath, reader); err != nil { + return fmt.Errorf("failed to handle file: %w", err) + } + default: + log.WithFields("name", cleanedPath, "type", header.Typeflag).Warn("unknown file type in backup archive") + } + return nil +} + +func handleFile(fs afero.Fs, cleanedPath string, reader io.Reader) error { + if cleanedPath == "" { + return fmt.Errorf("empty path") + } + + parentPath := filepath.Dir(cleanedPath) + if parentPath != "" { + if err := fs.MkdirAll(parentPath, 0755); err != nil { + return fmt.Errorf("failed to create parent directory %q for file %q: %w", parentPath, cleanedPath, err) + } + } - default: - log.WithFields("name", header.Name, "type", header.Typeflag).Warn("unknown file type in backup archive") + outFile, err := fs.Create(cleanedPath) + if err != nil { + return fmt.Errorf("failed to create file: %w", err) + } + if err := safeCopy(outFile, reader); err != nil { + return fmt.Errorf("failed to copy file: %w", err) + } + if err := outFile.Close(); err != nil { + return fmt.Errorf("failed to close file: %w", err) + } + return nil +} + +func handleSymlink(fs afero.Fs, rootPath, cleanedPath, linkName string) error { + if err := detectLinkTraversal(rootPath, cleanedPath, linkName); err != nil { + return err + } + + linkReader, ok := fs.(afero.LinkReader) + if !ok { + return afero.ErrNoReadlink + } + + // check if the symlink already exists and is pointing to the correct target + if linkTarget, err := linkReader.ReadlinkIfPossible(cleanedPath); err == nil { + if linkTarget == linkName { + return nil + } + + if err := fs.Remove(cleanedPath); err != nil { + return fmt.Errorf("failed to remove existing symlink: %w", err) } } + + linker, ok := fs.(afero.Linker) + if !ok { + return afero.ErrNoSymlink + } + + if err := linker.SymlinkIfPossible(linkName, cleanedPath); err != nil { + return fmt.Errorf("failed to create symlink: %w", err) + } + return nil +} + +func cleanPathRelativeToRoot(rootPath, path string) string { + return filepath.Join(rootPath, filepath.Clean(path)) +} + +func detectLinkTraversal(rootPath, cleanedPath, linkTarget string) error { + linkTarget = filepath.Clean(linkTarget) + if filepath.IsAbs(linkTarget) { + return detectPathTraversal(rootPath, linkTarget) + } + + linkTarget = filepath.Join(filepath.Dir(cleanedPath), linkTarget) + + if !strings.HasPrefix(linkTarget, rootPath) { + return fmt.Errorf("symlink points outside root: %s -> %s", cleanedPath, linkTarget) + } + return nil +} + +func detectPathTraversal(rootPath, cleanedPath string) error { + if cleanedPath == "" { + return nil + } + + if !strings.HasPrefix(cleanedPath, rootPath) { + return fmt.Errorf("path traversal detected: %s", cleanedPath) + } return nil } diff --git a/cmd/grype-db/cli/commands/cache_restore_test.go b/cmd/grype-db/cli/commands/cache_restore_test.go new file mode 100644 index 00000000..c2d56906 --- /dev/null +++ b/cmd/grype-db/cli/commands/cache_restore_test.go @@ -0,0 +1,242 @@ +package commands + +import ( + "bytes" + "path/filepath" + "testing" + + "github.com/spf13/afero" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDetectLinkTraversal(t *testing.T) { + rootPath := "/safe/root" + + tests := []struct { + name string + cleanedPath string + linkTarget string + wantErr bool + }{ + { + name: "valid symlink inside root", + cleanedPath: "/safe/root/some/file", + linkTarget: "target/file", + wantErr: false, + }, + { + name: "symlink outside root", + cleanedPath: "/safe/root/some/file", + linkTarget: "../../outside/file", + wantErr: true, + }, + { + name: "absolute symlink outside root", + cleanedPath: "/safe/root/some/file", + linkTarget: "/other/path/file", + wantErr: true, + }, + { + name: "valid symlink to a deeper path", + cleanedPath: "/safe/root/some/file", + linkTarget: "another/file", + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := detectLinkTraversal(rootPath, tt.cleanedPath, tt.linkTarget) + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestDetectPathTraversal(t *testing.T) { + rootPath := "/safe/root" + + tests := []struct { + name string + cleanedPath string + wantErr bool + }{ + { + name: "valid path inside root", + cleanedPath: "/safe/root/some/file", + wantErr: false, + }, + { + name: "path outside root", + cleanedPath: "/unsafe/root/some/file", + wantErr: true, + }, + { + name: "empty path", + cleanedPath: "", + wantErr: false, + }, + { + name: "root path itself", + cleanedPath: "/safe/root", + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := detectPathTraversal(rootPath, tt.cleanedPath) + if tt.wantErr { + assert.Error(t, err) + assert.Contains(t, err.Error(), "path traversal detected") + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestHandleFile(t *testing.T) { + fs := afero.NewMemMapFs() + + tests := []struct { + name string + path string + content string + wantErr require.ErrorAssertionFunc + verifyFunc func(t *testing.T, fs afero.Fs, path, content string) + }{ + { + name: "valid file creation", + path: "/testdir/file.txt", + content: "hello world", + verifyFunc: func(t *testing.T, fs afero.Fs, path, expected string) { + fileExists, err := afero.Exists(fs, path) + require.NoError(t, err) + assert.True(t, fileExists) + + fileContent, err := afero.ReadFile(fs, path) + require.NoError(t, err) + assert.Equal(t, expected, string(fileContent)) + }, + }, + { + name: "parent directory creation", + path: "/newdir/subdir/file.txt", + content: "content in nested directory", + verifyFunc: func(t *testing.T, fs afero.Fs, path, expected string) { + fileExists, err := afero.Exists(fs, path) + require.NoError(t, err) + assert.True(t, fileExists) + + fileContent, err := afero.ReadFile(fs, path) + require.NoError(t, err) + assert.Equal(t, expected, string(fileContent)) + + dirExists, err := afero.DirExists(fs, "/newdir/subdir") + require.NoError(t, err) + assert.True(t, dirExists) + }, + }, + { + name: "file creation failure", + path: "", + content: "should fail", + wantErr: require.Error, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + reader := bytes.NewReader([]byte(tt.content)) + + err := handleFile(fs, tt.path, reader) + tt.wantErr(t, err) + if tt.verifyFunc != nil { + tt.verifyFunc(t, fs, tt.path, tt.content) + } + }) + } +} + +func TestHandleSymlink(t *testing.T) { + + tests := []struct { + name string + headerName string + linkName string + setupFunc func(fs afero.Fs, rootPath, path, linkTarget string) error + wantErr require.ErrorAssertionFunc + }{ + { + name: "valid symlink creation", + headerName: "symlink", + linkName: "target/file", + }, + { + name: "symlink already exists and points to the correct target", + headerName: "symlink", + linkName: "target/file", + setupFunc: func(fs afero.Fs, rootPath, path, linkName string) error { + linker, ok := fs.(afero.Linker) + require.True(t, ok) + return linker.SymlinkIfPossible(linkName, filepath.Join(rootPath, path)) + }, + }, + { + name: "symlink exists and points to a different target", + headerName: "symlink", + linkName: "target/file", + setupFunc: func(fs afero.Fs, rootPath, path, linkName string) error { + linker, ok := fs.(afero.Linker) + require.True(t, ok) + return linker.SymlinkIfPossible("wrong/target", filepath.Join(rootPath, path)) + }, + }, + { + name: "detectLinkTraversal error", + headerName: "symlink", + linkName: "../../outside", + wantErr: require.Error, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fs := afero.NewOsFs() + + rootPath := t.TempDir() + + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + if tt.setupFunc != nil { + err := tt.setupFunc(fs, rootPath, tt.headerName, tt.linkName) + require.NoError(t, err) + } + + cleanPath := cleanPathRelativeToRoot(rootPath, tt.headerName) + err := handleSymlink(fs, rootPath, cleanPath, tt.linkName) + tt.wantErr(t, err) + if err != nil { + return + } + + // check if symlink was created and points to the correct target + linkReader, ok := fs.(afero.LinkReader) + require.True(t, ok) + + linkTarget, err := linkReader.ReadlinkIfPossible(cleanPath) + require.NoError(t, err) + assert.Equal(t, tt.linkName, linkTarget) + + }) + } +} diff --git a/config/grype/acceptance.yaml b/config/grype/acceptance.yaml deleted file mode 100644 index 11e3ec0a..00000000 --- a/config/grype/acceptance.yaml +++ /dev/null @@ -1,2 +0,0 @@ -db: - validate-by-hash-on-start: true diff --git a/internal/tarutil/file_entry.go b/internal/tarutil/file_entry.go index 628cacd7..c7778b69 100644 --- a/internal/tarutil/file_entry.go +++ b/internal/tarutil/file_entry.go @@ -1,7 +1,6 @@ package tarutil import ( - "archive/tar" "fmt" "io" "os" @@ -28,34 +27,11 @@ func NewEntryFromFilePaths(paths ...string) []Entry { } func (t FileEntry) writeEntry(tw lowLevelWriter) error { - filePath := t.Path - f, err := os.Open(filePath) + fi, err := os.Lstat(t.Path) if err != nil { - return fmt.Errorf("unable to open file (%s): %w", filePath, err) + return fmt.Errorf("unable to stat file %q: %w", t.Path, err) } - defer f.Close() - - stat, err := f.Stat() - if err != nil { - return fmt.Errorf("unable to get stat for file (%s): %w", filePath, err) - } - - header := &tar.Header{ - Name: filePath, - Size: stat.Size(), - Mode: int64(stat.Mode()), - ModTime: stat.ModTime(), - } - - err = tw.WriteHeader(header) - if err != nil { - return fmt.Errorf("unable to write header for file (%s): %w", filePath, err) - } - - _, err = io.Copy(tw, f) - if err != nil { - return fmt.Errorf("unable to copy data to the tar (file='%s'): %w", filePath, err) - } - - return nil + return writeEntry(tw, t.Path, fi, func() (io.Reader, error) { + return os.Open(t.Path) + }) } diff --git a/internal/tarutil/file_entry_test.go b/internal/tarutil/file_entry_test.go index 9598b90f..0ce7b739 100644 --- a/internal/tarutil/file_entry_test.go +++ b/internal/tarutil/file_entry_test.go @@ -45,7 +45,6 @@ func (m *mockTarWriter) Write(b []byte) (int, error) { func TestFileEntry_writeEntry(t *testing.T) { testStr := "hello world" - tests := []struct { name string file func(t *testing.T) string diff --git a/internal/tarutil/reader_entry.go b/internal/tarutil/reader_entry.go index 09dda2c4..eda3326f 100644 --- a/internal/tarutil/reader_entry.go +++ b/internal/tarutil/reader_entry.go @@ -27,33 +27,64 @@ func NewEntryFromBytes(by []byte, filename string, fileInfo os.FileInfo) Entry { func (t ReaderEntry) writeEntry(tw lowLevelWriter) error { log.WithFields("path", t.Filename).Trace("adding stream to archive") + return writeEntry(tw, t.Filename, t.FileInfo, func() (io.Reader, error) { + return t.Reader, nil + }) +} - header, err := tar.FileInfoHeader(t.FileInfo, t.FileInfo.Name()) +func writeEntry(tw lowLevelWriter, filename string, fileInfo os.FileInfo, opener func() (io.Reader, error)) error { + header, err := tar.FileInfoHeader(fileInfo, "") if err != nil { return err } - contents, err := io.ReadAll(t.Reader) - if err != nil { - return err - } + header.Name = filename + switch fileInfo.Mode() & os.ModeType { + case os.ModeDir: + header.Size = 0 + err = tw.WriteHeader(header) + if err != nil { + return err + } + return nil - header.Name = t.Filename - header.Size = int64(len(contents)) - err = tw.WriteHeader(header) - if err != nil { - return err - } + case os.ModeSymlink: + linkTarget, err := os.Readlink(filename) + if err != nil { + return err + } + header.Linkname = linkTarget + header.Size = 0 + err = tw.WriteHeader(header) + if err != nil { + return err + } + return nil - _, err = tw.Write(contents) - if err != nil { - return err - } + default: + reader, err := opener() + if err != nil { + return err + } - // note: this will ensure that the tar entry is properly aligned in the tar archive (padding with zeros) - err = tw.Flush() - if err != nil { - return err + contents, err := io.ReadAll(reader) + if err != nil { + return err + } + header.Size = int64(len(contents)) + + if err := tw.WriteHeader(header); err != nil { + return err + } + + if _, err := tw.Write(contents); err != nil { + return err + } + + // ensure proper alignment in the tar archive (padding with zeros) + if err := tw.Flush(); err != nil { + return err + } } return nil diff --git a/internal/tarutil/reader_entry_test.go b/internal/tarutil/reader_entry_test.go index 6b9d6425..56b7bcb2 100644 --- a/internal/tarutil/reader_entry_test.go +++ b/internal/tarutil/reader_entry_test.go @@ -1,11 +1,14 @@ package tarutil import ( + "archive/tar" "io/fs" "os" + "path/filepath" "testing" "time" + "github.com/spf13/afero" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -48,20 +51,34 @@ func (m mockFileInfo) Sys() any { } func TestReaderEntry_writeEntry(t *testing.T) { + d := t.TempDir() + file := filepath.Join(d, "file.txt") + require.NoError(t, os.WriteFile(file, []byte("hello world"), 0644)) + + link := filepath.Join(d, "link") + require.NoError(t, os.Symlink(file, link)) + + dir := filepath.Join(d, "dir") + require.NoError(t, os.Mkdir(dir, 0755)) tests := []struct { - name string - bytes []byte - filename string - fileinfo os.FileInfo - wantErr require.ErrorAssertionFunc + name string + typeFlag byte + bytes []byte + filename string + fileinfo os.FileInfo + wantErr require.ErrorAssertionFunc + expectFlush bool + fs afero.Fs }{ { - name: "valid file", - bytes: []byte("hello world"), - filename: "file.txt", + name: "valid file", + typeFlag: tar.TypeReg, + bytes: []byte("hello world"), + filename: file, + expectFlush: true, fileinfo: &mockFileInfo{ - name: "SOMEWHEREELSE/PLACES.txt", + name: file, size: 11, mode: 0644, modTime: time.Now(), @@ -69,6 +86,34 @@ func TestReaderEntry_writeEntry(t *testing.T) { sys: nil, }, }, + { + name: "symlink", + typeFlag: tar.TypeSymlink, + bytes: nil, + filename: link, + expectFlush: false, + fileinfo: &mockFileInfo{ + name: link, + size: 0, + mode: os.ModeSymlink, + modTime: time.Now(), + isDir: false, + }, + }, + { + name: "directory", + typeFlag: tar.TypeDir, + bytes: nil, + filename: dir, + expectFlush: false, + fileinfo: &mockFileInfo{ + name: dir, + size: 0, + mode: os.ModeDir, + modTime: time.Now(), + isDir: true, + }, + }, } for _, tt := range tests { @@ -88,10 +133,11 @@ func TestReaderEntry_writeEntry(t *testing.T) { assert.NoError(t, err) require.Len(t, tw.headers, 1) + assert.Equal(t, tt.typeFlag, tw.headers[0].Typeflag) assert.Equal(t, tt.filename, tw.headers[0].Name) assert.Equal(t, int64(len(tt.bytes)), tw.headers[0].Size) assert.Equal(t, string(tt.bytes), tw.buffers[0].String()) - assert.True(t, tw.flushCalled) + assert.Equal(t, tt.expectFlush, tw.flushCalled) }) } } diff --git a/manager/src/grype_db_manager/cli/db.py b/manager/src/grype_db_manager/cli/db.py index af343c0c..55c565b4 100644 --- a/manager/src/grype_db_manager/cli/db.py +++ b/manager/src/grype_db_manager/cli/db.py @@ -153,12 +153,10 @@ def validate_db( label="custom-db", name="grype", version=grype_version + f"+import-db={db_info.archive_path}", - profile="acceptance", ), ycfg.Tool( name="grype", version=grype_version, - # profile="acceptance", # TODO: enable after current db is fixed ), ], ), @@ -166,18 +164,7 @@ def validate_db( yardstick_cfg = ycfg.Application( profiles=ycfg.Profiles( - data={ - "grype": { - "acceptance": { - "config_path": "config/grype/acceptance.yaml", - }, - }, - "grype[custom-db]": { - "acceptance": { - "config_path": "config/grype/acceptance.yaml", - }, - }, - }, + data={}, ), store_root=cfg.data.yardstick_root, default_max_year=cfg.validate.default_max_year, @@ -185,6 +172,8 @@ def validate_db( ) for r in result_sets: + # workaround for test setup issues + os.environ["GRYPE_DB_VALIDATE_BY_HASH_ON_START"] = "true" db.capture_results( cfg=yardstick_cfg, db_uuid=db_uuid, diff --git a/manager/src/grype_db_manager/db/validation.py b/manager/src/grype_db_manager/db/validation.py index 2db63069..743e51d2 100644 --- a/manager/src/grype_db_manager/db/validation.py +++ b/manager/src/grype_db_manager/db/validation.py @@ -48,12 +48,15 @@ def capture_results(cfg: ycfg.Application, db_uuid: str, result_set: str, root_d db_info = dbm.get_db_info(db_uuid) request_images = cfg.result_sets[result_set].images() - is_stale = _is_result_set_stale( - request_images=request_images, - result_set=result_set, - db_info=db_info, - yardstick_root_dir=cfg.store_root, - ) + + is_stale = True + if not recapture: + is_stale = _is_result_set_stale( + request_images=request_images, + result_set=result_set, + db_info=db_info, + yardstick_root_dir=cfg.store_root, + ) if is_stale or recapture: capture.result_set( diff --git a/manager/src/grype_db_manager/grypedb.py b/manager/src/grype_db_manager/grypedb.py index 964aba26..d5aecab2 100644 --- a/manager/src/grype_db_manager/grypedb.py +++ b/manager/src/grype_db_manager/grypedb.py @@ -232,6 +232,7 @@ "wolfi:rolling", ] + def expected_namespaces(schema_version: int) -> list[str]: if schema_version <= 3: return v3_expected_namespaces @@ -239,6 +240,7 @@ def expected_namespaces(schema_version: int) -> list[str]: return v4_expected_namespaces return v4_expected_namespaces + v5_additional_namespaces + @dataclasses.dataclass class DBInfo: uuid: str diff --git a/manager/tests/cli/.gitignore b/manager/tests/cli/.gitignore index 41589d42..51878389 100644 --- a/manager/tests/cli/.gitignore +++ b/manager/tests/cli/.gitignore @@ -1,4 +1,4 @@ -cli-test-data +cli-test-data* .yardstick .grype-db-manager grype-db-cache.tar.gz diff --git a/manager/tests/cli/workflow-2-validate-db.sh b/manager/tests/cli/workflow-2-validate-db.sh index 793aec22..4fe8cbc0 100755 --- a/manager/tests/cli/workflow-2-validate-db.sh +++ b/manager/tests/cli/workflow-2-validate-db.sh @@ -20,6 +20,9 @@ header "Case 1: fail DB validation (too many unknowns)" make clean-yardstick-labels +# workaround for go1.23+ looking into parent dirs when building go modules in subdirs +export GOWORK=off + run_expect_fail grype-db-manager db validate $DB_ID -vvv --skip-namespace-check --recapture assert_contains $(last_stdout_file) "current indeterminate matches % is greater than 10%"