From 9fdc9990a413603a8a72b906c089ee1ba34fd743 Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Wed, 18 Sep 2024 17:45:08 -0400 Subject: [PATCH] chore: move cache to external library, add API-level configuration Signed-off-by: Keith Zantow --- cmd/syft/internal/options/cache.go | 15 ++- cmd/syft/internal/options/cache_test.go | 16 +-- go.mod | 1 + go.sum | 2 + internal/cache/README.md | 51 -------- internal/cache/bypass.go | 24 ---- internal/cache/bypass_test.go | 18 --- internal/cache/cache.go | 49 ------- internal/cache/cache_test.go | 32 ----- internal/cache/error_resolver.go | 40 ------ internal/cache/error_resolver_test.go | 47 ------- internal/cache/filesystem.go | 120 ----------------- internal/cache/filesystem_test.go | 106 --------------- internal/cache/hash_type.go | 71 ---------- internal/cache/hash_type_test.go | 123 ------------------ internal/cache/memory.go | 19 --- internal/cache/memory_test.go | 37 ------ internal/cache/resolver.go | 83 ------------ internal/cache/resolver_test.go | 92 ------------- internal/cachemanager/cache.go | 28 ++++ internal/cachemanager/cache_test.go | 38 ++++++ syft/lib.go | 7 + syft/lib_test.go | 13 ++ syft/pkg/cataloger/golang/licenses.go | 5 +- syft/pkg/cataloger/java/maven_resolver.go | 5 +- syft/source/directorysource/cache_excludes.go | 4 +- 26 files changed, 113 insertions(+), 933 deletions(-) delete mode 100644 internal/cache/README.md delete mode 100644 internal/cache/bypass.go delete mode 100644 internal/cache/bypass_test.go delete mode 100644 internal/cache/cache.go delete mode 100644 internal/cache/cache_test.go delete mode 100644 internal/cache/error_resolver.go delete mode 100644 internal/cache/error_resolver_test.go delete mode 100644 internal/cache/filesystem.go delete mode 100644 internal/cache/filesystem_test.go delete mode 100644 internal/cache/hash_type.go delete mode 100644 internal/cache/hash_type_test.go delete mode 100644 internal/cache/memory.go delete mode 100644 internal/cache/memory_test.go delete mode 100644 internal/cache/resolver.go delete mode 100644 internal/cache/resolver_test.go create mode 100644 internal/cachemanager/cache.go create mode 100644 internal/cachemanager/cache_test.go diff --git a/cmd/syft/internal/options/cache.go b/cmd/syft/internal/options/cache.go index a8ee1c55f0e..943a8154934 100644 --- a/cmd/syft/internal/options/cache.go +++ b/cmd/syft/internal/options/cache.go @@ -13,8 +13,9 @@ import ( "github.com/mitchellh/go-homedir" "github.com/anchore/clio" - "github.com/anchore/syft/internal/cache" + "github.com/anchore/go-cache" "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft" ) // Cache provides configuration for the Syft caching behavior @@ -36,25 +37,25 @@ func (c *Cache) PostLoad() error { } // if TTL is <= 0, disable caching entirely if ttl <= 0 { - cache.SetManager(nil) + syft.SetCacheManager(nil) return nil } // if dir == "" but we have a TTL, use an in-memory cache if c.Dir == "" { - cache.SetManager(cache.NewInMemory(ttl)) + syft.SetCacheManager(cache.NewInMemory(ttl)) return nil } dir, err := homedir.Expand(c.Dir) if err != nil { log.Warnf("unable to expand cache directory %s: %v", c.Dir, err) - cache.SetManager(cache.NewInMemory(ttl)) + syft.SetCacheManager(cache.NewInMemory(ttl)) } else { - m, err := cache.NewFromDir(dir, ttl) + m, err := cache.NewFromDir(log.Get(), dir, ttl) if err != nil { log.Warnf("unable to get filesystem cache at %s: %v", c.Dir, err) - cache.SetManager(cache.NewInMemory(ttl)) + syft.SetCacheManager(cache.NewInMemory(ttl)) } else { - cache.SetManager(m) + syft.SetCacheManager(m) } } return nil diff --git a/cmd/syft/internal/options/cache_test.go b/cmd/syft/internal/options/cache_test.go index e4d527d3af2..29e7722c8c5 100644 --- a/cmd/syft/internal/options/cache_test.go +++ b/cmd/syft/internal/options/cache_test.go @@ -13,7 +13,7 @@ import ( "github.com/stretchr/testify/require" "github.com/anchore/syft/internal" - "github.com/anchore/syft/internal/cache" + "github.com/anchore/syft/internal/cachemanager" ) func Test_defaultDir(t *testing.T) { @@ -109,7 +109,7 @@ func Test_cacheOptions(t *testing.T) { TTL: "0", }, test: func(t *testing.T) { - c := cache.GetManager().GetCache("test-disable-1", "v-disable-1") + c := cachemanager.Get().GetCache("test-disable-1", "v-disable-1") err := c.Write("key-disable-1", strings.NewReader("some-value-disable-1")) require.NoError(t, err) rdr, err := c.Read("key-disable-1") @@ -124,7 +124,7 @@ func Test_cacheOptions(t *testing.T) { TTL: "0s", }, test: func(t *testing.T) { - c := cache.GetManager().GetCache("test-disable-2", "v-disable-2") + c := cachemanager.Get().GetCache("test-disable-2", "v-disable-2") err := c.Write("key-disable-2", strings.NewReader("some-value-disable-2")) require.NoError(t, err) rdr, err := c.Read("key-disable-2") @@ -140,7 +140,7 @@ func Test_cacheOptions(t *testing.T) { TTL: "0d", }, test: func(t *testing.T) { - c := cache.GetManager().GetCache("test-disable-3", "v-disable-3") + c := cachemanager.Get().GetCache("test-disable-3", "v-disable-3") err := c.Write("key-disable-3", strings.NewReader("some-value-disable-3")) require.NoError(t, err) rdr, err := c.Read("key-disable-3") @@ -155,7 +155,7 @@ func Test_cacheOptions(t *testing.T) { TTL: "10m", }, test: func(t *testing.T) { - c := cache.GetManager().GetCache("test-mem", "v-mem") + c := cachemanager.Get().GetCache("test-mem", "v-mem") err := c.Write("key-mem", strings.NewReader("some-value-mem")) require.NoError(t, err) rdr, err := c.Read("key-mem") @@ -175,7 +175,7 @@ func Test_cacheOptions(t *testing.T) { TTL: "10m", }, test: func(t *testing.T) { - c := cache.GetManager().GetCache("test-disk", "v-disk") + c := cachemanager.Get().GetCache("test-disk", "v-disk") err := c.Write("key-disk", strings.NewReader("some-value-disk")) require.NoError(t, err) rdr, err := c.Read("key-disk") @@ -191,8 +191,8 @@ func Test_cacheOptions(t *testing.T) { } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - original := cache.GetManager() - defer cache.SetManager(original) + original := cachemanager.Get() + defer cachemanager.Set(original) err := test.opts.PostLoad() require.NoError(t, err) diff --git a/go.mod b/go.mod index ebf85066696..51da1bea964 100644 --- a/go.mod +++ b/go.mod @@ -90,6 +90,7 @@ require ( github.com/BurntSushi/toml v1.4.0 github.com/OneOfOne/xxhash v1.2.8 github.com/adrg/xdg v0.5.0 + github.com/anchore/go-cache v0.0.0-20240918212921-3fb02c7c559f github.com/magiconair/properties v1.8.7 golang.org/x/exp v0.0.0-20231108232855-2478ac86f678 ) diff --git a/go.sum b/go.sum index 6ccd3e6f3bd..9531515d3e1 100644 --- a/go.sum +++ b/go.sum @@ -101,6 +101,8 @@ github.com/anchore/clio v0.0.0-20240522144804-d81e109008aa h1:pwlAn4O9SBUnlgfa69 github.com/anchore/clio v0.0.0-20240522144804-d81e109008aa/go.mod h1:nD3H5uIvjxlfmakOBgtyFQbk5Zjp3l538kxfpHPslzI= github.com/anchore/fangs v0.0.0-20240903175602-e716ef12c23d h1:ZD4wdCBgJJzJybjTUIEiiupLF7B9H3WLuBTjspBO2Mc= github.com/anchore/fangs v0.0.0-20240903175602-e716ef12c23d/go.mod h1:Xh4ObY3fmoMzOEVXwDtS1uK44JC7+nRD0n29/1KYFYg= +github.com/anchore/go-cache v0.0.0-20240918212921-3fb02c7c559f h1:Hgh7nFHNzYcHSt/k93SpisP8ArlaZxGbq4/U7fes+pE= +github.com/anchore/go-cache v0.0.0-20240918212921-3fb02c7c559f/go.mod h1:sX0O2JkumwyIZaAhNVG7RoQXZ1yF+J8sLyPpr8WouXI= github.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537 h1:GjNGuwK5jWjJMyVppBjYS54eOiiSNv4Ba869k4wh72Q= github.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537/go.mod h1:1aiktV46ATCkuVg0O573ZrH56BUawTECPETbZyBcqT8= github.com/anchore/go-logger v0.0.0-20230725134548-c21dafa1ec5a h1:nJ2G8zWKASyVClGVgG7sfM5mwoZlZ2zYpIzN2OhjWkw= diff --git a/internal/cache/README.md b/internal/cache/README.md deleted file mode 100644 index b24a6f93408..00000000000 --- a/internal/cache/README.md +++ /dev/null @@ -1,51 +0,0 @@ -# Caching - -All caches are created from a global `manager`. By default this is a `bypassedCache`, which performs no caching. -One benefit of this is that tests don't need to worry about caching causing issues unless they explicitly need -to test the cache and can opt-in using the `cache.TestCache(t)` helper. - -Syft sets a `filesystemCache` when the [cache options](../../cmd/syft/internal/options/cache.go) are loaded. - -When using the `filesystemCache` all items are stored on disk under a root directory, generally in the form of: -``` -///path/to/data -``` - -# Using the cache - -The easiest and preferred method to use the cache is a `cache.Resolver`, which automatically creates a `` -based on the _structure_ of the provided type. -If the structure changes in any way it will end up with a new version key and all will re populate this new key, -ignoring cached values from older, different versions. -The resolver will store items using the `json` package to serialize/deserialize values, so to save space -it is encouraged to use `omitempty`. For example: - -```go -type myCacheItem struct { - Name string `json:"name",omitempty` -} -``` - -It is possible to use core types such as `pkg.Package` as long as they support the standard `json` serialization, -but this is discouraged in order to decouple changes to them from affecting the information stored in the cache. - -To get a cache for this type: -```go -resolver := cache.GetResolver[myCacheItem]("myCacheName", "v1") -``` - -Using the `resolver` is a single call, which manages checking for items in the cache, expiry times, -and if not found invoking the callback to populate the cache and return a value: -```go -data := resolver.Resolve("some/cache/key", func() (myCacheItem, error) { - // do things to return a myCacheItem or error -}) -``` - -If it is common that checking for an item will result in errors, and you do not want to re-run the resolve function -when errors are encountered, instead of using `GetResolver`, you can use `GetResolverCachingErrors`, which is useful -for things such as resolving artifacts over a network, where a number of them will not be resolved, and you do not want -to continue to have the expense of running the network resolution. This should be used when it is acceptable a network -outage and cached errors is an acceptable risk. - -An example can be seen in the [golang cataloger](../../syft/pkg/cataloger/golang/licenses.go) fetching remote licenses. diff --git a/internal/cache/bypass.go b/internal/cache/bypass.go deleted file mode 100644 index 4d32f1a613d..00000000000 --- a/internal/cache/bypass.go +++ /dev/null @@ -1,24 +0,0 @@ -package cache - -import "io" - -type bypassedCache struct{} - -func (b *bypassedCache) Read(_ string) (ReaderAtCloser, error) { - return nil, errNotFound -} - -func (b *bypassedCache) Write(_ string, contents io.Reader) error { - if closer, ok := contents.(io.Closer); ok { - _ = closer.Close() - } - return nil -} - -func (b *bypassedCache) GetCache(_, _ string) Cache { - return b -} - -func (b *bypassedCache) RootDirs() []string { - return nil -} diff --git a/internal/cache/bypass_test.go b/internal/cache/bypass_test.go deleted file mode 100644 index 073c0ef36a7..00000000000 --- a/internal/cache/bypass_test.go +++ /dev/null @@ -1,18 +0,0 @@ -package cache - -import ( - "strings" - "testing" - - "github.com/stretchr/testify/require" -) - -func Test_bypassedCache(t *testing.T) { - m := bypassedCache{} - cache := m.GetCache("name", "version") - err := cache.Write("test", strings.NewReader("value")) - require.NoError(t, err) - rdr, err := cache.Read("test") - require.Nil(t, rdr) - require.ErrorIs(t, err, errNotFound) -} diff --git a/internal/cache/cache.go b/internal/cache/cache.go deleted file mode 100644 index 3ca2ece9865..00000000000 --- a/internal/cache/cache.go +++ /dev/null @@ -1,49 +0,0 @@ -package cache - -import ( - "io" -) - -// Manager is responsible for managing cache data and instantiating all caches -type Manager interface { - // GetCache returns a cache scoped to the given named, versioned data - GetCache(name, version string) Cache - - // RootDirs returns any root directories this cache manager uses - RootDirs() []string -} - -// ReaderAtCloser is an amalgamation of: io.Reader, io.ReaderAt, and io.Closer -type ReaderAtCloser interface { - io.Reader - io.ReaderAt - io.Closer -} - -// Cache is what the application interacts with to get and set cached data -type Cache interface { - // Read returns a reader for the cache value, if found and not expired - // or errors when unable to find / expired - Read(key string) (ReaderAtCloser, error) - - // Write writes the contents of the reader to the cache - // and closes it, if the reader implements io.Closer - Write(key string, contents io.Reader) error -} - -// GetManager returns the global cache manager, which is used to instantiate all caches -func GetManager() Manager { - return manager -} - -// SetManager sets the global cache manager, which is used to instantiate all caches. -// Setting this to nil disables caching. -func SetManager(m Manager) { - if m == nil { - manager = &bypassedCache{} - } else { - manager = m - } -} - -var manager Manager = &bypassedCache{} diff --git a/internal/cache/cache_test.go b/internal/cache/cache_test.go deleted file mode 100644 index 2fceda3af04..00000000000 --- a/internal/cache/cache_test.go +++ /dev/null @@ -1,32 +0,0 @@ -package cache - -import ( - "testing" - "time" - - "github.com/stretchr/testify/require" -) - -func Test_SetManager(t *testing.T) { - original := GetManager() - defer SetManager(original) - - SetManager(nil) - - require.NotNil(t, GetManager()) - require.IsType(t, &bypassedCache{}, GetManager()) - - SetManager(NewInMemory(0)) - - require.NotNil(t, GetManager()) - require.IsType(t, &bypassedCache{}, GetManager()) - - SetManager(NewInMemory(1 * time.Hour)) - - require.NotNil(t, GetManager()) - require.IsType(t, &filesystemCache{}, GetManager()) - - SetManager(nil) - require.NotNil(t, GetManager()) - require.IsType(t, &bypassedCache{}, GetManager()) -} diff --git a/internal/cache/error_resolver.go b/internal/cache/error_resolver.go deleted file mode 100644 index efbf915aa50..00000000000 --- a/internal/cache/error_resolver.go +++ /dev/null @@ -1,40 +0,0 @@ -package cache - -import "fmt" - -// GetResolverCachingErrors returns a Resolver that caches errors and will return them -// instead of continuing to call the provided resolve functions -func GetResolverCachingErrors[T any](name, version string) Resolver[T] { - return &errorResolver[T]{ - resolver: GetResolver[errResponse[T]](name, version), - } -} - -type errResponse[T any] struct { - Error string `json:"err,omitempty"` - Value T `json:"val,omitempty"` -} - -type errorResolver[T any] struct { - resolver Resolver[errResponse[T]] -} - -func (r *errorResolver[T]) Resolve(key string, resolver resolverFunc[T]) (T, error) { - v, err := r.resolver.Resolve(key, func() (errResponse[T], error) { - v, err := resolver() - out := errResponse[T]{ - Value: v, - } - if err != nil { - out.Error = err.Error() - } - return out, nil - }) - if err != nil { - return v.Value, err - } - if v.Error != "" { - return v.Value, fmt.Errorf("failed to resolve cache: %s", v.Error) - } - return v.Value, nil -} diff --git a/internal/cache/error_resolver_test.go b/internal/cache/error_resolver_test.go deleted file mode 100644 index b7216a536b8..00000000000 --- a/internal/cache/error_resolver_test.go +++ /dev/null @@ -1,47 +0,0 @@ -package cache - -import ( - "fmt" - "testing" - "time" - - "github.com/stretchr/testify/require" -) - -func Test_errorResolver(t *testing.T) { - original := GetManager() - defer SetManager(original) - SetManager(NewInMemory(time.Hour)) - - resolver := GetResolverCachingErrors[string]("theCache", "theVersion") - - resolveCount := 0 - resolveFunc := func() (string, error) { - resolveCount++ - return "theValue", nil - } - - val, err := resolver.Resolve("theKey", resolveFunc) - require.NoError(t, err) - require.Equal(t, 1, resolveCount) - require.Equal(t, "theValue", val) - - val, err = resolver.Resolve("theKey", resolveFunc) - require.NoError(t, err) - require.Equal(t, 1, resolveCount) - require.Equal(t, "theValue", val) - - errorCount := 0 - errorFunc := func() (string, error) { - errorCount++ - return "", fmt.Errorf("an error") - } - - _, err = resolver.Resolve("errorValue", errorFunc) - require.ErrorContains(t, err, "an error") - require.Equal(t, 1, errorCount) - - _, err = resolver.Resolve("errorValue", errorFunc) - require.ErrorContains(t, err, "an error") - require.Equal(t, 1, errorCount) -} diff --git a/internal/cache/filesystem.go b/internal/cache/filesystem.go deleted file mode 100644 index 6e717599944..00000000000 --- a/internal/cache/filesystem.go +++ /dev/null @@ -1,120 +0,0 @@ -package cache - -import ( - "errors" - "fmt" - "io" - "net/url" - "os" - "path/filepath" - "regexp" - "strings" - "time" - - "github.com/spf13/afero" - - "github.com/anchore/syft/internal/log" -) - -// NewFromDir creates a new cache manager which returns caches stored on disk, rooted at the given directory -func NewFromDir(dir string, ttl time.Duration) (Manager, error) { - dir = filepath.Clean(dir) - fsys, err := subFs(afero.NewOsFs(), dir) - if err != nil { - return nil, err - } - return &filesystemCache{ - dir: dir, - fs: fsys, - ttl: ttl, - }, nil -} - -const filePermissions = 0700 -const dirPermissions = os.ModeDir | filePermissions - -type filesystemCache struct { - dir string - fs afero.Fs - ttl time.Duration -} - -func (d *filesystemCache) GetCache(name, version string) Cache { - fsys, err := subFs(d.fs, name, version) - if err != nil { - log.Warnf("error getting cache for: %s/%s: %v", name, version, err) - return &bypassedCache{} - } - return &filesystemCache{ - dir: filepath.Join(d.dir, name, version), - fs: fsys, - ttl: d.ttl, - } -} - -func (d *filesystemCache) RootDirs() []string { - if d.dir == "" { - return nil - } - return []string{d.dir} -} - -func (d *filesystemCache) Read(key string) (ReaderAtCloser, error) { - path := makeDiskKey(key) - f, err := d.fs.Open(path) - if err != nil { - log.Tracef("no cache entry for %s %s: %v", d.dir, key, err) - return nil, errNotFound - } else if stat, err := f.Stat(); err != nil || stat == nil || time.Since(stat.ModTime()) > d.ttl { - log.Tracef("cache entry is too old for %s %s", d.dir, key) - return nil, errExpired - } - log.Tracef("using cache for %s %s", d.dir, key) - return f, nil -} - -func (d *filesystemCache) Write(key string, contents io.Reader) error { - path := makeDiskKey(key) - return afero.WriteReader(d.fs, path, contents) -} - -// subFs returns a writable directory with the given name under the root cache directory returned from findRoot, -// the directory will be created if it does not exist -func subFs(fsys afero.Fs, subDirs ...string) (afero.Fs, error) { - dir := filepath.Join(subDirs...) - dir = filepath.Clean(dir) - stat, err := fsys.Stat(dir) - if errors.Is(err, afero.ErrFileNotFound) { - err = fsys.MkdirAll(dir, dirPermissions) - if err != nil { - return nil, fmt.Errorf("unable to create directory at '%s': %v", dir, err) - } - stat, err = fsys.Stat(dir) - if err != nil { - return nil, err - } - } - if err != nil || stat == nil || !stat.IsDir() { - return nil, fmt.Errorf("unable to verify directory '%s': %v", dir, err) - } - fsys = afero.NewBasePathFs(fsys, dir) - return fsys, err -} - -var keyReplacer = regexp.MustCompile("[^-._/a-zA-Z0-9]") - -// makeDiskKey makes a safe sub-path but not escape forward slashes, this allows for logical partitioning on disk -func makeDiskKey(key string) string { - // encode single dot directory - if key == "." { - return "%2E" - } - // replace any disallowed chars with encoded form - key = keyReplacer.ReplaceAllStringFunc(key, url.QueryEscape) - // allow . in names but not .. - key = strings.ReplaceAll(key, "..", "%2E%2E") - return key -} - -var errNotFound = fmt.Errorf("not found") -var errExpired = fmt.Errorf("expired") diff --git a/internal/cache/filesystem_test.go b/internal/cache/filesystem_test.go deleted file mode 100644 index 47c6930420f..00000000000 --- a/internal/cache/filesystem_test.go +++ /dev/null @@ -1,106 +0,0 @@ -package cache - -import ( - "fmt" - "io" - "net/url" - "os" - "path/filepath" - "strings" - "testing" - "time" - - "github.com/stretchr/testify/require" - - "github.com/anchore/syft/internal" -) - -func Test_filesystemCache(t *testing.T) { - dir := t.TempDir() - man, err := NewFromDir(dir, 1*time.Minute) - require.NoError(t, err) - - cacheName := "test" - cacheVersion := "v1" - cache := man.GetCache(cacheName, cacheVersion) - - cacheKey := "test-key" - contentsValue := "some contents to cache" - - err = cache.Write(cacheKey, strings.NewReader(contentsValue)) - require.NoError(t, err) - - rdr, err := cache.Read(cacheKey) - require.NoError(t, err) - defer internal.CloseAndLogError(rdr, cacheKey) - - contents, err := io.ReadAll(rdr) - require.NoError(t, err) - require.Equal(t, contentsValue, string(contents)) - - // check the contents were actually written to disk as expected - contents, err = os.ReadFile(filepath.Join(dir, cacheName, cacheVersion, cacheKey)) - require.NoError(t, err) - require.Equal(t, contentsValue, string(contents)) - - _, err = cache.Read("otherKey") - require.ErrorIs(t, err, errNotFound) -} - -func Test_makeDiskKey(t *testing.T) { - tests := []struct { - in string - expected string - }{ - { - in: "", - expected: "", - }, - { - in: ".", - expected: "%2E", - }, - { - in: "..", - expected: "%2E%2E", - }, - { - in: "github.com", - expected: "github.com", - }, - { - in: "../github.com", - expected: "%2E%2E/github.com", - }, - { - in: "github.com/../..", - expected: "github.com/%2E%2E/%2E%2E", - }, - { - in: "github.com/%2E../..", - expected: "github.com/%252E%2E%2E/%2E%2E", - }, - } - for _, test := range tests { - t.Run(test.in, func(t *testing.T) { - got := makeDiskKey(test.in) - // validate appropriate escaping - require.Equal(t, test.expected, got) - // also validate that unescaped string matches original - unescaped, err := url.QueryUnescape(got) - require.NoError(t, err) - require.Equal(t, test.in, unescaped) - }) - } -} - -func Test_errors(t *testing.T) { - tmp := t.TempDir() - cache := filepath.Join(tmp, "cache") - // make a non-writable directory - require.NoError(t, os.MkdirAll(cache, 0500|os.ModeDir)) - // attempt to make cache in non-writable directory - dir := filepath.Join(cache, "dir") - _, err := NewFromDir(dir, time.Hour) - require.ErrorContains(t, err, fmt.Sprintf("unable to create directory at '%s':", dir)) -} diff --git a/internal/cache/hash_type.go b/internal/cache/hash_type.go deleted file mode 100644 index c7b3c4a4233..00000000000 --- a/internal/cache/hash_type.go +++ /dev/null @@ -1,71 +0,0 @@ -package cache - -import ( - "fmt" - "reflect" - - "github.com/mitchellh/hashstructure/v2" -) - -// hashType returns a stable hash based on the structure of the type -func hashType[T any]() string { - // get the base type and hash an empty instance - var t T - empty := emptyValue(reflect.TypeOf(t)).Interface() - hash, err := hashstructure.Hash(empty, hashstructure.FormatV2, &hashstructure.HashOptions{ - ZeroNil: false, - IgnoreZeroValue: false, - SlicesAsSets: false, - UseStringer: false, - }) - if err != nil { - panic(fmt.Errorf("unable to use type as cache key: %w", err)) - } - return fmt.Sprintf("%x", hash) -} - -func emptyValue(t reflect.Type) reflect.Value { - switch t.Kind() { - case reflect.Pointer: - e := t.Elem() - v := emptyValue(e) - if v.CanAddr() { - return v.Addr() - } - ptrv := reflect.New(e) - ptrv.Elem().Set(v) - return ptrv - case reflect.Slice: - v := emptyValue(t.Elem()) - s := reflect.MakeSlice(t, 1, 1) - s.Index(0).Set(v) - return s - case reflect.Struct: - v := reflect.New(t).Elem() - // get all empty field values, too - for i := 0; i < v.NumField(); i++ { - f := t.Field(i) - if isIgnored(f) { - continue - } - fv := v.Field(i) - if fv.CanSet() { - fv.Set(emptyValue(f.Type)) - } - } - return v - default: - return reflect.New(t).Elem() - } -} - -func isIgnored(f reflect.StructField) bool { - if !f.IsExported() { - return true - } - tag := f.Tag.Get("hash") - if tag == "-" || tag == "ignore" { - return true - } - return false -} diff --git a/internal/cache/hash_type_test.go b/internal/cache/hash_type_test.go deleted file mode 100644 index 3fe24cbd3ee..00000000000 --- a/internal/cache/hash_type_test.go +++ /dev/null @@ -1,123 +0,0 @@ -package cache - -import ( - "fmt" - "testing" - - "github.com/mitchellh/hashstructure/v2" - "github.com/stretchr/testify/require" -) - -func Test_hashType(t *testing.T) { - type t1 struct { - Name string - } - type t2 struct { - Name string - } - type generic[T any] struct { - Val T - } - tests := []struct { - name string - hash func() string - expected string - }{ - { - name: "struct 1", - hash: func() string { return hashType[t1]() }, - expected: "d106c3ffbf98a0b1", - }, - { - name: "slice of struct 1", - hash: func() string { return hashType[[]t1]() }, - expected: "8122ace4ee1af0b4", - }, - { - name: "slice of struct 2", - hash: func() string { return hashType[[]t2]() }, - expected: "8cc04b5808be5bf9", - }, - { - name: "ptr 1", - hash: func() string { return hashType[*t1]() }, - expected: "d106c3ffbf98a0b1", // same hash as t1, which is ok since the structs are the same - }, - { - name: "slice of ptr 1", - hash: func() string { return hashType[[]*t1]() }, - expected: "8122ace4ee1af0b4", // same hash as []t1, again underlying serialization is the same - }, - { - name: "slice of ptr 2", - hash: func() string { return hashType[[]*t2]() }, - expected: "8cc04b5808be5bf9", // same hash as []t2, underlying serialization is the same - }, - { - name: "slice of ptr of slice of ptr", - hash: func() string { return hashType[[]*[]*t1]() }, - expected: "500d9f5b3a5977ce", - }, - { - name: "generic 1", - hash: func() string { return hashType[generic[t1]]() }, - expected: "b5fbb30e24400e81", - }, - { - name: "generic 2", - hash: func() string { return hashType[generic[t2]]() }, - expected: "becdb767c6b22bfa", - }, - { - name: "generic with ptr 1", - hash: func() string { return hashType[generic[*t1]]() }, - expected: "30c8855bf290fd83", - }, - { - name: "generic with ptr 2", - hash: func() string { return hashType[generic[*t2]]() }, - expected: "b66366b6ce9e6361", - }, - { - name: "generic with slice 1", - hash: func() string { return hashType[generic[[]t1]]() }, - expected: "d2ed158942fa6c29", - }, - { - name: "generic with slice 2", - hash: func() string { return hashType[generic[[]t2]]() }, - expected: "7a7bec575871c179", - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - require.Equal(t, test.expected, test.hash()) - }) - } -} - -func Test_hashIgnores(t *testing.T) { - hash := func(v any) string { - v, err := hashstructure.Hash(v, hashstructure.FormatV2, &hashstructure.HashOptions{}) - require.NoError(t, err) - return fmt.Sprintf("%x", v) - } - type t1 struct { - Name string - notExported string - } - require.Equal(t, hash(t1{notExported: "a value"}), hashType[t1]()) - - type t2 struct { - Name string - Exported string `hash:"ignore"` - } - require.Equal(t, hash(t2{Exported: "another value"}), hashType[t2]()) - - type t3 struct { - Name string - Exported string `hash:"-"` - } - require.Equal(t, hash(t3{Exported: "still valued"}), hashType[t3]()) -} diff --git a/internal/cache/memory.go b/internal/cache/memory.go deleted file mode 100644 index 696f8c9bede..00000000000 --- a/internal/cache/memory.go +++ /dev/null @@ -1,19 +0,0 @@ -package cache - -import ( - "time" - - "github.com/spf13/afero" -) - -// NewInMemory returns an in-memory only cache manager -func NewInMemory(ttl time.Duration) Manager { - if ttl <= 0 { - return &bypassedCache{} - } - return &filesystemCache{ - dir: "", - fs: afero.NewMemMapFs(), - ttl: ttl, - } -} diff --git a/internal/cache/memory_test.go b/internal/cache/memory_test.go deleted file mode 100644 index e8bcbef7fa2..00000000000 --- a/internal/cache/memory_test.go +++ /dev/null @@ -1,37 +0,0 @@ -package cache - -import ( - "io" - "strings" - "testing" - "time" - - "github.com/stretchr/testify/require" - - "github.com/anchore/syft/internal" -) - -func Test_NewInMemory(t *testing.T) { - man := NewInMemory(time.Hour) - - cacheName := "test" - cacheVersion := "v1" - cache := man.GetCache(cacheName, cacheVersion) - - cacheKey := "test-key" - contentsValue := "some contents to cache" - - err := cache.Write(cacheKey, strings.NewReader(contentsValue)) - require.NoError(t, err) - - rdr, err := cache.Read(cacheKey) - require.NoError(t, err) - defer internal.CloseAndLogError(rdr, cacheKey) - - contents, err := io.ReadAll(rdr) - require.NoError(t, err) - require.Equal(t, contentsValue, string(contents)) - - _, err = cache.Read("otherKey") - require.ErrorIs(t, err, errNotFound) -} diff --git a/internal/cache/resolver.go b/internal/cache/resolver.go deleted file mode 100644 index 68af21a2be5..00000000000 --- a/internal/cache/resolver.go +++ /dev/null @@ -1,83 +0,0 @@ -package cache - -import ( - "bytes" - "encoding/json" - "fmt" - "path" - - "github.com/anchore/syft/internal" - "github.com/anchore/syft/internal/log" -) - -// Resolver interface provides a single Resolve method, which will return from cache -// or call the provided resolve function to get the value if not available in cache -type Resolver[T any] interface { - // Resolve attempts to resolve the given key from cache and convert it to the type of the cache, - // or calls the resolver function if unable to resolve a cached value - Resolve(key string, resolver resolverFunc[T]) (T, error) -} - -// GetResolver returns a cache resolver for persistent cached data across Syft runs, stored in a unique -// location based on the provided name and versioned by the type -func GetResolver[T any](name, version string) Resolver[T] { - typeHash := hashType[T]() - versionKey := path.Join(version, typeHash) - return &cacheResolver[T]{ - name: fmt.Sprintf("%s/%s", name, versionKey), - cache: manager.GetCache(name, versionKey), - } -} - -const resolverKeySuffix = ".json" - -type resolverFunc[T any] func() (T, error) - -type cacheResolver[T any] struct { - name string - cache Cache -} - -var _ interface { - Resolver[int] -} = (*cacheResolver[int])(nil) - -func (r *cacheResolver[T]) Resolve(key string, resolver resolverFunc[T]) (T, error) { - key += resolverKeySuffix - - rdr, err := r.cache.Read(key) - if rdr == nil || err != nil { - return r.resolveAndCache(key, resolver) - } - defer internal.CloseAndLogError(rdr, key) - - dec := json.NewDecoder(rdr) - if dec == nil { - log.Tracef("error getting cache json decoder for %s %v: %v", r.name, key, err) - return r.resolveAndCache(key, resolver) - } - var t T - err = dec.Decode(&t) - if err != nil { - log.Tracef("error decoding cached entry for %s %v: %v", r.name, key, err) - return r.resolveAndCache(key, resolver) - } - // no error, able to resolve from cache - return t, nil -} - -func (r *cacheResolver[T]) resolveAndCache(key string, resolver func() (T, error)) (T, error) { - t, err := resolver() - if err != nil { - return t, err - } - var data bytes.Buffer - enc := json.NewEncoder(&data) - enc.SetEscapeHTML(false) - err = enc.Encode(t) - if err != nil { - return t, err - } - err = r.cache.Write(key, &data) - return t, err -} diff --git a/internal/cache/resolver_test.go b/internal/cache/resolver_test.go deleted file mode 100644 index 4a74d052616..00000000000 --- a/internal/cache/resolver_test.go +++ /dev/null @@ -1,92 +0,0 @@ -package cache - -import ( - "encoding/json" - "fmt" - "testing" - "time" - - "github.com/stretchr/testify/require" -) - -func Test_Resolver(t *testing.T) { - original := GetManager() - defer SetManager(original) - SetManager(NewInMemory(time.Hour)) - - type sub struct { - Name string - Value bool - } - - type thing struct { - Value string - Values []int - Subs []*sub - } - - versionHash := hashType[thing]() - cache := GetManager().GetCache("test", "v7/"+versionHash) - - resolver := GetResolver[thing]("test", "v7") - require.NotNil(t, resolver) - - require.IsType(t, &cacheResolver[thing]{}, resolver) - cr := resolver.(*cacheResolver[thing]) - - require.IsType(t, cache, cr.cache) - - resolveErrCount := 0 - resolveThingErr := func() (thing, error) { - resolveErrCount++ - return thing{}, fmt.Errorf("an error") - } - - _, err := resolver.Resolve("err", resolveThingErr) - require.ErrorContains(t, err, "an error") - require.Equal(t, 1, resolveErrCount) - - _, err = resolver.Resolve("err", resolveThingErr) - require.ErrorContains(t, err, "an error") - require.Equal(t, 2, resolveErrCount) - - aThing := thing{ - Value: "a value", - Values: []int{7, 8, 9}, - Subs: []*sub{ - { - Name: "sub1", - Value: true, - }, - { - Name: "sub2", - Value: false, - }, - }, - } - - resolveThingCount := 0 - resolveThing := func() (thing, error) { - resolveThingCount++ - return aThing, nil - } - - val, err := resolver.Resolve("thing", resolveThing) - require.NoError(t, err) - require.Equal(t, 1, resolveThingCount) - require.Equal(t, aThing, val) - - val, err = resolver.Resolve("thing", resolveThing) - require.NoError(t, err) - require.Equal(t, 1, resolveThingCount) - require.Equal(t, aThing, val) - - rdr, err := cache.Read("thing" + resolverKeySuffix) - require.NoError(t, err) - decoder := json.NewDecoder(rdr) - - var val2 thing - err = decoder.Decode(&val2) - require.NoError(t, err) - require.Equal(t, aThing, val2) -} diff --git a/internal/cachemanager/cache.go b/internal/cachemanager/cache.go new file mode 100644 index 00000000000..091b2d8d54e --- /dev/null +++ b/internal/cachemanager/cache.go @@ -0,0 +1,28 @@ +package cachemanager + +import ( + "fmt" + + "github.com/anchore/go-cache" +) + +// Get returns the global cache manager, which is used to instantiate all caches +func Get() cache.Manager { + return manager +} + +// Set sets the global cache manager, which is used to instantiate all caches. +// Setting this to nil disables caching. +func Set(m cache.Manager) { + if m == nil { + manager = cache.NewBypassed() + } else { + manager = m + } +} + +func GetResolverCachingErrors[T any](name, version string) cache.Resolver[T] { + return cache.NewResolverCachingErrors[T](manager.GetCache(name, fmt.Sprintf("%s/%s", version, cache.HashType[T]()))) +} + +var manager = cache.NewBypassed() diff --git a/internal/cachemanager/cache_test.go b/internal/cachemanager/cache_test.go new file mode 100644 index 00000000000..082750f499d --- /dev/null +++ b/internal/cachemanager/cache_test.go @@ -0,0 +1,38 @@ +package cachemanager_test + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/anchore/go-cache" + "github.com/anchore/syft/internal/cachemanager" +) + +func Test_Set(t *testing.T) { + original := cachemanager.Get() + defer cachemanager.Set(original) + + cachemanager.Set(nil) + + require.NotNil(t, cachemanager.Get()) + require.IsType(t, cache.NewBypassed(), cachemanager.Get()) + + cachemanager.Set(cache.NewInMemory(0)) + + require.NotNil(t, cachemanager.Get()) + require.IsType(t, cache.NewBypassed(), cachemanager.Get()) + + cachemanager.Set(cache.NewInMemory(1 * time.Hour)) + + require.NotNil(t, cachemanager.Get()) + + typ, err := cache.NewFromDir(nil, t.TempDir(), time.Hour) + require.NoError(t, err) + require.IsType(t, typ, cachemanager.Get()) + + cachemanager.Set(nil) + require.NotNil(t, cachemanager.Get()) + require.IsType(t, cache.NewBypassed(), cachemanager.Get()) +} diff --git a/syft/lib.go b/syft/lib.go index 8ebb3cfbab4..66155c6c692 100644 --- a/syft/lib.go +++ b/syft/lib.go @@ -19,8 +19,10 @@ package syft import ( "github.com/wagoodman/go-partybus" + "github.com/anchore/go-cache" "github.com/anchore/go-logger" "github.com/anchore/syft/internal/bus" + "github.com/anchore/syft/internal/cachemanager" "github.com/anchore/syft/internal/log" ) @@ -33,3 +35,8 @@ func SetLogger(logger logger.Logger) { func SetBus(b *partybus.Bus) { bus.Set(b) } + +// SetCacheManager sets the syft cache.Manager implementation +func SetCacheManager(manager cache.Manager) { + cachemanager.Set(manager) +} diff --git a/syft/lib_test.go b/syft/lib_test.go index abb0e0027fe..616764692fd 100644 --- a/syft/lib_test.go +++ b/syft/lib_test.go @@ -2,9 +2,12 @@ package syft import ( "testing" + "time" "github.com/stretchr/testify/require" + "github.com/anchore/go-cache" + "github.com/anchore/syft/internal/cachemanager" "github.com/anchore/syft/internal/relationship" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" @@ -41,3 +44,13 @@ func Test_removeRelationshipsByID(t *testing.T) { require.Equal(t, rel(p3), relationships) } + +func Test_SetCacheManager(t *testing.T) { + existing := cachemanager.Get() + defer cachemanager.Set(existing) + + impl, err := cache.NewFromDir(nil, t.TempDir(), time.Hour) + require.NoError(t, err) + SetCacheManager(impl) + require.Equal(t, impl, cachemanager.Get()) +} diff --git a/syft/pkg/cataloger/golang/licenses.go b/syft/pkg/cataloger/golang/licenses.go index 0b02c6581c1..23c7638ce06 100644 --- a/syft/pkg/cataloger/golang/licenses.go +++ b/syft/pkg/cataloger/golang/licenses.go @@ -20,8 +20,9 @@ import ( "github.com/go-git/go-git/v5/storage/memory" "github.com/scylladb/go-set/strset" + "github.com/anchore/go-cache" "github.com/anchore/syft/internal" - "github.com/anchore/syft/internal/cache" + "github.com/anchore/syft/internal/cachemanager" "github.com/anchore/syft/internal/licenses" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/file" @@ -55,7 +56,7 @@ func newGoLicenseResolver(catalogerName string, opts CatalogerConfig) goLicenseR catalogerName: catalogerName, opts: opts, localModCacheDir: localModCacheDir, - licenseCache: cache.GetResolverCachingErrors[[]goLicense]("golang", "v1"), + licenseCache: cachemanager.GetResolverCachingErrors[[]goLicense]("golang", "v1"), lowerLicenseFileNames: strset.New(lowercaseLicenseFiles()...), } } diff --git a/syft/pkg/cataloger/java/maven_resolver.go b/syft/pkg/cataloger/java/maven_resolver.go index f9d375b8ea3..a5d3a3b6094 100644 --- a/syft/pkg/cataloger/java/maven_resolver.go +++ b/syft/pkg/cataloger/java/maven_resolver.go @@ -18,8 +18,9 @@ import ( "github.com/vifraa/gopom" + "github.com/anchore/go-cache" "github.com/anchore/syft/internal" - "github.com/anchore/syft/internal/cache" + "github.com/anchore/syft/internal/cachemanager" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/file" ) @@ -55,7 +56,7 @@ type mavenResolver struct { func newMavenResolver(fileResolver file.Resolver, cfg ArchiveCatalogerConfig) *mavenResolver { return &mavenResolver{ cfg: cfg, - cache: cache.GetManager().GetCache("java/maven/repo", "v1"), + cache: cachemanager.Get().GetCache("java/maven/repo", "v1"), resolved: map[mavenID]*gopom.Project{}, remoteRequestTimeout: time.Second * 10, fileResolver: fileResolver, diff --git a/syft/source/directorysource/cache_excludes.go b/syft/source/directorysource/cache_excludes.go index 59c7c490ed7..8f8409c990e 100644 --- a/syft/source/directorysource/cache_excludes.go +++ b/syft/source/directorysource/cache_excludes.go @@ -4,7 +4,7 @@ import ( "os" "strings" - "github.com/anchore/syft/internal/cache" + "github.com/anchore/syft/internal/cachemanager" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/internal/fileresolver" ) @@ -12,7 +12,7 @@ import ( // we do not want to cache things and then subsequently scan them, if, for example a user runs `syft /` twice func excludeCachePathVisitors() []fileresolver.PathIndexVisitor { var out []fileresolver.PathIndexVisitor - for _, dir := range cache.GetManager().RootDirs() { + for _, dir := range cachemanager.Get().RootDirs() { out = append(out, excludeCacheDirPathVisitor{ dir: dir, }.excludeCacheDir)