diff --git a/lib/builds/builder_agent/main.go b/lib/builds/builder_agent/main.go index beb5b182..045b3005 100644 --- a/lib/builds/builder_agent/main.go +++ b/lib/builds/builder_agent/main.go @@ -728,11 +728,14 @@ func runBuild(ctx context.Context, config *BuildConfig, logWriter io.Writer) (st } // Export cache based on build type + // Note: image-manifest=true (set together with oci-mediatypes=true below) ensures layer blobs are stored in the registry cache image + // rather than as references to external registries (e.g., docker.io). This is critical + // for cache hits in ephemeral BuildKit instances that don't have local layer storage. if config.IsAdminBuild { // Admin build: export to global cache if config.GlobalCacheKey != "" { globalCacheRef := fmt.Sprintf("%s/cache/global/%s", registryHost, config.GlobalCacheKey) - cacheOpts := "type=registry,ref=" + globalCacheRef + ",mode=max" + cacheOpts := "type=registry,ref=" + globalCacheRef + ",mode=max,image-manifest=true,oci-mediatypes=true" if useInsecureFlag { cacheOpts += ",registry.insecure=true" } @@ -743,7 +746,7 @@ func runBuild(ctx context.Context, config *BuildConfig, logWriter io.Writer) (st // Regular build: export to tenant cache if config.CacheScope != "" { tenantCacheRef := fmt.Sprintf("%s/cache/%s", registryHost, config.CacheScope) - cacheOpts := "type=registry,ref=" + tenantCacheRef + ",mode=max" + cacheOpts := "type=registry,ref=" + tenantCacheRef + ",mode=max,image-manifest=true,oci-mediatypes=true" if useInsecureFlag { cacheOpts += ",registry.insecure=true" } diff --git a/lib/builds/cache.go b/lib/builds/cache.go index ff3e26a8..f47e3318 100644 --- a/lib/builds/cache.go +++ b/lib/builds/cache.go @@ -100,8 +100,10 @@ func (k *CacheKey) ImportCacheArg() string { } // ExportCacheArg returns the BuildKit --export-cache argument +// Uses image-manifest=true (together with oci-mediatypes=true) to ensure layer blobs are stored in the cache image +// rather than as external references, enabling cache hits in ephemeral BuildKit instances. 
func (k *CacheKey) ExportCacheArg() string { - return fmt.Sprintf("type=registry,ref=%s,mode=max", k.Reference) + return fmt.Sprintf("type=registry,ref=%s,mode=max,image-manifest=true,oci-mediatypes=true", k.Reference) } // normalizeCacheScope normalizes a cache scope to only contain safe characters diff --git a/lib/builds/cache_test.go b/lib/builds/cache_test.go index d51fb7cf..7f3637b6 100644 --- a/lib/builds/cache_test.go +++ b/lib/builds/cache_test.go @@ -103,7 +103,7 @@ func TestCacheKey_Args(t *testing.T) { assert.Equal(t, "type=registry,ref=localhost:8080/cache/tenant/nodejs/abc123", importArg) exportArg := key.ExportCacheArg() - assert.Equal(t, "type=registry,ref=localhost:8080/cache/tenant/nodejs/abc123,mode=max", exportArg) + assert.Equal(t, "type=registry,ref=localhost:8080/cache/tenant/nodejs/abc123,mode=max,image-manifest=true,oci-mediatypes=true", exportArg) } func TestValidateCacheScope(t *testing.T) { diff --git a/lib/images/oci_test.go b/lib/images/oci_test.go new file mode 100644 index 00000000..592da9ac --- /dev/null +++ b/lib/images/oci_test.go @@ -0,0 +1,190 @@ +package images + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// BuildKit cache config mediatype - this is what BuildKit uses when exporting +// cache with image-manifest=true +const buildKitCacheConfigMediaType = "application/vnd.buildkit.cacheconfig.v0" + +// TestUnpackLayersFailsOnBuildKitCacheMediatype verifies that hypeman's image +// unpacker fails when encountering BuildKit cache images. This reproduces the +// production issue where global cache images exported by BuildKit cannot be +// pre-pulled by hypeman because they use a non-standard config mediatype. +// +// The error occurs because: +// 1. BuildKit exports cache with --export-cache type=registry,image-manifest=true +// 2. 
The exported manifest uses "application/vnd.buildkit.cacheconfig.v0" as config mediatype +// 3. hypeman's unpackLayers expects "application/vnd.oci.image.config.v1+json" +// 4. umoci.UnpackRootfs fails with "config blob is not correct mediatype" +func TestUnpackLayersFailsOnBuildKitCacheMediatype(t *testing.T) { + // Create a temp directory for the OCI layout + cacheDir := t.TempDir() + + // Create OCI layout structure with BuildKit cache mediatype + err := createBuildKitCacheLayout(cacheDir, "test-cache") + require.NoError(t, err, "failed to create mock BuildKit cache layout") + + // Create OCI client and try to unpack + client, err := newOCIClient(cacheDir) + require.NoError(t, err) + + targetDir := t.TempDir() + err = client.unpackLayers(context.Background(), "test-cache", targetDir) + + // This should fail with a mediatype error + require.Error(t, err, "unpackLayers should fail on BuildKit cache mediatype") + assert.Contains(t, err.Error(), "config", "error should mention config") + + t.Logf("Got expected error: %v", err) +} + +// TestExtractMetadataSucceedsOnBuildKitCache verifies that extractOCIMetadata +// does NOT fail on BuildKit cache images - it's go-containerregistry which is +// lenient about mediatypes. The failure only happens during unpackLayers when +// umoci tries to unpack the rootfs. 
+func TestExtractMetadataSucceedsOnBuildKitCache(t *testing.T) { + cacheDir := t.TempDir() + + err := createBuildKitCacheLayout(cacheDir, "test-cache") + require.NoError(t, err) + + client, err := newOCIClient(cacheDir) + require.NoError(t, err) + + // This succeeds because go-containerregistry doesn't validate config mediatype + // The failure only happens in unpackLayers when umoci validates the config + meta, err := client.extractOCIMetadata("test-cache") + require.NoError(t, err, "extractOCIMetadata succeeds - go-containerregistry is lenient") + + // But the metadata will be empty/invalid since it's not a real OCI config + t.Logf("Got metadata (likely empty): %+v", meta) +} + +// createBuildKitCacheLayout creates an OCI layout that mimics what BuildKit +// exports when using --export-cache type=registry,image-manifest=true +// +// Layout structure: +// cacheDir/ +// ├── oci-layout (OCI layout version marker) +// ├── index.json (points to manifest) +// └── blobs/sha256/ +// ├── (image manifest with buildkit config mediatype) +// ├── (buildkit cache config blob) +// └── (dummy layer) +func createBuildKitCacheLayout(cacheDir, layoutTag string) error { + // Create directory structure + blobsDir := filepath.Join(cacheDir, "blobs", "sha256") + if err := os.MkdirAll(blobsDir, 0755); err != nil { + return err + } + + // 1. Create oci-layout file + ociLayout := map[string]string{"imageLayoutVersion": "1.0.0"} + ociLayoutBytes, _ := json.Marshal(ociLayout) + if err := os.WriteFile(filepath.Join(cacheDir, "oci-layout"), ociLayoutBytes, 0644); err != nil { + return err + } + + // 2. 
Create a dummy layer blob (a gzip stream with an empty payload - no files inside) + // This is a minimal valid gzipped tar + layerContent := []byte{ + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, // gzip header + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // empty tar + } + layerDigest := sha256Hash(layerContent) + if err := os.WriteFile(filepath.Join(blobsDir, layerDigest), layerContent, 0644); err != nil { + return err + } + + // 3. Create BuildKit cache config blob + // This is what BuildKit puts in the config - NOT a standard OCI config + cacheConfig := map[string]interface{}{ + "layers": []map[string]interface{}{ + { + "blob": "sha256:" + layerDigest, + "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", + }, + }, + } + configBytes, _ := json.Marshal(cacheConfig) + configDigest := sha256Hash(configBytes) + if err := os.WriteFile(filepath.Join(blobsDir, configDigest), configBytes, 0644); err != nil { + return err + } + + // 4. Create image manifest with BuildKit's cache config mediatype + manifest := map[string]interface{}{ + "schemaVersion": 2, + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "config": map[string]interface{}{ + "mediaType": buildKitCacheConfigMediaType, // This is the problem! + "digest": "sha256:" + configDigest, + "size": len(configBytes), + }, + "layers": []map[string]interface{}{ + { + "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", + "digest": "sha256:" + layerDigest, + "size": len(layerContent), + }, + }, + } + manifestBytes, _ := json.Marshal(manifest) + manifestDigest := sha256Hash(manifestBytes) + if err := os.WriteFile(filepath.Join(blobsDir, manifestDigest), manifestBytes, 0644); err != nil { + return err + } + + // 5. 
Create index.json pointing to the manifest with our layout tag + index := map[string]interface{}{ + "schemaVersion": 2, + "mediaType": "application/vnd.oci.image.index.v1+json", + "manifests": []map[string]interface{}{ + { + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "digest": "sha256:" + manifestDigest, + "size": len(manifestBytes), + "annotations": map[string]string{ + "org.opencontainers.image.ref.name": layoutTag, + }, + }, + }, + } + indexBytes, _ := json.Marshal(index) + if err := os.WriteFile(filepath.Join(cacheDir, "index.json"), indexBytes, 0644); err != nil { + return err + } + + return nil +} + +// sha256Hash computes the SHA256 hash of data and returns the hex string +func sha256Hash(data []byte) string { + h := sha256.Sum256(data) + return hex.EncodeToString(h[:]) +} + +// TestConvertToOCIMediaTypePassesThroughBuildKitType verifies that the +// mediatype conversion function doesn't handle BuildKit's cache config type, +// which is the root cause of the unpack failure. +func TestConvertToOCIMediaTypePassesThroughBuildKitType(t *testing.T) { + // Verify that BuildKit's mediatype passes through unchanged + result := convertToOCIMediaType(buildKitCacheConfigMediaType) + assert.Equal(t, buildKitCacheConfigMediaType, result, + "BuildKit cache config mediatype should pass through unchanged (this is the bug)") + + // Standard Docker types should be converted + assert.Equal(t, "application/vnd.oci.image.config.v1+json", + convertToOCIMediaType("application/vnd.docker.container.image.v1+json")) +} diff --git a/lib/registry/registry.go b/lib/registry/registry.go index 44535f47..651baf9c 100644 --- a/lib/registry/registry.go +++ b/lib/registry/registry.go @@ -138,7 +138,16 @@ func (w *responseWrapper) WriteHeader(code int) { } // triggerConversion queues the image for conversion to ext4 disk format. +// Skips BuildKit cache images (any repo starting with "cache/" or containing "/cache/") since they're not runnable containers. 
func (r *Registry) triggerConversion(repo, reference, dockerDigest string) { + // Skip BuildKit cache images - they use a custom mediatype that can't be + // unpacked as a standard OCI image. BuildKit imports them directly from + // the registry without needing local conversion. + // Note: repo may include host prefix (e.g., "10.102.0.1:8083/cache/global/node") + if strings.HasPrefix(repo, "cache/") || strings.Contains(repo, "/cache/") { + return + } + imageRef := repo + ":" + reference if strings.HasPrefix(reference, "sha256:") { imageRef = repo + "@" + reference