From 9fe0113343a8cfd66f04b3171ec928ce7abb5c31 Mon Sep 17 00:00:00 2001 From: Euan Harris Date: Tue, 3 Dec 2024 15:53:02 +0000 Subject: [PATCH] containertool: Use same gzip headers on Linux and macOS Motivation ---------- Packaging the same binary using the same version of `containertool` produces different application image layers on macOS and Linux: ``` linux% swift run containertool --verbose --repository registry.test:5000/hello hello-world --from scratch ... Uploading application layer application layer: sha256:54a282d5cd082320d2d4976e7d9a952da46e3bc4bab3ce1e0b3931ccf945b849 (80394382 bytes) image configuration: sha256:fdcb887ef6e27a09456419b03b1d8353b15d68d088b8ea023f38af892fca69be (462 bytes) ... macos% swift run containertool --verbose --repository registry.test:5000/hello hello-world --from scratch ... Uploading application layer application layer: sha256:08a21093e79423c17b58325decc48d7196481ed55276c2d168de23a75d38727e (80394382 bytes) image configuration: sha256:2648cd8cca1cad7ec5b386e8433e36ca77a40e31859e5994260b2ef1d07f0753 (462 bytes) ... ``` The `application layer` hashes are different, even though they contain the same binary. The `image configuration` metadata blob hashes also differ, but they contain timestamps so this will continue to happen even after this PR is merged. A future change could make these timestamps default to the epoch, allowing identical metadata blobs to be created on Linux and macOS as well. The image layer is a gzipped TAR archive containing the executable. Saving the intermediate steps shows that the TAR archives are identical and the gzipped streams are different, but only by one byte: ``` % diff <(hexdump -X linux-image.tar.gz) <(hexdump -X darwin-image.tar.gz) 1c1 < 0000000 1f 8b 08 00 00 00 00 00 00 03 ed 57 eb 6e 1c b7 --- > 0000000 1f 8b 08 00 00 00 00 00 00 13 ed 57 eb 6e 1c b7 ``` The difference is in the 10th byte of the gzip header: the [OS field](https://datatracker.ietf.org/doc/html/rfc1952#page-5). 
RFC 1952 defines a list of [known operating systems](https://datatracker.ietf.org/doc/html/rfc1952#page-8): `0x03` is the OS code for Unix. The RFC was written in 1996, so its `Macintosh` entry refers to the classic Mac OS rather than to macOS. Zlib uses an updated operating system list https://github.com/madler/zlib/commit/ce12c5cd00628bf8f680c98123a369974d32df15 which defines `19` / `0x13` as the OS code for Darwin. Interestingly, using `gzip` to compress a file directly produces identical results on macOS and Linux (`-n` is needed to prevent `gzip` from including the current timestamp on macOS): ``` linux% cat hello-world | gzip -n | md5sum ef64adbee9e89e78114000442a804e0e - macos% cat hello-world | gzip -n | md5sum ef64adbee9e89e78114000442a804e0e - ``` Modifications ------------- By default, Zlib uses the value of `OS_CODE` [set at compile time](https://github.com/madler/zlib/blob/ef24c4c7502169f016dcd2a26923dbaf3216748c/deflate.c#L1054). This commit uses [deflateSetHeader()](https://github.com/madler/zlib/blob/ef24c4c7502169f016dcd2a26923dbaf3216748c/deflate.c#L705) to override the default gzip header, forcing the OS code to be `255` / `0xff` (Unknown) on both Linux and macOS. Result ------ After this change, image layers containing the same binary will use identical gzip headers and should have the same hash whether they are built on Linux or macOS. It is still possible that different versions of Zlib might produce different compressed data, causing the overall hashes to change. Test Plan --------- Tested manually on macOS and Linux, verifying that image layers containing identical binaries have identical hashes. 
--- Package.swift | 2 +- Sources/containertool/gzip.swift | 26 ++++++++++++++++++++ Tests/containertoolTests/ZlibTests.swift | 31 ++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 Tests/containertoolTests/ZlibTests.swift diff --git a/Package.swift b/Package.swift index 3e7ef3e..4343ddb 100644 --- a/Package.swift +++ b/Package.swift @@ -86,7 +86,7 @@ let package = Package( name: "ContainerRegistryTests", dependencies: [.target(name: "ContainerRegistry")], resources: [.process("Resources")] - ), + ), .testTarget(name: "containertoolTests", dependencies: [.target(name: "containertool")]), ], swiftLanguageModes: [.v6] ) diff --git a/Sources/containertool/gzip.swift b/Sources/containertool/gzip.swift index 2234b5f..6db9b4d 100644 --- a/Sources/containertool/gzip.swift +++ b/Sources/containertool/gzip.swift @@ -35,10 +35,36 @@ func gzip(_ bytes: [UInt8]) -> [UInt8] { stream.zfree = nil stream.opaque = nil + // Force identical gzip headers to be created on Linux and macOS. + // + // RFC1952 defines operating system codes which can be embedded in the gzip header. + // + // * Initially, zlib generated a default gzip header with the + // OS field set to `Unknown` (255). + // * https://github.com/madler/zlib/commit/0484693e1723bbab791c56f95597bd7dbe867d03 + // changed the default to `Unix` (3). + // * https://github.com/madler/zlib/commit/ce12c5cd00628bf8f680c98123a369974d32df15 + // changed the default to use a value based on the OS detected + // at compile time. After this, zlib on Linux continued to + // use `Unix` (3) whereas macOS started to use `Apple` (19). + // + // According to RFC1952 Section 2.3.1.2. (Compliance), `Unknown` + // 255 should be used by default where the OS on which the file + // was created is not known. + // + // Different versions of zlib might still produce different + // compressed output for the same input, but using the same default + // value removes one source of differences between platforms. 
+ + let gz_os_unknown = Int32(255) + var header = gz_header() + header.os = gz_os_unknown + let windowBits: Int32 = 15 + 16 let level = Z_DEFAULT_COMPRESSION let memLevel: Int32 = 8 let rc = CNIOExtrasZlib_deflateInit2(&stream, level, Z_DEFLATED, windowBits, memLevel, Z_DEFAULT_STRATEGY) + deflateSetHeader(&stream, &header) precondition(rc == Z_OK, "Unexpected return from zlib init: \(rc)") diff --git a/Tests/containertoolTests/ZlibTests.swift b/Tests/containertoolTests/ZlibTests.swift new file mode 100644 index 0000000..c9152ac --- /dev/null +++ b/Tests/containertoolTests/ZlibTests.swift @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the SwiftContainerPlugin open source project +// +// Copyright (c) 2024 Apple Inc. and the SwiftContainerPlugin project authors +// Licensed under Apache License v2.0 +// +// See LICENSE.txt for license information +// See CONTRIBUTORS.txt for the list of SwiftContainerPlugin project authors +// +// SPDX-License-Identifier: Apache-2.0 +// +//===----------------------------------------------------------------------===// + +import Foundation +@testable import containertool +import Crypto +import XCTest + +class ZlibTests: XCTestCase, @unchecked Sendable { + // Check that compressing the same data on macOS and Linux produces the same output. + func testGzipHeader() async throws { + let data = "test" + + let result = gzip([UInt8](data.utf8)) + XCTAssertEqual( + "\(SHA256.hash(data: result))", + "SHA256 digest: 7dff8d09129482017247cb373e8138772e852a1a02f097d1440387055d2be69c" + ) + } +}