From 61de4384a8867c6bfebc24c41b3bf7ae94b28623 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 7 May 2026 12:15:49 -0400 Subject: [PATCH 1/3] tests: Fall back to upstream registry when mirror is unavailable The mirror-fixture-images.yml workflow only runs on pushes to main, so during the PR that adds a new entry to ci/fixture-images.txt the ghcr.io mirror does not yet exist. Without a fallback, the integration test would fail trying to pull from ghcr.io. Add an upstream_ref field to ContainerImage and teach pull_image() to try the mirror first, then warn and retry against the upstream registry if the mirror pull fails. Fill in upstream_ref for existing images (UBI10 and centos-bootc) so the struct is complete. Assisted-by: OpenCode (Claude Sonnet 4.6) Signed-off-by: Colin Walters --- .../src/tests/digest_stability.rs | 46 ++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/crates/composefs-integration-tests/src/tests/digest_stability.rs b/crates/composefs-integration-tests/src/tests/digest_stability.rs index 704f7dad..940b63b8 100644 --- a/crates/composefs-integration-tests/src/tests/digest_stability.rs +++ b/crates/composefs-integration-tests/src/tests/digest_stability.rs @@ -9,8 +9,17 @@ use crate::{cfsctl, integration_test}; struct ContainerImage { /// Human-readable label for test output. label: &'static str, - /// OCI image reference (docker:// prefix). + /// Primary OCI image reference — the ghcr.io mirror (docker:// prefix). + /// + /// The mirror is populated by the `mirror-fixture-images.yml` workflow after + /// a PR that adds a new entry to `ci/fixture-images.txt` is merged to main. + /// During the PR itself the mirror does not exist yet, so the test falls back + /// to `upstream_ref` when this pull fails. image_ref: &'static str, + /// Upstream OCI image reference used as a fallback when `image_ref` is + /// unavailable (e.g. a PR that adds a new mirror entry before it has been + /// pushed). Should be pinned by digest for reproducibility. + upstream_ref: &'static str, /// Expected composefs image ID without `--bootable`. expected_id: &'static str, /// Expected composefs image ID with `--bootable`, or `None` if the @@ -25,6 +34,7 @@ struct ContainerImage { const UBI10: ContainerImage = ContainerImage { label: "ubi10", image_ref: "docker://ghcr.io/composefs/ci-fixture-ubi10:10.1-1772441712", + upstream_ref: "docker://registry.access.redhat.com/ubi10/ubi:10.1-1772441712", expected_id: "ff8dad033a3e6015d63d6b00c16918da27bf96cc8ddd824e521549db01013227\ 87c30a3f49e5716f8f6052d78b46308dfaaccf0dfc504d26fe58d468810c0b0e", expected_bootable_id: None, @@ -37,6 +47,7 @@ const UBI10: ContainerImage = ContainerImage { const CENTOS_BOOTC: ContainerImage = ContainerImage { label: "centos-bootc", image_ref: "docker://ghcr.io/composefs/ci-fixture-centos-bootc:stream10-d1913e3d", + upstream_ref: "docker://quay.io/centos-bootc/centos-bootc@sha256:d1913e3d616b9acb7fc2e3331be8baf844048bca2681a23d34e53e75eb18f3d0", expected_id: "ad575e0570dfb74cbc837f41715d3fba890dd983d992332eaeee93493ce112ee\ 50d3dc5f6f2a3214cc92412fe3ae936e2e9c0eac24ea787e83ef13c0a718a193", expected_bootable_id: Some( @@ -54,7 +65,38 @@ fn skip_network() -> bool { } /// Pull an OCI image and return the config digest from the pull output. +/// +/// Tries `image.image_ref` (the ghcr.io mirror) first. If that fails — +/// which is expected for PRs that add a new mirror entry before it has been +/// pushed — falls back to `image.upstream_ref` with a warning. fn pull_image( + sh: &Shell, + cfsctl: &std::path::Path, + repo: &std::path::Path, + image: &ContainerImage, + name: &str, +) -> Result { + let candidates = [(image.image_ref, false), (image.upstream_ref, true)]; + let mut last_err = None; + for (image_ref, is_fallback) in candidates { + if is_fallback { + eprintln!( + "WARNING: mirror pull failed for {}; falling back to upstream {image_ref}", + image.label, + ); + } + match try_pull_image(sh, cfsctl, repo, image_ref, name) { + Ok(config) => return Ok(config), + Err(e) => { + eprintln!("Pull of {image_ref} failed: {e:#}"); + last_err = Some(e); + } + } + } + Err(last_err.unwrap()) +} + +fn try_pull_image( sh: &Shell, cfsctl: &std::path::Path, repo: &std::path::Path, @@ -126,7 +168,7 @@ fn test_oci_container_digest_stability() -> Result<()> { cmd!(sh, "{cfsctl} --repo {repo} init --insecure").read()?; eprintln!("Pulling {} (this may take a while)...", image.label); - let config = pull_image(&sh, &cfsctl, repo, image.image_ref, image.label)?; + let config = pull_image(&sh, &cfsctl, repo, image, image.label)?; // Plain (non-bootable) image ID let plain_id = compute_id(&sh, &cfsctl, repo, &config, false)?; From 51e31e8ffb8420eeedb5d15dbd65efa7813e759d Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 7 May 2026 11:33:38 -0400 Subject: [PATCH 2/3] tar: Preserve trailing record-padding bytes after end-of-archive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GNU tar pads archives to a "record size" (typically 20 × 512 = 10240 bytes). The archive ends with two 512-byte zero blocks that tar-core emits as ParseEvent::End, but any additional zero-padding blocks that fill out the record were silently dropped. split_async() stored only what ParseEvent::End::consumed covered (1024 bytes for the two zero blocks), leaving the remaining trailing bytes unread. When cat() later reconstructed the archive, the output was shorter than the original, causing the sha256 to differ from the layer's diff_id. This broke the checksum validation path in create_filesystem() — specifically the @digest reference path that lacks a verity proof and must re-hash the reconstructed tar. The fix reads any remaining bytes after the End event until true EOF and stores them inline, making cat() byte-for-bit faithful to the original archive. This resolves "Layer has incorrect checksum" errors for images where the tar ends with GNU-style record padding, including Ubuntu 26.04 (resolute) which uses umoci/PAX format. Assisted-by: OpenCode (Claude Sonnet 4.6) Signed-off-by: Colin Walters --- crates/composefs-oci/src/tar.rs | 133 ++++++++++++++++++++++++-------- 1 file changed, 101 insertions(+), 32 deletions(-) diff --git a/crates/composefs-oci/src/tar.rs b/crates/composefs-oci/src/tar.rs index 9d72fb07..1fde0246 100644 --- a/crates/composefs-oci/src/tar.rs +++ b/crates/composefs-oci/src/tar.rs @@ -203,6 +203,26 @@ pub async fn split_async( } ParseEvent::End { consumed } => { builder.push_inline(&buf.split_to(consumed)); + // GNU tar pads archives to a "record size" (typically 20×512 = 10240 bytes). + // After the two end-of-archive zero blocks (consumed above), there may be + // additional zero-padding blocks before EOF. We must store them to reproduce + // the original byte stream faithfully for diff_id checksum verification. + // + // Note: ideally tar-core would surface these extra bytes through + // ParseEvent::End::consumed so callers don't need to know about record + // granularity; this drain is a workaround until that is addressed upstream. + // See https://github.com/composefs/tar-core/pull/24 which will obviate this. + if !buf.is_empty() { + builder.push_inline(&buf.split()); + } + loop { + buf.reserve(IO_BUF_CAPACITY); + let n = tar_stream.read_buf(&mut buf).await?; + if n == 0 { + break; + } + builder.push_inline(&buf.split()); + } break; } ParseEvent::SparseEntry { .. } => { @@ -582,6 +602,56 @@ mod tests { assert!(get_entry(&mut reader).unwrap().is_none()); } + /// Verify that a tar without any trailing record padding survives a byte-exact + /// roundtrip. This is the common case for tars produced by the Rust `tar` crate + /// and most standard tooling; it forms a baseline paired with the padding test below. + #[test] + fn test_no_record_padding_roundtrip() { + let mut tar_data = Vec::new(); + { + let mut builder = Builder::new(&mut tar_data); + append_file(&mut builder, "hello.txt", b"hello world").unwrap(); + builder.finish().unwrap(); + } + // Confirm the Rust tar crate did not add GNU record padding. + const GNU_RECORD_SIZE: usize = 20 * 512; + assert_ne!( + tar_data.len() % GNU_RECORD_SIZE, + 0, + "expected tar without GNU record padding for this test" + ); + roundtrip_tar_bytes(&tar_data); + } + + /// Verify that GNU-style record padding (zero bytes after the two end-of-archive + /// blocks, filling the archive out to a 20×512 record boundary) is preserved + /// byte-for-bit through split_async → cat(). Without the fix, the reconstructed + /// tar was shorter than the original, causing diff_id checksum failures for images + /// produced by umoci/Rockcraft (e.g. Ubuntu 26.04). + #[test] + fn test_gnu_record_padding_roundtrip() { + const GNU_RECORD_SIZE: usize = 20 * 512; // 10240 bytes + + let mut tar_data = Vec::new(); + { + let mut builder = Builder::new(&mut tar_data); + append_file(&mut builder, "hello.txt", b"hello world").unwrap(); + builder.finish().unwrap(); + } + + // Simulate GNU record padding: extend to the next record boundary with zeros. + let remainder = tar_data.len() % GNU_RECORD_SIZE; + if remainder != 0 { + tar_data.resize(tar_data.len() + (GNU_RECORD_SIZE - remainder), 0); + } + + // The tar length must now be a multiple of the record size. + assert_eq!(tar_data.len() % GNU_RECORD_SIZE, 0); + + // roundtrip_tar_bytes asserts byte-exact reproduction through the splitstream. + roundtrip_tar_bytes(&tar_data); + } + #[tokio::test] async fn test_single_small_file() { let mut tar_data = Vec::new(); @@ -1387,6 +1457,37 @@ mod tests { assert_eq!(entries[0].path, PathBuf::from(format!("/{full_path}"))); } + /// Byte-exact roundtrip: original tar bytes -> split_async -> splitstream -> cat() + /// -> assert bytes match. Catches any corruption in either the inline or + /// external code paths, including missing padding or off-by-one errors. + fn roundtrip_tar_bytes(tar_data: &[u8]) { + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + let repo = create_test_repository().unwrap(); + let (object_id, _stats) = split_async(tar_data, repo.clone(), TAR_LAYER_CONTENT_TYPE) + .await + .unwrap(); + + let mut reader: SplitStreamReader = SplitStreamReader::new( + repo.open_object(&object_id).unwrap().into(), + Some(TAR_LAYER_CONTENT_TYPE), + ) + .unwrap(); + + let mut reassembled = Vec::new(); + reader.cat(&repo, &mut reassembled).unwrap(); + assert_eq!( + reassembled.len(), + tar_data.len(), + "reassembled tar length mismatch" + ); + assert_eq!( + reassembled, tar_data, + "reassembled tar bytes differ from original" + ); + }); + } + /// Property-based tests for tar path handling. mod proptest_tests { use super::*; @@ -1527,38 +1628,6 @@ mod tests { tar_data } - /// Byte-exact roundtrip: original tar -> split_async -> splitstream -> cat() - /// -> assert bytes match. Catches any corruption in either the inline or - /// external code paths, including missing padding or off-by-one errors. - fn roundtrip_tar_bytes(tar_data: &[u8]) { - let rt = tokio::runtime::Runtime::new().unwrap(); - rt.block_on(async { - let repo = create_test_repository().unwrap(); - let (object_id, _stats) = - split_async(tar_data, repo.clone(), TAR_LAYER_CONTENT_TYPE) - .await - .unwrap(); - - let mut reader: SplitStreamReader = SplitStreamReader::new( - repo.open_object(&object_id).unwrap().into(), - Some(TAR_LAYER_CONTENT_TYPE), - ) - .unwrap(); - - let mut reassembled = Vec::new(); - reader.cat(&repo, &mut reassembled).unwrap(); - assert_eq!( - reassembled.len(), - tar_data.len(), - "reassembled tar length mismatch" - ); - assert_eq!( - reassembled, tar_data, - "reassembled tar bytes differ from original" - ); - }); - } - proptest! { #![proptest_config(ProptestConfig::with_cases(64))] From 789c48baabfca95d4ef9858f60d86400d9e3504c Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 7 May 2026 11:47:53 -0400 Subject: [PATCH 3/3] tests: Add Ubuntu 26.04 (resolute) digest stability test Ubuntu 26.04 uses umoci/PAX format tars produced by Rockcraft, which include GNU-style record padding after the end-of-archive blocks. Before the preceding fix, this caused 'Layer has incorrect checksum' errors on the @digest compute-id path because the reconstructed tar was shorter than the original. Pin the image by manifest digest (d31acef2) and record the expected composefs image ID so regressions are caught automatically. Assisted-by: OpenCode (Claude Sonnet 4.6) Signed-off-by: Colin Walters --- ci/fixture-images.txt | 6 ++++++ .../src/tests/digest_stability.rs | 18 +++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/ci/fixture-images.txt b/ci/fixture-images.txt index 07d1de4d..3e70b911 100644 --- a/ci/fixture-images.txt +++ b/ci/fixture-images.txt @@ -16,3 +16,9 @@ docker://registry.access.redhat.com/ubi10/ubi:10.1-1772441712 docker://ghcr.io/c # centos-bootc stream10, pinned by manifest digest. # Used in: crates/integration-tests/src/tests/digest_stability.rs docker://quay.io/centos-bootc/centos-bootc@sha256:d1913e3d616b9acb7fc2e3331be8baf844048bca2681a23d34e53e75eb18f3d0 docker://ghcr.io/composefs/ci-fixture-centos-bootc:stream10-d1913e3d + +# Ubuntu 26.04 (resolute), pinned by manifest digest. +# Uses umoci/PAX format tars with GNU record padding — specifically exercises +# the trailing-padding preservation fix in split_async(). +# Used in: crates/integration-tests/src/tests/digest_stability.rs +docker://docker.io/library/ubuntu@sha256:d31acef2a964b6df1f2b7e20a1525c4f2378024e087a4f8a8a9a4247e6a79573 docker://ghcr.io/composefs/ci-fixture-ubuntu-resolute:26.04-d31acef2 diff --git a/crates/composefs-integration-tests/src/tests/digest_stability.rs b/crates/composefs-integration-tests/src/tests/digest_stability.rs index 940b63b8..11fb5b95 100644 --- a/crates/composefs-integration-tests/src/tests/digest_stability.rs +++ b/crates/composefs-integration-tests/src/tests/digest_stability.rs @@ -56,8 +56,24 @@ const CENTOS_BOOTC: ContainerImage = ContainerImage { ), }; +// Ubuntu 26.04 (resolute), pinned by manifest digest. +// Mirrored from docker.io/library/ubuntu via ci/fixture-images.txt. +// Ubuntu 26.04 uses umoci/PAX format tars produced by Rockcraft, which emit +// GNU-style record padding after the end-of-archive blocks. This exercises +// the trailing-padding preservation fix in split_async() — without it the +// reconstructed tar is shorter than the original and the diff_id checksum +// fails in the @digest code path of create_filesystem(). +const UBUNTU_RESOLUTE: ContainerImage = ContainerImage { + label: "ubuntu-resolute", + image_ref: "docker://ghcr.io/composefs/ci-fixture-ubuntu-resolute:26.04-d31acef2", + upstream_ref: "docker://docker.io/library/ubuntu@sha256:d31acef2a964b6df1f2b7e20a1525c4f2378024e087a4f8a8a9a4247e6a79573", + expected_id: "150caabb982d7005db1a1d0480d57a95e84b160aa2b1159f9aae66e92ba07b36\ + 11ea38e1836eff923dc3a1a617c18494757be0f5e3db16cc7a522981b3f42d40", + expected_bootable_id: None, +}; + /// All container images to test. -const CONTAINER_IMAGES: &[&ContainerImage] = &[&UBI10, &CENTOS_BOOTC]; +const CONTAINER_IMAGES: &[&ContainerImage] = &[&UBI10, &CENTOS_BOOTC, &UBUNTU_RESOLUTE]; /// Return `true` if network tests should be skipped. fn skip_network() -> bool {