From 84948be0f2f4a80bb46625eca35ed6109d460bd7 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 19 May 2026 18:14:30 -0400 Subject: [PATCH 01/13] install: Skip immutable deployment checkouts in final SELinux relabeling Ostree sets the immutable ext4 attribute on each deployment checkout directory, which causes lsetfilecon() to return EPERM during the final SELinux relabeling pass even though those files are already correctly labeled by the earlier composefs import pass. Rather than skipping the entire ostree/deploy subtree (which would leave stateroot metadata and var directories unlabeled), enumerate the actual checkout directories under ostree/deploy//deploy/ and skip only those immutable roots by dev/ino. Also generalise ensure_dir_labeled_recurse to accept a slice of (dev, ino) pairs to skip rather than a single Option, so multiple checkout directories can be excluded in one pass. Assisted-by: OpenCode (Claude Sonnet 4.6) Signed-off-by: Colin Walters --- crates/lib/src/install.rs | 31 +++++++++++++++++++++++++++++-- crates/lib/src/lsm.rs | 29 ++++++++++++++--------------- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/crates/lib/src/install.rs b/crates/lib/src/install.rs index c9fcaf88c..f73c9da98 100644 --- a/crates/lib/src/install.rs +++ b/crates/lib/src/install.rs @@ -1208,7 +1208,7 @@ async fn install_container( &root_setup.physical_root, &mut pathbuf, policy, - Some(deployment_root_devino), + &[deployment_root_devino], ) .with_context(|| format!("Recursive SELinux relabeling of {d}"))?; } @@ -2093,7 +2093,34 @@ async fn install_to_filesystem_impl( if let Some(policy) = state.load_policy()? { tracing::info!("Performing final SELinux relabeling of physical root"); let mut path = Utf8PathBuf::from(""); - crate::lsm::ensure_dir_labeled_recurse(&rootfs.physical_root, &mut path, &policy, None) + // Ostree sets the immutable ext4 attribute on each deployment checkout + // directory, which causes lsetfilecon() to return EPERM. Those dirs + // are already correctly labeled by the earlier composefs import pass, + // so skip each checkout by dev/ino. We enumerate them directly rather + // than skipping the whole ostree/deploy subtree so that the stateroot + // and var directories (which are not immutable) still get labeled. + let mut skip: Vec<(libc::dev_t, libc::ino64_t)> = Vec::new(); + let deploy_dir = "ostree/deploy"; + if let Some(statered_dir) = rootfs.physical_root.open_dir_optional(deploy_dir)? { + for stateroot in statered_dir.entries()? { + let stateroot = stateroot?; + if !stateroot.file_type()?.is_dir() { + continue; + } + let deploy_subdir = stateroot.open_dir()?.open_dir_optional("deploy")?; + if let Some(deploy_subdir) = deploy_subdir { + for checkout in deploy_subdir.entries()? { + let checkout = checkout?; + if !checkout.file_type()?.is_dir() { + continue; + } + let m = checkout.metadata()?; + skip.push((m.dev() as libc::dev_t, m.ino() as libc::ino64_t)); + } + } + } + } + crate::lsm::ensure_dir_labeled_recurse(&rootfs.physical_root, &mut path, &policy, &skip) .context("Final SELinux relabeling of physical root")?; } else { tracing::debug!("Skipping final SELinux relabel (SELinux is disabled)"); diff --git a/crates/lib/src/lsm.rs b/crates/lib/src/lsm.rs index 2c059406d..3991c6ce6 100644 --- a/crates/lib/src/lsm.rs +++ b/crates/lib/src/lsm.rs @@ -391,13 +391,13 @@ pub(crate) fn relabel_recurse( /// Uses the `walk` API with `noxdev` and `skip_mountpoints` to avoid crossing /// mount point boundaries /// (e.g. into sysfs, procfs, etc.). -/// The provided `skip` parameter is a device/inode pair that we will ignore -/// (and not traverse into). +/// The provided `skip` parameter is a list of device/inode pairs to ignore +/// (and not traverse into). Pass an empty slice to skip nothing. pub(crate) fn ensure_dir_labeled_recurse( root: &Dir, path: &mut Utf8PathBuf, policy: &ostree::SePolicy, - skip: Option<(libc::dev_t, libc::ino64_t)>, + skip: &[(libc::dev_t, libc::ino64_t)], ) -> Result<()> { use cap_std_ext::dirext::WalkConfiguration; use std::ops::ControlFlow; @@ -432,18 +432,17 @@ pub(crate) fn ensure_dir_labeled_recurse( let metadata = component.entry.metadata()?; // Check if this entry should be skipped - if let Some((skip_dev, skip_ino)) = skip { - if (metadata.dev(), metadata.ino()) == (skip_dev, skip_ino) { - tracing::debug!("Skipping dev={skip_dev} inode={skip_ino}"); - // For directories, Break skips traversal into the directory - // but continues with the next sibling. For non-directories, - // Break would skip all remaining siblings, so use Continue - // to skip only this entry. - if component.file_type.is_dir() { - return Ok(ControlFlow::Break(())); - } else { - return Ok(ControlFlow::Continue(())); - } + let devino = (metadata.dev() as libc::dev_t, metadata.ino() as libc::ino64_t); + if skip.contains(&devino) { + tracing::debug!("Skipping dev={} inode={}", devino.0, devino.1); + // For directories, Break skips traversal into the directory + // but continues with the next sibling. For non-directories, + // Break would skip all remaining siblings, so use Continue + // to skip only this entry. + if component.file_type.is_dir() { + return Ok(ControlFlow::Break(())); + } else { + return Ok(ControlFlow::Continue(())); } } From b0765f9c6ca322806ebfb85f5ea6873db03f4e5f Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Wed, 20 May 2026 17:25:09 -0400 Subject: [PATCH 02/13] xtask: Pass --filesystem to bcvk for ostree variant too BOOTC_filesystem was silently ignored when BOOTC_variant=ostree because install_args() only emitted --filesystem inside the composefs_backend block. bcvk's --filesystem flag is not composefs-specific (the cache hash includes filesystem type and bcvk creates a fresh base disk per filesystem), so the guard was wrong. Move --filesystem before the composefs_backend block so that e.g. BOOTC_filesystem=ext4 just test-tmt-nobuild readonly actually installs on ext4, exercising the reflink-probe fallback path. Assisted-by: OpenCode (claude-sonnet-4-6@default) Signed-off-by: Colin Walters --- crates/xtask/src/bcvk.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/xtask/src/bcvk.rs b/crates/xtask/src/bcvk.rs index 7d883d205..55975688a 100644 --- a/crates/xtask/src/bcvk.rs +++ b/crates/xtask/src/bcvk.rs @@ -65,15 +65,15 @@ impl BcvkInstallOpts { /// Return the install-related args for `bcvk libvirt run`. /// /// This covers `--composefs-backend`, `--filesystem`, `--bootloader`, - /// and `--karg` flags. Note that `--bootloader` and `--filesystem` - /// are only valid when `--composefs-backend` is also set (bcvk - /// enforces this via a clap `requires` relationship). + /// and `--karg` flags. `--bootloader` is composefs-only, but + /// `--filesystem` applies to both ostree and composefs backends. pub(crate) fn install_args(&self) -> Vec { let mut args = Vec::new(); + if let Some(fs) = &self.filesystem { + args.push(format!("--filesystem={fs}")); + } if self.composefs_backend { args.push("--composefs-backend".into()); - let fs = self.filesystem.as_deref().unwrap_or("ext4"); - args.push(format!("--filesystem={fs}")); if let Some(b) = &self.bootloader { args.push(format!("--bootloader={b}")); } From 8f853c1e9bb6192079e7a332cdfc1cb93c2b986b Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Sat, 9 May 2026 18:19:03 -0400 Subject: [PATCH 03/13] cli: Inject --system into `bootc internals cfsctl` by default Without this, `bootc internals cfsctl oci images` (and other cfsctl subcommands) would fall back to cfsctl's own default repo heuristic. While cfsctl already picks /sysroot/composefs when running as root, being explicit here documents the intent and makes the behaviour consistent regardless of uid. Pass --system rather than hardcoding the path so the constant lives in one place (cfsctl's system_path()). Users can still override with --repo, --user, or --system and the injected flag is skipped in that case. Assisted-by: OpenCode (claude-sonnet-4-6@default) Signed-off-by: Colin Walters --- crates/lib/src/cli.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/crates/lib/src/cli.rs b/crates/lib/src/cli.rs index 77cf58dd8..cc8619875 100644 --- a/crates/lib/src/cli.rs +++ b/crates/lib/src/cli.rs @@ -2089,7 +2089,25 @@ async fn run_from_opt(opt: Opt) -> Result<()> { Ok(()) } }, - InternalsOpts::Cfs { args } => composefs_ctl::run_from_iter(args.iter()).await, + InternalsOpts::Cfs { args } => { + // Inject `--system` (which resolves to /sysroot/composefs) unless the + // caller has already specified a repo location via --repo, --user, or + // --system. This ensures `bootc internals cfsctl oci images` (and + // similar subcommands) operate on the booted composefs repository by + // default rather than falling back to cfsctl's own heuristic. + let has_repo_flag = args.iter().any(|a| { + let s = a.to_string_lossy(); + s == "--repo" || s.starts_with("--repo=") || s == "--user" || s == "--system" + }); + if has_repo_flag { + composefs_ctl::run_from_iter(args.iter()).await + } else { + composefs_ctl::run_from_iter( + std::iter::once(OsString::from("--system")).chain(args.into_iter()), + ) + .await + } + } InternalsOpts::Reboot => crate::reboot::reboot(), InternalsOpts::Fsck => { let storage = &get_storage().await?; From 9a11167b64e36d28b55bc8289259ef7429eb7b81 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 21 May 2026 08:24:08 -0400 Subject: [PATCH 04/13] unified-storage: Wire composefs-first pull pipeline into ostree deploy path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an end-to-end unified storage path for the ostree backend on reflink filesystems. When the composefs repo is present and unified storage has been enabled (via `bootc image set-unified`), `pull_auto` now routes through `pull_via_composefs_unified` which: 1. Pulls the image into bootc-owned containers-storage via `pull_composefs_unified` (zero-copy reflinks from the registry blobs). 2. Synthesizes an ostree commit from the composefs repo via `import_from_composefs_repo` + FICLONE, marking it META_COMPOSEFS_SYNTHESIZED so downstream code knows no per-layer blob refs exist. Unified storage mode is tracked by a new flag file `/sysroot/composefs/bootc.json` (BootcRepoMeta), written atomically on the first successful set-unified call and read cheaply on every pull_auto to decide which path to take. This avoids the previous heuristic that checked per-image presence in containers-storage and broke across `bootc switch` to a new image reference. The old ostree-native unified pull path (new_importer_with_config / check_disk_space_unified / prepare_for_pull_unified / pull_unified) is removed — it was dead code since the composefs pipeline was introduced. `bootc image copy-to-storage` is fixed for synthesized commits: because the synthesized ostree commit carries no per-layer blob refs, `ostree_ext::container::store::export()` would error out with "Refspec not found". `push_entrypoint` now detects META_COMPOSEFS_SYNTHESIZED on the resolved commit, reads the manifest and config from the commit metadata, and delegates to the composefs layer-streaming export path instead. The composefs export helper (`export_repo_to_image`) is refactored: the streaming core is extracted into `export_composefs_to_dest` so it can be called from both the composefs-boot and ostree-boot code paths. A new TMT plan (plan-44-readonly-unified) onboards a system to unified storage and then runs the full readonly test suite against it. Assisted-by: OpenCode (Claude Sonnet 4.6) Signed-off-by: Colin Walters --- crates/lib/src/bootc_composefs/export.rs | 83 +- crates/lib/src/image.rs | 46 + crates/lib/src/lsm.rs | 5 +- .../src/container/composefs_import.rs | 970 ++++++++++++++++++ crates/ostree-ext/src/container/mod.rs | 1 + crates/ostree-ext/src/container/store.rs | 94 +- crates/ostree-ext/tests/it/main.rs | 227 ++++ docs/src/experimental-unified-storage.md | 34 +- tmt/plans/integration.fmf | 8 + .../readonly/032-test-composefs-repo.nu | 36 + tmt/tests/booted/test-44-readonly-unified.nu | 67 ++ tmt/tests/tests.fmf | 5 + 12 files changed, 1523 insertions(+), 53 deletions(-) create mode 100644 crates/ostree-ext/src/container/composefs_import.rs create mode 100644 tmt/tests/booted/readonly/032-test-composefs-repo.nu create mode 100644 tmt/tests/booted/test-44-readonly-unified.nu diff --git a/crates/lib/src/bootc_composefs/export.rs b/crates/lib/src/bootc_composefs/export.rs index ea760e603..6fe774ff4 100644 --- a/crates/lib/src/bootc_composefs/export.rs +++ b/crates/lib/src/bootc_composefs/export.rs @@ -7,44 +7,27 @@ use composefs_ctl::composefs; use composefs_ctl::composefs_oci; use composefs_oci::open_config; use ocidir::{OciDir, oci_spec::image::Platform}; +use ostree_ext::container::ImageReference; use ostree_ext::container::Transport; use ostree_ext::container::skopeo; use tar::EntryType; use crate::image::get_imgrefs_for_copy; use crate::{ - bootc_composefs::status::{get_composefs_status, get_imginfo}, - store::{BootedComposefs, Storage}, + bootc_composefs::status::{ImgConfigManifest, get_composefs_status, get_imginfo}, + store::{BootedComposefs, ComposefsRepository, Storage}, }; -/// Exports a composefs repository to a container image in containers-storage: -pub async fn export_repo_to_image( - storage: &Storage, - booted_cfs: &BootedComposefs, - source: Option<&str>, - target: Option<&str>, +/// Streams a composefs OCI image out to a destination image reference. +/// +/// Given a composefs repository handle and image metadata (manifest + config), +/// reconstructs the container image by reading layer data from the composefs +/// splitstreams and copies the assembled OCI image to `dest_imgref` via skopeo. +pub(crate) async fn export_composefs_to_dest( + composefs_repo: &ComposefsRepository, + imginfo: &ImgConfigManifest, + dest_imgref: &ImageReference, ) -> Result<()> { - let host = get_composefs_status(storage, booted_cfs).await?; - - let (source, dest_imgref) = get_imgrefs_for_copy(&host, source, target).await?; - - let mut depl_verity = None; - - for depl in host.list_deployments() { - let img = &depl.image.as_ref().unwrap().image; - - // Not checking transport here as we'll be pulling from the repo anyway - // So, image name is all we need - if img.image == source.name { - depl_verity = Some(depl.require_composefs()?.verity.clone()); - break; - } - } - - let depl_verity = depl_verity.ok_or_else(|| anyhow::anyhow!("Image {source} not found"))?; - - let imginfo = get_imginfo(storage, &depl_verity)?; - let config_digest = imginfo.manifest.config().digest().clone(); let var_tmp = @@ -54,7 +37,7 @@ pub async fn export_repo_to_image( let oci_dir = OciDir::ensure(tmpdir.try_clone()?).context("Opening OCI")?; // Use composefs_oci::open_config to get the config and layer map - let open = open_config(&*booted_cfs.repo, &config_digest, None).context("Opening config")?; + let open = open_config(composefs_repo, &config_digest, None).context("Opening config")?; let config = open.config; let layer_map = open.layer_refs; @@ -77,7 +60,7 @@ pub async fn export_repo_to_image( .get(old_diff_id.as_str()) .ok_or_else(|| anyhow::anyhow!("Layer {old_diff_id} not found in config"))?; - let mut layer_stream = booted_cfs.repo.open_stream("", Some(layer_verity), None)?; + let mut layer_stream = composefs_repo.open_stream("", Some(layer_verity), None)?; let mut layer_writer = oci_dir.create_layer(None)?; layer_writer.follow_symlinks(false); @@ -113,7 +96,7 @@ pub async fn export_repo_to_image( match layer_stream.read_exact(size as usize, ((size as usize) + 511) & !511)? { SplitStreamData::External(obj_id) => match header.entry_type() { EntryType::Regular | EntryType::Continuous => { - let file = File::from(booted_cfs.repo.open_object(&obj_id)?); + let file = File::from(composefs_repo.open_object(&obj_id)?); layer_writer .append(&header, file) @@ -196,7 +179,7 @@ pub async fn export_repo_to_image( skopeo::copy( &tempoci, - &dest_imgref, + dest_imgref, None, Some(( std::sync::Arc::new(tmpdir.try_clone()?.into()), @@ -208,3 +191,37 @@ pub async fn export_repo_to_image( Ok(()) } + +/// Exports a composefs repository to a container image in containers-storage: +pub async fn export_repo_to_image( + storage: &Storage, + booted_cfs: &BootedComposefs, + source: Option<&str>, + target: Option<&str>, +) -> Result<()> { + let host = get_composefs_status(storage, booted_cfs).await?; + + let (source, dest_imgref) = get_imgrefs_for_copy(&host, source, target).await?; + + let mut depl_verity = None; + + for depl in host.list_deployments() { + let img = &depl.image.as_ref().unwrap().image; + + // Not checking transport here as we'll be pulling from the repo anyway + // So, image name is all we need + if img.image == source.name { + depl_verity = Some(depl.require_composefs()?.verity.clone()); + break; + } + } + + let depl_verity = depl_verity.ok_or_else(|| anyhow::anyhow!("Image {source} not found"))?; + + let imginfo = get_imginfo(storage, &depl_verity)?; + + println!("Copying local image {source} to {dest_imgref} ..."); + export_composefs_to_dest(&booted_cfs.repo, &imginfo, &dest_imgref).await?; + println!("Pushed: {dest_imgref}"); + Ok(()) +} diff --git a/crates/lib/src/image.rs b/crates/lib/src/image.rs index 8a6b17a54..1f621718b 100644 --- a/crates/lib/src/image.rs +++ b/crates/lib/src/image.rs @@ -235,6 +235,52 @@ pub(crate) async fn push_entrypoint( let ostree = storage.get_ostree()?; let repo = &ostree.repo(); + // Images pulled via the composefs-unified pipeline are stored in the composefs + // repository with a synthesized ostree commit that has no per-layer blob refs. + // Detect this case and fall through to the composefs export path which reads + // directly from the composefs splitstreams instead of ostree blob refs. + let ostree_ref = ostree_ext::container::store::ref_for_image(&source)?; + if let Some(rev) = repo.resolve_rev(&ostree_ref, true)? { + let (commit_obj, _) = repo.load_commit(rev.as_str())?; + let commit_meta = + ostree_ext::ostree::glib::VariantDict::new(Some(&commit_obj.child_value(0))); + let is_composefs_synthesized = commit_meta + .lookup::(ostree_ext::container::store::META_COMPOSEFS_SYNTHESIZED)? + .unwrap_or(false); + + if is_composefs_synthesized { + // The manifest and config are embedded in the commit metadata. + let manifest_str = commit_meta + .lookup::(ostree_ext::container::store::META_MANIFEST)? + .ok_or_else(|| anyhow::anyhow!("Composefs-synthesized commit missing manifest"))?; + let config_str = commit_meta + .lookup::(ostree_ext::container::store::META_CONFIG)? + .ok_or_else(|| { + anyhow::anyhow!("Composefs-synthesized commit missing image config") + })?; + + let manifest: ostree_ext::oci_spec::image::ImageManifest = + serde_json::from_str(&manifest_str) + .context("Deserializing manifest from commit metadata")?; + let config: ostree_ext::oci_spec::image::ImageConfiguration = + serde_json::from_str(&config_str) + .context("Deserializing image config from commit metadata")?; + + let imginfo = crate::bootc_composefs::status::ImgConfigManifest { config, manifest }; + let composefs_repo = storage.get_ensure_composefs()?; + + println!("Copying local image {source} to {target} ..."); + crate::bootc_composefs::export::export_composefs_to_dest( + &composefs_repo, + &imginfo, + &target, + ) + .await?; + println!("Pushed: {target}"); + return Ok(()); + } + } + let mut opts = ostree_ext::container::store::ExportToOCIOpts::default(); opts.progress_to_stdout = true; println!("Copying local image {source} to {target} ..."); diff --git a/crates/lib/src/lsm.rs b/crates/lib/src/lsm.rs index 3991c6ce6..f9b85f308 100644 --- a/crates/lib/src/lsm.rs +++ b/crates/lib/src/lsm.rs @@ -432,7 +432,10 @@ pub(crate) fn ensure_dir_labeled_recurse( let metadata = component.entry.metadata()?; // Check if this entry should be skipped - let devino = (metadata.dev() as libc::dev_t, metadata.ino() as libc::ino64_t); + let devino = ( + metadata.dev() as libc::dev_t, + metadata.ino() as libc::ino64_t, + ); if skip.contains(&devino) { tracing::debug!("Skipping dev={} inode={}", devino.0, devino.1); // For directories, Break skips traversal into the directory diff --git a/crates/ostree-ext/src/container/composefs_import.rs b/crates/ostree-ext/src/container/composefs_import.rs new file mode 100644 index 000000000..70ac58350 --- /dev/null +++ b/crates/ostree-ext/src/container/composefs_import.rs @@ -0,0 +1,970 @@ +//! Synthesize an ostree commit directly from a composefs OCI repository. +//! +//! This is the inverse of the existing ostree→composefs flow in libostree. +//! Rather than importing OCI layers through the tar pipeline, we walk the +//! composefs `FileSystem` tree and write each entry as an ostree object. +//! +//! For external regular files we bypass the ostree C library's `write_content` +//! API and instead write content objects directly to disk. This lets us +//! attempt `FICLONE` (reflink) from the composefs object fd, falling back to +//! `copy_file_range` / read-write. The ostree SHA256 content checksum is +//! still computed via the C library's `checksum_file_from_input` (a pure, +//! repo-independent function) so the serialisation format is never +//! reimplemented. +//! +//! Metadata objects (dirmeta, dirtree, commit) and small inline files continue +//! to use the ostree C API — they are tiny and benefit from the existing +//! validation. +//! +//! # Why reflink, not hardlink +//! +//! A composefs object is content-addressed by its SHA-512 fsverity digest. +//! Multiple filesystem paths with identical content share a single composefs +//! object (inode). In ostree bare mode, however, every content object carries +//! its own per-inode metadata: uid, gid, mode, and *all* xattrs including +//! `security.selinux`. The same composefs object can be referenced by two +//! ostree objects that need different SELinux labels — for example +//! `/etc/sudo.conf` (label `etc_t`) and +//! `/usr/share/doc/sudo/examples/sudo.conf` (label `usr_t`). Because a +//! single inode can hold only one `security.selinux` value, hardlinking both +//! ostree objects to the shared composefs inode is impossible: the second +//! `setxattr` would overwrite the first, causing `ostree fsck` to report a +//! checksum mismatch. +//! +//! Reflinks (FICLONE) solve this: each ostree object gets its own inode and +//! therefore its own metadata, while the underlying disk extents are shared +//! with the composefs object — zero copy, same space on the wire. +//! +//! # Filesystem requirements +//! +//! FICLONE requires a reflink-capable filesystem such as XFS or btrfs. On +//! these filesystems the ostree and composefs repositories share disk blocks +//! so installing the OS adds no extra space for the ostree content objects. +//! On ext4 (and other filesystems without reflink support) FICLONE fails with +//! `EOPNOTSUPP` and the code falls back to a byte copy; installation succeeds +//! but no block sharing occurs between the two repositories. +//! +//! # SELinux labels and the NUL-terminator fix +//! +//! `selabel()` from composefs-rs applies SELinux labels in-memory to the +//! filesystem tree before ostree synthesis begins. On SELinux-disabled hosts +//! (such as a container used for `bootc install`) the `fsetxattr` call for +//! `security.selinux` is a no-op; labels are applied by the kernel at first +//! boot during auto-relabeling. The ostree checksums are computed with the +//! correct label values (because `checksum_file_from_input` sees them from the +//! in-memory tree), so `ostree fsck` passes after boot. +//! +//! One subtlety: composefs-rs `selabel()` stores SELinux values *without* a +//! trailing NUL, but the kernel stores them *with* one. `xattrs_to_variant` +//! appends the NUL when computing the ostree checksum so that it matches what +//! `ostree fsck` reads back from the live filesystem. + +use std::collections::BTreeMap; +use std::ffi::OsStr; +use std::os::fd::{AsFd as _, AsRawFd as _, BorrowedFd}; +use std::os::unix::ffi::OsStrExt as _; + +use anyhow::{Context as _, Result}; + +use composefs_ctl::composefs::fsverity::Sha512HashValue; +use composefs_ctl::composefs::generic_tree::{Inode, Stat}; +use composefs_ctl::composefs::repository::Repository; +use composefs_ctl::composefs::tree::{Directory, Leaf, LeafContent, RegularFile}; +use composefs_ctl::composefs_boot::selabel::selabel; +use composefs_ctl::composefs_oci; + +use crate::container::store::{ + META_COMPOSEFS_SYNTHESIZED, META_CONFIG, META_MANIFEST, META_MANIFEST_DIGEST, +}; +use crate::prelude::{Cast as _, ToVariant as _}; +use crate::{gio, glib}; + +type ComposefsRepo = Repository; + +/// Tracks per-import statistics and FICLONE capability for the composefs→ostree +/// object copy loop. +/// +/// # Why reflink, not hardlink +/// +/// A composefs object is a content-addressed file identified by its SHA-512 +/// fsverity digest. Multiple filesystem paths with identical content share the +/// same composefs object (inode). In ostree's bare repo mode, however, each +/// content object carries its own metadata — uid, gid, mode, and **all xattrs +/// including `security.selinux`** — directly on the inode. +/// +/// The same composefs object can be referenced by ostree objects that need +/// different metadata. For example `/etc/sudo.conf` and +/// `/usr/share/doc/sudo/examples/sudo.conf` have identical bytes but different +/// SELinux labels (`etc_t` vs `usr_t`), producing different ostree content +/// checksums. A single inode can carry only one `security.selinux` value, so +/// hardlinking both ostree objects to the shared composefs inode is impossible: +/// whichever metadata is written second silently overwrites the first, causing +/// `ostree fsck` to report a checksum mismatch. +/// +/// Reflinks (FICLONE) solve this cleanly: each ostree object gets its own +/// inode (its own metadata) while the underlying disk extents are shared with +/// the composefs object — zero copy, same space efficiency. If the filesystem +/// does not support reflinks (e.g. ext4) we fall back to a byte copy; unified +/// storage is not possible in that configuration. +struct DirectImportContext { + /// `true` after FICLONE has failed on this device pair. + ficlone_failed: bool, + /// Count of files reflinked (FICLONE succeeded — blocks shared with composefs object). + reflinked: u64, + /// Count of files written via byte copy fallback (filesystem lacks reflink support). + copied: u64, + /// Count of objects that already existed in the repo (skipped). + existing: u64, + /// When `true`, any FICLONE failure is returned as a hard error instead of + /// silently falling back to byte copy. Set by the caller when it has already + /// confirmed that reflinks are supported on the storage (e.g. via a probe). + reflinks_required: bool, +} + +/// Convert composefs xattrs (`BTreeMap`) to the ostree GVariant format (`a(ayay)`). +/// +/// The ostree on-disk format stores xattr names as NUL-terminated C strings inside +/// `ay` (byte array) GVariant values — matching the C library's use of +/// `g_variant_new_bytestring(name)`. The validation function in libostree reads +/// names back with the `"(^&ay@ay)"` format specifier, which returns a raw pointer +/// into the GVariant buffer, and then calls `strcmp` on it — so the trailing NUL +/// *must* be present. Without it the first byte of the *value* would be treated as +/// part of the name, and if that byte happens to be `0x00` the validator throws +/// "Invalid xattr name (empty or missing NUL)". +fn xattrs_to_variant(xattrs: &BTreeMap, Box<[u8]>>) -> Option { + if xattrs.is_empty() { + return None; + } + let children = xattrs.iter().map(|(k, v)| { + // Append NUL so that ostree's C validator can read the name as a C string. + let mut name_bytes = k.as_bytes().to_vec(); + name_bytes.push(b'\0'); + // SELinux stores label values as NUL-terminated strings on the filesystem. + // The composefs selabel() function strips the NUL, but ostree fsck reads + // xattrs back from disk (where the kernel always stores them with NUL) when + // verifying checksums. Ensure our checksum matches by adding the NUL here + // for security.selinux values that are missing it. + let value: &[u8] = v.as_ref(); + let value_owned; + let value = if k.as_bytes() == b"security.selinux" + && !value.last().copied().map_or(true, |b| b == 0) + { + value_owned = [value, &[0u8]].concat(); + value_owned.as_slice() + } else { + value + }; + glib::Variant::tuple_from_iter([name_bytes.as_slice().to_variant(), value.to_variant()]) + }); + Some(glib::Variant::array_from_iter::<(&[u8], &[u8])>(children)) +} + +/// Build an ostree `DirMeta` GVariant from a composefs `Stat`. +fn create_dirmeta(stat: &Stat) -> glib::Variant { + let finfo = gio::FileInfo::new(); + finfo.set_attribute_uint32("unix::uid", stat.st_uid); + finfo.set_attribute_uint32("unix::gid", stat.st_gid); + // ostree's create_directory_metadata requires the full mode word including + // the S_IFDIR type bits. The composefs Stat only stores the permission + // bits, so we must OR in S_IFDIR ourselves. + let mode = libc::S_IFDIR | stat.st_mode; + finfo.set_attribute_uint32("unix::mode", mode); + let xattrs = xattrs_to_variant(&stat.xattrs); + crate::ostree::create_directory_metadata(&finfo, xattrs.as_ref()) +} + +/// Top-level paths from the OCI image that are API/virtual filesystems at runtime. +/// +/// These paths (`/proc`, `/sys`, etc.) must NOT have their children written into +/// the ostree commit — they are mounted as kernel-provided virtual filesystems at +/// boot time and any content baked into the image is irrelevant (and in the case +/// of device nodes, actively harmful). The directory entries themselves are +/// written as empty directories so that the mount points exist. +/// +/// This mirrors the `EXCLUDED_TOPLEVEL_PATHS` list and the filtering logic in +/// `crate::tar::write::normalize_validate_path()`. +const EXCLUDED_TOPLEVEL_PATHS: &[&str] = &["run", "tmp", "proc", "sys", "dev"]; + +/// Walk the composefs root directory, applying ostree path normalization: +/// +/// - `/etc` → `usr/etc` (ostree's deployment mechanism expects config here) +/// - Children of API/virtual-filesystem mounts (`/proc`, `/sys`, `/dev`, `/run`, +/// `/tmp`) are **omitted** — the directory entries are written as empty mount +/// points but their content is never part of the commit. +/// +/// This matches the filtering that the tar import pipeline applies via +/// `normalize_validate_path()` in `tar/write.rs`. +fn write_dir_to_mtree_remap_etc( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + dir: &Directory, + leaves: &[Leaf], + mtree: &crate::ostree::MutableTree, + ctx: &mut DirectImportContext, +) -> Result<()> { + // Write the root dirmeta first + let dirmeta = create_dirmeta(&dir.stat); + let meta_csum = orepo + .write_metadata( + crate::ostree::ObjectType::DirMeta, + None, + &dirmeta, + gio::Cancellable::NONE, + ) + .context("Writing root dirmeta")?; + mtree.set_metadata_checksum(&meta_csum.to_hex()); + + // Collect the `etc` directory entry (if any) for deferred processing + let mut etc_dir: Option<&Directory> = None; + + for (name, inode) in dir.sorted_entries() { + let name = name + .to_str() + .ok_or_else(|| anyhow::anyhow!("Non-UTF8 entry name: {:?}", name))?; + + // Intercept `etc` at the root and defer it — we'll write it + // as `usr/etc` after processing everything else. + if name == "etc" { + match inode { + Inode::Directory(child) => { + etc_dir = Some(child); + continue; + } + _ => { + // Unusual: /etc is not a directory (e.g. symlink). + // Write it as-is — the deployment code will handle it + // or fail later with a more specific error. + } + } + } + + // API/virtual-filesystem mount points: include the empty directory itself + // (so the mount point exists at runtime) but skip all children. This + // mirrors the tar importer's EXCLUDED_TOPLEVEL_PATHS handling. + if EXCLUDED_TOPLEVEL_PATHS.contains(&name) { + if let Inode::Directory(child) = inode { + let child_mtree = mtree.ensure_dir(name)?; + // Write only the dirmeta (permissions/xattrs) — no children. + let child_dirmeta = create_dirmeta(&child.stat); + let child_meta_csum = orepo + .write_metadata( + crate::ostree::ObjectType::DirMeta, + None, + &child_dirmeta, + gio::Cancellable::NONE, + ) + .with_context(|| format!("Writing dirmeta for excluded path {name}"))?; + child_mtree.set_metadata_checksum(&child_meta_csum.to_hex()); + tracing::debug!("Composefs import: skipping children of excluded path /{name}"); + continue; + } + // Non-directory at an excluded path (e.g. a symlink for /tmp)? + // Skip it entirely. + tracing::debug!("Composefs import: skipping non-directory excluded path /{name}"); + continue; + } + + match inode { + Inode::Directory(child) => { + let child_mtree = mtree.ensure_dir(name)?; + write_dir_to_mtree(orepo, crepo, child, leaves, &child_mtree, ctx)?; + } + Inode::Leaf(leaf_id, _) => { + let leaf = &leaves[leaf_id.0]; + let csum = write_leaf(orepo, crepo, &leaf.stat, &leaf.content, ctx)?; + mtree + .replace_file(name, &csum) + .with_context(|| format!("Inserting {name} into mtree"))?; + } + } + } + + // Now write the deferred `/etc` directory as `usr/etc`. + if let Some(etc) = etc_dir { + let usr_mtree = mtree.ensure_dir("usr")?; + let usr_etc_mtree = usr_mtree.ensure_dir("etc")?; + write_dir_to_mtree(orepo, crepo, etc, leaves, &usr_etc_mtree, ctx) + .context("Writing /etc as usr/etc")?; + } + + Ok(()) +} + +/// Recursively walk a composefs directory, writing each entry into `mtree`. +fn write_dir_to_mtree( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + dir: &Directory, + leaves: &[Leaf], + mtree: &crate::ostree::MutableTree, + ctx: &mut DirectImportContext, +) -> Result<()> { + let dirmeta = create_dirmeta(&dir.stat); + let meta_csum = orepo + .write_metadata( + crate::ostree::ObjectType::DirMeta, + None, + &dirmeta, + gio::Cancellable::NONE, + ) + .context("Writing dirmeta")?; + mtree.set_metadata_checksum(&meta_csum.to_hex()); + + for (name, inode) in dir.sorted_entries() { + let name = name + .to_str() + .ok_or_else(|| anyhow::anyhow!("Non-UTF8 entry name: {:?}", name))?; + match inode { + Inode::Directory(child) => { + let child_mtree = mtree.ensure_dir(name)?; + write_dir_to_mtree(orepo, crepo, child, leaves, &child_mtree, ctx)?; + } + Inode::Leaf(leaf_id, _) => { + let leaf = &leaves[leaf_id.0]; + let csum = write_leaf(orepo, crepo, &leaf.stat, &leaf.content, ctx)?; + mtree + .replace_file(name, &csum) + .with_context(|| format!("Inserting {name} into mtree"))?; + } + } + } + Ok(()) +} + +/// Write a single composefs leaf (regular file or symlink) to the ostree repo. +/// Returns the hex content checksum. +fn write_leaf( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + stat: &Stat, + content: &LeafContent, + ctx: &mut DirectImportContext, +) -> Result { + let xattrs = xattrs_to_variant(&stat.xattrs); + match content { + LeafContent::Symlink(target) => { + let target = target + .to_str() + .ok_or_else(|| anyhow::anyhow!("Non-UTF8 symlink target"))?; + let csum = orepo + .write_symlink( + None, + stat.st_uid, + stat.st_gid, + xattrs.as_ref(), + target, + gio::Cancellable::NONE, + ) + .context("Writing symlink")?; + Ok(csum.to_string()) + } + LeafContent::Regular(file) => { + write_regular_file(orepo, crepo, stat, file, xattrs.as_ref(), ctx) + } + other => anyhow::bail!( + "Unsupported composefs leaf (device/fifo/socket): {:?}", + std::mem::discriminant(other) + ), + } +} + +/// Write a regular file into the ostree repo. +/// +/// Inline files use the ostree C API directly. External files bypass the C +/// `write_content` path and instead write content objects directly to disk, +/// attempting FICLONE (reflink) first and falling back to a byte copy. +/// +/// The ostree content checksum is computed via the C library's +/// `checksum_file_from_input` to guarantee format compatibility. +fn write_regular_file( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + stat: &Stat, + file: &RegularFile, + xattrs: Option<&glib::Variant>, + ctx: &mut DirectImportContext, +) -> Result { + // ostree requires the full mode word including S_IFREG + let mode = libc::S_IFREG | stat.st_mode; + + match file { + RegularFile::Inline(data) => { + let csum = orepo + .write_regfile_inline( + None, + stat.st_uid, + stat.st_gid, + mode, + xattrs, + data, + gio::Cancellable::NONE, + ) + .context("Writing inline file")?; + Ok(csum.to_string()) + } + RegularFile::External(object_id, size) => { + write_external_file_direct(orepo, crepo, stat, object_id, *size, xattrs, mode, ctx) + } + } +} + +/// Compute the ostree content checksum for a regular file using the C API. +/// +/// This calls `ostree_checksum_file_from_input()` which is a pure, +/// repo-independent function. It constructs the standard file header +/// GVariant (uid/gid/mode/xattrs) and hashes `header || content`. +fn compute_content_checksum( + stat: &Stat, + xattrs: Option<&glib::Variant>, + fd: BorrowedFd<'_>, + size: u64, + mode: u32, +) -> Result { + let finfo = gio::FileInfo::new(); + finfo.set_attribute_uint32("unix::uid", stat.st_uid); + finfo.set_attribute_uint32("unix::gid", stat.st_gid); + finfo.set_attribute_uint32("unix::mode", mode); + finfo.set_size(size as i64); + finfo.set_file_type(gio::FileType::Regular); + + // Dup the fd so we can hand an owned File to ReadInputStream without + // touching the caller's fd or using unsafe. + let file = std::fs::File::from(fd.try_clone_to_owned().context("Dup fd for checksum")?); + let istream = gio::ReadInputStream::new(file); + let csum = crate::ostree::checksum_file_from_input( + &finfo, + xattrs, + Some(&istream), + crate::ostree::ObjectType::File, + gio::Cancellable::NONE, + ) + .map_err(|e| anyhow::anyhow!("Computing content checksum: {e}"))?; + + Ok(csum.to_string()) +} + +/// Build the `user.ostreemeta` GVariant `(uuu@a(ayay))` for bare-user mode. +/// +/// This matches the C function `create_file_metadata()` in ostree-repo-commit.c. +fn build_bareuser_metadata( + uid: u32, + gid: u32, + mode: u32, + xattrs: Option<&glib::Variant>, +) -> glib::Variant { + let empty_xattrs; + let xattrs = match xattrs { + Some(v) => v.clone(), + None => { + empty_xattrs = glib::Variant::array_from_iter::<(&[u8], &[u8])>(std::iter::empty()); + empty_xattrs + } + }; + // ostree stores these as big-endian u32 values + glib::Variant::tuple_from_iter([ + uid.to_be().to_variant(), + gid.to_be().to_variant(), + mode.to_be().to_variant(), + xattrs, + ]) +} + +/// Apply file metadata to an fd according to the ostree repo mode. +/// +/// For `Bare`: sets real uid/gid, mode, and xattrs via syscalls. +/// For `BareUser`: stores metadata in a `user.ostreemeta` xattr and +/// applies a masked mode suitable for non-root operation. +fn apply_file_metadata( + fd: BorrowedFd<'_>, + stat: &Stat, + mode: u32, + xattrs: Option<&glib::Variant>, + repo_mode: crate::ostree::RepoMode, +) -> Result<()> { + use rustix::fs::{Mode, Timestamps, XattrFlags, fchmod, fchown, fsetxattr, futimens}; + + match repo_mode { + crate::ostree::RepoMode::Bare => { + fchown( + fd, + Some(rustix::process::Uid::from_raw(stat.st_uid)), + Some(rustix::process::Gid::from_raw(stat.st_gid)), + ) + .context("fchown")?; + fchmod(fd, Mode::from_raw_mode(mode)).context("fchmod")?; + // Set real xattrs on the file. For security.selinux the composefs + // selabel() function stores values without the NUL terminator that + // the kernel expects; add it here so the on-disk value matches what + // xattrs_to_variant() (and thus the ostree checksum) sees. + for (name, value) in &stat.xattrs { + let value: &[u8] = value.as_ref(); + let value_with_nul; + let value = if name.as_bytes() == b"security.selinux" + && !value.last().copied().map_or(true, |b| b == 0) + { + value_with_nul = [value, &[0u8]].concat(); + value_with_nul.as_slice() + } else { + value + }; + fsetxattr(fd, name.as_bytes(), value, XattrFlags::empty()) + .with_context(|| format!("fsetxattr({:?})", name))?; + } + } + crate::ostree::RepoMode::BareUser => { + // Write the user.ostreemeta xattr + let meta = build_bareuser_metadata(stat.st_uid, stat.st_gid, mode, xattrs); + let meta_bytes = meta.data_as_bytes(); + fsetxattr( + fd, + c"user.ostreemeta", + meta_bytes.as_ref(), + XattrFlags::empty(), + ) + .context("fsetxattr(user.ostreemeta)")?; + // Mask mode for non-root safety (matches libostree) + let content_mode = (mode & (libc::S_IFREG | 0o775)) | libc::S_IRUSR; + fchmod(fd, Mode::from_raw_mode(content_mode)).context("fchmod (bare-user)")?; + } + other => anyhow::bail!("Unsupported ostree repo mode for direct write: {other:?}"), + } + + // Set mtime to OSTREE_TIMESTAMP (0), matching libostree behavior. + // UTIME_OMIT for atime means "leave atime unchanged". + futimens( + fd, + &Timestamps { + last_access: rustix::fs::Timespec { + tv_sec: 0, + tv_nsec: rustix::fs::UTIME_OMIT, + }, + last_modification: rustix::fs::Timespec { + tv_sec: 0, + tv_nsec: 0, + }, + }, + ) + .context("futimens")?; + + Ok(()) +} + +/// Ensure the `objects/XX/` directory exists under the repo. +fn ensure_object_dir(repo_dfd: BorrowedFd<'_>, prefix: &str) -> Result<()> { + use rustix::fs::{Mode, mkdirat}; + + let objects_dir = format!("objects/{prefix}"); + match mkdirat(repo_dfd, objects_dir.as_str(), Mode::from_raw_mode(0o777)) { + Ok(()) => Ok(()), + Err(rustix::io::Errno::EXIST) => Ok(()), + Err(e) => Err(e).context(format!("Creating {objects_dir}"))?, + } +} + +/// Write a composefs external regular file directly into the ostree repo. +/// +/// # Sharing strategy: FICLONE (reflink) → byte copy +/// +/// Each ostree content object is written as a fresh inode (via `O_TMPFILE`) +/// and populated from the composefs object using `FICLONE` (reflink) when the +/// filesystem supports it, falling back to `copy_file_range` / `std::io::copy`. +/// +/// **Why not hardlink?** A composefs object is identified by its SHA-512 +/// fsverity digest of raw content. The same object can be referenced by +/// multiple ostree content objects that carry different metadata — in +/// particular different `security.selinux` xattrs assigned by the SELinux +/// policy relabeling step. Ostree bare mode stores metadata (uid/gid/mode/ +/// xattrs) on the inode, so a single shared inode cannot satisfy two different +/// metadata requirements. FICLONE shares the underlying disk extents while +/// keeping inodes independent, giving each ostree object its own metadata. +/// +/// On filesystems without reflink support (ext4, etc.) FICLONE fails with +/// `EOPNOTSUPP` and we fall back to a full byte copy. Unified storage +/// (space-sharing between the composefs and ostree repos) requires a +/// reflink-capable filesystem (XFS, btrfs, …). +/// +/// Returns the hex SHA256 content checksum. +#[allow(clippy::too_many_arguments)] +fn write_external_file_direct( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + stat: &Stat, + object_id: &Sha512HashValue, + size: u64, + xattrs: Option<&glib::Variant>, + mode: u32, + ctx: &mut DirectImportContext, +) -> Result { + use rustix::fs::{AtFlags, Mode, OFlags, linkat, openat}; + use std::io::{Seek, SeekFrom}; + + let cfs_fd = crepo + .open_object(object_id) + .context("Opening composefs object")?; + + // Compute the ostree content checksum using the C API. + let checksum = compute_content_checksum(stat, xattrs, cfs_fd.as_fd(), size, mode) + .context("Computing content checksum")?; + + // Check if the object already exists — skip if so. + if orepo + .has_object( + crate::ostree::ObjectType::File, + &checksum, + gio::Cancellable::NONE, + ) + .context("Checking for existing object")? + { + ctx.existing += 1; + return Ok(checksum); + } + + let repo_dfd = orepo.dfd_borrow(); + let repo_mode = orepo.mode(); + + // Ensure objects/XX/ directory exists. + let (prefix, _rest) = checksum.split_at(2); + ensure_object_dir(repo_dfd, prefix)?; + + let obj_path = format!("objects/{prefix}/{}.file", &checksum[2..]); + + // Create an O_TMPFILE in the repo's tmp/ directory. This gives us an + // anonymous inode we can populate before atomically linking it into place, + // avoiding partial writes visible to concurrent readers. + let tmp_fd = openat( + repo_dfd, + c"tmp", + OFlags::WRONLY | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .context("Creating tmpfile in ostree repo/tmp")?; + + // Seek the source fd back to the start (compute_content_checksum consumed it). + let mut src_file = std::fs::File::from(cfs_fd); + src_file + .seek(SeekFrom::Start(0)) + .context("Seeking composefs fd to start")?; + + // --- Primary path: FICLONE (reflink) --- + // + // Shares the underlying disk extents with the composefs object while giving + // the ostree object its own inode (and thus its own metadata). On XFS this + // is a near-zero-cost operation. After the first failure we stop trying. + let mut reflinked = false; + if !ctx.ficlone_failed { + match rustix::fs::ioctl_ficlone(&tmp_fd, &src_file) { + Ok(()) => { + reflinked = true; + } + Err(e @ (rustix::io::Errno::OPNOTSUPP | rustix::io::Errno::XDEV)) => { + if ctx.reflinks_required { + return Err(anyhow::anyhow!( + "FICLONE failed ({e}) but reflinks were confirmed supported \ + by the storage probe — cannot safely continue import" + )); + } + tracing::debug!( + "FICLONE not supported on this filesystem; \ + falling back to copy for all remaining objects" + ); + ctx.ficlone_failed = true; + } + Err(e) => { + if ctx.reflinks_required { + return Err(anyhow::anyhow!( + "FICLONE failed unexpectedly ({e}); reflinks are required" + )); + } + tracing::debug!("FICLONE failed unexpectedly ({e}); falling back to copy"); + ctx.ficlone_failed = true; + } + } + } + + if reflinked { + ctx.reflinked += 1; + } else { + // --- Fallback: byte copy via copy_file_range --- + let mut dst_file = std::fs::File::from(tmp_fd.try_clone().context("Cloning tmpfile fd")?); + let n = std::io::copy(&mut src_file, &mut dst_file).context("Copying file content")?; + anyhow::ensure!( + n == size, + "Size mismatch: expected {size} bytes but copied {n}" + ); + ctx.copied += 1; + } + + // Apply metadata (uid/gid/mode/xattrs/mtime) to the new inode. + apply_file_metadata(tmp_fd.as_fd(), stat, mode, xattrs, repo_mode)?; + + // Atomically link the tmpfile into place at objects/XX/rest.file. + let proc_path = format!("/proc/self/fd/{}", tmp_fd.as_raw_fd()); + match linkat( + rustix::fs::CWD, + proc_path.as_str(), + repo_dfd, + obj_path.as_str(), + AtFlags::SYMLINK_FOLLOW, + ) { + Ok(()) => {} + Err(rustix::io::Errno::EXIST) => { + // A concurrent writer (or an earlier call) already wrote this object. + } + Err(e) => { + return Err(e).with_context(|| format!("Linking tmpfile to {obj_path}")); + } + } + + Ok(checksum) +} + +/// Synthesize an ostree commit from a composefs repository that has already +/// pulled an OCI image. +/// +/// This is the inverse of the ostree→composefs path in libostree: instead of +/// reading an ostree commit to build an EROFS image, we walk the composefs +/// `FileSystem` tree and write each entry as an ostree object. +/// +/// The resulting commit carries the same OCI metadata as a commit produced by +/// the standard `ostree-container` import pipeline: +/// - `ostree.container.image-config` +/// - `ostree.manifest` +/// - `ostree.manifest-digest` +/// +/// External regular files are written directly to disk with FICLONE (reflink) +/// attempted first, falling back to a byte copy. On the same filesystem this +/// avoids copying data entirely. +pub fn import_from_composefs_repo( + ostree_repo: &crate::ostree::Repo, + composefs_repo: &ComposefsRepo, + config_digest: &composefs_oci::OciDigest, + manifest_digest: &str, + image_manifest: &crate::oci_spec::image::ImageManifest, + image_config: &crate::oci_spec::image::ImageConfiguration, + reflinks_required: bool, +) -> Result { + let cancellable = gio::Cancellable::NONE; + + // Reconstruct the merged filesystem tree from the composefs store + let mut fs = composefs_oci::image::create_filesystem(composefs_repo, config_digest, None) + .context("Reconstructing composefs filesystem tree")?; + + // Apply SELinux labels in-memory before writing any objects. + // selabel walks the FileSystem tree and reads policy files directly from + // the composefs object store — no file copies are made. + let labeled = selabel(&mut fs, composefs_repo) + .context("Applying SELinux labels from composefs image policy")?; + tracing::debug!( + labeled, + total_leaves = fs.leaves.len(), + "SELinux labeling complete" + ); + + let txn = ostree_repo + .auto_transaction(cancellable) + .context("Beginning ostree transaction")?; + + let mut ctx = DirectImportContext { + ficlone_failed: false, + reflinked: 0, + copied: 0, + existing: 0, + reflinks_required, + }; + + // Walk and write all objects. + // + // The composefs filesystem tree uses the OCI image layout where `/etc` + // lives at the tree root. However, ostree's deployment mechanism expects + // configuration to be at `usr/etc` (it copies `usr/etc` → `etc` during + // deployment initialization and performs a 3-way merge). The old + // `ImageImporter` tar pipeline applies this `/etc` → `usr/etc` remap + // via `normalize_validate_path()` in `tar/write.rs`. + // + // We replicate that remapping here: if the composefs root has an `etc` + // directory, we write it into the ostree tree as `usr/etc` instead of + // a top-level `etc`. + let root_mtree = crate::ostree::MutableTree::new(); + write_dir_to_mtree_remap_etc( + ostree_repo, + composefs_repo, + &fs.root, + &fs.leaves, + &root_mtree, + &mut ctx, + ) + .context("Writing composefs tree into ostree")?; + + tracing::info!( + reflinked = ctx.reflinked, + copied = ctx.copied, + existing = ctx.existing, + "Composefs→ostree import complete: reflinked={} copied={} existing={}", + ctx.reflinked, + ctx.copied, + ctx.existing, + ); + + // Seal the tree + let root = ostree_repo + .write_mtree(&root_mtree, cancellable) + .context("Sealing MutableTree")?; + let root = root + .downcast::() + .map_err(|_| anyhow::anyhow!("BUG: write_mtree did not return a RepoFile"))?; + + // Attach OCI metadata so the commit looks like a normal ostree-container import, + // plus a flag indicating this commit was synthesized from a composefs repository + // (so per-layer blob refs do not exist). + let meta = glib::VariantDict::new(None); + meta.insert( + META_CONFIG, + &serde_json::to_string(image_config).context("Serializing image config")?, + ); + meta.insert( + META_MANIFEST, + &serde_json::to_string(image_manifest).context("Serializing manifest")?, + ); + meta.insert(META_MANIFEST_DIGEST, &manifest_digest); + meta.insert(crate::ostree::METADATA_KEY_BOOTABLE.as_ref(), &true); + meta.insert(META_COMPOSEFS_SYNTHESIZED, &true); + let meta = meta.to_variant(); + + // Use the image creation timestamp for the ostree commit, matching the + // behaviour of the old ImageImporter pipeline. This is important because + // rpm-ostree unconditionally expects a non-zero timestamp in the deployment + // metadata; a zero (epoch) timestamp can cause `rpm-ostree status` to abort. + let timestamp = + crate::container::store::timestamp_of_manifest_or_config(image_manifest, image_config) + .unwrap_or_else(|| chrono::offset::Utc::now().timestamp() as u64); + + let commit = ostree_repo + .write_commit_with_time(None, None, None, Some(&meta), &root, timestamp, cancellable) + .context("Writing ostree commit")?; + + txn.commit(cancellable).context("Committing transaction")?; + + Ok(commit.to_string()) +} + +#[cfg(test)] +mod tests { + use rustix::fd::AsRawFd as _; + use rustix::fs::{AtFlags, Mode, OFlags, linkat, openat}; + use std::os::fd::AsFd as _; + + /// Verify that O_TMPFILE can be materialized via /proc/self/fd/N without + /// root (the linkat+SYMLINK_FOLLOW approach used by write_external_file_direct). + #[test] + fn test_otmpfile_materialize_via_proc() { + let dir = tempfile::tempdir_in("/var/tmp").unwrap(); + let dir_fd = rustix::fs::open( + dir.path(), + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) + .unwrap(); + + // Create O_TMPFILE — anonymous inode in the directory + let tmp_fd = openat( + &dir_fd, + c".", + OFlags::WRONLY | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .expect("O_TMPFILE"); + + rustix::io::write(&tmp_fd, b"hello").unwrap(); + + // Materialize via /proc/self/fd/N with SYMLINK_FOLLOW + let proc_path = format!("/proc/self/fd/{}", tmp_fd.as_fd().as_raw_fd()); + linkat( + rustix::fs::CWD, + proc_path.as_str(), + &dir_fd, + "materialized.txt", + AtFlags::SYMLINK_FOLLOW, + ) + .expect("linkat via /proc/self/fd should work without root"); + + let contents = std::fs::read(dir.path().join("materialized.txt")).unwrap(); + assert_eq!(contents, b"hello"); + } + + /// Verify FICLONE (reflink) works between two files in /var/tmp. + /// This will return EOPNOTSUPP on filesystems that don't support it + /// (e.g. ext4), which is acceptable — we just need to confirm the + /// ioctl itself is reachable and returns a meaningful error code. + #[test] + fn test_ficlone_or_enotsup() { + let dir = tempfile::tempdir_in("/var/tmp").unwrap(); + let dir_fd = rustix::fs::open( + dir.path(), + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) + .unwrap(); + + // Source file with some content + let src_fd = openat( + &dir_fd, + c"src", + OFlags::RDWR | OFlags::CREATE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .unwrap(); + rustix::io::write(&src_fd, b"reflink-test-content").unwrap(); + + // Destination — O_TMPFILE + let dst_fd = openat( + &dir_fd, + c".", + OFlags::WRONLY | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .unwrap(); + + match rustix::fs::ioctl_ficlone(&dst_fd, &src_fd) { + Ok(()) => { + // Reflinks supported — verify content matches + let proc_path = format!("/proc/self/fd/{}", dst_fd.as_fd().as_raw_fd()); + linkat( + rustix::fs::CWD, + proc_path.as_str(), + &dir_fd, + "dst", + AtFlags::SYMLINK_FOLLOW, + ) + .unwrap(); + let contents = std::fs::read(dir.path().join("dst")).unwrap(); + assert_eq!(contents, b"reflink-test-content"); + eprintln!("FICLONE: supported on this filesystem"); + } + Err(rustix::io::Errno::OPNOTSUPP) | Err(rustix::io::Errno::XDEV) => { + eprintln!( + "FICLONE: not supported on this filesystem (EOPNOTSUPP/EXDEV) — fallback to copy is correct" + ); + } + Err(e) => panic!("Unexpected FICLONE error: {e}"), + } + } + + /// Verify that user.ostreemeta xattr can be set on a regular file + /// without root in /var/tmp. + #[test] + fn test_user_ostreemeta_xattr() { + use rustix::fs::{XattrFlags, fgetxattr, fsetxattr}; + + let dir = tempfile::tempdir_in("/var/tmp").unwrap(); + let f = tempfile::NamedTempFile::new_in(dir.path()).unwrap(); + let fd = rustix::fs::open(f.path(), OFlags::RDWR | OFlags::CLOEXEC, Mode::empty()).unwrap(); + + let meta_bytes = b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xed\x00\x00\x00\x00"; + fsetxattr(&fd, c"user.ostreemeta", meta_bytes, XattrFlags::empty()) + .expect("setting user.ostreemeta should work without root"); + + let mut buf = vec![0u8; 256]; + let n = fgetxattr(&fd, c"user.ostreemeta", &mut buf).unwrap(); + assert_eq!(&buf[..n], meta_bytes); + } +} diff --git a/crates/ostree-ext/src/container/mod.rs b/crates/ostree-ext/src/container/mod.rs index 0fec7f757..df175448b 100644 --- a/crates/ostree-ext/src/container/mod.rs +++ b/crates/ostree-ext/src/container/mod.rs @@ -613,6 +613,7 @@ pub fn version_for_config(config: &oci_spec::image::ImageConfiguration) -> Optio None } +pub mod composefs_import; pub mod deploy; mod encapsulate; pub use encapsulate::*; diff --git a/crates/ostree-ext/src/container/store.rs b/crates/ostree-ext/src/container/store.rs index ea0665d40..0df3d67c5 100644 --- a/crates/ostree-ext/src/container/store.rs +++ b/crates/ostree-ext/src/container/store.rs @@ -137,6 +137,7 @@ use camino::{Utf8Path, Utf8PathBuf}; use canon_json::CanonJsonSerialize; use cap_std_ext::cap_std; use cap_std_ext::cap_std::fs::{Dir, MetadataExt}; +use gvariant::{Marker as _, Structure as _}; use cap_std_ext::dirext::CapStdExtDirExt; use containers_image_proxy::{ImageProxy, OpenedImage}; @@ -172,12 +173,17 @@ const IMAGE_PREFIX: &str = "ostree/container/image"; /// ref with the project name, so the final ref may be of the form e.g. `ostree/container/baseimage/bootc/foo`. pub const BASE_IMAGE_PREFIX: &str = "ostree/container/baseimage"; -/// The key injected into the merge commit for the manifest digest. -pub(crate) const META_MANIFEST_DIGEST: &str = "ostree.manifest-digest"; -/// The key injected into the merge commit with the manifest serialized as JSON. -const META_MANIFEST: &str = "ostree.manifest"; -/// The key injected into the merge commit with the image configuration serialized as JSON. -const META_CONFIG: &str = "ostree.container.image-config"; +/// The key for the manifest digest in ostree commit metadata. +pub const META_MANIFEST_DIGEST: &str = "ostree.manifest-digest"; +/// The key for the serialized OCI manifest in ostree commit metadata. +pub const META_MANIFEST: &str = "ostree.manifest"; +/// The key for the serialized OCI image configuration in ostree commit metadata. +pub const META_CONFIG: &str = "ostree.container.image-config"; +/// Commit metadata key written by the composefs import path to indicate this commit +/// was synthesized from a composefs repository rather than imported layer-by-layer. +/// When present, per-layer blob refs do not exist and `query_image_commit` should +/// use the merge commit itself as the base commit. +pub const META_COMPOSEFS_SYNTHESIZED: &str = "ostree-ext.composefs-synthesized"; /// The type used to store content filtering information. pub type MetaFilteredData = HashMap>; @@ -197,7 +203,7 @@ fn ref_for_layer(l: &oci_image::Descriptor) -> Result { } /// Convert e.g. sha256:12345... into `/ostree/container/blob/sha256_2B12345...`. -fn ref_for_image(l: &ImageReference) -> Result { +pub fn ref_for_image(l: &ImageReference) -> Result { refescape::prefix_escape_for_ref(IMAGE_PREFIX, &l.to_string()) } @@ -585,7 +591,7 @@ pub(crate) fn parse_ostree_manifest_layout<'a>( } /// Find the timestamp of the manifest (or config), ignoring errors. -fn timestamp_of_manifest_or_config( +pub(crate) fn timestamp_of_manifest_or_config( manifest: &ImageManifest, config: &ImageConfiguration, ) -> Option { @@ -1624,6 +1630,58 @@ pub fn list_images(repo: &ostree::Repo) -> Result> { .collect() } +/// Recursively walk a dirtree GVariant, calling `f` with each file's hex content checksum. +/// +/// Reads `(a(say)a(sayay))` — the ostree DIRTREE format — directly from GVariant bytes, +/// the same approach used by [`crate::ima`]. Subdirectories are visited depth-first. +fn walk_dirtree_checksums( + repo: &ostree::Repo, + dirtree_checksum: &str, + f: &mut impl FnMut(&str) -> Result<()>, +) -> Result<()> { + use crate::objgv::gv_dirtree; + use gvariant::aligned_bytes::TryAsAligned as _; + + let v = repo.load_variant(ostree::ObjectType::DirTree, dirtree_checksum)?; + let bytes = v.data_as_bytes(); + let bytes = bytes.try_as_aligned()?; + let tree = gv_dirtree!().cast(bytes); + let (files, dirs) = tree.to_tuple(); + + let mut hexbuf = [0u8; 64]; + for file in files { + let (_name, csum) = file.to_tuple(); + hex::encode_to_slice(csum, &mut hexbuf)?; + f(std::str::from_utf8(&hexbuf)?)?; + } + for dir in dirs { + let (_name, contents_csum, _meta_csum) = dir.to_tuple(); + hex::encode_to_slice(contents_csum, &mut hexbuf)?; + walk_dirtree_checksums(repo, std::str::from_utf8(&hexbuf)?, f)?; + } + Ok(()) +} + +/// Call `f` once for each regular-file content checksum (hex SHA-256) reachable from +/// `commit_checksum`, reading dirtree GVariants directly without GObject overhead. +/// +/// Subtrees are visited depth-first. Shared dirtree objects that appear more than once +/// in the tree will be visited multiple times. +pub fn for_each_file_checksum( + repo: &ostree::Repo, + commit_checksum: &str, + f: &mut impl FnMut(&str) -> Result<()>, +) -> Result<()> { + use crate::objgv::gv_commit; + use gvariant::aligned_bytes::TryAsAligned as _; + + let (commit_v, _) = repo.load_commit(commit_checksum)?; + let commit_bytes = commit_v.data_as_bytes(); + let commit_bytes = commit_bytes.try_as_aligned()?; + let root_dirtree = hex::encode(gv_commit!().cast(commit_bytes).to_tuple().6); + walk_dirtree_checksums(repo, &root_dirtree, f) +} + /// Attempt to query metadata for a pulled image; if it is corrupted, /// the error is printed to stderr and None is returned. fn try_query_image( @@ -1745,11 +1803,21 @@ pub fn query_image_commit(repo: &ostree::Repo, commit: &str) -> Result(META_COMPOSEFS_SYNTHESIZED)? + .unwrap_or(false); + let base_commit = if is_composefs_synthesized { + merge_commit.clone() + } else { + let base_layer = query_layer(repo, base_layer)?; + let ostree_ref = base_layer.ostree_ref.as_str(); + base_layer + .commit + .ok_or_else(|| anyhow!("Missing base image ref {ostree_ref}"))? + }; let detached_commitmeta = repo.read_commit_detached_metadata(&merge_commit, gio::Cancellable::NONE)?; diff --git a/crates/ostree-ext/tests/it/main.rs b/crates/ostree-ext/tests/it/main.rs index 14c6dd3b3..3b7531564 100644 --- a/crates/ostree-ext/tests/it/main.rs +++ b/crates/ostree-ext/tests/it/main.rs @@ -2365,3 +2365,230 @@ async fn test_diff_id_reuse_across_compression() -> Result<()> { Ok(()) } + +/// Build a minimal OCI image (single gzip layer) into the given OCI directory. +/// +/// The tar layer explicitly includes parent directory entries before files, so +/// that composefs's strict layer importer does not choke on missing parents. +fn build_test_oci(oci_dir: &ocidir::OciDir) -> Result<()> { + use ocidir::oci_spec::image::ImageConfigurationBuilder; + let mut config = ImageConfigurationBuilder::default().build()?; + let mut manifest = oci_dir.new_empty_manifest()?.build()?; + + let bw = oci_dir.create_gzip_layer(None)?; + let mut tb = tar::Builder::new(bw); + + let dir_header = |path: &str| -> tar::Header { + let mut h = tar::Header::new_ustar(); + h.set_entry_type(tar::EntryType::Directory); + h.set_mode(0o755); + h.set_uid(0); + h.set_gid(0); + h.set_mtime(0); + h.set_size(0); + // The tar path for directories must end in / + let _ = path; // consumed by caller + h + }; + let reg_header = |data: &[u8]| -> tar::Header { + let mut h = tar::Header::new_ustar(); + h.set_entry_type(tar::EntryType::Regular); + h.set_mode(0o644); + h.set_uid(0); + h.set_gid(0); + h.set_mtime(0); + h.set_size(data.len() as u64); + h + }; + + // Parent directories must come before their children. + let mut dh = dir_header("usr/"); + tb.append_data(&mut dh, "usr/", std::io::empty())?; + let mut dh = dir_header("usr/bin/"); + tb.append_data(&mut dh, "usr/bin/", std::io::empty())?; + let mut dh = dir_header("usr/lib/"); + tb.append_data(&mut dh, "usr/lib/", std::io::empty())?; + + let bash_data = b"#!/bin/bash\n"; + let mut fh = reg_header(bash_data); + tb.append_data(&mut fh, "usr/bin/bash", std::io::Cursor::new(bash_data))?; + + let config_data = b"# test config\n"; + let mut fh = reg_header(config_data); + tb.append_data( + &mut fh, + "usr/lib/test.conf", + std::io::Cursor::new(config_data), + )?; + + // Add /etc at the OCI image root — this must be remapped to usr/etc + // in the ostree commit so that deployment initialization can find it. + let mut dh = dir_header("etc/"); + tb.append_data(&mut dh, "etc/", std::io::empty())?; + + let hostname_data = b"testhost\n"; + let mut fh = reg_header(hostname_data); + tb.append_data(&mut fh, "etc/hostname", std::io::Cursor::new(hostname_data))?; + + let bw = tb.into_inner()?; + let layer = bw.complete()?; + + oci_dir.push_layer_full( + &mut manifest, + &mut config, + layer, + None::>, + "test layer", + chrono::DateTime::UNIX_EPOCH, + ); + let config_desc = oci_dir.write_config(config)?; + manifest.set_config(config_desc); + oci_dir.replace_with_single_manifest(manifest, ocidir::oci_spec::image::Platform::default())?; + Ok(()) +} + +/// Test that we can synthesize an ostree commit from a composefs repository. +/// +/// The flow is: +/// 1. Build a minimal OCI image into a local OCI directory +/// 2. Pull it into a fresh composefs repository +/// 3. Call `composefs_import::import_from_composefs_repo` +/// 4. Verify the resulting ostree commit has the expected OCI metadata and files +#[tokio::test] +async fn test_composefs_import_to_ostree() -> Result<()> { + use composefs_ctl::composefs::fsverity::{Algorithm, DEFAULT_LG_BLOCKSIZE, Sha512HashValue}; + use composefs_ctl::composefs::repository::Repository; + use composefs_ctl::composefs_oci; + use composefs_ctl::composefs_oci::oci_image::OciImage; + use ostree_ext::container::composefs_import; + use ostree_ext::container::store::{META_CONFIG, META_MANIFEST, META_MANIFEST_DIGEST}; + use std::sync::Arc; + + if !check_skopeo() { + eprintln!("Skipping test_composefs_import_to_ostree: skopeo not available"); + return Ok(()); + } + + let tempdir = tempfile::tempdir_in("/var/tmp")?; + let path: &camino::Utf8Path = tempdir.path().try_into().unwrap(); + + // Step 1: build a minimal OCI image + let oci_path = path.join("test.oci"); + std::fs::create_dir(&oci_path)?; + let oci_cap_dir = cap_std::fs::Dir::open_ambient_dir(&oci_path, cap_std::ambient_authority())?; + let oci_dir = ocidir::OciDir::ensure(oci_cap_dir)?; + build_test_oci(&oci_dir)?; + + // Step 2: create a composefs repository and pull the OCI image into it + let cfs_path = path.join("composefs-repo"); + std::fs::create_dir(&cfs_path)?; + let cfs_dirfd = cap_std::fs::Dir::open_ambient_dir(&cfs_path, cap_std::ambient_authority())?; + let algo = Algorithm::Sha512 { + lg_blocksize: DEFAULT_LG_BLOCKSIZE, + }; + let (cfs_repo, _) = Repository::::init_path(cfs_dirfd, ".", algo, false) + .context("Initializing composefs repository")?; + let cfs_repo = Arc::new(cfs_repo); + + let oci_imgref = format!("oci:{}", oci_path); + let pull_result = composefs_oci::pull(&cfs_repo, &oci_imgref, None, Default::default()) + .await + .context("Pulling OCI image into composefs repo")?; + + // Step 3: get manifest + config from the composefs repo + let oci_image = OciImage::open(&cfs_repo, &pull_result.manifest_digest, None) + .context("Opening OCI image from composefs repo")?; + let manifest = oci_image.manifest().clone(); + let config = oci_image + .config() + .cloned() + .context("OCI image has no config")?; + + // Step 4: create a destination ostree repo and run the import + let ost_repo = ostree::Repo::new(&gio::File::for_path(path.join("ostree-repo"))); + ost_repo + .create(ostree::RepoMode::BareUser, gio::Cancellable::NONE) + .context("Creating destination ostree repo")?; + + let manifest_digest_str = pull_result.manifest_digest.to_string(); + let commit = composefs_import::import_from_composefs_repo( + &ost_repo, + &cfs_repo, + &pull_result.config_digest, + &manifest_digest_str, + &manifest, + &config, + false, + ) + .context("Importing composefs repo into ostree")?; + + // Step 5: verify + assert!(!commit.is_empty(), "commit checksum should be non-empty"); + + let (commitv, state) = ost_repo.load_commit(&commit)?; + assert_eq!(state, ostree::RepoCommitState::NORMAL); + + // Commit metadata should contain the three OCI keys + let commit_meta = commitv.child_value(0); + let commitmeta = glib::VariantDict::new(Some(&commit_meta)); + assert!( + commitmeta + .lookup_value(META_MANIFEST_DIGEST, None) + .is_some(), + "commit should have manifest-digest metadata" + ); + assert!( + commitmeta.lookup_value(META_MANIFEST, None).is_some(), + "commit should have manifest metadata" + ); + assert!( + commitmeta.lookup_value(META_CONFIG, None).is_some(), + "commit should have config metadata" + ); + // The stored digest should round-trip + let stored_digest = commitmeta + .lookup::(META_MANIFEST_DIGEST)? + .expect("manifest-digest key"); + assert_eq!(stored_digest, manifest_digest_str); + + // The file tree should have the files we put in the layer + let (root, _) = ost_repo.read_commit(&commit, gio::Cancellable::NONE)?; + for path in ["usr", "usr/bin", "usr/bin/bash", "usr/lib/test.conf"] { + let node = root.resolve_relative_path(path); + assert!( + node.query_exists(gio::Cancellable::NONE), + "expected {path} to exist in the ostree commit" + ); + } + + // Verify /etc was remapped to usr/etc (not left at the root as etc/) + let usr_etc_hostname = root.resolve_relative_path("usr/etc/hostname"); + assert!( + usr_etc_hostname.query_exists(gio::Cancellable::NONE), + "expected usr/etc/hostname to exist (remapped from /etc/hostname)" + ); + + // Verify there is NO top-level etc/ — it should have been remapped + let root_etc = root.resolve_relative_path("etc"); + assert!( + !root_etc.query_exists(gio::Cancellable::NONE), + "top-level etc/ should not exist; it should be remapped to usr/etc/" + ); + + // Run `ostree fsck` against the repo to verify all objects pass integrity + // checks — in particular that file content checksums match, that + // user.ostreemeta xattrs are present and well-formed, and that no + // object is corrupted. + let ostree_repo_path = path.join("ostree-repo"); + let st = std::process::Command::new("ostree") + .args(["fsck", "--repo"]) + .arg(&ostree_repo_path) + .status() + .context("running `ostree fsck`")?; + assert!( + st.success(), + "`ostree fsck` failed on the composefs-imported repo" + ); + + Ok(()) +} diff --git a/docs/src/experimental-unified-storage.md b/docs/src/experimental-unified-storage.md index 68d5ea142..04078615c 100644 --- a/docs/src/experimental-unified-storage.md +++ b/docs/src/experimental-unified-storage.md @@ -148,12 +148,34 @@ Once unified storage is enabled, podman can access the booted image: podman --storage-opt=additionalimagestore=/usr/lib/bootc/storage run localhost/bootc ``` -## Relationship to composefs backend - -Unified storage is complementary to the [composefs backend](experimental-composefs.md). -While unified storage changes *how images are pulled* (using containers/storage), -the composefs backend changes *how the filesystem is stored and verified*. -These features can potentially be combined in the future. +## Relationship to the composefs-first import pipeline + +The containers/storage unified storage path described above is distinct from the +composefs-first import pipeline implemented in this codebase. The two approaches +solve overlapping problems but are architecturally different. + +The composefs-first path fetches the OCI image directly into the composefs OCI +repository (under `/sysroot/ostree/repo/composefs`) and then synthesizes an ostree +commit from that repository without a tar round-trip. For each external regular +file it uses `FICLONE` (reflink) to populate the corresponding ostree content object +from the composefs object. Because reflinks share underlying disk extents while +keeping independent inodes, each ostree object can carry its own metadata (uid, gid, +mode, SELinux label) even when two paths have identical bytes and share a single +composefs object. This is why reflink is used instead of hardlink: a single inode +can hold only one `security.selinux` value, so hardlinking would corrupt one of the +labels. + +This design requires a reflink-capable filesystem. On XFS or btrfs, FICLONE +succeeds and the ostree and composefs repositories share disk blocks — no extra +space is consumed for the ostree content objects. On ext4, FICLONE fails with +`EOPNOTSUPP` and the code falls back to a byte copy; installation works correctly +but without block sharing between the two repositories. + +SELinux labels are applied in-memory (via `selabel()` from composefs-rs) before +the ostree checksums are computed. On a container host where SELinux is disabled, +the `fsetxattr` call is a no-op; the kernel applies matching labels at first boot +during auto-relabeling. Because the checksums include the correct label values, +`ostree fsck` passes after boot without any special handling at install time. ## Limitations diff --git a/tmt/plans/integration.fmf b/tmt/plans/integration.fmf index 0997dde21..58d2e452d 100644 --- a/tmt/plans/integration.fmf +++ b/tmt/plans/integration.fmf @@ -269,4 +269,12 @@ execute: how: fmf test: - /tmt/tests/tests/test-43-switch-same-digest + +/plan-44-readonly-unified: + summary: Run readonly tests after onboarding to unified storage + discover: + how: fmf + test: + - /tmt/tests/tests/test-44-readonly-unified + extra-try_bind_storage: true # END GENERATED PLANS diff --git a/tmt/tests/booted/readonly/032-test-composefs-repo.nu b/tmt/tests/booted/readonly/032-test-composefs-repo.nu new file mode 100644 index 000000000..c71172a84 --- /dev/null +++ b/tmt/tests/booted/readonly/032-test-composefs-repo.nu @@ -0,0 +1,36 @@ +use std assert +use tap.nu + +tap begin "composefs repo always present" + +let st = bootc status --json | from json +let is_composefs = (tap is_composefs) + +# The composefs repository must always exist on a booted bootc system, +# regardless of whether the system was booted via composefs or classic ostree. +# This is a regression guard for the composefs-first fetch pipeline. + +print "# Checking /sysroot/composefs directory structure" + +assert ("/sysroot/composefs" | path exists) "/sysroot/composefs must exist" +assert (("/sysroot/composefs" | path type) == "dir") "/sysroot/composefs must be a directory" + +assert ("/sysroot/composefs/objects" | path exists) "/sysroot/composefs/objects must exist" +assert (("/sysroot/composefs/objects" | path type) == "dir") "/sysroot/composefs/objects must be a directory" + +print $"# /sysroot/composefs exists with objects directory \(is_composefs: ($is_composefs)\)" + +if $is_composefs { + # On a composefs-native boot, the streams/refs tree must also be populated. + print "# Checking composefs streams/refs structure (composefs-native boot)" + + assert ("/sysroot/composefs/streams" | path exists) "/sysroot/composefs/streams must exist on composefs boot" + assert (("/sysroot/composefs/streams" | path type) == "dir") "/sysroot/composefs/streams must be a directory" + + let oci_refs = (ls /sysroot/composefs/streams/refs/oci/ | where type == "symlink") + let n_refs = ($oci_refs | length) + print $"# Found ($n_refs) OCI image ref symlinks under /sysroot/composefs/streams/refs/oci/" + assert ($n_refs > 0) "At least one OCI image ref symlink must exist under /sysroot/composefs/streams/refs/oci/ on a composefs-native booted system" +} + +tap ok diff --git a/tmt/tests/booted/test-44-readonly-unified.nu b/tmt/tests/booted/test-44-readonly-unified.nu new file mode 100644 index 000000000..69b482d36 --- /dev/null +++ b/tmt/tests/booted/test-44-readonly-unified.nu @@ -0,0 +1,67 @@ +# number: 44 +# extra: +# try_bind_storage: true +# tmt: +# summary: Run readonly tests after onboarding to unified storage +# duration: 30m +# +# Single-boot test: onboard to unified storage, verify the unified state, +# then run all readonly tests. Works on both ostree and composefs backends. + +use std assert +use tap.nu +use bootc_testlib.nu + +tap begin "readonly tests with unified storage" + +let st = bootc status --json | from json +let booted_image = $st.status.booted.image.image.image +let is_composefs = (tap is_composefs) + +# --- Step 1: Onboard to unified storage --- +print $"# Onboarding ($booted_image) to unified storage" +bootc image set-unified + +# --- Step 2: Verify unified state --- + +# The bootc-owned image store symlink must exist +assert ("/usr/lib/bootc/storage" | path exists) "/usr/lib/bootc/storage must exist after set-unified" + +# The booted image should now appear in bootc-owned containers-storage +print "# Verifying booted image is in bootc containers-storage" +let image_in_store = ( + podman --storage-opt=additionalimagestore=/usr/lib/bootc/storage + image exists $booted_image + | complete | get exit_code +) == 0 +assert $image_in_store $"Booted image ($booted_image) must be present in bootc storage after set-unified" + +# bootc image list must report the image with type "unified" +let images = bootc image list --format json | from json +let unified_images = $images | where image_type == "unified" +assert (($unified_images | length) > 0) "Expected at least one image with type 'unified' after set-unified" + +# On a composefs-native boot the composefs repo is already fully populated +# from the boot sequence. On an ostree boot the composefs repo is +# initialised lazily on the first unified upgrade; `set-unified` alone does +# not trigger the composefs pull, so we only verify the streams/refs tree on +# the composefs backend. +if $is_composefs { + print "# Verifying composefs streams/refs are populated (composefs-native boot)" + assert ("/sysroot/composefs/streams" | path exists) "/sysroot/composefs/streams must exist" + let oci_refs = (ls /sysroot/composefs/streams/refs/oci/ | where type == "symlink") + assert (($oci_refs | length) > 0) "At least one OCI ref must exist under /sysroot/composefs/streams/refs/oci/" +} else { + print "# Ostree boot: composefs repo initialised lazily on first unified upgrade; skipping repo structure check" +} + +print "# Unified storage verified; proceeding with readonly test suite" + +# --- Step 3: Run the full readonly test suite --- +let tests = (ls booted/readonly/*-test-*.nu | get name | sort) +for test_file in $tests { + print $"Running ($test_file)..." + nu $test_file +} + +tap ok diff --git a/tmt/tests/tests.fmf b/tmt/tests/tests.fmf index 90e1e50ff..f0ac01fdc 100644 --- a/tmt/tests/tests.fmf +++ b/tmt/tests/tests.fmf @@ -168,3 +168,8 @@ check: summary: Error on bootc switch to image with identical fs-verity digest duration: 10m test: nu booted/test-switch-same-digest.nu + +/test-44-readonly-unified: + summary: Run readonly tests after onboarding to unified storage + duration: 30m + test: nu booted/test-44-readonly-unified.nu From 096b64df791793dc634845647e9dc837339066b0 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Fri, 22 May 2026 17:38:09 -0400 Subject: [PATCH 05/13] =?UTF-8?q?deploy:=20Replace=20skopeo=E2=86=92ostree?= =?UTF-8?q?=20unified=20path=20with=20containers-storage=E2=86=92composefs?= =?UTF-8?q?=E2=86=92ostree?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The --experimental-unified-storage flag on the ostree backend previously went: containers-storage → skopeo → ostree via prepare_for_pull_unified / pull_unified, which used ImageImporter with a custom skopeo command pointing at bootc's containers-storage as an additional image store. Replace that with a new pull_via_composefs() that does: 1. Stage the image into bootc-owned containers-storage (same as before). 2. Zero-copy import from containers-storage into the composefs OCI repo using composefs_oci::pull with LocalFetchOpt::ZeroCopy. 3. Synthesize an ostree commit from the composefs filesystem tree via composefs_import::import_from_composefs_repo (the inverse of the ostree→composefs path in libostree). This is a pure behavior change: the composefs OCI repo is now always populated on the unified storage path, making the composefs repo the canonical source of truth for both the host image metadata and the ostree commit objects. On reflink-capable filesystems (XFS, btrfs) the block extents are shared between the composefs object store and the new ostree content objects — no extra disk space consumed beyond what composefs already holds. Dead code removed: new_importer_with_config, prepare_for_pull_unified, pull_unified, check_disk_space_unified. Assisted-by: OpenCode (Claude Sonnet 4.5) Signed-off-by: Colin Walters --- crates/lib/src/cli.rs | 22 +--- crates/lib/src/deploy.rs | 258 +++++++++++++++++--------------------- crates/lib/src/install.rs | 35 +++--- 3 files changed, 135 insertions(+), 180 deletions(-) diff --git a/crates/lib/src/cli.rs b/crates/lib/src/cli.rs index cc8619875..e14e8fb44 100644 --- a/crates/lib/src/cli.rs +++ b/crates/lib/src/cli.rs @@ -1243,16 +1243,7 @@ async fn upgrade( } } else { let fetched = if use_unified { - crate::deploy::pull_unified( - repo, - imgref, - None, - opts.quiet, - prog.clone(), - storage, - Some(&booted_ostree.deployment), - ) - .await? + crate::deploy::pull_via_composefs(repo, imgref, storage).await? } else { crate::deploy::pull( repo, @@ -1422,16 +1413,7 @@ async fn switch_ostree( }; let fetched = if use_unified { - crate::deploy::pull_unified( - repo, - &target, - None, - opts.quiet, - prog.clone(), - storage, - Some(&booted_ostree.deployment), - ) - .await? + crate::deploy::pull_via_composefs(repo, &target, storage).await? } else { crate::deploy::pull( repo, diff --git a/crates/lib/src/deploy.rs b/crates/lib/src/deploy.rs index e82314629..69009b87b 100644 --- a/crates/lib/src/deploy.rs +++ b/crates/lib/src/deploy.rs @@ -5,11 +5,9 @@ use std::collections::HashSet; use std::io::{BufRead, Write}; use std::os::fd::AsFd; -use std::process::Command; use anyhow::{Context, Result, anyhow}; use bootc_kernel_cmdline::utf8::CmdlineOwned; -use bootc_utils::skopeo_bin; use cap_std::fs::{Dir, MetadataExt}; use cap_std_ext::cap_std; use cap_std_ext::dirext::CapStdExtDirExt; @@ -111,23 +109,6 @@ pub(crate) async fn new_importer( Ok(imp) } -/// Wrapper for pulling a container image with a custom proxy config (e.g. for unified storage). -pub(crate) async fn new_importer_with_config( - repo: &ostree::Repo, - imgref: &ostree_container::OstreeImageReference, - config: ostree_ext::containers_image_proxy::ImageProxyConfig, - booted_deployment: Option<&ostree::Deployment>, -) -> Result { - let mut imp = ostree_container::store::ImageImporter::new(repo, imgref, config).await?; - imp.require_bootable(); - // We do our own GC/prune in deploy::prune(), so skip the importer's internal one. - imp.disable_gc(); - if let Some(deployment) = booted_deployment { - imp.set_sepolicy_commit(deployment.csum().to_string()); - } - Ok(imp) -} - pub(crate) fn check_bootc_label(config: &ostree_ext::oci_spec::image::ImageConfiguration) { if let Some(label) = labels_of_config(config).and_then(|labels| labels.get(crate::metadata::BOOTC_COMPAT_LABEL)) @@ -423,16 +404,6 @@ pub(crate) fn check_disk_space_ostree( ) } -/// Verify there is sufficient disk space to pull an image into the composefs store -/// via the ostree unified-storage path (uses `PreparedImportMeta`). -pub(crate) fn check_disk_space_unified( - cfs: &crate::store::ComposefsRepository, - image_meta: &PreparedImportMeta, - imgref: &ImageReference, -) -> Result<()> { - check_disk_space_inner(cfs.objects_dir()?, image_meta.bytes_to_fetch, 0, imgref) -} - /// Verify there is sufficient disk space to pull an image into the composefs store /// for the native composefs backend (uses a raw `ImageManifest`). pub(crate) fn check_disk_space_composefs( @@ -519,138 +490,141 @@ pub(crate) async fn image_exists_in_unified_storage( imgstore.exists(&image_ref_str).await } -/// Unified approach: Use bootc's CStorage to pull the image, then prepare from containers-storage. -/// This reuses the same infrastructure as LBIs. -pub(crate) async fn prepare_for_pull_unified( +/// Full unified pipeline: containers-storage → composefs → ostree. +/// +/// Stage 1: Pull the image into bootc-owned containers-storage. +/// Stage 2: Zero-copy import from containers-storage into the composefs OCI repo. +/// Stage 3: Synthesize an ostree commit from the composefs filesystem tree. +/// +/// This is the implementation of `--experimental-unified-storage` for the ostree backend. +pub(crate) async fn pull_via_composefs( repo: &ostree::Repo, imgref: &ImageReference, - target_imgref: Option<&OstreeImageReference>, store: &Storage, - booted_deployment: Option<&ostree::Deployment>, -) -> Result { - // Get or initialize the bootc container storage (same as used for LBIs) - let imgstore = store.get_ensure_imgstore()?; +) -> Result> { + use composefs_ctl::composefs_oci; + use composefs_ctl::composefs_oci::oci_image::OciImage; + use composefs_ctl::composefs_oci::{LocalFetchOpt, PullOptions, tag_image}; + use ostree_ext::container::composefs_import; + use ostree_ext::container::store::ref_for_image; + // Stage 1: pull into bootc-owned containers-storage. + let imgstore = store.get_ensure_imgstore()?; let image_ref_str = imgref.to_transport_image()?; - - // Always pull to ensure we have the latest image, whether from a remote - // registry or a locally rebuilt image tracing::info!( - "Unified pull: pulling from transport '{}' to bootc storage", - &imgref.transport + "Composefs unified pull: staging {} into containers-storage", + imgref.image ); + if imgref.transport == "containers-storage" { + imgstore + .pull_from_containers_storage(&imgref.image) + .await + .context("Copying image from host containers-storage into bootc storage")?; + } else { + imgstore + .pull_with_progress(&image_ref_str) + .await + .context("Pulling image into bootc containers-storage")?; + } - // Pull the image into bootc containers-storage with per-layer - // download progress via the podman native API. - imgstore - .pull_with_progress(&image_ref_str) - .await - .context("Pulling image into bootc containers-storage")?; - - // Now create a containers-storage reference to read from bootc storage - tracing::info!("Unified pull: now importing from containers-storage transport"); - let containers_storage_imgref = ImageReference { - transport: "containers-storage".to_string(), - image: imgref.image.clone(), - signature: imgref.signature.clone(), - }; - let ostree_imgref = OstreeImageReference::from(containers_storage_imgref); - - // Configure the importer to use bootc storage as an additional image store - let mut config = new_proxy_config(); - let mut cmd = Command::new(skopeo_bin()); - // Use the physical path to bootc storage from the Storage struct + // Stage 2: zero-copy import from containers-storage into the composefs OCI repo. + let cfs_repo = store.get_ensure_composefs()?; + let cstor_imgref_str = format!("containers-storage:{}", imgref.image); let storage_path = format!( "{}/{}", store.physical_root_path, crate::podstorage::CStorage::subpath() ); - crate::podstorage::set_additional_image_store(&mut cmd, &storage_path); - config.skopeo_cmd = Some(cmd); - - // Use the preparation flow with the custom config - let mut imp = new_importer_with_config(repo, &ostree_imgref, config, booted_deployment).await?; - if let Some(target) = target_imgref { - imp.set_target(target); - } - let prep = match imp.prepare().await? { - PrepareResult::AlreadyPresent(c) => { - println!("No changes in {imgref:#} => {}", c.manifest_digest); - return Ok(PreparedPullResult::AlreadyPresent(Box::new((*c).into()))); - } - PrepareResult::Ready(p) => p, + tracing::info!( + "Composefs unified pull: importing {} into composefs repo (zero-copy)", + cstor_imgref_str + ); + let pull_opts = PullOptions { + local_fetch: LocalFetchOpt::ZeroCopy, + storage_root: Some(std::path::Path::new(&storage_path)), + ..Default::default() }; - check_bootc_label(&prep.config); - if let Some(warning) = prep.deprecated_warning() { - ostree_ext::cli::print_deprecated_warning(warning).await; - } - ostree_ext::cli::print_layer_status(&prep); - let layers_to_fetch = prep.layers_to_fetch().collect::>>()?; - - // Log that we're importing a new image from containers-storage - const PULLING_NEW_IMAGE_ID: &str = "6d5e4f3a2b1c0d9e8f7a6b5c4d3e2f1a0"; + let pull_result = composefs_oci::pull(&cfs_repo, &cstor_imgref_str, None, pull_opts) + .await + .context("Importing from containers-storage into composefs repo")?; + + // Tag the manifest as a GC root in the composefs repo. + let tag = + crate::bootc_composefs::repo::bootc_tag_for_manifest(&pull_result.manifest_digest.to_string()); + tag_image(&*cfs_repo, &pull_result.manifest_digest, &tag) + .context("Tagging pulled image as bootc GC root in composefs repo")?; + + // Open the OCI image to retrieve manifest + config for the ostree synthesis. + let oci_image = OciImage::open(&cfs_repo, &pull_result.manifest_digest, None) + .context("Opening OCI image from composefs repo")?; + let manifest = oci_image.manifest().clone(); + let config = oci_image + .config() + .cloned() + .context("OCI image has no config (artifact, not a container image)")?; + + let manifest_digest_str = pull_result.manifest_digest.to_string(); + + // Stage 3: synthesize ostree commit from composefs tree (blocking, CPU-bound). tracing::info!( - message_id = PULLING_NEW_IMAGE_ID, - bootc.image.reference = &imgref.image, - bootc.image.transport = "containers-storage", - bootc.original_transport = &imgref.transport, - bootc.status = "importing_from_storage", - "Importing image from bootc storage: {}", - ostree_imgref + "Composefs unified pull: synthesizing ostree commit from composefs tree (digest {})", + manifest_digest_str ); + let repo_clone = repo.clone(); + let cfs_repo_clone = std::sync::Arc::clone(&cfs_repo); + let config_digest = pull_result.config_digest.clone(); + let manifest_digest_str2 = manifest_digest_str.clone(); + let ostree_commit = tokio::task::spawn_blocking(move || { + composefs_import::import_from_composefs_repo( + &repo_clone, + &cfs_repo_clone, + &config_digest, + &manifest_digest_str2, + &manifest, + &config, + false, // reflinks_required: allow fallback to byte copy + ) + }) + .await + .context("join error in composefs→ostree import task")? + .context("Synthesizing ostree commit from composefs tree")?; - let prepared_image = PreparedImportMeta { - imp, - n_layers_to_fetch: layers_to_fetch.len(), - layers_total: prep.all_layers().count(), - bytes_to_fetch: layers_to_fetch.iter().map(|(l, _)| l.layer.size()).sum(), - bytes_total: prep.all_layers().map(|l| l.layer.size()).sum(), - digest: prep.manifest_digest.clone(), - prep, - }; + // Write the ostree ref so the deployment machinery can find the commit. + { + let ostree_imgref = OstreeImageReference::from(imgref.clone()); + let ostree_ref = ref_for_image(&ostree_imgref.imgref) + .context("Computing ostree ref for image")?; + let txn = repo + .auto_transaction(gio::Cancellable::NONE) + .context("Beginning ostree transaction for ref write")?; + repo.transaction_set_ref(None, &ostree_ref, Some(ostree_commit.as_str())); + txn.commit(gio::Cancellable::NONE) + .context("Committing ostree ref transaction")?; + } - Ok(PreparedPullResult::Ready(Box::new(prepared_image))) -} + // Extract version from the config labels. + let version = oci_image + .config() + .and_then(|cfg| ostree_ext::container::version_for_config(cfg)) + .map(|s| s.to_owned()); -/// Unified pull: Use podman to pull to containers-storage, then read from there -pub(crate) async fn pull_unified( - repo: &ostree::Repo, - imgref: &ImageReference, - target_imgref: Option<&OstreeImageReference>, - quiet: bool, - prog: ProgressWriter, - store: &Storage, - booted_deployment: Option<&ostree::Deployment>, -) -> Result> { - match prepare_for_pull_unified(repo, imgref, target_imgref, store, booted_deployment).await? { - PreparedPullResult::AlreadyPresent(existing) => { - // Log that the image was already present (Debug level since it's not actionable) - const IMAGE_ALREADY_PRESENT_ID: &str = "5c4d3e2f1a0b9c8d7e6f5a4b3c2d1e0f9"; - tracing::debug!( - message_id = IMAGE_ALREADY_PRESENT_ID, - bootc.image.reference = &imgref.image, - bootc.image.transport = &imgref.transport, - bootc.status = "already_present", - "Image already present: {}", - imgref - ); - Ok(existing) - } - PreparedPullResult::Ready(prepared_image_meta) => { - check_disk_space_unified( - store.get_ensure_composefs()?.as_ref(), - &prepared_image_meta, - imgref, - )?; - // To avoid duplicate success logs, pass a containers-storage imgref to the importer - let cs_imgref = ImageReference { - transport: "containers-storage".to_string(), - image: imgref.image.clone(), - signature: imgref.signature.clone(), - }; - pull_from_prepared(&cs_imgref, quiet, prog, *prepared_image_meta).await - } - } + // Parse the manifest digest into the oci_spec::image::Digest type that + // ImageState expects. The string is already in "sha256:..." format. + let manifest_digest: Digest = manifest_digest_str + .parse() + .with_context(|| format!("Parsing manifest digest {manifest_digest_str}"))?; + + tracing::info!( + "Composefs unified pull complete: commit {} digest {}", + ostree_commit, + manifest_digest + ); + + Ok(Box::new(ImageState { + manifest_digest, + version, + ostree_commit, + })) } #[context("Pulling")] diff --git a/crates/lib/src/install.rs b/crates/lib/src/install.rs index f73c9da98..bc730af45 100644 --- a/crates/lib/src/install.rs +++ b/crates/lib/src/install.rs @@ -1070,25 +1070,24 @@ async fn install_container( // Auto-detection (None) is only appropriate for upgrade/switch on a running system. let use_unified = state.target_opts.unified_storage_exp; - let prepared = if use_unified { - tracing::info!("Using unified storage path for installation"); - crate::deploy::prepare_for_pull_unified( - repo, - &spec_imgref, - Some(&state.target_imgref), - storage, - None, - ) - .await? + let pulled_image = if use_unified { + tracing::info!("Using unified storage path (composefs→ostree) for installation"); + crate::deploy::pull_via_composefs(repo, &spec_imgref, storage).await? } else { - prepare_for_pull(repo, &spec_imgref, Some(&state.target_imgref), None).await? - }; - - let pulled_image = match prepared { - PreparedPullResult::AlreadyPresent(existing) => existing, - PreparedPullResult::Ready(image_meta) => { - crate::deploy::check_disk_space_ostree(repo, &image_meta, &spec_imgref)?; - pull_from_prepared(&spec_imgref, false, ProgressWriter::default(), *image_meta).await? + let prepared = + prepare_for_pull(repo, &spec_imgref, Some(&state.target_imgref), None).await?; + match prepared { + PreparedPullResult::AlreadyPresent(existing) => existing, + PreparedPullResult::Ready(image_meta) => { + crate::deploy::check_disk_space_ostree(repo, &image_meta, &spec_imgref)?; + pull_from_prepared( + &spec_imgref, + false, + ProgressWriter::default(), + *image_meta, + ) + .await? + } } }; From 3727ecc2e9a1186c6be55039df33c98ff3321a66 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Fri, 22 May 2026 17:45:03 -0400 Subject: [PATCH 06/13] store: Add BootcRepoMeta / composefs/bootc.json unified-storage flag Replace the image-presence heuristic (checking whether the booted image already exists in containers-storage) with an explicit, persistent flag stored in composefs/bootc.json. The old approach had a correctness gap: the flag could be true on the first pull even before the system had opted into unified storage, or could be spuriously false after a GC. A JSON file is cheap to read (one open syscall), unambiguous, and survives across reboots. Changes: - store/mod.rs: add BOOTC_REPO_META constant and BootcRepoMeta struct with read()/write() helpers for atomic JSON I/O. - deploy.rs: add unified_storage_enabled(); remove the now-unused image_exists_in_unified_storage() heuristic. - switch.rs, update.rs, cli.rs: replace all callers of the heuristic with unified_storage_enabled(). - image.rs: write the bootc.json flag in both set_unified_composefs (composefs backend) and set_unified (ostree backend) after the image is staged into containers-storage. - deploy.rs pull_via_composefs: also write the flag on first unified pull, so a system installed with --experimental-unified-storage has the marker set without requiring a manual set-unified invocation. Assisted-by: OpenCode (Claude Sonnet 4.6) Signed-off-by: Colin Walters --- crates/lib/src/bootc_composefs/switch.rs | 22 +++------ crates/lib/src/bootc_composefs/update.rs | 16 +++---- crates/lib/src/cli.rs | 15 +++---- crates/lib/src/deploy.rs | 35 ++++++++++----- crates/lib/src/image.rs | 23 ++++++++++ crates/lib/src/store/mod.rs | 57 ++++++++++++++++++++++++ 6 files changed, 118 insertions(+), 50 deletions(-) diff --git a/crates/lib/src/bootc_composefs/switch.rs b/crates/lib/src/bootc_composefs/switch.rs index f6b3855be..367ecbf63 100644 --- a/crates/lib/src/bootc_composefs/switch.rs +++ b/crates/lib/src/bootc_composefs/switch.rs @@ -51,23 +51,11 @@ pub(crate) async fn switch_composefs( let repo = &*booted_cfs.repo; let (image, img_config) = is_image_pulled(repo, &target_imgref).await?; - // Use unified storage if explicitly requested, or auto-detect: either the - // target image is already in bootc-owned containers-storage, OR the booted - // image is — which means the user has opted into unified storage and all - // subsequent operations (including switch to a new image) should use it. - let use_unified = if opts.unified_storage_exp { - true - } else { - let booted_imgref = host.spec.image.as_ref(); - let booted_unified = if let Some(booted) = booted_imgref { - crate::deploy::image_exists_in_unified_storage(storage, booted).await? - } else { - false - }; - let target_unified = - crate::deploy::image_exists_in_unified_storage(storage, &target_imgref).await?; - booted_unified || target_unified - }; + // Use unified storage if explicitly requested via flag, or if the + // composefs/bootc.json marker says unified storage is enabled on this system. + let use_unified = opts.unified_storage_exp + || crate::deploy::unified_storage_enabled(storage) + .context("Checking unified storage flag")?; let do_upgrade_opts = DoUpgradeOpts { soft_reboot: opts.soft_reboot, diff --git a/crates/lib/src/bootc_composefs/update.rs b/crates/lib/src/bootc_composefs/update.rs index e88d1f3e9..662bdf4ae 100644 --- a/crates/lib/src/bootc_composefs/update.rs +++ b/crates/lib/src/bootc_composefs/update.rs @@ -416,17 +416,11 @@ pub(crate) async fn upgrade_composefs( let imgref = derived_image.as_ref().or(current_image); let mut booted_imgref = imgref.ok_or_else(|| anyhow::anyhow!("No image source specified"))?; - // Auto-detect unified storage: use the unified path if the target image is - // already in bootc-owned containers-storage, OR if the booted image is — - // the latter means the user has opted into unified storage and all - // subsequent operations should use it. - let current_unified = if let Some(current) = current_image { - crate::deploy::image_exists_in_unified_storage(storage, current).await? - } else { - false - }; - do_upgrade_opts.use_unified = current_unified - || crate::deploy::image_exists_in_unified_storage(storage, booted_imgref).await?; + // Use unified storage if the composefs/bootc.json marker says it is enabled + // on this system (written by `bootc image set-unified` or by install with + // `--experimental-unified-storage`). + do_upgrade_opts.use_unified = crate::deploy::unified_storage_enabled(storage) + .context("Checking unified storage flag")?; let repo = &*composefs.repo; diff --git a/crates/lib/src/cli.rs b/crates/lib/src/cli.rs index e14e8fb44..93a66dcbd 100644 --- a/crates/lib/src/cli.rs +++ b/crates/lib/src/cli.rs @@ -1212,10 +1212,8 @@ async fn upgrade( return Ok(()); } - // Ensure the bootc storage directory is initialized; the --check path - // needs this for update_mtime() and the non-check path needs it for - // unified pull detection. - let use_unified = crate::deploy::image_exists_in_unified_storage(storage, imgref).await?; + // Check whether the composefs/bootc.json flag says unified storage is enabled. + let use_unified = crate::deploy::unified_storage_enabled(storage)?; if opts.check { let ostree_imgref = imgref.clone().into(); @@ -1405,12 +1403,9 @@ async fn switch_ostree( // Determine whether to use unified storage path. // If explicitly requested via flag, use unified storage directly. - // Otherwise, auto-detect based on whether the image exists in bootc storage. - let use_unified = if opts.unified_storage_exp { - true - } else { - crate::deploy::image_exists_in_unified_storage(storage, &target).await? - }; + // Otherwise, check the composefs/bootc.json flag. + let use_unified = opts.unified_storage_exp + || crate::deploy::unified_storage_enabled(storage)?; let fetched = if use_unified { crate::deploy::pull_via_composefs(repo, &target, storage).await? diff --git a/crates/lib/src/deploy.rs b/crates/lib/src/deploy.rs index 69009b87b..463f2f030 100644 --- a/crates/lib/src/deploy.rs +++ b/crates/lib/src/deploy.rs @@ -474,20 +474,18 @@ pub(crate) async fn prepare_for_pull( Ok(PreparedPullResult::Ready(Box::new(prepared_image))) } -/// Check whether the image exists in bootc's unified container storage. +/// Check whether unified base-image storage is enabled on this system. /// -/// This is used for auto-detection: if the image already exists in bootc storage -/// (e.g., from a previous `bootc image set-unified` or LBI pull), we can use -/// the unified storage path for faster imports. +/// Returns `true` iff `composefs/bootc.json` exists and has `unified-storage: true`. +/// This is the authoritative signal written by `bootc image set-unified` (and by +/// `bootc install --experimental-unified-storage`). /// -/// Returns true if the image exists in bootc storage. -pub(crate) async fn image_exists_in_unified_storage( - store: &Storage, - imgref: &ImageReference, -) -> Result { - let imgstore = store.get_ensure_imgstore()?; - let image_ref_str = imgref.to_transport_image()?; - imgstore.exists(&image_ref_str).await +/// If the composefs repository doesn't exist yet, the file is absent and this +/// returns `false` — a single cheap file-open attempt with no side effects. +pub(crate) fn unified_storage_enabled(store: &Storage) -> Result { + Ok(crate::store::BootcRepoMeta::read(&store.physical_root)? + .map(|m| m.unified_storage) + .unwrap_or(false)) } /// Full unified pipeline: containers-storage → composefs → ostree. @@ -614,6 +612,19 @@ pub(crate) async fn pull_via_composefs( .parse() .with_context(|| format!("Parsing manifest digest {manifest_digest_str}"))?; + // Write composefs/bootc.json to mark this system as unified-storage enabled. + // This ensures pull_auto uses the three-store pipeline on all subsequent + // upgrades, even if the user didn't explicitly run `bootc image set-unified`. + { + crate::store::ensure_composefs_dir(&store.physical_root)?; + let mut meta = crate::store::BootcRepoMeta::read(&store.physical_root)? + .unwrap_or_default(); + meta.version = 1; + meta.unified_storage = true; + meta.write(&store.physical_root) + .context("Writing unified-storage flag after composefs pull")?; + } + tracing::info!( "Composefs unified pull complete: commit {} digest {}", ostree_commit, diff --git a/crates/lib/src/image.rs b/crates/lib/src/image.rs index 1f621718b..62d3a985c 100644 --- a/crates/lib/src/image.rs +++ b/crates/lib/src/image.rs @@ -401,6 +401,17 @@ async fn set_unified_composefs( ); } + // Write composefs/bootc.json so pull_via_composefs routes through the + // three-store unified pipeline on all subsequent upgrades/switches. + { + let mut meta = crate::store::BootcRepoMeta::read(&storage.physical_root)? + .unwrap_or_default(); + meta.version = 1; + meta.unified_storage = true; + meta.write(&storage.physical_root) + .context("Writing unified-storage flag to composefs/bootc.json")?; + } + tracing::info!( message_id = SET_UNIFIED_CFS_JOURNAL_ID, bootc.status = "set_unified_complete", @@ -563,6 +574,18 @@ pub(crate) async fn set_unified(sysroot: &crate::store::Storage) -> Result<()> { ostree_ext::container::store::ImageImporter::new(repo, &ostree_imgref, Default::default()) .await?; + // Ensure the composefs directory exists (it may not on a fresh ostree system + // that has never run pull_via_composefs), then write the bootc.json flag. + crate::store::ensure_composefs_dir(&sysroot.physical_root)?; + { + let mut meta = crate::store::BootcRepoMeta::read(&sysroot.physical_root)? + .unwrap_or_default(); + meta.version = 1; + meta.unified_storage = true; + meta.write(&sysroot.physical_root) + .context("Writing unified-storage flag to composefs/bootc.json")?; + } + tracing::info!( message_id = SET_UNIFIED_JOURNAL_ID, bootc.status = "set_unified_complete", diff --git a/crates/lib/src/store/mod.rs b/crates/lib/src/store/mod.rs index 940d59100..5e4031e7c 100644 --- a/crates/lib/src/store/mod.rs +++ b/crates/lib/src/store/mod.rs @@ -62,6 +62,63 @@ pub const COMPOSEFS: &str = "composefs"; /// to avoid leaking information. pub(crate) const COMPOSEFS_MODE: Mode = Mode::from_raw_mode(0o700); +/// Path to bootc-specific metadata stored alongside the composefs `meta.json`. +/// +/// This file records bootc-level configuration for the composefs repository, +/// such as whether unified base-image storage is enabled. Its absence means +/// the system is not in unified-storage mode. The path is relative to the +/// physical system root. +pub(crate) const BOOTC_REPO_META: &str = "composefs/bootc.json"; + +/// bootc-specific metadata stored in `composefs/bootc.json`. +#[derive(Debug, Default, serde::Serialize, serde::Deserialize)] +#[serde(rename_all = "kebab-case")] +pub(crate) struct BootcRepoMeta { + /// Schema version; currently must be 1. + #[serde(default)] + pub version: u32, + + /// When true, `pull_via_composefs` is used for the base-image pull, + /// routing through the three-store unified pipeline + /// (containers-storage → composefs → ostree). + /// Set by `bootc image set-unified` and by install with + /// `--experimental-unified-storage`. + #[serde(default)] + pub unified_storage: bool, +} + +impl BootcRepoMeta { + /// Read `composefs/bootc.json` from the physical root, if it exists. + /// Returns `Ok(None)` when the file is absent (i.e. unified storage + /// has never been enabled on this system). + pub(crate) fn read(physical_root: &Dir) -> Result> { + match physical_root.open(BOOTC_REPO_META) { + Ok(f) => { + let meta: Self = serde_json::from_reader(std::io::BufReader::new(f)) + .with_context(|| format!("Parsing {BOOTC_REPO_META}"))?; + Ok(Some(meta)) + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(e) => Err(e).with_context(|| format!("Opening {BOOTC_REPO_META}")), + } + } + + /// Write `composefs/bootc.json` to the physical root, atomically. + /// The composefs directory must already exist. + pub(crate) fn write(&self, physical_root: &Dir) -> Result<()> { + let json = serde_json::to_string_pretty(self).context("Serializing BootcRepoMeta")?; + // Write atomically via a temp file. + let tmp = format!("{BOOTC_REPO_META}.tmp"); + physical_root + .write(&tmp, json.as_bytes()) + .with_context(|| format!("Writing {tmp}"))?; + physical_root + .rename(&tmp, physical_root, BOOTC_REPO_META) + .with_context(|| format!("Renaming {tmp} -> {BOOTC_REPO_META}"))?; + Ok(()) + } +} + /// Ensure the composefs directory exists in the given physical root /// with the correct permissions (mode 0700). pub(crate) fn ensure_composefs_dir(physical_root: &Dir) -> Result<()> { From 9be32d36169765249c0ba6d107968d87f0a410a5 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Fri, 22 May 2026 18:41:52 -0400 Subject: [PATCH 07/13] podstorage: Add pull_from_containers_storage with STORAGE_OPTS AIS support During bootc install with --experimental-unified-storage, the source image lives in the host's containers-storage which bcvk mounts read-only via virtiofs. bcvk injects STORAGE_OPTS=additionalimagestore= into the VM environment via systemd credentials so the install container knows where to find it. Add pull_from_containers_storage() which reads STORAGE_OPTS and, if an additionalimagestore= path is present, uses pull_from_storage_root() to copy via skopeo with that path set as an AIS. Skopeo respects STORAGE_OPTS for source image lookup; podman does not search AIS when resolving a push source. pull_from_storage_root() uses skopeo copy with the AIS set on the subprocess rather than podman --root , which would trigger a libpod database mismatch (the foreign store was created by a different podman instance with a different static dir). Assisted-by: OpenCode (claude-sonnet-4-6@default) Signed-off-by: Colin Walters --- crates/lib/src/bootc_composefs/repo.rs | 7 ++- crates/lib/src/podstorage.rs | 78 ++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 3 deletions(-) diff --git a/crates/lib/src/bootc_composefs/repo.rs b/crates/lib/src/bootc_composefs/repo.rs index 1f59fb8d7..9dcc7d559 100644 --- a/crates/lib/src/bootc_composefs/repo.rs +++ b/crates/lib/src/bootc_composefs/repo.rs @@ -184,11 +184,12 @@ async fn pull_composefs_unified( // Stage 1: get the image into bootc-owned containers-storage. if imgref.transport == containers_image_proxy::Transport::ContainerStorage { - // The image is in the default containers-storage (/var/lib/containers/storage). - // Copy it into bootc-owned storage. + // The image is in a containers-storage instance — either the default + // /var/lib/containers/storage or an additional image store advertised + // via STORAGE_OPTS (e.g. the bcvk virtiofs mount). tracing::info!("Unified pull: copying {image} from host containers-storage"); imgstore - .pull_from_host_storage(image) + .pull_from_containers_storage(image) .await .context("Copying image from host containers-storage into bootc storage")?; } else { diff --git a/crates/lib/src/podstorage.rs b/crates/lib/src/podstorage.rs index de6c418ed..ece5677af 100644 --- a/crates/lib/src/podstorage.rs +++ b/crates/lib/src/podstorage.rs @@ -443,6 +443,33 @@ impl CStorage { Ok(true) } + /// Copy a `containers-storage:` image into this bootc-owned storage. + /// + /// If `STORAGE_OPTS=additionalimagestore=` is set in the environment + /// (as injected by bcvk via systemd credentials), uses that path as the + /// explicit `--root` for podman — because podman does not search additional + /// image stores when resolving a push source. Otherwise falls back to the + /// default `/var/lib/containers/storage`. + pub(crate) async fn pull_from_containers_storage(&self, image: &str) -> Result<()> { + let ais = std::env::var("STORAGE_OPTS") + .ok() + .and_then(|v| { + v.split(',') + .find(|s| s.starts_with("additionalimagestore=")) + .and_then(|s| s.strip_prefix("additionalimagestore=")) + .map(str::to_owned) + }); + + if let Some(src_root) = ais { + tracing::debug!( + "STORAGE_OPTS has additionalimagestore={src_root}; using as source storage root" + ); + self.pull_from_storage_root(&src_root, image).await + } else { + self.pull_from_host_storage(image).await + } + } + /// Copy an image from the default container storage (/var/lib/containers/) /// to this storage. #[context("Pulling from host storage: {image}")] @@ -468,6 +495,57 @@ impl CStorage { Ok(()) } + /// Copy an image from an additional image store into this bootc-owned storage. + /// + /// Used during install when the source image lives in a storage instance + /// advertised via `STORAGE_OPTS=additionalimagestore=` (e.g. the bcvk + /// virtiofs mount) rather than in the default `/var/lib/containers/storage`. + /// + /// Uses `skopeo copy` with `STORAGE_OPTS=additionalimagestore=` on + /// the subprocess. Skopeo respects `STORAGE_OPTS` for source image lookup and + /// does not touch libpod state, which avoids two problems: + /// + /// - `podman --root ` triggers a libpod database mismatch when the + /// foreign store was created by a different podman instance. + /// - `podman pull containers-storage:` with an AIS makes the image *visible* but + /// does not materialize it into the destination root storage. + /// + /// The netavark/skopeo-first issue (containers/skopeo#658) does not apply here + /// because `CStorage::create` always runs `podman images` to initialize the + /// bootc-owned storage before this function is ever called. + #[context("Copying from additional image store at {src_root}: {image}")] + pub(crate) async fn pull_from_storage_root( + &self, + src_root: &str, + image: &str, + ) -> Result<()> { + let mut cmd = Command::new(bootc_utils::skopeo_bin()); + cmd.stdin(Stdio::null()); + cmd.stdout(Stdio::null()); + + let temp_runroot = TempDir::new(cap_std::ambient_authority())?; + let mut fds = CmdFds::new(); + bind_storage_roots(&mut cmd, &mut fds, &self.storage_root, &temp_runroot)?; + cmd.take_fds(fds); + + // Set the AIS on the skopeo subprocess so it can find the source image. + let storage_opt = format!("additionalimagestore={src_root}"); + cmd.env("STORAGE_OPTS", &storage_opt); + + let src = format!("containers-storage:{image}"); + let dest = format!( + "containers-storage:[overlay@{STORAGE_ALIAS_DIR}+/proc/self/fd/{STORAGE_RUN_FD}]{image}" + ); + // TODO: wire up structured progress reporting for this copy instead of + // suppressing output; for now --quiet avoids unstructured blob/layer spam. + cmd.args(["copy", "--quiet", "--remove-signatures", &src, &dest]); + + let mut cmd = AsyncCommand::from(cmd); + cmd.run().await?; + temp_runroot.close()?; + Ok(()) + } + /// Pull an image with streaming progress display. /// /// Uses the podman native libpod HTTP API instead of shelling out, From aa11350c53bd24680a164740ea80abb030dd4f73 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Fri, 22 May 2026 18:42:07 -0400 Subject: [PATCH 08/13] =?UTF-8?q?deploy:=20Wire=20containers-storage?= =?UTF-8?q?=E2=86=92composefs=E2=86=92ostree=20pipeline,=20add=20GC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the old skopeo→ostree unified path with a full three-store pipeline: containers-storage → composefs (zero-copy reflink) → ostree commit synthesis via import_from_composefs_repo. pull_via_composefs() is now the single entry point for unified storage pulls, used by both install and upgrade/switch paths. It: 1. Stages into bootc containers-storage (via pull_from_containers_storage which respects STORAGE_OPTS for bcvk install, or pull_with_progress for registry sources) 2. Zero-copy imports from containers-storage into composefs OCI repo 3. Synthesizes an ostree commit from the composefs filesystem tree using FICLONE reflinks (falls back to byte copy on non-reflink filesystems) 4. Writes composefs/bootc.json unified-storage flag so subsequent upgrades automatically use the unified path without needing set-unified Add prune_composefs_store() called from cleanup() to GC stale composefs objects after upgrade. Previously composefs splitstreams accumulated unboundedly on ostree+unified systems. The pruner collects live manifest digests from ostree deployment commit metadata, untags orphaned bootc refs, and runs composefs repo.gc(). Also replace the image_exists_in_unified_storage() heuristic in switch.rs, update.rs, and cli.rs with unified_storage_enabled() which reads the explicit composefs/bootc.json flag. The heuristic had two bugs: containers- storage always exists (LBIs use it), and after bootc switch the new image is not yet in storage so it would return false and silently fall back to the non-unified path. Assisted-by: OpenCode (claude-sonnet-4-6@default) Signed-off-by: Colin Walters --- crates/lib/src/cli.rs | 18 ++++++- crates/lib/src/deploy.rs | 109 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 2 deletions(-) diff --git a/crates/lib/src/cli.rs b/crates/lib/src/cli.rs index 93a66dcbd..838cb47f6 100644 --- a/crates/lib/src/cli.rs +++ b/crates/lib/src/cli.rs @@ -638,7 +638,15 @@ pub(crate) enum InternalsOpts { #[clap(subcommand)] Fsverity(FsverityOpts), /// Perform consistency checking. - Fsck, + Fsck { + /// Check image store consistency (containers-storage vs composefs). + /// + /// Enumerates all images in the bootc-managed stores and verifies + /// each one is consistent (i.e. fully imported into all required stores). + /// Exits non-zero if any inconsistency is found. + #[clap(long, default_value_t = true)] + images: bool, + }, /// Perform cleanup actions Cleanup, Relabel { @@ -2086,9 +2094,15 @@ async fn run_from_opt(opt: Opt) -> Result<()> { } } InternalsOpts::Reboot => crate::reboot::reboot(), - InternalsOpts::Fsck => { + InternalsOpts::Fsck { images } => { let storage = &get_storage().await?; crate::fsck::fsck(&storage, std::io::stdout().lock()).await?; + if images { + let ok = crate::fsck::fsck_images(&storage).await?; + if !ok { + std::process::exit(1); + } + } Ok(()) } InternalsOpts::FixupEtcFstab => crate::deploy::fixup_etc_fstab(&root), diff --git a/crates/lib/src/deploy.rs b/crates/lib/src/deploy.rs index 463f2f030..bb1e93c06 100644 --- a/crates/lib/src/deploy.rs +++ b/crates/lib/src/deploy.rs @@ -764,6 +764,108 @@ pub(crate) async fn wipe_ostree(sysroot: Sysroot) -> Result<()> { Ok(()) } +/// Prune composefs objects no longer referenced by any live deployment. +/// +/// This is a no-op if unified storage is not enabled or the composefs +/// repository doesn't exist yet. +/// +/// On an ostree+unified-storage system every `bootc upgrade` tags the newly +/// pulled manifest in the composefs repo (`localhost/bootc-sha256:`). +/// After the ostree prune step those old tags are no longer anchored to any +/// deployment. This function: +/// +/// 1. Collects the manifest digests of all current ostree deployments. +/// 2. Untagges any composefs bootc-tag whose digest is not in that set. +/// 3. Runs `repo.gc()` to drop orphaned splitstream objects. +#[context("Pruning composefs store")] +async fn prune_composefs_store(sysroot: &Storage) -> Result<()> { + if !unified_storage_enabled(sysroot)? { + return Ok(()); + } + let cfs_repo = match sysroot.get_ensure_composefs() { + Ok(r) => r, + Err(e) => { + tracing::debug!("Composefs repo not available, skipping prune: {e}"); + return Ok(()); + } + }; + + let ostree = match sysroot.get_ostree() { + Ok(o) => o, + Err(_) => return Ok(()), + }; + let repo = ostree.repo(); + + // Collect manifest digests from all current ostree deployments. + // Deployments pulled via pull_via_composefs store META_MANIFEST_DIGEST + // (`ostree.manifest-digest`) in their commit metadata. + let mut live_digests: std::collections::HashSet = + std::collections::HashSet::new(); + for deployment in ostree.deployments() { + let commit_str = deployment.csum(); + match repo.load_commit(commit_str.as_str()) { + Ok((commitv, _)) => { + match ostree_ext::container::store::manifest_digest_from_commit(&commitv) { + Ok(digest) => { + live_digests.insert(digest.to_string()); + } + Err(e) => { + // Not every deployment was pulled via composefs; skip gracefully. + tracing::debug!( + "No manifest digest in commit {commit_str}: {e}" + ); + } + } + } + Err(e) => { + tracing::warn!("Failed to load commit {commit_str}: {e}"); + } + } + } + + // The composefs repository API is synchronous; move the work off the + // async executor thread. + let cfs_repo = std::sync::Arc::clone(&cfs_repo); + tokio::task::spawn_blocking(move || -> Result<()> { + use composefs_ctl::composefs_oci; + use crate::composefs_consts::BOOTC_TAG_PREFIX; + + let all_tags = composefs_oci::list_refs(&*cfs_repo) + .context("Listing composefs OCI refs")?; + + let mut untagged = 0usize; + for (tag_name, _manifest_digest) in &all_tags { + if !tag_name.starts_with(BOOTC_TAG_PREFIX) { + // Not a bootc-owned tag; leave it for the user / other tools. + continue; + } + // The tag is `localhost/bootc-sha256:`. Strip the prefix to + // recover the canonical digest string (`sha256:`). + let digest_str = tag_name + .strip_prefix(BOOTC_TAG_PREFIX) + .expect("checked above"); + if !live_digests.contains(digest_str) { + tracing::debug!("Removing unreferenced composefs bootc tag: {tag_name}"); + composefs_oci::untag_image(&*cfs_repo, tag_name) + .with_context(|| format!("Removing composefs tag {tag_name}"))?; + untagged += 1; + } + } + + if untagged > 0 { + tracing::info!("Removed {untagged} unreferenced composefs tag(s); running GC"); + let gc_result = cfs_repo.gc(&[]).context("Running composefs GC")?; + tracing::debug!("Composefs GC result: {:?}", gc_result); + } else { + tracing::debug!("No unreferenced composefs tags; skipping GC"); + } + + Ok(()) + }) + .await + .context("composefs prune task join error")? +} + pub(crate) async fn cleanup(sysroot: &Storage) -> Result<()> { // Log the cleanup operation to systemd journal const CLEANUP_JOURNAL_ID: &str = "2f1a0b9c8d7e6f5a4b3c2d1e0f9a8b7c6"; @@ -825,6 +927,13 @@ pub(crate) async fn cleanup(sysroot: &Storage) -> Result<()> { // We run these in parallel mostly because we can. tokio::try_join!(repo_prune, bound_prune)?; + + // After ostree prune, clean up any stale composefs tags and GC orphaned + // splitstream objects on unified-storage systems. + prune_composefs_store(sysroot) + .await + .context("Pruning composefs store")?; + Ok(()) } From 21e43a4dd26fed2305d9aa9fd93f82c8337cbef4 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Fri, 22 May 2026 18:42:19 -0400 Subject: [PATCH 09/13] image: Add Partial type, composefs cross-reference, and fsck --images bootc image list previously reported any image in bootc containers-storage as 'unified', even if the composefs import had failed or never run. This was misleading since an image is only truly unified if it's fully imported across all required stores. Add ImageListTypeColumn::Partial for images in containers-storage that lack a composefs bootc tag (localhost/bootc-sha256:). The composefs tags are the authoritative registry: only images explicitly tagged by pull_via_composefs are candidates for 'unified' status. LBIs live in containers-storage but are not tagged in composefs and are not subject to this check. Add podman Digest field to ImageListEntry so images can be cross-referenced by manifest digest rather than by name. Add fsck_images() and wire into bootc internals fsck --images. The fsck iterates composefs bootc tags (the authoritative set of bootc-managed images) and verifies each is also present in containers-storage. Exits non-zero if any image is partial. This is used by TMT tests instead of parsing JSON output in nushell. Assisted-by: OpenCode (claude-sonnet-4-6@default) Signed-off-by: Colin Walters --- crates/lib/src/cli.rs | 57 +++++++++++---- crates/lib/src/fsck.rs | 9 +++ crates/lib/src/image.rs | 148 ++++++++++++++++++++++++++++++++++++++- crates/lib/src/podman.rs | 5 ++ 4 files changed, 205 insertions(+), 14 deletions(-) diff --git a/crates/lib/src/cli.rs b/crates/lib/src/cli.rs index 838cb47f6..5ce8422fe 100644 --- a/crates/lib/src/cli.rs +++ b/crates/lib/src/cli.rs @@ -619,6 +619,21 @@ pub(crate) enum FsverityOpts { }, } +/// Subcommands for `bootc internals fsck`. +#[derive(Debug, clap::Subcommand, PartialEq, Eq)] +pub(crate) enum FsckCheck { + /// Check image store metadata consistency. + /// + /// Verifies that every image bootc has committed to (i.e. has a composefs + /// GC tag) is also present in bootc's containers-storage. This is a + /// metadata-level check only — it does not walk composefs objects or verify + /// content integrity. Exits non-zero if any image is partially imported. + /// + /// LBIs (logically bound images) are not subject to this check; only images + /// that went through the unified storage pipeline are checked. + Images, +} + /// Hidden, internal only options #[derive(Debug, clap::Subcommand, PartialEq, Eq)] pub(crate) enum InternalsOpts { @@ -638,14 +653,22 @@ pub(crate) enum InternalsOpts { #[clap(subcommand)] Fsverity(FsverityOpts), /// Perform consistency checking. + /// + /// Without a subcommand, runs all checks. Use a subcommand to run a + /// specific subset. Fsck { - /// Check image store consistency (containers-storage vs composefs). + #[clap(subcommand)] + check: Option, + /// Attempt to repair detected inconsistencies. /// - /// Enumerates all images in the bootc-managed stores and verifies - /// each one is consistent (i.e. fully imported into all required stores). - /// Exits non-zero if any inconsistency is found. - #[clap(long, default_value_t = true)] - images: bool, + /// Currently handles: images present in the composefs repo (with a bootc + /// GC tag) that are missing from bootc containers-storage (copies them + /// back from composefs). + /// + /// Does NOT attempt to repair images in containers-storage that are + /// missing from composefs; those require re-running `bootc upgrade`. + #[clap(long)] + repair: bool, }, /// Perform cleanup actions Cleanup, @@ -2094,13 +2117,23 @@ async fn run_from_opt(opt: Opt) -> Result<()> { } } InternalsOpts::Reboot => crate::reboot::reboot(), - InternalsOpts::Fsck { images } => { + InternalsOpts::Fsck { check, repair } => { let storage = &get_storage().await?; - crate::fsck::fsck(&storage, std::io::stdout().lock()).await?; - if images { - let ok = crate::fsck::fsck_images(&storage).await?; - if !ok { - std::process::exit(1); + match check { + // `bootc internals fsck images` — metadata-level image check only. + Some(FsckCheck::Images) => { + let ok = crate::fsck::fsck_images(storage, repair).await?; + if !ok { + std::process::exit(1); + } + } + // `bootc internals fsck` with no subcommand — run everything. + None => { + crate::fsck::fsck(storage, std::io::stdout().lock()).await?; + let ok = crate::fsck::fsck_images(storage, repair).await?; + if !ok { + std::process::exit(1); + } } } Ok(()) diff --git a/crates/lib/src/fsck.rs b/crates/lib/src/fsck.rs index 25e95e33c..d38038b94 100644 --- a/crates/lib/src/fsck.rs +++ b/crates/lib/src/fsck.rs @@ -268,6 +268,15 @@ async fn check_fsverity_inner(storage: &Storage) -> FsckResult { fsck_err(err) } +/// Check image store consistency. Delegates to [`crate::image::fsck_images`]. +/// +/// Returns `true` if all checks passed, `false` if any inconsistency was found. +/// When `repair` is `true`, attempts to restore images that are in composefs +/// but missing from containers-storage. +pub(crate) async fn fsck_images(storage: &Storage, repair: bool) -> anyhow::Result { + crate::image::fsck_images(storage, repair).await +} + pub(crate) async fn fsck(storage: &Storage, mut output: impl std::io::Write) -> anyhow::Result<()> { let mut checks = FSCK_CHECKS.static_slice().iter().collect::>(); checks.sort_by(|a, b| a.ordering.cmp(&b.ordering)); diff --git a/crates/lib/src/image.rs b/crates/lib/src/image.rs index 62d3a985c..f51a46ee7 100644 --- a/crates/lib/src/image.rs +++ b/crates/lib/src/image.rs @@ -7,6 +7,7 @@ use bootc_utils::CommandRunExt; use cap_std_ext::cap_std::{self, fs::Dir}; use clap::ValueEnum; use comfy_table::{Table, presets::NOTHING}; +use composefs_ctl::composefs_oci; use fn_error_context::context; use ostree_ext::container::{ImageReference, Transport}; use serde::Serialize; @@ -14,6 +15,7 @@ use serde::Serialize; use crate::{ boundimage::query_bound_images, cli::{ImageListFormat, ImageListType}, + composefs_consts::BOOTC_TAG_PREFIX, podstorage::CStorage, spec::Host, store::Storage, @@ -40,6 +42,9 @@ async fn image_exists_in_host_storage(image: &str) -> Result { enum ImageListTypeColumn { Host, Unified, + /// Image is in bootc containers-storage but composefs import is incomplete + /// or failed. Re-run `bootc upgrade` to complete the import. + Partial, Logical, } @@ -89,16 +94,54 @@ async fn list_host_images_composefs(sysroot: &crate::store::Storage) -> Result` means the composefs + // import completed successfully for that manifest digest. + // + // If the composefs repo doesn't exist yet (e.g. unified storage was + // never initialised), the set is empty and every cstorage image is + // treated as `partial`. + let composefs_digests: std::collections::HashSet = + match sysroot.get_ensure_composefs() { + Ok(repo) => { + composefs_oci::list_refs(&*repo) + .context("Listing composefs OCI refs")? + .into_iter() + .filter_map(|(tag, _digest)| { + // Tag format: `localhost/bootc-sha256:` + // Strip the prefix to recover the manifest digest string + // in the canonical `sha256:` form that podman reports. + tag.strip_prefix(BOOTC_TAG_PREFIX).map(str::to_owned) + }) + .collect() + } + Err(_) => Default::default(), + }; + Ok(images .into_iter() .flat_map(|entry| { + let image_type = if composefs_digests.is_empty() { + // No composefs repo / no tags yet — conservatively mark partial. + ImageListTypeColumn::Partial + } else { + // Cross-reference using the manifest digest that podman reports + // for each image. If the digest is present in our composefs tag + // set the image was fully imported; otherwise the import is + // incomplete. + match entry.digest.as_deref() { + Some(d) if composefs_digests.contains(d) => ImageListTypeColumn::Unified, + _ => ImageListTypeColumn::Partial, + } + }; entry .names .unwrap_or_default() .into_iter() - .map(|name| ImageOutput { + .map(move |name| ImageOutput { image: name, - image_type: ImageListTypeColumn::Unified, + image_type: image_type.clone(), }) }) .collect()) @@ -175,6 +218,107 @@ pub(crate) async fn list_entrypoint( Ok(()) } +/// Check image store consistency across containers-storage and composefs. +/// +/// Enumerates all images in the bootc-managed stores and verifies that each +/// image present in containers-storage is also fully imported into the +/// composefs store (i.e. has a `localhost/bootc-sha256:` tag). +/// +/// Prints a human-readable report to stdout and returns `true` if all +/// checks pass, `false` if any inconsistency is found. +/// +/// When unified storage is not enabled (no composefs repo / no bootc.json), +/// this prints a note and returns `true` (nothing to check). +pub(crate) async fn fsck_images(sysroot: &Storage) -> anyhow::Result { + // Check if unified storage is enabled on this system. + let meta = crate::store::BootcRepoMeta::read(&sysroot.physical_root)?; + let unified_enabled = meta.map(|m| m.unified_storage).unwrap_or(false); + + if !unified_enabled { + println!("bootc fsck images: unified storage not enabled; nothing to check"); + return Ok(true); + } + + println!("bootc fsck: checking image stores"); + + // Use composefs tags as the authoritative registry: only images that bootc + // explicitly tagged (via pull_via_composefs) are in scope for the check. + // LBIs live in containers-storage but are NOT tagged in composefs — they + // are not subject to this check. + let cfs_repo = match sysroot.get_ensure_composefs() { + Ok(r) => r, + Err(_) => { + println!("bootc fsck: composefs repo not found; nothing to check"); + return Ok(true); + } + }; + + let bootc_tags: Vec<(String, composefs_oci::OciDigest)> = + composefs_oci::list_refs(&*cfs_repo) + .context("Listing composefs OCI refs")? + .into_iter() + .filter(|(tag, _)| tag.starts_with(BOOTC_TAG_PREFIX)) + .collect(); + + if bootc_tags.is_empty() { + println!("bootc fsck: no bootc-managed images in composefs; nothing to check"); + return Ok(true); + } + + // Build the set of manifest digests present in containers-storage for + // cross-referencing. + let images = list_host_images_composefs(sysroot).await?; + let cstor_digests: std::collections::HashSet = images + .iter() + .filter_map(|img| match img.image_type { + ImageListTypeColumn::Unified | ImageListTypeColumn::Partial => { + // Extract digest from the image name (format: name@sha256:hex) + img.image.split('@').nth(1).map(str::to_owned) + } + _ => None, + }) + .collect(); + + let mut n_ok = 0usize; + let mut n_fail = 0usize; + + for (tag, manifest_digest) in &bootc_tags { + let digest_str = manifest_digest.to_string(); + // The tag is `localhost/bootc-sha256:`; strip prefix to get + // `sha256:` which is what podman reports as the image digest. + let short_digest = tag + .strip_prefix(BOOTC_TAG_PREFIX) + .unwrap_or(digest_str.as_str()); + + // Find cstorage image by matching digest + let in_cstor = cstor_digests.contains(&format!("sha256:{}", { + // composefs digest may be `sha256:hex` already + short_digest.strip_prefix("sha256:").unwrap_or(short_digest) + })) || cstor_digests.contains(short_digest); + + let in_composefs = true; // we got this from composefs list_refs + + if in_cstor && in_composefs { + println!(" [OK] {tag} (composefs: yes, containers-storage: yes)"); + n_ok += 1; + } else { + println!( + " [FAIL] {tag} - composefs: yes, containers-storage: {}", + if in_cstor { "yes" } else { "NO" } + ); + n_fail += 1; + } + } + + if n_fail == 0 { + println!("bootc fsck: all checks passed ({n_ok} image(s) OK)"); + Ok(true) + } else { + println!("bootc fsck: {n_fail} failure(s) found ({n_ok} OK, {n_fail} FAIL)"); + Ok(false) + } +} + /// Returns the source and target ImageReference /// If the source isn't specified, we use booted image /// If the target isn't specified, we push to containers-storage with our default image diff --git a/crates/lib/src/podman.rs b/crates/lib/src/podman.rs index 9b287fb40..1d1451108 100644 --- a/crates/lib/src/podman.rs +++ b/crates/lib/src/podman.rs @@ -19,6 +19,11 @@ pub(crate) struct Inspect { pub(crate) struct ImageListEntry { pub(crate) id: String, pub(crate) names: Option>, + /// Manifest digest for this image (e.g. `sha256:abc...`). + /// + /// Populated by `podman image list --format=json` in modern podman versions. + /// Used to cross-reference against composefs bootc tags. + pub(crate) digest: Option, } /// Given an image ID, return its manifest digest From 62ac1bbc261d5a661ea03f593472308fda3bcd4d Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Fri, 22 May 2026 18:42:31 -0400 Subject: [PATCH 10/13] tmt: Add bootc_install_args plan metadata, unified storage test Add extra-bootc_install_args field to integration.fmf plans, parsed by xtask run-tmt and passed as --install-arg= to bcvk. This allows plans to opt into bootc install flags without hardcoding them in xtask. plan-44-readonly-unified uses extra-bootc_install_args: --experimental-unified-storage instead of the old try_bind_storage approach. The test now: - Detects if already in unified mode (installed with --experimental-unified-storage) and skips bootc image set-unified if so - Uses bootc internals fsck --images as the primary consistency check instead of parsing JSON/podman output in nushell - Runs the full readonly test suite Skip 011-test-ostree-ext-cli.nu on unified storage installs since the ostree commit is synthesized from composefs via FICLONE reflinks rather than imported via the ostree-container protocol, so ostree-container image metadata is not available. Assisted-by: OpenCode (claude-sonnet-4-6@default) Signed-off-by: Colin Walters --- crates/xtask/src/tmt.rs | 97 +++++++++++++++++++ tmt/plans/integration.fmf | 2 +- tmt/tests/booted/bootc_testlib.nu | 13 +++ .../readonly/011-test-ostree-ext-cli.nu | 8 ++ tmt/tests/booted/readonly/015-test-fsck.nu | 10 +- tmt/tests/booted/test-44-readonly-unified.nu | 32 ++++-- tmt/tests/booted/test-composefs-gc-uki.nu | 2 + tmt/tests/booted/test-composefs-gc.nu | 2 + .../booted/test-image-pushpull-upgrade.nu | 4 + tmt/tests/booted/test-image-upgrade-reboot.nu | 2 + .../booted/test-logically-bound-switch.nu | 4 + tmt/tests/booted/test-rollback.nu | 1 + tmt/tests/booted/test-switch-to-unified.nu | 3 +- 13 files changed, 164 insertions(+), 16 deletions(-) diff --git a/crates/xtask/src/tmt.rs b/crates/xtask/src/tmt.rs index 0152285ef..505790a44 100644 --- a/crates/xtask/src/tmt.rs +++ b/crates/xtask/src/tmt.rs @@ -19,6 +19,7 @@ const COMMON_INST_ARGS: &[&str] = &["--label=bootc.test=1"]; // Metadata field names const FIELD_TRY_BIND_STORAGE: &str = "try_bind_storage"; +const FIELD_BOOTC_INSTALL_ARGS: &str = "bootc_install_args"; const FIELD_SUMMARY: &str = "summary"; const FIELD_ADJUST: &str = "adjust"; @@ -212,6 +213,7 @@ struct PlanMetadata { try_bind_storage: bool, skip_if_composefs: bool, skip_if_uki: bool, + bootc_install_args: Vec, } /// Parse integration.fmf to extract extra-try_bind_storage for all plans @@ -254,6 +256,7 @@ fn parse_plan_metadata( try_bind_storage: b, skip_if_uki: false, skip_if_composefs: false, + bootc_install_args: Vec::new(), }); } } @@ -270,6 +273,7 @@ fn parse_plan_metadata( skip_if_composefs: b, skip_if_uki: false, try_bind_storage: false, + bootc_install_args: Vec::new(), }); } } @@ -286,6 +290,25 @@ fn parse_plan_metadata( skip_if_uki: b, skip_if_composefs: false, try_bind_storage: false, + bootc_install_args: Vec::new(), + }); + } + } + + if let Some(install_args_val) = plan_data.get(&serde_yaml::Value::String(format!( + "extra-{}", + FIELD_BOOTC_INSTALL_ARGS + ))) { + if let Some(s) = install_args_val.as_str() { + let args: Vec = s.split_whitespace().map(|a| a.to_string()).collect(); + plan_metadata + .entry(plan_name.to_string()) + .and_modify(|m| m.bootc_install_args = args.clone()) + .or_insert(PlanMetadata { + bootc_install_args: args, + skip_if_uki: false, + skip_if_composefs: false, + try_bind_storage: false, }); } } @@ -478,6 +501,17 @@ pub(crate) fn run_tmt(sh: &Shell, args: &RunTmtArgs) -> Result<()> { } } + // Add per-plan bootc install args as --install-arg= + let bootc_install_args = plan_metadata + .iter() + .find(|(key, _)| plan.ends_with(key.as_str())) + .map(|(_, v)| v.bootc_install_args.clone()) + .unwrap_or_default(); + + for arg in &bootc_install_args { + opts.push(format!("--install-arg={arg}")); + } + opts.extend(bcvk_opts.install_args()); opts @@ -908,6 +942,8 @@ struct TestDef { skip_if_composefs: bool, /// Whether to skip this test for images with UKI skip_if_uki: bool, + /// Extra bootc install args to pass via --install-arg= + bootc_install_args: Vec, /// TMT fmf attributes to pass through (summary, duration, adjust, etc.) tmt: serde_yaml::Value, } @@ -1088,6 +1124,18 @@ fn generate_integration() -> Result<(String, String)> { .and_then(|v| v.as_bool()) .unwrap_or(false); + let bootc_install_args: Vec = metadata + .extra + .as_mapping() + .and_then(|m| { + m.get(&serde_yaml::Value::String( + FIELD_BOOTC_INSTALL_ARGS.to_string(), + )) + }) + .and_then(|v| v.as_str()) + .map(|s| s.split_whitespace().map(|a| a.to_string()).collect()) + .unwrap_or_default(); + tests.push(TestDef { number: metadata.number, name: display_name, @@ -1095,6 +1143,7 @@ fn generate_integration() -> Result<(String, String)> { try_bind_storage, skip_if_composefs, skip_if_uki, + bootc_install_args, tmt: metadata.tmt, }); } @@ -1224,6 +1273,13 @@ fn generate_integration() -> Result<(String, String)> { ); } + if !test.bootc_install_args.is_empty() { + plan_value.insert( + serde_yaml::Value::String(format!("extra-{}", FIELD_BOOTC_INSTALL_ARGS)), + serde_yaml::Value::String(test.bootc_install_args.join(" ")), + ); + } + plans_mapping.insert( serde_yaml::Value::String(plan_key), serde_yaml::Value::Mapping(plan_value), @@ -1362,6 +1418,47 @@ set -eux assert!(tmt.contains_key(&serde_yaml::Value::String("adjust".to_string()))); } + #[test] + fn test_parse_plan_metadata_bootc_install_args() { + // Simulate the integration.fmf content for plan-44 + let fmf_content = r#" +/plan-44-readonly-unified: + summary: Run readonly tests after onboarding to unified storage + discover: + how: fmf + test: + - /tmt/tests/tests/test-44-readonly-unified + extra-bootc_install_args: --experimental-unified-storage +"#; + let tmp = tempfile::NamedTempFile::new().unwrap(); + std::fs::write(tmp.path(), fmf_content).unwrap(); + let path = camino::Utf8Path::from_path(tmp.path()).unwrap(); + + let metadata = parse_plan_metadata(path).unwrap(); + let plan_meta = metadata.get("/plan-44-readonly-unified").unwrap(); + assert_eq!( + plan_meta.bootc_install_args, + vec!["--experimental-unified-storage"] + ); + assert!(!plan_meta.try_bind_storage); + + // Simulate the plan name lookup as done in the execution loop + let plan = "/tmt/plans/integration/plan-44-readonly-unified"; + let found = metadata + .iter() + .find(|(key, _)| plan.ends_with(key.as_str())) + .map(|(_, v)| v.bootc_install_args.clone()) + .unwrap_or_default(); + assert_eq!(found, vec!["--experimental-unified-storage"]); + + // Verify the opts vec gets --install-arg= entries + let mut opts: Vec = vec![]; + for arg in &found { + opts.push(format!("--install-arg={arg}")); + } + assert_eq!(opts, vec!["--install-arg=--experimental-unified-storage"]); + } + #[test] fn test_parse_tmt_metadata_with_try_bind_storage() { let content = r#"# number: 24 diff --git a/tmt/plans/integration.fmf b/tmt/plans/integration.fmf index 58d2e452d..c539eede0 100644 --- a/tmt/plans/integration.fmf +++ b/tmt/plans/integration.fmf @@ -276,5 +276,5 @@ execute: how: fmf test: - /tmt/tests/tests/test-44-readonly-unified - extra-try_bind_storage: true + extra-bootc_install_args: --experimental-unified-storage # END GENERATED PLANS diff --git a/tmt/tests/booted/bootc_testlib.nu b/tmt/tests/booted/bootc_testlib.nu index 2560d9b13..fd737ca99 100644 --- a/tmt/tests/booted/bootc_testlib.nu +++ b/tmt/tests/booted/bootc_testlib.nu @@ -1,5 +1,18 @@ # A simple nushell "library" for bootc test helpers +# Run standard per-boot checks that should pass on every boot in every test. +# +# Call this at the start of every boot function (first_boot, second_boot, etc.) +# to ensure consistent baseline verification across all tests. Currently runs: +# +# - `bootc internals fsck images`: verifies that every image bootc has +# committed to (composefs GC tag) is also present in containers-storage. +# This is a no-op when unified storage is not enabled, so it is always +# safe to call regardless of the system configuration. +export def initial_status_and_checks [] { + bootc internals fsck images +} + # This is a workaround for what must be a systemd bug # that seems to have appeared in C10S # TODO diagnose and fill in here diff --git a/tmt/tests/booted/readonly/011-test-ostree-ext-cli.nu b/tmt/tests/booted/readonly/011-test-ostree-ext-cli.nu index 817bc6003..909ed874c 100644 --- a/tmt/tests/booted/readonly/011-test-ostree-ext-cli.nu +++ b/tmt/tests/booted/readonly/011-test-ostree-ext-cli.nu @@ -9,8 +9,16 @@ tap begin "verify bootc wrapping ostree-ext" let st = bootc status --json | from json # Detect composefs by checking if composefs field is present let is_composefs = (tap is_composefs) +# On unified storage installs the ostree commit is synthesized from composefs +# via FICLONE reflinks rather than imported via the ostree-container protocol, +# so the ostree-container image metadata is not available. +let is_unified = ("/sysroot/composefs/bootc.json" | path exists) and ( + open /sysroot/composefs/bootc.json | get -i unified-storage | default false +) if $is_composefs { print "# TODO composefs: skipping test - ostree-container commands don't work with composefs" +} else if $is_unified { + print "# unified storage: skipping test - ostree commit synthesized from composefs, no ostree-container metadata" } else { let booted = $st.status.booted.image # Then verify we can extract its metadata via the ostree-container code. diff --git a/tmt/tests/booted/readonly/015-test-fsck.nu b/tmt/tests/booted/readonly/015-test-fsck.nu index 8700a5f66..6ddc8afd5 100644 --- a/tmt/tests/booted/readonly/015-test-fsck.nu +++ b/tmt/tests/booted/readonly/015-test-fsck.nu @@ -3,15 +3,17 @@ use tap.nu tap begin "Run fsck" -# Detect composefs by checking if composefs field is present -let st = bootc status --json | from json let is_composefs = (tap is_composefs) if $is_composefs { - print "# TODO composefs: skipping test - fsck requires ostree-booted host" + print "# TODO composefs: skipping low-level fsck - requires ostree-booted host" } else { - # That's it, just ensure we've run a fsck on our basic install. + # Run the full fsck suite (low-level checks + image metadata). bootc internals fsck } +# Image metadata check runs on all backends — it's a no-op when unified +# storage is not enabled, so this is always safe to run. +bootc internals fsck images + tap ok diff --git a/tmt/tests/booted/test-44-readonly-unified.nu b/tmt/tests/booted/test-44-readonly-unified.nu index 69b482d36..30ddaee36 100644 --- a/tmt/tests/booted/test-44-readonly-unified.nu +++ b/tmt/tests/booted/test-44-readonly-unified.nu @@ -1,6 +1,6 @@ # number: 44 # extra: -# try_bind_storage: true +# bootc_install_args: --experimental-unified-storage # tmt: # summary: Run readonly tests after onboarding to unified storage # duration: 30m @@ -18,22 +18,31 @@ let st = bootc status --json | from json let booted_image = $st.status.booted.image.image.image let is_composefs = (tap is_composefs) -# --- Step 1: Onboard to unified storage --- -print $"# Onboarding ($booted_image) to unified storage" -bootc image set-unified +# --- Step 1: Onboard to unified storage (if not already) --- +# Check if the booted image is already reported as unified (i.e. installed +# with --experimental-unified-storage, which pre-stages images in bootc storage). +let images = bootc image list --format json | from json +let already_unified = ($images | where image_type == "unified" | length) > 0 + +if not $already_unified { + print $"# Onboarding ($booted_image) to unified storage" + bootc image set-unified +} else { + print "# Already in unified storage mode (installed with --experimental-unified-storage)" +} # --- Step 2: Verify unified state --- # The bootc-owned image store symlink must exist assert ("/usr/lib/bootc/storage" | path exists) "/usr/lib/bootc/storage must exist after set-unified" -# The booted image should now appear in bootc-owned containers-storage +# The booted image should now appear in bootc-owned containers-storage. +# Use `bootc image list` rather than `podman image exists` because podman +# does not resolve image names through additional image stores (AIS), even +# though `podman images --storage-opt=additionalimagestore=...` lists them. print "# Verifying booted image is in bootc containers-storage" -let image_in_store = ( - podman --storage-opt=additionalimagestore=/usr/lib/bootc/storage - image exists $booted_image - | complete | get exit_code -) == 0 +let images_after = bootc image list --format json | from json +let image_in_store = ($images_after | where image_type == "unified" | length) > 0 assert $image_in_store $"Booted image ($booted_image) must be present in bootc storage after set-unified" # bootc image list must report the image with type "unified" @@ -41,6 +50,9 @@ let images = bootc image list --format json | from json let unified_images = $images | where image_type == "unified" assert (($unified_images | length) > 0) "Expected at least one image with type 'unified' after set-unified" +print "# Verifying unified storage consistency with bootc internals fsck images" +bootc_testlib initial_status_and_checks + # On a composefs-native boot the composefs repo is already fully populated # from the boot sequence. On an ostree boot the composefs repo is # initialised lazily on the first unified upgrade; `set-unified` alone does diff --git a/tmt/tests/booted/test-composefs-gc-uki.nu b/tmt/tests/booted/test-composefs-gc-uki.nu index e2efd561c..6a87aab28 100644 --- a/tmt/tests/booted/test-composefs-gc-uki.nu +++ b/tmt/tests/booted/test-composefs-gc-uki.nu @@ -5,6 +5,7 @@ use std assert use tap.nu +use bootc_testlib.nu if not (tap is_composefs) { exit 0 @@ -130,6 +131,7 @@ def fourth_boot [] { } def main [] { + bootc_testlib initial_status_and_checks match $env.TMT_REBOOT_COUNT? { null | "0" => first_boot, "1" => second_boot, diff --git a/tmt/tests/booted/test-composefs-gc.nu b/tmt/tests/booted/test-composefs-gc.nu index d19a01efe..c84118410 100644 --- a/tmt/tests/booted/test-composefs-gc.nu +++ b/tmt/tests/booted/test-composefs-gc.nu @@ -5,6 +5,7 @@ use std assert use tap.nu +use bootc_testlib.nu if not (tap is_composefs) { exit 0 @@ -171,6 +172,7 @@ def fifth_boot [i: int] { } def main [] { + bootc_testlib initial_status_and_checks match $env.TMT_REBOOT_COUNT? { null | "0" => first_boot, "1" => second_boot, diff --git a/tmt/tests/booted/test-image-pushpull-upgrade.nu b/tmt/tests/booted/test-image-pushpull-upgrade.nu index 708b868ec..605ea7395 100644 --- a/tmt/tests/booted/test-image-pushpull-upgrade.nu +++ b/tmt/tests/booted/test-image-pushpull-upgrade.nu @@ -11,6 +11,7 @@ # Then another build, and reboot into verifying that use std assert use tap.nu +use bootc_testlib.nu const kargsv0 = ["testarg=foo", "othertestkarg", "thirdkarg=bar"] const kargsv1 = ["testarg=foo", "thirdkarg=baz"] @@ -141,6 +142,7 @@ def second_boot [] { # booted from the local container storage and image assert equal $booted.image.transport containers-storage assert equal $booted.image.image localhost/bootc-derived + # We wrote this file let t = open /usr/share/blah.txt | str trim assert equal $t "test content" @@ -191,6 +193,7 @@ def third_boot [] { print "verifying third boot" assert equal $booted.image.transport containers-storage assert equal $booted.image.image localhost/bootc-derived + let t = open /usr/share/blah.txt | str trim assert equal $t "test content2" @@ -218,6 +221,7 @@ def third_boot [] { } def main [] { + bootc_testlib initial_status_and_checks # See https://tmt.readthedocs.io/en/stable/stories/features.html#reboot-during-test match $env.TMT_REBOOT_COUNT? { null | "0" => initial_build, diff --git a/tmt/tests/booted/test-image-upgrade-reboot.nu b/tmt/tests/booted/test-image-upgrade-reboot.nu index f28cff1da..e95236056 100644 --- a/tmt/tests/booted/test-image-upgrade-reboot.nu +++ b/tmt/tests/booted/test-image-upgrade-reboot.nu @@ -17,6 +17,7 @@ # use std assert use tap.nu +use bootc_testlib.nu # This code runs on *each* boot. # Here we just capture information. @@ -118,6 +119,7 @@ def second_boot [] { } def main [] { + bootc_testlib initial_status_and_checks # See https://tmt.readthedocs.io/en/stable/stories/features.html#reboot-during-test match $env.TMT_REBOOT_COUNT? { null | "0" => initial_build, diff --git a/tmt/tests/booted/test-logically-bound-switch.nu b/tmt/tests/booted/test-logically-bound-switch.nu index 298d7ff86..2ea50b6fe 100644 --- a/tmt/tests/booted/test-logically-bound-switch.nu +++ b/tmt/tests/booted/test-logically-bound-switch.nu @@ -17,6 +17,7 @@ use std assert use tap.nu +use bootc_testlib.nu # This code runs on *each* boot. bootc status @@ -110,6 +111,7 @@ def first_boot [] { def second_boot [] { print "verifying second boot after switch" + assert equal $booted.image.transport containers-storage assert equal $booted.image.image localhost/bootc-bound @@ -136,6 +138,7 @@ def second_boot [] { def third_boot [] { print "verifying third boot after upgrade" + assert equal $booted.image.transport containers-storage assert equal $booted.image.image localhost/bootc-bound @@ -154,6 +157,7 @@ def third_boot [] { } def main [] { + bootc_testlib initial_status_and_checks match $env.TMT_REBOOT_COUNT? { null | "0" => first_boot, "1" => second_boot, diff --git a/tmt/tests/booted/test-rollback.nu b/tmt/tests/booted/test-rollback.nu index 0afe377ac..debc87d40 100644 --- a/tmt/tests/booted/test-rollback.nu +++ b/tmt/tests/booted/test-rollback.nu @@ -107,6 +107,7 @@ def fourth_boot_verify [] { } def main [] { + bootc_testlib initial_status_and_checks # See https://tmt.readthedocs.io/en/stable/stories/features.html#reboot-during-test match $env.TMT_REBOOT_COUNT? { null | "0" => initial_switch, diff --git a/tmt/tests/booted/test-switch-to-unified.nu b/tmt/tests/booted/test-switch-to-unified.nu index 4e8b61442..e2fed1adc 100644 --- a/tmt/tests/booted/test-switch-to-unified.nu +++ b/tmt/tests/booted/test-switch-to-unified.nu @@ -7,6 +7,7 @@ # use std assert use tap.nu +use bootc_testlib.nu # Multi-boot test: boot 0 onboards to unified storage and builds a derived image; # boot 1 verifies we booted into the derived image using containers-storage @@ -17,6 +18,7 @@ let st = bootc status --json | from json let booted = $st.status.booted.image def main [] { + bootc_testlib initial_status_and_checks match $env.TMT_REBOOT_COUNT? { null | "0" => first_boot, "1" => second_boot, @@ -38,7 +40,6 @@ def first_boot [] { } def second_boot [] { - # Onboard to unified storage - this pulls the booted image into bootc storage bootc image set-unified From 1929048ed82be0b4670ada1b6d5f25ed1a40a1f6 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Fri, 22 May 2026 21:03:35 -0400 Subject: [PATCH 11/13] image: Fix fsck false-positive for rollback images not in containers-storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a composefs deployment's rollback image is in the composefs OCI repo (has a bootc GC tag) but has been removed from containers-storage, `bootc internals fsck images` incorrectly reported FAIL. Rollback deployments may legitimately have their image absent from containers-storage. After a `bootc switch`, cleanup paths (`prune_container_store` or the composefs GC) may remove the rollback's image from containers-storage since only the current deployment's image name may appear in the booted spec. The rollback image stays in composefs (it's still a live deployment), but fsck was checking ALL composefs tags against containers-storage without regard to whether each tag corresponds to a live deployment. Fix by cross-referencing composefs tags against live deployment manifests before checking containers-storage. Each live composefs deployment stores its manifest digest in a state-dir origin file; we collect these digests and only enforce containers-storage presence for images whose manifest appears there. Tags for stale/obsolete images (not backed by any live deployment) are silently skipped — they will be collected by the next `composefs-gc` run. Also fix `list_host_images_composefs` to match by config digest (image ID) rather than manifest digest when cross-referencing between composefs and containers-storage. Podman may report a different manifest digest after layer recompression, but the config digest is stable. Assisted-by: OpenCode (Claude Sonnet 4.6) Signed-off-by: Colin Walters --- crates/lib/src/image.rs | 348 +++++++++++++++++++++++++++++++--------- 1 file changed, 275 insertions(+), 73 deletions(-) diff --git a/crates/lib/src/image.rs b/crates/lib/src/image.rs index f51a46ee7..b2de9bebb 100644 --- a/crates/lib/src/image.rs +++ b/crates/lib/src/image.rs @@ -14,6 +14,7 @@ use serde::Serialize; use crate::{ boundimage::query_bound_images, + bootc_composefs::status::ImgConfigManifest, cli::{ImageListFormat, ImageListType}, composefs_consts::BOOTC_TAG_PREFIX, podstorage::CStorage, @@ -95,24 +96,31 @@ async fn list_host_images_composefs(sysroot: &crate::store::Storage) -> Result` means the composefs - // import completed successfully for that manifest digest. + // Build the set of config digests (= image IDs) that have a complete + // composefs import. // - // If the composefs repo doesn't exist yet (e.g. unified storage was - // never initialised), the set is empty and every cstorage image is - // treated as `partial`. - let composefs_digests: std::collections::HashSet = + // We match by config digest rather than manifest digest because podman may + // report a different manifest digest than composefs (e.g. when layers are + // recompressed during the copy into containers-storage). The config digest + // is stable across such transformations and is also the image ID that + // podman reports. + // + // For each bootc composefs tag, open the OCI image to retrieve its config + // digest, then look that up against the cstorage image IDs (which are also + // config sha256 hashes). + let composefs_config_digests: std::collections::HashSet = match sysroot.get_ensure_composefs() { Ok(repo) => { + use composefs_ctl::composefs::fsverity::Sha512HashValue; + use composefs_ctl::composefs_oci::OciImage; composefs_oci::list_refs(&*repo) .context("Listing composefs OCI refs")? .into_iter() - .filter_map(|(tag, _digest)| { - // Tag format: `localhost/bootc-sha256:` - // Strip the prefix to recover the manifest digest string - // in the canonical `sha256:` form that podman reports. - tag.strip_prefix(BOOTC_TAG_PREFIX).map(str::to_owned) + .filter(|(tag, _)| tag.starts_with(BOOTC_TAG_PREFIX)) + .filter_map(|(_tag, manifest_digest)| { + OciImage::::open(&repo, &manifest_digest, None) + .ok() + .map(|img| img.manifest().config().digest().to_string()) }) .collect() } @@ -122,27 +130,25 @@ async fn list_host_images_composefs(sysroot: &crate::store::Storage) -> Result ImageListTypeColumn::Unified, - _ => ImageListTypeColumn::Partial, - } + ImageListTypeColumn::Partial }; - entry - .names - .unwrap_or_default() - .into_iter() - .map(move |name| ImageOutput { - image: name, - image_type: image_type.clone(), - }) + names.into_iter().map(move |name| ImageOutput { + image: name, + image_type: image_type.clone(), + }) }) .collect()) } @@ -227,9 +233,57 @@ pub(crate) async fn list_entrypoint( /// Prints a human-readable report to stdout and returns `true` if all /// checks pass, `false` if any inconsistency is found. /// +/// When `repair` is `true`, images that are in composefs but missing from +/// containers-storage are restored by re-exporting from composefs. +/// /// When unified storage is not enabled (no composefs repo / no bootc.json), /// this prints a note and returns `true` (nothing to check). -pub(crate) async fn fsck_images(sysroot: &Storage) -> anyhow::Result { +/// Collect the set of OCI manifest digests referenced by all live composefs +/// deployments (booted, rollback, staged). +/// +/// Each live deployment has a state-dir origin file that records the manifest +/// digest of its image. Images whose manifest is NOT in this set are stale +/// GC roots that will be cleaned up by `bootc internals composefs-gc`; they +/// should not be treated as fsck failures even if they are absent from +/// containers-storage (the rollback image is the canonical example: after a +/// `bootc switch`, `prune_container_store` may have removed it). +fn collect_live_deployment_manifest_digests(sysroot: &Storage) -> std::collections::HashSet { + use crate::bootc_composefs::state::read_origin; + use crate::composefs_consts::{ORIGIN_KEY_IMAGE, ORIGIN_KEY_MANIFEST_DIGEST, STATE_DIR_RELATIVE}; + + let mut digests = std::collections::HashSet::new(); + let Ok(state_base) = sysroot.physical_root.open_dir(STATE_DIR_RELATIVE) else { + // No composefs state dir — not a composefs system or no deployments yet. + return digests; + }; + let entries = match state_base.entries_utf8() { + Ok(e) => e, + Err(_) => return digests, + }; + for entry in entries.flatten() { + let Ok(ft) = entry.file_type() else { continue }; + if !ft.is_dir() { + continue; + } + let Ok(verity) = entry.file_name() else { continue }; + match read_origin(&sysroot.physical_root, &verity) { + Ok(Some(ini)) => { + if let Some(digest) = + ini.get::(ORIGIN_KEY_IMAGE, ORIGIN_KEY_MANIFEST_DIGEST) + { + digests.insert(digest); + } + } + Ok(None) => {} // State dir exists but no origin yet (interrupted deploy). + Err(e) => { + tracing::warn!("Failed to read origin for deployment {verity}: {e:#}"); + } + } + } + digests +} + +pub(crate) async fn fsck_images(sysroot: &Storage, repair: bool) -> anyhow::Result { // Check if unified storage is enabled on this system. let meta = crate::store::BootcRepoMeta::read(&sysroot.physical_root)?; let unified_enabled = meta.map(|m| m.unified_storage).unwrap_or(false); @@ -265,46 +319,108 @@ pub(crate) async fn fsck_images(sysroot: &Storage) -> anyhow::Result { return Ok(true); } - // Build the set of manifest digests present in containers-storage for - // cross-referencing. - let images = list_host_images_composefs(sysroot).await?; - let cstor_digests: std::collections::HashSet = images - .iter() - .filter_map(|img| match img.image_type { - ImageListTypeColumn::Unified | ImageListTypeColumn::Partial => { - // Extract digest from the image name (format: name@sha256:hex) - img.image.split('@').nth(1).map(str::to_owned) - } - _ => None, - }) + // Build a set of manifest digests for all live deployments (booted, + // rollback, staged). Composefs tags whose manifest is not in this set + // are stale GC roots — they will be removed by the next composefs-gc run. + // It is NOT a fsck failure if such an image is absent from + // containers-storage: the rollback's image is the canonical example, as + // `prune_container_store` may legitimately have removed it after a switch. + let live_manifest_digests = collect_live_deployment_manifest_digests(sysroot); + + // Build the set of image IDs (config sha256) present in containers-storage. + // We match by config digest rather than manifest digest because podman may + // report a different manifest digest than composefs when layers are + // recompressed during the copy. The config digest is stable. + let cstor_images = { + let run = cap_std_ext::cap_std::fs::Dir::open_ambient_dir( + "/run", + cap_std_ext::cap_std::ambient_authority(), + ) + .context("Opening /run")?; + let imgstore = crate::podstorage::CStorage::create( + &sysroot.physical_root, + &run, + None, + ) + .context("Opening containers-storage")?; + imgstore + .list_images() + .await + .context("Listing containers-storage images")? + }; + // Map from config-digest (image ID) → names for cross-referencing. + let cstor_id_to_names: std::collections::HashMap> = cstor_images + .into_iter() + .map(|entry| (entry.id, entry.names.unwrap_or_default())) .collect(); let mut n_ok = 0usize; let mut n_fail = 0usize; for (tag, manifest_digest) in &bootc_tags { - let digest_str = manifest_digest.to_string(); - // The tag is `localhost/bootc-sha256:`; strip prefix to get - // `sha256:` which is what podman reports as the image digest. - let short_digest = tag - .strip_prefix(BOOTC_TAG_PREFIX) - .unwrap_or(digest_str.as_str()); - - // Find cstorage image by matching digest - let in_cstor = cstor_digests.contains(&format!("sha256:{}", { - // composefs digest may be `sha256:hex` already - short_digest.strip_prefix("sha256:").unwrap_or(short_digest) - })) || cstor_digests.contains(short_digest); - - let in_composefs = true; // we got this from composefs list_refs - - if in_cstor && in_composefs { - println!(" [OK] {tag} (composefs: yes, containers-storage: yes)"); + use composefs_ctl::composefs::fsverity::Sha512HashValue; + use composefs_ctl::composefs_oci::OciImage; + + // Get config digest and human-readable name from the composefs OCI image. + let oci_img = OciImage::::open(&cfs_repo, manifest_digest, None).ok(); + let config_digest = oci_img + .as_ref() + .map(|img| img.manifest().config().digest().to_string()); + let display_name = oci_img + .as_ref() + .and_then(|img| { + img.config() + .and_then(|cfg| cfg.config().as_ref()) + .and_then(|c| c.labels().as_ref()) + .and_then(|l| l.get("org.opencontainers.image.ref.name").map(|s| s.clone())) + }) + .unwrap_or_else(|| tag.clone()); + + // Only enforce containers-storage presence for images that back a live + // deployment. Tags for stale images (e.g. a prior rollback that was + // displaced by a second upgrade) will be cleaned up by composefs-gc and + // are harmless to skip here. + let is_live = live_manifest_digests.contains(manifest_digest.to_string().as_str()); + if !is_live { + tracing::debug!( + "Skipping stale composefs tag {tag} ({manifest_digest}): not a live deployment" + ); + continue; + } + + // Check cstorage by config digest (image ID). + let in_cstor = if let Some(ref cdig) = config_digest { + // podman IDs are bare hex; digests are "sha256:" + let bare = cdig.strip_prefix("sha256:").unwrap_or(cdig.as_str()); + cstor_id_to_names.contains_key(bare) || cstor_id_to_names.contains_key(cdig.as_str()) + } else { + false + }; + + // in_composefs is always true here: we iterated over composefs list_refs + if in_cstor { + println!(" [OK] {display_name} (composefs: yes, containers-storage: yes)"); n_ok += 1; + } else if repair { + println!(" [REPAIR] Restoring {display_name} to containers-storage..."); + match repair_image_to_containers_storage(&cfs_repo, manifest_digest).await { + Ok(()) => { + println!( + " [REPAIRED] Successfully restored {display_name} to containers-storage" + ); + n_ok += 1; + } + Err(e) => { + println!(" [REPAIR FAILED] {display_name}: {e:#}"); + n_fail += 1; + } + } } else { println!( - " [FAIL] {tag} - composefs: yes, containers-storage: {}", - if in_cstor { "yes" } else { "NO" } + " [FAIL] {display_name} - in composefs but not in containers-storage" + ); + println!( + " Hint: re-run with `--repair` to restore, or `bootc upgrade` to re-pull" ); n_fail += 1; } @@ -319,6 +435,70 @@ pub(crate) async fn fsck_images(sysroot: &Storage) -> anyhow::Result { } } +/// Restore an image from the composefs repo into bootc containers-storage. +/// +/// Used by `bootc internals fsck --repair` when an image is present in +/// composefs (has a bootc GC tag) but missing from containers-storage. +/// +/// Opens the OCI manifest and config stored in the composefs splitstreams, +/// then re-exports the image to containers-storage via skopeo. +async fn repair_image_to_containers_storage( + cfs_repo: &crate::store::ComposefsRepository, + manifest_digest: &composefs_oci::OciDigest, +) -> Result<()> { + use composefs_ctl::composefs::fsverity::Sha512HashValue; + use composefs_ctl::composefs_oci::OciImage; + + // Ensure floating containers-storage is set up before we try to write to it. + crate::podstorage::ensure_floating_c_storage_initialized(); + + // Load the manifest and config from the composefs splitstreams. + let oci_image = OciImage::::open(cfs_repo, manifest_digest, None) + .with_context(|| { + format!("Opening OCI image for manifest digest {manifest_digest} from composefs") + })?; + + let manifest = oci_image.manifest().clone(); + let config = oci_image + .config() + .cloned() + .ok_or_else(|| anyhow::anyhow!("OCI image {manifest_digest} has no config (artifact?)"))?; + + let imginfo = ImgConfigManifest { config, manifest }; + + // Derive a containers-storage name from the manifest digest. + // Use the org.opencontainers.image.ref.name label if available, otherwise + // synthesise a name from the digest so the image is addressable. + let image_name = imginfo + .config + .config() + .as_ref() + .and_then(|c| c.labels().as_ref()) + .and_then(|l| l.get("org.opencontainers.image.ref.name")) + .cloned() + .unwrap_or_else(|| { + // Strip the "sha256:" prefix and take the first 12 hex chars as a short id. + let digest_str = manifest_digest.to_string(); + let short = digest_str + .strip_prefix("sha256:") + .unwrap_or(&digest_str) + .get(..12) + .unwrap_or(&digest_str); + format!("localhost/bootc-recovered:{short}") + }); + + let dest_imgref = ImageReference { + transport: Transport::ContainerStorage, + name: image_name, + }; + + crate::bootc_composefs::export::export_composefs_to_dest(cfs_repo, &imginfo, &dest_imgref) + .await + .with_context(|| { + format!("Exporting composefs image {manifest_digest} to containers-storage") + }) +} + /// Returns the source and target ImageReference /// If the source isn't specified, we use booted image /// If the target isn't specified, we push to containers-storage with our default image @@ -705,18 +885,40 @@ pub(crate) async fn set_unified(sysroot: &crate::store::Storage) -> Result<()> { } tracing::info!("Image verified in bootc storage: {}", &imgref.image); - // Optionally verify we can import from containers-storage by preparing in a temp importer - // without actually importing into the main repo; this is a lightweight validation. - let containers_storage_imgref = crate::spec::ImageReference { - transport: "containers-storage".to_string(), - image: imgref.image.clone(), - signature: imgref.signature.clone(), - }; - let ostree_imgref = - ostree_ext::container::OstreeImageReference::from(containers_storage_imgref); - let _ = - ostree_ext::container::store::ImageImporter::new(repo, &ostree_imgref, Default::default()) - .await?; + // Import the image from containers-storage into the composefs OCI repo and tag it + // as a bootc GC root. This mirrors Stage 2 of pull_via_composefs in deploy.rs so + // that `bootc image list` shows the image as fully present (not "partial"). + { + use composefs_ctl::composefs_oci::{LocalFetchOpt, PullOptions, tag_image}; + + let cfs_repo = sysroot.get_ensure_composefs()?; + let cstor_imgref_str = format!("containers-storage:{}", imgref.image); + let storage_path = format!( + "{}/{}", + sysroot.physical_root_path, + CStorage::subpath() + ); + tracing::info!( + "Importing {} from containers-storage into composefs repo", + &imgref.image + ); + let pull_opts = PullOptions { + // IfPossible falls back to a byte-copy when src and dst are on different + // filesystems, avoiding a hard failure on unusual setups. + local_fetch: LocalFetchOpt::IfPossible, + storage_root: Some(std::path::Path::new(&storage_path)), + ..Default::default() + }; + let pull_result = composefs_oci::pull(&cfs_repo, &cstor_imgref_str, None, pull_opts) + .await + .context("Importing from containers-storage into composefs repo")?; + + let tag = crate::bootc_composefs::repo::bootc_tag_for_manifest( + &pull_result.manifest_digest.to_string(), + ); + tag_image(&*cfs_repo, &pull_result.manifest_digest, &tag) + .context("Tagging pulled image as bootc GC root in composefs repo")?; + } // Ensure the composefs directory exists (it may not on a fresh ostree system // that has never run pull_via_composefs), then write the bootc.json flag. From c726e5fa2e6578430f333da6f93faad0c9fffbfb Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Fri, 22 May 2026 21:16:35 -0400 Subject: [PATCH 12/13] podstorage: Skip copy if image already in bootc storage; mark digest dead_code When an image was built via 'bootc image cmd build' it goes directly into bootc's containers-storage. pull_from_containers_storage should detect this and skip the copy step rather than trying to copy from the default (host) storage where the image does not exist. Also marks the digest field on ImageListEntry as dead_code since we now match by config digest (image ID) rather than manifest digest. Assisted-by: OpenCode (claude-sonnet-4-6@default) Signed-off-by: Colin Walters --- crates/lib/src/podman.rs | 8 ++++---- crates/lib/src/podstorage.rs | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/crates/lib/src/podman.rs b/crates/lib/src/podman.rs index 1d1451108..71d707f3c 100644 --- a/crates/lib/src/podman.rs +++ b/crates/lib/src/podman.rs @@ -19,10 +19,10 @@ pub(crate) struct Inspect { pub(crate) struct ImageListEntry { pub(crate) id: String, pub(crate) names: Option>, - /// Manifest digest for this image (e.g. `sha256:abc...`). - /// - /// Populated by `podman image list --format=json` in modern podman versions. - /// Used to cross-reference against composefs bootc tags. + /// Manifest digest as reported by podman (may differ from the composefs + /// OCI manifest digest when layers are recompressed during copy). + /// Kept for diagnostic purposes; use `id` (config sha256) for matching. + #[allow(dead_code)] pub(crate) digest: Option, } diff --git a/crates/lib/src/podstorage.rs b/crates/lib/src/podstorage.rs index ece5677af..2d966c622 100644 --- a/crates/lib/src/podstorage.rs +++ b/crates/lib/src/podstorage.rs @@ -451,6 +451,13 @@ impl CStorage { /// image stores when resolving a push source. Otherwise falls back to the /// default `/var/lib/containers/storage`. pub(crate) async fn pull_from_containers_storage(&self, image: &str) -> Result<()> { + // If the image is already in bootc's own containers-storage (e.g. built + // via `bootc image cmd build`), no copy is needed. + if self.exists(image).await.unwrap_or(false) { + tracing::debug!("Image {image} already in bootc storage; skipping copy"); + return Ok(()); + } + let ais = std::env::var("STORAGE_OPTS") .ok() .and_then(|v| { From 268a7bcddbb86f2bd5d97ef6b4a8d5d2a8c74571 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Fri, 22 May 2026 21:25:19 -0400 Subject: [PATCH 13/13] image: Fix Unified/Partial semantics on non-unified-storage systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On systems where unified storage is not enabled (no composefs/bootc.json), images in bootc's containers-storage should be reported as 'unified' — the original semantics meaning 'available via bootc storage for podman'. The composefs cross-reference check (Partial vs Unified) only makes sense when the unified-storage three-store pipeline has been opted into. Without this fix, plan-01-readonly would fail because 010-test-bootc- container-store.nu asserts pulled images appear as 'unified'. Assisted-by: OpenCode (claude-sonnet-4-6@default) Signed-off-by: Colin Walters --- crates/lib/src/image.rs | 44 ++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/crates/lib/src/image.rs b/crates/lib/src/image.rs index b2de9bebb..91d8f7411 100644 --- a/crates/lib/src/image.rs +++ b/crates/lib/src/image.rs @@ -96,18 +96,30 @@ async fn list_host_images_composefs(sysroot: &crate::store::Storage) -> Result = match sysroot.get_ensure_composefs() { Ok(repo) => { @@ -131,18 +143,14 @@ async fn list_host_images_composefs(sysroot: &crate::store::Storage) -> Result