diff --git a/crates/lib/src/bootc_composefs/export.rs b/crates/lib/src/bootc_composefs/export.rs index ea760e603..6fe774ff4 100644 --- a/crates/lib/src/bootc_composefs/export.rs +++ b/crates/lib/src/bootc_composefs/export.rs @@ -7,44 +7,27 @@ use composefs_ctl::composefs; use composefs_ctl::composefs_oci; use composefs_oci::open_config; use ocidir::{OciDir, oci_spec::image::Platform}; +use ostree_ext::container::ImageReference; use ostree_ext::container::Transport; use ostree_ext::container::skopeo; use tar::EntryType; use crate::image::get_imgrefs_for_copy; use crate::{ - bootc_composefs::status::{get_composefs_status, get_imginfo}, - store::{BootedComposefs, Storage}, + bootc_composefs::status::{ImgConfigManifest, get_composefs_status, get_imginfo}, + store::{BootedComposefs, ComposefsRepository, Storage}, }; -/// Exports a composefs repository to a container image in containers-storage: -pub async fn export_repo_to_image( - storage: &Storage, - booted_cfs: &BootedComposefs, - source: Option<&str>, - target: Option<&str>, +/// Streams a composefs OCI image out to a destination image reference. +/// +/// Given a composefs repository handle and image metadata (manifest + config), +/// reconstructs the container image by reading layer data from the composefs +/// splitstreams and copies the assembled OCI image to `dest_imgref` via skopeo. +pub(crate) async fn export_composefs_to_dest( + composefs_repo: &ComposefsRepository, + imginfo: &ImgConfigManifest, + dest_imgref: &ImageReference, ) -> Result<()> { - let host = get_composefs_status(storage, booted_cfs).await?; - - let (source, dest_imgref) = get_imgrefs_for_copy(&host, source, target).await?; - - let mut depl_verity = None; - - for depl in host.list_deployments() { - let img = &depl.image.as_ref().unwrap().image; - - // Not checking transport here as we'll be pulling from the repo anyway - // So, image name is all we need - if img.image == source.name { - depl_verity = Some(depl.require_composefs()?.verity.clone()); - break; - } - } - - let depl_verity = depl_verity.ok_or_else(|| anyhow::anyhow!("Image {source} not found"))?; - - let imginfo = get_imginfo(storage, &depl_verity)?; - let config_digest = imginfo.manifest.config().digest().clone(); let var_tmp = @@ -54,7 +37,7 @@ pub async fn export_repo_to_image( let oci_dir = OciDir::ensure(tmpdir.try_clone()?).context("Opening OCI")?; // Use composefs_oci::open_config to get the config and layer map - let open = open_config(&*booted_cfs.repo, &config_digest, None).context("Opening config")?; + let open = open_config(composefs_repo, &config_digest, None).context("Opening config")?; let config = open.config; let layer_map = open.layer_refs; @@ -77,7 +60,7 @@ pub async fn export_repo_to_image( .get(old_diff_id.as_str()) .ok_or_else(|| anyhow::anyhow!("Layer {old_diff_id} not found in config"))?; - let mut layer_stream = booted_cfs.repo.open_stream("", Some(layer_verity), None)?; + let mut layer_stream = composefs_repo.open_stream("", Some(layer_verity), None)?; let mut layer_writer = oci_dir.create_layer(None)?; layer_writer.follow_symlinks(false); @@ -113,7 +96,7 @@ pub async fn export_repo_to_image( match layer_stream.read_exact(size as usize, ((size as usize) + 511) & !511)? { SplitStreamData::External(obj_id) => match header.entry_type() { EntryType::Regular | EntryType::Continuous => { - let file = File::from(booted_cfs.repo.open_object(&obj_id)?); + let file = File::from(composefs_repo.open_object(&obj_id)?); layer_writer .append(&header, file) @@ -196,7 +179,7 @@ pub async fn export_repo_to_image( skopeo::copy( &tempoci, - &dest_imgref, + dest_imgref, None, Some(( std::sync::Arc::new(tmpdir.try_clone()?.into()), @@ -208,3 +191,37 @@ pub async fn export_repo_to_image( Ok(()) } + +/// Exports a composefs repository to a container image in containers-storage: +pub async fn export_repo_to_image( + storage: &Storage, + booted_cfs: &BootedComposefs, + source: Option<&str>, + target: Option<&str>, +) -> Result<()> { + let host = get_composefs_status(storage, booted_cfs).await?; + + let (source, dest_imgref) = get_imgrefs_for_copy(&host, source, target).await?; + + let mut depl_verity = None; + + for depl in host.list_deployments() { + let img = &depl.image.as_ref().unwrap().image; + + // Not checking transport here as we'll be pulling from the repo anyway + // So, image name is all we need + if img.image == source.name { + depl_verity = Some(depl.require_composefs()?.verity.clone()); + break; + } + } + + let depl_verity = depl_verity.ok_or_else(|| anyhow::anyhow!("Image {source} not found"))?; + + let imginfo = get_imginfo(storage, &depl_verity)?; + + println!("Copying local image {source} to {dest_imgref} ..."); + export_composefs_to_dest(&booted_cfs.repo, &imginfo, &dest_imgref).await?; + println!("Pushed: {dest_imgref}"); + Ok(()) +} diff --git a/crates/lib/src/bootc_composefs/repo.rs b/crates/lib/src/bootc_composefs/repo.rs index 1f59fb8d7..9dcc7d559 100644 --- a/crates/lib/src/bootc_composefs/repo.rs +++ b/crates/lib/src/bootc_composefs/repo.rs @@ -184,11 +184,12 @@ async fn pull_composefs_unified( // Stage 1: get the image into bootc-owned containers-storage. if imgref.transport == containers_image_proxy::Transport::ContainerStorage { - // The image is in the default containers-storage (/var/lib/containers/storage). - // Copy it into bootc-owned storage. + // The image is in a containers-storage instance — either the default + // /var/lib/containers/storage or an additional image store advertised + // via STORAGE_OPTS (e.g. the bcvk virtiofs mount). tracing::info!("Unified pull: copying {image} from host containers-storage"); imgstore - .pull_from_host_storage(image) + .pull_from_containers_storage(image) .await .context("Copying image from host containers-storage into bootc storage")?; } else { diff --git a/crates/lib/src/bootc_composefs/switch.rs b/crates/lib/src/bootc_composefs/switch.rs index f6b3855be..367ecbf63 100644 --- a/crates/lib/src/bootc_composefs/switch.rs +++ b/crates/lib/src/bootc_composefs/switch.rs @@ -51,23 +51,11 @@ pub(crate) async fn switch_composefs( let repo = &*booted_cfs.repo; let (image, img_config) = is_image_pulled(repo, &target_imgref).await?; - // Use unified storage if explicitly requested, or auto-detect: either the - // target image is already in bootc-owned containers-storage, OR the booted - // image is — which means the user has opted into unified storage and all - // subsequent operations (including switch to a new image) should use it. - let use_unified = if opts.unified_storage_exp { - true - } else { - let booted_imgref = host.spec.image.as_ref(); - let booted_unified = if let Some(booted) = booted_imgref { - crate::deploy::image_exists_in_unified_storage(storage, booted).await? - } else { - false - }; - let target_unified = - crate::deploy::image_exists_in_unified_storage(storage, &target_imgref).await?; - booted_unified || target_unified - }; + // Use unified storage if explicitly requested via flag, or if the + // composefs/bootc.json marker says unified storage is enabled on this system. + let use_unified = opts.unified_storage_exp + || crate::deploy::unified_storage_enabled(storage) + .context("Checking unified storage flag")?; let do_upgrade_opts = DoUpgradeOpts { soft_reboot: opts.soft_reboot, diff --git a/crates/lib/src/bootc_composefs/update.rs b/crates/lib/src/bootc_composefs/update.rs index e88d1f3e9..662bdf4ae 100644 --- a/crates/lib/src/bootc_composefs/update.rs +++ b/crates/lib/src/bootc_composefs/update.rs @@ -416,17 +416,11 @@ pub(crate) async fn upgrade_composefs( let imgref = derived_image.as_ref().or(current_image); let mut booted_imgref = imgref.ok_or_else(|| anyhow::anyhow!("No image source specified"))?; - // Auto-detect unified storage: use the unified path if the target image is - // already in bootc-owned containers-storage, OR if the booted image is — - // the latter means the user has opted into unified storage and all - // subsequent operations should use it. - let current_unified = if let Some(current) = current_image { - crate::deploy::image_exists_in_unified_storage(storage, current).await? - } else { - false - }; - do_upgrade_opts.use_unified = current_unified - || crate::deploy::image_exists_in_unified_storage(storage, booted_imgref).await?; + // Use unified storage if the composefs/bootc.json marker says it is enabled + // on this system (written by `bootc image set-unified` or by install with + // `--experimental-unified-storage`). + do_upgrade_opts.use_unified = crate::deploy::unified_storage_enabled(storage) + .context("Checking unified storage flag")?; let repo = &*composefs.repo; diff --git a/crates/lib/src/cli.rs b/crates/lib/src/cli.rs index 77cf58dd8..5ce8422fe 100644 --- a/crates/lib/src/cli.rs +++ b/crates/lib/src/cli.rs @@ -619,6 +619,21 @@ pub(crate) enum FsverityOpts { }, } +/// Subcommands for `bootc internals fsck`. +#[derive(Debug, clap::Subcommand, PartialEq, Eq)] +pub(crate) enum FsckCheck { + /// Check image store metadata consistency. + /// + /// Verifies that every image bootc has committed to (i.e. has a composefs + /// GC tag) is also present in bootc's containers-storage. This is a + /// metadata-level check only — it does not walk composefs objects or verify + /// content integrity. Exits non-zero if any image is partially imported. + /// + /// LBIs (logically bound images) are not subject to this check; only images + /// that went through the unified storage pipeline are checked. + Images, +} + /// Hidden, internal only options #[derive(Debug, clap::Subcommand, PartialEq, Eq)] pub(crate) enum InternalsOpts { @@ -638,7 +653,23 @@ pub(crate) enum InternalsOpts { #[clap(subcommand)] Fsverity(FsverityOpts), /// Perform consistency checking. - Fsck, + /// + /// Without a subcommand, runs all checks. Use a subcommand to run a + /// specific subset. + Fsck { + #[clap(subcommand)] + check: Option, + /// Attempt to repair detected inconsistencies. + /// + /// Currently handles: images present in the composefs repo (with a bootc + /// GC tag) that are missing from bootc containers-storage (copies them + /// back from composefs). + /// + /// Does NOT attempt to repair images in containers-storage that are + /// missing from composefs; those require re-running `bootc upgrade`. + #[clap(long)] + repair: bool, + }, /// Perform cleanup actions Cleanup, Relabel { @@ -1212,10 +1243,8 @@ async fn upgrade( return Ok(()); } - // Ensure the bootc storage directory is initialized; the --check path - // needs this for update_mtime() and the non-check path needs it for - // unified pull detection. - let use_unified = crate::deploy::image_exists_in_unified_storage(storage, imgref).await?; + // Check whether the composefs/bootc.json flag says unified storage is enabled. + let use_unified = crate::deploy::unified_storage_enabled(storage)?; if opts.check { let ostree_imgref = imgref.clone().into(); @@ -1243,16 +1272,7 @@ async fn upgrade( } } else { let fetched = if use_unified { - crate::deploy::pull_unified( - repo, - imgref, - None, - opts.quiet, - prog.clone(), - storage, - Some(&booted_ostree.deployment), - ) - .await? + crate::deploy::pull_via_composefs(repo, imgref, storage).await? } else { crate::deploy::pull( repo, @@ -1414,24 +1434,12 @@ async fn switch_ostree( // Determine whether to use unified storage path. // If explicitly requested via flag, use unified storage directly. - // Otherwise, auto-detect based on whether the image exists in bootc storage. - let use_unified = if opts.unified_storage_exp { - true - } else { - crate::deploy::image_exists_in_unified_storage(storage, &target).await? - }; + // Otherwise, check the composefs/bootc.json flag. + let use_unified = opts.unified_storage_exp + || crate::deploy::unified_storage_enabled(storage)?; let fetched = if use_unified { - crate::deploy::pull_unified( - repo, - &target, - None, - opts.quiet, - prog.clone(), - storage, - Some(&booted_ostree.deployment), - ) - .await? + crate::deploy::pull_via_composefs(repo, &target, storage).await? } else { crate::deploy::pull( repo, @@ -2089,11 +2097,45 @@ async fn run_from_opt(opt: Opt) -> Result<()> { Ok(()) } }, - InternalsOpts::Cfs { args } => composefs_ctl::run_from_iter(args.iter()).await, + InternalsOpts::Cfs { args } => { + // Inject `--system` (which resolves to /sysroot/composefs) unless the + // caller has already specified a repo location via --repo, --user, or + // --system. This ensures `bootc internals cfsctl oci images` (and + // similar subcommands) operate on the booted composefs repository by + // default rather than falling back to cfsctl's own heuristic. + let has_repo_flag = args.iter().any(|a| { + let s = a.to_string_lossy(); + s == "--repo" || s.starts_with("--repo=") || s == "--user" || s == "--system" + }); + if has_repo_flag { + composefs_ctl::run_from_iter(args.iter()).await + } else { + composefs_ctl::run_from_iter( + std::iter::once(OsString::from("--system")).chain(args.into_iter()), + ) + .await + } + } InternalsOpts::Reboot => crate::reboot::reboot(), - InternalsOpts::Fsck => { + InternalsOpts::Fsck { check, repair } => { let storage = &get_storage().await?; - crate::fsck::fsck(&storage, std::io::stdout().lock()).await?; + match check { + // `bootc internals fsck images` — metadata-level image check only. + Some(FsckCheck::Images) => { + let ok = crate::fsck::fsck_images(storage, repair).await?; + if !ok { + std::process::exit(1); + } + } + // `bootc internals fsck` with no subcommand — run everything. + None => { + crate::fsck::fsck(storage, std::io::stdout().lock()).await?; + let ok = crate::fsck::fsck_images(storage, repair).await?; + if !ok { + std::process::exit(1); + } + } + } Ok(()) } InternalsOpts::FixupEtcFstab => crate::deploy::fixup_etc_fstab(&root), diff --git a/crates/lib/src/deploy.rs b/crates/lib/src/deploy.rs index e82314629..bb1e93c06 100644 --- a/crates/lib/src/deploy.rs +++ b/crates/lib/src/deploy.rs @@ -5,11 +5,9 @@ use std::collections::HashSet; use std::io::{BufRead, Write}; use std::os::fd::AsFd; -use std::process::Command; use anyhow::{Context, Result, anyhow}; use bootc_kernel_cmdline::utf8::CmdlineOwned; -use bootc_utils::skopeo_bin; use cap_std::fs::{Dir, MetadataExt}; use cap_std_ext::cap_std; use cap_std_ext::dirext::CapStdExtDirExt; @@ -111,23 +109,6 @@ pub(crate) async fn new_importer( Ok(imp) } -/// Wrapper for pulling a container image with a custom proxy config (e.g. for unified storage). -pub(crate) async fn new_importer_with_config( - repo: &ostree::Repo, - imgref: &ostree_container::OstreeImageReference, - config: ostree_ext::containers_image_proxy::ImageProxyConfig, - booted_deployment: Option<&ostree::Deployment>, -) -> Result { - let mut imp = ostree_container::store::ImageImporter::new(repo, imgref, config).await?; - imp.require_bootable(); - // We do our own GC/prune in deploy::prune(), so skip the importer's internal one. - imp.disable_gc(); - if let Some(deployment) = booted_deployment { - imp.set_sepolicy_commit(deployment.csum().to_string()); - } - Ok(imp) -} - pub(crate) fn check_bootc_label(config: &ostree_ext::oci_spec::image::ImageConfiguration) { if let Some(label) = labels_of_config(config).and_then(|labels| labels.get(crate::metadata::BOOTC_COMPAT_LABEL)) @@ -423,16 +404,6 @@ pub(crate) fn check_disk_space_ostree( ) } -/// Verify there is sufficient disk space to pull an image into the composefs store -/// via the ostree unified-storage path (uses `PreparedImportMeta`). -pub(crate) fn check_disk_space_unified( - cfs: &crate::store::ComposefsRepository, - image_meta: &PreparedImportMeta, - imgref: &ImageReference, -) -> Result<()> { - check_disk_space_inner(cfs.objects_dir()?, image_meta.bytes_to_fetch, 0, imgref) -} - /// Verify there is sufficient disk space to pull an image into the composefs store /// for the native composefs backend (uses a raw `ImageManifest`). pub(crate) fn check_disk_space_composefs( @@ -503,154 +474,168 @@ pub(crate) async fn prepare_for_pull( Ok(PreparedPullResult::Ready(Box::new(prepared_image))) } -/// Check whether the image exists in bootc's unified container storage. +/// Check whether unified base-image storage is enabled on this system. /// -/// This is used for auto-detection: if the image already exists in bootc storage -/// (e.g., from a previous `bootc image set-unified` or LBI pull), we can use -/// the unified storage path for faster imports. +/// Returns `true` iff `composefs/bootc.json` exists and has `unified-storage: true`. +/// This is the authoritative signal written by `bootc image set-unified` (and by +/// `bootc install --experimental-unified-storage`). /// -/// Returns true if the image exists in bootc storage. -pub(crate) async fn image_exists_in_unified_storage( - store: &Storage, - imgref: &ImageReference, -) -> Result { - let imgstore = store.get_ensure_imgstore()?; - let image_ref_str = imgref.to_transport_image()?; - imgstore.exists(&image_ref_str).await +/// If the composefs repository doesn't exist yet, the file is absent and this +/// returns `false` — a single cheap file-open attempt with no side effects. +pub(crate) fn unified_storage_enabled(store: &Storage) -> Result { + Ok(crate::store::BootcRepoMeta::read(&store.physical_root)? + .map(|m| m.unified_storage) + .unwrap_or(false)) } -/// Unified approach: Use bootc's CStorage to pull the image, then prepare from containers-storage. -/// This reuses the same infrastructure as LBIs. -pub(crate) async fn prepare_for_pull_unified( +/// Full unified pipeline: containers-storage → composefs → ostree. +/// +/// Stage 1: Pull the image into bootc-owned containers-storage. +/// Stage 2: Zero-copy import from containers-storage into the composefs OCI repo. +/// Stage 3: Synthesize an ostree commit from the composefs filesystem tree. +/// +/// This is the implementation of `--experimental-unified-storage` for the ostree backend. +pub(crate) async fn pull_via_composefs( repo: &ostree::Repo, imgref: &ImageReference, - target_imgref: Option<&OstreeImageReference>, store: &Storage, - booted_deployment: Option<&ostree::Deployment>, -) -> Result { - // Get or initialize the bootc container storage (same as used for LBIs) - let imgstore = store.get_ensure_imgstore()?; +) -> Result> { + use composefs_ctl::composefs_oci; + use composefs_ctl::composefs_oci::oci_image::OciImage; + use composefs_ctl::composefs_oci::{LocalFetchOpt, PullOptions, tag_image}; + use ostree_ext::container::composefs_import; + use ostree_ext::container::store::ref_for_image; + // Stage 1: pull into bootc-owned containers-storage. + let imgstore = store.get_ensure_imgstore()?; let image_ref_str = imgref.to_transport_image()?; - - // Always pull to ensure we have the latest image, whether from a remote - // registry or a locally rebuilt image tracing::info!( - "Unified pull: pulling from transport '{}' to bootc storage", - &imgref.transport + "Composefs unified pull: staging {} into containers-storage", + imgref.image ); + if imgref.transport == "containers-storage" { + imgstore + .pull_from_containers_storage(&imgref.image) + .await + .context("Copying image from host containers-storage into bootc storage")?; + } else { + imgstore + .pull_with_progress(&image_ref_str) + .await + .context("Pulling image into bootc containers-storage")?; + } - // Pull the image into bootc containers-storage with per-layer - // download progress via the podman native API. - imgstore - .pull_with_progress(&image_ref_str) - .await - .context("Pulling image into bootc containers-storage")?; - - // Now create a containers-storage reference to read from bootc storage - tracing::info!("Unified pull: now importing from containers-storage transport"); - let containers_storage_imgref = ImageReference { - transport: "containers-storage".to_string(), - image: imgref.image.clone(), - signature: imgref.signature.clone(), - }; - let ostree_imgref = OstreeImageReference::from(containers_storage_imgref); - - // Configure the importer to use bootc storage as an additional image store - let mut config = new_proxy_config(); - let mut cmd = Command::new(skopeo_bin()); - // Use the physical path to bootc storage from the Storage struct + // Stage 2: zero-copy import from containers-storage into the composefs OCI repo. + let cfs_repo = store.get_ensure_composefs()?; + let cstor_imgref_str = format!("containers-storage:{}", imgref.image); let storage_path = format!( "{}/{}", store.physical_root_path, crate::podstorage::CStorage::subpath() ); - crate::podstorage::set_additional_image_store(&mut cmd, &storage_path); - config.skopeo_cmd = Some(cmd); + tracing::info!( + "Composefs unified pull: importing {} into composefs repo (zero-copy)", + cstor_imgref_str + ); + let pull_opts = PullOptions { + local_fetch: LocalFetchOpt::ZeroCopy, + storage_root: Some(std::path::Path::new(&storage_path)), + ..Default::default() + }; + let pull_result = composefs_oci::pull(&cfs_repo, &cstor_imgref_str, None, pull_opts) + .await + .context("Importing from containers-storage into composefs repo")?; + + // Tag the manifest as a GC root in the composefs repo. + let tag = + crate::bootc_composefs::repo::bootc_tag_for_manifest(&pull_result.manifest_digest.to_string()); + tag_image(&*cfs_repo, &pull_result.manifest_digest, &tag) + .context("Tagging pulled image as bootc GC root in composefs repo")?; + + // Open the OCI image to retrieve manifest + config for the ostree synthesis. + let oci_image = OciImage::open(&cfs_repo, &pull_result.manifest_digest, None) + .context("Opening OCI image from composefs repo")?; + let manifest = oci_image.manifest().clone(); + let config = oci_image + .config() + .cloned() + .context("OCI image has no config (artifact, not a container image)")?; + + let manifest_digest_str = pull_result.manifest_digest.to_string(); + + // Stage 3: synthesize ostree commit from composefs tree (blocking, CPU-bound). + tracing::info!( + "Composefs unified pull: synthesizing ostree commit from composefs tree (digest {})", + manifest_digest_str + ); + let repo_clone = repo.clone(); + let cfs_repo_clone = std::sync::Arc::clone(&cfs_repo); + let config_digest = pull_result.config_digest.clone(); + let manifest_digest_str2 = manifest_digest_str.clone(); + let ostree_commit = tokio::task::spawn_blocking(move || { + composefs_import::import_from_composefs_repo( + &repo_clone, + &cfs_repo_clone, + &config_digest, + &manifest_digest_str2, + &manifest, + &config, + false, // reflinks_required: allow fallback to byte copy + ) + }) + .await + .context("join error in composefs→ostree import task")? + .context("Synthesizing ostree commit from composefs tree")?; - // Use the preparation flow with the custom config - let mut imp = new_importer_with_config(repo, &ostree_imgref, config, booted_deployment).await?; - if let Some(target) = target_imgref { - imp.set_target(target); + // Write the ostree ref so the deployment machinery can find the commit. + { + let ostree_imgref = OstreeImageReference::from(imgref.clone()); + let ostree_ref = ref_for_image(&ostree_imgref.imgref) + .context("Computing ostree ref for image")?; + let txn = repo + .auto_transaction(gio::Cancellable::NONE) + .context("Beginning ostree transaction for ref write")?; + repo.transaction_set_ref(None, &ostree_ref, Some(ostree_commit.as_str())); + txn.commit(gio::Cancellable::NONE) + .context("Committing ostree ref transaction")?; } - let prep = match imp.prepare().await? { - PrepareResult::AlreadyPresent(c) => { - println!("No changes in {imgref:#} => {}", c.manifest_digest); - return Ok(PreparedPullResult::AlreadyPresent(Box::new((*c).into()))); - } - PrepareResult::Ready(p) => p, - }; - check_bootc_label(&prep.config); - if let Some(warning) = prep.deprecated_warning() { - ostree_ext::cli::print_deprecated_warning(warning).await; + + // Extract version from the config labels. + let version = oci_image + .config() + .and_then(|cfg| ostree_ext::container::version_for_config(cfg)) + .map(|s| s.to_owned()); + + // Parse the manifest digest into the oci_spec::image::Digest type that + // ImageState expects. The string is already in "sha256:..." format. + let manifest_digest: Digest = manifest_digest_str + .parse() + .with_context(|| format!("Parsing manifest digest {manifest_digest_str}"))?; + + // Write composefs/bootc.json to mark this system as unified-storage enabled. + // This ensures pull_auto uses the three-store pipeline on all subsequent + // upgrades, even if the user didn't explicitly run `bootc image set-unified`. + { + crate::store::ensure_composefs_dir(&store.physical_root)?; + let mut meta = crate::store::BootcRepoMeta::read(&store.physical_root)? + .unwrap_or_default(); + meta.version = 1; + meta.unified_storage = true; + meta.write(&store.physical_root) + .context("Writing unified-storage flag after composefs pull")?; } - ostree_ext::cli::print_layer_status(&prep); - let layers_to_fetch = prep.layers_to_fetch().collect::>>()?; - // Log that we're importing a new image from containers-storage - const PULLING_NEW_IMAGE_ID: &str = "6d5e4f3a2b1c0d9e8f7a6b5c4d3e2f1a0"; tracing::info!( - message_id = PULLING_NEW_IMAGE_ID, - bootc.image.reference = &imgref.image, - bootc.image.transport = "containers-storage", - bootc.original_transport = &imgref.transport, - bootc.status = "importing_from_storage", - "Importing image from bootc storage: {}", - ostree_imgref + "Composefs unified pull complete: commit {} digest {}", + ostree_commit, + manifest_digest ); - let prepared_image = PreparedImportMeta { - imp, - n_layers_to_fetch: layers_to_fetch.len(), - layers_total: prep.all_layers().count(), - bytes_to_fetch: layers_to_fetch.iter().map(|(l, _)| l.layer.size()).sum(), - bytes_total: prep.all_layers().map(|l| l.layer.size()).sum(), - digest: prep.manifest_digest.clone(), - prep, - }; - - Ok(PreparedPullResult::Ready(Box::new(prepared_image))) -} - -/// Unified pull: Use podman to pull to containers-storage, then read from there -pub(crate) async fn pull_unified( - repo: &ostree::Repo, - imgref: &ImageReference, - target_imgref: Option<&OstreeImageReference>, - quiet: bool, - prog: ProgressWriter, - store: &Storage, - booted_deployment: Option<&ostree::Deployment>, -) -> Result> { - match prepare_for_pull_unified(repo, imgref, target_imgref, store, booted_deployment).await? { - PreparedPullResult::AlreadyPresent(existing) => { - // Log that the image was already present (Debug level since it's not actionable) - const IMAGE_ALREADY_PRESENT_ID: &str = "5c4d3e2f1a0b9c8d7e6f5a4b3c2d1e0f9"; - tracing::debug!( - message_id = IMAGE_ALREADY_PRESENT_ID, - bootc.image.reference = &imgref.image, - bootc.image.transport = &imgref.transport, - bootc.status = "already_present", - "Image already present: {}", - imgref - ); - Ok(existing) - } - PreparedPullResult::Ready(prepared_image_meta) => { - check_disk_space_unified( - store.get_ensure_composefs()?.as_ref(), - &prepared_image_meta, - imgref, - )?; - // To avoid duplicate success logs, pass a containers-storage imgref to the importer - let cs_imgref = ImageReference { - transport: "containers-storage".to_string(), - image: imgref.image.clone(), - signature: imgref.signature.clone(), - }; - pull_from_prepared(&cs_imgref, quiet, prog, *prepared_image_meta).await - } - } + Ok(Box::new(ImageState { + manifest_digest, + version, + ostree_commit, + })) } #[context("Pulling")] @@ -779,6 +764,108 @@ pub(crate) async fn wipe_ostree(sysroot: Sysroot) -> Result<()> { Ok(()) } +/// Prune composefs objects no longer referenced by any live deployment. +/// +/// This is a no-op if unified storage is not enabled or the composefs +/// repository doesn't exist yet. +/// +/// On an ostree+unified-storage system every `bootc upgrade` tags the newly +/// pulled manifest in the composefs repo (`localhost/bootc-sha256:`). +/// After the ostree prune step those old tags are no longer anchored to any +/// deployment. This function: +/// +/// 1. Collects the manifest digests of all current ostree deployments. +/// 2. Untagges any composefs bootc-tag whose digest is not in that set. +/// 3. Runs `repo.gc()` to drop orphaned splitstream objects. +#[context("Pruning composefs store")] +async fn prune_composefs_store(sysroot: &Storage) -> Result<()> { + if !unified_storage_enabled(sysroot)? { + return Ok(()); + } + let cfs_repo = match sysroot.get_ensure_composefs() { + Ok(r) => r, + Err(e) => { + tracing::debug!("Composefs repo not available, skipping prune: {e}"); + return Ok(()); + } + }; + + let ostree = match sysroot.get_ostree() { + Ok(o) => o, + Err(_) => return Ok(()), + }; + let repo = ostree.repo(); + + // Collect manifest digests from all current ostree deployments. + // Deployments pulled via pull_via_composefs store META_MANIFEST_DIGEST + // (`ostree.manifest-digest`) in their commit metadata. + let mut live_digests: std::collections::HashSet = + std::collections::HashSet::new(); + for deployment in ostree.deployments() { + let commit_str = deployment.csum(); + match repo.load_commit(commit_str.as_str()) { + Ok((commitv, _)) => { + match ostree_ext::container::store::manifest_digest_from_commit(&commitv) { + Ok(digest) => { + live_digests.insert(digest.to_string()); + } + Err(e) => { + // Not every deployment was pulled via composefs; skip gracefully. + tracing::debug!( + "No manifest digest in commit {commit_str}: {e}" + ); + } + } + } + Err(e) => { + tracing::warn!("Failed to load commit {commit_str}: {e}"); + } + } + } + + // The composefs repository API is synchronous; move the work off the + // async executor thread. + let cfs_repo = std::sync::Arc::clone(&cfs_repo); + tokio::task::spawn_blocking(move || -> Result<()> { + use composefs_ctl::composefs_oci; + use crate::composefs_consts::BOOTC_TAG_PREFIX; + + let all_tags = composefs_oci::list_refs(&*cfs_repo) + .context("Listing composefs OCI refs")?; + + let mut untagged = 0usize; + for (tag_name, _manifest_digest) in &all_tags { + if !tag_name.starts_with(BOOTC_TAG_PREFIX) { + // Not a bootc-owned tag; leave it for the user / other tools. + continue; + } + // The tag is `localhost/bootc-sha256:`. Strip the prefix to + // recover the canonical digest string (`sha256:`). + let digest_str = tag_name + .strip_prefix(BOOTC_TAG_PREFIX) + .expect("checked above"); + if !live_digests.contains(digest_str) { + tracing::debug!("Removing unreferenced composefs bootc tag: {tag_name}"); + composefs_oci::untag_image(&*cfs_repo, tag_name) + .with_context(|| format!("Removing composefs tag {tag_name}"))?; + untagged += 1; + } + } + + if untagged > 0 { + tracing::info!("Removed {untagged} unreferenced composefs tag(s); running GC"); + let gc_result = cfs_repo.gc(&[]).context("Running composefs GC")?; + tracing::debug!("Composefs GC result: {:?}", gc_result); + } else { + tracing::debug!("No unreferenced composefs tags; skipping GC"); + } + + Ok(()) + }) + .await + .context("composefs prune task join error")? +} + pub(crate) async fn cleanup(sysroot: &Storage) -> Result<()> { // Log the cleanup operation to systemd journal const CLEANUP_JOURNAL_ID: &str = "2f1a0b9c8d7e6f5a4b3c2d1e0f9a8b7c6"; @@ -840,6 +927,13 @@ pub(crate) async fn cleanup(sysroot: &Storage) -> Result<()> { // We run these in parallel mostly because we can. tokio::try_join!(repo_prune, bound_prune)?; + + // After ostree prune, clean up any stale composefs tags and GC orphaned + // splitstream objects on unified-storage systems. + prune_composefs_store(sysroot) + .await + .context("Pruning composefs store")?; + Ok(()) } diff --git a/crates/lib/src/fsck.rs b/crates/lib/src/fsck.rs index 25e95e33c..d38038b94 100644 --- a/crates/lib/src/fsck.rs +++ b/crates/lib/src/fsck.rs @@ -268,6 +268,15 @@ async fn check_fsverity_inner(storage: &Storage) -> FsckResult { fsck_err(err) } +/// Check image store consistency. Delegates to [`crate::image::fsck_images`]. +/// +/// Returns `true` if all checks passed, `false` if any inconsistency was found. +/// When `repair` is `true`, attempts to restore images that are in composefs +/// but missing from containers-storage. +pub(crate) async fn fsck_images(storage: &Storage, repair: bool) -> anyhow::Result { + crate::image::fsck_images(storage, repair).await +} + pub(crate) async fn fsck(storage: &Storage, mut output: impl std::io::Write) -> anyhow::Result<()> { let mut checks = FSCK_CHECKS.static_slice().iter().collect::>(); checks.sort_by(|a, b| a.ordering.cmp(&b.ordering)); diff --git a/crates/lib/src/image.rs b/crates/lib/src/image.rs index 8a6b17a54..91d8f7411 100644 --- a/crates/lib/src/image.rs +++ b/crates/lib/src/image.rs @@ -7,13 +7,16 @@ use bootc_utils::CommandRunExt; use cap_std_ext::cap_std::{self, fs::Dir}; use clap::ValueEnum; use comfy_table::{Table, presets::NOTHING}; +use composefs_ctl::composefs_oci; use fn_error_context::context; use ostree_ext::container::{ImageReference, Transport}; use serde::Serialize; use crate::{ boundimage::query_bound_images, + bootc_composefs::status::ImgConfigManifest, cli::{ImageListFormat, ImageListType}, + composefs_consts::BOOTC_TAG_PREFIX, podstorage::CStorage, spec::Host, store::Storage, @@ -40,6 +43,9 @@ async fn image_exists_in_host_storage(image: &str) -> Result { enum ImageListTypeColumn { Host, Unified, + /// Image is in bootc containers-storage but composefs import is incomplete + /// or failed. Re-run `bootc upgrade` to complete the import. + Partial, Logical, } @@ -89,17 +95,68 @@ async fn list_host_images_composefs(sysroot: &crate::store::Storage) -> Result = + match sysroot.get_ensure_composefs() { + Ok(repo) => { + use composefs_ctl::composefs::fsverity::Sha512HashValue; + use composefs_ctl::composefs_oci::OciImage; + composefs_oci::list_refs(&*repo) + .context("Listing composefs OCI refs")? + .into_iter() + .filter(|(tag, _)| tag.starts_with(BOOTC_TAG_PREFIX)) + .filter_map(|(_tag, manifest_digest)| { + OciImage::::open(&repo, &manifest_digest, None) + .ok() + .map(|img| img.manifest().config().digest().to_string()) + }) + .collect() + } + Err(_) => Default::default(), + }; + + Ok(images + .into_iter() + .flat_map(|entry| { + let names = entry.names.unwrap_or_default(); + let image_type = if { + let id = &entry.id; + composefs_config_digests.contains(&format!("sha256:{id}")) + || composefs_config_digests.contains(id.as_str()) + } { + ImageListTypeColumn::Unified + } else { + // In composefs storage but not composefs-tagged — partial import. + ImageListTypeColumn::Partial + }; + names.into_iter().map(move |name| ImageOutput { + image: name, + image_type: image_type.clone(), + }) }) .collect()) } @@ -175,6 +232,281 @@ pub(crate) async fn list_entrypoint( Ok(()) } +/// Check image store consistency across containers-storage and composefs. +/// +/// Enumerates all images in the bootc-managed stores and verifies that each +/// image present in containers-storage is also fully imported into the +/// composefs store (i.e. has a `localhost/bootc-sha256:` tag). +/// +/// Prints a human-readable report to stdout and returns `true` if all +/// checks pass, `false` if any inconsistency is found. +/// +/// When `repair` is `true`, images that are in composefs but missing from +/// containers-storage are restored by re-exporting from composefs. +/// +/// When unified storage is not enabled (no composefs repo / no bootc.json), +/// this prints a note and returns `true` (nothing to check). +/// Collect the set of OCI manifest digests referenced by all live composefs +/// deployments (booted, rollback, staged). +/// +/// Each live deployment has a state-dir origin file that records the manifest +/// digest of its image. Images whose manifest is NOT in this set are stale +/// GC roots that will be cleaned up by `bootc internals composefs-gc`; they +/// should not be treated as fsck failures even if they are absent from +/// containers-storage (the rollback image is the canonical example: after a +/// `bootc switch`, `prune_container_store` may have removed it). +fn collect_live_deployment_manifest_digests(sysroot: &Storage) -> std::collections::HashSet { + use crate::bootc_composefs::state::read_origin; + use crate::composefs_consts::{ORIGIN_KEY_IMAGE, ORIGIN_KEY_MANIFEST_DIGEST, STATE_DIR_RELATIVE}; + + let mut digests = std::collections::HashSet::new(); + let Ok(state_base) = sysroot.physical_root.open_dir(STATE_DIR_RELATIVE) else { + // No composefs state dir — not a composefs system or no deployments yet. + return digests; + }; + let entries = match state_base.entries_utf8() { + Ok(e) => e, + Err(_) => return digests, + }; + for entry in entries.flatten() { + let Ok(ft) = entry.file_type() else { continue }; + if !ft.is_dir() { + continue; + } + let Ok(verity) = entry.file_name() else { continue }; + match read_origin(&sysroot.physical_root, &verity) { + Ok(Some(ini)) => { + if let Some(digest) = + ini.get::(ORIGIN_KEY_IMAGE, ORIGIN_KEY_MANIFEST_DIGEST) + { + digests.insert(digest); + } + } + Ok(None) => {} // State dir exists but no origin yet (interrupted deploy). + Err(e) => { + tracing::warn!("Failed to read origin for deployment {verity}: {e:#}"); + } + } + } + digests +} + +pub(crate) async fn fsck_images(sysroot: &Storage, repair: bool) -> anyhow::Result { + // Check if unified storage is enabled on this system. + let meta = crate::store::BootcRepoMeta::read(&sysroot.physical_root)?; + let unified_enabled = meta.map(|m| m.unified_storage).unwrap_or(false); + + if !unified_enabled { + println!("bootc fsck images: unified storage not enabled; nothing to check"); + return Ok(true); + } + + println!("bootc fsck: checking image stores"); + + // Use composefs tags as the authoritative registry: only images that bootc + // explicitly tagged (via pull_via_composefs) are in scope for the check. + // LBIs live in containers-storage but are NOT tagged in composefs — they + // are not subject to this check. + let cfs_repo = match sysroot.get_ensure_composefs() { + Ok(r) => r, + Err(_) => { + println!("bootc fsck: composefs repo not found; nothing to check"); + return Ok(true); + } + }; + + let bootc_tags: Vec<(String, composefs_oci::OciDigest)> = + composefs_oci::list_refs(&*cfs_repo) + .context("Listing composefs OCI refs")? + .into_iter() + .filter(|(tag, _)| tag.starts_with(BOOTC_TAG_PREFIX)) + .collect(); + + if bootc_tags.is_empty() { + println!("bootc fsck: no bootc-managed images in composefs; nothing to check"); + return Ok(true); + } + + // Build a set of manifest digests for all live deployments (booted, + // rollback, staged). Composefs tags whose manifest is not in this set + // are stale GC roots — they will be removed by the next composefs-gc run. + // It is NOT a fsck failure if such an image is absent from + // containers-storage: the rollback's image is the canonical example, as + // `prune_container_store` may legitimately have removed it after a switch. + let live_manifest_digests = collect_live_deployment_manifest_digests(sysroot); + + // Build the set of image IDs (config sha256) present in containers-storage. + // We match by config digest rather than manifest digest because podman may + // report a different manifest digest than composefs when layers are + // recompressed during the copy. The config digest is stable. + let cstor_images = { + let run = cap_std_ext::cap_std::fs::Dir::open_ambient_dir( + "/run", + cap_std_ext::cap_std::ambient_authority(), + ) + .context("Opening /run")?; + let imgstore = crate::podstorage::CStorage::create( + &sysroot.physical_root, + &run, + None, + ) + .context("Opening containers-storage")?; + imgstore + .list_images() + .await + .context("Listing containers-storage images")? + }; + // Map from config-digest (image ID) → names for cross-referencing. + let cstor_id_to_names: std::collections::HashMap> = cstor_images + .into_iter() + .map(|entry| (entry.id, entry.names.unwrap_or_default())) + .collect(); + + let mut n_ok = 0usize; + let mut n_fail = 0usize; + + for (tag, manifest_digest) in &bootc_tags { + use composefs_ctl::composefs::fsverity::Sha512HashValue; + use composefs_ctl::composefs_oci::OciImage; + + // Get config digest and human-readable name from the composefs OCI image. + let oci_img = OciImage::::open(&cfs_repo, manifest_digest, None).ok(); + let config_digest = oci_img + .as_ref() + .map(|img| img.manifest().config().digest().to_string()); + let display_name = oci_img + .as_ref() + .and_then(|img| { + img.config() + .and_then(|cfg| cfg.config().as_ref()) + .and_then(|c| c.labels().as_ref()) + .and_then(|l| l.get("org.opencontainers.image.ref.name").map(|s| s.clone())) + }) + .unwrap_or_else(|| tag.clone()); + + // Only enforce containers-storage presence for images that back a live + // deployment. Tags for stale images (e.g. a prior rollback that was + // displaced by a second upgrade) will be cleaned up by composefs-gc and + // are harmless to skip here. + let is_live = live_manifest_digests.contains(manifest_digest.to_string().as_str()); + if !is_live { + tracing::debug!( + "Skipping stale composefs tag {tag} ({manifest_digest}): not a live deployment" + ); + continue; + } + + // Check cstorage by config digest (image ID). + let in_cstor = if let Some(ref cdig) = config_digest { + // podman IDs are bare hex; digests are "sha256:" + let bare = cdig.strip_prefix("sha256:").unwrap_or(cdig.as_str()); + cstor_id_to_names.contains_key(bare) || cstor_id_to_names.contains_key(cdig.as_str()) + } else { + false + }; + + // in_composefs is always true here: we iterated over composefs list_refs + if in_cstor { + println!(" [OK] {display_name} (composefs: yes, containers-storage: yes)"); + n_ok += 1; + } else if repair { + println!(" [REPAIR] Restoring {display_name} to containers-storage..."); + match repair_image_to_containers_storage(&cfs_repo, manifest_digest).await { + Ok(()) => { + println!( + " [REPAIRED] Successfully restored {display_name} to containers-storage" + ); + n_ok += 1; + } + Err(e) => { + println!(" [REPAIR FAILED] {display_name}: {e:#}"); + n_fail += 1; + } + } + } else { + println!( + " [FAIL] {display_name} - in composefs but not in containers-storage" + ); + println!( + " Hint: re-run with `--repair` to restore, or `bootc upgrade` to re-pull" + ); + n_fail += 1; + } + } + + if n_fail == 0 { + println!("bootc fsck: all checks passed ({n_ok} image(s) OK)"); + Ok(true) + } else { + println!("bootc fsck: {n_fail} failure(s) found ({n_ok} OK, {n_fail} FAIL)"); + Ok(false) + } +} + +/// Restore an image from the composefs repo into bootc containers-storage. +/// +/// Used by `bootc internals fsck --repair` when an image is present in +/// composefs (has a bootc GC tag) but missing from containers-storage. +/// +/// Opens the OCI manifest and config stored in the composefs splitstreams, +/// then re-exports the image to containers-storage via skopeo. +async fn repair_image_to_containers_storage( + cfs_repo: &crate::store::ComposefsRepository, + manifest_digest: &composefs_oci::OciDigest, +) -> Result<()> { + use composefs_ctl::composefs::fsverity::Sha512HashValue; + use composefs_ctl::composefs_oci::OciImage; + + // Ensure floating containers-storage is set up before we try to write to it. + crate::podstorage::ensure_floating_c_storage_initialized(); + + // Load the manifest and config from the composefs splitstreams. + let oci_image = OciImage::::open(cfs_repo, manifest_digest, None) + .with_context(|| { + format!("Opening OCI image for manifest digest {manifest_digest} from composefs") + })?; + + let manifest = oci_image.manifest().clone(); + let config = oci_image + .config() + .cloned() + .ok_or_else(|| anyhow::anyhow!("OCI image {manifest_digest} has no config (artifact?)"))?; + + let imginfo = ImgConfigManifest { config, manifest }; + + // Derive a containers-storage name from the manifest digest. + // Use the org.opencontainers.image.ref.name label if available, otherwise + // synthesise a name from the digest so the image is addressable. + let image_name = imginfo + .config + .config() + .as_ref() + .and_then(|c| c.labels().as_ref()) + .and_then(|l| l.get("org.opencontainers.image.ref.name")) + .cloned() + .unwrap_or_else(|| { + // Strip the "sha256:" prefix and take the first 12 hex chars as a short id. + let digest_str = manifest_digest.to_string(); + let short = digest_str + .strip_prefix("sha256:") + .unwrap_or(&digest_str) + .get(..12) + .unwrap_or(&digest_str); + format!("localhost/bootc-recovered:{short}") + }); + + let dest_imgref = ImageReference { + transport: Transport::ContainerStorage, + name: image_name, + }; + + crate::bootc_composefs::export::export_composefs_to_dest(cfs_repo, &imginfo, &dest_imgref) + .await + .with_context(|| { + format!("Exporting composefs image {manifest_digest} to containers-storage") + }) +} + /// Returns the source and target ImageReference /// If the source isn't specified, we use booted image /// If the target isn't specified, we push to containers-storage with our default image @@ -235,6 +567,52 @@ pub(crate) async fn push_entrypoint( let ostree = storage.get_ostree()?; let repo = &ostree.repo(); + // Images pulled via the composefs-unified pipeline are stored in the composefs + // repository with a synthesized ostree commit that has no per-layer blob refs. + // Detect this case and fall through to the composefs export path which reads + // directly from the composefs splitstreams instead of ostree blob refs. + let ostree_ref = ostree_ext::container::store::ref_for_image(&source)?; + if let Some(rev) = repo.resolve_rev(&ostree_ref, true)? { + let (commit_obj, _) = repo.load_commit(rev.as_str())?; + let commit_meta = + ostree_ext::ostree::glib::VariantDict::new(Some(&commit_obj.child_value(0))); + let is_composefs_synthesized = commit_meta + .lookup::(ostree_ext::container::store::META_COMPOSEFS_SYNTHESIZED)? + .unwrap_or(false); + + if is_composefs_synthesized { + // The manifest and config are embedded in the commit metadata. + let manifest_str = commit_meta + .lookup::(ostree_ext::container::store::META_MANIFEST)? + .ok_or_else(|| anyhow::anyhow!("Composefs-synthesized commit missing manifest"))?; + let config_str = commit_meta + .lookup::(ostree_ext::container::store::META_CONFIG)? + .ok_or_else(|| { + anyhow::anyhow!("Composefs-synthesized commit missing image config") + })?; + + let manifest: ostree_ext::oci_spec::image::ImageManifest = + serde_json::from_str(&manifest_str) + .context("Deserializing manifest from commit metadata")?; + let config: ostree_ext::oci_spec::image::ImageConfiguration = + serde_json::from_str(&config_str) + .context("Deserializing image config from commit metadata")?; + + let imginfo = crate::bootc_composefs::status::ImgConfigManifest { config, manifest }; + let composefs_repo = storage.get_ensure_composefs()?; + + println!("Copying local image {source} to {target} ..."); + crate::bootc_composefs::export::export_composefs_to_dest( + &composefs_repo, + &imginfo, + &target, + ) + .await?; + println!("Pushed: {target}"); + return Ok(()); + } + } + let mut opts = ostree_ext::container::store::ExportToOCIOpts::default(); opts.progress_to_stdout = true; println!("Copying local image {source} to {target} ..."); @@ -355,6 +733,17 @@ async fn set_unified_composefs( ); } + // Write composefs/bootc.json so pull_via_composefs routes through the + // three-store unified pipeline on all subsequent upgrades/switches. + { + let mut meta = crate::store::BootcRepoMeta::read(&storage.physical_root)? + .unwrap_or_default(); + meta.version = 1; + meta.unified_storage = true; + meta.write(&storage.physical_root) + .context("Writing unified-storage flag to composefs/bootc.json")?; + } + tracing::info!( message_id = SET_UNIFIED_CFS_JOURNAL_ID, bootc.status = "set_unified_complete", @@ -504,18 +893,52 @@ pub(crate) async fn set_unified(sysroot: &crate::store::Storage) -> Result<()> { } tracing::info!("Image verified in bootc storage: {}", &imgref.image); - // Optionally verify we can import from containers-storage by preparing in a temp importer - // without actually importing into the main repo; this is a lightweight validation. - let containers_storage_imgref = crate::spec::ImageReference { - transport: "containers-storage".to_string(), - image: imgref.image.clone(), - signature: imgref.signature.clone(), - }; - let ostree_imgref = - ostree_ext::container::OstreeImageReference::from(containers_storage_imgref); - let _ = - ostree_ext::container::store::ImageImporter::new(repo, &ostree_imgref, Default::default()) - .await?; + // Import the image from containers-storage into the composefs OCI repo and tag it + // as a bootc GC root. This mirrors Stage 2 of pull_via_composefs in deploy.rs so + // that `bootc image list` shows the image as fully present (not "partial"). + { + use composefs_ctl::composefs_oci::{LocalFetchOpt, PullOptions, tag_image}; + + let cfs_repo = sysroot.get_ensure_composefs()?; + let cstor_imgref_str = format!("containers-storage:{}", imgref.image); + let storage_path = format!( + "{}/{}", + sysroot.physical_root_path, + CStorage::subpath() + ); + tracing::info!( + "Importing {} from containers-storage into composefs repo", + &imgref.image + ); + let pull_opts = PullOptions { + // IfPossible falls back to a byte-copy when src and dst are on different + // filesystems, avoiding a hard failure on unusual setups. + local_fetch: LocalFetchOpt::IfPossible, + storage_root: Some(std::path::Path::new(&storage_path)), + ..Default::default() + }; + let pull_result = composefs_oci::pull(&cfs_repo, &cstor_imgref_str, None, pull_opts) + .await + .context("Importing from containers-storage into composefs repo")?; + + let tag = crate::bootc_composefs::repo::bootc_tag_for_manifest( + &pull_result.manifest_digest.to_string(), + ); + tag_image(&*cfs_repo, &pull_result.manifest_digest, &tag) + .context("Tagging pulled image as bootc GC root in composefs repo")?; + } + + // Ensure the composefs directory exists (it may not on a fresh ostree system + // that has never run pull_via_composefs), then write the bootc.json flag. + crate::store::ensure_composefs_dir(&sysroot.physical_root)?; + { + let mut meta = crate::store::BootcRepoMeta::read(&sysroot.physical_root)? + .unwrap_or_default(); + meta.version = 1; + meta.unified_storage = true; + meta.write(&sysroot.physical_root) + .context("Writing unified-storage flag to composefs/bootc.json")?; + } tracing::info!( message_id = SET_UNIFIED_JOURNAL_ID, diff --git a/crates/lib/src/install.rs b/crates/lib/src/install.rs index c9fcaf88c..bc730af45 100644 --- a/crates/lib/src/install.rs +++ b/crates/lib/src/install.rs @@ -1070,25 +1070,24 @@ async fn install_container( // Auto-detection (None) is only appropriate for upgrade/switch on a running system. let use_unified = state.target_opts.unified_storage_exp; - let prepared = if use_unified { - tracing::info!("Using unified storage path for installation"); - crate::deploy::prepare_for_pull_unified( - repo, - &spec_imgref, - Some(&state.target_imgref), - storage, - None, - ) - .await? + let pulled_image = if use_unified { + tracing::info!("Using unified storage path (composefs→ostree) for installation"); + crate::deploy::pull_via_composefs(repo, &spec_imgref, storage).await? } else { - prepare_for_pull(repo, &spec_imgref, Some(&state.target_imgref), None).await? - }; - - let pulled_image = match prepared { - PreparedPullResult::AlreadyPresent(existing) => existing, - PreparedPullResult::Ready(image_meta) => { - crate::deploy::check_disk_space_ostree(repo, &image_meta, &spec_imgref)?; - pull_from_prepared(&spec_imgref, false, ProgressWriter::default(), *image_meta).await? + let prepared = + prepare_for_pull(repo, &spec_imgref, Some(&state.target_imgref), None).await?; + match prepared { + PreparedPullResult::AlreadyPresent(existing) => existing, + PreparedPullResult::Ready(image_meta) => { + crate::deploy::check_disk_space_ostree(repo, &image_meta, &spec_imgref)?; + pull_from_prepared( + &spec_imgref, + false, + ProgressWriter::default(), + *image_meta, + ) + .await? + } } }; @@ -1208,7 +1207,7 @@ async fn install_container( &root_setup.physical_root, &mut pathbuf, policy, - Some(deployment_root_devino), + &[deployment_root_devino], ) .with_context(|| format!("Recursive SELinux relabeling of {d}"))?; } @@ -2093,7 +2092,34 @@ async fn install_to_filesystem_impl( if let Some(policy) = state.load_policy()? { tracing::info!("Performing final SELinux relabeling of physical root"); let mut path = Utf8PathBuf::from(""); - crate::lsm::ensure_dir_labeled_recurse(&rootfs.physical_root, &mut path, &policy, None) + // Ostree sets the immutable ext4 attribute on each deployment checkout + // directory, which causes lsetfilecon() to return EPERM. Those dirs + // are already correctly labeled by the earlier composefs import pass, + // so skip each checkout by dev/ino. We enumerate them directly rather + // than skipping the whole ostree/deploy subtree so that the stateroot + // and var directories (which are not immutable) still get labeled. + let mut skip: Vec<(libc::dev_t, libc::ino64_t)> = Vec::new(); + let deploy_dir = "ostree/deploy"; + if let Some(statered_dir) = rootfs.physical_root.open_dir_optional(deploy_dir)? { + for stateroot in statered_dir.entries()? { + let stateroot = stateroot?; + if !stateroot.file_type()?.is_dir() { + continue; + } + let deploy_subdir = stateroot.open_dir()?.open_dir_optional("deploy")?; + if let Some(deploy_subdir) = deploy_subdir { + for checkout in deploy_subdir.entries()? { + let checkout = checkout?; + if !checkout.file_type()?.is_dir() { + continue; + } + let m = checkout.metadata()?; + skip.push((m.dev() as libc::dev_t, m.ino() as libc::ino64_t)); + } + } + } + } + crate::lsm::ensure_dir_labeled_recurse(&rootfs.physical_root, &mut path, &policy, &skip) .context("Final SELinux relabeling of physical root")?; } else { tracing::debug!("Skipping final SELinux relabel (SELinux is disabled)"); diff --git a/crates/lib/src/lsm.rs b/crates/lib/src/lsm.rs index 2c059406d..f9b85f308 100644 --- a/crates/lib/src/lsm.rs +++ b/crates/lib/src/lsm.rs @@ -391,13 +391,13 @@ pub(crate) fn relabel_recurse( /// Uses the `walk` API with `noxdev` and `skip_mountpoints` to avoid crossing /// mount point boundaries /// (e.g. into sysfs, procfs, etc.). -/// The provided `skip` parameter is a device/inode pair that we will ignore -/// (and not traverse into). +/// The provided `skip` parameter is a list of device/inode pairs to ignore +/// (and not traverse into). Pass an empty slice to skip nothing. pub(crate) fn ensure_dir_labeled_recurse( root: &Dir, path: &mut Utf8PathBuf, policy: &ostree::SePolicy, - skip: Option<(libc::dev_t, libc::ino64_t)>, + skip: &[(libc::dev_t, libc::ino64_t)], ) -> Result<()> { use cap_std_ext::dirext::WalkConfiguration; use std::ops::ControlFlow; @@ -432,18 +432,20 @@ pub(crate) fn ensure_dir_labeled_recurse( let metadata = component.entry.metadata()?; // Check if this entry should be skipped - if let Some((skip_dev, skip_ino)) = skip { - if (metadata.dev(), metadata.ino()) == (skip_dev, skip_ino) { - tracing::debug!("Skipping dev={skip_dev} inode={skip_ino}"); - // For directories, Break skips traversal into the directory - // but continues with the next sibling. For non-directories, - // Break would skip all remaining siblings, so use Continue - // to skip only this entry. - if component.file_type.is_dir() { - return Ok(ControlFlow::Break(())); - } else { - return Ok(ControlFlow::Continue(())); - } + let devino = ( + metadata.dev() as libc::dev_t, + metadata.ino() as libc::ino64_t, + ); + if skip.contains(&devino) { + tracing::debug!("Skipping dev={} inode={}", devino.0, devino.1); + // For directories, Break skips traversal into the directory + // but continues with the next sibling. For non-directories, + // Break would skip all remaining siblings, so use Continue + // to skip only this entry. + if component.file_type.is_dir() { + return Ok(ControlFlow::Break(())); + } else { + return Ok(ControlFlow::Continue(())); } } diff --git a/crates/lib/src/podman.rs b/crates/lib/src/podman.rs index 9b287fb40..71d707f3c 100644 --- a/crates/lib/src/podman.rs +++ b/crates/lib/src/podman.rs @@ -19,6 +19,11 @@ pub(crate) struct Inspect { pub(crate) struct ImageListEntry { pub(crate) id: String, pub(crate) names: Option>, + /// Manifest digest as reported by podman (may differ from the composefs + /// OCI manifest digest when layers are recompressed during copy). + /// Kept for diagnostic purposes; use `id` (config sha256) for matching. + #[allow(dead_code)] + pub(crate) digest: Option, } /// Given an image ID, return its manifest digest diff --git a/crates/lib/src/podstorage.rs b/crates/lib/src/podstorage.rs index de6c418ed..2d966c622 100644 --- a/crates/lib/src/podstorage.rs +++ b/crates/lib/src/podstorage.rs @@ -443,6 +443,40 @@ impl CStorage { Ok(true) } + /// Copy a `containers-storage:` image into this bootc-owned storage. + /// + /// If `STORAGE_OPTS=additionalimagestore=` is set in the environment + /// (as injected by bcvk via systemd credentials), uses that path as the + /// explicit `--root` for podman — because podman does not search additional + /// image stores when resolving a push source. Otherwise falls back to the + /// default `/var/lib/containers/storage`. + pub(crate) async fn pull_from_containers_storage(&self, image: &str) -> Result<()> { + // If the image is already in bootc's own containers-storage (e.g. built + // via `bootc image cmd build`), no copy is needed. + if self.exists(image).await.unwrap_or(false) { + tracing::debug!("Image {image} already in bootc storage; skipping copy"); + return Ok(()); + } + + let ais = std::env::var("STORAGE_OPTS") + .ok() + .and_then(|v| { + v.split(',') + .find(|s| s.starts_with("additionalimagestore=")) + .and_then(|s| s.strip_prefix("additionalimagestore=")) + .map(str::to_owned) + }); + + if let Some(src_root) = ais { + tracing::debug!( + "STORAGE_OPTS has additionalimagestore={src_root}; using as source storage root" + ); + self.pull_from_storage_root(&src_root, image).await + } else { + self.pull_from_host_storage(image).await + } + } + /// Copy an image from the default container storage (/var/lib/containers/) /// to this storage. #[context("Pulling from host storage: {image}")] @@ -468,6 +502,57 @@ impl CStorage { Ok(()) } + /// Copy an image from an additional image store into this bootc-owned storage. + /// + /// Used during install when the source image lives in a storage instance + /// advertised via `STORAGE_OPTS=additionalimagestore=` (e.g. the bcvk + /// virtiofs mount) rather than in the default `/var/lib/containers/storage`. + /// + /// Uses `skopeo copy` with `STORAGE_OPTS=additionalimagestore=` on + /// the subprocess. Skopeo respects `STORAGE_OPTS` for source image lookup and + /// does not touch libpod state, which avoids two problems: + /// + /// - `podman --root ` triggers a libpod database mismatch when the + /// foreign store was created by a different podman instance. + /// - `podman pull containers-storage:` with an AIS makes the image *visible* but + /// does not materialize it into the destination root storage. + /// + /// The netavark/skopeo-first issue (containers/skopeo#658) does not apply here + /// because `CStorage::create` always runs `podman images` to initialize the + /// bootc-owned storage before this function is ever called. + #[context("Copying from additional image store at {src_root}: {image}")] + pub(crate) async fn pull_from_storage_root( + &self, + src_root: &str, + image: &str, + ) -> Result<()> { + let mut cmd = Command::new(bootc_utils::skopeo_bin()); + cmd.stdin(Stdio::null()); + cmd.stdout(Stdio::null()); + + let temp_runroot = TempDir::new(cap_std::ambient_authority())?; + let mut fds = CmdFds::new(); + bind_storage_roots(&mut cmd, &mut fds, &self.storage_root, &temp_runroot)?; + cmd.take_fds(fds); + + // Set the AIS on the skopeo subprocess so it can find the source image. + let storage_opt = format!("additionalimagestore={src_root}"); + cmd.env("STORAGE_OPTS", &storage_opt); + + let src = format!("containers-storage:{image}"); + let dest = format!( + "containers-storage:[overlay@{STORAGE_ALIAS_DIR}+/proc/self/fd/{STORAGE_RUN_FD}]{image}" + ); + // TODO: wire up structured progress reporting for this copy instead of + // suppressing output; for now --quiet avoids unstructured blob/layer spam. + cmd.args(["copy", "--quiet", "--remove-signatures", &src, &dest]); + + let mut cmd = AsyncCommand::from(cmd); + cmd.run().await?; + temp_runroot.close()?; + Ok(()) + } + /// Pull an image with streaming progress display. /// /// Uses the podman native libpod HTTP API instead of shelling out, diff --git a/crates/lib/src/store/mod.rs b/crates/lib/src/store/mod.rs index 940d59100..5e4031e7c 100644 --- a/crates/lib/src/store/mod.rs +++ b/crates/lib/src/store/mod.rs @@ -62,6 +62,63 @@ pub const COMPOSEFS: &str = "composefs"; /// to avoid leaking information. pub(crate) const COMPOSEFS_MODE: Mode = Mode::from_raw_mode(0o700); +/// Path to bootc-specific metadata stored alongside the composefs `meta.json`. +/// +/// This file records bootc-level configuration for the composefs repository, +/// such as whether unified base-image storage is enabled. Its absence means +/// the system is not in unified-storage mode. The path is relative to the +/// physical system root. +pub(crate) const BOOTC_REPO_META: &str = "composefs/bootc.json"; + +/// bootc-specific metadata stored in `composefs/bootc.json`. +#[derive(Debug, Default, serde::Serialize, serde::Deserialize)] +#[serde(rename_all = "kebab-case")] +pub(crate) struct BootcRepoMeta { + /// Schema version; currently must be 1. + #[serde(default)] + pub version: u32, + + /// When true, `pull_via_composefs` is used for the base-image pull, + /// routing through the three-store unified pipeline + /// (containers-storage → composefs → ostree). + /// Set by `bootc image set-unified` and by install with + /// `--experimental-unified-storage`. + #[serde(default)] + pub unified_storage: bool, +} + +impl BootcRepoMeta { + /// Read `composefs/bootc.json` from the physical root, if it exists. + /// Returns `Ok(None)` when the file is absent (i.e. unified storage + /// has never been enabled on this system). + pub(crate) fn read(physical_root: &Dir) -> Result> { + match physical_root.open(BOOTC_REPO_META) { + Ok(f) => { + let meta: Self = serde_json::from_reader(std::io::BufReader::new(f)) + .with_context(|| format!("Parsing {BOOTC_REPO_META}"))?; + Ok(Some(meta)) + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(e) => Err(e).with_context(|| format!("Opening {BOOTC_REPO_META}")), + } + } + + /// Write `composefs/bootc.json` to the physical root, atomically. + /// The composefs directory must already exist. + pub(crate) fn write(&self, physical_root: &Dir) -> Result<()> { + let json = serde_json::to_string_pretty(self).context("Serializing BootcRepoMeta")?; + // Write atomically via a temp file. + let tmp = format!("{BOOTC_REPO_META}.tmp"); + physical_root + .write(&tmp, json.as_bytes()) + .with_context(|| format!("Writing {tmp}"))?; + physical_root + .rename(&tmp, physical_root, BOOTC_REPO_META) + .with_context(|| format!("Renaming {tmp} -> {BOOTC_REPO_META}"))?; + Ok(()) + } +} + /// Ensure the composefs directory exists in the given physical root /// with the correct permissions (mode 0700). pub(crate) fn ensure_composefs_dir(physical_root: &Dir) -> Result<()> { diff --git a/crates/ostree-ext/src/container/composefs_import.rs b/crates/ostree-ext/src/container/composefs_import.rs new file mode 100644 index 000000000..70ac58350 --- /dev/null +++ b/crates/ostree-ext/src/container/composefs_import.rs @@ -0,0 +1,970 @@ +//! Synthesize an ostree commit directly from a composefs OCI repository. +//! +//! This is the inverse of the existing ostree→composefs flow in libostree. +//! Rather than importing OCI layers through the tar pipeline, we walk the +//! composefs `FileSystem` tree and write each entry as an ostree object. +//! +//! For external regular files we bypass the ostree C library's `write_content` +//! API and instead write content objects directly to disk. This lets us +//! attempt `FICLONE` (reflink) from the composefs object fd, falling back to +//! `copy_file_range` / read-write. The ostree SHA256 content checksum is +//! still computed via the C library's `checksum_file_from_input` (a pure, +//! repo-independent function) so the serialisation format is never +//! reimplemented. +//! +//! Metadata objects (dirmeta, dirtree, commit) and small inline files continue +//! to use the ostree C API — they are tiny and benefit from the existing +//! validation. +//! +//! # Why reflink, not hardlink +//! +//! A composefs object is content-addressed by its SHA-512 fsverity digest. +//! Multiple filesystem paths with identical content share a single composefs +//! object (inode). In ostree bare mode, however, every content object carries +//! its own per-inode metadata: uid, gid, mode, and *all* xattrs including +//! `security.selinux`. The same composefs object can be referenced by two +//! ostree objects that need different SELinux labels — for example +//! `/etc/sudo.conf` (label `etc_t`) and +//! `/usr/share/doc/sudo/examples/sudo.conf` (label `usr_t`). Because a +//! single inode can hold only one `security.selinux` value, hardlinking both +//! ostree objects to the shared composefs inode is impossible: the second +//! `setxattr` would overwrite the first, causing `ostree fsck` to report a +//! checksum mismatch. +//! +//! Reflinks (FICLONE) solve this: each ostree object gets its own inode and +//! therefore its own metadata, while the underlying disk extents are shared +//! with the composefs object — zero copy, same space on the wire. +//! +//! # Filesystem requirements +//! +//! FICLONE requires a reflink-capable filesystem such as XFS or btrfs. On +//! these filesystems the ostree and composefs repositories share disk blocks +//! so installing the OS adds no extra space for the ostree content objects. +//! On ext4 (and other filesystems without reflink support) FICLONE fails with +//! `EOPNOTSUPP` and the code falls back to a byte copy; installation succeeds +//! but no block sharing occurs between the two repositories. +//! +//! # SELinux labels and the NUL-terminator fix +//! +//! `selabel()` from composefs-rs applies SELinux labels in-memory to the +//! filesystem tree before ostree synthesis begins. On SELinux-disabled hosts +//! (such as a container used for `bootc install`) the `fsetxattr` call for +//! `security.selinux` is a no-op; labels are applied by the kernel at first +//! boot during auto-relabeling. The ostree checksums are computed with the +//! correct label values (because `checksum_file_from_input` sees them from the +//! in-memory tree), so `ostree fsck` passes after boot. +//! +//! One subtlety: composefs-rs `selabel()` stores SELinux values *without* a +//! trailing NUL, but the kernel stores them *with* one. `xattrs_to_variant` +//! appends the NUL when computing the ostree checksum so that it matches what +//! `ostree fsck` reads back from the live filesystem. + +use std::collections::BTreeMap; +use std::ffi::OsStr; +use std::os::fd::{AsFd as _, AsRawFd as _, BorrowedFd}; +use std::os::unix::ffi::OsStrExt as _; + +use anyhow::{Context as _, Result}; + +use composefs_ctl::composefs::fsverity::Sha512HashValue; +use composefs_ctl::composefs::generic_tree::{Inode, Stat}; +use composefs_ctl::composefs::repository::Repository; +use composefs_ctl::composefs::tree::{Directory, Leaf, LeafContent, RegularFile}; +use composefs_ctl::composefs_boot::selabel::selabel; +use composefs_ctl::composefs_oci; + +use crate::container::store::{ + META_COMPOSEFS_SYNTHESIZED, META_CONFIG, META_MANIFEST, META_MANIFEST_DIGEST, +}; +use crate::prelude::{Cast as _, ToVariant as _}; +use crate::{gio, glib}; + +type ComposefsRepo = Repository; + +/// Tracks per-import statistics and FICLONE capability for the composefs→ostree +/// object copy loop. +/// +/// # Why reflink, not hardlink +/// +/// A composefs object is a content-addressed file identified by its SHA-512 +/// fsverity digest. Multiple filesystem paths with identical content share the +/// same composefs object (inode). In ostree's bare repo mode, however, each +/// content object carries its own metadata — uid, gid, mode, and **all xattrs +/// including `security.selinux`** — directly on the inode. +/// +/// The same composefs object can be referenced by ostree objects that need +/// different metadata. For example `/etc/sudo.conf` and +/// `/usr/share/doc/sudo/examples/sudo.conf` have identical bytes but different +/// SELinux labels (`etc_t` vs `usr_t`), producing different ostree content +/// checksums. A single inode can carry only one `security.selinux` value, so +/// hardlinking both ostree objects to the shared composefs inode is impossible: +/// whichever metadata is written second silently overwrites the first, causing +/// `ostree fsck` to report a checksum mismatch. +/// +/// Reflinks (FICLONE) solve this cleanly: each ostree object gets its own +/// inode (its own metadata) while the underlying disk extents are shared with +/// the composefs object — zero copy, same space efficiency. If the filesystem +/// does not support reflinks (e.g. ext4) we fall back to a byte copy; unified +/// storage is not possible in that configuration. +struct DirectImportContext { + /// `true` after FICLONE has failed on this device pair. + ficlone_failed: bool, + /// Count of files reflinked (FICLONE succeeded — blocks shared with composefs object). + reflinked: u64, + /// Count of files written via byte copy fallback (filesystem lacks reflink support). + copied: u64, + /// Count of objects that already existed in the repo (skipped). + existing: u64, + /// When `true`, any FICLONE failure is returned as a hard error instead of + /// silently falling back to byte copy. Set by the caller when it has already + /// confirmed that reflinks are supported on the storage (e.g. via a probe). + reflinks_required: bool, +} + +/// Convert composefs xattrs (`BTreeMap`) to the ostree GVariant format (`a(ayay)`). +/// +/// The ostree on-disk format stores xattr names as NUL-terminated C strings inside +/// `ay` (byte array) GVariant values — matching the C library's use of +/// `g_variant_new_bytestring(name)`. The validation function in libostree reads +/// names back with the `"(^&ay@ay)"` format specifier, which returns a raw pointer +/// into the GVariant buffer, and then calls `strcmp` on it — so the trailing NUL +/// *must* be present. Without it the first byte of the *value* would be treated as +/// part of the name, and if that byte happens to be `0x00` the validator throws +/// "Invalid xattr name (empty or missing NUL)". +fn xattrs_to_variant(xattrs: &BTreeMap, Box<[u8]>>) -> Option { + if xattrs.is_empty() { + return None; + } + let children = xattrs.iter().map(|(k, v)| { + // Append NUL so that ostree's C validator can read the name as a C string. + let mut name_bytes = k.as_bytes().to_vec(); + name_bytes.push(b'\0'); + // SELinux stores label values as NUL-terminated strings on the filesystem. + // The composefs selabel() function strips the NUL, but ostree fsck reads + // xattrs back from disk (where the kernel always stores them with NUL) when + // verifying checksums. Ensure our checksum matches by adding the NUL here + // for security.selinux values that are missing it. + let value: &[u8] = v.as_ref(); + let value_owned; + let value = if k.as_bytes() == b"security.selinux" + && !value.last().copied().map_or(true, |b| b == 0) + { + value_owned = [value, &[0u8]].concat(); + value_owned.as_slice() + } else { + value + }; + glib::Variant::tuple_from_iter([name_bytes.as_slice().to_variant(), value.to_variant()]) + }); + Some(glib::Variant::array_from_iter::<(&[u8], &[u8])>(children)) +} + +/// Build an ostree `DirMeta` GVariant from a composefs `Stat`. +fn create_dirmeta(stat: &Stat) -> glib::Variant { + let finfo = gio::FileInfo::new(); + finfo.set_attribute_uint32("unix::uid", stat.st_uid); + finfo.set_attribute_uint32("unix::gid", stat.st_gid); + // ostree's create_directory_metadata requires the full mode word including + // the S_IFDIR type bits. The composefs Stat only stores the permission + // bits, so we must OR in S_IFDIR ourselves. + let mode = libc::S_IFDIR | stat.st_mode; + finfo.set_attribute_uint32("unix::mode", mode); + let xattrs = xattrs_to_variant(&stat.xattrs); + crate::ostree::create_directory_metadata(&finfo, xattrs.as_ref()) +} + +/// Top-level paths from the OCI image that are API/virtual filesystems at runtime. +/// +/// These paths (`/proc`, `/sys`, etc.) must NOT have their children written into +/// the ostree commit — they are mounted as kernel-provided virtual filesystems at +/// boot time and any content baked into the image is irrelevant (and in the case +/// of device nodes, actively harmful). The directory entries themselves are +/// written as empty directories so that the mount points exist. +/// +/// This mirrors the `EXCLUDED_TOPLEVEL_PATHS` list and the filtering logic in +/// `crate::tar::write::normalize_validate_path()`. +const EXCLUDED_TOPLEVEL_PATHS: &[&str] = &["run", "tmp", "proc", "sys", "dev"]; + +/// Walk the composefs root directory, applying ostree path normalization: +/// +/// - `/etc` → `usr/etc` (ostree's deployment mechanism expects config here) +/// - Children of API/virtual-filesystem mounts (`/proc`, `/sys`, `/dev`, `/run`, +/// `/tmp`) are **omitted** — the directory entries are written as empty mount +/// points but their content is never part of the commit. +/// +/// This matches the filtering that the tar import pipeline applies via +/// `normalize_validate_path()` in `tar/write.rs`. +fn write_dir_to_mtree_remap_etc( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + dir: &Directory, + leaves: &[Leaf], + mtree: &crate::ostree::MutableTree, + ctx: &mut DirectImportContext, +) -> Result<()> { + // Write the root dirmeta first + let dirmeta = create_dirmeta(&dir.stat); + let meta_csum = orepo + .write_metadata( + crate::ostree::ObjectType::DirMeta, + None, + &dirmeta, + gio::Cancellable::NONE, + ) + .context("Writing root dirmeta")?; + mtree.set_metadata_checksum(&meta_csum.to_hex()); + + // Collect the `etc` directory entry (if any) for deferred processing + let mut etc_dir: Option<&Directory> = None; + + for (name, inode) in dir.sorted_entries() { + let name = name + .to_str() + .ok_or_else(|| anyhow::anyhow!("Non-UTF8 entry name: {:?}", name))?; + + // Intercept `etc` at the root and defer it — we'll write it + // as `usr/etc` after processing everything else. + if name == "etc" { + match inode { + Inode::Directory(child) => { + etc_dir = Some(child); + continue; + } + _ => { + // Unusual: /etc is not a directory (e.g. symlink). + // Write it as-is — the deployment code will handle it + // or fail later with a more specific error. + } + } + } + + // API/virtual-filesystem mount points: include the empty directory itself + // (so the mount point exists at runtime) but skip all children. This + // mirrors the tar importer's EXCLUDED_TOPLEVEL_PATHS handling. + if EXCLUDED_TOPLEVEL_PATHS.contains(&name) { + if let Inode::Directory(child) = inode { + let child_mtree = mtree.ensure_dir(name)?; + // Write only the dirmeta (permissions/xattrs) — no children. + let child_dirmeta = create_dirmeta(&child.stat); + let child_meta_csum = orepo + .write_metadata( + crate::ostree::ObjectType::DirMeta, + None, + &child_dirmeta, + gio::Cancellable::NONE, + ) + .with_context(|| format!("Writing dirmeta for excluded path {name}"))?; + child_mtree.set_metadata_checksum(&child_meta_csum.to_hex()); + tracing::debug!("Composefs import: skipping children of excluded path /{name}"); + continue; + } + // Non-directory at an excluded path (e.g. a symlink for /tmp)? + // Skip it entirely. + tracing::debug!("Composefs import: skipping non-directory excluded path /{name}"); + continue; + } + + match inode { + Inode::Directory(child) => { + let child_mtree = mtree.ensure_dir(name)?; + write_dir_to_mtree(orepo, crepo, child, leaves, &child_mtree, ctx)?; + } + Inode::Leaf(leaf_id, _) => { + let leaf = &leaves[leaf_id.0]; + let csum = write_leaf(orepo, crepo, &leaf.stat, &leaf.content, ctx)?; + mtree + .replace_file(name, &csum) + .with_context(|| format!("Inserting {name} into mtree"))?; + } + } + } + + // Now write the deferred `/etc` directory as `usr/etc`. + if let Some(etc) = etc_dir { + let usr_mtree = mtree.ensure_dir("usr")?; + let usr_etc_mtree = usr_mtree.ensure_dir("etc")?; + write_dir_to_mtree(orepo, crepo, etc, leaves, &usr_etc_mtree, ctx) + .context("Writing /etc as usr/etc")?; + } + + Ok(()) +} + +/// Recursively walk a composefs directory, writing each entry into `mtree`. +fn write_dir_to_mtree( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + dir: &Directory, + leaves: &[Leaf], + mtree: &crate::ostree::MutableTree, + ctx: &mut DirectImportContext, +) -> Result<()> { + let dirmeta = create_dirmeta(&dir.stat); + let meta_csum = orepo + .write_metadata( + crate::ostree::ObjectType::DirMeta, + None, + &dirmeta, + gio::Cancellable::NONE, + ) + .context("Writing dirmeta")?; + mtree.set_metadata_checksum(&meta_csum.to_hex()); + + for (name, inode) in dir.sorted_entries() { + let name = name + .to_str() + .ok_or_else(|| anyhow::anyhow!("Non-UTF8 entry name: {:?}", name))?; + match inode { + Inode::Directory(child) => { + let child_mtree = mtree.ensure_dir(name)?; + write_dir_to_mtree(orepo, crepo, child, leaves, &child_mtree, ctx)?; + } + Inode::Leaf(leaf_id, _) => { + let leaf = &leaves[leaf_id.0]; + let csum = write_leaf(orepo, crepo, &leaf.stat, &leaf.content, ctx)?; + mtree + .replace_file(name, &csum) + .with_context(|| format!("Inserting {name} into mtree"))?; + } + } + } + Ok(()) +} + +/// Write a single composefs leaf (regular file or symlink) to the ostree repo. +/// Returns the hex content checksum. +fn write_leaf( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + stat: &Stat, + content: &LeafContent, + ctx: &mut DirectImportContext, +) -> Result { + let xattrs = xattrs_to_variant(&stat.xattrs); + match content { + LeafContent::Symlink(target) => { + let target = target + .to_str() + .ok_or_else(|| anyhow::anyhow!("Non-UTF8 symlink target"))?; + let csum = orepo + .write_symlink( + None, + stat.st_uid, + stat.st_gid, + xattrs.as_ref(), + target, + gio::Cancellable::NONE, + ) + .context("Writing symlink")?; + Ok(csum.to_string()) + } + LeafContent::Regular(file) => { + write_regular_file(orepo, crepo, stat, file, xattrs.as_ref(), ctx) + } + other => anyhow::bail!( + "Unsupported composefs leaf (device/fifo/socket): {:?}", + std::mem::discriminant(other) + ), + } +} + +/// Write a regular file into the ostree repo. +/// +/// Inline files use the ostree C API directly. External files bypass the C +/// `write_content` path and instead write content objects directly to disk, +/// attempting FICLONE (reflink) first and falling back to a byte copy. +/// +/// The ostree content checksum is computed via the C library's +/// `checksum_file_from_input` to guarantee format compatibility. +fn write_regular_file( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + stat: &Stat, + file: &RegularFile, + xattrs: Option<&glib::Variant>, + ctx: &mut DirectImportContext, +) -> Result { + // ostree requires the full mode word including S_IFREG + let mode = libc::S_IFREG | stat.st_mode; + + match file { + RegularFile::Inline(data) => { + let csum = orepo + .write_regfile_inline( + None, + stat.st_uid, + stat.st_gid, + mode, + xattrs, + data, + gio::Cancellable::NONE, + ) + .context("Writing inline file")?; + Ok(csum.to_string()) + } + RegularFile::External(object_id, size) => { + write_external_file_direct(orepo, crepo, stat, object_id, *size, xattrs, mode, ctx) + } + } +} + +/// Compute the ostree content checksum for a regular file using the C API. +/// +/// This calls `ostree_checksum_file_from_input()` which is a pure, +/// repo-independent function. It constructs the standard file header +/// GVariant (uid/gid/mode/xattrs) and hashes `header || content`. +fn compute_content_checksum( + stat: &Stat, + xattrs: Option<&glib::Variant>, + fd: BorrowedFd<'_>, + size: u64, + mode: u32, +) -> Result { + let finfo = gio::FileInfo::new(); + finfo.set_attribute_uint32("unix::uid", stat.st_uid); + finfo.set_attribute_uint32("unix::gid", stat.st_gid); + finfo.set_attribute_uint32("unix::mode", mode); + finfo.set_size(size as i64); + finfo.set_file_type(gio::FileType::Regular); + + // Dup the fd so we can hand an owned File to ReadInputStream without + // touching the caller's fd or using unsafe. + let file = std::fs::File::from(fd.try_clone_to_owned().context("Dup fd for checksum")?); + let istream = gio::ReadInputStream::new(file); + let csum = crate::ostree::checksum_file_from_input( + &finfo, + xattrs, + Some(&istream), + crate::ostree::ObjectType::File, + gio::Cancellable::NONE, + ) + .map_err(|e| anyhow::anyhow!("Computing content checksum: {e}"))?; + + Ok(csum.to_string()) +} + +/// Build the `user.ostreemeta` GVariant `(uuu@a(ayay))` for bare-user mode. +/// +/// This matches the C function `create_file_metadata()` in ostree-repo-commit.c. +fn build_bareuser_metadata( + uid: u32, + gid: u32, + mode: u32, + xattrs: Option<&glib::Variant>, +) -> glib::Variant { + let empty_xattrs; + let xattrs = match xattrs { + Some(v) => v.clone(), + None => { + empty_xattrs = glib::Variant::array_from_iter::<(&[u8], &[u8])>(std::iter::empty()); + empty_xattrs + } + }; + // ostree stores these as big-endian u32 values + glib::Variant::tuple_from_iter([ + uid.to_be().to_variant(), + gid.to_be().to_variant(), + mode.to_be().to_variant(), + xattrs, + ]) +} + +/// Apply file metadata to an fd according to the ostree repo mode. +/// +/// For `Bare`: sets real uid/gid, mode, and xattrs via syscalls. +/// For `BareUser`: stores metadata in a `user.ostreemeta` xattr and +/// applies a masked mode suitable for non-root operation. +fn apply_file_metadata( + fd: BorrowedFd<'_>, + stat: &Stat, + mode: u32, + xattrs: Option<&glib::Variant>, + repo_mode: crate::ostree::RepoMode, +) -> Result<()> { + use rustix::fs::{Mode, Timestamps, XattrFlags, fchmod, fchown, fsetxattr, futimens}; + + match repo_mode { + crate::ostree::RepoMode::Bare => { + fchown( + fd, + Some(rustix::process::Uid::from_raw(stat.st_uid)), + Some(rustix::process::Gid::from_raw(stat.st_gid)), + ) + .context("fchown")?; + fchmod(fd, Mode::from_raw_mode(mode)).context("fchmod")?; + // Set real xattrs on the file. For security.selinux the composefs + // selabel() function stores values without the NUL terminator that + // the kernel expects; add it here so the on-disk value matches what + // xattrs_to_variant() (and thus the ostree checksum) sees. + for (name, value) in &stat.xattrs { + let value: &[u8] = value.as_ref(); + let value_with_nul; + let value = if name.as_bytes() == b"security.selinux" + && !value.last().copied().map_or(true, |b| b == 0) + { + value_with_nul = [value, &[0u8]].concat(); + value_with_nul.as_slice() + } else { + value + }; + fsetxattr(fd, name.as_bytes(), value, XattrFlags::empty()) + .with_context(|| format!("fsetxattr({:?})", name))?; + } + } + crate::ostree::RepoMode::BareUser => { + // Write the user.ostreemeta xattr + let meta = build_bareuser_metadata(stat.st_uid, stat.st_gid, mode, xattrs); + let meta_bytes = meta.data_as_bytes(); + fsetxattr( + fd, + c"user.ostreemeta", + meta_bytes.as_ref(), + XattrFlags::empty(), + ) + .context("fsetxattr(user.ostreemeta)")?; + // Mask mode for non-root safety (matches libostree) + let content_mode = (mode & (libc::S_IFREG | 0o775)) | libc::S_IRUSR; + fchmod(fd, Mode::from_raw_mode(content_mode)).context("fchmod (bare-user)")?; + } + other => anyhow::bail!("Unsupported ostree repo mode for direct write: {other:?}"), + } + + // Set mtime to OSTREE_TIMESTAMP (0), matching libostree behavior. + // UTIME_OMIT for atime means "leave atime unchanged". + futimens( + fd, + &Timestamps { + last_access: rustix::fs::Timespec { + tv_sec: 0, + tv_nsec: rustix::fs::UTIME_OMIT, + }, + last_modification: rustix::fs::Timespec { + tv_sec: 0, + tv_nsec: 0, + }, + }, + ) + .context("futimens")?; + + Ok(()) +} + +/// Ensure the `objects/XX/` directory exists under the repo. +fn ensure_object_dir(repo_dfd: BorrowedFd<'_>, prefix: &str) -> Result<()> { + use rustix::fs::{Mode, mkdirat}; + + let objects_dir = format!("objects/{prefix}"); + match mkdirat(repo_dfd, objects_dir.as_str(), Mode::from_raw_mode(0o777)) { + Ok(()) => Ok(()), + Err(rustix::io::Errno::EXIST) => Ok(()), + Err(e) => Err(e).context(format!("Creating {objects_dir}"))?, + } +} + +/// Write a composefs external regular file directly into the ostree repo. +/// +/// # Sharing strategy: FICLONE (reflink) → byte copy +/// +/// Each ostree content object is written as a fresh inode (via `O_TMPFILE`) +/// and populated from the composefs object using `FICLONE` (reflink) when the +/// filesystem supports it, falling back to `copy_file_range` / `std::io::copy`. +/// +/// **Why not hardlink?** A composefs object is identified by its SHA-512 +/// fsverity digest of raw content. The same object can be referenced by +/// multiple ostree content objects that carry different metadata — in +/// particular different `security.selinux` xattrs assigned by the SELinux +/// policy relabeling step. Ostree bare mode stores metadata (uid/gid/mode/ +/// xattrs) on the inode, so a single shared inode cannot satisfy two different +/// metadata requirements. FICLONE shares the underlying disk extents while +/// keeping inodes independent, giving each ostree object its own metadata. +/// +/// On filesystems without reflink support (ext4, etc.) FICLONE fails with +/// `EOPNOTSUPP` and we fall back to a full byte copy. Unified storage +/// (space-sharing between the composefs and ostree repos) requires a +/// reflink-capable filesystem (XFS, btrfs, …). +/// +/// Returns the hex SHA256 content checksum. +#[allow(clippy::too_many_arguments)] +fn write_external_file_direct( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + stat: &Stat, + object_id: &Sha512HashValue, + size: u64, + xattrs: Option<&glib::Variant>, + mode: u32, + ctx: &mut DirectImportContext, +) -> Result { + use rustix::fs::{AtFlags, Mode, OFlags, linkat, openat}; + use std::io::{Seek, SeekFrom}; + + let cfs_fd = crepo + .open_object(object_id) + .context("Opening composefs object")?; + + // Compute the ostree content checksum using the C API. + let checksum = compute_content_checksum(stat, xattrs, cfs_fd.as_fd(), size, mode) + .context("Computing content checksum")?; + + // Check if the object already exists — skip if so. + if orepo + .has_object( + crate::ostree::ObjectType::File, + &checksum, + gio::Cancellable::NONE, + ) + .context("Checking for existing object")? + { + ctx.existing += 1; + return Ok(checksum); + } + + let repo_dfd = orepo.dfd_borrow(); + let repo_mode = orepo.mode(); + + // Ensure objects/XX/ directory exists. + let (prefix, _rest) = checksum.split_at(2); + ensure_object_dir(repo_dfd, prefix)?; + + let obj_path = format!("objects/{prefix}/{}.file", &checksum[2..]); + + // Create an O_TMPFILE in the repo's tmp/ directory. This gives us an + // anonymous inode we can populate before atomically linking it into place, + // avoiding partial writes visible to concurrent readers. + let tmp_fd = openat( + repo_dfd, + c"tmp", + OFlags::WRONLY | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .context("Creating tmpfile in ostree repo/tmp")?; + + // Seek the source fd back to the start (compute_content_checksum consumed it). + let mut src_file = std::fs::File::from(cfs_fd); + src_file + .seek(SeekFrom::Start(0)) + .context("Seeking composefs fd to start")?; + + // --- Primary path: FICLONE (reflink) --- + // + // Shares the underlying disk extents with the composefs object while giving + // the ostree object its own inode (and thus its own metadata). On XFS this + // is a near-zero-cost operation. After the first failure we stop trying. + let mut reflinked = false; + if !ctx.ficlone_failed { + match rustix::fs::ioctl_ficlone(&tmp_fd, &src_file) { + Ok(()) => { + reflinked = true; + } + Err(e @ (rustix::io::Errno::OPNOTSUPP | rustix::io::Errno::XDEV)) => { + if ctx.reflinks_required { + return Err(anyhow::anyhow!( + "FICLONE failed ({e}) but reflinks were confirmed supported \ + by the storage probe — cannot safely continue import" + )); + } + tracing::debug!( + "FICLONE not supported on this filesystem; \ + falling back to copy for all remaining objects" + ); + ctx.ficlone_failed = true; + } + Err(e) => { + if ctx.reflinks_required { + return Err(anyhow::anyhow!( + "FICLONE failed unexpectedly ({e}); reflinks are required" + )); + } + tracing::debug!("FICLONE failed unexpectedly ({e}); falling back to copy"); + ctx.ficlone_failed = true; + } + } + } + + if reflinked { + ctx.reflinked += 1; + } else { + // --- Fallback: byte copy via copy_file_range --- + let mut dst_file = std::fs::File::from(tmp_fd.try_clone().context("Cloning tmpfile fd")?); + let n = std::io::copy(&mut src_file, &mut dst_file).context("Copying file content")?; + anyhow::ensure!( + n == size, + "Size mismatch: expected {size} bytes but copied {n}" + ); + ctx.copied += 1; + } + + // Apply metadata (uid/gid/mode/xattrs/mtime) to the new inode. + apply_file_metadata(tmp_fd.as_fd(), stat, mode, xattrs, repo_mode)?; + + // Atomically link the tmpfile into place at objects/XX/rest.file. + let proc_path = format!("/proc/self/fd/{}", tmp_fd.as_raw_fd()); + match linkat( + rustix::fs::CWD, + proc_path.as_str(), + repo_dfd, + obj_path.as_str(), + AtFlags::SYMLINK_FOLLOW, + ) { + Ok(()) => {} + Err(rustix::io::Errno::EXIST) => { + // A concurrent writer (or an earlier call) already wrote this object. + } + Err(e) => { + return Err(e).with_context(|| format!("Linking tmpfile to {obj_path}")); + } + } + + Ok(checksum) +} + +/// Synthesize an ostree commit from a composefs repository that has already +/// pulled an OCI image. +/// +/// This is the inverse of the ostree→composefs path in libostree: instead of +/// reading an ostree commit to build an EROFS image, we walk the composefs +/// `FileSystem` tree and write each entry as an ostree object. +/// +/// The resulting commit carries the same OCI metadata as a commit produced by +/// the standard `ostree-container` import pipeline: +/// - `ostree.container.image-config` +/// - `ostree.manifest` +/// - `ostree.manifest-digest` +/// +/// External regular files are written directly to disk with FICLONE (reflink) +/// attempted first, falling back to a byte copy. On the same filesystem this +/// avoids copying data entirely. +pub fn import_from_composefs_repo( + ostree_repo: &crate::ostree::Repo, + composefs_repo: &ComposefsRepo, + config_digest: &composefs_oci::OciDigest, + manifest_digest: &str, + image_manifest: &crate::oci_spec::image::ImageManifest, + image_config: &crate::oci_spec::image::ImageConfiguration, + reflinks_required: bool, +) -> Result { + let cancellable = gio::Cancellable::NONE; + + // Reconstruct the merged filesystem tree from the composefs store + let mut fs = composefs_oci::image::create_filesystem(composefs_repo, config_digest, None) + .context("Reconstructing composefs filesystem tree")?; + + // Apply SELinux labels in-memory before writing any objects. + // selabel walks the FileSystem tree and reads policy files directly from + // the composefs object store — no file copies are made. + let labeled = selabel(&mut fs, composefs_repo) + .context("Applying SELinux labels from composefs image policy")?; + tracing::debug!( + labeled, + total_leaves = fs.leaves.len(), + "SELinux labeling complete" + ); + + let txn = ostree_repo + .auto_transaction(cancellable) + .context("Beginning ostree transaction")?; + + let mut ctx = DirectImportContext { + ficlone_failed: false, + reflinked: 0, + copied: 0, + existing: 0, + reflinks_required, + }; + + // Walk and write all objects. + // + // The composefs filesystem tree uses the OCI image layout where `/etc` + // lives at the tree root. However, ostree's deployment mechanism expects + // configuration to be at `usr/etc` (it copies `usr/etc` → `etc` during + // deployment initialization and performs a 3-way merge). The old + // `ImageImporter` tar pipeline applies this `/etc` → `usr/etc` remap + // via `normalize_validate_path()` in `tar/write.rs`. + // + // We replicate that remapping here: if the composefs root has an `etc` + // directory, we write it into the ostree tree as `usr/etc` instead of + // a top-level `etc`. + let root_mtree = crate::ostree::MutableTree::new(); + write_dir_to_mtree_remap_etc( + ostree_repo, + composefs_repo, + &fs.root, + &fs.leaves, + &root_mtree, + &mut ctx, + ) + .context("Writing composefs tree into ostree")?; + + tracing::info!( + reflinked = ctx.reflinked, + copied = ctx.copied, + existing = ctx.existing, + "Composefs→ostree import complete: reflinked={} copied={} existing={}", + ctx.reflinked, + ctx.copied, + ctx.existing, + ); + + // Seal the tree + let root = ostree_repo + .write_mtree(&root_mtree, cancellable) + .context("Sealing MutableTree")?; + let root = root + .downcast::() + .map_err(|_| anyhow::anyhow!("BUG: write_mtree did not return a RepoFile"))?; + + // Attach OCI metadata so the commit looks like a normal ostree-container import, + // plus a flag indicating this commit was synthesized from a composefs repository + // (so per-layer blob refs do not exist). + let meta = glib::VariantDict::new(None); + meta.insert( + META_CONFIG, + &serde_json::to_string(image_config).context("Serializing image config")?, + ); + meta.insert( + META_MANIFEST, + &serde_json::to_string(image_manifest).context("Serializing manifest")?, + ); + meta.insert(META_MANIFEST_DIGEST, &manifest_digest); + meta.insert(crate::ostree::METADATA_KEY_BOOTABLE.as_ref(), &true); + meta.insert(META_COMPOSEFS_SYNTHESIZED, &true); + let meta = meta.to_variant(); + + // Use the image creation timestamp for the ostree commit, matching the + // behaviour of the old ImageImporter pipeline. This is important because + // rpm-ostree unconditionally expects a non-zero timestamp in the deployment + // metadata; a zero (epoch) timestamp can cause `rpm-ostree status` to abort. + let timestamp = + crate::container::store::timestamp_of_manifest_or_config(image_manifest, image_config) + .unwrap_or_else(|| chrono::offset::Utc::now().timestamp() as u64); + + let commit = ostree_repo + .write_commit_with_time(None, None, None, Some(&meta), &root, timestamp, cancellable) + .context("Writing ostree commit")?; + + txn.commit(cancellable).context("Committing transaction")?; + + Ok(commit.to_string()) +} + +#[cfg(test)] +mod tests { + use rustix::fd::AsRawFd as _; + use rustix::fs::{AtFlags, Mode, OFlags, linkat, openat}; + use std::os::fd::AsFd as _; + + /// Verify that O_TMPFILE can be materialized via /proc/self/fd/N without + /// root (the linkat+SYMLINK_FOLLOW approach used by write_external_file_direct). + #[test] + fn test_otmpfile_materialize_via_proc() { + let dir = tempfile::tempdir_in("/var/tmp").unwrap(); + let dir_fd = rustix::fs::open( + dir.path(), + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) + .unwrap(); + + // Create O_TMPFILE — anonymous inode in the directory + let tmp_fd = openat( + &dir_fd, + c".", + OFlags::WRONLY | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .expect("O_TMPFILE"); + + rustix::io::write(&tmp_fd, b"hello").unwrap(); + + // Materialize via /proc/self/fd/N with SYMLINK_FOLLOW + let proc_path = format!("/proc/self/fd/{}", tmp_fd.as_fd().as_raw_fd()); + linkat( + rustix::fs::CWD, + proc_path.as_str(), + &dir_fd, + "materialized.txt", + AtFlags::SYMLINK_FOLLOW, + ) + .expect("linkat via /proc/self/fd should work without root"); + + let contents = std::fs::read(dir.path().join("materialized.txt")).unwrap(); + assert_eq!(contents, b"hello"); + } + + /// Verify FICLONE (reflink) works between two files in /var/tmp. + /// This will return EOPNOTSUPP on filesystems that don't support it + /// (e.g. ext4), which is acceptable — we just need to confirm the + /// ioctl itself is reachable and returns a meaningful error code. + #[test] + fn test_ficlone_or_enotsup() { + let dir = tempfile::tempdir_in("/var/tmp").unwrap(); + let dir_fd = rustix::fs::open( + dir.path(), + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) + .unwrap(); + + // Source file with some content + let src_fd = openat( + &dir_fd, + c"src", + OFlags::RDWR | OFlags::CREATE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .unwrap(); + rustix::io::write(&src_fd, b"reflink-test-content").unwrap(); + + // Destination — O_TMPFILE + let dst_fd = openat( + &dir_fd, + c".", + OFlags::WRONLY | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .unwrap(); + + match rustix::fs::ioctl_ficlone(&dst_fd, &src_fd) { + Ok(()) => { + // Reflinks supported — verify content matches + let proc_path = format!("/proc/self/fd/{}", dst_fd.as_fd().as_raw_fd()); + linkat( + rustix::fs::CWD, + proc_path.as_str(), + &dir_fd, + "dst", + AtFlags::SYMLINK_FOLLOW, + ) + .unwrap(); + let contents = std::fs::read(dir.path().join("dst")).unwrap(); + assert_eq!(contents, b"reflink-test-content"); + eprintln!("FICLONE: supported on this filesystem"); + } + Err(rustix::io::Errno::OPNOTSUPP) | Err(rustix::io::Errno::XDEV) => { + eprintln!( + "FICLONE: not supported on this filesystem (EOPNOTSUPP/EXDEV) — fallback to copy is correct" + ); + } + Err(e) => panic!("Unexpected FICLONE error: {e}"), + } + } + + /// Verify that user.ostreemeta xattr can be set on a regular file + /// without root in /var/tmp. + #[test] + fn test_user_ostreemeta_xattr() { + use rustix::fs::{XattrFlags, fgetxattr, fsetxattr}; + + let dir = tempfile::tempdir_in("/var/tmp").unwrap(); + let f = tempfile::NamedTempFile::new_in(dir.path()).unwrap(); + let fd = rustix::fs::open(f.path(), OFlags::RDWR | OFlags::CLOEXEC, Mode::empty()).unwrap(); + + let meta_bytes = b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xed\x00\x00\x00\x00"; + fsetxattr(&fd, c"user.ostreemeta", meta_bytes, XattrFlags::empty()) + .expect("setting user.ostreemeta should work without root"); + + let mut buf = vec![0u8; 256]; + let n = fgetxattr(&fd, c"user.ostreemeta", &mut buf).unwrap(); + assert_eq!(&buf[..n], meta_bytes); + } +} diff --git a/crates/ostree-ext/src/container/mod.rs b/crates/ostree-ext/src/container/mod.rs index 0fec7f757..df175448b 100644 --- a/crates/ostree-ext/src/container/mod.rs +++ b/crates/ostree-ext/src/container/mod.rs @@ -613,6 +613,7 @@ pub fn version_for_config(config: &oci_spec::image::ImageConfiguration) -> Optio None } +pub mod composefs_import; pub mod deploy; mod encapsulate; pub use encapsulate::*; diff --git a/crates/ostree-ext/src/container/store.rs b/crates/ostree-ext/src/container/store.rs index ea0665d40..0df3d67c5 100644 --- a/crates/ostree-ext/src/container/store.rs +++ b/crates/ostree-ext/src/container/store.rs @@ -137,6 +137,7 @@ use camino::{Utf8Path, Utf8PathBuf}; use canon_json::CanonJsonSerialize; use cap_std_ext::cap_std; use cap_std_ext::cap_std::fs::{Dir, MetadataExt}; +use gvariant::{Marker as _, Structure as _}; use cap_std_ext::dirext::CapStdExtDirExt; use containers_image_proxy::{ImageProxy, OpenedImage}; @@ -172,12 +173,17 @@ const IMAGE_PREFIX: &str = "ostree/container/image"; /// ref with the project name, so the final ref may be of the form e.g. `ostree/container/baseimage/bootc/foo`. pub const BASE_IMAGE_PREFIX: &str = "ostree/container/baseimage"; -/// The key injected into the merge commit for the manifest digest. -pub(crate) const META_MANIFEST_DIGEST: &str = "ostree.manifest-digest"; -/// The key injected into the merge commit with the manifest serialized as JSON. -const META_MANIFEST: &str = "ostree.manifest"; -/// The key injected into the merge commit with the image configuration serialized as JSON. -const META_CONFIG: &str = "ostree.container.image-config"; +/// The key for the manifest digest in ostree commit metadata. +pub const META_MANIFEST_DIGEST: &str = "ostree.manifest-digest"; +/// The key for the serialized OCI manifest in ostree commit metadata. +pub const META_MANIFEST: &str = "ostree.manifest"; +/// The key for the serialized OCI image configuration in ostree commit metadata. +pub const META_CONFIG: &str = "ostree.container.image-config"; +/// Commit metadata key written by the composefs import path to indicate this commit +/// was synthesized from a composefs repository rather than imported layer-by-layer. +/// When present, per-layer blob refs do not exist and `query_image_commit` should +/// use the merge commit itself as the base commit. +pub const META_COMPOSEFS_SYNTHESIZED: &str = "ostree-ext.composefs-synthesized"; /// The type used to store content filtering information. pub type MetaFilteredData = HashMap>; @@ -197,7 +203,7 @@ fn ref_for_layer(l: &oci_image::Descriptor) -> Result { } /// Convert e.g. sha256:12345... into `/ostree/container/blob/sha256_2B12345...`. -fn ref_for_image(l: &ImageReference) -> Result { +pub fn ref_for_image(l: &ImageReference) -> Result { refescape::prefix_escape_for_ref(IMAGE_PREFIX, &l.to_string()) } @@ -585,7 +591,7 @@ pub(crate) fn parse_ostree_manifest_layout<'a>( } /// Find the timestamp of the manifest (or config), ignoring errors. -fn timestamp_of_manifest_or_config( +pub(crate) fn timestamp_of_manifest_or_config( manifest: &ImageManifest, config: &ImageConfiguration, ) -> Option { @@ -1624,6 +1630,58 @@ pub fn list_images(repo: &ostree::Repo) -> Result> { .collect() } +/// Recursively walk a dirtree GVariant, calling `f` with each file's hex content checksum. +/// +/// Reads `(a(say)a(sayay))` — the ostree DIRTREE format — directly from GVariant bytes, +/// the same approach used by [`crate::ima`]. Subdirectories are visited depth-first. +fn walk_dirtree_checksums( + repo: &ostree::Repo, + dirtree_checksum: &str, + f: &mut impl FnMut(&str) -> Result<()>, +) -> Result<()> { + use crate::objgv::gv_dirtree; + use gvariant::aligned_bytes::TryAsAligned as _; + + let v = repo.load_variant(ostree::ObjectType::DirTree, dirtree_checksum)?; + let bytes = v.data_as_bytes(); + let bytes = bytes.try_as_aligned()?; + let tree = gv_dirtree!().cast(bytes); + let (files, dirs) = tree.to_tuple(); + + let mut hexbuf = [0u8; 64]; + for file in files { + let (_name, csum) = file.to_tuple(); + hex::encode_to_slice(csum, &mut hexbuf)?; + f(std::str::from_utf8(&hexbuf)?)?; + } + for dir in dirs { + let (_name, contents_csum, _meta_csum) = dir.to_tuple(); + hex::encode_to_slice(contents_csum, &mut hexbuf)?; + walk_dirtree_checksums(repo, std::str::from_utf8(&hexbuf)?, f)?; + } + Ok(()) +} + +/// Call `f` once for each regular-file content checksum (hex SHA-256) reachable from +/// `commit_checksum`, reading dirtree GVariants directly without GObject overhead. +/// +/// Subtrees are visited depth-first. Shared dirtree objects that appear more than once +/// in the tree will be visited multiple times. +pub fn for_each_file_checksum( + repo: &ostree::Repo, + commit_checksum: &str, + f: &mut impl FnMut(&str) -> Result<()>, +) -> Result<()> { + use crate::objgv::gv_commit; + use gvariant::aligned_bytes::TryAsAligned as _; + + let (commit_v, _) = repo.load_commit(commit_checksum)?; + let commit_bytes = commit_v.data_as_bytes(); + let commit_bytes = commit_bytes.try_as_aligned()?; + let root_dirtree = hex::encode(gv_commit!().cast(commit_bytes).to_tuple().6); + walk_dirtree_checksums(repo, &root_dirtree, f) +} + /// Attempt to query metadata for a pulled image; if it is corrupted, /// the error is printed to stderr and None is returned. fn try_query_image( @@ -1745,11 +1803,21 @@ pub fn query_image_commit(repo: &ostree::Repo, commit: &str) -> Result(META_COMPOSEFS_SYNTHESIZED)? + .unwrap_or(false); + let base_commit = if is_composefs_synthesized { + merge_commit.clone() + } else { + let base_layer = query_layer(repo, base_layer)?; + let ostree_ref = base_layer.ostree_ref.as_str(); + base_layer + .commit + .ok_or_else(|| anyhow!("Missing base image ref {ostree_ref}"))? + }; let detached_commitmeta = repo.read_commit_detached_metadata(&merge_commit, gio::Cancellable::NONE)?; diff --git a/crates/ostree-ext/tests/it/main.rs b/crates/ostree-ext/tests/it/main.rs index 14c6dd3b3..3b7531564 100644 --- a/crates/ostree-ext/tests/it/main.rs +++ b/crates/ostree-ext/tests/it/main.rs @@ -2365,3 +2365,230 @@ async fn test_diff_id_reuse_across_compression() -> Result<()> { Ok(()) } + +/// Build a minimal OCI image (single gzip layer) into the given OCI directory. +/// +/// The tar layer explicitly includes parent directory entries before files, so +/// that composefs's strict layer importer does not choke on missing parents. +fn build_test_oci(oci_dir: &ocidir::OciDir) -> Result<()> { + use ocidir::oci_spec::image::ImageConfigurationBuilder; + let mut config = ImageConfigurationBuilder::default().build()?; + let mut manifest = oci_dir.new_empty_manifest()?.build()?; + + let bw = oci_dir.create_gzip_layer(None)?; + let mut tb = tar::Builder::new(bw); + + let dir_header = |path: &str| -> tar::Header { + let mut h = tar::Header::new_ustar(); + h.set_entry_type(tar::EntryType::Directory); + h.set_mode(0o755); + h.set_uid(0); + h.set_gid(0); + h.set_mtime(0); + h.set_size(0); + // The tar path for directories must end in / + let _ = path; // consumed by caller + h + }; + let reg_header = |data: &[u8]| -> tar::Header { + let mut h = tar::Header::new_ustar(); + h.set_entry_type(tar::EntryType::Regular); + h.set_mode(0o644); + h.set_uid(0); + h.set_gid(0); + h.set_mtime(0); + h.set_size(data.len() as u64); + h + }; + + // Parent directories must come before their children. + let mut dh = dir_header("usr/"); + tb.append_data(&mut dh, "usr/", std::io::empty())?; + let mut dh = dir_header("usr/bin/"); + tb.append_data(&mut dh, "usr/bin/", std::io::empty())?; + let mut dh = dir_header("usr/lib/"); + tb.append_data(&mut dh, "usr/lib/", std::io::empty())?; + + let bash_data = b"#!/bin/bash\n"; + let mut fh = reg_header(bash_data); + tb.append_data(&mut fh, "usr/bin/bash", std::io::Cursor::new(bash_data))?; + + let config_data = b"# test config\n"; + let mut fh = reg_header(config_data); + tb.append_data( + &mut fh, + "usr/lib/test.conf", + std::io::Cursor::new(config_data), + )?; + + // Add /etc at the OCI image root — this must be remapped to usr/etc + // in the ostree commit so that deployment initialization can find it. + let mut dh = dir_header("etc/"); + tb.append_data(&mut dh, "etc/", std::io::empty())?; + + let hostname_data = b"testhost\n"; + let mut fh = reg_header(hostname_data); + tb.append_data(&mut fh, "etc/hostname", std::io::Cursor::new(hostname_data))?; + + let bw = tb.into_inner()?; + let layer = bw.complete()?; + + oci_dir.push_layer_full( + &mut manifest, + &mut config, + layer, + None::>, + "test layer", + chrono::DateTime::UNIX_EPOCH, + ); + let config_desc = oci_dir.write_config(config)?; + manifest.set_config(config_desc); + oci_dir.replace_with_single_manifest(manifest, ocidir::oci_spec::image::Platform::default())?; + Ok(()) +} + +/// Test that we can synthesize an ostree commit from a composefs repository. +/// +/// The flow is: +/// 1. Build a minimal OCI image into a local OCI directory +/// 2. Pull it into a fresh composefs repository +/// 3. Call `composefs_import::import_from_composefs_repo` +/// 4. Verify the resulting ostree commit has the expected OCI metadata and files +#[tokio::test] +async fn test_composefs_import_to_ostree() -> Result<()> { + use composefs_ctl::composefs::fsverity::{Algorithm, DEFAULT_LG_BLOCKSIZE, Sha512HashValue}; + use composefs_ctl::composefs::repository::Repository; + use composefs_ctl::composefs_oci; + use composefs_ctl::composefs_oci::oci_image::OciImage; + use ostree_ext::container::composefs_import; + use ostree_ext::container::store::{META_CONFIG, META_MANIFEST, META_MANIFEST_DIGEST}; + use std::sync::Arc; + + if !check_skopeo() { + eprintln!("Skipping test_composefs_import_to_ostree: skopeo not available"); + return Ok(()); + } + + let tempdir = tempfile::tempdir_in("/var/tmp")?; + let path: &camino::Utf8Path = tempdir.path().try_into().unwrap(); + + // Step 1: build a minimal OCI image + let oci_path = path.join("test.oci"); + std::fs::create_dir(&oci_path)?; + let oci_cap_dir = cap_std::fs::Dir::open_ambient_dir(&oci_path, cap_std::ambient_authority())?; + let oci_dir = ocidir::OciDir::ensure(oci_cap_dir)?; + build_test_oci(&oci_dir)?; + + // Step 2: create a composefs repository and pull the OCI image into it + let cfs_path = path.join("composefs-repo"); + std::fs::create_dir(&cfs_path)?; + let cfs_dirfd = cap_std::fs::Dir::open_ambient_dir(&cfs_path, cap_std::ambient_authority())?; + let algo = Algorithm::Sha512 { + lg_blocksize: DEFAULT_LG_BLOCKSIZE, + }; + let (cfs_repo, _) = Repository::::init_path(cfs_dirfd, ".", algo, false) + .context("Initializing composefs repository")?; + let cfs_repo = Arc::new(cfs_repo); + + let oci_imgref = format!("oci:{}", oci_path); + let pull_result = composefs_oci::pull(&cfs_repo, &oci_imgref, None, Default::default()) + .await + .context("Pulling OCI image into composefs repo")?; + + // Step 3: get manifest + config from the composefs repo + let oci_image = OciImage::open(&cfs_repo, &pull_result.manifest_digest, None) + .context("Opening OCI image from composefs repo")?; + let manifest = oci_image.manifest().clone(); + let config = oci_image + .config() + .cloned() + .context("OCI image has no config")?; + + // Step 4: create a destination ostree repo and run the import + let ost_repo = ostree::Repo::new(&gio::File::for_path(path.join("ostree-repo"))); + ost_repo + .create(ostree::RepoMode::BareUser, gio::Cancellable::NONE) + .context("Creating destination ostree repo")?; + + let manifest_digest_str = pull_result.manifest_digest.to_string(); + let commit = composefs_import::import_from_composefs_repo( + &ost_repo, + &cfs_repo, + &pull_result.config_digest, + &manifest_digest_str, + &manifest, + &config, + false, + ) + .context("Importing composefs repo into ostree")?; + + // Step 5: verify + assert!(!commit.is_empty(), "commit checksum should be non-empty"); + + let (commitv, state) = ost_repo.load_commit(&commit)?; + assert_eq!(state, ostree::RepoCommitState::NORMAL); + + // Commit metadata should contain the three OCI keys + let commit_meta = commitv.child_value(0); + let commitmeta = glib::VariantDict::new(Some(&commit_meta)); + assert!( + commitmeta + .lookup_value(META_MANIFEST_DIGEST, None) + .is_some(), + "commit should have manifest-digest metadata" + ); + assert!( + commitmeta.lookup_value(META_MANIFEST, None).is_some(), + "commit should have manifest metadata" + ); + assert!( + commitmeta.lookup_value(META_CONFIG, None).is_some(), + "commit should have config metadata" + ); + // The stored digest should round-trip + let stored_digest = commitmeta + .lookup::(META_MANIFEST_DIGEST)? + .expect("manifest-digest key"); + assert_eq!(stored_digest, manifest_digest_str); + + // The file tree should have the files we put in the layer + let (root, _) = ost_repo.read_commit(&commit, gio::Cancellable::NONE)?; + for path in ["usr", "usr/bin", "usr/bin/bash", "usr/lib/test.conf"] { + let node = root.resolve_relative_path(path); + assert!( + node.query_exists(gio::Cancellable::NONE), + "expected {path} to exist in the ostree commit" + ); + } + + // Verify /etc was remapped to usr/etc (not left at the root as etc/) + let usr_etc_hostname = root.resolve_relative_path("usr/etc/hostname"); + assert!( + usr_etc_hostname.query_exists(gio::Cancellable::NONE), + "expected usr/etc/hostname to exist (remapped from /etc/hostname)" + ); + + // Verify there is NO top-level etc/ — it should have been remapped + let root_etc = root.resolve_relative_path("etc"); + assert!( + !root_etc.query_exists(gio::Cancellable::NONE), + "top-level etc/ should not exist; it should be remapped to usr/etc/" + ); + + // Run `ostree fsck` against the repo to verify all objects pass integrity + // checks — in particular that file content checksums match, that + // user.ostreemeta xattrs are present and well-formed, and that no + // object is corrupted. + let ostree_repo_path = path.join("ostree-repo"); + let st = std::process::Command::new("ostree") + .args(["fsck", "--repo"]) + .arg(&ostree_repo_path) + .status() + .context("running `ostree fsck`")?; + assert!( + st.success(), + "`ostree fsck` failed on the composefs-imported repo" + ); + + Ok(()) +} diff --git a/crates/xtask/src/bcvk.rs b/crates/xtask/src/bcvk.rs index 7d883d205..55975688a 100644 --- a/crates/xtask/src/bcvk.rs +++ b/crates/xtask/src/bcvk.rs @@ -65,15 +65,15 @@ impl BcvkInstallOpts { /// Return the install-related args for `bcvk libvirt run`. /// /// This covers `--composefs-backend`, `--filesystem`, `--bootloader`, - /// and `--karg` flags. Note that `--bootloader` and `--filesystem` - /// are only valid when `--composefs-backend` is also set (bcvk - /// enforces this via a clap `requires` relationship). + /// and `--karg` flags. `--bootloader` is composefs-only, but + /// `--filesystem` applies to both ostree and composefs backends. pub(crate) fn install_args(&self) -> Vec { let mut args = Vec::new(); + if let Some(fs) = &self.filesystem { + args.push(format!("--filesystem={fs}")); + } if self.composefs_backend { args.push("--composefs-backend".into()); - let fs = self.filesystem.as_deref().unwrap_or("ext4"); - args.push(format!("--filesystem={fs}")); if let Some(b) = &self.bootloader { args.push(format!("--bootloader={b}")); } diff --git a/crates/xtask/src/tmt.rs b/crates/xtask/src/tmt.rs index 0152285ef..505790a44 100644 --- a/crates/xtask/src/tmt.rs +++ b/crates/xtask/src/tmt.rs @@ -19,6 +19,7 @@ const COMMON_INST_ARGS: &[&str] = &["--label=bootc.test=1"]; // Metadata field names const FIELD_TRY_BIND_STORAGE: &str = "try_bind_storage"; +const FIELD_BOOTC_INSTALL_ARGS: &str = "bootc_install_args"; const FIELD_SUMMARY: &str = "summary"; const FIELD_ADJUST: &str = "adjust"; @@ -212,6 +213,7 @@ struct PlanMetadata { try_bind_storage: bool, skip_if_composefs: bool, skip_if_uki: bool, + bootc_install_args: Vec, } /// Parse integration.fmf to extract extra-try_bind_storage for all plans @@ -254,6 +256,7 @@ fn parse_plan_metadata( try_bind_storage: b, skip_if_uki: false, skip_if_composefs: false, + bootc_install_args: Vec::new(), }); } } @@ -270,6 +273,7 @@ fn parse_plan_metadata( skip_if_composefs: b, skip_if_uki: false, try_bind_storage: false, + bootc_install_args: Vec::new(), }); } } @@ -286,6 +290,25 @@ fn parse_plan_metadata( skip_if_uki: b, skip_if_composefs: false, try_bind_storage: false, + bootc_install_args: Vec::new(), + }); + } + } + + if let Some(install_args_val) = plan_data.get(&serde_yaml::Value::String(format!( + "extra-{}", + FIELD_BOOTC_INSTALL_ARGS + ))) { + if let Some(s) = install_args_val.as_str() { + let args: Vec = s.split_whitespace().map(|a| a.to_string()).collect(); + plan_metadata + .entry(plan_name.to_string()) + .and_modify(|m| m.bootc_install_args = args.clone()) + .or_insert(PlanMetadata { + bootc_install_args: args, + skip_if_uki: false, + skip_if_composefs: false, + try_bind_storage: false, }); } } @@ -478,6 +501,17 @@ pub(crate) fn run_tmt(sh: &Shell, args: &RunTmtArgs) -> Result<()> { } } + // Add per-plan bootc install args as --install-arg= + let bootc_install_args = plan_metadata + .iter() + .find(|(key, _)| plan.ends_with(key.as_str())) + .map(|(_, v)| v.bootc_install_args.clone()) + .unwrap_or_default(); + + for arg in &bootc_install_args { + opts.push(format!("--install-arg={arg}")); + } + opts.extend(bcvk_opts.install_args()); opts @@ -908,6 +942,8 @@ struct TestDef { skip_if_composefs: bool, /// Whether to skip this test for images with UKI skip_if_uki: bool, + /// Extra bootc install args to pass via --install-arg= + bootc_install_args: Vec, /// TMT fmf attributes to pass through (summary, duration, adjust, etc.) tmt: serde_yaml::Value, } @@ -1088,6 +1124,18 @@ fn generate_integration() -> Result<(String, String)> { .and_then(|v| v.as_bool()) .unwrap_or(false); + let bootc_install_args: Vec = metadata + .extra + .as_mapping() + .and_then(|m| { + m.get(&serde_yaml::Value::String( + FIELD_BOOTC_INSTALL_ARGS.to_string(), + )) + }) + .and_then(|v| v.as_str()) + .map(|s| s.split_whitespace().map(|a| a.to_string()).collect()) + .unwrap_or_default(); + tests.push(TestDef { number: metadata.number, name: display_name, @@ -1095,6 +1143,7 @@ fn generate_integration() -> Result<(String, String)> { try_bind_storage, skip_if_composefs, skip_if_uki, + bootc_install_args, tmt: metadata.tmt, }); } @@ -1224,6 +1273,13 @@ fn generate_integration() -> Result<(String, String)> { ); } + if !test.bootc_install_args.is_empty() { + plan_value.insert( + serde_yaml::Value::String(format!("extra-{}", FIELD_BOOTC_INSTALL_ARGS)), + serde_yaml::Value::String(test.bootc_install_args.join(" ")), + ); + } + plans_mapping.insert( serde_yaml::Value::String(plan_key), serde_yaml::Value::Mapping(plan_value), @@ -1362,6 +1418,47 @@ set -eux assert!(tmt.contains_key(&serde_yaml::Value::String("adjust".to_string()))); } + #[test] + fn test_parse_plan_metadata_bootc_install_args() { + // Simulate the integration.fmf content for plan-44 + let fmf_content = r#" +/plan-44-readonly-unified: + summary: Run readonly tests after onboarding to unified storage + discover: + how: fmf + test: + - /tmt/tests/tests/test-44-readonly-unified + extra-bootc_install_args: --experimental-unified-storage +"#; + let tmp = tempfile::NamedTempFile::new().unwrap(); + std::fs::write(tmp.path(), fmf_content).unwrap(); + let path = camino::Utf8Path::from_path(tmp.path()).unwrap(); + + let metadata = parse_plan_metadata(path).unwrap(); + let plan_meta = metadata.get("/plan-44-readonly-unified").unwrap(); + assert_eq!( + plan_meta.bootc_install_args, + vec!["--experimental-unified-storage"] + ); + assert!(!plan_meta.try_bind_storage); + + // Simulate the plan name lookup as done in the execution loop + let plan = "/tmt/plans/integration/plan-44-readonly-unified"; + let found = metadata + .iter() + .find(|(key, _)| plan.ends_with(key.as_str())) + .map(|(_, v)| v.bootc_install_args.clone()) + .unwrap_or_default(); + assert_eq!(found, vec!["--experimental-unified-storage"]); + + // Verify the opts vec gets --install-arg= entries + let mut opts: Vec = vec![]; + for arg in &found { + opts.push(format!("--install-arg={arg}")); + } + assert_eq!(opts, vec!["--install-arg=--experimental-unified-storage"]); + } + #[test] fn test_parse_tmt_metadata_with_try_bind_storage() { let content = r#"# number: 24 diff --git a/docs/src/experimental-unified-storage.md b/docs/src/experimental-unified-storage.md index 68d5ea142..04078615c 100644 --- a/docs/src/experimental-unified-storage.md +++ b/docs/src/experimental-unified-storage.md @@ -148,12 +148,34 @@ Once unified storage is enabled, podman can access the booted image: podman --storage-opt=additionalimagestore=/usr/lib/bootc/storage run localhost/bootc ``` -## Relationship to composefs backend - -Unified storage is complementary to the [composefs backend](experimental-composefs.md). -While unified storage changes *how images are pulled* (using containers/storage), -the composefs backend changes *how the filesystem is stored and verified*. -These features can potentially be combined in the future. +## Relationship to the composefs-first import pipeline + +The containers/storage unified storage path described above is distinct from the +composefs-first import pipeline implemented in this codebase. The two approaches +solve overlapping problems but are architecturally different. + +The composefs-first path fetches the OCI image directly into the composefs OCI +repository (under `/sysroot/ostree/repo/composefs`) and then synthesizes an ostree +commit from that repository without a tar round-trip. For each external regular +file it uses `FICLONE` (reflink) to populate the corresponding ostree content object +from the composefs object. Because reflinks share underlying disk extents while +keeping independent inodes, each ostree object can carry its own metadata (uid, gid, +mode, SELinux label) even when two paths have identical bytes and share a single +composefs object. This is why reflink is used instead of hardlink: a single inode +can hold only one `security.selinux` value, so hardlinking would corrupt one of the +labels. + +This design requires a reflink-capable filesystem. On XFS or btrfs, FICLONE +succeeds and the ostree and composefs repositories share disk blocks — no extra +space is consumed for the ostree content objects. On ext4, FICLONE fails with +`EOPNOTSUPP` and the code falls back to a byte copy; installation works correctly +but without block sharing between the two repositories. + +SELinux labels are applied in-memory (via `selabel()` from composefs-rs) before +the ostree checksums are computed. On a container host where SELinux is disabled, +the `fsetxattr` call is a no-op; the kernel applies matching labels at first boot +during auto-relabeling. Because the checksums include the correct label values, +`ostree fsck` passes after boot without any special handling at install time. ## Limitations diff --git a/tmt/plans/integration.fmf b/tmt/plans/integration.fmf index 0997dde21..c539eede0 100644 --- a/tmt/plans/integration.fmf +++ b/tmt/plans/integration.fmf @@ -269,4 +269,12 @@ execute: how: fmf test: - /tmt/tests/tests/test-43-switch-same-digest + +/plan-44-readonly-unified: + summary: Run readonly tests after onboarding to unified storage + discover: + how: fmf + test: + - /tmt/tests/tests/test-44-readonly-unified + extra-bootc_install_args: --experimental-unified-storage # END GENERATED PLANS diff --git a/tmt/tests/booted/bootc_testlib.nu b/tmt/tests/booted/bootc_testlib.nu index 2560d9b13..fd737ca99 100644 --- a/tmt/tests/booted/bootc_testlib.nu +++ b/tmt/tests/booted/bootc_testlib.nu @@ -1,5 +1,18 @@ # A simple nushell "library" for bootc test helpers +# Run standard per-boot checks that should pass on every boot in every test. +# +# Call this at the start of every boot function (first_boot, second_boot, etc.) +# to ensure consistent baseline verification across all tests. Currently runs: +# +# - `bootc internals fsck images`: verifies that every image bootc has +# committed to (composefs GC tag) is also present in containers-storage. +# This is a no-op when unified storage is not enabled, so it is always +# safe to call regardless of the system configuration. +export def initial_status_and_checks [] { + bootc internals fsck images +} + # This is a workaround for what must be a systemd bug # that seems to have appeared in C10S # TODO diagnose and fill in here diff --git a/tmt/tests/booted/readonly/011-test-ostree-ext-cli.nu b/tmt/tests/booted/readonly/011-test-ostree-ext-cli.nu index 817bc6003..909ed874c 100644 --- a/tmt/tests/booted/readonly/011-test-ostree-ext-cli.nu +++ b/tmt/tests/booted/readonly/011-test-ostree-ext-cli.nu @@ -9,8 +9,16 @@ tap begin "verify bootc wrapping ostree-ext" let st = bootc status --json | from json # Detect composefs by checking if composefs field is present let is_composefs = (tap is_composefs) +# On unified storage installs the ostree commit is synthesized from composefs +# via FICLONE reflinks rather than imported via the ostree-container protocol, +# so the ostree-container image metadata is not available. +let is_unified = ("/sysroot/composefs/bootc.json" | path exists) and ( + open /sysroot/composefs/bootc.json | get -i unified-storage | default false +) if $is_composefs { print "# TODO composefs: skipping test - ostree-container commands don't work with composefs" +} else if $is_unified { + print "# unified storage: skipping test - ostree commit synthesized from composefs, no ostree-container metadata" } else { let booted = $st.status.booted.image # Then verify we can extract its metadata via the ostree-container code. diff --git a/tmt/tests/booted/readonly/015-test-fsck.nu b/tmt/tests/booted/readonly/015-test-fsck.nu index 8700a5f66..6ddc8afd5 100644 --- a/tmt/tests/booted/readonly/015-test-fsck.nu +++ b/tmt/tests/booted/readonly/015-test-fsck.nu @@ -3,15 +3,17 @@ use tap.nu tap begin "Run fsck" -# Detect composefs by checking if composefs field is present -let st = bootc status --json | from json let is_composefs = (tap is_composefs) if $is_composefs { - print "# TODO composefs: skipping test - fsck requires ostree-booted host" + print "# TODO composefs: skipping low-level fsck - requires ostree-booted host" } else { - # That's it, just ensure we've run a fsck on our basic install. + # Run the full fsck suite (low-level checks + image metadata). bootc internals fsck } +# Image metadata check runs on all backends — it's a no-op when unified +# storage is not enabled, so this is always safe to run. +bootc internals fsck images + tap ok diff --git a/tmt/tests/booted/readonly/032-test-composefs-repo.nu b/tmt/tests/booted/readonly/032-test-composefs-repo.nu new file mode 100644 index 000000000..c71172a84 --- /dev/null +++ b/tmt/tests/booted/readonly/032-test-composefs-repo.nu @@ -0,0 +1,36 @@ +use std assert +use tap.nu + +tap begin "composefs repo always present" + +let st = bootc status --json | from json +let is_composefs = (tap is_composefs) + +# The composefs repository must always exist on a booted bootc system, +# regardless of whether the system was booted via composefs or classic ostree. +# This is a regression guard for the composefs-first fetch pipeline. + +print "# Checking /sysroot/composefs directory structure" + +assert ("/sysroot/composefs" | path exists) "/sysroot/composefs must exist" +assert (("/sysroot/composefs" | path type) == "dir") "/sysroot/composefs must be a directory" + +assert ("/sysroot/composefs/objects" | path exists) "/sysroot/composefs/objects must exist" +assert (("/sysroot/composefs/objects" | path type) == "dir") "/sysroot/composefs/objects must be a directory" + +print $"# /sysroot/composefs exists with objects directory \(is_composefs: ($is_composefs)\)" + +if $is_composefs { + # On a composefs-native boot, the streams/refs tree must also be populated. + print "# Checking composefs streams/refs structure (composefs-native boot)" + + assert ("/sysroot/composefs/streams" | path exists) "/sysroot/composefs/streams must exist on composefs boot" + assert (("/sysroot/composefs/streams" | path type) == "dir") "/sysroot/composefs/streams must be a directory" + + let oci_refs = (ls /sysroot/composefs/streams/refs/oci/ | where type == "symlink") + let n_refs = ($oci_refs | length) + print $"# Found ($n_refs) OCI image ref symlinks under /sysroot/composefs/streams/refs/oci/" + assert ($n_refs > 0) "At least one OCI image ref symlink must exist under /sysroot/composefs/streams/refs/oci/ on a composefs-native booted system" +} + +tap ok diff --git a/tmt/tests/booted/test-44-readonly-unified.nu b/tmt/tests/booted/test-44-readonly-unified.nu new file mode 100644 index 000000000..30ddaee36 --- /dev/null +++ b/tmt/tests/booted/test-44-readonly-unified.nu @@ -0,0 +1,79 @@ +# number: 44 +# extra: +# bootc_install_args: --experimental-unified-storage +# tmt: +# summary: Run readonly tests after onboarding to unified storage +# duration: 30m +# +# Single-boot test: onboard to unified storage, verify the unified state, +# then run all readonly tests. Works on both ostree and composefs backends. + +use std assert +use tap.nu +use bootc_testlib.nu + +tap begin "readonly tests with unified storage" + +let st = bootc status --json | from json +let booted_image = $st.status.booted.image.image.image +let is_composefs = (tap is_composefs) + +# --- Step 1: Onboard to unified storage (if not already) --- +# Check if the booted image is already reported as unified (i.e. installed +# with --experimental-unified-storage, which pre-stages images in bootc storage). +let images = bootc image list --format json | from json +let already_unified = ($images | where image_type == "unified" | length) > 0 + +if not $already_unified { + print $"# Onboarding ($booted_image) to unified storage" + bootc image set-unified +} else { + print "# Already in unified storage mode (installed with --experimental-unified-storage)" +} + +# --- Step 2: Verify unified state --- + +# The bootc-owned image store symlink must exist +assert ("/usr/lib/bootc/storage" | path exists) "/usr/lib/bootc/storage must exist after set-unified" + +# The booted image should now appear in bootc-owned containers-storage. +# Use `bootc image list` rather than `podman image exists` because podman +# does not resolve image names through additional image stores (AIS), even +# though `podman images --storage-opt=additionalimagestore=...` lists them. +print "# Verifying booted image is in bootc containers-storage" +let images_after = bootc image list --format json | from json +let image_in_store = ($images_after | where image_type == "unified" | length) > 0 +assert $image_in_store $"Booted image ($booted_image) must be present in bootc storage after set-unified" + +# bootc image list must report the image with type "unified" +let images = bootc image list --format json | from json +let unified_images = $images | where image_type == "unified" +assert (($unified_images | length) > 0) "Expected at least one image with type 'unified' after set-unified" + +print "# Verifying unified storage consistency with bootc internals fsck images" +bootc_testlib initial_status_and_checks + +# On a composefs-native boot the composefs repo is already fully populated +# from the boot sequence. On an ostree boot the composefs repo is +# initialised lazily on the first unified upgrade; `set-unified` alone does +# not trigger the composefs pull, so we only verify the streams/refs tree on +# the composefs backend. +if $is_composefs { + print "# Verifying composefs streams/refs are populated (composefs-native boot)" + assert ("/sysroot/composefs/streams" | path exists) "/sysroot/composefs/streams must exist" + let oci_refs = (ls /sysroot/composefs/streams/refs/oci/ | where type == "symlink") + assert (($oci_refs | length) > 0) "At least one OCI ref must exist under /sysroot/composefs/streams/refs/oci/" +} else { + print "# Ostree boot: composefs repo initialised lazily on first unified upgrade; skipping repo structure check" +} + +print "# Unified storage verified; proceeding with readonly test suite" + +# --- Step 3: Run the full readonly test suite --- +let tests = (ls booted/readonly/*-test-*.nu | get name | sort) +for test_file in $tests { + print $"Running ($test_file)..." + nu $test_file +} + +tap ok diff --git a/tmt/tests/booted/test-composefs-gc-uki.nu b/tmt/tests/booted/test-composefs-gc-uki.nu index e2efd561c..6a87aab28 100644 --- a/tmt/tests/booted/test-composefs-gc-uki.nu +++ b/tmt/tests/booted/test-composefs-gc-uki.nu @@ -5,6 +5,7 @@ use std assert use tap.nu +use bootc_testlib.nu if not (tap is_composefs) { exit 0 @@ -130,6 +131,7 @@ def fourth_boot [] { } def main [] { + bootc_testlib initial_status_and_checks match $env.TMT_REBOOT_COUNT? { null | "0" => first_boot, "1" => second_boot, diff --git a/tmt/tests/booted/test-composefs-gc.nu b/tmt/tests/booted/test-composefs-gc.nu index d19a01efe..c84118410 100644 --- a/tmt/tests/booted/test-composefs-gc.nu +++ b/tmt/tests/booted/test-composefs-gc.nu @@ -5,6 +5,7 @@ use std assert use tap.nu +use bootc_testlib.nu if not (tap is_composefs) { exit 0 @@ -171,6 +172,7 @@ def fifth_boot [i: int] { } def main [] { + bootc_testlib initial_status_and_checks match $env.TMT_REBOOT_COUNT? { null | "0" => first_boot, "1" => second_boot, diff --git a/tmt/tests/booted/test-image-pushpull-upgrade.nu b/tmt/tests/booted/test-image-pushpull-upgrade.nu index 708b868ec..605ea7395 100644 --- a/tmt/tests/booted/test-image-pushpull-upgrade.nu +++ b/tmt/tests/booted/test-image-pushpull-upgrade.nu @@ -11,6 +11,7 @@ # Then another build, and reboot into verifying that use std assert use tap.nu +use bootc_testlib.nu const kargsv0 = ["testarg=foo", "othertestkarg", "thirdkarg=bar"] const kargsv1 = ["testarg=foo", "thirdkarg=baz"] @@ -141,6 +142,7 @@ def second_boot [] { # booted from the local container storage and image assert equal $booted.image.transport containers-storage assert equal $booted.image.image localhost/bootc-derived + # We wrote this file let t = open /usr/share/blah.txt | str trim assert equal $t "test content" @@ -191,6 +193,7 @@ def third_boot [] { print "verifying third boot" assert equal $booted.image.transport containers-storage assert equal $booted.image.image localhost/bootc-derived + let t = open /usr/share/blah.txt | str trim assert equal $t "test content2" @@ -218,6 +221,7 @@ def third_boot [] { } def main [] { + bootc_testlib initial_status_and_checks # See https://tmt.readthedocs.io/en/stable/stories/features.html#reboot-during-test match $env.TMT_REBOOT_COUNT? { null | "0" => initial_build, diff --git a/tmt/tests/booted/test-image-upgrade-reboot.nu b/tmt/tests/booted/test-image-upgrade-reboot.nu index f28cff1da..e95236056 100644 --- a/tmt/tests/booted/test-image-upgrade-reboot.nu +++ b/tmt/tests/booted/test-image-upgrade-reboot.nu @@ -17,6 +17,7 @@ # use std assert use tap.nu +use bootc_testlib.nu # This code runs on *each* boot. # Here we just capture information. @@ -118,6 +119,7 @@ def second_boot [] { } def main [] { + bootc_testlib initial_status_and_checks # See https://tmt.readthedocs.io/en/stable/stories/features.html#reboot-during-test match $env.TMT_REBOOT_COUNT? { null | "0" => initial_build, diff --git a/tmt/tests/booted/test-logically-bound-switch.nu b/tmt/tests/booted/test-logically-bound-switch.nu index 298d7ff86..2ea50b6fe 100644 --- a/tmt/tests/booted/test-logically-bound-switch.nu +++ b/tmt/tests/booted/test-logically-bound-switch.nu @@ -17,6 +17,7 @@ use std assert use tap.nu +use bootc_testlib.nu # This code runs on *each* boot. bootc status @@ -110,6 +111,7 @@ def first_boot [] { def second_boot [] { print "verifying second boot after switch" + assert equal $booted.image.transport containers-storage assert equal $booted.image.image localhost/bootc-bound @@ -136,6 +138,7 @@ def second_boot [] { def third_boot [] { print "verifying third boot after upgrade" + assert equal $booted.image.transport containers-storage assert equal $booted.image.image localhost/bootc-bound @@ -154,6 +157,7 @@ def third_boot [] { } def main [] { + bootc_testlib initial_status_and_checks match $env.TMT_REBOOT_COUNT? { null | "0" => first_boot, "1" => second_boot, diff --git a/tmt/tests/booted/test-rollback.nu b/tmt/tests/booted/test-rollback.nu index 0afe377ac..debc87d40 100644 --- a/tmt/tests/booted/test-rollback.nu +++ b/tmt/tests/booted/test-rollback.nu @@ -107,6 +107,7 @@ def fourth_boot_verify [] { } def main [] { + bootc_testlib initial_status_and_checks # See https://tmt.readthedocs.io/en/stable/stories/features.html#reboot-during-test match $env.TMT_REBOOT_COUNT? { null | "0" => initial_switch, diff --git a/tmt/tests/booted/test-switch-to-unified.nu b/tmt/tests/booted/test-switch-to-unified.nu index 4e8b61442..e2fed1adc 100644 --- a/tmt/tests/booted/test-switch-to-unified.nu +++ b/tmt/tests/booted/test-switch-to-unified.nu @@ -7,6 +7,7 @@ # use std assert use tap.nu +use bootc_testlib.nu # Multi-boot test: boot 0 onboards to unified storage and builds a derived image; # boot 1 verifies we booted into the derived image using containers-storage @@ -17,6 +18,7 @@ let st = bootc status --json | from json let booted = $st.status.booted.image def main [] { + bootc_testlib initial_status_and_checks match $env.TMT_REBOOT_COUNT? { null | "0" => first_boot, "1" => second_boot, @@ -38,7 +40,6 @@ def first_boot [] { } def second_boot [] { - # Onboard to unified storage - this pulls the booted image into bootc storage bootc image set-unified diff --git a/tmt/tests/tests.fmf b/tmt/tests/tests.fmf index 90e1e50ff..f0ac01fdc 100644 --- a/tmt/tests/tests.fmf +++ b/tmt/tests/tests.fmf @@ -168,3 +168,8 @@ check: summary: Error on bootc switch to image with identical fs-verity digest duration: 10m test: nu booted/test-switch-same-digest.nu + +/test-44-readonly-unified: + summary: Run readonly tests after onboarding to unified storage + duration: 30m + test: nu booted/test-44-readonly-unified.nu