diff --git a/crates/composefs-boot/src/lib.rs b/crates/composefs-boot/src/lib.rs index 40ff0335..40f7734c 100644 --- a/crates/composefs-boot/src/lib.rs +++ b/crates/composefs-boot/src/lib.rs @@ -44,11 +44,15 @@ const REQUIRED_TOPLEVEL_TO_EMPTY_DIRS: &[&str] = &["boot", "sysroot"]; /// Empty the required top-level directories and set their mtime to match /usr. fn empty_toplevel_dirs(fs: &mut FileSystem) -> Result<()> { - let usr_mtime = fs.root.get_directory(OsStr::new("usr"))?.stat.st_mtim_sec; + let usr_mtime = { + let stat = &fs.root.get_directory(OsStr::new("usr"))?.stat; + (stat.st_mtim_sec, stat.st_mtim_nsec) + }; for d in REQUIRED_TOPLEVEL_TO_EMPTY_DIRS { let d = fs.root.get_directory_mut(d.as_ref())?; - d.stat.st_mtim_sec = usr_mtime; + d.stat.st_mtim_sec = usr_mtime.0; + d.stat.st_mtim_nsec = usr_mtime.1; d.clear(); } diff --git a/crates/composefs-boot/src/selabel.rs b/crates/composefs-boot/src/selabel.rs index 04204737..0b3fd111 100644 --- a/crates/composefs-boot/src/selabel.rs +++ b/crates/composefs-boot/src/selabel.rs @@ -508,6 +508,7 @@ mod tests { st_uid: 0, st_gid: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs: Default::default(), }; @@ -522,6 +523,7 @@ mod tests { st_uid: 0, st_gid: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs: Default::default(), }, LeafContent::Regular(RegularFile::Inline(data.to_vec().into_boxed_slice())), diff --git a/crates/composefs-fuse/src/lib.rs b/crates/composefs-fuse/src/lib.rs index 269b0458..ef7a0d09 100644 --- a/crates/composefs-fuse/src/lib.rs +++ b/crates/composefs-fuse/src/lib.rs @@ -180,7 +180,8 @@ impl<'a, ObjectID: FsVerityHashValue> InodeRef<'a, ObjectID> { fn fileattr(&self, ino: Ino, nlink_map: &[u32]) -> FileAttr { let stat = self.stat(); - let mtime = SystemTime::UNIX_EPOCH + Duration::from_secs(stat.st_mtim_sec as u64); + let mtime = + SystemTime::UNIX_EPOCH + Duration::new(stat.st_mtim_sec as u64, stat.st_mtim_nsec); FileAttr { ino, diff --git a/crates/composefs-oci/src/image.rs 
b/crates/composefs-oci/src/image.rs
index 14a8ae2f..152a9d29 100644
--- a/crates/composefs-oci/src/image.rs
+++ b/crates/composefs-oci/src/image.rs
@@ -171,6 +171,7 @@ mod test {
                     st_uid: 0,
                     st_gid: 0,
                     st_mtim_sec: 0,
+                    st_mtim_nsec: 0,
                     xattrs: BTreeMap::new(),
                 },
                 item: TarItem::Leaf(LeafContent::Regular(RegularFile::Inline([].into()))),
@@ -185,6 +186,7 @@ mod test {
                     st_uid: 0,
                     st_gid: 0,
                     st_mtim_sec: 0,
+                    st_mtim_nsec: 0,
                     xattrs: BTreeMap::new(),
                 },
                 item: TarItem::Directory,
diff --git a/crates/composefs-oci/src/tar.rs b/crates/composefs-oci/src/tar.rs
index 9d72fb07..4c98e386 100644
--- a/crates/composefs-oci/src/tar.rs
+++ b/crates/composefs-oci/src/tar.rs
@@ -23,7 +23,7 @@ use anyhow::{Context, Result, bail, ensure};
 use bytes::{Bytes, BytesMut};
 use rustix::fs::makedev;
 use tar_core::{
-    EntryType, HEADER_SIZE,
+    EntryType, HEADER_SIZE, PaxExtensions,
     parse::{ParseEvent, Parser},
 };
 use tokio::{
@@ -42,6 +42,48 @@ use composefs::{
 
 use crate::ImportStats;
 
+/// Extract sub-second nanoseconds from PAX extension mtime.
+///
+/// PAX mtime values have the form `"<seconds>.<fraction>"` where `<fraction>` is a
+/// decimal fraction of a second with up to 9 significant digits.
+/// `tar-core` keeps only the integer part in `ParsedEntry::mtime`; we read
+/// the fractional part from the raw PAX bytes ourselves.
+///
+/// Returns 0 if there is no PAX mtime, the value has no fractional part,
+/// or the fractional part contains anything other than ASCII digits.
+fn pax_mtime_nsec(pax: &[u8]) -> u32 {
+    for ext in PaxExtensions::new(pax).flatten() {
+        if ext.key_bytes() == b"mtime" {
+            let Ok(value) = ext.value() else { return 0 };
+            // Split on '.': "1234567890.123456789" → frac = "123456789"
+            let Some((_, frac)) = value.split_once('.') else {
+                return 0;
+            };
+            return pax_fraction_to_nsec(frac);
+        }
+    }
+    0
+}
+
+/// Convert the digits after the decimal point of a PAX timestamp to nanoseconds.
+///
+/// Fewer than 9 digits are scaled up (`"5"` → 500_000_000); digits beyond the
+/// ninth are dropped. Returns 0 unless every byte of `frac` is an ASCII digit.
+fn pax_fraction_to_nsec(frac: &str) -> u32 {
+    let digits = frac.as_bytes();
+    // Tar archives are untrusted input. Working on validated bytes means the
+    // truncation below can never split a multi-byte UTF-8 character (slicing
+    // the `str` at byte 9 would panic in that case), and a leading `+` is not
+    // accepted the way `u32::from_str` would accept it.
+    if !digits.iter().all(u8::is_ascii_digit) {
+        return 0;
+    }
+    let kept = &digits[..digits.len().min(9)];
+    let value = kept.iter().fold(0u32, |acc, d| acc * 10 + u32::from(d - b'0'));
+    // `kept.len()` is at most 9, so neither the cast nor the product can overflow.
+    value * 10u32.pow(9 - kept.len() as u32)
+}
+
 /// Receive data from channel, write to tmpfile, compute verity, and store object.
 ///
 /// This runs in a blocking task to avoid blocking the async runtime.
@@ -436,6 +478,7 @@ pub fn get_entry(
                 st_gid: entry.gid as u32,
                 st_mode: entry.mode,
                 st_mtim_sec: entry.mtime as i64,
+                st_mtim_nsec: entry.pax.map_or(0, pax_mtime_nsec),
                 xattrs,
             },
             item,
@@ -522,6 +565,89 @@ mod tests {
         Ok(entries)
     }
 
+    #[test]
+    fn test_pax_mtime_nsec_parsing() {
+        // Standard 9-digit fractional part
+        // "30 mtime=1234567890.123456789\n": "mtime=1234567890.123456789\n" = 27 bytes, "30 " = 3 → total 30
+        let pax = b"30 mtime=1234567890.123456789\n";
+        assert_eq!(pax_mtime_nsec(pax), 123_456_789, "9-digit fraction");
+
+        // Fewer than 9 digits: "5" → 500_000_000 ns
+        // "mtime=1234567890.5\n" = 19 bytes, "22 " = 3 → total 22
+        let pax = b"22 mtime=1234567890.5\n";
+        assert_eq!(pax_mtime_nsec(pax), 500_000_000, "1-digit fraction");
+
+        // Exactly 9 digits (no truncation needed)
+        // "mtime=1234567890.000000001\n" = 27 bytes, "30 " = 3 → total 30
+        let pax = b"30 mtime=1234567890.000000001\n";
+        assert_eq!(pax_mtime_nsec(pax), 1, "trailing single non-zero digit");
+
+        // More than 9 digits (truncate to 9)
+        // "mtime=1234567890.1234567899\n" = 28 bytes, "31 " = 3 → total 31
+        let pax = b"31 mtime=1234567890.1234567899\n";
+        assert_eq!(
+            pax_mtime_nsec(pax),
+            123_456_789,
+            "10-digit fraction truncated"
+        );
+
+        // No fractional part
+        // "mtime=1234567890\n" = 17 bytes, "20 " = 3 → total 20
+        let pax = b"20 mtime=1234567890\n";
+        assert_eq!(pax_mtime_nsec(pax), 0, "no fractional part");
+
+        // No mtime key
+        // "path=foo.txt\n" = 13 bytes, "16 " = 3 → total 16
+        let pax = b"16 path=foo.txt\n";
+        assert_eq!(pax_mtime_nsec(pax), 0, "no mtime key");
+
+        // Non-ASCII fractional part is rejected instead of panicking
+        // "mtime=1.12345678é\n" = 19 bytes ('é' is 2 bytes), "22 " = 3 → total 22
+        let pax = "22 mtime=1.12345678é\n".as_bytes();
+        assert_eq!(pax_mtime_nsec(pax), 0, "non-ASCII fraction");
+
+        // Empty PAX data
+
assert_eq!(pax_mtime_nsec(b""), 0, "empty pax"); + } + + #[tokio::test] + async fn test_pax_mtime_nsec_on_entry() { + let content = b"test content"; + let mut tar_data = Vec::new(); + { + let mut builder = Builder::new(&mut tar_data); + + let mut pax = tar_core::builder::PaxBuilder::new(); + pax.add("mtime", "1234567890.123456789"); + let pax_data = pax.finish(); + + let mut pax_header = tar::Header::new_ustar(); + pax_header.set_entry_type(tar::EntryType::XHeader); + pax_header.set_mode(0o644); + pax_header.set_size(pax_data.len() as u64); + builder + .append_data(&mut pax_header, "PaxHeader/file.txt", &pax_data[..]) + .unwrap(); + + let mut header = tar::Header::new_ustar(); + header.set_mode(0o644); + header.set_uid(1000); + header.set_gid(1000); + header.set_mtime(1234567890); + header.set_size(content.len() as u64); + header.set_entry_type(tar::EntryType::Regular); + builder + .append_data(&mut header, "file.txt", &content[..]) + .unwrap(); + builder.finish().unwrap(); + } + + let entries = read_all_via_splitstream(tar_data).await.unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].stat.st_mtim_sec, 1_234_567_890); + assert_eq!(entries[0].stat.st_mtim_nsec, 123_456_789); + } + #[test] fn test_make_absolute_path() { let cases: &[(&[u8], &str)] = &[ diff --git a/crates/composefs/fuzz/generate_corpus.rs b/crates/composefs/fuzz/generate_corpus.rs index dc179f76..6de2c15f 100644 --- a/crates/composefs/fuzz/generate_corpus.rs +++ b/crates/composefs/fuzz/generate_corpus.rs @@ -27,6 +27,7 @@ fn stat(mode: u32, uid: u32, gid: u32, mtime: i64) -> Stat { st_uid: uid, st_gid: gid, st_mtim_sec: mtime, + st_mtim_nsec: 0, xattrs: BTreeMap::new(), } } diff --git a/crates/composefs/src/dumpfile.rs b/crates/composefs/src/dumpfile.rs index 7143715d..588eed5f 100644 --- a/crates/composefs/src/dumpfile.rs +++ b/crates/composefs/src/dumpfile.rs @@ -114,11 +114,12 @@ fn write_entry( let uid = stat.st_uid; let gid = stat.st_gid; let mtim_sec = stat.st_mtim_sec; + let 
mtim_nsec = stat.st_mtim_nsec; write_escaped(writer, path.as_os_str().as_bytes())?; write!( writer, - " {size} {mode:o} {nlink} {uid} {gid} {rdev} {mtim_sec}.0 " + " {size} {mode:o} {nlink} {uid} {gid} {rdev} {mtim_sec}.{mtim_nsec} " )?; write_escaped(writer, payload.as_ref().as_bytes())?; write!(writer, " ")?; @@ -540,6 +541,7 @@ fn entry_to_stat(entry: &Entry<'_>) -> Stat { st_uid: entry.uid, st_gid: entry.gid, st_mtim_sec: entry.mtime.sec as i64, + st_mtim_nsec: entry.mtime.nsec as u32, xattrs, } } @@ -724,6 +726,7 @@ mod tests { st_uid: 0, st_gid: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs: BTreeMap::new(), }); let leaf_id = fs.push_leaf( @@ -732,6 +735,7 @@ mod tests { st_uid: 0, st_gid: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs, }, LeafContent::Regular(RegularFile::Inline(b"test".to_vec().into())), @@ -757,6 +761,7 @@ mod tests { st_uid: 0, st_gid: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs: BTreeMap::new(), }; diff --git a/crates/composefs/src/dumpfile_parse.rs b/crates/composefs/src/dumpfile_parse.rs index f8cccdd8..46898436 100644 --- a/crates/composefs/src/dumpfile_parse.rs +++ b/crates/composefs/src/dumpfile_parse.rs @@ -317,9 +317,14 @@ impl FromStr for Mtime { let (sec, nsec) = s .split_once('.') .ok_or_else(|| anyhow!("Missing . 
in mtime"))?; + let nsec = u32::from_str(nsec)?; + anyhow::ensure!( + nsec < 1_000_000_000, + "Invalid mtime nanoseconds: {nsec} (must be < 1_000_000_000)" + ); Ok(Self { sec: u64::from_str(sec)?, - nsec: u64::from_str(nsec)?, + nsec: u64::from(nsec), }) } } diff --git a/crates/composefs/src/erofs/reader.rs b/crates/composefs/src/erofs/reader.rs index 06f09932..98793adb 100644 --- a/crates/composefs/src/erofs/reader.rs +++ b/crates/composefs/src/erofs/reader.rs @@ -1078,7 +1078,7 @@ fn construct_xattr_name(xattr: &XAttr) -> Result, ErofsReaderError> { /// - Strips `trusted.overlay.metacopy` and `trusted.overlay.redirect` /// - Unescapes `trusted.overlay.overlay.X` back to `trusted.overlay.X` fn stat_from_inode_for_tree(img: &Image, inode: &InodeType) -> anyhow::Result { - let (st_mode, st_uid, st_gid, st_mtim_sec) = match inode { + let (st_mode, st_uid, st_gid, st_mtim_sec, st_mtim_nsec) = match inode { InodeType::Compact(inode) => ( inode.header.mode.0.get() as u32 & 0o7777, inode.header.uid.get() as u32, @@ -1087,12 +1087,14 @@ fn stat_from_inode_for_tree(img: &Image, inode: &InodeType) -> anyhow::Result ( inode.header.mode.0.get() as u32 & 0o7777, inode.header.uid.get(), inode.header.gid.get(), inode.header.mtime.get() as i64, + inode.header.mtime_nsec.get(), ), }; @@ -1120,6 +1122,7 @@ fn stat_from_inode_for_tree(img: &Image, inode: &InodeType) -> anyhow::Result Inode<'_, ObjectID> { uid: self.stat.st_uid.into(), gid: self.stat.st_gid.into(), mtime: (self.stat.st_mtim_sec as u64).into(), + mtime_nsec: self.stat.st_mtim_nsec.into(), nlink: (nlink as u32).into(), ..Default::default() }); diff --git a/crates/composefs/src/fs.rs b/crates/composefs/src/fs.rs index 76ac2f3a..21abb1dc 100644 --- a/crates/composefs/src/fs.rs +++ b/crates/composefs/src/fs.rs @@ -216,6 +216,7 @@ fn stat_fd(fd: &OwnedFd, ifmt: FileType) -> Result<(rustix::fs::Stat, generic_tr st_uid: buf.st_uid, st_gid: buf.st_gid, st_mtim_sec: buf.st_mtime as i64, + st_mtim_nsec: buf.st_mtime_nsec as 
u32, xattrs: read_xattrs(fd)?, }, )) @@ -689,6 +690,7 @@ mod tests { st_uid: 0, st_gid: 0, st_mtim_sec: Default::default(), + st_mtim_nsec: 0, xattrs: Default::default(), }; set_file_contents(&td, OsStr::new("testfile"), &st, b"new contents").unwrap(); diff --git a/crates/composefs/src/generic_tree.rs b/crates/composefs/src/generic_tree.rs index 1f296d50..5371ca28 100644 --- a/crates/composefs/src/generic_tree.rs +++ b/crates/composefs/src/generic_tree.rs @@ -21,6 +21,8 @@ pub struct Stat { pub st_gid: u32, /// Modification time in seconds since Unix epoch. pub st_mtim_sec: i64, + /// Nanosecond component of the modification time. + pub st_mtim_nsec: u32, /// Extended attributes as key-value pairs. pub xattrs: BTreeMap, Box<[u8]>>, } @@ -46,6 +48,7 @@ impl Stat { st_uid: 0, st_gid: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs: BTreeMap::new(), } } @@ -631,6 +634,7 @@ impl FileSystem { let st_uid = usr.stat.st_uid; let st_gid = usr.stat.st_gid; let st_mtim_sec = usr.stat.st_mtim_sec; + let st_mtim_nsec = usr.stat.st_mtim_nsec; let xattrs = usr.stat.xattrs.clone(); // Apply copied metadata to root @@ -638,6 +642,7 @@ impl FileSystem { self.root.stat.st_uid = st_uid; self.root.stat.st_gid = st_gid; self.root.stat.st_mtim_sec = st_mtim_sec; + self.root.stat.st_mtim_nsec = st_mtim_nsec; self.root.stat.xattrs = xattrs; Ok(()) @@ -722,9 +727,10 @@ impl FileSystem { /// Returns an error if `/usr` does not exist (needed to get the mtime). 
pub fn canonicalize_run(&mut self) -> Result<(), ImageError> { if self.root.get_directory_opt(OsStr::new("run"))?.is_some() { - let usr_mtime = self.root.get_directory(OsStr::new("usr"))?.stat.st_mtim_sec; + let usr = self.root.get_directory(OsStr::new("usr"))?.stat.clone(); let run_dir = self.root.get_directory_mut(OsStr::new("run"))?; - run_dir.stat.st_mtim_sec = usr_mtime; + run_dir.stat.st_mtim_sec = usr.st_mtim_sec; + run_dir.stat.st_mtim_nsec = usr.st_mtim_nsec; run_dir.clear(); } Ok(()) @@ -1001,6 +1007,7 @@ mod tests { st_uid: 0, st_gid: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs: BTreeMap::new(), } } @@ -1012,6 +1019,7 @@ mod tests { st_uid: 1000, st_gid: 1000, st_mtim_sec: mtime, + st_mtim_nsec: 0, xattrs: BTreeMap::new(), } } @@ -1313,6 +1321,7 @@ mod tests { st_uid: 42, st_gid: 43, st_mtim_sec: 1234567890, + st_mtim_nsec: 0, xattrs: BTreeMap::from([( Box::from(OsStr::new("security.selinux")), Box::from(b"system_u:object_r:usr_t:s0".as_slice()), @@ -1358,6 +1367,7 @@ mod tests { st_uid: 0, st_gid: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs: BTreeMap::from([ ( Box::from(OsStr::new("security.selinux")), @@ -1610,6 +1620,7 @@ mod tests { st_uid: 100, st_gid: 200, st_mtim_sec: 54321, + st_mtim_nsec: 0, xattrs: BTreeMap::from([( Box::from(OsStr::new("user.test")), Box::from(b"val".as_slice()), diff --git a/crates/composefs/src/repository.rs b/crates/composefs/src/repository.rs index 4dcaf381..f6df47d3 100644 --- a/crates/composefs/src/repository.rs +++ b/crates/composefs/src/repository.rs @@ -3905,6 +3905,7 @@ mod tests { st_uid: 0, st_gid: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs: Default::default(), } } @@ -3918,6 +3919,7 @@ mod tests { st_uid: 0, st_gid: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs: Default::default(), }, LeafContent::Regular(RegularFile::External(obj.clone(), size)), @@ -4080,6 +4082,7 @@ mod tests { st_uid: 0, st_gid: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs: Default::default(), }, 
LeafContent::Regular(RegularFile::External(obj2.clone(), size2)), diff --git a/crates/composefs/src/test.rs b/crates/composefs/src/test.rs index b5674e9c..4c7dda5e 100644 --- a/crates/composefs/src/test.rs +++ b/crates/composefs/src/test.rs @@ -172,6 +172,7 @@ pub(crate) mod proptest_strategies { st_uid: uid, st_gid: gid, st_mtim_sec: mtime, + st_mtim_nsec: 0, xattrs, }) } diff --git a/crates/composefs/src/tree.rs b/crates/composefs/src/tree.rs index dd8865d4..ddfc61bf 100644 --- a/crates/composefs/src/tree.rs +++ b/crates/composefs/src/tree.rs @@ -57,6 +57,7 @@ mod tests { st_uid: 1000, st_gid: 1000, st_mtim_sec: mtime, + st_mtim_nsec: 0, xattrs: BTreeMap::new(), } } @@ -76,6 +77,7 @@ mod tests { st_uid: 0, st_gid: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs: BTreeMap::new(), } } diff --git a/crates/composefs/tests/mkfs.rs b/crates/composefs/tests/mkfs.rs index b2896c69..204869f6 100644 --- a/crates/composefs/tests/mkfs.rs +++ b/crates/composefs/tests/mkfs.rs @@ -23,6 +23,7 @@ fn default_stat() -> Stat { st_uid: 0, st_gid: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs: BTreeMap::new(), } } @@ -54,6 +55,7 @@ fn add_leaf( st_uid: 0, st_mode: 0, st_mtim_sec: 0, + st_mtim_nsec: 0, xattrs: BTreeMap::new(), }, content, diff --git a/doc/oci.md b/doc/oci.md index d1f850f4..e7ea7dac 100644 --- a/doc/oci.md +++ b/doc/oci.md @@ -9,11 +9,9 @@ We currently create a composefs image using the granularity of data as typically appears in OCI tarballs: - atime and ctime are not present (these are actually not physically present in the erofs inode structure at all, either the compact or extended forms) - - mtime is set to the mtime in seconds; the sub-seconds value is simply - truncated (ie: we always round down). erofs has an nsec field, but it's not - normally present in OCI tarballs. That's down to the fact that the usual - tar header only has timestamps in seconds and extended headers are not - usually added for this purpose. + - mtime is set to the mtime from the tar metadata. 
The usual tar header only + stores timestamps in seconds; when PAX extended headers include a fractional + `mtime`, we preserve that value in erofs' nsec field. - we take great care to faithfully represent hardlinks: even though the produced filesystem is read-only and we have data de-duplication via the objects store, we make sure that hardlinks result in an actual shared inode