diff --git a/Cargo.lock b/Cargo.lock index e76517d504..fa7e480d14 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3072,13 +3072,13 @@ name = "libdd-library-config" version = "1.1.0" dependencies = [ "anyhow", + "libc", "libdd-trace-protobuf", "memfd", "prost", "rand 0.8.5", "rmp", "rmp-serde", - "rustix 1.1.3", "serde", "serde_yaml", "serial_test", diff --git a/libdd-library-config/Cargo.toml b/libdd-library-config/Cargo.toml index fed2bfd7c2..0d379c7645 100644 --- a/libdd-library-config/Cargo.toml +++ b/libdd-library-config/Cargo.toml @@ -35,4 +35,4 @@ serial_test = "3.2" [target.'cfg(unix)'.dependencies] memfd = { version = "0.6" } -rustix = { version = "1.1.3", features = ["param", "mm", "process", "fs", "time"] } +libc = "0.2" diff --git a/libdd-library-config/src/otel_process_ctx.rs b/libdd-library-config/src/otel_process_ctx.rs index b3efad4aab..3f8ba1cd43 100644 --- a/libdd-library-config/src/otel_process_ctx.rs +++ b/libdd-library-config/src/otel_process_ctx.rs @@ -15,9 +15,9 @@ #[cfg(target_has_atomic = "64")] pub mod linux { use std::{ - ffi::c_void, + ffi::{c_void, CStr}, mem::ManuallyDrop, - os::fd::AsFd as _, + os::fd::{AsRawFd, FromRawFd, OwnedFd}, ptr::{self, addr_of_mut}, sync::{ atomic::{fence, AtomicU64, Ordering}, @@ -28,13 +28,6 @@ pub mod linux { use anyhow::Context; - use rustix::{ - fs::{ftruncate, memfd_create, MemfdFlags}, - mm::{madvise, mmap, mmap_anonymous, munmap, Advice, MapFlags, ProtFlags}, - process::{getpid, set_virtual_memory_region_name, Pid}, - time::{clock_gettime, ClockId}, - }; - use libdd_trace_protobuf::opentelemetry::proto::common::v1::ProcessContext; use prost::Message; @@ -43,7 +36,7 @@ pub mod linux { /// Signature bytes for identifying process context mappings pub const SIGNATURE: &[u8; 8] = b"OTEL_CTX"; /// The discoverable name of the memory mapping. - pub const MAPPING_NAME: &str = "OTEL_CTX"; + pub const MAPPING_NAME: &CStr = c"OTEL_CTX"; /// The header structure written at the start of the mapping. 
This must match the C /// layout of the specification. @@ -101,47 +94,53 @@ pub mod linux { fn new() -> anyhow::Result<Self> { let size = mapping_size(); - memfd_create( - MAPPING_NAME, - MemfdFlags::CLOEXEC | MemfdFlags::NOEXEC_SEAL | MemfdFlags::ALLOW_SEALING, - ) - .or_else(|_| memfd_create(MAPPING_NAME, MemfdFlags::CLOEXEC | MemfdFlags::ALLOW_SEALING)) - .and_then(|fd| { - ftruncate(fd.as_fd(), mapping_size() as u64)?; - // Safety: we pass a null pointer to mmap which is unconditionally ok - let start_addr = unsafe { - mmap( - ptr::null_mut(), - size, - ProtFlags::WRITE | ProtFlags::READ, - MapFlags::PRIVATE, - fd.as_fd(), - 0, - )? - }; - - // We (implicitly) close the file descriptor right away, but this ok - Ok(MemMapping { - start_addr, + try_memfd(MAPPING_NAME, libc::MFD_CLOEXEC | libc::MFD_NOEXEC_SEAL | libc::MFD_ALLOW_SEALING) + .or_else(|_| try_memfd(MAPPING_NAME, libc::MFD_CLOEXEC | libc::MFD_ALLOW_SEALING)) + .and_then(|fd| { + // Safety: fd is a valid open file descriptor. + check_syscall_retval( + unsafe { + libc::ftruncate(fd.as_raw_fd(), mapping_size() as libc::off_t) + }, + "ftruncate failed" + )?; + // Safety: we pass a null pointer to mmap which is unconditionally ok + let start_addr = check_mapping_addr( + unsafe { + libc::mmap( + ptr::null_mut(), + size, + libc::PROT_WRITE | libc::PROT_READ, + libc::MAP_PRIVATE, + fd.as_raw_fd(), + 0, + ) + }, + "mmap failed" + )?; + + // We (implicitly) close the file descriptor right away, but this ok + Ok(MemMapping { start_addr }) + }) + // If any previous step failed, we fallback to an anonymous mapping + .or_else(|_| { + // Safety: we pass a null pointer to mmap, no precondition to uphold + let start_addr = check_mapping_addr( + unsafe { + libc::mmap( + ptr::null_mut(), + size, + libc::PROT_WRITE | libc::PROT_READ, + libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, + -1, + 0, + ) + }, + "mmap failed: couldn't create a memfd or anonymous mmapped region for process context publication" + )?; + + Ok(MemMapping { 
start_addr }) }) - }) - // If any previous step failed, we fallback to an anonymous mapping - .or_else(|_| { - // Safety: we pass a null pointer to mmap, no precondition to uphold - let start_addr = unsafe { - mmap_anonymous( - ptr::null_mut(), - size, - ProtFlags::WRITE | ProtFlags::READ, - MapFlags::PRIVATE, - ) - .context( - "Couldn't create a memfd or anonymous mmapped region for process context publication", - )? - }; - - Ok(MemMapping { start_addr }) - }) } /// Makes this mapping discoverable by giving it a name. @@ -151,16 +150,21 @@ pub mod linux { /// tried, as per the /// [spec](https://github.com/open-telemetry/opentelemetry-specification/pull/4719). fn set_name(&mut self) -> anyhow::Result<()> { - // Safety: the invariants of `MemMapping` ensures that `start` is non null and comes - // from a previous call to `mmap` of size `mapping_size()` - set_virtual_memory_region_name( - unsafe { std::slice::from_raw_parts(self.start_addr as *const u8, mapping_size()) }, - Some( - std::ffi::CString::new(MAPPING_NAME) - .context("unexpected null byte in process context mapping name")? - .as_c_str(), - ), + // Safety: self.start_addr is valid for mapping_size() bytes as per MemMapping + // invariants. name is a valid NUL-terminated string that outlives the prctl call. + check_syscall_retval( + unsafe { + libc::prctl( + libc::PR_SET_VMA, + libc::PR_SET_VMA_ANON_NAME as libc::c_ulong, + self.start_addr as libc::c_ulong, + mapping_size() as libc::c_ulong, + MAPPING_NAME.as_ptr() as libc::c_ulong, + ) + }, + "prctl PR_SET_VMA_ANON_NAME failed", )?; + Ok(()) } @@ -189,13 +193,13 @@ pub mod linux { /// Practically, `self` must be put in a `ManuallyDrop` wrapper and forgotten, or being in /// the process of being dropped. 
unsafe fn unmap(&mut self) -> anyhow::Result<()> { - unsafe { - munmap(self.start_addr, mapping_size()).map_err(|errno| { - anyhow::anyhow!( - "munmap failed when freeing the process context with error {errno}" - ) - }) - } + check_syscall_retval( + // Safety: upheld by the caller. + unsafe { libc::munmap(self.start_addr, mapping_size()) }, + "munmap failed when freeing the process context", + )?; + + Ok(()) } } @@ -216,7 +220,7 @@ pub mod linux { payload: Vec<u8>, /// The process id of the last publisher. This is useful to detect forks(), and publish a /// new context accordingly. - pid: Pid, + pid: libc::pid_t, } impl ProcessContextHandle { @@ -225,10 +229,12 @@ pub mod linux { let mut mapping = MemMapping::new()?; let size = mapping_size(); - // Safety: the invariants of MemMapping ensures `start_addr` is not null and comes - // from a previous call to `mmap` - unsafe { madvise(mapping.start_addr, size, Advice::LinuxDontFork) } - .context("madvise MADVISE_DONTFORK failed")?; + check_syscall_retval( + // Safety: the invariants of MemMapping ensures `start_addr` is not null and comes + // from a previous call to `mmap` + unsafe { libc::madvise(mapping.start_addr, size, libc::MADV_DONTFORK) }, + "madvise MADVISE_DONTFORK failed", + )?; let published_at_ns = since_boottime_ns().ok_or_else(|| { anyhow::anyhow!("failed to get current time for process context publication") @@ -270,7 +276,8 @@ pub mod linux { Ok(ProcessContextHandle { mapping, payload, - pid: getpid(), + // Safety: getpid() is always safe to call. + pid: unsafe { libc::getpid() }, }) } @@ -322,10 +329,42 @@ pub mod linux { } } - // Whether this size depends on the page size or not in the future, Rustix's `page_size()` - // caches the value in a static atomic, so it's ok to call `mapping_size()` repeatedly; it - // won't result in a syscall each time. - // + /// Returns `Err` wrapping the current `errno` with `msg` as context if `addr` equals + /// `MAP_FAILED`, `Ok(addr)` otherwise. 
+ fn check_mapping_addr(addr: *mut c_void, msg: &'static str) -> anyhow::Result<*mut c_void> { + if addr == libc::MAP_FAILED { + Err(std::io::Error::last_os_error()).context(msg) + } else { + Ok(addr) + } + } + + /// Returns `Err` wrapping the current `errno` with `msg` as context if `ret` is negative, + /// `Ok(ret)` otherwise. + fn check_syscall_retval(ret: libc::c_int, msg: &'static str) -> anyhow::Result<libc::c_int> { + if ret < 0 { + Err(std::io::Error::last_os_error()).context(msg) + } else { + Ok(ret) + } + } + + /// Creates a `memfd` file descriptor with the given name and flags. + fn try_memfd(name: &CStr, flags: libc::c_uint) -> anyhow::Result<OwnedFd> { + // We use the raw syscall rather than `libc::memfd_create` because the latter requires + // glibc >= 2.27, while `syscall()` + `SYS_memfd_create` works with any glibc version. + check_syscall_retval( + // Safety: name is a valid NUL-terminated string; flags are constant bit flags. + unsafe { + libc::syscall(libc::SYS_memfd_create, name.as_ptr(), flags as libc::c_long) + as libc::c_int + }, + "memfd_create failed", + ) + // Safety: fd is a valid file descriptor just returned by memfd_create. + .map(|fd| unsafe { OwnedFd::from_raw_fd(fd) }) + } + // The returned size is guaranteed to be larger or equal to the size of `MappingHeader`. fn mapping_size() -> usize { size_of::<MappingHeader>() @@ -333,7 +372,18 @@ pub mod linux { /// Returns the value of the monotonic BOOTTIME clock in nanoseconds. fn since_boottime_ns() -> Option<u64> { - let duration = Duration::try_from(clock_gettime(ClockId::Boottime)).ok()?; + let mut ts = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + // Safety: ts is a valid, writable timespec. 
+ let ret = unsafe { libc::clock_gettime(libc::CLOCK_BOOTTIME, &mut ts) }; + if ret != 0 { + return None; + } + let secs: u64 = ts.tv_sec.try_into().ok()?; + let nanos: u32 = ts.tv_nsec.try_into().ok()?; + let duration = Duration::new(secs, nanos); u64::try_from(duration.as_nanos()).ok() } @@ -374,8 +424,9 @@ pub mod linux { fn publish_raw_payload(payload: Vec<u8>) -> anyhow::Result<()> { let mut guard = lock_context_handle()?; + // Safety: getpid() is always safe to call. match &mut *guard { - Some(handler) if handler.pid == getpid() => handler.update(payload), + Some(handler) if handler.pid == unsafe { libc::getpid() } => handler.update(payload), Some(handler) => { let mut local_handler = ProcessContextHandle::publish(payload)?; // If we've been forked, we need to prevent the mapping from being dropped