From 2dba48e0ec0cdcea3cacae2fcb0bb6445309718f Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Wed, 3 Dec 2025 15:15:00 +0100 Subject: [PATCH 1/5] ROX-30437: basic inode tracking for host path resolution This is a very basic implementation of inode tracking meant to be used in the upcoming release and improved upon in the near future. The current implementation will perform a scan of the paths that are configured to be monitored, using the inode and device numbers as a key to two maps: - A BPF hash map for kernelspace to know when an inode triggers an event. - A HashMap in userspace that maps the inode to a path that we found the inode at. With these two maps we are able to confidently emit events for files that are mounted into containers with paths that don't necessarily match the prefixes configured for monitoring and, at a later stage in userspace, add the path on the host to the event itself. The implemented approach is far from complete, it will only work as long as the files found during the initial scan are not moved, deleted or replaced by a different file. Future patches will extend the functionality of both kernel and userspace to be able to catch more corner cases. --- Cargo.lock | 1 + fact-ebpf/Cargo.toml | 1 + fact-ebpf/src/bpf/builtins.h | 7 -- fact-ebpf/src/bpf/events.h | 20 +++-- fact-ebpf/src/bpf/file.h | 47 ------------ fact-ebpf/src/bpf/inode.h | 97 ++++++++++++++++++++++++ fact-ebpf/src/bpf/kdev.h | 22 ++++++ fact-ebpf/src/bpf/main.c | 40 +++++++--- fact-ebpf/src/bpf/maps.h | 7 ++ fact-ebpf/src/bpf/process.h | 8 +- fact-ebpf/src/bpf/types.h | 9 ++- fact-ebpf/src/lib.rs | 32 +++++++- fact/src/bpf/mod.rs | 35 +++++---- fact/src/event/mod.rs | 35 ++++++--- fact/src/host_info.rs | 21 ++++- fact/src/host_scanner.rs | 143 +++++++++++++++++++++++++++++++++++ fact/src/lib.rs | 22 +++++- 17 files changed, 441 insertions(+), 106 deletions(-) delete mode 100644 fact-ebpf/src/bpf/builtins.h create mode 100644 fact-ebpf/src/bpf/inode.h create mode 100644 fact-ebpf/src/bpf/kdev.h create mode 100644 fact/src/host_scanner.rs diff --git a/Cargo.lock b/Cargo.lock index f2f51b2b..3a79bb7e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -466,6 +466,7 @@ dependencies = [ "aya", "bindgen", "libc", + "serde", ] [[package]] diff --git a/fact-ebpf/Cargo.toml b/fact-ebpf/Cargo.toml index 618f2338..c605048c 100644 --- a/fact-ebpf/Cargo.toml +++ b/fact-ebpf/Cargo.toml @@ -10,6 +10,7 @@ license.workspace = true [dependencies] aya = { workspace = true } libc = { workspace = true } +serde = { workspace = true } [build-dependencies] anyhow = { workspace = true } diff --git a/fact-ebpf/src/bpf/builtins.h b/fact-ebpf/src/bpf/builtins.h deleted file mode 100644 index e35fae6d..00000000 --- a/fact-ebpf/src/bpf/builtins.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -// clang-format off -#ifndef memcpy -#define memcpy __builtin_memcpy -#endif -// clang-format on diff --git a/fact-ebpf/src/bpf/events.h b/fact-ebpf/src/bpf/events.h index adadbd46..ab027bb4 100644 --- a/fact-ebpf/src/bpf/events.h +++ b/fact-ebpf/src/bpf/events.h @@ -1,13 +1,21 @@ #pragma once -#include +// clang-format off +#include "vmlinux.h" +#include "inode.h" #include "maps.h" #include "process.h" #include "types.h" -#include "vmlinux.h" -__always_inline static void submit_event(struct metrics_by_hook_t* m, file_activity_type_t event_type, const char filename[PATH_MAX], struct dentry* dentry, bool use_bpf_d_path) { +#include +// clang-format on + +__always_inline static void submit_event(struct metrics_by_hook_t* m, + file_activity_type_t event_type, + const char filename[PATH_MAX], + inode_key_t* inode, + bool use_bpf_d_path) { struct event_t* event = bpf_ringbuf_reserve(&rb, sizeof(struct event_t), 0); if (event == NULL) { m->ringbuffer_full++; @@ -16,6 +24,7 @@ __always_inline static void submit_event(struct metrics_by_hook_t* m, file_activ event->type = event_type; event->timestamp = bpf_ktime_get_boot_ns(); + inode_copy_or_reset(&event->inode, inode); bpf_probe_read_str(event->filename, PATH_MAX, filename); struct helper_t* helper = get_helper(); @@ -23,11 +32,6 @@ __always_inline static void submit_event(struct metrics_by_hook_t* m, file_activ goto error; } - const char* p = get_host_path(helper->buf, dentry); - if (p != NULL) { - bpf_probe_read_str(event->host_file, PATH_MAX, p); - } - int64_t err = process_fill(&event->process, use_bpf_d_path); if (err) { bpf_printk("Failed to fill process information: %d", err); diff --git a/fact-ebpf/src/bpf/file.h b/fact-ebpf/src/bpf/file.h index 5edbbc36..dd0639de 100644 --- a/fact-ebpf/src/bpf/file.h +++ b/fact-ebpf/src/bpf/file.h @@ -3,9 +3,7 @@ // clang-format off #include "vmlinux.h" -#include "bound_path.h" #include "builtins.h" -#include "d_path.h" #include "types.h" #include "maps.h" @@ -13,51 +11,6 @@ #include // clang-format on -__always_inline static char* get_host_path(char buf[PATH_MAX * 2], struct dentry* d) { - int offset = PATH_MAX - 1; - buf[PATH_MAX - 1] = '\0'; - - for (int i = 0; i < 16 && offset > 0; i++) { - struct qstr d_name; - BPF_CORE_READ_INTO(&d_name, d, d_name); - if (d_name.name == NULL) { - break; - } - - int len = d_name.len; - if (len <= 0 || len >= PATH_MAX) { - return NULL; - } - - offset -= len; - if (offset <= 0) { - return NULL; - } - - if (bpf_probe_read_kernel(&buf[offset], len, d_name.name) != 0) { - return NULL; - } - - if (len == 1 && buf[offset] == '/') { - // Reached the root - offset++; - break; - } - - offset--; - buf[offset] = '/'; - - struct dentry* parent = BPF_CORE_READ(d, d_parent); - // if we reached the root - if (parent == NULL || d == parent) { - break; - } - d = parent; - } - - return &buf[offset]; -} - __always_inline static bool is_monitored(struct bound_path_t* path) { if (!filter_by_prefix()) { // no path configured, allow all diff --git a/fact-ebpf/src/bpf/inode.h b/fact-ebpf/src/bpf/inode.h new file mode 100644 index 00000000..58ac5696 --- /dev/null +++ b/fact-ebpf/src/bpf/inode.h @@ -0,0 +1,97 @@ +#pragma once + +// clang-format off +#include "vmlinux.h" + +#include "kdev.h" +#include "types.h" +#include "maps.h" + +#include +#include +// clang-format on + +#define BTRFS_SUPER_MAGIC 0x9123683E + +/** + * Retrieve the inode and device numbers and return them as a new key. + * + * Different filesystems may `stat` files in different ways, if support + * for a new filesystem is needed, add it here. + * + * Most Linux filesystems use the following generic function to fill + * these fields when running `stat`: + * https://github.com/torvalds/linux/blob/7d0a66e4bb9081d75c82ec4957c50034cb0ea449/fs/stat.c#L82 + * + * The method used to retrieve the device is different in btrfs and can + * be found here: + * https://github.com/torvalds/linux/blob/7d0a66e4bb9081d75c82ec4957c50034cb0ea449/fs/btrfs/inode.c#L8038 + */ +__always_inline static inode_key_t inode_to_key(struct inode* inode) { + inode_key_t key = {0}; + if (inode == NULL) { + return key; + } + + unsigned long magic = inode->i_sb->s_magic; + switch (magic) { + case BTRFS_SUPER_MAGIC: + if (bpf_core_type_exists(struct btrfs_inode)) { + struct btrfs_inode* btrfs_inode = container_of(inode, struct btrfs_inode, vfs_inode); + key.inode = inode->i_ino; + key.dev = BPF_CORE_READ(btrfs_inode, root, anon_dev); + break; + } + // If the btrfs_inode does not exist, most likely it is not + // supported on the system. Fallback to the generic implementation + // just in case. + default: + key.inode = inode->i_ino; + key.dev = inode->i_sb->s_dev; + break; + } + + // Encode the device so it matches with the result of `stat` in + // userspace + key.dev = new_encode_dev(key.dev); + + return key; +} + +__always_inline static inode_value_t* inode_get(struct inode_key_t* inode) { + if (inode == NULL) { + return NULL; + } + return bpf_map_lookup_elem(&inode_map, inode); +} + +__always_inline static long inode_remove(struct inode_key_t* inode) { + return bpf_map_delete_elem(&inode_map, inode); +} + +typedef enum inode_monitored_t { + NOT_MONITORED = 0, + MONITORED, +} inode_monitored_t; + +__always_inline static inode_monitored_t inode_is_monitored(const inode_value_t* inode) { + if (inode != NULL) { + return MONITORED; + } + + return NOT_MONITORED; +} + +__always_inline static void inode_copy_or_reset(inode_key_t* dst, const inode_key_t* src) { + if (dst == NULL) { + return; + } + + if (src != NULL) { + dst->inode = src->inode; + dst->dev = src->dev; + } else { + dst->inode = 0; + dst->dev = 0; + } +} diff --git a/fact-ebpf/src/bpf/kdev.h b/fact-ebpf/src/bpf/kdev.h new file mode 100644 index 00000000..30c9acc5 --- /dev/null +++ b/fact-ebpf/src/bpf/kdev.h @@ -0,0 +1,22 @@ +#pragma once + +// clang-format off +#include "vmlinux.h" + +#include +// clang-format on + +// Most of the code in this file is taken from: +// https://github.com/torvalds/linux/blob/559e608c46553c107dbba19dae0854af7b219400/include/linux/kdev_t.h + +#define MINORBITS 20 +#define MINORMASK ((1U << MINORBITS) - 1) + +#define MAJOR(dev) ((unsigned int)((dev) >> MINORBITS)) +#define MINOR(dev) ((unsigned int)((dev) & MINORMASK)) + +__always_inline static u32 new_encode_dev(dev_t dev) { + unsigned major = MAJOR(dev); + unsigned minor = MINOR(dev); + return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12); +} diff --git a/fact-ebpf/src/bpf/main.c b/fact-ebpf/src/bpf/main.c index 659814bf..19b6ea49 100644 --- a/fact-ebpf/src/bpf/main.c +++ b/fact-ebpf/src/bpf/main.c @@ -3,7 +3,7 @@ #include "file.h" #include "types.h" -#include "process.h" +#include "inode.h" #include "maps.h" #include "events.h" #include "bound_path.h" @@ -44,12 +44,19 @@ int BPF_PROG(trace_file_open, struct file* file) { return 0; } - if (!is_monitored(path)) { - goto ignored; + inode_key_t inode_key = inode_to_key(file->f_inode); + const inode_value_t* inode = inode_get(&inode_key); + switch (inode_is_monitored(inode)) { + case NOT_MONITORED: + if (!is_monitored(path)) { + goto ignored; + } + break; + case MONITORED: + break; } - struct dentry* d = BPF_CORE_READ(file, f_path.dentry); - submit_event(&m->file_open, event_type, path->path, d, true); + submit_event(&m->file_open, event_type, path->path, &inode_key, true); return 0; @@ -91,12 +98,27 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { goto error; } - if (!is_monitored(path)) { - m->path_unlink.ignored++; - return 0; + inode_key_t inode_key = inode_to_key(dentry->d_inode); + const inode_value_t* inode = inode_get(&inode_key); + + switch (inode_is_monitored(inode)) { + case NOT_MONITORED: + if (!is_monitored(path)) { + m->path_unlink.ignored++; + return 0; + } + break; + + case MONITORED: + inode_remove(&inode_key); + break; } - submit_event(&m->path_unlink, FILE_ACTIVITY_UNLINK, path->path, dentry, path_unlink_supports_bpf_d_path); + submit_event(&m->path_unlink, + FILE_ACTIVITY_UNLINK, + path->path, + &inode_key, + path_unlink_supports_bpf_d_path); return 0; error: diff --git a/fact-ebpf/src/bpf/maps.h b/fact-ebpf/src/bpf/maps.h index 0e9ae4c5..1b0402e2 100644 --- a/fact-ebpf/src/bpf/maps.h +++ b/fact-ebpf/src/bpf/maps.h @@ -92,6 +92,13 @@ struct { __uint(max_entries, 8 * 1024 * 1024); } rb SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, inode_key_t); + __type(value, inode_value_t); + __uint(max_entries, 1024); +} inode_map SEC(".maps"); + struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __type(key, __u32); diff --git a/fact-ebpf/src/bpf/process.h b/fact-ebpf/src/bpf/process.h index 910f4825..5061ee38 100644 --- a/fact-ebpf/src/bpf/process.h +++ b/fact-ebpf/src/bpf/process.h @@ -1,12 +1,12 @@ #pragma once -#include "file.h" -#include "maps.h" -#include "types.h" - // clang-format off #include "vmlinux.h" +#include "d_path.h" +#include "maps.h" +#include "types.h" + #include #include // clang-format on diff --git a/fact-ebpf/src/bpf/types.h b/fact-ebpf/src/bpf/types.h index f32ade10..e280cf5a 100644 --- a/fact-ebpf/src/bpf/types.h +++ b/fact-ebpf/src/bpf/types.h @@ -32,6 +32,13 @@ typedef struct process_t { char in_root_mount_ns; } process_t; +typedef struct inode_key_t { + unsigned long long inode; + unsigned long long dev; +} inode_key_t; + +typedef char inode_value_t; + typedef enum file_activity_type_t { FILE_ACTIVITY_INIT = -1, FILE_ACTIVITY_OPEN = 0, @@ -43,7 +50,7 @@ struct event_t { unsigned long timestamp; process_t process; char filename[PATH_MAX]; - char host_file[PATH_MAX]; + inode_key_t inode; file_activity_type_t type; }; diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index da25ef83..2251993d 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -1,9 +1,10 @@ #![allow(dead_code, non_camel_case_types)] -use std::{error::Error, ffi::c_char, fmt::Display, path::PathBuf}; +use std::{error::Error, ffi::c_char, fmt::Display, hash::Hash, path::PathBuf}; use aya::{maps::lpm_trie, Pod}; use libc::memcpy; +use serde::{ser::SerializeStruct, Serialize}; include!(concat!(env!("OUT_DIR"), "/bindings.rs")); @@ -62,6 +63,35 @@ impl PartialEq for path_prefix_t { unsafe impl Pod for path_prefix_t {} +impl PartialEq for inode_key_t { + fn eq(&self, other: &Self) -> bool { + self.inode == other.inode && self.dev == other.dev + } +} + +impl Eq for inode_key_t {} + +impl Hash for inode_key_t { + fn hash(&self, state: &mut H) { + self.inode.hash(state); + self.dev.hash(state); + } +} + +impl Serialize for inode_key_t { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut state = serializer.serialize_struct("inode_key_t", 2)?; + state.serialize_field("inode", &self.inode)?; + state.serialize_field("dev", &self.dev)?; + state.end() + } +} + +unsafe impl Pod for inode_key_t {} + impl metrics_by_hook_t { fn accumulate(&self, other: &metrics_by_hook_t) -> metrics_by_hook_t { let mut m = metrics_by_hook_t { ..*self }; diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 3b6e27ec..2a6b85e2 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -1,8 +1,8 @@ -use std::{io, path::PathBuf, sync::Arc}; +use std::{io, path::PathBuf}; use anyhow::{bail, Context}; use aya::{ - maps::{Array, LpmTrie, MapData, PerCpuArray, RingBuf}, + maps::{Array, HashMap, LpmTrie, MapData, PerCpuArray, RingBuf}, programs::Lsm, Btf, Ebpf, }; @@ -11,13 +11,13 @@ use libc::c_char; use log::{debug, error, info}; use tokio::{ io::unix::AsyncFd, - sync::{broadcast, watch}, + sync::{mpsc, watch}, task::JoinHandle, }; use crate::{event::Event, host_info, metrics::EventCounter}; -use fact_ebpf::{event_t, metrics_t, path_prefix_t, LPM_SIZE_MAX}; +use fact_ebpf::{event_t, inode_key_t, inode_value_t, metrics_t, path_prefix_t, LPM_SIZE_MAX}; mod checks; @@ -26,7 +26,7 @@ const RINGBUFFER_NAME: &str = "rb"; pub struct Bpf { obj: Ebpf, - tx: broadcast::Sender>, + tx: mpsc::Sender, paths: Vec, paths_config: watch::Receiver>, @@ -36,6 +36,7 @@ impl Bpf { pub fn new( paths_config: watch::Receiver>, ringbuf_size: u32, + tx: mpsc::Sender, ) -> anyhow::Result { Bpf::bump_memlock_rlimit()?; @@ -55,7 +56,6 @@ impl Bpf { .load(fact_ebpf::EBPF_OBJ)?; let paths = Vec::new(); - let (tx, _) = broadcast::channel(100); let mut bpf = Bpf { obj, tx, @@ -86,8 +86,13 @@ impl Bpf { Ok(()) } - pub fn subscribe(&self) -> broadcast::Receiver> { - self.tx.subscribe() + pub fn take_inode_map( + &mut self, + ) -> anyhow::Result> { + let Some(inode_map) = self.obj.take_map("inode_map") else { + bail!("inode_map not found"); + }; + Ok(inode_map.try_into()?) } pub fn take_metrics(&mut self) -> anyhow::Result> { @@ -184,7 +189,7 @@ impl Bpf { while let Some(event) = ringbuf.next() { let event: &event_t = unsafe { &*(event.as_ptr() as *const _) }; let event = match Event::try_from(event) { - Ok(event) => Arc::new(event), + Ok(event) => event, Err(e) => { error!("Failed to parse event: '{e}'"); debug!("Event: {event:?}"); @@ -194,7 +199,7 @@ impl Bpf { }; event_counter.added(); - if self.tx.send(event).is_err() { + if self.tx.send(event).await.is_err() { info!("No BPF consumers left, stopping..."); break; } @@ -261,9 +266,9 @@ mod bpf_tests { config.set_paths(paths); let reloader = Reloader::from(config); executor.block_on(async { - let mut bpf = Bpf::new(reloader.paths(), reloader.config().ringbuf_size()) + let (tx, mut rx) = mpsc::channel(100); + let mut bpf = Bpf::new(reloader.paths(), reloader.config().ringbuf_size(), tx) .expect("Failed to load BPF code"); - let mut rx = bpf.subscribe(); let (run_tx, run_rx) = watch::channel(true); // Create a metrics exporter, but don't start it let exporter = Exporter::new(bpf.take_metrics().unwrap()); @@ -281,16 +286,16 @@ mod bpf_tests { file_activity_type_t::FILE_ACTIVITY_CREATION, host_info::get_hostname(), file.path().to_path_buf(), - file.path().to_path_buf(), + PathBuf::new(), // host path is resolved by HostScanner Process::current(), ) .unwrap(); println!("Expected: {expected:?}"); let wait = timeout(Duration::from_secs(1), async move { - while let Ok(event) = rx.recv().await { + while let Some(event) = rx.recv().await { println!("{event:?}"); - if *event == expected { + if event == expected { break; } } diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index 18b6ea34..205b6339 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -4,7 +4,7 @@ use std::{ffi::CStr, os::raw::c_char, path::PathBuf}; use serde::Serialize; -use fact_ebpf::{event_t, file_activity_type_t, PATH_MAX}; +use fact_ebpf::{event_t, file_activity_type_t, inode_key_t, PATH_MAX}; use crate::host_info; use process::Process; @@ -45,6 +45,7 @@ impl Event { let inner = BaseFileData { filename, host_file, + inode: Default::default(), }; let file = match event_type { file_activity_type_t::FILE_ACTIVITY_OPEN => FileData::Open(inner), @@ -60,6 +61,22 @@ impl Event { file, }) } + + pub fn get_inode(&self) -> &inode_key_t { + match &self.file { + FileData::Open(data) => &data.inode, + FileData::Creation(data) => &data.inode, + FileData::Unlink(data) => &data.inode, + } + } + + pub fn set_host_path(&mut self, host_path: PathBuf) { + match &mut self.file { + FileData::Open(data) => data.host_file = host_path, + FileData::Creation(data) => data.host_file = host_path, + FileData::Unlink(data) => data.host_file = host_path, + } + } } impl TryFrom<&event_t> for Event { @@ -68,7 +85,7 @@ impl TryFrom<&event_t> for Event { fn try_from(value: &event_t) -> Result { let process = Process::try_from(value.process)?; let timestamp = host_info::get_boot_time() + value.timestamp; - let file = FileData::new(value.type_, value.filename, value.host_file)?; + let file = FileData::new(value.type_, value.filename, value.inode)?; Ok(Event { timestamp, @@ -112,9 +129,9 @@ impl FileData { pub fn new( event_type: file_activity_type_t, filename: [c_char; PATH_MAX as usize], - host_file: [c_char; PATH_MAX as usize], + inode: inode_key_t, ) -> anyhow::Result { - let inner = BaseFileData::new(filename, host_file)?; + let inner = BaseFileData::new(filename, inode)?; let file = match event_type { file_activity_type_t::FILE_ACTIVITY_OPEN => FileData::Open(inner), file_activity_type_t::FILE_ACTIVITY_CREATION => FileData::Creation(inner), @@ -164,19 +181,17 @@ impl PartialEq for FileData { pub struct BaseFileData { pub filename: PathBuf, host_file: PathBuf, + inode: inode_key_t, } impl BaseFileData { - pub fn new( - filename: [c_char; PATH_MAX as usize], - host_file: [c_char; PATH_MAX as usize], - ) -> anyhow::Result { + pub fn new(filename: [c_char; PATH_MAX as usize], inode: inode_key_t) -> anyhow::Result { let filename = slice_to_string(&filename)?.into(); - let host_file = slice_to_string(&host_file)?.into(); Ok(BaseFileData { filename, - host_file, + host_file: PathBuf::new(), // this field is set by HostScanner + inode, }) } } diff --git a/fact/src/host_info.rs b/fact/src/host_info.rs index 53a316fd..8e50800e 100644 --- a/fact/src/host_info.rs +++ b/fact/src/host_info.rs @@ -7,7 +7,7 @@ use std::{ fs::{read_to_string, File}, io::{BufRead, BufReader}, mem, - path::PathBuf, + path::{Path, PathBuf}, sync::LazyLock, }; @@ -22,6 +22,25 @@ pub fn get_host_mount() -> &'static PathBuf { &HOST_MOUNT } +pub fn prepend_host_mount(path: &Path) -> PathBuf { + let path = if path.has_root() { + path.strip_prefix(Path::new("/")).unwrap() + } else { + path + }; + get_host_mount().join(path) +} + +pub fn remove_host_mount(path: &Path) -> PathBuf { + let host_mount = get_host_mount(); + if path.starts_with(host_mount) { + let path = path.strip_prefix(host_mount).unwrap(); + Path::new("/").join(path) + } else { + path.to_path_buf() + } +} + fn get_clock(clockid: clockid_t) -> u64 { let mut tp = timespec { tv_sec: 0, diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs new file mode 100644 index 00000000..77a61e64 --- /dev/null +++ b/fact/src/host_scanner.rs @@ -0,0 +1,143 @@ +use std::{ + cell::RefCell, + os::linux::fs::MetadataExt, + path::{Path, PathBuf}, + sync::Arc, +}; + +use anyhow::Context; +use aya::maps::MapData; +use fact_ebpf::{inode_key_t, inode_value_t}; +use log::{debug, info, warn}; +use tokio::{ + sync::{broadcast, mpsc, watch}, + task::JoinHandle, +}; + +use crate::{bpf::Bpf, event::Event, host_info}; + +pub struct HostScanner { + kernel_inode_map: RefCell>, + inode_map: RefCell>, + + config: watch::Receiver>, + running: watch::Receiver, + + rx: mpsc::Receiver, + tx: broadcast::Sender>, +} + +impl HostScanner { + pub fn new( + bpf: &mut Bpf, + rx: mpsc::Receiver, + config: watch::Receiver>, + running: watch::Receiver, + ) -> anyhow::Result { + let kernel_inode_map = RefCell::new(bpf.take_inode_map()?); + let inode_map = RefCell::new(std::collections::HashMap::new()); + let (tx, _) = broadcast::channel(100); + + let host_scanner = HostScanner { + kernel_inode_map, + inode_map, + config, + running, + rx, + tx, + }; + + // Run an initial scan to fill in the inode map + host_scanner.scan()?; + + Ok(host_scanner) + } + + fn scan(&self) -> anyhow::Result<()> { + debug!("Host scan started"); + for path in self.config.borrow().iter() { + let path = host_info::prepend_host_mount(path); + self.scan_inner(&path)?; + } + debug!("Host scan done"); + + Ok(()) + } + + fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { + if path.is_dir() { + for entry in path.read_dir()?.flatten() { + let entry = entry.path(); + self.scan_inner(&entry) + .with_context(|| format!("Failed to scan {}", entry.display()))?; + } + } else if path.is_file() { + self.update_entry(path) + .with_context(|| format!("Failed to update entry for {}", path.display()))?; + } + Ok(()) + } + + fn update_entry(&self, path: &Path) -> anyhow::Result<()> { + if !path.exists() { + // If path does not exist, we don't have anything to update + return Ok(()); + } + + let metadata = path.metadata()?; + let inode = inode_key_t { + inode: metadata.st_ino(), + dev: metadata.st_dev(), + }; + + self.kernel_inode_map.borrow_mut().insert(inode, 0, 0)?; + let mut inode_map = self.inode_map.borrow_mut(); + let entry = inode_map.entry(inode).or_default(); + *entry = host_info::remove_host_mount(path); + Ok(()) + } + + pub fn subscribe(&self) -> broadcast::Receiver> { + self.tx.subscribe() + } + + fn get_host_path(&self, inode: &inode_key_t) -> Option { + // The path here needs to be cloned because we won't keep the + // inode_map borrow long enough. + self.inode_map.borrow().get(inode).cloned() + } + + pub fn start(mut self) -> JoinHandle> { + tokio::spawn(async move { + info!("Starting host scanner..."); + + loop { + tokio::select! { + event = self.rx.recv() => { + let Some(mut event) = event else { + info!("No more events to process"); + break; + }; + + if let Some(host_path) = self.get_host_path(event.get_inode()) { + event.set_host_path(host_path); + } + + let event = Arc::new(event); + if let Err(e) = self.tx.send(event) { + warn!("Failed to send event: {e}"); + } + }, + _ = self.running.changed() => { + if !*self.running.borrow() { + break; + } + } + } + } + + info!("Stopping host scanner"); + Ok(()) + }) + } +} diff --git a/fact/src/lib.rs b/fact/src/lib.rs index d198f67a..880813bb 100644 --- a/fact/src/lib.rs +++ b/fact/src/lib.rs @@ -3,11 +3,12 @@ use std::{borrow::BorrowMut, io::Write, str::FromStr}; use anyhow::Context; use bpf::Bpf; use host_info::{get_distro, get_hostname, SystemInfo}; +use host_scanner::HostScanner; use log::{debug, info, warn, LevelFilter}; use metrics::exporter::Exporter; use tokio::{ signal::unix::{signal, SignalKind}, - sync::watch, + sync::{mpsc, watch}, }; mod bpf; @@ -15,6 +16,7 @@ pub mod config; mod endpoints; mod event; mod host_info; +mod host_scanner; mod metrics; mod output; mod pre_flight; @@ -76,16 +78,21 @@ pub async fn run(config: FactConfig) -> anyhow::Result<()> { let reloader = config::reloader::Reloader::from(config); let config_trigger = reloader.get_trigger(); - let mut bpf = Bpf::new(reloader.paths(), reloader.config().ringbuf_size())?; + let (tx, rx) = mpsc::channel(100); + + let mut bpf = Bpf::new(reloader.paths(), reloader.config().ringbuf_size(), tx)?; let exporter = Exporter::new(bpf.take_metrics()?); + let host_scanner = HostScanner::new(&mut bpf, rx, reloader.paths(), running.subscribe())?; + output::start( - bpf.subscribe(), + host_scanner.subscribe(), running.subscribe(), exporter.metrics.output.clone(), reloader.grpc(), reloader.config().json(), )?; + let mut host_scanner_handle = host_scanner.start(); endpoints::Server::new(exporter.clone(), reloader.endpoint(), running.subscribe()).start(); let mut bpf_handle = bpf.start(running.subscribe(), exporter.metrics.bpf_worker.clone()); reloader.start(running.subscribe()); @@ -106,6 +113,15 @@ pub async fn run(config: FactConfig) -> anyhow::Result<()> { } break; } + res = host_scanner_handle.borrow_mut() => { + match res { + Ok(res) => if let Err(e) = res { + warn!("HostScanner worker errored out: {e:?}"); + } + Err(e) => warn!("HostScanner task errored out: {e:?}"), + } + break; + } } } From d0d21b9e544388583eb93527e42f7bf9bb1e9b6a Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Wed, 12 Nov 2025 10:18:52 +0100 Subject: [PATCH 2/5] ROX-30437: add integration tests Added tests will validate events generated on an overlayfs file properly shows the event on the upper layer and the access to the underlying FS. They also validate a mounted path on a container resolves to the correct host path. While developing these tests, it became painfully obvious getting the information of the process running inside the container is not straightforward. Because containers tend to be fairly static, we should be able to manually create the information statically in the test and still have everything work correctly. In order to minimize the amount of changes on existing tests, the default Process constructor now takes fields directly and there is a from_proc class method that builds a new Process object from /proc. Additionally, getting the pid of a process in a container is virtually impossible, so we make the pid check optional. --- tests/conftest.py | 63 ++++++++++---- tests/event.py | 84 +++++++++++++----- tests/test_config_hotreload.py | 44 +++++----- tests/test_file_open.py | 123 ++++++++++++++++++++++----- tests/test_path_unlink.py | 150 +++++++++++++++++++++++++++------ 5 files changed, 361 insertions(+), 103 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index b87f2914..3b74c36c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,6 +22,20 @@ def monitored_dir(): rmtree(tmp) +@pytest.fixture +def test_file(monitored_dir): + """ + Create a temporary file for tests + + This file needs to exist when fact starts up for the inode tracking + algorithm to work. + """ + fut = os.path.join(monitored_dir, 'test.txt') + with open(fut, 'w') as f: + f.write('test') + yield fut + + @pytest.fixture def ignored_dir(): """ @@ -84,7 +98,7 @@ def dump_logs(container, file): def fact_config(request, monitored_dir, logs_dir): cwd = os.getcwd() config = { - 'paths': [monitored_dir], + 'paths': [monitored_dir, '/mounted', '/container-dir'], 'grpc': { 'url': 'http://127.0.0.1:9999', }, @@ -107,7 +121,36 @@ def fact_config(request, monitored_dir, logs_dir): @pytest.fixture -def fact(request, docker_client, fact_config, server, logs_dir): +def test_container(request, docker_client, monitored_dir, ignored_dir): + """ + Run a container for triggering events in. + """ + container = docker_client.containers.run( + 'quay.io/fedora/fedora:43', + detach=True, + tty=True, + volumes={ + ignored_dir: { + 'bind': '/mounted', + 'mode': 'z', + }, + monitored_dir: { + 'bind': '/unmonitored', + 'mode': 'z', + } + }, + name='fedora', + ) + container.exec_run('mkdir /container-dir') + + yield container + + container.stop(timeout=1) + container.remove() + + +@pytest.fixture +def fact(request, docker_client, fact_config, server, logs_dir, test_file): """ Run the fact docker container for integration tests. """ @@ -124,20 +167,8 @@ def fact(request, docker_client, fact_config, server, logs_dir): network_mode='host', privileged=True, volumes={ - '/sys/kernel/security': { - 'bind': '/host/sys/kernel/security', - 'mode': 'ro', - }, - '/etc': { - 'bind': '/host/etc', - 'mode': 'ro', - }, - '/proc/sys/kernel': { - 'bind': '/host/proc/sys/kernel', - 'mode': 'ro', - }, - '/usr/lib/os-release': { - 'bind': '/host/usr/lib/os-release', + '/': { + 'bind': '/host', 'mode': 'ro', }, config_file: { diff --git a/tests/event.py b/tests/event.py index 1b30cf7d..bdd3a6ed 100644 --- a/tests/event.py +++ b/tests/event.py @@ -37,10 +37,31 @@ class Process: Represents a process with its attributes. """ - def __init__(self, pid: int | None = None): - self._pid: int = pid if pid is not None else os.getpid() - proc_dir = os.path.join('/proc', str(self._pid)) - + def __init__(self, + pid: int | None, + uid: int, + gid: int, + exe_path: str, + args: str, + name: str, + container_id: str, + loginuid: int): + self._pid: int | None = pid + self._uid: int = uid + self._gid: int = gid + self._exe_path: str = exe_path + self._args: str = args + self._name: str = name + self._container_id: str = container_id + self._loginuid: int = loginuid + + @classmethod + def from_proc(cls, pid: int | None = None): + pid: int = pid if pid is not None else os.getpid() + proc_dir = os.path.join('/proc', str(pid)) + + uid = 0 + gid = 0 with open(os.path.join(proc_dir, 'status'), 'r') as f: def get_id(line: str, wanted_id: str) -> int | None: if line.startswith(f'{wanted_id}:'): @@ -50,27 +71,36 @@ def get_id(line: str, wanted_id: str) -> int | None: return None for line in f.readlines(): - if (uid := get_id(line, 'Uid')) is not None: - self._uid: int = uid - elif (gid := get_id(line, 'Gid')) is not None: - self._gid: int = gid + if (id := get_id(line, 'Uid')) is not None: + uid = id + elif (id := get_id(line, 'Gid')) is not None: + gid = id - self._exe_path: str = os.path.realpath(os.path.join(proc_dir, 'exe')) + exe_path = os.path.realpath(os.path.join(proc_dir, 'exe')) with open(os.path.join(proc_dir, 'cmdline'), 'rb') as f: content = f.read(4096) args = [arg.decode('utf-8') for arg in content.split(b'\x00') if arg] - self._args: str = ' '.join(args) + args = ' '.join(args) with open(os.path.join(proc_dir, 'comm'), 'r') as f: - self._name: str = f.read().strip() + name = f.read().strip() with open(os.path.join(proc_dir, 'cgroup'), 'r') as f: - self._container_id: str = extract_container_id(f.read()) + container_id = extract_container_id(f.read()) with open(os.path.join(proc_dir, 'loginuid'), 'r') as f: - self._loginuid: int = int(f.read()) + loginuid = int(f.read()) + + return Process(pid=pid, + uid=uid, + gid=gid, + exe_path=exe_path, + args=args, + name=name, + container_id=container_id, + loginuid=loginuid) @property def uid(self) -> int: @@ -81,7 +111,7 @@ def gid(self) -> int: return self._gid @property - def pid(self) -> int: + def pid(self) -> int | None: return self._pid @property @@ -107,10 +137,12 @@ def loginuid(self) -> int: @override def __eq__(self, other: Any) -> bool: if isinstance(other, ProcessSignal): + if self.pid is not None and self.pid != other.pid: + return False + return ( self.uid == other.uid and self.gid == other.gid and - self.pid == other.pid and self.exe_path == other.exec_file_path and self.args == other.args and self.name == other.name and @@ -124,7 +156,7 @@ def __str__(self) -> str: return (f'Process(uid={self.uid}, gid={self.gid}, pid={self.pid}, ' f'exe_path={self.exe_path}, args={self.args}, ' f'name={self.name}, container_id={self.container_id}, ' - f'loginuid={self.loginuid}') + f'loginuid={self.loginuid})') class Event: @@ -136,10 +168,12 @@ class Event: def __init__(self, process: Process, event_type: EventType, - file: str): + file: str, + host_path: str = ''): self._type: EventType = event_type self._process: Process = process self._file: str = file + self._host_path: str = host_path @property def event_type(self) -> EventType: @@ -153,6 +187,10 @@ def process(self) -> Process: def file(self) -> str: return self._file + @property + def host_path(self) -> str: + return self._host_path + @override def __eq__(self, other: Any) -> bool: if isinstance(other, FileActivity): @@ -160,15 +198,19 @@ def __eq__(self, other: Any) -> bool: return False if self.event_type == EventType.CREATION: - return self.file == other.creation.activity.path + return self.file == other.creation.activity.path and \ + self.host_path == other.creation.activity.host_path elif self.event_type == EventType.OPEN: - return self.file == other.open.activity.path + return self.file == other.open.activity.path and \ + self.host_path == other.open.activity.host_path elif self.event_type == EventType.UNLINK: - return self.file == other.unlink.activity.path + return self.file == other.unlink.activity.path and \ + self.host_path == other.unlink.activity.host_path return False raise NotImplementedError @override def __str__(self) -> str: return (f'Event(event_type={self.event_type.name}, ' - f'process={self.process}, file="{self.file}")') + f'process={self.process}, file="{self.file}", ' + f'host_path="{self.host_path}")') diff --git a/tests/test_config_hotreload.py b/tests/test_config_hotreload.py index d5f5b0bb..4b5d337d 100644 --- a/tests/test_config_hotreload.py +++ b/tests/test_config_hotreload.py @@ -93,12 +93,13 @@ def test_output_grpc_address_change(fact, fact_config, monitored_dir, server, al change. """ # File Under Test - fut = os.path.join(monitored_dir, 'test.txt') + fut = os.path.join(monitored_dir, 'test2.txt') with open(fut, 'w') as f: f.write('This is a test') - process = Process() - e = Event(process=process, event_type=EventType.CREATION, file=fut) + process = Process.from_proc() + e = Event(process=process, event_type=EventType.CREATION, + file=fut, host_path='') print(f'Waiting for event: {e}') server.wait_events([e]) @@ -111,30 +112,32 @@ def test_output_grpc_address_change(fact, fact_config, monitored_dir, server, al with open(fut, 'w') as f: f.write('This is another test') - e = Event(process=process, event_type=EventType.OPEN, file=fut) + e = Event(process=process, event_type=EventType.OPEN, + file=fut, host_path='') print(f'Waiting for event on alternate server: {e}') alternate_server.wait_events([e]) def test_paths(fact, fact_config, monitored_dir, ignored_dir, server): - p = Process() + p = Process.from_proc() # Ignored file, must not show up in the server ignored_file = os.path.join(ignored_dir, 'test.txt') with open(ignored_file, 'w') as f: f.write('This is to be ignored') - ignored_event = Event( - process=p, event_type=EventType.CREATION, file=ignored_file) + ignored_event = Event(process=p, event_type=EventType.CREATION, + file=ignored_file, host_path='') print(f'Ignoring: {ignored_event}') # File Under Test - fut = os.path.join(monitored_dir, 'test.txt') + fut = os.path.join(monitored_dir, 'test2.txt') with open(fut, 'w') as f: f.write('This is a test') - e = Event(process=p, event_type=EventType.CREATION, file=fut) + e = Event(process=p, event_type=EventType.CREATION, + file=fut, host_path='') print(f'Waiting for event: {e}') server.wait_events([e], ignored=[ignored_event]) @@ -148,38 +151,40 @@ def test_paths(fact, fact_config, monitored_dir, ignored_dir, server): with open(ignored_file, 'w') as f: f.write('This is another test') - e = Event( - process=p, event_type=EventType.OPEN, file=ignored_file) + e = Event(process=p, event_type=EventType.OPEN, + file=ignored_file, host_path='') print(f'Waiting for event: {e}') # File Under Test with open(fut, 'w') as f: f.write('This is another ignored event') - ignored_event = Event(process=p, event_type=EventType.OPEN, file=fut) + ignored_event = Event( + process=p, event_type=EventType.OPEN, file=fut, host_path='') print(f'Ignoring: {ignored_event}') server.wait_events([e], ignored=[ignored_event]) def test_paths_addition(fact, fact_config, monitored_dir, ignored_dir, server): - p = Process() + p = Process.from_proc() # Ignored file, must not show up in the server ignored_file = os.path.join(ignored_dir, 'test.txt') with open(ignored_file, 'w') as f: f.write('This is to be ignored') - ignored_event = Event( - process=p, event_type=EventType.CREATION, file=ignored_file) + ignored_event = Event(process=p, event_type=EventType.CREATION, + file=ignored_file, host_path='') print(f'Ignoring: {ignored_event}') # File Under Test - fut = os.path.join(monitored_dir, 'test.txt') + fut = os.path.join(monitored_dir, 'test2.txt') with open(fut, 'w') as f: f.write('This is a test') - e = Event(process=p, event_type=EventType.CREATION, file=fut) + e = Event(process=p, event_type=EventType.CREATION, + file=fut, host_path='') print(f'Waiting for event: {e}') server.wait_events([e], ignored=[ignored_event]) @@ -196,8 +201,9 @@ def test_paths_addition(fact, fact_config, monitored_dir, ignored_dir, server): f.write('This is one final event') events = [ - Event(process=p, event_type=EventType.OPEN, file=ignored_file), - Event(process=p, event_type=EventType.OPEN, file=fut) + Event(process=p, event_type=EventType.OPEN, + file=ignored_file, host_path=''), + Event(process=p, event_type=EventType.OPEN, file=fut, host_path='') ] print(f'Waiting for events: {events}') diff --git a/tests/test_file_open.py b/tests/test_file_open.py index b499cdce..513adea8 100644 --- a/tests/test_file_open.py +++ b/tests/test_file_open.py @@ -1,6 +1,8 @@ import multiprocessing as mp import os +import docker + from event import Event, EventType, Process @@ -15,11 +17,12 @@ def test_open(fact, monitored_dir, server): server: The server instance to communicate with. """ # File Under Test - fut = os.path.join(monitored_dir, 'test.txt') + fut = os.path.join(monitored_dir, 'create.txt') with open(fut, 'w') as f: f.write('This is a test') - e = Event(process=Process(), event_type=EventType.CREATION, file=fut) + e = Event(process=Process.from_proc(), event_type=EventType.CREATION, + file=fut, host_path='') print(f'Waiting for event: {e}') server.wait_events([e]) @@ -36,21 +39,22 @@ def test_multiple(fact, monitored_dir, server): server: The server instance to communicate with. """ events = [] - process = Process() + process = Process.from_proc() # File Under Test for i in range(3): fut = os.path.join(monitored_dir, f'{i}.txt') with open(fut, 'w') as f: f.write('This is a test') - e = Event(process=process, event_type=EventType.CREATION, file=fut) + e = Event(process=process, event_type=EventType.CREATION, + file=fut, host_path='') print(f'Waiting for event: {e}') events.append(e) server.wait_events(events) -def test_multiple_access(fact, monitored_dir, server): +def test_multiple_access(fact, test_file, server): """ Tests multiple opening of a file and verifies that the corresponding events are captured by the server. @@ -61,49 +65,46 @@ def test_multiple_access(fact, monitored_dir, server): server: The server instance to communicate with. """ events = [] - # File Under Test - fut = os.path.join(monitored_dir, 'test.txt') - for i in range(3): - with open(fut, 'a+') as f: + with open(test_file, 'a+') as f: f.write('This is a test') - e = Event(process=Process(), file=fut, - event_type=EventType.CREATION if i == 0 else EventType.OPEN) + e = Event(process=Process.from_proc(), file=test_file, + host_path=test_file, event_type=EventType.OPEN) print(f'Waiting for event: {e}') events.append(e) server.wait_events(events) -def test_ignored(fact, monitored_dir, ignored_dir, server): +def test_ignored(fact, test_file, ignored_dir, server): """ Tests that open events on ignored files are not captured by the server. Args: fact: Fixture for file activity (only required to be running). - monitored_dir: Temporary directory path for creating the test file. + test_file: Temporary file for testing. ignored_dir: Temporary directory path that is not monitored by fact. server: The server instance to communicate with. """ - p = Process() + p = Process.from_proc() # Ignored file, must not show up in the server ignored_file = os.path.join(ignored_dir, 'test.txt') with open(ignored_file, 'w') as f: f.write('This is to be ignored') - ignored_event = Event( - process=p, event_type=EventType.CREATION, file=ignored_file) + ignored_event = Event(process=p, event_type=EventType.CREATION, + file=ignored_file, host_path='') print(f'Ignoring: {ignored_event}') # File Under Test - fut = os.path.join(monitored_dir, 'test.txt') - with open(fut, 'w') as f: + with open(test_file, 'w') as f: f.write('This is a test') - e = Event(process=p, event_type=EventType.CREATION, file=fut) + e = Event(process=p, event_type=EventType.OPEN, + file=test_file, host_path=test_file) print(f'Waiting for event: {e}') server.wait_events([e], ignored=[ignored_event]) @@ -131,15 +132,17 @@ def test_external_process(fact, monitored_dir, server): """ # File Under Test - fut = os.path.join(monitored_dir, 'test.txt') + fut = os.path.join(monitored_dir, 'test2.txt') stop_event = mp.Event() proc = mp.Process(target=do_test, args=(fut, stop_event)) proc.start() - p = Process(proc.pid) + p = Process.from_proc(proc.pid) - creation = Event(process=p, event_type=EventType.CREATION, file=fut) + creation = Event(process=p, event_type=EventType.CREATION, + file=fut, host_path='') print(f'Waiting for event: {creation}') - write_access = Event(process=p, event_type=EventType.OPEN, file=fut) + write_access = Event( + process=p, event_type=EventType.OPEN, file=fut, host_path='') print(f'Waiting for event: {write_access}') try: @@ -147,3 +150,77 @@ def test_external_process(fact, monitored_dir, server): finally: stop_event.set() proc.join(1) + + +def test_overlay(fact, test_container, server): + # File Under Test + fut = '/container-dir/test.txt' + + # Create the exec and an equivalent event that it will trigger + test_container.exec_run(f'touch {fut}') + inspect = docker.APIClient().inspect_container(test_container.id) + upper_dir = inspect['GraphDriver']['Data']['UpperDir'] + + process = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/touch', + args=f'touch {fut}', + name='touch', + container_id=test_container.id[:12], + loginuid=pow(2, 32)-1) + events = [ + Event(process=process, event_type=EventType.CREATION, + file=fut, host_path=''), + Event(process=process, event_type=EventType.OPEN, + file=fut, host_path='') + ] + + for e in events: + print(f'Waiting for event: {e}') + + server.wait_events(events) + + +def test_mounted_dir(fact, test_container, ignored_dir, server): + # File Under Test + fut = '/mounted/test.txt' + + # Create the exec and an equivalent event that it will trigger + test_container.exec_run(f'touch {fut}') + + process = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/touch', + args=f'touch {fut}', + name='touch', + container_id=test_container.id[:12], + loginuid=pow(2, 32)-1) + event = Event(process=process, event_type=EventType.CREATION, + file=fut, host_path='') + print(f'Waiting for event: {event}') + + server.wait_events([event]) + + +def test_unmonitored_mounted_dir(fact, test_container, test_file, server): + # File Under Test + fut = '/unmonitored/test.txt' + + # Create the exec and an equivalent event that it will trigger + test_container.exec_run(f'touch {fut}') + + process = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/touch', + args=f'touch {fut}', + name='touch', + container_id=test_container.id[:12], + loginuid=pow(2, 32)-1) + event = Event(process=process, event_type=EventType.OPEN, + file=fut, host_path=test_file) + print(f'Waiting for event: {event}') + + server.wait_events([event]) diff --git a/tests/test_path_unlink.py b/tests/test_path_unlink.py index 9486b9bd..106e5af7 100644 --- a/tests/test_path_unlink.py +++ b/tests/test_path_unlink.py @@ -1,28 +1,27 @@ import multiprocessing as mp import os +import docker + from event import Event, EventType, Process -def test_remove(fact, monitored_dir, server): +def test_remove(fact, test_file, server): """ Tests the removal of a file and verifies the corresponding event is captured by the server. Args: fact: Fixture for file activity (only required to be running). - monitored_dir: Temporary directory path for monitoring the test file. + test_file: Temporary file for testing. server: The server instance to communicate with. """ - fut = os.path.join(monitored_dir, 'test.txt') - with open(fut, 'w') as f: - f.write('This is a test') - os.remove(fut) + os.remove(test_file) - process = Process() + process = Process.from_proc() events = [ - Event(process=process, event_type=EventType.CREATION, file=fut), - Event(process=process, event_type=EventType.UNLINK, file=fut), + Event(process=process, event_type=EventType.UNLINK, + file=test_file, host_path=test_file), ] server.wait_events(events) @@ -39,7 +38,7 @@ def test_multiple(fact, monitored_dir, server): server: The server instance to communicate with. """ events = [] - process = Process() + process = Process.from_proc() # File Under Test for i in range(3): @@ -49,14 +48,16 @@ def test_multiple(fact, monitored_dir, server): os.remove(fut) events.extend([ - Event(process=process, event_type=EventType.CREATION, file=fut), - Event(process=process, event_type=EventType.UNLINK, file=fut), + Event(process=process, event_type=EventType.CREATION, + file=fut, host_path=''), + Event(process=process, event_type=EventType.UNLINK, + file=fut, host_path=''), ]) server.wait_events(events) -def test_ignored(fact, monitored_dir, ignored_dir, server): +def test_ignored(fact, test_file, ignored_dir, server): """ Tests that unlink events on ignored files are not captured by the server. @@ -67,7 +68,7 @@ def test_ignored(fact, monitored_dir, ignored_dir, server): ignored_dir: Temporary directory path that is not monitored by fact. server: The server instance to communicate with. """ - process = Process() + process = Process.from_proc() # Ignored file, must not show up in the server ignored_file = os.path.join(ignored_dir, 'test.txt') @@ -75,17 +76,15 @@ def test_ignored(fact, monitored_dir, ignored_dir, server): f.write('This is to be ignored') os.remove(ignored_file) - ignored_event = Event( - process=process, event_type=EventType.UNLINK, file=ignored_file) + ignored_event = Event(process=process, event_type=EventType.UNLINK, + file=ignored_file, host_path='') print(f'Ignoring: {ignored_event}') # File Under Test - fut = os.path.join(monitored_dir, 'test.txt') - with open(fut, 'w') as f: - f.write('This is a test') - os.remove(fut) + os.remove(test_file) - e = Event(process=process, event_type=EventType.UNLINK, file=fut) + e = Event(process=process, event_type=EventType.UNLINK, + file=test_file, host_path=test_file) print(f'Waiting for event: {e}') server.wait_events([e], ignored=[ignored_event]) @@ -111,13 +110,14 @@ def test_external_process(fact, monitored_dir, server): server: The server instance to communicate with. """ # File Under Test - fut = os.path.join(monitored_dir, 'test.txt') + fut = os.path.join(monitored_dir, 'test2.txt') stop_event = mp.Event() proc = mp.Process(target=do_test, args=(fut, stop_event)) proc.start() - process = Process(proc.pid) + process = Process.from_proc(proc.pid) - removal = Event(process=process, event_type=EventType.UNLINK, file=fut) + removal = Event(process=process, event_type=EventType.UNLINK, + file=fut, host_path='') print(f'Waiting for event: {removal}') try: @@ -125,3 +125,105 @@ def test_external_process(fact, monitored_dir, server): finally: stop_event.set() proc.join(1) + + +def test_overlay(fact, test_container, server): + # File Under Test + fut = '/container-dir/test.txt' + + # Create the exec and an equivalent event that it will trigger + test_container.exec_run(f'touch {fut}') + test_container.exec_run(f'rm {fut}') + inspect = docker.APIClient().inspect_container(test_container.id) + upper_dir = inspect['GraphDriver']['Data']['UpperDir'] + + loginuid = pow(2, 32)-1 + touch = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/touch', + args=f'touch {fut}', + name='touch', + container_id=test_container.id[:12], + loginuid=loginuid) + rm = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/rm', + args=f'rm {fut}', + name='rm', + container_id=test_container.id[:12], + loginuid=loginuid) + events = [ + Event(process=touch, event_type=EventType.CREATION, + file=fut, host_path=''), + Event(process=touch, event_type=EventType.OPEN, + file=fut, host_path=''), + Event(process=rm, event_type=EventType.UNLINK, + file=fut, host_path=''), + ] + + for e in events: + print(f'Waiting for event: {e}') + + server.wait_events(events) + + +def test_mounted_dir(fact, test_container, ignored_dir, server): + # File Under Test + fut = '/mounted/test.txt' + + # Create the exec and an equivalent event that it will trigger + test_container.exec_run(f'touch {fut}') + test_container.exec_run(f'rm {fut}') + + loginuid = pow(2, 32)-1 + touch = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/touch', + args=f'touch {fut}', + name='touch', + container_id=test_container.id[:12], + loginuid=loginuid) + rm = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/rm', + args=f'rm {fut}', + name='rm', + container_id=test_container.id[:12], + loginuid=loginuid) + events = [ + Event(process=touch, event_type=EventType.CREATION, file=fut, + host_path=''), + Event(process=rm, event_type=EventType.UNLINK, file=fut, + host_path=''), + ] + + for e in events: + print(f'Waiting for event: {e}') + + server.wait_events(events) + + +def test_unmonitored_mounted_dir(fact, test_container, test_file, server): + # File Under Test + fut = '/unmonitored/test.txt' + + # Create the exec and an equivalent event that it will trigger + test_container.exec_run(f'rm {fut}') + + process = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/rm', + args=f'rm {fut}', + name='rm', + container_id=test_container.id[:12], + loginuid=pow(2, 32)-1) + event = Event(process=process, event_type=EventType.UNLINK, + file=fut, host_path=test_file) + print(f'Waiting for event: {event}') + + server.wait_events([event]) From ce0b5fcccd6e0ca72d6902f57d7917b7fba0287f Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Wed, 3 Dec 2025 15:28:00 +0100 Subject: [PATCH 3/5] ROX-30437: fix k8s manifest for inode tracking --- k8s/manifest.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/k8s/manifest.yml b/k8s/manifest.yml index f470c708..dc8e1e94 100644 --- a/k8s/manifest.yml +++ b/k8s/manifest.yml @@ -26,6 +26,8 @@ spec: env: - name: FACT_LOGLEVEL value: 'debug' + - name: FACT_HOST_MOUNT + value: '/host' securityContext: capabilities: drop: @@ -33,11 +35,11 @@ spec: privileged: true readOnlyRootFilesystem: true volumeMounts: - - mountPath: /sys - name: sys-ro + - mountPath: /host + name: root-ro readOnly: true mountPropagation: HostToContainer volumes: - hostPath: - path: /sys/ - name: sys-ro + path: / + name: root-ro From 23c63ba5578684e44e8b3494edd4d035ea5fab2d Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Mon, 15 Dec 2025 12:42:00 +0100 Subject: [PATCH 4/5] chore: general cleanups * Added a missing null check. * Added missing doc strings. * Downgrade inode and dev numbers to 32 bits. * Added some logging statements. --- fact-ebpf/src/bpf/inode.h | 11 +++++++++++ fact-ebpf/src/bpf/types.h | 4 ++-- fact/src/host_scanner.rs | 27 ++++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/fact-ebpf/src/bpf/inode.h b/fact-ebpf/src/bpf/inode.h index 58ac5696..842f1251 100644 --- a/fact-ebpf/src/bpf/inode.h +++ b/fact-ebpf/src/bpf/inode.h @@ -66,6 +66,9 @@ __always_inline static inode_value_t* inode_get(struct inode_key_t* inode) { } __always_inline static long inode_remove(struct inode_key_t* inode) { + if (inode == NULL) { + return 0; + } return bpf_map_delete_elem(&inode_map, inode); } @@ -74,6 +77,14 @@ typedef enum inode_monitored_t { MONITORED, } inode_monitored_t; +/** + * Check if the provided inode is being monitored. + * + * The current implementation is very basic and might seem like + * overkill, but in the near future this function will be extended to + * check if the parent of the provided inode is monitored and provide + * different results for handling more complicated scenarios. + */ __always_inline static inode_monitored_t inode_is_monitored(const inode_value_t* inode) { if (inode != NULL) { return MONITORED; diff --git a/fact-ebpf/src/bpf/types.h b/fact-ebpf/src/bpf/types.h index e280cf5a..58f17135 100644 --- a/fact-ebpf/src/bpf/types.h +++ b/fact-ebpf/src/bpf/types.h @@ -33,8 +33,8 @@ typedef struct process_t { } process_t; typedef struct inode_key_t { - unsigned long long inode; - unsigned long long dev; + unsigned long inode; + unsigned long dev; } inode_key_t; typedef char inode_value_t; diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index 77a61e64..ac2b5bae 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -1,3 +1,23 @@ +//! # Host Scanner module +//! +//! This module is in charge of scanning the host file system and +//! maintaining a mapping of inode and device number to host path. +//! +//! An initial scan of the filesystem is triggered when a `HostScanner` +//! object is first created. The scan will populate two maps: +//! * A HashMap that holds an inode to path translation. +//! * An eBPF HashMap that will let the eBPF programs know if a given +//! file is being monitored, regardless of the path being used to +//! access it. +//! +//! Calling the `start` method on the `HostScanner` object will consume +//! it and spawn a new tokio task that will receive events from the +//! provided `mpsc::Receiver`, update their host paths and send +//! them out its `broadcast::Sender>` for further processing. +//! +//! TODO: Implement updating maps based on received events, periodic +//! scans to remediate inconsistencies due to missed events, etc.. + use std::{ cell::RefCell, os::linux::fs::MetadataExt, @@ -90,10 +110,15 @@ impl HostScanner { dev: metadata.st_dev(), }; - self.kernel_inode_map.borrow_mut().insert(inode, 0, 0)?; + self.kernel_inode_map + .borrow_mut() + .insert(inode, 0, 0) + .with_context(|| format!("Failed to insert kernel entry for {}", path.display()))?; let mut inode_map = self.inode_map.borrow_mut(); let entry = inode_map.entry(inode).or_default(); *entry = host_info::remove_host_mount(path); + + debug!("Added entry for {}: {inode:?}", path.display()); Ok(()) } From e01148bc58c78177cc48cc9bf03b0480f9239c83 Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Mon, 15 Dec 2025 14:55:45 +0100 Subject: [PATCH 5/5] Small comment on char vs bool --- fact-ebpf/src/bpf/types.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fact-ebpf/src/bpf/types.h b/fact-ebpf/src/bpf/types.h index 58f17135..17c1f43b 100644 --- a/fact-ebpf/src/bpf/types.h +++ b/fact-ebpf/src/bpf/types.h @@ -37,6 +37,9 @@ typedef struct inode_key_t { unsigned long dev; } inode_key_t; +// We can't use bool here because it is not a standard C type, we would +// need to include vmlinux.h but that would explode our Rust bindings. +// For the time being we just keep a char. typedef char inode_value_t; typedef enum file_activity_type_t {