diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..48aa829 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,3 @@ +[workspace] +members = ["halide-cache", "lager"] +resolver = "3" diff --git a/halide-cache/Cargo.lock b/halide-cache/Cargo.lock index a7f7bcd..e9be473 100644 --- a/halide-cache/Cargo.lock +++ b/halide-cache/Cargo.lock @@ -52,6 +52,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + [[package]] name = "arrayref" version = "0.3.9" @@ -197,6 +203,7 @@ dependencies = [ name = "halide-cache" version = "0.1.0" dependencies = [ + "anyhow", "blake3", "clap", "lager", diff --git a/halide-cache/Cargo.toml b/halide-cache/Cargo.toml index 479ff7b..761725d 100644 --- a/halide-cache/Cargo.toml +++ b/halide-cache/Cargo.toml @@ -8,3 +8,4 @@ lager = { path = "../lager" } blake3 = "1.8.2" clap = { version = "4.5.53", features = ["derive"] } named-lock = "0.4.1" +anyhow = "1.0.102" diff --git a/halide-cache/src/main.rs b/halide-cache/src/main.rs index bc4eb9b..2c570d5 100644 --- a/halide-cache/src/main.rs +++ b/halide-cache/src/main.rs @@ -19,93 +19,99 @@ struct Args { const MAX_CACHE_SIZE_BYTES: u64 = 10737418240; // 10 GiB -fn main() { - let args = Args::parse(); +struct Dependencies<'a> { + path: &'a Path, + dependencies: &'a [PathBuf], + env: &'a [String], +} + +impl<'a> Dependencies<'a> { + fn make_address(&self) -> anyhow::Result<Address> { + let mut hasher = blake3::Hasher::new(); + + hasher.update(self.path.as_os_str().as_encoded_bytes()); + hasher.update(&[0u8]); + hasher.update(&[0u8]); - let cache_dir = match env::var("HALIDE_CACHE_DIR") { - Ok(env) => env, - Err(e) => { - eprintln!("HALIDE_CACHE_DIR environment variable not set: {}", e); - std::process::exit(1); + for d in self.dependencies { + let file = anyhow::Context::with_context(std::fs::File::open(d), || format!("Failed to open dependency {}", d.display()))?; + let digest = blake3::Hasher::new().update_reader(file)?.finalize(); // fixed-width per-file digest: NUL bytes inside a file cannot shift entry boundaries + 
hasher.update(digest.as_bytes()); } - }; + hasher.update(&[0u8]); + + for e in self.env { + hasher.update(e.as_bytes()); + hasher.update(&[0u8]); + } + hasher.update(&[0u8]); + + let mut buf = [0u8; _]; + hasher.finalize_xof().fill(&mut buf); + Ok(buf.into()) + } +} + +fn main() -> anyhow::Result<()> { + let args = Args::parse(); + + let cache_dir = anyhow::Context::context(env::var("HALIDE_CACHE_DIR"), "HALIDE_CACHE_DIR environment variable not set")?; if !Path::new(&cache_dir).exists() { - fs::create_dir_all(&cache_dir).unwrap(); + fs::create_dir_all(&cache_dir)?; } - let lager = match Lager::new(Path::new(&cache_dir)) { - Ok(l) => l, - Err(e) => { - eprintln!("Failed to initialize Lager cache: {}", e); - std::process::exit(1); - } - }; + let lager = Lager::new(Path::new(&cache_dir))?; let zivid_env = collect_zivid_env(); - let mut object_dependencies = zivid_env.clone(); - object_dependencies.push( - args.generated_object - .clone() - .into_os_string() - .into_string() - .unwrap(), - ); - - let mut header_dependencies = zivid_env; - header_dependencies.push( - args.generated_header - .clone() - .into_os_string() - .into_string() - .unwrap(), - ); - - hash_all_dependencies_contents( - args.dependencies, - &mut object_dependencies, - &mut header_dependencies, - ); - - let object_address = hash_vector(&object_dependencies); - let header_address = hash_vector(&header_dependencies); + let object_dependencies = Dependencies { + path: &args.generated_object, + dependencies: &args.dependencies, + env: &zivid_env, + }; + + let header_dependencies = Dependencies { + path: &args.generated_header, + dependencies: &args.dependencies, + env: &zivid_env, + }; + let header_address = header_dependencies.make_address()?; + let object_address = object_dependencies.make_address()?; if cache_hit( &args.generated_object, &args.generated_header, &lager, &object_address, &header_address, - ) { - return; + )? 
{ + return Ok(()); } let status = Command::new(&args.builder[0]) .args(&args.builder[1..]) - .status() - .expect("Failed to execute command"); + .status()?; if status.success() { - lager - .store_at(&object_address, &args.generated_object) - .expect("Store at failed for the Halide object"); - lager - .store_at(&header_address, &args.generated_header) - .expect("Store at failed for the Halide header"); + lager.store_at(&object_address, &args.generated_object)?; + lager.store_at(&header_address, &args.generated_header)?; } - try_cleaning_up(lager); + try_cleaning_up(lager)?; + anyhow::ensure!(status.success(), "Builder command failed: {status}"); // a failed builder must not be reported as success + Ok(()) } -fn try_cleaning_up(lager: Lager) { - let lock = NamedLock::create("lager_lock").unwrap(); +fn try_cleaning_up(lager: Lager) -> anyhow::Result<()> { + let lock = NamedLock::create("lager_lock")?; if let Ok(_guard) = lock.lock() { let mut lru = LRU::new(lager); - lru.scan().unwrap(); + lru.scan()?; if lru.lager_size() > MAX_CACHE_SIZE_BYTES { - lru.evict_until(MAX_CACHE_SIZE_BYTES).unwrap(); + lru.evict_until(MAX_CACHE_SIZE_BYTES)?; } } + Ok(()) } fn collect_zivid_env() -> Vec<String> { @@ -116,77 +122,30 @@ fn collect_zivid_env() -> Vec<String> { v } -fn hash_all_dependencies_contents( - dependencies: Vec, - object_dependencies: &mut Vec, - header_dependencies: &mut Vec, -) { - for dep in &dependencies { - let file_content_hash = match compute_hash_of_file(dep) { - Ok(hash) => hash, - Err(e) => { - eprintln!("Failed to compute hash for {:?}: {}", dependencies, e); - std::process::exit(1); - } - }; - object_dependencies.push(file_content_hash.clone()); - header_dependencies.push(file_content_hash); - } -} - fn cache_hit( - generated_object: &PathBuf, - generated_header: &PathBuf, + generated_object: &Path, + generated_header: &Path, lager: &Lager, object_address: &Address, header_address: &Address, -) -> bool { - match lager.retrieve(object_address, generated_object.as_path()) { - Ok(_) => { - println!("Cache hit for Halide object. 
{:?}", generated_object); - match lager.retrieve(header_address, generated_header.as_path()) { - Ok(_) => { - println!("Cache hit for Halide header {:?}", generated_header); - return true; - } - Err(e) => { - eprintln!("Error for Halide header {}", e); - } - } - } - Err(_) => { - // Cache miss, proceed to build +) -> anyhow::Result<bool> { + match ( + lager.retrieve(object_address, generated_object), + lager.retrieve(header_address, generated_header), + ) { + (Ok(_), Ok(_)) => { + println!( + "Cache hits for Halide objects: {:?} and {:?}", + generated_object, generated_header + ); + Ok(true) } - } - false -} - -fn serialize_vector(vec: &[String]) -> String { - format!( - "[{}]", - vec.iter() - .map(|s| format!("\"{}\"", s)) - .collect::>() - .join(",") - ) -} -fn compute_hash_of_file>( - path: P, -) -> Result> { - let mut file = std::fs::File::open(path)?; - let mut hasher = blake3::Hasher::new(); - std::io::copy(&mut file, &mut hasher)?; - let hash = hasher.finalize(); - Ok(hash.to_hex().to_string()) -} - -fn hash_vector(vector: &[String]) -> Address { - let input = serialize_vector(vector); - let mut hasher = blake3::Hasher::new(); - hasher.update(input.as_bytes()); - let hash = hasher.finalize().to_hex().to_string(); - match hash.into_bytes().as_slice().try_into() { - Ok(addr) => addr, - Err(_) => panic!("Hash length mismatch"), + // A pair that is only partially cached (e.g. one entry was evicted) is a miss: rebuild instead of failing. 
+ (Err(lager::Error::NotFound { .. }), Err(lager::Error::NotFound { .. })) + | (Ok(_), Err(lager::Error::NotFound { .. })) + | (Err(lager::Error::NotFound { .. }), Ok(_)) => Ok(false), + (Err(oe), Err(he)) => Err(anyhow::anyhow!(oe).context(he)), + (Ok(_), Err(e)) => Err(anyhow::anyhow!(e).context("Retrieving the object was successful")), + (Err(e), Ok(_)) => Err(anyhow::anyhow!(e).context("Retrieving the header was successful")), } } diff --git a/halide-cache/tests/basic.rs b/halide-cache/tests/basic.rs new file mode 100644 index 0000000..91201b9 --- /dev/null +++ b/halide-cache/tests/basic.rs @@ -0,0 +1,8 @@ +#[test] +fn run_basic_tests() { + let success = std::process::Command::new("tests/run") + 
.arg("tests/builder") + .status(); + assert!(success.is_ok()); + assert!(success.unwrap().success()); +} diff --git a/halide-cache/tests/builder b/halide-cache/tests/builder new file mode 100755 index 0000000..290ddc3 --- /dev/null +++ b/halide-cache/tests/builder @@ -0,0 +1,21 @@ +#!/bin/bash + +read -r case < "$HALIDE_TEST_DEPENDENCIES/case.txt" +read -r direction < "$HALIDE_TEST_DEPENDENCIES/direction.txt" + +process() { + cat "$1" | + if [[ $case = uppercase ]]; then + tr '[:lower:]' '[:upper:]' + else + cat + fi | + if [[ $direction = reverse ]]; then + rev + else + cat + fi > "$2" +} + +process "$1" "$2" +process "$1" "$3" diff --git a/halide-cache/tests/run b/halide-cache/tests/run new file mode 100755 index 0000000..87e16c6 --- /dev/null +++ b/halide-cache/tests/run @@ -0,0 +1,148 @@ +#!/bin/bash + +# ---------- SETUP ---------- + +builder=$1 + +die() { + if (( $# == 1 )); then + printf '%s\n' "$1" + elif (( $# > 1 )); then + printf "$@" + fi >&2 + exit 1 +} + +test_dir=$(mktemp -d) && +mkdir -p "$test_dir"/{cache,outputs,inputs,dependencies,temp} || exit +echo "Using test directory: $test_dir" + +# The builder script uses these to change its output. 
It reads them directly; +# they are not part of the command line +export HALIDE_TEST_DEPENDENCIES="$test_dir/dependencies" + +configure_output() { + echo "$1" > "$HALIDE_TEST_DEPENDENCIES/case.txt" + echo "$2" > "$HALIDE_TEST_DEPENDENCIES/direction.txt" +} + +run_builder_with_cache() { + local base_name="${1##*/}" + base_name="${base_name%.txt}" + local header="$test_dir/outputs/$base_name.h" + local object="$test_dir/outputs/$base_name.o" + + cargo run --quiet --bin halide-cache -- \ + --dependencies "$HALIDE_TEST_DEPENDENCIES"/*.txt "$1" \ + --generated-object "$object" --generated-header "$header" \ + -- "$builder" "$1" "$object" "$header" || die +} + +export HALIDE_CACHE_DIR="$test_dir/cache" + +assert_cache_entries() { + local count=$(find "$HALIDE_CACHE_DIR" -type f -name '*.zst' | wc -l) + (( $1 == count )) || die "expected != actual: %s != %s. Expectation %s\n" "$1" "$count" "$2" +} + +expected_count=0 +base_output_content='' + +# ---------- TESTS ---------- + +base_case() { + configure_output normal normal + echo hello >"$test_dir/inputs/hello.txt" + + assert_cache_entries $expected_count + + run_builder_with_cache "$test_dir/inputs/hello.txt" + (( expected_count += 2 )) + assert_cache_entries $expected_count "First entries to an empty cache" + + base_output_content="$(sha1sum "$test_dir/outputs/hello.o" "$test_dir/outputs/hello.h")" + + run_builder_with_cache "$test_dir/inputs/hello.txt" + assert_cache_entries $expected_count "No changes, cache hit" +} + +difference_in_dependencies() { + configure_output normal2 normal2 # Deps have changed but has no effect on output + + (( expected_count += 2 )) + run_builder_with_cache "$test_dir/inputs/hello.txt" + assert_cache_entries $expected_count "Dependencies have changed content but input and output hasn't" + + local output_content="$(sha1sum "$test_dir/outputs/hello.o" "$test_dir/outputs/hello.h")" + [[ $output_content = "$base_output_content" ]] || die 'Hashes different but they shouldn not be' +} + 
+uppercase() { + configure_output uppercase normal + + (( expected_count += 2 )) + + run_builder_with_cache "$test_dir/inputs/hello.txt" + assert_cache_entries $expected_count "Dependencies and and output has changed" + run_builder_with_cache "$test_dir/inputs/hello.txt" + assert_cache_entries $expected_count "No change inputs and outputs => cache hit" +} + +reverse_direction() { + configure_output uppercase reverse + + ((expected_count += 2)) + + run_builder_with_cache "$test_dir/inputs/hello.txt" + assert_cache_entries $expected_count "Dependencies have changed, affecting output" + run_builder_with_cache "$test_dir/inputs/hello.txt" + assert_cache_entries $expected_count "No change inputs and outputs => cache hit" +} + +lowercase() { + configure_output normal reverse + + ((expected_count += 2)) + + run_builder_with_cache "$test_dir/inputs/hello.txt" + assert_cache_entries $expected_count "Dependencies have changed, affecting output" + run_builder_with_cache "$test_dir/inputs/hello.txt" + assert_cache_entries $expected_count "No change inputs and outputs => cache hit" +} + +normal_direction() { + configure_output normal normal + + run_builder_with_cache "$test_dir/inputs/hello.txt" + assert_cache_entries $expected_count "Dependencies have changed, affecting output, but back to a previous case" + run_builder_with_cache "$test_dir/inputs/hello.txt" + assert_cache_entries $expected_count "No change inputs and outputs => cache hit" +} + +input_path_different() { + configure_output normal normal + + echo hello > "$test_dir/inputs/different.txt" + ((expected_count += 2)) + run_builder_with_cache "$test_dir/inputs/different.txt" + assert_cache_entries $expected_count "Path to input has changed" +} + +input_content_different() { + configure_output normal normal + + echo hello4 > "$test_dir/inputs/different.txt" + ((expected_count += 2)) + run_builder_with_cache "$test_dir/inputs/different.txt" + assert_cache_entries $expected_count "Content of input has changed" +} + +# 
Unfortunately the order of these matter +base_case +difference_in_dependencies +uppercase +reverse_direction +lowercase +normal_direction +input_path_different +input_content_different diff --git a/lager/src/compression.rs b/lager/src/compression.rs index bc8925c..31bf74a 100644 --- a/lager/src/compression.rs +++ b/lager/src/compression.rs @@ -1,4 +1,3 @@ - use std::io::{BufWriter, Read, Write}; use std::path::Path; diff --git a/lager/src/error.rs b/lager/src/error.rs index 00666a8..245ef7d 100644 --- a/lager/src/error.rs +++ b/lager/src/error.rs @@ -1,57 +1,20 @@ use crate::Address; -use std::io; use std::path::PathBuf; use thiserror::Error; -fn format_io_error_with_message(msg: &Option, source: &io::Error) -> String { - match msg { - Some(m) => format!("{}: {}", m, source), - None => format!("{}", source), - } -} - #[derive(Error, Debug)] pub enum Error { #[error("{}", .msg)] - Runtime { - msg: String, - #[source] - source: Option>, - }, + Runtime { msg: String }, + #[error("Invalid address")] + InvalidAddress(#[from] hex::FromHexError), #[error("Address not found: {}", .address)] NotFound { address: Address }, #[error("No such file or directory: {}", .path.display())] NoSuchFile { path: PathBuf }, - #[error("{}", format_io_error_with_message(.msg, .source))] - Io { - msg: Option, - #[source] - source: io::Error, - }, -} - -impl From for Error { - fn from(source: io::Error) -> Self { - Error::Io { - msg: None, - source: io::Error::other(source), - } - } -} - -impl From for Error { - fn from(source: walkdir::Error) -> Self { - if let Some(io_error) = source.into_io_error() { - Error::Io { - msg: Some("An error occurred while walking the directory tree".to_string()), - source: io_error, - } - } else { - Error::Runtime { - msg: "An error occurred while walking the directory tree".to_string(), - source: None, - } - } - } + #[error("Io")] + Io(#[from] std::io::Error), + #[error("WalkDir")] + WalkDir(#[from] walkdir::Error), } diff --git a/lager/src/lager.rs 
b/lager/src/lager.rs index 84714b9..07ce9c5 100644 --- a/lager/src/lager.rs +++ b/lager/src/lager.rs @@ -60,9 +60,7 @@ impl Lager { compression::read_dir(destination, file)?; Ok(()) } else { - Err(Error::NotFound { - address: *address, - }) + Err(Error::NotFound { address: *address }) } } @@ -90,6 +88,8 @@ impl Lager { #[cfg(test)] mod tests { + use crate::ADDRESS_SIZE; + use super::*; use tempdir::TempDir; @@ -108,7 +108,7 @@ mod tests { let temp_file_path = dir.path().join("temp_file.txt"); std::fs::write(&temp_file_path, b"Hello, World!").unwrap(); - let address = Address::from([0u8; 32]); + let address = Address::from([0u8; ADDRESS_SIZE]); lager.store_at(&address, &temp_file_path).unwrap(); @@ -129,7 +129,7 @@ mod tests { std::fs::write(temp_dir_path.join("file1.txt"), b"File 1").unwrap(); std::fs::write(temp_dir_path.join("file2.txt"), b"File 2").unwrap(); - let address = Address::from([1u8; 32]); + let address = Address::from([1u8; ADDRESS_SIZE]); lager.store_at(&address, &temp_dir_path).unwrap(); @@ -148,7 +148,7 @@ mod tests { let dir = TempDir::new("lager_test").unwrap(); let lager = Lager::new(dir.path()).unwrap(); - let address = Address::from([2u8; 32]); + let address = Address::from([2u8; ADDRESS_SIZE]); let retrieve_path = dir.path().join("nonexistent_retrieved.txt"); let result = lager.retrieve(&address, &retrieve_path); diff --git a/lager/src/lib.rs b/lager/src/lib.rs index 9d42613..252ca90 100644 --- a/lager/src/lib.rs +++ b/lager/src/lib.rs @@ -15,25 +15,23 @@ use thiserror::Error; pub type Result = std::result::Result; +const ADDRESS_SIZE: usize = 64; + #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] -pub struct Address([u8; 64]); +pub struct Address([u8; ADDRESS_SIZE]); impl Address { pub(crate) fn from_hex(hex: &str) -> Result { - let bytes = hex::decode(hex).map_err(|e| Error::Runtime { - msg: format!("Failed to decode address: {}", hex), - source: Some(Box::new(e)), - })?; - if bytes.len() != 64 { + let bytes = hex::decode(hex)?; + if 
bytes.len() != ADDRESS_SIZE { return Err(Error::Runtime { msg: format!( "Invalid address length ({}). Only 64 byte addresses are supported.", bytes.len() ), - source: None, }); } - let mut array = [0u8; 64]; + let mut array = [0u8; ADDRESS_SIZE]; array.copy_from_slice(&bytes); Ok(Address(array)) } @@ -42,22 +40,21 @@ impl Address { impl std::convert::TryFrom<&[u8]> for Address { type Error = Error; fn try_from(bytes: &[u8]) -> Result { - if bytes.len() != 64 { + if bytes.len() != ADDRESS_SIZE { return Err(Error::Runtime { msg: format!( "Invalid address length ({}). Only 64 byte addresses are supported.", bytes.len() ), - source: None, }); } - let mut array = [0u8; 64]; + let mut array = [0u8; ADDRESS_SIZE]; array.copy_from_slice(bytes); Ok(Address(array)) } } -impl From<[u8; 64]> for Address { - fn from(bytes: [u8; 64]) -> Self { +impl From<[u8; ADDRESS_SIZE]> for Address { + fn from(bytes: [u8; ADDRESS_SIZE]) -> Self { Address(bytes) } } diff --git a/lager/src/lru.rs b/lager/src/lru.rs index 1851833..b1c44f1 100644 --- a/lager/src/lru.rs +++ b/lager/src/lru.rs @@ -86,6 +86,8 @@ impl LRU { #[cfg(test)] mod tests { + use crate::ADDRESS_SIZE; + use super::*; use std::thread; use std::time::Duration; @@ -108,9 +110,9 @@ mod tests { std::fs::write(&temp_file3, b"Content 3 even longer text").unwrap(); // Store files in lager with different addresses - let addr1 = Address::from([1u8; 32]); - let addr2 = Address::from([2u8; 32]); - let addr3 = Address::from([3u8; 32]); + let addr1 = Address::from([1u8; ADDRESS_SIZE]); + let addr2 = Address::from([2u8; ADDRESS_SIZE]); + let addr3 = Address::from([3u8; ADDRESS_SIZE]); lager.store_at(&addr1, &temp_file1).unwrap(); lager.store_at(&addr2, &temp_file2).unwrap();