diff --git a/lib/descriptor/arch.rs b/lib/descriptor/arch.rs index 5231b57..5759f19 100644 --- a/lib/descriptor/arch.rs +++ b/lib/descriptor/arch.rs @@ -47,13 +47,13 @@ impl Arch { Get the architecture of the current host system. */ #[must_use] - pub fn current_system() -> Self { - match CURRENT_ARCH { - "aarch64" => Self::Arm64, - "x86_64" => Self::X64, - "x86" => Self::X86, - "arm" => Self::Arm32, - _ => panic!("Unsupported architecture: {CURRENT_ARCH}"), + pub const fn current_system() -> Self { + match CURRENT_ARCH.as_bytes() { + b"aarch64" => Self::Arm64, + b"x86_64" => Self::X64, + b"x86" => Self::X86, + b"arm" => Self::Arm32, + _ => panic!("Unsupported architecture"), } } @@ -118,7 +118,7 @@ impl Arch { Get the architecture as a string, such as "x64" or "arm64". */ #[must_use] - pub fn as_str(&self) -> &'static str { + pub const fn as_str(&self) -> &'static str { match self { Self::Arm64 => "arm64", Self::X64 => "x64", diff --git a/lib/descriptor/mod.rs b/lib/descriptor/mod.rs index 8351eac..e289aa7 100644 --- a/lib/descriptor/mod.rs +++ b/lib/descriptor/mod.rs @@ -38,7 +38,7 @@ impl Descriptor { Get the description for the current host system. */ #[must_use] - pub fn current_system() -> Self { + pub const fn current_system() -> Self { Self { os: OS::current_system(), arch: Some(Arch::current_system()), @@ -85,7 +85,7 @@ impl Descriptor { Get the operating system of this description. */ #[must_use] - pub fn os(&self) -> OS { + pub const fn os(&self) -> OS { self.os } @@ -93,7 +93,7 @@ impl Descriptor { Get the architecture of this description. */ #[must_use] - pub fn arch(&self) -> Option { + pub const fn arch(&self) -> Option { self.arch } @@ -101,7 +101,7 @@ impl Descriptor { Get the preferred toolchain of this description. */ #[must_use] - pub fn toolchain(&self) -> Option { + pub const fn toolchain(&self) -> Option { self.toolchain } diff --git a/lib/descriptor/os.rs b/lib/descriptor/os.rs index e05c5b0..d2ffaf2 100644 --- a/lib/descriptor/os.rs +++ b/lib/descriptor/os.rs @@ -38,12 +38,12 @@ impl OS { Get the operating system of the current host system. */ #[must_use] - pub fn current_system() -> Self { - match CURRENT_OS { - "windows" => Self::Windows, - "macos" => Self::MacOS, - "linux" => Self::Linux, - _ => panic!("Unsupported OS: {CURRENT_OS}"), + pub const fn current_system() -> Self { + match CURRENT_OS.as_bytes() { + b"windows" => Self::Windows, + b"macos" => Self::MacOS, + b"linux" => Self::Linux, + _ => panic!("Unsupported OS"), } } @@ -93,7 +93,7 @@ impl OS { Get the name of the operating system as a string. */ #[must_use] - pub fn as_str(self) -> &'static str { + pub const fn as_str(self) -> &'static str { match self { Self::Windows => "windows", Self::MacOS => "macos", diff --git a/lib/descriptor/toolchain.rs b/lib/descriptor/toolchain.rs index 95badad..5548fbf 100644 --- a/lib/descriptor/toolchain.rs +++ b/lib/descriptor/toolchain.rs @@ -21,8 +21,16 @@ impl Toolchain { Get the toolchain of the current host system. */ #[must_use] - pub fn current_system() -> Option { - None // TODO: Implement detection of the host toolchain + pub const fn current_system() -> Option { + if cfg!(target_env = "msvc") { + Some(Self::Msvc) + } else if cfg!(target_env = "gnu") { + Some(Self::Gnu) + } else if cfg!(target_env = "musl") { + Some(Self::Musl) + } else { + None + } } /** @@ -44,7 +52,7 @@ impl Toolchain { Get the name of the toolchain as a string. */ #[must_use] - pub fn as_str(self) -> &'static str { + pub const fn as_str(self) -> &'static str { match self { Self::Msvc => "msvc", Self::Gnu => "gnu", diff --git a/lib/sources/extraction.rs b/lib/sources/extraction.rs index 4f9e4ee..e7136c6 100644 --- a/lib/sources/extraction.rs +++ b/lib/sources/extraction.rs @@ -1,6 +1,7 @@ #![allow(clippy::struct_excessive_bools)] use std::{ + collections::BTreeMap, env::consts::{EXE_EXTENSION, EXE_SUFFIX}, io::{self, Read}, path::{Path, PathBuf, MAIN_SEPARATOR_STR}, @@ -11,7 +12,11 @@ use thiserror::Error; use tokio::{task::spawn_blocking, time::Instant}; use zip::ZipArchive; -use crate::{descriptor::OS, result::RokitResult, sources::ArtifactFormat}; +use crate::{ + descriptor::{Descriptor, OS}, + result::RokitResult, + sources::ArtifactFormat, +}; #[derive(Debug, Error)] pub enum ExtractError { @@ -61,6 +66,7 @@ struct Candidate { matched_file_inexact: bool, // Case-insensitive filename match has_exec_perms: bool, // Has executable permissions (UNIX only) has_exec_suffix: bool, // Has an executable suffix (e.g. `.exe`) + has_descriptor: bool, // Has executable contents (any platform) } impl Candidate { @@ -70,11 +76,13 @@ impl Candidate { + u32::from(self.matched_file_inexact) + u32::from(self.has_exec_perms) + u32::from(self.has_exec_suffix) + + u32::from(self.has_descriptor) } fn find_best( entry_paths: impl AsRef<[(PathBuf, Option)]>, desired_file_path: impl AsRef, + mut read_file_contents: impl FnMut(&Path) -> Option>, ) -> Option { let entry_paths = entry_paths.as_ref(); let desired_file_path = desired_file_path.as_ref(); @@ -97,6 +105,9 @@ impl Candidate { let has_exec_perms = perms.is_some_and(|perms| (perms & 0o111) != 0); let has_exec_suffix = path.extension().is_some_and(|ext| ext == EXE_EXTENSION); + let has_descriptor = read_file_contents(path) + .and_then(Descriptor::detect_from_executable) + .is_some(); Some(Self { path: path.clone(), @@ -105,6 +116,7 @@ impl Candidate { matched_file_inexact, has_exec_perms, has_exec_suffix, + has_descriptor, }) }) .filter(|c| c.priority() > 0) // Filter out candidates with no matches at all @@ -140,43 +152,51 @@ pub async fn extract_zip_file( // Reading a zip file is a potentially expensive operation, so // spawn it as a blocking task and use the tokio thread pool. spawn_blocking(move || { - let mut found = None; - let mut reader = io::Cursor::new(&zip_contents); - let mut zip = ZipArchive::new(&mut reader)?; + let mut zip_cursor = io::Cursor::new(&zip_contents); + let mut zip_reader = ZipArchive::new(&mut zip_cursor)?; - // Gather paths and their permissions, - // avoiding reading the entire zip file - let entry_paths = zip + // Gather simple path + permissions pairs to find candidates from + let entry_paths = zip_reader .file_names() - .map(|name| { - // NOTE: We don't need to sanitize the files names here - // since we only use them for matching *within the zip file* - (PathBuf::from(name), None::) - }) + .map(|name| (PathBuf::from(name), None::)) .collect::>(); - // Find the best candidate to extract, if any - let best = Candidate::find_best(entry_paths, &desired_file_path); - if let Some(candidate) = best { - if let Some(path_str) = candidate.path.to_str() { - if let Ok(mut entry) = zip.by_name(path_str) { - let mut bytes = Vec::new(); - entry.read_to_end(&mut bytes)?; - found = Some(bytes); - } + // Lazily cache any files that we read, to ensure that we do not + // try to read a file entry which has already been read to its end + let mut read_file_cache = BTreeMap::<_, Vec>::new(); + let mut read_file_contents = |path: &Path| { + if let Some(existing) = read_file_cache.get(path) { + Ok(existing.clone()) + } else if let Ok(mut entry) = zip_reader.by_name(path.to_str().unwrap()) { + let mut bytes = Vec::new(); + entry.read_to_end(&mut bytes)?; + read_file_cache.insert(path.to_path_buf(), bytes.clone()); + Ok(bytes) + } else { + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("File not found: {}", path.display()), + )) } - if found.is_none() { - tracing::warn!( - path = ?candidate.path, - "found candidate path, but failed to extract file" - ); + }; + + // Find the best candidate to extract, if any + let best = Candidate::find_best(entry_paths, &desired_file_path, |path| { + read_file_contents(path).ok() + }); + let (path, found) = match best { + None => (None, None), + Some(candidate) => { + let contents = read_file_contents(&candidate.path)?; + (Some(candidate.path), Some(contents)) } - } + }; tracing::debug!( num_kilobytes, elapsed = ?start.elapsed(), - found = found.is_some(), + found_any = found.is_some(), + found_path = path.map(|path| path.display().to_string()), "extracted zip file" ); Ok(found) @@ -203,19 +223,11 @@ pub async fn extract_tar_file( // Reading a tar file is a potentially expensive operation, so // spawn it as a blocking task and use the tokio thread pool. spawn_blocking(move || { - let mut found = None; - - /* - Gather paths and their permissions - note that we - need to read the tar file twice to be able to use - our find_best_candidate matching implementation... - - We can however use the `entries_with_seek` method - to avoid reading actual file contents into memory. - */ - let mut entry_cursor = io::Cursor::new(&tar_contents); - let mut entry_reader = TarArchive::new(&mut entry_cursor); - let entry_paths = entry_reader + // Gather paths and references to their respective entries, + // without reading actual file contents into memory just yet + let mut tar_cursor = io::Cursor::new(&tar_contents); + let mut tar_reader = TarArchive::new(&mut tar_cursor); + let mut tar_files = tar_reader .entries_with_seek()? .filter_map(|entry| { let entry = entry.ok()?; @@ -223,38 +235,55 @@ pub async fn extract_tar_file( return None; } let path = entry.path().ok()?; + Some((path.to_path_buf(), entry)) + }) + .collect::>(); + + // Map to simple path + permissions pairs to find candidates from + let entry_paths = tar_files + .iter() + .map(|(path, entry)| { let perms = entry.header().mode().ok(); - Some((path.to_path_buf(), perms)) + (path.clone(), perms) }) .collect::>(); - // Find the best candidate to extract, if any - let best = Candidate::find_best(entry_paths, &desired_file_path); - if let Some(candidate) = best { - let contents_cursor = io::Cursor::new(&tar_contents); - let mut contents_reader = TarArchive::new(contents_cursor); - for entry in contents_reader.entries_with_seek()? { - let mut entry = entry?; - let entry_path = entry.path()?; - if entry_path == candidate.path.as_path() { - let mut bytes = Vec::new(); - entry.read_to_end(&mut bytes)?; - found = Some(bytes); - break; - } + // Lazily cache any files that we read, to ensure that we do not + // try to read a file entry which has already been read to its end + let mut read_file_cache = BTreeMap::<_, Vec>::new(); + let mut read_file_contents = |path: &Path| { + if let Some(existing) = read_file_cache.get(path) { + Ok(existing.clone()) + } else if let Some(entry) = tar_files.get_mut(path) { + let mut bytes = Vec::new(); + entry.read_to_end(&mut bytes)?; + read_file_cache.insert(path.to_path_buf(), bytes.clone()); + Ok(bytes) + } else { + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("File not found: {}", path.display()), + )) } - if found.is_none() { - tracing::warn!( - path = ?candidate.path, - "found candidate path, but failed to extract file" - ); + }; + + // Find the best candidate to extract, if any + let best = Candidate::find_best(entry_paths, &desired_file_path, |path| { + read_file_contents(path).ok() + }); + let (path, found) = match best { + None => (None, None), + Some(candidate) => { + let contents = read_file_contents(&candidate.path)?; + (Some(candidate.path), Some(contents)) } - } + }; tracing::debug!( num_kilobytes, elapsed = ?start.elapsed(), - found = found.is_some(), + found_any = found.is_some(), + found_path = path.map(|path| path.display().to_string()), "extracted tar file" ); Ok(found)