Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions lib/descriptor/arch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,13 @@ impl Arch {
Get the architecture of the current host system.
*/
#[must_use]
pub fn current_system() -> Self {
match CURRENT_ARCH {
"aarch64" => Self::Arm64,
"x86_64" => Self::X64,
"x86" => Self::X86,
"arm" => Self::Arm32,
_ => panic!("Unsupported architecture: {CURRENT_ARCH}"),
// Maps the `CURRENT_ARCH` string constant onto an `Arch` variant; usable in const contexts.
pub const fn current_system() -> Self {
// The constant is compared as raw bytes against byte-string literals.
// NOTE(review): presumably bytes are used because matching on `&str`
// is not permitted inside a `const fn` - confirm against the MSRV.
match CURRENT_ARCH.as_bytes() {
b"aarch64" => Self::Arm64,
b"x86_64" => Self::X64,
b"x86" => Self::X86,
b"arm" => Self::Arm32,
// Any other value panics with a fixed message; the unsupported
// architecture name itself is not included in the message.
_ => panic!("Unsupported architecture"),
}
}

Expand Down Expand Up @@ -118,7 +118,7 @@ impl Arch {
Get the architecture as a string, such as "x64" or "arm64".
*/
#[must_use]
pub fn as_str(&self) -> &'static str {
pub const fn as_str(&self) -> &'static str {
match self {
Self::Arm64 => "arm64",
Self::X64 => "x64",
Expand Down
8 changes: 4 additions & 4 deletions lib/descriptor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ impl Descriptor {
Get the description for the current host system.
*/
#[must_use]
pub fn current_system() -> Self {
pub const fn current_system() -> Self {
Self {
os: OS::current_system(),
arch: Some(Arch::current_system()),
Expand Down Expand Up @@ -85,23 +85,23 @@ impl Descriptor {
Get the operating system of this description.
*/
#[must_use]
pub fn os(&self) -> OS {
pub const fn os(&self) -> OS {
// Returns the stored `os` field by value; since it is read through
// `&self`, this relies on `OS` being `Copy`.
self.os
}

/**
Get the architecture of this description.
*/
#[must_use]
pub fn arch(&self) -> Option<Arch> {
pub const fn arch(&self) -> Option<Arch> {
// Returns the stored `arch` field by value; `None` is passed through
// unchanged when no architecture is recorded on this descriptor.
self.arch
}

/**
Get the preferred toolchain of this description.
*/
#[must_use]
pub fn toolchain(&self) -> Option<Toolchain> {
pub const fn toolchain(&self) -> Option<Toolchain> {
// Returns the stored `toolchain` field by value; `None` is passed
// through unchanged when no toolchain is recorded on this descriptor.
self.toolchain
}

Expand Down
14 changes: 7 additions & 7 deletions lib/descriptor/os.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@ impl OS {
Get the operating system of the current host system.
*/
#[must_use]
pub fn current_system() -> Self {
match CURRENT_OS {
"windows" => Self::Windows,
"macos" => Self::MacOS,
"linux" => Self::Linux,
_ => panic!("Unsupported OS: {CURRENT_OS}"),
// Maps the `CURRENT_OS` string constant onto an `OS` variant; usable in const contexts.
pub const fn current_system() -> Self {
// The constant is compared as raw bytes against byte-string literals.
// NOTE(review): presumably bytes are used because matching on `&str`
// is not permitted inside a `const fn` - confirm against the MSRV.
match CURRENT_OS.as_bytes() {
b"windows" => Self::Windows,
b"macos" => Self::MacOS,
b"linux" => Self::Linux,
// Any other value panics with a fixed message; the unsupported
// OS name itself is not included in the message.
_ => panic!("Unsupported OS"),
}
}

Expand Down Expand Up @@ -93,7 +93,7 @@ impl OS {
Get the name of the operating system as a string.
*/
#[must_use]
pub fn as_str(self) -> &'static str {
pub const fn as_str(self) -> &'static str {
match self {
Self::Windows => "windows",
Self::MacOS => "macos",
Expand Down
14 changes: 11 additions & 3 deletions lib/descriptor/toolchain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,16 @@ impl Toolchain {
Get the toolchain of the current host system.
*/
#[must_use]
pub fn current_system() -> Option<Self> {
None // TODO: Implement detection of the host toolchain
// Picks a `Toolchain` variant from the `target_env` configuration value.
pub const fn current_system() -> Option<Self> {
// `cfg!` is resolved at compile time, so the result reflects the
// `target_env` this code was compiled for rather than a runtime probe.
// The branches are checked in order: msvc, then gnu, then musl.
if cfg!(target_env = "msvc") {
Some(Self::Msvc)
} else if cfg!(target_env = "gnu") {
Some(Self::Gnu)
} else if cfg!(target_env = "musl") {
Some(Self::Musl)
} else {
// No recognised `target_env` (any value other than the three above).
None
}
}

/**
Expand All @@ -44,7 +52,7 @@ impl Toolchain {
Get the name of the toolchain as a string.
*/
#[must_use]
pub fn as_str(self) -> &'static str {
pub const fn as_str(self) -> &'static str {
match self {
Self::Msvc => "msvc",
Self::Gnu => "gnu",
Expand Down
155 changes: 92 additions & 63 deletions lib/sources/extraction.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#![allow(clippy::struct_excessive_bools)]

use std::{
collections::BTreeMap,
env::consts::{EXE_EXTENSION, EXE_SUFFIX},
io::{self, Read},
path::{Path, PathBuf, MAIN_SEPARATOR_STR},
Expand All @@ -11,7 +12,11 @@ use thiserror::Error;
use tokio::{task::spawn_blocking, time::Instant};
use zip::ZipArchive;

use crate::{descriptor::OS, result::RokitResult, sources::ArtifactFormat};
use crate::{
descriptor::{Descriptor, OS},
result::RokitResult,
sources::ArtifactFormat,
};

#[derive(Debug, Error)]
pub enum ExtractError {
Expand Down Expand Up @@ -61,6 +66,7 @@ struct Candidate {
matched_file_inexact: bool, // Case-insensitive filename match
has_exec_perms: bool, // Has executable permissions (UNIX only)
has_exec_suffix: bool, // Has an executable suffix (e.g. `.exe`)
has_descriptor: bool, // Has executable contents (any platform)
}

impl Candidate {
Expand All @@ -70,11 +76,13 @@ impl Candidate {
+ u32::from(self.matched_file_inexact)
+ u32::from(self.has_exec_perms)
+ u32::from(self.has_exec_suffix)
+ u32::from(self.has_descriptor)
}

fn find_best(
entry_paths: impl AsRef<[(PathBuf, Option<u32>)]>,
desired_file_path: impl AsRef<Path>,
mut read_file_contents: impl FnMut(&Path) -> Option<Vec<u8>>,
) -> Option<Self> {
let entry_paths = entry_paths.as_ref();
let desired_file_path = desired_file_path.as_ref();
Expand All @@ -97,6 +105,9 @@ impl Candidate {

let has_exec_perms = perms.is_some_and(|perms| (perms & 0o111) != 0);
let has_exec_suffix = path.extension().is_some_and(|ext| ext == EXE_EXTENSION);
let has_descriptor = read_file_contents(path)
.and_then(Descriptor::detect_from_executable)
.is_some();

Some(Self {
path: path.clone(),
Expand All @@ -105,6 +116,7 @@ impl Candidate {
matched_file_inexact,
has_exec_perms,
has_exec_suffix,
has_descriptor,
})
})
.filter(|c| c.priority() > 0) // Filter out candidates with no matches at all
Expand Down Expand Up @@ -140,43 +152,51 @@ pub async fn extract_zip_file(
// Reading a zip file is a potentially expensive operation, so
// spawn it as a blocking task and use the tokio thread pool.
spawn_blocking(move || {
let mut found = None;
let mut reader = io::Cursor::new(&zip_contents);
let mut zip = ZipArchive::new(&mut reader)?;
let mut zip_cursor = io::Cursor::new(&zip_contents);
let mut zip_reader = ZipArchive::new(&mut zip_cursor)?;

// Gather paths and their permissions,
// avoiding reading the entire zip file
let entry_paths = zip
// Gather simple path + permissions pairs to find candidates from
let entry_paths = zip_reader
.file_names()
.map(|name| {
// NOTE: We don't need to sanitize the files names here
// since we only use them for matching *within the zip file*
(PathBuf::from(name), None::<u32>)
})
.map(|name| (PathBuf::from(name), None::<u32>))
.collect::<Vec<_>>();

// Find the best candidate to extract, if any
let best = Candidate::find_best(entry_paths, &desired_file_path);
if let Some(candidate) = best {
if let Some(path_str) = candidate.path.to_str() {
if let Ok(mut entry) = zip.by_name(path_str) {
let mut bytes = Vec::new();
entry.read_to_end(&mut bytes)?;
found = Some(bytes);
}
// Lazily cache any files that we read, to ensure that we do not
// try to read a file entry which has already been read to its end
let mut read_file_cache = BTreeMap::<_, Vec<u8>>::new();
let mut read_file_contents = |path: &Path| {
if let Some(existing) = read_file_cache.get(path) {
Ok(existing.clone())
} else if let Ok(mut entry) = zip_reader.by_name(path.to_str().unwrap()) {
let mut bytes = Vec::new();
entry.read_to_end(&mut bytes)?;
read_file_cache.insert(path.to_path_buf(), bytes.clone());
Ok(bytes)
} else {
Err(io::Error::new(
io::ErrorKind::NotFound,
format!("File not found: {}", path.display()),
))
}
if found.is_none() {
tracing::warn!(
path = ?candidate.path,
"found candidate path, but failed to extract file"
);
};

// Find the best candidate to extract, if any
let best = Candidate::find_best(entry_paths, &desired_file_path, |path| {
read_file_contents(path).ok()
});
let (path, found) = match best {
None => (None, None),
Some(candidate) => {
let contents = read_file_contents(&candidate.path)?;
(Some(candidate.path), Some(contents))
}
}
};

tracing::debug!(
num_kilobytes,
elapsed = ?start.elapsed(),
found = found.is_some(),
found_any = found.is_some(),
found_path = path.map(|path| path.display().to_string()),
"extracted zip file"
);
Ok(found)
Expand All @@ -203,58 +223,67 @@ pub async fn extract_tar_file(
// Reading a tar file is a potentially expensive operation, so
// spawn it as a blocking task and use the tokio thread pool.
spawn_blocking(move || {
let mut found = None;

/*
Gather paths and their permissions - note that we
need to read the tar file twice to be able to use
our find_best_candidate matching implementation...

We can however use the `entries_with_seek` method
to avoid reading actual file contents into memory.
*/
let mut entry_cursor = io::Cursor::new(&tar_contents);
let mut entry_reader = TarArchive::new(&mut entry_cursor);
let entry_paths = entry_reader
// Gather paths and references to their respective entries,
// without reading actual file contents into memory just yet
let mut tar_cursor = io::Cursor::new(&tar_contents);
let mut tar_reader = TarArchive::new(&mut tar_cursor);
let mut tar_files = tar_reader
.entries_with_seek()?
.filter_map(|entry| {
let entry = entry.ok()?;
if entry.header().entry_type().is_dir() {
return None;
}
let path = entry.path().ok()?;
Some((path.to_path_buf(), entry))
})
.collect::<BTreeMap<PathBuf, _>>();

// Map to simple path + permissions pairs to find candidates from
let entry_paths = tar_files
.iter()
.map(|(path, entry)| {
let perms = entry.header().mode().ok();
Some((path.to_path_buf(), perms))
(path.clone(), perms)
})
.collect::<Vec<_>>();

// Find the best candidate to extract, if any
let best = Candidate::find_best(entry_paths, &desired_file_path);
if let Some(candidate) = best {
let contents_cursor = io::Cursor::new(&tar_contents);
let mut contents_reader = TarArchive::new(contents_cursor);
for entry in contents_reader.entries_with_seek()? {
let mut entry = entry?;
let entry_path = entry.path()?;
if entry_path == candidate.path.as_path() {
let mut bytes = Vec::new();
entry.read_to_end(&mut bytes)?;
found = Some(bytes);
break;
}
// Lazily cache any files that we read, to ensure that we do not
// try to read a file entry which has already been read to its end
let mut read_file_cache = BTreeMap::<_, Vec<u8>>::new();
let mut read_file_contents = |path: &Path| {
if let Some(existing) = read_file_cache.get(path) {
Ok(existing.clone())
} else if let Some(entry) = tar_files.get_mut(path) {
let mut bytes = Vec::new();
entry.read_to_end(&mut bytes)?;
read_file_cache.insert(path.to_path_buf(), bytes.clone());
Ok(bytes)
} else {
Err(io::Error::new(
io::ErrorKind::NotFound,
format!("File not found: {}", path.display()),
))
}
if found.is_none() {
tracing::warn!(
path = ?candidate.path,
"found candidate path, but failed to extract file"
);
};

// Find the best candidate to extract, if any
let best = Candidate::find_best(entry_paths, &desired_file_path, |path| {
read_file_contents(path).ok()
});
let (path, found) = match best {
None => (None, None),
Some(candidate) => {
let contents = read_file_contents(&candidate.path)?;
(Some(candidate.path), Some(contents))
}
}
};

tracing::debug!(
num_kilobytes,
elapsed = ?start.elapsed(),
found = found.is_some(),
found_any = found.is_some(),
found_path = path.map(|path| path.display().to_string()),
"extracted tar file"
);
Ok(found)
Expand Down