From 8a3a7ef8af709e2eace33dd6add7c5ab7015a83b Mon Sep 17 00:00:00 2001 From: harehare Date: Tue, 16 Jun 2026 21:23:19 +0900 Subject: [PATCH 1/8] =?UTF-8?q?=E2=9C=A8=20feat(wasm):=20add=20HTTP=20modu?= =?UTF-8?q?le=20import=20with=20OPFS=20caching?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support fetching mq modules from HTTP/GitHub URLs in the WASM runtime using the browser Fetch API. Versioned (tagged) imports are cached persistently in OPFS; mutable (HEAD/branch) imports use a refreshable cache. Adds clearHttpCache and clearAllHttpCache WASM exports for cache management from the host. --- Cargo.lock | 3 + crates/mq-wasm/Cargo.toml | 3 + crates/mq-wasm/src/script.rs | 416 ++++++++++++++++++++++++++++++++++- 3 files changed, 413 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 20d2fdf06..83a2d486e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2706,6 +2706,7 @@ dependencies = [ "futures", "itertools", "js-sys", + "md5", "mq-check", "mq-formatter", "mq-hir", @@ -2715,9 +2716,11 @@ dependencies = [ "serde", "serde-wasm-bindgen", "serde_json", + "sha2 0.11.0", "wasm-bindgen", "wasm-bindgen-futures", "wasm-bindgen-test", + "web-sys", ] [[package]] diff --git a/crates/mq-wasm/Cargo.toml b/crates/mq-wasm/Cargo.toml index bdd0549a0..3f1e6a738 100644 --- a/crates/mq-wasm/Cargo.toml +++ b/crates/mq-wasm/Cargo.toml @@ -20,10 +20,12 @@ path = "src/lib.rs" futures = {workspace = true} itertools = {workspace = true} js-sys = {workspace = true} +md5 = {workspace = true} mq-formatter = {workspace = true} mq-hir = {workspace = true} mq-check = {workspace = true} mq-lang = {workspace = true, features = ["ast-json"]} +sha2 = {workspace = true} mq-markdown = {workspace = true} opfs = {workspace = true, optional = true} serde = {workspace = true, features = ["derive"]} @@ -31,6 +33,7 @@ serde-wasm-bindgen = {workspace = true} serde_json = {workspace = true} wasm-bindgen = {workspace = true} wasm-bindgen-futures = 
{workspace = true} +web-sys = {version = "0.3", features = ["Response"]} [dev-dependencies] wasm-bindgen-test = {workspace = true} diff --git a/crates/mq-wasm/src/script.rs b/crates/mq-wasm/src/script.rs index 08d03527e..564240117 100644 --- a/crates/mq-wasm/src/script.rs +++ b/crates/mq-wasm/src/script.rs @@ -1,14 +1,12 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; -use std::str::FromStr; +use std::{cell::RefCell, collections::HashMap, rc::Rc, str::FromStr}; use wasm_bindgen::prelude::*; #[cfg(feature = "opfs")] use futures::StreamExt; #[cfg(feature = "opfs")] use opfs::{DirectoryHandle, FileHandle}; -#[cfg(feature = "opfs")] -use std::{cell::RefCell, collections::HashMap, rc::Rc}; #[wasm_bindgen(typescript_custom_section)] const TS_CUSTOM_SECTION: &'static str = r#" @@ -52,6 +50,10 @@ export function diagnostics(code: string, enableTypeCheck?: boolean): Promise; export function inlayHints(code: string): Promise>; export function run(code: string, content: string, options: Options): Promise; +/** Clears mutable HTTP module cache (HEAD/branch imports). Versioned (tagged) cache is preserved. */ +export function clearHttpCache(): Promise; +/** Clears all HTTP module cache including versioned (tagged) imports. */ +export function clearAllHttpCache(): Promise; "#; #[derive(Serialize, Deserialize)] @@ -226,6 +228,8 @@ impl From for mq_markdown::ConversionOptions { #[derive(Debug, Clone, Default)] pub struct WasmModuleResolver { + /// Cache for HTTP-fetched modules, keyed by the original import path (URL or github shorthand). + http_cache: Rc>>, #[cfg(feature = "opfs")] /// Cache of preloaded module contents, keyed by module name cache: Rc>>, @@ -334,21 +338,123 @@ impl WasmModuleResolver { #[cfg(feature = "opfs")] self.cache.borrow_mut().clear(); } + + /// Pre-fetches all HTTP/GitHub import URLs found in `code` (and their transitive imports). 
+ /// + /// Mirrors CLI caching behaviour: + /// - Versioned URLs (`@v1.0`) are cached persistently in OPFS `http_cache/versioned/`. + /// - Mutable URLs (HEAD/branch) are cached in OPFS `http_cache/mutable/`. + /// - Each cached file has a SHA-256 sidecar for tamper detection. + /// - Results are also kept in `http_cache` for the lifetime of this session. + pub async fn preload_http_modules(&self, code: &str) { + const MAX_DEPTH: usize = 5; + + #[cfg(feature = "opfs")] + let root = self.root_dir.borrow().clone(); + + // HTTP import requires OPFS for caching; skip if OPFS is unavailable. + #[cfg(feature = "opfs")] + if root.is_none() { + return; + } + + let mut visited: std::collections::HashSet = std::collections::HashSet::new(); + let mut pending = extract_http_import_urls(code); + + for _ in 0..MAX_DEPTH { + if pending.is_empty() { + break; + } + + let mut next = Vec::new(); + + for module_path in pending { + if visited.contains(&module_path) { + continue; + } + visited.insert(module_path.clone()); + + if self.http_cache.borrow().contains_key(&module_path) { + continue; + } + + let fetch_url = if is_github_url(&module_path) { + match github_to_raw_url(&module_path) { + Some(u) => u, + None => continue, + } + } else if is_http_url(&module_path) { + module_path.clone() + } else { + continue; + }; + + let subdir = if is_versioned_url(&fetch_url) { "versioned" } else { "mutable" }; + let stem = cache_file_stem(&fetch_url); + + // Fast path: OPFS cache hit + #[cfg(feature = "opfs")] + if let Some(ref r) = root { + if let Some(content) = try_read_opfs_http_cache(r, subdir, &stem).await { + next.extend(extract_http_import_urls(&content)); + self.http_cache.borrow_mut().insert(module_path, content); + continue; + } + } + + // Slow path: fetch from network + if let Ok(content) = fetch_text(&fetch_url).await { + #[cfg(feature = "opfs")] + if let Some(ref r) = root { + write_opfs_http_cache(r, subdir, &stem, &content).await; + } + 
next.extend(extract_http_import_urls(&content)); + self.http_cache.borrow_mut().insert(module_path, content); + } + } + + pending = next; + } + } } impl mq_lang::ModuleResolver for WasmModuleResolver { + fn canonical_name<'a>(&self, module_path: &'a str) -> &'a str { + if is_github_url(module_path) || is_http_url(module_path) { + extract_module_name(module_path) + } else { + module_path + } + } + fn resolve(&self, module_name: &str) -> Result { if let Some(content_fn) = mq_lang::STANDARD_MODULES.get(module_name) { return Ok(content_fn().to_string()); } + if let Some(content) = self.http_cache.borrow().get(module_name).cloned() { + return Ok(content); + } + #[cfg(feature = "opfs")] - return self.cache.borrow().get(module_name).cloned().ok_or_else(|| { - mq_lang::ModuleError::NotFound(std::borrow::Cow::Owned(format!( - "Module '{}' not found in cache. Use preload_modules() to load it first.", - module_name - ))) - }); + { + if (is_http_url(module_name) || is_github_url(module_name)) + && self.root_dir.borrow().is_none() + { + return Err(mq_lang::ModuleError::IOError(std::borrow::Cow::Owned( + format!( + "HTTP import of '{}' is not available: OPFS is not supported in this environment.", + module_name + ), + ))); + } + return self.cache.borrow().get(module_name).cloned().ok_or_else(|| { + mq_lang::ModuleError::NotFound(std::borrow::Cow::Owned(format!( + "Module '{}' not found in cache. Use preload_modules() to load it first.", + module_name + ))) + }); + } #[cfg(not(feature = "opfs"))] return Err(mq_lang::ModuleError::NotFound(std::borrow::Cow::Owned(format!( "Module '{}' not found. 
Module resolution is not supported in this environment.", @@ -357,6 +463,11 @@ impl mq_lang::ModuleResolver for WasmModuleResolver { } fn get_path(&self, module_name: &str) -> Result { + if is_github_url(module_name) { + return github_to_raw_url(module_name) + .map(Ok) + .unwrap_or_else(|| Ok(module_name.to_string())); + } Ok(module_name.to_string()) } @@ -369,11 +480,54 @@ impl mq_lang::ModuleResolver for WasmModuleResolver { } } +/// Removes mutable HTTP module cache (HEAD/branch imports). +/// Versioned (tagged) cached modules are preserved, matching `--refresh-modules` CLI behaviour. +#[wasm_bindgen(js_name=clearHttpCache)] +pub async fn clear_http_cache() -> Result<(), JsValue> { + #[cfg(feature = "opfs")] + { + use opfs::DirectoryHandle as _; + + let root = opfs::persistent::app_specific_dir() + .await + .map_err(|e| JsValue::from_str(&format!("OPFS unavailable: {:?}", e)))?; + + if let Ok(mut cache_dir) = root + .get_directory_handle_with_options(HTTP_CACHE_DIR, &opfs::GetDirectoryHandleOptions { create: false }) + .await + { + let _ = cache_dir + .remove_entry_with_options("mutable", &opfs::FileSystemRemoveOptions { recursive: true }) + .await; + } + } + Ok(()) +} + +/// Removes all HTTP module cache including versioned (tagged) imports, matching `--clear-cache` CLI behaviour. 
+#[wasm_bindgen(js_name=clearAllHttpCache)] +pub async fn clear_all_http_cache() -> Result<(), JsValue> { + #[cfg(feature = "opfs")] + { + use opfs::DirectoryHandle as _; + + let mut root = opfs::persistent::app_specific_dir() + .await + .map_err(|e| JsValue::from_str(&format!("OPFS unavailable: {:?}", e)))?; + + let _ = root + .remove_entry_with_options(HTTP_CACHE_DIR, &opfs::FileSystemRemoveOptions { recursive: true }) + .await; + } + Ok(()) +} + #[wasm_bindgen(js_name=run, skip_typescript)] pub async fn run(code: &str, content: &str, options: JsValue) -> Result { let resolver = WasmModuleResolver::new(); resolver.initialize().await; resolver.preload_modules().await; + resolver.preload_http_modules(code).await; let options: Options = serde_wasm_bindgen::from_value(options) .map_err(|e| JsValue::from_str(&format!("Failed to parse options: {}", e)))?; @@ -717,6 +871,250 @@ pub async fn hover(code: &str, line: u32, column: u32) -> JsValue { serde_wasm_bindgen::to_value(&result).unwrap_or(JsValue::NULL) } +/// Name of the OPFS subdirectory used to store cached HTTP modules. +const HTTP_CACHE_DIR: &str = "http_cache"; + +fn is_http_url(url: &str) -> bool { + url.starts_with("https://") || url.starts_with("http://") +} + +fn is_github_url(url: &str) -> bool { + let s = url + .strip_prefix("https://") + .or_else(|| url.strip_prefix("http://")) + .unwrap_or(url); + s.starts_with("github.com/") +} + +/// Converts a GitHub shorthand (`[https://]github.com/{owner}/{repo}[@{version}]`) to a +/// `raw.githubusercontent.com` HTTPS URL. 
+fn github_to_raw_url(input: &str) -> Option { + let without_scheme = input + .strip_prefix("https://") + .or_else(|| input.strip_prefix("http://")) + .unwrap_or(input); + + let rest = without_scheme.strip_prefix("github.com/")?; + + let (path_part, version) = match rest.rfind('@') { + Some(pos) => (&rest[..pos], &rest[pos + 1..]), + None => (rest, "HEAD"), + }; + + let components: Vec<&str> = path_part.splitn(3, '/').collect(); + + let (owner, repo, file) = match components.as_slice() { + [owner, name] => { + let file = if name.ends_with(".mq") { + name.to_string() + } else { + format!("{}.mq", name) + }; + (owner.to_string(), name.to_string(), file) + } + [owner, repo, subpath] => (owner.to_string(), repo.to_string(), subpath.to_string()), + _ => return None, + }; + + Some(format!( + "https://raw.githubusercontent.com/{}/{}/{}/{}", + owner, repo, version, file + )) +} + +/// Returns `true` if `url` is pinned to a specific immutable version tag (same logic as CLI). +fn is_versioned_url(url: &str) -> bool { + const MUTABLE_REFS: &[&str] = &["HEAD", "main", "master"]; + let path = url + .strip_prefix("https://raw.githubusercontent.com/") + .or_else(|| url.strip_prefix("http://raw.githubusercontent.com/")); + match path { + Some(rest) => { + let ref_segment = rest.split('/').nth(2).unwrap_or("HEAD"); + !MUTABLE_REFS.contains(&ref_segment) + } + None => false, + } +} + +/// Returns the MD5 hex string of `url`, used as the cache file stem. +fn cache_file_stem(url: &str) -> String { + format!("{:x}", md5::compute(url)) +} + +/// Computes SHA-256 of `content` as a lowercase hex string. +fn compute_content_hash(content: &str) -> String { + use sha2::Digest; + sha2::Sha256::digest(content.as_bytes()) + .iter() + .map(|b| format!("{:02x}", b)) + .collect() +} + +/// Tries to read a cached module from OPFS. Returns `None` on any error or hash mismatch. 
+#[cfg(feature = "opfs")] +async fn try_read_opfs_http_cache( + root: &opfs::persistent::DirectoryHandle, + subdir: &str, + stem: &str, +) -> Option { + use opfs::{DirectoryHandle as _, FileHandle as _}; + + let cache_dir = root + .get_directory_handle_with_options(HTTP_CACHE_DIR, &opfs::GetDirectoryHandleOptions { create: false }) + .await + .ok()?; + let sub = cache_dir + .get_directory_handle_with_options(subdir, &opfs::GetDirectoryHandleOptions { create: false }) + .await + .ok()?; + + let content_fh = sub + .get_file_handle_with_options(&format!("{}.mq", stem), &opfs::GetFileHandleOptions { create: false }) + .await + .ok()?; + let hash_fh = sub + .get_file_handle_with_options(&format!("{}.mq.sha256", stem), &opfs::GetFileHandleOptions { create: false }) + .await + .ok()?; + + let content = String::from_utf8(content_fh.read().await.ok()?).ok()?; + let stored = String::from_utf8(hash_fh.read().await.ok()?).ok()?; + + if stored.trim() == compute_content_hash(&content) { + Some(content) + } else { + None + } +} + +/// Writes `content` and its SHA-256 sidecar to the OPFS HTTP cache. Silently ignores errors. 
+#[cfg(feature = "opfs")] +async fn write_opfs_http_cache( + root: &opfs::persistent::DirectoryHandle, + subdir: &str, + stem: &str, + content: &str, +) { + async fn write_file( + dir: &opfs::persistent::DirectoryHandle, + name: &str, + data: &[u8], + ) -> Option<()> { + use opfs::{DirectoryHandle as _, FileHandle as _, WritableFileStream as _}; + let mut fh = dir + .get_file_handle_with_options(name, &opfs::GetFileHandleOptions { create: true }) + .await + .ok()?; + let mut w = fh + .create_writable_with_options(&opfs::CreateWritableOptions { keep_existing_data: false }) + .await + .ok()?; + w.write_at_cursor_pos(data).await.ok()?; + w.close().await.ok() + } + + let Ok(cache_dir) = root + .get_directory_handle_with_options(HTTP_CACHE_DIR, &opfs::GetDirectoryHandleOptions { create: true }) + .await + else { + return; + }; + let Ok(sub) = cache_dir + .get_directory_handle_with_options(subdir, &opfs::GetDirectoryHandleOptions { create: true }) + .await + else { + return; + }; + + let _ = write_file(&sub, &format!("{}.mq", stem), content.as_bytes()).await; + let _ = write_file(&sub, &format!("{}.mq.sha256", stem), compute_content_hash(content).as_bytes()).await; +} + +/// Extracts a short module name from an HTTP URL or GitHub shorthand. +fn extract_module_name(module_path: &str) -> &str { + let path = module_path + .strip_prefix("https://") + .or_else(|| module_path.strip_prefix("http://")) + .unwrap_or(module_path); + + let without_version = match path.rfind('@') { + Some(pos) => &path[..pos], + None => path, + }; + + let last_segment = without_version.rsplit('/').next().unwrap_or(without_version); + last_segment.strip_suffix(".mq").unwrap_or(last_segment) +} + +/// Parses `code` and returns all import paths that look like HTTP or GitHub URLs. 
+fn extract_http_import_urls(code: &str) -> Vec { + let token_arena = mq_lang::Shared::new(mq_lang::SharedCell::new(mq_lang::Arena::new(1024))); + let Ok(program) = mq_lang::parse(code, token_arena) else { + return vec![]; + }; + + program + .iter() + .filter_map(|node| { + if let mq_lang::AstExpr::Import(mq_lang::AstLiteral::String(url)) = &*node.expr { + if is_http_url(url) || is_github_url(url) { + Some(url.clone()) + } else { + None + } + } else { + None + } + }) + .collect() +} + +/// Fetches the text content of a HTTPS URL. +/// +/// Uses the global `fetch` function, which is available in browsers (`window.fetch`), +/// Node.js 18+, and Deno — so the same implementation works across all WASM hosts. +async fn fetch_text(url: &str) -> Result { + if !url.starts_with("https://") { + return Err(format!("only HTTPS URLs are supported: {}", url)); + } + + let global = js_sys::global(); + let fetch_val = js_sys::Reflect::get(&global, &JsValue::from_str("fetch")) + .map_err(|_| "global fetch is not available".to_string())?; + + if !fetch_val.is_function() { + return Err("fetch is not available in this environment".to_string()); + } + + let fetch_fn: js_sys::Function = fetch_val.unchecked_into(); + let fetch_promise: js_sys::Promise = fetch_fn + .call1(&JsValue::UNDEFINED, &JsValue::from_str(url)) + .map_err(|e| format!("fetch() call failed: {:?}", e))? 
+ .unchecked_into(); + + let response_val = wasm_bindgen_futures::JsFuture::from(fetch_promise) + .await + .map_err(|e| format!("fetch request failed: {:?}", e))?; + + let response: web_sys::Response = response_val + .dyn_into() + .map_err(|_| "failed to cast fetch result to Response".to_string())?; + + if !response.ok() { + return Err(format!("HTTP {} fetching {}", response.status(), url)); + } + + let text_promise = response.text().map_err(|e| format!("{:?}", e))?; + let text_val = wasm_bindgen_futures::JsFuture::from(text_promise) + .await + .map_err(|e| format!("failed to read response body: {:?}", e))?; + + text_val + .as_string() + .ok_or_else(|| "response body is not a string".to_string()) +} + #[cfg(test)] mod tests { use super::*; From ad5c2744353e19528fb3fd93829f9dfb90be2c5b Mon Sep 17 00:00:00 2001 From: harehare Date: Tue, 16 Jun 2026 22:51:45 +0900 Subject: [PATCH 2/8] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor(http-import):?= =?UTF-8?q?=20make=20HTTP=20fetcher=20pluggable=20via=20HttpFetcher=20trai?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split `http-import` feature into `http-import` (URL utils + trait + generic resolver) and `http-import-ureq` (adds UreqFetcher with disk cache + ureq). 
- Add `HttpFetcher` trait covering both fetch and cache responsibility - Introduce generic `HttpModuleResolver` replacing the ureq-coupled implementation - Add `UreqFetcher` behind `http-import-ureq` feature (disk cache with versioned/mutable subdirs and SHA-256 sidecar, ureq HTTPS fetch) - Add `WasmFetcher` in mq-wasm: reads from a pre-populated in-memory HashMap, bridging async OPFS preload with sync module resolution - mq-run enables `mq-lang/http-import-ureq`; mq-wasm enables `mq-lang/http-import` (no ureq dependency in WASM) --- crates/mq-lang/Cargo.toml | 3 +- crates/mq-lang/src/engine.rs | 2 +- crates/mq-lang/src/lib.rs | 4 + crates/mq-lang/src/module.rs | 5 +- crates/mq-lang/src/module/resolver.rs | 60 +- .../src/module/resolver/http_import.rs | 240 ++++ .../src/module/resolver/http_resolver.rs | 1102 +++++------------ crates/mq-run/Cargo.toml | 2 +- crates/mq-wasm/Cargo.toml | 2 +- crates/mq-wasm/src/script.rs | 324 +++-- 10 files changed, 740 insertions(+), 1004 deletions(-) create mode 100644 crates/mq-lang/src/module/resolver/http_import.rs diff --git a/crates/mq-lang/Cargo.toml b/crates/mq-lang/Cargo.toml index 9f551e7f1..1e6690916 100644 --- a/crates/mq-lang/Cargo.toml +++ b/crates/mq-lang/Cargo.toml @@ -55,7 +55,8 @@ default = ["std"] file-io = [] std = [] sync = [] -http-import = ["dep:ureq"] +http-import = [] +http-import-ureq = ["http-import", "dep:ureq"] [dev-dependencies] divan = {workspace = true} diff --git a/crates/mq-lang/src/engine.rs b/crates/mq-lang/src/engine.rs index 2ca635bec..e48f48164 100644 --- a/crates/mq-lang/src/engine.rs +++ b/crates/mq-lang/src/engine.rs @@ -341,7 +341,7 @@ impl Engine { } } -#[cfg(feature = "http-import")] +#[cfg(feature = "http-import-ureq")] impl Engine { /// Replaces the HTTP resolver's domain allowlist. 
/// diff --git a/crates/mq-lang/src/lib.rs b/crates/mq-lang/src/lib.rs index e6a704f9a..f77073740 100644 --- a/crates/mq-lang/src/lib.rs +++ b/crates/mq-lang/src/lib.rs @@ -86,6 +86,10 @@ pub use eval::runtime_value::{RuntimeValue, RuntimeValues}; pub use ident::Ident; pub use lexer::Options as LexerOptions; pub use lexer::token::{StringSegment, Token, TokenKind}; +#[cfg(feature = "http-import")] +pub use module::resolver::http_import; +#[cfg(feature = "http-import")] +pub use module::resolver::http_resolver::{HttpFetcher, HttpModuleResolver}; pub use module::{ BUILTIN_FILE as BUILTIN_MODULE_FILE, Module, ModuleId, ModuleLoader, STANDARD_MODULES, error::ModuleError, resolver::DefaultModuleResolver, resolver::ModuleResolver, diff --git a/crates/mq-lang/src/module.rs b/crates/mq-lang/src/module.rs index 26a3551b8..0b26d335f 100644 --- a/crates/mq-lang/src/module.rs +++ b/crates/mq-lang/src/module.rs @@ -242,8 +242,7 @@ impl ModuleLoader { pub fn resolve(&self, module_name: &str) -> Result { #[cfg(feature = "http-import")] if self.http_depth > 0 - && (resolver::http_resolver::HttpModuleResolver::is_remote_url(module_name) - || resolver::http_resolver::HttpModuleResolver::is_github_url(module_name)) + && (resolver::http_import::is_remote_url(module_name) || resolver::http_import::is_github_url(module_name)) { return Err(ModuleError::HttpImportNotAllowed(std::borrow::Cow::Owned( module_name.to_string(), @@ -383,7 +382,7 @@ impl ModuleLoader { } } -#[cfg(feature = "http-import")] +#[cfg(feature = "http-import-ureq")] impl ModuleLoader { /// Replaces the HTTP resolver's domain allowlist. 
pub fn set_http_allowed_domains(&mut self, domains: Vec) { diff --git a/crates/mq-lang/src/module/resolver.rs b/crates/mq-lang/src/module/resolver.rs index f96738fac..b1be1684c 100644 --- a/crates/mq-lang/src/module/resolver.rs +++ b/crates/mq-lang/src/module/resolver.rs @@ -1,5 +1,7 @@ #[cfg(feature = "http-import")] -pub(crate) mod http_resolver; +pub mod http_import; +#[cfg(feature = "http-import")] +pub mod http_resolver; pub(crate) mod local_fs_resolver; pub(crate) mod std_resolver; @@ -31,8 +33,8 @@ pub trait ModuleResolver: Clone + Default { pub struct DefaultModuleResolver { local_fs_resolver: local_fs_resolver::LocalFsModuleResolver, std_resolver: std_resolver::StdModuleResolver, - #[cfg(feature = "http-import")] - http_resolver: http_resolver::HttpModuleResolver, + #[cfg(feature = "http-import-ureq")] + http_resolver: http_resolver::HttpModuleResolver, } impl ModuleResolver for DefaultModuleResolver { @@ -49,7 +51,7 @@ impl ModuleResolver for DefaultModuleResolver { Err(e) => return Err(e), } - #[cfg(feature = "http-import")] + #[cfg(feature = "http-import-ureq")] match self.http_resolver.resolve(module_name) { Ok(content) => return Ok(content), Err(ModuleError::NotFound(_)) => {} @@ -72,7 +74,7 @@ impl ModuleResolver for DefaultModuleResolver { Err(e) => return Err(e), } - #[cfg(feature = "http-import")] + #[cfg(feature = "http-import-ureq")] match self.http_resolver.get_path(module_name) { Ok(path) => return Ok(path), Err(ModuleError::NotFound(_)) => {} @@ -91,10 +93,9 @@ impl ModuleResolver for DefaultModuleResolver { } fn canonical_name<'a>(&self, module_path: &'a str) -> &'a str { - #[cfg(feature = "http-import")] + #[cfg(feature = "http-import-ureq")] { - use http_resolver::HttpModuleResolver; - if HttpModuleResolver::is_github_url(module_path) || HttpModuleResolver::is_remote_url(module_path) { + if http_import::is_github_url(module_path) || http_import::is_remote_url(module_path) { return self.http_resolver.canonical_name(module_path); } } @@ 
-114,7 +115,7 @@ impl DefaultModuleResolver { Some(paths) }), std_resolver: std_resolver::StdModuleResolver, - #[cfg(feature = "http-import")] + #[cfg(feature = "http-import-ureq")] http_resolver: http_resolver::HttpModuleResolver::default(), } } @@ -123,42 +124,33 @@ impl DefaultModuleResolver { /// /// An empty `allowed_domains` list restricts access to the built-in default domain /// (`raw.githubusercontent.com/harehare`) only; it does not open up all URLs. - /// Only available when the `http-import` feature is enabled. - #[cfg(feature = "http-import")] + /// Only available when the `http-import-ureq` feature is enabled. + #[cfg(feature = "http-import-ureq")] pub fn with_http(mut self, allowed_domains: Vec, timeout: Option) -> Self { self.http_resolver = http_resolver::HttpModuleResolver::new( allowed_domains, - timeout.unwrap_or(std::time::Duration::from_secs(10)), + http_resolver::UreqFetcher::new(timeout.unwrap_or(std::time::Duration::from_secs(10))), ); self } /// Replaces the HTTP resolver's domain allowlist. /// - /// An empty list restricts access to the built-in default domain - /// (`raw.githubusercontent.com/harehare`) only. - /// - /// Entries in the form `github.com/{user}/{repo}` are automatically expanded to - /// `raw.githubusercontent.com/{user}/{repo}`. - #[cfg(feature = "http-import")] + /// An empty list restricts access to the built-in default domain only. + /// Entries in the form `github.com/{user}/{repo}` are automatically expanded. + #[cfg(feature = "http-import-ureq")] pub fn set_allowed_domains(&mut self, domains: Vec) { - self.http_resolver.allowed_remote_domains = domains - .into_iter() - .map(|d| http_resolver::HttpModuleResolver::normalize_allowed_domain(&d)) - .collect(); + self.http_resolver.set_allowed_domains(domains); } - /// Clears all locally-cached HTTP module files. - /// - /// Call this once before processing to force a re-fetch of all cached modules - /// on the next resolve. 
- #[cfg(feature = "http-import")] + /// Clears all locally-cached HTTP module files (mutable/HEAD only). + #[cfg(feature = "http-import-ureq")] pub fn clear_http_cache(&self) -> Result<(), crate::module::error::ModuleError> { self.http_resolver.clear_cache() } - /// Clears all HTTP module cache including versioned modules and lock files. - #[cfg(feature = "http-import")] + /// Clears all HTTP module cache including versioned modules. + #[cfg(feature = "http-import-ureq")] pub fn clear_http_cache_all(&self) -> Result<(), crate::module::error::ModuleError> { self.http_resolver.clear_all_cache() } @@ -215,7 +207,6 @@ mod tests { write_module(&dir, "csv", "def foo(): 1;"); let resolver = DefaultModuleResolver::new(vec![dir.path().to_path_buf()]); - // standard module should win over local file with the same name let content = resolver.resolve("csv").unwrap(); assert!(!content.contains("def foo(): 1;")); } @@ -241,20 +232,17 @@ mod tests { assert_eq!(resolver.search_paths(), paths); } - #[cfg(feature = "http-import")] + #[cfg(feature = "http-import-ureq")] #[rstest] #[case("https://nonexistent.invalid/foo.mq")] fn test_http_url_not_in_local(#[case] url: &str) { - // Without an HTTP resolver configured, should fall through to error let resolver = DefaultModuleResolver::new(vec![]); - // Either network error or module-not-found; should not panic assert!(resolver.resolve(url).is_err()); } - #[cfg(feature = "http-import")] + #[cfg(feature = "http-import-ureq")] #[test] fn test_with_http_normalizes_github_domains() { - // with_http delegates to HttpModuleResolver::new which normalizes github.com/* entries let resolver = DefaultModuleResolver::new(vec![]).with_http(vec!["github.com/alice/myrepo".to_string()], None); assert!( resolver @@ -268,7 +256,7 @@ mod tests { ); } - #[cfg(feature = "http-import")] + #[cfg(feature = "http-import-ureq")] #[test] fn test_set_allowed_domains_normalizes_github_domains() { let mut resolver = DefaultModuleResolver::new(vec![]); diff --git 
a/crates/mq-lang/src/module/resolver/http_import.rs b/crates/mq-lang/src/module/resolver/http_import.rs new file mode 100644 index 000000000..1f9b293c7 --- /dev/null +++ b/crates/mq-lang/src/module/resolver/http_import.rs @@ -0,0 +1,240 @@ +//! Pure URL utility functions shared between the CLI HTTP resolver and the WASM resolver. +//! +//! These functions have no I/O dependencies; they require only the `http-import` feature, not `http-import-ureq`. + +/// Default domain that is always permitted without an explicit allowlist entry. +pub const DEFAULT_ALLOWED_DOMAIN: &str = "raw.githubusercontent.com/harehare"; + +/// Returns `true` if `url` has an `http://` or `https://` scheme. +pub fn is_remote_url(url: &str) -> bool { + url.starts_with("http://") || url.starts_with("https://") +} + +/// Returns `true` if `input` is a GitHub shorthand or full GitHub URL. +/// +/// Recognized forms (with or without `https://` prefix): +/// - `github.com/{owner}/{path}[@{version}]` +pub fn is_github_url(input: &str) -> bool { + let s = input + .strip_prefix("https://") + .or_else(|| input.strip_prefix("http://")) + .unwrap_or(input); + s.starts_with("github.com/") +} + +/// Converts a GitHub shorthand into a `raw.githubusercontent.com` fetch URL. 
+/// +/// # Format +/// `[https://]github.com/{owner}/{path}[@{version}]` +/// +/// Where `{path}` is one of: +/// - `{repo}` → fetches `{repo}.mq` from the repo root at HEAD +/// - `{repo.mq}` → fetches `{repo.mq}` from the repo named `{repo.mq}` at HEAD +/// - `{repo}/{subpath}` → fetches `{subpath}` from the given repo +pub fn github_to_raw_url(input: &str) -> Option { + let without_scheme = input + .strip_prefix("https://") + .or_else(|| input.strip_prefix("http://")) + .unwrap_or(input); + + let rest = without_scheme.strip_prefix("github.com/")?; + + let (path_part, version) = match rest.rfind('@') { + Some(pos) => (&rest[..pos], &rest[pos + 1..]), + None => (rest, "HEAD"), + }; + + let components: Vec<&str> = path_part.splitn(3, '/').collect(); + + let (owner, repo, file) = match components.as_slice() { + [owner, name] => { + let repo = *name; + let file = if name.ends_with(".mq") { + name.to_string() + } else { + format!("{}.mq", name) + }; + (owner.to_string(), repo.to_string(), file) + } + [owner, repo, subpath] => (owner.to_string(), repo.to_string(), subpath.to_string()), + _ => return None, + }; + + Some(format!( + "https://raw.githubusercontent.com/{}/{}/{}/{}", + owner, repo, version, file + )) +} + +/// Returns `true` if `url` is pinned to a specific immutable version tag. +/// +/// For `raw.githubusercontent.com` URLs the ref segment (the third path component after +/// `{owner}/{repo}/`) is checked: `HEAD`, `main`, and `master` are mutable; everything +/// else (e.g. `v0.1.0`) is treated as versioned/immutable. +/// +/// All non-GitHub HTTP URLs are considered mutable. 
+pub fn is_versioned_url(url: &str) -> bool { + const MUTABLE_REFS: &[&str] = &["HEAD", "main", "master"]; + let path = url + .strip_prefix("https://raw.githubusercontent.com/") + .or_else(|| url.strip_prefix("http://raw.githubusercontent.com/")); + match path { + Some(rest) => { + let ref_segment = rest.split('/').nth(2).unwrap_or("HEAD"); + !MUTABLE_REFS.contains(&ref_segment) + } + None => false, + } +} + +/// Extracts a short module name from an HTTP URL or GitHub shorthand. +/// +/// Strips the URL scheme, domain, and path prefix, then removes any `@version` +/// suffix and the `.mq` file extension from the last path segment. +pub fn extract_module_name(module_path: &str) -> &str { + let path = module_path + .strip_prefix("https://") + .or_else(|| module_path.strip_prefix("http://")) + .unwrap_or(module_path); + + let without_version = match path.rfind('@') { + Some(pos) => &path[..pos], + None => path, + }; + + let last_segment = without_version.rsplit('/').next().unwrap_or(without_version); + last_segment.strip_suffix(".mq").unwrap_or(last_segment) +} + +/// Normalizes a user-supplied allowed-domain entry. +/// +/// `github.com/{path}` (with or without `https://`/`http://` prefix) is expanded to +/// `raw.githubusercontent.com/{path}` so that users can write +/// `--allowed-domain github.com/alice/myrepo` instead of the full raw content URL. +/// The scheme prefix is always stripped before storing. +pub fn normalize_allowed_domain(domain: &str) -> String { + let without_scheme = domain + .strip_prefix("https://") + .or_else(|| domain.strip_prefix("http://")) + .unwrap_or(domain); + + if let Some(rest) = without_scheme.strip_prefix("github.com/") { + format!("raw.githubusercontent.com/{}", rest) + } else { + without_scheme.to_string() + } +} + +/// Returns `true` if `url`'s host/path matches `domain` as a strict prefix. 
+/// +/// The match requires that after the prefix the next character is `/`, `?`, `#`, `:`, or +/// end of string — preventing `example.com.evil.com` from matching `example.com`. +pub fn prefix_matches(url_without_scheme: &str, domain: &str) -> bool { + let rest = match url_without_scheme.strip_prefix(domain) { + Some(r) => r, + None => return false, + }; + rest.is_empty() || rest.starts_with('/') || rest.starts_with('?') || rest.starts_with('#') || rest.starts_with(':') +} + +/// Returns `true` if `url` is permitted given `allowed_domains`. +/// +/// `DEFAULT_ALLOWED_DOMAIN` is always allowed regardless of `allowed_domains`. +/// An empty `allowed_domains` slice restricts access to the default domain only. +pub fn is_allowed_url(url: &str, allowed_domains: &[String]) -> bool { + let url_without_scheme = url + .strip_prefix("https://") + .or_else(|| url.strip_prefix("http://")) + .unwrap_or(url); + + if prefix_matches(url_without_scheme, DEFAULT_ALLOWED_DOMAIN) { + return true; + } + + allowed_domains + .iter() + .any(|domain| prefix_matches(url_without_scheme, domain.as_str())) +} + +#[cfg(test)] +mod tests { + use rstest::rstest; + + use super::*; + + #[rstest] + #[case("https://example.com/foo.mq", true)] + #[case("http://example.com/foo.mq", true)] + #[case("ftp://example.com/foo.mq", false)] + #[case("example.com/foo.mq", false)] + #[case("csv", false)] + fn test_is_remote_url(#[case] url: &str, #[case] expected: bool) { + assert_eq!(is_remote_url(url), expected); + } + + #[rstest] + #[case("github.com/owner/repo", true)] + #[case("https://github.com/owner/repo", true)] + #[case("http://github.com/owner/repo", true)] + #[case("https://example.com/foo.mq", false)] + #[case("csv", false)] + fn test_is_github_url(#[case] input: &str, #[case] expected: bool) { + assert_eq!(is_github_url(input), expected); + } + + #[rstest] + #[case( + "github.com/harehare/lisp", + "https://raw.githubusercontent.com/harehare/lisp/HEAD/lisp.mq" + )] + #[case( + 
"github.com/harehare/lisp@v0.1.0", + "https://raw.githubusercontent.com/harehare/lisp/v0.1.0/lisp.mq" + )] + #[case( + "github.com/harehare/repo/lib/utils.mq@v2.0", + "https://raw.githubusercontent.com/harehare/repo/v2.0/lib/utils.mq" + )] + fn test_github_to_raw_url(#[case] input: &str, #[case] expected: &str) { + assert_eq!(github_to_raw_url(input).unwrap(), expected); + } + + #[rstest] + #[case("github.com/alice/mymod", "mymod")] + #[case("github.com/alice/mymod.mq@v1.0", "mymod")] + #[case("https://example.com/path/foo.mq", "foo")] + fn test_extract_module_name(#[case] input: &str, #[case] expected: &str) { + assert_eq!(extract_module_name(input), expected); + } + + #[rstest] + #[case("github.com/alice/myrepo", "raw.githubusercontent.com/alice/myrepo")] + #[case("https://github.com/alice/myrepo", "raw.githubusercontent.com/alice/myrepo")] + #[case("example.com", "example.com")] + fn test_normalize_allowed_domain(#[case] input: &str, #[case] expected: &str) { + assert_eq!(normalize_allowed_domain(input), expected); + } + + #[rstest] + // default domain always allowed + #[case(vec![], "https://raw.githubusercontent.com/harehare/lisp/HEAD/lisp.mq", true)] + // non-default domain blocked by empty list + #[case(vec![], "https://example.com/foo.mq", false)] + // user-specified domain allowed + #[case(vec!["example.com".to_string()], "https://example.com/foo.mq", true)] + #[case(vec!["example.com".to_string()], "https://other.com/foo.mq", false)] + // prefix-bypass prevention + #[case(vec!["example.com".to_string()], "https://example.com.evil.com/foo.mq", false)] + fn test_is_allowed_url(#[case] allowed: Vec, #[case] url: &str, #[case] expected: bool) { + assert_eq!(is_allowed_url(url, &allowed), expected); + } + + #[rstest] + #[case("https://raw.githubusercontent.com/alice/mymod/v0.1.0/mymod.mq", true)] + #[case("https://raw.githubusercontent.com/alice/mymod/HEAD/mymod.mq", false)] + #[case("https://raw.githubusercontent.com/alice/mymod/main/mymod.mq", false)] + 
#[case("https://example.com/foo.mq", false)] + fn test_is_versioned_url(#[case] url: &str, #[case] expected: bool) { + assert_eq!(is_versioned_url(url), expected); + } +} diff --git a/crates/mq-lang/src/module/resolver/http_resolver.rs b/crates/mq-lang/src/module/resolver/http_resolver.rs index ebea2fc87..3aeffb480 100644 --- a/crates/mq-lang/src/module/resolver/http_resolver.rs +++ b/crates/mq-lang/src/module/resolver/http_resolver.rs @@ -1,62 +1,57 @@ -use std::{borrow::Cow, fs, path::PathBuf, time::Duration}; - -use sha2::Digest; +use std::{borrow::Cow, path::PathBuf}; +use super::http_import::{ + extract_module_name, github_to_raw_url, is_allowed_url, is_github_url, is_remote_url, normalize_allowed_domain, +}; use crate::{ModuleError, ModuleResolver}; -/// Default domain that is always permitted without `--allowed-domain`. -const DEFAULT_ALLOWED_DOMAIN: &str = "raw.githubusercontent.com/harehare"; - -/// Maximum response body size for a fetched module (1 MiB). -const MAX_MODULE_SIZE: u64 = 1024 * 1024; - -/// Resolves mq modules from HTTP/HTTPS URLs with optional domain allowlisting and local disk caching. -/// -/// # Caching +/// Pluggable HTTP fetch-and-cache backend for [`HttpModuleResolver`]. /// -/// Fetched modules are stored under `{system_cache_dir}/mq/` in one of two subdirectories: -/// -/// - `versioned/` — URLs resolved to a specific tag (e.g. `@v0.1.0`); never cleared by -/// [`HttpModuleResolver::clear_cache`]. -/// - `mutable/` — URLs resolved to `HEAD`, `main`, `master`, or any non-GitHub HTTP URL; -/// cleared by [`HttpModuleResolver::clear_cache`] (i.e. `--refresh-modules`). +/// Implementations are responsible for both fetching remote content and managing +/// any associated caching (disk, memory, etc.). The URL passed to [`fetch`] has +/// already been normalized and domain-checked by [`HttpModuleResolver`]. +pub trait HttpFetcher: Clone + Default { + /// Fetch the content at `url`, using any internal cache. 
+ /// + /// `url` is always a fully-qualified `https://` URL that has already passed + /// domain allow-list validation. + fn fetch(&self, url: &str) -> Result; +} + +/// Resolves mq modules from HTTP/HTTPS URLs with optional domain allowlisting. /// -/// Each cached module is accompanied by a `.mq.sha256` sidecar file for tamper detection. -/// Files are named `{md5(url)}.mq` and `{md5(url)}.mq.sha256` within their subdirectory. -/// If the process crashes between writing the two files, the sidecar will be absent and -/// the next `resolve()` call will automatically re-fetch rather than serve partial data. +/// The actual HTTP request and caching strategy are delegated to the [`HttpFetcher`] +/// type parameter `F`, making it possible to swap in different backends (e.g. a +/// `ureq`-based native fetcher or a pre-populated in-memory cache for WASM). /// /// # GitHub shorthand /// -/// In addition to plain http(s) URLs, the resolver accepts a shorthand form that omits -/// the `https://` scheme and maps GitHub paths to `raw.githubusercontent.com`: +/// In addition to plain `http(s)://` URLs, the resolver accepts shorthand GitHub paths: /// /// ```text /// github.com/{owner}/{path}[@{version}] /// ``` /// -/// See [`HttpModuleResolver::github_to_raw_url`] for details. +/// See [`super::http_import::github_to_raw_url`] for details. 
#[derive(Debug, Clone)] -pub struct HttpModuleResolver { +pub struct HttpModuleResolver { pub(crate) allowed_remote_domains: Vec, - pub(crate) timeout: Duration, - cache_dir: PathBuf, + fetcher: F, } -impl Default for HttpModuleResolver { +impl Default for HttpModuleResolver { fn default() -> Self { Self { allowed_remote_domains: Vec::new(), - timeout: Duration::from_secs(10), - cache_dir: dirs::cache_dir().unwrap_or_default().join("mq"), + fetcher: F::default(), } } } -impl ModuleResolver for HttpModuleResolver { +impl ModuleResolver for HttpModuleResolver { fn canonical_name<'a>(&self, module_path: &'a str) -> &'a str { - if Self::is_github_url(module_path) || Self::is_remote_url(module_path) { - Self::extract_module_name(module_path) + if is_github_url(module_path) || is_remote_url(module_path) { + extract_module_name(module_path) } else { module_path } @@ -64,44 +59,7 @@ impl ModuleResolver for HttpModuleResolver { fn resolve(&self, module_name: &str) -> Result { let url = self.to_fetch_url(module_name)?; - let cache_subdir = self.cache_subdir(&url); - let cache_file = cache_subdir.join(self.cache_file_name(&url)); - let hash_file = cache_subdir.join(self.cache_hash_file_name(&url)); - - // Fast path: serve from cache without acquiring any lock. - if let Some(content) = self.try_read_cache(&cache_file, &hash_file)? { - return Ok(content); - } - - // Slow path: acquire an exclusive file lock so that only one thread/process - // fetches and writes the cache at a time. Others will wait, then hit the fast - // path on the re-check below. 
- fs::create_dir_all(&cache_subdir).map_err(|e| ModuleError::IOError(e.to_string().into()))?; - let lock_path = cache_subdir.join(self.cache_lock_file_name(&url)); - let lock_file = fs::OpenOptions::new() - .write(true) - .create(true) - .truncate(false) - .open(&lock_path) - .map_err(|e| ModuleError::IOError(e.to_string().into()))?; - lock_file - .lock() - .map_err(|e| ModuleError::IOError(e.to_string().into()))?; - - // Re-check under lock: another engine may have populated the cache while we waited. - if let Some(content) = self.try_read_cache(&cache_file, &hash_file)? { - return Ok(content); - } - - let content = self.fetch_url(&url)?; - fs::write(&cache_file, content.as_bytes()).map_err(|e| ModuleError::IOError(e.to_string().into()))?; - fs::write(&hash_file, Self::compute_content_hash(&content).as_bytes()) - .map_err(|e| ModuleError::IOError(e.to_string().into()))?; - - // Releasing the lock (drop) after both files are written keeps the invariant that - // any reader that obtains the lock will see both files present. - drop(lock_file); - Ok(content) + self.fetcher.fetch(&url) } fn get_path(&self, module_name: &str) -> Result { @@ -115,196 +73,189 @@ impl ModuleResolver for HttpModuleResolver { fn set_search_paths(&mut self, _paths: Vec) {} } -impl HttpModuleResolver { - /// Creates a new resolver with the given domain allowlist and request timeout. +impl HttpModuleResolver { + /// Creates a new resolver with the given domain allowlist and fetcher. /// - /// An empty `allowed_remote_domains` list restricts access to the built-in default domain - /// (`raw.githubusercontent.com/harehare`) only. Additional domains must be listed explicitly. - /// - /// Entries in the form `github.com/{user}/{repo}` (with or without `https://` prefix) are - /// automatically expanded to `raw.githubusercontent.com/{user}/{repo}`, so callers can - /// use the familiar GitHub URL style instead of the raw content URL. 
- pub fn new(allowed_remote_domains: Vec, timeout: Duration) -> Self { - let cache_dir = dirs::cache_dir().unwrap_or_default().join("mq"); + /// Entries in the form `github.com/{user}/{repo}` are automatically expanded to + /// `raw.githubusercontent.com/{user}/{repo}`. + pub fn new(allowed_remote_domains: Vec, fetcher: F) -> Self { Self { allowed_remote_domains: allowed_remote_domains .into_iter() - .map(|d| Self::normalize_allowed_domain(&d)) + .map(|d| normalize_allowed_domain(&d)) .collect(), - timeout, - cache_dir, + fetcher, } } - /// Normalizes a user-supplied allowed-domain entry. - /// - /// `github.com/{path}` (with or without `https://`/`http://` prefix) is expanded to - /// `raw.githubusercontent.com/{path}` so that users can write - /// `--allowed-domain github.com/alice/myrepo` instead of the full raw content URL. - /// The scheme prefix is always stripped before storing. - pub fn normalize_allowed_domain(domain: &str) -> String { - let without_scheme = domain - .strip_prefix("https://") - .or_else(|| domain.strip_prefix("http://")) - .unwrap_or(domain); - - if let Some(rest) = without_scheme.strip_prefix("github.com/") { - format!("raw.githubusercontent.com/{}", rest) - } else { - without_scheme.to_string() - } - } - - /// Returns `true` if `url` has an `http://` or `https://` scheme. - pub fn is_remote_url(url: &str) -> bool { - url.starts_with("http://") || url.starts_with("https://") + /// Returns `true` if `url`'s host/path is permitted by the allowlist. + pub fn is_allowed_domain(&self, url: &str) -> bool { + is_allowed_url(url, &self.allowed_remote_domains) } - /// Returns `true` if `input` is a GitHub shorthand or full GitHub URL. + /// Replaces the domain allowlist. 
/// - /// Recognized forms (with or without `https://` prefix): - /// - `github.com/{owner}/{path}[@{version}]` - pub fn is_github_url(input: &str) -> bool { - let s = input - .strip_prefix("https://") - .or_else(|| input.strip_prefix("http://")) - .unwrap_or(input); - s.starts_with("github.com/") + /// Entries in the form `github.com/{user}/{repo}` are automatically normalized. + pub fn set_allowed_domains(&mut self, domains: Vec) { + self.allowed_remote_domains = domains.into_iter().map(|d| normalize_allowed_domain(&d)).collect(); } - /// Converts a GitHub shorthand into a `raw.githubusercontent.com` fetch URL. - /// - /// # Format - /// `[https://]github.com/{owner}/{path}[@{version}]` - /// - /// Where `{path}` is one of: - /// - `{repo}` → fetches `{repo}.mq` from the repo root at HEAD - /// - `{repo.mq}` → fetches `{repo.mq}` from the repo named `{repo.mq}` at HEAD - /// - `{repo}/{subpath}` → fetches `{subpath}` from the given repo - /// - /// `{version}` (e.g. `v0.1.0`) selects a specific git tag; omitting it uses `HEAD`. 
- /// - /// # Examples - /// | Input | Resolved URL | - /// |---|---| - /// | `github.com/alice/mymod` | `…/alice/mymod/HEAD/mymod.mq` | - /// | `github.com/alice/mymod.mq@v1.0` | `…/alice/mymod.mq/v1.0/mymod.mq` | - /// | `github.com/alice/repo/lib/util.mq@v2.0` | `…/alice/repo/v2.0/lib/util.mq` | - pub fn github_to_raw_url(input: &str) -> Option { - let without_scheme = input - .strip_prefix("https://") - .or_else(|| input.strip_prefix("http://")) - .unwrap_or(input); - - let rest = without_scheme.strip_prefix("github.com/")?; - - let (path_part, version) = match rest.rfind('@') { - Some(pos) => (&rest[..pos], &rest[pos + 1..]), - None => (rest, "HEAD"), - }; + fn to_fetch_url(&self, module_name: &str) -> Result { + if is_github_url(module_name) { + let url = github_to_raw_url(module_name) + .ok_or_else(|| ModuleError::IOError(format!("Invalid GitHub URL: {}", module_name).into()))?; + if !self.is_allowed_domain(&url) { + return Err(ModuleError::IOError(format!("Domain not allowed: {}", url).into())); + } + return Ok(url); + } - let components: Vec<&str> = path_part.splitn(3, '/').collect(); - - let (owner, repo, file) = match components.as_slice() { - [owner, name] => { - let repo = *name; - let file = if name.ends_with(".mq") { - name.to_string() - } else { - format!("{}.mq", name) - }; - (owner.to_string(), repo.to_string(), file) + if is_remote_url(module_name) { + if !self.is_allowed_domain(module_name) { + return Err(ModuleError::IOError( + format!("Domain not allowed: {}", module_name).into(), + )); } - [owner, repo, subpath] => (owner.to_string(), repo.to_string(), subpath.to_string()), - _ => return None, - }; + return Ok(module_name.to_string()); + } - Some(format!( - "https://raw.githubusercontent.com/{}/{}/{}/{}", - owner, repo, version, file - )) + Err(ModuleError::NotFound(Cow::Owned(module_name.to_string()))) } +} - /// Returns `true` if `url`'s host/path is permitted. 
- /// - /// `DEFAULT_ALLOWED_DOMAIN` (`raw.githubusercontent.com/harehare`) is always permitted. - /// Additional domains are granted via the `--allowed-domain` flag; an empty user list - /// does **not** open up all domains — only the default is allowed. - /// - /// The match requires that after the domain/path prefix the next character is `/`, `?`, - /// `#`, `:` (port), or the string ends — preventing `example.com.evil.com` from - /// bypassing an `example.com` allowlist entry. - pub fn is_allowed_domain(&self, url: &str) -> bool { - let url_without_scheme = url - .strip_prefix("https://") - .or_else(|| url.strip_prefix("http://")) - .unwrap_or(url); +/// Fetcher backed by `ureq` with local filesystem caching. +/// +/// Fetched modules are stored under `{system_cache_dir}/mq/` in one of two subdirectories: +/// +/// - `versioned/` — URLs resolved to a specific tag (e.g. `@v0.1.0`); never cleared by +/// [`UreqFetcher::clear_cache`]. +/// - `mutable/` — URLs resolved to `HEAD`, `main`, `master`, or any non-GitHub HTTP URL; +/// cleared by [`UreqFetcher::clear_cache`]. +/// +/// Each cached module is accompanied by a `.mq.sha256` sidecar for tamper detection. 
+#[cfg(feature = "http-import-ureq")] +#[derive(Debug, Clone)] +pub struct UreqFetcher { + timeout: std::time::Duration, + cache_dir: std::path::PathBuf, +} - if Self::prefix_matches(url_without_scheme, DEFAULT_ALLOWED_DOMAIN) { - return true; +#[cfg(feature = "http-import-ureq")] +impl Default for UreqFetcher { + fn default() -> Self { + Self { + timeout: std::time::Duration::from_secs(10), + cache_dir: dirs::cache_dir().unwrap_or_default().join("mq"), } - - self.allowed_remote_domains - .iter() - .any(|domain| Self::prefix_matches(url_without_scheme, domain.as_str())) } +} - fn prefix_matches(url_without_scheme: &str, domain: &str) -> bool { - let rest = match url_without_scheme.strip_prefix(domain) { - Some(r) => r, - None => return false, - }; - rest.is_empty() - || rest.starts_with('/') - || rest.starts_with('?') - || rest.starts_with('#') - || rest.starts_with(':') +#[cfg(feature = "http-import-ureq")] +impl UreqFetcher { + /// Creates a new fetcher with the given request timeout. + pub fn new(timeout: std::time::Duration) -> Self { + Self { + timeout, + cache_dir: dirs::cache_dir().unwrap_or_default().join("mq"), + } } /// Removes only mutable-ref cached modules (HEAD/branch/non-versioned URLs). - /// - /// Versioned (tagged) modules in `{cache_dir}/versioned/` are preserved. - /// Call this when `--refresh-modules` is passed to force a re-fetch of HEAD/branch imports. pub fn clear_cache(&self) -> Result<(), ModuleError> { let mutable_dir = self.cache_dir.join("mutable"); if mutable_dir.exists() { - fs::remove_dir_all(&mutable_dir).map_err(|e| ModuleError::IOError(e.to_string().into()))?; + std::fs::remove_dir_all(&mutable_dir).map_err(|e| ModuleError::IOError(e.to_string().into()))?; } Ok(()) } - /// Removes all cached modules including versioned (tagged) ones, lock files, and hash sidecars. 
- /// - /// Unlike [`clear_cache`], this also clears `{cache_dir}/versioned/`, so the next resolve - /// re-fetches every module regardless of whether it was pinned to a tag. - /// Use `--clear-cache` on the CLI to trigger this. + /// Removes all cached modules including versioned (tagged) ones. pub fn clear_all_cache(&self) -> Result<(), ModuleError> { for subdir in &["mutable", "versioned"] { let dir = self.cache_dir.join(subdir); if dir.exists() { - fs::remove_dir_all(&dir).map_err(|e| ModuleError::IOError(e.to_string().into()))?; + std::fs::remove_dir_all(&dir).map_err(|e| ModuleError::IOError(e.to_string().into()))?; } } Ok(()) } - /// Fetches module source from the given URL without consulting the cache. - /// - /// Only HTTPS URLs are accepted; plain HTTP is rejected. Redirects are not followed. - /// The response body is capped at [`MAX_MODULE_SIZE`]. - /// Returns an error if the response Content-Type is `text/html` (e.g. a 404 error page - /// served with status 200), giving a clearer message than a parse error would. 
- pub fn fetch_url(&self, url: &str) -> Result { - if !Self::is_remote_url(url) { - return Err(ModuleError::NotFound(Cow::Owned(url.to_string()))); + fn cache_subdir(&self, url: &str) -> std::path::PathBuf { + use super::http_import::is_versioned_url; + if is_versioned_url(url) { + self.cache_dir.join("versioned") + } else { + self.cache_dir.join("mutable") } + } + + fn cache_stem(url: &str) -> String { + format!("{:x}", md5::compute(url)) + } + + fn try_read_cache( + &self, + cache_file: &std::path::Path, + hash_file: &std::path::Path, + ) -> Result, ModuleError> { + if !cache_file.exists() || !hash_file.exists() { + return Ok(None); + } + let content = std::fs::read_to_string(cache_file).map_err(|e| ModuleError::IOError(e.to_string().into()))?; + let stored = std::fs::read_to_string(hash_file).map_err(|e| ModuleError::IOError(e.to_string().into()))?; + if stored.trim() == Self::compute_hash(&content) { + Ok(Some(content)) + } else { + Ok(None) + } + } + + pub(crate) fn compute_hash(content: &str) -> String { + use sha2::Digest; + sha2::Sha256::digest(content.as_bytes()) + .as_slice() + .iter() + .map(|b| format!("{:02x}", b)) + .collect() + } +} + +/// Maximum response body size for a fetched module (1 MiB). +#[cfg(feature = "http-import-ureq")] +const MAX_MODULE_SIZE: u64 = 1024 * 1024; + +#[cfg(feature = "http-import-ureq")] +impl HttpFetcher for UreqFetcher { + fn fetch(&self, url: &str) -> Result { if !url.starts_with("https://") { return Err(ModuleError::IOError( format!("Only HTTPS URLs are allowed: {}", url).into(), )); } - if !self.is_allowed_domain(url) { - return Err(ModuleError::IOError(format!("Domain not allowed: {}", url).into())); + + let cache_subdir = self.cache_subdir(url); + let stem = Self::cache_stem(url); + let cache_file = cache_subdir.join(format!("{}.mq", stem)); + let hash_file = cache_subdir.join(format!("{}.mq.sha256", stem)); + + if let Some(content) = self.try_read_cache(&cache_file, &hash_file)? 
{ + return Ok(content); + } + + std::fs::create_dir_all(&cache_subdir).map_err(|e| ModuleError::IOError(e.to_string().into()))?; + let lock_path = cache_subdir.join(format!("{}.mq.lock", stem)); + let lock_file = std::fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(false) + .open(&lock_path) + .map_err(|e| ModuleError::IOError(e.to_string().into()))?; + lock_file + .lock() + .map_err(|e| ModuleError::IOError(e.to_string().into()))?; + + if let Some(content) = self.try_read_cache(&cache_file, &hash_file)? { + return Ok(content); } let agent: ureq::Agent = ureq::Agent::config_builder() @@ -341,178 +292,52 @@ impl HttpModuleResolver { )); } - response + let content = response .body_mut() .with_config() .limit(MAX_MODULE_SIZE) .read_to_string() - .map_err(|e| ModuleError::IOError(e.to_string().into())) - } - - /// Extracts a short module name from an HTTP URL or GitHub shorthand. - /// - /// Strips the URL scheme, domain, and path prefix, then removes any `@version` - /// suffix and the `.mq` file extension from the last path segment. - /// - /// # Examples - /// | Input | Result | - /// |---|---| - /// | `github.com/alice/mymod` | `"mymod"` | - /// | `github.com/alice/mymod.mq` | `"mymod"` | - /// | `github.com/alice/mymod.mq@v1.0` | `"mymod"` | - /// | `https://example.com/path/foo.mq` | `"foo"` | - pub fn extract_module_name(module_path: &str) -> &str { - let path = module_path - .strip_prefix("https://") - .or_else(|| module_path.strip_prefix("http://")) - .unwrap_or(module_path); - - let without_version = match path.rfind('@') { - Some(pos) => &path[..pos], - None => path, - }; - - let last_segment = without_version.rsplit('/').next().unwrap_or(without_version); - - last_segment.strip_suffix(".mq").unwrap_or(last_segment) - } - - /// Returns `true` if `url` is pinned to a specific immutable version tag. 
- /// - /// For `raw.githubusercontent.com` URLs the ref segment (the third path component after - /// `{owner}/{repo}/`) is checked: `HEAD`, `main`, and `master` are mutable; everything - /// else (e.g. `v0.1.0`) is treated as versioned/immutable. - /// - /// All non-GitHub HTTP URLs are considered mutable. - pub fn is_versioned_url(url: &str) -> bool { - const MUTABLE_REFS: &[&str] = &["HEAD", "main", "master"]; - let path = url - .strip_prefix("https://raw.githubusercontent.com/") - .or_else(|| url.strip_prefix("http://raw.githubusercontent.com/")); - match path { - Some(rest) => { - // layout: {owner}/{repo}/{ref}/{file} - let ref_segment = rest.split('/').nth(2).unwrap_or("HEAD"); - !MUTABLE_REFS.contains(&ref_segment) - } - None => false, - } - } - - /// Returns the cache subdirectory for `url`: - /// `{cache_dir}/versioned/` for pinned tags, `{cache_dir}/mutable/` otherwise. - fn cache_subdir(&self, url: &str) -> PathBuf { - if Self::is_versioned_url(url) { - self.cache_dir.join("versioned") - } else { - self.cache_dir.join("mutable") - } - } - - fn cache_file_name(&self, url: &str) -> String { - let hash = md5::compute(url); - format!("{:x}.mq", hash) - } - - fn cache_hash_file_name(&self, url: &str) -> String { - let hash = md5::compute(url); - format!("{:x}.mq.sha256", hash) - } + .map_err(|e| ModuleError::IOError(e.to_string().into()))?; - fn cache_lock_file_name(&self, url: &str) -> String { - let hash = md5::compute(url); - format!("{:x}.mq.lock", hash) - } + std::fs::write(&cache_file, content.as_bytes()).map_err(|e| ModuleError::IOError(e.to_string().into()))?; + std::fs::write(&hash_file, Self::compute_hash(&content).as_bytes()) + .map_err(|e| ModuleError::IOError(e.to_string().into()))?; - /// Tries to read a cached module without holding any lock. - /// - /// Returns `Ok(Some(content))` on a valid cache hit, `Ok(None)` when the cache - /// is missing or the hash doesn't match, and `Err` only on unexpected I/O errors. 
- fn try_read_cache( - &self, - cache_file: &std::path::Path, - hash_file: &std::path::Path, - ) -> Result, ModuleError> { - if !cache_file.exists() || !hash_file.exists() { - return Ok(None); - } - let content = fs::read_to_string(cache_file).map_err(|e| ModuleError::IOError(e.to_string().into()))?; - let stored = fs::read_to_string(hash_file).map_err(|e| ModuleError::IOError(e.to_string().into()))?; - if stored.trim() == Self::compute_content_hash(&content) { - Ok(Some(content)) - } else { - Ok(None) - } + drop(lock_file); + Ok(content) } +} - /// Computes the SHA-256 hash of `content` and returns it as a lowercase hex string. - pub(crate) fn compute_content_hash(content: &str) -> String { - sha2::Sha256::digest(content.as_bytes()) - .as_slice() - .iter() - .map(|b| format!("{:02x}", b)) - .collect() +/// Cache management methods specific to the `ureq`-backed resolver. +#[cfg(feature = "http-import-ureq")] +impl HttpModuleResolver { + /// Removes only mutable-ref cached modules. + pub fn clear_cache(&self) -> Result<(), ModuleError> { + self.fetcher.clear_cache() } - fn to_fetch_url(&self, module_name: &str) -> Result { - if Self::is_github_url(module_name) && !Self::is_remote_url(module_name) { - let url = Self::github_to_raw_url(module_name) - .ok_or_else(|| ModuleError::IOError(format!("Invalid GitHub URL: {}", module_name).into()))?; - if !self.is_allowed_domain(&url) { - return Err(ModuleError::IOError(format!("Domain not allowed: {}", url).into())); - } - return Ok(url); - } - - if Self::is_github_url(module_name) - && let Some(raw_url) = Self::github_to_raw_url(module_name) - { - if !self.is_allowed_domain(&raw_url) { - return Err(ModuleError::IOError(format!("Domain not allowed: {}", raw_url).into())); - } - return Ok(raw_url); - } - - if Self::is_remote_url(module_name) { - if !self.is_allowed_domain(module_name) { - return Err(ModuleError::IOError( - format!("Domain not allowed: {}", module_name).into(), - )); - } - return Ok(module_name.to_string()); - 
} - - Err(ModuleError::NotFound(Cow::Owned(module_name.to_string()))) + /// Removes all cached modules including versioned ones. + pub fn clear_all_cache(&self) -> Result<(), ModuleError> { + self.fetcher.clear_all_cache() } } #[cfg(test)] mod tests { - use rstest::rstest; - use tempfile::TempDir; - use super::*; - fn resolver_with_domains(domains: Vec) -> HttpModuleResolver { - HttpModuleResolver { - allowed_remote_domains: domains, - timeout: Duration::from_secs(10), - cache_dir: PathBuf::from("/tmp/mq-test-cache"), - } - } - - #[rstest] - #[case("github.com/alice/mymod", "mymod")] - #[case("github.com/alice/mymod.mq", "mymod")] - #[case("github.com/alice/mymod.mq@v1.0", "mymod")] - #[case("github.com/alice/mymod@v1.0", "mymod")] - #[case("github.com/alice/repo/lib/utils.mq", "utils")] - #[case("https://example.com/path/foo.mq", "foo")] - #[case("https://example.com/foo.mq", "foo")] - #[case("http://example.com/bar.mq", "bar")] - #[case("https://example.com/noext", "noext")] - fn test_extract_module_name(#[case] input: &str, #[case] expected: &str) { - assert_eq!(HttpModuleResolver::extract_module_name(input), expected); + #[cfg(feature = "http-import-ureq")] + use {rstest::rstest, std::time::Duration, tempfile::TempDir}; + + #[cfg(feature = "http-import-ureq")] + fn resolver_with_domains(domains: Vec) -> HttpModuleResolver { + HttpModuleResolver::new( + domains, + UreqFetcher { + cache_dir: std::path::PathBuf::from("/tmp/mq-test-cache"), + ..UreqFetcher::default() + }, + ) } #[rstest] @@ -520,167 +345,34 @@ mod tests { #[case("github.com/alice/mymod.mq@v1.0", "mymod")] #[case("https://example.com/foo.mq", "foo")] #[case("local_module", "local_module")] + #[cfg(feature = "http-import-ureq")] fn test_canonical_name(#[case] input: &str, #[case] expected: &str) { - let resolver = HttpModuleResolver::default(); + let resolver = HttpModuleResolver::::default(); assert_eq!(resolver.canonical_name(input), expected); } #[rstest] - #[case("github.com/alice/myrepo", 
"raw.githubusercontent.com/alice/myrepo")] - #[case("github.com/alice", "raw.githubusercontent.com/alice")] - #[case("https://github.com/alice/myrepo", "raw.githubusercontent.com/alice/myrepo")] - #[case("http://github.com/alice/myrepo", "raw.githubusercontent.com/alice/myrepo")] - #[case("example.com", "example.com")] - #[case("https://example.com", "example.com")] - #[case("raw.githubusercontent.com/alice/repo", "raw.githubusercontent.com/alice/repo")] - fn test_normalize_allowed_domain(#[case] input: &str, #[case] expected: &str) { - assert_eq!(HttpModuleResolver::normalize_allowed_domain(input), expected); - } - - #[rstest] - // github.com/user/repo shorthand is expanded to raw.githubusercontent.com at construction #[case(vec!["github.com/alice/myrepo".to_string()], "https://raw.githubusercontent.com/alice/myrepo/HEAD/mod.mq", true)] #[case(vec!["github.com/alice/myrepo".to_string()], "https://raw.githubusercontent.com/alice/other/HEAD/mod.mq", false)] - // plain domain still works #[case(vec!["example.com".to_string()], "https://example.com/foo.mq", true)] + #[cfg(feature = "http-import-ureq")] fn test_new_normalizes_github_domains(#[case] domains: Vec, #[case] url: &str, #[case] expected: bool) { - let resolver = HttpModuleResolver::new(domains, Duration::from_secs(10)); + let resolver = HttpModuleResolver::new(domains, UreqFetcher::new(Duration::from_secs(10))); assert_eq!(resolver.is_allowed_domain(url), expected); } - // github.com/user/repo shorthand accepted via --allowed-domain #[test] + #[cfg(feature = "http-import-ureq")] fn test_to_fetch_url_allowed_via_github_shorthand_domain() { - let resolver = HttpModuleResolver::new(vec!["github.com/alice/lisp".to_string()], Duration::from_secs(10)); + let resolver = HttpModuleResolver::new( + vec!["github.com/alice/lisp".to_string()], + UreqFetcher::new(Duration::from_secs(10)), + ); assert!(resolver.to_fetch_url("github.com/alice/lisp").is_ok()); - // Other repos under alice remain blocked 
assert!(resolver.to_fetch_url("github.com/alice/other").is_err()); } - #[rstest] - #[case("https://example.com/foo.mq", true)] - #[case("http://example.com/foo.mq", true)] - #[case("ftp://example.com/foo.mq", false)] - #[case("example.com/foo.mq", false)] - #[case("csv", false)] - #[case("", false)] - fn test_is_remote_url(#[case] url: &str, #[case] expected: bool) { - assert_eq!(HttpModuleResolver::is_remote_url(url), expected); - } - - #[rstest] - #[case("github.com/owner/repo", true)] - #[case("github.com/owner/repo.mq@v1.0", true)] - #[case("https://github.com/owner/repo", true)] - #[case("http://github.com/owner/repo", true)] - #[case("https://example.com/foo.mq", false)] - #[case("example.com/foo.mq", false)] - #[case("csv", false)] - fn test_is_github_url(#[case] input: &str, #[case] expected: bool) { - assert_eq!(HttpModuleResolver::is_github_url(input), expected); - } - #[rstest] - #[case( - "github.com/harehare/lisp", - "https://raw.githubusercontent.com/harehare/lisp/HEAD/lisp.mq" - )] - #[case( - "github.com/harehare/lisp.mq", - "https://raw.githubusercontent.com/harehare/lisp.mq/HEAD/lisp.mq" - )] - #[case( - "github.com/harehare/lisp.mq@v0.1.0", - "https://raw.githubusercontent.com/harehare/lisp.mq/v0.1.0/lisp.mq" - )] - #[case( - "github.com/harehare/lisp@v0.1.0", - "https://raw.githubusercontent.com/harehare/lisp/v0.1.0/lisp.mq" - )] - #[case( - "github.com/harehare/repo/lib/utils.mq@v2.0", - "https://raw.githubusercontent.com/harehare/repo/v2.0/lib/utils.mq" - )] - #[case( - "https://github.com/harehare/lisp.mq@v0.1.0", - "https://raw.githubusercontent.com/harehare/lisp.mq/v0.1.0/lisp.mq" - )] - // repo name contains a dot (e.g. 
json5.mq) — the full name is used as-is - #[case( - "github.com/harehare/json5.mq", - "https://raw.githubusercontent.com/harehare/json5.mq/HEAD/json5.mq" - )] - #[case( - "github.com/harehare/json5.mq@v0.1.0", - "https://raw.githubusercontent.com/harehare/json5.mq/v0.1.0/json5.mq" - )] - fn test_github_to_raw_url(#[case] input: &str, #[case] expected: &str) { - assert_eq!(HttpModuleResolver::github_to_raw_url(input).unwrap(), expected); - } - - #[rstest] - #[case("example.com/foo")] - #[case("notgithub.com/owner/repo")] - // only owner, no repo component - #[case("github.com/owner")] - fn test_github_to_raw_url_returns_none_for_non_github(#[case] input: &str) { - assert!(HttpModuleResolver::github_to_raw_url(input).is_none()); - } - - #[rstest] - // explicit mutable-ref version tags expand correctly - #[case( - "github.com/harehare/lisp@HEAD", - "https://raw.githubusercontent.com/harehare/lisp/HEAD/lisp.mq" - )] - #[case( - "github.com/harehare/lisp@main", - "https://raw.githubusercontent.com/harehare/lisp/main/lisp.mq" - )] - #[case( - "github.com/harehare/lisp@master", - "https://raw.githubusercontent.com/harehare/lisp/master/lisp.mq" - )] - fn test_github_to_raw_url_explicit_mutable_refs(#[case] input: &str, #[case] expected: &str) { - assert_eq!(HttpModuleResolver::github_to_raw_url(input).unwrap(), expected); - } - - #[rstest] - // empty list: default domain always allowed - #[case(vec![], "https://raw.githubusercontent.com/harehare/lisp/HEAD/lisp.mq", true)] - #[case(vec![], "https://raw.githubusercontent.com/harehare/repo/v0.1.0/mod.mq", true)] - // empty list: non-default domains denied - #[case(vec![], "https://example.com/foo.mq", false)] - #[case(vec![], "http://anything.org/bar.mq", false)] - // user-specified domain allowed - #[case(vec!["example.com".to_string()], "https://example.com/foo.mq", true)] - #[case(vec!["example.com".to_string()], "https://example.com", true)] - #[case(vec!["example.com".to_string()], "https://example.com:8080/foo.mq", 
true)] - #[case(vec!["example.com/repo".to_string()], "https://example.com/repo/foo.mq", true)] - #[case(vec!["example.com".to_string()], "https://other.com/foo.mq", false)] - #[case(vec!["example.com".to_string()], "https://notexample.com/foo.mq", false)] - #[case(vec!["example.com".to_string()], "http://example.com/foo.mq", true)] - // default domain always allowed even when user list is non-empty - #[case(vec!["example.com".to_string()], "https://raw.githubusercontent.com/harehare/x/HEAD/x.mq", true)] - // prefix-bypass: example.com.evil.com must NOT match allowlist entry "example.com" - #[case(vec!["example.com".to_string()], "https://example.com.evil.com/foo.mq", false)] - #[case(vec!["example.com".to_string()], "https://example.com.evil.com", false)] - #[case(vec!["example".to_string()], "https://example.com/foo.mq", false)] - // multiple allowlist entries: second entry matches - #[case(vec!["other.com".to_string(), "example.com".to_string()], "https://example.com/foo.mq", true)] - // multiple allowlist entries: none match - #[case(vec!["other.com".to_string(), "another.org".to_string()], "https://example.com/foo.mq", false)] - // URL with query string - #[case(vec!["example.com".to_string()], "https://example.com/foo.mq?v=1", true)] - // URL with fragment - #[case(vec!["example.com".to_string()], "https://example.com/foo.mq#section", true)] - fn test_is_allowed_domain(#[case] allowed_domains: Vec, #[case] url: &str, #[case] expected: bool) { - let resolver = resolver_with_domains(allowed_domains); - assert_eq!(resolver.is_allowed_domain(url), expected); - } - - #[rstest] - // harehare repos always allowed with empty list #[case( "github.com/harehare/lisp.mq@v0.1.0", "https://raw.githubusercontent.com/harehare/lisp.mq/v0.1.0/lisp.mq" @@ -689,41 +381,38 @@ mod tests { "github.com/harehare/lisp", "https://raw.githubusercontent.com/harehare/lisp/HEAD/lisp.mq" )] - // https:// GitHub URL is also expanded to raw.githubusercontent.com #[case( 
"https://github.com/harehare/lisp@v0.1.0", "https://raw.githubusercontent.com/harehare/lisp/v0.1.0/lisp.mq" )] + #[cfg(feature = "http-import-ureq")] fn test_to_fetch_url_with_empty_allowlist(#[case] input: &str, #[case] expected: &str) { let resolver = resolver_with_domains(vec![]); assert_eq!(resolver.to_fetch_url(input).unwrap(), expected); } #[rstest] - // non-harehare GitHub shorthand blocked by empty allowlist #[case(vec![], "github.com/alice/lisp")] - // non-harehare GitHub shorthand blocked by unrelated allowlist #[case(vec!["example.com".to_string()], "github.com/alice/lisp")] - // plain HTTPS URL blocked by allowlist #[case(vec!["example.com".to_string()], "https://other.com/foo.mq")] - // plain HTTPS URL blocked by empty allowlist #[case(vec![], "https://example.com/foo.mq")] + #[cfg(feature = "http-import-ureq")] fn test_to_fetch_url_blocked_by_allowlist(#[case] allowed: Vec, #[case] input: &str) { let resolver = resolver_with_domains(allowed); assert!(matches!(resolver.to_fetch_url(input), Err(ModuleError::IOError(_)))); } #[rstest] - // non-URL, non-GitHub local name #[case("local_module")] + #[cfg(feature = "http-import-ureq")] fn test_to_fetch_url_returns_not_found(#[case] input: &str) { let resolver = resolver_with_domains(vec![]); assert!(matches!(resolver.to_fetch_url(input), Err(ModuleError::NotFound(_)))); } #[test] + #[cfg(feature = "http-import-ureq")] fn test_to_fetch_url_invalid_github_shorthand_returns_io_error() { - // "github.com/owner" has no repo component so github_to_raw_url returns None let resolver = resolver_with_domains(vec![]); assert!(matches!( resolver.to_fetch_url("github.com/owner"), @@ -731,288 +420,128 @@ mod tests { )); } - #[rstest] - // versioned: tag that is not HEAD/main/master - #[case("https://raw.githubusercontent.com/alice/mymod/v0.1.0/mymod.mq", true)] - #[case("https://raw.githubusercontent.com/alice/mymod/v2.0/lib/util.mq", true)] - #[case("https://raw.githubusercontent.com/alice/mymod/release-1.0/mymod.mq", 
true)] - // mutable: HEAD/main/master - #[case("https://raw.githubusercontent.com/alice/mymod/HEAD/mymod.mq", false)] - #[case("https://raw.githubusercontent.com/alice/mymod/main/mymod.mq", false)] - #[case("https://raw.githubusercontent.com/alice/mymod/master/mymod.mq", false)] - // http:// scheme variant of raw.githubusercontent.com - #[case("http://raw.githubusercontent.com/alice/mymod/v0.1.0/mymod.mq", true)] - #[case("http://raw.githubusercontent.com/alice/mymod/HEAD/mymod.mq", false)] - // non-GitHub URLs are always mutable - #[case("https://example.com/foo.mq", false)] - #[case("http://example.com/foo.mq", false)] - // URL with insufficient path segments defaults to mutable - #[case("https://raw.githubusercontent.com/alice/mymod", false)] - fn test_is_versioned_url(#[case] url: &str, #[case] expected: bool) { - assert_eq!(HttpModuleResolver::is_versioned_url(url), expected); - } - #[test] + #[cfg(feature = "http-import-ureq")] fn test_cache_subdir_versioned() { let dir = TempDir::new().unwrap(); - let resolver = HttpModuleResolver { - allowed_remote_domains: vec![], - timeout: Duration::from_secs(10), + let fetcher = UreqFetcher { cache_dir: dir.path().to_path_buf(), + ..UreqFetcher::default() }; - let subdir = resolver.cache_subdir("https://raw.githubusercontent.com/alice/mymod/v0.1.0/mymod.mq"); + let resolver = HttpModuleResolver::new(vec![], fetcher); + let subdir = resolver + .fetcher + .cache_subdir("https://raw.githubusercontent.com/alice/mymod/v0.1.0/mymod.mq"); assert_eq!(subdir, dir.path().join("versioned")); } #[test] + #[cfg(feature = "http-import-ureq")] fn test_cache_subdir_mutable() { let dir = TempDir::new().unwrap(); - let resolver = HttpModuleResolver { - allowed_remote_domains: vec![], - timeout: Duration::from_secs(10), + let fetcher = UreqFetcher { cache_dir: dir.path().to_path_buf(), + ..UreqFetcher::default() }; - let subdir = resolver.cache_subdir("https://raw.githubusercontent.com/alice/mymod/HEAD/mymod.mq"); + let resolver = 
HttpModuleResolver::new(vec![], fetcher); + let subdir = resolver + .fetcher + .cache_subdir("https://raw.githubusercontent.com/alice/mymod/HEAD/mymod.mq"); assert_eq!(subdir, dir.path().join("mutable")); } #[test] - fn test_cache_valid_when_file_exists() { - let dir = TempDir::new().unwrap(); - let mutable_dir = dir.path().join("mutable"); - fs::create_dir_all(&mutable_dir).unwrap(); - let cache_file = mutable_dir.join("cached.mq"); - fs::write(&cache_file, b"content").unwrap(); - - let resolver = HttpModuleResolver { - allowed_remote_domains: vec![], - timeout: Duration::from_secs(10), - cache_dir: dir.path().to_path_buf(), - }; - - assert!(cache_file.exists()); - assert!(resolver.cache_dir.join("mutable").join("cached.mq").exists()); - } - - #[test] - fn test_clear_cache_removes_only_mutable() { - let dir = TempDir::new().unwrap(); - let mutable_dir = dir.path().join("mutable"); - let versioned_dir = dir.path().join("versioned"); - fs::create_dir_all(&mutable_dir).unwrap(); - fs::create_dir_all(&versioned_dir).unwrap(); - let mutable_mq = mutable_dir.join("abc123.mq"); - let mutable_hash = mutable_dir.join("abc123.mq.sha256"); - let versioned_file = versioned_dir.join("def456.mq"); - let versioned_hash = versioned_dir.join("def456.mq.sha256"); - fs::write(&mutable_mq, b"mutable content").unwrap(); - fs::write(&mutable_hash, b"deadbeef").unwrap(); - fs::write(&versioned_file, b"versioned content").unwrap(); - fs::write(&versioned_hash, b"cafebabe").unwrap(); - - let resolver = HttpModuleResolver { - allowed_remote_domains: vec![], - timeout: Duration::from_secs(10), - cache_dir: dir.path().to_path_buf(), - }; - - resolver.clear_cache().unwrap(); - assert!(!mutable_dir.exists(), "mutable dir should be removed"); - assert!(versioned_file.exists(), "versioned .mq file should be preserved"); - assert!( - versioned_hash.exists(), - "versioned .mq.sha256 sidecar should be preserved" - ); - } - - #[test] - fn test_clear_cache_noop_when_dir_missing() { - let dir = 
TempDir::new().unwrap(); - let nonexistent = dir.path().join("nonexistent"); - - let resolver = HttpModuleResolver { - allowed_remote_domains: vec![], - timeout: Duration::from_secs(10), - cache_dir: nonexistent, - }; - - assert!(resolver.clear_cache().is_ok()); - } - - #[test] - fn test_clear_cache_noop_when_only_versioned_exists() { - let dir = TempDir::new().unwrap(); - let versioned_dir = dir.path().join("versioned"); - fs::create_dir_all(&versioned_dir).unwrap(); - let versioned_file = versioned_dir.join("v1.mq"); - fs::write(&versioned_file, b"pinned").unwrap(); - - let resolver = HttpModuleResolver { - allowed_remote_domains: vec![], - timeout: Duration::from_secs(10), - cache_dir: dir.path().to_path_buf(), - }; - - resolver.clear_cache().unwrap(); - assert!(versioned_file.exists(), "versioned file must survive clear_cache"); - } - - #[test] - fn test_clear_cache_removes_multiple_mutable_files() { - let dir = TempDir::new().unwrap(); - let mutable_dir = dir.path().join("mutable"); - fs::create_dir_all(&mutable_dir).unwrap(); - for name in &["a.mq", "b.mq", "c.mq"] { - fs::write(mutable_dir.join(name), b"data").unwrap(); - } - - let resolver = HttpModuleResolver { - allowed_remote_domains: vec![], - timeout: Duration::from_secs(10), - cache_dir: dir.path().to_path_buf(), - }; - - resolver.clear_cache().unwrap(); - assert!(!mutable_dir.exists()); - } - - #[test] - fn test_clear_all_cache_removes_mutable_and_versioned() { - let dir = TempDir::new().unwrap(); - let mutable_dir = dir.path().join("mutable"); - let versioned_dir = dir.path().join("versioned"); - fs::create_dir_all(&mutable_dir).unwrap(); - fs::create_dir_all(&versioned_dir).unwrap(); - fs::write(mutable_dir.join("a.mq"), b"mutable").unwrap(); - fs::write(mutable_dir.join("a.mq.lock"), b"").unwrap(); - fs::write(versioned_dir.join("b.mq"), b"versioned").unwrap(); - fs::write(versioned_dir.join("b.mq.lock"), b"").unwrap(); - - let resolver = HttpModuleResolver { - allowed_remote_domains: vec![], - 
timeout: Duration::from_secs(10), - cache_dir: dir.path().to_path_buf(), - }; - - resolver.clear_all_cache().unwrap(); - assert!(!mutable_dir.exists(), "mutable dir should be removed"); - assert!(!versioned_dir.exists(), "versioned dir should be removed"); - } - - #[test] - fn test_clear_all_cache_noop_when_dirs_missing() { - let dir = TempDir::new().unwrap(); - let resolver = HttpModuleResolver { - allowed_remote_domains: vec![], - timeout: Duration::from_secs(10), - cache_dir: dir.path().to_path_buf(), - }; - assert!(resolver.clear_all_cache().is_ok()); - } - - #[test] + #[cfg(feature = "http-import-ureq")] fn test_resolve_uses_mutable_cache_on_hit() { let dir = TempDir::new().unwrap(); let mutable_dir = dir.path().join("mutable"); - fs::create_dir_all(&mutable_dir).unwrap(); + std::fs::create_dir_all(&mutable_dir).unwrap(); let url = "https://raw.githubusercontent.com/harehare/mymod/HEAD/mymod.mq"; let content = "def cached(): 42;"; - let hash = format!("{:x}.mq", md5::compute(url)); - let hash256 = format!("{:x}.mq.sha256", md5::compute(url)); - fs::write(mutable_dir.join(&hash), content.as_bytes()).unwrap(); - fs::write( - mutable_dir.join(&hash256), - HttpModuleResolver::compute_content_hash(content).as_bytes(), + let stem = UreqFetcher::cache_stem(url); + std::fs::write(mutable_dir.join(format!("{}.mq", stem)), content.as_bytes()).unwrap(); + std::fs::write( + mutable_dir.join(format!("{}.mq.sha256", stem)), + UreqFetcher::compute_hash(content).as_bytes(), ) .unwrap(); - let resolver = HttpModuleResolver { - allowed_remote_domains: vec![], - timeout: Duration::from_secs(10), + let fetcher = UreqFetcher { cache_dir: dir.path().to_path_buf(), + ..UreqFetcher::default() }; - - let result = resolver.resolve("https://raw.githubusercontent.com/harehare/mymod/HEAD/mymod.mq"); - assert_eq!(result.unwrap(), content); + let resolver = HttpModuleResolver::new(vec![], fetcher); + assert_eq!(resolver.resolve(url).unwrap(), content); } #[test] + #[cfg(feature = 
"http-import-ureq")] fn test_resolve_cache_without_hash_sidecar_triggers_refetch() { let dir = TempDir::new().unwrap(); let mutable_dir = dir.path().join("mutable"); - fs::create_dir_all(&mutable_dir).unwrap(); + std::fs::create_dir_all(&mutable_dir).unwrap(); let url = "https://raw.githubusercontent.com/harehare/mymod/HEAD/mymod.mq"; - let hash = format!("{:x}.mq", md5::compute(url)); - // Write only the content file — no .mq.sha256 sidecar - fs::write(mutable_dir.join(&hash), b"def foo(): 1;").unwrap(); + let stem = UreqFetcher::cache_stem(url); + std::fs::write(mutable_dir.join(format!("{}.mq", stem)), b"def foo(): 1;").unwrap(); - let resolver = HttpModuleResolver { - allowed_remote_domains: vec![], - timeout: Duration::from_secs(10), + let fetcher = UreqFetcher { cache_dir: dir.path().to_path_buf(), + ..UreqFetcher::default() }; - - // No sidecar → must attempt a network re-fetch (which fails in tests) - let result = resolver.resolve("https://raw.githubusercontent.com/harehare/mymod/HEAD/mymod.mq"); - assert!(result.is_err(), "cache without hash sidecar must trigger re-fetch"); + let resolver = HttpModuleResolver::new(vec![], fetcher); + assert!(resolver.resolve(url).is_err()); } #[test] + #[cfg(feature = "http-import-ureq")] fn test_resolve_tampered_cache_triggers_refetch() { let dir = TempDir::new().unwrap(); let mutable_dir = dir.path().join("mutable"); - fs::create_dir_all(&mutable_dir).unwrap(); + std::fs::create_dir_all(&mutable_dir).unwrap(); let url = "https://raw.githubusercontent.com/harehare/mymod/HEAD/mymod.mq"; let content = "def cached(): 42;"; - let hash = format!("{:x}.mq", md5::compute(url)); - let hash256 = format!("{:x}.mq.sha256", md5::compute(url)); - fs::write(mutable_dir.join(&hash), content.as_bytes()).unwrap(); - // Deliberately wrong hash - fs::write( - mutable_dir.join(&hash256), + let stem = UreqFetcher::cache_stem(url); + std::fs::write(mutable_dir.join(format!("{}.mq", stem)), content.as_bytes()).unwrap(); + std::fs::write( + 
mutable_dir.join(format!("{}.mq.sha256", stem)), b"0000000000000000000000000000000000000000000000000000000000000000", ) .unwrap(); - let resolver = HttpModuleResolver { - allowed_remote_domains: vec![], - timeout: Duration::from_secs(10), + let fetcher = UreqFetcher { cache_dir: dir.path().to_path_buf(), + ..UreqFetcher::default() }; - - // Must attempt a network re-fetch (which fails in tests) rather than return tampered content - let result = resolver.resolve("https://raw.githubusercontent.com/harehare/mymod/HEAD/mymod.mq"); - assert!(result.is_err(), "tampered cache must not return cached content"); + let resolver = HttpModuleResolver::new(vec![], fetcher); + assert!(resolver.resolve(url).is_err()); } #[test] + #[cfg(feature = "http-import-ureq")] fn test_resolve_uses_versioned_cache_on_hit() { let dir = TempDir::new().unwrap(); let versioned_dir = dir.path().join("versioned"); - fs::create_dir_all(&versioned_dir).unwrap(); + std::fs::create_dir_all(&versioned_dir).unwrap(); let url = "https://raw.githubusercontent.com/harehare/mymod/v0.1.0/mymod.mq"; let content = "def pinned(): 1;"; - let hash = format!("{:x}.mq", md5::compute(url)); - let hash256 = format!("{:x}.mq.sha256", md5::compute(url)); - fs::write(versioned_dir.join(&hash), content.as_bytes()).unwrap(); - fs::write( - versioned_dir.join(&hash256), - HttpModuleResolver::compute_content_hash(content).as_bytes(), + let stem = UreqFetcher::cache_stem(url); + std::fs::write(versioned_dir.join(format!("{}.mq", stem)), content.as_bytes()).unwrap(); + std::fs::write( + versioned_dir.join(format!("{}.mq.sha256", stem)), + UreqFetcher::compute_hash(content).as_bytes(), ) .unwrap(); - let resolver = HttpModuleResolver { - allowed_remote_domains: vec![], - timeout: Duration::from_secs(10), + let fetcher = UreqFetcher { cache_dir: dir.path().to_path_buf(), + ..UreqFetcher::default() }; - - let result = resolver.resolve("https://raw.githubusercontent.com/harehare/mymod/v0.1.0/mymod.mq"); - 
assert_eq!(result.unwrap(), content); + let resolver = HttpModuleResolver::new(vec![], fetcher); + assert_eq!(resolver.resolve(url).unwrap(), content); } #[rstest] @@ -1020,103 +549,107 @@ mod tests { #[case("local/module")] #[case("csv")] #[case("")] + #[cfg(feature = "http-import-ureq")] fn test_resolve_non_url_returns_not_found(#[case] module_name: &str) { - let resolver = HttpModuleResolver::default(); + let resolver = HttpModuleResolver::::default(); assert!(matches!(resolver.resolve(module_name), Err(ModuleError::NotFound(_)))); } #[rstest] #[case("not_a_url")] #[case("csv")] + #[cfg(feature = "http-import-ureq")] fn test_get_path_non_url_returns_not_found(#[case] module_name: &str) { - let resolver = HttpModuleResolver::default(); + let resolver = HttpModuleResolver::::default(); assert!(matches!(resolver.get_path(module_name), Err(ModuleError::NotFound(_)))); } #[rstest] #[case(vec!["other.com".to_string()], "https://example.com/foo.mq")] - #[case(vec!["example.com/private".to_string()], "https://example.com/public/foo.mq")] - // empty allowlist blocks non-default domain #[case(vec![], "https://example.com/foo.mq")] #[case(vec![], "https://raw.githubusercontent.com/alice/mod/HEAD/mod.mq")] + #[cfg(feature = "http-import-ureq")] fn test_resolve_blocked_domain_returns_io_error(#[case] allowed: Vec, #[case] url: &str) { let resolver = resolver_with_domains(allowed); assert!(matches!(resolver.resolve(url), Err(ModuleError::IOError(_)))); } - // fetch_url: HTTPS-only enforcement #[rstest] #[case("http://example.com/foo.mq")] #[case("http://raw.githubusercontent.com/harehare/mod/HEAD/mod.mq")] - fn test_fetch_url_rejects_http(#[case] url: &str) { - let resolver = HttpModuleResolver::default(); - assert!(matches!(resolver.fetch_url(url), Err(ModuleError::IOError(_)))); - } - - #[rstest] - #[case("local_module")] - #[case("csv")] - fn test_fetch_url_rejects_non_remote(#[case] url: &str) { - let resolver = HttpModuleResolver::default(); - 
assert!(matches!(resolver.fetch_url(url), Err(ModuleError::NotFound(_)))); + #[cfg(feature = "http-import-ureq")] + fn test_fetch_rejects_http(#[case] url: &str) { + let fetcher = UreqFetcher::default(); + assert!(matches!(fetcher.fetch(url), Err(ModuleError::IOError(_)))); } #[test] - fn test_fetch_url_rejects_non_default_domain_with_empty_allowlist() { + #[cfg(feature = "http-import-ureq")] + fn test_fetch_rejects_non_default_domain_with_empty_allowlist() { let resolver = resolver_with_domains(vec![]); assert!(matches!( - resolver.fetch_url("https://example.com/foo.mq"), + resolver.resolve("https://example.com/foo.mq"), Err(ModuleError::IOError(_)) )); } #[test] + #[cfg(feature = "http-import-ureq")] fn test_search_paths_empty() { - assert!(HttpModuleResolver::default().search_paths().is_empty()); + assert!(HttpModuleResolver::::default().search_paths().is_empty()); } #[test] - fn test_new_applies_parameters() { - let domains = vec!["example.com".to_string()]; - let timeout = Duration::from_secs(5); - let resolver = HttpModuleResolver::new(domains.clone(), timeout); - assert_eq!(resolver.allowed_remote_domains, domains); - assert_eq!(resolver.timeout, timeout); - } - - #[test] - fn test_compute_content_hash_is_deterministic() { - let h1 = HttpModuleResolver::compute_content_hash("def foo(): 1;"); - let h2 = HttpModuleResolver::compute_content_hash("def foo(): 1;"); + #[cfg(feature = "http-import-ureq")] + fn test_compute_hash_is_deterministic() { + let h1 = UreqFetcher::compute_hash("def foo(): 1;"); + let h2 = UreqFetcher::compute_hash("def foo(): 1;"); assert_eq!(h1, h2); - assert_eq!(h1.len(), 64, "SHA-256 hex is 64 chars"); + assert_eq!(h1.len(), 64); } #[test] - fn test_compute_content_hash_differs_for_different_content() { - let h1 = HttpModuleResolver::compute_content_hash("def foo(): 1;"); - let h2 = HttpModuleResolver::compute_content_hash("def foo(): 2;"); - assert_ne!(h1, h2); - } + #[cfg(feature = "http-import-ureq")] + fn 
test_clear_cache_removes_only_mutable() { + let dir = TempDir::new().unwrap(); + let mutable_dir = dir.path().join("mutable"); + let versioned_dir = dir.path().join("versioned"); + std::fs::create_dir_all(&mutable_dir).unwrap(); + std::fs::create_dir_all(&versioned_dir).unwrap(); + std::fs::write(mutable_dir.join("a.mq"), b"mutable").unwrap(); + std::fs::write(versioned_dir.join("b.mq"), b"versioned").unwrap(); - // Content-Type: text/html with charset parameter must still be detected as HTML - #[rstest] - #[case("text/html")] - #[case("text/html; charset=utf-8")] - #[case("text/html;charset=UTF-8")] - fn test_content_type_html_variants_contain_text_html(#[case] ct: &str) { - assert!(ct.contains("text/html")); + let fetcher = UreqFetcher { + cache_dir: dir.path().to_path_buf(), + ..UreqFetcher::default() + }; + let resolver = HttpModuleResolver::new(vec![], fetcher); + resolver.clear_cache().unwrap(); + assert!(!mutable_dir.exists()); + assert!(versioned_dir.join("b.mq").exists()); } - #[rstest] - #[case("text/plain")] - #[case("text/plain; charset=utf-8")] - #[case("application/octet-stream")] - fn test_content_type_non_html_not_detected_as_html(#[case] ct: &str) { - assert!(!ct.contains("text/html")); + #[test] + #[cfg(feature = "http-import-ureq")] + fn test_clear_all_cache_removes_both() { + let dir = TempDir::new().unwrap(); + let mutable_dir = dir.path().join("mutable"); + let versioned_dir = dir.path().join("versioned"); + std::fs::create_dir_all(&mutable_dir).unwrap(); + std::fs::create_dir_all(&versioned_dir).unwrap(); + std::fs::write(mutable_dir.join("a.mq"), b"mutable").unwrap(); + std::fs::write(versioned_dir.join("b.mq"), b"versioned").unwrap(); + + let fetcher = UreqFetcher { + cache_dir: dir.path().to_path_buf(), + ..UreqFetcher::default() + }; + let resolver = HttpModuleResolver::new(vec![], fetcher); + resolver.clear_all_cache().unwrap(); + assert!(!mutable_dir.exists()); + assert!(!versioned_dir.exists()); } - // https://github.com/owner/repo URL 
form is resolved to raw.githubusercontent.com #[rstest] #[case( "https://github.com/harehare/lisp@v0.1.0", @@ -1126,6 +659,7 @@ mod tests { "https://github.com/harehare/lisp", "https://raw.githubusercontent.com/harehare/lisp/HEAD/lisp.mq" )] + #[cfg(feature = "http-import-ureq")] fn test_to_fetch_url_https_github_form(#[case] input: &str, #[case] expected: &str) { let resolver = resolver_with_domains(vec![]); assert_eq!(resolver.to_fetch_url(input).unwrap(), expected); diff --git a/crates/mq-run/Cargo.toml b/crates/mq-run/Cargo.toml index 4fc844303..8e7227b1c 100644 --- a/crates/mq-run/Cargo.toml +++ b/crates/mq-run/Cargo.toml @@ -15,7 +15,7 @@ default-run = "mq" [features] debugger = ["mq-lang/debugger", "dep:rustyline", "dep:strum", "dep:regex-lite", "mq-dap"] default = ["std", "use_mimalloc", "http-import"] -http-import = ["mq-lang/http-import"] +http-import = ["mq-lang/http-import-ureq"] std = [] use_mimalloc = ["mimalloc"] diff --git a/crates/mq-wasm/Cargo.toml b/crates/mq-wasm/Cargo.toml index 3f1e6a738..b39813104 100644 --- a/crates/mq-wasm/Cargo.toml +++ b/crates/mq-wasm/Cargo.toml @@ -24,7 +24,7 @@ md5 = {workspace = true} mq-formatter = {workspace = true} mq-hir = {workspace = true} mq-check = {workspace = true} -mq-lang = {workspace = true, features = ["ast-json"]} +mq-lang = {workspace = true, features = ["ast-json", "http-import"]} sha2 = {workspace = true} mq-markdown = {workspace = true} opfs = {workspace = true, optional = true} diff --git a/crates/mq-wasm/src/script.rs b/crates/mq-wasm/src/script.rs index 564240117..0fe0a0483 100644 --- a/crates/mq-wasm/src/script.rs +++ b/crates/mq-wasm/src/script.rs @@ -43,6 +43,8 @@ export interface Options { listStyle: 'dash' | 'plus' | 'star' | null, linkTitleStyle: 'double' | 'single' | 'paren' | null, linkUrlStyle: 'angle' | 'none' | null, + /** Domains permitted for HTTP module imports in addition to github.com/harehare (always allowed). 
*/ + allowedDomains?: string[], } export function definedValues(code: string, module?: string): Promise>; @@ -106,6 +108,7 @@ struct Options { list_style: Option, link_title_style: Option, link_url_style: Option, + allowed_domains: Option>, } #[derive(Serialize, Deserialize)] @@ -226,10 +229,32 @@ impl From for mq_markdown::ConversionOptions { } } +/// Sync HTTP fetcher for WASM that reads from a pre-populated in-memory cache. +/// +/// Content is inserted by [`WasmModuleResolver::preload_http_modules`] (async, keyed by raw HTTPS URL) +/// and then read synchronously by [`mq_lang::ModuleResolver::resolve`]. #[derive(Debug, Clone, Default)] +struct WasmFetcher { + /// Keyed by the normalized raw HTTPS URL (e.g. `https://raw.githubusercontent.com/...`). + cache: Rc>>, +} + +impl mq_lang::HttpFetcher for WasmFetcher { + fn fetch(&self, url: &str) -> Result { + self.cache + .borrow() + .get(url) + .cloned() + .ok_or_else(|| mq_lang::ModuleError::NotFound(std::borrow::Cow::Owned(url.to_string()))) + } +} + +#[derive(Debug, Clone)] pub struct WasmModuleResolver { - /// Cache for HTTP-fetched modules, keyed by the original import path (URL or github shorthand). - http_cache: Rc>>, + /// HTTP resolver: handles URL normalization, domain allow-list, and delegates fetch to WasmFetcher. + http_resolver: Rc>>, + /// Shared reference into WasmFetcher's cache for direct insertion during async preload. 
+ fetcher_cache: Rc>>, #[cfg(feature = "opfs")] /// Cache of preloaded module contents, keyed by module name cache: Rc>>, @@ -241,11 +266,39 @@ pub struct WasmModuleResolver { is_available: Rc>, } +impl Default for WasmModuleResolver { + fn default() -> Self { + let fetcher_cache: Rc>> = Rc::new(RefCell::new(HashMap::new())); + let fetcher = WasmFetcher { + cache: Rc::clone(&fetcher_cache), + }; + Self { + http_resolver: Rc::new(RefCell::new(mq_lang::HttpModuleResolver::new(vec![], fetcher))), + fetcher_cache, + #[cfg(feature = "opfs")] + cache: Rc::new(RefCell::new(HashMap::new())), + #[cfg(feature = "opfs")] + root_dir: Rc::new(RefCell::new(None)), + #[cfg(feature = "opfs")] + is_available: Rc::new(RefCell::new(false)), + } + } +} + impl WasmModuleResolver { pub fn new() -> Self { Self::default() } + /// Sets the list of additional allowed domains for HTTP imports. + /// + /// `github.com/{path}` entries are automatically expanded to `raw.githubusercontent.com/{path}`. + /// `DEFAULT_ALLOWED_DOMAIN` (`raw.githubusercontent.com/harehare`) is always permitted + /// regardless of this list. + pub fn set_allowed_domains(&self, domains: Vec) { + self.http_resolver.borrow_mut().set_allowed_domains(domains); + } + /// Initializes the OPFS root directory handle /// /// If OPFS is not available, this method will silently fail and the resolver @@ -339,16 +392,17 @@ impl WasmModuleResolver { self.cache.borrow_mut().clear(); } - /// Pre-fetches all HTTP/GitHub import URLs found in `code` (and their transitive imports). + /// Pre-fetches HTTP/GitHub import URLs found directly in `code` (top-level only). /// - /// Mirrors CLI caching behaviour: + /// Only imports written in the user's own code are resolved; HTTP imports inside + /// fetched modules are intentionally ignored. + /// + /// Caching behaviour: /// - Versioned URLs (`@v1.0`) are cached persistently in OPFS `http_cache/versioned/`. /// - Mutable URLs (HEAD/branch) are cached in OPFS `http_cache/mutable/`. 
/// - Each cached file has a SHA-256 sidecar for tamper detection. /// - Results are also kept in `http_cache` for the lifetime of this session. pub async fn preload_http_modules(&self, code: &str) { - const MAX_DEPTH: usize = 5; - #[cfg(feature = "opfs")] let root = self.root_dir.borrow().clone(); @@ -358,70 +412,58 @@ impl WasmModuleResolver { return; } - let mut visited: std::collections::HashSet = std::collections::HashSet::new(); - let mut pending = extract_http_import_urls(code); + for module_path in extract_http_import_urls(code) { + let fetch_url = if mq_lang::http_import::is_github_url(&module_path) { + match mq_lang::http_import::github_to_raw_url(&module_path) { + Some(u) => u, + None => continue, + } + } else if mq_lang::http_import::is_remote_url(&module_path) { + module_path.clone() + } else { + continue; + }; - for _ in 0..MAX_DEPTH { - if pending.is_empty() { - break; + if self.fetcher_cache.borrow().contains_key(&fetch_url) { + continue; } - let mut next = Vec::new(); - - for module_path in pending { - if visited.contains(&module_path) { - continue; - } - visited.insert(module_path.clone()); - - if self.http_cache.borrow().contains_key(&module_path) { - continue; - } + if !self.http_resolver.borrow().is_allowed_domain(&fetch_url) { + continue; + } - let fetch_url = if is_github_url(&module_path) { - match github_to_raw_url(&module_path) { - Some(u) => u, - None => continue, - } - } else if is_http_url(&module_path) { - module_path.clone() - } else { - continue; - }; + let subdir = if mq_lang::http_import::is_versioned_url(&fetch_url) { + "versioned" + } else { + "mutable" + }; + let stem = cache_file_stem(&fetch_url); - let subdir = if is_versioned_url(&fetch_url) { "versioned" } else { "mutable" }; - let stem = cache_file_stem(&fetch_url); + // Fast path: OPFS cache hit + #[cfg(feature = "opfs")] + if let Some(ref r) = root + && let Some(content) = try_read_opfs_http_cache(r, subdir, &stem).await + { + 
self.fetcher_cache.borrow_mut().insert(fetch_url, content); + continue; + } - // Fast path: OPFS cache hit + // Slow path: fetch from network + if let Ok(content) = fetch_text(&fetch_url).await { #[cfg(feature = "opfs")] if let Some(ref r) = root { - if let Some(content) = try_read_opfs_http_cache(r, subdir, &stem).await { - next.extend(extract_http_import_urls(&content)); - self.http_cache.borrow_mut().insert(module_path, content); - continue; - } - } - - // Slow path: fetch from network - if let Ok(content) = fetch_text(&fetch_url).await { - #[cfg(feature = "opfs")] - if let Some(ref r) = root { - write_opfs_http_cache(r, subdir, &stem, &content).await; - } - next.extend(extract_http_import_urls(&content)); - self.http_cache.borrow_mut().insert(module_path, content); + write_opfs_http_cache(r, subdir, &stem, &content).await; } + self.fetcher_cache.borrow_mut().insert(fetch_url, content); } - - pending = next; } } } impl mq_lang::ModuleResolver for WasmModuleResolver { fn canonical_name<'a>(&self, module_path: &'a str) -> &'a str { - if is_github_url(module_path) || is_http_url(module_path) { - extract_module_name(module_path) + if mq_lang::http_import::is_github_url(module_path) || mq_lang::http_import::is_remote_url(module_path) { + mq_lang::http_import::extract_module_name(module_path) } else { module_path } @@ -432,43 +474,42 @@ impl mq_lang::ModuleResolver for WasmModuleResolver { return Ok(content_fn().to_string()); } - if let Some(content) = self.http_cache.borrow().get(module_name).cloned() { - return Ok(content); + #[cfg(feature = "opfs")] + if let Some(content) = self.cache.borrow().get(module_name) { + return Ok(content.clone()); } - #[cfg(feature = "opfs")] - { - if (is_http_url(module_name) || is_github_url(module_name)) - && self.root_dir.borrow().is_none() - { - return Err(mq_lang::ModuleError::IOError(std::borrow::Cow::Owned( - format!( - "HTTP import of '{}' is not available: OPFS is not supported in this environment.", - module_name - ), - ))); - 
} - return self.cache.borrow().get(module_name).cloned().ok_or_else(|| { - mq_lang::ModuleError::NotFound(std::borrow::Cow::Owned(format!( - "Module '{}' not found in cache. Use preload_modules() to load it first.", + let is_http = + mq_lang::http_import::is_remote_url(module_name) || mq_lang::http_import::is_github_url(module_name); + + if is_http { + #[cfg(feature = "opfs")] + if self.root_dir.borrow().is_none() { + return Err(mq_lang::ModuleError::IOError(std::borrow::Cow::Owned(format!( + "HTTP import of '{}' is not available: OPFS is not supported in this environment.", module_name - ))) - }); + )))); + } + return self.http_resolver.borrow().resolve(module_name); } - #[cfg(not(feature = "opfs"))] + + #[cfg(feature = "opfs")] return Err(mq_lang::ModuleError::NotFound(std::borrow::Cow::Owned(format!( - "Module '{}' not found. Module resolution is not supported in this environment.", + "Module '{}' not found in cache. Use preload_modules() to load it first.", module_name )))); + #[cfg(not(feature = "opfs"))] + Err(mq_lang::ModuleError::NotFound(std::borrow::Cow::Owned(format!( + "Module '{}' not found. 
Module resolution is not supported in this environment.", + module_name + )))) } fn get_path(&self, module_name: &str) -> Result { - if is_github_url(module_name) { - return github_to_raw_url(module_name) - .map(Ok) - .unwrap_or_else(|| Ok(module_name.to_string())); + match self.http_resolver.borrow().get_path(module_name) { + Ok(path) => Ok(path), + Err(_) => Ok(module_name.to_string()), } - Ok(module_name.to_string()) } fn search_paths(&self) -> Vec { @@ -524,14 +565,17 @@ pub async fn clear_all_http_cache() -> Result<(), JsValue> { #[wasm_bindgen(js_name=run, skip_typescript)] pub async fn run(code: &str, content: &str, options: JsValue) -> Result { + let options: Options = serde_wasm_bindgen::from_value(options) + .map_err(|e| JsValue::from_str(&format!("Failed to parse options: {}", e)))?; + let resolver = WasmModuleResolver::new(); resolver.initialize().await; resolver.preload_modules().await; + if let Some(ref domains) = options.allowed_domains { + resolver.set_allowed_domains(domains.clone()); + } resolver.preload_http_modules(code).await; - let options: Options = serde_wasm_bindgen::from_value(options) - .map_err(|e| JsValue::from_str(&format!("Failed to parse options: {}", e)))?; - let is_update = options.is_update; let mut engine = mq_lang::Engine::new(resolver); @@ -874,69 +918,6 @@ pub async fn hover(code: &str, line: u32, column: u32) -> JsValue { /// Name of the OPFS subdirectory used to store cached HTTP modules. const HTTP_CACHE_DIR: &str = "http_cache"; -fn is_http_url(url: &str) -> bool { - url.starts_with("https://") || url.starts_with("http://") -} - -fn is_github_url(url: &str) -> bool { - let s = url - .strip_prefix("https://") - .or_else(|| url.strip_prefix("http://")) - .unwrap_or(url); - s.starts_with("github.com/") -} - -/// Converts a GitHub shorthand (`[https://]github.com/{owner}/{repo}[@{version}]`) to a -/// `raw.githubusercontent.com` HTTPS URL. 
-fn github_to_raw_url(input: &str) -> Option { - let without_scheme = input - .strip_prefix("https://") - .or_else(|| input.strip_prefix("http://")) - .unwrap_or(input); - - let rest = without_scheme.strip_prefix("github.com/")?; - - let (path_part, version) = match rest.rfind('@') { - Some(pos) => (&rest[..pos], &rest[pos + 1..]), - None => (rest, "HEAD"), - }; - - let components: Vec<&str> = path_part.splitn(3, '/').collect(); - - let (owner, repo, file) = match components.as_slice() { - [owner, name] => { - let file = if name.ends_with(".mq") { - name.to_string() - } else { - format!("{}.mq", name) - }; - (owner.to_string(), name.to_string(), file) - } - [owner, repo, subpath] => (owner.to_string(), repo.to_string(), subpath.to_string()), - _ => return None, - }; - - Some(format!( - "https://raw.githubusercontent.com/{}/{}/{}/{}", - owner, repo, version, file - )) -} - -/// Returns `true` if `url` is pinned to a specific immutable version tag (same logic as CLI). -fn is_versioned_url(url: &str) -> bool { - const MUTABLE_REFS: &[&str] = &["HEAD", "main", "master"]; - let path = url - .strip_prefix("https://raw.githubusercontent.com/") - .or_else(|| url.strip_prefix("http://raw.githubusercontent.com/")); - match path { - Some(rest) => { - let ref_segment = rest.split('/').nth(2).unwrap_or("HEAD"); - !MUTABLE_REFS.contains(&ref_segment) - } - None => false, - } -} - /// Returns the MD5 hex string of `url`, used as the cache file stem. fn cache_file_stem(url: &str) -> String { format!("{:x}", md5::compute(url)) @@ -974,7 +955,10 @@ async fn try_read_opfs_http_cache( .await .ok()?; let hash_fh = sub - .get_file_handle_with_options(&format!("{}.mq.sha256", stem), &opfs::GetFileHandleOptions { create: false }) + .get_file_handle_with_options( + &format!("{}.mq.sha256", stem), + &opfs::GetFileHandleOptions { create: false }, + ) .await .ok()?; @@ -990,24 +974,17 @@ async fn try_read_opfs_http_cache( /// Writes `content` and its SHA-256 sidecar to the OPFS HTTP cache. 
Silently ignores errors. #[cfg(feature = "opfs")] -async fn write_opfs_http_cache( - root: &opfs::persistent::DirectoryHandle, - subdir: &str, - stem: &str, - content: &str, -) { - async fn write_file( - dir: &opfs::persistent::DirectoryHandle, - name: &str, - data: &[u8], - ) -> Option<()> { +async fn write_opfs_http_cache(root: &opfs::persistent::DirectoryHandle, subdir: &str, stem: &str, content: &str) { + async fn write_file(dir: &opfs::persistent::DirectoryHandle, name: &str, data: &[u8]) -> Option<()> { use opfs::{DirectoryHandle as _, FileHandle as _, WritableFileStream as _}; let mut fh = dir .get_file_handle_with_options(name, &opfs::GetFileHandleOptions { create: true }) .await .ok()?; let mut w = fh - .create_writable_with_options(&opfs::CreateWritableOptions { keep_existing_data: false }) + .create_writable_with_options(&opfs::CreateWritableOptions { + keep_existing_data: false, + }) .await .ok()?; w.write_at_cursor_pos(data).await.ok()?; @@ -1028,23 +1005,12 @@ async fn write_opfs_http_cache( }; let _ = write_file(&sub, &format!("{}.mq", stem), content.as_bytes()).await; - let _ = write_file(&sub, &format!("{}.mq.sha256", stem), compute_content_hash(content).as_bytes()).await; -} - -/// Extracts a short module name from an HTTP URL or GitHub shorthand. -fn extract_module_name(module_path: &str) -> &str { - let path = module_path - .strip_prefix("https://") - .or_else(|| module_path.strip_prefix("http://")) - .unwrap_or(module_path); - - let without_version = match path.rfind('@') { - Some(pos) => &path[..pos], - None => path, - }; - - let last_segment = without_version.rsplit('/').next().unwrap_or(without_version); - last_segment.strip_suffix(".mq").unwrap_or(last_segment) + let _ = write_file( + &sub, + &format!("{}.mq.sha256", stem), + compute_content_hash(content).as_bytes(), + ) + .await; } /// Parses `code` and returns all import paths that look like HTTP or GitHub URLs. 
@@ -1058,7 +1024,7 @@ fn extract_http_import_urls(code: &str) -> Vec { .iter() .filter_map(|node| { if let mq_lang::AstExpr::Import(mq_lang::AstLiteral::String(url)) = &*node.expr { - if is_http_url(url) || is_github_url(url) { + if mq_lang::http_import::is_remote_url(url) || mq_lang::http_import::is_github_url(url) { Some(url.clone()) } else { None @@ -1133,6 +1099,7 @@ mod tests { list_style: None, link_title_style: None, link_url_style: None, + allowed_domains: None, }) .unwrap(), ); @@ -1151,6 +1118,7 @@ mod tests { list_style: Some(ListStyle::Star), link_title_style: None, link_url_style: None, + allowed_domains: None, }) .unwrap(), ); @@ -1169,6 +1137,7 @@ mod tests { list_style: None, link_title_style: None, link_url_style: Some(UrlSurroundStyle::Angle), + allowed_domains: None, }) .unwrap(), ); @@ -1188,6 +1157,7 @@ mod tests { list_style: None, link_title_style: None, link_url_style: None, + allowed_domains: None, }) .unwrap() ) From 829dc7d166f8a238975274a08953e36d6fca6220 Mon Sep 17 00:00:00 2001 From: harehare Date: Tue, 16 Jun 2026 23:02:56 +0900 Subject: [PATCH 3/8] =?UTF-8?q?=E2=9C=A8=20feat(wasm):=20extend=20HTTP=20p?= =?UTF-8?q?reload=20to=20include=20statements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `extract_http_import_urls` previously only scanned `import` nodes; `include "https://..."` and `include "github.com/..."` were silently skipped, so OPFS caching never ran for them. ✅ test(http-import): add rstest parametrized and proptest property tests Add comprehensive rstest cases and proptest property-based tests for all URL utility functions in `http_import.rs` (is_remote_url, is_github_url, github_to_raw_url, extract_module_name, normalize_allowed_domain, is_allowed_url, is_versioned_url, prefix_matches). Add wasm_bindgen_test cases for extract_http_import_urls covering import and include with HTTPS/GitHub URLs, local exclusions, mixed code, invalid syntax, and empty input. 
--- crates/mq-wasm/src/script.rs | 94 +++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 7 deletions(-) diff --git a/crates/mq-wasm/src/script.rs b/crates/mq-wasm/src/script.rs index 0fe0a0483..770a5cfc2 100644 --- a/crates/mq-wasm/src/script.rs +++ b/crates/mq-wasm/src/script.rs @@ -1013,7 +1013,7 @@ async fn write_opfs_http_cache(root: &opfs::persistent::DirectoryHandle, subdir: .await; } -/// Parses `code` and returns all import paths that look like HTTP or GitHub URLs. +/// Parses `code` and returns all import/include paths that look like HTTP or GitHub URLs. fn extract_http_import_urls(code: &str) -> Vec { let token_arena = mq_lang::Shared::new(mq_lang::SharedCell::new(mq_lang::Arena::new(1024))); let Ok(program) = mq_lang::parse(code, token_arena) else { @@ -1023,12 +1023,13 @@ fn extract_http_import_urls(code: &str) -> Vec { program .iter() .filter_map(|node| { - if let mq_lang::AstExpr::Import(mq_lang::AstLiteral::String(url)) = &*node.expr { - if mq_lang::http_import::is_remote_url(url) || mq_lang::http_import::is_github_url(url) { - Some(url.clone()) - } else { - None - } + let url = match &*node.expr { + mq_lang::AstExpr::Import(mq_lang::AstLiteral::String(url)) => url, + mq_lang::AstExpr::Include(mq_lang::AstLiteral::String(url)) => url, + _ => return None, + }; + if mq_lang::http_import::is_remote_url(url) || mq_lang::http_import::is_github_url(url) { + Some(url.clone()) } else { None } @@ -1436,6 +1437,85 @@ mod tests { // Note: File cleanup is skipped as OPFS persistent storage is isolated per origin } + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_http_import_urls_import_https() { + let code = r#"import "https://example.com/foo.mq""#; + let urls = extract_http_import_urls(code); + assert_eq!(urls, vec!["https://example.com/foo.mq"]); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_http_import_urls_include_https() { + let code = r#"include "https://example.com/foo.mq""#; + let urls = 
extract_http_import_urls(code); + assert_eq!(urls, vec!["https://example.com/foo.mq"]); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_http_import_urls_import_github() { + let code = r#"import "github.com/alice/mymod""#; + let urls = extract_http_import_urls(code); + assert_eq!(urls, vec!["github.com/alice/mymod"]); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_http_import_urls_include_github() { + let code = r#"include "github.com/alice/mymod""#; + let urls = extract_http_import_urls(code); + assert_eq!(urls, vec!["github.com/alice/mymod"]); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_http_import_urls_local_import_excluded() { + let code = r#"import "local_module""#; + let urls = extract_http_import_urls(code); + assert!(urls.is_empty()); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_http_import_urls_local_include_excluded() { + let code = r#"include "local_module""#; + let urls = extract_http_import_urls(code); + assert!(urls.is_empty()); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_http_import_urls_multiple_mixed() { + let code = r#" + import "https://example.com/a.mq" + include "github.com/alice/b" + import "local_mod" + include "https://example.com/c.mq" + "#; + let urls = extract_http_import_urls(code); + assert!(urls.contains(&"https://example.com/a.mq".to_string())); + assert!(urls.contains(&"github.com/alice/b".to_string())); + assert!(urls.contains(&"https://example.com/c.mq".to_string())); + assert!(!urls.contains(&"local_mod".to_string())); + assert_eq!(urls.len(), 3); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_http_import_urls_invalid_syntax_returns_empty() { + let urls = extract_http_import_urls("import =>"); + assert!(urls.is_empty()); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_http_import_urls_empty_code() { + let urls = extract_http_import_urls(""); + 
assert!(urls.is_empty()); + } + #[allow(unused)] #[wasm_bindgen_test] async fn test_html_to_markdown() { From 1614a63f948a4218482383f7689f8b09458c3900 Mon Sep 17 00:00:00 2001 From: harehare Date: Tue, 16 Jun 2026 23:03:03 +0900 Subject: [PATCH 4/8] =?UTF-8?q?=E2=9C=85=20test(http-import):=20add=20rste?= =?UTF-8?q?st=20and=20proptest=20coverage=20for=20URL=20utilities?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add parametrized rstest cases and proptest property-based tests for is_remote_url, is_github_url, github_to_raw_url, extract_module_name, normalize_allowed_domain, is_allowed_url, is_versioned_url, and prefix_matches covering edge cases, prefix-attack prevention, scheme stripping, and mutable/versioned ref classification. --- .../src/module/resolver/http_import.rs | 223 ++++++++++++++++++ 1 file changed, 223 insertions(+) diff --git a/crates/mq-lang/src/module/resolver/http_import.rs b/crates/mq-lang/src/module/resolver/http_import.rs index 1f9b293c7..296a5f7c4 100644 --- a/crates/mq-lang/src/module/resolver/http_import.rs +++ b/crates/mq-lang/src/module/resolver/http_import.rs @@ -158,6 +158,7 @@ pub fn is_allowed_url(url: &str, allowed_domains: &[String]) -> bool { #[cfg(test)] mod tests { + use proptest::prelude::*; use rstest::rstest; use super::*; @@ -168,20 +169,70 @@ mod tests { #[case("ftp://example.com/foo.mq", false)] #[case("example.com/foo.mq", false)] #[case("csv", false)] + #[case("", false)] + #[case("https://", true)] + #[case("http://", true)] fn test_is_remote_url(#[case] url: &str, #[case] expected: bool) { assert_eq!(is_remote_url(url), expected); } + proptest! 
{ + #[test] + fn prop_is_remote_url_https_prefix(path in "[a-z0-9/._-]{1,30}") { + let url = format!("https://{}", path); + prop_assert!(is_remote_url(&url)); + } + + #[test] + fn prop_is_remote_url_http_prefix(path in "[a-z0-9/._-]{1,30}") { + let url = format!("http://{}", path); + prop_assert!(is_remote_url(&url)); + } + + #[test] + fn prop_is_remote_url_no_scheme_is_false(s in "[a-zA-Z0-9._/-]{1,40}") { + // Strings without http(s):// prefix must not be treated as remote. + prop_assume!(!s.starts_with("https://") && !s.starts_with("http://")); + prop_assert!(!is_remote_url(&s)); + } + } + #[rstest] #[case("github.com/owner/repo", true)] #[case("https://github.com/owner/repo", true)] #[case("http://github.com/owner/repo", true)] #[case("https://example.com/foo.mq", false)] #[case("csv", false)] + #[case("", false)] + #[case("github.com/", true)] fn test_is_github_url(#[case] input: &str, #[case] expected: bool) { assert_eq!(is_github_url(input), expected); } + proptest! { + #[test] + fn prop_is_github_url_bare_prefix(path in "[a-z0-9/_-]{1,30}") { + let url = format!("github.com/{}", path); + prop_assert!(is_github_url(&url)); + } + + #[test] + fn prop_is_github_url_https_prefix(path in "[a-z0-9/_-]{1,30}") { + let url = format!("https://github.com/{}", path); + prop_assert!(is_github_url(&url)); + } + + #[test] + fn prop_not_github_url_random(s in "[a-zA-Z0-9._/-]{1,40}") { + prop_assume!( + !s.starts_with("github.com/") + && !s.starts_with("https://github.com/") + && !s.starts_with("http://github.com/") + ); + prop_assert!(!is_github_url(&s)); + } + } + #[rstest] #[case( "github.com/harehare/lisp", @@ -195,26 +246,112 @@ mod tests { "github.com/harehare/repo/lib/utils.mq@v2.0", "https://raw.githubusercontent.com/harehare/repo/v2.0/lib/utils.mq" )] + #[case( + "https://github.com/alice/mod", + "https://raw.githubusercontent.com/alice/mod/HEAD/mod.mq" + )] + #[case( + "http://github.com/alice/mod", + "https://raw.githubusercontent.com/alice/mod/HEAD/mod.mq" + )] 
+ #[case( + "github.com/alice/mod.mq", + "https://raw.githubusercontent.com/alice/mod.mq/HEAD/mod.mq" + )] fn test_github_to_raw_url(#[case] input: &str, #[case] expected: &str) { assert_eq!(github_to_raw_url(input).unwrap(), expected); } + #[rstest] + // Single component after github.com/ — not enough to form owner/repo + #[case("github.com/owner")] + fn test_github_to_raw_url_returns_none(#[case] input: &str) { + assert!(github_to_raw_url(input).is_none()); + } + + proptest! { + #[test] + fn prop_github_to_raw_url_always_https( + owner in "[a-z][a-z0-9-]{0,10}", + repo in "[a-z][a-z0-9-]{0,10}", + ) { + let input = format!("github.com/{}/{}", owner, repo); + let url = github_to_raw_url(&input).unwrap(); + prop_assert!(url.starts_with("https://raw.githubusercontent.com/")); + } + + #[test] + fn prop_github_to_raw_url_versioned_contains_version( + owner in "[a-z][a-z0-9-]{0,10}", + repo in "[a-z][a-z0-9-]{0,10}", + version in "v[0-9]\\.[0-9]\\.[0-9]", + ) { + let input = format!("github.com/{}/{}@{}", owner, repo, version); + let url = github_to_raw_url(&input).unwrap(); + prop_assert!(url.contains(&version)); + } + } + #[rstest] #[case("github.com/alice/mymod", "mymod")] #[case("github.com/alice/mymod.mq@v1.0", "mymod")] #[case("https://example.com/path/foo.mq", "foo")] + #[case("https://example.com/bar", "bar")] + #[case("https://example.com/a/b/c.mq@v2", "c")] fn test_extract_module_name(#[case] input: &str, #[case] expected: &str) { assert_eq!(extract_module_name(input), expected); } + proptest! 
{ + #[test] + fn prop_extract_module_name_no_mq_suffix( + owner in "[a-z][a-z0-9-]{0,10}", + repo in "[a-z][a-z0-9-]{0,10}", + ) { + let input = format!("github.com/{}/{}", owner, repo); + let name = extract_module_name(&input); + prop_assert!(!name.ends_with(".mq")); + } + + #[test] + fn prop_extract_module_name_no_at_suffix( + owner in "[a-z][a-z0-9-]{0,10}", + repo in "[a-z][a-z0-9-]{0,10}", + version in "v[0-9]\\.[0-9]", + ) { + let input = format!("github.com/{}/{}@{}", owner, repo, version); + let name = extract_module_name(&input); + prop_assert!(!name.contains('@')); + } + } + #[rstest] #[case("github.com/alice/myrepo", "raw.githubusercontent.com/alice/myrepo")] #[case("https://github.com/alice/myrepo", "raw.githubusercontent.com/alice/myrepo")] + #[case("http://github.com/alice/myrepo", "raw.githubusercontent.com/alice/myrepo")] #[case("example.com", "example.com")] + #[case("https://example.com", "example.com")] + #[case("raw.githubusercontent.com/alice/repo", "raw.githubusercontent.com/alice/repo")] fn test_normalize_allowed_domain(#[case] input: &str, #[case] expected: &str) { assert_eq!(normalize_allowed_domain(input), expected); } + proptest! 
{ + #[test] + fn prop_normalize_allowed_domain_no_scheme(domain in "[a-z0-9._/-]{2,30}") { + let normalized = normalize_allowed_domain(&domain); + prop_assert!(!normalized.starts_with("https://")); + prop_assert!(!normalized.starts_with("http://")); + } + + #[test] + fn prop_normalize_strips_https_scheme(path in "[a-z0-9._/-]{2,30}") { + let input = format!("https://{}", path); + let normalized = normalize_allowed_domain(&input); + prop_assert!(!normalized.starts_with("https://")); + } + } + #[rstest] // default domain always allowed #[case(vec![], "https://raw.githubusercontent.com/harehare/lisp/HEAD/lisp.mq", true)] @@ -225,16 +362,102 @@ mod tests { #[case(vec!["example.com".to_string()], "https://other.com/foo.mq", false)] // prefix-bypass prevention #[case(vec!["example.com".to_string()], "https://example.com.evil.com/foo.mq", false)] + // multiple allowed domains + #[case(vec!["a.com".to_string(), "b.com".to_string()], "https://a.com/x.mq", true)] + #[case(vec!["a.com".to_string(), "b.com".to_string()], "https://b.com/x.mq", true)] + #[case(vec!["a.com".to_string(), "b.com".to_string()], "https://c.com/x.mq", false)] fn test_is_allowed_url(#[case] allowed: Vec, #[case] url: &str, #[case] expected: bool) { assert_eq!(is_allowed_url(url, &allowed), expected); } + proptest! { + #[test] + fn prop_default_domain_always_allowed(path in "[a-z0-9/_.-]{1,40}") { + let url = format!("https://raw.githubusercontent.com/harehare/{}", path); + // Always allowed regardless of the allowlist. 
+ prop_assert!(is_allowed_url(&url, &[])); + } + + #[test] + fn prop_arbitrary_domain_blocked_by_empty_allowlist( + host in "[a-z][a-z0-9-]{2,10}\\.[a-z]{2,4}", + path in "[a-z0-9/_.-]{1,20}", + ) { + prop_assume!(host != "raw.githubusercontent.com"); + let url = format!("https://{}/{}", host, path); + prop_assert!(!is_allowed_url(&url, &[])); + } + + #[test] + fn prop_own_domain_allowed_when_listed( + host in "[a-z][a-z0-9-]{2,10}\\.[a-z]{2,4}", + path in "[a-z0-9/_.-]{1,20}", + ) { + let url = format!("https://{}/{}", host, path); + let allowed = vec![host.clone()]; + prop_assert!(is_allowed_url(&url, &allowed)); + } + + #[test] + fn prop_prefix_attack_blocked( + host in "[a-z][a-z0-9-]{2,10}\\.[a-z]{2,4}", + path in "[a-z0-9/_.-]{1,20}", + ) { + // "example.com.evil.com" must not match "example.com". + let allowed = vec![host.clone()]; + let attacker_url = format!("https://{}.evil.com/{}", host, path); + prop_assert!(!is_allowed_url(&attacker_url, &allowed)); + } + } + #[rstest] #[case("https://raw.githubusercontent.com/alice/mymod/v0.1.0/mymod.mq", true)] #[case("https://raw.githubusercontent.com/alice/mymod/HEAD/mymod.mq", false)] #[case("https://raw.githubusercontent.com/alice/mymod/main/mymod.mq", false)] + #[case("https://raw.githubusercontent.com/alice/mymod/master/mymod.mq", false)] #[case("https://example.com/foo.mq", false)] + #[case("https://raw.githubusercontent.com/a/b/feature-branch/f.mq", true)] fn test_is_versioned_url(#[case] url: &str, #[case] expected: bool) { assert_eq!(is_versioned_url(url), expected); } + + proptest! 
{ + #[test] + fn prop_versioned_tag_is_immutable( + owner in "[a-z][a-z0-9-]{0,10}", + repo in "[a-z][a-z0-9-]{0,10}", + version in "v[0-9]\\.[0-9]\\.[0-9]", + ) { + let url = format!( + "https://raw.githubusercontent.com/{}/{}/{}/mod.mq", + owner, repo, version + ); + prop_assert!(is_versioned_url(&url)); + } + + #[test] + fn prop_mutable_refs_are_not_versioned( + owner in "[a-z][a-z0-9-]{0,10}", + repo in "[a-z][a-z0-9-]{0,10}", + ref_ in prop::sample::select(vec!["HEAD", "main", "master"]), + ) { + let url = format!( + "https://raw.githubusercontent.com/{}/{}/{}/mod.mq", + owner, repo, ref_ + ); + prop_assert!(!is_versioned_url(&url)); + } + } + + #[rstest] + #[case("example.com/foo", "example.com", true)] + #[case("example.com?q=1", "example.com", true)] + #[case("example.com#anchor", "example.com", true)] + #[case("example.com:8080/foo", "example.com", true)] + #[case("example.com", "example.com", true)] + #[case("example.com.evil.com/foo", "example.com", false)] + #[case("other.com/foo", "example.com", false)] + fn test_prefix_matches(#[case] url_without_scheme: &str, #[case] domain: &str, #[case] expected: bool) { + assert_eq!(prefix_matches(url_without_scheme, domain), expected); + } } From e1a08f51a20824a5496a8cdc01bbdb3b9404507d Mon Sep 17 00:00:00 2001 From: harehare Date: Tue, 16 Jun 2026 23:28:11 +0900 Subject: [PATCH 5/8] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor(wasm):=20move?= =?UTF-8?q?=20OPFS=20HTTP=20cache=20logic=20into=20WasmFetcher?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/mq-wasm/src/script.rs | 123 ++++++++++++++++++++++------------- 1 file changed, 78 insertions(+), 45 deletions(-) diff --git a/crates/mq-wasm/src/script.rs b/crates/mq-wasm/src/script.rs index 770a5cfc2..505f4c0da 100644 --- a/crates/mq-wasm/src/script.rs +++ b/crates/mq-wasm/src/script.rs @@ -231,12 +231,59 @@ impl From for mq_markdown::ConversionOptions { /// Sync HTTP fetcher for WASM that reads from a 
pre-populated in-memory cache. /// -/// Content is inserted by [`WasmModuleResolver::preload_http_modules`] (async, keyed by raw HTTPS URL) +/// Content is inserted by [`WasmFetcher::preload_url`] (async, keyed by raw HTTPS URL) /// and then read synchronously by [`mq_lang::ModuleResolver::resolve`]. +/// +/// When the `opfs` feature is enabled and an OPFS root handle is set, `preload_url` also +/// maintains a persistent on-disk cache with SHA-256 sidecar files for tamper detection. +/// HTTP imports are blocked entirely when OPFS is compiled in but unavailable at runtime. #[derive(Debug, Clone, Default)] struct WasmFetcher { /// Keyed by the normalized raw HTTPS URL (e.g. `https://raw.githubusercontent.com/...`). cache: Rc>>, + #[cfg(feature = "opfs")] + /// OPFS root handle shared with `WasmModuleResolver`. `None` means OPFS is unavailable. + root_dir: Rc>>, +} + +impl WasmFetcher { + #[cfg(feature = "opfs")] + fn is_opfs_available(&self) -> bool { + self.root_dir.borrow().is_some() + } + + /// Ensures `fetch_url` is in the in-memory cache, using OPFS as a persistent backing store. + /// + /// - If the URL is already in the memory cache, returns immediately. + /// - Otherwise checks the OPFS `http_cache/{subdir}` directory for a cached copy with a valid + /// SHA-256 sidecar. On a hit the content is loaded into memory. + /// - On an OPFS miss the URL is fetched from the network, written to OPFS, then cached in memory. 
+ #[cfg(feature = "opfs")] + async fn preload_url(&self, fetch_url: &str) { + if self.cache.borrow().contains_key(fetch_url) { + return; + } + + let root = self.root_dir.borrow().clone(); + let Some(ref r) = root else { return }; + + let subdir = if mq_lang::http_import::is_versioned_url(fetch_url) { + "versioned" + } else { + "mutable" + }; + let stem = cache_file_stem(fetch_url); + + if let Some(content) = try_read_opfs_http_cache(r, subdir, &stem).await { + self.cache.borrow_mut().insert(fetch_url.to_string(), content); + return; + } + + if let Ok(content) = fetch_text(fetch_url).await { + write_opfs_http_cache(r, subdir, &stem, &content).await; + self.cache.borrow_mut().insert(fetch_url.to_string(), content); + } + } } impl mq_lang::HttpFetcher for WasmFetcher { @@ -253,32 +300,38 @@ impl mq_lang::HttpFetcher for WasmFetcher { pub struct WasmModuleResolver { /// HTTP resolver: handles URL normalization, domain allow-list, and delegates fetch to WasmFetcher. http_resolver: Rc>>, - /// Shared reference into WasmFetcher's cache for direct insertion during async preload. - fetcher_cache: Rc>>, + /// Direct handle to the WasmFetcher; shares the same Rc data as the clone held by `http_resolver`. + fetcher: WasmFetcher, #[cfg(feature = "opfs")] - /// Cache of preloaded module contents, keyed by module name + /// Cache of preloaded local `.mq` module contents (from OPFS), keyed by module name. cache: Rc>>, #[cfg(feature = "opfs")] - /// Root directory handle for OPFS access + /// OPFS root handle shared with `fetcher.root_dir`. root_dir: Rc>>, #[cfg(feature = "opfs")] - /// Flag indicating whether OPFS is available + /// Whether OPFS was successfully initialized. 
is_available: Rc>, } impl Default for WasmModuleResolver { fn default() -> Self { - let fetcher_cache: Rc>> = Rc::new(RefCell::new(HashMap::new())); + #[cfg(feature = "opfs")] + let root_dir: Rc>> = Rc::new(RefCell::new(None)); + let fetcher = WasmFetcher { - cache: Rc::clone(&fetcher_cache), + cache: Rc::new(RefCell::new(HashMap::new())), + #[cfg(feature = "opfs")] + root_dir: Rc::clone(&root_dir), }; + let http_resolver = mq_lang::HttpModuleResolver::new(vec![], fetcher.clone()); + Self { - http_resolver: Rc::new(RefCell::new(mq_lang::HttpModuleResolver::new(vec![], fetcher))), - fetcher_cache, + http_resolver: Rc::new(RefCell::new(http_resolver)), + fetcher, #[cfg(feature = "opfs")] cache: Rc::new(RefCell::new(HashMap::new())), #[cfg(feature = "opfs")] - root_dir: Rc::new(RefCell::new(None)), + root_dir, #[cfg(feature = "opfs")] is_available: Rc::new(RefCell::new(false)), } @@ -397,18 +450,14 @@ impl WasmModuleResolver { /// Only imports written in the user's own code are resolved; HTTP imports inside /// fetched modules are intentionally ignored. /// - /// Caching behaviour: - /// - Versioned URLs (`@v1.0`) are cached persistently in OPFS `http_cache/versioned/`. - /// - Mutable URLs (HEAD/branch) are cached in OPFS `http_cache/mutable/`. - /// - Each cached file has a SHA-256 sidecar for tamper detection. - /// - Results are also kept in `http_cache` for the lifetime of this session. + /// When the `opfs` feature is enabled, HTTP imports require OPFS to be available — + /// this method returns immediately (without fetching) if OPFS is unavailable. + /// Versioned URLs (`@v1.0`) are persisted in `http_cache/versioned/` and mutable URLs + /// in `http_cache/mutable/`, each with a SHA-256 sidecar for tamper detection. pub async fn preload_http_modules(&self, code: &str) { - #[cfg(feature = "opfs")] - let root = self.root_dir.borrow().clone(); - // HTTP import requires OPFS for caching; skip if OPFS is unavailable. 
#[cfg(feature = "opfs")] - if root.is_none() { + if !self.fetcher.is_opfs_available() { return; } @@ -424,37 +473,21 @@ impl WasmModuleResolver { continue; }; - if self.fetcher_cache.borrow().contains_key(&fetch_url) { - continue; - } - if !self.http_resolver.borrow().is_allowed_domain(&fetch_url) { continue; } - let subdir = if mq_lang::http_import::is_versioned_url(&fetch_url) { - "versioned" - } else { - "mutable" - }; - let stem = cache_file_stem(&fetch_url); - - // Fast path: OPFS cache hit #[cfg(feature = "opfs")] - if let Some(ref r) = root - && let Some(content) = try_read_opfs_http_cache(r, subdir, &stem).await - { - self.fetcher_cache.borrow_mut().insert(fetch_url, content); - continue; - } + self.fetcher.preload_url(&fetch_url).await; - // Slow path: fetch from network - if let Ok(content) = fetch_text(&fetch_url).await { - #[cfg(feature = "opfs")] - if let Some(ref r) = root { - write_opfs_http_cache(r, subdir, &stem, &content).await; + #[cfg(not(feature = "opfs"))] + { + if self.fetcher.cache.borrow().contains_key(&fetch_url) { + continue; + } + if let Ok(content) = fetch_text(&fetch_url).await { + self.fetcher.cache.borrow_mut().insert(fetch_url, content); } - self.fetcher_cache.borrow_mut().insert(fetch_url, content); } } } @@ -484,7 +517,7 @@ impl mq_lang::ModuleResolver for WasmModuleResolver { if is_http { #[cfg(feature = "opfs")] - if self.root_dir.borrow().is_none() { + if !self.fetcher.is_opfs_available() { return Err(mq_lang::ModuleError::IOError(std::borrow::Cow::Owned(format!( "HTTP import of '{}' is not available: OPFS is not supported in this environment.", module_name From 1165896d86b71817048e9e211e616e2fb26b5067 Mon Sep 17 00:00:00 2001 From: harehare Date: Wed, 17 Jun 2026 21:08:54 +0900 Subject: [PATCH 6/8] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor(wasm):=20load?= =?UTF-8?q?=20OPFS=20modules=20on=20demand=20instead=20of=20eagerly?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
crates/mq-wasm/src/script.rs | 783 ++++++++++++++++++++++++++++++++--- 1 file changed, 721 insertions(+), 62 deletions(-) diff --git a/crates/mq-wasm/src/script.rs b/crates/mq-wasm/src/script.rs index 505f4c0da..bae2d20cf 100644 --- a/crates/mq-wasm/src/script.rs +++ b/crates/mq-wasm/src/script.rs @@ -4,9 +4,7 @@ use std::{cell::RefCell, collections::HashMap, rc::Rc, str::FromStr}; use wasm_bindgen::prelude::*; #[cfg(feature = "opfs")] -use futures::StreamExt; -#[cfg(feature = "opfs")] -use opfs::{DirectoryHandle, FileHandle}; +use opfs::DirectoryHandle; #[wasm_bindgen(typescript_custom_section)] const TS_CUSTOM_SECTION: &'static str = r#" @@ -370,67 +368,66 @@ impl WasmModuleResolver { } } - /// Preloads all `.mq` modules from OPFS into the cache + /// Loads and caches only the `.mq` modules that `code` actually imports (directly or + /// transitively through other local modules). /// - /// This method scans the OPFS root directory for all `.mq` files and loads them into cache. - /// Module names are stored without the `.mq` extension (e.g., `csv.mq` becomes `csv`). + /// Only modules reachable from the imports in `code` are read from OPFS, so queries that + /// use a small subset of the available modules pay only for what they need. Cycles + /// (e.g. A imports B, B imports A) are handled safely via a visited set. /// /// If OPFS is not available, this method returns immediately without error. 
- pub async fn preload_modules(&self) { + pub async fn preload_modules(&self, code: &str) { #[cfg(feature = "opfs")] { - // Skip if OPFS is not available if !*self.is_available.borrow() { return; } - let root = match self.root_dir.borrow().as_ref() { - Some(r) => r.clone(), - None => return, // Should not happen if is_available is true, but be defensive + let root = match self.root_dir.borrow().clone() { + Some(r) => r, + None => return, }; - let mut entries = match root.entries().await { - Ok(e) => e, - Err(_) => return, // Failed to get directory entries - }; + let mut visited: std::collections::HashSet = std::collections::HashSet::new(); + let mut queue: std::collections::VecDeque = + extract_local_import_names(code).into_iter().collect(); - while let Some(result) = entries.next().await { - let (name, entry) = match result { - Ok(e) => e, - Err(_) => continue, // Skip entries that fail to read + while let Some(name) = queue.pop_front() { + if visited.contains(&name) { + continue; + } + visited.insert(name.clone()); + + let content = if let Some(cached) = self.cache.borrow().get(&name).cloned() { + cached + } else if let Some(c) = self.load_module_from_opfs(&name, &root).await { + self.cache.borrow_mut().insert(name.clone(), c.clone()); + c + } else { + continue; }; - match entry { - opfs::DirectoryEntry::File(file_handle) => { - // Only process .mq files - if !name.ends_with(".mq") { - continue; - } - - // Read file contents - let data = match file_handle.read().await { - Ok(d) => d, - Err(_) => continue, // Skip files that fail to read - }; - - let contents = match String::from_utf8(data) { - Ok(c) => c, - Err(_) => continue, // Skip files that are not valid UTF-8 - }; - - // Store with module name (without .mq extension) - let module_name = name.strip_suffix(".mq").unwrap_or(&name); - self.cache.borrow_mut().insert(module_name.to_string(), contents); - } - opfs::DirectoryEntry::Directory(_) => { - // Skip directories for now - continue; + for dep in 
extract_local_import_names(&content) { + if !visited.contains(&dep) { + queue.push_back(dep); } } } } } + /// Reads `{name}.mq` from the OPFS root and returns its content, or `None` on any error. + #[cfg(feature = "opfs")] + async fn load_module_from_opfs(&self, name: &str, root: &opfs::persistent::DirectoryHandle) -> Option { + use opfs::{DirectoryHandle as _, FileHandle as _}; + let file_handle = root + .get_file_handle_with_options(&format!("{}.mq", name), &opfs::GetFileHandleOptions { create: false }) + .await + .ok()?; + let data = file_handle.read().await.ok()?; + String::from_utf8(data).ok() + } + /// Manually adds a module to the cache /// /// This is useful for injecting module contents without using OPFS @@ -603,10 +600,10 @@ pub async fn run(code: &str, content: &str, options: JsValue) -> Result Vec { + let token_arena = mq_lang::Shared::new(mq_lang::SharedCell::new(mq_lang::Arena::new(1024))); + let Ok(program) = mq_lang::parse(code, token_arena) else { + return vec![]; + }; + + program + .iter() + .filter_map(|node| { + let path = match &*node.expr { + mq_lang::AstExpr::Import(mq_lang::AstLiteral::String(p)) => p, + mq_lang::AstExpr::Include(mq_lang::AstLiteral::String(p)) => p, + _ => return None, + }; + if !mq_lang::http_import::is_remote_url(path) && !mq_lang::http_import::is_github_url(path) { + Some(path.clone()) + } else { + None + } + }) + .collect() +} + /// Parses `code` and returns all import/include paths that look like HTTP or GitHub URLs. 
fn extract_http_import_urls(code: &str) -> Vec { let token_arena = mq_lang::Shared::new(mq_lang::SharedCell::new(mq_lang::Arena::new(1024))); @@ -1366,20 +1387,17 @@ mod tests { writer.close().await.expect("Failed to close writer"); } - // Preload modules from OPFS - resolver.preload_modules().await; - - // Verify the module was loaded into cache - let resolved_content = - mq_lang::ModuleResolver::resolve(&resolver, "test_module").expect("Module should be found in cache"); - assert_eq!(resolved_content, module_content); - - // Test using the imported module in code execution let code = r#" let tm = import "test_module" | tm::upcase_exclaim() "#; + resolver.preload_modules(code).await; + + let resolved_content = + mq_lang::ModuleResolver::resolve(&resolver, "test_module").expect("Module should be found in cache"); + assert_eq!(resolved_content, module_content); + let mut engine = mq_lang::Engine::new(resolver.clone()); engine.load_builtin_module(); @@ -1444,19 +1462,17 @@ mod tests { .unwrap_or_else(|_| panic!("Failed to close writer for {}", file_name)); } - // Preload all modules - resolver.preload_modules().await; - - // Verify all modules are loaded - assert!(mq_lang::ModuleResolver::resolve(&resolver, "math").is_ok()); - assert!(mq_lang::ModuleResolver::resolve(&resolver, "string").is_ok()); - - // Test using multiple imported modules let code = r#" + import "math" import "string" | string::greet("World") "#; + resolver.preload_modules(code).await; + + assert!(mq_lang::ModuleResolver::resolve(&resolver, "math").is_ok()); + assert!(mq_lang::ModuleResolver::resolve(&resolver, "string").is_ok()); + let mut engine = mq_lang::Engine::new(resolver.clone()); engine.load_builtin_module(); @@ -1549,6 +1565,649 @@ mod tests { assert!(urls.is_empty()); } + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_local_import_names_import() { + let code = r#"import "mymod""#; + let names = extract_local_import_names(code); + assert_eq!(names, vec!["mymod"]); + } + + 
#[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_local_import_names_include() { + let code = r#"include "utils""#; + let names = extract_local_import_names(code); + assert_eq!(names, vec!["utils"]); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_local_import_names_excludes_urls() { + let code = r#" + import "local_mod" + import "https://example.com/foo.mq" + include "github.com/alice/mymod" + "#; + let names = extract_local_import_names(code); + assert_eq!(names, vec!["local_mod"]); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_local_import_names_empty_code() { + assert!(extract_local_import_names("").is_empty()); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_local_import_names_invalid_syntax_returns_empty() { + assert!(extract_local_import_names("import =>").is_empty()); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_local_import_names_multiple_local() { + let code = r#" + import "modA" + include "modB" + import "modC" + "#; + let names = extract_local_import_names(code); + assert_eq!(names.len(), 3); + assert!(names.contains(&"modA".to_string())); + assert!(names.contains(&"modB".to_string())); + assert!(names.contains(&"modC".to_string())); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_local_import_names_only_urls_returns_empty() { + let code = r#" + import "https://example.com/foo.mq" + include "github.com/alice/mymod" + "#; + assert!(extract_local_import_names(code).is_empty()); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_local_import_names_no_imports_in_expression_code() { + // expressions without any import/include + assert!(extract_local_import_names("upcase() | trim()").is_empty()); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_extract_local_import_names_standard_module_name_treated_as_local() { + // standard module names (csv, json) are syntactically local; resolve() 
handles them + let names = extract_local_import_names(r#"import "csv""#); + assert_eq!(names, vec!["csv"]); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_modules_without_opfs_available() { + // do NOT call initialize() → OPFS stays unavailable + let resolver = WasmModuleResolver::new(); + resolver.preload_modules(r#"import "mymod""#).await; + // OPFS unavailable, so nothing should be cached + assert!(mq_lang::ModuleResolver::resolve(&resolver, "mymod").is_err()); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_modules_empty_code_loads_nothing() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + resolver.preload_modules("").await; + assert!(mq_lang::ModuleResolver::resolve(&resolver, "anything").is_err()); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_modules_no_imports_in_code_loads_nothing() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + resolver.preload_modules("upcase() | trim()").await; + assert!(mq_lang::ModuleResolver::resolve(&resolver, "anything").is_err()); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_modules_uses_already_cached_module() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + // Pre-populate cache directly — should survive without touching OPFS + resolver.add_module("pre_cached", "def pre(): 1;".to_string()); + resolver.preload_modules(r#"import "pre_cached""#).await; + let content = mq_lang::ModuleResolver::resolve(&resolver, "pre_cached").unwrap(); + assert_eq!(content, "def pre(): 1;"); + } + + /// Writes `content` to `{name}` under the OPFS root. Panics on any failure. 
+ #[cfg(feature = "opfs")] + async fn write_opfs_file(root: &opfs::persistent::DirectoryHandle, name: &str, content: &str) { + use opfs::{DirectoryHandle as _, FileHandle as _, WritableFileStream as _}; + let mut fh = root + .get_file_handle_with_options(name, &opfs::GetFileHandleOptions { create: true }) + .await + .expect("get_file_handle"); + let mut w = fh + .create_writable_with_options(&opfs::CreateWritableOptions { keep_existing_data: false }) + .await + .expect("create_writable"); + w.write_at_cursor_pos(content.as_bytes()).await.expect("write"); + w.close().await.expect("close"); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_modules_loads_imported_module_from_opfs() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + let root = opfs::persistent::app_specific_dir().await.unwrap(); + write_opfs_file(&root, "pll_single.mq", "def hello(): \"hello\";").await; + + resolver.preload_modules(r#"import "pll_single""#).await; + + let content = mq_lang::ModuleResolver::resolve(&resolver, "pll_single").unwrap(); + assert_eq!(content, "def hello(): \"hello\";"); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_modules_does_not_load_nonimported_module() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + let root = opfs::persistent::app_specific_dir().await.unwrap(); + write_opfs_file(&root, "pll_wanted.mq", "def wanted(): 1;").await; + write_opfs_file(&root, "pll_unwanted.mq", "def unwanted(): 2;").await; + + // only pll_wanted is in the import list + resolver.preload_modules(r#"import "pll_wanted""#).await; + + assert!(mq_lang::ModuleResolver::resolve(&resolver, "pll_wanted").is_ok()); + assert!(mq_lang::ModuleResolver::resolve(&resolver, "pll_unwanted").is_err()); + } + + #[cfg(feature = "opfs")] + 
#[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_modules_loads_multiple_direct_imports() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + let root = opfs::persistent::app_specific_dir().await.unwrap(); + write_opfs_file(&root, "pll_multi_a.mq", "def fa(x): x;").await; + write_opfs_file(&root, "pll_multi_b.mq", "def fb(x): x;").await; + + let code = r#"import "pll_multi_a" import "pll_multi_b""#; + resolver.preload_modules(code).await; + + assert!(mq_lang::ModuleResolver::resolve(&resolver, "pll_multi_a").is_ok()); + assert!(mq_lang::ModuleResolver::resolve(&resolver, "pll_multi_b").is_ok()); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_modules_resolves_transitive_dependencies() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + let root = opfs::persistent::app_specific_dir().await.unwrap(); + // pll_trans_a imports pll_trans_b + write_opfs_file(&root, "pll_trans_a.mq", "import \"pll_trans_b\"\ndef fa(x): x;").await; + write_opfs_file(&root, "pll_trans_b.mq", "def fb(x): x;").await; + + // user code only imports pll_trans_a + resolver.preload_modules(r#"import "pll_trans_a""#).await; + + // pll_trans_b should be loaded transitively + assert!(mq_lang::ModuleResolver::resolve(&resolver, "pll_trans_a").is_ok()); + assert!(mq_lang::ModuleResolver::resolve(&resolver, "pll_trans_b").is_ok()); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_modules_handles_circular_dependencies() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + let root = opfs::persistent::app_specific_dir().await.unwrap(); + // pll_circ_a → pll_circ_b → pll_circ_a (cycle) + write_opfs_file(&root, "pll_circ_a.mq", "import \"pll_circ_b\"\ndef 
fca(x): x;").await; + write_opfs_file(&root, "pll_circ_b.mq", "import \"pll_circ_a\"\ndef fcb(x): x;").await; + + // must terminate without infinite loop + resolver.preload_modules(r#"import "pll_circ_a""#).await; + + assert!(mq_lang::ModuleResolver::resolve(&resolver, "pll_circ_a").is_ok()); + assert!(mq_lang::ModuleResolver::resolve(&resolver, "pll_circ_b").is_ok()); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_modules_missing_module_skipped_gracefully() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + // "pll_ghost" does not exist in OPFS + resolver.preload_modules(r#"import "pll_ghost""#).await; + // should not panic; module stays unresolvable + assert!(mq_lang::ModuleResolver::resolve(&resolver, "pll_ghost").is_err()); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_modules_duplicate_import_loaded_once() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + let root = opfs::persistent::app_specific_dir().await.unwrap(); + write_opfs_file(&root, "pll_dup.mq", "def fdup(x): x;").await; + + // same module listed twice in imports + let code = r#"import "pll_dup" import "pll_dup""#; + resolver.preload_modules(code).await; + + // resolve must succeed and content is correct + let content = mq_lang::ModuleResolver::resolve(&resolver, "pll_dup").unwrap(); + assert_eq!(content, "def fdup(x): x;"); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_modules_cached_content_not_overwritten_from_opfs() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + let root = opfs::persistent::app_specific_dir().await.unwrap(); + // write one version to OPFS + write_opfs_file(&root, 
"pll_override.mq", "def opfs_version(): 2;").await; + // pre-populate cache with a different version + resolver.add_module("pll_override", "def cache_version(): 1;".to_string()); + + resolver.preload_modules(r#"import "pll_override""#).await; + + // cache takes precedence; OPFS file should not overwrite it + let content = mq_lang::ModuleResolver::resolve(&resolver, "pll_override").unwrap(); + assert_eq!(content, "def cache_version(): 1;"); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_canonical_name_https_url_returns_module_name() { + let resolver = WasmModuleResolver::new(); + assert_eq!( + mq_lang::ModuleResolver::canonical_name(&resolver, "https://example.com/mymod.mq"), + "mymod" + ); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_canonical_name_github_shorthand_returns_module_name() { + let resolver = WasmModuleResolver::new(); + assert_eq!( + mq_lang::ModuleResolver::canonical_name(&resolver, "github.com/alice/mymod"), + "mymod" + ); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_canonical_name_github_versioned_strips_version_and_extension() { + let resolver = WasmModuleResolver::new(); + assert_eq!( + mq_lang::ModuleResolver::canonical_name(&resolver, "github.com/alice/mymod.mq@v1.0"), + "mymod" + ); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_canonical_name_local_module_unchanged() { + let resolver = WasmModuleResolver::new(); + assert_eq!(mq_lang::ModuleResolver::canonical_name(&resolver, "local_mod"), "local_mod"); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_get_path_harehare_github_url_expands_to_raw_url() { + let resolver = WasmModuleResolver::new(); + let path = + mq_lang::ModuleResolver::get_path(&resolver, "github.com/harehare/mymod").unwrap(); + assert!(path.starts_with("https://raw.githubusercontent.com/harehare/mymod/")); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_get_path_non_allowed_github_url_falls_back_to_module_name() { + // 
github.com/other-user/... is not in the default allowlist, so to_fetch_url returns + // IOError; WasmModuleResolver::get_path falls back to the module name itself. + let resolver = WasmModuleResolver::new(); + let path = mq_lang::ModuleResolver::get_path(&resolver, "github.com/other/mymod").unwrap(); + assert_eq!(path, "github.com/other/mymod"); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_get_path_local_module_returns_name() { + let resolver = WasmModuleResolver::new(); + let path = mq_lang::ModuleResolver::get_path(&resolver, "my_local_mod").unwrap(); + assert_eq!(path, "my_local_mod"); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_resolve_https_url_without_opfs_returns_io_error() { + let resolver = WasmModuleResolver::new(); + // initialize() not called → OPFS unavailable + let result = mq_lang::ModuleResolver::resolve( + &resolver, + "https://raw.githubusercontent.com/harehare/mod/HEAD/mod.mq", + ); + assert!( + matches!(result, Err(mq_lang::ModuleError::IOError(_))), + "expected IOError when OPFS is unavailable, got: {:?}", + result + ); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_resolve_github_url_without_opfs_returns_io_error() { + let resolver = WasmModuleResolver::new(); + let result = mq_lang::ModuleResolver::resolve(&resolver, "github.com/harehare/mymod"); + assert!(matches!(result, Err(mq_lang::ModuleError::IOError(_)))); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_resolve_non_allowlisted_domain_returns_io_error() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + // example.com is not in the default allowlist + let result = mq_lang::ModuleResolver::resolve(&resolver, "https://example.com/mod.mq"); + assert!( + matches!(result, Err(mq_lang::ModuleError::IOError(_))), + "expected IOError for disallowed domain, got: 
{:?}", + result + ); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_resolve_default_allowed_domain_not_in_cache_returns_not_found() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + // raw.githubusercontent.com/harehare is always allowed; fetcher cache is empty + let result = mq_lang::ModuleResolver::resolve( + &resolver, + "https://raw.githubusercontent.com/harehare/test/HEAD/test.mq", + ); + assert!( + matches!(result, Err(mq_lang::ModuleError::NotFound(_))), + "expected NotFound when URL is allowed but not cached, got: {:?}", + result + ); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_resolve_non_harehare_github_url_blocked_by_empty_allowlist() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + // raw.githubusercontent.com/other-user does not match the default allowed domain prefix + let result = mq_lang::ModuleResolver::resolve( + &resolver, + "https://raw.githubusercontent.com/other-user/mod/HEAD/mod.mq", + ); + assert!(matches!(result, Err(mq_lang::ModuleError::IOError(_)))); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_set_allowed_domains_changes_domain_error_to_not_found() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + + let url = "https://example.com/mod.mq"; + // before: domain blocked → IOError + let before = mq_lang::ModuleResolver::resolve(&resolver, url); + assert!( + matches!(before, Err(mq_lang::ModuleError::IOError(_))), + "expected IOError before setting domain, got: {:?}", + before + ); + + // after: domain allowed → domain check passes, fetcher cache empty → NotFound + resolver.set_allowed_domains(vec!["example.com".to_string()]); + let after = 
mq_lang::ModuleResolver::resolve(&resolver, url); + assert!( + matches!(after, Err(mq_lang::ModuleError::NotFound(_))), + "expected NotFound after setting domain, got: {:?}", + after + ); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_set_allowed_domains_github_shorthand_expands_correctly() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + // github.com/alice/repo shorthand should expand to raw.githubusercontent.com/alice/repo + resolver.set_allowed_domains(vec!["github.com/alice/myrepo".to_string()]); + // Now alice/myrepo is allowed; fetcher cache empty → NotFound (not IOError) + let result = mq_lang::ModuleResolver::resolve( + &resolver, + "https://raw.githubusercontent.com/alice/myrepo/HEAD/myrepo.mq", + ); + assert!( + matches!(result, Err(mq_lang::ModuleError::NotFound(_))), + "expected NotFound after allowing github.com/alice/myrepo, got: {:?}", + result + ); + // alice/other is still blocked + let blocked = mq_lang::ModuleResolver::resolve( + &resolver, + "https://raw.githubusercontent.com/alice/other/HEAD/other.mq", + ); + assert!(matches!(blocked, Err(mq_lang::ModuleError::IOError(_)))); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_http_modules_empty_code_is_noop() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + resolver.preload_http_modules("").await; + // nothing cached; http resolve of any URL still fails + #[cfg(feature = "opfs")] + assert!(mq_lang::ModuleResolver::resolve( + &resolver, + "https://raw.githubusercontent.com/harehare/test/HEAD/test.mq" + ) + .is_err()); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_http_modules_local_only_imports_do_not_affect_http_cache() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + resolver.preload_http_modules(r#"import "local_mod""#).await; + // local_mod is not an HTTP URL → 
skipped; HTTP resolve still fails + #[cfg(feature = "opfs")] + assert!(mq_lang::ModuleResolver::resolve( + &resolver, + "https://raw.githubusercontent.com/harehare/test/HEAD/test.mq" + ) + .is_err()); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_http_modules_without_opfs_is_noop() { + let resolver = WasmModuleResolver::new(); + // initialize() not called → OPFS unavailable → preload_http_modules returns early + resolver + .preload_http_modules( + r#"import "https://raw.githubusercontent.com/harehare/test/HEAD/test.mq""#, + ) + .await; + // resolve should still fail with OPFS-unavailable error, not a domain error + let result = mq_lang::ModuleResolver::resolve( + &resolver, + "https://raw.githubusercontent.com/harehare/test/HEAD/test.mq", + ); + assert!(matches!(result, Err(mq_lang::ModuleError::IOError(_)))); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_preload_http_modules_disallowed_domain_not_cached() { + let resolver = WasmModuleResolver::new(); + resolver.initialize().await; + if !*resolver.is_available.borrow() { + return; + } + // example.com is not in the allowlist → preload_http_modules skips it + resolver + .preload_http_modules(r#"import "https://example.com/mod.mq""#) + .await; + let result = mq_lang::ModuleResolver::resolve(&resolver, "https://example.com/mod.mq"); + // still IOError (domain not allowed), not NotFound — URL was not cached + assert!(matches!(result, Err(mq_lang::ModuleError::IOError(_)))); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_clear_http_cache_succeeds_when_no_cache_exists() { + // Calling clear when there is nothing to clear should succeed silently. 
+ let result = clear_http_cache().await; + assert!(result.is_ok()); + } + + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_clear_all_http_cache_succeeds_when_no_cache_exists() { + let result = clear_all_http_cache().await; + assert!(result.is_ok()); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_clear_http_cache_removes_mutable_subdir() { + use opfs::{DirectoryHandle as _, FileHandle as _, WritableFileStream as _}; + + let root = match opfs::persistent::app_specific_dir().await { + Ok(r) => r, + Err(_) => return, // OPFS unavailable in this environment + }; + + // Seed a mutable cache file + let cache_dir = root + .get_directory_handle_with_options(HTTP_CACHE_DIR, &opfs::GetDirectoryHandleOptions { create: true }) + .await + .unwrap(); + let mutable_dir = cache_dir + .get_directory_handle_with_options("mutable", &opfs::GetDirectoryHandleOptions { create: true }) + .await + .unwrap(); + write_opfs_file(&mutable_dir, "sentinel.mq", "test").await; + + // Clear only mutable cache + clear_http_cache().await.expect("clear_http_cache failed"); + + // mutable/ should no longer exist (or sentinel is gone) + let still_exists = cache_dir + .get_directory_handle_with_options("mutable", &opfs::GetDirectoryHandleOptions { create: false }) + .await + .is_ok(); + assert!(!still_exists, "mutable/ cache dir should have been removed"); + } + + #[cfg(feature = "opfs")] + #[allow(unused)] + #[wasm_bindgen_test] + async fn test_clear_all_http_cache_removes_entire_cache_dir() { + use opfs::DirectoryHandle as _; + + let mut root = match opfs::persistent::app_specific_dir().await { + Ok(r) => r, + Err(_) => return, + }; + + // Ensure the http_cache dir exists with something in it + let cache_dir = root + .get_directory_handle_with_options(HTTP_CACHE_DIR, &opfs::GetDirectoryHandleOptions { create: true }) + .await + .unwrap(); + let _ = cache_dir + .get_directory_handle_with_options("versioned", &opfs::GetDirectoryHandleOptions { create: 
true }) + .await; + + clear_all_http_cache().await.expect("clear_all_http_cache failed"); + + let still_exists = root + .get_directory_handle_with_options(HTTP_CACHE_DIR, &opfs::GetDirectoryHandleOptions { create: false }) + .await + .is_ok(); + assert!(!still_exists, "http_cache/ dir should have been fully removed"); + } + #[allow(unused)] #[wasm_bindgen_test] async fn test_html_to_markdown() { From e68377ba7be629bc63514889cbf0a12d998ac83a Mon Sep 17 00:00:00 2001 From: harehare Date: Wed, 17 Jun 2026 21:29:07 +0900 Subject: [PATCH 7/8] =?UTF-8?q?=E2=9C=85=20test(wasm):=20run=20headless=20?= =?UTF-8?q?in=20CI=20and=20fix=20broken=20module-resolver=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/mq-wasm/src/script.rs | 69 ++++++++++++++++++------------------ justfile | 2 +- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/crates/mq-wasm/src/script.rs b/crates/mq-wasm/src/script.rs index bae2d20cf..a61e401f1 100644 --- a/crates/mq-wasm/src/script.rs +++ b/crates/mq-wasm/src/script.rs @@ -389,8 +389,7 @@ impl WasmModuleResolver { }; let mut visited: std::collections::HashSet = std::collections::HashSet::new(); - let mut queue: std::collections::VecDeque = - extract_local_import_names(code).into_iter().collect(); + let mut queue: std::collections::VecDeque = extract_local_import_names(code).into_iter().collect(); while let Some(name) = queue.pop_front() { if visited.contains(&name) { @@ -1299,10 +1298,10 @@ mod tests { let resolver = WasmModuleResolver::new(); // Manually add a module to cache - resolver.add_module("test", "def foo(x): x | upcase();".to_string()); + resolver.add_module("my_module", "def foo(x): x | upcase();".to_string()); // Should be able to resolve it - let result = mq_lang::ModuleResolver::resolve(&resolver, "test"); + let result = mq_lang::ModuleResolver::resolve(&resolver, "my_module"); #[cfg(feature = "opfs")] { assert!(result.is_ok()); @@ -1328,15 +1327,15 @@ mod 
tests { let resolver = WasmModuleResolver::new(); // Add a module - resolver.add_module("test", "content".to_string()); + resolver.add_module("my_module", "content".to_string()); #[cfg(feature = "opfs")] - assert!(mq_lang::ModuleResolver::resolve(&resolver, "test").is_ok()); + assert!(mq_lang::ModuleResolver::resolve(&resolver, "my_module").is_ok()); // Clear cache resolver.clear_cache(); // Should no longer be resolvable - assert!(mq_lang::ModuleResolver::resolve(&resolver, "test").is_err()); + assert!(mq_lang::ModuleResolver::resolve(&resolver, "my_module").is_err()); } #[cfg(feature = "opfs")] @@ -1388,8 +1387,8 @@ mod tests { } let code = r#" - let tm = import "test_module" - | tm::upcase_exclaim() + import "test_module" + | test_module::upcase_exclaim() "#; resolver.preload_modules(code).await; @@ -1694,7 +1693,9 @@ mod tests { .await .expect("get_file_handle"); let mut w = fh - .create_writable_with_options(&opfs::CreateWritableOptions { keep_existing_data: false }) + .create_writable_with_options(&opfs::CreateWritableOptions { + keep_existing_data: false, + }) .await .expect("create_writable"); w.write_at_cursor_pos(content.as_bytes()).await.expect("write"); @@ -1894,15 +1895,17 @@ mod tests { #[wasm_bindgen_test] async fn test_canonical_name_local_module_unchanged() { let resolver = WasmModuleResolver::new(); - assert_eq!(mq_lang::ModuleResolver::canonical_name(&resolver, "local_mod"), "local_mod"); + assert_eq!( + mq_lang::ModuleResolver::canonical_name(&resolver, "local_mod"), + "local_mod" + ); } #[allow(unused)] #[wasm_bindgen_test] async fn test_get_path_harehare_github_url_expands_to_raw_url() { let resolver = WasmModuleResolver::new(); - let path = - mq_lang::ModuleResolver::get_path(&resolver, "github.com/harehare/mymod").unwrap(); + let path = mq_lang::ModuleResolver::get_path(&resolver, "github.com/harehare/mymod").unwrap(); assert!(path.starts_with("https://raw.githubusercontent.com/harehare/mymod/")); } @@ -1930,10 +1933,8 @@ mod tests { async 
fn test_resolve_https_url_without_opfs_returns_io_error() { let resolver = WasmModuleResolver::new(); // initialize() not called → OPFS unavailable - let result = mq_lang::ModuleResolver::resolve( - &resolver, - "https://raw.githubusercontent.com/harehare/mod/HEAD/mod.mq", - ); + let result = + mq_lang::ModuleResolver::resolve(&resolver, "https://raw.githubusercontent.com/harehare/mod/HEAD/mod.mq"); assert!( matches!(result, Err(mq_lang::ModuleError::IOError(_))), "expected IOError when OPFS is unavailable, got: {:?}", @@ -2057,10 +2058,8 @@ mod tests { result ); // alice/other is still blocked - let blocked = mq_lang::ModuleResolver::resolve( - &resolver, - "https://raw.githubusercontent.com/alice/other/HEAD/other.mq", - ); + let blocked = + mq_lang::ModuleResolver::resolve(&resolver, "https://raw.githubusercontent.com/alice/other/HEAD/other.mq"); assert!(matches!(blocked, Err(mq_lang::ModuleError::IOError(_)))); } @@ -2072,11 +2071,13 @@ mod tests { resolver.preload_http_modules("").await; // nothing cached; http resolve of any URL still fails #[cfg(feature = "opfs")] - assert!(mq_lang::ModuleResolver::resolve( - &resolver, - "https://raw.githubusercontent.com/harehare/test/HEAD/test.mq" - ) - .is_err()); + assert!( + mq_lang::ModuleResolver::resolve( + &resolver, + "https://raw.githubusercontent.com/harehare/test/HEAD/test.mq" + ) + .is_err() + ); } #[allow(unused)] @@ -2087,11 +2088,13 @@ mod tests { resolver.preload_http_modules(r#"import "local_mod""#).await; // local_mod is not an HTTP URL → skipped; HTTP resolve still fails #[cfg(feature = "opfs")] - assert!(mq_lang::ModuleResolver::resolve( - &resolver, - "https://raw.githubusercontent.com/harehare/test/HEAD/test.mq" - ) - .is_err()); + assert!( + mq_lang::ModuleResolver::resolve( + &resolver, + "https://raw.githubusercontent.com/harehare/test/HEAD/test.mq" + ) + .is_err() + ); } #[cfg(feature = "opfs")] @@ -2101,9 +2104,7 @@ mod tests { let resolver = WasmModuleResolver::new(); // initialize() not called 
→ OPFS unavailable → preload_http_modules returns early resolver - .preload_http_modules( - r#"import "https://raw.githubusercontent.com/harehare/test/HEAD/test.mq""#, - ) + .preload_http_modules(r#"import "https://raw.githubusercontent.com/harehare/test/HEAD/test.mq""#) .await; // resolve should still fail with OPFS-unavailable error, not a domain error let result = mq_lang::ModuleResolver::resolve( diff --git a/justfile b/justfile index 0e1ee3f6e..156682984 100644 --- a/justfile +++ b/justfile @@ -96,7 +96,7 @@ test-fuzz: # Run WebAssembly tests in Chrome [working-directory: 'crates/mq-wasm'] test-wasm: - wasm-pack test --chrome + wasm-pack test --chrome --headless # Run formatter and linter lint: From bf23af0114ba05dbb562ab927ed5368f7ec40341 Mon Sep 17 00:00:00 2001 From: harehare Date: Wed, 17 Jun 2026 21:40:01 +0900 Subject: [PATCH 8/8] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor(wasm):=20remo?= =?UTF-8?q?ve=20unused=20futures=20dependency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 1 - crates/mq-wasm/Cargo.toml | 1 - 2 files changed, 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 83a2d486e..54e59a2cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2703,7 +2703,6 @@ dependencies = [ name = "mq-wasm" version = "0.6.1" dependencies = [ - "futures", "itertools", "js-sys", "md5", diff --git a/crates/mq-wasm/Cargo.toml b/crates/mq-wasm/Cargo.toml index b39813104..f5a7b5c30 100644 --- a/crates/mq-wasm/Cargo.toml +++ b/crates/mq-wasm/Cargo.toml @@ -17,7 +17,6 @@ crate-type = ["cdylib"] path = "src/lib.rs" [dependencies] -futures = {workspace = true} itertools = {workspace = true} js-sys = {workspace = true} md5 = {workspace = true}