Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,15 @@ jobs:
key: ${{ runner.os }}-cargo-test-${{ hashFiles('**/Cargo.lock') }}
restore-keys: ${{ runner.os }}-cargo-test-

- name: Install wasm target for GEPA test modules
run: rustup target add wasm32-unknown-unknown

- name: Build GEPA test WASM modules
run: |
for module in gepa-replay gepa-reflective gepa-score gepa-pareto gepa-verify; do
(cd "wasm-modules/$module" && cargo build --target wasm32-unknown-unknown --release)
done

- name: cargo test --workspace
run: cargo test --workspace

Expand Down
55 changes: 55 additions & 0 deletions .proofs/001_gepa_temperagent_failure_reflection.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# 001_gepa_temperagent_failure_reflection

Date: 2026-03-23
Branch: docs/positioning-rewrite
Scope: TemperAgent sandbox provisioning failure reflection + root-cause hardening

## Problem
`sandbox_provisioner` previously swallowed TemperFS bootstrap errors and still emitted `SandboxReady` with empty IDs. This made failures look like partial success.

## Code Changes
- Hard fail on TemperFS bootstrap failure with explicit error context (`temper_api_url`, tenant, agent id).
- Added `temper_api_url` to TemperAgent state + `Configure` action.
- Updated CSDL to expose `TemperApiUrl` and `Configure.temper_api_url`.
- All TemperAgent WASM modules now resolve the Temper API URL from entity `fields.temper_api_url` first, then fall back to the integration config.
- Installing `temper-agent` now auto-installs its `temper-fs` dependency.
- Added regression test for dependency install behavior.

## Verification
### Build/Test
- `cargo fmt --all`
- `cargo check -p temper-platform`
- `cargo test -p temper-platform os_apps::tests::test_install_temper_agent_auto_installs_temper_fs -- --nocapture` (passed)
- Built all TemperAgent WASM modules for `wasm32-unknown-unknown`.

### Live Proof (port 3015)
Tenant: `proof-fix-20260323`

Prereq: Uploaded tenant-scoped WASM modules:
- `sandbox_provisioner`
- `llm_caller`
- `tool_runner`
- `workspace_restorer`

Case A (bad API URL):
- Agent: `019d1c5a-4a43-71b0-adc9-199cb90eefab`
- Configure with `temper_api_url=http://127.0.0.1:39999`
- Provision result:
- `status=Failed`
- `error_message="TemperFS bootstrap failed at http://127.0.0.1:39999/tdata ... Ensure os-app 'temper-fs' is installed ... temper_api_url is correct."`
- `workspace_id`, `conversation_file_id`, `file_manifest_id` all empty

Case B (correct API URL):
- Agent: `019d1c5a-4f84-7d23-8c18-dfc23885e3b9`
- Configure with `temper_api_url=http://127.0.0.1:3015`
- Provision + callback result:
- `status=Failed` (intentional, due to `max_turns=0`)
- `error_message="turn budget exhausted (0/0)"`
- `workspace_id`, `conversation_file_id`, `file_manifest_id` all populated

Interpretation:
- Failure reflection is now explicit and truthful for TemperFS bootstrap issues.
- With correct URL, TemperFS bootstrap succeeds and IDs are present.

## Operational Caveat
If tenant-scoped WASM modules are not uploaded, integration dispatch fails with `WASM module '<name>' not found`. This is separate from TemperFS bootstrap handling.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ exclude = [
"wasm-modules/gepa-pareto",
"wasm-modules/gepa-reflective",
"wasm-modules/gepa-proposer-agent",
"wasm-modules/gepa-verify",
"crates/temper-wasm/tests/fixtures/echo-integration-src",
"os-apps/temper-agent/wasm/llm_caller",
"os-apps/temper-agent/wasm/tool_runner",
Expand Down
19 changes: 19 additions & 0 deletions crates/temper-platform/src/os_apps/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,14 @@ fn load_skill_bundle(skill_dir: &Path) -> Option<SkillBundle> {
})
}

/// Returns the names of the OS apps that `name` depends on.
///
/// Only `"temper-agent"` has a declared dependency; every other name
/// (including unknown ones) yields an empty slice.
fn os_app_dependencies(name: &str) -> &'static [&'static str] {
    // TemperAgent persists conversation/files in TemperFS entities.
    const TEMPER_AGENT_DEPS: &[&str] = &["temper-fs"];
    const NO_DEPS: &[&str] = &[];

    if name == "temper-agent" {
        TEMPER_AGENT_DEPS
    } else {
        NO_DEPS
    }
}

/// Install an OS app into a tenant (workspace).
///
/// Reads skill files from disk, runs the verification cascade, registers
Expand All @@ -483,6 +491,17 @@ pub async fn install_os_app(
state: &PlatformState,
tenant: &str,
app_name: &str,
) -> Result<InstallResult, String> {
for dependency in os_app_dependencies(app_name) {
install_os_app_without_dependencies(state, tenant, dependency).await?;
}
install_os_app_without_dependencies(state, tenant, app_name).await
}

async fn install_os_app_without_dependencies(
state: &PlatformState,
tenant: &str,
app_name: &str,
) -> Result<InstallResult, String> {
let bundle =
get_os_app(app_name).ok_or_else(|| format!("OS app '{app_name}' not found in catalog"))?;
Expand Down
36 changes: 22 additions & 14 deletions crates/temper-platform/src/os_apps/mod_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ fn test_list_skills_returns_catalog() {
"missing intent-discovery: {names:?}"
);

// Check entity types for known skills.
let pm = apps
.iter()
.find(|e| e.name == "project-management")
Expand Down Expand Up @@ -346,6 +345,28 @@ async fn test_install_skill_agent_orchestration_registers_entities() {
assert!(registry.get_table(&tenant, "BudgetLedger").is_some());
}

/// Regression test: installing only `temper-agent` into a tenant must leave
/// every entity table listed below present in that tenant's registry.
#[tokio::test]
async fn test_install_temper_agent_auto_installs_temper_fs() {
    // Tables that must exist after the single `temper-agent` install.
    const EXPECTED_ENTITIES: [&str; 5] = [
        "TemperAgent",
        "Workspace",
        "File",
        "Directory",
        "FileVersion",
    ];

    let state = PlatformState::new(None);
    install_os_app(&state, "test-agent", "temper-agent")
        .await
        .expect("install temper-agent");

    // The read guard is taken only after the await above has completed.
    let tenant = TenantId::new("test-agent");
    let registry = state.registry.read().unwrap();
    for entity in EXPECTED_ENTITIES {
        let table = registry.get_table(&tenant, entity);
        assert!(table.is_some(), "missing {entity}");
    }
}

#[tokio::test]
async fn test_install_skill_nonexistent_returns_error() {
let state = PlatformState::new(None);
Expand Down Expand Up @@ -501,11 +522,9 @@ async fn test_skill_install_survives_restart() {
use temper_server::registry_bootstrap::restore_registry_from_turso;
use temper_store_turso::TursoEventStore;

// Use a unique temp file DB for this test.
let db_path = format!("/tmp/temper-test-{}.db", uuid::Uuid::new_v4());
let db_url = format!("file:{db_path}");

// ── Phase A: Install into a fresh state with Turso. ─────────
let turso = TursoEventStore::new(&db_url, None).await.unwrap();
let mut state = PlatformState::new(None);
state.server.event_store = Some(Arc::new(ServerEventStore::Turso(turso)));
Expand All @@ -515,15 +534,13 @@ async fn test_skill_install_survives_restart() {
let result = result.unwrap();
assert_eq!(result.added.len(), 5);

// Verify specs are in the in-memory registry.
{
let registry = state.registry.read().unwrap();
let tenant = TenantId::new("test-ws");
assert!(registry.get_table(&tenant, "Issue").is_some());
assert!(registry.get_table(&tenant, "Project").is_some());
}

// Verify specs are persisted to Turso.
let turso_ref = state
.server
.event_store
Expand All @@ -538,17 +555,14 @@ async fn test_skill_install_survives_restart() {
"Issue spec not found in Turso"
);

// Verify installed_apps record is in Turso.
let installed = turso_ref.list_all_installed_apps().await.unwrap();
assert!(
installed.contains(&("test-ws".to_string(), "project-management".to_string())),
"installed app record not found"
);

// ── Phase B: Simulate restart — fresh state, same DB. ───────
let turso2 = TursoEventStore::new(&db_url, None).await.unwrap();
let state2 = PlatformState::new(None);
// Verify fresh registry is empty for this tenant.
{
let registry = state2.registry.read().unwrap();
let tenant = TenantId::new("test-ws");
Expand All @@ -558,9 +572,6 @@ async fn test_skill_install_survives_restart() {
);
}

// Restore from Turso (this is what build_registry does on boot).
// Fetch async data outside the lock, then assign synchronously to avoid
// holding a RwLockWriteGuard across an await point.
{
use temper_server::registry::SpecRegistry;
let mut temp_registry = SpecRegistry::new();
Expand All @@ -571,7 +582,6 @@ async fn test_skill_install_survives_restart() {
*state2.registry.write().unwrap() = temp_registry;
}

// Verify specs survived the restart.
{
let registry = state2.registry.read().unwrap();
let tenant = TenantId::new("test-ws");
Expand All @@ -582,15 +592,13 @@ async fn test_skill_install_survives_restart() {
assert!(registry.get_table(&tenant, "Label").is_some());
}

// Clean up temp DB.
let _ = std::fs::remove_file(&db_path);
let _ = std::fs::remove_file(format!("{db_path}-wal"));
let _ = std::fs::remove_file(format!("{db_path}-shm"));
}

#[test]
fn test_reload_picks_up_disk_changes() {
// Just verify reload doesn't panic and produces a valid catalog.
reload_skills();
let skills = list_skills();
assert!(
Expand Down
41 changes: 37 additions & 4 deletions crates/temper-server/src/authz/wasm_gate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ impl WasmAuthzGate for CedarWasmAuthzGate {

// Convert BTreeMap to HashMap at Cedar boundary (determinism-ok)
let hash_attrs: std::collections::HashMap<_, _> = resource_attrs.into_iter().collect(); // determinism-ok: Cedar API requires HashMap
let decision = self.engine.authorize_or_bypass(
let decision = self.engine.authorize_for_tenant_or_bypass(
&ctx.tenant,
&enriched_ctx,
"http_call",
"HttpEndpoint",
Expand Down Expand Up @@ -90,9 +91,13 @@ impl WasmAuthzGate for CedarWasmAuthzGate {

// Convert BTreeMap to HashMap at Cedar boundary (determinism-ok)
let hash_attrs: std::collections::HashMap<_, _> = resource_attrs.into_iter().collect(); // determinism-ok: Cedar API requires HashMap
let decision =
self.engine
.authorize_or_bypass(&security_ctx, "access_secret", "Secret", &hash_attrs);
let decision = self.engine.authorize_for_tenant_or_bypass(
&ctx.tenant,
&security_ctx,
"access_secret",
"Secret",
&hash_attrs,
);

match decision {
AuthzDecision::Allow => WasmAuthzDecision::Allow,
Expand Down Expand Up @@ -279,6 +284,34 @@ mod tests {
assert!(matches!(result, WasmAuthzDecision::Deny(_)));
}

/// A policy loaded via `reload_tenant_policies` for `test-tenant` must be
/// applied when the gate authorizes an HTTP call for a context of that tenant.
#[test]
fn cedar_gate_uses_tenant_scoped_policies() {
    // Permits `http_call` when the context names the `stripe_charge` module
    // and the `api.stripe.com` domain.
    const TENANT_POLICY: &str = r#"
permit(
principal is Agent,
action == Action::"http_call",
resource is HttpEndpoint
) when {
context.module == "stripe_charge" &&
context.domain == "api.stripe.com"
};
"#;

    // The policy is registered for the tenant before the engine is handed
    // to the gate.
    let authz = Arc::new(AuthzEngine::empty());
    authz
        .reload_tenant_policies("test-tenant", TENANT_POLICY)
        .expect("tenant policy should load");

    let wasm_ctx = test_ctx(); // tenant = test-tenant, module_name = stripe_charge
    let gate = CedarWasmAuthzGate::new(authz);
    let decision = gate.authorize_http_call(
        "api.stripe.com",
        "POST",
        "https://api.stripe.com/v1/charges",
        &wasm_ctx,
    );

    assert_eq!(decision, WasmAuthzDecision::Allow);
}

#[test]
fn build_security_context_from_wasm_ctx() {
let ctx = test_ctx();
Expand Down
Loading
Loading