IBM · faloker · Jun 23, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026
diff --git a/internal/detonators/simrun_detonator.go b/internal/detonators/simrun_detonator.go
@@ -99,12 +99,17 @@ func (d *SimrunDetonator) Detonate() (map[string]string, error) {
 		return nil, fmt.Errorf("failed to generate execution ID: %w", err)
 	}
 
+	// Carry execution_id out on every failure path past this point so a
+	// partially-completed detonation (e.g. terraform applied, then the
+	// detonation timed out) stays correlatable in the persisted result.
+	failure := map[string]string{"execution_id": executionID}
+
 	d.setupLoggingAndTerraform(executionID)
 
 	// Get manifest and find simulation
 	simulation, err := d.resolveSimulation(ctx)
 	if err != nil {
-		return nil, err
+		return failure, err
 	}
 
 	d.logSimulationInfo(simulation)
@@ -115,7 +120,7 @@ func (d *SimrunDetonator) Detonate() (map[string]string, error) {
 	}
 	terraformOutputs, tfWorkDir, err := d.setupTerraform(ctx, executionID, simulation)
 	if err != nil {
-		return nil, err
+		return failure, err
 	}
 	if tfWorkDir != "" {
 		defer d.cleanupTerraform(ctx, executionID, tfWorkDir)
@@ -125,7 +130,7 @@ func (d *SimrunDetonator) Detonate() (map[string]string, error) {
 	d.reportPhase("detonating")
 	detonateOutput, err := d.executeSimulation(ctx, executionID, terraformOutputs)
 	if err != nil {
-		return nil, err
+		return failure, err
 	}
 
 	// Run custom cleanup if needed
@@ -217,6 +222,38 @@ func (d *SimrunDetonator) terraformEnvVars(executionID string) map[string]string
 	return vars
 }
 
+// detonationEnvVars returns the environment for the pack runner. It augments
+// the run env with AWS_REGION/AWS_DEFAULT_REGION derived from the resolved
+// aws_region parameter. Terraform pins the AWS provider to var.aws_region, but
+// the detonation resolves region via the AWS SDK default chain (ambient
+// AWS_REGION). Without aligning them, terraform creates resources in
+// var.aws_region (default us-east-1) while the detonation acts on the
+// container's ambient region — producing NotFound errors. envutil merges this
+// map over the process env, so the value here wins over any ambient AWS_REGION.
+func (d *SimrunDetonator) detonationEnvVars() map[string]string {
+	env := make(map[string]string, len(d.envVars)+2)
+	for k, v := range d.envVars {
+		env[k] = v
+	}
+	region := d.resolveAWSRegion()
+	env["AWS_REGION"] = region
+	env["AWS_DEFAULT_REGION"] = region
+	return env
+}
+
+// resolveAWSRegion resolves the AWS region the detonation should use, mirroring
+// the precedence terraformEnvVars applies to TF_VAR_aws_region: per-sim
+// scenario param > pack-level param > built-in default.
+func (d *SimrunDetonator) resolveAWSRegion() string {
+	if r, ok := d.params["aws_region"].(string); ok && r != "" {
+		return r
+	}
+	if r, ok := d.packConfig.Parameters["aws_region"].(string); ok && r != "" {
+		return r
+	}
+	return pack.DefaultAWSRegion
+}
+
 // formatTFVar renders a parameter value into the string form Terraform
 // accepts for TF_VAR_*. Map and slice values are JSON-encoded so map/list
 // typed Terraform variables receive a parseable HCL/JSON literal.
@@ -398,7 +435,7 @@ func (d *SimrunDetonator) logSimulationInfo(simulation *pack.SimulationManifest)
 
 // createExecutor creates a pack runner and executor for simulation execution
 func (d *SimrunDetonator) createExecutor(ctx context.Context, executionID string) (*executor.Executor, runner.PackRunner, error) {
-	packRunner, err := d.runnerFactory.CreateRunner(ctx, d.packConfig, d.envVars)
+	packRunner, err := d.runnerFactory.CreateRunner(ctx, d.packConfig, d.detonationEnvVars())
 	if err != nil {
 		return nil, nil, fmt.Errorf("failed to create pack runner: %w", err)
 	}

diff --git a/internal/detonators/simrun_detonator_test.go b/internal/detonators/simrun_detonator_test.go
@@ -4,8 +4,64 @@ import (
 	"testing"
 
 	"github.com/IBM/simrun/internal/config"
+	"github.com/IBM/simrun/pack"
 )
 
+// TestDetonationEnvVars_AWSRegion verifies the detonation env pins
+// AWS_REGION/AWS_DEFAULT_REGION to the same aws_region value terraform's
+// provider uses. Without this, terraform creates resources in var.aws_region
+// (default us-east-1) while the detonation SDK resolves region from the
+// container's ambient AWS_REGION, so deletes/lookups hit the wrong region and
+// fail with NotFound. Precedence mirrors terraformEnvVars: per-sim > pack-level
+// > built-in default.
+func TestDetonationEnvVars_AWSRegion(t *testing.T) {
+	tests := []struct {
+		name       string
+		envVars    map[string]string
+		packParams map[string]any
+		simParams  map[string]any
+		want       string
+	}{
+		{
+			name: "unset falls back to built-in default",
+			want: pack.DefaultAWSRegion,
+		},
+		{
+			name:       "pack-level value used",
+			packParams: map[string]any{"aws_region": "eu-west-1"},
+			want:       "eu-west-1",
+		},
+		{
+			name:       "per-sim overrides pack-level",
+			packParams: map[string]any{"aws_region": "eu-west-1"},
+			simParams:  map[string]any{"aws_region": "ap-south-1"},
+			want:       "ap-south-1",
+		},
+		{
+			name:       "param overrides ambient run-env region",
+			envVars:    map[string]string{"AWS_REGION": "us-west-2"},
+			packParams: map[string]any{"aws_region": "eu-west-1"},
+			want:       "eu-west-1",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			d := &SimrunDetonator{
+				packConfig: config.PackConfig{Parameters: tt.packParams},
+				params:     tt.simParams,
+				envVars:    tt.envVars,
+			}
+			env := d.detonationEnvVars()
+			if env["AWS_REGION"] != tt.want {
+				t.Errorf("AWS_REGION = %q, want %q", env["AWS_REGION"], tt.want)
+			}
+			if env["AWS_DEFAULT_REGION"] != tt.want {
+				t.Errorf("AWS_DEFAULT_REGION = %q, want %q", env["AWS_DEFAULT_REGION"], tt.want)
+			}
+		})
+	}
+}
+
 func TestTerraformEnvVars_PerSimOverridesPackLevel(t *testing.T) {
 	d := &SimrunDetonator{
 		packConfig: config.PackConfig{

diff --git a/internal/runner/runner.go b/internal/runner/runner.go
@@ -81,7 +81,10 @@ func (m *Runner) runScenario(scenario *Scenario) (string, float64, error) {
 
 	executionOutput, logger, err := m.executeScenario(scenario)
 	if err != nil {
-		return "", 0, err
+		// Preserve the execution_id even on failure so a partially-completed
+		// detonation (e.g. terraform applied, detonation timed out) stays
+		// correlatable. Indexing a nil map yields "".
+		return executionOutput["execution_id"], 0, err
 	}
 
 	indicators := m.buildIndicatorsList(scenario, executionOutput)

diff --git a/internal/runner/runner_test.go b/internal/runner/runner_test.go
@@ -103,7 +103,7 @@ func TestRunnerErrorHandling(t *testing.T) {
 	mockDetonator.On("SetStatusCallback", mock.AnythingOfType("func(string)")).Return()
 
 	mockFailingDetonator := &detonatorMocks.MockDetonator{}
-	mockFailingDetonator.On("Detonate").Return(map[string]string{}, errors.New("foo"))
+	mockFailingDetonator.On("Detonate").Return(map[string]string{"execution_id": "failed-uid"}, errors.New("foo"))
 	mockFailingDetonator.On("String").Return("mock-failing-detonator")
 	mockFailingDetonator.On("SimulationId").Return("failing-simulation")
 	mockFailingDetonator.On("PackName").Return("")
@@ -155,6 +155,15 @@ func TestRunnerErrorHandling(t *testing.T) {
 	assert.Equal(t, 2, successCount)
 	assert.Equal(t, 1, failedCount)
 
+	// A failed scenario must still carry its execution_id so the result can be
+	// correlated with the partial detonation (e.g. terraform that did apply).
+	for _, result := range results {
+		if !result.Success {
+			assert.Equal(t, "failed-uid", result.ExecutionId,
+				"failed scenario should retain the execution_id from the detonation output")
+		}
+	}
+
 	// All scenarios should have been detonated, even if one returned an error
 	mockDetonator.AssertNumberOfCalls(t, "Detonate", 2)
 	mockFailingDetonator.AssertNumberOfCalls(t, "Detonate", 1)

diff --git a/internal/web/retention_test.go b/internal/web/retention_test.go
@@ -122,3 +122,96 @@ func TestSweepAssessments_DisabledIsNoOp(t *testing.T) {
 	assert.FileExists(t, jsonl)
 	assert.FileExists(t, ndjson)
 }
+
+// seedTerraformDir creates <dataDir>/terraform/<execID>/ with nested state and
+// plugin contents, mirroring a real Terraform working directory so removal must
+// recurse (os.RemoveAll), not a flat os.Remove.
+func seedTerraformDir(t *testing.T, dataDir, execID string) string {
+	t.Helper()
+	dir := filepath.Join(dataDir, "terraform", execID)
+	require.NoError(t, os.MkdirAll(filepath.Join(dir, ".terraform"), 0o755))
+	require.NoError(t, os.WriteFile(filepath.Join(dir, "terraform.tfstate"), []byte("{}"), 0o644))
+	return dir
+}
+
+// makeAgedRunWithExecutions creates an aged completed run whose scenario results
+// carry the given execution IDs (one result per ID), returning the run ID.
+func makeAgedRunWithExecutions(t *testing.T, ctx context.Context, store *fakes.RunStore, dataDir string, execIDs ...string) uuid.UUID {
+	t.Helper()
+	id := uuid.New()
+	created := time.Now().AddDate(0, 0, -40)
+	require.NoError(t, store.Create(ctx, &db.Run{
+		ID:        id,
+		Status:    "completed",
+		StartTime: created,
+		CreatedAt: created,
+	}))
+	writeLog(t, dataDir, id.String(), 40)
+	for i, execID := range execIDs {
+		require.NoError(t, store.AddScenarioResult(ctx, id, &db.ScenarioResult{
+			Name:        "s" + uuid.NewString()[:4] + "-" + string(rune('a'+i)),
+			ExecutionID: execID,
+		}))
+	}
+	return id
+}
+
+// Deleting a run reclaims the per-execution Terraform working directory for each
+// of its scenario results — the disk space these dirs hold is the whole point.
+func TestSweepAssessments_RemovesTerraformDirs(t *testing.T) {
+	dataDir := t.TempDir()
+	store := fakes.New().Run
+	ctx := context.Background()
+
+	e1, e2 := uuid.NewString(), uuid.NewString()
+	id := makeAgedRunWithExecutions(t, ctx, store, dataDir, e1, e2)
+	dir1 := seedTerraformDir(t, dataDir, e1)
+	dir2 := seedTerraformDir(t, dataDir, e2)
+
+	web.SweepAssessments(ctx, store, dataDir, true, 30)
+
+	_, err := store.Get(ctx, id)
+	assert.Error(t, err, "aged run row should be deleted")
+	assert.NoDirExists(t, dir1, "Terraform dir for E1 should be removed")
+	assert.NoDirExists(t, dir2, "Terraform dir for E2 should be removed")
+}
+
+// An unsafe execution_id must never cause cleanup to escape or wipe the
+// terraform/ base directory — a blank id would otherwise RemoveAll the base.
+func TestSweepAssessments_SkipsUnsafeExecutionID(t *testing.T) {
+	for _, execID := range []string{"", "   ", "../escape", "nested/id", ".", ".."} {
+		t.Run("id="+execID, func(t *testing.T) {
+			dataDir := t.TempDir()
+			store := fakes.New().Run
+			ctx := context.Background()
+
+			// A sibling dir keyed on a safe id proves only the unsafe id is skipped.
+			safe := uuid.NewString()
+			id := makeAgedRunWithExecutions(t, ctx, store, dataDir, execID, safe)
+			safeDir := seedTerraformDir(t, dataDir, safe)
+			base := filepath.Join(dataDir, "terraform")
+
+			web.SweepAssessments(ctx, store, dataDir, true, 30)
+
+			_, err := store.Get(ctx, id)
+			assert.Error(t, err, "aged run row should still be deleted")
+			assert.DirExists(t, base, "terraform base dir must survive an unsafe id")
+			assert.NoDirExists(t, safeDir, "the safe sibling id should still be cleaned up")
+		})
+	}
+}
+
+// A missing Terraform dir must not fail the delete — removal is best-effort.
+func TestSweepAssessments_MissingTerraformDirIsBestEffort(t *testing.T) {
+	dataDir := t.TempDir()
+	store := fakes.New().Run
+	ctx := context.Background()
+
+	// No terraform dir is seeded for this execution id.
+	id := makeAgedRunWithExecutions(t, ctx, store, dataDir, uuid.NewString())
+
+	web.SweepAssessments(ctx, store, dataDir, true, 30)
+
+	_, err := store.Get(ctx, id)
+	assert.Error(t, err, "delete should succeed even though the Terraform dir was missing")
+}
diff --git a/internal/web/run_deletion.go b/internal/web/run_deletion.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"os"
 	"path/filepath"
+	"strings"
 	"time"
 
 	"github.com/IBM/simrun/internal/db"
@@ -13,19 +14,22 @@ import (
 
 // deleteRunWithArtifacts removes a run's database row (cascading to its
 // scenario_results via FK) and best-effort removes all of its on-disk
-// artifacts: the per-run JSONL log file and every collected .ndjson file
-// referenced by scenario_results.collected_log_path.
+// artifacts: the per-run JSONL log file, every collected .ndjson file
+// referenced by scenario_results.collected_log_path, and each scenario
+// result's Terraform working directory at <dataDir>/terraform/<execution_id>/.
 //
-// Collected paths are read before the row is deleted, since the cascade
-// removes the scenario_results that hold them. On-disk removal is best-effort:
-// a missing or unremovable file logs a warning but does not fail the delete,
-// so a leftover artifact never blocks reclaiming the database row.
+// Collected paths and execution IDs are read before the row is deleted, since
+// the cascade removes the scenario_results that hold them. On-disk removal is
+// best-effort: a missing or unremovable file logs a warning but does not fail
+// the delete, so a leftover artifact never blocks reclaiming the database row.
 //
 // Shared by manual delete (HandleDeleteRun) and the assessment-retention
 // sweeper so both reclaim the large collected .ndjson artifacts identically.
 func deleteRunWithArtifacts(ctx context.Context, runStore db.RunStore, dataDir string, runID uuid.UUID) error {
-	// Collect the .ndjson paths before deleting, while the rows still exist.
+	// Collect the .ndjson paths and execution IDs before deleting, while the
+	// rows still exist.
 	var collected []string
+	var executionIDs []string
 	results, err := runStore.GetScenarioResults(ctx, runID)
 	if err != nil {
 		// A failed lookup must not strand the row: log and continue to delete.
@@ -36,6 +40,7 @@ func deleteRunWithArtifacts(ctx context.Context, runStore db.RunStore, dataDir s
 			if res.CollectedLogPath != nil && *res.CollectedLogPath != "" {
 				collected = append(collected, *res.CollectedLogPath)
 			}
+			executionIDs = append(executionIDs, res.ExecutionID)
 		}
 	}
 
@@ -62,6 +67,23 @@ func deleteRunWithArtifacts(ctx context.Context, runStore db.RunStore, dataDir s
 		}
 	}
 
+	// Best-effort: remove each scenario result's Terraform working directory at
+	// <dataDir>/terraform/<execution_id>/.
+	base := filepath.Join(dataDir, "terraform")
+	for _, id := range executionIDs {
+		dir := filepath.Join(base, id)
+		// Skip blank/whitespace ids and any id that doesn't resolve to a direct
+		// child of terraform/. filepath.Join cleans the result, so separators,
+		// "." and ".." would otherwise let cleanup escape or wipe the base dir.
+		if strings.TrimSpace(id) == "" || filepath.Dir(dir) != base {
+			continue
+		}
+		if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
+			logrus.WithError(err).WithField("path", dir).WithField("run_id", runID).
+				Warn("failed to remove Terraform working directory")
+		}
+	}
+
 	return nil
 }
 

diff --git a/openspec/changes/archive/2026-06-22-purge-terraform-dirs/.openspec.yaml b/openspec/changes/archive/2026-06-22-purge-terraform-dirs/.openspec.yaml
@@ -0,0 +1,2 @@
+schema: spec-driven
+created: 2026-06-22