diff --git a/apps/decodex/src/orchestrator/operator_dashboard.html b/apps/decodex/src/orchestrator/operator_dashboard.html index 4916ce0f..569ca8c8 100644 --- a/apps/decodex/src/orchestrator/operator_dashboard.html +++ b/apps/decodex/src/orchestrator/operator_dashboard.html @@ -4278,7 +4278,10 @@

Run History

if (candidate.classification === "closed") { return "tone-muted"; } - if (candidate.reason === "issue_needs_attention") { + if ( + candidate.reason === "issue_needs_attention" || + candidate.reason === "linear_active_label_present" + ) { return "tone-blocked"; } return "tone-wait"; @@ -4342,6 +4345,8 @@

Run History

return "Automation is disabled for this issue."; case "issue_needs_attention": return ""; + case "linear_active_label_present": + return "Active ownership is still present; reconcile before dispatch."; case "non_startable_state": return `${candidate.state} cannot start.`; case "terminal_state": @@ -4389,6 +4394,9 @@

Run History

if (candidate.reason === "issue_needs_attention") { return "needs attention"; } + if (candidate.reason === "linear_active_label_present") { + return "active label present"; + } if (candidate.attention?.retry_budget_attempt_count != null) { return "auto retry paused"; } @@ -4431,6 +4439,7 @@

Run History

return Boolean( candidate.attention || candidate.reason === "issue_needs_attention" || + candidate.reason === "linear_active_label_present" || candidate.reason === "retry_budget_exhausted", ); } @@ -4950,6 +4959,9 @@

Run History

if (reason === "needs_attention_label") { return "needs-attention label set"; } + if (reason === "linear_active_label_present") { + return "active label present"; + } return `blocked by ${sentenceToken(reason)}`; } @@ -5513,6 +5525,8 @@

Run History

return "start identity unavailable"; case "process_start_identity_mismatch": return "process identity changed"; + case "process_identity_mismatch": + return "process identity changed"; default: return reason ? reason.replaceAll("_", " ") : "unknown"; } @@ -5528,6 +5542,8 @@

Run History

return "protocol active"; case "process_stopped": return "process stopped"; + case "process_identity_mismatch": + return "process identity mismatch"; case "not_running": return "not running"; default: @@ -5551,6 +5567,8 @@

Run History

return "no lease; protocol active"; case "process_stopped": return "no lease; stopped process"; + case "process_identity_mismatch": + return "no lease; process identity mismatch"; default: return "no lease"; } @@ -9384,7 +9402,8 @@

${escapeHtml(title)}

const attentionWorktree = candidate.attention?.worktree_path; return ( - candidate.reason === "issue_needs_attention" && + (candidate.reason === "issue_needs_attention" || + candidate.reason === "linear_active_label_present") && (attentionWorktree === worktree.worktree_path || candidate.issue_id === worktree.issue_id || candidate.issue_identifier === worktree.issue_id) diff --git a/apps/decodex/src/orchestrator/status.rs b/apps/decodex/src/orchestrator/status.rs index aca17ebe..2b8a859b 100644 --- a/apps/decodex/src/orchestrator/status.rs +++ b/apps/decodex/src/orchestrator/status.rs @@ -4,6 +4,10 @@ use crate::pull_request::{self, PullRequestLandingGateView}; use crate::worktree; use crate::worktree::MergedWorktreeCleanupDebt; +const QUEUE_REASON_LINEAR_ACTIVE_LABEL_PRESENT: &str = "linear_active_label_present"; +const ATTENTION_ERROR_EVIDENCE_MISSING: &str = "evidence_missing"; +const EXECUTION_LIVENESS_PROCESS_IDENTITY_MISMATCH: &str = "process_identity_mismatch"; + #[derive(Clone, Copy, Debug, Eq, PartialEq)] enum RetainedCloseoutPrMergeGate { Merged, @@ -673,7 +677,10 @@ fn worktree_has_queued_attention_owner( snapshot: &OperatorStatusSnapshot, ) -> bool { snapshot.queued_candidates.iter().any(|candidate| { - candidate.reason == "issue_needs_attention" + matches!( + candidate.reason.as_str(), + "issue_needs_attention" | QUEUE_REASON_LINEAR_ACTIVE_LABEL_PRESENT + ) && (candidate .attention .as_ref() @@ -1807,6 +1814,13 @@ where if state_store.issue_has_active_shared_claim(project.service_id(), &issue.id)? { return Ok(("claimed", "shared_claim_present")); } + if tracker::issue_has_label_with_server_confirmation( + tracker, + issue, + &tracker::automation_active_label(project.service_id()), + )? 
{ + return Ok(("blocked", QUEUE_REASON_LINEAR_ACTIVE_LABEL_PRESENT)); + } if tracker_policy.terminal_states().iter().any(|state| state == &issue.state.name) { return Ok(("closed", "terminal_state")); } @@ -1849,7 +1863,12 @@ fn operator_queued_issue_attention_status( where T: IssueTracker, { - if !matches!(reason, "issue_needs_attention" | "retry_budget_exhausted") { + if !matches!( + reason, + "issue_needs_attention" + | "retry_budget_exhausted" + | QUEUE_REASON_LINEAR_ACTIVE_LABEL_PRESENT + ) { return Ok(None); } @@ -1861,12 +1880,27 @@ where let retry_budget_attempts = state_retry_attempts.max(marker_retry_attempts); let retry_budget_attempt_count = (retry_budget_attempts > 0).then_some(retry_budget_attempts); let retry_budget_max_attempts = i64::from(workflow.frontmatter().execution().max_attempts()); - let auto_retry_blocked_reason = - (reason == "issue_needs_attention").then(|| String::from("needs_attention_label")); + let auto_retry_blocked_reason = match reason { + "issue_needs_attention" => Some(String::from("needs_attention_label")), + QUEUE_REASON_LINEAR_ACTIVE_LABEL_PRESENT => { + Some(String::from(QUEUE_REASON_LINEAR_ACTIVE_LABEL_PRESENT)) + }, + _ => None, + }; let attention_record = operator_queued_issue_latest_attention_record(tracker, project, state_store, issue); - let attention_error_class = - attention_record.as_ref().and_then(|record| record.error_class.clone()); + let private_evidence_missing = operator_queued_issue_private_evidence_missing( + project, + state_store, + issue, + marker.as_ref(), + reason, + )?; + let attention_error_class = if private_evidence_missing { + Some(String::from(ATTENTION_ERROR_EVIDENCE_MISSING)) + } else { + attention_record.as_ref().and_then(|record| record.error_class.clone()) + }; let attention_next_action = attention_record.as_ref().and_then(|record| record.next_action.clone()); let attempt_status = marker @@ -1925,6 +1959,30 @@ where })) } +fn operator_queued_issue_private_evidence_missing( + project: 
&ServiceConfig, + state_store: &StateStore, + issue: &TrackerIssue, + marker: Option<&RunActivityMarker>, + reason: &str, +) -> crate::prelude::Result { + if reason != QUEUE_REASON_LINEAR_ACTIVE_LABEL_PRESENT { + return Ok(false); + } + + let Some(marker) = marker else { + return Ok(true); + }; + let events = state_store.list_private_execution_events( + project.service_id(), + &issue.id, + marker.run_id(), + marker.attempt_number(), + )?; + + Ok(events.is_empty()) +} + fn operator_queued_issue_latest_attention_record( tracker: &T, project: &ServiceConfig, @@ -2003,6 +2061,33 @@ fn operator_queued_issue_attention_summary( worktree_has_tracked_changes: bool, attention_error_class: Option<&str>, ) -> String { + if reason == QUEUE_REASON_LINEAR_ACTIVE_LABEL_PRESENT { + if worktree_has_tracked_changes { + return String::from( + "Linear active ownership is still present with retained worktree changes; inspect the patch and reconcile the lane before dispatch.", + ); + } + if attention_error_class == Some(ATTENTION_ERROR_EVIDENCE_MISSING) { + return if marker.is_some() { + String::from( + "Linear active ownership is still present but private execution evidence is missing; inspect the retained marker and reconcile before dispatch.", + ) + } else { + String::from( + "Linear active ownership is still present but the retained marker or private execution evidence is missing; reconcile before dispatch.", + ) + }; + } + if marker.is_some() { + return String::from( + "Linear active ownership is still present alongside queue intake; inspect the retained marker before dispatch.", + ); + } + + return String::from( + "Linear active ownership is still present without a matching local active lease; reconcile before dispatch.", + ); + } if worktree_has_tracked_changes { if retry_budget_attempts > 0 { return format!( @@ -4275,6 +4360,10 @@ fn operator_run_execution_liveness( return String::from("process_alive"); } if timing.process_alive == Some(false) { + if 
process_liveness_reason_is_identity_mismatch(timing.process_liveness_reason.as_deref()) { + return String::from(EXECUTION_LIVENESS_PROCESS_IDENTITY_MISMATCH); + } + return String::from("process_stopped"); } if matches!(app_server_state.thread_status.as_deref(), Some("active")) @@ -4289,6 +4378,10 @@ fn operator_run_execution_liveness( String::from("not_captured") } +fn process_liveness_reason_is_identity_mismatch(reason: Option<&str>) -> bool { + matches!(reason, Some("host_boot_id_mismatch" | "process_start_identity_mismatch")) +} + fn operator_run_child_agent_activity( marker: Option<&RunActivityMarker>, now_unix_epoch: i64, @@ -5015,7 +5108,10 @@ fn rendered_worktree_role<'a>( return "post_review_lane"; } if snapshot.queued_candidates.iter().any(|candidate| { - candidate.reason == "issue_needs_attention" + matches!( + candidate.reason.as_str(), + "issue_needs_attention" | QUEUE_REASON_LINEAR_ACTIVE_LABEL_PRESENT + ) && (candidate .attention .as_ref() @@ -5453,6 +5549,9 @@ fn operator_run_queue_lease_summary(run: &OperatorRunStatus) -> String { "thread_active" => String::from("not_held (thread_active keeps lane visible)"), "protocol_observed" => String::from("not_held (protocol_observed keeps lane visible)"), "process_stopped" => String::from("not_held (process_stopped needs attention)"), + EXECUTION_LIVENESS_PROCESS_IDENTITY_MISMATCH => { + String::from("not_held (process_identity_mismatch needs attention)") + }, _ => String::from("not_held"), } } diff --git a/apps/decodex/src/orchestrator/tests/operator/status/dashboard.rs b/apps/decodex/src/orchestrator/tests/operator/status/dashboard.rs index 36a22e62..019700c6 100644 --- a/apps/decodex/src/orchestrator/tests/operator/status/dashboard.rs +++ b/apps/decodex/src/orchestrator/tests/operator/status/dashboard.rs @@ -1412,6 +1412,7 @@ fn operator_dashboard_prioritizes_needs_attention_reason_over_retry_count() { "facts.push([\"Auto retry\", autoRetryBlockedReasonText(attention.auto_retry_blocked_reason)]);" )); 
assert!(response.contains("return \"needs-attention label set\";")); + assert!(response.contains("return \"active label present\";")); assert!(reason_text.contains("return \"auto retry paused\";")); assert!(response.contains("function queuedCandidateInlineReason(candidate)")); assert!(response.contains("displayTextRepeats(reason, sentenceToken(candidate.attention.attention_error_class))")); diff --git a/apps/decodex/src/orchestrator/tests/operator/status/queue.rs b/apps/decodex/src/orchestrator/tests/operator/status/queue.rs index a0038d45..f3190979 100644 --- a/apps/decodex/src/orchestrator/tests/operator/status/queue.rs +++ b/apps/decodex/src/orchestrator/tests/operator/status/queue.rs @@ -253,6 +253,146 @@ fn live_operator_status_snapshot_excludes_claimed_candidates_from_waiting_intake assert!(rendered.contains("Active queue echoes: 1")); } +#[test] +fn live_operator_status_snapshot_blocks_active_plus_queued_label_without_local_claim() { + let (_temp_dir, config, workflow) = temp_project_layout(); + let active_label = tracker::automation_active_label(TEST_SERVICE_ID); + let state_store = StateStore::open_in_memory().expect("state store should open"); + let issue = sample_issue_with_sort_fields( + "issue-active-queued", + "PUB-111", + "Todo", + &[active_label.as_str()], + Some(1), + "2026-03-13T04:16:17.133Z", + ); + let worktree_path = config.worktree_root().join(&issue.identifier); + let tracker = FakeTracker::new(vec![issue.clone()]); + + tracker.issue_comments.borrow_mut().insert( + issue.id.clone(), + vec![linear_execution_history_comment( + &issue, + "needs_attention", + "2026-03-13T04:20:00Z", + "older-attention", + |record| { + record.error_class = Some(String::from("older_attention_record")); + record.summary = Some(String::from("Older attention record should not mask liveness.")); + record.next_action = Some(String::from("Reconcile the retained lane.")); + record.blockers = Some(Vec::new()); + record.evidence = Some(vec![String::from("older attention 
event")]); + record.terminal_path = Some(String::from("manual_attention")); + }, + )], + ); + + state_store + .upsert_worktree( + config.service_id(), + &issue.id, + "x/pubfi-pub-111", + &worktree_path.display().to_string(), + ) + .expect("worktree should record"); + state_store + .record_run_attempt("pub-111-attempt-1", &issue.id, 1, "running") + .expect("run attempt should record"); + + fs::create_dir_all(&worktree_path).expect("worktree path should exist"); + state::write_run_activity_marker_for_process(&worktree_path, "pub-111-attempt-1", 1, u32::MAX) + .expect("stopped process marker should write"); + + let snapshot = orchestrator::build_live_operator_status_snapshot( + &tracker, + &config, + &workflow, + &state_store, + 10, + ) + .expect("snapshot should build"); + let project = snapshot.projects.first().expect("project summary should exist"); + let candidate = snapshot + .queued_candidates + .iter() + .find(|candidate| candidate.issue_identifier == "PUB-111") + .expect("active-plus-queued issue should remain visible"); + let attention = candidate.attention.as_ref().expect("recovery details should render"); + let rendered = orchestrator::render_operator_status(&snapshot); + + assert_eq!(candidate.classification, "blocked"); + assert_eq!(candidate.reason, "linear_active_label_present"); + assert_eq!( + attention.auto_retry_blocked_reason.as_deref(), + Some("linear_active_label_present") + ); + assert_eq!(attention.attention_error_class.as_deref(), Some("evidence_missing")); + assert_eq!(attention.process_alive, Some(false)); + assert_eq!(attention.process_liveness_reason.as_deref(), Some("process_stopped")); + assert_eq!(project.attention_count, 1); + assert!(rendered.contains("reason: linear_active_label_present")); + assert!(rendered.contains("attention_cause: evidence_missing")); +} + +#[test] +fn live_operator_status_snapshot_surfaces_dirty_active_label_recovery_worktree() { + let (_temp_dir, config, workflow) = temp_project_layout(); + let active_label = 
tracker::automation_active_label(TEST_SERVICE_ID); + let state_store = StateStore::open_in_memory().expect("state store should open"); + let issue = sample_issue_with_sort_fields( + "issue-dirty-active", + "PUB-112", + "In Progress", + &[active_label.as_str()], + Some(1), + "2026-03-13T04:16:17.133Z", + ); + let worktree_path = config.worktree_root().join(&issue.identifier); + let tracker = FakeTracker::new(vec![issue.clone()]); + + git_status_success( + config.repo_root(), + &["worktree", "add", "-b", "x/pubfi-pub-112", ".worktrees/PUB-112", "main"], + ); + + fs::write(worktree_path.join("README.md"), "dirty active-label patch\n") + .expect("tracked worktree file should change"); + + let snapshot = orchestrator::build_live_operator_status_snapshot( + &tracker, + &config, + &workflow, + &state_store, + 10, + ) + .expect("snapshot should build"); + let project = snapshot.projects.first().expect("project summary should exist"); + let candidate = snapshot + .queued_candidates + .iter() + .find(|candidate| candidate.issue_identifier == "PUB-112") + .expect("dirty active-label issue should remain visible"); + let attention = candidate.attention.as_ref().expect("recovery details should render"); + let worktree = snapshot + .worktrees + .iter() + .find(|worktree| worktree.issue_identifier.as_deref() == Some("PUB-112")) + .expect("retained worktree should remain visible"); + + assert_eq!(candidate.classification, "blocked"); + assert_eq!(candidate.reason, "linear_active_label_present"); + assert_eq!(attention.attention_error_class.as_deref(), Some("evidence_missing")); + assert!(attention.worktree_has_tracked_changes); + assert!( + attention.summary.contains("retained worktree changes"), + "summary should explain dirty retained recovery, got {:?}", + attention.summary + ); + assert_eq!(worktree.ownership, "queued_attention"); + assert_eq!(project.attention_count, 1); + assert_eq!(project.retained_worktree_count, 0); +} + #[test] fn 
live_operator_status_snapshot_reports_capacity_waiting_separately_from_blocked() { let (_temp_dir, config, workflow) = temp_project_layout(); diff --git a/apps/decodex/src/orchestrator/tests/operator/status/running_lanes.rs b/apps/decodex/src/orchestrator/tests/operator/status/running_lanes.rs index 2ec264d7..492f8069 100644 --- a/apps/decodex/src/orchestrator/tests/operator/status/running_lanes.rs +++ b/apps/decodex/src/orchestrator/tests/operator/status/running_lanes.rs @@ -851,6 +851,7 @@ fn operator_status_snapshot_counts_previous_boot_process_as_attention_not_runnin assert_eq!(run.phase, "executing"); assert_eq!(run.process_id, Some(process::id())); assert_eq!(run.process_alive, Some(false)); + assert_eq!(run.execution_liveness, "process_identity_mismatch"); assert_eq!(run.process_liveness_reason.as_deref(), Some("host_boot_id_mismatch")); assert_eq!(project.active_run_count, 0); assert_eq!(project.attention_count, 1); @@ -893,6 +894,7 @@ fn operator_status_snapshot_counts_reused_pid_as_attention_not_running() { assert_eq!(run.phase, "executing"); assert_eq!(run.process_id, Some(process::id())); assert_eq!(run.process_alive, Some(false)); + assert_eq!(run.execution_liveness, "process_identity_mismatch"); assert_eq!( run.process_liveness_reason.as_deref(), Some("process_start_identity_mismatch") diff --git a/docs/reference/operator-control-plane.md b/docs/reference/operator-control-plane.md index 8dd0d272..a3bf2e1f 100644 --- a/docs/reference/operator-control-plane.md +++ b/docs/reference/operator-control-plane.md @@ -203,8 +203,9 @@ Worktree visibility follows the owning dashboard section: an alive PID plus matching `.decodex-run-activity` `host_boot_id` and `process_start_identity`; a previous-boot marker, same-boot PID reuse, missing identity, or unavailable current host/process identity is recovery input, not proof - of active execution. `process_liveness_reason` explains which identity check failed - when `process_alive` is false. + of active execution. 
`execution_liveness = process_identity_mismatch` is the stable + summary for previous-boot or PID-reuse evidence, while `process_liveness_reason` + explains the exact failed identity check when `process_alive` is false. `active_lease` is queue lease ownership only; `execution_liveness` explains why the lane is still visible when the queue lease is not held. - Running lanes derive CLI and dashboard text from the same `OperatorRunStatus` @@ -241,6 +242,11 @@ Worktree visibility follows the owning dashboard section: path without losing the bound PR identity. - `Intake Queue` means queued attention still owns the path, including partial retained progress after retries. +- `linear_active_label_present` in `Intake Queue` means the issue still carries + service active ownership while it is also queued, but local status could not prove a + matching active lease. Treat it as a recovery/attention row, not ready work. If its + attention cause is `evidence_missing`, use the retained marker, worktree, and public + Linear state as the available recovery evidence before retrying or cleaning labels. - `Recovery Worktrees` means the path is retained local state after the authoritative runtime owner is gone or cannot explain it as active, review/landing, or queued work. diff --git a/docs/spec/runtime.md b/docs/spec/runtime.md index 66055040..7a022339 100644 --- a/docs/spec/runtime.md +++ b/docs/spec/runtime.md @@ -481,6 +481,7 @@ After a process restart, recent-run history, active lease ownership, retained po - Operator status snapshots may expose an additive `child_agent_activity` object when app-server protocol events have produced one for the current run. 
The object must stay machine-readable and dashboard/CLI shared, and should describe dynamic observed buckets rather than a fixed workflow: current child bucket and elapsed time, bucket wall/event/tool counts, current/max/cumulative input tokens, cumulative output tokens, largest tool output, and warnings for repeated large outputs. Missing `child_agent_activity` means no child breakdown was captured; existing JSON consumers must continue to work without it. - If the agent Git credential preflight fails, operator status must report the retained lane as a credential failure requiring operator recovery, not as a still-running lane. - If retry budget or needs-attention recovery finds tracked changes in the retained worktree, operator status must report retained partial progress rather than only a generic retry-budget hold. The failure class may be `partial_progress_retained` when no more specific runtime error class is available. Operators should then inspect the patch, finish validation and PR handoff if it is useful, or reset the retained worktree explicitly. +- If Linear still has `decodex:active:<service_id>` on an issue that also remains queued, but the local runtime cannot prove a matching active lease, status must classify the queued row as blocked with reason `linear_active_label_present`; it must not treat the issue as ready intake. If the retained marker or private execution event rows for that run are missing, status must surface `evidence_missing` in the recovery details. If the retained worktree has tracked changes, that dirty worktree remains owned by queued recovery/attention instead of being hidden as cleanup-only state. - During an active run, operator snapshots must expose `thread_id` as soon as the Codex thread exists, plus monotonically advancing `event_count`, `last_event_type`, and `last_event_at` once protocol events are recorded. 
These fields may be hydrated either from the current process journal or from the active lane's `.decodex-run-activity` marker when `status` is running in a separate process. - `thread_id = null` is expected only before the worker creates the Codex thread for the current run. `event_count = 0`, `last_event_type = null`, and `last_event_at = null` are expected only before the first protocol event for that same run. After the thread exists and protocol activity has started, those empty values indicate missing hydration rather than normal progress. - Operator snapshots may expose an additive `protocol_activity` object derived from app-server structured messages for the current run. The object stays local/operator-only and should summarize turn status, waiting reason, rate-limit status, and a compact recent event list for high-value app-server activity such as `turn/started`, `turn/completed`, plan updates, diff updates, item start/completion, command output deltas, server request responses, account updates, and rate-limit updates. Missing `protocol_activity` means no structured summary was captured yet; consumers must continue to rely on the older `event_count`, `last_event_type`, `last_event_at`, thread fields, and `child_agent_activity` fields when it is absent.