diff --git a/apps/decodex/src/orchestrator/operator_dashboard.html b/apps/decodex/src/orchestrator/operator_dashboard.html index 7c1dd799..02c8c840 100644 --- a/apps/decodex/src/orchestrator/operator_dashboard.html +++ b/apps/decodex/src/orchestrator/operator_dashboard.html @@ -5047,6 +5047,17 @@

Run History

return Number.isFinite(numericCount) && numericCount > 0 ? String(numericCount) : "NONE"; } + function postReviewReadbackFacts(lane) { + const facts = []; + if (lane.readback_warning) { + facts.push(["Readback", lane.readback_warning]); + } + if (lane.readback_root_cause) { + facts.push(["Root cause", lane.readback_root_cause]); + } + return facts; + } + function renderAttentionFacts(candidate) { const attention = candidate.attention; if (!attention) { @@ -8724,6 +8735,7 @@

Run History

["Threads", reviewThreadToken(lane.unresolved_review_threads)], ["PR", optionalCardToken(lane.pr_url)], ["Branch", optionalCardToken(lane.branch_name)], + ...postReviewReadbackFacts(lane), ], }); continue; @@ -8746,6 +8758,7 @@

Run History

["Threads", reviewThreadToken(lane.unresolved_review_threads)], ["PR", optionalCardToken(lane.pr_url)], ["Branch", optionalCardToken(lane.branch_name)], + ...postReviewReadbackFacts(lane), ], }); continue; @@ -8764,6 +8777,7 @@

Run History

["Threads", reviewThreadToken(lane.unresolved_review_threads)], ["PR", optionalCardToken(lane.pr_url)], ["Branch", optionalCardToken(lane.branch_name)], + ...postReviewReadbackFacts(lane), ], }); continue; @@ -8783,6 +8797,7 @@

Run History

["Threads", reviewThreadToken(lane.unresolved_review_threads)], ["PR", optionalCardToken(lane.pr_url)], ["Branch", optionalCardToken(lane.branch_name)], + ...postReviewReadbackFacts(lane), ], }); } diff --git a/apps/decodex/src/orchestrator/status.rs b/apps/decodex/src/orchestrator/status.rs index 12d6d6ae..8257064b 100644 --- a/apps/decodex/src/orchestrator/status.rs +++ b/apps/decodex/src/orchestrator/status.rs @@ -2370,6 +2370,11 @@ where let review_state = review_state_inspector .inspect_review_state(worktree.worktree_path(), review_handoff.pr_url()) .ok(); + let readback_root_cause = Some( + PullRequestReadbackRootCause::TrackerIssueReadbackFailed + .as_str() + .to_owned(), + ); let ( pr_head_sha, pr_state, @@ -2412,6 +2417,7 @@ where check_state, unresolved_review_threads, readback_warning: Some(String::from("tracker_issue_readback_degraded")), + readback_root_cause, }); } @@ -2622,6 +2628,7 @@ fn post_review_lane_status_from_classification( check_state: classification.check_state, unresolved_review_threads: classification.unresolved_review_threads, readback_warning: classification.readback_warning, + readback_root_cause: classification.readback_root_cause, } } @@ -3149,14 +3156,15 @@ where }, }; let review_state = match review_state_inspector - .inspect_review_state(snapshot.worktree.worktree_path(), review_handoff.pr_url()) + .inspect_review_state_readback(snapshot.worktree.worktree_path(), review_handoff.pr_url()) { Ok(review_state) => review_state, - Err(_error) => { + Err(error) => { return Ok(PostReviewLaneStateLoad::Classification( readback_degraded_post_review_lane_from_handoff( review_handoff, "pull_request_state_read_failed", + error.root_cause(), ), )); }, @@ -3506,6 +3514,7 @@ fn initial_post_review_lane_classification( check_state: review_state.status_check_rollup_state.clone(), unresolved_review_threads: Some(review_state.unresolved_review_threads), readback_warning: None, + readback_root_cause: None, } } @@ -3517,6 +3526,8 @@ fn blocked_post_review_lane_from_state( classification.decision = PostReviewLaneDecision::Block; classification.reason = reason.to_owned(); + classification.readback_root_cause = post_review_readback_root_cause_for_reason(reason) + .map(|root_cause| root_cause.as_str().to_owned()); classification } @@ -3533,6 +3544,8 @@ fn blocked_post_review_lane(reason: &str) -> PostReviewLaneClassification { check_state: None, unresolved_review_threads: None, readback_warning: None, + readback_root_cause: post_review_readback_root_cause_for_reason(reason) + .map(|root_cause| root_cause.as_str().to_owned()), } } @@ -3551,6 +3564,7 @@ fn blocked_post_review_lane_from_handoff( fn readback_degraded_post_review_lane_from_handoff( review_handoff: &ReviewHandoffMarker, reason: &str, + root_cause: PullRequestReadbackRootCause, ) -> PostReviewLaneClassification { PostReviewLaneClassification { decision: PostReviewLaneDecision::WaitForReview, @@ -3563,6 +3577,7 @@ fn readback_degraded_post_review_lane_from_handoff( check_state: None, unresolved_review_threads: None, readback_warning: Some(reason.to_owned()), + readback_root_cause: Some(root_cause.as_str().to_owned()), } } @@ -3588,6 +3603,31 @@ fn blocked_post_review_lane_status( check_state: None, unresolved_review_threads: None, readback_warning: None, + readback_root_cause: post_review_readback_root_cause_for_reason(reason) + .map(|root_cause| root_cause.as_str().to_owned()), + } +} + +fn post_review_readback_root_cause_for_reason( + reason: &str, +) -> Option { + match reason { + "pull_request_repository_parse_failed" => { + Some(PullRequestReadbackRootCause::PullRequestShapeReadFailed) + }, + "pull_request_branch_mismatch" + | "pull_request_head_mismatch" + | "pull_request_head_repository_name_mismatch" + | "pull_request_head_repository_owner_mismatch" + | "pull_request_merge_commit_lineage_check_failed" + | "review_handoff_lineage_check_failed" + | "review_handoff_lineage_mismatch" + | "review_orchestration_branch_mismatch" + | "review_orchestration_head_mismatch" + | "review_orchestration_pr_mismatch" => { + Some(PullRequestReadbackRootCause::LineageValidationFailed) + }, + _ => None, } } @@ -4891,7 +4931,7 @@ fn render_operator_status(snapshot: &OperatorStatusSnapshot) -> String { } else { for lane in &snapshot.post_review_lanes { output.push_str(&format!( - "- issue_id: {}\n issue: {}\n state: {}\n classification: {}\n reason: {}\n branch: {}\n worktree_path: {}\n pr_url: {}\n pr_head_sha: {}\n pr_state: {}\n review_decision: {}\n mergeable: {}\n check_state: {}\n unresolved_review_threads: {}\n readback_warning: {}\n", + "- issue_id: {}\n issue: {}\n state: {}\n classification: {}\n reason: {}\n branch: {}\n worktree_path: {}\n pr_url: {}\n pr_head_sha: {}\n pr_state: {}\n review_decision: {}\n mergeable: {}\n check_state: {}\n unresolved_review_threads: {}\n readback_warning: {}\n readback_root_cause: {}\n", lane.issue_id, lane.issue_identifier, lane.issue_state, @@ -4908,7 +4948,8 @@ fn render_operator_status(snapshot: &OperatorStatusSnapshot) -> String { lane .unresolved_review_threads .map_or_else(|| String::from("none"), |value| value.to_string()), - lane.readback_warning.as_deref().unwrap_or("none") + lane.readback_warning.as_deref().unwrap_or("none"), + lane.readback_root_cause.as_deref().unwrap_or("none") )); } } diff --git a/apps/decodex/src/orchestrator/tests/operator/status/publishing.rs b/apps/decodex/src/orchestrator/tests/operator/status/publishing.rs index 78e54e30..c8a82544 100644 --- a/apps/decodex/src/orchestrator/tests/operator/status/publishing.rs +++ b/apps/decodex/src/orchestrator/tests/operator/status/publishing.rs @@ -89,6 +89,10 @@ fn live_operator_status_snapshot_preserves_retained_handoff_during_linear_backof snapshot.post_review_lanes[0].readback_warning.as_deref(), Some("tracker_issue_readback_degraded") ); + assert_eq!( + snapshot.post_review_lanes[0].readback_root_cause.as_deref(), + Some("tracker_issue_readback_failed") + ); assert!( state_store .connector_backoff(TEST_SERVICE_ID, "linear") @@ -216,6 +220,14 @@ fn operator_state_snapshot_reports_tracker_rate_limit_as_backoff() { assert_eq!(snapshot.post_review_lanes[0].reason, "tracker_issue_readback_degraded"); assert_eq!(snapshot.post_review_lanes[0].pr_url.as_deref(), Some(pr_url)); assert_eq!(snapshot.post_review_lanes[0].pr_head_sha.as_deref(), Some(head_sha)); + assert_eq!( + snapshot.post_review_lanes[0].readback_root_cause.as_deref(), + Some("tracker_issue_readback_failed") + ); + assert_eq!( + snapshot_json["post_review_lanes"][0]["readback_root_cause"], + "tracker_issue_readback_failed" + ); assert_eq!(snapshot.projects[0].connector_state, "backoff"); assert!( tracker.label_queries.borrow().is_empty(), diff --git a/apps/decodex/src/orchestrator/tests/operator/status/text.rs b/apps/decodex/src/orchestrator/tests/operator/status/text.rs index 4d766872..5b5f9813 100644 --- a/apps/decodex/src/orchestrator/tests/operator/status/text.rs +++ b/apps/decodex/src/orchestrator/tests/operator/status/text.rs @@ -218,12 +218,14 @@ fn operator_status_text_surfaces_cleanup_blocker_pr_url() { check_state: Some(String::from("SUCCESS")), unresolved_review_threads: Some(0), readback_warning: None, + readback_root_cause: Some(String::from("lineage_validation_failed")), }], }; let rendered = orchestrator::render_operator_status(&snapshot); assert!(rendered.contains("classification: cleanup_blocked")); assert!(rendered.contains("reason: retry_budget_exhausted")); + assert!(rendered.contains("readback_root_cause: lineage_validation_failed")); assert!(rendered.contains(&format!("pr_url: {pr_url}"))); assert!(!rendered.contains("pr_url: none")); } diff --git a/apps/decodex/src/orchestrator/tests/operator/status_support.rs b/apps/decodex/src/orchestrator/tests/operator/status_support.rs index 78500f49..ef4858f9 100644 --- a/apps/decodex/src/orchestrator/tests/operator/status_support.rs +++ b/apps/decodex/src/orchestrator/tests/operator/status_support.rs @@ -438,6 +438,7 @@ fn operator_status_text_post_review_lanes() -> Vec check_state: Some(String::from("SUCCESS")), unresolved_review_threads: Some(0), readback_warning: None, + readback_root_cause: None, }] } diff --git a/apps/decodex/src/orchestrator/tests/review_landing/classification_checks.rs b/apps/decodex/src/orchestrator/tests/review_landing/classification_checks.rs index 9a7e393e..e5a9a516 100644 --- a/apps/decodex/src/orchestrator/tests/review_landing/classification_checks.rs +++ b/apps/decodex/src/orchestrator/tests/review_landing/classification_checks.rs @@ -1,3 +1,5 @@ +use orchestrator::PullRequestReadbackFailure; + #[test] fn classify_post_review_lane_blocks_completed_issue_until_pull_request_is_merged() { let temp_dir = TempDir::new().expect("temp dir should exist"); @@ -446,6 +448,10 @@ fn classify_post_review_lane_degrades_pull_request_state_read_failures_to_handof classification.readback_warning.as_deref(), Some("pull_request_state_read_failed") ); + assert_eq!( + classification.readback_root_cause.as_deref(), + Some("github_api_read_failed") + ); } #[test] @@ -459,6 +465,10 @@ fn classify_post_review_lane_degrades_missing_or_blank_github_token_env_var() { missing.readback_warning.as_deref(), Some("pull_request_state_read_failed") ); + assert_eq!( + missing.readback_root_cause.as_deref(), + Some("missing_github_token") + ); let env_var = format!("DECODEX_TEST_BLANK_STATUS_GITHUB_TOKEN_ENV_{}", process::id()); let _env_guard = TestEnvVarGuard::set(&env_var, ""); @@ -471,6 +481,30 @@ fn classify_post_review_lane_degrades_missing_or_blank_github_token_env_var() { blank.readback_warning.as_deref(), Some("pull_request_state_read_failed") ); + assert_eq!( + blank.readback_root_cause.as_deref(), + Some("missing_github_token") + ); +} + +#[test] +fn pull_request_readback_root_cause_classifier_maps_cli_and_shape_failures() { + let missing_cli_error = Command::new("decodex-test-missing-gh-command") + .output() + .expect_err("missing command should fail with an io error"); + let missing_cli = PullRequestReadbackFailure::from_report(Report::from(missing_cli_error)); + let shape_failure = PullRequestReadbackFailure::from_report(eyre::eyre!( + "GitHub GraphQL response for `https://github.com/hack-ink/decodex/pull/174` did not include a pull request." + )); + + assert_eq!( + missing_cli.root_cause(), + orchestrator::PullRequestReadbackRootCause::MissingGithubCli + ); + assert_eq!( + shape_failure.root_cause(), + orchestrator::PullRequestReadbackRootCause::PullRequestShapeReadFailed + ); } fn classify_post_review_lane_with_github_token_env_var( diff --git a/apps/decodex/src/orchestrator/tests/review_landing/status_rows.rs b/apps/decodex/src/orchestrator/tests/review_landing/status_rows.rs index c5c73de6..f4467ca0 100644 --- a/apps/decodex/src/orchestrator/tests/review_landing/status_rows.rs +++ b/apps/decodex/src/orchestrator/tests/review_landing/status_rows.rs @@ -125,6 +125,10 @@ fn build_post_review_lane_statuses_preserves_handoff_marker_when_pr_readback_fai lanes[0].readback_warning.as_deref(), Some("pull_request_state_read_failed") ); + assert_eq!( + lanes[0].readback_root_cause.as_deref(), + Some("github_api_read_failed") + ); assert_eq!(lanes[0].pr_state, None); } @@ -923,6 +927,10 @@ fn build_post_review_lane_statuses_blocks_review_handoff_lineage_rewrite() { assert_eq!(lanes.len(), 1); assert_eq!(lanes[0].classification, "blocked"); assert_eq!(lanes[0].reason, "review_handoff_lineage_mismatch"); + assert_eq!( + lanes[0].readback_root_cause.as_deref(), + Some("lineage_validation_failed") + ); assert_eq!(lanes[0].pr_url.as_deref(), Some(pr_url)); } diff --git a/apps/decodex/src/orchestrator/types.rs b/apps/decodex/src/orchestrator/types.rs index c9d6178d..44a928d1 100644 --- a/apps/decodex/src/orchestrator/types.rs +++ b/apps/decodex/src/orchestrator/types.rs @@ -1,11 +1,196 @@ use crate::tracker; +type PullRequestReadbackResult = + std::result::Result; + trait PullRequestReviewStateInspector { fn inspect_review_state( &self, cwd: &Path, pr_url: &str, ) -> crate::prelude::Result; + + fn inspect_review_state_readback( + &self, + cwd: &Path, + pr_url: &str, + ) -> PullRequestReadbackResult { + self.inspect_review_state(cwd, pr_url) + .map_err(PullRequestReadbackFailure::from) + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum IssueDispatchMode { + Normal, + Retry, + ReviewRepair, + Closeout, +} +impl IssueDispatchMode { + fn as_str(self) -> &'static str { + match self { + Self::Normal => "normal", + Self::Retry => "retry", + Self::ReviewRepair => "review_repair", + Self::Closeout => "closeout", + } + } + + fn allows_issue( + self, + tracker: &dyn IssueTracker, + issue: &TrackerIssue, + project: &ServiceConfig, + workflow: &WorkflowDocument, + state_store: &StateStore, + hint: RetryIssueStateHint<'_>, + ) -> crate::prelude::Result { + match self { + Self::Normal => { + let queue_label = tracker::automation_queue_label(project.service_id()); + + issue_passes_dispatch_policy(tracker, issue, workflow, &queue_label, false) + }, + Self::Retry => issue_passes_retry_dispatch_policy( + tracker, + issue, + project, + workflow, + state_store, + hint, + ), + Self::ReviewRepair => { + Ok(issue_passes_review_repair_dispatch_policy(tracker, issue, project, workflow)? + && !issue_retry_budget_exhausted(workflow, state_store, &issue.id)?) + }, + Self::Closeout => { + issue_passes_closeout_dispatch_policy(tracker, issue, project, workflow, state_store) + }, + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum PostReviewLaneDecision { + Continue, + WaitForReview, + NeedsReviewRepair, + ReadyToLand, + CloseoutBlocked, + CleanupBlocked, + Block, +} +impl PostReviewLaneDecision { + fn as_str(self) -> &'static str { + match self { + Self::Continue => "continue", + Self::WaitForReview => "wait_for_review", + Self::NeedsReviewRepair => "needs_review_repair", + Self::ReadyToLand => "ready_to_land", + Self::CloseoutBlocked => "closeout_blocked", + Self::CleanupBlocked => "cleanup_blocked", + Self::Block => "blocked", + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum RetryKind { + Continuation, + Failure, +} + +pub(crate) enum RetryDispatchDecision { + Blocked { excluded_issue_ids: Vec }, + Dispatch(Box), + Continue, +} + +#[derive(Clone, Debug)] +pub(crate) enum ActiveRunDisposition { + RetainedReviewComplete, + Superseded { + newer_run_id: String, + newer_attempt_number: i64, + }, + Terminal, + NonActive, + Stalled { idle_for: Duration }, + StalledAlreadyNeedsAttention { idle_for: Duration }, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum PullRequestReadbackRootCause { + MissingGithubCli, + MissingGithubToken, + GithubAuthFailed, + GithubApiReadFailed, + GithubResponseParseFailed, + PullRequestShapeReadFailed, + LineageValidationFailed, + TrackerIssueReadbackFailed, +} +impl PullRequestReadbackRootCause { + fn as_str(self) -> &'static str { + match self { + Self::MissingGithubCli => "missing_github_cli", + Self::MissingGithubToken => "missing_github_token", + Self::GithubAuthFailed => "github_auth_failed", + Self::GithubApiReadFailed => "github_api_read_failed", + Self::GithubResponseParseFailed => "github_response_parse_failed", + Self::PullRequestShapeReadFailed => "pull_request_shape_read_failed", + Self::LineageValidationFailed => "lineage_validation_failed", + Self::TrackerIssueReadbackFailed => "tracker_issue_readback_failed", + } + } +} + +enum RetainedReviewLaneLoad { + Skip, + Ready(Box), + Blocked(Box), +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum ReviewOrchestrationPhase { + RequestPending, + WaitingForAck, + WaitingForResult, + RepairRequired, + PassWaitingForGates, + WaitingForMerge, +} +impl ReviewOrchestrationPhase { + fn as_str(self) -> &'static str { + match self { + Self::RequestPending => "request_pending", + Self::WaitingForAck => "waiting_for_ack", + Self::WaitingForResult => "waiting_for_result", + Self::RepairRequired => "repair_required", + Self::PassWaitingForGates => "pass_waiting_for_gates", + Self::WaitingForMerge => "waiting_for_merge", + } + } + + fn parse(value: &str) -> std::result::Result { + match value { + "request_pending" => Ok(Self::RequestPending), + "waiting_for_ack" => Ok(Self::WaitingForAck), + "waiting_for_result" => Ok(Self::WaitingForResult), + "repair_required" => Ok(Self::RepairRequired), + "pass_waiting_for_gates" => Ok(Self::PassWaitingForGates), + "waiting_for_merge" => Ok(Self::WaitingForMerge), + other => Err(format!( + "Unknown review orchestration phase `{other}` in retained review marker." + )), + } + } +} + +enum PostReviewLaneStateLoad { + Classification(PostReviewLaneClassification), + ReviewState(PullRequestReviewState), } /// One bounded run invocation and its optional daemon-planned overrides. @@ -104,6 +289,33 @@ pub(crate) struct RunSummary { continuation_pending: bool, } +#[derive(Debug)] +struct PullRequestReadbackFailure { + root_cause: PullRequestReadbackRootCause, + error: Report, +} +impl PullRequestReadbackFailure { + fn from_report(error: Report) -> Self { + let root_cause = classify_pull_request_readback_report(&error); + + Self { root_cause, error } + } + + fn into_report(self) -> Report { + self.error + } + + fn root_cause(&self) -> PullRequestReadbackRootCause { + self.root_cause + } +} + +impl From for PullRequestReadbackFailure { + fn from(error: Report) -> Self { + Self::from_report(error) + } +} + struct MaterializedDaemonSpawnState { worktree: WorktreeSpec, retry_budget_base: i64, @@ -1017,6 +1229,7 @@ struct OperatorPostReviewLaneStatus { check_state: Option, unresolved_review_threads: Option, readback_warning: Option, + readback_root_cause: Option, } #[derive(Clone, Debug, Eq, PartialEq)] @@ -1062,6 +1275,7 @@ struct PostReviewLaneClassification { check_state: Option, unresolved_review_threads: Option, readback_warning: Option, + readback_root_cause: Option, } struct RetainedReviewLaneBlocked { @@ -1096,6 +1310,15 @@ impl PullRequestReviewStateInspector for GhPullRequestReviewStateInspector { cwd: &Path, pr_url: &str, ) -> crate::prelude::Result { + self.inspect_review_state_readback(cwd, pr_url) + .map_err(PullRequestReadbackFailure::into_report) + } + + fn inspect_review_state_readback( + &self, + cwd: &Path, + pr_url: &str, + ) -> PullRequestReadbackResult { let github_token = resolve_configured_env_var( "github.token_env_var", self.github_token_env_var.as_deref(), @@ -1411,153 +1634,68 @@ struct PullRequestStatusCheckRollup { state: String, } -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub(crate) enum IssueDispatchMode { - Normal, - Retry, - ReviewRepair, - Closeout, -} -impl IssueDispatchMode { - fn as_str(self) -> &'static str { - match self { - Self::Normal => "normal", - Self::Retry => "retry", - Self::ReviewRepair => "review_repair", - Self::Closeout => "closeout", - } +fn classify_pull_request_readback_report(error: &Report) -> PullRequestReadbackRootCause { + if report_has_io_error_kind(error, ErrorKind::NotFound) { + return PullRequestReadbackRootCause::MissingGithubCli; } - - fn allows_issue( - self, - tracker: &dyn IssueTracker, - issue: &TrackerIssue, - project: &ServiceConfig, - workflow: &WorkflowDocument, - state_store: &StateStore, - hint: RetryIssueStateHint<'_>, - ) -> crate::prelude::Result { - match self { - Self::Normal => { - let queue_label = tracker::automation_queue_label(project.service_id()); - - issue_passes_dispatch_policy(tracker, issue, workflow, &queue_label, false) - }, - Self::Retry => issue_passes_retry_dispatch_policy( - tracker, - issue, - project, - workflow, - state_store, - hint, - ), - Self::ReviewRepair => { - Ok(issue_passes_review_repair_dispatch_policy(tracker, issue, project, workflow)? - && !issue_retry_budget_exhausted(workflow, state_store, &issue.id)?) - }, - Self::Closeout => issue_passes_closeout_dispatch_policy( - tracker, - issue, - project, - workflow, - state_store, - ), - } + if report_contains_any( + error, + &[ + "must be configured for this github-backed operation", + "failed to read environment variable", + "must not be blank", + ], + ) { + return PullRequestReadbackRootCause::MissingGithubToken; } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub(crate) enum PostReviewLaneDecision { - Continue, - WaitForReview, - NeedsReviewRepair, - ReadyToLand, - CloseoutBlocked, - CleanupBlocked, - Block, -} -impl PostReviewLaneDecision { - fn as_str(self) -> &'static str { - match self { - Self::Continue => "continue", - Self::WaitForReview => "wait_for_review", - Self::NeedsReviewRepair => "needs_review_repair", - Self::ReadyToLand => "ready_to_land", - Self::CloseoutBlocked => "closeout_blocked", - Self::CleanupBlocked => "cleanup_blocked", - Self::Block => "blocked", - } + if report_chain_has_serde_json_error(error) { + return PullRequestReadbackRootCause::GithubResponseParseFailed; + } + if report_contains_any( + error, + &[ + "pull request url", + "did not include a repository", + "did not include a pull request", + "without an end cursor", + ], + ) { + return PullRequestReadbackRootCause::PullRequestShapeReadFailed; + } + if report_contains_any( + error, + &[ + "bad credentials", + "requires authentication", + "authentication required", + "not logged in", + "gh auth login", + "http 401", + "http 403", + ], + ) { + return PullRequestReadbackRootCause::GithubAuthFailed; } -} -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub(crate) enum RetryKind { - Continuation, - Failure, + PullRequestReadbackRootCause::GithubApiReadFailed } -pub(crate) enum RetryDispatchDecision { - Blocked { excluded_issue_ids: Vec }, - Dispatch(Box), - Continue, +fn report_has_io_error_kind(error: &Report, kind: ErrorKind) -> bool { + error.chain().any(|cause| { + cause + .downcast_ref::() + .is_some_and(|error| error.kind() == kind) + }) } -#[derive(Clone, Debug)] -pub(crate) enum ActiveRunDisposition { - RetainedReviewComplete, - Superseded { - newer_run_id: String, - newer_attempt_number: i64, - }, - Terminal, - NonActive, - Stalled { idle_for: Duration }, - StalledAlreadyNeedsAttention { idle_for: Duration }, +fn report_chain_has_serde_json_error(error: &Report) -> bool { + error.chain().any(|cause| cause.downcast_ref::().is_some()) } -enum RetainedReviewLaneLoad { - Skip, - Ready(Box), - Blocked(Box), -} +fn report_contains_any(error: &Report, needles: &[&str]) -> bool { + error.chain().any(|cause| { + let message = cause.to_string().to_ascii_lowercase(); -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum ReviewOrchestrationPhase { - RequestPending, - WaitingForAck, - WaitingForResult, - RepairRequired, - PassWaitingForGates, - WaitingForMerge, -} -impl ReviewOrchestrationPhase { - fn as_str(self) -> &'static str { - match self { - Self::RequestPending => "request_pending", - Self::WaitingForAck => "waiting_for_ack", - Self::WaitingForResult => "waiting_for_result", - Self::RepairRequired => "repair_required", - Self::PassWaitingForGates => "pass_waiting_for_gates", - Self::WaitingForMerge => "waiting_for_merge", - } - } - - fn parse(value: &str) -> std::result::Result { - match value { - "request_pending" => Ok(Self::RequestPending), - "waiting_for_ack" => Ok(Self::WaitingForAck), - "waiting_for_result" => Ok(Self::WaitingForResult), - "repair_required" => Ok(Self::RepairRequired), - "pass_waiting_for_gates" => Ok(Self::PassWaitingForGates), - "waiting_for_merge" => Ok(Self::WaitingForMerge), - other => Err(format!( - "Unknown review orchestration phase `{other}` in retained review marker." - )), - } - } -} - -enum PostReviewLaneStateLoad { - Classification(PostReviewLaneClassification), - ReviewState(PullRequestReviewState), + needles.iter().any(|needle| message.contains(needle)) + }) } diff --git a/docs/reference/operator-control-plane.md b/docs/reference/operator-control-plane.md index 555d4e5d..d65381e2 100644 --- a/docs/reference/operator-control-plane.md +++ b/docs/reference/operator-control-plane.md @@ -305,7 +305,13 @@ Worktree visibility follows the owning dashboard section: warning when the retained review handoff marker still exists. `decodex status` must keep the issue identifier, branch, marker PR URL, and marker PR head SHA visible so operators can retry status, inspect the PR directly, or run the explicit recovery - path without losing the bound PR identity. + path without losing the bound PR identity. Local status JSON, text output, and + dashboard readback also carry `readback_root_cause` when Decodex can classify the + local diagnostic safely, for example `missing_github_cli`, `missing_github_token`, + `github_auth_failed`, `github_api_read_failed`, `github_response_parse_failed`, + `pull_request_shape_read_failed`, or `lineage_validation_failed`. These diagnostic + tokens are operator-local and must not include tokens, raw API payloads, or private + command output. - `Intake Queue` means queued attention still owns the path, including partial retained progress after retries. - `linear_active_label_present` in `Intake Queue` means the issue still carries diff --git a/docs/spec/runtime.md b/docs/spec/runtime.md index 32430a78..2e7414d5 100644 --- a/docs/spec/runtime.md +++ b/docs/spec/runtime.md @@ -389,7 +389,7 @@ The runtime database stores at least: - typed connector health and external API backoff For child supervision, the active lane may also carry a short-lived worktree heartbeat marker at `.worktrees//.decodex-run-activity`. That marker is advisory, keyed to the current `run_id` plus `attempt_number`, and exists so the control plane can observe child activity across process boundaries, surface active thread and protocol progress in operator status, and keep high-frequency telemetry out of Linear. When the marker records process liveness, it must pair `process_id` with both host boot identity (`host_boot_id`) and process start identity (`process_start_identity`). A marker from a previous boot, a marker missing either identity, a marker whose process start identity no longer matches the live PID, a marker whose PID has exited into an unreaped zombie state, or a marker observed while Decodex cannot read the current host or process identity must not be treated as a live process even if that PID currently exists. Operator snapshots expose `process_liveness_reason` so operators can distinguish stopped processes, previous-boot markers, and same-boot PID reuse from genuine live execution. The marker may also carry an additive `child_agent_activity` JSON summary for the current attempt; that summary is diagnostic state for operator snapshots, not durable scheduling authority. Operator snapshots must keep queue ownership separate from execution liveness: `active_lease` and `queue_lease_state` describe the local queue lease, while `execution_liveness` describes the observed process, app-server thread, or protocol marker that keeps an active lane visible. If a raw attempt is still `starting` after app-server thread, model, or protocol activity is observed, operator-facing `status` must report `running` and preserve the raw value in `attempt_status`. High-frequency heartbeat, child-agent buckets, token counts, idle ages, and other transient liveness details stay local/operator-only under the boundary defined by [`linear-execution-ledger.md`](./linear-execution-ledger.md). -Post-review ownership is stored in the runtime database. Retained handoff rows record the authoritative PR URL, branch lineage, validated PR head OID, run id, and attempt number that completed the `In Review` handoff. Retained orchestration rows record the current post-review phase for that exact handoff identity. If the matching database row is missing, post-review ownership must block as unresolved instead of rebinding from branch-name, current-head, Linear comments, or other heuristics. If a retained review marker exists but a stored handoff or orchestration head no longer matches a clean retained worktree and matching PR head, operator status must keep the marker PR URL visible when known and recovery diagnosis must report the concrete mismatched field before any explicit rebind refresh. +Post-review ownership is stored in the runtime database. Retained handoff rows record the authoritative PR URL, branch lineage, validated PR head OID, run id, and attempt number that completed the `In Review` handoff. Retained orchestration rows record the current post-review phase for that exact handoff identity. If the matching database row is missing, post-review ownership must block as unresolved instead of rebinding from branch-name, current-head, Linear comments, or other heuristics. If a retained review marker exists but a stored handoff or orchestration head no longer matches a clean retained worktree and matching PR head, operator status must keep the marker PR URL visible when known and recovery diagnosis must report the concrete mismatched field before any explicit rebind refresh. When retained PR readback degrades but the handoff identity is still safe to preserve, operator-local status may expose a typed `readback_root_cause` diagnostic such as missing GitHub CLI, missing GitHub token, GitHub auth failure, API/read failure, parse/shape failure, or lineage validation failure while keeping public-safe warning reasons such as `pull_request_state_read_failed` stable. The only source-tree marker that clean-source checks may ignore is the untracked `.decodex-run-activity` heartbeat marker. Review handoff, orchestration, retry, phase timing, and retained PR state belong in the Decodex runtime database, not in root-level or worktree-local review marker files. ### Dispatch-slot handoff invariant