Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions apps/decodex/src/manual.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::{
prelude::{Result, eyre},
pull_request::{self, PullRequestLandingState},
runtime,
state::{RUN_ACTIVITY_MARKER_FILE, ReviewHandoffMarker, StateStore},
state::{self, ReviewHandoffMarker, StateStore},
tracker::{
self, IssueTracker, TrackerIssue,
linear::LinearClient,
Expand Down Expand Up @@ -848,15 +848,7 @@ fn ensure_clean_worktree(cwd: &Path) -> Result<()> {
// Decides whether a single status line should count as a landing blocker.
// Trailing whitespace is stripped before the line is inspected.
fn is_landing_blocking_status_line(line: &str) -> bool {
let line = line.trim_end();

// NOTE(review): this hunk is shown without +/- markers. The expression just below and the
// marker-only helper after it look like the pre-change side, and the `state::` call near the
// end looks like their replacement — confirm against the real file.
!line.is_empty() && !is_untracked_decodex_runtime_marker_status_line(line)
}

// Marker-only check: true only for a `?? `-prefixed entry whose path is exactly
// `RUN_ACTIVITY_MARKER_FILE`; any other prefix or path yields false.
fn is_untracked_decodex_runtime_marker_status_line(line: &str) -> bool {
let Some(path) = line.strip_prefix("?? ") else {
return false;
};

path == RUN_ACTIVITY_MARKER_FILE
// A line blocks landing when it is non-empty and the shared `state` helper does not
// classify it as an untracked Decodex runtime artifact.
!line.is_empty() && !state::is_untracked_decodex_runtime_artifact_status_line(line)
}

fn validate_landing_state(
Expand Down Expand Up @@ -2304,8 +2296,14 @@ exit 1\n",

fs::write(checkout.join(state::RUN_ACTIVITY_MARKER_FILE), "agent_run\n")
.expect("activity marker should write");

let control_dir = checkout.join(state::RUN_CONTROL_CHANNEL_DIR);

fs::create_dir_all(&control_dir).expect("run-control directory should create");
fs::write(control_dir.join("run-1-1.channel"), "schema=decodex.run_control_channel/v1\n")
.expect("run-control channel should write");
manual::ensure_clean_worktree(&checkout)
.expect("untracked activity marker should not block landing");
.expect("untracked Decodex runtime artifacts should not block landing");
}

#[test]
Expand Down Expand Up @@ -2337,6 +2335,18 @@ exit 1\n",

assert_blocks(&checkout, "nested runtime marker should still block landing");
}
{
let temp_dir = TempDir::new().expect("temp dir should create");
let checkout = init_git_checkout(&temp_dir, "repo");
let nested_control_dir = checkout.join("nested").join(state::RUN_CONTROL_CHANNEL_DIR);

fs::create_dir_all(&nested_control_dir)
.expect("nested control directory should create");
fs::write(nested_control_dir.join("run-1-1.channel"), "channel\n")
.expect("nested control channel should write");

assert_blocks(&checkout, "nested run-control directory should still block landing");
}
{
let temp_dir = TempDir::new().expect("temp dir should create");
let checkout = init_git_checkout(&temp_dir, "repo");
Expand Down
30 changes: 23 additions & 7 deletions apps/decodex/src/recovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ use crate::{
pull_request::PullRequestLandingState,
runtime,
state::{
ConnectorBackoffInput, RUN_ACTIVITY_MARKER_FILE, ReviewHandoffMarker,
ReviewOrchestrationMarker, StateStore, WorktreeMapping,
self, ConnectorBackoffInput, ReviewHandoffMarker, ReviewOrchestrationMarker, StateStore,
WorktreeMapping,
},
tracker::{
self, IssueTracker, TrackerIssue,
Expand Down Expand Up @@ -1316,15 +1316,11 @@ fn worktree_blocking_status_lines(worktree_path: &Path) -> Result<Vec<String>> {
Ok(status
.lines()
.filter(|line| !line.trim_end().is_empty())
.filter(|line| !is_untracked_runtime_marker(line))
.filter(|line| !state::is_untracked_decodex_runtime_artifact_status_line(line))
.map(ToOwned::to_owned)
.collect())
}

/// Returns `true` when `line`, ignoring trailing whitespace, is a `?? ` entry whose path is
/// exactly `RUN_ACTIVITY_MARKER_FILE`.
fn is_untracked_runtime_marker(line: &str) -> bool {
    let entry = line.trim_end();

    entry
        .strip_prefix("?? ")
        .is_some_and(|path| path == RUN_ACTIVITY_MARKER_FILE)
}

/// Decodes `stdout` as UTF-8 and returns it with leading and trailing whitespace removed.
///
/// # Errors
///
/// Returns an error when `stdout` is not valid UTF-8.
fn trimmed_stdout(stdout: &[u8]) -> Result<String> {
    // Validate the borrowed bytes in place so that only the trimmed text is allocated,
    // instead of copying the whole buffer first and then copying the trimmed slice again.
    let text = std::str::from_utf8(stdout)?;

    Ok(text.trim().to_owned())
}
Expand Down Expand Up @@ -1449,6 +1445,26 @@ mod tests {
(temp_dir, first_head, rebased_head)
}

/// Writes the activity marker and a run-control channel file at the top of a temporary git
/// worktree and expects `worktree_blocking_status_lines` to report nothing blocking.
#[test]
fn worktree_blocking_status_lines_ignores_untracked_decodex_runtime_artifacts() {
    let (scratch, _, _) = temp_git_worktree("x/pubfi-pub-718");
    let worktree = scratch.path();
    let marker_path = worktree.join(crate::state::RUN_ACTIVITY_MARKER_FILE);
    let channel_dir = worktree.join(crate::state::RUN_CONTROL_CHANNEL_DIR);

    // Artifact 1: the activity marker file.
    fs::write(marker_path, "agent_run\n").expect("activity marker should write");

    // Artifact 2: a channel file inside the run-control directory.
    fs::create_dir_all(&channel_dir).expect("run-control directory should create");

    let channel_file = channel_dir.join("run-1-1.channel");

    fs::write(channel_file, "channel\n").expect("run-control channel should write");

    let blocking = match super::worktree_blocking_status_lines(worktree) {
        Ok(lines) => lines,
        Err(error) => panic!("worktree status should be readable: {error:?}"),
    };

    assert!(blocking.is_empty(), "runtime artifacts should not block rebind: {blocking:?}");
}

#[test]
fn diagnostic_treats_descendant_handoff_head_as_bound() {
let branch_name = "x/pubfi-pub-718";
Expand Down
10 changes: 10 additions & 0 deletions apps/decodex/src/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,14 @@ pub(crate) const RUN_CONTROL_ACTION_FALLBACK: &str = "fallback";
const DISPATCH_SLOT_LOCK_FILE_PREFIX: &str = ".decodex-dispatch-slot";
const ISSUE_CLAIM_LOCK_FILE_PREFIX: &str = ".decodex-issue-claim";

/// Reports whether a status line names an untracked top-level Decodex runtime artifact.
///
/// After trailing whitespace is removed, the line must start with `?? `. The remaining path
/// matches when it is exactly `RUN_ACTIVITY_MARKER_FILE`, exactly `RUN_CONTROL_CHANNEL_DIR`,
/// or `RUN_CONTROL_CHANNEL_DIR` followed by `/` and anything else. Paths that merely share
/// the directory name as a prefix, or that sit under another directory, do not match.
pub(crate) fn is_untracked_decodex_runtime_artifact_status_line(line: &str) -> bool {
    match line.trim_end().strip_prefix("?? ") {
        None => false,
        Some(path) if path == RUN_ACTIVITY_MARKER_FILE => true,
        Some(path) => match path.strip_prefix(RUN_CONTROL_CHANNEL_DIR) {
            // An empty remainder is the directory itself; a `/` remainder is the directory
            // entry with a trailing slash or something inside it.
            Some(rest) => rest.is_empty() || rest.starts_with('/'),
            None => false,
        },
    }
}

#[cfg(test)] mod tests;
62 changes: 58 additions & 4 deletions apps/decodex/src/worktree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use libc::{ESRCH, F_GETFL, F_SETFL, O_NONBLOCK, SIGKILL};

use crate::{
prelude::{Result, eyre},
state::RUN_ACTIVITY_MARKER_FILE,
state::{self, RUN_ACTIVITY_MARKER_FILE},
workflow::WorkflowWorkspaceHooks,
};

Expand Down Expand Up @@ -670,11 +670,18 @@ fn worktree_cleanliness(worktree_path: &Path) -> Result<MergedWorktreeCleanlines
stderr.trim()
);
}
if String::from_utf8_lossy(&output.stdout).trim().is_empty() {
return Ok(MergedWorktreeCleanliness::Clean);

let status = String::from_utf8_lossy(&output.stdout);

if status
.lines()
.filter(|line| !line.trim_end().is_empty())
.any(|line| !state::is_untracked_decodex_runtime_artifact_status_line(line))
{
return Ok(MergedWorktreeCleanliness::Dirty);
}

Ok(MergedWorktreeCleanliness::Dirty)
Ok(MergedWorktreeCleanliness::Clean)
}

fn symbolic_ref(repo_root: &Path, ref_name: &str) -> Result<Option<String>> {
Expand Down Expand Up @@ -1392,6 +1399,53 @@ read_first = []
assert_eq!(debts[0].cleanliness, super::MergedWorktreeCleanliness::Dirty);
}

// Checks that a merged worktree containing only the activity marker and a run-control
// channel file is still reported as `Clean` by `merged_worktree_cleanup_debts`.
#[test]
fn merged_worktree_cleanup_debts_treats_decodex_runtime_artifacts_as_clean() {
let (_temp_dir, repo_root) = init_repo();
let worktree_root = repo_root.join(".worktrees");
let worktree_path = worktree_root.join("accounts-column-format");

fs::create_dir_all(&worktree_root).expect("worktree root should exist");

// Add a worktree at `worktree_path` on a new branch `xy/accounts-column-format` based on `main`.
run_git(
&repo_root,
&[
"worktree",
"add",
"-b",
"xy/accounts-column-format",
worktree_path.to_str().expect("worktree path should be UTF-8"),
"main",
],
);

// Commit one change on the feature branch from inside the worktree.
fs::write(worktree_path.join("README.md"), "feature work\n")
.expect("worktree file should write");

run_git(&worktree_path, &["add", "README.md"]);
run_git(&worktree_path, &["commit", "-m", "feature work"]);
// Merge the feature branch from `repo_root` with an explicit merge commit.
// NOTE(review): presumably `repo_root` has `main` checked out — confirm in `init_repo`.
run_git(
&repo_root,
&["merge", "--no-ff", "xy/accounts-column-format", "-m", "land feature"],
);

// After the merge, drop the two runtime artifacts at the top of the worktree; they are
// never added or committed.
fs::write(worktree_path.join(crate::state::RUN_ACTIVITY_MARKER_FILE), "agent_run\n")
.expect("activity marker should write");

let control_dir = worktree_path.join(crate::state::RUN_CONTROL_CHANNEL_DIR);

fs::create_dir_all(&control_dir).expect("run-control directory should create");
fs::write(control_dir.join("run-1-1.channel"), "channel\n")
.expect("run-control channel should write");

let debts = super::merged_worktree_cleanup_debts(&repo_root, &worktree_root, "main")
.expect("cleanup debt scan should succeed");

// Exactly one debt is expected, for the feature branch, and the artifacts must not make it dirty.
assert_eq!(debts.len(), 1);
assert_eq!(debts[0].branch_name, "xy/accounts-column-format");
assert_eq!(debts[0].cleanliness, super::MergedWorktreeCleanliness::Clean);
}

#[test]
fn merged_worktree_cleanup_debts_ignores_dirty_worktree_started_from_old_default() {
let (_temp_dir, repo_root) = init_repo();
Expand Down
3 changes: 2 additions & 1 deletion docs/runbook/recover-review-handoff.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ The command rejects the rebind unless all of these are true:
- the issue does not have opt-out or needs-attention labels
- the issue still has `decodex:active:<service-id>` ownership
- the retained worktree branch matches the runtime DB worktree mapping
- the retained worktree has no local changes except `.decodex-run-activity`
- the retained worktree has no local changes other than the untracked top-level Decodex runtime
artifacts `.decodex-run-activity` and `.decodex-run-control/`
- the PR belongs to the configured GitHub repository
- the PR targets the configured default branch
- the PR is open and non-draft
Expand Down
2 changes: 1 addition & 1 deletion docs/spec/runtime.md
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ The runtime database stores at least:

For child supervision, the active lane may also carry a short-lived worktree heartbeat marker at `.worktrees/<ISSUE>/.decodex-run-activity`. That marker is advisory, keyed to the current `run_id` plus `attempt_number`, and exists so the control plane can observe child activity across process boundaries, surface active thread and protocol progress in operator status, and keep high-frequency telemetry out of Linear. When the marker records process liveness, it must pair `process_id` with both host boot identity (`host_boot_id`) and process start identity (`process_start_identity`). A marker from a previous boot, a marker missing either identity, a marker whose process start identity no longer matches the live PID, a marker whose PID has exited into an unreaped zombie state, or a marker observed while Decodex cannot read the current host or process identity must not be treated as a live process even if that PID currently exists. Operator snapshots expose `process_liveness_reason` so operators can distinguish stopped processes, previous-boot markers, and same-boot PID reuse from genuine live execution. The marker may also carry additive `child_agent_activity`, protocol, account, and legacy review-policy JSON or scalar fields for operator diagnostics. Legacy review-policy marker fields are breadcrumbs only: review-policy gating belongs to the runtime store and must not be overwritten from marker values. Operator snapshots must keep queue ownership separate from execution liveness: `active_lease` and `queue_lease_state` describe the local queue lease, while `execution_liveness` describes the observed process, app-server thread, or protocol marker that keeps an active lane visible. If a raw attempt is still `starting` after app-server thread, model, or protocol activity is observed, operator-facing `status` must report `running` and preserve the raw value in `attempt_status`. 
High-frequency heartbeat, child-agent buckets, token counts, idle ages, and other transient liveness details stay local/operator-only under the boundary defined by [`linear-execution-ledger.md`](./linear-execution-ledger.md).
Post-review ownership is stored in the runtime database. Retained handoff rows record the authoritative PR URL, branch lineage, validated PR head OID, run id, and attempt number that completed the `In Review` handoff. Retained orchestration rows record the current post-review phase for that exact handoff identity. If the matching database row is missing, post-review ownership must block as unresolved instead of rebinding from branch-name, current-head, Linear comments, or other heuristics. If a retained review marker exists but a stored handoff or orchestration head no longer matches a clean retained worktree and matching PR head, operator status must keep the marker PR URL visible when known and recovery diagnosis must report the concrete mismatched field before any explicit rebind refresh. When retained PR readback degrades but the handoff identity is still safe to preserve, operator-local status may expose a typed `readback_root_cause` diagnostic such as missing GitHub CLI, missing GitHub token, GitHub auth failure, API/read failure, parse/shape failure, or lineage validation failure while keeping public-safe warning reasons such as `pull_request_state_read_failed` stable.
The only source-tree marker that clean-source checks may ignore is the untracked `.decodex-run-activity` heartbeat marker. Durable review handoff, orchestration, review-policy checkpoints, retry, phase timing, and retained PR state belong in the Decodex runtime database, not in root-level or worktree-local review marker files. If the heartbeat marker carries similarly named fields for compatibility or operator diagnostics, those breadcrumb values cannot override runtime-store rows.
The only source-tree runtime artifacts that clean-source checks may ignore are the untracked top-level `.decodex-run-activity` heartbeat marker and `.decodex-run-control/` local control-channel directory. Durable review handoff, orchestration, review-policy checkpoints, retry, phase timing, and retained PR state belong in the Decodex runtime database, not in root-level or worktree-local review marker files. If the heartbeat marker carries similarly named fields for compatibility or operator diagnostics, those breadcrumb values cannot override runtime-store rows.

### Dispatch-slot handoff invariant

Expand Down