Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions python/cube.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ model_aliases:
cli_tools:
sonnet-4.5-thinking: cursor-agent
sonnet-4.6-thinking: cursor-agent
claude-opus-4-7-thinking-max: cursor-agent
claude-opus-4-7-thinking-max: claude
gpt-5.3-codex-high: cursor-agent
gpt-5.5-high: cursor-agent
gpt-5.5-extra-high: cursor-agent
codex-gpt-5.5: codex
gpt-5.5-codex: codex
grok-4-20-thinking: cursor-agent
grok: cursor-agent
gemini-2.5-pro: gemini
Expand Down Expand Up @@ -128,9 +130,10 @@ judges:
color: "yellow"

judge_3:
model: "qwen"
label: "Judge Qwen"
model: "gpt"
label: "Judge Security"
color: "magenta"
persona: "security-pentest"

# Paths
paths:
Expand Down
47 changes: 38 additions & 9 deletions python/cube/automation/judge_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from ..core.git import branch_exists, fetch_branches, get_commit_hash, sync_worktree
from ..core.output import console, print_error, print_info, print_success
from ..core.parsers.registry import get_parser
from ..core.session import load_session, save_session
from ..core.session import load_session, load_session_metadata, save_session
from ..core.user_config import get_judge_configs, get_writer_by_key, get_writer_by_key_or_metadata, load_config
from ..models.types import JudgeInfo
from .stream import format_stream_message
Expand Down Expand Up @@ -101,13 +101,37 @@ def _get_cli_review_worktrees(task_id: str, winner: str = None) -> dict:
]


def _load_matching_judge_session(jconfig, task_id: str, review_type: str) -> str | None:
"""Load a judge session whenever one exists.
def _judge_session_metadata(jconfig, cli_name: str) -> str:
"""Build metadata used to avoid resuming sessions across CLI backends."""
return f"{jconfig.label} ({jconfig.model}) | cli={cli_name}"

Resume should be attempted by default. If a CLI rejects or ignores an old
session, the retry/new-session handling will update the saved session.
"""
return load_session(jconfig.key.upper(), f"{task_id}_{review_type}")

def _apply_judge_persona(prompt: str, judge_info: JudgeInfo) -> str:
"""Prepend judge-specific persona instructions when configured."""
if not judge_info.persona:
return prompt

return f"""# JUDGE PERSONA — {judge_info.label}

{judge_info.persona}

Apply this persona as your review lens, but do not ignore the shared review rules below.

---

{prompt}"""


def _load_matching_judge_session(jconfig, task_id: str, review_type: str) -> str | None:
"""Load a judge session only when it matches the current model and CLI backend."""
config = load_config()
cli_name = "cli-review" if jconfig.type == "cli-review" else config.cli_tools.get(jconfig.model, "cursor-agent")
session_task_key = f"{task_id}_{review_type}"
metadata = load_session_metadata(jconfig.key.upper(), session_task_key)
expected = _judge_session_metadata(jconfig, cli_name)
if metadata and metadata != expected:
return None
return load_session(jconfig.key.upper(), session_task_key)


async def _wait_for_valid_decision_file(decision_file: Path, timeout_seconds: float = 5.0) -> None:
Expand Down Expand Up @@ -163,6 +187,7 @@ async def run_judge(
session_id = judge_info.session_id if resume else None
run_dir = WORKTREE_BASE.parent if cli_name == "gemini" else PROJECT_ROOT
judge_specific_prompt = prompt.replace("{{judge_key}}", judge_info.key).replace("{judge_key}", judge_info.key)
judge_specific_prompt = _apply_judge_persona(judge_specific_prompt, judge_info)
decision_type = "peer-review" if judge_info.review_type == "peer-review" else "decision"

from ..core.decision_parser import get_decision_file_path, parse_single_decision_file
Expand Down Expand Up @@ -209,7 +234,10 @@ async def run_judge(
suffix=judge_info.review_type,
session_key=judge_info.key.upper(),
session_task_key=f"{judge_info.task_id}_{judge_info.review_type}",
metadata=f"{judge_info.label} ({judge_info.key}) - {judge_info.task_id} - {judge_info.review_type} - {datetime.now()}",
metadata=(
f"{judge_info.label} ({judge_info.key}) - {judge_info.task_id} - "
f"{judge_info.review_type} - {cli_name} - {datetime.now()}"
),
) as logger:
async for line in stream: # type: ignore[attr-defined]
logger.write_line(line)
Expand All @@ -224,7 +252,7 @@ async def run_judge(
judge_info.key.upper(),
f"{judge_info.task_id}_{judge_info.review_type}",
msg.session_id,
f"{judge_info.label} ({judge_info.model})",
_judge_session_metadata(judge_info, cli_name),
)

formatted = format_stream_message(msg, judge_info.label, judge_info.color)
Expand Down Expand Up @@ -599,6 +627,7 @@ async def launch_judge_panel(
label=jconfig.label,
task_id=task_id,
review_type=review_type,
persona=jconfig.persona,
session_id=session_id,
adapter_config={"type": jconfig.type, "cmd": jconfig.cmd, "name": jconfig.label}
if jconfig.type == "cli-review"
Expand Down
6 changes: 3 additions & 3 deletions python/cube/automation/single_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ async def run_single_writer(writer_info: WriterInfo, prompt: str, resume: bool)
parser = get_parser(cli_name)

layout = SingleAgentLayout
layout.initialize(writer_info.label)
layout.initialize(writer_info.label, task_name=writer_info.task_id)
layout.start()

session_id = writer_info.session_id if resume else None
Expand Down Expand Up @@ -158,7 +158,7 @@ async def launch_single_writer(
wconfig = get_writer_config(writer_key)

layout = SingleAgentLayout
layout.initialize(f"Writer: {wconfig.label}")
layout.initialize(f"Writer: {wconfig.label}", task_name=task_id)

from ..core.writer_metadata import WriterMetadata, save_writer_metadata

Expand Down Expand Up @@ -231,7 +231,7 @@ async def launch_single_writer(
current_prompt = interrupt.message
is_resuming = True
# Re-initialize layout for the resume
layout.initialize(f"Writer: {wconfig.label}")
layout.initialize(f"Writer: {wconfig.label}", task_name=task_id)
continue
except Exception as e:
print_error(f"Writer {writer_info.label} failed: {e}")
Expand Down
3 changes: 2 additions & 1 deletion python/cube/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,8 @@ def pr(
console.print()

try:
asyncio.run(create_pr(resolved_task_id, winner))
state = load_state(resolved_task_id)
asyncio.run(create_pr(resolved_task_id, winner, single_mode=bool(state and state.mode == "single")))
except Exception as e:
_print_error(e)
sys.exit(1)
Expand Down
8 changes: 4 additions & 4 deletions python/cube/commands/orchestrate/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ async def synthesis_final_decision(ctx: WorkflowContext) -> PhaseResult:
return PhaseResult(exit=True)

if final_result["approved"] and not final_result["remaining_issues"]:
await create_pr(ctx.task_id, ctx.result["winner"])
await create_pr(ctx.task_id, ctx.result["winner"], single_mode=is_single_mode(ctx))
return PhaseResult(exit=True)

# If no issues to fix, proceed even if not fully approved (UNKNOWN judges etc)
Expand Down Expand Up @@ -284,7 +284,7 @@ async def synthesis_final_peer_review(ctx: WorkflowContext) -> PhaseResult:
phase9_data = ctx.result.get("phase_9_data", {})
if phase9_data.get("fixes_skipped"):
print_info("No fixes were made - skipping re-review")
await create_pr(ctx.task_id, ctx.result["winner"])
await create_pr(ctx.task_id, ctx.result["winner"], single_mode=is_single_mode(ctx))
return PhaseResult(exit=True)

# Resume judges by default (they have context), use --fresh-judges for fresh start
Expand All @@ -295,7 +295,7 @@ async def synthesis_final_peer_review(ctx: WorkflowContext) -> PhaseResult:

final_check = run_decide_peer_review(ctx.task_id)
if final_check["approved"] and not final_check["remaining_issues"]:
await create_pr(ctx.task_id, ctx.result["winner"])
await create_pr(ctx.task_id, ctx.result["winner"], single_mode=is_single_mode(ctx))
elif final_check["approved"]:
print_warning(f"Approved but still has {len(final_check['remaining_issues'])} issue(s) after minor fixes")
console.print()
Expand All @@ -304,7 +304,7 @@ async def synthesis_final_peer_review(ctx: WorkflowContext) -> PhaseResult:
console.print(f" • {_normalize_issue(issue)}")
console.print()
console.print("Creating PR anyway (all judges approved)...")
await create_pr(ctx.task_id, ctx.result["winner"])
await create_pr(ctx.task_id, ctx.result["winner"], single_mode=is_single_mode(ctx))
else:
MAX_MINOR_FIX_LOOPS = 3
if ctx.minor_fix_count == 0:
Expand Down
6 changes: 3 additions & 3 deletions python/cube/commands/orchestrate/phases.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ async def run_synthesis(task_id: str, result: dict, prompts_dir: Path, resume_pr
Save to: `.prompts/synthesis-{task_id}.md`"""

layout = SingleAgentLayout
layout.initialize("Prompter")
layout.initialize("Prompter", task_name=task_id)
layout.start()

try:
Expand Down Expand Up @@ -255,7 +255,7 @@ async def run_peer_review(
Include the worktree location and git commands for reviewing."""

layout = SingleAgentLayout
layout.initialize("Prompter")
layout.initialize("Prompter", task_name=task_id)
layout.start()

try:
Expand Down Expand Up @@ -344,7 +344,7 @@ async def run_minor_fixes(
from ...core.single_layout import SingleAgentLayout

layout = SingleAgentLayout
layout.initialize("Prompter")
layout.initialize("Prompter", task_name=task_id)
layout.start()

try:
Expand Down
8 changes: 4 additions & 4 deletions python/cube/commands/orchestrate/pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@

from ...core.config import PROJECT_ROOT
from ...core.output import console, print_success, print_warning
from ...core.user_config import get_writer_by_key_or_metadata


async def create_pr(task_id: str, winner: str):
async def create_pr(task_id: str, winner: str, single_mode: bool = False):
"""Create PR automatically."""
from ...core.user_config import get_writer_by_key_or_metadata

winner_cfg = get_writer_by_key_or_metadata(winner, task_id)
branch = f"writer-{winner_cfg.name}/{task_id}"
writer_line = f"Writer: {winner_cfg.label} ({winner_cfg.key})" if single_mode else f"Winner: {winner_cfg.label}"

console.print(f"[green]✅ Creating PR from: {branch}[/green]")
console.print()
Expand All @@ -29,7 +29,7 @@ async def create_pr(task_id: str, winner: str):
"--title",
f"feat: {task_id}",
"--body",
f"Autonomous implementation via Agent Cube\n\nWinner: Writer {winner}\nBranch: {branch}\n\nReview decisions in `.prompts/decisions/{task_id}-*.json`",
f"Autonomous implementation via Agent Cube\n\n{writer_line}\nBranch: {branch}\n\nReview decisions in `.prompts/decisions/{task_id}-*.json`",
],
cwd=PROJECT_ROOT,
capture_output=True,
Expand Down
6 changes: 3 additions & 3 deletions python/cube/commands/orchestrate/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ async def generate_writer_prompt(task_id: str, task_content: str, prompts_dir: P
### Last Step:
Commit and push when verification passes!"""

layout = SingleAgentLayout.initialize("Prompter")
layout = SingleAgentLayout.initialize("Prompter", task_name=task_id)
layout.start()

# Resume prompter session if exists, otherwise capture new session ID
Expand Down Expand Up @@ -125,7 +125,7 @@ async def generate_panel_prompt(task_id: str, prompts_dir: Path) -> Path:

Include evaluation criteria, scoring rubric, and decision JSON format."""

layout = SingleAgentLayout.initialize("Prompter")
layout = SingleAgentLayout.initialize("Prompter", task_name=task_id)
layout.start()

# Resume prompter session if exists, otherwise capture new session ID
Expand Down Expand Up @@ -263,7 +263,7 @@ def capture_session(sid: str) -> None:

if len(entries) == 1:
entry = entries[0]
layout = SingleAgentLayout.initialize(entry["label"])
layout = SingleAgentLayout.initialize(entry["label"], task_name=task_id)
layout.start()
try:
await run_agent_with_layout(
Expand Down
61 changes: 60 additions & 1 deletion python/cube/commands/pr_fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,43 @@ def _delete_reply_plan(path: Path) -> None:
print_warning(f"Could not delete reply plan: {path}")


def _is_non_fast_forward_push_error(stderr: str) -> bool:
"""Return True when git push failed because the remote branch moved."""
error = stderr.lower()
return "non-fast-forward" in error or "fetch first" in error or "tip of your current branch is behind" in error


def _recover_non_fast_forward_push(worktree: Path, branch_name: str) -> bool:
"""Rebase local fix commit(s) onto the latest remote branch before retrying push."""
if not branch_name or branch_name == "HEAD":
return False

fetch = subprocess.run(
["git", "fetch", "origin", branch_name],
cwd=worktree,
capture_output=True,
text=True,
timeout=60,
)
if fetch.returncode != 0:
print_warning(f"Fetch before push retry failed: {fetch.stderr.strip()}")
return False

rebase = subprocess.run(
["git", "rebase", f"origin/{branch_name}"],
cwd=worktree,
capture_output=True,
text=True,
timeout=120,
)
if rebase.returncode == 0:
return True

subprocess.run(["git", "rebase", "--abort"], cwd=worktree, capture_output=True, text=True, timeout=30)
print_warning(f"Rebase before push retry failed: {rebase.stderr.strip() or rebase.stdout.strip()}")
return False


def _reply_plan_entries(reply_plan: dict[str, Any]) -> dict[int, dict[str, Any]]:
"""Return reply plan entries keyed by 1-based comment index."""
entries: dict[int, dict[str, Any]] = {}
Expand Down Expand Up @@ -451,7 +488,7 @@ def _run_fix_agent(
head_before = result.stdout.strip() if result.returncode == 0 else None

layout = SingleAgentLayout
layout.initialize(wconfig.label)
layout.initialize(wconfig.label, task_name=f"PR #{pr_number}")
layout.start()

# Track session ID from stream
Expand Down Expand Up @@ -608,6 +645,28 @@ async def run_fix():
timeout=60,
)
if result.returncode != 0:
if _is_non_fast_forward_push_error(result.stderr):
print_warning("Push was rejected because the remote branch moved; rebasing local fix and retrying")
if _recover_non_fast_forward_push(worktree, branch_name):
result = subprocess.run(
["git", "push", "origin", push_ref],
cwd=worktree,
capture_output=True,
text=True,
timeout=60,
)
if result.returncode == 0:
refreshed_sha = subprocess.run(
["git", "rev-parse", "--short", "HEAD"],
cwd=worktree,
capture_output=True,
text=True,
timeout=10,
)
if refreshed_sha.returncode == 0 and refreshed_sha.stdout.strip():
commit_sha = refreshed_sha.stdout.strip()
return FixAgentResult(commit_sha=commit_sha or "", reply_plan=reply_plan)

print_error(f"Push failed: {result.stderr}")
return None

Expand Down
2 changes: 2 additions & 0 deletions python/cube/core/adapters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .base import CLIAdapter, read_stream_with_buffer, run_subprocess_streaming
from .claude import ClaudeAdapter
from .cli_review import CLIReviewAdapter
from .codex import CodexAdapter
from .cursor import CursorAdapter
from .gemini import GeminiAdapter
from .generic_cli import GenericCLIAdapter
Expand All @@ -11,6 +12,7 @@
"CLIAdapter",
"ClaudeAdapter",
"CLIReviewAdapter",
"CodexAdapter",
"CursorAdapter",
"GeminiAdapter",
"GenericCLIAdapter",
Expand Down
Loading
Loading