From b7a79ee01c1b8d75072fed5457fbc02adf8d403d Mon Sep 17 00:00:00 2001
From: Jacob Ellis <jacob@aetheron.com>
Date: Thu, 30 Apr 2026 10:35:19 +0930
Subject: [PATCH] feat: add Codex adapter, judge personas, and Claude routing
 updates

---
 python/cube.yaml                             |   9 +-
 python/cube/automation/judge_panel.py        |  47 ++++-
 python/cube/automation/single_writer.py      |   6 +-
 python/cube/cli.py                           |   3 +-
 python/cube/commands/orchestrate/handlers.py |   8 +-
 python/cube/commands/orchestrate/phases.py   |   6 +-
 python/cube/commands/orchestrate/pr.py       |   8 +-
 python/cube/commands/orchestrate/prompts.py  |   6 +-
 python/cube/commands/pr_fix.py               |  61 ++++++-
 python/cube/core/adapters/__init__.py        |   2 +
 python/cube/core/adapters/claude.py          |  20 ++-
 python/cube/core/adapters/codex.py           |  80 +++++++++
 python/cube/core/adapters/registry.py        |   2 +
 python/cube/core/judge_personas.py           |  40 +++++
 python/cube/core/parsers/__init__.py         |   2 +
 python/cube/core/parsers/claude.py           |  21 ++-
 python/cube/core/parsers/codex.py            |  91 ++++++++++
 python/cube/core/parsers/registry.py         |   2 +
 python/cube/core/single_layout.py            |   9 +-
 python/cube/core/user_config.py              |   4 +
 python/cube/models/types.py                  |   1 +
 tests/cli/test_adapters.py                   | 179 ++++++++++++++++++-
 tests/cli/test_pr_fix.py                     |  39 ++++
 tests/core/test_judge_panel_retry.py         |  48 ++++-
 tests/core/test_orchestrate_pr.py            |  51 ++++++
 tests/core/test_single_layout.py             |  23 +++
 tests/core/test_user_config.py               |  30 +++-
 27 files changed, 747 insertions(+), 51 deletions(-)
 create mode 100644 python/cube/core/adapters/codex.py
 create mode 100644 python/cube/core/judge_personas.py
 create mode 100644 python/cube/core/parsers/codex.py
 create mode 100644 tests/core/test_orchestrate_pr.py
 create mode 100644 tests/core/test_single_layout.py

diff --git a/python/cube.yaml b/python/cube.yaml
index bd96dd23..1faf2657 100644
--- a/python/cube.yaml
+++ b/python/cube.yaml
@@ -24,10 +24,12 @@ model_aliases:
 cli_tools:
   sonnet-4.5-thinking: cursor-agent
   sonnet-4.6-thinking: cursor-agent
-  claude-opus-4-7-thinking-max: cursor-agent
+  claude-opus-4-7-thinking-max: claude
   gpt-5.3-codex-high: cursor-agent
   gpt-5.5-high: cursor-agent
   gpt-5.5-extra-high: cursor-agent
+  codex-gpt-5.5: codex
+  gpt-5.5-codex: codex
   grok-4-20-thinking: cursor-agent
   grok: cursor-agent
   gemini-2.5-pro: gemini
@@ -128,9 +130,10 @@ judges:
     color: "yellow"
 
   judge_3:
-    model: "qwen"
-    label: "Judge Qwen"
+    model: "gpt"
+    label: "Judge Security"
     color: "magenta"
+    persona: "security-pentest"
 
 # Paths
 paths:
diff --git a/python/cube/automation/judge_panel.py b/python/cube/automation/judge_panel.py
index 8747e028..c79a674e 100644
--- a/python/cube/automation/judge_panel.py
+++ b/python/cube/automation/judge_panel.py
@@ -11,7 +11,7 @@
 from ..core.git import branch_exists, fetch_branches, get_commit_hash, sync_worktree
 from ..core.output import console, print_error, print_info, print_success
 from ..core.parsers.registry import get_parser
-from ..core.session import load_session, save_session
+from ..core.session import load_session, load_session_metadata, save_session
 from ..core.user_config import get_judge_configs, get_writer_by_key, get_writer_by_key_or_metadata, load_config
 from ..models.types import JudgeInfo
 from .stream import format_stream_message
@@ -101,13 +101,37 @@ def _get_cli_review_worktrees(task_id: str, winner: str = None) -> dict:
 ]
 
 
-def _load_matching_judge_session(jconfig, task_id: str, review_type: str) -> str | None:
-    """Load a judge session whenever one exists.
+def _judge_session_metadata(jconfig, cli_name: str) -> str:
+    """Return the "<label> (<model>) | cli=<cli_name>" tag used to avoid resuming sessions across CLI backends."""
+    return f"{jconfig.label} ({jconfig.model}) | cli={cli_name}"
 
-    Resume should be attempted by default. If a CLI rejects or ignores an old
-    session, the retry/new-session handling will update the saved session.
-    """
-    return load_session(jconfig.key.upper(), f"{task_id}_{review_type}")
+
+def _apply_judge_persona(prompt: str, judge_info: JudgeInfo) -> str:
+    """Return prompt as-is when judge_info.persona is falsy; otherwise prefix it with a persona header section."""
+    if not judge_info.persona:
+        return prompt
+
+    return f"""# JUDGE PERSONA — {judge_info.label}
+
+{judge_info.persona}
+
+Apply this persona as your review lens, but do not ignore the shared review rules below.
+
+---
+
+{prompt}"""
+
+
+def _load_matching_judge_session(jconfig, task_id: str, review_type: str) -> str | None:
+    """Load the saved judge session, or return None when its stored metadata differs from the current model/CLI tag."""
+    config = load_config()
+    cli_name = "cli-review" if jconfig.type == "cli-review" else config.cli_tools.get(jconfig.model, "cursor-agent")
+    session_task_key = f"{task_id}_{review_type}"
+    metadata = load_session_metadata(jconfig.key.upper(), session_task_key)
+    expected = _judge_session_metadata(jconfig, cli_name)
+    if metadata and metadata != expected:  # falsy (missing/empty) metadata does not block the load
+        return None
+    return load_session(jconfig.key.upper(), session_task_key)
 
 
 async def _wait_for_valid_decision_file(decision_file: Path, timeout_seconds: float = 5.0) -> None:
@@ -163,6 +187,7 @@ async def run_judge(
     session_id = judge_info.session_id if resume else None
     run_dir = WORKTREE_BASE.parent if cli_name == "gemini" else PROJECT_ROOT
     judge_specific_prompt = prompt.replace("{{judge_key}}", judge_info.key).replace("{judge_key}", judge_info.key)
+    judge_specific_prompt = _apply_judge_persona(judge_specific_prompt, judge_info)
     decision_type = "peer-review" if judge_info.review_type == "peer-review" else "decision"
 
     from ..core.decision_parser import get_decision_file_path, parse_single_decision_file
@@ -209,7 +234,10 @@ async def run_judge(
                 suffix=judge_info.review_type,
                 session_key=judge_info.key.upper(),
                 session_task_key=f"{judge_info.task_id}_{judge_info.review_type}",
-                metadata=f"{judge_info.label} ({judge_info.key}) - {judge_info.task_id} - {judge_info.review_type} - {datetime.now()}",
+                metadata=(
+                    f"{judge_info.label} ({judge_info.key}) - {judge_info.task_id} - "
+                    f"{judge_info.review_type} - {cli_name} - {datetime.now()}"
+                ),
             ) as logger:
                 async for line in stream:  # type: ignore[attr-defined]
                     logger.write_line(line)
@@ -224,7 +252,7 @@ async def run_judge(
                                 judge_info.key.upper(),
                                 f"{judge_info.task_id}_{judge_info.review_type}",
                                 msg.session_id,
-                                f"{judge_info.label} ({judge_info.model})",
+                                _judge_session_metadata(judge_info, cli_name),
                             )
 
                         formatted = format_stream_message(msg, judge_info.label, judge_info.color)
@@ -599,6 +627,7 @@ async def launch_judge_panel(
                 label=jconfig.label,
                 task_id=task_id,
                 review_type=review_type,
+                persona=jconfig.persona,
                 session_id=session_id,
                 adapter_config={"type": jconfig.type, "cmd": jconfig.cmd, "name": jconfig.label}
                 if jconfig.type == "cli-review"
diff --git a/python/cube/automation/single_writer.py b/python/cube/automation/single_writer.py
index 0e9e53d3..880dd6b1 100644
--- a/python/cube/automation/single_writer.py
+++ b/python/cube/automation/single_writer.py
@@ -37,7 +37,7 @@ async def run_single_writer(writer_info: WriterInfo, prompt: str, resume: bool)
     parser = get_parser(cli_name)
 
     layout = SingleAgentLayout
-    layout.initialize(writer_info.label)
+    layout.initialize(writer_info.label, task_name=writer_info.task_id)
     layout.start()
 
     session_id = writer_info.session_id if resume else None
@@ -158,7 +158,7 @@ async def launch_single_writer(
     wconfig = get_writer_config(writer_key)
 
     layout = SingleAgentLayout
-    layout.initialize(f"Writer: {wconfig.label}")
+    layout.initialize(f"Writer: {wconfig.label}", task_name=task_id)
 
     from ..core.writer_metadata import WriterMetadata, save_writer_metadata
 
@@ -231,7 +231,7 @@ async def launch_single_writer(
             current_prompt = interrupt.message
             is_resuming = True
             # Re-initialize layout for the resume
-            layout.initialize(f"Writer: {wconfig.label}")
+            layout.initialize(f"Writer: {wconfig.label}", task_name=task_id)
             continue
         except Exception as e:
             print_error(f"Writer {writer_info.label} failed: {e}")
diff --git a/python/cube/cli.py b/python/cube/cli.py
index 99562775..f1a3626c 100644
--- a/python/cube/cli.py
+++ b/python/cube/cli.py
@@ -504,7 +504,8 @@ def pr(
     console.print()
 
     try:
-        asyncio.run(create_pr(resolved_task_id, winner))
+        state = load_state(resolved_task_id)
+        asyncio.run(create_pr(resolved_task_id, winner, single_mode=bool(state and state.mode == "single")))
     except Exception as e:
         _print_error(e)
         sys.exit(1)
diff --git a/python/cube/commands/orchestrate/handlers.py b/python/cube/commands/orchestrate/handlers.py
index 01fcd612..a44802e3 100644
--- a/python/cube/commands/orchestrate/handlers.py
+++ b/python/cube/commands/orchestrate/handlers.py
@@ -206,7 +206,7 @@ async def synthesis_final_decision(ctx: WorkflowContext) -> PhaseResult:
         return PhaseResult(exit=True)
 
     if final_result["approved"] and not final_result["remaining_issues"]:
-        await create_pr(ctx.task_id, ctx.result["winner"])
+        await create_pr(ctx.task_id, ctx.result["winner"], single_mode=is_single_mode(ctx))
         return PhaseResult(exit=True)
 
     # If no issues to fix, proceed even if not fully approved (UNKNOWN judges etc)
@@ -284,7 +284,7 @@ async def synthesis_final_peer_review(ctx: WorkflowContext) -> PhaseResult:
     phase9_data = ctx.result.get("phase_9_data", {})
     if phase9_data.get("fixes_skipped"):
         print_info("No fixes were made - skipping re-review")
-        await create_pr(ctx.task_id, ctx.result["winner"])
+        await create_pr(ctx.task_id, ctx.result["winner"], single_mode=is_single_mode(ctx))
         return PhaseResult(exit=True)
 
     # Resume judges by default (they have context), use --fresh-judges for fresh start
@@ -295,7 +295,7 @@ async def synthesis_final_peer_review(ctx: WorkflowContext) -> PhaseResult:
 
     final_check = run_decide_peer_review(ctx.task_id)
     if final_check["approved"] and not final_check["remaining_issues"]:
-        await create_pr(ctx.task_id, ctx.result["winner"])
+        await create_pr(ctx.task_id, ctx.result["winner"], single_mode=is_single_mode(ctx))
     elif final_check["approved"]:
         print_warning(f"Approved but still has {len(final_check['remaining_issues'])} issue(s) after minor fixes")
         console.print()
@@ -304,7 +304,7 @@ async def synthesis_final_peer_review(ctx: WorkflowContext) -> PhaseResult:
             console.print(f"  • {_normalize_issue(issue)}")
         console.print()
         console.print("Creating PR anyway (all judges approved)...")
-        await create_pr(ctx.task_id, ctx.result["winner"])
+        await create_pr(ctx.task_id, ctx.result["winner"], single_mode=is_single_mode(ctx))
     else:
         MAX_MINOR_FIX_LOOPS = 3
         if ctx.minor_fix_count == 0:
diff --git a/python/cube/commands/orchestrate/phases.py b/python/cube/commands/orchestrate/phases.py
index 71ffd6a5..1801b4e1 100644
--- a/python/cube/commands/orchestrate/phases.py
+++ b/python/cube/commands/orchestrate/phases.py
@@ -184,7 +184,7 @@ async def run_synthesis(task_id: str, result: dict, prompts_dir: Path, resume_pr
 Save to: `.prompts/synthesis-{task_id}.md`"""
 
         layout = SingleAgentLayout
-        layout.initialize("Prompter")
+        layout.initialize("Prompter", task_name=task_id)
         layout.start()
 
         try:
@@ -255,7 +255,7 @@ async def run_peer_review(
 Include the worktree location and git commands for reviewing."""
 
         layout = SingleAgentLayout
-        layout.initialize("Prompter")
+        layout.initialize("Prompter", task_name=task_id)
         layout.start()
 
         try:
@@ -344,7 +344,7 @@ async def run_minor_fixes(
     from ...core.single_layout import SingleAgentLayout
 
     layout = SingleAgentLayout
-    layout.initialize("Prompter")
+    layout.initialize("Prompter", task_name=task_id)
     layout.start()
 
     try:
diff --git a/python/cube/commands/orchestrate/pr.py b/python/cube/commands/orchestrate/pr.py
index 663b1af2..50e3fa8b 100644
--- a/python/cube/commands/orchestrate/pr.py
+++ b/python/cube/commands/orchestrate/pr.py
@@ -4,14 +4,14 @@
 
 from ...core.config import PROJECT_ROOT
 from ...core.output import console, print_success, print_warning
+from ...core.user_config import get_writer_by_key_or_metadata
 
 
-async def create_pr(task_id: str, winner: str):
+async def create_pr(task_id: str, winner: str, single_mode: bool = False):
     """Create PR automatically."""
-    from ...core.user_config import get_writer_by_key_or_metadata
-
     winner_cfg = get_writer_by_key_or_metadata(winner, task_id)
     branch = f"writer-{winner_cfg.name}/{task_id}"
+    writer_line = f"Writer: {winner_cfg.label} ({winner_cfg.key})" if single_mode else f"Winner: {winner_cfg.label}"
 
     console.print(f"[green]✅ Creating PR from: {branch}[/green]")
     console.print()
@@ -29,7 +29,7 @@ async def create_pr(task_id: str, winner: str):
                 "--title",
                 f"feat: {task_id}",
                 "--body",
-                f"Autonomous implementation via Agent Cube\n\nWinner: Writer {winner}\nBranch: {branch}\n\nReview decisions in `.prompts/decisions/{task_id}-*.json`",
+                f"Autonomous implementation via Agent Cube\n\n{writer_line}\nBranch: {branch}\n\nReview decisions in `.prompts/decisions/{task_id}-*.json`",
             ],
             cwd=PROJECT_ROOT,
             capture_output=True,
diff --git a/python/cube/commands/orchestrate/prompts.py b/python/cube/commands/orchestrate/prompts.py
index 4444fa67..2c710ffb 100644
--- a/python/cube/commands/orchestrate/prompts.py
+++ b/python/cube/commands/orchestrate/prompts.py
@@ -71,7 +71,7 @@ async def generate_writer_prompt(task_id: str, task_content: str, prompts_dir: P
 ### Last Step:
 Commit and push when verification passes!"""
 
-    layout = SingleAgentLayout.initialize("Prompter")
+    layout = SingleAgentLayout.initialize("Prompter", task_name=task_id)
     layout.start()
 
     # Resume prompter session if exists, otherwise capture new session ID
@@ -125,7 +125,7 @@ async def generate_panel_prompt(task_id: str, prompts_dir: Path) -> Path:
 
 Include evaluation criteria, scoring rubric, and decision JSON format."""
 
-    layout = SingleAgentLayout.initialize("Prompter")
+    layout = SingleAgentLayout.initialize("Prompter", task_name=task_id)
     layout.start()
 
     # Resume prompter session if exists, otherwise capture new session ID
@@ -263,7 +263,7 @@ def capture_session(sid: str) -> None:
 
     if len(entries) == 1:
         entry = entries[0]
-        layout = SingleAgentLayout.initialize(entry["label"])
+        layout = SingleAgentLayout.initialize(entry["label"], task_name=task_id)
         layout.start()
         try:
             await run_agent_with_layout(
diff --git a/python/cube/commands/pr_fix.py b/python/cube/commands/pr_fix.py
index 0b0df1b1..1dd1c478 100644
--- a/python/cube/commands/pr_fix.py
+++ b/python/cube/commands/pr_fix.py
@@ -259,6 +259,43 @@ def _delete_reply_plan(path: Path) -> None:
         print_warning(f"Could not delete reply plan: {path}")
 
 
+def _is_non_fast_forward_push_error(stderr: str) -> bool:
+    """Return True when push stderr indicates the remote branch moved (case-insensitive match on three phrases)."""
+    error = stderr.lower()
+    return "non-fast-forward" in error or "fetch first" in error or "tip of your current branch is behind" in error
+
+
+def _recover_non_fast_forward_push(worktree: Path, branch_name: str) -> bool:
+    """Fetch and rebase onto origin/<branch_name> so the push can be retried; return True only if the rebase succeeds."""
+    if not branch_name or branch_name == "HEAD":  # no usable branch name to fetch/rebase against
+        return False
+
+    fetch = subprocess.run(
+        ["git", "fetch", "origin", branch_name],
+        cwd=worktree,
+        capture_output=True,
+        text=True,
+        timeout=60,
+    )
+    if fetch.returncode != 0:
+        print_warning(f"Fetch before push retry failed: {fetch.stderr.strip()}")
+        return False
+
+    rebase = subprocess.run(
+        ["git", "rebase", f"origin/{branch_name}"],
+        cwd=worktree,
+        capture_output=True,
+        text=True,
+        timeout=120,
+    )
+    if rebase.returncode == 0:
+        return True
+
+    subprocess.run(["git", "rebase", "--abort"], cwd=worktree, capture_output=True, text=True, timeout=30)
+    print_warning(f"Rebase before push retry failed: {rebase.stderr.strip() or rebase.stdout.strip()}")
+    return False
+
+
 def _reply_plan_entries(reply_plan: dict[str, Any]) -> dict[int, dict[str, Any]]:
     """Return reply plan entries keyed by 1-based comment index."""
     entries: dict[int, dict[str, Any]] = {}
@@ -451,7 +488,7 @@ def _run_fix_agent(
     head_before = result.stdout.strip() if result.returncode == 0 else None
 
     layout = SingleAgentLayout
-    layout.initialize(wconfig.label)
+    layout.initialize(wconfig.label, task_name=f"PR #{pr_number}")
     layout.start()
 
     # Track session ID from stream
@@ -608,6 +645,28 @@ async def run_fix():
         timeout=60,
     )
     if result.returncode != 0:
+        if _is_non_fast_forward_push_error(result.stderr):
+            print_warning("Push was rejected because the remote branch moved; rebasing local fix and retrying")
+            if _recover_non_fast_forward_push(worktree, branch_name):
+                result = subprocess.run(
+                    ["git", "push", "origin", push_ref],
+                    cwd=worktree,
+                    capture_output=True,
+                    text=True,
+                    timeout=60,
+                )
+                if result.returncode == 0:
+                    refreshed_sha = subprocess.run(
+                        ["git", "rev-parse", "--short", "HEAD"],
+                        cwd=worktree,
+                        capture_output=True,
+                        text=True,
+                        timeout=10,
+                    )
+                    if refreshed_sha.returncode == 0 and refreshed_sha.stdout.strip():
+                        commit_sha = refreshed_sha.stdout.strip()
+                    return FixAgentResult(commit_sha=commit_sha or "", reply_plan=reply_plan)
+
         print_error(f"Push failed: {result.stderr}")
         return None
 
diff --git a/python/cube/core/adapters/__init__.py b/python/cube/core/adapters/__init__.py
index 9de5501f..0c635646 100644
--- a/python/cube/core/adapters/__init__.py
+++ b/python/cube/core/adapters/__init__.py
@@ -3,6 +3,7 @@
 from .base import CLIAdapter, read_stream_with_buffer, run_subprocess_streaming
 from .claude import ClaudeAdapter
 from .cli_review import CLIReviewAdapter
+from .codex import CodexAdapter
 from .cursor import CursorAdapter
 from .gemini import GeminiAdapter
 from .generic_cli import GenericCLIAdapter
@@ -11,6 +12,7 @@
     "CLIAdapter",
     "ClaudeAdapter",
     "CLIReviewAdapter",
+    "CodexAdapter",
     "CursorAdapter",
     "GeminiAdapter",
     "GenericCLIAdapter",
diff --git a/python/cube/core/adapters/claude.py b/python/cube/core/adapters/claude.py
index ab3d63c2..1063455b 100644
--- a/python/cube/core/adapters/claude.py
+++ b/python/cube/core/adapters/claude.py
@@ -7,13 +7,15 @@
 
 from .base import CLIAdapter, run_subprocess_streaming
 
-_MODEL_ALIASES = {
-    "claude-opus": "opus",
-    "claude-sonnet": "sonnet",
-    "claude-code-opus": "opus",
-    "claude-code-sonnet": "sonnet",
-    "claude-4-opus": "opus",
-    "claude-4-sonnet": "sonnet",
+_MODEL_ALIASES: dict[str, tuple[str, str | None]] = {
+    "claude-opus": ("opus", "max"),
+    "claude-code-opus": ("opus", "max"),
+    "claude-code-opus-max": ("opus", "max"),
+    "claude-opus-4-7-thinking-max": ("opus", "max"),
+    "claude-4-opus": ("opus", "max"),
+    "claude-sonnet": ("sonnet", None),
+    "claude-code-sonnet": ("sonnet", None),
+    "claude-4-sonnet": ("sonnet", None),
 }
 
 
@@ -36,7 +38,7 @@ async def run(
 
         env = os.environ.copy()
 
-        cli_model = _MODEL_ALIASES.get(model, model)
+        cli_model, effort = _MODEL_ALIASES.get(model, (model, None))
 
         # Build command
         cmd = [
@@ -51,6 +53,8 @@ async def run(
             "--model",
             cli_model,
         ]
+        if effort:
+            cmd.extend(["--effort", effort])
 
         if resume and session_id:
             cmd.extend(["--resume", session_id])
diff --git a/python/cube/core/adapters/codex.py b/python/cube/core/adapters/codex.py
new file mode 100644
index 00000000..10cdda4a
--- /dev/null
+++ b/python/cube/core/adapters/codex.py
@@ -0,0 +1,80 @@
+"""OpenAI Codex CLI adapter."""
+
+import os
+import shutil
+from pathlib import Path
+from typing import AsyncGenerator, Optional
+
+from .base import CLIAdapter, run_subprocess_streaming
+
+
+class CodexAdapter(CLIAdapter):
+    """Adapter that runs the OpenAI Codex CLI (`codex exec --json`) and streams its output lines."""
+
+    async def run(
+        self, worktree: Path, model: str, prompt: str, session_id: Optional[str] = None, resume: bool = False
+    ) -> AsyncGenerator[str, None]:
+        """Run `codex exec --json` and yield each output line; raise RuntimeError if codex is missing or prints nothing."""
+        if not self.check_installed():
+            raise RuntimeError("codex is not installed. " + self.get_install_instructions())
+
+        env = os.environ.copy()
+        env["PATH"] = f"{Path.home() / '.local' / 'bin'}:{env.get('PATH', '')}"  # put ~/.local/bin first on PATH
+
+        cmd = [
+            "codex",
+            "exec",
+            "--json",
+            "--model",
+            model,
+            "--full-auto",
+            "--sandbox",
+            "workspace-write",
+            "--cd",
+            str(worktree),
+        ]
+
+        if resume and session_id:  # resume needs both the flag and a session id
+            cmd.extend(["resume", session_id, prompt])
+        else:
+            cmd.append(prompt)
+
+        last_error = None
+        line_count = 0
+
+        from ..master_log import get_master_log
+
+        try:
+            async for line in run_subprocess_streaming(cmd, worktree, "codex", env, stdin_data=""):
+                line_count += 1
+
+                master_log = get_master_log()
+                if master_log:
+                    master_log.write_raw_line(f"codex-{model}", line)
+
+                if line.startswith('{"type":"error"') or line.startswith("Error:"):
+                    last_error = line[:200]  # keep the latest error-looking line, capped at 200 chars
+
+                yield line
+
+        except RuntimeError:  # prefer the captured error line over the original message when one was seen
+            if last_error:
+                raise RuntimeError(f"codex failed: {last_error}")
+            raise
+
+        if line_count == 0:  # stream ended without a single line
+            raise RuntimeError("codex produced no output (is it authenticated?)")
+
+    def check_installed(self) -> bool:
+        """Return True when a `codex` executable is found via shutil.which."""
+        return shutil.which("codex") is not None
+
+    def get_install_instructions(self) -> str:
+        """Return the multi-line install/login help text appended to the "not installed" error."""
+        return """Install OpenAI Codex CLI:
+  npm install -g @openai/codex
+
+After installation, authenticate with:
+  codex login
+
+For CI/headless use, set CODEX_API_KEY."""
diff --git a/python/cube/core/adapters/registry.py b/python/cube/core/adapters/registry.py
index ae59e37f..1b39a722 100644
--- a/python/cube/core/adapters/registry.py
+++ b/python/cube/core/adapters/registry.py
@@ -5,6 +5,7 @@
 from .base import CLIAdapter
 from .claude import ClaudeAdapter
 from .cli_review import CLIReviewAdapter
+from .codex import CodexAdapter
 from .cursor import CursorAdapter
 from .gemini import GeminiAdapter
 from .generic_cli import GenericCLIAdapter
@@ -12,6 +13,7 @@
 _ADAPTERS: Dict[str, Type[CLIAdapter]] = {
     "cursor-agent": CursorAdapter,
     "claude": ClaudeAdapter,
+    "codex": CodexAdapter,
     "gemini": GeminiAdapter,
     "cli-review": CLIReviewAdapter,
 }
diff --git a/python/cube/core/judge_personas.py b/python/cube/core/judge_personas.py
new file mode 100644
index 00000000..57fcf45d
--- /dev/null
+++ b/python/cube/core/judge_personas.py
@@ -0,0 +1,40 @@
+"""Built-in judge personas."""
+
+BUILTIN_JUDGE_PERSONAS: dict[str, str] = {
+    "security-pentest": """You are the security and abuse-case reviewer for this panel.
+
+Your job is to find realistic abuse paths, not generic code-quality nits. Think like a pragmatic penetration tester reviewing a production multi-tenant app:
+
+Primary focus areas:
+- Tenant isolation: `org_id`, brand, account, workspace, project, or customer scope must come from trusted server-side auth/context, not request body/query/client headers.
+- Authorization: verify role/permission checks are made at the operation boundary and cannot be bypassed through alternate routes, background jobs, webhooks, tool calls, or admin/helper APIs.
+- RLS/data boundaries: check composite keys, scoped DB helpers, cross-tenant reads/writes/deletes, system/shared rows, and any migration that changes privileges.
+- Secret handling: API keys, tokens, OAuth refresh tokens, JWTs, provider credentials, signed URLs, and webhook secrets must not leak to logs, browser-visible data, redirects, error messages, or analytics.
+- Injection and SSRF: inspect URL fetches, webhooks, provider callbacks, tool/integration inputs, SQL fragments, shell commands, HTML/Markdown rendering, and file/path handling.
+- Replay and idempotency: webhooks, payment/session flows, async jobs, queues, imports, and retries must be safe against duplicate delivery, stale messages, and out-of-order events.
+- Trust boundaries: treat LLM/tool arguments, third-party callbacks, browser state, and queue metadata as attacker-controlled unless proven otherwise.
+- Failure defaults: missing config, missing claims, unavailable verification, unknown provider state, and partial writes should fail closed with useful logging, not silently continue.
+- Concurrency and durability: flag module-level maps/caches/timers for anything security-, financial-, credential-, idempotency-, or cross-request-sensitive.
+
+Severity bar:
+- Request changes only for issues that create plausible exploitation, cross-tenant leakage, privilege escalation, credential exposure, data corruption, unsafe deletion, or a security-relevant correctness failure.
+- Do not block on theoretical CWE matching, style preferences, or "defense in depth" rewrites unless the current code has a concrete abuse path.
+- If another judge already raised the same generic issue, only repeat it when you can add the security impact, exploit path, or safer minimal fix.
+
+Fix style:
+- Prefer small, direct, fail-closed fixes.
+- Preserve KISS/minimalism; do not recommend broad security architecture when a narrow guard, scoped query, validation rule, or regression test solves the actual risk.
+- Include the attacker-controlled input, the trust boundary crossed, the affected asset, and the minimal fix in each security finding.""",
+}
+
+
+def resolve_judge_persona(persona: str | None) -> str | None:
+    """Return the built-in persona text for a (optionally "builtin:"-prefixed) name, else the input as inline text."""
+    if not persona:
+        return None
+
+    key = persona.strip()
+    if key.startswith("builtin:"):
+        key = key.split(":", 1)[1].strip()  # drop the "builtin:" prefix before the lookup
+
+    return BUILTIN_JUDGE_PERSONAS.get(key, persona)  # unknown names fall back to the original, unstripped input
diff --git a/python/cube/core/parsers/__init__.py b/python/cube/core/parsers/__init__.py
index 68db6668..844fcbc5 100644
--- a/python/cube/core/parsers/__init__.py
+++ b/python/cube/core/parsers/__init__.py
@@ -3,6 +3,7 @@
 from .base import ParserAdapter
 from .claude import ClaudeParser
 from .cli_review import CLIReviewParser
+from .codex import CodexParser
 from .cursor import CursorParser
 from .gemini import GeminiParser
 from .kimi import KimiParser
@@ -11,6 +12,7 @@
 __all__ = [
     "CLIReviewParser",
     "ClaudeParser",
+    "CodexParser",
     "CursorParser",
     "GeminiParser",
     "KimiParser",
diff --git a/python/cube/core/parsers/claude.py b/python/cube/core/parsers/claude.py
index dad69570..843e9664 100644
--- a/python/cube/core/parsers/claude.py
+++ b/python/cube/core/parsers/claude.py
@@ -35,6 +35,13 @@ def parse(self, line: str) -> Optional[StreamMessage]:
                 msg.model = data.get("model", "claude")
                 return msg
 
+            if msg.type == "thinking":
+                text = data.get("text") or data.get("thinking")
+                if text:
+                    msg.content = text
+                    return msg
+                return None
+
             # Handle stream_event - extract content deltas
             if msg.type == "stream_event":
                 event = data.get("event", {})
@@ -118,9 +125,12 @@ def parse(self, line: str) -> Optional[StreamMessage]:
                                 return msg
                         # Thinking block
                         elif content_block.get("type") == "thinking":
-                            msg.type = "thinking"
-                            msg.content = content_block.get("thinking", "")
-                            return msg
+                            thinking = content_block.get("thinking", "")
+                            if thinking:
+                                msg.type = "thinking"
+                                msg.content = thinking
+                                return msg
+                            return None
                         # Tool use block
                         elif content_block.get("type") == "tool_use":
                             msg.type = "tool_call"
@@ -175,6 +185,11 @@ def parse(self, line: str) -> Optional[StreamMessage]:
                     msg.content = data.get("result", "")[:200]
                 return msg
 
+            # Telemetry/status events from Claude Code are useful for raw logs,
+            # but noisy in the live judge UI.
+            if msg.type in {"rate_limit_event"}:
+                return None
+
             # Unknown type - return as unknown for logging
             if msg.type not in ("system", "user", "assistant", "result"):
                 msg.type = "unknown"
diff --git a/python/cube/core/parsers/codex.py b/python/cube/core/parsers/codex.py
new file mode 100644
index 00000000..529f22c2
--- /dev/null
+++ b/python/cube/core/parsers/codex.py
@@ -0,0 +1,140 @@
+"""OpenAI Codex CLI JSONL parser."""
+
+import json
+from typing import Any, Optional
+
+from ...models.types import StreamMessage
+from .base import ParserAdapter
+
+# Truncation limits so one oversized event cannot flood the live UI.
+_MAX_RAW_CHARS = 500
+_MAX_ERROR_CHARS = 200
+
+
+class CodexParser(ParserAdapter):
+    """Parser for `codex exec --json` events."""
+
+    def parse(self, line: str) -> Optional[StreamMessage]:
+        """Parse a Codex JSONL event.
+
+        Args:
+            line: One raw line of Codex output.
+
+        Returns:
+            A ``StreamMessage`` for recognised events, an ``unknown`` message
+            holding the truncated text when the line is not a JSON object, or
+            ``None`` for blank lines and events this parser ignores.
+        """
+        clean = line.strip()
+        if not clean:
+            return None
+
+        try:
+            data = json.loads(clean)
+        except json.JSONDecodeError:
+            return StreamMessage(type="unknown", content=clean[:_MAX_RAW_CHARS])
+
+        if not isinstance(data, dict):
+            return StreamMessage(type="unknown", content=clean[:_MAX_RAW_CHARS])
+
+        event_type = data.get("type")
+
+        if event_type == "thread.started":
+            return StreamMessage(type="system", subtype="init", session_id=data.get("thread_id"))
+
+        if event_type == "turn.started":
+            return StreamMessage(type="system", subtype="turn_started")
+
+        if event_type == "turn.completed":
+            # `or 0` also covers an explicit JSON null, not only a missing key.
+            return StreamMessage(type="result", duration_ms=data.get("duration_ms") or 0)
+
+        if event_type == "turn.failed":
+            return StreamMessage(type="error", content=_error_text(data.get("error"), "Codex turn failed"))
+
+        if event_type == "error":
+            detail = data.get("message") or data.get("error")
+            return StreamMessage(type="error", content=_error_text(detail, str(data)))
+
+        if isinstance(event_type, str) and event_type.startswith("item."):
+            return _parse_item_event(event_type, data.get("item"))
+
+        return None
+
+    def supports_resume(self) -> bool:
+        """Codex exec supports explicit session resume."""
+        return True
+
+
+def _error_text(error: Any, fallback: str) -> str:
+    """Return a short, readable message for a Codex error payload.
+
+    When the payload is an object, its nested ``message`` is preferred over
+    the dict repr; ``fallback`` is used when the payload is empty.
+    """
+    if isinstance(error, dict):
+        error = error.get("message") or error
+    return str(error or fallback)[:_MAX_ERROR_CHARS]
+
+
+def _changed_paths(changes: Any) -> str:
+    """Join the ``path`` of every entry in a ``file_change`` item's ``changes`` list."""
+    if not isinstance(changes, list):
+        return ""
+    return ", ".join(str(c["path"]) for c in changes if isinstance(c, dict) and c.get("path"))
+
+
+def _parse_item_event(event_type: str, item: Any) -> Optional[StreamMessage]:
+    """Parse Codex item events.
+
+    Args:
+        event_type: The ``item.*`` event name the item arrived with.
+        item: The event's ``item`` payload; anything but a dict is ignored.
+
+    Returns:
+        An ``assistant`` or ``thinking`` message for text items, a
+        ``tool_call`` message for tool-like items, or ``None`` when the item
+        is empty or of an unhandled type.
+    """
+    if not isinstance(item, dict):
+        return None
+
+    item_type = item.get("type")
+    # Anything other than `item.started` (e.g. `item.updated`) is reported as "completed".
+    subtype = "started" if event_type == "item.started" else "completed"
+
+    if item_type == "agent_message":
+        text = item.get("text") or item.get("message") or item.get("content")
+        if text:
+            return StreamMessage(type="assistant", content=str(text))
+        return None
+
+    if item_type == "reasoning":
+        text = item.get("text") or item.get("summary") or item.get("content")
+        if text:
+            return StreamMessage(type="thinking", content=str(text))
+        return None
+
+    if item_type == "command_execution":
+        command = item.get("command") or item.get("cmd") or ""
+        return StreamMessage(type="tool_call", subtype=subtype, tool_name="shell", tool_args={"command": command})
+
+    if item_type == "file_change":
+        # Codex SDK item types list edits under `changes`; use it when no flat path is present.
+        path = item.get("path") or item.get("file") or _changed_paths(item.get("changes"))
+        return StreamMessage(type="tool_call", subtype=subtype, tool_name="edit", tool_args={"path": path})
+
+    if item_type == "mcp_tool_call":
+        # `tool` is the name field in the Codex SDK item types; the others stay as fallbacks.
+        tool_name = item.get("name") or item.get("tool_name") or item.get("tool") or "mcp"
+        args = item.get("arguments") or item.get("args") or {}
+        if not isinstance(args, dict):
+            # Keep `tool_args` a mapping even when the arguments arrive as a scalar or list.
+            args = {"arguments": args}
+        return StreamMessage(type="tool_call", subtype=subtype, tool_name=str(tool_name), tool_args=args)
+
+    if item_type == "web_search":
+        query = item.get("query") or ""
+        return StreamMessage(type="tool_call", subtype=subtype, tool_name="web_search", tool_args={"query": query})
+
+    return None
diff --git a/python/cube/core/parsers/registry.py b/python/cube/core/parsers/registry.py
index d52c8da3..4e576ba8 100644
--- a/python/cube/core/parsers/registry.py
+++ b/python/cube/core/parsers/registry.py
@@ -5,6 +5,7 @@
 from .base import ParserAdapter
 from .claude import ClaudeParser
 from .cli_review import CLIReviewParser
+from .codex import CodexParser
 from .cursor import CursorParser
 from .gemini import GeminiParser
 from .kimi import KimiParser
@@ -13,6 +14,7 @@
 _PARSERS: Dict[str, Type[ParserAdapter]] = {
     "cursor-agent": CursorParser,
     "claude": ClaudeParser,
+    "codex": CodexParser,
     "gemini": GeminiParser,
     "kimi": KimiParser,
     "qwen": QwenParser,
diff --git a/python/cube/core/single_layout.py b/python/cube/core/single_layout.py
index b93af367..c3d68f36 100644
--- a/python/cube/core/single_layout.py
+++ b/python/cube/core/single_layout.py
@@ -46,15 +46,16 @@ class SingleAgentLayout(BaseThinkingLayout):
     _lock = RLock()
 
     @classmethod
-    def initialize(cls, title: str = "Agent"):
+    def initialize(cls, title: str = "Agent", task_name: str = None):
         with cls._lock:
             if cls._instance:
                 cls._instance.close()
-            cls._instance = cls({"agent": title}, lines_per_box=3)
+            display_title = f"{title} ▶ {task_name}" if task_name else title
+            cls._instance = cls({"agent": display_title}, lines_per_box=3, task_name=task_name)
         return cls._instance
 
-    def __init__(self, boxes: Dict[str, str], lines_per_box: int = 3):
-        super().__init__(boxes, lines_per_box)
+    def __init__(self, boxes: Dict[str, str], lines_per_box: int = 3, task_name: str = None):
+        super().__init__(boxes, lines_per_box, task_name=task_name)
 
     @classmethod
     def add_thinking(cls, text: str) -> None:
diff --git a/python/cube/core/user_config.py b/python/cube/core/user_config.py
index e193572c..e319311e 100644
--- a/python/cube/core/user_config.py
+++ b/python/cube/core/user_config.py
@@ -7,6 +7,8 @@
 import yaml
 from rich.console import Console
 
+from .judge_personas import resolve_judge_persona
+
 _console_err = Console(stderr=True)
 
 
@@ -37,6 +39,7 @@ class JudgeConfig:
     model: str
     label: str
     color: str
+    persona: Optional[str] = None
     type: str = "llm"  # "llm" or "cli-review"
     cmd: Optional[str] = None
     peer_review_only: bool = False  # Skip in panel, only run in peer-review
@@ -211,6 +214,7 @@ def load_config() -> CubeConfig:
             model=_resolve_model_alias(j.get("model", "sonnet-4.5-thinking"), model_aliases),
             label=j.get("label", key),
             color=j.get("color", "green"),
+            persona=resolve_judge_persona(j.get("persona")),
             type=j.get("type", "llm"),
             cmd=j.get("cmd"),
             peer_review_only=j.get("peer_review_only", False),
diff --git a/python/cube/models/types.py b/python/cube/models/types.py
index 36d3f640..06790ff6 100644
--- a/python/cube/models/types.py
+++ b/python/cube/models/types.py
@@ -30,6 +30,7 @@ class JudgeInfo:
     label: str
     task_id: str
     review_type: str
+    persona: Optional[str] = None
     session_id: Optional[str] = None
     adapter_config: Optional[dict] = None
 
diff --git a/tests/cli/test_adapters.py b/tests/cli/test_adapters.py
index 9978c71e..21085441 100644
--- a/tests/cli/test_adapters.py
+++ b/tests/cli/test_adapters.py
@@ -2,8 +2,9 @@
 from unittest.mock import AsyncMock, Mock, call, patch
 
 import pytest
-from cube.core.adapters import ClaudeAdapter, CursorAdapter, GenericCLIAdapter
+from cube.core.adapters import ClaudeAdapter, CodexAdapter, CursorAdapter, GenericCLIAdapter
 from cube.core.adapters.registry import get_adapter
+from cube.core.parsers.codex import CodexParser
 from cube.core.parsers.kimi import KimiParser
 from cube.core.parsers.qwen import QwenParser
 from cube.core.parsers.registry import get_parser
@@ -88,12 +89,188 @@ async def test_claude_adapter_uses_latest_headless_flags_and_alias(tmp_path):
         "bypassPermissions",
         "--model",
         "opus",
+        "--effort",
+        "max",
         "prompt",
     ]
     assert results == ['{"type":"result","is_error":false,"result":"ok"}']
     assert mock_exec.call_args.kwargs["stdin"] is not None
 
 
+@pytest.mark.asyncio
+async def test_claude_adapter_maps_explicit_opus_47_thinking_max(tmp_path):
+    """An explicit Opus 4.7 thinking-max model id is run as `--model opus --effort max`."""
+    adapter = ClaudeAdapter()
+    mock_process = make_mock_process(b'{"type":"result","is_error":false,"result":"ok"}\n')
+    with (
+        patch("cube.core.adapters.claude.shutil.which", return_value="/usr/local/bin/claude"),
+        patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec,
+    ):
+        async for _ in adapter.run(tmp_path, "claude-opus-4-7-thinking-max", "prompt"):
+            pass
+
+    args = collect_exec_args(mock_exec)
+    assert args[args.index("--model") + 1] == "opus"
+    assert args[args.index("--effort") + 1] == "max"
+
+
+@pytest.mark.asyncio
+async def test_claude_adapter_does_not_force_effort_for_sonnet(tmp_path):
+    """The `claude-sonnet` model is run as `--model sonnet` with no `--effort` flag."""
+    adapter = ClaudeAdapter()
+    mock_process = make_mock_process(b'{"type":"result","is_error":false,"result":"ok"}\n')
+    with (
+        patch("cube.core.adapters.claude.shutil.which", return_value="/usr/local/bin/claude"),
+        patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec,
+    ):
+        async for _ in adapter.run(tmp_path, "claude-sonnet", "prompt"):
+            pass
+
+    args = collect_exec_args(mock_exec)
+    assert args[args.index("--model") + 1] == "sonnet"
+    assert "--effort" not in args
+
+
+@pytest.mark.asyncio
+async def test_codex_adapter_run_command(tmp_path):
+    """A fresh run builds the `codex exec` argv, yields stdout lines, and logs each one raw."""
+    adapter = CodexAdapter()
+    mock_process = make_mock_process(
+        b'{"type":"thread.started","thread_id":"session-1"}\n',
+        b'{"type":"item.completed","item":{"type":"agent_message","text":"ok"}}\n',
+    )
+    master_log = Mock()
+    with (
+        patch("cube.core.adapters.codex.shutil.which", return_value="/usr/local/bin/codex"),
+        patch("cube.core.master_log.get_master_log", return_value=master_log),
+        patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec,
+    ):
+        results = []
+        async for line in adapter.run(tmp_path, "gpt-5.5-codex", "prompt"):
+            results.append(line)
+
+    args = collect_exec_args(mock_exec)
+    assert args == [
+        "codex",
+        "exec",
+        "--json",
+        "--model",
+        "gpt-5.5-codex",
+        "--full-auto",
+        "--sandbox",
+        "workspace-write",
+        "--cd",
+        str(tmp_path),
+        "prompt",
+    ]
+    assert results == [
+        '{"type":"thread.started","thread_id":"session-1"}',
+        '{"type":"item.completed","item":{"type":"agent_message","text":"ok"}}',
+    ]
+    assert master_log.write_raw_line.call_args_list == [
+        call("codex-gpt-5.5-codex", '{"type":"thread.started","thread_id":"session-1"}'),
+        call("codex-gpt-5.5-codex", '{"type":"item.completed","item":{"type":"agent_message","text":"ok"}}'),
+    ]
+    assert mock_exec.call_args.kwargs["stdin"] is not None
+
+
+@pytest.mark.asyncio
+async def test_codex_adapter_resume_command(tmp_path):
+    """Resuming inserts `resume <session_id>` ahead of the prompt in the `codex exec` argv."""
+    adapter = CodexAdapter()
+    mock_process = make_mock_process(b'{"type":"turn.completed"}\n')
+    with (
+        patch("cube.core.adapters.codex.shutil.which", return_value="/usr/local/bin/codex"),
+        patch("asyncio.create_subprocess_exec", return_value=mock_process) as mock_exec,
+    ):
+        async for _ in adapter.run(tmp_path, "gpt-5.5-codex", "follow up", session_id="session-1", resume=True):
+            pass
+
+    args = collect_exec_args(mock_exec)
+    assert args == [
+        "codex",
+        "exec",
+        "--json",
+        "--model",
+        "gpt-5.5-codex",
+        "--full-auto",
+        "--sandbox",
+        "workspace-write",
+        "--cd",
+        str(tmp_path),
+        "resume",
+        "session-1",
+        "follow up",
+    ]
+
+
+def test_codex_parser_events():
+    """CodexParser maps thread, message, reasoning, command and turn events onto StreamMessage fields."""
+    parser = CodexParser()
+    init = parser.parse('{"type":"thread.started","thread_id":"session-1"}')
+    assert init is not None
+    assert init.type == "system"
+    assert init.subtype == "init"
+    assert init.session_id == "session-1"
+
+    assistant = parser.parse('{"type":"item.completed","item":{"type":"agent_message","text":"done"}}')
+    assert assistant is not None
+    assert assistant.type == "assistant"
+    assert assistant.content == "done"
+
+    thinking = parser.parse('{"type":"item.completed","item":{"type":"reasoning","text":"thinking"}}')
+    assert thinking is not None
+    assert thinking.type == "thinking"
+    assert thinking.content == "thinking"
+
+    shell = parser.parse('{"type":"item.started","item":{"type":"command_execution","command":"python -m pytest"}}')
+    assert shell is not None
+    assert shell.type == "tool_call"
+    assert shell.subtype == "started"
+    assert shell.tool_name == "shell"
+    assert shell.tool_args == {"command": "python -m pytest"}
+
+    result = parser.parse('{"type":"turn.completed","duration_ms":123}')
+    assert result is not None
+    assert result.type == "result"
+    assert result.duration_ms == 123
+
+
+def test_codex_registry_lookup():  # "codex" resolves in both the adapter and the parser registry
+    assert isinstance(get_adapter("codex"), CodexAdapter)
+    assert isinstance(get_parser("codex"), CodexParser)
+
+
+def test_claude_parser_ignores_rate_limit_events():  # rate_limit_event lines produce no message
+    parser = get_parser("claude")
+    assert (
+        parser.parse('{"type":"rate_limit_event","rate_limit_info":{"status":"allowed","resetsAt":1777000000000}}')
+        is None
+    )
+
+
+def test_claude_parser_routes_top_level_thinking_to_thinking_box():
+    """A top-level `thinking` event keeps its subtype and exposes `text` as the content."""
+    parser = get_parser("claude")
+    msg = parser.parse('{"type":"thinking","subtype":"delta","text":"Inspecting the diff"}')
+
+    assert msg is not None
+    assert msg.type == "thinking"
+    assert msg.subtype == "delta"
+    assert msg.content == "Inspecting the diff"
+
+
+def test_claude_parser_ignores_empty_thinking_blocks():
+    """A thinking content block whose text is empty is dropped (parse returns None)."""
+    parser = get_parser("claude")
+    assert (
+        parser.parse(
+            '{"type":"stream_event","event":{"type":"content_block_start","content_block":{"type":"thinking","thinking":"","signature":""}}}'
+        )
+        is None
+    )
+
+
 def make_mock_process(*chunks):
     mock_process = AsyncMock()
     mock_process.wait.return_value = 0
diff --git a/tests/cli/test_pr_fix.py b/tests/cli/test_pr_fix.py
index 1a0a9fc7..6c073b03 100644
--- a/tests/cli/test_pr_fix.py
+++ b/tests/cli/test_pr_fix.py
@@ -8,6 +8,8 @@
 from cube.commands.pr_fix import (
     _fixer_session_metadata,
     _is_compatible_fixer_session_metadata,
+    _is_non_fast_forward_push_error,
+    _recover_non_fast_forward_push,
     _reply_to_processed_comments,
     _sync_pr_worktree,
 )
@@ -362,6 +364,43 @@ def fake_reply_and_resolve(**kwargs):
         assert calls[1]["resolve"] is False
 
 
+class TestPushRecovery:
+    def test_detects_non_fast_forward_push_errors(self):
+        assert _is_non_fast_forward_push_error("! [rejected] HEAD -> branch (non-fast-forward)")
+        assert _is_non_fast_forward_push_error("Updates were rejected because the tip of your current branch is behind")
+        assert _is_non_fast_forward_push_error("hint: fetch first")
+        assert not _is_non_fast_forward_push_error("permission denied")  # unrelated failure must not match
+
+    def test_recover_non_fast_forward_fetches_and_rebases(self, monkeypatch, tmp_path):
+        """Recovery fetches the remote branch, rebases onto it, and returns True."""
+        calls = []
+
+        def fake_run(cmd, **kwargs):
+            calls.append(tuple(cmd))
+            return subprocess.CompletedProcess(cmd, 0, "", "")
+
+        monkeypatch.setattr(subprocess, "run", fake_run)
+        assert _recover_non_fast_forward_push(tmp_path, "writer-codex/package-consolidation") is True
+        assert calls == [
+            ("git", "fetch", "origin", "writer-codex/package-consolidation"),
+            ("git", "rebase", "origin/writer-codex/package-consolidation"),
+        ]
+
+    def test_recover_non_fast_forward_aborts_failed_rebase(self, monkeypatch, tmp_path):
+        """When the rebase fails, `git rebase --abort` is run and recovery returns False."""
+        calls = []
+
+        def fake_run(cmd, **kwargs):
+            calls.append(tuple(cmd))
+            if cmd[:2] == ["git", "rebase"] and "--abort" not in cmd:
+                return subprocess.CompletedProcess(cmd, 1, "", "conflict")
+            return subprocess.CompletedProcess(cmd, 0, "", "")
+
+        monkeypatch.setattr(subprocess, "run", fake_run)
+        assert _recover_non_fast_forward_push(tmp_path, "writer-codex/package-consolidation") is False
+        assert ("git", "rebase", "--abort") in calls
+
+
 class TestSyncPrWorktree:
     def test_existing_clean_writer_worktree_resets_to_origin_branch(self, monkeypatch, tmp_path):
         worktree = tmp_path / ".cube" / "writer-codex-package-consolidation"
diff --git a/tests/core/test_judge_panel_retry.py b/tests/core/test_judge_panel_retry.py
index 5bba441f..fc3eef97 100644
--- a/tests/core/test_judge_panel_retry.py
+++ b/tests/core/test_judge_panel_retry.py
@@ -2,18 +2,60 @@
 from unittest.mock import MagicMock
 
 import pytest
-from cube.automation.judge_panel import _load_matching_judge_session, _wait_for_valid_decision_file, run_judge
+from cube.automation.judge_panel import (
+    _apply_judge_persona,
+    _judge_session_metadata,
+    _load_matching_judge_session,
+    _wait_for_valid_decision_file,
+    run_judge,
+)
 from cube.models.types import JudgeInfo
 
 
-def test_load_matching_judge_session_ignores_metadata_mismatch(monkeypatch):
+def test_load_matching_judge_session_requires_matching_model_and_cli(monkeypatch):
+    """A stored session whose metadata names another CLI than the configured one is not returned."""
     monkeypatch.setattr("cube.automation.judge_panel.load_session", lambda session_type, task_id: "session-id")
+    monkeypatch.setattr(
+        "cube.automation.judge_panel.load_session_metadata",
+        lambda session_type, task_id: "Judge Opus (opus) | cli=cursor-agent",
+    )
+    monkeypatch.setattr("cube.automation.judge_panel.load_config", lambda: MagicMock(cli_tools={"opus": "claude"}))
+    judge = MagicMock(key="judge_1", model="opus", label="Judge Opus", type="llm")
+
+    assert _load_matching_judge_session(judge, "task", "peer-review") is None
+
 
-    judge = MagicMock(key="judge_3", model="new-model")
+def test_load_matching_judge_session_allows_matching_metadata(monkeypatch):
+    """A stored session is returned when its metadata matches the judge and the configured CLI."""
+    judge = MagicMock(key="judge_1", model="opus", label="Judge Opus", type="llm")
+    monkeypatch.setattr("cube.automation.judge_panel.load_session", lambda session_type, task_id: "session-id")
+    monkeypatch.setattr(
+        "cube.automation.judge_panel.load_session_metadata",
+        lambda session_type, task_id: _judge_session_metadata(judge, "claude"),
+    )
+    monkeypatch.setattr("cube.automation.judge_panel.load_config", lambda: MagicMock(cli_tools={"opus": "claude"}))
 
     assert _load_matching_judge_session(judge, "task", "peer-review") == "session-id"
 
 
+def test_apply_judge_persona_prepends_configured_persona():
+    """The persona is prepended under a "# JUDGE PERSONA — <label>" heading, ahead of the base prompt."""
+    judge = JudgeInfo(
+        key="judge_3",
+        model="gpt",
+        color="magenta",
+        label="Judge Security",
+        task_id="task",
+        review_type="peer-review",
+        persona="Focus on tenant isolation and authz bypasses.",
+    )
+    prompt = _apply_judge_persona("Base prompt", judge)
+
+    assert prompt.startswith("# JUDGE PERSONA — Judge Security")
+    assert "Focus on tenant isolation and authz bypasses." in prompt
+    assert prompt.endswith("Base prompt")
+
+
 @pytest.mark.asyncio
 async def test_wait_for_valid_decision_file_waits_for_late_write(tmp_path):
     decision_file = tmp_path / "decision.json"
diff --git a/tests/core/test_orchestrate_pr.py b/tests/core/test_orchestrate_pr.py
new file mode 100644
index 00000000..8110c8b1
--- /dev/null
+++ b/tests/core/test_orchestrate_pr.py
@@ -0,0 +1,59 @@
+"""Tests for workflow PR creation."""
+
+import subprocess
+
+import pytest
+from cube.commands.orchestrate.pr import create_pr
+
+
+class _Writer:
+    """Minimal stand-in for the writer object returned by the patched writer lookup."""
+
+    name = "codex"
+    key = "writer_a"
+    label = "Writer GPT"
+
+
+def _capture_pr_commands(monkeypatch):
+    """Stub the writer lookup and ``subprocess.run``; return the list of recorded commands."""
+    calls = []
+
+    def fake_run(cmd, **kwargs):
+        calls.append(cmd)
+        return subprocess.CompletedProcess(cmd, 0, "https://example.test/pr/1\n", "")
+
+    monkeypatch.setattr(
+        "cube.commands.orchestrate.pr.get_writer_by_key_or_metadata",
+        lambda winner, task_id: _Writer(),
+    )
+    monkeypatch.setattr(subprocess, "run", fake_run)
+    return calls
+
+
+def _pr_body(calls):
+    """Return the ``--body`` value of the first recorded command that carries one."""
+    cmd = next((c for c in calls if "--body" in c), None)
+    assert cmd is not None, f"no command with --body was run: {calls!r}"
+    return cmd[cmd.index("--body") + 1]
+
+
+@pytest.mark.asyncio
+async def test_create_pr_single_mode_uses_writer_label_not_winner(monkeypatch):
+    """Single-writer PR bodies name the writer and never mention a winner."""
+    calls = _capture_pr_commands(monkeypatch)
+
+    await create_pr("package-consolidation", "writer_a", single_mode=True)
+
+    body = _pr_body(calls)
+    assert "Writer: Writer GPT (writer_a)" in body
+    assert "Winner:" not in body
+
+
+@pytest.mark.asyncio
+async def test_create_pr_dual_mode_keeps_winner_label(monkeypatch):
+    """Dual-writer PR bodies keep the winner label."""
+    calls = _capture_pr_commands(monkeypatch)
+
+    await create_pr("package-consolidation", "writer_a", single_mode=False)
+
+    assert "Winner: Writer GPT" in _pr_body(calls)
diff --git a/tests/core/test_single_layout.py b/tests/core/test_single_layout.py
new file mode 100644
index 00000000..b0014a6b
--- /dev/null
+++ b/tests/core/test_single_layout.py
@@ -0,0 +1,23 @@
+"""Tests for single-agent layout title behavior."""
+
+from cube.core.single_layout import SingleAgentLayout
+
+
+def test_single_layout_includes_task_name_in_box_title():
+    """A task name is appended to the box title after " ▶ " and stored on the layout."""
+    layout = SingleAgentLayout.initialize("Writer GPT", task_name="package-consolidation")
+    try:
+        assert layout.boxes["agent"] == "Writer GPT ▶ package-consolidation"
+        assert layout.task_name == "package-consolidation"
+    finally:
+        SingleAgentLayout.close()
+
+
+def test_single_layout_keeps_title_without_task_name():
+    """Without a task name the title is used unchanged and `task_name` is None."""
+    layout = SingleAgentLayout.initialize("Prompter")
+    try:
+        assert layout.boxes["agent"] == "Prompter"
+        assert layout.task_name is None
+    finally:
+        SingleAgentLayout.close()
diff --git a/tests/core/test_user_config.py b/tests/core/test_user_config.py
index 0c5cad56..31e671f3 100644
--- a/tests/core/test_user_config.py
+++ b/tests/core/test_user_config.py
@@ -170,7 +170,12 @@ def test_model_aliases_are_resolved(self, tmp_path, monkeypatch):
             },
             "judges": {
                 "judge_1": {"model": "latest", "label": "Judge GPT", "color": "yellow"},
-                "judge_2": {"model": "opus", "label": "Judge Opus", "color": "green"},
+                "judge_2": {
+                    "model": "opus",
+                    "label": "Judge Opus",
+                    "color": "green",
+                    "persona": "security-pentest",
+                },
             },
         }
         config_path = tmp_path / "cube.yaml"
@@ -186,8 +191,31 @@ def test_model_aliases_are_resolved(self, tmp_path, monkeypatch):
         assert config.writers["writer_b"].model == "claude-opus-4-7-thinking-max"
         assert config.judges["judge_1"].model == "gpt-5.5-high"
         assert config.judges["judge_2"].model == "claude-opus-4-7-thinking-max"
+        assert config.judges["judge_2"].persona is not None
+        assert "security and abuse-case reviewer" in config.judges["judge_2"].persona
         assert resolve_model_alias("latest") == "gpt-5.5-high"
 
+    def test_inline_judge_persona_is_preserved(self, tmp_path, monkeypatch):
+        """Free-text persona values are kept verbatim on the loaded judge config."""
+        config_data = {
+            "writers": {"writer_a": {"name": "codex", "model": "gpt-5.5-high", "label": "Writer GPT", "color": "blue"}},
+            "judges": {
+                "judge_1": {
+                    "model": "gpt-5.5-high",
+                    "label": "Judge Custom",
+                    "color": "yellow",
+                    "persona": "Prefer database correctness issues.",
+                }
+            },
+        }
+        config_path = tmp_path / "cube.yaml"
+        with open(config_path, "w") as f:
+            yaml.dump(config_data, f)
+
+        monkeypatch.setattr("cube.core.user_config.find_config_files", lambda: (None, None, config_path))
+        config = load_config()
+        assert config.judges["judge_1"].persona == "Prefer database correctness issues."
+
     def test_get_prompter_model(self, mock_config_files):
         """get_prompter_model() returns model name."""
         from cube.core.user_config import get_prompter_model