perlantir · perlantir · Apr 14, 2026 · Apr 14, 2026 · Apr 14, 2026 · Apr 14, 2026
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -37,13 +37,21 @@ jobs:
       - name: Run tests
         run: |
           source .venv/bin/activate
-          python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --tb=short -n auto
+          python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --ignore=tests/bench --tb=short -n auto
         env:
           # Ensure tests don't accidentally call real APIs
           OPENROUTER_API_KEY: ""
           OPENAI_API_KEY: ""
           NOUS_API_KEY: ""
 
+      - name: Hot-path bench (Phase 11 perf gate)
+        # Runs as its own step after the main suite (which passes
+        # --ignore=tests/bench) so a perf regression lands with a clean
+        # signal rather than buried in the full-suite summary. No `-n auto`
+        # here: a serial run avoids xdist's worker variance dominating the
+        # p95 samples. `-o addopts=''` blanks any addopts coming from the
+        # pytest ini config — presumably so no configured xdist/reporting
+        # flags leak into the bench run; confirm against the project config.
+        run: |
+          source .venv/bin/activate
+          python -m pytest tests/bench/ -o addopts='' --tb=short
+
   e2e:
     runs-on: ubuntu-latest
     timeout-minutes: 10
@@ -71,3 +79,33 @@ jobs:
           OPENROUTER_API_KEY: ""
           OPENAI_API_KEY: ""
           NOUS_API_KEY: ""
+
+  closed-loop:
+    # Phase 10 gate: the full task -> subagent -> compile -> outcome ->
+    # attribution -> re-rank chain must stay green on every push/PR.
+    # Runs as a dedicated job because the main test step passes
+    # --ignore=tests/integration, so this file is not exercised there.
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+
+      - name: Set up Python 3.11
+        run: uv python install 3.11
+
+      # Creates a project-local virtualenv and installs the package in
+      # editable mode with the `all` and `dev` extras.
+      - name: Install dependencies
+        run: |
+          uv venv .venv --python 3.11
+          source .venv/bin/activate
+          uv pip install -e ".[all,dev]"
+
+      - name: Run closed-loop integration test
+        run: |
+          source .venv/bin/activate
+          python -m pytest tests/integration/test_closed_loop.py -v
+        env:
+          # Blank the provider keys so the test cannot accidentally call
+          # real APIs.
+          OPENROUTER_API_KEY: ""
+          OPENAI_API_KEY: ""
+          NOUS_API_KEY: ""
diff --git a/.github/workflows/upstream-drift-check.yml b/.github/workflows/upstream-drift-check.yml
@@ -0,0 +1,106 @@
+name: Upstream drift check
+
+# Runs on the 1st of every month — a compromise between the quarterly
+# cadence in project_hermulti_upstream_sync.md and monthly visibility into
+# fast-moving upstream work. Also runnable on demand via workflow_dispatch.
+on:
+  schedule:
+    - cron: '0 12 1 * *'
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  drift:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout hermulti
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Add upstream remote
+        # Fetch upstream's full `main` history rather than a shallow slice.
+        # With a depth limit, once upstream is more than that many commits
+        # past the seed the seed..upstream/main range is truncated at the
+        # shallow boundary (or the seed is missing entirely), so the drift
+        # count is under-reported exactly when drift is largest. Only
+        # upstream/main is compared, so other branches and tags are skipped.
+        run: |
+          git remote add upstream https://github.com/NousResearch/hermes-agent.git || true
+          git fetch --no-tags upstream main
+
+      - name: Measure drift from seed
+        id: drift
+        run: |
+          # Seed commit is documented in memory/project_hermulti_upstream_sync.md
+          # and pinned here. When we do an upstream reseed, bump this to the
+          # new ancestor and reset the issue-worthy threshold.
+          SEED=0493bc7
+
+          # Fail loudly when either side of the comparison is missing. If git
+          # errors were swallowed here, a broken comparison would be reported
+          # as "0 commits ahead" and the issue step would be skipped silently.
+          if ! git rev-parse --verify --quiet "${SEED}^{commit}" >/dev/null; then
+            echo "::error::Seed commit ${SEED} is not present in the fetched history"
+            exit 1
+          fi
+          if ! git rev-parse --verify --quiet "upstream/main^{commit}" >/dev/null; then
+            echo "::error::upstream/main was not fetched"
+            exit 1
+          fi
+
+          # Outputs consumed by the issue step: the commit count, the diff
+          # summary, and the 20 newest upstream commits (hash + subject).
+          COMMITS_AHEAD=$(git rev-list --count "$SEED..upstream/main")
+          SHORTSTAT=$(git diff --shortstat "$SEED" upstream/main || echo "compare failed")
+          RECENT=$(git log --oneline -n 20 "$SEED..upstream/main")
+
+          echo "commits_ahead=$COMMITS_AHEAD" >> "$GITHUB_OUTPUT"
+          {
+            echo "shortstat<<EOF"
+            echo "$SHORTSTAT"
+            echo "EOF"
+            echo "recent<<EOF"
+            echo "$RECENT"
+            echo "EOF"
+          } >> "$GITHUB_OUTPUT"
+
+          echo "Upstream is ${COMMITS_AHEAD} commits ahead of seed ${SEED}."
+          echo "Diff shortstat: $SHORTSTAT"
+
+      - name: Open issue if drift exceeds threshold
+        if: ${{ fromJSON(steps.drift.outputs.commits_ahead) > 50 }}
+        uses: actions/github-script@v7
+        # Step outputs reach the script through env vars instead of being
+        # interpolated into the JavaScript source. Upstream commit subjects
+        # are untrusted text: a backtick or dollar-brace sequence pasted into
+        # a template literal would break the script or execute as code.
+        env:
+          COMMITS_AHEAD: ${{ steps.drift.outputs.commits_ahead }}
+          SHORTSTAT: ${{ steps.drift.outputs.shortstat }}
+          RECENT: ${{ steps.drift.outputs.recent }}
+        with:
+          script: |
+            const commitsAhead = Number(process.env.COMMITS_AHEAD);
+            const shortstat = process.env.SHORTSTAT || '';
+            const recent = process.env.RECENT || '';
+            const title = `Upstream drift: ${commitsAhead} commits from NousResearch/hermes-agent`;
+            // Lines carrying Markdown backticks are single-quoted strings:
+            // inside a template literal, adjacent backticks parse as a
+            // tagged-template call and throw a TypeError at runtime.
+            const body = [
+              `The monthly drift check found ${commitsAhead} upstream commits since the`,
+              'hermulti seed that have not been merged in.',
+              '',
+              '**Diff shortstat (seed..upstream/main):**',
+              '```',
+              shortstat.trim(),
+              '```',
+              '',
+              '**Most recent upstream commits:**',
+              '```',
+              recent.trim(),
+              '```',
+              '',
+              'See ``memory/project_hermulti_upstream_sync.md`` for the merge strategy',
+              '— the 4 hazard files (``gateway/run.py``, ``cli.py``, ``run_agent.py``,',
+              '``hermes_cli/main.py``) need hand review before any 3-way reapply.',
+              '',
+              'This issue is opened automatically by ``.github/workflows/upstream-drift-check.yml``.',
+            ].join('\n');
+
+            // Reuse the open drift issue if there is one, so repeated runs
+            // add a comment instead of opening a new issue each month.
+            const { data: existing } = await github.rest.issues.listForRepo({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              state: 'open',
+              labels: 'upstream-drift',
+              per_page: 1,
+            });
+            if (existing.length > 0) {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: existing[0].number,
+                body: `Re-check on ${new Date().toISOString().slice(0,10)}: still ${commitsAhead} commits ahead.\n\n${body}`,
+              });
+            } else {
+              await github.rest.issues.create({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                title,
+                body,
+                labels: ['upstream-drift', 'maintenance'],
+              });
+            }
diff --git a/CLAUDE.md b/CLAUDE.md
diff --git a/KNOWN_ISSUES.md b/KNOWN_ISSUES.md
@@ -0,0 +1,12 @@
+# Known Issues
+
+This document tracks tests that are intentionally skipped because they require
+external resources or cover removed behavior that is not currently planned to return.
+
+## Skipped tests
+
+### tests/cron/test_jobs.py (4 tests)
+Skipped: require the optional `croniter` package, which is not installed in the
+default dev environment. Install `croniter` to run cron-job scheduling tests:
+`pip install croniter`.
+
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
@@ -510,6 +510,19 @@ def create(self, **kwargs) -> Any:
         if temperature is not None:
             anthropic_kwargs["temperature"] = temperature
 
+        # Cost-ceiling gate: refuse the call if this project is already over
+        # its daily budget. Best-effort — a missing project id or governor
+        # failure falls through to the call rather than blocking.
+        try:
+            from agent.cost_governor import get_governor, current_project_id, estimate_cost_usd, BudgetExceeded
+            pid = current_project_id()
+            if pid:
+                get_governor().check_budget(pid)
+        except BudgetExceeded:
+            raise
+        except Exception:
+            pid = None
+
         response = self._client.messages.create(**anthropic_kwargs)
         assistant_message, finish_reason = normalize_anthropic_response(response)
 
@@ -523,6 +536,14 @@ def create(self, **kwargs) -> Any:
                 completion_tokens=completion_tokens,
                 total_tokens=total_tokens,
             )
+            # Record spend post-response. Best-effort; any failure here must
+            # not propagate to the caller — the LLM result is already in hand.
+            if pid:
+                try:
+                    cost = estimate_cost_usd(model, prompt_tokens, completion_tokens)
+                    get_governor().record_spend(pid, cost)
+                except Exception:
+                    pass
 
         choice = SimpleNamespace(
             index=0,