Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 59 additions & 3 deletions backend/routes/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,17 @@
from pathlib import Path
import anthropic as anthropic_sdk

# Per-request ceiling on tool-use iterations inside one /chat/message stream.
# One "iteration" is a single round-trip to Anthropic, which may carry tool
# calls. The previous value was 60; it was lowered so a runaway agent cannot
# hang the Vercel proxy (the proxy times out the SSE connection, which the
# browser reports as "Failed to fetch"). On reaching the cap the stream emits
# a user-facing fallback message followed by a `done` event carrying
# stop_reason="iteration_cap", rather than being cut off mid-stream.
# NOTE: the budget is per request. The /chat/message "continue" flow re-enters
# the loop with a fresh budget, but the earlier tool transcript is already part
# of the conversation, so the model resumes mid-thought instead of restarting.
MAX_ITERATIONS = 12

# Model identifiers; each can be overridden through its environment variable.
# The title model falls back to whatever the fast model resolved to.
DEFAULT_FAST_MODEL = os.getenv("ANTHROPIC_FAST_MODEL", "claude-haiku-4-5")
DEFAULT_COMPLEX_MODEL = os.getenv("ANTHROPIC_COMPLEX_MODEL", "claude-sonnet-4-6")
TITLE_MODEL = os.getenv("ANTHROPIC_TITLE_MODEL", DEFAULT_FAST_MODEL)
Expand Down Expand Up @@ -418,12 +429,17 @@ async def generate_stream():
try:
conversation = deduplicated.copy()
iteration = 0
max_iterations = 60
max_iterations = MAX_ITERATIONS
total_input_tokens = 0
total_output_tokens = 0
total_cache_read_input_tokens = 0
total_cache_creation_input_tokens = 0
recent_tool_calls: List[str] = []
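# Track every tool call across the turn so the cap-hit fallback can
# tell the user what was tried. Recording counts only (no tool inputs)
# keeps PII out of the tool summary and keeps it well under the
# 300-char target.
# NOTE(review): last_tool_error carries raw tool error text into the same
# fallback message, so the PII guarantee above covers the counts only.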
tool_call_counts: Dict[str, int] = {}
last_tool_error: str | None = None

client = _get_anthropic_client()
model = _select_chat_model(conversation)
Expand Down Expand Up @@ -529,6 +545,10 @@ async def generate_stream():
yield f"data: {json.dumps({'type': 'thinking_done'})}\n\n"

if not tool_uses:
logger.info(
f"[CHAT] Session {session_id}: converged at {iteration} iterations"
f" stop_reason={last_stop_reason}"
)
# Record token usage for billing
billing = None
try:
Expand Down Expand Up @@ -601,6 +621,15 @@ async def execute_tool_async(tu):
if await request.is_disconnected():
return
completed_tools[tu["id"]] = result
tool_call_counts[tu["name"]] = tool_call_counts.get(tu["name"], 0) + 1
# Capture the most recent tool error so the cap-hit fallback
# can hint at what the agent was struggling with.
if isinstance(result, dict):
err = result.get("error") or result.get("stderr")
if err:
err_str = str(err).strip().splitlines()[-1] if str(err).strip() else ""
if err_str:
last_tool_error = err_str[:120]
result_str = _serialise_tool_result(result)
result_summary = result_str[:5000] + "..." if len(result_str) > 5000 else result_str
yield f"data: {json.dumps({'type': 'tool_result', 'tool_name': tu['name'], 'tool_id': tu['id'], 'status': 'success', 'result_summary': result_summary})}\n\n"
Expand Down Expand Up @@ -629,6 +658,32 @@ async def execute_tool_async(tu):
conversation.append({"role": "user", "content": tool_results})

if iteration >= max_iterations:
logger.info(
f"[CHAT] Session {session_id}: iteration cap hit at {iteration} iterations"
f" — tool_counts={tool_call_counts}"
f"{f' last_error={last_tool_error!r}' if last_tool_error else ''}"
)
# Build a short summary of what was tried. Kept under ~300 chars
# so it reads as a sentence, not a transcript.
if tool_call_counts:
parts = [f"`{name}` {count}×" for name, count in tool_call_counts.items()]
tried_clause = "ran " + ", ".join(parts)
else:
tried_clause = "didn't complete any tool calls"
error_clause = (
f", last attempt errored with \"{last_tool_error}\""
if last_tool_error else ""
)
fallback_message = (
"\n\nI'm spending more iterations than expected on this without converging. "
f"Here's what I tried: {tried_clause}{error_clause}. "
"Could you (a) rephrase the question, (b) enable Plan mode so I can ask "
"clarifying questions first, or (c) try a more specific scenario?"
)
# Hard cap defensively in case tool names balloon the string.
if len(fallback_message) > 600:
fallback_message = fallback_message[:597] + "..."

billing = None
try:
from routes.billing import record_usage
Expand All @@ -643,8 +698,9 @@ async def execute_tool_async(tu):
)
except Exception as e:
logger.warning(f"[CHAT] Failed to record usage: {e}")
yield f"data: {json.dumps({'type': 'chunk', 'content': '\\n\\n*[Reached maximum iterations]*'})}\n\n"
yield f"data: {json.dumps({'type': 'done', 'content': assistant_content, 'session_id': session_id, 'model': model, 'usage': {'input_tokens': total_input_tokens, 'output_tokens': total_output_tokens, 'cache_creation_input_tokens': total_cache_creation_input_tokens, 'cache_read_input_tokens': total_cache_read_input_tokens}, 'cost_gbp': billing['cost_gbp'] if billing else None, 'balance': billing['balance'] if billing else None})}\n\n"
yield f"data: {json.dumps({'type': 'chunk', 'content': fallback_message})}\n\n"
final_content = assistant_content + fallback_message
yield f"data: {json.dumps({'type': 'done', 'content': final_content, 'session_id': session_id, 'model': model, 'stop_reason': 'iteration_cap', 'usage': {'input_tokens': total_input_tokens, 'output_tokens': total_output_tokens, 'cache_creation_input_tokens': total_cache_creation_input_tokens, 'cache_read_input_tokens': total_cache_read_input_tokens}, 'cost_gbp': billing['cost_gbp'] if billing else None, 'balance': billing['balance'] if billing else None})}\n\n"

except Exception as e:
import traceback
Expand Down