diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
index b627a85394..5d48073653 100644
--- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
@@ -124,6 +124,44 @@ def _contains_file_id_reference(messages: list[dict[str, Any]]) -> bool:
     return False
 
 
+def _warn_on_max_tokens_truncation(
+    response: Any,
+    max_tokens: int,
+    from_agent: Any | None = None,
+    *,
+    context: str = "",
+) -> None:
+    """Emit a warning if the Anthropic response was truncated by ``max_tokens``.
+
+    Anthropic returns a ``stop_reason`` field on every Message response
+    indicating why generation stopped. ``"max_tokens"`` means the output was
+    cut off before the model finished, which can corrupt downstream parsing
+    (especially for the final synthesis response after tool use).
+
+    The Anthropic SDK exposes this via ``response.stop_reason``. We surface
+    the signal as a ``logging.warning`` so users can detect truncation
+    without wiring up event subscribers.
+
+    Args:
+        response: The Anthropic Message-like response object.
+        max_tokens: The configured max_tokens value (for the actionable hint).
+        from_agent: Optional agent reference for log context.
+        context: Optional short label (e.g. ``"tool conversation"``) included
+            in the log message to disambiguate truncation sites.
+    """
+    if response is None:
+        return
+    if getattr(response, "stop_reason", None) != "max_tokens":
+        return
+    role = getattr(from_agent, "role", None)
+    agent_hint = f" [{role}]" if role else ""
+    location = f" ({context})" if context else ""
+    logging.warning(
+        f"Truncated response{agent_hint}{location}: stop_reason='max_tokens'. "
+        f"Consider increasing max_tokens (current: {max_tokens})."
+    )
+
+
 class AnthropicThinkingConfig(BaseModel):
     type: Literal["enabled", "disabled"]
     budget_tokens: int | None = None
@@ -844,6 +882,7 @@ def _handle_completion(
 
         usage = self._extract_anthropic_token_usage(response)
         self._track_token_usage_internal(usage)
+        _warn_on_max_tokens_truncation(response, self.max_tokens, from_agent)
 
         if _is_pydantic_model_class(response_model) and response.content:
             if use_native_structured_output:
@@ -1272,6 +1311,12 @@ def _handle_tool_use_conversation(
         # Track token usage for follow-up call
        follow_up_usage = self._extract_anthropic_token_usage(final_response)
         self._track_token_usage_internal(follow_up_usage)
+        _warn_on_max_tokens_truncation(
+            final_response,
+            self.max_tokens,
+            from_agent,
+            context="tool conversation",
+        )
 
         final_content = ""
         thinking_blocks: list[ThinkingBlock] = []
@@ -1377,6 +1422,7 @@ async def _ahandle_completion(
 
         usage = self._extract_anthropic_token_usage(response)
         self._track_token_usage_internal(usage)
+        _warn_on_max_tokens_truncation(response, self.max_tokens, from_agent)
 
         if _is_pydantic_model_class(response_model) and response.content:
             if use_native_structured_output:
@@ -1676,6 +1722,12 @@ async def _ahandle_tool_use_conversation(
 
         follow_up_usage = self._extract_anthropic_token_usage(final_response)
         self._track_token_usage_internal(follow_up_usage)
+        _warn_on_max_tokens_truncation(
+            final_response,
+            self.max_tokens,
+            from_agent,
+            context="tool conversation",
+        )
 
         final_content = ""
         if final_response.content:
diff --git a/lib/crewai/tests/llms/anthropic/test_anthropic.py b/lib/crewai/tests/llms/anthropic/test_anthropic.py
index 81a51c8d6f..0c8cc97864 100644
--- a/lib/crewai/tests/llms/anthropic/test_anthropic.py
+++ b/lib/crewai/tests/llms/anthropic/test_anthropic.py
@@ -1505,3 +1505,105 @@ def test_anthropic_missing_cache_fields_default_to_zero():
     usage = llm._extract_anthropic_token_usage(mock_response)
     assert usage["cached_prompt_tokens"] == 0
     assert usage["cache_creation_tokens"] == 0
+
+
+def test_warn_on_max_tokens_truncation_emits_warning(caplog):
+    """The helper warns when stop_reason == 'max_tokens' (issue #5148)."""
+    import logging as stdlib_logging
+
+    from crewai.llms.providers.anthropic.completion import (
+        _warn_on_max_tokens_truncation,
+    )
+
+    response = MagicMock()
+    response.stop_reason = "max_tokens"
+
+    with caplog.at_level(stdlib_logging.WARNING):
+        _warn_on_max_tokens_truncation(response, max_tokens=4096)
+
+    assert any(
+        rec.levelno == stdlib_logging.WARNING
+        and "max_tokens" in rec.getMessage()
+        and "Truncated response" in rec.getMessage()
+        for rec in caplog.records
+    )
+
+
+def test_warn_on_max_tokens_truncation_silent_for_normal_stop(caplog):
+    """The helper stays silent when generation finished normally (issue #5148)."""
+    import logging as stdlib_logging
+
+    from crewai.llms.providers.anthropic.completion import (
+        _warn_on_max_tokens_truncation,
+    )
+
+    response = MagicMock()
+    response.stop_reason = "end_turn"
+
+    with caplog.at_level(stdlib_logging.WARNING):
+        _warn_on_max_tokens_truncation(response, max_tokens=4096)
+
+    assert not any(
+        rec.levelno == stdlib_logging.WARNING
+        and "Truncated response" in rec.getMessage()
+        for rec in caplog.records
+    )
+
+
+def test_warn_on_max_tokens_truncation_includes_agent_role(caplog):
+    """The helper surfaces the agent role when provided (issue #5148)."""
+    import logging as stdlib_logging
+
+    from crewai.llms.providers.anthropic.completion import (
+        _warn_on_max_tokens_truncation,
+    )
+
+    response = MagicMock()
+    response.stop_reason = "max_tokens"
+
+    fake_agent = MagicMock()
+    fake_agent.role = "Senior Researcher"
+
+    with caplog.at_level(stdlib_logging.WARNING):
+        _warn_on_max_tokens_truncation(
+            response, max_tokens=2048, from_agent=fake_agent, context="tool conversation"
+        )
+
+    matching = [
+        rec.getMessage()
+        for rec in caplog.records
+        if rec.levelno == stdlib_logging.WARNING and "Truncated response" in rec.getMessage()
+    ]
+    assert matching, "expected a truncation warning"
+    msg = matching[0]
+    assert "Senior Researcher" in msg
+    assert "tool conversation" in msg
+    assert "2048" in msg
+
+
+def test_handle_completion_logs_truncation_warning(caplog):
+    """`_handle_completion` surfaces stop_reason='max_tokens' as a warning."""
+    import logging as stdlib_logging
+
+    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")
+
+    mock_response = MagicMock()
+    text_block = MagicMock()
+    text_block.text = "Truncated answer"
+    text_block.__class__ = type(text_block)
+    mock_response.content = [text_block]
+    mock_response.usage = MagicMock(input_tokens=10, output_tokens=5)
+    mock_response.usage.cache_read_input_tokens = 0
+    mock_response.usage.cache_creation_input_tokens = 0
+    mock_response.stop_reason = "max_tokens"
+
+    with patch.object(llm._client.messages, "create", return_value=mock_response):
+        with caplog.at_level(stdlib_logging.WARNING):
+            llm.call("Hello")
+
+    assert any(
+        rec.levelno == stdlib_logging.WARNING
+        and "max_tokens" in rec.getMessage()
+        and "Truncated response" in rec.getMessage()
+        for rec in caplog.records
+    )
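Usage illustration (a minimal sketch, not part of the patch): how the new warning would surface for a caller, assuming an Anthropic API key is configured, that LLM is importable from the top-level crewai package as in the tests above, that the constructor accepts a max_tokens keyword, and that a deliberately tiny max_tokens value forces truncation. The logged text comes from _warn_on_max_tokens_truncation.

    import logging

    from crewai import LLM

    # Make WARNING-level records visible on the console.
    logging.basicConfig(level=logging.WARNING)

    # Deliberately small max_tokens so the model is likely to be cut off mid-answer.
    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022", max_tokens=16)

    # If Anthropic reports stop_reason == "max_tokens", the helper added in this
    # diff logs a warning along the lines of:
    #   Truncated response: stop_reason='max_tokens'. Consider increasing max_tokens (current: 16).
    print(llm.call("Summarize the history of the Roman Empire in detail."))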