lib/crewai/src/crewai/llms/providers/anthropic/completion.py (52 additions, 0 deletions)
@@ -124,6 +124,44 @@ def _contains_file_id_reference(messages: list[dict[str, Any]]) -> bool:
    return False


def _warn_on_max_tokens_truncation(
    response: Any,
    max_tokens: int,
    from_agent: Any | None = None,
    *,
    context: str = "",
) -> None:
    """Emit a warning if the Anthropic response was truncated by ``max_tokens``.

    Anthropic returns a ``stop_reason`` field on every Message response
    indicating why generation stopped. ``"max_tokens"`` means the output was
    cut off before the model finished, which can corrupt downstream parsing
    (especially for the final synthesis response after tool use).

    The Anthropic SDK exposes this via ``response.stop_reason``. We surface
    the signal as a ``logging.warning`` so users can detect truncation
    without wiring up event subscribers.

    Args:
        response: The Anthropic Message-like response object.
        max_tokens: The configured max_tokens value (for the actionable hint).
        from_agent: Optional agent reference for log context.
        context: Optional short label (e.g. ``"tool conversation"``) included
            in the log message to disambiguate truncation sites.
    """
    if response is None:
        return
    if getattr(response, "stop_reason", None) != "max_tokens":
        return
    role = getattr(from_agent, "role", None)
    agent_hint = f" [{role}]" if role else ""
    location = f" ({context})" if context else ""
    logging.warning(
        f"Truncated response{agent_hint}{location}: stop_reason='max_tokens'. "
        f"Consider increasing max_tokens (current: {max_tokens})."
    )


class AnthropicThinkingConfig(BaseModel):
    type: Literal["enabled", "disabled"]
    budget_tokens: int | None = None
@@ -844,6 +882,7 @@ def _handle_completion(

        usage = self._extract_anthropic_token_usage(response)
        self._track_token_usage_internal(usage)
        _warn_on_max_tokens_truncation(response, self.max_tokens, from_agent)

        if _is_pydantic_model_class(response_model) and response.content:
            if use_native_structured_output:
@@ -1272,6 +1311,12 @@ def _handle_tool_use_conversation(
        # Track token usage for follow-up call
        follow_up_usage = self._extract_anthropic_token_usage(final_response)
        self._track_token_usage_internal(follow_up_usage)
        _warn_on_max_tokens_truncation(
            final_response,
            self.max_tokens,
            from_agent,
            context="tool conversation",
        )

        final_content = ""
        thinking_blocks: list[ThinkingBlock] = []
@@ -1377,6 +1422,7 @@ async def _ahandle_completion(

        usage = self._extract_anthropic_token_usage(response)
        self._track_token_usage_internal(usage)
        _warn_on_max_tokens_truncation(response, self.max_tokens, from_agent)

        if _is_pydantic_model_class(response_model) and response.content:
            if use_native_structured_output:
@@ -1676,6 +1722,12 @@ async def _ahandle_tool_use_conversation(

        follow_up_usage = self._extract_anthropic_token_usage(final_response)
        self._track_token_usage_internal(follow_up_usage)
        _warn_on_max_tokens_truncation(
            final_response,
            self.max_tokens,
            from_agent,
            context="tool conversation",
        )

        final_content = ""
        if final_response.content:
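For orientation (this block is editorial, not part of the diff), a minimal sketch of how the new helper behaves at a call site; the stub objects, agent role, and max_tokens value are illustrative stand-ins:

import logging
from types import SimpleNamespace

# Assumes this PR is applied; the import path mirrors the one used in the tests below.
from crewai.llms.providers.anthropic.completion import _warn_on_max_tokens_truncation

logging.basicConfig(level=logging.WARNING)

# Stand-ins for an Anthropic Message and a crewAI agent (illustrative values only).
truncated = SimpleNamespace(stop_reason="max_tokens")
agent = SimpleNamespace(role="Senior Researcher")

# Logs one warning line:
#   Truncated response [Senior Researcher] (tool conversation): stop_reason='max_tokens'.
#   Consider increasing max_tokens (current: 1024).
_warn_on_max_tokens_truncation(truncated, 1024, agent, context="tool conversation")

# A response that finished normally stays silent.
_warn_on_max_tokens_truncation(SimpleNamespace(stop_reason="end_turn"), 1024)

Because the helper reads stop_reason with getattr, a response object that lacks the attribute is treated the same as a normal stop.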
lib/crewai/tests/llms/anthropic/test_anthropic.py (102 additions, 0 deletions)
@@ -1505,3 +1505,105 @@ def test_anthropic_missing_cache_fields_default_to_zero():
    usage = llm._extract_anthropic_token_usage(mock_response)
    assert usage["cached_prompt_tokens"] == 0
    assert usage["cache_creation_tokens"] == 0


def test_warn_on_max_tokens_truncation_emits_warning(caplog):
    """The helper warns when stop_reason == 'max_tokens' (issue #5148)."""
    import logging as stdlib_logging

    from crewai.llms.providers.anthropic.completion import (
        _warn_on_max_tokens_truncation,
    )

    response = MagicMock()
    response.stop_reason = "max_tokens"

    with caplog.at_level(stdlib_logging.WARNING):
        _warn_on_max_tokens_truncation(response, max_tokens=4096)

    assert any(
        rec.levelno == stdlib_logging.WARNING
        and "max_tokens" in rec.getMessage()
        and "Truncated response" in rec.getMessage()
        for rec in caplog.records
    )


def test_warn_on_max_tokens_truncation_silent_for_normal_stop(caplog):
    """The helper stays silent when generation finished normally (issue #5148)."""
    import logging as stdlib_logging

    from crewai.llms.providers.anthropic.completion import (
        _warn_on_max_tokens_truncation,
    )

    response = MagicMock()
    response.stop_reason = "end_turn"

    with caplog.at_level(stdlib_logging.WARNING):
        _warn_on_max_tokens_truncation(response, max_tokens=4096)

    assert not any(
        rec.levelno == stdlib_logging.WARNING
        and "Truncated response" in rec.getMessage()
        for rec in caplog.records
    )


def test_warn_on_max_tokens_truncation_includes_agent_role(caplog):
    """The helper surfaces the agent role when provided (issue #5148)."""
    import logging as stdlib_logging

    from crewai.llms.providers.anthropic.completion import (
        _warn_on_max_tokens_truncation,
    )

    response = MagicMock()
    response.stop_reason = "max_tokens"

    fake_agent = MagicMock()
    fake_agent.role = "Senior Researcher"

    with caplog.at_level(stdlib_logging.WARNING):
        _warn_on_max_tokens_truncation(
            response, max_tokens=2048, from_agent=fake_agent, context="tool conversation"
        )

    matching = [
        rec.getMessage()
        for rec in caplog.records
        if rec.levelno == stdlib_logging.WARNING and "Truncated response" in rec.getMessage()
    ]
    assert matching, "expected a truncation warning"
    msg = matching[0]
    assert "Senior Researcher" in msg
    assert "tool conversation" in msg
    assert "2048" in msg


def test_handle_completion_logs_truncation_warning(caplog):
    """`_handle_completion` surfaces stop_reason='max_tokens' as a warning."""
    import logging as stdlib_logging

    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    mock_response = MagicMock()
    text_block = MagicMock()
    text_block.text = "Truncated answer"
    text_block.__class__ = type(text_block)
    mock_response.content = [text_block]
    mock_response.usage = MagicMock(input_tokens=10, output_tokens=5)
    mock_response.usage.cache_read_input_tokens = 0
    mock_response.usage.cache_creation_input_tokens = 0
    mock_response.stop_reason = "max_tokens"

    with patch.object(llm._client.messages, "create", return_value=mock_response):
        with caplog.at_level(stdlib_logging.WARNING):
            llm.call("Hello")

    assert any(
        rec.levelno == stdlib_logging.WARNING
        and "max_tokens" in rec.getMessage()
        and "Truncated response" in rec.getMessage()
        for rec in caplog.records
    )
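
As a usage note (again editorial, not part of the PR): because truncation is surfaced through logging.warning rather than an exception, an application that prefers to fail fast could attach a filter to the root logger. The filter below is a sketch under that assumption, keyed on the "Truncated response" prefix used by the helper:

import logging

class FailOnTruncation(logging.Filter):
    """Illustrative filter: escalate the truncation warning emitted by this PR's helper."""

    def filter(self, record: logging.LogRecord) -> bool:
        if record.levelno >= logging.WARNING and "Truncated response" in record.getMessage():
            # Raising from a filter propagates out of the logging.warning(...) call site.
            raise RuntimeError(record.getMessage())
        return True

# logging.warning(...) routes through the root logger, so attach the filter there.
logging.getLogger().addFilter(FailOnTruncation())

Matching on the message prefix is brittle if the wording changes, so this is best treated as a stopgap rather than a stable contract.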