lib/crewai/src/crewai/llms/providers/anthropic/completion.py (52 additions, 0 deletions)
@@ -124,6 +124,44 @@ def _contains_file_id_reference(messages: list[dict[str, Any]]) -> bool:
    return False


def _warn_on_max_tokens_truncation(
    response: Any,
    max_tokens: int,
    from_agent: Any | None = None,
    *,
    context: str = "",
) -> None:
    """Emit a warning if the Anthropic response was truncated by ``max_tokens``.

    Anthropic returns a ``stop_reason`` field on every Message response
    indicating why generation stopped. ``"max_tokens"`` means the output was
    cut off before the model finished, which can corrupt downstream parsing
    (especially for the final synthesis response after tool use).

    The Anthropic SDK exposes this via ``response.stop_reason``. We surface
    the signal as a ``logging.warning`` so users can detect truncation
    without wiring up event subscribers.

    Args:
        response: The Anthropic Message-like response object.
        max_tokens: The configured max_tokens value (for the actionable hint).
        from_agent: Optional agent reference for log context.
        context: Optional short label (e.g. ``"tool conversation"``) included
            in the log message to disambiguate truncation sites.
    """
    if response is None:
        return
    if getattr(response, "stop_reason", None) != "max_tokens":
        return
    role = getattr(from_agent, "role", None)
    agent_hint = f" [{role}]" if role else ""
    location = f" ({context})" if context else ""
    logging.warning(
        f"Truncated response{agent_hint}{location}: stop_reason='max_tokens'. "
        f"Consider increasing max_tokens (current: {max_tokens})."
    )


class AnthropicThinkingConfig(BaseModel):
    type: Literal["enabled", "disabled"]
    budget_tokens: int | None = None
@@ -844,6 +882,7 @@ def _handle_completion(

        usage = self._extract_anthropic_token_usage(response)
        self._track_token_usage_internal(usage)
        _warn_on_max_tokens_truncation(response, self.max_tokens, from_agent)

        if _is_pydantic_model_class(response_model) and response.content:
            if use_native_structured_output:
@@ -1272,6 +1311,12 @@ def _handle_tool_use_conversation(
        # Track token usage for follow-up call
        follow_up_usage = self._extract_anthropic_token_usage(final_response)
        self._track_token_usage_internal(follow_up_usage)
        _warn_on_max_tokens_truncation(
            final_response,
            self.max_tokens,
            from_agent,
            context="tool conversation",
        )

        final_content = ""
        thinking_blocks: list[ThinkingBlock] = []
@@ -1377,6 +1422,7 @@ async def _ahandle_completion(

        usage = self._extract_anthropic_token_usage(response)
        self._track_token_usage_internal(usage)
        _warn_on_max_tokens_truncation(response, self.max_tokens, from_agent)

        if _is_pydantic_model_class(response_model) and response.content:
            if use_native_structured_output:
@@ -1676,6 +1722,12 @@ async def _ahandle_tool_use_conversation(

        follow_up_usage = self._extract_anthropic_token_usage(final_response)
        self._track_token_usage_internal(follow_up_usage)
        _warn_on_max_tokens_truncation(
            final_response,
            self.max_tokens,
            from_agent,
            context="tool conversation",
        )

        final_content = ""
        if final_response.content:
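For orientation (this block is editorial, not part of the diff), a minimal sketch of how the new helper behaves at a call site; the stub objects, agent role, and max_tokens value are illustrative stand-ins:

import logging
from types import SimpleNamespace

# Assumes this PR is applied; the import path mirrors the one used in the tests below.
from crewai.llms.providers.anthropic.completion import _warn_on_max_tokens_truncation

logging.basicConfig(level=logging.WARNING)

# Stand-ins for an Anthropic Message and a crewAI agent (illustrative values only).
truncated = SimpleNamespace(stop_reason="max_tokens")
agent = SimpleNamespace(role="Senior Researcher")

# Logs one warning line:
#   Truncated response [Senior Researcher] (tool conversation): stop_reason='max_tokens'.
#   Consider increasing max_tokens (current: 1024).
_warn_on_max_tokens_truncation(truncated, 1024, agent, context="tool conversation")

# A response that finished normally stays silent.
_warn_on_max_tokens_truncation(SimpleNamespace(stop_reason="end_turn"), 1024)

Because the helper reads stop_reason with getattr, a response object that lacks the attribute is treated the same as a normal stop.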
lib/crewai/tests/llms/anthropic/test_anthropic.py (102 additions, 0 deletions)
@@ -1505,3 +1505,105 @@ def test_anthropic_missing_cache_fields_default_to_zero():
    usage = llm._extract_anthropic_token_usage(mock_response)
    assert usage["cached_prompt_tokens"] == 0
    assert usage["cache_creation_tokens"] == 0


def test_warn_on_max_tokens_truncation_emits_warning(caplog):
    """The helper warns when stop_reason == 'max_tokens' (issue #5148)."""
    import logging as stdlib_logging

    from crewai.llms.providers.anthropic.completion import (
        _warn_on_max_tokens_truncation,
    )

    response = MagicMock()
    response.stop_reason = "max_tokens"

    with caplog.at_level(stdlib_logging.WARNING):
        _warn_on_max_tokens_truncation(response, max_tokens=4096)

    assert any(
        rec.levelno == stdlib_logging.WARNING
        and "max_tokens" in rec.getMessage()
        and "Truncated response" in rec.getMessage()
        for rec in caplog.records
    )


def test_warn_on_max_tokens_truncation_silent_for_normal_stop(caplog):
    """The helper stays silent when generation finished normally (issue #5148)."""
    import logging as stdlib_logging

    from crewai.llms.providers.anthropic.completion import (
        _warn_on_max_tokens_truncation,
    )

    response = MagicMock()
    response.stop_reason = "end_turn"

    with caplog.at_level(stdlib_logging.WARNING):
        _warn_on_max_tokens_truncation(response, max_tokens=4096)

    assert not any(
        rec.levelno == stdlib_logging.WARNING
        and "Truncated response" in rec.getMessage()
        for rec in caplog.records
    )


def test_warn_on_max_tokens_truncation_includes_agent_role(caplog):
    """The helper surfaces the agent role when provided (issue #5148)."""
    import logging as stdlib_logging

    from crewai.llms.providers.anthropic.completion import (
        _warn_on_max_tokens_truncation,
    )

    response = MagicMock()
    response.stop_reason = "max_tokens"

    fake_agent = MagicMock()
    fake_agent.role = "Senior Researcher"

    with caplog.at_level(stdlib_logging.WARNING):
        _warn_on_max_tokens_truncation(
            response, max_tokens=2048, from_agent=fake_agent, context="tool conversation"
        )

    matching = [
        rec.getMessage()
        for rec in caplog.records
        if rec.levelno == stdlib_logging.WARNING and "Truncated response" in rec.getMessage()
    ]
    assert matching, "expected a truncation warning"
    msg = matching[0]
    assert "Senior Researcher" in msg
    assert "tool conversation" in msg
    assert "2048" in msg


def test_handle_completion_logs_truncation_warning(caplog):
    """`_handle_completion` surfaces stop_reason='max_tokens' as a warning."""
    import logging as stdlib_logging

    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    mock_response = MagicMock()
    text_block = MagicMock()
    text_block.text = "Truncated answer"
    text_block.__class__ = type(text_block)
    mock_response.content = [text_block]
    mock_response.usage = MagicMock(input_tokens=10, output_tokens=5)
    mock_response.usage.cache_read_input_tokens = 0
    mock_response.usage.cache_creation_input_tokens = 0
    mock_response.stop_reason = "max_tokens"

    with patch.object(llm._client.messages, "create", return_value=mock_response):
        with caplog.at_level(stdlib_logging.WARNING):
            llm.call("Hello")

    assert any(
        rec.levelno == stdlib_logging.WARNING
        and "max_tokens" in rec.getMessage()
        and "Truncated response" in rec.getMessage()
        for rec in caplog.records
    )
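
As a usage note (again editorial, not part of the PR): because truncation is surfaced through logging.warning rather than an exception, an application that prefers to fail fast could attach a filter to the root logger. The filter below is a sketch under that assumption, keyed on the "Truncated response" prefix used by the helper:

import logging

class FailOnTruncation(logging.Filter):
    """Illustrative filter: escalate the truncation warning emitted by this PR's helper."""

    def filter(self, record: logging.LogRecord) -> bool:
        if record.levelno >= logging.WARNING and "Truncated response" in record.getMessage():
            # Raising from a filter propagates out of the logging.warning(...) call site.
            raise RuntimeError(record.getMessage())
        return True

# logging.warning(...) routes through the root logger, so attach the filter there.
logging.getLogger().addFilter(FailOnTruncation())

Matching on the message prefix is brittle if the wording changes, so this is best treated as a stopgap rather than a stable contract.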