|
3 | 3 | - Docs: https://ai.google.dev/api |
4 | 4 | """ |
5 | 5 |
|
| 6 | +import base64 |
6 | 7 | import json |
7 | 8 | import logging |
8 | 9 | import mimetypes |
@@ -141,12 +142,26 @@ def _format_request_content_part(self, content: ContentBlock) -> genai.types.Par |
141 | 142 | ) |
142 | 143 |
|
143 | 144 | if "toolUse" in content: |
| 145 | + thought_signature_b64 = cast(Optional[str], content["toolUse"].get("thoughtSignature")) |
| 146 | + |
| 147 | + thought_signature = None |
| 148 | + if thought_signature_b64: |
| 149 | + try: |
| 150 | + thought_signature = base64.b64decode(thought_signature_b64) |
| 151 | + except Exception as e: |
| 152 | + logger.error("toolUseId=<%s> | failed to decode thoughtSignature: %s", content["toolUse"].get("toolUseId"), e) |
| 153 | + else: |
| 154 | + # thoughtSignature is now preserved by the Strands framework (as of v1.18+) |
| 155 | + # If missing, it means the model didn't provide one (e.g., older Gemini versions) |
| 156 | + logger.debug("toolUseId=<%s> | no thoughtSignature in toolUse (model may not require it)", content["toolUse"].get("toolUseId")) |
| 157 | + |
144 | 158 | return genai.types.Part( |
145 | 159 | function_call=genai.types.FunctionCall( |
146 | 160 | args=content["toolUse"]["input"], |
147 | 161 | id=content["toolUse"]["toolUseId"], |
148 | 162 | name=content["toolUse"]["name"], |
149 | 163 | ), |
| 164 | + thought_signature=thought_signature, |
150 | 165 | ) |
151 | 166 |
|
152 | 167 | raise TypeError(f"content_type=<{next(iter(content))}> | unsupported type") |
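For context on the decode path above: the thoughtSignature stored on a toolUse block is expected to be the base64 text produced later in this diff by _format_chunk, so the two hunks form a round trip. A minimal sketch of that round trip, using an illustrative signature value and a made-up get_weather tool (the block shape follows this diff, not a published schema):

import base64

raw_signature = b"\x0a\x07example"  # opaque bytes as returned by Gemini (illustrative)

# Store: encode to base64 text so the value survives JSON-serialized message history.
tool_use = {
    "toolUseId": "get_weather",
    "name": "get_weather",
    "input": {"city": "Berlin"},
    "thoughtSignature": base64.b64encode(raw_signature).decode("utf-8"),
}

# Restore: decode back to bytes before attaching to genai.types.Part(thought_signature=...).
assert base64.b64decode(tool_use["thoughtSignature"]) == raw_signature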
@@ -212,9 +227,19 @@ def _format_request_config( |
212 | 227 | Returns: |
213 | 228 | Gemini request config. |
214 | 229 | """ |
| 230 | + # Disable thinking text output when tools are present |
| 231 | + # Note: Setting include_thoughts=False prevents thinking text in responses but |
| 232 | + # Gemini still returns thought_signature for function calls. As of Strands v1.18+, |
| 233 | + # the framework properly preserves this field through the message history. |
| 234 | + # See: https://ai.google.dev/gemini-api/docs/thought-signatures |
| 235 | + thinking_config = None |
| 236 | + if tool_specs: |
| 237 | + thinking_config = genai.types.ThinkingConfig(include_thoughts=False) |
| 238 | + |
215 | 239 | return genai.types.GenerateContentConfig( |
216 | 240 | system_instruction=system_prompt, |
217 | 241 | tools=self._format_request_tools(tool_specs), |
| 242 | + thinking_config=thinking_config, |
218 | 243 | **(params or {}), |
219 | 244 | ) |
220 | 245 |
|
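For reference, a sketch of the config this hunk builds when tool specs are present, assuming the google-genai client; the weather tool is a made-up example, not part of the change:

from google import genai

weather_tool = genai.types.Tool(
    function_declarations=[
        genai.types.FunctionDeclaration(
            name="get_weather",
            description="Look up current weather for a city.",
            parameters={"type": "OBJECT", "properties": {"city": {"type": "STRING"}}},
        )
    ]
)

config = genai.types.GenerateContentConfig(
    system_instruction="You are a helpful assistant.",
    tools=[weather_tool],
    # Suppress thinking text in the stream; Gemini can still attach
    # thought_signature bytes to function-call parts.
    thinking_config=genai.types.ThinkingConfig(include_thoughts=False),
)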
@@ -268,14 +293,24 @@ def _format_chunk(self, event: dict[str, Any]) -> StreamEvent: |
268 | 293 | # that name be set in the equivalent FunctionResponse type. Consequently, we assign |
269 | 294 | # function name to toolUseId in our tool use block. And another reason, function_call is |
270 | 295 | # not guaranteed to have id populated. |
| 296 | + tool_use: dict[str, Any] = { |
| 297 | + "name": event["data"].function_call.name, |
| 298 | + "toolUseId": event["data"].function_call.name, |
| 299 | + } |
| 300 | + |
| 301 | + # Get thought_signature from the event dict (passed from stream method) |
| 302 | + thought_sig = event.get("thought_signature") |
| 303 | + |
| 304 | + if thought_sig: |
| 305 | + # Ensure it's bytes for encoding |
| 306 | + if isinstance(thought_sig, str): |
| 307 | + thought_sig = thought_sig.encode("utf-8") |
| 308 | + # Use base64 encoding for storage |
| 309 | + tool_use["thoughtSignature"] = base64.b64encode(thought_sig).decode("utf-8") |
| 310 | + |
271 | 311 | return { |
272 | 312 | "contentBlockStart": { |
273 | | - "start": { |
274 | | - "toolUse": { |
275 | | - "name": event["data"].function_call.name, |
276 | | - "toolUseId": event["data"].function_call.name, |
277 | | - }, |
278 | | - }, |
| 313 | + "start": {"toolUse": cast(Any, tool_use)}, |
279 | 314 | }, |
280 | 315 | } |
281 | 316 |
|
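When a signature was captured, the branch above now emits an event shaped roughly like the following (the signature string is illustrative base64, not a real value):

chunk = {
    "contentBlockStart": {
        "start": {
            "toolUse": {
                "name": "get_weather",
                "toolUseId": "get_weather",
                # base64 of the opaque thought_signature bytes from Gemini
                "thoughtSignature": "ChIKEHdlYXRoZXJfZXhhbXBsZQ==",
            },
        },
    },
}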
@@ -373,15 +408,33 @@ async def stream( |
373 | 408 | yield self._format_chunk({"chunk_type": "content_start", "data_type": "text"}) |
374 | 409 |
|
375 | 410 | tool_used = False |
| 411 | + # Track thought_signature to associate with function calls |
| 412 | + # According to Gemini docs, thought_signature can be on any part |
| 413 | + last_thought_signature: Optional[bytes] = None |
| 414 | + |
376 | 415 | async for event in response: |
377 | 416 | candidates = event.candidates |
378 | 417 | candidate = candidates[0] if candidates else None |
379 | 418 | content = candidate.content if candidate else None |
380 | 419 | parts = content.parts if content and content.parts else [] |
381 | 420 |
|
382 | 421 | for part in parts: |
| 422 | + # Check ALL parts for thought_signature (Gemini may still include it even with thinking disabled) |
| 423 | + if hasattr(part, "thought_signature") and part.thought_signature: |
| 424 | + last_thought_signature = part.thought_signature |
| 425 | + |
383 | 426 | if part.function_call: |
384 | | - yield self._format_chunk({"chunk_type": "content_start", "data_type": "tool", "data": part}) |
| 427 | + # Use the last thought_signature captured |
| 428 | + effective_thought_signature = last_thought_signature |
| 429 | + |
| 430 | + yield self._format_chunk( |
| 431 | + { |
| 432 | + "chunk_type": "content_start", |
| 433 | + "data_type": "tool", |
| 434 | + "data": part, |
| 435 | + "thought_signature": effective_thought_signature, |
| 436 | + } |
| 437 | + ) |
385 | 438 | yield self._format_chunk({"chunk_type": "content_delta", "data_type": "tool", "data": part}) |
386 | 439 | yield self._format_chunk({"chunk_type": "content_stop", "data_type": "tool", "data": part}) |
387 | 440 | tool_used = True |
|
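The loop tracks last_thought_signature across parts because Gemini may attach the signature to a part other than the one carrying function_call. A stand-alone sketch of that behavior, using stub objects in place of genai.types.Part (which exposes thought_signature as optional bytes):

from dataclasses import dataclass
from typing import Optional

@dataclass
class StubPart:
    function_call: Optional[str] = None
    thought_signature: Optional[bytes] = None

# The signature arrives on one part, the function call on a later one.
parts = [
    StubPart(thought_signature=b"\x01\x02"),
    StubPart(function_call="get_weather"),
]

last_thought_signature: Optional[bytes] = None
for part in parts:
    if getattr(part, "thought_signature", None):
        last_thought_signature = part.thought_signature
    if part.function_call:
        # In the provider, this is where the content_start chunk is emitted
        # with thought_signature=last_thought_signature.
        print("tool start with signature:", last_thought_signature)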