Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
55 changes: 53 additions & 2 deletions galileo-adk/src/galileo_adk/observer.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,10 @@ def on_llm_end(self, run_id: UUID, llm_response: Any, status_code: int = 200) ->
num_output_tokens=usage.get("completion_tokens"),
total_tokens=usage.get("total_tokens"),
status_code=status_code,
image_input_tokens=usage.get("image_input_tokens"),
audio_input_tokens=usage.get("audio_input_tokens"),
audio_output_tokens=usage.get("audio_output_tokens"),
Comment thread
john-weiler marked this conversation as resolved.
image_output_tokens=usage.get("image_output_tokens"),
)

def _is_retriever_tool(self, tool: Any) -> bool:
Expand Down Expand Up @@ -498,19 +502,66 @@ def _extract_tools(self, llm_request: Any) -> list[dict[str, Any]] | None:
return None

def _extract_usage_metadata(self, llm_response: Any) -> dict[str, Any]:
"""Extract token usage metrics from LLM response."""
"""Extract token usage metrics from LLM response.

Also extracts per-modality breakdown (image/audio) from
``usage_metadata.prompt_tokens_details`` and ``candidates_tokens_details``
when the native Gemini SDK returns them (list of ModalityTokenCount objects).
"""
if not llm_response:
return {}
usage = getattr(llm_response, "usage_metadata", None)
if not usage:
return {}
return {

result: dict[str, Any] = {
"prompt_tokens": getattr(usage, "prompt_token_count", None) or getattr(usage, "input_token_count", None),
"completion_tokens": getattr(usage, "candidates_token_count", None)
or getattr(usage, "output_token_count", None),
"total_tokens": getattr(usage, "total_token_count", None),
}

# Per-modality breakdown — only present on native Gemini SDK responses.
prompt_details = getattr(usage, "prompt_tokens_details", None)
candidates_details = getattr(usage, "candidates_tokens_details", None)
if prompt_details or candidates_details:
image_in = 0
audio_in = 0
audio_out = 0
image_out = 0
has_prompt = bool(prompt_details)
has_candidates = bool(candidates_details)
for entry in prompt_details or []:
modality_attr = getattr(entry, "modality", None)
Comment on lines 523 to +535

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_extract_usage_metadata duplicates the Gemini modality breakdown in ::_extract_gemini_modality_breakdown, should we extract a shared helper so both paths use the same logic?

Severity

Want Baz to fix this for you? Activate Fixer

modality = getattr(modality_attr, "value", modality_attr)
if not isinstance(modality, str):
continue
count = getattr(entry, "token_count", None) or 0
upper = modality.upper()
if upper == "IMAGE":
image_in += count
elif upper == "AUDIO":
audio_in += count
for entry in candidates_details or []:
modality_attr = getattr(entry, "modality", None)
modality = getattr(modality_attr, "value", modality_attr)
if not isinstance(modality, str):
continue
count = getattr(entry, "token_count", None) or 0
upper = modality.upper()
if upper == "AUDIO":
audio_out += count
elif upper == "IMAGE":
image_out += count
if has_prompt:
result["image_input_tokens"] = image_in
result["audio_input_tokens"] = audio_in
if has_candidates:
result["audio_output_tokens"] = audio_out
result["image_output_tokens"] = image_out

return result

def _extract_final_output(self, invocation_context: Any) -> str:
if hasattr(invocation_context, "session"):
session = invocation_context.session
Expand Down
12 changes: 12 additions & 0 deletions galileo-adk/src/galileo_adk/span_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ def end_llm(
num_output_tokens: int | None = None,
total_tokens: int | None = None,
status_code: int = 200,
image_input_tokens: int | None = None,
audio_input_tokens: int | None = None,
audio_output_tokens: int | None = None,
image_output_tokens: int | None = None,
) -> None:
"""End an LLM span.

Expand All @@ -127,6 +131,10 @@ def end_llm(
num_output_tokens: Number of output tokens generated
total_tokens: Total tokens used
status_code: HTTP status code (200 for success)
image_input_tokens: Image input tokens (Gemini native path only)
audio_input_tokens: Audio input tokens (Gemini native path only)
audio_output_tokens: Audio output tokens (Gemini native path only)
image_output_tokens: Image output tokens (Gemini native path only)
"""
if isinstance(output, list):
if not output:
Expand All @@ -149,6 +157,10 @@ def end_llm(
num_output_tokens=num_output_tokens,
total_tokens=total_tokens,
status_code=status_code,
image_input_tokens=image_input_tokens,
audio_input_tokens=audio_input_tokens,
audio_output_tokens=audio_output_tokens,
image_output_tokens=image_output_tokens,
)

def start_tool(
Expand Down
8 changes: 8 additions & 0 deletions galileo-adk/src/galileo_adk/trace_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,10 @@ def add_llm_span(
temperature: float | None = None,
status_code: int | None = None,
time_to_first_token_ns: int | None = None,
image_input_tokens: int | None = None,
audio_input_tokens: int | None = None,
audio_output_tokens: int | None = None,
image_output_tokens: int | None = None,
step_number: int | None = None,
events: list[Any] | None = None,
) -> LoggedLlmSpan:
Expand All @@ -338,6 +342,10 @@ def add_llm_span(
num_output_tokens=num_output_tokens,
num_total_tokens=total_tokens,
time_to_first_token_ns=time_to_first_token_ns,
num_image_input_tokens=image_input_tokens,
num_audio_input_tokens=audio_input_tokens,
num_audio_output_tokens=audio_output_tokens,
num_image_output_tokens=image_output_tokens,
),
events=events,
temperature=temperature,
Expand Down
136 changes: 136 additions & 0 deletions galileo-adk/tests/test_observer.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,139 @@ def test_hook_mode_preserves_parent_session_for_sub_invocations(self) -> None:
# Then: the parent session is preserved
assert logger._session_external_id == "parent-session"
assert observer._current_adk_session == "parent-session"


class TestExtractUsageMetadata:
    """Tests for _extract_usage_metadata Gemini modality breakdown extraction."""

    def _make_modality_entry(self, modality_value: str, token_count: int) -> MagicMock:
        """Build a stand-in detail entry whose modality is an enum-like object with ``.value``."""
        entry = MagicMock()
        entry.modality = MagicMock()
        entry.modality.value = modality_value
        entry.token_count = token_count
        return entry

    def test_returns_empty_for_no_response(self, observer: GalileoObserver) -> None:
        # Given: no response object
        # When: extracting usage metadata from None
        # Then: an empty dict is returned
        assert observer._extract_usage_metadata(None) == {}

    def test_returns_empty_for_no_usage_metadata(self, observer: GalileoObserver) -> None:
        # Given: a response object with usage_metadata set to None
        response = MagicMock()
        response.usage_metadata = None

        # When: extracting usage metadata
        # Then: an empty dict is returned
        assert observer._extract_usage_metadata(response) == {}

    def test_flat_tokens_no_modality(self, observer: GalileoObserver) -> None:
        # Given: a response with flat token counts and no modality detail lists
        usage = MagicMock()
        usage.prompt_token_count = 10
        usage.candidates_token_count = 5
        usage.total_token_count = 15
        usage.prompt_tokens_details = None
        usage.candidates_tokens_details = None
        response = MagicMock()
        response.usage_metadata = usage

        # When: extracting usage metadata
        result = observer._extract_usage_metadata(response)

        # Then: flat token counts are present and no per-modality keys are added
        assert result["prompt_tokens"] == 10
        assert result["completion_tokens"] == 5
        assert result["total_tokens"] == 15
        assert "image_input_tokens" not in result
        assert "audio_input_tokens" not in result
        assert "audio_output_tokens" not in result
        assert "image_output_tokens" not in result

    def test_audio_and_image_modality_breakdown(self, observer: GalileoObserver) -> None:
        # Given: a response with prompt and candidates token detail lists containing audio and image entries
        usage = MagicMock()
        usage.prompt_token_count = 200
        usage.candidates_token_count = 30
        usage.total_token_count = 230
        usage.prompt_tokens_details = [
            self._make_modality_entry("TEXT", 95),
            self._make_modality_entry("AUDIO", 100),
            self._make_modality_entry("IMAGE", 5),
        ]
        usage.candidates_tokens_details = [
            self._make_modality_entry("TEXT", 10),
            self._make_modality_entry("AUDIO", 20),
        ]
        response = MagicMock()
        response.usage_metadata = usage

        # When: extracting usage metadata
        result = observer._extract_usage_metadata(response)

        # Then: the flat counts are untouched by the modality breakdown
        assert result["prompt_tokens"] == 200
        assert result["completion_tokens"] == 30
        assert result["total_tokens"] == 230

        # And: per-modality token counts are extracted correctly
        assert result["image_input_tokens"] == 5
        assert result["audio_input_tokens"] == 100
        assert result["audio_output_tokens"] == 20
        # And: a modality missing from a non-empty candidates list is reported as 0
        assert result["image_output_tokens"] == 0

    def test_no_audio_in_candidates_returns_zero_audio_out(self, observer: GalileoObserver) -> None:
        # Given: a response with modality detail lists that contain only TEXT entries
        usage = MagicMock()
        usage.prompt_token_count = 10
        usage.candidates_token_count = 5
        usage.total_token_count = 15
        usage.prompt_tokens_details = [self._make_modality_entry("TEXT", 10)]
        usage.candidates_tokens_details = [self._make_modality_entry("TEXT", 5)]
        response = MagicMock()
        response.usage_metadata = usage

        # When: extracting usage metadata
        result = observer._extract_usage_metadata(response)

        # Then: modality keys are present and set to 0, not omitted, because the detail lists were present
        assert result["image_input_tokens"] == 0
        assert result["audio_input_tokens"] == 0
        assert result["audio_output_tokens"] == 0
        assert result["image_output_tokens"] == 0

    def test_modality_as_string_instead_of_enum(self, observer: GalileoObserver) -> None:
        """Older SDK may return modality as a plain string rather than an enum."""
        # Given: a response where modality is a plain string instead of an enum object
        entry = MagicMock()
        entry.modality = "AUDIO"  # string, not enum
        entry.token_count = 50
        usage = MagicMock()
        usage.prompt_token_count = 50
        usage.candidates_token_count = 0
        usage.total_token_count = 50
        usage.prompt_tokens_details = [entry]
        usage.candidates_tokens_details = []
        response = MagicMock()
        response.usage_metadata = usage

        # When: extracting usage metadata
        result = observer._extract_usage_metadata(response)

        # Then: the string modality is handled correctly and the count is extracted
        assert result["audio_input_tokens"] == 50
        assert result["image_input_tokens"] == 0
        # And: an empty candidates detail list adds no output-modality keys
        assert "audio_output_tokens" not in result
        assert "image_output_tokens" not in result

    def test_image_output_tokens_from_candidates(self, observer: GalileoObserver) -> None:
        # Given: a response with an IMAGE entry in candidates_tokens_details
        usage = MagicMock()
        usage.prompt_token_count = 10
        usage.candidates_token_count = 45
        usage.total_token_count = 55
        usage.prompt_tokens_details = [self._make_modality_entry("TEXT", 10)]
        usage.candidates_tokens_details = [
            self._make_modality_entry("TEXT", 5),
            self._make_modality_entry("IMAGE", 30),
            self._make_modality_entry("AUDIO", 10),
        ]
        response = MagicMock()
        response.usage_metadata = usage

        # When: extracting usage metadata
        result = observer._extract_usage_metadata(response)

        # Then: image_output_tokens and audio_output_tokens are both extracted from candidates
        assert result["image_output_tokens"] == 30
        assert result["audio_output_tokens"] == 10
        # And: the text-only prompt detail list yields zero image/audio input tokens
        assert result["image_input_tokens"] == 0
        assert result["audio_input_tokens"] == 0
24 changes: 24 additions & 0 deletions openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27174,6 +27174,30 @@ components:
- type: 'null'
title: Time To First Token Ns
description: Time until the first token was generated in nanoseconds.
num_image_input_tokens:
anyOf:
- type: integer
- type: 'null'
title: Num Image Input Tokens
description: Number of image input tokens (modality breakdown, Gemini native path only).
num_audio_input_tokens:
anyOf:
- type: integer
- type: 'null'
title: Num Audio Input Tokens
description: Number of audio input tokens (modality breakdown, Gemini native path only).
num_audio_output_tokens:
anyOf:
- type: integer
- type: 'null'
title: Num Audio Output Tokens
description: Number of audio output tokens (modality breakdown, Gemini native path only).
num_image_output_tokens:
anyOf:
- type: integer
- type: 'null'
title: Num Image Output Tokens
description: Number of image output tokens (modality breakdown, Gemini native path only).
additionalProperties: true
type: object
title: LlmMetrics
Expand Down
4 changes: 4 additions & 0 deletions src/galileo/handlers/base_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,10 @@ def log_node_tree(self, node: Node) -> None:
num_output_tokens=node.span_params.get("num_output_tokens"),
total_tokens=node.span_params.get("total_tokens"),
time_to_first_token_ns=node.span_params.get("time_to_first_token_ns"),
image_input_tokens=node.span_params.get("image_input_tokens"),
audio_input_tokens=node.span_params.get("audio_input_tokens"),
audio_output_tokens=node.span_params.get("audio_output_tokens"),
image_output_tokens=node.span_params.get("image_output_tokens"),
created_at=created_at,
step_number=step_number,
status_code=node.span_params.get("status_code"),
Expand Down
4 changes: 4 additions & 0 deletions src/galileo/handlers/langchain/async_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,10 @@ async def on_llm_end(
num_input_tokens=result.num_input_tokens,
num_output_tokens=result.num_output_tokens,
total_tokens=result.total_tokens,
image_input_tokens=result.image_input_tokens,
audio_input_tokens=result.audio_input_tokens,
audio_output_tokens=result.audio_output_tokens,
image_output_tokens=result.image_output_tokens,
status_code=200,
)

Expand Down
4 changes: 4 additions & 0 deletions src/galileo/handlers/langchain/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,10 @@ def on_llm_end(self, response: LLMResult, *, run_id: UUID, parent_run_id: UUID |
num_input_tokens=result.num_input_tokens,
num_output_tokens=result.num_output_tokens,
total_tokens=result.total_tokens,
image_input_tokens=result.image_input_tokens,
audio_input_tokens=result.audio_input_tokens,
audio_output_tokens=result.audio_output_tokens,
image_output_tokens=result.image_output_tokens,
status_code=200,
)

Expand Down
Loading
Loading