Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/opengradient/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ async def stream_example():
InferenceResult,
ModelOutput,
ModelRepository,
ResponseFormat,
SchedulerParams,
TextGenerationOutput,
TextGenerationStream,
Expand All @@ -105,6 +106,7 @@ async def stream_example():
"SchedulerParams",
"CandleType",
"CandleOrder",
"ResponseFormat",
"TextGenerationOutput",
"TextGenerationStream",
"x402SettlementMode",
Expand Down
22 changes: 21 additions & 1 deletion src/opengradient/client/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from x402.mechanisms.evm.exact.register import register_exact_evm_client
from x402.mechanisms.evm.upto.register import register_upto_evm_client

from ..types import TEE_LLM, StreamChoice, StreamChunk, StreamDelta, TextGenerationOutput, x402SettlementMode
from ..types import TEE_LLM, ResponseFormat, StreamChoice, StreamChunk, StreamDelta, TextGenerationOutput, x402SettlementMode
from .opg_token import Permit2ApprovalResult, ensure_opg_approval
from .tee_connection import RegistryTEEConnection, StaticTEEConnection, TEEConnectionInterface
from .tee_registry import TEERegistry
Expand Down Expand Up @@ -44,6 +44,7 @@ class _ChatParams:
stop_sequence: Optional[List[str]]
tools: Optional[List[Dict]]
tool_choice: Optional[str]
response_format: Optional[ResponseFormat]
x402_settlement_mode: x402SettlementMode


Expand Down Expand Up @@ -152,6 +153,8 @@ def _chat_payload(self, params: _ChatParams, messages: List[Dict], stream: bool
if params.tools:
payload["tools"] = params.tools
payload["tool_choice"] = params.tool_choice or "auto"
if params.response_format:
payload["response_format"] = params.response_format.to_dict()
return payload

async def _call_with_tee_retry(
Expand Down Expand Up @@ -297,6 +300,7 @@ async def chat(
temperature: float = 0.0,
tools: Optional[List[Dict]] = None,
tool_choice: Optional[str] = None,
response_format: Optional[ResponseFormat] = None,
x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED,
stream: bool = False,
) -> Union[TextGenerationOutput, AsyncGenerator[StreamChunk, None]]:
Expand All @@ -311,6 +315,11 @@ async def chat(
temperature (float): Temperature for LLM inference, between 0 and 1.
tools (List[dict], optional): Set of tools for function calling.
tool_choice (str, optional): Sets a specific tool to choose.
response_format (ResponseFormat, optional): Enforce a specific output format.
Use ``ResponseFormat(type="json_object")`` for any valid JSON (not supported
by Anthropic models). Use ``ResponseFormat(type="json_schema", json_schema={...})``
to enforce a strict schema (supported by all providers including Anthropic).
Defaults to None (plain text).
x402_settlement_mode (x402SettlementMode, optional): Settlement mode for x402 payments.
- PRIVATE: Payment only, no input/output data on-chain (most privacy-preserving).
- BATCH_HASHED: Aggregates inferences into a Merkle tree with input/output hashes and signatures (default, most cost-efficient).
Expand All @@ -324,15 +333,25 @@ async def chat(
- If stream=True: Async generator yielding StreamChunk objects

Raises:
ValueError: If ``response_format="json_object"`` is used with an Anthropic model.
RuntimeError: If the inference fails.
"""
if response_format is not None and response_format.type == "json_object":
provider = model.split("/")[0]
if provider == "anthropic":
raise ValueError(
"Anthropic models do not support response_format type 'json_object'. "
"Use ResponseFormat(type='json_schema', json_schema={...}) with an explicit schema instead."
)

params = _ChatParams(
model=model.split("/")[1],
max_tokens=max_tokens,
temperature=temperature,
stop_sequence=stop_sequence,
tools=tools,
tool_choice=tool_choice,
response_format=response_format,
x402_settlement_mode=x402_settlement_mode,
)

Expand Down Expand Up @@ -379,6 +398,7 @@ async def _request() -> TextGenerationOutput:
transaction_hash="external",
finish_reason=choices[0].get("finish_reason"),
chat_output=message,
usage=result.get("usage"),
tee_signature=result.get("tee_signature"),
tee_timestamp=result.get("tee_timestamp"),
**tee.metadata(),
Expand Down
72 changes: 71 additions & 1 deletion src/opengradient/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,9 @@ class TextGenerationOutput:
completion_output: Optional[str] = None
"""Raw text returned by a completion request."""

usage: Optional[Dict] = None
"""Token usage for the request. Contains ``prompt_tokens``, ``completion_tokens``, and ``total_tokens`` when reported by the server."""

payment_hash: Optional[str] = None
"""Payment hash for the x402 transaction."""

Expand Down Expand Up @@ -513,10 +516,12 @@ class TEE_LLM(str, Enum):
CLAUDE_OPUS_4_6 = "anthropic/claude-opus-4-6"

# Google models via TEE
# Note: gemini-2.5-flash, gemini-2.5-pro, and gemini-2.5-flash-lite are scheduled
# for deprecation on June 17, 2026 (flash-lite: July 22, 2026). Replacements will be
# gemini-3-flash-preview, gemini-3.1-pro-preview, and gemini-3.1-flash-lite-preview.
GEMINI_2_5_FLASH = "google/gemini-2.5-flash"
GEMINI_2_5_PRO = "google/gemini-2.5-pro"
GEMINI_2_5_FLASH_LITE = "google/gemini-2.5-flash-lite"
GEMINI_3_PRO = "google/gemini-3-pro-preview"
GEMINI_3_FLASH = "google/gemini-3-flash-preview"

# xAI Grok models via TEE
Expand All @@ -526,6 +531,71 @@ class TEE_LLM(str, Enum):
GROK_4_1_FAST_NON_REASONING = "x-ai/grok-4-1-fast-non-reasoning"


@dataclass
class ResponseFormat:
    """Describes the output format the TEE gateway should enforce for a chat request.

    Three modes are available via ``type``:

    * ``"text"`` — plain text, the default behaviour.
    * ``"json_object"`` — the model must emit some valid JSON object
      (supported by OpenAI, Gemini, and Grok; not by Anthropic).
    * ``"json_schema"`` — the model must emit JSON conforming to the schema
      supplied in ``json_schema`` (supported by every provider, including
      Anthropic).

    Attributes:
        type: One of ``"text"``, ``"json_object"``, or ``"json_schema"``.
        json_schema: Schema definition, required when ``type="json_schema"``.
            Must contain ``name`` (str) and ``schema`` (dict); ``strict``
            (bool) is optional.

    Raises:
        ValueError: If ``type`` is not a recognised value, or if
            ``type="json_schema"`` is used without providing ``json_schema``.

    Examples::

        # Any valid JSON object — OpenAI, Gemini, Grok only
        ResponseFormat(type="json_object")

        # Strict schema — all providers including Anthropic
        ResponseFormat(
            type="json_schema",
            json_schema={
                "name": "person",
                "strict": True,
                "schema": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "age": {"type": "integer"},
                    },
                    "required": ["name", "age"],
                    "additionalProperties": False,
                },
            },
        )
    """

    type: str
    json_schema: Optional[Dict] = None

    def __post_init__(self) -> None:
        # Validate eagerly at construction time so a bad format surfaces
        # before any request is sent to the gateway.
        valid_types = ("text", "json_object", "json_schema")
        if self.type not in valid_types:
            raise ValueError(
                f"ResponseFormat.type must be one of {valid_types}, got '{self.type}'"
            )
        # An empty dict is treated the same as a missing schema on purpose.
        if self.type == "json_schema" and not self.json_schema:
            raise ValueError(
                "ResponseFormat.json_schema is required when type='json_schema'"
            )

    def to_dict(self) -> Dict:
        """Serialise to a JSON-compatible dict for the TEE gateway request payload."""
        if self.json_schema is None:
            return {"type": self.type}
        return {"type": self.type, "json_schema": self.json_schema}


@dataclass
class SchedulerParams:
frequency: int
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading