Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/webwright/config/model_ollama.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Ollama model configuration.
#
# Stack on top of base.yaml:
#   python -m webwright.run.cli -c base.yaml -c model_ollama.yaml ...
#
# Connects to a local or cloud-hosted Ollama instance via /v1/chat/completions.
# Set OLLAMA_API_KEY env var for cloud-hosted endpoints that require authentication.
# Override model_name via OLLAMA_MODEL env var or edit inline.
# NOTE(review): no OLLAMA_MODEL handling is visible in ollama_model.py —
# confirm the base model/config layer actually reads that variable.

model:
  # Registry key; "ollama" maps to webwright.models.ollama_model.OllamaModel.
  model_class: ollama
  # Sent verbatim as the "model" field of every request payload.
  model_name: llama3.2
  # Full chat-completions URL (including the /v1/chat/completions path).
  ollama_endpoint: http://localhost:11434/v1/chat/completions
11 changes: 11 additions & 0 deletions src/webwright/config/model_ollama_fast.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Ollama fast preset — no reasoning, maximum speed.
#
# Stack on top of base.yaml:
#   python -m webwright.run.cli -c base.yaml -c model_ollama_fast.yaml ...

model:
  # Registry key; "ollama" maps to webwright.models.ollama_model.OllamaModel.
  model_class: ollama
  # Sent verbatim as the "model" field of every request payload.
  model_name: llama3.2
  # Full chat-completions URL (including the /v1/chat/completions path).
  ollama_endpoint: http://localhost:11434/v1/chat/completions
  # Keys below are merged into the top level of every request payload and
  # override any built-in key with the same name.
  ollama_extra_body:
    reasoning_effort: none
8 changes: 8 additions & 0 deletions src/webwright/config/model_ollama_reason.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Ollama reasoning preset — highest quality, slower iteration.
#
# Stack on top of base.yaml:
#   python -m webwright.run.cli -c base.yaml -c model_ollama_reason.yaml ...
#
# NOTE(review): reasoning_effort presumably only has an effect on models that
# support thinking — confirm llama3.2 is the intended default for this preset.

model:
  # Registry key; "ollama" maps to webwright.models.ollama_model.OllamaModel.
  model_class: ollama
  # Sent verbatim as the "model" field of every request payload.
  model_name: llama3.2
  # Full chat-completions URL (including the /v1/chat/completions path).
  ollama_endpoint: http://localhost:11434/v1/chat/completions
  # Keys below are merged into the top level of every request payload and
  # override any built-in key with the same name.
  ollama_extra_body:
    reasoning_effort: high
1 change: 1 addition & 0 deletions src/webwright/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"openai": "webwright.models.openai_model.OpenAIModel",
"anthropic": "webwright.models.anthropic_model.AnthropicModel",
"openrouter": "webwright.models.openrouter_model.OpenRouterModel",
"ollama": "webwright.models.ollama_model.OllamaModel",
}


Expand Down
106 changes: 106 additions & 0 deletions src/webwright/models/_chat_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""Shared chat-completions serialization helpers used by Ollama and OpenRouter model backends."""

from __future__ import annotations

from typing import Any

from webwright.models.base import _safe_int


def serialize_chat_content_part(part: dict[str, Any]) -> dict[str, Any] | None:
    """Translate one internal content part into its chat-completions form.

    ``input_text`` and ``output_text`` parts become ``{"type": "text", ...}``.
    ``input_image`` parts become ``{"type": "image_url", ...}``, with the
    detail level falling back to ``"high"`` when missing or falsy. Any other
    part type yields ``None`` so the caller can drop it.

    Args:
        part: A content part dict carrying at least a ``"type"`` key.

    Returns:
        The chat-completions representation, or ``None`` for unsupported types.
    """
    kind = part.get("type")

    if kind in {"input_text", "output_text"}:
        text = part.get("text", "") or ""
        return {"type": "text", "text": str(text)}

    if kind != "input_image":
        return None

    url = part.get("image_url", "") or ""
    detail = part.get("detail", "high") or "high"
    return {
        "type": "image_url",
        "image_url": {"url": str(url), "detail": str(detail)},
    }


def serialize_chat_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Convert internal conversation messages to chat-completions messages.

    Messages with role ``"exit"`` are dropped. ``"system"`` and
    ``"assistant"`` roles are kept; every other role is sent as ``"user"``.
    String content passes through unchanged, and ``None`` content is treated
    as an empty string instead of failing. Structured content is converted
    part by part; unsupported or non-dict parts are skipped.

    Assistant messages, and messages made only of text parts, are flattened
    to one newline-joined string. Other messages keep their list of parts so
    images survive.

    Args:
        messages: Internal messages; each must carry a ``"role"`` key.

    Returns:
        A new list of ``{"role": ..., "content": ...}`` dicts.

    Raises:
        KeyError: If a message has no ``"role"`` key.
    """
    serialized: list[dict[str, Any]] = []
    for message in messages:
        role = message["role"]
        if role == "exit":
            continue
        mapped_role = role if role in ("system", "assistant") else "user"

        content = message.get("content", "")
        if content is None:
            # An explicit null content would otherwise blow up when iterated.
            content = ""
        if isinstance(content, str):
            serialized.append({"role": mapped_role, "content": content})
            continue

        parts: list[dict[str, Any]] = []
        for part in content:
            if not isinstance(part, dict):
                continue
            serialized_part = serialize_chat_content_part(part)
            if serialized_part is not None:
                parts.append(serialized_part)

        text_only = all(part.get("type") == "text" for part in parts)
        if mapped_role == "assistant" or text_only:
            # Flatten to plain text; non-text parts contribute an empty line.
            flattened = "\n".join(str(part.get("text", "") or "") for part in parts)
            serialized.append({"role": mapped_role, "content": flattened})
        else:
            serialized.append({"role": mapped_role, "content": parts})
    return serialized


def metrics_input_from_chat_messages(chat_messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Rebuild metrics-style input entries from chat-completions messages.

    Each message yields one ``{"content": [...]}`` entry. String content
    becomes a single ``input_text`` part. In structured content, ``text``
    parts map to ``input_text`` and ``image_url`` parts map to a bare
    ``{"type": "input_image"}`` marker. Non-dict parts and other part types
    are dropped.

    Args:
        chat_messages: Messages in chat-completions form.

    Returns:
        One metrics entry per input message, in the same order.
    """

    def _to_metric_parts(content: Any) -> list[dict[str, Any]]:
        # Plain strings are represented as one text part.
        if isinstance(content, str):
            return [{"type": "input_text", "text": content}]
        converted: list[dict[str, Any]] = []
        for piece in content:
            if not isinstance(piece, dict):
                continue
            kind = piece.get("type")
            if kind == "text":
                converted.append({"type": "input_text", "text": str(piece.get("text", "") or "")})
            elif kind == "image_url":
                converted.append({"type": "input_image"})
        return converted

    return [{"content": _to_metric_parts(entry.get("content", ""))} for entry in chat_messages]


def extract_chat_completions_text(payload: dict[str, Any]) -> str:
    """Return the text of the first choice in a chat-completions response.

    String content is returned as is. List content is reduced to its
    ``text`` parts, joined with newlines. Any structural surprise (missing
    or empty ``choices``, a non-dict choice or message, another content
    type) yields an empty string.

    Args:
        payload: Decoded chat-completions response body.

    Returns:
        The extracted text, or ``""`` when none can be found.
    """
    choices = payload.get("choices")
    if isinstance(choices, list) and choices:
        head = choices[0]
        message = head.get("message", {}) if isinstance(head, dict) else None
        if isinstance(message, dict):
            content = message.get("content", "")
            if isinstance(content, str):
                return content
            if isinstance(content, list):
                texts = [
                    str(piece.get("text", "") or "")
                    for piece in content
                    if isinstance(piece, dict) and piece.get("type") == "text"
                ]
                return "\n".join(texts)
    return ""


def usage_metrics_from_chat_completions(payload: dict[str, Any]) -> dict[str, int]:
    """Map a chat-completions ``usage`` object onto the internal metric names.

    ``prompt_tokens``, ``completion_tokens`` and ``total_tokens`` are each
    passed through ``_safe_int``. A missing or non-dict ``usage`` is treated
    as empty. Cached-input and reasoning-output counts are always reported
    as ``0``.

    Args:
        payload: Decoded chat-completions response body.

    Returns:
        A dict with the five token-count metrics.
    """
    raw_usage = payload.get("usage")
    usage = raw_usage if isinstance(raw_usage, dict) else {}

    # (internal metric name, chat-completions usage key)
    key_map = (
        ("input_tokens", "prompt_tokens"),
        ("output_tokens", "completion_tokens"),
        ("total_tokens", "total_tokens"),
    )
    metrics: dict[str, int] = {ours: _safe_int(usage.get(theirs)) for ours, theirs in key_map}
    metrics["cached_input_tokens"] = 0
    metrics["reasoning_output_tokens"] = 0
    return metrics
99 changes: 99 additions & 0 deletions src/webwright/models/ollama_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""Ollama chat completions model backend.

Drop-in replacement for OpenAI/Anthropic/OpenRouter — uses local or cloud-hosted
Ollama models via the standard /v1/chat/completions endpoint. No API key
required for local instances; set OLLAMA_API_KEY env var for cloud-hosted
Ollama endpoints that require authentication.
"""

from __future__ import annotations

from typing import Any
from urllib.parse import urlparse

from webwright.models._chat_utils import (
extract_chat_completions_text,
metrics_input_from_chat_messages,
serialize_chat_messages,
usage_metrics_from_chat_completions,
)
from webwright.models.base import (
BaseModel,
BaseModelConfig,
OptStr,
)

__all__ = [
"OllamaModel",
"OllamaModelConfig",
]


class OllamaModelConfig(BaseModelConfig):
    """Configuration fields specific to the Ollama chat-completions backend."""

    # Model tag sent as the "model" field of each request payload.
    model_name: OptStr = "llama3.2"
    # API key; when non-empty (and not the "sk-noop" placeholder) it is sent
    # as an "Authorization: Bearer ..." header.
    ollama_api_key: OptStr = ""
    # Full URL of the chat-completions endpoint that requests are sent to.
    ollama_endpoint: OptStr = "http://localhost:11434/v1/chat/completions"
    # Extra keys merged into the top level of every request payload.
    # NOTE(review): mutable class-level default — only safe if BaseModelConfig
    # copies defaults per instance (e.g. a pydantic model); confirm.
    ollama_extra_body: dict[str, Any] = {}


def _is_localhost(endpoint: str) -> bool:
host = (urlparse(endpoint).hostname or "").lower()
return host in ("localhost", "127.0.0.1", "::1")


class OllamaModel(BaseModel):
    """Model backend that talks to an Ollama chat-completions endpoint.

    Local (loopback) endpoints need no API key: a placeholder key is injected
    before the base class is initialised and is never sent on the wire.
    """

    _API_KEY_FIELD = "ollama_api_key"
    _ENV_VAR = "OLLAMA_API_KEY"
    _LOG_SOURCE = "ollama"
    _MAX_RATE_LIMIT_RETRIES = 5
    _MAX_TRANSIENT_RETRIES = 5
    _DEFAULT_CONFIG_CLASS = OllamaModelConfig

    # Placeholder key used for local endpoints; never sent as a header.
    _NOOP_API_KEY = "sk-noop"
    # Mirrors the ``ollama_endpoint`` default of OllamaModelConfig; used when
    # the caller does not pass an endpoint explicitly.
    _DEFAULT_ENDPOINT = "http://localhost:11434/v1/chat/completions"

    def __init__(self, *, config_class: type | None = None, **kwargs):
        """Initialise the backend, injecting a placeholder key for local endpoints.

        Args:
            config_class: Optional config class forwarded to the base class.
            **kwargs: Config values forwarded to the base class.
        """
        # Fall back to the default endpoint so the key bypass also applies
        # when the caller relies on the (localhost) default.
        endpoint = kwargs.get("ollama_endpoint") or self._DEFAULT_ENDPOINT
        # An absent or empty key on a local endpoint means "no key".
        if _is_localhost(endpoint) and not kwargs.get("ollama_api_key"):
            kwargs["ollama_api_key"] = self._NOOP_API_KEY
        super().__init__(config_class=config_class, **kwargs)

    def _request_headers(self) -> dict[str, str]:
        """Return the HTTP headers, adding bearer auth only for a real key."""
        headers = {"Content-Type": "application/json"}
        api_key = self.config.ollama_api_key
        if api_key and api_key != self._NOOP_API_KEY:
            headers["Authorization"] = f"Bearer {api_key}"
        return headers

    def _post_url(self) -> str:
        """Return the configured chat-completions endpoint URL."""
        return self.config.ollama_endpoint

    def _ollama_payload(self, messages: list[dict[str, Any]], *, json_mode: bool) -> dict[str, Any]:
        """Build the request body shared by the JSON and plain-text variants.

        Keys from ``ollama_extra_body`` are merged last, so they override any
        built-in key with the same name.
        """
        payload: dict[str, Any] = {
            "model": self.config.model_name,
            "messages": serialize_chat_messages(messages),
            "stream": False,
        }
        if json_mode:
            payload["response_format"] = {"type": "json_object"}
        payload["max_tokens"] = self.config.max_output_tokens
        if self.config.ollama_extra_body:
            payload.update(self.config.ollama_extra_body)
        return payload

    def _build_payload(self, messages: list[dict[str, Any]]) -> dict[str, Any]:
        """Build a request body that asks for a JSON-object response."""
        return self._ollama_payload(messages, json_mode=True)

    def _build_text_payload(self, messages: list[dict[str, Any]]) -> dict[str, Any]:
        """Build a request body with no response-format constraint."""
        return self._ollama_payload(messages, json_mode=False)

    def _request_metrics_input(self, payload: dict[str, Any]) -> list[dict[str, Any]]:
        """Derive metrics input entries from the payload's ``messages``."""
        return metrics_input_from_chat_messages(payload.get("messages") or [])

    def _extract_text(self, payload: dict[str, Any]) -> str:
        """Extract the first choice's text from a response payload."""
        return extract_chat_completions_text(payload)

    def _usage_metrics_from_payload(self, payload: dict[str, Any]) -> dict[str, int]:
        """Extract token-usage metrics from a response payload."""
        return usage_metrics_from_chat_completions(payload)