diff --git a/src/webwright/config/model_ollama.yaml b/src/webwright/config/model_ollama.yaml new file mode 100644 index 0000000..efbaa49 --- /dev/null +++ b/src/webwright/config/model_ollama.yaml @@ -0,0 +1,13 @@ +# Ollama model configuration. +# +# Stack on top of base.yaml: +# python -m webwright.run.cli -c base.yaml -c model_ollama.yaml ... +# +# Connects to a local or cloud-hosted Ollama instance via /v1/chat/completions. +# Set OLLAMA_API_KEY env var for cloud-hosted endpoints that require authentication. +# Override model_name via OLLAMA_MODEL env var or edit inline. + +model: + model_class: ollama + model_name: llama3.2 + ollama_endpoint: http://localhost:11434/v1/chat/completions diff --git a/src/webwright/config/model_ollama_fast.yaml b/src/webwright/config/model_ollama_fast.yaml new file mode 100644 index 0000000..c878467 --- /dev/null +++ b/src/webwright/config/model_ollama_fast.yaml @@ -0,0 +1,11 @@ +# Ollama fast preset — no reasoning, maximum speed. +# +# Stack on top of base.yaml: +# python -m webwright.run.cli -c base.yaml -c model_ollama_fast.yaml ... + +model: + model_class: ollama + model_name: llama3.2 + ollama_endpoint: http://localhost:11434/v1/chat/completions + ollama_extra_body: + reasoning_effort: none diff --git a/src/webwright/config/model_ollama_reason.yaml b/src/webwright/config/model_ollama_reason.yaml new file mode 100644 index 0000000..e5d49e5 --- /dev/null +++ b/src/webwright/config/model_ollama_reason.yaml @@ -0,0 +1,8 @@ +# Ollama reasoning preset — highest quality, slower iteration. 
def serialize_chat_content_part(part: dict[str, Any]) -> dict[str, Any] | None:
    """Convert one internal content part into a chat-completions content part.

    ``input_text`` / ``output_text`` parts become ``{"type": "text", ...}`` and
    ``input_image`` parts become ``{"type": "image_url", ...}`` (``detail``
    defaults to ``"high"``).

    Returns:
        The converted part, or ``None`` for any other part type so the caller
        can drop it.
    """
    part_type = part.get("type")
    if part_type in {"input_text", "output_text"}:
        return {"type": "text", "text": str(part.get("text", "") or "")}
    if part_type == "input_image":
        return {
            "type": "image_url",
            "image_url": {
                "url": str(part.get("image_url", "") or ""),
                "detail": str(part.get("detail", "high") or "high"),
            },
        }
    return None


def serialize_chat_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Convert internal messages into a chat-completions ``messages`` list.

    * Messages with role ``"exit"`` are skipped.
    * ``"system"`` and ``"assistant"`` roles are kept; every other role is sent
      as ``"user"``.
    * String content is passed through; a missing or ``None`` content is sent
      as an empty string.
    * List content is converted part by part (non-dict and unsupported parts
      are dropped). Assistant messages and text-only messages are flattened to
      one newline-joined string; other messages keep the structured part list.

    Raises:
        KeyError: If a message has no ``"role"`` key.
    """
    serialized: list[dict[str, Any]] = []
    for message in messages:
        role = message["role"]
        if role == "exit":
            continue
        mapped_role = role if role in ("system", "assistant") else "user"

        content = message.get("content")
        if content is None:
            # ``.get("content", "")`` still yields None when the key is present
            # with a null value; iterating that would raise TypeError.
            content = ""
        if isinstance(content, str):
            serialized.append({"role": mapped_role, "content": content})
            continue

        converted = (
            serialize_chat_content_part(part)
            for part in content
            if isinstance(part, dict)
        )
        parts = [part for part in converted if part is not None]

        if mapped_role == "assistant" or all(part["type"] == "text" for part in parts):
            # Join text parts only: image parts carry no text and would
            # otherwise contribute stray empty lines to the flattened string.
            text = "\n".join(part["text"] for part in parts if part["type"] == "text")
            serialized.append({"role": mapped_role, "content": text})
        else:
            serialized.append({"role": mapped_role, "content": parts})
    return serialized


def metrics_input_from_chat_messages(chat_messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Rebuild a metrics-friendly view of already-serialized chat messages.

    Each message becomes ``{"content": [...]}`` where text is reported as
    ``input_text`` parts and images as bare ``{"type": "input_image"}``
    markers (the image URL is not copied). A missing or ``None`` content is
    treated as an empty string.
    """
    metrics_input: list[dict[str, Any]] = []
    for message in chat_messages:
        content = message.get("content")
        if content is None:
            content = ""
        if isinstance(content, str):
            metrics_input.append({"content": [{"type": "input_text", "text": content}]})
            continue

        parts: list[dict[str, Any]] = []
        for part in content:
            if not isinstance(part, dict):
                continue
            part_type = part.get("type")
            if part_type == "text":
                parts.append({"type": "input_text", "text": str(part.get("text", "") or "")})
            elif part_type == "image_url":
                parts.append({"type": "input_image"})
        metrics_input.append({"content": parts})
    return metrics_input


def extract_chat_completions_text(payload: dict[str, Any]) -> str:
    """Return the text of the first choice in a chat-completions response.

    String content is returned as-is; list content is reduced to its ``text``
    parts joined with newlines. Any unexpected shape (no choices, non-dict
    choice or message, other content types) yields an empty string.
    """
    choices = payload.get("choices")
    if not isinstance(choices, list) or not choices:
        return ""
    first_choice = choices[0]
    if not isinstance(first_choice, dict):
        return ""
    message = first_choice.get("message", {})
    if not isinstance(message, dict):
        return ""
    content = message.get("content", "")
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return "\n".join(
            str(part.get("text", "") or "")
            for part in content
            if isinstance(part, dict) and part.get("type") == "text"
        )
    return ""


def usage_metrics_from_chat_completions(payload: dict[str, Any]) -> dict[str, int]:
    """Map the ``usage`` object of a chat-completions response to usage metrics.

    ``prompt_tokens`` / ``completion_tokens`` / ``total_tokens`` are converted
    with ``_safe_int``; a missing or non-dict ``usage`` is treated as empty.
    Cached-input and reasoning token counts are always reported as 0.
    """
    usage = payload.get("usage")
    if not isinstance(usage, dict):
        usage = {}
    return {
        "input_tokens": _safe_int(usage.get("prompt_tokens")),
        "output_tokens": _safe_int(usage.get("completion_tokens")),
        "total_tokens": _safe_int(usage.get("total_tokens")),
        "cached_input_tokens": 0,
        "reasoning_output_tokens": 0,
    }
# Endpoint used when the caller does not configure one: a local Ollama server.
_DEFAULT_OLLAMA_ENDPOINT = "http://localhost:11434/v1/chat/completions"

# Placeholder key injected for local servers; it is never sent on the wire
# (see OllamaModel._request_headers).
_NOOP_API_KEY = "sk-noop"


class OllamaModelConfig(BaseModelConfig):
    """Configuration fields specific to the Ollama backend."""

    # Model tag requested from the server.
    model_name: OptStr = "llama3.2"
    # Bearer token; empty means "no key configured".
    ollama_api_key: OptStr = ""
    # Full URL of the chat-completions endpoint.
    ollama_endpoint: OptStr = _DEFAULT_OLLAMA_ENDPOINT
    # Extra top-level request fields merged into every payload.
    # NOTE(review): a shared ``{}`` default is only safe if BaseModelConfig
    # copies mutable defaults per instance (pydantic does) - confirm.
    ollama_extra_body: dict[str, Any] = {}


def _is_localhost(endpoint: str) -> bool:
    """Return True when *endpoint*'s host is the local machine.

    Recognises ``localhost`` (case-insensitive, with or without a trailing
    dot) and any loopback IP literal (all of ``127.0.0.0/8`` and ``::1``).
    Malformed URLs and URLs without a host are reported as not local rather
    than raising.
    """
    import ipaddress  # local import keeps this helper self-contained

    try:
        host = (urlparse(endpoint).hostname or "").lower().rstrip(".")
    except ValueError:
        # urlparse rejects some malformed netlocs, e.g. unbalanced "[".
        return False
    if host == "localhost":
        return True
    try:
        return ipaddress.ip_address(host).is_loopback
    except ValueError:
        # Not an IP literal (or empty): an ordinary, non-local host name.
        return False


class OllamaModel(BaseModel):
    """Model backend that posts chat-completions requests to an Ollama endpoint.

    Local endpoints work without an API key; for other endpoints the key comes
    from the ``ollama_api_key`` config field (the class also declares
    ``OLLAMA_API_KEY`` as its environment variable for the base class).
    """

    _API_KEY_FIELD = "ollama_api_key"
    _ENV_VAR = "OLLAMA_API_KEY"
    _LOG_SOURCE = "ollama"
    _MAX_RATE_LIMIT_RETRIES = 5
    _MAX_TRANSIENT_RETRIES = 5
    _DEFAULT_CONFIG_CLASS = OllamaModelConfig

    def __init__(self, *, config_class: type | None = None, **kwargs):
        """Create the model, supplying a placeholder key for local endpoints.

        When no endpoint is passed, the config default (a localhost URL)
        applies, so the locality check must use that default too; otherwise
        the out-of-the-box local setup would not get the placeholder key.
        An empty or ``None`` key is treated the same as a missing one.
        """
        endpoint = kwargs.get("ollama_endpoint") or _DEFAULT_OLLAMA_ENDPOINT
        if _is_localhost(endpoint) and not kwargs.get("ollama_api_key"):
            # Localhost Ollama doesn't need an API key - skip the key check.
            kwargs["ollama_api_key"] = _NOOP_API_KEY
        super().__init__(config_class=config_class, **kwargs)

    def _request_headers(self) -> dict[str, str]:
        """Return request headers; ``Authorization`` only for a real key."""
        headers = {"Content-Type": "application/json"}
        api_key = self.config.ollama_api_key
        if api_key and api_key != _NOOP_API_KEY:
            headers["Authorization"] = f"Bearer {api_key}"
        return headers

    def _post_url(self) -> str:
        """Return the configured chat-completions URL."""
        return self.config.ollama_endpoint

    def _chat_payload(self, messages: list[dict[str, Any]], *, json_mode: bool) -> dict[str, Any]:
        """Build the request body shared by the JSON and plain-text paths.

        ``ollama_extra_body`` is merged last, so its keys override the
        defaults set here.
        """
        payload: dict[str, Any] = {
            "model": self.config.model_name,
            "messages": serialize_chat_messages(messages),
            "stream": False,
        }
        if json_mode:
            payload["response_format"] = {"type": "json_object"}
        payload["max_tokens"] = self.config.max_output_tokens
        if self.config.ollama_extra_body:
            payload.update(self.config.ollama_extra_body)
        return payload

    def _build_payload(self, messages: list[dict[str, Any]]) -> dict[str, Any]:
        """Build a request body that asks for a JSON-object response."""
        return self._chat_payload(messages, json_mode=True)

    def _build_text_payload(self, messages: list[dict[str, Any]]) -> dict[str, Any]:
        """Build a request body without a response-format constraint."""
        return self._chat_payload(messages, json_mode=False)

    def _request_metrics_input(self, payload: dict[str, Any]) -> list[dict[str, Any]]:
        """Return the metrics view of the messages in a request *payload*."""
        return metrics_input_from_chat_messages(payload.get("messages") or [])

    def _extract_text(self, payload: dict[str, Any]) -> str:
        """Return the response text from a chat-completions *payload*."""
        return extract_chat_completions_text(payload)

    def _usage_metrics_from_payload(self, payload: dict[str, Any]) -> dict[str, int]:
        """Return token-usage metrics from a chat-completions *payload*."""
        return usage_metrics_from_chat_completions(payload)