diff --git a/.jshintrc b/.jshintrc new file mode 100644 index 000000000..7a6412586 --- /dev/null +++ b/.jshintrc @@ -0,0 +1,8 @@ +{ + "esversion": 11, + "undef": true, + "globals": { + "$": false, + "document": false, + } +} \ No newline at end of file diff --git a/VERSION b/VERSION index 006ffd9f6..94144784e 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ -1.54 +1.55 This file should not be modified. It is used by 4CAT to determine whether it needs to run migration scripts to e.g. update the database structure to a more diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py new file mode 100644 index 000000000..1b3c62533 --- /dev/null +++ b/backend/workers/llm_manager.py @@ -0,0 +1,83 @@ +""" +Manage LLM models +""" +from backend.lib.worker import BasicWorker +from common.lib.llm.llm_client import LLMProviderClient + +class LLMProviderManager(BasicWorker): + """ + Manages LLM models + + Periodically refreshes the list of available models from an LLM provider. + Can also pull or delete models on demand when queued with a specific task. + + Job details: + - task: "refresh" (default), "pull", or "delete" + - provider: the URL of the LLM provider, as configured in the + llm.providers setting. 
if not given, run on all providers + + Job remote_id: + - For refresh: "manage-llm-refresh" (periodic) or "manage-llm-manual" (on-demand) + - For pull/delete: the model name to pull or delete + """ + type = "manage-llm" + max_workers = 1 + client = None + + @classmethod + def ensure_job(cls, config=None): + """ + Ensure the daily refresh job is always scheduled + + :return: Job parameters for the worker + """ + return {"remote_id": "manage-llm-refresh", "interval": 86400} + + def work(self): + task = self.job.details.get("task", "refresh") if self.job.details else "refresh" + provider = self.job.details.get("provider", "") if self.job.details else None + model_name = self.job.data["remote_id"] + available_models = None + + for provider_id, provider_config in self.config.get("llm.providers", {}).items(): + if provider and provider != provider_id: + continue + + try: + client = LLMProviderClient.get_client(self.config, provider_config) + except ValueError: + self.log.debug(f"{self.__class__.__name__}: invalid provider type: {provider_config['type']}, skipping") + continue + + # note that technically it is possible to pull/delete a model on + # multiple providers at once (if a model_name is defined but no + # provider). may not be a problem? may be useful one day? 
+ success = False + if task == "pull" and hasattr(client, "pull_model"): + success = client.pull_model(model_name) + + elif task == "delete" and hasattr(client, "delete_model"): + success = client.delete_model(model_name) + + if success or task == "refresh": + # refresh models after pulling/deleting, or when asked to + if available_models is None: + available_models = {} + + for model in client.list_models(): + model = client.build_model_entry(model) + available_models[model["id"]] = model + + self.log.debug(f"{self.__class__.__name__}: ran task '{task}' (model name: {model_name or 'N/A'})") + + elif success is None: + self.log.warning(f"{self.__class__.__name__}: task '{task}' unknown or not supported by client") + else: + self.log.warning(f"{self.__class__.__name__}: task '{task}' failed for model {model_name}") + + if available_models is not None: + enabled_and_available = set(available_models.keys()) & set(self.config.get("llm.enabled_models", [])) + self.config.set("llm.available_models", available_models) + self.config.set("llm.enabled_models", list(enabled_and_available)) + + self.job.finish() diff --git a/backend/workers/refresh_items.py b/backend/workers/refresh_items.py index 8a56c213f..7ab11645d 100644 --- a/backend/workers/refresh_items.py +++ b/backend/workers/refresh_items.py @@ -1,78 +1,26 @@ """ Refresh items """ -import json - -import requests - from backend.lib.worker import BasicWorker class ItemUpdater(BasicWorker): """ Refresh 4CAT items - Refreshes settings that are dependent on external factors + Refreshes settings that are dependent on external factors. + LLM model refreshing is handled by the OllamaManager worker. """ type = "refresh-items" max_workers = 1 - @classmethod - def ensure_job(cls, config=None): - """ - Ensure that the refresher is always running - - This is used to ensure that the refresher is always running, and if it is - not, it will be started by the WorkerManager. 
- - :return: Job parameters for the worker - """ - return {"remote_id": "refresh-items", "interval": 60} + # ensure_job is intentionally disabled: this worker currently does nothing + # and would only create unnecessary job queue churn. Re-enable when work() + # has actual tasks to perform. + # @classmethod + # def ensure_job(cls, config=None): + # return {"remote_id": "refresh-items", "interval": 60} def work(self): - # Refresh items - self.refresh_settings() - + # Placeholder – no tasks implemented yet. self.job.finish() - - def refresh_settings(self): - """ - Refresh settings - """ - # LLM server settings - llm_provider = self.config.get("llm.provider_type", "none").lower() - llm_server = self.config.get("llm.server", "") - - # For now we only support the Ollama API - if llm_provider == "ollama" and llm_server: - headers = {"Content-Type": "application/json"} - llm_api_key = self.config.get("llm.api_key", "") - llm_auth_type = self.config.get("llm.auth_type", "") - if llm_api_key and llm_auth_type: - headers[llm_auth_type] = llm_api_key - - available_models = {} - try: - response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) - if response.status_code == 200: - settings = response.json() - for model in settings.get("models", []): - model = model["name"] - try: - model_metadata = requests.post(f"{llm_server}/api/show", headers=headers, json={"model": model}, timeout=10).json() - available_models[model] = { - "name": f"{model_metadata['model_info'].get('general.basename', model)} ({model_metadata['details']['parameter_size']} parameters)", - "model_card": f"https://ollama.com/library/{model}", - "provider": "local" - } - - except (requests.RequestException, json.JSONDecodeError, KeyError) as e: - self.log.debug(f"Could not get metadata for model {model} from Ollama - skipping (error: {e})") - - self.config.set("llm.available_models", available_models) - self.log.debug("Refreshed LLM server settings cache") - else: - self.log.warning(f"Could 
not refresh LLM server settings cache - server returned status code {response.status_code}") - - except requests.RequestException as e: - self.log.warning(f"Could not refresh LLM server settings cache - request error: {str(e)}") \ No newline at end of file diff --git a/common/assets/llms.json b/common/assets/llms.json index 835dbaa09..c17351488 100644 --- a/common/assets/llms.json +++ b/common/assets/llms.json @@ -1,128 +1,140 @@ -{ - "none": { - "name": "", - "model_card": "", - "provider": "", - "default": true - }, - "custom": { - "name": "[custom]", - "model_card": "", - "provider": "" - }, - "gpt-5.4": { - "name": "[OpenAI] GPT-5.4", - "model_card": "https://platform.openai.com/docs/models/gpt-5.4", - "provider": "openai" - }, - "gpt-5-mini": { - "name": "[OpenAI] GPT-5 mini", - "model_card": "https://platform.openai.com/docs/models/gpt-5-mini", - "provider": "openai" - }, - "gpt-5-nano": { - "name": "[OpenAI] GPT-5 nano", - "model_card": "https://platform.openai.com/docs/models/gpt-5-nano", - "provider": "openai" - }, - "gpt-5.4-pro": { - "name": "[OpenAI] GPT-5.4 Pro", - "model_card": "https://platform.openai.com/docs/models/gpt-5.4-pro", - "provider": "openai" - }, - "gpt-4.1-mini": { - "name": "[OpenAI] GPT-4.1 mini", - "model_card": "https://platform.openai.com/docs/models/gpt-4.1-mini", - "provider": "openai" - }, - "gpt-4.1-nano": { - "name": "[OpenAI] GPT-4.1 nano", - "model_card": "https://platform.openai.com/docs/models/gpt-4.1-nano", - "provider": "openai" - }, - "gpt-4.1": { - "name": "[OpenAI] GPT-4.1", - "model_card": "https://platform.openai.com/docs/models/gpt-4.1", - "provider": "openai" - }, - "gpt-4o-mini": { - "name": "[OpenAI] GPT-4o mini", - "model_card": "https://platform.openai.com/docs/models/gpt-4o-mini", - "provider": "openai" - }, - "gpt-4o": { - "name": "[OpenAI] GPT-4o", - "model_card": "https://platform.openai.com/docs/models/gpt-4o", - "provider": "openai" - }, - "gemini-3.1-pro-preview": { - "name": "[Google] Gemini 3.1 Pro", - 
"model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-pro", - "provider": "google" - }, - "gemini-3-flash-preview": { - "name": "[Google] Gemini 3 Flash", - "model_card": "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-flash", - "provider": "google" - }, - "gemini-3.1-flash-lite-preview": { - "name": "[Google] Gemini 3.1 Flash Lite", - "provider": "google", - "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-flash-lite" - }, - "claude-opus-4-6": { - "name": "[Anthropic] Claude Opus 4.6 (latest)", - "model_card": "https://www.anthropic.com/claude/opus", - "provider": "anthropic" - }, - "claude-sonnet-4-6": { - "name": "[Anthropic] Claude Sonnet 4.6 (latest)", - "model_card": "https://www.anthropic.com/claude/sonnet", - "provider": "anthropic" - }, - "claude-4-5-haiku": { - "name": "[Anthropic] Claude 4.5 Haiku (latest)", - "model_card": "https://www.anthropic.com/claude/haiku", - "provider": "anthropic" - }, - "magistral-small-2509": { - "name": "[Mistral] Magistral Small 1.2 (25.09)", - "model_card": "https://docs.mistral.ai/models/magistral-small-1-2-25-09", - "provider": "mistral" - }, - "magistral-medium-2509": { - "name": "[Mistral] Magistral Medium 1.2 (25.09)", - "model_card": "https://docs.mistral.ai/models/magistral-medium-1-2-25-09", - "provider": "mistral" - }, - "mistral-small-2506": { - "name": "[Mistral] Mistral Small 3.2 (25.06)", - "model_card": "https://docs.mistral.ai/models/mistral-small-3-2-25-06", - "provider": "mistral" - }, - "mistral-medium-2508": { - "name": "[Mistral] Mistral Medium 3.1 (25.08)", - "model_card": "mistral-medium-2508", - "provider": "mistral" - }, - "mistral-large-2512": { - "name": "[Mistral] Mistral Large 3 (25.12)", - "model_card": "https://docs.mistral.ai/models/mistral-large-3-25-12", - "provider": "mistral" - }, - "open-mistral-nemo-2407": { - "name": "[Mistral] Mistral Nemo 12B", - "model_card": 
"https://docs.mistral.ai/models/mistral-nemo-12b-24-07", - "provider": "mistral" - }, - "deepseek-chat": { - "name": "[DeepSeek] DeepSeek latest (non-reasoning)", - "model_card": "https://api-docs.deepseek.com/quick_start/pricing", - "provider": "deepseek" - }, - "deepseek-reasoner": { - "name": "[DeepSeek] DeepSeek latest (reasoning)", - "model_card": "https://api-docs.deepseek.com/quick_start/pricing", - "provider": "deepseek" - } -} \ No newline at end of file +[ + { + "model": "gpt-5.4", + "name": "[OpenAI] GPT-5.4", + "model_card": "https://platform.openai.com/docs/models/gpt-5.4", + "provider": "openai" + }, + { + "model": "gpt-5-mini", + "name": "[OpenAI] GPT-5 mini", + "model_card": "https://platform.openai.com/docs/models/gpt-5-mini", + "provider": "openai" + }, + { + "model": "gpt-5-nano", + "name": "[OpenAI] GPT-5 nano", + "model_card": "https://platform.openai.com/docs/models/gpt-5-nano", + "provider": "openai" + }, + { + "model": "gpt-5.4-pro", + "name": "[OpenAI] GPT-5.4 Pro", + "model_card": "https://platform.openai.com/docs/models/gpt-5.4-pro", + "provider": "openai" + }, + { + "model": "gpt-4.1-mini", + "name": "[OpenAI] GPT-4.1 mini", + "model_card": "https://platform.openai.com/docs/models/gpt-4.1-mini", + "provider": "openai" + }, + { + "model": "gpt-4.1-nano", + "name": "[OpenAI] GPT-4.1 nano", + "model_card": "https://platform.openai.com/docs/models/gpt-4.1-nano", + "provider": "openai" + }, + { + "model": "gpt-4.1", + "name": "[OpenAI] GPT-4.1", + "model_card": "https://platform.openai.com/docs/models/gpt-4.1", + "provider": "openai" + }, + { + "model": "gpt-4o-mini", + "name": "[OpenAI] GPT-4o mini", + "model_card": "https://platform.openai.com/docs/models/gpt-4o-mini", + "provider": "openai" + }, + { + "model": "gpt-4o", + "name": "[OpenAI] GPT-4o", + "model_card": "https://platform.openai.com/docs/models/gpt-4o", + "provider": "openai" + }, + { + "model": "gemini-3.1-pro-preview", + "name": "[Google] Gemini 3.1 Pro", + "model_card": 
"https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-pro", + "provider": "google" + }, + { + "model": "gemini-3-flash-preview", + "name": "[Google] Gemini 3 Flash", + "model_card": "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-flash", + "provider": "google" + }, + { + "model": "gemini-3.1-flash-lite-preview", + "name": "[Google] Gemini 3.1 Flash Lite", + "provider": "google", + "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-flash-lite" + }, + { + "model": "claude-opus-4-6", + "name": "[Anthropic] Claude Opus 4.6 (latest)", + "model_card": "https://www.anthropic.com/claude/opus", + "provider": "anthropic" + }, + { + "model": "claude-sonnet-4-6", + "name": "[Anthropic] Claude Sonnet 4.6 (latest)", + "model_card": "https://www.anthropic.com/claude/sonnet", + "provider": "anthropic" + }, + { + "model": "claude-4-5-haiku", + "name": "[Anthropic] Claude 4.5 Haiku (latest)", + "model_card": "https://www.anthropic.com/claude/haiku", + "provider": "anthropic" + }, + { + "model": "magistral-small-2509", + "name": "[Mistral] Magistral Small 1.2 (25.09)", + "model_card": "https://docs.mistral.ai/models/magistral-small-1-2-25-09", + "provider": "mistral" + }, + { + "model": "magistral-medium-2509", + "name": "[Mistral] Magistral Medium 1.2 (25.09)", + "model_card": "https://docs.mistral.ai/models/magistral-medium-1-2-25-09", + "provider": "mistral" + }, + { + "model": "mistral-small-2506", + "name": "[Mistral] Mistral Small 3.2 (25.06)", + "model_card": "https://docs.mistral.ai/models/mistral-small-3-2-25-06", + "provider": "mistral" + }, + { + "model": "mistral-medium-2508", + "name": "[Mistral] Mistral Medium 3.1 (25.08)", + "model_card": "mistral-medium-2508", + "provider": "mistral" + }, + { + "model": "mistral-large-2512", + "name": "[Mistral] Mistral Large 3 (25.12)", + "model_card": "https://docs.mistral.ai/models/mistral-large-3-25-12", + "provider": "mistral" + }, + { + 
"model": "open-mistral-nemo-2407", + "name": "[Mistral] Mistral Nemo 12B", + "model_card": "https://docs.mistral.ai/models/mistral-nemo-12b-24-07", + "provider": "mistral" + }, + { + "model": "deepseek-chat", + "name": "[DeepSeek] DeepSeek latest (non-reasoning)", + "model_card": "https://api-docs.deepseek.com/quick_start/pricing", + "provider": "deepseek" + }, + { + "model": "deepseek-reasoner", + "name": "[DeepSeek] DeepSeek latest (reasoning)", + "model_card": "https://api-docs.deepseek.com/quick_start/pricing", + "provider": "deepseek" + } +] \ No newline at end of file diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index aef363e04..0635a4c97 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -9,6 +9,7 @@ """ from common.lib.user_input import UserInput +import re config_definition = { "datasources.intro": { @@ -575,61 +576,87 @@ # allows 4CAT LLM processors to connect to a local or remote LLM server "llm.intro": { "type": UserInput.OPTION_INFO, - "help": "4CAT LLM processors allow users to utilize common APIs (e.g. OpenAI, Google, Anthropic) as well as connect " - "to local or remote LLM servers. You can also set up your own LLM server using open source software such as " - "[Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for your users." - }, - "llm.host_name": { - "type": UserInput.OPTION_TEXT, - "default": "4CAT LLM Server", - "help": "Name of LLM Server in UI", - "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).", - "global": True - }, - "llm.provider_type": { - "type": UserInput.OPTION_CHOICE, - "help": "LLM Provider Type", - "default": "none", - "options": { - "ollama": "Ollama", - "none": "None", + "help": "4CAT LLM processors allow users to utilize common APIs (e.g. OpenAI, Google, Anthropic) as well as " + "connect to local or remote LLM servers. 
You can also set up your own LLM server using open source " + "software such as [Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for " + "your users. After configuring providers you can enable and disable available models via the 'LLMs & " + "Providers' page in the Control Panel." + }, + "llm.providers": { + "type": UserInput.OPTION_MULTI_OPTION, + "default": { + "thirdparty-models": { + "name": "Third-party APIs (OpenAI, Google, Claude, Mistral, etc)", + "type": "api", + "url": "", + "auth_header": "", + "auth_key": "" + } }, "global": True, - }, - "llm.server": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "LLM Server URL", - "tooltip": "The URL of the LLM server, e.g. http://localhost:5000", - "global": True - }, - "llm.auth_type": { - "type": UserInput.OPTION_TEXT, - "help": "LLM Server Authentication Type", - "default": "", - "tooltip": "The authentication type required to connect to the server (e.g. 'X-API-KEY', 'Authorization'). Passed in the request header with the API key.", - "global": True, - }, - "llm.api_key": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "LLM Server API Key", - "tooltip": "The API key to access the LLM server, if required.", - "global": True + "help": "LLM providers", + "dict_key": lambda v: re.sub(r"[^0-9a-zA-Z ]", "", v["name"]).lower().replace(" ", "-") + (("-" + v["url"].split("/")[2].lower()) if "://" in v["url"] else ""), + "options": { + "name": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "Name of LLM Server in UI", + "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).", + }, + "type": { + "type": UserInput.OPTION_CHOICE, + "help": "LLM Provider Type", + "default": "none", + "options": { + "ollama": "Ollama", + "litellm": "LiteLLM", + "openai-like": "OpenAI compatible API (LM Studio, vLLM, etc)", + "api": "Third-party models from OpenAI, Anthropic, Mistral, etc", + "none": "None", + }, + 
}, + "url": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "LLM Server URL", + "tooltip": "The URL of the LLM server, e.g. http://localhost:5000. Must start with a schema (e.g. 'https://').", + }, + "auth_header": { + "type": UserInput.OPTION_TEXT, + "help": "Authentication Header", + "default": "", + "tooltip": "The HTTP header used to authenticate with the server (e.g. 'X-API-KEY', 'Authorization'). Passed with the Authentication Key as value.", + }, + "auth_key": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "Authentication Key", + "tooltip": "The API key to access the LLM server, if required.", + }, + } }, "llm.available_models": { "type": UserInput.OPTION_TEXT_JSON, "default": {}, "help": "Available LLM models", - "tooltip": "A JSON dictionary of available LLM models on the server. 4CAT will query the LLM server for available models periodically.", + "tooltip": "A JSON dictionary of available LLM models on the server. Refreshed daily by the OllamaManager worker.", + "indirect": True, + "global": True + }, + "llm.enabled_models": { + "type": UserInput.OPTION_TEXT_JSON, + "default": [], + "help": "Enabled LLM models", + "tooltip": "List of model keys enabled for use. Managed via the LLM Server settings panel.", "indirect": True, "global": True }, "llm.access": { "type": UserInput.OPTION_TOGGLE, - "help": "LLM Access", + "help": "Local LLM Access", "default": False, - "tooltip": "Use tags or individual users to allow access to the LLM server (or set True in global for all).", + "tooltip": "If disabled, can only use LLMs from the 'Third-party models' provider. Can be configured per user " + "or tag.", }, # TODO: add setting to restrict models per user/group? 
@@ -739,5 +766,5 @@ "proxies": "Proxied HTTP requests", "image-visuals": "Image visualization", "extensions": "Extensions", - "llm": "LLM Server Settings" + "llm": "LLM Providers" } diff --git a/common/lib/llm/__init__.py b/common/lib/llm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/common/lib/llm.py b/common/lib/llm/adapter.py similarity index 55% rename from common/lib/llm.py rename to common/lib/llm/adapter.py index 0901194d1..8e4c7bc26 100644 --- a/common/lib/llm.py +++ b/common/lib/llm/adapter.py @@ -1,9 +1,10 @@ import json import base64 import mimetypes -import requests + from pathlib import Path from typing import List, Optional, Union + from pydantic import SecretStr from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage from langchain_core.language_models.chat_models import BaseChatModel @@ -18,111 +19,86 @@ class LLMAdapter: def __init__( self, - provider: str, - model: str, + config, + model, api_key: Optional[str] = None, - base_url: Optional[str] = None, temperature: float = 0.1, max_tokens: int = 1000, client_kwargs: Optional[dict] = None, ): """ - provider: 'openai', 'google', 'mistral', 'ollama', 'lmstudio', 'anthropic', 'deepseek' - model: model name (e.g., 'gpt-4o-mini', 'claude-3-opus', 'mistral-small', etc.) 
- api_key: API key if required (OpenAI, Claude, Google, Mistral) - base_url: for local models or Mistral custom endpoints - temperature: temperature hyperparameter, - max_tokens: how many output tokens may be used - client_kwargs: additional client parameters + Instantiate an adapter to interface with an LLM model + + :param config: 4CAT config reader + :param model: Model metadata (as in `llm.available_models` 4CAT setting) + :param api_key: API key, if needed + :param temperature: Temperature hyperparameter + :param max_tokens: Max tokens to generate + :param client_kwargs: Optional parameters for the LLM adapter class """ - self.provider = provider.lower() + known_providers = config.get("llm.providers", {}) + self.model = model + self.provider = known_providers.get(model['provider']) self.api_key = api_key - self.base_url = base_url self.temperature = temperature self.structured_output = False self.parser = None self.max_tokens = max_tokens self.client_kwargs = dict(client_kwargs) if client_kwargs else {} + self.llm: BaseChatModel = self._load_llm() def _load_llm(self) -> BaseChatModel: - if self.provider == "openai": - kwargs = {} - if "o3" not in self.model: - kwargs["temperature"] = self.temperature # temperature not supported for all models - return ChatOpenAI( - model=self.model, - api_key=SecretStr(self.api_key), - base_url=self.base_url or "https://api.openai.com/v1", - max_tokens=self.max_tokens, - **kwargs - ) - elif self.provider == "google": - return ChatGoogleGenerativeAI( - model=self.model, - temperature=self.temperature, - google_api_key=self.api_key, - max_tokens=self.max_tokens - ) - elif self.provider == "anthropic": - return ChatAnthropic( - model_name=self.model, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - max_tokens=self.max_tokens, - timeout=100, - stop=None - ) - elif self.provider == "mistral": - return ChatMistralAI( - model_name=self.model, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - 
base_url=self.base_url, # Optional override - max_tokens=self.max_tokens, - ) - elif self.provider == "deepseek": - return ChatDeepSeek( - model=self.model, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - base_url=self.base_url, - max_tokens=self.max_tokens if self.max_tokens <= 8192 else 8192, - ) - elif self.provider == "ollama": - ollama_adapter = ChatOllama( - model=self.model, - temperature=self.temperature, - base_url=self.base_url or "http://localhost:11434", - max_tokens=self.max_tokens, - client_kwargs=self.client_kwargs - ) - self.model = ollama_adapter.model - return ollama_adapter - elif self.provider in {"vllm", "lmstudio"}: - # OpenAI-compatible local servers - if self.provider == "lmstudio" and not self.api_key: - self.api_key = "lm-studio" - - # For vLLM, query the server to get the actual model name. We can't leave this empty, unfortunately. - if self.provider == "vllm" and self.model=="vllm_model": - model_name = self.get_vllm_model_name(self.base_url, self.api_key) - self.model = model_name - else: - model_name = self.model if self.model else "lmstudio-model" - - llm = ChatOpenAI( - model=model_name, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - base_url=self.base_url, - max_tokens=self.max_tokens, - ) - self.model = llm.model_name - return llm + chat_params = { + "model": self.model["local_id"], + "api_key": SecretStr(self.api_key), + "base_url": self.provider["url"], + "max_tokens": self.max_tokens, + "temperature": self.temperature, + } + + if self.provider["type"] == "openai": + if "o3" in self.model: + del chat_params["temperature"] + adapter_class = ChatOpenAI + + elif self.provider["type"] == "google": + adapter_class = ChatGoogleGenerativeAI + + elif self.provider["type"] == "anthropic": + chat_params.update({"timeout": 100, "stop": None}) + adapter_class = ChatAnthropic + + elif self.provider["type"] == "mistral": + adapter_class = ChatMistralAI + + elif self.provider["type"] == "deepseek": + 
chat_params["max_tokens"] = min(self.max_tokens, 8192) + adapter_class = ChatDeepSeek + + elif self.provider["type"] == "ollama": + adapter_class = ChatOllama + chat_params.update({"client_kwargs": self.client_kwargs}) + + elif self.provider["type"] in {"litellm", "openai-like"}: + url = f"{self.provider['url']}/" if not self.provider["url"].endswith("/") else self.provider['url'] + url += "v1/" if not url.endswith("v1/") else "" + + chat_params.update({"base_url": url}) + if self.provider["auth_header"]: + chat_params.update({ + "default_headers": { + self.provider["auth_header"]: self.provider["auth_key"] + } + }) + + adapter_class = ChatOpenAI + else: - raise ValueError(f"Unsupported LLM provider: {self.provider}") + raise ValueError(f"{self.__class__.__name__} Unsupported LLM provider type: {self.provider['type']}") + + return adapter_class(**chat_params) def generate_text( self, @@ -161,7 +137,8 @@ def generate_text( lc_messages = messages kwargs = {"temperature": temperature} - if self.provider in ("google", "ollama") or "o3" in self.model or "gpt-5" in self.model: + if self.provider["type"] in ("google", "ollama") or "o3" in self.model["local_id"] or "gpt-5" in self.model[ + "local_id"]: kwargs = {} try: @@ -172,10 +149,10 @@ def generate_text( return response def create_multimodal_content( - self, - text: str, - media_urls: Optional[List[str]] = None, - media_files: Optional[List[Union[str, Path]]] = None, + self, + text: str, + media_urls: Optional[List[str]] = None, + media_files: Optional[List[Union[str, Path]]] = None, ) -> List[dict]: """ Create multimodal content structure for LangChain messages with media URLs @@ -224,11 +201,11 @@ def create_multimodal_content( return content def _format_media_block( - self, - url: Optional[str] = None, - b64_data: Optional[str] = None, - mime_type: str = "image/jpeg", - media_category: str = "image", + self, + url: Optional[str] = None, + b64_data: Optional[str] = None, + mime_type: str = "image/jpeg", + 
media_category: str = "image", ) -> dict: """ Format a single media block for the appropriate provider. @@ -239,7 +216,7 @@ def _format_media_block( :param media_category: "image", "video", or "audio" :returns: Provider-formatted content block """ - if self.provider == "anthropic": + if self.provider["type"] == "anthropic": if media_category == "image": if url: return {"type": "image", "source": {"type": "url", "url": url}} @@ -255,13 +232,13 @@ def _format_media_block( return {"type": "document", "source": { "type": "base64", "media_type": mime_type, "data": b64_data }} - elif self.provider == "google": + elif self.provider["type"] == "google": if url: return {"type": "image_url", "image_url": {"url": url}} else: data_uri = f"data:{mime_type};base64,{b64_data}" return {"type": "image_url", "image_url": {"url": data_uri}} - elif self.provider == "ollama": + elif self.provider["type"] == "ollama": if media_category != "image": raise ValueError(f"Ollama provider only supports image media, got category '{media_category}'") if url: @@ -281,7 +258,7 @@ def _format_media_block( return {"type": "image_url", "image_url": {"url": url}} else: data_uri = f"data:{mime_type};base64,{b64_data}" - if media_category == "audio" and self.provider == "openai": + if media_category == "audio" and self.provider["type"] == "openai": return {"type": "input_audio", "input_audio": { "data": b64_data, "format": mime_type.split("/")[-1] }} @@ -297,38 +274,13 @@ def set_structure(self, json_schema): json.dumps(json_schema) # To validate / raise an error # LM Studio needs some more guidance - if self.provider == "lmstudio": + if self.provider["type"] == "lmstudio": json_schema = {"type": "json_schema", "json_schema": {"schema": json_schema}} self.llm = self.llm.bind(response_format=json_schema) else: self.llm = self.llm.with_structured_output(json_schema) self.structured_output = True - @staticmethod - def get_model_options(config) -> dict: - """ - Returns model choice options for UserInput - 
""" - models = LLMAdapter.get_models(config) - if not models: - return {} - options = {model_id: model_values["name"] for model_id, model_values in models.items()} - return options - - @staticmethod - def get_model_providers(config) -> dict: - """ - Returns available model providers through APIs - """ - models = LLMAdapter.get_models(config) - if not models: - return {} - providers = list(set([model_values.get("provider", "") for model_values in models.values()])) - if not providers: - return {} - options = {provider: provider.capitalize() for provider in providers if provider} - return options - @staticmethod def get_models(config) -> dict: """ @@ -337,36 +289,6 @@ def get_models(config) -> dict: :returns dict, A dict with model IDs as keys and details as values """ - with ( - config.get("PATH_ROOT") - .joinpath("common/assets/llms.json") - .open() as available_models - ): - available_models = json.loads(available_models.read()) - return available_models - - - @staticmethod - def get_vllm_model_name(base_url: str, api_key: str = None) -> str: - """ - Query vLLM server to get the name of the served model. 
- """ - - try: - # vLLM exposes available models at /v1/models endpoint - models_url = f"{base_url.rstrip('/')}/models" - headers = {} - if api_key: - headers["Authorization"] = f"Bearer {api_key}" - - response = requests.get(models_url, headers=headers, timeout=10) - response.raise_for_status() - models_data = response.json() - - # Get the first available model - if models_data.get("data") and len(models_data["data"]) > 0: - return models_data["data"][0]["id"] - else: - raise ValueError("No models found on vLLM server") - except Exception as e: - raise ValueError(f"Could not retrieve model name from vLLM server: {e}") + available_models = config.get("llm.available_models", {}) + enabled_models = config.get("llm.enabled_models", {}) + return {k: v for k, v in available_models.items() if k in enabled_models} diff --git a/common/lib/llm/clients/__init__.py b/common/lib/llm/clients/__init__.py new file mode 100644 index 000000000..4287ca861 --- /dev/null +++ b/common/lib/llm/clients/__init__.py @@ -0,0 +1 @@ +# \ No newline at end of file diff --git a/common/lib/llm/clients/litellm_client.py b/common/lib/llm/clients/litellm_client.py new file mode 100644 index 000000000..cf65497ff --- /dev/null +++ b/common/lib/llm/clients/litellm_client.py @@ -0,0 +1,60 @@ +""" +Centralized HTTP client for communicating with a LiteLLM server. + +This class owns all direct HTTP calls to LiteLLM's REST API and provides shared +static helpers for capability parsing, display-name formatting, and building +canonical llm.available_models entries. It is a plain helper with no 4CAT +base-class dependency. + +This class is primarily intended for interfacing with LiteLLM, but since +LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface +with the OpenAI API as well. 
+""" +from common.lib.llm.llm_client import LLMProviderClient + +class LiteLLMClient(LLMProviderClient): + type = "litellm" + + _models_info_path = "/model/info" + _models_info_key = "data" + _model_id_key = "model_name" + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its LiteLLM metadata. + + :param meta: ``model info`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` + """ + if meta is None or not meta.get("model_info"): + return [] + + media_types = {"text"} # far as I can tell, text is always supported + if meta["model_info"].get("supports_vision"): + media_types.add("image") + + if meta["model_info"].get("supports_audio_input"): + media_types.add("audio") + + # no way to tell if model supports embeddings input as far as I can see... + + return list(media_types) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. + """ + model_name = self.get_global_model_id(meta) + + if meta.get("model_name"): + model_name = meta["model_name"] + + if meta["litellm_params"].get("model"): + model_name = "/".join(meta["litellm_params"].get("model").split("/")[1:]) + + return model_name \ No newline at end of file diff --git a/common/lib/llm/clients/ollama_client.py b/common/lib/llm/clients/ollama_client.py new file mode 100644 index 000000000..e21297448 --- /dev/null +++ b/common/lib/llm/clients/ollama_client.py @@ -0,0 +1,182 @@ +""" +Centralized HTTP client for communicating with an Ollama server. + +This class owns all direct HTTP calls to Ollama's REST API and provides shared static +helpers for capability parsing, display-name formatting, and building canonical +llm.available_models entries. 
It is a plain helper with no 4CAT base-class dependency. +""" +import requests + +from common.lib.llm.llm_client import LLMProviderClient + + +class OllamaClient(LLMProviderClient): + type = "ollama" + + _models_info_path = "/api/tags" + _models_info_key = "models" + _model_id_key = "model" + + def list_models(self) -> list[dict]: + """ + List all models available. + + For Ollama, get some additional model info via an extra API request. + + :return list[dict]: List of models available.: + """ + models = super().list_models() + result = [] + for model in models: + try: + model_info = self._session.post( + f"{self.base_url}/api/show", + json={"model": model[self._model_id_key]}, + headers=self._headers, + timeout=self.timeout, + ).json() + result.append({**model, "metadata": model_info}) + except (requests.exceptions.HTTPError, KeyError) as e: + self.log.warning( + f"{self.__class__.__name__}: failed to fetch additional model info for model {model[self._model_id_key]}: {e}") + + return result + + + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """Derive the media types a model supports from its Ollama metadata. + + **Primary path**: reads ``meta["capabilities"]``: + - ``"completion"`` → ``"text"`` + - ``"vision"`` → ``"image"`` + - ``"embedding"`` → ``"embedding"`` + + **Fallback path** (used when capabilities are absent or only yield ``"text"``): + inspects GGUF ``model_info`` / ``details`` for vision signals and adds + ``"image"`` if any are found. + + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` (unknown — callers + should include the model, not block it). 
+ """ + if meta is None or not meta.get("metadata"): + return [] + + capabilities = meta["metadata"].get("capabilities", []) + media_types: list[str] = [] + + _cap_map = { + "completion": "text", + "vision": "image", + "embedding": "embedding", + } + for cap in capabilities: + mapped = _cap_map.get(cap) + if mapped and mapped not in media_types: + media_types.append(mapped) + + # Fallback: GGUF-level vision signals when capabilities list gives no image info + if "image" not in media_types: + details = meta.get("details", {}) + model_info = meta.get("model_info", {}) + projector_info = meta.get("projector_info") + + has_clip_family = "clip" in (details.get("families") or []) + has_vision_keys = any(k.startswith("vision.") for k in model_info) + has_projector = bool(projector_info) + + if has_clip_family or has_vision_keys or has_projector: + media_types.append("image") + + return media_types + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. 
+ """ + model_name = self.get_model_id(meta) + + extra_bits = [] + if meta.get("metadata") and meta["metadata"].get("model_info"): + more_meta = meta["metadata"]["model_info"] + if more_meta.get("general.basename"): + model_name = more_meta["general.basename"] + + if more_meta.get("general.finetune"): + extra_bits.append(more_meta["general.finetune"]) + + if more_meta.get("general.size_label"): + extra_bits.append(more_meta["general.size_label"]) + + elif meta.get("details") and meta["details"].get("parameter_size"): + extra_bits.append(f"{meta['details']['parameter_size']} parameters") + + model_name += f" ({', '.join(extra_bits)})" + + return model_name + + def get_model_card_url(self, meta: dict) -> str: + """ + Get a URL for a model card for a given model + + :param meta: Model metadata + :return str: Model card URL (empty string if unavailable) + """ + return f"https://ollama.com/library/{meta['model']}" + + def pull_model(self, model_id: str, stream: bool = False) -> bool: + """Pull a model from the Ollama registry. + + :param model_id: Model name (e.g. ``"llama3:8b"``). + :param stream: Whether to stream the response (default ``False``). + :returns: ``True`` on success, ``False`` on failure. + """ + try: + r = self._session.post( + f"{self.base_url}/api/pull", + headers=self._headers, + json={"model": model_id, "stream": stream}, + timeout=600, + ) + + if r.status_code != 200 and self.log: + self.log.warning( + f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") + + return r.status_code == 200 + + except requests.RequestException as e: + if self.log: + self.log.warning( + f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}: {e}") + + return False + + def delete_model(self, model_id: str) -> bool: + """Delete a model from the Ollama server. + + :param model_id: Model name (e.g. ``"llama3:8b"``). + :returns: ``True`` on success, ``False`` on failure. 
+ """ + try: + r = self._session.delete( + f"{self.base_url}/api/delete", + headers=self._headers, + json={"model": model_id}, + timeout=30, + ) + if r.status_code != 200 and self.log: + self.log.warning( + f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") + return r.status_code == 200 + except requests.RequestException as e: + if self.log: + self.log.warning( + f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}: {e}") + return False diff --git a/common/lib/llm/clients/openai_client.py b/common/lib/llm/clients/openai_client.py new file mode 100644 index 000000000..f8701dd7c --- /dev/null +++ b/common/lib/llm/clients/openai_client.py @@ -0,0 +1,61 @@ +""" +Centralized HTTP client for communicating with an OpenAI compatible server. + +This class owns all direct HTTP calls to an OpenAI style REST API and provides shared +static helpers for capability parsing, display-name formatting, and building +canonical llm.available_models entries. It is a plain helper with no 4CAT +base-class dependency. +""" +from common.lib.llm.llm_client import LLMProviderClient + + +class LMStudioClient(LLMProviderClient): + type = "openai-like" + + _models_info_path = "/api/v1/models" + _models_info_key = "models" + _model_id_key = "key" + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its LiteLLM metadata. + + :param meta: ``model info`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` + """ + media_types = {"text"} # far as I can tell, text is always supported + + if meta is None or not meta.get("capabilities"): + return list(media_types) + + if meta["capabilities"].get("vision"): + media_types.add("image") + + # no way to tell if model supports embeddings input as far as I can see... 
+ + return list(media_types) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. + """ + model_name = self.get_model_id(meta) + + if meta.get("display_name"): + model_name = meta["display_name"] + + extra_bits = [] + if meta.get("publisher"): + extra_bits.append(meta["publisher"]) + + if meta.get("params_string"): + extra_bits.append(meta["params_string"]) + + model_name += f" ({', '.join(extra_bits)})" + + return model_name diff --git a/common/lib/llm/clients/thirdparty_client.py b/common/lib/llm/clients/thirdparty_client.py new file mode 100644 index 000000000..2a2db4dc3 --- /dev/null +++ b/common/lib/llm/clients/thirdparty_client.py @@ -0,0 +1,66 @@ +""" +Fake 'client' to read from local store of known 3d party, API-based LLMs that +can be used with 4CAT +""" +import json + +from common.lib.llm.llm_client import LLMProviderClient + + +class ThirdPartyClient(LLMProviderClient): + type = "api" + + _models_info_key = "models" + _model_id_key = "model" + + def get_status(self): + return 200 + + def list_models(self) -> dict: + with self.config.get("PATH_ROOT").joinpath("common/assets/llms.json").open() as infile: + models = json.load(infile) + + return models + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its LiteLLM metadata. + + :param meta: ``model info`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` + """ + return meta.get("supported_media_types", ["text"]) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). 
+ :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. + """ + return meta["name"] + + def build_model_entry(self, meta: dict) -> dict: + """ + Build a canonical ``llm.available_models`` entry for a model. + + :param model_id: Raw model identifier. + :param display_name: Human-readable name (from ``format_display_name``). + :param meta: ``/api/show`` response dict, or ``None`` if unavailable. + :returns: Dict ready to store under ``llm.available_models[model_id]``. + """ + entry = super().build_model_entry(meta) + entry["provider_key"] = meta["provider"] + + return entry + + def get_model_card_url(self, meta: dict) -> str: + """ + Get a URL for a model card for a given model + + :param meta: Model metadata + :return str: Model card URL (empty string if unavailable) + """ + return meta["model_card"] if meta["model_card"] else "" diff --git a/common/lib/llm/llm_client.py b/common/lib/llm/llm_client.py new file mode 100644 index 000000000..b31cb035a --- /dev/null +++ b/common/lib/llm/llm_client.py @@ -0,0 +1,194 @@ +""" +Centralized HTTP client for communicating with an LLM provider. + +This class owns all direct HTTP calls to the provider's REST API and provides +shared static helpers for capability parsing, display-name formatting, and +building canonical llm.available_models entries. It is a plain helper with no +4CAT base-class dependency. +""" + +from abc import abstractmethod + +import requests + + +class LLMProviderClient: + _headers = {} + provider_config = {} + + @staticmethod + def get_client(config, provider_config: dict) -> "LLMProviderClient": + """ + Get a client for an LLM provider + + Returns the appropriate sub-class depending on the provider type. 
+ + :param config: 4CAT config reader + :param dict provider_config: Provider parameters, as configured in + 4CAT + :return LLMProviderClient: + """ + # in-line import because we otherwise get circular import shenanigans + from common.lib.llm.clients.ollama_client import OllamaClient + from common.lib.llm.clients.litellm_client import LiteLLMClient + from common.lib.llm.clients.openai_client import LMStudioClient + from common.lib.llm.clients.thirdparty_client import ThirdPartyClient + + for client_type in (OllamaClient, LiteLLMClient, LMStudioClient, ThirdPartyClient): + if client_type.type == provider_config["type"]: + return client_type(config, provider_config) + + raise ValueError(f"LLMProviderClient: Unknown provider type {provider_config['type']}") + + def __init__(self, config, provider_config: dict, timeout: int = 10, log=None) -> None: + """ + HTTP client for an LLM Provider + + :param dict provider_config: Provider parameters, as configured in 4CAT + :param int timeout: Default request timeout in seconds. + :param Logger log: 4CAT log handler + """ + self.config = config + self.provider_config = provider_config + + self.timeout = timeout + self.auth_type = provider_config.get("auth_header") + self.auth_key = provider_config.get("auth_key") + self.timeout = timeout + + self.base_url = provider_config["url"].rstrip("/") + if self.base_url.endswith("v1"): + # get rid of the 'v1' - we'll add this in the path + self.base_url = f"{self.base_url[:-2]}" + + self._session = requests.Session() + self._headers = {"Content-Type": "application/json"} + + if self.auth_type: + self._headers[self.auth_type] = self.auth_key + + self.log = log + + def get_status(self) -> bool | int: + """ + Check if the server is reachable and responding to requests + + :return: `False` if the server is not responding, or an HTTP status code. 
+ """ + try: + r = self._session.get( + f"{self.base_url}{self._models_info_path}", + headers=self._headers, + timeout=self.timeout, + ) + if self.log and r.status_code != 200: + self.log.warning( + f"{self.__class__.__name__}: server responded with status code {r.status_code} during availability check: {r.text}") + return r.status_code + except requests.RequestException as e: + if self.log: + self.log.warning(f"{self.__class__.__name__}: server is not available at {self.base_url}: {e}") + return False + + def list_models(self) -> list[dict]: + """List available models from the Ollama server. + + :returns: List of model dicts, or ``[]`` on failure. + """ + try: + r = self._session.get( + f"{self.base_url}{self._models_info_path}", + headers=self._headers, + timeout=self.timeout, + ) + if r.status_code == 200: + return r.json().get(self._models_info_key, []) + if self.log: + self.log.warning( + f"{self.__class__.__name__}: failed to list models from {self.base_url}, status code {r.status_code}: {r.text}") + except requests.RequestException as e: + if self.log: + self.log.warning(f"{self.__class__.__name__}: failed to list models from {self.base_url}: {e}") + return [] + + def build_model_entry(self, meta: dict) -> dict: + """ + Build a canonical ``llm.available_models`` entry for a model. + + :param model_id: Raw model identifier. + :param display_name: Human-readable name (from ``format_display_name``). + :param meta: ``/api/show`` response dict, or ``None`` if unavailable. + :returns: Dict ready to store under ``llm.available_models[model_id]``. 
+ """ + return { + "id": self.get_global_model_id(meta), + "local_id": self.get_model_id(meta), + "name": self.format_display_name(meta), + "model_card": self.get_model_card_url(meta), + "provider": self.provider_config["_id"], + "supported_media_types": self.parse_supported_media_types(meta), + "metadata": meta, + } + + def get_model_card_url(self, meta: dict) -> str: + """ + Get a URL for a model card for a given model + + :param meta: Model metadata + :return str: Model card URL (empty string if unavailable) + """ + return "" + + @abstractmethod + def parse_supported_media_types(self, meta: dict) -> list[str]: + """Derive the media types a model supports from its Ollama metadata. + + **Primary path**: reads ``meta["capabilities"]``: + - ``"completion"`` → ``"text"`` + - ``"vision"`` → ``"image"`` + - ``"embedding"`` → ``"embedding"`` + + **Fallback path** (used when capabilities are absent or only yield ``"text"``): + inspects GGUF ``model_info`` / ``details`` for vision signals and adds + ``"image"`` if any are found. + + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` (unknown — callers + should include the model, not block it). + """ + pass + + @abstractmethod + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param dict meta: Model metadata + :returns str: Human-readable display name string. + """ + pass + + def get_model_id(self, meta: dict) -> str: + """ + Choose a model identifier based on model metadata. + + This is the ID within the provider context, i.e. it is not guaranteed + to be globally unique (use `get_global_model_id()` instead). + + :param dict meta: Model metadata + :return str: Model ID + """ + return meta[self._model_id_key] + + def get_global_model_id(self, meta: dict) -> str: + """ + Choose a model identifier based on model metadata. 
+ + This needs to be a *globally* unique ID, i.e. if multiple providers + provide the same model, the ID should still be unique per provider. + + :param dict meta: Model metadata + :return str: Model ID + """ + return "-".join((self.provider_config["type"], self.provider_config["url"], self.get_model_id(meta))) \ No newline at end of file diff --git a/common/lib/user_input.py b/common/lib/user_input.py index 7fcb6bcb9..a6fe10458 100644 --- a/common/lib/user_input.py +++ b/common/lib/user_input.py @@ -4,6 +4,7 @@ import json import re +from itertools import chain class RequirementsNotMetException(Exception): """ @@ -26,6 +27,7 @@ class UserInput: OPTION_TEXT = "string" # simple string or integer (input text) OPTION_MULTI = "multi" # multiple values out of a list (select multiple) OPTION_MULTI_SELECT = "multi_select" # multiple values out of a dropdown list (select multiple) + OPTION_MULTI_OPTION = "multi_option" # several instances of a collection of controls OPTION_INFO = "info" # just a bit of text, not actual input OPTION_TEXT_LARGE = "textarea" # longer text OPTION_TEXT_JSON = "json" # text, but should be valid JSON @@ -181,6 +183,45 @@ def parse_all(options, input, silently_correct=True): parsed_input[option] = table_input + elif settings.get("type") == UserInput.OPTION_MULTI_OPTION: + # these are collections of other input options that can be + # repeated an arbitrary amount of times and are saved as a + # list of these values + # i.e. forms within forms!!! 
+ item_options = settings["options"] + input_items = {} + for key, value in input.items(): + if key_match := re.match(f"{option}-([0-9]+)-(.+)", key): + input_index = int(key_match[1]) + # note: the index is just used to match inputs to items + # it is not used for ordering + option_item = key_match[2] + if option_item not in item_options: + continue + + if input_index not in input_items: + input_items[input_index] = {} + + input_items[input_index][option_item] = UserInput.parse_value(item_options[option_item], value, input_items[input_index], silently_correct) + + # discard items that are only default values + parsed_input[option] = [] + for input_index, item in input_items.items(): + only_default = True + for key, value in item.items(): + if value != item_options[key]["default"]: + only_default = False + + if not only_default: + parsed_input[option].append(item) + + # may define a mapper to make this a dict + if settings.get("dict_key"): + if callable(settings["dict_key"]): + parsed_input[option] = {settings["dict_key"](value): {**value, "_id": settings["dict_key"](value)} for value in parsed_input[option]} + else: + parsed_input[option] = {value[settings["dict_key"]]: {**value, "_id": value[settings["dict_key"]]} for value in parsed_input[option]} + elif option not in input: # not provided? 
use default
                 parsed_input[option] = settings.get("default", None)
@@ -392,9 +433,16 @@ def parse_value(settings, choice, other_input=None, silently_correct=True):
             # select box
             # one out of multiple options
             # return option if valid, or default
-            if choice not in settings.get("options"):
+            options = settings.get("options") or {}
+
+            # options may be categorised, i.e. {category: {value: label}}; if so
+            # the valid values are the keys of the nested dicts. use a list, not
+            # a one-shot iterator: it is read for the test *and* for the message
+            is_categorised = isinstance(options, dict) and all(isinstance(o, dict) for o in options.values())
+            match_options = list(chain.from_iterable(options.values())) if is_categorised else list(options)
+            if choice not in match_options:
                 if not silently_correct:
-                    raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(settings.get('options', {}).keys())}. {settings}")
+                    raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(map(str, match_options))}.")
                 else:
                     return settings.get("default", "")
             else:
diff --git a/docker-compose_ollama.yml b/docker-compose_ollama.yml
new file mode 100644
index 000000000..020b12c96
--- /dev/null
+++ b/docker-compose_ollama.yml
@@ -0,0 +1,54 @@
+# Use this file as an override to add a local Ollama instance to your 4CAT stack.
+#
+# Usage:
+#   docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d
+#
+# Once running, configure 4CAT via the Control Panel → Settings → LLM:
+#   LLM Provider Type : ollama
+#   LLM Server URL : http://ollama:11434
+#
+# GPU support (NVIDIA):
+#   Uncomment the `deploy.resources` block in the ollama service below and
+#   ensure the NVIDIA Container Toolkit is installed on your host.
+#   See: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
+#
+# GPU support (Apple Silicon / AMD):
+#   Pass the appropriate device through your host's Docker settings instead.
+#   Ollama will automatically detect the GPU when it is available inside the container.
+ +services: + ollama: + image: ollama/ollama:latest + container_name: 4cat_ollama + restart: unless-stopped + volumes: + - 4cat_ollama:/root/.ollama + # Expose the Ollama API on the host for optional external access or + # management with the Ollama CLI. Remove this block if you want to keep + # Ollama accessible only within the Docker network. + ports: + - "127.0.0.1:11434:11434" + healthcheck: + test: ["CMD", "ollama", "ls"] + interval: 10s + timeout: 5s + retries: 5 + # --- NVIDIA GPU support (uncomment to enable) --- + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: all + # capabilities: [gpu] + + # Make the 4CAT backend wait for Ollama to be healthy before starting. + # This prevents initial model-refresh failures on first boot. + backend: + depends_on: + ollama: + condition: service_healthy + +volumes: + 4cat_ollama: + name: 4cat_ollama_data diff --git a/docker/README.md b/docker/README.md index c10444aa7..6ad7e069b 100644 --- a/docker/README.md +++ b/docker/README.md @@ -69,3 +69,76 @@ https://github.com/docker/buildx/issues/426 https://stackoverflow.com/questions/64221861/failed-to-resolve-with-frontend-dockerfile-v0 4. More errors coming soon! (No doubt) + +--- + +## Running a local Ollama instance alongside 4CAT + +4CAT can use a local [Ollama](https://ollama.com) server for LLM-powered processors. +A Docker Compose override file (`docker-compose_ollama.yml`) is included to add +Ollama as a sidecar service so you do not need to run it separately on the host. + +### Quick start + +```bash +docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d +``` + +This starts the standard 4CAT stack plus an `ollama` container that is only +accessible within the Docker network (and optionally on `localhost:11434` on +the host via the exposed port). 
+ +### Configuring 4CAT to use Ollama + +#### Automatic configuration (fresh Docker install with sidecar) + +When you start 4CAT for the first time using the Ollama override file, the +`docker_setup.py` initialisation script automatically detects the `ollama` +sidecar and sets **LLM Provider Type**, **LLM Server URL**, and **LLM Access** +for you. You can skip to step 2 below. + +#### Manual configuration (or to verify/change settings) + +1. Log in as admin and open **Control Panel → Settings → LLM Providers**. +2. Confirm that a provider with the following settings is present: + + | Setting | Value | + |---|---| + | LLM Provider Type | `ollama` | + | LLM Server URL | `http://ollama:11434` | + | LLM Access | enabled | + +3. Save settings. +4. Open **Control Panel → LLMs & Providers** (visible once *LLM Access* is enabled). +5. Use the **Refresh** button to load available models, then **Pull** a model + (e.g. `llama3.2:3b`) to download it from the Ollama library. +6. Enable the models you want to make available to users. + +### GPU support (NVIDIA) + +Uncomment the `deploy.resources` block in `docker-compose_ollama.yml` and +ensure the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) +is installed on your host. Then restart the stack with the override: + +```bash +docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d +``` + +### Persisting models + +Models downloaded by Ollama are stored in the `4cat_ollama_data` Docker volume. +They survive container restarts and re-creations unless you explicitly remove +the volume (`docker volume rm 4cat_ollama_data`). + +### Using an external Ollama server + +If you already run Ollama on the host or elsewhere, skip the override file and +point 4CAT directly at that server: + +- **On the same host**: use `http://host.docker.internal:11434` as the LLM Server URL. 
+- **Remote server**: use the server's reachable URL and configure any required + API key in the *Authentication header* and *Authentication key* settings. + +In both cases, configure the LLM settings manually via **Control Panel → Settings** +(see *Manual configuration* above), using the appropriate server URL instead of +`http://ollama:11434`. diff --git a/docker/docker_setup.py b/docker/docker_setup.py index 450684602..6de76add0 100644 --- a/docker/docker_setup.py +++ b/docker/docker_setup.py @@ -28,7 +28,8 @@ def update_config_from_environment(CONFIG_FILE, config_parser): config_parser['SERVER']['public_port'] = os.environ['PUBLIC_PORT'] # Set API - config_parser['API']['api_host'] = os.environ['API_HOST'] # set in .env; should be backend container_name in docker-compose.py unless frontend and backend are running together in one container + config_parser['API']['api_host'] = os.environ[ + 'API_HOST'] # set in .env; should be backend container_name in docker-compose.py unless frontend and backend are running together in one container # Database configuration config_parser['DATABASE']['db_name'] = os.environ['POSTGRES_DB'] @@ -110,14 +111,16 @@ def _format_host(host: str) -> str: # Backend API config_parser.add_section('API') - config_parser['API']['api_port'] = '4444' # backend internal port set in docker-compose.py; NOT API_PUBLIC_PORT as that is what port Docker exposes to host network + config_parser['API'][ + 'api_port'] = '4444' # backend internal port set in docker-compose.py; NOT API_PUBLIC_PORT as that is what port Docker exposes to host network # File paths # Docker volumes are defined in docker-compose.yml; these rely on one shared volume `data` in the 4CAT root directory config_parser.add_section('PATHS') config_parser['PATHS']['path_images'] = 'data/images' # shared volume defined in docker-compose.yml config_parser['PATHS']['path_data'] = 'data/datasets' # shared volume defined in docker-compose.yml - config_parser['PATHS']['path_lockfile'] = 
'backend' # docker-entrypoint.sh looks for pid file here (in event Docker shutdown was not clean) + config_parser['PATHS'][ + 'path_lockfile'] = 'backend' # docker-entrypoint.sh looks for pid file here (in event Docker shutdown was not clean) config_parser['PATHS']['path_sessions'] = 'data/sessions' # shared volume defined in docker-compose.yml config_parser['PATHS']['path_logs'] = 'data/logs/' # shared volume defined in docker-compose.yml @@ -137,7 +140,8 @@ def _format_host(host: str) -> str: config = ConfigManager() config.with_db(Database(logger=None, appname="docker-setup", - dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, host=config.DB_HOST, port=config.DB_PORT)) + dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, + host=config.DB_HOST, port=config.DB_PORT)) for path in [config.get('PATH_DATA'), config.get('PATH_IMAGES'), @@ -187,10 +191,12 @@ def _format_host(host: str) -> str: # Check to see if flask.server_name needs to be updated from common.config_manager import ConfigManager from common.lib.database import Database + config = ConfigManager() config.with_db(Database(logger=None, appname="docker-setup", - dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, host=config.DB_HOST, port=config.DB_PORT)) - + dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, + host=config.DB_HOST, port=config.DB_PORT)) + public_port = int(config_parser['SERVER']['public_port']) # Port handling here is independent from HTTPS; default is 80 default_port = 80 @@ -201,12 +207,47 @@ def _format_host(host: str) -> str: # Warn only when localhost/IP lacks a required non-default port if existing_port is None and _is_ip_or_localhost(host) and public_port != default_port: formatted_host = _format_host(host) - print(f"Exposed PUBLIC_PORT {public_port} from .env file not included in Server Name; if you are not using a reverse proxy, you may need to update the Server Name variable.") + print( + 
f"Exposed PUBLIC_PORT {public_port} from .env file not included in Server Name; if you are not using a reverse proxy, you may need to update the Server Name variable.")
             print(
                 "You can do so by running the following command if you do not have access to the 4CAT frontend Control Panel:\n"
                 f"docker exec 4cat_backend python -c \"from common.config_manager import ConfigManager;config=ConfigManager();config.with_db();config.set('flask.server_name', '{formatted_host}:{public_port}');config.db.commit();\""
             )
 
+    # If an Ollama container is available on the Docker network, configure 4CAT to use it.
+    ollama_url = 'http://ollama:11434'
+    ollama_id = f"ollama-{ollama_url.split('/')[-1]}"
+    try:
+        import requests
+
+        try:
+            resp = requests.get(f"{ollama_url}/api/tags", timeout=2)
+            if resp.status_code == 200:
+                # llm.providers is a dict of {provider ID: provider settings}; it may not be set yet
+                current_llm_providers = config.get("llm.providers") or {}
+                if any(p.get("url") == ollama_url for p in current_llm_providers.values()):
+                    print("Ollama server already configured in 4CAT settings.")
+                else:
+                    # set basic LLM settings so the initial admin user does not need to
+                    # configure them manually for local development environments that
+                    # include the Ollama sidecar.
+                    current_llm_providers[ollama_id] = {
+                        "name": "Ollama Server (4CAT, via Docker)",
+                        "url": ollama_url,
+                        "type": "ollama",
+                        "auth_header": "",
+                        "auth_key": "",
+                        "_id": ollama_id
+                    }
+                    config.set('llm.providers', current_llm_providers)
+                    config.db.commit()
+                    print('Detected Ollama on Docker network; configured LLM settings to use it.')
+        except requests.RequestException:
+            # Ollama not available; do nothing
+            pass
+    except Exception:
+        # requests other error; skip automatic Ollama configuration
+        pass
+
     print(f"\nStarting app\n"
           f"4CAT is accessible at:\n"
           f"{'https' if config.get('flask.https', False) else 'http'}://{config.get('flask.server_name')}\n")
diff --git a/extensions b/extensions
new file mode 120000
index 000000000..c25d13e68
--- /dev/null
+++ b/extensions
@@ -0,0 +1 @@
+/Users/stijn/surfdrive/PycharmProjects/4cat/config/extensions
\ No newline at end of file
diff --git a/helper-scripts/migrate/migrate-1.54-1.55.py b/helper-scripts/migrate/migrate-1.54-1.55.py
new file mode 100644
index 000000000..7a5074774
--- /dev/null
+++ b/helper-scripts/migrate/migrate-1.54-1.55.py
@@ -0,0 +1,100 @@
+import json
+import sys
+import os
+
+from pathlib import Path
+
+sys.path.insert(0, os.path.join(os.path.abspath(os.path.dirname(__file__)), "../.."))
+from common.lib.database import Database
+from common.lib.logger import Logger
+
+import configparser # noqa: E402
+
+log = Logger(output=True)
+ini = configparser.ConfigParser()
+ini.read(Path(__file__).parent.parent.parent.resolve().joinpath("config/config.ini"))
+db_config = ini["DATABASE"]
+
+db = Database(
+    logger=log,
+    dbname=db_config["db_name"],
+    user=db_config["db_user"],
+    password=db_config["db_password"],
+    host=db_config["db_host"],
+    port=db_config["db_port"],
+    appname="4cat-migrate",
+)
+
+def setting_value(row, default=""):
+    """
+    Decode the value of a row from the settings table
+
+    :param row:  Row as returned by `db.fetchone()`; `None` if no such setting
+    :param default:  Returned when the setting is absent or has no value
+    :return:  Decoded setting value, or the default
+    """
+    if not row:
+        return default
+
+    try:
+        # setting values are stored JSON-encoded (see the insert below)
+        value = json.loads(row["value"])
+    except (TypeError, ValueError):
+        value = row["value"]
+
+    return default if value is None else value
+
+
+# the separate LLM server settings were consolidated into one overarching 'llm.providers' setting
+print(" Checking if llm.providers setting exists...")
+has_setting = db.fetchone(
+    "SELECT COUNT(*) AS num FROM settings WHERE name = 'llm.providers'"
+)
+
+if has_setting["num"] > 0:
+    print(" ...exists, deleting old settings without overwriting")
+else:
+    print(" ...does not exist, filling with currently configured proviers")
+    provider_type = setting_value(db.fetchone("SELECT value FROM settings WHERE name = 'llm.provider_type'"), None)
+    providers = {}
+    if not provider_type:
+        print(" ...no provider currently configured")
+    else:
+        url = setting_value(db.fetchone("SELECT value FROM settings WHERE name = 'llm.server'"))
+        host = url.split("/")[2] if "://" in url else "localhost"
+        auth_header = setting_value(db.fetchone("SELECT value FROM settings WHERE name = 'llm.auth_type'"))
+        auth_key = setting_value(db.fetchone("SELECT value FROM settings WHERE name = 'llm.auth_key'"))
+        provider_name = setting_value(db.fetchone("SELECT value FROM settings WHERE name = 'llm.host_name'"))
+        provider_id = f"{provider_type}-{host}"
+
+        # vLLM and LM Studio are both openai-like
+        provider_type = {"ollama": "ollama"}.get(provider_type, "openai-like")
+        providers[provider_id] = {
+            "name": provider_name,
+            "type": provider_type,
+            "url": url,
+            "auth_header": auth_header,
+            "auth_key": auth_key,
+            "_id": provider_id
+        }
+
+    # add API models, always present
+    providers["thirdparty-models"] = {
+        "name": "Third-party models",
+        "type": "api",
+        "url": "",
+        "auth_header": "",
+        "auth_key": "",
+        "_id": "thirdparty-models"
+    }
+
+    db.insert("settings", {"name": "llm.providers", "value": json.dumps(providers)})
+    print(f" ...added {len(providers)} providers")
+
+print(" Cleaning up old settings")
+db.execute("DELETE FROM settings WHERE name LIKE 'llm.%' AND name NOT IN ('llm.providers', 'llm.available_models', 'llm.access')")
+
+print(" Removing all known models (will be re-indexed on 4CAT restart)")
+db.upsert("settings", {"name": "llm.available_models", "value": "{}"})
+
+print(" - done!")
diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py
index c2bd0d02e..5b4080e76 100644
--- 
a/processors/machine_learning/llm_prompter.py
+++ b/processors/machine_learning/llm_prompter.py
@@ -5,6 +5,8 @@
 import re
 import time
 import json
+from itertools import chain
+
 import jsonschema
 import requests
 
@@ -16,7 +18,7 @@
 from common.lib.item_mapping import MappedItem
 from common.lib.exceptions import ProcessorInterruptedException, QueryParametersException, QueryNeedsExplicitConfirmationException
 from common.lib.helpers import UserInput, nthify, andify, remove_nuls, flatten_dict
-from common.lib.llm import LLMAdapter
+from common.lib.llm.adapter import LLMAdapter
 from backend.lib.processor import BasicProcessor
 
 class LLMPrompter(BasicProcessor):
@@ -55,46 +57,52 @@ def get_queue_id(cls, remote_id, details, dataset) -> str:
         local_queue = "local_models"
         if not dataset:
             return local_queue
+
+        model = dataset.parameters.get("model") or ""
+        if model.startswith("api"):
+            # API-based models have their own queue - no local resources being
+            # used so can be concurrent
+            return f"llm-api-{dataset.key}"
         else:
-            if dataset.parameters.get('api_or_local', 'api') in ["local", "hosted"]:
-                # Hosted models also go in the local queue since they use the same shared LLM server
-                return local_queue
-
-            # Queue per model/API type
-            return f"{cls.type}-{dataset.parameters.get('api_or_local', 'api')}-{dataset.parameters.get('api_model', 'none')}"
+            # use the provider URL (second part of the global model ID) as the
+            # queue ID; not fool-proof, but it means no more than one dataset
+            # runs per server at a time (which is good for local servers).
+            # IDs without such a part (e.g. 'none') use the shared local queue
+            id_parts = model.split("-")
+            return f"llm-local-{id_parts[1]}" if len(id_parts) > 1 else local_queue
+
+    @classmethod
+    def get_model_library(cls, config):
+        available_models = config.get("llm.available_models", {})
+        enabled_model_ids = config.get("llm.enabled_models", [])
+        providers = config.get("llm.providers", {})
+        if not config.get("llm.access"):
+            enabled_model_ids = [_ for _ in enabled_model_ids
if _.startswith("api-")] + + models_option = {} + for key, value in {k: v for k, v in available_models.items() if k in enabled_model_ids}.items(): + provider = providers[value["provider"]] + if provider["name"] not in models_option: + models_option[provider["name"]] = {} + + models_option[provider["name"]][key] = value["name"] + + return models_option @classmethod def get_options(cls, parent_dataset=None, config=None) -> dict: # Check if 4CAT wide LLM server is available - if config.get("llm.access", False) and config.get("llm.server", ""): - shared_llm_name = config.get("llm.host_name", "4CAT LLM Server") - shared_llm_models = {model: model_metadata.get("name") for model, model_metadata in config.get("llm.available_models", {}).items()} - shared_llm_default = list(shared_llm_models.keys())[0] if shared_llm_models else "" - else: - shared_llm_name = False - shared_llm_default = "" - shared_llm_models = {} + models = cls.get_model_library(config) # Determine if the parent dataset is a media archive (zip with images/video/audio) is_media_parent = False media_type = "media" - hosted_and_local_available = True if parent_dataset: parent_extension = parent_dataset.get_extension() parent_media_type = parent_dataset.get_media_type() if parent_extension == "zip" and parent_media_type in ("image", "video", "audio"): is_media_parent = True media_type = parent_media_type - if parent_media_type in ("video", "audio"): - # Ollama and LM Studio currently only support text and image - hosted_and_local_available = False - - # Add additional sources for LLM Models - api_or_local_options = {"api": "API"} - if hosted_and_local_available: - api_or_local_options["local"] = "Local" - if shared_llm_name: - api_or_local_options["hosted"] = shared_llm_name options = { "ethics_warning1": { @@ -102,21 +110,12 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: "help": "Always test your prompt on a sample of rows, for instance by first using the " "Random filter processor.", }, - 
"api_or_local": { - "type": UserInput.OPTION_CHOICE, - "help": "Local or API", - "options": api_or_local_options, - "default": "api" if not shared_llm_name else "hosted", - "tooltip": "You can use 'local' models through Ollama and LM Studio as long as you have a valid " - "and accessible URL through which the model can be reached.", - }, - "api_model": { + "model": { "type": UserInput.OPTION_CHOICE, "help": "API model", - "options": LLMAdapter.get_model_options(config), + "options": models, "default": "none", "tooltip": "Select from the predefined model list or insert manually", - "requires": "api_or_local==api", }, "api_key": { "type": UserInput.OPTION_TEXT, @@ -124,282 +123,194 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: "help": "API key", "tooltip": "Create an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization" "/api-keys). Note that this often involves billing.", - "requires": "api_or_local==api", - "sensitive": True, - }, - "api_custom_model_provider": { - "type": UserInput.OPTION_CHOICE, - "help": "Model provider", - "requires": "api_model==custom", - "options": LLMAdapter.get_model_providers(config), - "tooltip": "API provider. Currently limited to this list.", - }, - "api_custom_model_id": { - "type": UserInput.OPTION_TEXT, - "help": "Model ID", - "requires": "api_model==custom", - "tooltip": "E.g. 'mistral-small-2503'. Check the API provider's documentation on what model ID to use. " - "Fine-tuned models often require more info; OpenAI for instance requires the following " - "format: ft:[modelname]:[org_id]:[custom_suffix]:", - "default": "", - }, - "local_info": { - "type": UserInput.OPTION_INFO, - "requires": "api_or_local==local", - "help": "You can use local LLMs with LM Studio, Ollama, and vLLM. These applications need to be reachable by " - "this 4CAT server, e.g. by running them on the same machine. 
For LM Studio and vLLM, " - "use the Base URL to interface with any OpenAI-like API endpoint.", - }, - "local_provider": { - "type": UserInput.OPTION_CHOICE, - "requires": "api_or_local==local", - "options": { - "none": "", - "lmstudio": "LM Studio", - "ollama": "Ollama", - "vllm": "vLLM", - }, - "default": "none", - "help": "Local LLM provider", - }, - "lmstudio-info": { - "type": UserInput.OPTION_INFO, - "requires": "local_provider==lmstudio", - "help": "LM Studio is a desktop application to chat with LLMs, but that you can also run as a local " - "server. See [this link for intructions on how to run LM Studio as a server](https://lmstudio.ai/docs/" - "app/api). When the server is running, the endpoint is shown in the 'Developer' tab on the top " - "right (default: `http://localhost:1234/v1` or `http://host.docker.internal:1234/v1` in Docker). " - "4CAT will use the top-most model you have loaded. ", - }, - "ollama-info": { - "type": UserInput.OPTION_INFO, - "requires": "local_provider==ollama", - "help": "Ollama is a simple command-line application that lets you interface with a range of open-" - "source LLMs and that you can run as a local server. See [this link]" - "(https://github.com/ollama/ollama/blob/main/README.md#quickstart) for instructions.", - }, - "vllm-info": { - "type": UserInput.OPTION_INFO, - "requires": "local_provider==ollama", - "help": "[vLLM](https://docs.vllm.ai/en/latest/getting_started/quickstart/) is a framework for Linux " - "systems capable of fast inference with a single LLM. Communication is done through an " - "OpenAI-like API endpoint. 
Just change the base URL below and insert an optional API key.", - }, - "local_base_url": { - "type": UserInput.OPTION_TEXT, - "requires": "api_or_local==local", - "default": "", - "help": "Base URL", - "tooltip": "[optional] Leaving this empty will use default values (`http://localhost:1234/v1` or `http://host.docker.internal:1234/v1` for LM " - "Studio, `http://localhost:11434` or `http://host.docker.internal:11434` for Ollama, `http://localhost:8000` or `http://host.docker.internal:8000` for vLLM ).", - }, - "lmstudio_api_key": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "LM Studio API key", - "tooltip": "[optional] Uses `lm-studio` by default.", - "requires": "local_provider==lmstudio", + "requires": "model^=api", "sensitive": True, - }, - "vllm_api_key": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "vLLM API key", - "tooltip": "[optional] Empty by default.", - "requires": "local_provider==vllm", - "sensitive": True, - }, - "ollama_model": { - "type": UserInput.OPTION_TEXT, - "requires": "local_provider==ollama", - "default": "", - "help": "Ollama model name", - "tooltip": "[required] for example 'llama3.2'", - }, - "hosted_llm_model": { - "type": UserInput.OPTION_CHOICE, - "help": "LLM model", - "options": shared_llm_models, - "default": shared_llm_default, - "requires": "api_or_local==hosted", - }, + } } if is_media_parent: # Media-specific options: show info about media files being attached - options["media_info"] = { - "type": UserInput.OPTION_INFO, - "help": f"The parent dataset contains {media_type} files that will be sent " - f"to the LLM with each prompt. Make sure to use a model that supports " - f"{media_type} input (e.g. vision models for images).
" - f"Not all models support all media types. If the model cannot process " - f"{media_type} files, an error will be returned during processing.", - } - options["system_prompt"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "System prompt", - "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " - "on the tone of the text. This processor may edit the system prompt to " - "ensure correct output. System prompts are included in the results file.", - "default": "", - } - options["prompt"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "User prompt", - "tooltip": f"Describe what the model should do with each {media_type} file. " - f"No column brackets needed — {media_type} files are attached automatically.", - "default": "", - } + options.update({ + "media_info": { + "type": UserInput.OPTION_INFO, + "help": f"The parent dataset contains {media_type} files that will be sent " + f"to the LLM with each prompt. Make sure to use a model that supports " + f"{media_type} input (e.g. vision models for images).
" + f"Not all models support all media types. If the model cannot process " + f"{media_type} files, an error will be returned during processing.", + }, + "system_prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "System prompt", + "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " + "on the tone of the text. This processor may edit the system prompt to " + "ensure correct output. System prompts are included in the results file.", + "default": "", + }, + "prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "User prompt", + "tooltip": f"Describe what the model should do with each {media_type} file. " + f"No column brackets needed — {media_type} files are attached automatically.", + "default": "", + } + }) + else: - # Text-based dataset options: column brackets, media URL toggle, batching - options["prompt_info"] = { + options.update({ + # Text-based dataset options: column brackets, media URL toggle, batching + "prompt_info": { + "type": UserInput.OPTION_INFO, + "help": "How to prompt
" + "Use `[brackets]` with column names to insert dataset items in the prompt. You " + "can place column brackets in different parts of the prompt or use multiple column names within" + ' a single column bracket to merge items.
Example 1: "Describe the topic ' + 'of this social media post in max. 3 words: `[body, tags]`"
Example 2: ' + "\"Given the following hashtags: `[tags]`, answer whether they are 'related' or 'unrelated' " + 'to the following text: `[body]`"
Prompting is a delicate art. See ' + "processor references on best prompting practices.
For predefined research prompts, see " + "e.g. [Prompt Compass](https://github.com/ErikBorra/PromptCompass/blob/main/prompts.json#L136) " + "or the [Anthropic Prompt Library](https://docs.anthropic.com/en/resources/prompt-library/" + "library).", + }, + "system_prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "System prompt", + "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " + "on the tone of the text. This processor may edit the system prompt to " + "ensure correct output. System prompts are included in the results file.", + "default": "", + }, + "prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "User prompt", + "tooltip": "Use [brackets] with columns names.", + "default": "", + }, + "use_media": { + "type": UserInput.OPTION_TOGGLE, + "help": "Add images", + "tooltip": "Add media URLs for multi-modal processing. Requires a model that supports vision.", + "default": False, + }, + "media_columns": { + "type": UserInput.OPTION_TEXT, + "help": "Columns with image URL(s)", + "default": "", + "inline": True, + "tooltip": "Multiple columns can be selected.", + "requires": "use_media==true", + } + }) + + # Common options for both text and media datasets + options.update({ + "structured_output": { + "type": UserInput.OPTION_TOGGLE, + "help": "Output structured JSON", + "tooltip": "Output in a JSON format instead of text. Note that your chosen model may not support " + "structured output.", + "default": False, + }, + "json_schema_info": { "type": UserInput.OPTION_INFO, - "help": "How to prompt
" - "Use `[brackets]` with column names to insert dataset items in the prompt. You " - "can place column brackets in different parts of the prompt or use multiple column names within" - ' a single column bracket to merge items.
Example 1: "Describe the topic ' - 'of this social media post in max. 3 words: `[body, tags]`"
Example 2: ' - "\"Given the following hashtags: `[tags]`, answer whether they are 'related' or 'unrelated' " - 'to the following text: `[body]`"
Prompting is a delicate art. See ' - "processor references on best prompting practices.
For predefined research prompts, see " - "e.g. [Prompt Compass](https://github.com/ErikBorra/PromptCompass/blob/main/prompts.json#L136) " - "or the [Anthropic Prompt Library](https://docs.anthropic.com/en/resources/prompt-library/" - "library).", - } - options["system_prompt"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "System prompt", - "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " - "on the tone of the text. This processor may edit the system prompt to " - "ensure correct output. System prompts are included in the results file.", - "default": "", - } - options["prompt"] = { + "help": "Insert a JSON Schema for structured outputs. These define the output that " + "the LLM will adhere to. [See instructions and examples on how to write a JSON Schema]" + "(https://json-schema.org/learn/miscellaneous-examples) and [OpenAI's documentation]" + "(https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-schemas).", + "requires": "structured_output==true", + }, + "json_schema": { "type": UserInput.OPTION_TEXT_LARGE, - "help": "User prompt", - "tooltip": "Use [brackets] with columns names.", + "help": "JSON schema", + "tooltip": "[required] A JSON schema that the structured output will adhere to", + "requires": "structured_output==true", "default": "", - } - options["use_media"] = { - "type": UserInput.OPTION_TOGGLE, - "help": "Add images", - "tooltip": "Add media URLs for multi-modal processing. 
Requires a model that supports vision.", - "default": False, - } - options["media_columns"] = { + }, + "temperature": { "type": UserInput.OPTION_TEXT, - "help": "Columns with image URL(s)", - "default": "", - "inline": True, - "tooltip": "Multiple columns can be selected.", - "requires": "use_media==true", + "help": "Temperature", + "default": 0.1, + "coerce_type": float, + "max": 2.0, + "tooltip": "Temperature indicates how strict the model will gravitate towards the most " + "probable next token. A score close to 0 returns more predictable " + "outputs while a score close to 1 leads to more creative outputs. Not supported by all models.", } - - # Common options for both text and media datasets - options["structured_output"] = { - "type": UserInput.OPTION_TOGGLE, - "help": "Output structured JSON", - "tooltip": "Output in a JSON format instead of text. Note that your chosen model may not support " - "structured output.", - "default": False, - } - options["json_schema_info"] = { - "type": UserInput.OPTION_INFO, - "help": "Insert a JSON Schema for structured outputs. These define the output that " - "the LLM will adhere to. [See instructions and examples on how to write a JSON Schema]" - "(https://json-schema.org/learn/miscellaneous-examples) and [OpenAI's documentation]" - "(https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-schemas).", - "requires": "structured_output==true", - } - options["json_schema"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "JSON schema", - "tooltip": "[required] A JSON schema that the structured output will adhere to", - "requires": "structured_output==true", - "default": "", - } - options["temperature"] = { - "type": UserInput.OPTION_TEXT, - "help": "Temperature", - "default": 0.1, - "coerce_type": float, - "max": 2.0, - "tooltip": "Temperature indicates how strict the model will gravitate towards the most " - "probable next token. 
A score close to 0 returns more predictable " - "outputs while a score close to 1 leads to more creative outputs. Not supported by all models.", - } + }) if not is_media_parent: - options["truncate_input"] = { + options.update({ + "truncate_input": { + "type": UserInput.OPTION_TEXT, + "help": "Max chars in input value", + "default": 0, + "coerce_type": int, + "tooltip": "This value determines how many characters an inserted dataset value may have. 0 = unlimited.", + "requires": "use_media==false", + }, + "max_tokens": { + "type": UserInput.OPTION_TEXT, + "help": "Max output tokens", + "default": 10000, + "coerce_type": int, + "tooltip": "As a rule of thumb, one token generally corresponds to ~4 characters of " + "text for common English text. This includes tokens spent for reasoning.", + }, + "batches": { + "type": UserInput.OPTION_TEXT, + "help": "Items per prompt", + "coerce_type": int, + "default": 1, + "tooltip": "How many dataset items to insert into the prompt. These will be inserted as a list " + "wherever the column brackets are used (e.g. '[body]').", + "requires": "use_media==false", + }, + "batch_info": { + "type": UserInput.OPTION_INFO, + "help": "Note on batching: Batching may increase speed but reduce accuracy. Models " + "need to support structured output for batching. This processor uses JSON schemas to ensure " + "symmetry between input and output lengths, but models may struggle to match input and output " + "values. Describe the dataset values in plurals in your prompt when batching. 
If you use " + "multiple column brackets in your prompt, rows with any empty values are skipped.", + "requires": "use_media==false", + } + }) + + options.update({ + "ethics_warning3": { + "type": UserInput.OPTION_INFO, + "requires": "model^=api-", + "help": "When using LLMs through commercial parties, always consider anonymising your data and " + "whether local open-source LLMs are also an option.", + }, + "save_annotations": { + "type": UserInput.OPTION_ANNOTATION, + "label": "prompt outputs", + "default": False, + }, + "hide_think": { + "type": UserInput.OPTION_TOGGLE, + "help": "Hide reasoning", + "default": False, + "tooltip": "Some models include reasoning in their output, between tags. This option " + "removes this tag and its contents from the output.", + }, + "limit": { "type": UserInput.OPTION_TEXT, - "help": "Max chars in input value", + "help": "Only annotate this many items, then stop", "default": 0, "coerce_type": int, - "tooltip": "This value determines how many characters an inserted dataset value may have. 0 = unlimited.", - "requires": "use_media==false", - } - - options["max_tokens"] = { - "type": UserInput.OPTION_TEXT, - "help": "Max output tokens", - "default": 10000, - "coerce_type": int, - "tooltip": "As a rule of thumb, one token generally corresponds to ~4 characters of " - "text for common English text. This includes tokens spent for reasoning.", - } - - if not is_media_parent: - options["batches"] = { + "min": 0, + "delegated": True, + }, + "annotation_label": { "type": UserInput.OPTION_TEXT, - "help": "Items per prompt", - "coerce_type": int, - "default": 1, - "tooltip": "How many dataset items to insert into the prompt. These will be inserted as a list " - "wherever the column brackets are used (e.g. '[body]').", - "requires": "use_media==false", - } - options["batch_info"] = { - "type": UserInput.OPTION_INFO, - "help": "Note on batching: Batching may increase speed but reduce accuracy. 
Models " - "need to support structured output for batching. This processor uses JSON schemas to ensure " - "symmetry between input and output lengths, but models may struggle to match input and output " - "values. Describe the dataset values in plurals in your prompt when batching. If you use " - "multiple column brackets in your prompt, rows with any empty values are skipped.", - "requires": "use_media==false", + "help": "Label for the annotations to add to the dataset", + "default": "", + "delegated": True, } - - options["ethics_warning3"] = { - "type": UserInput.OPTION_INFO, - "requires": "api_or_local==api", - "help": "When using LLMs through commercial parties, always consider anonymising your data and " - "whether local open-source LLMs are also an option.", - } - options["save_annotations"] = { - "type": UserInput.OPTION_ANNOTATION, - "label": "prompt outputs", - "default": False, - } - options["hide_think"] = { - "type": UserInput.OPTION_TOGGLE, - "help": "Hide reasoning", - "default": False, - "tooltip": "Some models include reasoning in their output, between tags. 
This option " - "removes this tag and its contents from the output.", - } - options["limit"] = { - "type": UserInput.OPTION_TEXT, - "help": "Only annotate this many items, then stop", - "default": 0, - "coerce_type": int, - "min": 0, - "delegated": True, - } - options["annotation_label"] = { - "type": UserInput.OPTION_TEXT, - "help": "Label for the annotations to add to the dataset", - "default": "", - "delegated": True, - } + }) # Get the media columns for the select media columns option if not is_media_parent and parent_dataset and parent_dataset.get_columns(): @@ -419,20 +330,15 @@ def is_compatible_with(cls, module=None, config=None): # Text-based datasets if module.get_extension() in ["csv", "ndjson"]: return True + # Media datasets (zip archives with images, video, or audio) if module.get_extension() == "zip" and module.get_media_type() in ("image", "video", "audio"): return True return False def process(self): - self.dataset.update_status("Validating settings") - api_model = self.parameters.get("api_model") - if api_model == "none": - api_model = "" - - modal_location = self.parameters.get("api_or_local", "api") hide_think = self.parameters.get("hide_think", False) # Check if the source dataset is a media archive (zip with images/video/audio) @@ -459,85 +365,29 @@ def process(self): # Set value for batch length in prompts batches = max(1, min(self.parameters.get("batches", 1), self.source_dataset.num_rows)) - use_batches = batches > 1 - if media_columns or is_media_archive: # no batching for media files - use_batches = False + use_batches = batches > 1 and not (media_columns or is_media_archive) # no batching for media files + if not use_batches: self.dataset.delete_parameter("batches") # Set all variables through which we can reach the LLM api_key = "" - base_url = None client_kwargs = {} - if modal_location == "local": - provider = self.parameters.get("local_provider", "") - base_url = self.parameters.get("local_base_url", "") - - if not provider: - 
self.dataset.finish_with_error("Choose a local model provider") - return + # load model and provider metadata + chosen_model_id = self.parameters.get("model") + available_models = {k: v for k, v in self.config.get("llm.available_models").items() if k in self.config.get("llm.enabled_models")} + if chosen_model_id not in available_models: + return self.dataset.finish_with_error(f"Model {chosen_model_id} not supported") - if provider == "lmstudio": - model = "lmstudio_model" - if not base_url: - base_url = "http://127.0.0.1:1234/v1" if not self.config.get("USING_DOCKER", False) else "http://host.docker.internal:1234/v1" - if not self.parameters.get("lmstudio_api_key"): - api_key = "lm-studio" - elif provider == "ollama": - model = self.parameters.get("ollama_model", "") - if not model: - self.dataset.finish_with_error("You need to provide a model name for Ollama (e.g. 'llama3.2')") - return - if not base_url: - base_url = "http://localhost:11434" if not self.config.get("USING_DOCKER", False) else "http://host.docker.internal:11434" - elif provider == "vllm": - model = "vllm_model" - api_key = self.parameters.get("vllm_api_key", "") - if not base_url: - base_url = "http://localhost:8000/v1" - else: - self.dataset.finish_with_error("Local provider not supported, choose either lmstudio or ollama") - return + model = available_models[chosen_model_id] + provider = self.config.get("llm.providers").get(model["provider"]) - elif modal_location == "hosted": - base_url = self.config.get("llm.server", "") - provider = self.config.get("llm.provider_type", "none").lower() - api_key = self.config.get("llm.api_key", "") - llm_auth_type = self.config.get("llm.auth_type", "") - model = self.parameters.get("hosted_llm_model", "") - if api_key and llm_auth_type: - client_kwargs = { - "headers": { - llm_auth_type: api_key - } - } - if provider == "none" or not base_url: - self.dataset.finish_with_error("4CAT LLM server not properly configured; contact the administrator") - return - else: 
- if not api_model: - self.dataset.finish_with_error("Select an API model or insert one manually") - return - # Models can be set manually - if api_model == "custom": - model = self.parameters.get("api_custom_model_id", "") - provider = self.parameters.get("api_custom_model_provider", "") - if not model: - self.dataset.finish_with_error("You must provide a valid API model name/ID") - return - if not provider: - self.dataset.finish_with_error("You must provide a valid API model provider") - return - else: - model_info = LLMAdapter.get_models(self.config).get(api_model, {}) - provider = model_info.get("provider") - model = api_model + if not provider: + return self.dataset.finish_with_error(f"Model provider for {chosen_model_id} not currently available.") - api_key = self.parameters.get("api_key") or self.config.get(f"api.{provider}.api_key", "") - if not api_key: - self.dataset.finish_with_error("You need to provide a valid API key") - return + if provider["type"] == "api" and not api_key: + return self.dataset.finish_with_error(f"No API key provided for model {chosen_model_id}") # Prompt validation base_prompt = self.parameters.get("prompt", "") @@ -583,14 +433,13 @@ def process(self): # Start LLM self.dataset.update_status("Connecting to LLM provider") - base_url_str = "" if not base_url else f" at base URL '{base_url}'" - self.dataset.log(f"Using LLM provider '{provider}' with model '{model}'{base_url_str}") + base_url_str = "" if not provider["url"] else f" at base URL '{provider['url']}'" + self.dataset.log(f"Using LLM provider '{provider['_id']}' with model '{model['local_id']}'{base_url_str}") try: llm = LLMAdapter( - provider=provider, + config=self.config, model=model, api_key=api_key, - base_url=base_url, temperature=temperature, max_tokens=max_tokens, client_kwargs=client_kwargs @@ -788,7 +637,7 @@ def process(self): "prompt": prompt, "temperature": temperature, "max_tokens": max_tokens, - "model": model, + "model": model["local_id"], "time_created": 
datetime.fromtimestamp(time_created).strftime("%Y-%m-%d %H:%M:%S"), "time_created_utc": time_created, "batch_number": "", @@ -816,7 +665,7 @@ def process(self): for output_key, output_value in annotation_output.items(): # Skip 'signature' and 'type' annotations for Google - if provider == "google" and ( + if model["provider"] == "google" and ( output_key.endswith(".signature") or output_key.endswith(".type") ): @@ -851,7 +700,7 @@ def process(self): self.dataset.update_progress(row / max_processed) # Rate limits for different providers - if provider == "mistral": + if model["provider"] == "mistral": time.sleep(1) if limit_reached: @@ -966,10 +815,9 @@ def process(self): json_schema = self.get_json_schema_for_batch(n_batched, custom_schema=json_schema_original) # `llm` becomes a RunnableSequence when used, so we'll need to reset it here llm = LLMAdapter( - provider=provider, + config=self.config, model=model, api_key=api_key, - base_url=base_url, temperature=temperature, max_tokens=max_tokens, client_kwargs=client_kwargs @@ -984,7 +832,7 @@ def process(self): batch_str = f" and {n_batched} items batched into the prompt" if use_batches else "" self.dataset.update_status(f"Generating text at row {row:,}/" - f"{max_processed:,} with {model}{batch_str}") + f"{max_processed:,} with {model['name']}{batch_str}") # Now finally generate some text! try: response = llm.generate_text( @@ -1008,15 +856,9 @@ def process(self): self.dataset.finish_with_warning(outputs, f"Not all items processed: {e}") return - # Set model name from the response for more details - if hasattr(response, "response_metadata"): - model = response.response_metadata.get("model_name", model) - if "models/" in model: - model = model.replace("models/", "") - if not response: structured_warning = " with your specified JSON schema" if structured_output else "" - warning = f"{model} could not return text{structured_warning}. Consider editing your prompt or changing settings." 
+ warning = f"{model['name']} could not return text{structured_warning}. Consider editing your prompt or changing settings." self.dataset.finish_with_warning(outputs, warning) return @@ -1100,7 +942,7 @@ def process(self): "prompt": prompt if not use_batches else base_prompt, # Insert dataset values if not batching "temperature": temperature, "max_tokens": max_tokens, - "model": model, + "model": model["local_id"], "time_created": datetime.fromtimestamp(time_created).strftime("%Y-%m-%d %H:%M:%S"), "time_created_utc": time_created, "batch_number": n + 1 if use_batches else "", @@ -1122,7 +964,7 @@ def process(self): for output_key, output_value in annotation_output.items(): # Skip 'signature' and 'type' annotations for Google - if provider == "google" and output_key in ("extras.signature", ".type"): + if model["provider"] == "google" and output_key in ("extras.signature", ".type"): continue annotation = { @@ -1140,7 +982,7 @@ def process(self): n_batched = 0 # Rate limits for different providers - if provider == "mistral": + if model["provider"] == "mistral": time.sleep(1) # Write annotations in batches @@ -1175,7 +1017,7 @@ def process(self): # Final outputs time_end = time.time() time_progressed = str(timedelta(seconds=int(time_end - time_start))) - final_status = f"Finished, {model} generated text in {time_progressed}." + final_status = f"Finished, {model['local_id']} generated text in {time_progressed}." skipped_str = None if not skipped else f" Skipped {skipped} rows because of empty values." 
if skipped_str: self.dataset.finish_with_warning(i, final_status + skipped_str) @@ -1261,9 +1103,14 @@ def validate_query(query, request, config): :param config: :return: """ - if query["api_or_local"] == "api" and not query.get("api_key"): + is_external_api = query["model"].startswith("api-") + if is_external_api and not query.get("api_key"): raise QueryParametersException("You need to enter an API key when using third-party models.") + allowed_models = LLMPrompter.get_model_library(config) + if query["model"] not in chain(*[v.values() for v in allowed_models.values()]): + raise QueryParametersException(f"The '{query['model']}' model is not currently available.") + # For media archive datasets, use_media won't be present in the query is_media_archive = "use_media" not in query @@ -1277,7 +1124,7 @@ def validate_query(query, request, config): raise QueryParametersException("You need to insert column name(s) in the user prompt within brackets " "(e.g. '[body]' or '[timestamp, author]')") - if query["api_or_local"] == "api" and not query.get("frontend-confirm"): + if is_external_api and not query.get("frontend-confirm"): raise QueryNeedsExplicitConfirmationException("Your data will be sent to a third-party service for " "processing, which will share your data with them and is " "likely to incur costs. 
Do you want to continue?") diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 076bd916f..4f4c7d0d8 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -1,9 +1,11 @@ """ Use a prompt from a preset list """ +import json +from itertools import chain + from backend.lib.preset import ProcessorPreset from common.lib.helpers import UserInput -from common.lib.llm import LLMAdapter from common.lib.exceptions import ( QueryParametersException, @@ -12,8 +14,6 @@ from processors.machine_learning.llm_prompter import LLMPrompter -import json - class PromptCompassRunner(ProcessorPreset): """ Run processor pipeline to feed prompts to LLM Prompter @@ -26,8 +26,8 @@ class PromptCompassRunner(ProcessorPreset): extension = "ndjson" references = [ - "This processor is an implementation of the stand-alone tool [PromptCompass](https://github.com/ErikBorra/PromptCompass) by Erik Borra.", - "See the processor options for references to the sources of each prompt in the library." + "This processor is an implementation of the stand-alone tool [PromptCompass](https://github.com/ErikBorra/PromptCompass) by Erik Borra.", + "See the processor options for references to the sources of each prompt in the library." ] @staticmethod @@ -35,12 +35,13 @@ def get_prompt_library(config): """ Get prompt library from file + :param config: Config reader :return list: List of prompts and metadata """ prompt_library_file = config.get("PATH_ROOT").joinpath("common/assets/prompt_library.json") if not prompt_library_file.exists(): return [] - + with prompt_library_file.open(encoding="utf-8") as infile: prompt_library = json.load(infile) @@ -63,25 +64,6 @@ def get_prompt_library(config): return prompt_library - @staticmethod - def get_available_models(config): - """ - Get available model providers - - Combine the list defined by the LLMAdapter with known local models. 
- - :param config: Configuration reader - :return dict: Models and metadata - """ - # get cached local models - models = config.get("llm.available_models", {}) - models = {} if models == [] else models - models.update({k: v for k, v in LLMAdapter.get_models(config).items() if k not in ("none", "custom")}) - - models = {k: v for k, v in models.items() if "model_card" in v} - - return models - @staticmethod def is_compatible_with(module=None, config=None): """ @@ -91,9 +73,7 @@ def is_compatible_with(module=None, config=None): :param ConfigManager|None config: Configuration reader (context-aware) :return bool: """ - models = PromptCompassRunner.get_available_models(config) - return (models - and module.is_top_dataset() + return (module.is_top_dataset() and module.get_extension() in ("csv", "ndjson")) @classmethod @@ -108,25 +88,31 @@ def get_options(cls, parent_dataset=None, config=None): :return: """ prompt_library = cls.get_prompt_library(config) - available_models = cls.get_available_models(config) + available_models = config.get("llm.available_models", {}) + enabled_model_ids = config.get("llm.enabled_models", []) + if not config.get("llm.access"): + enabled_model_ids = [_ for _ in enabled_model_ids if _.startswith("api-")] + + enabled_models = {k: v for k, v in available_models.items() if k in enabled_model_ids} options = { "model": { "type": UserInput.OPTION_CHOICE, "help": "Model to use", "tooltip": "Third-party models require an API key to run.", - "options": {("local/" if v["provider"] == "local" else f"{v['provider']}/") + k: v["name"] for k, v in available_models.items()}, - "default": sorted(list(available_models.keys()), key=lambda k: k.startswith("local"))[-1] + "options": LLMPrompter.get_model_library(config), + "default": sorted(list(enabled_models.keys()), key=lambda k: not k.startswith("api"))[-1] if enabled_models else "" }, } - for model, metadata in available_models.items(): - model_key = metadata["provider"] + "/" + model - 
options[f"{model_key}-info"] = { - "type": UserInput.OPTION_INFO, - "help": f"Read the [model card]({metadata['model_card']}) for {model}.", - "requires": f"model=={model_key}" - } + for model, metadata in enabled_models.items(): + if metadata.get("model_card"): + model_key = metadata["provider"] + "/" + model + options[f"{model_key}-info"] = { + "type": UserInput.OPTION_INFO, + "help": f"Read the [model card]({metadata['model_card']}) for {model}.", + "requires": f"model=={model_key}" + } options.update({ "api_key": { @@ -136,20 +122,20 @@ def get_options(cls, parent_dataset=None, config=None): "cache": True, "tooltip": "Create an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization" "/api-keys). Note that this often involves billing.", - "requires": "model!^=local" + "requires": "model^=api-" }, "hide_think": { "type": UserInput.OPTION_TOGGLE, "help": "Hide reasoning", "default": False, "tooltip": "Some models include reasoning in their output, between tags. This option " - "removes this tag and its contents from the output.", - "requires": "model^=local/deepseek" + "removes this tag and its contents from the output, if present.", }, "temperature": { "type": UserInput.OPTION_TEXT, "help": "Temperature", - "tooltip": "Between 0 and 1. Higher temperatures increase variability and may lead to strange results", + "tooltip": "Between 0 and 1. Higher temperatures increase variability and may lead to strange " + "results. 
Does not have an effect on all models.", "coerce_type": float, "min": 0.0, "max": 1.0, @@ -165,7 +151,7 @@ def get_options(cls, parent_dataset=None, config=None): }) for i, task in enumerate(prompt_library): - task_key = f"task-{i+1}" + task_key = f"task-{i + 1}" options[task_key] = { "type": UserInput.OPTION_TEXT_LARGE, "requires": f"task=={task_key}", @@ -227,24 +213,15 @@ def get_processor_pipeline(self): if short_name: self.dataset.update_label(f"PromptCompass ({short_name})") - chosen_model = "/".join(self.parameters.get("model").split("/")[1:]) - models = self.get_available_models(self.config) - if chosen_model not in models: - return self.dataset.finish_with_error(f"Model {self.parameters['model']} is not available, halting processor.") - - model = models[chosen_model] + if self.parameters.get("model") not in self.config.get("llm.enabled_models", []): + return self.dataset.finish_with_error( + f"Model {self.parameters['model']} is not available, halting processor.") pipeline = [ { "type": "llm-prompter", "parameters": { - "api_or_local": "local" if model["provider"] == "local" else "api", - "api_model": chosen_model if model["provider"] != "local" else "", - "api_key": self.parameters.get("api_key"), - "api_custom_model_provider": "", - "local_provider": self.config.get("llm.provider_type"), - "local_base_url": self.config.get("llm.server"), - "ollama_model": chosen_model if model["provider"] == "local" else "", + "model": self.parameters.get("model"), "prompt": self.parameters[self.parameters["task"]], "structured_output": False, "temperature": self.parameters["temperature"], @@ -260,7 +237,6 @@ def get_processor_pipeline(self): return pipeline - @staticmethod def validate_query(query, request, config): """ @@ -273,6 +249,10 @@ def validate_query(query, request, config): :param config: :return: """ + allowed_models = LLMPrompter.get_model_library(config) + if query["model"] not in chain(*[v.values() for v in allowed_models.values()]): + raise 
QueryParametersException(f"The '{query['model']}' model is not currently available.") + if not query["model"].startswith("local") and not query.get("api_key"): raise QueryParametersException("You need to enter an API key when using third-party models.") @@ -294,4 +274,4 @@ def map_item(item): :param item: :return: """ - return LLMPrompter.map_item(item) \ No newline at end of file + return LLMPrompter.map_item(item) diff --git a/processors/metrics/rank_attribute.py b/processors/metrics/rank_attribute.py index 2f50d623a..8e18bb2bf 100644 --- a/processors/metrics/rank_attribute.py +++ b/processors/metrics/rank_attribute.py @@ -82,11 +82,14 @@ def get_options(cls, parent_dataset=None, config=None): "type": UserInput.OPTION_CHOICE, "options": { "none": "Use column value", - "urls": "URLs", - "hostnames": "Domain names", - "level2-hostnames": "Second-level domain names (e.g. m.youtube.com -> youtube.com)", + "URL-related": { + "urls": "URLs", + "hostnames": "Domain names", + "level2-hostnames": "Second-level domain names (e.g. 
m.youtube.com -> youtube.com)", + }, "hashtags": "Hashtags (words starting with #)", - "emoji": "Emoji (each used emoji in the column is counted individually)" + "emoji": "Emoji (each used emoji in the column is counted individually)", + "occurrence": "Values (the number of comma-separated values in the given field)" }, "help": "Extract from column", "tooltip": "This can be used to extract more specific values from the value of the selected column(s); for " diff --git a/tests/test_modules.py b/tests/test_modules.py index 87ffc06ad..3b9a82579 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -1,3 +1,4 @@ +import traceback import pytest import time import json @@ -218,7 +219,8 @@ def test_processors(logger, fourcat_modules, mock_job, mock_job_queue, mock_data processor_class.get_options(parent_dataset=mock_dataset, config=mock_basic_config) except Exception as e: # Log the failure and add it to the failures list - logger.error(f"Processor {processor_name} failed in get_options: {e}") + trace = traceback.TracebackException.from_exception(e).stack[-1] + logger.error(f"Processor {processor_name} failed in get_options: {e} (in {trace.filename.split('/')[-1]}:{trace.lineno})") failures.append((processor_name, str(e))) # Check if processor Class has "options" attribute @@ -230,11 +232,13 @@ def test_processors(logger, fourcat_modules, mock_job, mock_job_queue, mock_data try: processor_class(logger, job=mock_job, queue=mock_job_queue, manager=None, modules=fourcat_modules) except Exception as e: - logger.error(f"Processor {processor_name} failed in process(): {e}") + trace = traceback.TracebackException.from_exception(e).stack[-1] + logger.error(f"Processor {processor_name} failed in process(): {e} (in {trace.filename.split('/')[-1]}:{trace.lineno})") failures.append((processor_name, str(e))) except Exception as e: - logger.error(f"Processor {processor_name} failed while setting up: {e}") + trace = traceback.TracebackException.from_exception(e).stack[-1] + 
logger.error(f"Processor {processor_name} failed while setting up: {e} (in {trace.filename.split('/')[-1]}:{trace.lineno})") failures.append((processor_name, str(e))) diff --git a/webtool/__init__.py b/webtool/__init__.py index a15729f74..ef20d3d88 100644 --- a/webtool/__init__.py +++ b/webtool/__init__.py @@ -171,6 +171,7 @@ def time_this(func): import webtool.views.views_restart # noqa: E402 import webtool.views.views_admin # noqa: E402 import webtool.views.views_extensions # noqa: E402 + import webtool.views.views_llm # noqa: E402 import webtool.views.views_user # noqa: E402 import webtool.views.views_dataset # noqa: E402 import webtool.views.views_misc # noqa: E402 @@ -182,6 +183,7 @@ def time_this(func): app.register_blueprint(webtool.views.views_restart.component) app.register_blueprint(webtool.views.views_admin.component) app.register_blueprint(webtool.views.views_extensions.component) + app.register_blueprint(webtool.views.views_llm.component) app.register_blueprint(webtool.views.views_user.component) app.register_blueprint(webtool.views.views_dataset.component) app.register_blueprint(webtool.views.views_misc.component) @@ -241,6 +243,10 @@ def get_datasource_explorer_templates(name): [FileSystemLoader(template_paths), FunctionLoader(get_datasource_explorer_templates)] ) + # enable to {% do %} tag in jinja + app.jinja_env.add_extension("jinja2.ext.do") + app.jinja_env.add_extension("jinja2.ext.debug") + # import custom jinja2 template filters # these also benefit from current_app import webtool.lib.template_filters # noqa: E402 diff --git a/webtool/lib/template_filters.py b/webtool/lib/template_filters.py index 5682c6dbc..02e41dec6 100644 --- a/webtool/lib/template_filters.py +++ b/webtool/lib/template_filters.py @@ -195,6 +195,25 @@ def _jinja2_filter_markdown(text, trim_container=False): def _jinja2_filter_isbool(value): return isinstance(value, bool) +@current_app.template_filter('propmap') +def _jinja2_filter_propmap(data, property, default=None): + """ 
+ Select a property from a sequence of dicts + + To map `{a: b: {prop: value}}` to `{a: value}` for a given `prop`. If + `data` is a dict, preserve key:value pairs. If the property does not exist + in a sequence item, use the `default` value. + + :param data: Sequence or dict to map + :param property: Property to use for mapping + :param default: Value to use if property does not exist in item + :return: Mapped sequence or dict + """ + if type(data) is dict: + return {k: v.get(property, default) for k, v in data.items()} + else: + return [v.get(property, default) for v in data.values()] + @current_app.template_filter('json') def _jinja2_filter_json(data): return json.dumps(data) @@ -396,6 +415,18 @@ def _jinja2_filter_parameter_str(url): return params +@current_app.template_filter("hostname") +def _jinja2_filter_hostname(url: str) -> str: + """ + For a URL, return the hostname + + If no hostname is found, return the original value + + :param str url: + :return str: + """ + return ural.get_hostname(url) or url + @current_app.template_filter("explorer_css") def explorer_css(datasource, scope_class="explorer-content-container"): @@ -429,6 +460,10 @@ def explorer_css(datasource, scope_class="explorer-content-container"): def _jinja2_filter_hasattr(obj, attribute): return hasattr(obj, attribute) +@current_app.template_filter('debug') +def _jinja2_filter_debug(value): + print(value) + @current_app.context_processor def inject_now(): def uniqid(): diff --git a/webtool/static/css/stylesheet.css b/webtool/static/css/stylesheet.css index fecc5cfd7..3e2445911 100644 --- a/webtool/static/css/stylesheet.css +++ b/webtool/static/css/stylesheet.css @@ -330,6 +330,48 @@ article.small .form-element select[multiple] { top: -0.4em; } +.form-multi-option-wrapper, .form-multi-option-header { + margin: 0 1em; +} + +.form-multi-option-wrapper { + margin: 1em; +} + +.form-multi-option-header { + +} + +.form-multi-option-wrapper li { + border-left: 2px solid var(--accent); + margin-bottom: 
/**
 * Multi-option form widget (webtool/static/js/modules/multi-form.js)
 *
 * Every `ol.form-multi-option-wrapper` is a list of repeated option groups.
 * Each `li` gets an "add" and a "delete" button; the form controls inside an
 * item carry the item's 1-based position in their `id`, `name` and `for`
 * attributes as `-<index>-`, which is kept in sync whenever the list changes.
 *
 * Relies on `find_parent`, which multi-form.js imports from ./util.js.
 */
export const multiForm = {
    /**
     * Add the action buttons to all list items and number the items
     */
    init: function () {
        const actions = document.createElement('div');
        actions.className = 'multi-form-actions';

        // No listeners are attached to these template buttons: cloneNode()
        // does not copy event listeners, so clicks are handled exclusively by
        // the delegated listener registered on the wrapper below.
        const add_button = document.createElement('button');
        // explicit type: a type-less <button> is a submit button and could
        // become the form's default button when Enter is pressed
        add_button.type = 'button';
        add_button.className = 'add-button action-button';
        add_button.textContent = '+';

        const delete_button = document.createElement('button');
        delete_button.type = 'button';
        delete_button.className = 'delete-button action-button';
        delete_button.textContent = 'x';

        actions.appendChild(add_button);
        actions.appendChild(delete_button);

        document.querySelectorAll('.form-multi-option-wrapper').forEach(function (wrapper) {
            wrapper.addEventListener('click', multiForm.handle_click);
            wrapper.querySelectorAll('li').forEach(function (item) {
                item.appendChild(actions.cloneNode(true));
            });
            multiForm.renumber(wrapper);
        });
    },

    /**
     * Delegated click handler for a wrapper
     *
     * Ignores clicks on anything but an add or delete button; otherwise
     * performs the action and renumbers the list.
     *
     * @param e  Click event
     */
    handle_click: function (e) {
        const classes = e.target.classList;
        const is_delete = classes.contains('delete-button');
        if (!is_delete && !classes.contains('add-button')) {
            return true;
        }
        e.preventDefault();

        // look the wrapper up *before* acting: after a delete the clicked
        // button may no longer be attached to the document
        const wrapper = find_parent(e.target, 'ol.form-multi-option-wrapper');
        if (is_delete) {
            multiForm.delete_item(e);
        } else {
            multiForm.add_item(e);
        }
        multiForm.renumber(wrapper);
    },

    /**
     * Append a copy of the clicked item, reset to default values
     *
     * @param e  Click event originating from an add button
     */
    add_item: function (e) {
        const ol = find_parent(e.target, 'ol.form-multi-option-wrapper');
        const source_li = find_parent(e.target, 'li');
        const clone = source_li.cloneNode(true);
        reset_form_elements(clone);
        ol.appendChild(clone);
    },

    /**
     * Remove the clicked item after confirmation
     *
     * The list never becomes empty: if only one item is left it is reset to
     * its default values instead of being removed.
     *
     * @param e  Click event originating from a delete button
     */
    delete_item: function (e) {
        if (!confirm("Are you sure?")) {
            return false;
        }
        const li = find_parent(e.target, 'li');
        const ol = find_parent(e.target, 'ol.form-multi-option-wrapper');

        if (ol.querySelectorAll('li').length > 1) {
            li.parentNode.removeChild(li);
        } else {
            // last element; do not remove, but reset to default
            reset_form_elements(li);
        }
    },

    /**
     * Renumber all items of a wrapper, starting at 1
     *
     * Also makes sure only the last item has its delete button hidden.
     *
     * @param parent  The wrapper element containing the `li` items
     */
    renumber: function (parent) {
        parent.querySelectorAll('li').forEach(function (item, position) {
            const index = position + 1;
            item.setAttribute('data-multi-option-index', index);
            const delete_button = item.querySelector('.delete-button');
            if (delete_button) {
                delete_button.classList.remove('hidden');
            }
            multiForm.renumber_items(item, index);
        });

        // guard against wrappers without items or items without buttons
        const last_delete = parent.querySelector('li:last-child .delete-button');
        if (last_delete) {
            last_delete.classList.add('hidden');
        }
    },

    /**
     * Recursively rewrite the item index in `for`, `id` and `name` attributes
     *
     * @param parent  Node whose descendants are updated
     * @param index   New 1-based index of the item
     */
    renumber_items: function (parent, index) {
        const attributes = ['for', 'id', 'name'];
        parent.childNodes.forEach(child => {
            if (!(child instanceof HTMLElement)) {
                return;
            }
            for (const attribute of attributes) {
                if (child.hasAttribute(attribute)) {
                    // `[0-9]+` (quantifier outside the class) so that indices
                    // of two or more digits are matched as well
                    child.setAttribute(attribute, child.getAttribute(attribute).replace(/-[0-9]+-/, `-${index}-`));
                }
            }
            multiForm.renumber_items(child, index);
        });
    }

}

export const module = multiForm;

/**
 * Recursively reset form elements to default value
 * (webtool/static/js/modules/util.js)
 *
 * We don't use form.reset() for two reasons:
 * - We may want to reset a subset of a form
 * - The *current* value of an element may not be the designated *default*
 *   value of the element
 *
 * An element with a `data-default` attribute gets that value and is not
 * descended into; any other element is searched recursively.
 *
 * @param parent  Parent node to recursively reset child nodes of
 */
export function reset_form_elements(parent) {
    parent.childNodes.forEach(child => {
        if (!(child instanceof HTMLElement)) {
            return;
        }
        if (child.hasAttribute('data-default')) {
            child.value = child.getAttribute('data-default');
        } else {
            reset_form_elements(child);
        }
    });
}
{{ settings.help|markdown|safe }}
{% elif settings.type in ["annotation", "annotations"] %} {# pass - a datasource should never need to annotate itself; this is for processor options only #} + {% elif settings.type == "multi_option" %} +
+

{{ settings.help }}

+
+
    + {# always include an empty item #} + {% set empty_item = settings.options|propmap("default") %} + {% if settings['default'] is mapping %} + {% set current_value = settings.default.values()|list %} + {% else %} + {% set current_value = settings.default %} + {% endif %} + {% do current_value.append(empty_item) %} + {% for item in current_value %} + {% set outerloop = loop %} + {% set last_index = outerloop.index %} +
  1. + {% for sub_option, sub_settings in settings.options.items() %} + {% do sub_settings.update({"original_default": sub_settings.default, "default": item[sub_option]}) %} + {% set sub_option = option ~ "-" ~ outerloop.index ~ "-" ~ sub_option %} + {% with option=sub_option, settings=sub_settings %} + {% include "components/datasource-option.html" %} + {% endwith %} + {% endfor %} +
  2. + {% endfor %} +
{% else %}
@@ -21,7 +48,7 @@

{% endif %} {% elif settings.type == "string" %} - 0 %} step="{{ settings.min }}"{% elif settings.min is defined and settings.min is not none and settings.min|float == 0 %} step="any"{% endif %} type="{% if (settings.min is defined and settings.min is not none) or (settings.max is defined and settings.max is not none) %}number{% elif settings.password %}password{% else %}text{% endif %}" value="{{ settings.default }}"> + 0 %} step="{{ settings.min }}"{% elif settings.min is defined and settings.min is not none and settings.min|float == 0 %} step="any"{% endif %} type="{% if (settings.min is defined and settings.min is not none) or (settings.max is defined and settings.max is not none) %}number{% elif settings.password %}password{% else %}text{% endif %}" value="{{ settings.default }}" data-default="{{ settings.original_default }}"> {% if "tooltip" in settings %} {% endif %} {% elif settings.type == "date" %} - + {% if "tooltip" in settings %} @@ -55,7 +82,7 @@ {% endif %} {% elif settings.type in ("json", "textarea") %} + placeholder="{{ settings.tooltip }}" data-default="{{ settings.original_default }}">{{ settings.default }} {% if "tooltip" in settings %} {% endif %} {% elif settings.type == "choice" %} - {% for value, label in settings.options.items() %} {% endfor %} @@ -105,7 +132,7 @@ {% elif settings.type == "hue" %}
{% set hue_id = uniqid() %} - diff --git a/webtool/templates/components/processor-option.html b/webtool/templates/components/processor-option.html index 1fbdd728e..dbe92ab27 100644 --- a/webtool/templates/components/processor-option.html +++ b/webtool/templates/components/processor-option.html @@ -22,7 +22,16 @@ {% elif option_settings.type == "choice" %} {% elif option_settings.type in ("multi", "annotations") %} diff --git a/webtool/templates/controlpanel/layout.html b/webtool/templates/controlpanel/layout.html index 33c387421..26d03df20 100644 --- a/webtool/templates/controlpanel/layout.html +++ b/webtool/templates/controlpanel/layout.html @@ -18,6 +18,8 @@ Jobs{% endif %} {% if __user_config("privileges.admin.can_restart") %} Extensions{% endif %} + {% if __user_config("privileges.admin.can_manage_settings") and __user_config("llm.access") %} + LLMs & Providers{% endif %} {% if __user_config("privileges.admin.can_manage_users") %} View logs{% endif %} {% if __user_config("privileges.admin.can_manipulate_all_datasets") %} diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html new file mode 100644 index 000000000..3dc5b5034 --- /dev/null +++ b/webtool/templates/controlpanel/llm-server.html @@ -0,0 +1,175 @@ +{% extends "controlpanel/layout.html" %} + +{% block title %}LLM Server{% endblock %} +{% block body_class %}plain-page admin {{ body_class }}{% endblock %} +{% block subbreadcrumbs %}{% set navigation.sub = "llm" %}{% endblock %} + +{% block body %} +
+
+

LLM Providers

+ + {% if flashes %} +
+ {% for notice in flashes %} +

{{ notice|safe }}

+ {% endfor %} +
+ {% endif %} + +

+ You can add and configure LLM providers via the 'LLM providers' tab on the Settings page. +

+ + {# Server status #} +
+ + + + + + {% if not providers %} + + + + {% else %} + {% for provider_id, provider in providers.items() %} + + + + + {% endfor %} + {% endif %} +
ServerStatus
+ No LLM providers configured. +
{{ provider.type }} {{ provider.name }} + {% if provider.status == "online" %} + Online + {% else %} + {{ provider.status }} + {% endif %} +
+
+ + {# Available models #} +

+ Available Models +
+ + +
+

+ + {% if update_running %} +

+ Models are currently being refreshed or installed - reload the page to see an up-to-date list. +

+ {% endif %} + +
+ + + + + + + + + + + + + + {% if available_models %} + {% for model_id, model in available_models.items() %} + + + + + + + {% endfor %} + {% else %} + + + + {% endif %} +
NameProvider/modelCapabilitiesStatus
+ {% if model.model_card %} + {{ model.name }} + {% else %} + {{ model.name }} + {% endif %} + + {{ providers[model.provider]['type'] }}/{{ providers[model.provider].url|hostname }}
+ {{ model.local_id }} +
+ {{ model.supported_media_types | join(", ") }} + + {% if model_id in enabled_models %} +
+ + + +
+ {% else %} +
+ + + +
+ {% endif %} + {% if providers[model.provider].type == "ollama" %} +
+ + + +
+ {% endif %} +
+ {% if providers %} + No models found. Use the Refresh button to fetch available models, or + install a new model below with compatible providers. + {% else %} + No LLM providers configured. + {% endif %} +
+
+
+ + {# Pull a new model, if an ollama server is configured #} + {% if providers.values()|selectattr("type", "equalto", "ollama")|list %} +
+

Install new LLMs

+

Enter a model name (e.g. llama3:8b) to make it available via the configured provider. For + Ollama, model names can be found in the + model library.

+

Pulling large models may take several minutes; the job runs in the background. Note that 4CAT cannot install + models for all LLM providers; if your provider is not listed below, 4CAT may not be able to add + models to it, or you may need to use an external tool to add new models.

+
+ +
+ + +
+
+ + +
+
+ +
+
+
+ {% endif %} +
@component.route("/admin/llm/", methods=["GET", "POST"])
@login_required
@setting_required("privileges.admin.can_manage_settings")
def llm_panel():
    """
    LLM Server management panel

    Shows server status, available models, and controls to pull/delete/refresh
    models. Pull, delete, and refresh operations are queued as
    LLMProviderManager jobs rather than run synchronously. Enabling and
    disabling a model updates the `llm.enabled_models` setting directly.

    POST parameters (form-encoded):
    - action: "refresh", "pull", "delete", "enable" or "disable"
    - provider: optional provider ID to limit the job to
    - model_name: model to act on (all actions except "refresh")

    :return: 403 error page if `llm.access` is disabled; a redirect back to
    the panel after a POST; the rendered panel otherwise
    """
    # local import: only needed to sanitise user input echoed in flashes
    from html import escape

    if not g.config.get("llm.access"):
        return error(403, message="LLM access is not enabled on this server.")

    if request.method == "POST":
        action = request.form.get("action", "").strip()
        provider = request.form.get("provider", "").strip()
        model_name = request.form.get("model_name", "").strip()
        details = {"provider": provider} if provider else {}

        # flashed messages are rendered with `|safe` by the panel template,
        # so user-supplied text must be escaped before it is echoed back
        shown_name = escape(model_name)

        if action == "refresh":
            # Queue a one-time manual refresh job; use a timestamp-based remote_id
            # so it is always accepted even if a periodic job already exists.
            g.queue.add_job("manage-llm", details={**details, "task": "refresh"},
                            remote_id=f"manage-llm-manual-{int(time.time())}")
            flash("Model refresh job queued.")

        elif action in ("pull", "delete"):
            if model_name:
                # the worker reads the model name from the job's remote_id
                g.queue.add_job("manage-llm", details={**details, "task": action}, remote_id=model_name)
                flash(f"{action.capitalize()} job queued for model '{shown_name}'.")
            elif action == "pull":
                flash("Please provide a model name to pull.")

        elif action in ("enable", "disable"):
            if model_name:
                enabled_models = list(g.config.get("llm.enabled_models", []) or [])
                if action == "enable" and model_name not in enabled_models:
                    enabled_models.append(model_name)
                    g.config.set("llm.enabled_models", enabled_models)
                elif action == "disable" and model_name in enabled_models:
                    enabled_models.remove(model_name)
                    g.config.set("llm.enabled_models", enabled_models)
                flash(f"Model '{shown_name}' {action}d.")

        return redirect(url_for("llm.llm_panel"))

    # --- GET: render panel ---

    # providers are keyed by provider ID; fall back to an empty mapping (not
    # a list) so that `.items()` also works when nothing is configured
    configured_providers = g.config.get("llm.providers", {}) or {}

    providers = {}
    for provider_id, provider in configured_providers.items():
        try:
            client = LLMProviderClient.get_client(g.config, provider)
        except ValueError:
            # unknown provider type: show it in the table instead of failing
            server_status = "invalid provider type"
        else:
            if provider_status := client.get_status():
                server_status = "online" if provider_status == 200 else f"error (HTTP {provider_status})"
            else:
                server_status = "unreachable"

        # annotate a copy, so the status does not end up in the configuration
        providers[provider_id] = {**provider, "status": server_status}

    available_models = g.config.get("llm.available_models", {}) or {}
    enabled_models = list(g.config.get("llm.enabled_models", []) or [])

    # any job without an interval is a one-off refresh/pull/delete; the
    # periodic refresh job always exists and has an interval
    update_running = any(
        not job.data["interval"] for job in g.queue.get_all_jobs("manage-llm")
    )

    return render_template(
        "controlpanel/llm-server.html",
        flashes=get_flashed_messages(),
        providers=providers,
        available_models=available_models,
        enabled_models=enabled_models,
        update_running=update_running,
    )