diff --git a/.jshintrc b/.jshintrc
new file mode 100644
index 000000000..7a6412586
--- /dev/null
+++ b/.jshintrc
@@ -0,0 +1,8 @@
+{
+  "esversion": 11,
+  "undef": true,
+  "globals": {
+    "$": false,
+    "document": false
+  }
+}
\ No newline at end of file
diff --git a/VERSION b/VERSION
index 006ffd9f6..94144784e 100644
--- a/VERSION
+++ b/VERSION
@@ -1,4 +1,4 @@
-1.54
+1.55
This file should not be modified. It is used by 4CAT to determine whether it
needs to run migration scripts to e.g. update the database structure to a more
diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py
new file mode 100644
index 000000000..1b3c62533
--- /dev/null
+++ b/backend/workers/llm_manager.py
@@ -0,0 +1,83 @@
+"""
+Manage LLM models
+"""
+from backend.lib.worker import BasicWorker
+from common.lib.llm.llm_client import LLMProviderClient
+
+class LLMProviderManager(BasicWorker):
+    """
+    Manages LLM models
+
+    Periodically refreshes the list of available models from an LLM provider.
+    Can also pull or delete models on demand when queued with a specific task.
+
+    Job details:
+    - task: "refresh" (default), "pull", or "delete"
+    - provider: the ID of the LLM provider, i.e. its key in the
+      llm.providers setting. if not given, run on all providers
+
+    Job remote_id:
+    - For refresh: "manage-llm-refresh" (periodic) or "manage-llm-manual" (on-demand)
+    - For pull/delete: the model name to pull or delete
+    """
+    type = "manage-llm"
+    max_workers = 1
+    client = None
+
+    @classmethod
+    def ensure_job(cls, config=None):
+        """
+        Ensure the daily refresh job is always scheduled
+
+        :return: Job parameters for the worker
+        """
+        return {"remote_id": "manage-llm-refresh", "interval": 86400}
+
+    def work(self):
+        """
+        Run the queued task on the targeted provider(s), then store models
+        """
+        details = self.job.details or {}
+        task = details.get("task", "refresh")
+        provider = details.get("provider", "")
+        model_name = self.job.data["remote_id"]
+        available_models = None
+
+        for provider_id, provider_config in self.config.get("llm.providers", {}).items():
+            if provider and provider != provider_id:
+                continue
+
+            try:
+                client = LLMProviderClient.get_client(self.config, provider_config)
+            except ValueError:
+                self.log.debug(f"{self.__class__.__name__}: invalid provider type: {provider_config['type']}, skipping")
+                continue
+
+            # note: without a provider filter, pull/delete runs on every provider.
+            # success stays None when the task is unknown/unsupported by the client
+            success = None
+            if task == "pull" and hasattr(client, "pull_model"):
+                success = client.pull_model(model_name)
+            elif task == "delete" and hasattr(client, "delete_model"):
+                success = client.delete_model(model_name)
+
+            if success or task == "refresh":
+                # refresh models after pulling/deleting, or when asked to
+                if available_models is None:
+                    available_models = {}
+                for model in client.list_models():
+                    model = client.build_model_entry(model)
+                    available_models[model["id"]] = model
+                self.log.debug(f"{self.__class__.__name__}: ran task '{task}' (model name: {model_name or 'N/A'})")
+            elif success is None:
+                self.log.warning(f"{self.__class__.__name__}: task '{task}' unknown or not supported by client")
+            else:
+                self.log.warning(f"{self.__class__.__name__}: task '{task}' failed for model {model_name}")
+
+        if available_models is not None:
+            # NOTE(review): with a provider filter this holds only that provider's models - confirm the overwrite
+            enabled = [m for m in dict.fromkeys(self.config.get("llm.enabled_models", [])) if m in available_models]
+            self.config.set("llm.available_models", available_models)
+            self.config.set("llm.enabled_models", enabled)  # stored order is kept
+
+        self.job.finish()
diff --git a/backend/workers/refresh_items.py b/backend/workers/refresh_items.py
index 8a56c213f..7ab11645d 100644
--- a/backend/workers/refresh_items.py
+++ b/backend/workers/refresh_items.py
@@ -1,78 +1,26 @@
"""
Refresh items
"""
-import json
-
-import requests
-
from backend.lib.worker import BasicWorker
class ItemUpdater(BasicWorker):
"""
Refresh 4CAT items
- Refreshes settings that are dependent on external factors
+ Refreshes settings that are dependent on external factors.
+ LLM model refreshing is handled by the LLMProviderManager worker.
"""
type = "refresh-items"
max_workers = 1
- @classmethod
- def ensure_job(cls, config=None):
- """
- Ensure that the refresher is always running
-
- This is used to ensure that the refresher is always running, and if it is
- not, it will be started by the WorkerManager.
-
- :return: Job parameters for the worker
- """
- return {"remote_id": "refresh-items", "interval": 60}
+ # ensure_job is intentionally disabled: this worker currently does nothing
+ # and would only create unnecessary job queue churn. Re-enable when work()
+ # has actual tasks to perform.
+ # @classmethod
+ # def ensure_job(cls, config=None):
+ # return {"remote_id": "refresh-items", "interval": 60}
def work(self):
- # Refresh items
- self.refresh_settings()
-
+ # Placeholder – no tasks implemented yet.
self.job.finish()
-
- def refresh_settings(self):
- """
- Refresh settings
- """
- # LLM server settings
- llm_provider = self.config.get("llm.provider_type", "none").lower()
- llm_server = self.config.get("llm.server", "")
-
- # For now we only support the Ollama API
- if llm_provider == "ollama" and llm_server:
- headers = {"Content-Type": "application/json"}
- llm_api_key = self.config.get("llm.api_key", "")
- llm_auth_type = self.config.get("llm.auth_type", "")
- if llm_api_key and llm_auth_type:
- headers[llm_auth_type] = llm_api_key
-
- available_models = {}
- try:
- response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10)
- if response.status_code == 200:
- settings = response.json()
- for model in settings.get("models", []):
- model = model["name"]
- try:
- model_metadata = requests.post(f"{llm_server}/api/show", headers=headers, json={"model": model}, timeout=10).json()
- available_models[model] = {
- "name": f"{model_metadata['model_info'].get('general.basename', model)} ({model_metadata['details']['parameter_size']} parameters)",
- "model_card": f"https://ollama.com/library/{model}",
- "provider": "local"
- }
-
- except (requests.RequestException, json.JSONDecodeError, KeyError) as e:
- self.log.debug(f"Could not get metadata for model {model} from Ollama - skipping (error: {e})")
-
- self.config.set("llm.available_models", available_models)
- self.log.debug("Refreshed LLM server settings cache")
- else:
- self.log.warning(f"Could not refresh LLM server settings cache - server returned status code {response.status_code}")
-
- except requests.RequestException as e:
- self.log.warning(f"Could not refresh LLM server settings cache - request error: {str(e)}")
\ No newline at end of file
diff --git a/common/assets/llms.json b/common/assets/llms.json
index 835dbaa09..c17351488 100644
--- a/common/assets/llms.json
+++ b/common/assets/llms.json
@@ -1,128 +1,140 @@
-{
- "none": {
- "name": "",
- "model_card": "",
- "provider": "",
- "default": true
- },
- "custom": {
- "name": "[custom]",
- "model_card": "",
- "provider": ""
- },
- "gpt-5.4": {
- "name": "[OpenAI] GPT-5.4",
- "model_card": "https://platform.openai.com/docs/models/gpt-5.4",
- "provider": "openai"
- },
- "gpt-5-mini": {
- "name": "[OpenAI] GPT-5 mini",
- "model_card": "https://platform.openai.com/docs/models/gpt-5-mini",
- "provider": "openai"
- },
- "gpt-5-nano": {
- "name": "[OpenAI] GPT-5 nano",
- "model_card": "https://platform.openai.com/docs/models/gpt-5-nano",
- "provider": "openai"
- },
- "gpt-5.4-pro": {
- "name": "[OpenAI] GPT-5.4 Pro",
- "model_card": "https://platform.openai.com/docs/models/gpt-5.4-pro",
- "provider": "openai"
- },
- "gpt-4.1-mini": {
- "name": "[OpenAI] GPT-4.1 mini",
- "model_card": "https://platform.openai.com/docs/models/gpt-4.1-mini",
- "provider": "openai"
- },
- "gpt-4.1-nano": {
- "name": "[OpenAI] GPT-4.1 nano",
- "model_card": "https://platform.openai.com/docs/models/gpt-4.1-nano",
- "provider": "openai"
- },
- "gpt-4.1": {
- "name": "[OpenAI] GPT-4.1",
- "model_card": "https://platform.openai.com/docs/models/gpt-4.1",
- "provider": "openai"
- },
- "gpt-4o-mini": {
- "name": "[OpenAI] GPT-4o mini",
- "model_card": "https://platform.openai.com/docs/models/gpt-4o-mini",
- "provider": "openai"
- },
- "gpt-4o": {
- "name": "[OpenAI] GPT-4o",
- "model_card": "https://platform.openai.com/docs/models/gpt-4o",
- "provider": "openai"
- },
- "gemini-3.1-pro-preview": {
- "name": "[Google] Gemini 3.1 Pro",
- "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-pro",
- "provider": "google"
- },
- "gemini-3-flash-preview": {
- "name": "[Google] Gemini 3 Flash",
- "model_card": "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-flash",
- "provider": "google"
- },
- "gemini-3.1-flash-lite-preview": {
- "name": "[Google] Gemini 3.1 Flash Lite",
- "provider": "google",
- "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-flash-lite"
- },
- "claude-opus-4-6": {
- "name": "[Anthropic] Claude Opus 4.6 (latest)",
- "model_card": "https://www.anthropic.com/claude/opus",
- "provider": "anthropic"
- },
- "claude-sonnet-4-6": {
- "name": "[Anthropic] Claude Sonnet 4.6 (latest)",
- "model_card": "https://www.anthropic.com/claude/sonnet",
- "provider": "anthropic"
- },
- "claude-4-5-haiku": {
- "name": "[Anthropic] Claude 4.5 Haiku (latest)",
- "model_card": "https://www.anthropic.com/claude/haiku",
- "provider": "anthropic"
- },
- "magistral-small-2509": {
- "name": "[Mistral] Magistral Small 1.2 (25.09)",
- "model_card": "https://docs.mistral.ai/models/magistral-small-1-2-25-09",
- "provider": "mistral"
- },
- "magistral-medium-2509": {
- "name": "[Mistral] Magistral Medium 1.2 (25.09)",
- "model_card": "https://docs.mistral.ai/models/magistral-medium-1-2-25-09",
- "provider": "mistral"
- },
- "mistral-small-2506": {
- "name": "[Mistral] Mistral Small 3.2 (25.06)",
- "model_card": "https://docs.mistral.ai/models/mistral-small-3-2-25-06",
- "provider": "mistral"
- },
- "mistral-medium-2508": {
- "name": "[Mistral] Mistral Medium 3.1 (25.08)",
- "model_card": "mistral-medium-2508",
- "provider": "mistral"
- },
- "mistral-large-2512": {
- "name": "[Mistral] Mistral Large 3 (25.12)",
- "model_card": "https://docs.mistral.ai/models/mistral-large-3-25-12",
- "provider": "mistral"
- },
- "open-mistral-nemo-2407": {
- "name": "[Mistral] Mistral Nemo 12B",
- "model_card": "https://docs.mistral.ai/models/mistral-nemo-12b-24-07",
- "provider": "mistral"
- },
- "deepseek-chat": {
- "name": "[DeepSeek] DeepSeek latest (non-reasoning)",
- "model_card": "https://api-docs.deepseek.com/quick_start/pricing",
- "provider": "deepseek"
- },
- "deepseek-reasoner": {
- "name": "[DeepSeek] DeepSeek latest (reasoning)",
- "model_card": "https://api-docs.deepseek.com/quick_start/pricing",
- "provider": "deepseek"
- }
-}
\ No newline at end of file
+[
+ {
+ "model": "gpt-5.4",
+ "name": "[OpenAI] GPT-5.4",
+ "model_card": "https://platform.openai.com/docs/models/gpt-5.4",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-5-mini",
+ "name": "[OpenAI] GPT-5 mini",
+ "model_card": "https://platform.openai.com/docs/models/gpt-5-mini",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-5-nano",
+ "name": "[OpenAI] GPT-5 nano",
+ "model_card": "https://platform.openai.com/docs/models/gpt-5-nano",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-5.4-pro",
+ "name": "[OpenAI] GPT-5.4 Pro",
+ "model_card": "https://platform.openai.com/docs/models/gpt-5.4-pro",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-4.1-mini",
+ "name": "[OpenAI] GPT-4.1 mini",
+ "model_card": "https://platform.openai.com/docs/models/gpt-4.1-mini",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-4.1-nano",
+ "name": "[OpenAI] GPT-4.1 nano",
+ "model_card": "https://platform.openai.com/docs/models/gpt-4.1-nano",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-4.1",
+ "name": "[OpenAI] GPT-4.1",
+ "model_card": "https://platform.openai.com/docs/models/gpt-4.1",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-4o-mini",
+ "name": "[OpenAI] GPT-4o mini",
+ "model_card": "https://platform.openai.com/docs/models/gpt-4o-mini",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-4o",
+ "name": "[OpenAI] GPT-4o",
+ "model_card": "https://platform.openai.com/docs/models/gpt-4o",
+ "provider": "openai"
+ },
+ {
+ "model": "gemini-3.1-pro-preview",
+ "name": "[Google] Gemini 3.1 Pro",
+ "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-pro",
+ "provider": "google"
+ },
+ {
+ "model": "gemini-3-flash-preview",
+ "name": "[Google] Gemini 3 Flash",
+ "model_card": "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-flash",
+ "provider": "google"
+ },
+ {
+ "model": "gemini-3.1-flash-lite-preview",
+ "name": "[Google] Gemini 3.1 Flash Lite",
+ "provider": "google",
+ "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-flash-lite"
+ },
+ {
+ "model": "claude-opus-4-6",
+ "name": "[Anthropic] Claude Opus 4.6 (latest)",
+ "model_card": "https://www.anthropic.com/claude/opus",
+ "provider": "anthropic"
+ },
+ {
+ "model": "claude-sonnet-4-6",
+ "name": "[Anthropic] Claude Sonnet 4.6 (latest)",
+ "model_card": "https://www.anthropic.com/claude/sonnet",
+ "provider": "anthropic"
+ },
+ {
+ "model": "claude-4-5-haiku",
+ "name": "[Anthropic] Claude 4.5 Haiku (latest)",
+ "model_card": "https://www.anthropic.com/claude/haiku",
+ "provider": "anthropic"
+ },
+ {
+ "model": "magistral-small-2509",
+ "name": "[Mistral] Magistral Small 1.2 (25.09)",
+ "model_card": "https://docs.mistral.ai/models/magistral-small-1-2-25-09",
+ "provider": "mistral"
+ },
+ {
+ "model": "magistral-medium-2509",
+ "name": "[Mistral] Magistral Medium 1.2 (25.09)",
+ "model_card": "https://docs.mistral.ai/models/magistral-medium-1-2-25-09",
+ "provider": "mistral"
+ },
+ {
+ "model": "mistral-small-2506",
+ "name": "[Mistral] Mistral Small 3.2 (25.06)",
+ "model_card": "https://docs.mistral.ai/models/mistral-small-3-2-25-06",
+ "provider": "mistral"
+ },
+ {
+ "model": "mistral-medium-2508",
+ "name": "[Mistral] Mistral Medium 3.1 (25.08)",
+ "model_card": "https://docs.mistral.ai/models/mistral-medium-3-1-25-08",
+ "provider": "mistral"
+ },
+ {
+ "model": "mistral-large-2512",
+ "name": "[Mistral] Mistral Large 3 (25.12)",
+ "model_card": "https://docs.mistral.ai/models/mistral-large-3-25-12",
+ "provider": "mistral"
+ },
+ {
+ "model": "open-mistral-nemo-2407",
+ "name": "[Mistral] Mistral Nemo 12B",
+ "model_card": "https://docs.mistral.ai/models/mistral-nemo-12b-24-07",
+ "provider": "mistral"
+ },
+ {
+ "model": "deepseek-chat",
+ "name": "[DeepSeek] DeepSeek latest (non-reasoning)",
+ "model_card": "https://api-docs.deepseek.com/quick_start/pricing",
+ "provider": "deepseek"
+ },
+ {
+ "model": "deepseek-reasoner",
+ "name": "[DeepSeek] DeepSeek latest (reasoning)",
+ "model_card": "https://api-docs.deepseek.com/quick_start/pricing",
+ "provider": "deepseek"
+ }
+]
\ No newline at end of file
diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py
index aef363e04..0635a4c97 100644
--- a/common/lib/config_definition.py
+++ b/common/lib/config_definition.py
@@ -9,6 +9,7 @@
"""
from common.lib.user_input import UserInput
+import re
config_definition = {
"datasources.intro": {
@@ -575,61 +576,87 @@
# allows 4CAT LLM processors to connect to a local or remote LLM server
"llm.intro": {
"type": UserInput.OPTION_INFO,
- "help": "4CAT LLM processors allow users to utilize common APIs (e.g. OpenAI, Google, Anthropic) as well as connect "
- "to local or remote LLM servers. You can also set up your own LLM server using open source software such as "
- "[Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for your users."
- },
- "llm.host_name": {
- "type": UserInput.OPTION_TEXT,
- "default": "4CAT LLM Server",
- "help": "Name of LLM Server in UI",
- "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).",
- "global": True
- },
- "llm.provider_type": {
- "type": UserInput.OPTION_CHOICE,
- "help": "LLM Provider Type",
- "default": "none",
- "options": {
- "ollama": "Ollama",
- "none": "None",
+ "help": "4CAT LLM processors allow users to utilize common APIs (e.g. OpenAI, Google, Anthropic) as well as "
+ "connect to local or remote LLM servers. You can also set up your own LLM server using open source "
+ "software such as [Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for "
+ "your users. After configuring providers you can enable and disable available models via the 'LLMs & "
+ "Providers' page in the Control Panel."
+ },
+ "llm.providers": {
+ "type": UserInput.OPTION_MULTI_OPTION,
+ "default": {
+ "thirdparty-models": {
+ "name": "Third-party APIs (OpenAI, Google, Claude, Mistral, etc)",
+ "type": "api",
+ "url": "",
+ "auth_header": "",
+ "auth_key": ""
+ }
},
"global": True,
- },
- "llm.server": {
- "type": UserInput.OPTION_TEXT,
- "default": "",
- "help": "LLM Server URL",
- "tooltip": "The URL of the LLM server, e.g. http://localhost:5000",
- "global": True
- },
- "llm.auth_type": {
- "type": UserInput.OPTION_TEXT,
- "help": "LLM Server Authentication Type",
- "default": "",
- "tooltip": "The authentication type required to connect to the server (e.g. 'X-API-KEY', 'Authorization'). Passed in the request header with the API key.",
- "global": True,
- },
- "llm.api_key": {
- "type": UserInput.OPTION_TEXT,
- "default": "",
- "help": "LLM Server API Key",
- "tooltip": "The API key to access the LLM server, if required.",
- "global": True
+ "help": "LLM providers",
+ "dict_key": lambda v: re.sub(r"[^0-9a-zA-Z ]", "", v["name"]).lower().replace(" ", "-") + (("-" + v["url"].split("/")[2].lower()) if "://" in v["url"] else ""),
+ "options": {
+ "name": {
+ "type": UserInput.OPTION_TEXT,
+ "default": "",
+ "help": "Name of LLM Server in UI",
+ "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).",
+ },
+ "type": {
+ "type": UserInput.OPTION_CHOICE,
+ "help": "LLM Provider Type",
+ "default": "none",
+ "options": {
+ "ollama": "Ollama",
+ "litellm": "LiteLLM",
+ "openai-like": "OpenAI compatible API (LM Studio, vLLM, etc)",
+ "api": "Third-party models from OpenAI, Anthropic, Mistral, etc",
+ "none": "None",
+ },
+ },
+ "url": {
+ "type": UserInput.OPTION_TEXT,
+ "default": "",
+ "help": "LLM Server URL",
+ "tooltip": "The URL of the LLM server, e.g. http://localhost:5000. Must start with a schema (e.g. 'https://').",
+ },
+ "auth_header": {
+ "type": UserInput.OPTION_TEXT,
+ "help": "Authentication Header",
+ "default": "",
+ "tooltip": "The HTTP header used to authenticate with the server (e.g. 'X-API-KEY', 'Authorization'). Passed with the Authentication Key as value.",
+ },
+ "auth_key": {
+ "type": UserInput.OPTION_TEXT,
+ "default": "",
+ "help": "Authentication Key",
+ "tooltip": "The API key to access the LLM server, if required.",
+ },
+ }
},
"llm.available_models": {
"type": UserInput.OPTION_TEXT_JSON,
"default": {},
"help": "Available LLM models",
- "tooltip": "A JSON dictionary of available LLM models on the server. 4CAT will query the LLM server for available models periodically.",
+ "tooltip": "A JSON dictionary of available LLM models on the server. Refreshed daily by the OllamaManager worker.",
+ "indirect": True,
+ "global": True
+ },
+ "llm.enabled_models": {
+ "type": UserInput.OPTION_TEXT_JSON,
+ "default": [],
+ "help": "Enabled LLM models",
+ "tooltip": "List of model keys enabled for use. Managed via the LLM Server settings panel.",
"indirect": True,
"global": True
},
"llm.access": {
"type": UserInput.OPTION_TOGGLE,
- "help": "LLM Access",
+ "help": "Local LLM Access",
"default": False,
- "tooltip": "Use tags or individual users to allow access to the LLM server (or set True in global for all).",
+ "tooltip": "If disabled, can only use LLMs from the 'Third-party models' provider. Can be configured per user "
+ "or tag.",
},
# TODO: add setting to restrict models per user/group?
@@ -739,5 +766,5 @@
"proxies": "Proxied HTTP requests",
"image-visuals": "Image visualization",
"extensions": "Extensions",
- "llm": "LLM Server Settings"
+ "llm": "LLM Providers"
}
diff --git a/common/lib/llm/__init__.py b/common/lib/llm/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/common/lib/llm.py b/common/lib/llm/adapter.py
similarity index 55%
rename from common/lib/llm.py
rename to common/lib/llm/adapter.py
index 0901194d1..8e4c7bc26 100644
--- a/common/lib/llm.py
+++ b/common/lib/llm/adapter.py
@@ -1,9 +1,10 @@
import json
import base64
import mimetypes
-import requests
+
from pathlib import Path
from typing import List, Optional, Union
+
from pydantic import SecretStr
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain_core.language_models.chat_models import BaseChatModel
@@ -18,111 +19,86 @@
class LLMAdapter:
def __init__(
self,
- provider: str,
- model: str,
+ config,
+ model,
api_key: Optional[str] = None,
- base_url: Optional[str] = None,
temperature: float = 0.1,
max_tokens: int = 1000,
client_kwargs: Optional[dict] = None,
):
"""
- provider: 'openai', 'google', 'mistral', 'ollama', 'lmstudio', 'anthropic', 'deepseek'
- model: model name (e.g., 'gpt-4o-mini', 'claude-3-opus', 'mistral-small', etc.)
- api_key: API key if required (OpenAI, Claude, Google, Mistral)
- base_url: for local models or Mistral custom endpoints
- temperature: temperature hyperparameter,
- max_tokens: how many output tokens may be used
- client_kwargs: additional client parameters
+ Instantiate an adapter to interface with an LLM model
+
+ :param config: 4CAT config reader
+ :param model: Model metadata (as in `llm.available_models` 4CAT setting)
+ :param api_key: API key, if needed
+ :param temperature: Temperature hyperparameter
+ :param max_tokens: Max tokens to generate
+ :param client_kwargs: Optional parameters for the LLM adapter class
"""
- self.provider = provider.lower()
+ known_providers = config.get("llm.providers", {})
+
self.model = model
+ self.provider = known_providers.get(model['provider'])
self.api_key = api_key
- self.base_url = base_url
self.temperature = temperature
self.structured_output = False
self.parser = None
self.max_tokens = max_tokens
self.client_kwargs = dict(client_kwargs) if client_kwargs else {}
+
self.llm: BaseChatModel = self._load_llm()
def _load_llm(self) -> BaseChatModel:
- if self.provider == "openai":
- kwargs = {}
- if "o3" not in self.model:
- kwargs["temperature"] = self.temperature # temperature not supported for all models
- return ChatOpenAI(
- model=self.model,
- api_key=SecretStr(self.api_key),
- base_url=self.base_url or "https://api.openai.com/v1",
- max_tokens=self.max_tokens,
- **kwargs
- )
- elif self.provider == "google":
- return ChatGoogleGenerativeAI(
- model=self.model,
- temperature=self.temperature,
- google_api_key=self.api_key,
- max_tokens=self.max_tokens
- )
- elif self.provider == "anthropic":
- return ChatAnthropic(
- model_name=self.model,
- temperature=self.temperature,
- api_key=SecretStr(self.api_key),
- max_tokens=self.max_tokens,
- timeout=100,
- stop=None
- )
- elif self.provider == "mistral":
- return ChatMistralAI(
- model_name=self.model,
- temperature=self.temperature,
- api_key=SecretStr(self.api_key),
- base_url=self.base_url, # Optional override
- max_tokens=self.max_tokens,
- )
- elif self.provider == "deepseek":
- return ChatDeepSeek(
- model=self.model,
- temperature=self.temperature,
- api_key=SecretStr(self.api_key),
- base_url=self.base_url,
- max_tokens=self.max_tokens if self.max_tokens <= 8192 else 8192,
- )
- elif self.provider == "ollama":
- ollama_adapter = ChatOllama(
- model=self.model,
- temperature=self.temperature,
- base_url=self.base_url or "http://localhost:11434",
- max_tokens=self.max_tokens,
- client_kwargs=self.client_kwargs
- )
- self.model = ollama_adapter.model
- return ollama_adapter
- elif self.provider in {"vllm", "lmstudio"}:
- # OpenAI-compatible local servers
- if self.provider == "lmstudio" and not self.api_key:
- self.api_key = "lm-studio"
-
- # For vLLM, query the server to get the actual model name. We can't leave this empty, unfortunately.
- if self.provider == "vllm" and self.model=="vllm_model":
- model_name = self.get_vllm_model_name(self.base_url, self.api_key)
- self.model = model_name
- else:
- model_name = self.model if self.model else "lmstudio-model"
-
- llm = ChatOpenAI(
- model=model_name,
- temperature=self.temperature,
- api_key=SecretStr(self.api_key),
- base_url=self.base_url,
- max_tokens=self.max_tokens,
- )
- self.model = llm.model_name
- return llm
+        chat_params = {
+            "model": self.model["local_id"],
+            "api_key": SecretStr(self.api_key),
+            "base_url": self.provider["url"],
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature,
+        }
+        # per provider type: adjust the shared parameters and pick the adapter class
+        if self.provider["type"] == "openai":
+            if "o3" in self.model["local_id"]:  # self.model is a dict, so match on the model ID
+                del chat_params["temperature"]  # temperature not supported for all models
+            adapter_class = ChatOpenAI
+
+        elif self.provider["type"] == "google":
+            adapter_class = ChatGoogleGenerativeAI
+
+        elif self.provider["type"] == "anthropic":
+            chat_params.update({"timeout": 100, "stop": None})
+            adapter_class = ChatAnthropic
+
+        elif self.provider["type"] == "mistral":
+            adapter_class = ChatMistralAI
+
+        elif self.provider["type"] == "deepseek":
+            chat_params["max_tokens"] = min(self.max_tokens, 8192)
+            adapter_class = ChatDeepSeek
+
+        elif self.provider["type"] == "ollama":
+            adapter_class = ChatOllama
+            chat_params.update({"client_kwargs": self.client_kwargs})
+
+        elif self.provider["type"] in {"litellm", "openai-like"}:
+            url = f"{self.provider['url']}/" if not self.provider["url"].endswith("/") else self.provider['url']
+            url += "v1/" if not url.endswith("v1/") else ""  # OpenAI-compatible endpoints live under /v1/
+
+            chat_params.update({"base_url": url})
+            if self.provider["auth_header"]:
+                chat_params.update({
+                    "default_headers": {
+                        self.provider["auth_header"]: self.provider["auth_key"]
+                    }
+                })
+
+            adapter_class = ChatOpenAI
+
else:
- raise ValueError(f"Unsupported LLM provider: {self.provider}")
+ raise ValueError(f"{self.__class__.__name__} Unsupported LLM provider type: {self.provider['type']}")
+
+ return adapter_class(**chat_params)
def generate_text(
self,
@@ -161,7 +137,8 @@ def generate_text(
lc_messages = messages
kwargs = {"temperature": temperature}
- if self.provider in ("google", "ollama") or "o3" in self.model or "gpt-5" in self.model:
+ if self.provider["type"] in ("google", "ollama") or "o3" in self.model["local_id"] or "gpt-5" in self.model[
+ "local_id"]:
kwargs = {}
try:
@@ -172,10 +149,10 @@ def generate_text(
return response
def create_multimodal_content(
- self,
- text: str,
- media_urls: Optional[List[str]] = None,
- media_files: Optional[List[Union[str, Path]]] = None,
+ self,
+ text: str,
+ media_urls: Optional[List[str]] = None,
+ media_files: Optional[List[Union[str, Path]]] = None,
) -> List[dict]:
"""
Create multimodal content structure for LangChain messages with media URLs
@@ -224,11 +201,11 @@ def create_multimodal_content(
return content
def _format_media_block(
- self,
- url: Optional[str] = None,
- b64_data: Optional[str] = None,
- mime_type: str = "image/jpeg",
- media_category: str = "image",
+ self,
+ url: Optional[str] = None,
+ b64_data: Optional[str] = None,
+ mime_type: str = "image/jpeg",
+ media_category: str = "image",
) -> dict:
"""
Format a single media block for the appropriate provider.
@@ -239,7 +216,7 @@ def _format_media_block(
:param media_category: "image", "video", or "audio"
:returns: Provider-formatted content block
"""
- if self.provider == "anthropic":
+ if self.provider["type"] == "anthropic":
if media_category == "image":
if url:
return {"type": "image", "source": {"type": "url", "url": url}}
@@ -255,13 +232,13 @@ def _format_media_block(
return {"type": "document", "source": {
"type": "base64", "media_type": mime_type, "data": b64_data
}}
- elif self.provider == "google":
+ elif self.provider["type"] == "google":
if url:
return {"type": "image_url", "image_url": {"url": url}}
else:
data_uri = f"data:{mime_type};base64,{b64_data}"
return {"type": "image_url", "image_url": {"url": data_uri}}
- elif self.provider == "ollama":
+ elif self.provider["type"] == "ollama":
if media_category != "image":
raise ValueError(f"Ollama provider only supports image media, got category '{media_category}'")
if url:
@@ -281,7 +258,7 @@ def _format_media_block(
return {"type": "image_url", "image_url": {"url": url}}
else:
data_uri = f"data:{mime_type};base64,{b64_data}"
- if media_category == "audio" and self.provider == "openai":
+ if media_category == "audio" and self.provider["type"] == "openai":
return {"type": "input_audio", "input_audio": {
"data": b64_data, "format": mime_type.split("/")[-1]
}}
@@ -297,38 +274,13 @@ def set_structure(self, json_schema):
json.dumps(json_schema) # To validate / raise an error
# LM Studio needs some more guidance
- if self.provider == "lmstudio":
+ if self.provider["type"] == "lmstudio":
json_schema = {"type": "json_schema", "json_schema": {"schema": json_schema}}
self.llm = self.llm.bind(response_format=json_schema)
else:
self.llm = self.llm.with_structured_output(json_schema)
self.structured_output = True
- @staticmethod
- def get_model_options(config) -> dict:
- """
- Returns model choice options for UserInput
- """
- models = LLMAdapter.get_models(config)
- if not models:
- return {}
- options = {model_id: model_values["name"] for model_id, model_values in models.items()}
- return options
-
- @staticmethod
- def get_model_providers(config) -> dict:
- """
- Returns available model providers through APIs
- """
- models = LLMAdapter.get_models(config)
- if not models:
- return {}
- providers = list(set([model_values.get("provider", "") for model_values in models.values()]))
- if not providers:
- return {}
- options = {provider: provider.capitalize() for provider in providers if provider}
- return options
-
@staticmethod
def get_models(config) -> dict:
"""
@@ -337,36 +289,6 @@ def get_models(config) -> dict:
:returns dict, A dict with model IDs as keys and details as values
"""
- with (
- config.get("PATH_ROOT")
- .joinpath("common/assets/llms.json")
- .open() as available_models
- ):
- available_models = json.loads(available_models.read())
- return available_models
-
-
- @staticmethod
- def get_vllm_model_name(base_url: str, api_key: str = None) -> str:
- """
- Query vLLM server to get the name of the served model.
- """
-
- try:
- # vLLM exposes available models at /v1/models endpoint
- models_url = f"{base_url.rstrip('/')}/models"
- headers = {}
- if api_key:
- headers["Authorization"] = f"Bearer {api_key}"
-
- response = requests.get(models_url, headers=headers, timeout=10)
- response.raise_for_status()
- models_data = response.json()
-
- # Get the first available model
- if models_data.get("data") and len(models_data["data"]) > 0:
- return models_data["data"][0]["id"]
- else:
- raise ValueError("No models found on vLLM server")
- except Exception as e:
- raise ValueError(f"Could not retrieve model name from vLLM server: {e}")
+        available_models = config.get("llm.available_models", {})
+        enabled_models = set(config.get("llm.enabled_models", []))  # setting is a list; set gives O(1) lookups
+        return {k: v for k, v in available_models.items() if k in enabled_models}
diff --git a/common/lib/llm/clients/__init__.py b/common/lib/llm/clients/__init__.py
new file mode 100644
index 000000000..4287ca861
--- /dev/null
+++ b/common/lib/llm/clients/__init__.py
@@ -0,0 +1 @@
+#
\ No newline at end of file
diff --git a/common/lib/llm/clients/litellm_client.py b/common/lib/llm/clients/litellm_client.py
new file mode 100644
index 000000000..cf65497ff
--- /dev/null
+++ b/common/lib/llm/clients/litellm_client.py
@@ -0,0 +1,60 @@
+"""
+Centralized HTTP client for communicating with a LiteLLM server.
+
+This class owns all direct HTTP calls to LiteLLM's REST API and provides shared
+static helpers for capability parsing, display-name formatting, and building
+canonical llm.available_models entries. It is a plain helper with no 4CAT
+base-class dependency.
+
+This class is primarily intended for interfacing with LiteLLM, but since
+LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface
+with the OpenAI API as well.
+"""
+from common.lib.llm.llm_client import LLMProviderClient
+
+class LiteLLMClient(LLMProviderClient):
+    type = "litellm"
+    # endpoint and keys the base class uses to list models
+    _models_info_path = "/model/info"
+    _models_info_key = "data"
+    _model_id_key = "model_name"
+
+    def parse_supported_media_types(self, meta: dict) -> list[str]:
+        """
+        Derive the media types a model supports from its LiteLLM metadata.
+
+        :param meta: ``model info`` response dict, or ``None``.
+        :returns: Ordered list of supported media type strings.
+                  Returns ``[]`` when ``meta`` is ``None`` or has no ``model_info``.
+        """
+        if meta is None or not meta.get("model_info"):
+            return []
+
+        media_types = ["text"]  # far as I can tell, text is always supported
+        if meta["model_info"].get("supports_vision"):
+            media_types.append("image")
+
+        if meta["model_info"].get("supports_audio_input"):
+            media_types.append("audio")
+
+        # no way to tell if model supports embeddings input as far as I can see...
+
+        return media_types
+
+    def format_display_name(self, meta: dict) -> str:
+        """
+        Build a human-readable display name for a model.
+
+        :param meta: ``model info`` response dict for the model.
+        :returns: Human-readable display name string.
+        """
+        model_name = self.get_global_model_id(meta)
+        if meta.get("model_name"):
+            model_name = meta["model_name"]
+
+        routed_model = (meta.get("litellm_params") or {}).get("model")
+        if routed_model:
+            # strip the "provider/" prefix; keep names that have no prefix
+            model_name = routed_model.split("/", 1)[-1] or model_name
+
+        return model_name
\ No newline at end of file
diff --git a/common/lib/llm/clients/ollama_client.py b/common/lib/llm/clients/ollama_client.py
new file mode 100644
index 000000000..e21297448
--- /dev/null
+++ b/common/lib/llm/clients/ollama_client.py
@@ -0,0 +1,182 @@
+"""
+Centralized HTTP client for communicating with an Ollama server.
+
+This class owns all direct HTTP calls to Ollama's REST API and provides shared static
+helpers for capability parsing, display-name formatting, and building canonical
+llm.available_models entries. It is a plain helper with no 4CAT base-class dependency.
+"""
+import requests
+
+from common.lib.llm.llm_client import LLMProviderClient
+
+
+class OllamaClient(LLMProviderClient):
+    type = "ollama"
+    # endpoint and keys the base class uses to list models
+    _models_info_path = "/api/tags"
+    _models_info_key = "models"
+    _model_id_key = "model"
+
+    def list_models(self) -> list[dict]:
+        """
+        List all models available.
+
+        For Ollama, each model is enriched with its ``/api/show`` response
+        (stored under ``"metadata"``); models for which that fails are skipped.
+
+        :return list[dict]:  List of models available.
+        """
+        result = []
+        for model in super().list_models():
+            model_id = model.get(self._model_id_key)  # safe to use in the log message
+            try:
+                response = self._session.post(
+                    f"{self.base_url}/api/show",
+                    json={"model": model[self._model_id_key]},
+                    headers=self._headers,
+                    timeout=self.timeout,
+                )
+                response.raise_for_status()  # error bodies are not model metadata
+                result.append({**model, "metadata": response.json()})
+            except (requests.RequestException, KeyError, ValueError) as e:
+                if self.log:
+                    self.log.warning(f"{self.__class__.__name__}: failed to fetch additional model info for model {model_id}: {e}")
+
+        return result
+
+    def parse_supported_media_types(self, meta: dict) -> list[str]:
+        """Derive the media types a model supports from its Ollama metadata.
+
+        **Primary path**: reads ``meta["metadata"]["capabilities"]``:
+          - ``"completion"`` → ``"text"``
+          - ``"vision"``     → ``"image"``
+          - ``"embedding"``  → ``"embedding"``
+
+        **Fallback path** (used when the capabilities do not yield ``"image"``):
+        inspects GGUF ``model_info`` / ``details`` / ``projector_info`` for
+        vision signals and adds ``"image"`` if any are found.
+
+        :param meta: Model entry as returned by ``list_models()``, or ``None``.
+        :returns: Ordered list of supported media type strings.
+                  Returns ``[]`` when ``meta`` is ``None`` or has no
+                  ``"metadata"`` (unknown — callers should not block it).
+        """
+        if meta is None or not meta.get("metadata"):
+            return []
+
+        show_info = meta["metadata"]
+        capabilities = show_info.get("capabilities") or []
+        media_types: list[str] = []
+
+        _cap_map = {
+            "completion": "text",
+            "vision": "image",
+            "embedding": "embedding",
+        }
+        for cap in capabilities:
+            mapped = _cap_map.get(cap)
+            if mapped and mapped not in media_types:
+                media_types.append(mapped)
+
+        # Fallback: GGUF-level vision signals when capabilities list gives no image info
+        if "image" not in media_types:
+            # list_models() stores these under "metadata"; top-level is kept as a fallback
+            details = show_info.get("details") or meta.get("details") or {}
+            model_info = show_info.get("model_info") or meta.get("model_info") or {}
+            projector_info = show_info.get("projector_info") or meta.get("projector_info")
+
+            has_clip_family = "clip" in (details.get("families") or [])
+            has_vision_keys = any(k.startswith("vision.") for k in model_info)
+            if has_clip_family or has_vision_keys or bool(projector_info):
+                media_types.append("image")
+
+        return media_types
+
+    def format_display_name(self, meta: dict) -> str:
+        """
+        Build a human-readable display name for a model.
+
+        :param meta: Model entry as returned by ``list_models()``.
+        :returns: Human-readable display name string; known details are added
+          in parentheses, which are left out when there are none.
+        """
+        model_name = self.get_model_id(meta)
+        more_meta = (meta.get("metadata") or {}).get("model_info") or {}
+        extra_bits = []
+
+        if more_meta.get("general.basename"):
+            model_name = more_meta["general.basename"]
+
+        if more_meta.get("general.finetune"):
+            extra_bits.append(more_meta["general.finetune"])
+
+        if more_meta.get("general.size_label"):
+            extra_bits.append(more_meta["general.size_label"])
+        elif (meta.get("details") or {}).get("parameter_size"):
+            # no size label: fall back to the size in the entry's "details"
+            extra_bits.append(f"{meta['details']['parameter_size']} parameters")
+
+        if extra_bits:  # avoid a dangling " ()" when nothing is known
+            model_name += f" ({', '.join(extra_bits)})"
+
+        return model_name
+
+    def get_model_card_url(self, meta: dict) -> str:
+        """
+        Get a URL for a model card for a given model
+
+        :param meta: Model metadata
+        :return str: Model card URL (empty string if unavailable)
+        """
+        return f"https://ollama.com/library/{meta['model']}"
+
+    def pull_model(self, model_id: str, stream: bool = False) -> bool:
+        """Pull a model from the Ollama registry.
+
+        :param model_id: Model name (e.g. ``"llama3:8b"``).
+        :param stream: Whether to stream the response (default ``False``).
+        :returns: ``True`` on success, ``False`` on failure.
+        """
+        try:
+            r = self._session.post(
+                f"{self.base_url}/api/pull",
+                headers=self._headers,
+                json={"model": model_id, "stream": stream},
+                timeout=600,
+            )
+
+            if r.status_code != 200 and self.log:
+                self.log.warning(
+                    f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}")
+
+            return r.status_code == 200
+
+        except requests.RequestException as e:
+            if self.log:
+                self.log.warning(
+                    f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}: {e}")
+
+            return False
+
+    def delete_model(self, model_id: str) -> bool:
+        """Delete a model from the Ollama server.
+
+        :param model_id: Model name (e.g. ``"llama3:8b"``).
+        :returns: ``True`` on success, ``False`` on failure.
+        """
+        try:
+            r = self._session.delete(
+                f"{self.base_url}/api/delete",
+                headers=self._headers,
+                json={"model": model_id},
+                timeout=30,
+            )
+            if r.status_code != 200 and self.log:
+                self.log.warning(
+                    f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}")
+            return r.status_code == 200
+        except requests.RequestException as e:
+            if self.log:
+                self.log.warning(
+                    f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}: {e}")
+            return False
diff --git a/common/lib/llm/clients/openai_client.py b/common/lib/llm/clients/openai_client.py
new file mode 100644
index 000000000..f8701dd7c
--- /dev/null
+++ b/common/lib/llm/clients/openai_client.py
@@ -0,0 +1,61 @@
+"""
+Centralized HTTP client for communicating with an OpenAI compatible server.
+
+This class owns all direct HTTP calls to an OpenAI style REST API and provides shared
+static helpers for capability parsing, display-name formatting, and building
+canonical llm.available_models entries. It is a plain helper with no 4CAT
+base-class dependency.
+"""
+from common.lib.llm.llm_client import LLMProviderClient
+
+
+class LMStudioClient(LLMProviderClient):
+    type = "openai-like"
+    # endpoint and keys the base class uses to list models
+    _models_info_path = "/api/v1/models"
+    _models_info_key = "models"
+    _model_id_key = "key"
+
+    def parse_supported_media_types(self, meta: dict) -> list[str]:
+        """
+        Derive the media types a model supports from its metadata.
+
+        :param meta: ``model info`` response dict, or ``None``.
+        :returns: Ordered list of supported media type strings.
+                  Returns ``["text"]`` when no capabilities are known.
+        """
+        media_types = ["text"]  # far as I can tell, text is always supported
+        capabilities = meta.get("capabilities") if meta else None
+        if not capabilities:
+            return media_types
+
+        # expected to be a mapping of flags; a plain list of names is tolerated too
+        has_vision = capabilities.get("vision") if isinstance(capabilities, dict) else "vision" in capabilities
+        if has_vision:
+            media_types.append("image")
+        # no way to tell if model supports embeddings input as far as I can see...
+        return media_types
+
+    def format_display_name(self, meta: dict) -> str:
+        """
+        Build a human-readable display name for a model.
+
+        :param meta: ``model info`` response dict for the model.
+        :returns: Human-readable display name string; publisher and parameter
+          string are added in parentheses when known.
+        """
+        model_name = self.get_model_id(meta)
+
+        if meta.get("display_name"):
+            model_name = meta["display_name"]
+
+        extra_bits = []
+        if meta.get("publisher"):
+            extra_bits.append(meta["publisher"])
+
+        if meta.get("params_string"):
+            extra_bits.append(meta["params_string"])
+
+        if extra_bits:  # avoid a dangling " ()" when nothing is known
+            model_name += f" ({', '.join(extra_bits)})"
+        return model_name
diff --git a/common/lib/llm/clients/thirdparty_client.py b/common/lib/llm/clients/thirdparty_client.py
new file mode 100644
index 000000000..2a2db4dc3
--- /dev/null
+++ b/common/lib/llm/clients/thirdparty_client.py
@@ -0,0 +1,66 @@
+"""
+Fake 'client' to read from a local store of known third-party, API-based LLMs
+that can be used with 4CAT
+"""
+import json
+
+from common.lib.llm.llm_client import LLMProviderClient
+
+
+class ThirdPartyClient(LLMProviderClient):
+    type = "api"
+    # models are read from a bundled JSON file, so no _models_info_path is set
+    _models_info_key = "models"
+    _model_id_key = "model"
+
+    def get_status(self):
+        """Always report HTTP 200: no request is made for this provider."""
+        return 200
+
+    def list_models(self) -> dict:
+        """Load the known third-party models from ``common/assets/llms.json``."""
+        with self.config.get("PATH_ROOT").joinpath("common/assets/llms.json").open() as infile:
+            models = json.load(infile)
+
+        return models
+
+    def parse_supported_media_types(self, meta: dict) -> list[str]:
+        """
+        Read the media types a model supports from its ``llms.json`` entry.
+
+        :param meta: Model entry from ``llms.json``, or ``None``.
+        :returns: List of supported media type strings; ``["text"]`` when
+                  ``meta`` is ``None`` or has no ``"supported_media_types"``.
+        """
+        return (meta or {}).get("supported_media_types", ["text"])
+
+    def format_display_name(self, meta: dict) -> str:
+        """
+        Build a human-readable display name for a model.
+
+        :param meta: Model entry from ``llms.json``.
+        :returns: The entry's ``"name"``, or the model ID when it has none.
+        """
+        return meta.get("name") or self.get_model_id(meta)
+
+    def build_model_entry(self, meta: dict) -> dict:
+        """
+        Build a canonical ``llm.available_models`` entry for a model.
+
+        :param meta: Model entry from ``llms.json``; must have a ``"provider"``.
+        :returns: Dict ready to store under ``llm.available_models[model_id]``,
+                  with the entry's ``"provider"`` value added as ``"provider_key"``.
+        """
+        entry = super().build_model_entry(meta)
+        entry["provider_key"] = meta["provider"]
+
+        return entry
+
+    def get_model_card_url(self, meta: dict) -> str:
+        """
+        Get a URL for a model card for a given model
+
+        :param meta: Model metadata
+        :return str: Model card URL (empty string if unavailable)
+        """
+        return meta.get("model_card") or ""
diff --git a/common/lib/llm/llm_client.py b/common/lib/llm/llm_client.py
new file mode 100644
index 000000000..b31cb035a
--- /dev/null
+++ b/common/lib/llm/llm_client.py
@@ -0,0 +1,194 @@
+"""
+Centralized HTTP client for communicating with an LLM provider.
+
+This class owns all direct HTTP calls to the provider's REST API and provides
+shared static helpers for capability parsing, display-name formatting, and
+building canonical llm.available_models entries. It is a plain helper with no
+4CAT base-class dependency.
+"""
+
+from abc import abstractmethod
+
+import requests
+
+
+class LLMProviderClient:
+    """
+    Base client for an LLM provider; see ``get_client()`` for the sub-classes.
+
+    Sub-classes set ``type`` (matched against the provider's configured type)
+    and, when they use the HTTP helpers, ``_models_info_path`` (endpoint that
+    lists models), ``_models_info_key`` (key of the model list in its response)
+    and ``_model_id_key`` (key of the model ID in a model's metadata).
+    """
+    _headers = {}
+    provider_config = {}
+
+    @staticmethod
+    def get_client(config, provider_config: dict) -> "LLMProviderClient":
+        """
+        Get a client for an LLM provider
+
+        Returns the appropriate sub-class depending on the provider type.
+
+        :param config:  4CAT config reader
+        :param dict provider_config:  Provider parameters, as configured in
+        4CAT
+        :return LLMProviderClient:
+        :raises ValueError:  If no client exists for the provider type
+        """
+        # in-line import because we otherwise get circular import shenanigans
+        from common.lib.llm.clients.ollama_client import OllamaClient
+        from common.lib.llm.clients.litellm_client import LiteLLMClient
+        from common.lib.llm.clients.openai_client import LMStudioClient
+        from common.lib.llm.clients.thirdparty_client import ThirdPartyClient
+
+        for client_type in (OllamaClient, LiteLLMClient, LMStudioClient, ThirdPartyClient):
+            if client_type.type == provider_config["type"]:
+                return client_type(config, provider_config)
+
+        raise ValueError(f"LLMProviderClient: Unknown provider type {provider_config['type']}")
+
+    def __init__(self, config, provider_config: dict, timeout: int = 10, log=None) -> None:
+        """
+        HTTP client for an LLM Provider
+
+        :param config:  4CAT config reader
+        :param dict provider_config:  Provider parameters, as configured in 4CAT
+        :param int timeout: Default request timeout in seconds.
+        :param Logger log:  4CAT log handler
+        """
+        self.config = config
+        self.provider_config = provider_config
+        self.log = log
+
+        self.timeout = timeout
+        self.auth_type = provider_config.get("auth_header")
+        self.auth_key = provider_config.get("auth_key")
+
+        # drop a trailing '/v1' path segment - we'll add this in the path. only
+        # a whole segment is removed, and no trailing slash is left behind, so
+        # that `base_url + path` never contains a double slash
+        self.base_url = provider_config["url"].rstrip("/").removesuffix("/v1").rstrip("/")
+
+        self._session = requests.Session()
+        self._headers = {"Content-Type": "application/json"}
+
+        if self.auth_type:
+            self._headers[self.auth_type] = self.auth_key
+
+    def get_status(self) -> bool | int:
+        """
+        Check if the server is reachable and responding to requests
+
+        :return: `False` if the server is not responding, or an HTTP status code.
+        """
+        try:
+            r = self._session.get(
+                f"{self.base_url}{self._models_info_path}",
+                headers=self._headers,
+                timeout=self.timeout,
+            )
+            if self.log and r.status_code != 200:
+                self.log.warning(
+                    f"{self.__class__.__name__}: server responded with status code {r.status_code} during availability check: {r.text}")
+            return r.status_code
+        except requests.RequestException as e:
+            if self.log:
+                self.log.warning(f"{self.__class__.__name__}: server is not available at {self.base_url}: {e}")
+            return False
+
+    def list_models(self) -> list[dict]:
+        """List available models from the provider.
+
+        :returns: List of model dicts, or ``[]`` on failure.
+        """
+        try:
+            r = self._session.get(
+                f"{self.base_url}{self._models_info_path}",
+                headers=self._headers,
+                timeout=self.timeout,
+            )
+            if r.status_code == 200:
+                # the list may be missing or null in the response
+                return r.json().get(self._models_info_key) or []
+            if self.log:
+                self.log.warning(
+                    f"{self.__class__.__name__}: failed to list models from {self.base_url}, status code {r.status_code}: {r.text}")
+        except (requests.RequestException, ValueError) as e:  # ValueError: invalid JSON
+            if self.log:
+                self.log.warning(f"{self.__class__.__name__}: failed to list models from {self.base_url}: {e}")
+        return []
+
+    def build_model_entry(self, meta: dict) -> dict:
+        """
+        Build a canonical ``llm.available_models`` entry for a model.
+
+        :param meta: Model metadata, as returned by ``list_models()``.
+        :returns: Dict ready to store under ``llm.available_models[model_id]``;
+          ``"provider"`` is the ``_id`` of this client's provider configuration.
+        """
+        return {
+            "id": self.get_global_model_id(meta),
+            "local_id": self.get_model_id(meta),
+            "name": self.format_display_name(meta),
+            "model_card": self.get_model_card_url(meta),
+            "provider": self.provider_config["_id"],
+            "supported_media_types": self.parse_supported_media_types(meta),
+            "metadata": meta,
+        }
+
+    def get_model_card_url(self, meta: dict) -> str:
+        """
+        Get a URL for a model card for a given model
+
+        :param meta: Model metadata
+        :return str: Model card URL (empty string if unavailable)
+        """
+        return ""
+
+    @abstractmethod
+    def parse_supported_media_types(self, meta: dict) -> list[str]:
+        """
+        Derive the media types a model supports from its metadata.
+
+        To be implemented by sub-classes, as the metadata differs per provider.
+
+        :param meta: Model metadata, or ``None``.
+        :returns: List of supported media type strings, e.g. ``["text", "image"]``.
+        """
+        pass
+
+    @abstractmethod
+    def format_display_name(self, meta: dict) -> str:
+        """
+        Build a human-readable display name for a model.
+
+        :param dict meta:  Model metadata
+        :returns str:  Human-readable display name string.
+        """
+        pass
+
+    def get_model_id(self, meta: dict) -> str:
+        """
+        Choose a model identifier based on model metadata.
+
+        This is the ID within the provider context, i.e. it is not guaranteed
+        to be globally unique (use `get_global_model_id()` instead).
+
+        :param dict meta:  Model metadata
+        :return str:  Model ID
+        """
+        return meta[self._model_id_key]
+
+    def get_global_model_id(self, meta: dict) -> str:
+        """
+        Choose a model identifier based on model metadata.
+
+        This needs to be a *globally* unique ID, i.e. if multiple providers
+        provide the same model, the ID should still be unique per provider.
+
+        :param dict meta:  Model metadata
+        :return str:  Model ID
+        """
+        return "-".join((self.provider_config["type"], self.provider_config["url"], self.get_model_id(meta)))
\ No newline at end of file
diff --git a/common/lib/user_input.py b/common/lib/user_input.py
index 7fcb6bcb9..a6fe10458 100644
--- a/common/lib/user_input.py
+++ b/common/lib/user_input.py
@@ -4,6 +4,7 @@
import json
import re
+from itertools import chain
class RequirementsNotMetException(Exception):
"""
@@ -26,6 +27,7 @@ class UserInput:
OPTION_TEXT = "string" # simple string or integer (input text)
OPTION_MULTI = "multi" # multiple values out of a list (select multiple)
OPTION_MULTI_SELECT = "multi_select" # multiple values out of a dropdown list (select multiple)
+ OPTION_MULTI_OPTION = "multi_option" # several instances of a collection of controls
OPTION_INFO = "info" # just a bit of text, not actual input
OPTION_TEXT_LARGE = "textarea" # longer text
OPTION_TEXT_JSON = "json" # text, but should be valid JSON
@@ -181,6 +183,45 @@ def parse_all(options, input, silently_correct=True):
parsed_input[option] = table_input
+ elif settings.get("type") == UserInput.OPTION_MULTI_OPTION:
+ # these are collections of other input options that can be
+ # repeated an arbitrary amount of times and are saved as a
+ # list of these values
+ # i.e. forms within forms!!!
+ item_options = settings["options"]
+ input_items = {}
+ for key, value in input.items():
+ if key_match := re.match(f"{option}-([0-9]+)-(.+)", key):
+ input_index = int(key_match[1])
+ # note: the index is just used to match inputs to items
+ # it is not used for ordering
+ option_item = key_match[2]
+ if option_item not in item_options:
+ continue
+
+ if input_index not in input_items:
+ input_items[input_index] = {}
+
+ input_items[input_index][option_item] = UserInput.parse_value(item_options[option_item], value, input_items[input_index], silently_correct)
+
+ # discard items that are only default values
+ parsed_input[option] = []
+ for input_index, item in input_items.items():
+ only_default = True
+ for key, value in item.items():
+ if value != item_options[key]["default"]:
+ only_default = False
+
+ if not only_default:
+ parsed_input[option].append(item)
+
+ # may define a mapper to make this a dict
+ if settings.get("dict_key"):
+ if callable(settings["dict_key"]):
+ parsed_input[option] = {settings["dict_key"](value): {**value, "_id": settings["dict_key"](value)} for value in parsed_input[option]}
+ else:
+ parsed_input[option] = {value[settings["dict_key"]]: {**value, "_id": value[settings["dict_key"]]} for value in parsed_input[option]}
+
elif option not in input:
# not provided? use default
parsed_input[option] = settings.get("default", None)
@@ -392,9 +433,16 @@ def parse_value(settings, choice, other_input=None, silently_correct=True):
# select box
# one out of multiple options
# return option if valid, or default
- if choice not in settings.get("options"):
+ options = settings.get("options", [])
+
+ # if we have a categorised set of options, look deeper to get
+ # valid option values
+ is_categorised = all([type(o) is dict for o in options.values()])
+ match_options = chain(*[list(o.keys()) for o in options.values()]) if is_categorised else options
+
+ if choice not in match_options:
if not silently_correct:
- raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(settings.get('options', {}).keys())}. {settings}")
+ raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(match_options)}.")
else:
return settings.get("default", "")
else:
diff --git a/docker-compose_ollama.yml b/docker-compose_ollama.yml
new file mode 100644
index 000000000..020b12c96
--- /dev/null
+++ b/docker-compose_ollama.yml
@@ -0,0 +1,54 @@
+# Use this file as an override to add a local Ollama instance to your 4CAT stack.
+#
+# Usage:
+# docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d
+#
+# Once running, configure 4CAT via the Control Panel → Settings → LLM:
+# LLM Provider Type : ollama
+# LLM Server URL : http://ollama:11434
+#
+# GPU support (NVIDIA):
+# Uncomment the `deploy.resources` block in the ollama service below and
+# ensure the NVIDIA Container Toolkit is installed on your host.
+# See: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
+#
+# GPU support (Apple Silicon / AMD):
+# Pass the appropriate device through your host's Docker settings instead.
+# Ollama will automatically detect the GPU when it is available inside the container.
+
+services:
+ ollama:
+ image: ollama/ollama:latest
+ container_name: 4cat_ollama
+ restart: unless-stopped
+ volumes:
+ - 4cat_ollama:/root/.ollama
+ # Expose the Ollama API on the host for optional external access or
+ # management with the Ollama CLI. Remove this block if you want to keep
+ # Ollama accessible only within the Docker network.
+ ports:
+ - "127.0.0.1:11434:11434"
+ healthcheck:
+ test: ["CMD", "ollama", "ls"]
+ interval: 10s
+ timeout: 5s
+ retries: 5
+ # --- NVIDIA GPU support (uncomment to enable) ---
+ # deploy:
+ # resources:
+ # reservations:
+ # devices:
+ # - driver: nvidia
+ # count: all
+ # capabilities: [gpu]
+
+ # Make the 4CAT backend wait for Ollama to be healthy before starting.
+ # This prevents initial model-refresh failures on first boot.
+ backend:
+ depends_on:
+ ollama:
+ condition: service_healthy
+
+volumes:
+ 4cat_ollama:
+ name: 4cat_ollama_data
diff --git a/docker/README.md b/docker/README.md
index c10444aa7..6ad7e069b 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -69,3 +69,76 @@ https://github.com/docker/buildx/issues/426
https://stackoverflow.com/questions/64221861/failed-to-resolve-with-frontend-dockerfile-v0
4. More errors coming soon! (No doubt)
+
+---
+
+## Running a local Ollama instance alongside 4CAT
+
+4CAT can use a local [Ollama](https://ollama.com) server for LLM-powered processors.
+A Docker Compose override file (`docker-compose_ollama.yml`) is included to add
+Ollama as a sidecar service so you do not need to run it separately on the host.
+
+### Quick start
+
+```bash
+docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d
+```
+
+This starts the standard 4CAT stack plus an `ollama` container that is only
+accessible within the Docker network (and optionally on `localhost:11434` on
+the host via the exposed port).
+
+### Configuring 4CAT to use Ollama
+
+#### Automatic configuration (fresh Docker install with sidecar)
+
+When you start 4CAT for the first time using the Ollama override file, the
+`docker_setup.py` initialisation script automatically detects the `ollama`
+sidecar and sets **LLM Provider Type**, **LLM Server URL**, and **LLM Access**
+for you. You can skip to step 2 below.
+
+#### Manual configuration (or to verify/change settings)
+
+1. Log in as admin and open **Control Panel → Settings → LLM Providers**.
+2. Confirm that a provider with the following settings is present:
+
+ | Setting | Value |
+ |---|---|
+ | LLM Provider Type | `ollama` |
+ | LLM Server URL | `http://ollama:11434` |
+ | LLM Access | enabled |
+
+3. Save settings.
+4. Open **Control Panel → LLMs & Providers** (visible once *LLM Access* is enabled).
+5. Use the **Refresh** button to load available models, then **Pull** a model
+ (e.g. `llama3.2:3b`) to download it from the Ollama library.
+6. Enable the models you want to make available to users.
+
+### GPU support (NVIDIA)
+
+Uncomment the `deploy.resources` block in `docker-compose_ollama.yml` and
+ensure the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+is installed on your host. Then restart the stack with the override:
+
+```bash
+docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d
+```
+
+### Persisting models
+
+Models downloaded by Ollama are stored in the `4cat_ollama_data` Docker volume.
+They survive container restarts and re-creations unless you explicitly remove
+the volume (`docker volume rm 4cat_ollama_data`).
+
+### Using an external Ollama server
+
+If you already run Ollama on the host or elsewhere, skip the override file and
+point 4CAT directly at that server:
+
+- **On the same host**: use `http://host.docker.internal:11434` as the LLM Server URL.
+- **Remote server**: use the server's reachable URL and configure any required
+ API key in the *Authentication header* and *Authentication key* settings.
+
+In both cases, configure the LLM settings manually via **Control Panel → Settings**
+(see *Manual configuration* above), using the appropriate server URL instead of
+`http://ollama:11434`.
diff --git a/docker/docker_setup.py b/docker/docker_setup.py
index 450684602..6de76add0 100644
--- a/docker/docker_setup.py
+++ b/docker/docker_setup.py
@@ -28,7 +28,8 @@ def update_config_from_environment(CONFIG_FILE, config_parser):
config_parser['SERVER']['public_port'] = os.environ['PUBLIC_PORT']
# Set API
- config_parser['API']['api_host'] = os.environ['API_HOST'] # set in .env; should be backend container_name in docker-compose.py unless frontend and backend are running together in one container
+ config_parser['API']['api_host'] = os.environ[
+ 'API_HOST'] # set in .env; should be backend container_name in docker-compose.py unless frontend and backend are running together in one container
# Database configuration
config_parser['DATABASE']['db_name'] = os.environ['POSTGRES_DB']
@@ -110,14 +111,16 @@ def _format_host(host: str) -> str:
# Backend API
config_parser.add_section('API')
- config_parser['API']['api_port'] = '4444' # backend internal port set in docker-compose.py; NOT API_PUBLIC_PORT as that is what port Docker exposes to host network
+ config_parser['API'][
+ 'api_port'] = '4444' # backend internal port set in docker-compose.py; NOT API_PUBLIC_PORT as that is what port Docker exposes to host network
# File paths
# Docker volumes are defined in docker-compose.yml; these rely on one shared volume `data` in the 4CAT root directory
config_parser.add_section('PATHS')
config_parser['PATHS']['path_images'] = 'data/images' # shared volume defined in docker-compose.yml
config_parser['PATHS']['path_data'] = 'data/datasets' # shared volume defined in docker-compose.yml
- config_parser['PATHS']['path_lockfile'] = 'backend' # docker-entrypoint.sh looks for pid file here (in event Docker shutdown was not clean)
+ config_parser['PATHS'][
+ 'path_lockfile'] = 'backend' # docker-entrypoint.sh looks for pid file here (in event Docker shutdown was not clean)
config_parser['PATHS']['path_sessions'] = 'data/sessions' # shared volume defined in docker-compose.yml
config_parser['PATHS']['path_logs'] = 'data/logs/' # shared volume defined in docker-compose.yml
@@ -137,7 +140,8 @@ def _format_host(host: str) -> str:
config = ConfigManager()
config.with_db(Database(logger=None, appname="docker-setup",
- dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, host=config.DB_HOST, port=config.DB_PORT))
+ dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD,
+ host=config.DB_HOST, port=config.DB_PORT))
for path in [config.get('PATH_DATA'),
config.get('PATH_IMAGES'),
@@ -187,10 +191,12 @@ def _format_host(host: str) -> str:
# Check to see if flask.server_name needs to be updated
from common.config_manager import ConfigManager
from common.lib.database import Database
+
config = ConfigManager()
config.with_db(Database(logger=None, appname="docker-setup",
- dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, host=config.DB_HOST, port=config.DB_PORT))
-
+ dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD,
+ host=config.DB_HOST, port=config.DB_PORT))
+
public_port = int(config_parser['SERVER']['public_port'])
# Port handling here is independent from HTTPS; default is 80
default_port = 80
@@ -201,12 +207,47 @@ def _format_host(host: str) -> str:
# Warn only when localhost/IP lacks a required non-default port
if existing_port is None and _is_ip_or_localhost(host) and public_port != default_port:
formatted_host = _format_host(host)
- print(f"Exposed PUBLIC_PORT {public_port} from .env file not included in Server Name; if you are not using a reverse proxy, you may need to update the Server Name variable.")
+ print(
+ f"Exposed PUBLIC_PORT {public_port} from .env file not included in Server Name; if you are not using a reverse proxy, you may need to update the Server Name variable.")
print(
"You can do so by running the following command if you do not have access to the 4CAT frontend Control Panel:\n"
f"docker exec 4cat_backend python -c \"from common.config_manager import ConfigManager;config=ConfigManager();config.with_db();config.set('flask.server_name', '{formatted_host}:{public_port}');config.db.commit();\""
)
+    # If an Ollama container is available on the Docker network, configure 4CAT to use it.
+    ollama_url = 'http://ollama:11434'
+    ollama_id = f"ollama-{ollama_url.split('/')[-1]}"
+    try:
+        import requests
+
+        try:
+            resp = requests.get(f"{ollama_url}/api/tags", timeout=2)
+            if resp.status_code == 200:
+                current_llm_providers = config.get("llm.providers") or {}  # provider ID -> settings
+                if any(p.get("url") == ollama_url for p in current_llm_providers.values()):
+                    print("Ollama server already configured in 4CAT settings.")
+                else:
+                    # set basic LLM settings so the initial admin user does not need to
+                    # configure them manually for local development environments that
+                    # include the Ollama sidecar.
+                    current_llm_providers[ollama_id] = {
+                        "name": "Ollama Server (4CAT, via Docker)",
+                        "url": ollama_url,
+                        "type": "ollama",
+                        "auth_header": "",
+                        "auth_key": "",
+                        "_id": ollama_id
+                    }
+                    config.set('llm.providers', current_llm_providers)
+                    config.db.commit()
+                    print('Detected Ollama on Docker network; configured LLM settings to use it.')
+        except requests.RequestException:
+            # Ollama not available (the sidecar is optional); do nothing
+            pass
+    except Exception as e:
+        # unexpected failure (e.g. `requests` missing); report it, but never block startup
+        print(f"Skipping automatic Ollama configuration: {e}")
+
print(f"\nStarting app\n"
f"4CAT is accessible at:\n"
f"{'https' if config.get('flask.https', False) else 'http'}://{config.get('flask.server_name')}\n")
diff --git a/extensions b/extensions
new file mode 120000
index 000000000..c25d13e68
--- /dev/null
+++ b/extensions
@@ -0,0 +1 @@
+/Users/stijn/surfdrive/PycharmProjects/4cat/config/extensions
\ No newline at end of file
diff --git a/helper-scripts/migrate/migrate-1.54-1.55.py b/helper-scripts/migrate/migrate-1.54-1.55.py
new file mode 100644
index 000000000..7a5074774
--- /dev/null
+++ b/helper-scripts/migrate/migrate-1.54-1.55.py
@@ -0,0 +1,103 @@
+import json
+import sys
+import os
+
+from pathlib import Path
+
+sys.path.insert(0, os.path.join(os.path.abspath(os.path.dirname(__file__)), "../.."))
+from common.lib.database import Database
+from common.lib.logger import Logger
+
+import configparser  # noqa: E402
+
+log = Logger(output=True)
+ini = configparser.ConfigParser()
+ini.read(Path(__file__).parent.parent.parent.resolve().joinpath("config/config.ini"))
+db_config = ini["DATABASE"]
+
+db = Database(
+    logger=log,
+    dbname=db_config["db_name"],
+    user=db_config["db_user"],
+    password=db_config["db_password"],
+    host=db_config["db_host"],
+    port=db_config["db_port"],
+    appname="4cat-migrate",
+)
+
+
+def get_legacy_setting(name, default=""):
+    """
+    Read one of the pre-1.55 LLM settings
+
+    `db.fetchone()` returns a row (or nothing), not the bare value, so the
+    `value` column needs to be unpacked before it can be used as a string.
+
+    :param str name:  Setting name; only hard-coded names are passed in
+    :param default:  Value to return if the setting is missing or empty
+    :return:  Decoded setting value, or `default`
+    """
+    row = db.fetchone(f"SELECT value FROM settings WHERE name = '{name}'")
+    if not row or row["value"] is None:
+        return default
+
+    try:
+        # values are written JSON-encoded (see the llm.providers insert below)
+        value = json.loads(row["value"])
+    except (TypeError, ValueError):
+        # not valid JSON; use the raw value
+        value = row["value"]
+
+    return value if value not in (None, "") else default
+
+
+# the separate LLM server settings were consolidated into one overarching 'llm.providers' setting
+print(" Checking if llm.providers setting exists...")
+has_setting = db.fetchone(
+    "SELECT COUNT(*) AS num FROM settings WHERE name = 'llm.providers'"
+)
+
+if has_setting["num"] > 0:
+    print(" ...exists, deleting old settings without overwriting")
+else:
+    print(" ...does not exist, filling with currently configured providers")
+    provider_type = get_legacy_setting("llm.provider_type")
+    providers = {}
+    if not provider_type:
+        print(" ...no provider currently configured")
+    else:
+        url = str(get_legacy_setting("llm.server"))
+        # host[:port] is the third "/"-separated part of scheme://host/path
+        host = url.split("/")[2] if "://" in url else "localhost"
+        provider_id = f"{provider_type}-{host}"
+
+        providers[provider_id] = {
+            "name": get_legacy_setting("llm.host_name"),
+            # vLLM and LM Studio are both openai-like
+            "type": {"ollama": "ollama"}.get(provider_type, "openai-like"),
+            "url": url,
+            "auth_header": get_legacy_setting("llm.auth_type"),
+            "auth_key": get_legacy_setting("llm.auth_key"),
+            "_id": provider_id
+        }
+
+    # add API models, always present
+    providers["thirdparty-models"] = {
+        "name": "Third-party models",
+        "type": "api",
+        "url": "",
+        "auth_header": "",
+        "auth_key": "",
+        "_id": "thirdparty-models"
+    }
+
+    db.insert("settings", {"name": "llm.providers", "value": json.dumps(providers)})
+    print(f" ...added {len(providers)} providers")
+
+print(" Cleaning up old settings")
+db.execute("DELETE FROM settings WHERE name LIKE 'llm.%' AND name NOT IN ('llm.providers', 'llm.available_models', 'llm.access')")
+
+print(" Removing all known models (will be re-indexed on 4CAT restart)")
+db.upsert("settings", {"name": "llm.available_models", "value": "{}"})
+
+print(" - done!")
diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py
index c2bd0d02e..5b4080e76 100644
--- a/processors/machine_learning/llm_prompter.py
+++ b/processors/machine_learning/llm_prompter.py
@@ -5,6 +5,8 @@
import re
import time
import json
+from itertools import chain
+
import jsonschema
import requests
@@ -16,7 +18,7 @@
from common.lib.item_mapping import MappedItem
from common.lib.exceptions import ProcessorInterruptedException, QueryParametersException, QueryNeedsExplicitConfirmationException
from common.lib.helpers import UserInput, nthify, andify, remove_nuls, flatten_dict
-from common.lib.llm import LLMAdapter
+from common.lib.llm.adapter import LLMAdapter
from backend.lib.processor import BasicProcessor
class LLMPrompter(BasicProcessor):
@@ -55,46 +57,52 @@ def get_queue_id(cls, remote_id, details, dataset) -> str:
local_queue = "local_models"
if not dataset:
return local_queue
+
+ model = dataset.parameters.get("model")
+ if model.startswith("api"):
+ # API-based models have their own queue - no local resources being
+ # used so can be concurrent
+ return f"llm-api-{dataset.key}"
else:
- if dataset.parameters.get('api_or_local', 'api') in ["local", "hosted"]:
- # Hosted models also go in the local queue since they use the same shared LLM server
- return local_queue
-
- # Queue per model/API type
- return f"{cls.type}-{dataset.parameters.get('api_or_local', 'api')}-{dataset.parameters.get('api_model', 'none')}"
+ # use the model URL as the queue ID (extracted from the model
+ # global ID)
+ # this is not fool-proof, but does mean not more than one dataset
+ # runs per API server - in the scenario of these running locally,
+ # it means things do not run concurrently (which is good)
+ return f"llm-local-{dataset.parameters.get('model').split('-')[1]}"
+
+    @classmethod
+    def get_model_library(cls, config):
+        """
+        Get the models that can be chosen for this processor
+
+        Only models listed in `llm.enabled_models` are included. If
+        `llm.access` is not set, this is further limited to models with an
+        `api-` ID prefix.
+
+        :param config:  Configuration reader; must offer a `get(name, default)` method
+        :return dict:  Enabled models grouped per provider, as
+          `{provider name: {model ID: model name}}`
+        """
+        available_models = config.get("llm.available_models", {}) or {}
+        enabled_model_ids = set(config.get("llm.enabled_models", []) or [])
+        providers = config.get("llm.providers", {}) or {}
+        if not config.get("llm.access"):
+            enabled_model_ids = {model_id for model_id in enabled_model_ids if model_id.startswith("api-")}
+
+        models_option = {}
+        for model_id, model in available_models.items():
+            if model_id not in enabled_model_ids:
+                continue
+
+            # a model may reference a provider ID that is not (or no longer)
+            # in llm.providers; skip it instead of failing on the lookup and
+            # losing the whole options list
+            provider = providers.get(model.get("provider"))
+            if not provider:
+                continue
+
+            models_option.setdefault(provider["name"], {})[model_id] = model["name"]
+
+        return models_option
@classmethod
def get_options(cls, parent_dataset=None, config=None) -> dict:
# Check if 4CAT wide LLM server is available
- if config.get("llm.access", False) and config.get("llm.server", ""):
- shared_llm_name = config.get("llm.host_name", "4CAT LLM Server")
- shared_llm_models = {model: model_metadata.get("name") for model, model_metadata in config.get("llm.available_models", {}).items()}
- shared_llm_default = list(shared_llm_models.keys())[0] if shared_llm_models else ""
- else:
- shared_llm_name = False
- shared_llm_default = ""
- shared_llm_models = {}
+ models = cls.get_model_library(config)
# Determine if the parent dataset is a media archive (zip with images/video/audio)
is_media_parent = False
media_type = "media"
- hosted_and_local_available = True
if parent_dataset:
parent_extension = parent_dataset.get_extension()
parent_media_type = parent_dataset.get_media_type()
if parent_extension == "zip" and parent_media_type in ("image", "video", "audio"):
is_media_parent = True
media_type = parent_media_type
- if parent_media_type in ("video", "audio"):
- # Ollama and LM Studio currently only support text and image
- hosted_and_local_available = False
-
- # Add additional sources for LLM Models
- api_or_local_options = {"api": "API"}
- if hosted_and_local_available:
- api_or_local_options["local"] = "Local"
- if shared_llm_name:
- api_or_local_options["hosted"] = shared_llm_name
options = {
"ethics_warning1": {
@@ -102,21 +110,12 @@ def get_options(cls, parent_dataset=None, config=None) -> dict:
"help": "Always test your prompt on a sample of rows, for instance by first using the "
"Random filter processor.",
},
- "api_or_local": {
- "type": UserInput.OPTION_CHOICE,
- "help": "Local or API",
- "options": api_or_local_options,
- "default": "api" if not shared_llm_name else "hosted",
- "tooltip": "You can use 'local' models through Ollama and LM Studio as long as you have a valid "
- "and accessible URL through which the model can be reached.",
- },
- "api_model": {
+ "model": {
"type": UserInput.OPTION_CHOICE,
"help": "API model",
- "options": LLMAdapter.get_model_options(config),
+ "options": models,
"default": "none",
"tooltip": "Select from the predefined model list or insert manually",
- "requires": "api_or_local==api",
},
"api_key": {
"type": UserInput.OPTION_TEXT,
@@ -124,282 +123,194 @@ def get_options(cls, parent_dataset=None, config=None) -> dict:
"help": "API key",
"tooltip": "Create an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization"
"/api-keys). Note that this often involves billing.",
- "requires": "api_or_local==api",
- "sensitive": True,
- },
- "api_custom_model_provider": {
- "type": UserInput.OPTION_CHOICE,
- "help": "Model provider",
- "requires": "api_model==custom",
- "options": LLMAdapter.get_model_providers(config),
- "tooltip": "API provider. Currently limited to this list.",
- },
- "api_custom_model_id": {
- "type": UserInput.OPTION_TEXT,
- "help": "Model ID",
- "requires": "api_model==custom",
- "tooltip": "E.g. 'mistral-small-2503'. Check the API provider's documentation on what model ID to use. "
- "Fine-tuned models often require more info; OpenAI for instance requires the following "
- "format: ft:[modelname]:[org_id]:[custom_suffix]:",
- "default": "",
- },
- "local_info": {
- "type": UserInput.OPTION_INFO,
- "requires": "api_or_local==local",
- "help": "You can use local LLMs with LM Studio, Ollama, and vLLM. These applications need to be reachable by "
- "this 4CAT server, e.g. by running them on the same machine. For LM Studio and vLLM, "
- "use the Base URL to interface with any OpenAI-like API endpoint.",
- },
- "local_provider": {
- "type": UserInput.OPTION_CHOICE,
- "requires": "api_or_local==local",
- "options": {
- "none": "",
- "lmstudio": "LM Studio",
- "ollama": "Ollama",
- "vllm": "vLLM",
- },
- "default": "none",
- "help": "Local LLM provider",
- },
- "lmstudio-info": {
- "type": UserInput.OPTION_INFO,
- "requires": "local_provider==lmstudio",
- "help": "LM Studio is a desktop application to chat with LLMs, but that you can also run as a local "
- "server. See [this link for intructions on how to run LM Studio as a server](https://lmstudio.ai/docs/"
- "app/api). When the server is running, the endpoint is shown in the 'Developer' tab on the top "
- "right (default: `http://localhost:1234/v1` or `http://host.docker.internal:1234/v1` in Docker). "
- "4CAT will use the top-most model you have loaded. ",
- },
- "ollama-info": {
- "type": UserInput.OPTION_INFO,
- "requires": "local_provider==ollama",
- "help": "Ollama is a simple command-line application that lets you interface with a range of open-"
- "source LLMs and that you can run as a local server. See [this link]"
- "(https://github.com/ollama/ollama/blob/main/README.md#quickstart) for instructions.",
- },
- "vllm-info": {
- "type": UserInput.OPTION_INFO,
- "requires": "local_provider==ollama",
- "help": "[vLLM](https://docs.vllm.ai/en/latest/getting_started/quickstart/) is a framework for Linux "
- "systems capable of fast inference with a single LLM. Communication is done through an "
- "OpenAI-like API endpoint. Just change the base URL below and insert an optional API key.",
- },
- "local_base_url": {
- "type": UserInput.OPTION_TEXT,
- "requires": "api_or_local==local",
- "default": "",
- "help": "Base URL",
- "tooltip": "[optional] Leaving this empty will use default values (`http://localhost:1234/v1` or `http://host.docker.internal:1234/v1` for LM "
- "Studio, `http://localhost:11434` or `http://host.docker.internal:11434` for Ollama, `http://localhost:8000` or `http://host.docker.internal:8000` for vLLM ).",
- },
- "lmstudio_api_key": {
- "type": UserInput.OPTION_TEXT,
- "default": "",
- "help": "LM Studio API key",
- "tooltip": "[optional] Uses `lm-studio` by default.",
- "requires": "local_provider==lmstudio",
+ "requires": "model^=api",
"sensitive": True,
- },
- "vllm_api_key": {
- "type": UserInput.OPTION_TEXT,
- "default": "",
- "help": "vLLM API key",
- "tooltip": "[optional] Empty by default.",
- "requires": "local_provider==vllm",
- "sensitive": True,
- },
- "ollama_model": {
- "type": UserInput.OPTION_TEXT,
- "requires": "local_provider==ollama",
- "default": "",
- "help": "Ollama model name",
- "tooltip": "[required] for example 'llama3.2'",
- },
- "hosted_llm_model": {
- "type": UserInput.OPTION_CHOICE,
- "help": "LLM model",
- "options": shared_llm_models,
- "default": shared_llm_default,
- "requires": "api_or_local==hosted",
- },
+ }
}
if is_media_parent:
# Media-specific options: show info about media files being attached
- options["media_info"] = {
- "type": UserInput.OPTION_INFO,
- "help": f"The parent dataset contains {media_type} files that will be sent "
- f"to the LLM with each prompt. Make sure to use a model that supports "
- f"{media_type} input (e.g. vision models for images).
"
- f"Not all models support all media types. If the model cannot process "
- f"{media_type} files, an error will be returned during processing.",
- }
- options["system_prompt"] = {
- "type": UserInput.OPTION_TEXT_LARGE,
- "help": "System prompt",
- "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance "
- "on the tone of the text. This processor may edit the system prompt to "
- "ensure correct output. System prompts are included in the results file.",
- "default": "",
- }
- options["prompt"] = {
- "type": UserInput.OPTION_TEXT_LARGE,
- "help": "User prompt",
- "tooltip": f"Describe what the model should do with each {media_type} file. "
- f"No column brackets needed — {media_type} files are attached automatically.",
- "default": "",
- }
+ options.update({
+ "media_info": {
+ "type": UserInput.OPTION_INFO,
+ "help": f"The parent dataset contains {media_type} files that will be sent "
+ f"to the LLM with each prompt. Make sure to use a model that supports "
+ f"{media_type} input (e.g. vision models for images).
"
+ f"Not all models support all media types. If the model cannot process "
+ f"{media_type} files, an error will be returned during processing.",
+ },
+ "system_prompt": {
+ "type": UserInput.OPTION_TEXT_LARGE,
+ "help": "System prompt",
+ "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance "
+ "on the tone of the text. This processor may edit the system prompt to "
+ "ensure correct output. System prompts are included in the results file.",
+ "default": "",
+ },
+ "prompt": {
+ "type": UserInput.OPTION_TEXT_LARGE,
+ "help": "User prompt",
+ "tooltip": f"Describe what the model should do with each {media_type} file. "
+ f"No column brackets needed — {media_type} files are attached automatically.",
+ "default": "",
+ }
+ })
+
else:
- # Text-based dataset options: column brackets, media URL toggle, batching
- options["prompt_info"] = {
+ options.update({
+ # Text-based dataset options: column brackets, media URL toggle, batching
+ "prompt_info": {
+ "type": UserInput.OPTION_INFO,
+ "help": "How to prompt
"
+ "Use `[brackets]` with column names to insert dataset items in the prompt. You "
+ "can place column brackets in different parts of the prompt or use multiple column names within"
+ ' a single column bracket to merge items.
Example 1: "Describe the topic '
+ 'of this social media post in max. 3 words: `[body, tags]`"
Example 2: '
+ "\"Given the following hashtags: `[tags]`, answer whether they are 'related' or 'unrelated' "
+ 'to the following text: `[body]`"
Prompting is a delicate art. See '
+ "processor references on best prompting practices.
For predefined research prompts, see "
+ "e.g. [Prompt Compass](https://github.com/ErikBorra/PromptCompass/blob/main/prompts.json#L136) "
+ "or the [Anthropic Prompt Library](https://docs.anthropic.com/en/resources/prompt-library/"
+ "library).",
+ },
+ "system_prompt": {
+ "type": UserInput.OPTION_TEXT_LARGE,
+ "help": "System prompt",
+ "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance "
+ "on the tone of the text. This processor may edit the system prompt to "
+ "ensure correct output. System prompts are included in the results file.",
+ "default": "",
+ },
+ "prompt": {
+ "type": UserInput.OPTION_TEXT_LARGE,
+ "help": "User prompt",
+ "tooltip": "Use [brackets] with columns names.",
+ "default": "",
+ },
+ "use_media": {
+ "type": UserInput.OPTION_TOGGLE,
+ "help": "Add images",
+ "tooltip": "Add media URLs for multi-modal processing. Requires a model that supports vision.",
+ "default": False,
+ },
+ "media_columns": {
+ "type": UserInput.OPTION_TEXT,
+ "help": "Columns with image URL(s)",
+ "default": "",
+ "inline": True,
+ "tooltip": "Multiple columns can be selected.",
+ "requires": "use_media==true",
+ }
+ })
+
+ # Common options for both text and media datasets
+ options.update({
+ "structured_output": {
+ "type": UserInput.OPTION_TOGGLE,
+ "help": "Output structured JSON",
+ "tooltip": "Output in a JSON format instead of text. Note that your chosen model may not support "
+ "structured output.",
+ "default": False,
+ },
+ "json_schema_info": {
"type": UserInput.OPTION_INFO,
- "help": "How to prompt
"
- "Use `[brackets]` with column names to insert dataset items in the prompt. You "
- "can place column brackets in different parts of the prompt or use multiple column names within"
- ' a single column bracket to merge items.
Example 1: "Describe the topic '
- 'of this social media post in max. 3 words: `[body, tags]`"
Example 2: '
- "\"Given the following hashtags: `[tags]`, answer whether they are 'related' or 'unrelated' "
- 'to the following text: `[body]`"
Prompting is a delicate art. See '
- "processor references on best prompting practices.
For predefined research prompts, see "
- "e.g. [Prompt Compass](https://github.com/ErikBorra/PromptCompass/blob/main/prompts.json#L136) "
- "or the [Anthropic Prompt Library](https://docs.anthropic.com/en/resources/prompt-library/"
- "library).",
- }
- options["system_prompt"] = {
- "type": UserInput.OPTION_TEXT_LARGE,
- "help": "System prompt",
- "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance "
- "on the tone of the text. This processor may edit the system prompt to "
- "ensure correct output. System prompts are included in the results file.",
- "default": "",
- }
- options["prompt"] = {
+ "help": "Insert a JSON Schema for structured outputs. These define the output that "
+ "the LLM will adhere to. [See instructions and examples on how to write a JSON Schema]"
+ "(https://json-schema.org/learn/miscellaneous-examples) and [OpenAI's documentation]"
+ "(https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-schemas).",
+ "requires": "structured_output==true",
+ },
+ "json_schema": {
"type": UserInput.OPTION_TEXT_LARGE,
- "help": "User prompt",
- "tooltip": "Use [brackets] with columns names.",
+ "help": "JSON schema",
+ "tooltip": "[required] A JSON schema that the structured output will adhere to",
+ "requires": "structured_output==true",
"default": "",
- }
- options["use_media"] = {
- "type": UserInput.OPTION_TOGGLE,
- "help": "Add images",
- "tooltip": "Add media URLs for multi-modal processing. Requires a model that supports vision.",
- "default": False,
- }
- options["media_columns"] = {
+ },
+ "temperature": {
"type": UserInput.OPTION_TEXT,
- "help": "Columns with image URL(s)",
- "default": "",
- "inline": True,
- "tooltip": "Multiple columns can be selected.",
- "requires": "use_media==true",
+ "help": "Temperature",
+ "default": 0.1,
+ "coerce_type": float,
+ "max": 2.0,
+ "tooltip": "Temperature indicates how strict the model will gravitate towards the most "
+ "probable next token. A score close to 0 returns more predictable "
+ "outputs while a score close to 1 leads to more creative outputs. Not supported by all models.",
}
-
- # Common options for both text and media datasets
- options["structured_output"] = {
- "type": UserInput.OPTION_TOGGLE,
- "help": "Output structured JSON",
- "tooltip": "Output in a JSON format instead of text. Note that your chosen model may not support "
- "structured output.",
- "default": False,
- }
- options["json_schema_info"] = {
- "type": UserInput.OPTION_INFO,
- "help": "Insert a JSON Schema for structured outputs. These define the output that "
- "the LLM will adhere to. [See instructions and examples on how to write a JSON Schema]"
- "(https://json-schema.org/learn/miscellaneous-examples) and [OpenAI's documentation]"
- "(https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-schemas).",
- "requires": "structured_output==true",
- }
- options["json_schema"] = {
- "type": UserInput.OPTION_TEXT_LARGE,
- "help": "JSON schema",
- "tooltip": "[required] A JSON schema that the structured output will adhere to",
- "requires": "structured_output==true",
- "default": "",
- }
- options["temperature"] = {
- "type": UserInput.OPTION_TEXT,
- "help": "Temperature",
- "default": 0.1,
- "coerce_type": float,
- "max": 2.0,
- "tooltip": "Temperature indicates how strict the model will gravitate towards the most "
- "probable next token. A score close to 0 returns more predictable "
- "outputs while a score close to 1 leads to more creative outputs. Not supported by all models.",
- }
+ })
if not is_media_parent:
- options["truncate_input"] = {
+ options.update({
+ "truncate_input": {
+ "type": UserInput.OPTION_TEXT,
+ "help": "Max chars in input value",
+ "default": 0,
+ "coerce_type": int,
+ "tooltip": "This value determines how many characters an inserted dataset value may have. 0 = unlimited.",
+ "requires": "use_media==false",
+ },
+ "max_tokens": {
+ "type": UserInput.OPTION_TEXT,
+ "help": "Max output tokens",
+ "default": 10000,
+ "coerce_type": int,
+ "tooltip": "As a rule of thumb, one token generally corresponds to ~4 characters of "
+ "text for common English text. This includes tokens spent for reasoning.",
+ },
+ "batches": {
+ "type": UserInput.OPTION_TEXT,
+ "help": "Items per prompt",
+ "coerce_type": int,
+ "default": 1,
+ "tooltip": "How many dataset items to insert into the prompt. These will be inserted as a list "
+ "wherever the column brackets are used (e.g. '[body]').",
+ "requires": "use_media==false",
+ },
+ "batch_info": {
+ "type": UserInput.OPTION_INFO,
+ "help": "Note on batching: Batching may increase speed but reduce accuracy. Models "
+ "need to support structured output for batching. This processor uses JSON schemas to ensure "
+ "symmetry between input and output lengths, but models may struggle to match input and output "
+ "values. Describe the dataset values in plurals in your prompt when batching. If you use "
+ "multiple column brackets in your prompt, rows with any empty values are skipped.",
+ "requires": "use_media==false",
+ }
+ })
+
+ options.update({
+ "ethics_warning3": {
+ "type": UserInput.OPTION_INFO,
+ "requires": "model^=api-",
+ "help": "When using LLMs through commercial parties, always consider anonymising your data and "
+ "whether local open-source LLMs are also an option.",
+ },
+ "save_annotations": {
+ "type": UserInput.OPTION_ANNOTATION,
+ "label": "prompt outputs",
+ "default": False,
+ },
+ "hide_think": {
+ "type": UserInput.OPTION_TOGGLE,
+ "help": "Hide reasoning",
+ "default": False,
+ "tooltip": "Some models include reasoning in their output, between
@@ -29,7 +56,7 @@
{% endif %} {% elif settings.type == "date" %} - + {% if "tooltip" in settings %} @@ -55,7 +82,7 @@ {% endif %} {% elif settings.type in ("json", "textarea") %} + placeholder="{{ settings.tooltip }}" data-default="{{ settings.original_default }}">{{ settings.default }} {% if "tooltip" in settings %}@@ -63,7 +90,7 @@
{% endif %} {% elif settings.type == "choice" %} -