From 6c27094fc658f42cb6745afb220d842c633ba04f Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 15:43:58 +0100 Subject: [PATCH 01/44] add ollama to docker-compose --- docker-compose_ollama.yml | 54 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 docker-compose_ollama.yml diff --git a/docker-compose_ollama.yml b/docker-compose_ollama.yml new file mode 100644 index 000000000..020b12c96 --- /dev/null +++ b/docker-compose_ollama.yml @@ -0,0 +1,54 @@ +# Use this file as an override to add a local Ollama instance to your 4CAT stack. +# +# Usage: +# docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d +# +# Once running, configure 4CAT via the Control Panel → Settings → LLM: +# LLM Provider Type : ollama +# LLM Server URL : http://ollama:11434 +# +# GPU support (NVIDIA): +# Uncomment the `deploy.resources` block in the ollama service below and +# ensure the NVIDIA Container Toolkit is installed on your host. +# See: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html +# +# GPU support (Apple Silicon / AMD): +# Pass the appropriate device through your host's Docker settings instead. +# Ollama will automatically detect the GPU when it is available inside the container. + +services: + ollama: + image: ollama/ollama:latest + container_name: 4cat_ollama + restart: unless-stopped + volumes: + - 4cat_ollama:/root/.ollama + # Expose the Ollama API on the host for optional external access or + # management with the Ollama CLI. Remove this block if you want to keep + # Ollama accessible only within the Docker network. + ports: + - "127.0.0.1:11434:11434" + healthcheck: + test: ["CMD", "ollama", "ls"] + interval: 10s + timeout: 5s + retries: 5 + # --- NVIDIA GPU support (uncomment to enable) --- + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: all + # capabilities: [gpu] + + # Make the 4CAT backend wait for Ollama to be healthy before starting. + # This prevents initial model-refresh failures on first boot. + backend: + depends_on: + ollama: + condition: service_healthy + +volumes: + 4cat_ollama: + name: 4cat_ollama_data From 8a8427cd5a1bfc686189498b29ccba5d7c9a59d6 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 15:44:39 +0100 Subject: [PATCH 02/44] give me a proper worker who can do neat stuff. --- backend/workers/ollama_manager.py | 171 ++++++++++++++++++++++++++++++ backend/workers/refresh_items.py | 55 +--------- 2 files changed, 173 insertions(+), 53 deletions(-) create mode 100644 backend/workers/ollama_manager.py diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py new file mode 100644 index 000000000..d990b8b14 --- /dev/null +++ b/backend/workers/ollama_manager.py @@ -0,0 +1,171 @@ +""" +Manage Ollama LLM models +""" +import json +import time + +import requests + +from backend.lib.worker import BasicWorker + + +class OllamaManager(BasicWorker): + """ + Manage Ollama LLM models + + Periodically refreshes the list of available models from an Ollama server. + Can also pull or delete models on demand when queued with a specific task. + + Job details: + - task: "refresh" (default), "pull", or "delete" + + Job remote_id: + - For refresh: "manage-ollama-refresh" (periodic) or "manage-ollama-manual" (on-demand) + - For pull/delete: the model name to pull or delete + """ + type = "manage-ollama" + max_workers = 1 + + @classmethod + def ensure_job(cls, config=None): + """ + Ensure the daily refresh job is always scheduled + + :return: Job parameters for the worker + """ + return {"remote_id": "manage-ollama-refresh", "interval": 86400} + + def work(self): + task = self.job.details.get("task", "refresh") if self.job.details else "refresh" + model_name = self.job.data["remote_id"] + + if task == "refresh": + self.refresh_models() + elif task == "pull": + success = self.pull_model(model_name) + if success: + self.refresh_models() + elif task == "delete": + success = self.delete_model(model_name) + if success: + self.refresh_models() + else: + self.log.warning(f"OllamaManager: unknown task '{task}'") + + self.job.finish() + + def _get_llm_headers(self): + """Build request headers for LLM server auth.""" + headers = {"Content-Type": "application/json"} + llm_api_key = self.config.get("llm.api_key", "") + llm_auth_type = self.config.get("llm.auth_type", "") + if llm_api_key and llm_auth_type: + headers[llm_auth_type] = llm_api_key + return headers + + def refresh_models(self): + """ + Query the Ollama server for available models and update llm.available_models. + """ + llm_server = self.config.get("llm.server", "") + if not llm_server: + return + + headers = self._get_llm_headers() + available_models = {} + + try: + response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) + if response.status_code != 200: + self.log.warning(f"OllamaManager: could not refresh model list - server returned {response.status_code}") + return + + for model in response.json().get("models", []): + model_id = model["name"] + try: + meta = requests.post( + f"{llm_server}/api/show", + headers=headers, + json={"model": model_id}, + timeout=10 + ).json() + display_name = ( + f"{meta['model_info']['general.basename']}" + f" ({meta['details']['parameter_size']} parameters)" + ) + except (requests.RequestException, json.JSONDecodeError, KeyError) as e: + self.log.debug(f"OllamaManager: could not get metadata for {model_id} (error: {e}), using name only") + display_name = model_id + + available_models[model_id] = { + "name": display_name, + "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", + "provider": "local" + } + + self.config.set("llm.available_models", available_models) + self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") + + except requests.RequestException as e: + self.log.warning(f"OllamaManager: could not refresh model list - request error: {e}") + + def pull_model(self, model_name): + """ + Pull a model from the Ollama registry. + + :param str model_name: Model name (e.g. "llama3:8b") + :return bool: True on success + """ + llm_server = self.config.get("llm.server", "") + if not llm_server: + self.log.warning("OllamaManager: cannot pull model - no LLM server configured") + return False + + headers = self._get_llm_headers() + try: + # stream=False waits for the pull to complete before returning + response = requests.post( + f"{llm_server}/api/pull", + headers=headers, + json={"model": model_name, "stream": False}, + timeout=600 + ) + if response.status_code == 200: + self.log.info(f"OllamaManager: successfully pulled model '{model_name}'") + return True + else: + self.log.warning(f"OllamaManager: could not pull model '{model_name}' - server returned {response.status_code}") + return False + except requests.RequestException as e: + self.log.warning(f"OllamaManager: could not pull model '{model_name}' - request error: {e}") + return False + + def delete_model(self, model_name): + """ + Delete a model from the Ollama server. + + :param str model_name: Model name (e.g. "llama3:8b") + :return bool: True on success + """ + llm_server = self.config.get("llm.server", "") + if not llm_server: + self.log.warning("OllamaManager: cannot delete model - no LLM server configured") + return False + + headers = self._get_llm_headers() + try: + response = requests.delete( + f"{llm_server}/api/delete", + headers=headers, + json={"model": model_name}, + timeout=30 + ) + if response.status_code == 200: + self.log.info(f"OllamaManager: successfully deleted model '{model_name}'") + return True + else: + self.log.warning(f"OllamaManager: could not delete model '{model_name}' - server returned {response.status_code}") + return False + except requests.RequestException as e: + self.log.warning(f"OllamaManager: could not delete model '{model_name}' - request error: {e}") + return False diff --git a/backend/workers/refresh_items.py b/backend/workers/refresh_items.py index 28eb73637..96a7da6b0 100644 --- a/backend/workers/refresh_items.py +++ b/backend/workers/refresh_items.py @@ -1,17 +1,14 @@ """ Refresh items """ -import json - -import requests - from backend.lib.worker import BasicWorker class ItemUpdater(BasicWorker): """ Refresh 4CAT items - Refreshes settings that are dependent on external factors + Refreshes settings that are dependent on external factors. + LLM model refreshing is handled by the OllamaManager worker. """ type = "refresh-items" max_workers = 1 @@ -21,58 +18,10 @@ def ensure_job(cls, config=None): """ Ensure that the refresher is always running - This is used to ensure that the refresher is always running, and if it is - not, it will be started by the WorkerManager. - :return: Job parameters for the worker """ return {"remote_id": "refresh-items", "interval": 60} def work(self): - # Refresh items - self.refresh_settings() - self.job.finish() - - def refresh_settings(self): - """ - Refresh settings - """ - # LLM server settings - llm_provider = self.config.get("llm.provider_type", "none").lower() - llm_server = self.config.get("llm.server", "") - - # For now we only support the Ollama API - if llm_provider == "ollama" and llm_server: - headers = {"Content-Type": "application/json"} - llm_api_key = self.config.get("llm.api_key", "") - llm_auth_type = self.config.get("llm.auth_type", "") - if llm_api_key and llm_auth_type: - headers[llm_auth_type] = llm_api_key - - available_models = {} - try: - response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) - if response.status_code == 200: - settings = response.json() - for model in settings.get("models", []): - model = model["name"] - try: - model_metadata = requests.post(f"{llm_server}/api/show", headers=headers, json={"model": model}, timeout=10).json() - available_models[model] = { - "name": f"{model_metadata['model_info']['general.basename']} ({model_metadata['details']['parameter_size']} parameters)", - "model_card": f"https://ollama.com/library/{model}", - "provider": "local" - } - - except (requests.RequestException, json.JSONDecodeError, KeyError) as e: - self.log.debug(f"Could not get metadata for model {model} from Ollama - skipping (error: {e})") - - self.config.set("llm.available_models", available_models) - self.log.debug("Refreshed LLM server settings cache") - else: - self.log.warning(f"Could not refresh LLM server settings cache - server returned status code {response.status_code}") - - except requests.RequestException as e: - self.log.warning(f"Could not refresh LLM server settings cache - request error: {str(e)}") \ No newline at end of file From 89824e2d475e2ac043c2f198efb31db54bcaa743 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 15:53:21 +0100 Subject: [PATCH 03/44] ruff you mean --- backend/workers/ollama_manager.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index d990b8b14..64808f52d 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -2,8 +2,6 @@ Manage Ollama LLM models """ import json -import time - import requests from backend.lib.worker import BasicWorker From e7aa9af35831ef719b5ae45939b94dba5c28fc17 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 16:36:00 +0100 Subject: [PATCH 04/44] add docker setup if ollama present --- docker/docker_setup.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/docker/docker_setup.py b/docker/docker_setup.py index 450684602..aea641c12 100644 --- a/docker/docker_setup.py +++ b/docker/docker_setup.py @@ -207,6 +207,35 @@ def _format_host(host: str) -> str: f"docker exec 4cat_backend python -c \"from common.config_manager import ConfigManager;config=ConfigManager();config.with_db();config.set('flask.server_name', '{formatted_host}:{public_port}');config.db.commit();\"" ) + # If an Ollama container is available on the Docker network, configure 4CAT to use it. + ollama_url = 'http://ollama:11434' + try: + import requests + try: + resp = requests.get(f"{ollama_url}/api/tags", timeout=2) + if resp.status_code == 200: + current_llm_server = config.get("llm.server") + if current_llm_server == ollama_url: + print("Ollama server already configured in 4CAT settings.") + elif current_llm_server and current_llm_server != ollama_url: + # Previously configured LLM server is different; log a warning but do not overwrite user settings + print(f"Warning: Detected Ollama server at {ollama_url} but llm.server is set to {current_llm_server}. To use the Ollama server, update the llm.server setting to {ollama_url} in the 4CAT Control Panel.") + else: + # set basic LLM settings so the initial admin user does not need to + # configure them manually for local development environments that + # include the Ollama sidecar. + config.set('llm.provider_type', 'ollama') + config.set('llm.server', ollama_url) + config.set('llm.access', True) + config.db.commit() + print('Detected Ollama on Docker network; configured LLM settings to use it.') + except requests.RequestException: + # Ollama not available; do nothing + pass + except Exception: + # requests other error; skip automatic Ollama configuration + pass + print(f"\nStarting app\n" f"4CAT is accessible at:\n" f"{'https' if config.get('flask.https', False) else 'http'}://{config.get('flask.server_name')}\n") From 74e01b6263425f161e109d4a55e71bcacd659ee2 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 16:36:33 +0100 Subject: [PATCH 05/44] a useful frontend setting panel --- common/lib/config_definition.py | 10 +- webtool/__init__.py | 2 + webtool/templates/controlpanel/layout.html | 2 + .../templates/controlpanel/llm-server.html | 147 ++++++++++++++++++ webtool/views/views_llm.py | 104 +++++++++++++ 5 files changed, 264 insertions(+), 1 deletion(-) create mode 100644 webtool/templates/controlpanel/llm-server.html create mode 100644 webtool/views/views_llm.py diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index aef363e04..daf20020d 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -621,7 +621,15 @@ "type": UserInput.OPTION_TEXT_JSON, "default": {}, "help": "Available LLM models", - "tooltip": "A JSON dictionary of available LLM models on the server. 4CAT will query the LLM server for available models periodically.", + "tooltip": "A JSON dictionary of available LLM models on the server. Refreshed daily by the OllamaManager worker.", + "indirect": True, + "global": True + }, + "llm.enabled_models": { + "type": UserInput.OPTION_TEXT_JSON, + "default": [], + "help": "Enabled LLM models", + "tooltip": "List of model keys enabled for use. Managed via the LLM Server settings panel.", "indirect": True, "global": True }, diff --git a/webtool/__init__.py b/webtool/__init__.py index 54ac2072c..e807b7ae0 100644 --- a/webtool/__init__.py +++ b/webtool/__init__.py @@ -171,6 +171,7 @@ def time_this(func): import webtool.views.views_restart # noqa: E402 import webtool.views.views_admin # noqa: E402 import webtool.views.views_extensions # noqa: E402 + import webtool.views.views_llm # noqa: E402 import webtool.views.views_user # noqa: E402 import webtool.views.views_dataset # noqa: E402 import webtool.views.views_misc # noqa: E402 @@ -181,6 +182,7 @@ def time_this(func): app.register_blueprint(webtool.views.views_restart.component) app.register_blueprint(webtool.views.views_admin.component) app.register_blueprint(webtool.views.views_extensions.component) + app.register_blueprint(webtool.views.views_llm.component) app.register_blueprint(webtool.views.views_user.component) app.register_blueprint(webtool.views.views_dataset.component) app.register_blueprint(webtool.views.views_misc.component) diff --git a/webtool/templates/controlpanel/layout.html b/webtool/templates/controlpanel/layout.html index b0b0bc0b4..98d87220a 100644 --- a/webtool/templates/controlpanel/layout.html +++ b/webtool/templates/controlpanel/layout.html @@ -18,6 +18,8 @@ Jobs{% endif %} {% if __user_config("privileges.admin.can_restart") %} Extensions{% endif %} + {% if __user_config("privileges.admin.can_manage_settings") and __user_config("llm.access") %} + LLM Server{% endif %} {% if __user_config("privileges.admin.can_manage_users") %} View logs{% endif %} {% if __user_config("privileges.admin.can_manipulate_all_datasets") %} diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html new file mode 100644 index 000000000..d8a7dbd53 --- /dev/null +++ b/webtool/templates/controlpanel/llm-server.html @@ -0,0 +1,147 @@ +{% extends "controlpanel/layout.html" %} + +{% block title %}LLM Server{% endblock %} +{% block body_class %}plain-page admin {{ body_class }}{% endblock %} +{% block subbreadcrumbs %}{% set navigation.sub = "llm" %}{% endblock %} + +{% block body %} +
+
+

LLM Server

+ + {% if flashes %} +
+ {% for notice in flashes %} +

{{ notice|safe }}

+ {% endfor %} +
+ {% endif %} + + {# Server status #} +
+ + + + + + + + + + + + + +
SettingValue
Server URL{% if llm_server %}{{ llm_server }}{% else %}Not configured{% endif %}
Status + {% if server_status == "online" %} + Online + {% elif server_status == "not configured" %} + Not configured + {% else %} + {{ server_status }} + {% endif %} +
+
+ + {# Available models #} +

Available Models +
+ + +
+

+ +
+ + + + + + + + + + + + + + {% if available_models %} + {% for model_id, model in available_models.items() %} + + + + + + + {% endfor %} + {% else %} + + + + {% endif %} +
ModelDisplay nameStatusActions
+ {{ model_id }} + + {% if model.model_card %} + {{ model.name }} + {% else %} + {{ model.name }} + {% endif %} + + {% if model_id in enabled_models %} + Enabled + {% else %} + Disabled + {% endif %} + + {% if model_id in enabled_models %} +
+ + + +
+ {% else %} +
+ + + +
+ {% endif %} +
+ + + +
+
+ {% if llm_server %} + No models found. Use the Refresh button to fetch available models, or pull a new model below. + {% else %} + Configure the LLM server URL in Settings first. + {% endif %} +
+
+
+ + {# Pull a new model #} + {% if llm_server %} +
+

Pull Model

+

Enter an Ollama model name (e.g. llama3:8b) to pull it from the + Ollama library. + Pulling large models may take several minutes; the job runs in the background.

+
+ +
+ + +
+
+ +
+
+
+ {% endif %} +
+{% endblock %} diff --git a/webtool/views/views_llm.py b/webtool/views/views_llm.py new file mode 100644 index 000000000..41f9df472 --- /dev/null +++ b/webtool/views/views_llm.py @@ -0,0 +1,104 @@ +""" +4CAT views for LLM server management +""" +import time + +import requests + +from flask import Blueprint, render_template, flash, get_flashed_messages, redirect, url_for, request, g +from flask_login import login_required + +from webtool.lib.helpers import setting_required, error + +component = Blueprint("llm", __name__) + + +@component.route("/admin/llm/", methods=["GET", "POST"]) +@login_required +@setting_required("privileges.admin.can_manage_settings") +def llm_panel(): + """ + LLM Server management panel + + Shows server status, available models, and controls to pull/delete/refresh + models. Pull, delete, and refresh operations are queued as OllamaManager + jobs rather than run synchronously. + """ + if not g.config.get("llm.access"): + return error(403, message="LLM access is not enabled on this server.") + + if request.method == "POST": + action = request.form.get("action", "").strip() + + if action == "refresh": + # Queue a one-time manual refresh job; use a timestamp-based remote_id + # so it is always accepted even if a periodic job already exists. + g.queue.add_job("manage-ollama", details={"task": "refresh"}, + remote_id=f"manage-ollama-manual-{int(time.time())}") + flash("Model refresh job queued.") + + elif action == "pull": + model_name = request.form.get("model_name", "").strip() + if model_name: + g.queue.add_job("manage-ollama", details={"task": "pull"}, remote_id=model_name) + flash(f"Pull job queued for model '{model_name}'.") + else: + flash("Please provide a model name to pull.") + + elif action == "delete": + model_name = request.form.get("model_name", "").strip() + if model_name: + g.queue.add_job("manage-ollama", details={"task": "delete"}, remote_id=model_name) + flash(f"Delete job queued for model '{model_name}'.") + + elif action == "enable": + model_name = request.form.get("model_name", "").strip() + if model_name: + enabled_models = list(g.config.get("llm.enabled_models", []) or []) + if model_name not in enabled_models: + enabled_models.append(model_name) + g.config.set("llm.enabled_models", enabled_models) + flash(f"Model '{model_name}' enabled.") + + elif action == "disable": + model_name = request.form.get("model_name", "").strip() + if model_name: + enabled_models = list(g.config.get("llm.enabled_models", []) or []) + if model_name in enabled_models: + enabled_models.remove(model_name) + g.config.set("llm.enabled_models", enabled_models) + flash(f"Model '{model_name}' disabled.") + + return redirect(url_for("llm.llm_panel")) + + # --- GET: render panel --- + + llm_server = g.config.get("llm.server", "") + server_status = "not configured" + + if llm_server: + headers = {"Content-Type": "application/json"} + llm_api_key = g.config.get("llm.api_key", "") + llm_auth_type = g.config.get("llm.auth_type", "") + if llm_api_key and llm_auth_type: + headers[llm_auth_type] = llm_api_key + + try: + resp = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=5) + server_status = "online" if resp.status_code == 200 else f"error (HTTP {resp.status_code})" + except requests.Timeout: + server_status = "unreachable (timeout)" + except requests.RequestException as e: + server_status = f"unreachable ({e})" + + available_models = g.config.get("llm.available_models", {}) or {} + enabled_models = list(g.config.get("llm.enabled_models", []) or []) + + return render_template( + "controlpanel/llm-server.html", + flashes=get_flashed_messages(), + llm_server=llm_server, + server_status=server_status, + available_models=available_models, + enabled_models=enabled_models, + ) From baec03a75257a8780796add212a9ce3826671dd6 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 16:50:36 +0100 Subject: [PATCH 06/44] only show enabled models --- processors/machine_learning/llm_prompter.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 57d8ee496..959ff8f88 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -66,9 +66,15 @@ def get_queue_id(cls, remote_id, details, dataset) -> str: def get_options(cls, parent_dataset=None, config=None) -> dict: # Check if 4CAT wide LLM server is available if config.get("llm.access", False) and config.get("llm.server", ""): - shared_llm_name = config.get("llm.host_name", "4CAT LLM Server") - shared_llm_models = {model: model_metadata.get("name") for model, model_metadata in config.get("llm.available_models", {}).items()} - shared_llm_default = list(shared_llm_models.keys())[0] if shared_llm_models else "" + # Check some models enabled + shared_llm_enabled_models = config.get("llm.enabled_models", []) + shared_llm_models = {model: model_metadata.get("name") for model, model_metadata in config.get("llm.available_models", {}).items() if model in shared_llm_enabled_models} + if not shared_llm_models: + shared_llm_name = False + shared_llm_default = "" + else: + shared_llm_name = config.get("llm.host_name", "4CAT LLM Server") + shared_llm_default = list(shared_llm_models.keys())[0] if shared_llm_models else "" else: shared_llm_name = False shared_llm_default = "" From 36fe0ed734184e1012033cb8469c2d96a9ca14a3 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 5 Mar 2026 16:51:50 +0100 Subject: [PATCH 07/44] update docker readme so people can use ollama --- docker/README.md | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/docker/README.md b/docker/README.md index 00f0862fc..de5dd318b 100644 --- a/docker/README.md +++ b/docker/README.md @@ -42,3 +42,63 @@ https://github.com/docker/buildx/issues/426 https://stackoverflow.com/questions/64221861/failed-to-resolve-with-frontend-dockerfile-v0 4. More errors coming soon! (No doubt) + +--- + +## Running a local Ollama instance alongside 4CAT + +4CAT can use a local [Ollama](https://ollama.com) server for LLM-powered processors. +A Docker Compose override file (`docker-compose_ollama.yml`) is included to add +Ollama as a sidecar service so you do not need to run it separately on the host. + +### Quick start + +```bash +docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d +``` + +This starts the standard 4CAT stack plus an `ollama` container that is only +accessible within the Docker network (and optionally on `localhost:11434` on +the host via the exposed port). + +### Configuring 4CAT to use Ollama + +1. Log in as admin and open **Control Panel → Settings**. +2. Set the following LLM fields: + + | Setting | Value | + |---|---| + | LLM Provider Type | `ollama` | + | LLM Server URL | `http://ollama:11434` | + | LLM Access | enabled | + +3. Save settings. +4. Open **Control Panel → LLM Server** (visible once *LLM Access* is enabled). +5. Use the **Refresh** button to load available models, then **Pull** a model + (e.g. `llama3.2:3b`) to download it from the Ollama library. +6. Enable the models you want to make available to users. + +### GPU support (NVIDIA) + +Uncomment the `deploy.resources` block in `docker-compose_ollama.yml` and +ensure the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) +is installed on your host. Then restart the stack with the override: + +```bash +docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d +``` + +### Persisting models + +Models downloaded by Ollama are stored in the `4cat_ollama_data` Docker volume. +They survive container restarts and re-creations unless you explicitly remove +the volume (`docker volume rm 4cat_ollama_data`). + +### Using an external Ollama server + +If you already run Ollama on the host or elsewhere, skip the override file and +point 4CAT directly at that server: + +- **On the same host**: use `http://host.docker.internal:11434` as the LLM Server URL. +- **Remote server**: use the server's reachable URL and configure any required + API key in the *LLM Server API Key* and *LLM Server Authentication Type* settings. From eb4b49a98126002100be6d16a0f879572bb1469f Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 12:56:32 +0100 Subject: [PATCH 08/44] Cleanup: stale enabled models, refresh_items scheduling, README auto-config docs (#581) * Initial plan * Fix stale enabled models, disable refresh_items scheduling, update README docs Co-authored-by: dale-wahl <32108944+dale-wahl@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: dale-wahl <32108944+dale-wahl@users.noreply.github.com> --- backend/workers/ollama_manager.py | 8 ++++++++ backend/workers/refresh_items.py | 15 +++++++-------- docker/README.md | 15 ++++++++++++++- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index 64808f52d..3c6950b59 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -104,6 +104,14 @@ def refresh_models(self): self.config.set("llm.available_models", available_models) self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") + # Reconcile enabled models: remove any that are no longer available + enabled_models = self.config.get("llm.enabled_models", []) + reconciled = [m for m in enabled_models if m in available_models] + if len(reconciled) != len(enabled_models): + removed = set(enabled_models) - set(reconciled) + self.log.info(f"OllamaManager: removed stale enabled model(s): {', '.join(removed)}") + self.config.set("llm.enabled_models", reconciled) + except requests.RequestException as e: self.log.warning(f"OllamaManager: could not refresh model list - request error: {e}") diff --git a/backend/workers/refresh_items.py b/backend/workers/refresh_items.py index 96a7da6b0..7ab11645d 100644 --- a/backend/workers/refresh_items.py +++ b/backend/workers/refresh_items.py @@ -13,15 +13,14 @@ class ItemUpdater(BasicWorker): type = "refresh-items" max_workers = 1 - @classmethod - def ensure_job(cls, config=None): - """ - Ensure that the refresher is always running - - :return: Job parameters for the worker - """ - return {"remote_id": "refresh-items", "interval": 60} + # ensure_job is intentionally disabled: this worker currently does nothing + # and would only create unnecessary job queue churn. Re-enable when work() + # has actual tasks to perform. + # @classmethod + # def ensure_job(cls, config=None): + # return {"remote_id": "refresh-items", "interval": 60} def work(self): + # Placeholder – no tasks implemented yet. self.job.finish() \ No newline at end of file diff --git a/docker/README.md b/docker/README.md index de5dd318b..31843b2ce 100644 --- a/docker/README.md +++ b/docker/README.md @@ -63,8 +63,17 @@ the host via the exposed port). ### Configuring 4CAT to use Ollama +#### Automatic configuration (fresh Docker install with sidecar) + +When you start 4CAT for the first time using the Ollama override file, the +`docker_setup.py` initialisation script automatically detects the `ollama` +sidecar and sets **LLM Provider Type**, **LLM Server URL**, and **LLM Access** +for you. You can skip to step 2 below. + +#### Manual configuration (or to verify/change settings) + 1. Log in as admin and open **Control Panel → Settings**. -2. Set the following LLM fields: +2. Confirm or set the following LLM fields: | Setting | Value | |---|---| @@ -102,3 +111,7 @@ point 4CAT directly at that server: - **On the same host**: use `http://host.docker.internal:11434` as the LLM Server URL. - **Remote server**: use the server's reachable URL and configure any required API key in the *LLM Server API Key* and *LLM Server Authentication Type* settings. + +In both cases, configure the LLM settings manually via **Control Panel → Settings** +(see *Manual configuration* above), using the appropriate server URL instead of +`http://ollama:11434`. From 26f33f57d85ce0b7f468cfcff0e7bedb407a63ca Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Thu, 9 Apr 2026 17:57:31 +0200 Subject: [PATCH 09/44] ollama_manager: get additional info from ollama including capabilities --- backend/workers/ollama_manager.py | 112 ++++++++++++++++++++---------- 1 file changed, 74 insertions(+), 38 deletions(-) diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index 3c6950b59..9cfa26d73 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -74,46 +74,82 @@ def refresh_models(self): try: response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) - if response.status_code != 200: - self.log.warning(f"OllamaManager: could not refresh model list - server returned {response.status_code}") - return - - for model in response.json().get("models", []): - model_id = model["name"] - try: - meta = requests.post( - f"{llm_server}/api/show", - headers=headers, - json={"model": model_id}, - timeout=10 - ).json() - display_name = ( - f"{meta['model_info']['general.basename']}" - f" ({meta['details']['parameter_size']} parameters)" - ) - except (requests.RequestException, json.JSONDecodeError, KeyError) as e: - self.log.debug(f"OllamaManager: could not get metadata for {model_id} (error: {e}), using name only") - display_name = model_id - - available_models[model_id] = { - "name": display_name, - "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", - "provider": "local" - } - - self.config.set("llm.available_models", available_models) - self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") - - # Reconcile enabled models: remove any that are no longer available - enabled_models = self.config.get("llm.enabled_models", []) - reconciled = [m for m in enabled_models if m in available_models] - if len(reconciled) != len(enabled_models): - removed = set(enabled_models) - set(reconciled) - self.log.info(f"OllamaManager: removed stale enabled model(s): {', '.join(removed)}") - self.config.set("llm.enabled_models", reconciled) - except requests.RequestException as e: self.log.warning(f"OllamaManager: could not refresh model list - request error: {e}") + return + + if response.status_code != 200: + self.log.warning(f"OllamaManager: could not refresh model list - server returned {response.status_code}") + return + + for model in response.json().get("models", []): + model_id = model["name"] + try: + meta = self.get_model_metadata(model_id) + except (requests.RequestException, json.JSONDecodeError, KeyError) as e: + self.log.debug(f"OllamaManager: could not get metadata for {model_id} (error: {e}), using name only") + meta = None + if meta: + display_name = ( + f"{meta['model_info']['general.basename']}" + f" ({meta['details']['parameter_size']} parameters)" + ) + success = True + else: + display_name = model_id + meta = {} + success = False + + available_models[model_id] = { + "name": display_name, + "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", + "provider": "local", + "metadata_success": success, + "capabilities": meta.get("capabilities", []), + "details": meta.get("details", {}), + "modified_at": meta.get("modified_at", None), + } + + self.config.set("llm.available_models", available_models) + self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") + + # Reconcile enabled models: remove any that are no longer available + enabled_models = self.config.get("llm.enabled_models", []) + reconciled = [m for m in enabled_models if m in available_models] + if len(reconciled) != len(enabled_models): + removed = set(enabled_models) - set(reconciled) + self.log.info(f"OllamaManager: removed stale enabled model(s): {', '.join(removed)}") + self.config.set("llm.enabled_models", reconciled) + + + def get_model_metadata(self, model_name): + """ + Get metadata for a specific model from the Ollama server. + + :param str model_name: Model name (e.g. "llama3:8b") + :return dict or None: Metadata dict on success, None on failure + """ + llm_server = self.config.get("llm.server", "") + if not llm_server: + self.log.warning("OllamaManager: cannot get model metadata - no LLM server configured") + return None + + headers = self._get_llm_headers() + try: + response = requests.post( + f"{llm_server}/api/show", + headers=headers, + json={"model": model_name}, + timeout=10 + ) + if response.status_code == 200: + return response.json() + else: + self.log.warning(f"OllamaManager: could not get metadata for model '{model_name}' - server returned {response.status_code}") + return None + except requests.RequestException as e: + self.log.warning(f"OllamaManager: could not get metadata for model '{model_name}' - request error: {e}") + return None def pull_model(self, model_name): """ From f2501b9c60e497ce77da04948e6718a229045f61 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Mon, 13 Apr 2026 12:31:13 +0200 Subject: [PATCH 10/44] ollama_manager: display names / ollama get your api together --- backend/workers/ollama_manager.py | 127 +++++++++++++++++++++++++++++- 1 file changed, 123 insertions(+), 4 deletions(-) diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index 9cfa26d73..c1f2b4572 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -3,6 +3,7 @@ """ import json import requests +import re from backend.lib.worker import BasicWorker @@ -61,6 +62,122 @@ def _get_llm_headers(self): headers[llm_auth_type] = llm_api_key return headers + @staticmethod + def _format_model_display_name(model_id, meta): + """ + Build a friendly display name for a model using metadata where possible. + Falls back to a sensible string derived from `model_id`. + + Dear Ollama: if you add a "display_name" field to your /api/show response, I will use it and not complain about missing metadata fields. Pretty please? :) + Because this is ridiculous. + """ + model_info = meta.get("model_info", {}) if meta else {} + details = meta.get("details", {}) if meta else {} + + # Basename preference: explicit metadata, else model id prefix + basename = None + for key in ("general.basename", "general.base_model.0.name"): + val = model_info.get(key) + if val: + basename = str(val).strip() + break + if not basename: + basename = model_id.split(":", 1)[0].replace("-", " ").replace("_", " ").strip() or model_id + + # Helpers for parsing and formatting parameter counts + def _parse_param_count(val): + if val is None: + return None + if isinstance(val, int): + return val + if isinstance(val, float): + return int(val) + s = str(val).strip() + if not s: + return None + s = s.replace(",", "") + m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([BbMm])$", s) + if m: + num = float(m.group(1)) + suf = m.group(2).upper() + return int(num * (1_000_000_000 if suf == "B" else 1_000_000)) + # try float / scientific + try: + return int(float(s)) + except Exception: + return None + + def _humanize(n): + if n is None: + return None + n = int(n) + if n >= 1_000_000_000: + x = n / 1_000_000_000 + s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" + if s.endswith('.0'): + s = s[:-2] + return f"{s}B" + if n >= 1_000_000: + x = n / 1_000_000 + s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" + if s.endswith('.0'): + s = s[:-2] + return f"{s}M" + return f"{n:,}" + + # Determine param count from prioritized fields + param_candidate = None + for key in ("parameter_size", "parameter_count"): + if key in details: + param_candidate = details.get(key) + break + if param_candidate is None: + param_candidate = model_info.get("general.parameter_count") + param_int = _parse_param_count(param_candidate) + human = _humanize(param_int) + + # Normalize size label if present + size_label = model_info.get("general.size_label") + size_label_norm = str(size_label).strip() if size_label else None + + # Extract tag (suffix after ':') if present + tag = model_id.split(":", 1)[1].strip() if ":" in model_id else None + + # Decide suffix using tag-aware rules + suffix = None + if tag: + t = tag + tl = t.lower() + # Special handling for common tags that often indicate size or version + if tl in ("latest", "stable", "current"): + suffix = f"{t} · {human}" if human else t + # If tag looks like a size (e.g. "1b", "1.7B"), can use as suffix + else: + m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([bBmM])$", t) + if m: + # tag is a size like '1b' or '1.7B' + num = m.group(1) + suf = m.group(2).upper() + tag_size = f"{num}{suf}" + # prefer explicit size_label if it matches + if size_label_norm and size_label_norm.upper() == tag_size.upper(): + suffix = size_label_norm + else: + suffix = tag_size + else: + suffix = f"{t} · {human}" if human else t + else: + # No tag, so just use size if available + if size_label_norm: + suffix = size_label_norm + elif human: + suffix = human + else: + # Nothing useful to show; fallback to model id + return model_id + + return f"{basename} ({suffix})" + def refresh_models(self): """ Query the Ollama server for available models and update llm.available_models. @@ -90,10 +207,11 @@ def refresh_models(self): self.log.debug(f"OllamaManager: could not get metadata for {model_id} (error: {e}), using name only") meta = None if meta: - display_name = ( - f"{meta['model_info']['general.basename']}" - f" ({meta['details']['parameter_size']} parameters)" - ) + try: + display_name = self._format_model_display_name(model_id, meta) + except Exception as e: + self.log.debug(f"OllamaManager: error formatting display name for {model_id}: {e}") + display_name = model_id success = True else: display_name = model_id @@ -105,6 +223,7 @@ def refresh_models(self): "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", "provider": "local", "metadata_success": success, + "model_info": meta.get("model_info", {}), "capabilities": meta.get("capabilities", []), "details": meta.get("details", {}), "modified_at": meta.get("modified_at", None), From c72d0436b3836160a8a2c02249743d58dfc8be5e Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Mon, 13 Apr 2026 14:56:29 +0200 Subject: [PATCH 11/44] Create OllamaClient to collect model info --- backend/workers/ollama_manager.py | 452 +++++++++--------------------- common/lib/ollama_client.py | 296 +++++++++++++++++++ 2 files changed, 423 insertions(+), 325 deletions(-) create mode 100644 common/lib/ollama_client.py diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index c1f2b4572..09dafea3a 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -1,332 +1,134 @@ """ Manage Ollama LLM models """ -import json -import requests -import re - from backend.lib.worker import BasicWorker +from common.lib.ollama_client import OllamaClient class OllamaManager(BasicWorker): - """ - Manage Ollama LLM models - - Periodically refreshes the list of available models from an Ollama server. - Can also pull or delete models on demand when queued with a specific task. - - Job details: - - task: "refresh" (default), "pull", or "delete" - - Job remote_id: - - For refresh: "manage-ollama-refresh" (periodic) or "manage-ollama-manual" (on-demand) - - For pull/delete: the model name to pull or delete - """ - type = "manage-ollama" - max_workers = 1 - - @classmethod - def ensure_job(cls, config=None): - """ - Ensure the daily refresh job is always scheduled - - :return: Job parameters for the worker - """ - return {"remote_id": "manage-ollama-refresh", "interval": 86400} - - def work(self): - task = self.job.details.get("task", "refresh") if self.job.details else "refresh" - model_name = self.job.data["remote_id"] - - if task == "refresh": - self.refresh_models() - elif task == "pull": - success = self.pull_model(model_name) - if success: - self.refresh_models() - elif task == "delete": - success = self.delete_model(model_name) - if success: - self.refresh_models() - else: - self.log.warning(f"OllamaManager: unknown task '{task}'") - - self.job.finish() - - def _get_llm_headers(self): - """Build request headers for LLM server auth.""" - headers = {"Content-Type": "application/json"} - llm_api_key = self.config.get("llm.api_key", "") - llm_auth_type = self.config.get("llm.auth_type", "") - if llm_api_key and llm_auth_type: - headers[llm_auth_type] = llm_api_key - return headers - - @staticmethod - def _format_model_display_name(model_id, meta): - """ - Build a friendly display name for a model using metadata where possible. - Falls back to a sensible string derived from `model_id`. - - Dear Ollama: if you add a "display_name" field to your /api/show response, I will use it and not complain about missing metadata fields. Pretty please? :) - Because this is ridiculous. - """ - model_info = meta.get("model_info", {}) if meta else {} - details = meta.get("details", {}) if meta else {} - - # Basename preference: explicit metadata, else model id prefix - basename = None - for key in ("general.basename", "general.base_model.0.name"): - val = model_info.get(key) - if val: - basename = str(val).strip() - break - if not basename: - basename = model_id.split(":", 1)[0].replace("-", " ").replace("_", " ").strip() or model_id - - # Helpers for parsing and formatting parameter counts - def _parse_param_count(val): - if val is None: - return None - if isinstance(val, int): - return val - if isinstance(val, float): - return int(val) - s = str(val).strip() - if not s: - return None - s = s.replace(",", "") - m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([BbMm])$", s) - if m: - num = float(m.group(1)) - suf = m.group(2).upper() - return int(num * (1_000_000_000 if suf == "B" else 1_000_000)) - # try float / scientific - try: - return int(float(s)) - except Exception: - return None - - def _humanize(n): - if n is None: - return None - n = int(n) - if n >= 1_000_000_000: - x = n / 1_000_000_000 - s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" - if s.endswith('.0'): - s = s[:-2] - return f"{s}B" - if n >= 1_000_000: - x = n / 1_000_000 - s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" - if s.endswith('.0'): - s = s[:-2] - return f"{s}M" - return f"{n:,}" - - # Determine param count from prioritized fields - param_candidate = None - for key in ("parameter_size", "parameter_count"): - if key in details: - param_candidate = details.get(key) - break - if param_candidate is None: - param_candidate = model_info.get("general.parameter_count") - param_int = _parse_param_count(param_candidate) - human = _humanize(param_int) - - # Normalize size label if present - size_label = model_info.get("general.size_label") - size_label_norm = str(size_label).strip() if size_label else None - - # Extract tag (suffix after ':') if present - tag = model_id.split(":", 1)[1].strip() if ":" in model_id else None - - # Decide suffix using tag-aware rules - suffix = None - if tag: - t = tag - tl = t.lower() - # Special handling for common tags that often indicate size or version - if tl in ("latest", "stable", "current"): - suffix = f"{t} · {human}" if human else t - # If tag looks like a size (e.g. "1b", "1.7B"), can use as suffix - else: - m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([bBmM])$", t) - if m: - # tag is a size like '1b' or '1.7B' - num = m.group(1) - suf = m.group(2).upper() - tag_size = f"{num}{suf}" - # prefer explicit size_label if it matches - if size_label_norm and size_label_norm.upper() == tag_size.upper(): - suffix = size_label_norm - else: - suffix = tag_size - else: - suffix = f"{t} · {human}" if human else t - else: - # No tag, so just use size if available - if size_label_norm: - suffix = size_label_norm - elif human: - suffix = human - else: - # Nothing useful to show; fallback to model id - return model_id - - return f"{basename} ({suffix})" - - def refresh_models(self): - """ - Query the Ollama server for available models and update llm.available_models. - """ - llm_server = self.config.get("llm.server", "") - if not llm_server: - return - - headers = self._get_llm_headers() - available_models = {} - - try: - response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) - except requests.RequestException as e: - self.log.warning(f"OllamaManager: could not refresh model list - request error: {e}") - return - - if response.status_code != 200: - self.log.warning(f"OllamaManager: could not refresh model list - server returned {response.status_code}") - return - - for model in response.json().get("models", []): - model_id = model["name"] - try: - meta = self.get_model_metadata(model_id) - except (requests.RequestException, json.JSONDecodeError, KeyError) as e: - self.log.debug(f"OllamaManager: could not get metadata for {model_id} (error: {e}), using name only") - meta = None - if meta: - try: - display_name = self._format_model_display_name(model_id, meta) - except Exception as e: - self.log.debug(f"OllamaManager: error formatting display name for {model_id}: {e}") - display_name = model_id - success = True - else: - display_name = model_id - meta = {} - success = False - - available_models[model_id] = { - "name": display_name, - "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", - "provider": "local", - "metadata_success": success, - "model_info": meta.get("model_info", {}), - "capabilities": meta.get("capabilities", []), - "details": meta.get("details", {}), - "modified_at": meta.get("modified_at", None), - } - - self.config.set("llm.available_models", available_models) - self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") - - # Reconcile enabled models: remove any that are no longer available - enabled_models = self.config.get("llm.enabled_models", []) - reconciled = [m for m in enabled_models if m in available_models] - if len(reconciled) != len(enabled_models): - removed = set(enabled_models) - set(reconciled) - self.log.info(f"OllamaManager: removed stale enabled model(s): {', '.join(removed)}") - self.config.set("llm.enabled_models", reconciled) - - - def get_model_metadata(self, model_name): - """ - Get metadata for a specific model from the Ollama server. - - :param str model_name: Model name (e.g. "llama3:8b") - :return dict or None: Metadata dict on success, None on failure - """ - llm_server = self.config.get("llm.server", "") - if not llm_server: - self.log.warning("OllamaManager: cannot get model metadata - no LLM server configured") - return None - - headers = self._get_llm_headers() - try: - response = requests.post( - f"{llm_server}/api/show", - headers=headers, - json={"model": model_name}, - timeout=10 - ) - if response.status_code == 200: - return response.json() - else: - self.log.warning(f"OllamaManager: could not get metadata for model '{model_name}' - server returned {response.status_code}") - return None - except requests.RequestException as e: - self.log.warning(f"OllamaManager: could not get metadata for model '{model_name}' - request error: {e}") - return None - - def pull_model(self, model_name): - """ - Pull a model from the Ollama registry. - - :param str model_name: Model name (e.g. "llama3:8b") - :return bool: True on success - """ - llm_server = self.config.get("llm.server", "") - if not llm_server: - self.log.warning("OllamaManager: cannot pull model - no LLM server configured") - return False - - headers = self._get_llm_headers() - try: - # stream=False waits for the pull to complete before returning - response = requests.post( - f"{llm_server}/api/pull", - headers=headers, - json={"model": model_name, "stream": False}, - timeout=600 - ) - if response.status_code == 200: - self.log.info(f"OllamaManager: successfully pulled model '{model_name}'") - return True - else: - self.log.warning(f"OllamaManager: could not pull model '{model_name}' - server returned {response.status_code}") - return False - except requests.RequestException as e: - self.log.warning(f"OllamaManager: could not pull model '{model_name}' - request error: {e}") - return False - - def delete_model(self, model_name): - """ - Delete a model from the Ollama server. - - :param str model_name: Model name (e.g. "llama3:8b") - :return bool: True on success - """ - llm_server = self.config.get("llm.server", "") - if not llm_server: - self.log.warning("OllamaManager: cannot delete model - no LLM server configured") - return False - - headers = self._get_llm_headers() - try: - response = requests.delete( - f"{llm_server}/api/delete", - headers=headers, - json={"model": model_name}, - timeout=30 - ) - if response.status_code == 200: - self.log.info(f"OllamaManager: successfully deleted model '{model_name}'") - return True - else: - self.log.warning(f"OllamaManager: could not delete model '{model_name}' - server returned {response.status_code}") - return False - except requests.RequestException as e: - self.log.warning(f"OllamaManager: could not delete model '{model_name}' - request error: {e}") - return False + """ + Manage Ollama LLM models + + Periodically refreshes the list of available models from an Ollama server. + Can also pull or delete models on demand when queued with a specific task. + + Job details: + - task: "refresh" (default), "pull", or "delete" + + Job remote_id: + - For refresh: "manage-ollama-refresh" (periodic) or "manage-ollama-manual" (on-demand) + - For pull/delete: the model name to pull or delete + """ + type = "manage-ollama" + max_workers = 1 + client = None + + @classmethod + def ensure_job(cls, config=None): + """ + Ensure the daily refresh job is always scheduled + + :return: Job parameters for the worker + """ + return {"remote_id": "manage-ollama-refresh", "interval": 86400} + + def work(self): + task = self.job.details.get("task", "refresh") if self.job.details else "refresh" + model_name = self.job.data["remote_id"] + + if task == "refresh": + self.refresh_models() + elif task == "pull": + success = self.pull_model(model_name) + if success: + self.refresh_models() + elif task == "delete": + success = self.delete_model(model_name) + if success: + self.refresh_models() + else: + self.log.warning(f"OllamaManager: unknown task '{task}'") + + self.job.finish() + + def _get_client(self) -> OllamaClient: + """Return a fresh OllamaClient configured from 4CAT settings.""" + if not self.client: + self.client = OllamaClient.from_config(self.config) + return self.client + + def refresh_models(self): + """ + Query the Ollama server for available models and update llm.available_models. + """ + if not self.config.get("llm.server", ""): + return + + client = self._get_client() + models = client.list_models() + + if not models and not self.config.get("llm.server", ""): + return + + available_models = {} + for model in models: + model_id = model["name"] + meta = client.show_model(model_id) + if meta: + try: + display_name = OllamaClient.format_display_name(model_id, meta) + except Exception as e: + self.log.debug(f"OllamaManager: error formatting display name for {model_id}: {e}") + display_name = model_id + else: + self.log.debug(f"OllamaManager: could not get metadata for {model_id}, using name only") + display_name = model_id + + available_models[model_id] = OllamaClient.build_model_entry(model_id, display_name, meta) + + self.config.set("llm.available_models", available_models) + self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") + + # Reconcile enabled models: remove any that are no longer available + enabled_models = self.config.get("llm.enabled_models", []) + reconciled = [m for m in enabled_models if m in available_models] + if len(reconciled) != len(enabled_models): + removed = set(enabled_models) - set(reconciled) + self.log.info(f"OllamaManager: removed stale enabled model(s): {', '.join(removed)}") + self.config.set("llm.enabled_models", reconciled) + + def pull_model(self, model_name): + """ + Pull a model from the Ollama registry. + + :param str model_name: Model name (e.g. "llama3:8b") + :return bool: True on success + """ + if not self.config.get("llm.server", ""): + self.log.warning("OllamaManager: cannot pull model - no LLM server configured") + return False + + success = self._get_client().pull_model(model_name) + if success: + self.log.info(f"OllamaManager: successfully pulled model '{model_name}'") + else: + self.log.warning(f"OllamaManager: could not pull model '{model_name}'") + return success + + def delete_model(self, model_name): + """ + Delete a model from the Ollama server. + + :param str model_name: Model name (e.g. "llama3:8b") + :return bool: True on success + """ + if not self.config.get("llm.server", ""): + self.log.warning("OllamaManager: cannot delete model - no LLM server configured") + return False + + success = self._get_client().delete_model(model_name) + if success: + self.log.info(f"OllamaManager: successfully deleted model '{model_name}'") + else: + self.log.warning(f"OllamaManager: could not delete model '{model_name}'") + return success diff --git a/common/lib/ollama_client.py b/common/lib/ollama_client.py new file mode 100644 index 000000000..96438a2f1 --- /dev/null +++ b/common/lib/ollama_client.py @@ -0,0 +1,296 @@ +""" +Centralized HTTP client for communicating with an Ollama server. + +This class owns all direct HTTP calls to Ollama's REST API and provides shared static +helpers for capability parsing, display-name formatting, and building canonical +llm.available_models entries. It is a plain helper with no 4CAT base-class dependency. +""" + +import re +import requests + +from typing import Optional + + +class OllamaClient: + """ + HTTP client for an Ollama server. + + :param base_url: Base URL of the Ollama server (e.g. "http://localhost:11434"). + :param api_key: Optional API key for authentication. + :param auth_type: Header name to use for the API key (e.g. "Authorization"). + :param timeout: Default request timeout in seconds. + """ + + def __init__(self, base_url: str, api_key: Optional[str] = None, + auth_type: Optional[str] = None, timeout: int = 10) -> None: + self.base_url = base_url.rstrip("/") + self.api_key = api_key + self.auth_type = auth_type + self.timeout = timeout + self._session = requests.Session() + + def _headers(self) -> dict: + """Build request headers, including auth if configured.""" + headers = {"Content-Type": "application/json"} + if self.api_key and self.auth_type: + headers[self.auth_type] = self.api_key + return headers + + def list_models(self) -> list[dict]: + """List available models from the Ollama server. + + :returns: List of model dicts from ``/api/tags``, or ``[]`` on failure. + """ + try: + r = self._session.get( + f"{self.base_url}/api/tags", + headers=self._headers(), + timeout=self.timeout, + ) + if r.status_code == 200: + return r.json().get("models", []) + except requests.RequestException: + pass + return [] + + def show_model(self, model_id: str) -> dict | None: + """Fetch full metadata for a model via ``POST /api/show``. + + :param model_id: Model name (e.g. ``"llama3:8b"``). + :returns: Parsed response dict, or ``None`` on failure. + """ + try: + r = self._session.post( + f"{self.base_url}/api/show", + headers=self._headers(), + json={"model": model_id}, + timeout=self.timeout, + ) + if r.status_code == 200: + return r.json() + except requests.RequestException: + pass + return None + + def pull_model(self, model_id: str, stream: bool = False) -> bool: + """Pull a model from the Ollama registry. + + :param model_id: Model name (e.g. ``"llama3:8b"``). + :param stream: Whether to stream the response (default ``False``). + :returns: ``True`` on success, ``False`` on failure. + """ + try: + r = self._session.post( + f"{self.base_url}/api/pull", + headers=self._headers(), + json={"model": model_id, "stream": stream}, + timeout=600, + ) + return r.status_code == 200 + except requests.RequestException: + return False + + def delete_model(self, model_id: str) -> bool: + """Delete a model from the Ollama server. + + :param model_id: Model name (e.g. ``"llama3:8b"``). + :returns: ``True`` on success, ``False`` on failure. + """ + try: + r = self._session.delete( + f"{self.base_url}/api/delete", + headers=self._headers(), + json={"model": model_id}, + timeout=30, + ) + return r.status_code == 200 + except requests.RequestException: + return False + + @staticmethod + def parse_supported_media_types(meta: dict | None) -> list[str]: + """Derive the media types a model supports from its Ollama metadata. + + **Primary path**: reads ``meta["capabilities"]``: + - ``"completion"`` → ``"text"`` + - ``"vision"`` → ``"image"`` + - ``"embedding"`` → ``"embedding"`` + + **Fallback path** (used when capabilities are absent or only yield ``"text"``): + inspects GGUF ``model_info`` / ``details`` for vision signals and adds + ``"image"`` if any are found. + + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` (unknown — callers + should include the model, not block it). + """ + if meta is None: + return [] + + capabilities = meta.get("capabilities", []) + media_types: list[str] = [] + + _cap_map = { + "completion": "text", + "vision": "image", + "embedding": "embedding", + } + for cap in capabilities: + mapped = _cap_map.get(cap) + if mapped and mapped not in media_types: + media_types.append(mapped) + + # Fallback: GGUF-level vision signals when capabilities list gives no image info + if "image" not in media_types: + details = meta.get("details", {}) + model_info = meta.get("model_info", {}) + projector_info = meta.get("projector_info") + + has_clip_family = "clip" in (details.get("families") or []) + has_vision_keys = any(k.startswith("vision.") for k in model_info) + has_projector = bool(projector_info) + + if has_clip_family or has_vision_keys or has_projector: + media_types.append("image") + + return media_types + + @staticmethod + def format_display_name(model_id: str, meta: dict | None) -> str: + """Build a human-readable display name for a model. + + Logic is identical to the legacy ``OllamaManager._format_model_display_name`` + and has been moved here so it can be shared across OllamaManager and any + other caller without importing the worker class. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. + """ + model_info = meta.get("model_info", {}) if meta else {} + details = meta.get("details", {}) if meta else {} + + basename = None + for key in ("general.basename", "general.base_model.0.name"): + val = model_info.get(key) + if val: + basename = str(val).strip() + break + if not basename: + basename = model_id.split(":", 1)[0].replace("-", " ").replace("_", " ").strip() or model_id + + def _parse_param_count(val): + if val is None: + return None + if isinstance(val, int): + return val + if isinstance(val, float): + return int(val) + s = str(val).strip().replace(",", "") + if not s: + return None + m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([BbMm])$", s) + if m: + num = float(m.group(1)) + suf = m.group(2).upper() + return int(num * (1_000_000_000 if suf == "B" else 1_000_000)) + try: + return int(float(s)) + except Exception: + return None + + def _humanize(n): + if n is None: + return None + n = int(n) + if n >= 1_000_000_000: + x = n / 1_000_000_000 + s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" + if s.endswith(".0"): + s = s[:-2] + return f"{s}B" + if n >= 1_000_000: + x = n / 1_000_000 + s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" + if s.endswith(".0"): + s = s[:-2] + return f"{s}M" + return f"{n:,}" + + param_candidate = None + for key in ("parameter_size", "parameter_count"): + if key in details: + param_candidate = details.get(key) + break + if param_candidate is None: + param_candidate = model_info.get("general.parameter_count") + human = _humanize(_parse_param_count(param_candidate)) + + size_label = model_info.get("general.size_label") + size_label_norm = str(size_label).strip() if size_label else None + + tag = model_id.split(":", 1)[1].strip() if ":" in model_id else None + + suffix = None + if tag: + tl = tag.lower() + if tl in ("latest", "stable", "current"): + suffix = f"{tag} · {human}" if human else tag + else: + m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([bBmM])$", tag) + if m: + tag_size = f"{m.group(1)}{m.group(2).upper()}" + if size_label_norm and size_label_norm.upper() == tag_size.upper(): + suffix = size_label_norm + else: + suffix = tag_size + else: + suffix = f"{tag} · {human}" if human else tag + else: + if size_label_norm: + suffix = size_label_norm + elif human: + suffix = human + else: + return model_id + + return f"{basename} ({suffix})" + + @staticmethod + def build_model_entry(model_id: str, display_name: str, meta: dict | None) -> dict: + """Build a canonical ``llm.available_models`` entry for a model. + + :param model_id: Raw Ollama model identifier. + :param display_name: Human-readable name (from ``format_display_name``). + :param meta: ``/api/show`` response dict, or ``None`` if unavailable. + :returns: Dict ready to store under ``llm.available_models[model_id]``. + """ + has_meta = bool(meta) + return { + "name": display_name, + "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", + "provider": "local", + "metadata_success": has_meta, + "model_info": meta.get("model_info", {}) if has_meta else {}, + "capabilities": meta.get("capabilities", []) if has_meta else [], + "details": meta.get("details", {}) if has_meta else {}, + "modified_at": meta.get("modified_at", None) if has_meta else None, + "supported_media_types": OllamaClient.parse_supported_media_types(meta), + } + + @classmethod + def from_config(cls, config) -> "OllamaClient": + """Instantiate an OllamaClient from 4CAT config. + + Reads ``llm.server``, ``llm.api_key``, and ``llm.auth_type``. + + :param config: A 4CAT ``ConfigWrapper`` or ``ConfigManager`` instance. + :returns: Configured ``OllamaClient``. + """ + return cls( + base_url=config.get("llm.server", ""), + api_key=config.get("llm.api_key", "") or None, + auth_type=config.get("llm.auth_type", "") or None, + ) From a79657b8fd8076f10b0ec1437c60b29e3c21c70e Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Mon, 13 Apr 2026 16:03:40 +0200 Subject: [PATCH 12/44] list capabilities in admin panel --- webtool/templates/controlpanel/llm-server.html | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index d8a7dbd53..22a5573dd 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -62,6 +62,7 @@

Available Models Model Display name + Capabilities Status Actions @@ -78,6 +79,9 @@

Available Models {{ model.name }} {% endif %} + + {{ model.capabilities | join(", ") }} + {% if model_id in enabled_models %} Enabled From 43de49b1f1406dddfe62ab4559a39945ce7af583 Mon Sep 17 00:00:00 2001 From: Dale Wahl Date: Wed, 15 Apr 2026 12:55:19 +0200 Subject: [PATCH 13/44] ollama_manager: check for connection first, ollama_client: accept logger --- backend/workers/ollama_manager.py | 9 ++++-- common/lib/ollama_client.py | 49 ++++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py index 09dafea3a..91b8bfac1 100644 --- a/backend/workers/ollama_manager.py +++ b/backend/workers/ollama_manager.py @@ -36,6 +36,11 @@ def work(self): task = self.job.details.get("task", "refresh") if self.job.details else "refresh" model_name = self.job.data["remote_id"] + self.client = self._get_client() # Initialize client once per job run + if not self.client.is_available(): + self.job.finish() + return + if task == "refresh": self.refresh_models() elif task == "pull": @@ -54,7 +59,7 @@ def work(self): def _get_client(self) -> OllamaClient: """Return a fresh OllamaClient configured from 4CAT settings.""" if not self.client: - self.client = OllamaClient.from_config(self.config) + self.client = OllamaClient.from_config(self.config, log=self.log) return self.client def refresh_models(self): @@ -64,7 +69,7 @@ def refresh_models(self): if not self.config.get("llm.server", ""): return - client = self._get_client() + client = self._get_client() models = client.list_models() if not models and not self.config.get("llm.server", ""): diff --git a/common/lib/ollama_client.py b/common/lib/ollama_client.py index 96438a2f1..5d7bea1da 100644 --- a/common/lib/ollama_client.py +++ b/common/lib/ollama_client.py @@ -23,12 +23,13 @@ class OllamaClient: """ def __init__(self, base_url: str, api_key: Optional[str] = None, - auth_type: Optional[str] = None, timeout: int = 10) -> None: + auth_type: Optional[str] = None, timeout: int = 10, log=None) -> None: self.base_url = base_url.rstrip("/") self.api_key = api_key self.auth_type = auth_type self.timeout = timeout self._session = requests.Session() + self.log = log def _headers(self) -> dict: """Build request headers, including auth if configured.""" @@ -36,6 +37,22 @@ def _headers(self) -> dict: if self.api_key and self.auth_type: headers[self.auth_type] = self.api_key return headers + + def is_available(self) -> bool: + """Check if the Ollama server is reachable and responding to /api/tags.""" + try: + r = self._session.get( + f"{self.base_url}/api/tags", + headers=self._headers(), + timeout=self.timeout, + ) + if self.log and r.status_code != 200: + self.log.warning(f"OllamaClient: server responded with status code {r.status_code} during availability check: {r.text}") + return r.status_code == 200 + except requests.RequestException as e: + if self.log: + self.log.warning(f"OllamaClient: server is not available at {self.base_url}: {e}") + return False def list_models(self) -> list[dict]: """List available models from the Ollama server. @@ -50,8 +67,11 @@ def list_models(self) -> list[dict]: ) if r.status_code == 200: return r.json().get("models", []) - except requests.RequestException: - pass + if self.log: + self.log.warning(f"OllamaClient: failed to list models from {self.base_url}, status code {r.status_code}: {r.text}") + except requests.RequestException as e: + if self.log: + self.log.warning(f"OllamaClient: failed to list models from {self.base_url}: {e}") return [] def show_model(self, model_id: str) -> dict | None: @@ -69,8 +89,11 @@ def show_model(self, model_id: str) -> dict | None: ) if r.status_code == 200: return r.json() - except requests.RequestException: - pass + if self.log: + self.log.warning(f"OllamaClient: failed to show model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") + except requests.RequestException as e: + if self.log: + self.log.warning(f"OllamaClient: failed to show model {model_id} from {self.base_url}: {e}") return None def pull_model(self, model_id: str, stream: bool = False) -> bool: @@ -87,8 +110,12 @@ def pull_model(self, model_id: str, stream: bool = False) -> bool: json={"model": model_id, "stream": stream}, timeout=600, ) + if r.status_code != 200 and self.log: + self.log.warning(f"OllamaClient: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") return r.status_code == 200 - except requests.RequestException: + except requests.RequestException as e: + if self.log: + self.log.warning(f"OllamaClient: failed to pull model {model_id} from {self.base_url}: {e}") return False def delete_model(self, model_id: str) -> bool: @@ -104,8 +131,12 @@ def delete_model(self, model_id: str) -> bool: json={"model": model_id}, timeout=30, ) + if r.status_code != 200 and self.log: + self.log.warning(f"OllamaClient: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") return r.status_code == 200 - except requests.RequestException: + except requests.RequestException as e: + if self.log: + self.log.warning(f"OllamaClient: failed to delete model {model_id} from {self.base_url}: {e}") return False @staticmethod @@ -281,16 +312,18 @@ def build_model_entry(model_id: str, display_name: str, meta: dict | None) -> di } @classmethod - def from_config(cls, config) -> "OllamaClient": + def from_config(cls, config, log=None) -> "OllamaClient": """Instantiate an OllamaClient from 4CAT config. Reads ``llm.server``, ``llm.api_key``, and ``llm.auth_type``. :param config: A 4CAT ``ConfigWrapper`` or ``ConfigManager`` instance. + :param log: A logging instance for reporting issues. :returns: Configured ``OllamaClient``. """ return cls( base_url=config.get("llm.server", ""), api_key=config.get("llm.api_key", "") or None, auth_type=config.get("llm.auth_type", "") or None, + log=log, ) From c8da75f438a8f29b935029089cf55f9262c80de6 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 19 May 2026 11:59:40 +0200 Subject: [PATCH 14/44] Multi-form! --- .jshintrc | 8 ++ common/lib/config_definition.py | 94 ++++++++++-------- common/lib/user_input.py | 38 ++++++++ extensions | 1 + processors/metrics/rank_attribute.py | 3 +- webtool/__init__.py | 4 + webtool/lib/template_filters.py | 23 +++++ webtool/static/css/stylesheet.css | 31 ++++++ webtool/static/js/fourcat.js | 1 + webtool/static/js/modules/multi-form.js | 95 +++++++++++++++++++ webtool/static/js/modules/ui-helpers.js | 2 +- webtool/static/js/modules/util.js | 22 +++++ .../components/datasource-option.html | 32 ++++++- webtool/views/views_admin.py | 8 +- 14 files changed, 314 insertions(+), 48 deletions(-) create mode 100644 .jshintrc create mode 120000 extensions create mode 100644 webtool/static/js/modules/multi-form.js diff --git a/.jshintrc b/.jshintrc new file mode 100644 index 000000000..7a6412586 --- /dev/null +++ b/.jshintrc @@ -0,0 +1,8 @@ +{ + "esversion": 11, + "undef": true, + "globals": { + "$": false, + "document": false, + } +} \ No newline at end of file diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index daf20020d..c64f8d633 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -443,16 +443,30 @@ "global": True }, "flask.autologin.hostnames": { - "type": UserInput.OPTION_TEXT_JSON, + "type": UserInput.OPTION_MULTI_OPTION, "default": [], + "options": { + "hostname": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "Host name or IP address" + } + }, "help": "White-listed hostnames", "tooltip": "A list of host names or IP addresses to automatically log in. Docker should include localhost and " "Server Name. Front-end needs to be restarted for changed to apply.", "global": True }, "flask.autologin.api": { - "type": UserInput.OPTION_TEXT_JSON, + "type": UserInput.OPTION_MULTI_OPTION, "default": [], + "options": { + "hostname": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "Host name or IP address" + } + }, "help": "White-list for API", "tooltip": "A list of host names or IP addresses to allow access to API endpoints with no rate limiting. " "Docker should include localhost and Server Name. Front-end needs to be restarted for changed to " @@ -579,43 +593,47 @@ "to local or remote LLM servers. You can also set up your own LLM server using open source software such as " "[Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for your users." }, - "llm.host_name": { - "type": UserInput.OPTION_TEXT, - "default": "4CAT LLM Server", - "help": "Name of LLM Server in UI", - "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).", - "global": True - }, - "llm.provider_type": { - "type": UserInput.OPTION_CHOICE, - "help": "LLM Provider Type", - "default": "none", - "options": { - "ollama": "Ollama", - "none": "None", - }, - "global": True, - }, - "llm.server": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "LLM Server URL", - "tooltip": "The URL of the LLM server, e.g. http://localhost:5000", - "global": True - }, - "llm.auth_type": { - "type": UserInput.OPTION_TEXT, - "help": "LLM Server Authentication Type", - "default": "", - "tooltip": "The authentication type required to connect to the server (e.g. 'X-API-KEY', 'Authorization'). Passed in the request header with the API key.", + "llm.services": { + "type": UserInput.OPTION_MULTI_OPTION, + "default": [], "global": True, - }, - "llm.api_key": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "LLM Server API Key", - "tooltip": "The API key to access the LLM server, if required.", - "global": True + "help": "LLM providers", + "options": { + "host_name": { + "type": UserInput.OPTION_TEXT, + "default": "4CAT LLM Server", + "help": "Name of LLM Server in UI", + "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).", + }, + "provider_type": { + "type": UserInput.OPTION_CHOICE, + "help": "LLM Provider Type", + "default": "none", + "options": { + "ollama": "Ollama", + "litellm": "LiteLLM", + "none": "None", + }, + }, + "server": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "LLM Server URL", + "tooltip": "The URL of the LLM server, e.g. http://localhost:5000", + }, + "auth_type": { + "type": UserInput.OPTION_TEXT, + "help": "Authentication Header", + "default": "", + "tooltip": "The HTTP header used to authenticate with the server (e.g. 'X-API-KEY', 'Authorization'). Passed with the Authentication Key as value.", + }, + "api_key": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "Authentication Key", + "tooltip": "The API key to access the LLM server, if required.", + }, + } }, "llm.available_models": { "type": UserInput.OPTION_TEXT_JSON, diff --git a/common/lib/user_input.py b/common/lib/user_input.py index 7fcb6bcb9..16a583d74 100644 --- a/common/lib/user_input.py +++ b/common/lib/user_input.py @@ -26,6 +26,7 @@ class UserInput: OPTION_TEXT = "string" # simple string or integer (input text) OPTION_MULTI = "multi" # multiple values out of a list (select multiple) OPTION_MULTI_SELECT = "multi_select" # multiple values out of a dropdown list (select multiple) + OPTION_MULTI_OPTION = "multi_option" # several instances of a collection of controls OPTION_INFO = "info" # just a bit of text, not actual input OPTION_TEXT_LARGE = "textarea" # longer text OPTION_TEXT_JSON = "json" # text, but should be valid JSON @@ -70,6 +71,8 @@ def parse_all(options, input, silently_correct=True): if type(input) is not dict and type(input) is not ImmutableMultiDict: raise TypeError("input must be a dictionary or ImmutableMultiDict") + print(input) + if type(input) is ImmutableMultiDict: # we are not using to_dict, because that messes up multi-selects input = {key: input.getlist(key) for key in input} @@ -181,6 +184,41 @@ def parse_all(options, input, silently_correct=True): parsed_input[option] = table_input + elif settings.get("type") == UserInput.OPTION_MULTI_OPTION: + # these are collections of other input options that can be + # repeated an arbitrary amount of times and are saved as a + # list of these values + # i.e. forms within forms!!! + item_options = settings["options"] + input_items = {} + for key, value in input.items(): + if key_match := re.match(f"{option}-([0-9]+)-(.+)", key): + input_index = int(key_match[1]) + # note: the index is just used to match inputs to items + # it is not used for ordering + option_item = key_match[2] + if option_item not in item_options: + continue + + if input_index not in input_items: + input_items[input_index] = {} + + print(key, value) + input_items[input_index][option_item] = UserInput.parse_value(item_options[option_item], value, input_items[input_index], silently_correct) + + # discard items that are only default values + parsed_input[option] = [] + for input_index, item in input_items.items(): + only_default = True + for key, value in item.items(): + if value != item_options[key]["default"]: + only_default = False + + if not only_default: + parsed_input[option].append(item) + + print(parsed_input[option]) + elif option not in input: # not provided? use default parsed_input[option] = settings.get("default", None) diff --git a/extensions b/extensions new file mode 120000 index 000000000..c25d13e68 --- /dev/null +++ b/extensions @@ -0,0 +1 @@ +/Users/stijn/surfdrive/PycharmProjects/4cat/config/extensions \ No newline at end of file diff --git a/processors/metrics/rank_attribute.py b/processors/metrics/rank_attribute.py index 2f50d623a..3c7c03286 100644 --- a/processors/metrics/rank_attribute.py +++ b/processors/metrics/rank_attribute.py @@ -86,7 +86,8 @@ def get_options(cls, parent_dataset=None, config=None): "hostnames": "Domain names", "level2-hostnames": "Second-level domain names (e.g. m.youtube.com -> youtube.com)", "hashtags": "Hashtags (words starting with #)", - "emoji": "Emoji (each used emoji in the column is counted individually)" + "emoji": "Emoji (each used emoji in the column is counted individually)", + "occurrence": "Values (the number of comma-separated values in the given field)" }, "help": "Extract from column", "tooltip": "This can be used to extract more specific values from the value of the selected column(s); for " diff --git a/webtool/__init__.py b/webtool/__init__.py index e807b7ae0..e7359c423 100644 --- a/webtool/__init__.py +++ b/webtool/__init__.py @@ -241,6 +241,10 @@ def get_datasource_explorer_templates(name): [FileSystemLoader(template_paths), FunctionLoader(get_datasource_explorer_templates)] ) + # enable to {% do %} tag in jinja + app.jinja_env.add_extension("jinja2.ext.do") + app.jinja_env.add_extension("jinja2.ext.debug") + # import custom jinja2 template filters # these also benefit from current_app import webtool.lib.template_filters # noqa: E402 diff --git a/webtool/lib/template_filters.py b/webtool/lib/template_filters.py index 5682c6dbc..81b48ad55 100644 --- a/webtool/lib/template_filters.py +++ b/webtool/lib/template_filters.py @@ -195,6 +195,25 @@ def _jinja2_filter_markdown(text, trim_container=False): def _jinja2_filter_isbool(value): return isinstance(value, bool) +@current_app.template_filter('propmap') +def _jinja2_filter_propmap(data, property, default=None): + """ + Select a property from a sequence of dicts + + To map `{a: b: {prop: value}}` to `{a: value}` for a given `prop`. If + `data` is a dict, preserve key:value pairs. If the property does not exist + in a sequence item, use the `default` value. + + :param data: Sequence or dict to map + :param property: Property to use for mapping + :param default: Value to use if property does not exist in item + :return: Mapped sequence or dict + """ + if type(data) is dict: + return {k: v.get(property, default) for k, v in data.items()} + else: + return [v.get(property, default) for v in data.values()] + @current_app.template_filter('json') def _jinja2_filter_json(data): return json.dumps(data) @@ -429,6 +448,10 @@ def explorer_css(datasource, scope_class="explorer-content-container"): def _jinja2_filter_hasattr(obj, attribute): return hasattr(obj, attribute) +@current_app.template_filter('debug') +def _jinja2_filter_debug(value): + print(value) + @current_app.context_processor def inject_now(): def uniqid(): diff --git a/webtool/static/css/stylesheet.css b/webtool/static/css/stylesheet.css index fecc5cfd7..a4097952f 100644 --- a/webtool/static/css/stylesheet.css +++ b/webtool/static/css/stylesheet.css @@ -330,6 +330,37 @@ article.small .form-element select[multiple] { top: -0.4em; } +.form-multi-option-wrapper, .form-multi-option-header { + margin: 0 1em; +} + +.form-multi-option-wrapper { + margin: 1em; +} + +.form-multi-option-header { + +} + +.form-multi-option-wrapper li { + border-left: 2px solid var(--accent); + margin-bottom: 4px; + position: relative; + margin-left: 20px; +} + +.form-multi-option-wrapper li::before { + content: ''; + position: absolute; + top: 0; + left: -17px; + border-left: 2px solid var(--contrast-dark); + border-bottom: 2px solid var(--contrast-dark); + width: 15px; + height: 15px; + +} + .data-overview-link { display: block; font-size: 13px; diff --git a/webtool/static/js/fourcat.js b/webtool/static/js/fourcat.js index e84dc8374..c4fd3762c 100644 --- a/webtool/static/js/fourcat.js +++ b/webtool/static/js/fourcat.js @@ -6,6 +6,7 @@ async function load() { await import("./modules/dataset-page.js"), await import("./modules/dynamic-container.js"), await import("./modules/multichoice.js"), + await import("./modules/multi-form.js"), await import("./modules/popup.js"), await import("./modules/run-processor.js"), await import("./modules/tooltip.js"), diff --git a/webtool/static/js/modules/multi-form.js b/webtool/static/js/modules/multi-form.js new file mode 100644 index 000000000..389f5d9a8 --- /dev/null +++ b/webtool/static/js/modules/multi-form.js @@ -0,0 +1,95 @@ +import {find_parent, reset_form_elements} from "./util.js"; + +export const multiForm = { + init: function () { + const actions = document.createElement('div'); + actions.className = 'multi-form-actions'; + + const add_button = document.createElement('button'); + add_button.className = 'add-button'; + add_button.textContent = '+'; + add_button.addEventListener('click', multiForm.add_item); + + const delete_button = document.createElement('button'); + delete_button.className = 'delete-button'; + delete_button.textContent = 'x'; + delete_button.addEventListener('click', multiForm.delete_item); + + actions.appendChild(add_button); + actions.appendChild(delete_button); + + document.querySelectorAll('.form-multi-option-wrapper').forEach(function (el) { + el.addEventListener('click', multiForm.handle_click); + el.querySelectorAll('li').forEach(function (el) { + const el_actions = actions.cloneNode(true); + el.appendChild(el_actions); + }) + }) + }, + + handle_click: function (e) { + if(!(e.target.classList.contains('add-button') || e.target.classList.contains('delete-button'))) { + return true; + } + e.preventDefault(); + const wrapper = find_parent(e.target, 'ol'); + if(e.target.classList.contains('delete-button')){ + multiForm.delete_item(e); + } else { + multiForm.add_item(e); + } + multiForm.renumber(wrapper); + }, + + add_item: function (e) { + const ol = find_parent(e.target, 'ol.form-multi-option-wrapper'); + const last_li = find_parent(e.target, 'li'); + const clone = last_li.cloneNode(true); + reset_form_elements(clone) + ol.appendChild(clone); + }, + + delete_item: function (e) { + if(!confirm("Are you sure?")){ + return false; + } + + const li = find_parent(e.target, 'li'); + const ol = find_parent(e.target, 'ol.form-multi-option-wrapper'); + + if(ol.querySelectorAll('li').length > 1) { + li.parentNode.removeChild(li); + } else { + // last element; do not remove, but reset to default + reset_form_elements(li); + } + }, + + renumber: function(parent) { + let index = 1; + parent.querySelectorAll('li').forEach(function (el) { + el.setAttribute('data-multi-option-index', index); + multiForm.renumber_items(el, index); + index += 1; + }) + }, + + renumber_items: function(parent, index) { + const attributes = ['for', 'id', 'name']; + parent.childNodes.forEach(child => { + if (!(child instanceof HTMLElement)) { + return; + } + for(const attribute of attributes) { + console.log(child); + if(child.hasAttribute(attribute)) { + child.setAttribute(attribute, child.getAttribute(attribute).replace(/-[0-9+]-/, `-${index}-`)); + } + } + multiForm.renumber_items(child, index); + }); + } + +} + +export const module = multiForm; \ No newline at end of file diff --git a/webtool/static/js/modules/ui-helpers.js b/webtool/static/js/modules/ui-helpers.js index 8b0ef097f..4c9b31ebe 100644 --- a/webtool/static/js/modules/ui-helpers.js +++ b/webtool/static/js/modules/ui-helpers.js @@ -1,5 +1,5 @@ import {popup} from "./popup.js"; -import {find_parent} from "./util.js"; +import {find_parent, hsv2hsl} from "./util.js"; export const ui_helpers = { /** diff --git a/webtool/static/js/modules/util.js b/webtool/static/js/modules/util.js index 6f17d4ec9..1839c13f1 100644 --- a/webtool/static/js/modules/util.js +++ b/webtool/static/js/modules/util.js @@ -219,4 +219,26 @@ export function hsv2rgb(h, s, v) { } return [r * 255, g * 255, b * 255]; +} + +/** + * Recursively reset form elements to default value + * + * We don't use form.reset() for two reasons: + * - We may want to reset a subset of a form + * - The *current* value of an element may not be the designated *default* + * value of the element + * + * @param parent Parent node to recursively reset child nodes of + */ +export function reset_form_elements(parent) { + parent.childNodes.forEach(child => { + if(child instanceof HTMLElement) { + if (child.hasAttribute('data-default')) { + child.value = child.getAttribute('data-default'); + } else { + reset_form_elements(child); + } + } + }) } \ No newline at end of file diff --git a/webtool/templates/components/datasource-option.html b/webtool/templates/components/datasource-option.html index 2a3cbb4a9..867a339c8 100644 --- a/webtool/templates/components/datasource-option.html +++ b/webtool/templates/components/datasource-option.html @@ -5,6 +5,28 @@
{{ settings.help|markdown|safe }}
{% elif settings.type in ["annotation", "annotations"] %} {# pass - a datasource should never need to annotate itself; this is for processor options only #} + {% elif settings.type == "multi_option" %} +
+

{{ settings.help }}

+
+
    + {# always include an empty item #} + {% set empty_item = settings.options|propmap("default") %} + {% do settings.default.append(empty_item) %} + {% for item in settings.default %} + {% set outerloop = loop %} + {% set last_index = outerloop.index %} +
  1. + {% for sub_option, sub_settings in settings.options.items() %} + {% do sub_settings.update({"original_default": sub_settings.default, "default": item[sub_option]}) %} + {% set sub_option = option ~ "-" ~ outerloop.index ~ "-" ~ sub_option %} + {% with option=sub_option, settings=sub_settings %} + {% include "components/datasource-option.html" %} + {% endwith %} + {% endfor %} +
  2. + {% endfor %} +
{% else %}
@@ -21,7 +43,7 @@

{% endif %} {% elif settings.type == "string" %} - 0 %} step="{{ settings.min }}"{% elif settings.min is defined and settings.min is not none and settings.min|float == 0 %} step="any"{% endif %} type="{% if (settings.min is defined and settings.min is not none) or (settings.max is defined and settings.max is not none) %}number{% elif settings.password %}password{% else %}text{% endif %}" value="{{ settings.default }}"> + 0 %} step="{{ settings.min }}"{% elif settings.min is defined and settings.min is not none and settings.min|float == 0 %} step="any"{% endif %} type="{% if (settings.min is defined and settings.min is not none) or (settings.max is defined and settings.max is not none) %}number{% elif settings.password %}password{% else %}text{% endif %}" value="{{ settings.default }}" data-default="{{ settings.original_default }}"> {% if "tooltip" in settings %} {% endif %} {% elif settings.type == "date" %} - + {% if "tooltip" in settings %} @@ -55,7 +77,7 @@ {% endif %} {% elif settings.type in ("json", "textarea") %} + placeholder="{{ settings.tooltip }}" data-default="{{ settings.original_default }}">{{ settings.default }} {% if "tooltip" in settings %} {% endif %} {% elif settings.type == "choice" %} - {% for value, label in settings.options.items() %} {% endfor %} @@ -105,7 +127,7 @@ {% elif settings.type == "hue" %}
{% set hue_id = uniqid() %} - diff --git a/webtool/views/views_admin.py b/webtool/views/views_admin.py index 3b425bcef..301dc7138 100644 --- a/webtool/views/views_admin.py +++ b/webtool/views/views_admin.py @@ -666,9 +666,10 @@ def manipulate_settings(): global_value = global_settings.get(option, definition.get(option, {}).get("default")) is_changed = tag and global_value != tag_value - default = all_settings.get(option, definition.get(option, {}).get("default")) + default = definition.get(option, {}).get("default") + current_value = all_settings.get(option, definition.get(option, {}).get("default")) if definition.get(option, {}).get("type") == UserInput.OPTION_TEXT_JSON: - default = json.dumps(default) + current_value = json.dumps(current_value) # this is used for organising things in the UI option_owner = option.split(".")[0] @@ -694,7 +695,8 @@ def manipulate_settings(): "default": all_settings.get(option) }), "submenu": submenu, - "default": default, + "default": current_value, # override default so this is the value displayed in the web UI + "original_default": default, # but also save the actual default "tabname": tabname, "is_changed": is_changed } From 4c429dfdf47a9c72fe84a19dcfc0676605dd75ad Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 16:49:59 +0200 Subject: [PATCH 15/44] Refactor everything --- backend/workers/llm_manager.py | 83 ++++++ backend/workers/ollama_manager.py | 139 --------- backend/workers/refresh_items.py | 42 --- common/assets/llms.json | 268 +++++++++--------- common/lib/config_definition.py | 31 +- common/lib/llm/__init__.py | 0 common/lib/{llm.py => llm/adapter.py} | 211 +++++--------- common/lib/llm/litellm_client.py | 62 ++++ common/lib/llm/llm_client.py | 190 +++++++++++++ common/lib/llm/lmstudio_client.py | 69 +++++ common/lib/{ => llm}/ollama_client.py | 234 ++++----------- common/lib/llm/thirdparty_client.py | 56 ++++ common/lib/llm/vllm_client.py | 59 ++++ processors/machine_learning/llm_prompter.py | 265 +++-------------- webtool/lib/template_filters.py | 12 + webtool/static/css/stylesheet.css | 12 + webtool/static/js/modules/multi-form.js | 14 +- .../templates/controlpanel/llm-server.html | 66 +++-- webtool/views/views_llm.py | 40 ++- 19 files changed, 935 insertions(+), 918 deletions(-) create mode 100644 backend/workers/llm_manager.py delete mode 100644 backend/workers/ollama_manager.py create mode 100644 common/lib/llm/__init__.py rename common/lib/{llm.py => llm/adapter.py} (60%) create mode 100644 common/lib/llm/litellm_client.py create mode 100644 common/lib/llm/llm_client.py create mode 100644 common/lib/llm/lmstudio_client.py rename common/lib/{ => llm}/ollama_client.py (50%) create mode 100644 common/lib/llm/thirdparty_client.py create mode 100644 common/lib/llm/vllm_client.py diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py new file mode 100644 index 000000000..3b279de36 --- /dev/null +++ b/backend/workers/llm_manager.py @@ -0,0 +1,83 @@ +""" +Manage LLM models +""" +from backend.lib.worker import BasicWorker +from common.lib.llm.llm_client import LLMProviderClient + +import json + +class LLMProviderManager(BasicWorker): + """ + Manages LLM models + + Periodically refreshes the list of available models from an LLM provider. + Can also pull or delete models on demand when queued with a specific task. + + Job details: + - task: "refresh" (default), "pull", or "delete" + - provider: the URL of the LLM provider, as configured in the + llm.providers setting. if not given, run on all providers + + Job remote_id: + - For refresh: "manage-llm-refresh" (periodic) or "manage-llm-manual" (on-demand) + - For pull/delete: the model name to pull or delete + """ + type = "manage-llm" + max_workers = 1 + client = None + + @classmethod + def ensure_job(cls, config=None): + """ + Ensure the daily refresh job is always scheduled + + :return: Job parameters for the worker + """ + return {"remote_id": "manage-llm-refresh", "interval": 86400} + + def work(self): + task = self.job.details.get("task", "refresh") if self.job.details else "refresh" + provider = self.job.details.get("provider", "") if self.job.details else None + model_name = self.job.data["remote_id"] + available_models = None + + for provider_config in self.config.get("llm.providers", []): + if provider and provider != provider_config["url"]: + continue + + try: + client = LLMProviderClient.get_client(self.config, provider_config) + except ValueError: + self.log.debug(f"{self.__class__.__name__}: invalid provider type: {provider_config['type']}, skipping") + continue + + # note that technically it is possible to pull/delete a model on + # multiple providers at once (if a model_name is defined but no + # provider). may not be a problem? may be useful one day? + success = False + if task == "pull" and hasattr(client, "pull_model"): + success = client.pull_model(model_name) + + elif task == "delete" and hasattr(client, "delete_model"): + success = client.delete_model(model_name) + + if success or task == "refresh": + # refresh models after pulling/deleting, or when asked to + if available_models is None: + available_models = {} + + for model in client.list_models(): + model = client.build_model_entry(model) + available_models[model["id"]] = model + + self.log.debug(f"{self.__class__.__name__}: ran task '{task}' (model name: {model_name or 'N/A'})") + + elif success is None: + self.log.warning(f"{self.__class__.__name__}: task '{task}' unknown or not supported by client") + else: + self.log.warning(f"{self.__class__.__name__}: task '{task}' failed for model {model_name}") + + if available_models is not None: + self.config.set("llm.available_models", available_models) + + self.job.finish() diff --git a/backend/workers/ollama_manager.py b/backend/workers/ollama_manager.py deleted file mode 100644 index 91b8bfac1..000000000 --- a/backend/workers/ollama_manager.py +++ /dev/null @@ -1,139 +0,0 @@ -""" -Manage Ollama LLM models -""" -from backend.lib.worker import BasicWorker -from common.lib.ollama_client import OllamaClient - - -class OllamaManager(BasicWorker): - """ - Manage Ollama LLM models - - Periodically refreshes the list of available models from an Ollama server. - Can also pull or delete models on demand when queued with a specific task. - - Job details: - - task: "refresh" (default), "pull", or "delete" - - Job remote_id: - - For refresh: "manage-ollama-refresh" (periodic) or "manage-ollama-manual" (on-demand) - - For pull/delete: the model name to pull or delete - """ - type = "manage-ollama" - max_workers = 1 - client = None - - @classmethod - def ensure_job(cls, config=None): - """ - Ensure the daily refresh job is always scheduled - - :return: Job parameters for the worker - """ - return {"remote_id": "manage-ollama-refresh", "interval": 86400} - - def work(self): - task = self.job.details.get("task", "refresh") if self.job.details else "refresh" - model_name = self.job.data["remote_id"] - - self.client = self._get_client() # Initialize client once per job run - if not self.client.is_available(): - self.job.finish() - return - - if task == "refresh": - self.refresh_models() - elif task == "pull": - success = self.pull_model(model_name) - if success: - self.refresh_models() - elif task == "delete": - success = self.delete_model(model_name) - if success: - self.refresh_models() - else: - self.log.warning(f"OllamaManager: unknown task '{task}'") - - self.job.finish() - - def _get_client(self) -> OllamaClient: - """Return a fresh OllamaClient configured from 4CAT settings.""" - if not self.client: - self.client = OllamaClient.from_config(self.config, log=self.log) - return self.client - - def refresh_models(self): - """ - Query the Ollama server for available models and update llm.available_models. - """ - if not self.config.get("llm.server", ""): - return - - client = self._get_client() - models = client.list_models() - - if not models and not self.config.get("llm.server", ""): - return - - available_models = {} - for model in models: - model_id = model["name"] - meta = client.show_model(model_id) - if meta: - try: - display_name = OllamaClient.format_display_name(model_id, meta) - except Exception as e: - self.log.debug(f"OllamaManager: error formatting display name for {model_id}: {e}") - display_name = model_id - else: - self.log.debug(f"OllamaManager: could not get metadata for {model_id}, using name only") - display_name = model_id - - available_models[model_id] = OllamaClient.build_model_entry(model_id, display_name, meta) - - self.config.set("llm.available_models", available_models) - self.log.debug(f"OllamaManager: refreshed model list ({len(available_models)} models)") - - # Reconcile enabled models: remove any that are no longer available - enabled_models = self.config.get("llm.enabled_models", []) - reconciled = [m for m in enabled_models if m in available_models] - if len(reconciled) != len(enabled_models): - removed = set(enabled_models) - set(reconciled) - self.log.info(f"OllamaManager: removed stale enabled model(s): {', '.join(removed)}") - self.config.set("llm.enabled_models", reconciled) - - def pull_model(self, model_name): - """ - Pull a model from the Ollama registry. - - :param str model_name: Model name (e.g. "llama3:8b") - :return bool: True on success - """ - if not self.config.get("llm.server", ""): - self.log.warning("OllamaManager: cannot pull model - no LLM server configured") - return False - - success = self._get_client().pull_model(model_name) - if success: - self.log.info(f"OllamaManager: successfully pulled model '{model_name}'") - else: - self.log.warning(f"OllamaManager: could not pull model '{model_name}'") - return success - - def delete_model(self, model_name): - """ - Delete a model from the Ollama server. - - :param str model_name: Model name (e.g. "llama3:8b") - :return bool: True on success - """ - if not self.config.get("llm.server", ""): - self.log.warning("OllamaManager: cannot delete model - no LLM server configured") - return False - - success = self._get_client().delete_model(model_name) - if success: - self.log.info(f"OllamaManager: successfully deleted model '{model_name}'") - else: - self.log.warning(f"OllamaManager: could not delete model '{model_name}'") - return success diff --git a/backend/workers/refresh_items.py b/backend/workers/refresh_items.py index 1a9811603..7ab11645d 100644 --- a/backend/workers/refresh_items.py +++ b/backend/workers/refresh_items.py @@ -23,46 +23,4 @@ class ItemUpdater(BasicWorker): def work(self): # Placeholder – no tasks implemented yet. self.job.finish() - - def refresh_settings(self): - """ - Refresh settings - """ - # LLM server settings - llm_provider = self.config.get("llm.provider_type", "none").lower() - llm_server = self.config.get("llm.server", "") - - # For now we only support the Ollama API - if llm_provider == "ollama" and llm_server: - headers = {"Content-Type": "application/json"} - llm_api_key = self.config.get("llm.api_key", "") - llm_auth_type = self.config.get("llm.auth_type", "") - if llm_api_key and llm_auth_type: - headers[llm_auth_type] = llm_api_key - - available_models = {} - try: - response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) - if response.status_code == 200: - settings = response.json() - for model in settings.get("models", []): - model = model["name"] - try: - model_metadata = requests.post(f"{llm_server}/api/show", headers=headers, json={"model": model}, timeout=10).json() - available_models[model] = { - "name": f"{model_metadata['model_info'].get('general.basename', model)} ({model_metadata['details']['parameter_size']} parameters)", - "model_card": f"https://ollama.com/library/{model}", - "provider": "local" - } - - except (requests.RequestException, json.JSONDecodeError, KeyError) as e: - self.log.debug(f"Could not get metadata for model {model} from Ollama - skipping (error: {e})") - - self.config.set("llm.available_models", available_models) - self.log.debug("Refreshed LLM server settings cache") - else: - self.log.warning(f"Could not refresh LLM server settings cache - server returned status code {response.status_code}") - - except requests.RequestException as e: - self.log.warning(f"Could not refresh LLM server settings cache - request error: {str(e)}") \ No newline at end of file diff --git a/common/assets/llms.json b/common/assets/llms.json index 835dbaa09..c17351488 100644 --- a/common/assets/llms.json +++ b/common/assets/llms.json @@ -1,128 +1,140 @@ -{ - "none": { - "name": "", - "model_card": "", - "provider": "", - "default": true - }, - "custom": { - "name": "[custom]", - "model_card": "", - "provider": "" - }, - "gpt-5.4": { - "name": "[OpenAI] GPT-5.4", - "model_card": "https://platform.openai.com/docs/models/gpt-5.4", - "provider": "openai" - }, - "gpt-5-mini": { - "name": "[OpenAI] GPT-5 mini", - "model_card": "https://platform.openai.com/docs/models/gpt-5-mini", - "provider": "openai" - }, - "gpt-5-nano": { - "name": "[OpenAI] GPT-5 nano", - "model_card": "https://platform.openai.com/docs/models/gpt-5-nano", - "provider": "openai" - }, - "gpt-5.4-pro": { - "name": "[OpenAI] GPT-5.4 Pro", - "model_card": "https://platform.openai.com/docs/models/gpt-5.4-pro", - "provider": "openai" - }, - "gpt-4.1-mini": { - "name": "[OpenAI] GPT-4.1 mini", - "model_card": "https://platform.openai.com/docs/models/gpt-4.1-mini", - "provider": "openai" - }, - "gpt-4.1-nano": { - "name": "[OpenAI] GPT-4.1 nano", - "model_card": "https://platform.openai.com/docs/models/gpt-4.1-nano", - "provider": "openai" - }, - "gpt-4.1": { - "name": "[OpenAI] GPT-4.1", - "model_card": "https://platform.openai.com/docs/models/gpt-4.1", - "provider": "openai" - }, - "gpt-4o-mini": { - "name": "[OpenAI] GPT-4o mini", - "model_card": "https://platform.openai.com/docs/models/gpt-4o-mini", - "provider": "openai" - }, - "gpt-4o": { - "name": "[OpenAI] GPT-4o", - "model_card": "https://platform.openai.com/docs/models/gpt-4o", - "provider": "openai" - }, - "gemini-3.1-pro-preview": { - "name": "[Google] Gemini 3.1 Pro", - "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-pro", - "provider": "google" - }, - "gemini-3-flash-preview": { - "name": "[Google] Gemini 3 Flash", - "model_card": "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-flash", - "provider": "google" - }, - "gemini-3.1-flash-lite-preview": { - "name": "[Google] Gemini 3.1 Flash Lite", - "provider": "google", - "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-flash-lite" - }, - "claude-opus-4-6": { - "name": "[Anthropic] Claude Opus 4.6 (latest)", - "model_card": "https://www.anthropic.com/claude/opus", - "provider": "anthropic" - }, - "claude-sonnet-4-6": { - "name": "[Anthropic] Claude Sonnet 4.6 (latest)", - "model_card": "https://www.anthropic.com/claude/sonnet", - "provider": "anthropic" - }, - "claude-4-5-haiku": { - "name": "[Anthropic] Claude 4.5 Haiku (latest)", - "model_card": "https://www.anthropic.com/claude/haiku", - "provider": "anthropic" - }, - "magistral-small-2509": { - "name": "[Mistral] Magistral Small 1.2 (25.09)", - "model_card": "https://docs.mistral.ai/models/magistral-small-1-2-25-09", - "provider": "mistral" - }, - "magistral-medium-2509": { - "name": "[Mistral] Magistral Medium 1.2 (25.09)", - "model_card": "https://docs.mistral.ai/models/magistral-medium-1-2-25-09", - "provider": "mistral" - }, - "mistral-small-2506": { - "name": "[Mistral] Mistral Small 3.2 (25.06)", - "model_card": "https://docs.mistral.ai/models/mistral-small-3-2-25-06", - "provider": "mistral" - }, - "mistral-medium-2508": { - "name": "[Mistral] Mistral Medium 3.1 (25.08)", - "model_card": "mistral-medium-2508", - "provider": "mistral" - }, - "mistral-large-2512": { - "name": "[Mistral] Mistral Large 3 (25.12)", - "model_card": "https://docs.mistral.ai/models/mistral-large-3-25-12", - "provider": "mistral" - }, - "open-mistral-nemo-2407": { - "name": "[Mistral] Mistral Nemo 12B", - "model_card": "https://docs.mistral.ai/models/mistral-nemo-12b-24-07", - "provider": "mistral" - }, - "deepseek-chat": { - "name": "[DeepSeek] DeepSeek latest (non-reasoning)", - "model_card": "https://api-docs.deepseek.com/quick_start/pricing", - "provider": "deepseek" - }, - "deepseek-reasoner": { - "name": "[DeepSeek] DeepSeek latest (reasoning)", - "model_card": "https://api-docs.deepseek.com/quick_start/pricing", - "provider": "deepseek" - } -} \ No newline at end of file +[ + { + "model": "gpt-5.4", + "name": "[OpenAI] GPT-5.4", + "model_card": "https://platform.openai.com/docs/models/gpt-5.4", + "provider": "openai" + }, + { + "model": "gpt-5-mini", + "name": "[OpenAI] GPT-5 mini", + "model_card": "https://platform.openai.com/docs/models/gpt-5-mini", + "provider": "openai" + }, + { + "model": "gpt-5-nano", + "name": "[OpenAI] GPT-5 nano", + "model_card": "https://platform.openai.com/docs/models/gpt-5-nano", + "provider": "openai" + }, + { + "model": "gpt-5.4-pro", + "name": "[OpenAI] GPT-5.4 Pro", + "model_card": "https://platform.openai.com/docs/models/gpt-5.4-pro", + "provider": "openai" + }, + { + "model": "gpt-4.1-mini", + "name": "[OpenAI] GPT-4.1 mini", + "model_card": "https://platform.openai.com/docs/models/gpt-4.1-mini", + "provider": "openai" + }, + { + "model": "gpt-4.1-nano", + "name": "[OpenAI] GPT-4.1 nano", + "model_card": "https://platform.openai.com/docs/models/gpt-4.1-nano", + "provider": "openai" + }, + { + "model": "gpt-4.1", + "name": "[OpenAI] GPT-4.1", + "model_card": "https://platform.openai.com/docs/models/gpt-4.1", + "provider": "openai" + }, + { + "model": "gpt-4o-mini", + "name": "[OpenAI] GPT-4o mini", + "model_card": "https://platform.openai.com/docs/models/gpt-4o-mini", + "provider": "openai" + }, + { + "model": "gpt-4o", + "name": "[OpenAI] GPT-4o", + "model_card": "https://platform.openai.com/docs/models/gpt-4o", + "provider": "openai" + }, + { + "model": "gemini-3.1-pro-preview", + "name": "[Google] Gemini 3.1 Pro", + "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-pro", + "provider": "google" + }, + { + "model": "gemini-3-flash-preview", + "name": "[Google] Gemini 3 Flash", + "model_card": "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-flash", + "provider": "google" + }, + { + "model": "gemini-3.1-flash-lite-preview", + "name": "[Google] Gemini 3.1 Flash Lite", + "provider": "google", + "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-flash-lite" + }, + { + "model": "claude-opus-4-6", + "name": "[Anthropic] Claude Opus 4.6 (latest)", + "model_card": "https://www.anthropic.com/claude/opus", + "provider": "anthropic" + }, + { + "model": "claude-sonnet-4-6", + "name": "[Anthropic] Claude Sonnet 4.6 (latest)", + "model_card": "https://www.anthropic.com/claude/sonnet", + "provider": "anthropic" + }, + { + "model": "claude-4-5-haiku", + "name": "[Anthropic] Claude 4.5 Haiku (latest)", + "model_card": "https://www.anthropic.com/claude/haiku", + "provider": "anthropic" + }, + { + "model": "magistral-small-2509", + "name": "[Mistral] Magistral Small 1.2 (25.09)", + "model_card": "https://docs.mistral.ai/models/magistral-small-1-2-25-09", + "provider": "mistral" + }, + { + "model": "magistral-medium-2509", + "name": "[Mistral] Magistral Medium 1.2 (25.09)", + "model_card": "https://docs.mistral.ai/models/magistral-medium-1-2-25-09", + "provider": "mistral" + }, + { + "model": "mistral-small-2506", + "name": "[Mistral] Mistral Small 3.2 (25.06)", + "model_card": "https://docs.mistral.ai/models/mistral-small-3-2-25-06", + "provider": "mistral" + }, + { + "model": "mistral-medium-2508", + "name": "[Mistral] Mistral Medium 3.1 (25.08)", + "model_card": "mistral-medium-2508", + "provider": "mistral" + }, + { + "model": "mistral-large-2512", + "name": "[Mistral] Mistral Large 3 (25.12)", + "model_card": "https://docs.mistral.ai/models/mistral-large-3-25-12", + "provider": "mistral" + }, + { + "model": "open-mistral-nemo-2407", + "name": "[Mistral] Mistral Nemo 12B", + "model_card": "https://docs.mistral.ai/models/mistral-nemo-12b-24-07", + "provider": "mistral" + }, + { + "model": "deepseek-chat", + "name": "[DeepSeek] DeepSeek latest (non-reasoning)", + "model_card": "https://api-docs.deepseek.com/quick_start/pricing", + "provider": "deepseek" + }, + { + "model": "deepseek-reasoner", + "name": "[DeepSeek] DeepSeek latest (reasoning)", + "model_card": "https://api-docs.deepseek.com/quick_start/pricing", + "provider": "deepseek" + } +] \ No newline at end of file diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index c64f8d633..e5c2a726f 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -443,30 +443,16 @@ "global": True }, "flask.autologin.hostnames": { - "type": UserInput.OPTION_MULTI_OPTION, + "type": UserInput.OPTION_TEXT_JSON, "default": [], - "options": { - "hostname": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "Host name or IP address" - } - }, "help": "White-listed hostnames", "tooltip": "A list of host names or IP addresses to automatically log in. Docker should include localhost and " "Server Name. Front-end needs to be restarted for changed to apply.", "global": True }, "flask.autologin.api": { - "type": UserInput.OPTION_MULTI_OPTION, + "type": UserInput.OPTION_TEXT_JSON, "default": [], - "options": { - "hostname": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "Host name or IP address" - } - }, "help": "White-list for API", "tooltip": "A list of host names or IP addresses to allow access to API endpoints with no rate limiting. " "Docker should include localhost and Server Name. Front-end needs to be restarted for changed to " @@ -593,41 +579,42 @@ "to local or remote LLM servers. You can also set up your own LLM server using open source software such as " "[Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for your users." }, - "llm.services": { + "llm.providers": { "type": UserInput.OPTION_MULTI_OPTION, "default": [], "global": True, "help": "LLM providers", "options": { - "host_name": { + "name": { "type": UserInput.OPTION_TEXT, "default": "4CAT LLM Server", "help": "Name of LLM Server in UI", "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).", }, - "provider_type": { + "type": { "type": UserInput.OPTION_CHOICE, "help": "LLM Provider Type", "default": "none", "options": { "ollama": "Ollama", "litellm": "LiteLLM", + "api": "Third-party models via APIs (OpenAI, Mistral, etc)", "none": "None", }, }, - "server": { + "url": { "type": UserInput.OPTION_TEXT, "default": "", "help": "LLM Server URL", "tooltip": "The URL of the LLM server, e.g. http://localhost:5000", }, - "auth_type": { + "auth_header": { "type": UserInput.OPTION_TEXT, "help": "Authentication Header", "default": "", "tooltip": "The HTTP header used to authenticate with the server (e.g. 'X-API-KEY', 'Authorization'). Passed with the Authentication Key as value.", }, - "api_key": { + "auth_key": { "type": UserInput.OPTION_TEXT, "default": "", "help": "Authentication Key", diff --git a/common/lib/llm/__init__.py b/common/lib/llm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/common/lib/llm.py b/common/lib/llm/adapter.py similarity index 60% rename from common/lib/llm.py rename to common/lib/llm/adapter.py index 0901194d1..5a3ece227 100644 --- a/common/lib/llm.py +++ b/common/lib/llm/adapter.py @@ -1,9 +1,11 @@ import json import base64 import mimetypes -import requests + from pathlib import Path from typing import List, Optional, Union + +from langchain_community.chat_models import ChatLiteLLM from pydantic import SecretStr from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage from langchain_core.language_models.chat_models import BaseChatModel @@ -18,111 +20,89 @@ class LLMAdapter: def __init__( self, - provider: str, - model: str, + config, + model, api_key: Optional[str] = None, - base_url: Optional[str] = None, temperature: float = 0.1, max_tokens: int = 1000, client_kwargs: Optional[dict] = None, ): """ - provider: 'openai', 'google', 'mistral', 'ollama', 'lmstudio', 'anthropic', 'deepseek' - model: model name (e.g., 'gpt-4o-mini', 'claude-3-opus', 'mistral-small', etc.) - api_key: API key if required (OpenAI, Claude, Google, Mistral) - base_url: for local models or Mistral custom endpoints - temperature: temperature hyperparameter, - max_tokens: how many output tokens may be used - client_kwargs: additional client parameters + Instantiate an adapter to interface with an LLM model + + :param config: 4CAT config reader + :param model: Model metadata (as in `llm.available_models` 4CAT setting) + :param api_key: API key, if needed + :param temperature: Temperature hyperparameter + :param max_tokens: Max tokens to generate + :param client_kwargs: Optional parameters for the LLM adapter class """ - self.provider = provider.lower() + known_providers = {p['url']: p for p in config.get("llm.providers")} + self.model = model + self.provider = known_providers.get(model['provider']) self.api_key = api_key - self.base_url = base_url self.temperature = temperature self.structured_output = False self.parser = None self.max_tokens = max_tokens self.client_kwargs = dict(client_kwargs) if client_kwargs else {} + self.llm: BaseChatModel = self._load_llm() def _load_llm(self) -> BaseChatModel: - if self.provider == "openai": - kwargs = {} - if "o3" not in self.model: - kwargs["temperature"] = self.temperature # temperature not supported for all models - return ChatOpenAI( - model=self.model, - api_key=SecretStr(self.api_key), - base_url=self.base_url or "https://api.openai.com/v1", - max_tokens=self.max_tokens, - **kwargs - ) - elif self.provider == "google": - return ChatGoogleGenerativeAI( - model=self.model, - temperature=self.temperature, - google_api_key=self.api_key, - max_tokens=self.max_tokens - ) - elif self.provider == "anthropic": - return ChatAnthropic( - model_name=self.model, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - max_tokens=self.max_tokens, - timeout=100, - stop=None - ) - elif self.provider == "mistral": - return ChatMistralAI( - model_name=self.model, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - base_url=self.base_url, # Optional override - max_tokens=self.max_tokens, - ) - elif self.provider == "deepseek": - return ChatDeepSeek( - model=self.model, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - base_url=self.base_url, - max_tokens=self.max_tokens if self.max_tokens <= 8192 else 8192, - ) - elif self.provider == "ollama": - ollama_adapter = ChatOllama( - model=self.model, - temperature=self.temperature, - base_url=self.base_url or "http://localhost:11434", - max_tokens=self.max_tokens, - client_kwargs=self.client_kwargs - ) - self.model = ollama_adapter.model - return ollama_adapter - elif self.provider in {"vllm", "lmstudio"}: + chat_params = { + "model": self.model["local_id"], + "api_key": SecretStr(self.api_key), + "base_url": self.provider["url"], + "max_tokens": self.max_tokens, + "temperature": self.temperature, + } + + if self.provider["type"] == "openai": + if "o3" in self.model: + del chat_params["temperature"] + chat_params["base_url"] = self.provider["url"] or "https://api.openai.com/v1" + adapter_class = ChatOpenAI + + elif self.provider["type"] == "google": + adapter_class = ChatGoogleGenerativeAI + + elif self.provider["type"] == "anthropic": + chat_params.update({"timeout": 100, "stop": None}) + adapter_class = ChatAnthropic + + elif self.provider["type"] == "mistral": + adapter_class = ChatMistralAI + + elif self.provider["type"] == "deepseek": + chat_params["max_tokens"] = min(self.max_tokens, 8192) + adapter_class = ChatDeepSeek + + elif self.provider["type"] == "ollama": + adapter_class = ChatOllama + chat_params.update({"client_kwargs": self.client_kwargs}) + + elif self.provider["type"] == "litellm": + adapter_class = ChatOpenAI + if self.provider["auth_header"]: + chat_params.update({ + "default_headers": { + self.provider["auth_header"]: self.provider["auth_key"] + } + }) + + elif self.provider["type"] in {"vllm", "lmstudio", "litellm"}: # OpenAI-compatible local servers if self.provider == "lmstudio" and not self.api_key: self.api_key = "lm-studio" - # For vLLM, query the server to get the actual model name. We can't leave this empty, unfortunately. - if self.provider == "vllm" and self.model=="vllm_model": - model_name = self.get_vllm_model_name(self.base_url, self.api_key) - self.model = model_name - else: - model_name = self.model if self.model else "lmstudio-model" - - llm = ChatOpenAI( - model=model_name, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - base_url=self.base_url, - max_tokens=self.max_tokens, - ) - self.model = llm.model_name - return llm + adapter_class = ChatOpenAI + else: - raise ValueError(f"Unsupported LLM provider: {self.provider}") + raise ValueError(f"{self.__class__.__name__} Unsupported LLM provider type: {self.provider['type']}") + + return adapter_class(**chat_params) def generate_text( self, @@ -161,7 +141,7 @@ def generate_text( lc_messages = messages kwargs = {"temperature": temperature} - if self.provider in ("google", "ollama") or "o3" in self.model or "gpt-5" in self.model: + if self.provider["type"] in ("google", "ollama") or "o3" in self.model["local_id"] or "gpt-5" in self.model["local_id"]: kwargs = {} try: @@ -304,31 +284,6 @@ def set_structure(self, json_schema): self.llm = self.llm.with_structured_output(json_schema) self.structured_output = True - @staticmethod - def get_model_options(config) -> dict: - """ - Returns model choice options for UserInput - """ - models = LLMAdapter.get_models(config) - if not models: - return {} - options = {model_id: model_values["name"] for model_id, model_values in models.items()} - return options - - @staticmethod - def get_model_providers(config) -> dict: - """ - Returns available model providers through APIs - """ - models = LLMAdapter.get_models(config) - if not models: - return {} - providers = list(set([model_values.get("provider", "") for model_values in models.values()])) - if not providers: - return {} - options = {provider: provider.capitalize() for provider in providers if provider} - return options - @staticmethod def get_models(config) -> dict: """ @@ -337,36 +292,6 @@ def get_models(config) -> dict: :returns dict, A dict with model IDs as keys and details as values """ - with ( - config.get("PATH_ROOT") - .joinpath("common/assets/llms.json") - .open() as available_models - ): - available_models = json.loads(available_models.read()) - return available_models - - - @staticmethod - def get_vllm_model_name(base_url: str, api_key: str = None) -> str: - """ - Query vLLM server to get the name of the served model. - """ - - try: - # vLLM exposes available models at /v1/models endpoint - models_url = f"{base_url.rstrip('/')}/models" - headers = {} - if api_key: - headers["Authorization"] = f"Bearer {api_key}" - - response = requests.get(models_url, headers=headers, timeout=10) - response.raise_for_status() - models_data = response.json() - - # Get the first available model - if models_data.get("data") and len(models_data["data"]) > 0: - return models_data["data"][0]["id"] - else: - raise ValueError("No models found on vLLM server") - except Exception as e: - raise ValueError(f"Could not retrieve model name from vLLM server: {e}") + available_models = config.get("llm.available_models", {}) + enabled_models = config.get("llm.enabled_models", {}) + return {k: v for k, v in available_models.items() if k in enabled_models} \ No newline at end of file diff --git a/common/lib/llm/litellm_client.py b/common/lib/llm/litellm_client.py new file mode 100644 index 000000000..f6f7d59ff --- /dev/null +++ b/common/lib/llm/litellm_client.py @@ -0,0 +1,62 @@ +""" +Centralized HTTP client for communicating with a LiteLLM server. + +This class owns all direct HTTP calls to LiteLLM's REST API and provides shared +static helpers for capability parsing, display-name formatting, and building +canonical llm.available_models entries. It is a plain helper with no 4CAT +base-class dependency. + +This class is primarily intended for interfacing with LiteLLM, but since +LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface +with the OpenAI API as well. +""" +import requests + +from common.lib.llm.llm_client import LLMProviderClient + +class LiteLLMClient(LLMProviderClient): + type = "litellm" + + _models_info_path = "/model/info" + _models_info_key = "data" + _model_id_key = "model_name" + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its LiteLLM metadata. + + :param meta: ``model info`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` + """ + if meta is None or not meta.get("model_info"): + return [] + + media_types = {"text"} # far as I can tell, text is always supported + if meta["model_info"].get("supports_vision"): + media_types.add("image") + + if meta["model_info"].get("supports_audio_input"): + media_types.add("sound") + + # no way to tell if model supports embeddings input as far as I can see... + + return list(media_types) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. + """ + model_name = self.get_global_model_id(meta) + + if meta.get("model_name"): + model_name = meta["model_name"] + + if meta["litellm_params"].get("model"): + model_name = "/".join(meta["litellm_params"].get("model").split("/")[1:]) + + return model_name \ No newline at end of file diff --git a/common/lib/llm/llm_client.py b/common/lib/llm/llm_client.py new file mode 100644 index 000000000..b31968399 --- /dev/null +++ b/common/lib/llm/llm_client.py @@ -0,0 +1,190 @@ +""" +Centralized HTTP client for communicating with an LLM provider. + +This class owns all direct HTTP calls to the provider's REST API and provides +shared static helpers for capability parsing, display-name formatting, and +building canonical llm.available_models entries. It is a plain helper with no +4CAT base-class dependency. +""" + +from abc import abstractmethod + +import requests + + +class LLMProviderClient: + _headers = {} + _meta = {} + + @staticmethod + def get_client(config, provider_config: dict) -> "LLMProviderClient": + """ + Get a client for an LLM provider + + Returns the appropriate sub-class depending on the provider type. + + :param config: 4CAT config reader + :param dict provider_config: Provider parameters, as configured in + 4CAT + :return LLMProviderClient: + """ + # in-line import because we otherwise get circular import shenanigans + from common.lib.llm.ollama_client import OllamaClient + from common.lib.llm.litellm_client import LiteLLMClient + from common.lib.llm.lmstudio_client import LMStudioClient + from common.lib.llm.thirdparty_client import ThirdPartyClient + + for client_type in (OllamaClient, LiteLLMClient, LMStudioClient, ThirdPartyClient): + if client_type.type == provider_config["type"]: + return client_type(config, provider_config) + + raise ValueError(f"LLMProviderClient: Unknown provider type {provider_config['type']}") + + def __init__(self, config, provider_config: dict, timeout: int = 10, log=None) -> None: + """ + HTTP client for an LLM Provider + + :param dict provider_config: Provider parameters, as configured in 4CAT + :param int timeout: Default request timeout in seconds. + :param Logger log: 4CAT log handler + """ + self.config = config + + self._meta = provider_config + self.base_url = provider_config["url"].rstrip("/") + self.auth_type = provider_config.get("auth_header") + self.auth_key = provider_config.get("auth_key") + self.timeout = timeout + + self._session = requests.Session() + self._headers = {"Content-Type": "application/json"} + + if self.auth_type: + self._headers[self.auth_type] = self.auth_key + + self.log = log + + def get_status(self) -> bool | int: + """ + Check if the server is reachable and responding to requests + + :return: `False` if the server is not responding, or an HTTP status code. + """ + try: + r = self._session.get( + f"{self.base_url}{self._models_info_path}", + headers=self._headers, + timeout=self.timeout, + ) + if self.log and r.status_code != 200: + self.log.warning( + f"{self.__class__.__name__}: server responded with status code {r.status_code} during availability check: {r.text}") + return r.status_code + except requests.RequestException as e: + if self.log: + self.log.warning(f"{self.__class__.__name__}: server is not available at {self.base_url}: {e}") + return False + + def list_models(self) -> list[dict]: + """List available models from the Ollama server. + + :returns: List of model dicts, or ``[]`` on failure. + """ + try: + r = self._session.get( + f"{self.base_url}{self._models_info_path}", + headers=self._headers, + timeout=self.timeout, + ) + if r.status_code == 200: + return r.json().get(self._models_info_key, []) + if self.log: + self.log.warning( + f"{self.__class__.__name__}: failed to list models from {self.base_url}, status code {r.status_code}: {r.text}") + except requests.RequestException as e: + if self.log: + self.log.warning(f"{self.__class__.__name__}: failed to list models from {self.base_url}: {e}") + return [] + + def build_model_entry(self, meta: dict) -> dict: + """ + Build a canonical ``llm.available_models`` entry for a model. + + :param model_id: Raw model identifier. + :param display_name: Human-readable name (from ``format_display_name``). + :param meta: ``/api/show`` response dict, or ``None`` if unavailable. + :returns: Dict ready to store under ``llm.available_models[model_id]``. + """ + return { + "id": self.get_global_model_id(meta), + "local_id": self.get_model_id(meta), + "name": self.format_display_name(meta), + "model_card": self.get_model_card_url(meta), + "provider_type": self._meta["type"], + "provider": self._meta["url"], + "supported_media_types": self.parse_supported_media_types(meta), + "metadata": meta, + } + + def get_model_card_url(self, meta: dict) -> str: + """ + Get a URL for a model card for a given model + + :param meta: Model metadata + :return str: Model card URL (empty string if unavailable) + """ + return "" + + @abstractmethod + def parse_supported_media_types(self, meta: dict) -> list[str]: + """Derive the media types a model supports from its Ollama metadata. + + **Primary path**: reads ``meta["capabilities"]``: + - ``"completion"`` → ``"text"`` + - ``"vision"`` → ``"image"`` + - ``"embedding"`` → ``"embedding"`` + + **Fallback path** (used when capabilities are absent or only yield ``"text"``): + inspects GGUF ``model_info`` / ``details`` for vision signals and adds + ``"image"`` if any are found. + + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` (unknown — callers + should include the model, not block it). + """ + pass + + @abstractmethod + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param dict meta: Model metadata + :returns str: Human-readable display name string. + """ + pass + + def get_model_id(self, meta: dict) -> str: + """ + Choose a model identifier based on model metadata. + + This is the ID within the provider context, i.e. it is not guaranteed + to be globally unique (use `get_global_model_id()` instead). + + :param dict meta: Model metadata + :return str: Model ID + """ + return meta[self._model_id_key] + + def get_global_model_id(self, meta: dict) -> str: + """ + Choose a model identifier based on model metadata. + + This needs to be a *globally* unique ID, i.e. if multiple providers + provide the same model, the ID should still be unique per provider. + + :param dict meta: Model metadata + :return str: Model ID + """ + return "-".join((self._meta["type"], self._meta["url"], self.get_model_id(meta))) \ No newline at end of file diff --git a/common/lib/llm/lmstudio_client.py b/common/lib/llm/lmstudio_client.py new file mode 100644 index 000000000..5ceb99c27 --- /dev/null +++ b/common/lib/llm/lmstudio_client.py @@ -0,0 +1,69 @@ +""" +Centralized HTTP client for communicating with a LiteLLM server. + +This class owns all direct HTTP calls to LiteLLM's REST API and provides shared +static helpers for capability parsing, display-name formatting, and building +canonical llm.available_models entries. It is a plain helper with no 4CAT +base-class dependency. + +This class is primarily intended for interfacing with LiteLLM, but since +LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface +with the OpenAI API as well. +""" +import requests + +from common.lib.llm.llm_client import LLMProviderClient + +class LMStudioClient(LLMProviderClient): + type = "lmstudio" + + _models_info_path = "/api/v1/models" + _models_info_key = "models" + _model_id_key = "key" + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its LiteLLM metadata. + + :param meta: ``model info`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` + """ + if meta is None or not meta.get("capabilities"): + return [] + + media_types = {"text"} # far as I can tell, text is always supported + if meta["capabilities"].get("vision"): + media_types.add("image") + + if meta["model_info"].get("supports_audio_input"): + media_types.add("sound") + + # no way to tell if model supports embeddings input as far as I can see... + + return list(media_types) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. + """ + model_name = self.get_global_model_id(meta) + + if meta.get("display_name"): + model_name = meta["display_name"] + + extra_bits = [] + if "publisher" in meta: + extra_bits.append(meta["publisher"]) + + if "params_string" in meta: + extra_bits.append(meta["params_string"]) + + + model_name += f" {', '.join(extra_bits)}" + + return model_name \ No newline at end of file diff --git a/common/lib/ollama_client.py b/common/lib/llm/ollama_client.py similarity index 50% rename from common/lib/ollama_client.py rename to common/lib/llm/ollama_client.py index 5d7bea1da..096249c1a 100644 --- a/common/lib/ollama_client.py +++ b/common/lib/llm/ollama_client.py @@ -9,138 +9,16 @@ import re import requests -from typing import Optional - - -class OllamaClient: - """ - HTTP client for an Ollama server. - - :param base_url: Base URL of the Ollama server (e.g. "http://localhost:11434"). - :param api_key: Optional API key for authentication. - :param auth_type: Header name to use for the API key (e.g. "Authorization"). - :param timeout: Default request timeout in seconds. - """ - - def __init__(self, base_url: str, api_key: Optional[str] = None, - auth_type: Optional[str] = None, timeout: int = 10, log=None) -> None: - self.base_url = base_url.rstrip("/") - self.api_key = api_key - self.auth_type = auth_type - self.timeout = timeout - self._session = requests.Session() - self.log = log - - def _headers(self) -> dict: - """Build request headers, including auth if configured.""" - headers = {"Content-Type": "application/json"} - if self.api_key and self.auth_type: - headers[self.auth_type] = self.api_key - return headers - - def is_available(self) -> bool: - """Check if the Ollama server is reachable and responding to /api/tags.""" - try: - r = self._session.get( - f"{self.base_url}/api/tags", - headers=self._headers(), - timeout=self.timeout, - ) - if self.log and r.status_code != 200: - self.log.warning(f"OllamaClient: server responded with status code {r.status_code} during availability check: {r.text}") - return r.status_code == 200 - except requests.RequestException as e: - if self.log: - self.log.warning(f"OllamaClient: server is not available at {self.base_url}: {e}") - return False - - def list_models(self) -> list[dict]: - """List available models from the Ollama server. - - :returns: List of model dicts from ``/api/tags``, or ``[]`` on failure. - """ - try: - r = self._session.get( - f"{self.base_url}/api/tags", - headers=self._headers(), - timeout=self.timeout, - ) - if r.status_code == 200: - return r.json().get("models", []) - if self.log: - self.log.warning(f"OllamaClient: failed to list models from {self.base_url}, status code {r.status_code}: {r.text}") - except requests.RequestException as e: - if self.log: - self.log.warning(f"OllamaClient: failed to list models from {self.base_url}: {e}") - return [] - - def show_model(self, model_id: str) -> dict | None: - """Fetch full metadata for a model via ``POST /api/show``. - - :param model_id: Model name (e.g. ``"llama3:8b"``). - :returns: Parsed response dict, or ``None`` on failure. - """ - try: - r = self._session.post( - f"{self.base_url}/api/show", - headers=self._headers(), - json={"model": model_id}, - timeout=self.timeout, - ) - if r.status_code == 200: - return r.json() - if self.log: - self.log.warning(f"OllamaClient: failed to show model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") - except requests.RequestException as e: - if self.log: - self.log.warning(f"OllamaClient: failed to show model {model_id} from {self.base_url}: {e}") - return None +from common.lib.llm.llm_client import LLMProviderClient - def pull_model(self, model_id: str, stream: bool = False) -> bool: - """Pull a model from the Ollama registry. +class OllamaClient(LLMProviderClient): + type = "ollama" - :param model_id: Model name (e.g. ``"llama3:8b"``). - :param stream: Whether to stream the response (default ``False``). - :returns: ``True`` on success, ``False`` on failure. - """ - try: - r = self._session.post( - f"{self.base_url}/api/pull", - headers=self._headers(), - json={"model": model_id, "stream": stream}, - timeout=600, - ) - if r.status_code != 200 and self.log: - self.log.warning(f"OllamaClient: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") - return r.status_code == 200 - except requests.RequestException as e: - if self.log: - self.log.warning(f"OllamaClient: failed to pull model {model_id} from {self.base_url}: {e}") - return False - - def delete_model(self, model_id: str) -> bool: - """Delete a model from the Ollama server. - - :param model_id: Model name (e.g. ``"llama3:8b"``). - :returns: ``True`` on success, ``False`` on failure. - """ - try: - r = self._session.delete( - f"{self.base_url}/api/delete", - headers=self._headers(), - json={"model": model_id}, - timeout=30, - ) - if r.status_code != 200 and self.log: - self.log.warning(f"OllamaClient: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") - return r.status_code == 200 - except requests.RequestException as e: - if self.log: - self.log.warning(f"OllamaClient: failed to delete model {model_id} from {self.base_url}: {e}") - return False + _models_info_path = "/api/tags" + _models_info_key = "models" + _model_id_key = "model" - @staticmethod - def parse_supported_media_types(meta: dict | None) -> list[str]: + def parse_supported_media_types(self, meta: dict) -> list[str]: """Derive the media types a model supports from its Ollama metadata. **Primary path**: reads ``meta["capabilities"]``: @@ -188,19 +66,15 @@ def parse_supported_media_types(meta: dict | None) -> list[str]: return media_types - @staticmethod - def format_display_name(model_id: str, meta: dict | None) -> str: - """Build a human-readable display name for a model. - - Logic is identical to the legacy ``OllamaManager._format_model_display_name`` - and has been moved here so it can be shared across OllamaManager and any - other caller without importing the worker class. + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. - :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). - :param meta: ``/api/show`` response dict, or ``None``. - :returns: Human-readable display name string. + :param dict meta: Model metadata + :returns str: Human-readable display name string. """ model_info = meta.get("model_info", {}) if meta else {} + model_id = self.get_global_model_id(meta) details = meta.get("details", {}) if meta else {} basename = None @@ -264,7 +138,6 @@ def _humanize(n): tag = model_id.split(":", 1)[1].strip() if ":" in model_id else None - suffix = None if tag: tl = tag.lower() if tl in ("latest", "stable", "current"): @@ -289,41 +162,58 @@ def _humanize(n): return f"{basename} ({suffix})" - @staticmethod - def build_model_entry(model_id: str, display_name: str, meta: dict | None) -> dict: - """Build a canonical ``llm.available_models`` entry for a model. + def get_model_card_url(self, meta: dict) -> str: + """ + Get a URL for a model card for a given model - :param model_id: Raw Ollama model identifier. - :param display_name: Human-readable name (from ``format_display_name``). - :param meta: ``/api/show`` response dict, or ``None`` if unavailable. - :returns: Dict ready to store under ``llm.available_models[model_id]``. + :param meta: Model metadata + :return str: Model card URL (empty string if unavailable) """ - has_meta = bool(meta) - return { - "name": display_name, - "model_card": f"https://ollama.com/library/{model_id.split(':')[0]}", - "provider": "local", - "metadata_success": has_meta, - "model_info": meta.get("model_info", {}) if has_meta else {}, - "capabilities": meta.get("capabilities", []) if has_meta else [], - "details": meta.get("details", {}) if has_meta else {}, - "modified_at": meta.get("modified_at", None) if has_meta else None, - "supported_media_types": OllamaClient.parse_supported_media_types(meta), - } + return f"https://ollama.com/library/{meta['model']}" + + def pull_model(self, model_id: str, stream: bool = False) -> bool: + """Pull a model from the Ollama registry. + + :param model_id: Model name (e.g. ``"llama3:8b"``). + :param stream: Whether to stream the response (default ``False``). + :returns: ``True`` on success, ``False`` on failure. + """ + try: + r = self._session.post( + f"{self.base_url}/api/pull", + headers=self._headers, + json={"model": model_id, "stream": stream}, + timeout=600, + ) - @classmethod - def from_config(cls, config, log=None) -> "OllamaClient": - """Instantiate an OllamaClient from 4CAT config. + if r.status_code != 200 and self.log: + self.log.warning(f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") - Reads ``llm.server``, ``llm.api_key``, and ``llm.auth_type``. + return r.status_code == 200 + + except requests.RequestException as e: + if self.log: + self.log.warning(f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}: {e}") + + return False + + def delete_model(self, model_id: str) -> bool: + """Delete a model from the Ollama server. - :param config: A 4CAT ``ConfigWrapper`` or ``ConfigManager`` instance. - :param log: A logging instance for reporting issues. - :returns: Configured ``OllamaClient``. + :param model_id: Model name (e.g. ``"llama3:8b"``). + :returns: ``True`` on success, ``False`` on failure. """ - return cls( - base_url=config.get("llm.server", ""), - api_key=config.get("llm.api_key", "") or None, - auth_type=config.get("llm.auth_type", "") or None, - log=log, - ) + try: + r = self._session.delete( + f"{self.base_url}/api/delete", + headers=self._headers, + json={"model": model_id}, + timeout=30, + ) + if r.status_code != 200 and self.log: + self.log.warning(f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") + return r.status_code == 200 + except requests.RequestException as e: + if self.log: + self.log.warning(f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}: {e}") + return False \ No newline at end of file diff --git a/common/lib/llm/thirdparty_client.py b/common/lib/llm/thirdparty_client.py new file mode 100644 index 000000000..e6ac5a29c --- /dev/null +++ b/common/lib/llm/thirdparty_client.py @@ -0,0 +1,56 @@ +""" +Fake 'client' to read from local store of known 3d party, API-based LLMs that +can be used with 4CAT +""" +import json + +from common.lib.llm.llm_client import LLMProviderClient + +class ThirdPartyClient(LLMProviderClient): + type = "api" + + _models_info_key = "models" + _model_id_key = "model" + + def get_status(self): + return 200 + + def list_models(self) -> dict: + with self.config.get("PATH_ROOT").joinpath("common/assets/llms.json").open() as infile: + models = json.load(infile) + + return models + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its LiteLLM metadata. + + :param meta: ``model info`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` + """ + return meta.get("supported_media_types", ["text"]) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. + """ + return meta["name"] + + def build_model_entry(self, meta: dict) -> dict: + """ + Build a canonical ``llm.available_models`` entry for a model. + + :param model_id: Raw model identifier. + :param display_name: Human-readable name (from ``format_display_name``). + :param meta: ``/api/show`` response dict, or ``None`` if unavailable. + :returns: Dict ready to store under ``llm.available_models[model_id]``. + """ + entry = super().build_model_entry(meta) + entry["provider"] = meta["provider"] + + return entry \ No newline at end of file diff --git a/common/lib/llm/vllm_client.py b/common/lib/llm/vllm_client.py new file mode 100644 index 000000000..512ce0e6f --- /dev/null +++ b/common/lib/llm/vllm_client.py @@ -0,0 +1,59 @@ +""" +Centralized HTTP client for communicating with a vLLM server. + +This class owns all direct HTTP calls to vLLM's REST API and provides shared +static helpers for capability parsing, display-name formatting, and building +canonical llm.available_models entries. It is a plain helper with no 4CAT +base-class dependency. + +""" +import requests + +from common.lib.llm.llm_client import LLMProviderClient + +class VLLMClient(LLMProviderClient): + type = "vllm" + + _models_info_path = "/model/info" + _models_info_key = "data" + _model_id_key = "model" + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its LiteLLM metadata. + + :param meta: ``model info`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` + """ + if meta is None or not meta.get("model_info"): + return [] + + media_types = {"text"} # far as I can tell, text is always supported + if meta["model_info"].get("supports_vision"): + media_types.add("image") + + if meta["model_info"].get("supports_audio_input"): + media_types.add("sound") + + # no way to tell if model supports embeddings input as far as I can see... + + return list(media_types) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. + """ + model_name = self.get_global_model_id(meta) + + if meta.get("model_name"): + model_name = meta["model_name"] + + if meta["litellm_params"].get("model"): + model_name = "/".join(meta["litellm_params"].get("model").split("/")[1:]) + + return model_name \ No newline at end of file diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 26d16e46e..605dbdc21 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -13,10 +13,12 @@ from jsonschema.exceptions import ValidationError, SchemaError from datetime import datetime, timedelta +from matplotlib.style.core import available + from common.lib.item_mapping import MappedItem from common.lib.exceptions import ProcessorInterruptedException, QueryParametersException, QueryNeedsExplicitConfirmationException from common.lib.helpers import UserInput, nthify, andify, remove_nuls, flatten_dict -from common.lib.llm import LLMAdapter +from common.lib.llm.adapter import LLMAdapter from backend.lib.processor import BasicProcessor class LLMPrompter(BasicProcessor): @@ -66,20 +68,12 @@ def get_queue_id(cls, remote_id, details, dataset) -> str: @classmethod def get_options(cls, parent_dataset=None, config=None) -> dict: # Check if 4CAT wide LLM server is available - if config.get("llm.access", False) and config.get("llm.server", ""): - # Check some models enabled - shared_llm_enabled_models = config.get("llm.enabled_models", []) - shared_llm_models = {model: model_metadata.get("name") for model, model_metadata in config.get("llm.available_models", {}).items() if model in shared_llm_enabled_models} - if not shared_llm_models: - shared_llm_name = False - shared_llm_default = "" - else: - shared_llm_name = config.get("llm.host_name", "4CAT LLM Server") - shared_llm_default = list(shared_llm_models.keys())[0] if shared_llm_models else "" - else: - shared_llm_name = False - shared_llm_default = "" - shared_llm_models = {} + available_models = config.get("llm.available_models", []) + enabled_model_ids = config.get("llm.enabled_models", []) + if not config.get("llm.access"): + enabled_model_ids = [_ for _ in enabled_model_ids if _.startswith("api-")] + + enabled_models = {k: v for k, v in available_models.items() if k in enabled_model_ids} # Determine if the parent dataset is a media archive (zip with images/video/audio) is_media_parent = False @@ -94,13 +88,6 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: if parent_media_type in ("video", "audio"): # Ollama and LM Studio currently only support text and image hosted_and_local_available = False - - # Add additional sources for LLM Models - api_or_local_options = {"api": "API"} - if hosted_and_local_available: - api_or_local_options["local"] = "Local" - if shared_llm_name: - api_or_local_options["hosted"] = shared_llm_name options = { "ethics_warning1": { @@ -108,21 +95,14 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: "help": "Always test your prompt on a sample of rows, for instance by first using the " "Random filter processor.", }, - "api_or_local": { - "type": UserInput.OPTION_CHOICE, - "help": "Local or API", - "options": api_or_local_options, - "default": "api" if not shared_llm_name else "hosted", - "tooltip": "You can use 'local' models through Ollama and LM Studio as long as you have a valid " - "and accessible URL through which the model can be reached.", - }, - "api_model": { + "model": { "type": UserInput.OPTION_CHOICE, "help": "API model", - "options": LLMAdapter.get_model_options(config), + "options": { + model_id: model["name"] for model_id, model in enabled_models.items() + }, "default": "none", "tooltip": "Select from the predefined model list or insert manually", - "requires": "api_or_local==api", }, "api_key": { "type": UserInput.OPTION_TEXT, @@ -130,105 +110,9 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: "help": "API key", "tooltip": "Create an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization" "/api-keys). Note that this often involves billing.", - "requires": "api_or_local==api", + "requires": "api_model^=api", "sensitive": True, - }, - "api_custom_model_provider": { - "type": UserInput.OPTION_CHOICE, - "help": "Model provider", - "requires": "api_model==custom", - "options": LLMAdapter.get_model_providers(config), - "tooltip": "API provider. Currently limited to this list.", - }, - "api_custom_model_id": { - "type": UserInput.OPTION_TEXT, - "help": "Model ID", - "requires": "api_model==custom", - "tooltip": "E.g. 'mistral-small-2503'. Check the API provider's documentation on what model ID to use. " - "Fine-tuned models often require more info; OpenAI for instance requires the following " - "format: ft:[modelname]:[org_id]:[custom_suffix]:", - "default": "", - }, - "local_info": { - "type": UserInput.OPTION_INFO, - "requires": "api_or_local==local", - "help": "You can use local LLMs with LM Studio, Ollama, and vLLM. These applications need to be reachable by " - "this 4CAT server, e.g. by running them on the same machine. For LM Studio and vLLM, " - "use the Base URL to interface with any OpenAI-like API endpoint.", - }, - "local_provider": { - "type": UserInput.OPTION_CHOICE, - "requires": "api_or_local==local", - "options": { - "none": "", - "lmstudio": "LM Studio", - "ollama": "Ollama", - "vllm": "vLLM", - }, - "default": "none", - "help": "Local LLM provider", - }, - "lmstudio-info": { - "type": UserInput.OPTION_INFO, - "requires": "local_provider==lmstudio", - "help": "LM Studio is a desktop application to chat with LLMs, but that you can also run as a local " - "server. See [this link for intructions on how to run LM Studio as a server](https://lmstudio.ai/docs/" - "app/api). When the server is running, the endpoint is shown in the 'Developer' tab on the top " - "right (default: `http://localhost:1234/v1` or `http://host.docker.internal:1234/v1` in Docker). " - "4CAT will use the top-most model you have loaded. ", - }, - "ollama-info": { - "type": UserInput.OPTION_INFO, - "requires": "local_provider==ollama", - "help": "Ollama is a simple command-line application that lets you interface with a range of open-" - "source LLMs and that you can run as a local server. See [this link]" - "(https://github.com/ollama/ollama/blob/main/README.md#quickstart) for instructions.", - }, - "vllm-info": { - "type": UserInput.OPTION_INFO, - "requires": "local_provider==ollama", - "help": "[vLLM](https://docs.vllm.ai/en/latest/getting_started/quickstart/) is a framework for Linux " - "systems capable of fast inference with a single LLM. Communication is done through an " - "OpenAI-like API endpoint. Just change the base URL below and insert an optional API key.", - }, - "local_base_url": { - "type": UserInput.OPTION_TEXT, - "requires": "api_or_local==local", - "default": "", - "help": "Base URL", - "tooltip": "[optional] Leaving this empty will use default values (`http://localhost:1234/v1` or `http://host.docker.internal:1234/v1` for LM " - "Studio, `http://localhost:11434` or `http://host.docker.internal:11434` for Ollama, `http://localhost:8000` or `http://host.docker.internal:8000` for vLLM ).", - }, - "lmstudio_api_key": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "LM Studio API key", - "tooltip": "[optional] Uses `lm-studio` by default.", - "requires": "local_provider==lmstudio", - "sensitive": True, - }, - "vllm_api_key": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "vLLM API key", - "tooltip": "[optional] Empty by default.", - "requires": "local_provider==vllm", - "sensitive": True, - }, - "ollama_model": { - "type": UserInput.OPTION_TEXT, - "requires": "local_provider==ollama", - "default": "", - "help": "Ollama model name", - "tooltip": "[required] for example 'llama3.2'", - }, - "hosted_llm_model": { - "type": UserInput.OPTION_CHOICE, - "help": "LLM model", - "options": shared_llm_models, - "default": shared_llm_default, - "requires": "api_or_local==hosted", - }, + } } if is_media_parent: @@ -431,14 +315,8 @@ def is_compatible_with(cls, module=None, config=None): return False def process(self): - self.dataset.update_status("Validating settings") - api_model = self.parameters.get("api_model") - if api_model == "none": - api_model = "" - - modal_location = self.parameters.get("api_or_local", "api") hide_think = self.parameters.get("hide_think", False) # Check if the source dataset is a media archive (zip with images/video/audio) @@ -476,74 +354,22 @@ def process(self): base_url = None client_kwargs = {} - if modal_location == "local": - provider = self.parameters.get("local_provider", "") - base_url = self.parameters.get("local_base_url", "") + # load model and providermetadata + chosen_model_id = self.parameters.get("model") + available_models = {k: v for k, v in self.config.get("llm.available_models").items() if k in self.config.get("llm.enabled_models")} + if chosen_model_id not in available_models: + return self.dataset.finish_with_error(f"Model {chosen_model_id} not supported") - if not provider: - self.dataset.finish_with_error("Choose a local model provider") - return + model = available_models[chosen_model_id] - if provider == "lmstudio": - model = "lmstudio_model" - if not base_url: - base_url = "http://127.0.0.1:1234/v1" if not self.config.get("USING_DOCKER", False) else "http://host.docker.internal:1234/v1" - if not self.parameters.get("lmstudio_api_key"): - api_key = "lm-studio" - elif provider == "ollama": - model = self.parameters.get("ollama_model", "") - if not model: - self.dataset.finish_with_error("You need to provide a model name for Ollama (e.g. 'llama3.2')") - return - if not base_url: - base_url = "http://localhost:11434" if not self.config.get("USING_DOCKER", False) else "http://host.docker.internal:11434" - elif provider == "vllm": - model = "vllm_model" - api_key = self.parameters.get("vllm_api_key", "") - if not base_url: - base_url = "http://localhost:8000/v1" - else: - self.dataset.finish_with_error("Local provider not supported, choose either lmstudio or ollama") - return + if model["provider_type"] == "api" and not api_key: + return self.dataset.finish_with_error(f"No API key provided for model {chosen_model_id}") - elif modal_location == "hosted": - base_url = self.config.get("llm.server", "") - provider = self.config.get("llm.provider_type", "none").lower() - api_key = self.config.get("llm.api_key", "") - llm_auth_type = self.config.get("llm.auth_type", "") - model = self.parameters.get("hosted_llm_model", "") - if api_key and llm_auth_type: - client_kwargs = { - "headers": { - llm_auth_type: api_key - } - } - if provider == "none" or not base_url: - self.dataset.finish_with_error("4CAT LLM server not properly configured; contact the administrator") - return - else: - if not api_model: - self.dataset.finish_with_error("Select an API model or insert one manually") - return - # Models can be set manually - if api_model == "custom": - model = self.parameters.get("api_custom_model_id", "") - provider = self.parameters.get("api_custom_model_provider", "") - if not model: - self.dataset.finish_with_error("You must provide a valid API model name/ID") - return - if not provider: - self.dataset.finish_with_error("You must provide a valid API model provider") - return - else: - model_info = LLMAdapter.get_models(self.config).get(api_model, {}) - provider = model_info.get("provider") - model = api_model + available_providers = {p["url"]: p for p in self.config.get("llm.providers")} + if model["provider"] not in available_providers: + return self.dataset.finish_with_error(f"Model provider {model['provider']} unknown") - api_key = self.parameters.get("api_key") or self.config.get(f"api.{provider}.api_key", "") - if not api_key: - self.dataset.finish_with_error("You need to provide a valid API key") - return + provider = available_providers[model["provider"]] # Prompt validation base_prompt = self.parameters.get("prompt", "") @@ -589,14 +415,13 @@ def process(self): # Start LLM self.dataset.update_status("Connecting to LLM provider") - base_url_str = "" if not base_url else f" at base URL '{base_url}'" - self.dataset.log(f"Using LLM provider '{provider}' with model '{model}'{base_url_str}") + base_url_str = "" if not provider["url"] else f" at base URL '{provider['url']}'" + self.dataset.log(f"Using LLM provider '{model['provider']}' with model '{model}'{base_url_str}") try: llm = LLMAdapter( - provider=provider, + config=self.config, model=model, api_key=api_key, - base_url=base_url, temperature=temperature, max_tokens=max_tokens, client_kwargs=client_kwargs @@ -794,7 +619,7 @@ def process(self): "prompt": prompt, "temperature": temperature, "max_tokens": max_tokens, - "model": model, + "model": model["local_id"], "time_created": datetime.fromtimestamp(time_created).strftime("%Y-%m-%d %H:%M:%S"), "time_created_utc": time_created, "batch_number": "", @@ -822,7 +647,7 @@ def process(self): for output_key, output_value in annotation_output.items(): # Skip 'signature' and 'type' annotations for Google - if provider == "google" and ( + if model["provider"] == "google" and ( output_key.endswith(".signature") or output_key.endswith(".type") ): @@ -857,7 +682,7 @@ def process(self): self.dataset.update_progress(row / max_processed) # Rate limits for different providers - if provider == "mistral": + if model["provider"] == "mistral": time.sleep(1) if limit_reached: @@ -972,10 +797,9 @@ def process(self): json_schema = self.get_json_schema_for_batch(n_batched, custom_schema=json_schema_original) # `llm` becomes a RunnableSequence when used, so we'll need to reset it here llm = LLMAdapter( - provider=provider, + config=self.config, model=model, api_key=api_key, - base_url=base_url, temperature=temperature, max_tokens=max_tokens, client_kwargs=client_kwargs @@ -990,7 +814,7 @@ def process(self): batch_str = f" and {n_batched} items batched into the prompt" if use_batches else "" self.dataset.update_status(f"Generating text at row {row:,}/" - f"{max_processed:,} with {model}{batch_str}") + f"{max_processed:,} with {model['name']}{batch_str}") # Now finally generate some text! try: response = llm.generate_text( @@ -1014,15 +838,9 @@ def process(self): self.dataset.finish_with_warning(outputs, f"Not all items processed: {e}") return - # Set model name from the response for more details - if hasattr(response, "response_metadata"): - model = response.response_metadata.get("model_name", model) - if "models/" in model: - model = model.replace("models/", "") - if not response: structured_warning = " with your specified JSON schema" if structured_output else "" - warning = f"{model} could not return text{structured_warning}. Consider editing your prompt or changing settings." + warning = f"{model['name']} could not return text{structured_warning}. Consider editing your prompt or changing settings." self.dataset.finish_with_warning(outputs, warning) return @@ -1106,7 +924,7 @@ def process(self): "prompt": prompt if not use_batches else base_prompt, # Insert dataset values if not batching "temperature": temperature, "max_tokens": max_tokens, - "model": model, + "model": model["local_id"], "time_created": datetime.fromtimestamp(time_created).strftime("%Y-%m-%d %H:%M:%S"), "time_created_utc": time_created, "batch_number": n + 1 if use_batches else "", @@ -1128,7 +946,7 @@ def process(self): for output_key, output_value in annotation_output.items(): # Skip 'signature' and 'type' annotations for Google - if provider == "google" and output_key in ("extras.signature", ".type"): + if model["provider"] == "google" and output_key in ("extras.signature", ".type"): continue annotation = { @@ -1146,7 +964,7 @@ def process(self): n_batched = 0 # Rate limits for different providers - if provider == "mistral": + if model["provider"] == "mistral": time.sleep(1) # Write annotations in batches @@ -1267,7 +1085,8 @@ def validate_query(query, request, config): :param config: :return: """ - if query["api_or_local"] == "api" and not query.get("api_key"): + is_external_api = query["model"].startswith("api-") + if is_external_api and not query.get("api_key"): raise QueryParametersException("You need to enter an API key when using third-party models.") # For media archive datasets, use_media won't be present in the query @@ -1283,7 +1102,7 @@ def validate_query(query, request, config): raise QueryParametersException("You need to insert column name(s) in the user prompt within brackets " "(e.g. '[body]' or '[timestamp, author]')") - if query["api_or_local"] == "api" and not query.get("frontend-confirm"): + if is_external_api and not query.get("frontend-confirm"): raise QueryNeedsExplicitConfirmationException("Your data will be sent to a third-party service for " "processing, which will share your data with them and is " "likely to incur costs. Do you want to continue?") diff --git a/webtool/lib/template_filters.py b/webtool/lib/template_filters.py index 81b48ad55..02e41dec6 100644 --- a/webtool/lib/template_filters.py +++ b/webtool/lib/template_filters.py @@ -415,6 +415,18 @@ def _jinja2_filter_parameter_str(url): return params +@current_app.template_filter("hostname") +def _jinja2_filter_hostname(url: str) -> str: + """ + For a URL, return the hostname + + If no hostname is found, return the original value + + :param str url: + :return str: + """ + return ural.get_hostname(url) or url + @current_app.template_filter("explorer_css") def explorer_css(datasource, scope_class="explorer-content-container"): diff --git a/webtool/static/css/stylesheet.css b/webtool/static/css/stylesheet.css index a4097952f..3e2445911 100644 --- a/webtool/static/css/stylesheet.css +++ b/webtool/static/css/stylesheet.css @@ -349,6 +349,17 @@ article.small .form-element select[multiple] { margin-left: 20px; } +.form-multi-option-wrapper .action-button:not(.hidden) { + position: absolute; + top: 0.5em; + left: 0.5em; + padding: 0 0.4em; +} + +.form-multi-option-wrapper .action-button.delete-button { + left: 2.5em; +} + .form-multi-option-wrapper li::before { content: ''; position: absolute; @@ -472,6 +483,7 @@ h2 .inline-search input, h3 .inline-search input { overflow: hidden; box-sizing: border-box; border-width: 0; + padding: 0; } .ellipsis { font-weight: bold; diff --git a/webtool/static/js/modules/multi-form.js b/webtool/static/js/modules/multi-form.js index 389f5d9a8..9ca67bd93 100644 --- a/webtool/static/js/modules/multi-form.js +++ b/webtool/static/js/modules/multi-form.js @@ -6,12 +6,12 @@ export const multiForm = { actions.className = 'multi-form-actions'; const add_button = document.createElement('button'); - add_button.className = 'add-button'; + add_button.className = 'add-button action-button'; add_button.textContent = '+'; add_button.addEventListener('click', multiForm.add_item); const delete_button = document.createElement('button'); - delete_button.className = 'delete-button'; + delete_button.className = 'delete-button action-button'; delete_button.textContent = 'x'; delete_button.addEventListener('click', multiForm.delete_item); @@ -23,8 +23,10 @@ export const multiForm = { el.querySelectorAll('li').forEach(function (el) { const el_actions = actions.cloneNode(true); el.appendChild(el_actions); - }) - }) + }); + multiForm.renumber(el); + }); + }, handle_click: function (e) { @@ -53,7 +55,6 @@ export const multiForm = { if(!confirm("Are you sure?")){ return false; } - const li = find_parent(e.target, 'li'); const ol = find_parent(e.target, 'ol.form-multi-option-wrapper'); @@ -69,9 +70,11 @@ export const multiForm = { let index = 1; parent.querySelectorAll('li').forEach(function (el) { el.setAttribute('data-multi-option-index', index); + el.querySelector('.delete-button').classList.remove('hidden'); multiForm.renumber_items(el, index); index += 1; }) + parent.querySelector('li:last-child .delete-button').classList.add('hidden'); }, renumber_items: function(parent, index) { @@ -81,7 +84,6 @@ export const multiForm = { return; } for(const attribute of attributes) { - console.log(child); if(child.hasAttribute(attribute)) { child.setAttribute(attribute, child.getAttribute(attribute).replace(/-[0-9+]-/, `-${index}-`)); } diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index 22a5573dd..240ebae69 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -21,30 +21,37 @@

LLM Server

- - + + + {% if not providers %} - - + + {% else %} + {% for provider in providers %} - + + {% endfor %} + {% endif %}
SettingValueServerStatus
Server URL{% if llm_server %}{{ llm_server }}{% else %}Not configured{% endif %} + No LLM providers configured. You can add and configure servers via the 'LLM' tab on the Settings page. +
Status{{ provider.name }} - {% if server_status == "online" %} + {% if provider.status == "online" %} Online - {% elif server_status == "not configured" %} + {% elif provider.status == "not configured" %} Not configured {% else %} - {{ server_status }} + {{ provider.status }} {% endif %}
{# Available models #} -

Available Models +

+ Available Models
@@ -57,10 +64,11 @@

Available Models + - Model + Provider/model Display name Capabilities Status @@ -70,7 +78,8 @@

Available Models {% for model_id, model in available_models.items() %} - {{ model_id }} + {{ model.provider_type }}/{{ model.provider|hostname }} + {{ model.local_id }} {% if model.model_card %} @@ -80,7 +89,7 @@

Available Models {% endif %} - {{ model.capabilities | join(", ") }} + {{ model.supported_media_types | join(", ") }} {% if model_id in enabled_models %} @@ -103,6 +112,7 @@

Available Models {% endif %} + {% if model.provider_type == "ollama" %}
@@ -110,16 +120,17 @@

Available Models Delete + {% endif %} {% endfor %} {% else %} - - {% if llm_server %} + + {% if providers %} No models found. Use the Refresh button to fetch available models, or pull a new model below. {% else %} - Configure the LLM server URL in Settings first. + No LLM providers configured. {% endif %} @@ -128,15 +139,28 @@

Available Models

- {# Pull a new model #} - {% if llm_server %} + {# Pull a new model, if an ollama server is configured #} + {% if providers|selectattr("type", "equalto", "ollama")|list %}
-

Pull Model

-

Enter an Ollama model name (e.g. llama3:8b) to pull it from the - Ollama library. - Pulling large models may take several minutes; the job runs in the background.

+

Install new LLMs

+

Enter a model name (e.g. llama3:8b) to make it available via the configured provider. For + Ollama, model names can be found in + model library.

+

Pulling large models may take several minutes; the job runs in the background. Note that 4CAT cannot install + models for all LLM providers; if your provider is not listed below, it may not be able to add additional + models to it, or you may need to use an external tool to add new models.

+
+ + +
diff --git a/webtool/views/views_llm.py b/webtool/views/views_llm.py index 41f9df472..7f32117c1 100644 --- a/webtool/views/views_llm.py +++ b/webtool/views/views_llm.py @@ -9,6 +9,7 @@ from flask_login import login_required from webtool.lib.helpers import setting_required, error +from common.lib.llm.llm_client import LLMProviderClient component = Blueprint("llm", __name__) @@ -21,26 +22,30 @@ def llm_panel(): LLM Server management panel Shows server status, available models, and controls to pull/delete/refresh - models. Pull, delete, and refresh operations are queued as OllamaManager + models. Pull, delete, and refresh operations are queued as LLMProviderManager jobs rather than run synchronously. """ if not g.config.get("llm.access"): return error(403, message="LLM access is not enabled on this server.") + providers = g.config.get("llm.providers", []) + if request.method == "POST": action = request.form.get("action", "").strip() + provider = request.form.get("provider", "").strip() + details = {"provider": provider} if provider else {} if action == "refresh": # Queue a one-time manual refresh job; use a timestamp-based remote_id # so it is always accepted even if a periodic job already exists. - g.queue.add_job("manage-ollama", details={"task": "refresh"}, - remote_id=f"manage-ollama-manual-{int(time.time())}") + g.queue.add_job("manage-llm", details={**details, "task": "refresh"}, + remote_id=f"manage-llm-manual-{int(time.time())}") flash("Model refresh job queued.") elif action == "pull": model_name = request.form.get("model_name", "").strip() if model_name: - g.queue.add_job("manage-ollama", details={"task": "pull"}, remote_id=model_name) + g.queue.add_job("manage-llm", details={**details, "task": "pull"}, remote_id=model_name) flash(f"Pull job queued for model '{model_name}'.") else: flash("Please provide a model name to pull.") @@ -48,7 +53,7 @@ def llm_panel(): elif action == "delete": model_name = request.form.get("model_name", "").strip() if model_name: - g.queue.add_job("manage-ollama", details={"task": "delete"}, remote_id=model_name) + g.queue.add_job("manage-llm", details={**details, "task": "delete"}, remote_id=model_name) flash(f"Delete job queued for model '{model_name}'.") elif action == "enable": @@ -73,23 +78,15 @@ def llm_panel(): # --- GET: render panel --- - llm_server = g.config.get("llm.server", "") - server_status = "not configured" + for i, provider in enumerate(providers): + client = LLMProviderClient.get_client(g.config, provider) - if llm_server: - headers = {"Content-Type": "application/json"} - llm_api_key = g.config.get("llm.api_key", "") - llm_auth_type = g.config.get("llm.auth_type", "") - if llm_api_key and llm_auth_type: - headers[llm_auth_type] = llm_api_key + if provider_status := client.get_status(): + server_status = "online" if provider_status == 200 else f"error (HTTP {provider_status})" + else: + server_status = "unreachable" - try: - resp = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=5) - server_status = "online" if resp.status_code == 200 else f"error (HTTP {resp.status_code})" - except requests.Timeout: - server_status = "unreachable (timeout)" - except requests.RequestException as e: - server_status = f"unreachable ({e})" + providers[i]["status"] = server_status available_models = g.config.get("llm.available_models", {}) or {} enabled_models = list(g.config.get("llm.enabled_models", []) or []) @@ -97,8 +94,7 @@ def llm_panel(): return render_template( "controlpanel/llm-server.html", flashes=get_flashed_messages(), - llm_server=llm_server, - server_status=server_status, + providers=providers, available_models=available_models, enabled_models=enabled_models, ) From a6ecbc21afd54590b89178db356977ef92e0da04 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 17:02:29 +0200 Subject: [PATCH 16/44] Formatting --- common/lib/config_definition.py | 13 ++++++++-- common/lib/llm/adapter.py | 24 +++++++++---------- common/lib/llm/clients/__init__.py | 0 .../lib/llm/{ => clients}/litellm_client.py | 2 -- .../lib/llm/{ => clients}/lmstudio_client.py | 6 ++--- common/lib/llm/{ => clients}/ollama_client.py | 15 ++++++++---- .../llm/{ => clients}/thirdparty_client.py | 3 ++- common/lib/llm/{ => clients}/vllm_client.py | 5 ++-- common/lib/llm/llm_client.py | 8 +++---- 9 files changed, 43 insertions(+), 33 deletions(-) create mode 100644 common/lib/llm/clients/__init__.py rename common/lib/llm/{ => clients}/litellm_client.py (99%) rename common/lib/llm/{ => clients}/lmstudio_client.py (98%) rename common/lib/llm/{ => clients}/ollama_client.py (91%) rename common/lib/llm/{ => clients}/thirdparty_client.py (98%) rename common/lib/llm/{ => clients}/vllm_client.py (97%) diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index e5c2a726f..c0ad5f990 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -581,7 +581,15 @@ }, "llm.providers": { "type": UserInput.OPTION_MULTI_OPTION, - "default": [], + "default": [ + { + "name": "Third-party APIs (OpenAI, Google, Claude, Mistral, etc)", + "type": "api", + "url": "", + "auth_header": "", + "auth_key": "" + } + ], "global": True, "help": "LLM providers", "options": { @@ -598,7 +606,8 @@ "options": { "ollama": "Ollama", "litellm": "LiteLLM", - "api": "Third-party models via APIs (OpenAI, Mistral, etc)", + "lmstudio": "LM Studio", + "vllm": "vLLM", "none": "None", }, }, diff --git a/common/lib/llm/adapter.py b/common/lib/llm/adapter.py index 5a3ece227..0c372f30d 100644 --- a/common/lib/llm/adapter.py +++ b/common/lib/llm/adapter.py @@ -5,7 +5,6 @@ from pathlib import Path from typing import List, Optional, Union -from langchain_community.chat_models import ChatLiteLLM from pydantic import SecretStr from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage from langchain_core.language_models.chat_models import BaseChatModel @@ -141,7 +140,8 @@ def generate_text( lc_messages = messages kwargs = {"temperature": temperature} - if self.provider["type"] in ("google", "ollama") or "o3" in self.model["local_id"] or "gpt-5" in self.model["local_id"]: + if self.provider["type"] in ("google", "ollama") or "o3" in self.model["local_id"] or "gpt-5" in self.model[ + "local_id"]: kwargs = {} try: @@ -152,10 +152,10 @@ def generate_text( return response def create_multimodal_content( - self, - text: str, - media_urls: Optional[List[str]] = None, - media_files: Optional[List[Union[str, Path]]] = None, + self, + text: str, + media_urls: Optional[List[str]] = None, + media_files: Optional[List[Union[str, Path]]] = None, ) -> List[dict]: """ Create multimodal content structure for LangChain messages with media URLs @@ -204,11 +204,11 @@ def create_multimodal_content( return content def _format_media_block( - self, - url: Optional[str] = None, - b64_data: Optional[str] = None, - mime_type: str = "image/jpeg", - media_category: str = "image", + self, + url: Optional[str] = None, + b64_data: Optional[str] = None, + mime_type: str = "image/jpeg", + media_category: str = "image", ) -> dict: """ Format a single media block for the appropriate provider. @@ -294,4 +294,4 @@ def get_models(config) -> dict: """ available_models = config.get("llm.available_models", {}) enabled_models = config.get("llm.enabled_models", {}) - return {k: v for k, v in available_models.items() if k in enabled_models} \ No newline at end of file + return {k: v for k, v in available_models.items() if k in enabled_models} diff --git a/common/lib/llm/clients/__init__.py b/common/lib/llm/clients/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/common/lib/llm/litellm_client.py b/common/lib/llm/clients/litellm_client.py similarity index 99% rename from common/lib/llm/litellm_client.py rename to common/lib/llm/clients/litellm_client.py index f6f7d59ff..0da9dc682 100644 --- a/common/lib/llm/litellm_client.py +++ b/common/lib/llm/clients/litellm_client.py @@ -10,8 +10,6 @@ LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface with the OpenAI API as well. """ -import requests - from common.lib.llm.llm_client import LLMProviderClient class LiteLLMClient(LLMProviderClient): diff --git a/common/lib/llm/lmstudio_client.py b/common/lib/llm/clients/lmstudio_client.py similarity index 98% rename from common/lib/llm/lmstudio_client.py rename to common/lib/llm/clients/lmstudio_client.py index 5ceb99c27..b7b0906dc 100644 --- a/common/lib/llm/lmstudio_client.py +++ b/common/lib/llm/clients/lmstudio_client.py @@ -10,10 +10,9 @@ LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface with the OpenAI API as well. """ -import requests - from common.lib.llm.llm_client import LLMProviderClient + class LMStudioClient(LLMProviderClient): type = "lmstudio" @@ -63,7 +62,6 @@ def format_display_name(self, meta: dict) -> str: if "params_string" in meta: extra_bits.append(meta["params_string"]) - model_name += f" {', '.join(extra_bits)}" - return model_name \ No newline at end of file + return model_name diff --git a/common/lib/llm/ollama_client.py b/common/lib/llm/clients/ollama_client.py similarity index 91% rename from common/lib/llm/ollama_client.py rename to common/lib/llm/clients/ollama_client.py index 096249c1a..3064c1173 100644 --- a/common/lib/llm/ollama_client.py +++ b/common/lib/llm/clients/ollama_client.py @@ -11,6 +11,7 @@ from common.lib.llm.llm_client import LLMProviderClient + class OllamaClient(LLMProviderClient): type = "ollama" @@ -187,13 +188,15 @@ def pull_model(self, model_id: str, stream: bool = False) -> bool: ) if r.status_code != 200 and self.log: - self.log.warning(f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") + self.log.warning( + f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") return r.status_code == 200 except requests.RequestException as e: if self.log: - self.log.warning(f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}: {e}") + self.log.warning( + f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}: {e}") return False @@ -211,9 +214,11 @@ def delete_model(self, model_id: str) -> bool: timeout=30, ) if r.status_code != 200 and self.log: - self.log.warning(f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") + self.log.warning( + f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") return r.status_code == 200 except requests.RequestException as e: if self.log: - self.log.warning(f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}: {e}") - return False \ No newline at end of file + self.log.warning( + f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}: {e}") + return False diff --git a/common/lib/llm/thirdparty_client.py b/common/lib/llm/clients/thirdparty_client.py similarity index 98% rename from common/lib/llm/thirdparty_client.py rename to common/lib/llm/clients/thirdparty_client.py index e6ac5a29c..8d2f48602 100644 --- a/common/lib/llm/thirdparty_client.py +++ b/common/lib/llm/clients/thirdparty_client.py @@ -6,6 +6,7 @@ from common.lib.llm.llm_client import LLMProviderClient + class ThirdPartyClient(LLMProviderClient): type = "api" @@ -53,4 +54,4 @@ def build_model_entry(self, meta: dict) -> dict: entry = super().build_model_entry(meta) entry["provider"] = meta["provider"] - return entry \ No newline at end of file + return entry diff --git a/common/lib/llm/vllm_client.py b/common/lib/llm/clients/vllm_client.py similarity index 97% rename from common/lib/llm/vllm_client.py rename to common/lib/llm/clients/vllm_client.py index 512ce0e6f..3c218d3af 100644 --- a/common/lib/llm/vllm_client.py +++ b/common/lib/llm/clients/vllm_client.py @@ -7,10 +7,9 @@ base-class dependency. """ -import requests - from common.lib.llm.llm_client import LLMProviderClient + class VLLMClient(LLMProviderClient): type = "vllm" @@ -56,4 +55,4 @@ def format_display_name(self, meta: dict) -> str: if meta["litellm_params"].get("model"): model_name = "/".join(meta["litellm_params"].get("model").split("/")[1:]) - return model_name \ No newline at end of file + return model_name diff --git a/common/lib/llm/llm_client.py b/common/lib/llm/llm_client.py index b31968399..08485fc75 100644 --- a/common/lib/llm/llm_client.py +++ b/common/lib/llm/llm_client.py @@ -29,10 +29,10 @@ def get_client(config, provider_config: dict) -> "LLMProviderClient": :return LLMProviderClient: """ # in-line import because we otherwise get circular import shenanigans - from common.lib.llm.ollama_client import OllamaClient - from common.lib.llm.litellm_client import LiteLLMClient - from common.lib.llm.lmstudio_client import LMStudioClient - from common.lib.llm.thirdparty_client import ThirdPartyClient + from common.lib.llm.clients.ollama_client import OllamaClient + from common.lib.llm.clients.litellm_client import LiteLLMClient + from common.lib.llm.clients.lmstudio_client import LMStudioClient + from common.lib.llm.clients.thirdparty_client import ThirdPartyClient for client_type in (OllamaClient, LiteLLMClient, LMStudioClient, ThirdPartyClient): if client_type.type == provider_config["type"]: From b9b3d0a61409ee1c5f84c4d2c9b137427d00d558 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 17:04:26 +0200 Subject: [PATCH 17/44] ruff --- backend/workers/llm_manager.py | 2 -- processors/machine_learning/llm_prompter.py | 3 --- webtool/views/views_llm.py | 2 -- 3 files changed, 7 deletions(-) diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py index 3b279de36..398b835ca 100644 --- a/backend/workers/llm_manager.py +++ b/backend/workers/llm_manager.py @@ -4,8 +4,6 @@ from backend.lib.worker import BasicWorker from common.lib.llm.llm_client import LLMProviderClient -import json - class LLMProviderManager(BasicWorker): """ Manages LLM models diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 605dbdc21..1ffe2cf98 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -13,8 +13,6 @@ from jsonschema.exceptions import ValidationError, SchemaError from datetime import datetime, timedelta -from matplotlib.style.core import available - from common.lib.item_mapping import MappedItem from common.lib.exceptions import ProcessorInterruptedException, QueryParametersException, QueryNeedsExplicitConfirmationException from common.lib.helpers import UserInput, nthify, andify, remove_nuls, flatten_dict @@ -351,7 +349,6 @@ def process(self): # Set all variables through which we can reach the LLM api_key = "" - base_url = None client_kwargs = {} # load model and providermetadata diff --git a/webtool/views/views_llm.py b/webtool/views/views_llm.py index 7f32117c1..863d7e9fe 100644 --- a/webtool/views/views_llm.py +++ b/webtool/views/views_llm.py @@ -3,8 +3,6 @@ """ import time -import requests - from flask import Blueprint, render_template, flash, get_flashed_messages, redirect, url_for, request, g from flask_login import login_required From bdf07e2d6449b58b3d4f239b1d8d03588ae1d0e1 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 17:56:09 +0200 Subject: [PATCH 18/44] Reshuffle OpenAI-related clients --- common/lib/config_definition.py | 6 +- common/lib/llm/adapter.py | 13 +- common/lib/llm/clients/__init__.py | 1 + .../{lmstudio_client.py => openai_client.py} | 24 +- common/lib/llm/clients/vllm_client.py | 58 --- common/lib/llm/llm_client.py | 10 +- processors/machine_learning/llm_prompter.py | 338 +++++++++--------- 7 files changed, 197 insertions(+), 253 deletions(-) rename common/lib/llm/clients/{lmstudio_client.py => openai_client.py} (74%) delete mode 100644 common/lib/llm/clients/vllm_client.py diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index c0ad5f990..7cf0c6483 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -595,7 +595,7 @@ "options": { "name": { "type": UserInput.OPTION_TEXT, - "default": "4CAT LLM Server", + "default": "", "help": "Name of LLM Server in UI", "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).", }, @@ -606,8 +606,8 @@ "options": { "ollama": "Ollama", "litellm": "LiteLLM", - "lmstudio": "LM Studio", - "vllm": "vLLM", + "openai-like": "OpenAI compatible API (LM Studio, vLLM, etc)", + "api": "Third-party models from OpenAI, Anthropic, Mistral, etc", "none": "None", }, }, diff --git a/common/lib/llm/adapter.py b/common/lib/llm/adapter.py index 0c372f30d..9fe80eb49 100644 --- a/common/lib/llm/adapter.py +++ b/common/lib/llm/adapter.py @@ -61,7 +61,6 @@ def _load_llm(self) -> BaseChatModel: if self.provider["type"] == "openai": if "o3" in self.model: del chat_params["temperature"] - chat_params["base_url"] = self.provider["url"] or "https://api.openai.com/v1" adapter_class = ChatOpenAI elif self.provider["type"] == "google": @@ -82,8 +81,11 @@ def _load_llm(self) -> BaseChatModel: adapter_class = ChatOllama chat_params.update({"client_kwargs": self.client_kwargs}) - elif self.provider["type"] == "litellm": - adapter_class = ChatOpenAI + elif self.provider["type"] in {"litellm", "openai-like"}: + url = f"{self.provider['url']}/" if not self.provider["url"].endswith("/") else self.provider['url'] + url += "v1/" if not url.endswith("v1/") else "" + + chat_params.update({"base_url": url}) if self.provider["auth_header"]: chat_params.update({ "default_headers": { @@ -91,11 +93,6 @@ def _load_llm(self) -> BaseChatModel: } }) - elif self.provider["type"] in {"vllm", "lmstudio", "litellm"}: - # OpenAI-compatible local servers - if self.provider == "lmstudio" and not self.api_key: - self.api_key = "lm-studio" - adapter_class = ChatOpenAI else: diff --git a/common/lib/llm/clients/__init__.py b/common/lib/llm/clients/__init__.py index e69de29bb..4287ca861 100644 --- a/common/lib/llm/clients/__init__.py +++ b/common/lib/llm/clients/__init__.py @@ -0,0 +1 @@ +# \ No newline at end of file diff --git a/common/lib/llm/clients/lmstudio_client.py b/common/lib/llm/clients/openai_client.py similarity index 74% rename from common/lib/llm/clients/lmstudio_client.py rename to common/lib/llm/clients/openai_client.py index b7b0906dc..fa04446c0 100644 --- a/common/lib/llm/clients/lmstudio_client.py +++ b/common/lib/llm/clients/openai_client.py @@ -1,20 +1,16 @@ """ -Centralized HTTP client for communicating with a LiteLLM server. +Centralized HTTP client for communicating with an OpenAI compatible server. -This class owns all direct HTTP calls to LiteLLM's REST API and provides shared +This class owns all direct HTTP calls to an OpenAI style REST API and provides shared static helpers for capability parsing, display-name formatting, and building canonical llm.available_models entries. It is a plain helper with no 4CAT base-class dependency. - -This class is primarily intended for interfacing with LiteLLM, but since -LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface -with the OpenAI API as well. """ from common.lib.llm.llm_client import LLMProviderClient class LMStudioClient(LLMProviderClient): - type = "lmstudio" + type = "openai-like" _models_info_path = "/api/v1/models" _models_info_key = "models" @@ -28,16 +24,14 @@ def parse_supported_media_types(self, meta: dict) -> list[str]: :returns: Ordered list of supported media type strings. Returns ``[]`` when ``meta`` is ``None`` """ + media_types = {"text"} # far as I can tell, text is always supported + if meta is None or not meta.get("capabilities"): - return [] + return list(media_types) - media_types = {"text"} # far as I can tell, text is always supported if meta["capabilities"].get("vision"): media_types.add("image") - if meta["model_info"].get("supports_audio_input"): - media_types.add("sound") - # no way to tell if model supports embeddings input as far as I can see... return list(media_types) @@ -56,12 +50,12 @@ def format_display_name(self, meta: dict) -> str: model_name = meta["display_name"] extra_bits = [] - if "publisher" in meta: + if meta.get("publisher"): extra_bits.append(meta["publisher"]) - if "params_string" in meta: + if meta.get("params_string"): extra_bits.append(meta["params_string"]) - model_name += f" {', '.join(extra_bits)}" + model_name += f" ({', '.join(extra_bits)})" return model_name diff --git a/common/lib/llm/clients/vllm_client.py b/common/lib/llm/clients/vllm_client.py deleted file mode 100644 index 3c218d3af..000000000 --- a/common/lib/llm/clients/vllm_client.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -Centralized HTTP client for communicating with a vLLM server. - -This class owns all direct HTTP calls to vLLM's REST API and provides shared -static helpers for capability parsing, display-name formatting, and building -canonical llm.available_models entries. It is a plain helper with no 4CAT -base-class dependency. - -""" -from common.lib.llm.llm_client import LLMProviderClient - - -class VLLMClient(LLMProviderClient): - type = "vllm" - - _models_info_path = "/model/info" - _models_info_key = "data" - _model_id_key = "model" - - def parse_supported_media_types(self, meta: dict) -> list[str]: - """ - Derive the media types a model supports from its LiteLLM metadata. - - :param meta: ``model info`` response dict, or ``None``. - :returns: Ordered list of supported media type strings. - Returns ``[]`` when ``meta`` is ``None`` - """ - if meta is None or not meta.get("model_info"): - return [] - - media_types = {"text"} # far as I can tell, text is always supported - if meta["model_info"].get("supports_vision"): - media_types.add("image") - - if meta["model_info"].get("supports_audio_input"): - media_types.add("sound") - - # no way to tell if model supports embeddings input as far as I can see... - - return list(media_types) - - def format_display_name(self, meta: dict) -> str: - """ - Build a human-readable display name for a model. - - :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). - :param meta: ``/api/show`` response dict, or ``None``. - :returns: Human-readable display name string. - """ - model_name = self.get_global_model_id(meta) - - if meta.get("model_name"): - model_name = meta["model_name"] - - if meta["litellm_params"].get("model"): - model_name = "/".join(meta["litellm_params"].get("model").split("/")[1:]) - - return model_name diff --git a/common/lib/llm/llm_client.py b/common/lib/llm/llm_client.py index 08485fc75..b59d38ab0 100644 --- a/common/lib/llm/llm_client.py +++ b/common/lib/llm/llm_client.py @@ -31,7 +31,7 @@ def get_client(config, provider_config: dict) -> "LLMProviderClient": # in-line import because we otherwise get circular import shenanigans from common.lib.llm.clients.ollama_client import OllamaClient from common.lib.llm.clients.litellm_client import LiteLLMClient - from common.lib.llm.clients.lmstudio_client import LMStudioClient + from common.lib.llm.clients.openai_client import LMStudioClient from common.lib.llm.clients.thirdparty_client import ThirdPartyClient for client_type in (OllamaClient, LiteLLMClient, LMStudioClient, ThirdPartyClient): @@ -51,11 +51,17 @@ def __init__(self, config, provider_config: dict, timeout: int = 10, log=None) - self.config = config self._meta = provider_config - self.base_url = provider_config["url"].rstrip("/") + + self.timeout = timeout self.auth_type = provider_config.get("auth_header") self.auth_key = provider_config.get("auth_key") self.timeout = timeout + self.base_url = provider_config["url"].rstrip("/") + if self.base_url.endswith("v1"): + # get rid of the 'v1' - we'll add this in the path + self.base_url = f"{self.base_url[:-2]}" + self._session = requests.Session() self._headers = {"Content-Type": "application/json"} diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 1ffe2cf98..9f174f148 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -76,16 +76,12 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: # Determine if the parent dataset is a media archive (zip with images/video/audio) is_media_parent = False media_type = "media" - hosted_and_local_available = True if parent_dataset: parent_extension = parent_dataset.get_extension() parent_media_type = parent_dataset.get_media_type() if parent_extension == "zip" and parent_media_type in ("image", "video", "audio"): is_media_parent = True media_type = parent_media_type - if parent_media_type in ("video", "audio"): - # Ollama and LM Studio currently only support text and image - hosted_and_local_available = False options = { "ethics_warning1": { @@ -115,179 +111,187 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: if is_media_parent: # Media-specific options: show info about media files being attached - options["media_info"] = { - "type": UserInput.OPTION_INFO, - "help": f"The parent dataset contains {media_type} files that will be sent " - f"to the LLM with each prompt. Make sure to use a model that supports " - f"{media_type} input (e.g. vision models for images).
" - f"Not all models support all media types. If the model cannot process " - f"{media_type} files, an error will be returned during processing.", - } - options["system_prompt"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "System prompt", - "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " - "on the tone of the text. This processor may edit the system prompt to " - "ensure correct output. System prompts are included in the results file.", - "default": "", - } - options["prompt"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "User prompt", - "tooltip": f"Describe what the model should do with each {media_type} file. " - f"No column brackets needed — {media_type} files are attached automatically.", - "default": "", - } + options.update({ + "media_info": { + "type": UserInput.OPTION_INFO, + "help": f"The parent dataset contains {media_type} files that will be sent " + f"to the LLM with each prompt. Make sure to use a model that supports " + f"{media_type} input (e.g. vision models for images).
" + f"Not all models support all media types. If the model cannot process " + f"{media_type} files, an error will be returned during processing.", + }, + "system_prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "System prompt", + "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " + "on the tone of the text. This processor may edit the system prompt to " + "ensure correct output. System prompts are included in the results file.", + "default": "", + }, + "prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "User prompt", + "tooltip": f"Describe what the model should do with each {media_type} file. " + f"No column brackets needed — {media_type} files are attached automatically.", + "default": "", + } + }) + else: - # Text-based dataset options: column brackets, media URL toggle, batching - options["prompt_info"] = { + options.update({ + # Text-based dataset options: column brackets, media URL toggle, batching + "prompt_info": { + "type": UserInput.OPTION_INFO, + "help": "How to prompt
" + "Use `[brackets]` with column names to insert dataset items in the prompt. You " + "can place column brackets in different parts of the prompt or use multiple column names within" + ' a single column bracket to merge items.
Example 1: "Describe the topic ' + 'of this social media post in max. 3 words: `[body, tags]`"
Example 2: ' + "\"Given the following hashtags: `[tags]`, answer whether they are 'related' or 'unrelated' " + 'to the following text: `[body]`"
Prompting is a delicate art. See ' + "processor references on best prompting practices.
For predefined research prompts, see " + "e.g. [Prompt Compass](https://github.com/ErikBorra/PromptCompass/blob/main/prompts.json#L136) " + "or the [Anthropic Prompt Library](https://docs.anthropic.com/en/resources/prompt-library/" + "library).", + }, + "system_prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "System prompt", + "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " + "on the tone of the text. This processor may edit the system prompt to " + "ensure correct output. System prompts are included in the results file.", + "default": "", + }, + "prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "User prompt", + "tooltip": "Use [brackets] with columns names.", + "default": "", + }, + "use_media": { + "type": UserInput.OPTION_TOGGLE, + "help": "Add images", + "tooltip": "Add media URLs for multi-modal processing. Requires a model that supports vision.", + "default": False, + }, + "media_columns": { + "type": UserInput.OPTION_TEXT, + "help": "Columns with image URL(s)", + "default": "", + "inline": True, + "tooltip": "Multiple columns can be selected.", + "requires": "use_media==true", + } + }) + + # Common options for both text and media datasets + options.update({ + "structured_output": { + "type": UserInput.OPTION_TOGGLE, + "help": "Output structured JSON", + "tooltip": "Output in a JSON format instead of text. Note that your chosen model may not support " + "structured output.", + "default": False, + }, + "json_schema_info": { "type": UserInput.OPTION_INFO, - "help": "How to prompt
" - "Use `[brackets]` with column names to insert dataset items in the prompt. You " - "can place column brackets in different parts of the prompt or use multiple column names within" - ' a single column bracket to merge items.
Example 1: "Describe the topic ' - 'of this social media post in max. 3 words: `[body, tags]`"
Example 2: ' - "\"Given the following hashtags: `[tags]`, answer whether they are 'related' or 'unrelated' " - 'to the following text: `[body]`"
Prompting is a delicate art. See ' - "processor references on best prompting practices.
For predefined research prompts, see " - "e.g. [Prompt Compass](https://github.com/ErikBorra/PromptCompass/blob/main/prompts.json#L136) " - "or the [Anthropic Prompt Library](https://docs.anthropic.com/en/resources/prompt-library/" - "library).", - } - options["system_prompt"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "System prompt", - "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " - "on the tone of the text. This processor may edit the system prompt to " - "ensure correct output. System prompts are included in the results file.", - "default": "", - } - options["prompt"] = { + "help": "Insert a JSON Schema for structured outputs. These define the output that " + "the LLM will adhere to. [See instructions and examples on how to write a JSON Schema]" + "(https://json-schema.org/learn/miscellaneous-examples) and [OpenAI's documentation]" + "(https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-schemas).", + "requires": "structured_output==true", + }, + "json_schema": { "type": UserInput.OPTION_TEXT_LARGE, - "help": "User prompt", - "tooltip": "Use [brackets] with columns names.", + "help": "JSON schema", + "tooltip": "[required] A JSON schema that the structured output will adhere to", + "requires": "structured_output==true", "default": "", - } - options["use_media"] = { - "type": UserInput.OPTION_TOGGLE, - "help": "Add images", - "tooltip": "Add media URLs for multi-modal processing. Requires a model that supports vision.", - "default": False, - } - options["media_columns"] = { + }, + "temperature": { "type": UserInput.OPTION_TEXT, - "help": "Columns with image URL(s)", - "default": "", - "inline": True, - "tooltip": "Multiple columns can be selected.", - "requires": "use_media==true", + "help": "Temperature", + "default": 0.1, + "coerce_type": float, + "max": 2.0, + "tooltip": "Temperature indicates how strict the model will gravitate towards the most " + "probable next token. A score close to 0 returns more predictable " + "outputs while a score close to 1 leads to more creative outputs. Not supported by all models.", } - - # Common options for both text and media datasets - options["structured_output"] = { - "type": UserInput.OPTION_TOGGLE, - "help": "Output structured JSON", - "tooltip": "Output in a JSON format instead of text. Note that your chosen model may not support " - "structured output.", - "default": False, - } - options["json_schema_info"] = { - "type": UserInput.OPTION_INFO, - "help": "Insert a JSON Schema for structured outputs. These define the output that " - "the LLM will adhere to. [See instructions and examples on how to write a JSON Schema]" - "(https://json-schema.org/learn/miscellaneous-examples) and [OpenAI's documentation]" - "(https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-schemas).", - "requires": "structured_output==true", - } - options["json_schema"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "JSON schema", - "tooltip": "[required] A JSON schema that the structured output will adhere to", - "requires": "structured_output==true", - "default": "", - } - options["temperature"] = { - "type": UserInput.OPTION_TEXT, - "help": "Temperature", - "default": 0.1, - "coerce_type": float, - "max": 2.0, - "tooltip": "Temperature indicates how strict the model will gravitate towards the most " - "probable next token. A score close to 0 returns more predictable " - "outputs while a score close to 1 leads to more creative outputs. Not supported by all models.", - } + }) if not is_media_parent: - options["truncate_input"] = { + options.update({ + "truncate_input": { + "type": UserInput.OPTION_TEXT, + "help": "Max chars in input value", + "default": 0, + "coerce_type": int, + "tooltip": "This value determines how many characters an inserted dataset value may have. 0 = unlimited.", + "requires": "use_media==false", + }, + "max_tokens": { + "type": UserInput.OPTION_TEXT, + "help": "Max output tokens", + "default": 10000, + "coerce_type": int, + "tooltip": "As a rule of thumb, one token generally corresponds to ~4 characters of " + "text for common English text. This includes tokens spent for reasoning.", + }, + "batches": { + "type": UserInput.OPTION_TEXT, + "help": "Items per prompt", + "coerce_type": int, + "default": 1, + "tooltip": "How many dataset items to insert into the prompt. These will be inserted as a list " + "wherever the column brackets are used (e.g. '[body]').", + "requires": "use_media==false", + }, + "batch_info": { + "type": UserInput.OPTION_INFO, + "help": "Note on batching: Batching may increase speed but reduce accuracy. Models " + "need to support structured output for batching. This processor uses JSON schemas to ensure " + "symmetry between input and output lengths, but models may struggle to match input and output " + "values. Describe the dataset values in plurals in your prompt when batching. If you use " + "multiple column brackets in your prompt, rows with any empty values are skipped.", + "requires": "use_media==false", + } + }) + + options.update({ + "ethics_warning3": { + "type": UserInput.OPTION_INFO, + "requires": "api_or_local==api", + "help": "When using LLMs through commercial parties, always consider anonymising your data and " + "whether local open-source LLMs are also an option.", + }, + "save_annotations": { + "type": UserInput.OPTION_ANNOTATION, + "label": "prompt outputs", + "default": False, + }, + "hide_think": { + "type": UserInput.OPTION_TOGGLE, + "help": "Hide reasoning", + "default": False, + "tooltip": "Some models include reasoning in their output, between tags. This option " + "removes this tag and its contents from the output.", + }, + "limit": { "type": UserInput.OPTION_TEXT, - "help": "Max chars in input value", + "help": "Only annotate this many items, then stop", "default": 0, "coerce_type": int, - "tooltip": "This value determines how many characters an inserted dataset value may have. 0 = unlimited.", - "requires": "use_media==false", - } - - options["max_tokens"] = { - "type": UserInput.OPTION_TEXT, - "help": "Max output tokens", - "default": 10000, - "coerce_type": int, - "tooltip": "As a rule of thumb, one token generally corresponds to ~4 characters of " - "text for common English text. This includes tokens spent for reasoning.", - } - - if not is_media_parent: - options["batches"] = { + "min": 0, + "delegated": True, + }, + "annotation_label": { "type": UserInput.OPTION_TEXT, - "help": "Items per prompt", - "coerce_type": int, - "default": 1, - "tooltip": "How many dataset items to insert into the prompt. These will be inserted as a list " - "wherever the column brackets are used (e.g. '[body]').", - "requires": "use_media==false", - } - options["batch_info"] = { - "type": UserInput.OPTION_INFO, - "help": "Note on batching: Batching may increase speed but reduce accuracy. Models " - "need to support structured output for batching. This processor uses JSON schemas to ensure " - "symmetry between input and output lengths, but models may struggle to match input and output " - "values. Describe the dataset values in plurals in your prompt when batching. If you use " - "multiple column brackets in your prompt, rows with any empty values are skipped.", - "requires": "use_media==false", + "help": "Label for the annotations to add to the dataset", + "default": "", + "delegated": True, } - - options["ethics_warning3"] = { - "type": UserInput.OPTION_INFO, - "requires": "api_or_local==api", - "help": "When using LLMs through commercial parties, always consider anonymising your data and " - "whether local open-source LLMs are also an option.", - } - options["save_annotations"] = { - "type": UserInput.OPTION_ANNOTATION, - "label": "prompt outputs", - "default": False, - } - options["hide_think"] = { - "type": UserInput.OPTION_TOGGLE, - "help": "Hide reasoning", - "default": False, - "tooltip": "Some models include reasoning in their output, between tags. This option " - "removes this tag and its contents from the output.", - } - options["limit"] = { - "type": UserInput.OPTION_TEXT, - "help": "Only annotate this many items, then stop", - "default": 0, - "coerce_type": int, - "min": 0, - "delegated": True, - } - options["annotation_label"] = { - "type": UserInput.OPTION_TEXT, - "help": "Label for the annotations to add to the dataset", - "default": "", - "delegated": True, - } + }) # Get the media columns for the select media columns option if not is_media_parent and parent_dataset and parent_dataset.get_columns(): @@ -351,7 +355,7 @@ def process(self): api_key = "" client_kwargs = {} - # load model and providermetadata + # load model and provider metadata chosen_model_id = self.parameters.get("model") available_models = {k: v for k, v in self.config.get("llm.available_models").items() if k in self.config.get("llm.enabled_models")} if chosen_model_id not in available_models: @@ -413,7 +417,7 @@ def process(self): # Start LLM self.dataset.update_status("Connecting to LLM provider") base_url_str = "" if not provider["url"] else f" at base URL '{provider['url']}'" - self.dataset.log(f"Using LLM provider '{model['provider']}' with model '{model}'{base_url_str}") + self.dataset.log(f"Using LLM provider '{model['provider_type'] if provider['url'] else provider['provider']}' with model '{model['local_id']}'{base_url_str}") try: llm = LLMAdapter( config=self.config, @@ -996,7 +1000,7 @@ def process(self): # Final outputs time_end = time.time() time_progressed = str(timedelta(seconds=int(time_end - time_start))) - final_status = f"Finished, {model} generated text in {time_progressed}." + final_status = f"Finished, {model['local_id']} generated text in {time_progressed}." skipped_str = None if not skipped else f" Skipped {skipped} rows because of empty values." if skipped_str: self.dataset.finish_with_warning(i, final_status + skipped_str) From d86a3092a2568384c3e95a553383be33f4a2bfa7 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:02:43 +0200 Subject: [PATCH 19/44] Control panel text & names --- common/lib/config_definition.py | 8 +++++--- webtool/templates/controlpanel/layout.html | 2 +- webtool/templates/controlpanel/llm-server.html | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index 7cf0c6483..0078fa681 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -575,9 +575,11 @@ # allows 4CAT LLM processors to connect to a local or remote LLM server "llm.intro": { "type": UserInput.OPTION_INFO, - "help": "4CAT LLM processors allow users to utilize common APIs (e.g. OpenAI, Google, Anthropic) as well as connect " - "to local or remote LLM servers. You can also set up your own LLM server using open source software such as " - "[Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for your users." + "help": "4CAT LLM processors allow users to utilize common APIs (e.g. OpenAI, Google, Anthropic) as well as " + "connect to local or remote LLM servers. You can also set up your own LLM server using open source " + "software such as [Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for " + "your users. After configuring providers you can enable and disable available models via the 'LLMs & " + "Providers' page in the Control Panel." }, "llm.providers": { "type": UserInput.OPTION_MULTI_OPTION, diff --git a/webtool/templates/controlpanel/layout.html b/webtool/templates/controlpanel/layout.html index 400a33846..26d03df20 100644 --- a/webtool/templates/controlpanel/layout.html +++ b/webtool/templates/controlpanel/layout.html @@ -19,7 +19,7 @@ {% if __user_config("privileges.admin.can_restart") %} Extensions{% endif %} {% if __user_config("privileges.admin.can_manage_settings") and __user_config("llm.access") %} - LLM Server{% endif %} + LLMs & Providers{% endif %} {% if __user_config("privileges.admin.can_manage_users") %} View logs{% endif %} {% if __user_config("privileges.admin.can_manipulate_all_datasets") %} diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index 240ebae69..4b4285dd8 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -7,7 +7,7 @@ {% block body %}
-

LLM Server

+

LLM Providers

{% if flashes %}
From feb4a8451478f8125f7fc11a3a4426c53fd6428a Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:29:13 +0200 Subject: [PATCH 20/44] Rework Ollama model info API request & parsing --- common/lib/llm/clients/ollama_client.py | 141 ++++++------------ common/lib/llm/clients/openai_client.py | 2 +- .../templates/controlpanel/llm-server.html | 24 +-- 3 files changed, 57 insertions(+), 110 deletions(-) diff --git a/common/lib/llm/clients/ollama_client.py b/common/lib/llm/clients/ollama_client.py index 3064c1173..7c05c94f7 100644 --- a/common/lib/llm/clients/ollama_client.py +++ b/common/lib/llm/clients/ollama_client.py @@ -5,8 +5,6 @@ helpers for capability parsing, display-name formatting, and building canonical llm.available_models entries. It is a plain helper with no 4CAT base-class dependency. """ - -import re import requests from common.lib.llm.llm_client import LLMProviderClient @@ -19,6 +17,33 @@ class OllamaClient(LLMProviderClient): _models_info_key = "models" _model_id_key = "model" + def list_models(self) -> list[dict]: + """ + List all models available. + + For Ollama, get some additional model info via an extra API request. + + :return list[dict]: List of models available.: + """ + models = super().list_models() + result = [] + for model in models: + try: + model_info = self._session.post( + f"{self.base_url}/api/show", + json={"model": model[self._model_id_key]}, + headers=self._headers, + timeout=self.timeout, + ).json() + result.append({**model, "model_info": model_info["model_info"]}) + except (requests.exceptions.HTTPError, KeyError) as e: + self.log.warning( + f"{self.__class__.__name__}: failed to fetch additional model info for model {model[self._model_id_key]}: {e}") + + return result + + + def parse_supported_media_types(self, meta: dict) -> list[str]: """Derive the media types a model supports from its Ollama metadata. @@ -71,97 +96,29 @@ def format_display_name(self, meta: dict) -> str: """ Build a human-readable display name for a model. - :param dict meta: Model metadata - :returns str: Human-readable display name string. + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. """ - model_info = meta.get("model_info", {}) if meta else {} - model_id = self.get_global_model_id(meta) - details = meta.get("details", {}) if meta else {} - - basename = None - for key in ("general.basename", "general.base_model.0.name"): - val = model_info.get(key) - if val: - basename = str(val).strip() - break - if not basename: - basename = model_id.split(":", 1)[0].replace("-", " ").replace("_", " ").strip() or model_id - - def _parse_param_count(val): - if val is None: - return None - if isinstance(val, int): - return val - if isinstance(val, float): - return int(val) - s = str(val).strip().replace(",", "") - if not s: - return None - m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([BbMm])$", s) - if m: - num = float(m.group(1)) - suf = m.group(2).upper() - return int(num * (1_000_000_000 if suf == "B" else 1_000_000)) - try: - return int(float(s)) - except Exception: - return None - - def _humanize(n): - if n is None: - return None - n = int(n) - if n >= 1_000_000_000: - x = n / 1_000_000_000 - s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" - if s.endswith(".0"): - s = s[:-2] - return f"{s}B" - if n >= 1_000_000: - x = n / 1_000_000 - s = f"{x:.1f}" if x < 10 else f"{int(round(x))}" - if s.endswith(".0"): - s = s[:-2] - return f"{s}M" - return f"{n:,}" - - param_candidate = None - for key in ("parameter_size", "parameter_count"): - if key in details: - param_candidate = details.get(key) - break - if param_candidate is None: - param_candidate = model_info.get("general.parameter_count") - human = _humanize(_parse_param_count(param_candidate)) - - size_label = model_info.get("general.size_label") - size_label_norm = str(size_label).strip() if size_label else None - - tag = model_id.split(":", 1)[1].strip() if ":" in model_id else None - - if tag: - tl = tag.lower() - if tl in ("latest", "stable", "current"): - suffix = f"{tag} · {human}" if human else tag - else: - m = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([bBmM])$", tag) - if m: - tag_size = f"{m.group(1)}{m.group(2).upper()}" - if size_label_norm and size_label_norm.upper() == tag_size.upper(): - suffix = size_label_norm - else: - suffix = tag_size - else: - suffix = f"{tag} · {human}" if human else tag - else: - if size_label_norm: - suffix = size_label_norm - elif human: - suffix = human - else: - return model_id - - return f"{basename} ({suffix})" + model_name = self.get_model_id(meta) + + extra_bits = [] + if meta.get("model_info"): + if meta["model_info"].get("general.basename"): + model_name = meta["model_info"]["general.basename"] + + if meta["model_info"].get("general.finetune"): + extra_bits.append(meta["model_info"]["general.finetune"]) + + if meta["model_info"].get("general.size_label"): + extra_bits.append(meta["model_info"]["general.size_label"]) + + elif meta.get("details") and meta["details"].get("parameter_size"): + extra_bits.append(f"{meta['details']['parameter_size']} parameters") + + model_name += f" ({', '.join(extra_bits)})" + + return model_name def get_model_card_url(self, meta: dict) -> str: """ diff --git a/common/lib/llm/clients/openai_client.py b/common/lib/llm/clients/openai_client.py index fa04446c0..f8701dd7c 100644 --- a/common/lib/llm/clients/openai_client.py +++ b/common/lib/llm/clients/openai_client.py @@ -44,7 +44,7 @@ def format_display_name(self, meta: dict) -> str: :param meta: ``/api/show`` response dict, or ``None``. :returns: Human-readable display name string. """ - model_name = self.get_global_model_id(meta) + model_name = self.get_model_id(meta) if meta.get("display_name"): model_name = meta["display_name"] diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index 4b4285dd8..c31b92e93 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -33,12 +33,10 @@

LLM Providers

{% else %} {% for provider in providers %} - {{ provider.name }} + {{ provider.type }} {{ provider.name }} {% if provider.status == "online" %} Online - {% elif provider.status == "not configured" %} - Not configured {% else %} {{ provider.status }} {% endif %} @@ -64,23 +62,17 @@

- + Name Provider/model - Display name Capabilities Status - Actions {% if available_models %} {% for model_id, model in available_models.items() %} - - {{ model.provider_type }}/{{ model.provider|hostname }} - {{ model.local_id }} - {% if model.model_card %} {{ model.name }} @@ -89,14 +81,11 @@

{% endif %} - {{ model.supported_media_types | join(", ") }} + {{ model.provider_type }}/{{ model.provider|hostname }}
+ {{ model.local_id }} - {% if model_id in enabled_models %} - Enabled - {% else %} - Disabled - {% endif %} + {{ model.supported_media_types | join(", ") }} {% if model_id in enabled_models %} @@ -128,7 +117,8 @@

{% if providers %} - No models found. Use the Refresh button to fetch available models, or pull a new model below. + No models found. Use the Refresh button to fetch available models, or + install a new model below with compatible providers. {% else %} No LLM providers configured. {% endif %} From 9084cc5857a6eb4489fb0cb9390b14ce0be0ebb4 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:33:37 +0200 Subject: [PATCH 21/44] Fix Ollama capability detection --- common/lib/llm/clients/ollama_client.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/common/lib/llm/clients/ollama_client.py b/common/lib/llm/clients/ollama_client.py index 7c05c94f7..e21297448 100644 --- a/common/lib/llm/clients/ollama_client.py +++ b/common/lib/llm/clients/ollama_client.py @@ -35,7 +35,7 @@ def list_models(self) -> list[dict]: headers=self._headers, timeout=self.timeout, ).json() - result.append({**model, "model_info": model_info["model_info"]}) + result.append({**model, "metadata": model_info}) except (requests.exceptions.HTTPError, KeyError) as e: self.log.warning( f"{self.__class__.__name__}: failed to fetch additional model info for model {model[self._model_id_key]}: {e}") @@ -61,10 +61,10 @@ def parse_supported_media_types(self, meta: dict) -> list[str]: Returns ``[]`` when ``meta`` is ``None`` (unknown — callers should include the model, not block it). """ - if meta is None: + if meta is None or not meta.get("metadata"): return [] - capabilities = meta.get("capabilities", []) + capabilities = meta["metadata"].get("capabilities", []) media_types: list[str] = [] _cap_map = { @@ -103,15 +103,16 @@ def format_display_name(self, meta: dict) -> str: model_name = self.get_model_id(meta) extra_bits = [] - if meta.get("model_info"): - if meta["model_info"].get("general.basename"): - model_name = meta["model_info"]["general.basename"] + if meta.get("metadata") and meta["metadata"].get("model_info"): + more_meta = meta["metadata"]["model_info"] + if more_meta.get("general.basename"): + model_name = more_meta["general.basename"] - if meta["model_info"].get("general.finetune"): - extra_bits.append(meta["model_info"]["general.finetune"]) + if more_meta.get("general.finetune"): + extra_bits.append(more_meta["general.finetune"]) - if meta["model_info"].get("general.size_label"): - extra_bits.append(meta["model_info"]["general.size_label"]) + if more_meta.get("general.size_label"): + extra_bits.append(more_meta["general.size_label"]) elif meta.get("details") and meta["details"].get("parameter_size"): extra_bits.append(f"{meta['details']['parameter_size']} parameters") From c645d47cdf7ecef4156965e51e763e2fb0618e06 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:35:25 +0200 Subject: [PATCH 22/44] Fix model card URL for external model APIs --- common/lib/llm/clients/thirdparty_client.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/common/lib/llm/clients/thirdparty_client.py b/common/lib/llm/clients/thirdparty_client.py index 8d2f48602..e1df93d45 100644 --- a/common/lib/llm/clients/thirdparty_client.py +++ b/common/lib/llm/clients/thirdparty_client.py @@ -55,3 +55,12 @@ def build_model_entry(self, meta: dict) -> dict: entry["provider"] = meta["provider"] return entry + + def get_model_card_url(self, meta: dict) -> str: + """ + Get a URL for a model card for a given model + + :param meta: Model metadata + :return str: Model card URL (empty string if unavailable) + """ + return meta["model_card"] if meta["model_card"] else "" From 5d466915c29e2bbabaa40a5bffa21c23161c0066 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:47:50 +0200 Subject: [PATCH 23/44] Add notice to page if currently updating --- webtool/templates/controlpanel/llm-server.html | 12 +++++++++++- webtool/views/views_llm.py | 5 +++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index c31b92e93..af9ffe689 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -17,6 +17,10 @@

LLM Providers

{% endif %} +

+ You can add and configure LLM providers via the 'LLM providers' tab on the Settings page. +

+ {# Server status #}
@@ -27,7 +31,7 @@

LLM Providers

{% if not providers %} {% else %} @@ -56,6 +60,12 @@

+ {% if update_running %} +

+ Models are currently being refreshed or installed - reload the page to see up-to-date list. +

+ {% endif %} +
- No LLM providers configured. You can add and configure servers via the 'LLM' tab on the Settings page. + No LLM providers configured.
diff --git a/webtool/views/views_llm.py b/webtool/views/views_llm.py index 863d7e9fe..8a70ad910 100644 --- a/webtool/views/views_llm.py +++ b/webtool/views/views_llm.py @@ -89,10 +89,15 @@ def llm_panel(): available_models = g.config.get("llm.available_models", {}) or {} enabled_models = list(g.config.get("llm.enabled_models", []) or []) + update_running = bool([ + job for job in g.queue.get_all_jobs("manage-llm") if not job.data["interval"] + ]) + return render_template( "controlpanel/llm-server.html", flashes=get_flashed_messages(), providers=providers, available_models=available_models, enabled_models=enabled_models, + update_running=update_running, ) From 3acc27bf31a98ab3f8b61b35ba5f756ad89661b3 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:48:00 +0200 Subject: [PATCH 24/44] Update setting category name --- common/lib/config_definition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index 0078fa681..78cd1b7dc 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -763,5 +763,5 @@ "proxies": "Proxied HTTP requests", "image-visuals": "Image visualization", "extensions": "Extensions", - "llm": "LLM Server Settings" + "llm": "LLM Providers" } From a281a82f728988d69887212774b43168dc52f824 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:55:40 +0200 Subject: [PATCH 25/44] Fix PromptCompass --- common/lib/llm/clients/litellm_client.py | 2 +- processors/machine_learning/prompt_compass.py | 36 +++++++------------ 2 files changed, 13 insertions(+), 25 deletions(-) diff --git a/common/lib/llm/clients/litellm_client.py b/common/lib/llm/clients/litellm_client.py index 0da9dc682..cf65497ff 100644 --- a/common/lib/llm/clients/litellm_client.py +++ b/common/lib/llm/clients/litellm_client.py @@ -35,7 +35,7 @@ def parse_supported_media_types(self, meta: dict) -> list[str]: media_types.add("image") if meta["model_info"].get("supports_audio_input"): - media_types.add("sound") + media_types.add("audio") # no way to tell if model supports embeddings input as far as I can see... diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 076bd916f..033166cfd 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -3,7 +3,7 @@ """ from backend.lib.preset import ProcessorPreset from common.lib.helpers import UserInput -from common.lib.llm import LLMAdapter +from common.lib.llm.adapter import LLMAdapter from common.lib.exceptions import ( QueryParametersException, @@ -63,25 +63,6 @@ def get_prompt_library(config): return prompt_library - @staticmethod - def get_available_models(config): - """ - Get available model providers - - Combine the list defined by the LLMAdapter with known local models. - - :param config: Configuration reader - :return dict: Models and metadata - """ - # get cached local models - models = config.get("llm.available_models", {}) - models = {} if models == [] else models - models.update({k: v for k, v in LLMAdapter.get_models(config).items() if k not in ("none", "custom")}) - - models = {k: v for k, v in models.items() if "model_card" in v} - - return models - @staticmethod def is_compatible_with(module=None, config=None): """ @@ -108,15 +89,22 @@ def get_options(cls, parent_dataset=None, config=None): :return: """ prompt_library = cls.get_prompt_library(config) - available_models = cls.get_available_models(config) + available_models = config.get("llm.available_models", []) + enabled_model_ids = config.get("llm.enabled_models", []) + if not config.get("llm.access"): + enabled_model_ids = [_ for _ in enabled_model_ids if _.startswith("api-")] + + enabled_models = {k: v for k, v in available_models.items() if k in enabled_model_ids} options = { "model": { "type": UserInput.OPTION_CHOICE, "help": "Model to use", "tooltip": "Third-party models require an API key to run.", - "options": {("local/" if v["provider"] == "local" else f"{v['provider']}/") + k: v["name"] for k, v in available_models.items()}, - "default": sorted(list(available_models.keys()), key=lambda k: k.startswith("local"))[-1] + "options": { + model_id: model["name"] for model_id, model in enabled_models.items() + }, + "default": sorted(list(enabled_models.keys()), key=lambda k: not k.startswith("api"))[-1] }, } @@ -136,7 +124,7 @@ def get_options(cls, parent_dataset=None, config=None): "cache": True, "tooltip": "Create an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization" "/api-keys). Note that this often involves billing.", - "requires": "model!^=local" + "requires": "model^=api" }, "hide_think": { "type": UserInput.OPTION_TOGGLE, From 4813c07be5437de2014e026c0f5228a14c0a66c0 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 18:56:41 +0200 Subject: [PATCH 26/44] No longer need LLMAdapter in PromptCompass... --- processors/machine_learning/prompt_compass.py | 1 - 1 file changed, 1 deletion(-) diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 033166cfd..e43e5987c 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -3,7 +3,6 @@ """ from backend.lib.preset import ProcessorPreset from common.lib.helpers import UserInput -from common.lib.llm.adapter import LLMAdapter from common.lib.exceptions import ( QueryParametersException, From 3f3c8d5a3a7f68388919dc4096b81ab4cc0d88a5 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 21 May 2026 19:50:55 +0200 Subject: [PATCH 27/44] Fix LLMPrompter queue IDs --- processors/machine_learning/llm_prompter.py | 24 +++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 9f174f148..7026aa6c3 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -55,13 +55,19 @@ def get_queue_id(cls, remote_id, details, dataset) -> str: local_queue = "local_models" if not dataset: return local_queue + + model = dataset.parameters.get("model") + if model.startswith("api"): + # API-based models have their own queue - no local resources being + # used so can be concurrent + return f"llm-api-{dataset.key}" else: - if dataset.parameters.get('api_or_local', 'api') in ["local", "hosted"]: - # Hosted models also go in the local queue since they use the same shared LLM server - return local_queue - - # Queue per model/API type - return f"{cls.type}-{dataset.parameters.get('api_or_local', 'api')}-{dataset.parameters.get('api_model', 'none')}" + # use the model URL as the queue ID (extracted from the model + # global ID) + # this is not fool-proof, but does mean not more than one dataset + # runs per API server - in the scenario of these running locally, + # it means things do not run concurrently (which is good) + return f"llm-local-{dataset.parameters.get('model').split('-')[1]}" @classmethod def get_options(cls, parent_dataset=None, config=None) -> dict: @@ -311,6 +317,7 @@ def is_compatible_with(cls, module=None, config=None): # Text-based datasets if module.get_extension() in ["csv", "ndjson"]: return True + # Media datasets (zip archives with images, video, or audio) if module.get_extension() == "zip" and module.get_media_type() in ("image", "video", "audio"): return True @@ -345,9 +352,8 @@ def process(self): # Set value for batch length in prompts batches = max(1, min(self.parameters.get("batches", 1), self.source_dataset.num_rows)) - use_batches = batches > 1 - if media_columns or is_media_archive: # no batching for media files - use_batches = False + use_batches = batches > 1 and not (media_columns or is_media_archive) # no batching for media files + if not use_batches: self.dataset.delete_parameter("batches") From 098a7197b47b8b057300d8c5385e5055a2bb1e05 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Wed, 27 May 2026 13:00:01 +0200 Subject: [PATCH 28/44] Enabled models updating --- backend/workers/llm_manager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py index 398b835ca..14615344f 100644 --- a/backend/workers/llm_manager.py +++ b/backend/workers/llm_manager.py @@ -76,6 +76,8 @@ def work(self): self.log.warning(f"{self.__class__.__name__}: task '{task}' failed for model {model_name}") if available_models is not None: + enabled_and_available = set(available_models.keys()) & set(self.config.get("llm.enabled_models", [])) self.config.set("llm.available_models", available_models) + self.config.set("llm.enabled_models", list(enabled_and_available) self.job.finish() From 5d9e5051c948c81cd284eb4ee0c13afb9f786ca0 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Thu, 28 May 2026 18:18:53 +0200 Subject: [PATCH 29/44] typoes --- backend/workers/llm_manager.py | 2 +- processors/machine_learning/prompt_compass.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py index 14615344f..38d9405e0 100644 --- a/backend/workers/llm_manager.py +++ b/backend/workers/llm_manager.py @@ -78,6 +78,6 @@ def work(self): if available_models is not None: enabled_and_available = set(available_models.keys()) & set(self.config.get("llm.enabled_models", [])) self.config.set("llm.available_models", available_models) - self.config.set("llm.enabled_models", list(enabled_and_available) + self.config.set("llm.enabled_models", list(enabled_and_available)) self.job.finish() diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index e43e5987c..46386790e 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -71,9 +71,7 @@ def is_compatible_with(module=None, config=None): :param ConfigManager|None config: Configuration reader (context-aware) :return bool: """ - models = PromptCompassRunner.get_available_models(config) - return (models - and module.is_top_dataset() + return (module.is_top_dataset() and module.get_extension() in ("csv", "ndjson")) @classmethod From 4b94c41aa8babb38c10826ec2413df5138acdd9b Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Mon, 1 Jun 2026 17:41:01 +0200 Subject: [PATCH 30/44] Rework provider config to store as dict One little hack to prevent a lot of others --- backend/workers/llm_manager.py | 4 +-- common/lib/config_definition.py | 4 ++- common/lib/llm/adapter.py | 12 ++++---- common/lib/llm/clients/thirdparty_client.py | 2 +- common/lib/llm/llm_client.py | 10 +++---- common/lib/user_input.py | 9 ++++-- processors/machine_learning/llm_prompter.py | 27 ++++++++++++----- processors/machine_learning/prompt_compass.py | 29 +++++++++---------- processors/metrics/rank_attribute.py | 8 +++-- .../components/datasource-option.html | 9 ++++-- .../components/processor-option.html | 11 ++++++- .../templates/controlpanel/llm-server.html | 10 ++++--- webtool/views/views_llm.py | 4 +-- 13 files changed, 86 insertions(+), 53 deletions(-) diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py index 38d9405e0..1b3c62533 100644 --- a/backend/workers/llm_manager.py +++ b/backend/workers/llm_manager.py @@ -39,8 +39,8 @@ def work(self): model_name = self.job.data["remote_id"] available_models = None - for provider_config in self.config.get("llm.providers", []): - if provider and provider != provider_config["url"]: + for provider_id, provider_config in self.config.get("llm.providers", {}).items(): + if provider and provider != provider_id: continue try: diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index 78cd1b7dc..0c06d0203 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -9,6 +9,7 @@ """ from common.lib.user_input import UserInput +import re config_definition = { "datasources.intro": { @@ -594,6 +595,7 @@ ], "global": True, "help": "LLM providers", + "dict_key": lambda v: re.sub(r"[^0-9a-zA-Z ]", "", v["name"]).lower().replace(" ", "-") + (("-" + v["url"].split("/")[2].lower()) if "://" in v["url"] else ""), "options": { "name": { "type": UserInput.OPTION_TEXT, @@ -617,7 +619,7 @@ "type": UserInput.OPTION_TEXT, "default": "", "help": "LLM Server URL", - "tooltip": "The URL of the LLM server, e.g. http://localhost:5000", + "tooltip": "The URL of the LLM server, e.g. http://localhost:5000. Must start with a schema (e.g. 'https://').", }, "auth_header": { "type": UserInput.OPTION_TEXT, diff --git a/common/lib/llm/adapter.py b/common/lib/llm/adapter.py index 9fe80eb49..8e4c7bc26 100644 --- a/common/lib/llm/adapter.py +++ b/common/lib/llm/adapter.py @@ -36,7 +36,7 @@ def __init__( :param max_tokens: Max tokens to generate :param client_kwargs: Optional parameters for the LLM adapter class """ - known_providers = {p['url']: p for p in config.get("llm.providers")} + known_providers = config.get("llm.providers", {}) self.model = model self.provider = known_providers.get(model['provider']) @@ -216,7 +216,7 @@ def _format_media_block( :param media_category: "image", "video", or "audio" :returns: Provider-formatted content block """ - if self.provider == "anthropic": + if self.provider["type"] == "anthropic": if media_category == "image": if url: return {"type": "image", "source": {"type": "url", "url": url}} @@ -232,13 +232,13 @@ def _format_media_block( return {"type": "document", "source": { "type": "base64", "media_type": mime_type, "data": b64_data }} - elif self.provider == "google": + elif self.provider["type"] == "google": if url: return {"type": "image_url", "image_url": {"url": url}} else: data_uri = f"data:{mime_type};base64,{b64_data}" return {"type": "image_url", "image_url": {"url": data_uri}} - elif self.provider == "ollama": + elif self.provider["type"] == "ollama": if media_category != "image": raise ValueError(f"Ollama provider only supports image media, got category '{media_category}'") if url: @@ -258,7 +258,7 @@ def _format_media_block( return {"type": "image_url", "image_url": {"url": url}} else: data_uri = f"data:{mime_type};base64,{b64_data}" - if media_category == "audio" and self.provider == "openai": + if media_category == "audio" and self.provider["type"] == "openai": return {"type": "input_audio", "input_audio": { "data": b64_data, "format": mime_type.split("/")[-1] }} @@ -274,7 +274,7 @@ def set_structure(self, json_schema): json.dumps(json_schema) # To validate / raise an error # LM Studio needs some more guidance - if self.provider == "lmstudio": + if self.provider["type"] == "lmstudio": json_schema = {"type": "json_schema", "json_schema": {"schema": json_schema}} self.llm = self.llm.bind(response_format=json_schema) else: diff --git a/common/lib/llm/clients/thirdparty_client.py b/common/lib/llm/clients/thirdparty_client.py index e1df93d45..2a2db4dc3 100644 --- a/common/lib/llm/clients/thirdparty_client.py +++ b/common/lib/llm/clients/thirdparty_client.py @@ -52,7 +52,7 @@ def build_model_entry(self, meta: dict) -> dict: :returns: Dict ready to store under ``llm.available_models[model_id]``. """ entry = super().build_model_entry(meta) - entry["provider"] = meta["provider"] + entry["provider_key"] = meta["provider"] return entry diff --git a/common/lib/llm/llm_client.py b/common/lib/llm/llm_client.py index b59d38ab0..b31cb035a 100644 --- a/common/lib/llm/llm_client.py +++ b/common/lib/llm/llm_client.py @@ -14,7 +14,7 @@ class LLMProviderClient: _headers = {} - _meta = {} + provider_config = {} @staticmethod def get_client(config, provider_config: dict) -> "LLMProviderClient": @@ -49,8 +49,7 @@ def __init__(self, config, provider_config: dict, timeout: int = 10, log=None) - :param Logger log: 4CAT log handler """ self.config = config - - self._meta = provider_config + self.provider_config = provider_config self.timeout = timeout self.auth_type = provider_config.get("auth_header") @@ -126,8 +125,7 @@ def build_model_entry(self, meta: dict) -> dict: "local_id": self.get_model_id(meta), "name": self.format_display_name(meta), "model_card": self.get_model_card_url(meta), - "provider_type": self._meta["type"], - "provider": self._meta["url"], + "provider": self.provider_config["_id"], "supported_media_types": self.parse_supported_media_types(meta), "metadata": meta, } @@ -193,4 +191,4 @@ def get_global_model_id(self, meta: dict) -> str: :param dict meta: Model metadata :return str: Model ID """ - return "-".join((self._meta["type"], self._meta["url"], self.get_model_id(meta))) \ No newline at end of file + return "-".join((self.provider_config["type"], self.provider_config["url"], self.get_model_id(meta))) \ No newline at end of file diff --git a/common/lib/user_input.py b/common/lib/user_input.py index 16a583d74..ec0798b3e 100644 --- a/common/lib/user_input.py +++ b/common/lib/user_input.py @@ -1,3 +1,4 @@ +from attr.validators import is_callable from dateutil.parser import parse as parse_datetime from common.lib.exceptions import QueryParametersException from werkzeug.datastructures import ImmutableMultiDict @@ -203,7 +204,6 @@ def parse_all(options, input, silently_correct=True): if input_index not in input_items: input_items[input_index] = {} - print(key, value) input_items[input_index][option_item] = UserInput.parse_value(item_options[option_item], value, input_items[input_index], silently_correct) # discard items that are only default values @@ -217,7 +217,12 @@ def parse_all(options, input, silently_correct=True): if not only_default: parsed_input[option].append(item) - print(parsed_input[option]) + # may define a mapper to make this a dict + if settings.get("dict_key"): + if callable(settings["dict_key"]): + parsed_input[option] = {settings["dict_key"](value): {**value, "_id": settings["dict_key"](value)} for value in parsed_input[option]} + else: + parsed_input[option] = {value[settings["dict_key"]]: {**value, "_id": value[settings["dict_key"]]} for value in parsed_input[option]} elif option not in input: # not provided? use default diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 7026aa6c3..3d6633919 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -70,14 +70,27 @@ def get_queue_id(cls, remote_id, details, dataset) -> str: return f"llm-local-{dataset.parameters.get('model').split('-')[1]}" @classmethod - def get_options(cls, parent_dataset=None, config=None) -> dict: - # Check if 4CAT wide LLM server is available + def get_model_library(cls, config): available_models = config.get("llm.available_models", []) enabled_model_ids = config.get("llm.enabled_models", []) + providers = config.get("llm.providers", {}) if not config.get("llm.access"): enabled_model_ids = [_ for _ in enabled_model_ids if _.startswith("api-")] - enabled_models = {k: v for k, v in available_models.items() if k in enabled_model_ids} + models_option = {} + for key, value in {k: v for k, v in available_models.items() if k in enabled_model_ids}.items(): + provider = providers[value["provider"]] + if provider["name"] not in models_option: + models_option[provider["name"]] = {} + + models_option[provider["name"]][key] = value["name"] + + return models_option + + @classmethod + def get_options(cls, parent_dataset=None, config=None) -> dict: + # Check if 4CAT wide LLM server is available + models = cls.get_model_library(config) # Determine if the parent dataset is a media archive (zip with images/video/audio) is_media_parent = False @@ -98,9 +111,7 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: "model": { "type": UserInput.OPTION_CHOICE, "help": "API model", - "options": { - model_id: model["name"] for model_id, model in enabled_models.items() - }, + "options": models, "default": "none", "tooltip": "Select from the predefined model list or insert manually", }, @@ -110,7 +121,7 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: "help": "API key", "tooltip": "Create an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization" "/api-keys). Note that this often involves billing.", - "requires": "api_model^=api", + "requires": "model^=api", "sensitive": True, } } @@ -267,7 +278,7 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: options.update({ "ethics_warning3": { "type": UserInput.OPTION_INFO, - "requires": "api_or_local==api", + "requires": "model^=api-", "help": "When using LLMs through commercial parties, always consider anonymising your data and " "whether local open-source LLMs are also an option.", }, diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 46386790e..7e4343bf7 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -13,7 +13,7 @@ import json -class PromptCompassRunner(ProcessorPreset): +class PromptCompassRunner(): """ Run processor pipeline to feed prompts to LLM Prompter """ @@ -98,20 +98,19 @@ def get_options(cls, parent_dataset=None, config=None): "type": UserInput.OPTION_CHOICE, "help": "Model to use", "tooltip": "Third-party models require an API key to run.", - "options": { - model_id: model["name"] for model_id, model in enabled_models.items() - }, + "options": LLMPrompter.get_model_library(config), "default": sorted(list(enabled_models.keys()), key=lambda k: not k.startswith("api"))[-1] }, } - for model, metadata in available_models.items(): - model_key = metadata["provider"] + "/" + model - options[f"{model_key}-info"] = { - "type": UserInput.OPTION_INFO, - "help": f"Read the [model card]({metadata['model_card']}) for {model}.", - "requires": f"model=={model_key}" - } + for model, metadata in enabled_models.items(): + if metadata.get("model_card"): + model_key = metadata["provider"] + "/" + model + options[f"{model_key}-info"] = { + "type": UserInput.OPTION_INFO, + "help": f"Read the [model card]({metadata['model_card']}) for {model}.", + "requires": f"model=={model_key}" + } options.update({ "api_key": { @@ -121,20 +120,20 @@ def get_options(cls, parent_dataset=None, config=None): "cache": True, "tooltip": "Create an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization" "/api-keys). Note that this often involves billing.", - "requires": "model^=api" + "requires": "model^=api-" }, "hide_think": { "type": UserInput.OPTION_TOGGLE, "help": "Hide reasoning", "default": False, "tooltip": "Some models include reasoning in their output, between tags. This option " - "removes this tag and its contents from the output.", - "requires": "model^=local/deepseek" + "removes this tag and its contents from the output, if present.", }, "temperature": { "type": UserInput.OPTION_TEXT, "help": "Temperature", - "tooltip": "Between 0 and 1. Higher temperatures increase variability and may lead to strange results", + "tooltip": "Between 0 and 1. Higher temperatures increase variability and may lead to strange " + "results. Does not have an effect on all models.", "coerce_type": float, "min": 0.0, "max": 1.0, diff --git a/processors/metrics/rank_attribute.py b/processors/metrics/rank_attribute.py index 3c7c03286..8e18bb2bf 100644 --- a/processors/metrics/rank_attribute.py +++ b/processors/metrics/rank_attribute.py @@ -82,9 +82,11 @@ def get_options(cls, parent_dataset=None, config=None): "type": UserInput.OPTION_CHOICE, "options": { "none": "Use column value", - "urls": "URLs", - "hostnames": "Domain names", - "level2-hostnames": "Second-level domain names (e.g. m.youtube.com -> youtube.com)", + "URL-related": { + "urls": "URLs", + "hostnames": "Domain names", + "level2-hostnames": "Second-level domain names (e.g. m.youtube.com -> youtube.com)", + }, "hashtags": "Hashtags (words starting with #)", "emoji": "Emoji (each used emoji in the column is counted individually)", "occurrence": "Values (the number of comma-separated values in the given field)" diff --git a/webtool/templates/components/datasource-option.html b/webtool/templates/components/datasource-option.html index 867a339c8..cd8694fec 100644 --- a/webtool/templates/components/datasource-option.html +++ b/webtool/templates/components/datasource-option.html @@ -12,8 +12,13 @@

{{ settings.help }}

    {# always include an empty item #} {% set empty_item = settings.options|propmap("default") %} - {% do settings.default.append(empty_item) %} - {% for item in settings.default %} + {% if settings['default'] is mapping %} + {% set current_value = settings.default.values()|list %} + {% else %} + {% set current_value = settings.default %} + {% endif %} + {% do current_value.append(empty_item) %} + {% for item in current_value %} {% set outerloop = loop %} {% set last_index = outerloop.index %}
  1. diff --git a/webtool/templates/components/processor-option.html b/webtool/templates/components/processor-option.html index 1fbdd728e..dbe92ab27 100644 --- a/webtool/templates/components/processor-option.html +++ b/webtool/templates/components/processor-option.html @@ -22,7 +22,16 @@ {% elif option_settings.type == "choice" %} {% elif option_settings.type in ("multi", "annotations") %} diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index af9ffe689..691df0d42 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -35,7 +35,7 @@

    LLM Providers

    {% else %} - {% for provider in providers %} + {% for provider_id, provider in providers.items() %}
{{ provider.type }} {{ provider.name }} @@ -91,7 +91,7 @@

{% endif %}

- {{ model.provider_type }}/{{ model.provider|hostname }}
+ {{ providers[model.provider]['type'] }}/{{ providers[model.provider].url|hostname }}
{{ model.local_id }}
@@ -111,7 +111,7 @@

{% endif %} - {% if model.provider_type == "ollama" %} + {% if providers[model.provider].type == "ollama" %}
@@ -140,7 +140,9 @@

{# Pull a new model, if an ollama server is configured #} - {% if providers|selectattr("type", "equalto", "ollama")|list %} + {% set can_add_models = False %} + {% for provider_id, provider in providers.items() %}{% if provider.type == "ollama" %}{% set can_add_models = True %}{% endif %}{% endfor %} + {% if can_add_models %}

Install new LLMs

Enter a model name (e.g. llama3:8b) to make it available via the configured provider. For diff --git a/webtool/views/views_llm.py b/webtool/views/views_llm.py index 8a70ad910..2c02e603c 100644 --- a/webtool/views/views_llm.py +++ b/webtool/views/views_llm.py @@ -76,7 +76,7 @@ def llm_panel(): # --- GET: render panel --- - for i, provider in enumerate(providers): + for provider_id, provider in providers.items(): client = LLMProviderClient.get_client(g.config, provider) if provider_status := client.get_status(): @@ -84,7 +84,7 @@ def llm_panel(): else: server_status = "unreachable" - providers[i]["status"] = server_status + providers[provider_id]["status"] = server_status available_models = g.config.get("llm.available_models", {}) or {} enabled_models = list(g.config.get("llm.enabled_models", []) or []) From fe5e8f10b0fd7447f11fa9fade0393fa902797d5 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Mon, 1 Jun 2026 18:05:00 +0200 Subject: [PATCH 31/44] Further cleanup --- common/lib/user_input.py | 8 +++++--- processors/machine_learning/llm_prompter.py | 14 ++++++-------- processors/machine_learning/prompt_compass.py | 14 ++------------ 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/common/lib/user_input.py b/common/lib/user_input.py index ec0798b3e..df0d87c4a 100644 --- a/common/lib/user_input.py +++ b/common/lib/user_input.py @@ -1,10 +1,10 @@ -from attr.validators import is_callable from dateutil.parser import parse as parse_datetime from common.lib.exceptions import QueryParametersException from werkzeug.datastructures import ImmutableMultiDict import json import re +from itertools import chain class RequirementsNotMetException(Exception): """ @@ -435,9 +435,11 @@ def parse_value(settings, choice, other_input=None, silently_correct=True): # select box # one out of multiple options # return option if valid, or default - if choice not in settings.get("options"): + options = settings.get("options", []) + match_options = chain(*[list(o.keys()) for o in options.values()]) if type(options) is dict else options + if choice not in match_options: if not silently_correct: - raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(settings.get('options', {}).keys())}. {settings}") + raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(match_options)}.") else: return settings.get("default", "") else: diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 3d6633919..b5cb94e78 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -379,15 +379,13 @@ def process(self): return self.dataset.finish_with_error(f"Model {chosen_model_id} not supported") model = available_models[chosen_model_id] + provider = self.config.get("llm.providers").get(model["provider"]) - if model["provider_type"] == "api" and not api_key: - return self.dataset.finish_with_error(f"No API key provided for model {chosen_model_id}") - - available_providers = {p["url"]: p for p in self.config.get("llm.providers")} - if model["provider"] not in available_providers: - return self.dataset.finish_with_error(f"Model provider {model['provider']} unknown") + if not provider: + return self.dataset.finish_with_error(f"Model provider for {chosen_model_id} not currently available.") - provider = available_providers[model["provider"]] + if provider["type"] == "api" and not api_key: + return self.dataset.finish_with_error(f"No API key provided for model {chosen_model_id}") # Prompt validation base_prompt = self.parameters.get("prompt", "") @@ -434,7 +432,7 @@ def process(self): # Start LLM self.dataset.update_status("Connecting to LLM provider") base_url_str = "" if not provider["url"] else f" at base URL '{provider['url']}'" - self.dataset.log(f"Using LLM provider '{model['provider_type'] if provider['url'] else provider['provider']}' with model '{model['local_id']}'{base_url_str}") + self.dataset.log(f"Using LLM provider '{provider['_id']}' with model '{model['local_id']}'{base_url_str}") try: llm = LLMAdapter( config=self.config, diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 7e4343bf7..56fec81b2 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -211,24 +211,14 @@ def get_processor_pipeline(self): if short_name: self.dataset.update_label(f"PromptCompass ({short_name})") - chosen_model = "/".join(self.parameters.get("model").split("/")[1:]) - models = self.get_available_models(self.config) - if chosen_model not in models: + if self.parameters.get("model") not in config.get("llm.enabled_models", []): return self.dataset.finish_with_error(f"Model {self.parameters['model']} is not available, halting processor.") - model = models[chosen_model] - pipeline = [ { "type": "llm-prompter", "parameters": { - "api_or_local": "local" if model["provider"] == "local" else "api", - "api_model": chosen_model if model["provider"] != "local" else "", - "api_key": self.parameters.get("api_key"), - "api_custom_model_provider": "", - "local_provider": self.config.get("llm.provider_type"), - "local_base_url": self.config.get("llm.server"), - "ollama_model": chosen_model if model["provider"] == "local" else "", + "model": self.parameters.get("model"), "prompt": self.parameters[self.parameters["task"]], "structured_output": False, "temperature": self.parameters["temperature"], From bd1fd4594d6a428cac2a6c046f2382c277e69222 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:13:08 +0200 Subject: [PATCH 32/44] Fix INPUT_CHOICE checking if categorised --- common/lib/user_input.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/common/lib/user_input.py b/common/lib/user_input.py index df0d87c4a..a6fe10458 100644 --- a/common/lib/user_input.py +++ b/common/lib/user_input.py @@ -72,8 +72,6 @@ def parse_all(options, input, silently_correct=True): if type(input) is not dict and type(input) is not ImmutableMultiDict: raise TypeError("input must be a dictionary or ImmutableMultiDict") - print(input) - if type(input) is ImmutableMultiDict: # we are not using to_dict, because that messes up multi-selects input = {key: input.getlist(key) for key in input} @@ -436,7 +434,12 @@ def parse_value(settings, choice, other_input=None, silently_correct=True): # one out of multiple options # return option if valid, or default options = settings.get("options", []) - match_options = chain(*[list(o.keys()) for o in options.values()]) if type(options) is dict else options + + # if we have a categorised set of options, look deeper to get + # valid option values + is_categorised = all([type(o) is dict for o in options.values()]) + match_options = chain(*[list(o.keys()) for o in options.values()]) if is_categorised else options + if choice not in match_options: if not silently_correct: raise QueryParametersException(f"Invalid value selected; must be one of {', '.join(match_options)}.") From a6a8f11189f2ef447f5ab08040a31114f200ae26 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:37:18 +0200 Subject: [PATCH 33/44] Migrate script --- helper-scripts/migrate/migrate-1.54-1.55.py | 80 +++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 helper-scripts/migrate/migrate-1.54-1.55.py diff --git a/helper-scripts/migrate/migrate-1.54-1.55.py b/helper-scripts/migrate/migrate-1.54-1.55.py new file mode 100644 index 000000000..7a5074774 --- /dev/null +++ b/helper-scripts/migrate/migrate-1.54-1.55.py @@ -0,0 +1,80 @@ +import json +import sys +import os + +from pathlib import Path + +sys.path.insert(0, os.path.join(os.path.abspath(os.path.dirname(__file__)), "../..")) +from common.lib.database import Database +from common.lib.logger import Logger + +import configparser # noqa: E402 + +log = Logger(output=True) +ini = configparser.ConfigParser() +ini.read(Path(__file__).parent.parent.parent.resolve().joinpath("config/config.ini")) +db_config = ini["DATABASE"] + +db = Database( + logger=log, + dbname=db_config["db_name"], + user=db_config["db_user"], + password=db_config["db_password"], + host=db_config["db_host"], + port=db_config["db_port"], + appname="4cat-migrate", +) + +# the separate LLM server settings were consolidated into one overarching 'llm.providers' setting +print(" Checking if llm.providers setting exists...") +has_setting = db.fetchone( + "SELECT COUNT(*) AS num FROM settings WHERE name = 'llm.providers'" +) + +if has_setting["num"] > 0: + print(" ...exists, deleting old settings without overwriting") +else: + print(" ...does not exist, filling with currently configured proviers") + provider_type = db.fetchone("SELECT value FROM settings WHERE name = 'llm.provider_type'") + providers = {} + if not provider_type: + print(" ...no provider currently configured") + else: + url = db.fetchone("SELECT value FROM settings WHERE name = 'llm.server'") + host = url.split("/")[2] if "://" in url else "localhost" + auth_header = db.fetchone("SELECT value FROM settings WHERE name = 'llm.auth_type'") + auth_key = db.fetchone("SELECT value FROM settings WHERE name = 'llm.auth_key'") + provider_name = db.fetchone("SELECT value FROM settings WHERE name = 'llm.host_name'") + provider_id = f"{provider_type}-{host}" + + # vLLM and LM Studio are both openai-like + provider_type = {"ollama": "ollama"}.get(provider_type, "openai-like") + providers[provider_id] = { + "name": provider_name, + "type": provider_type, + "url": url, + "auth_header": auth_header, + "auth_key": auth_key, + "_id": provider_id + } + + # add API models, always present + providers["thirdparty-models"] = { + "name": "Third-party models", + "type": "api", + "url": "", + "auth_header": "", + "auth_key": "", + "_id": "thirdparty-models" + } + + db.insert("settings", {"name": "llm.providers", "value": json.dumps(providers)}) + print(f" ...added {len(providers)} providers") + +print(" Cleaning up old settings") +db.execute("DELETE FROM settings WHERE name LIKE 'llm.%' AND name NOT IN ('llm.providers', 'llm.available_models', 'llm.access')") + +print(" Removing all known models (will be re-indexed on 4CAT restart)") +db.upsert("settings", {"name": "llm.available_models", "value": "{}"}) + +print(" - done!") From 1863e7288b665b214c75990dad54daa6ced38ac9 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:39:04 +0200 Subject: [PATCH 34/44] PromptCompass is BACK --- processors/machine_learning/prompt_compass.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 56fec81b2..3fbc3adfa 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -13,7 +13,8 @@ import json -class PromptCompassRunner(): + +class PromptCompassRunner(ProcessorPreset): """ Run processor pipeline to feed prompts to LLM Prompter """ @@ -25,8 +26,8 @@ class PromptCompassRunner(): extension = "ndjson" references = [ - "This processor is an implementation of the stand-alone tool [PromptCompass](https://github.com/ErikBorra/PromptCompass) by Erik Borra.", - "See the processor options for references to the sources of each prompt in the library." + "This processor is an implementation of the stand-alone tool [PromptCompass](https://github.com/ErikBorra/PromptCompass) by Erik Borra.", + "See the processor options for references to the sources of each prompt in the library." ] @staticmethod @@ -39,7 +40,7 @@ def get_prompt_library(config): prompt_library_file = config.get("PATH_ROOT").joinpath("common/assets/prompt_library.json") if not prompt_library_file.exists(): return [] - + with prompt_library_file.open(encoding="utf-8") as infile: prompt_library = json.load(infile) @@ -149,7 +150,7 @@ def get_options(cls, parent_dataset=None, config=None): }) for i, task in enumerate(prompt_library): - task_key = f"task-{i+1}" + task_key = f"task-{i + 1}" options[task_key] = { "type": UserInput.OPTION_TEXT_LARGE, "requires": f"task=={task_key}", @@ -212,7 +213,8 @@ def get_processor_pipeline(self): self.dataset.update_label(f"PromptCompass ({short_name})") if self.parameters.get("model") not in config.get("llm.enabled_models", []): - return self.dataset.finish_with_error(f"Model {self.parameters['model']} is not available, halting processor.") + return self.dataset.finish_with_error( + f"Model {self.parameters['model']} is not available, halting processor.") pipeline = [ { @@ -234,7 +236,6 @@ def get_processor_pipeline(self): return pipeline - @staticmethod def validate_query(query, request, config): """ @@ -268,4 +269,4 @@ def map_item(item): :param item: :return: """ - return LLMPrompter.map_item(item) \ No newline at end of file + return LLMPrompter.map_item(item) From e7d0e42eb7306b0bd97de62bfa98584d2508798d Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:41:17 +0200 Subject: [PATCH 35/44] Fix erroneous config reference --- processors/machine_learning/prompt_compass.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 3fbc3adfa..8d1d75359 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -35,6 +35,7 @@ def get_prompt_library(config): """ Get prompt library from file + :param config: Config reader :return list: List of prompts and metadata """ prompt_library_file = config.get("PATH_ROOT").joinpath("common/assets/prompt_library.json") @@ -212,7 +213,7 @@ def get_processor_pipeline(self): if short_name: self.dataset.update_label(f"PromptCompass ({short_name})") - if self.parameters.get("model") not in config.get("llm.enabled_models", []): + if self.parameters.get("model") not in self.config.get("llm.enabled_models", []): return self.dataset.finish_with_error( f"Model {self.parameters['model']} is not available, halting processor.") From 9eee696bf1c6b5de0317a509f385c9f8b8c09140 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:47:54 +0200 Subject: [PATCH 36/44] Guard against forbidden model selection --- processors/machine_learning/llm_prompter.py | 6 ++++++ processors/machine_learning/prompt_compass.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index b5cb94e78..088fa6e75 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -5,6 +5,8 @@ import re import time import json +from itertools import chain + import jsonschema import requests @@ -1105,6 +1107,10 @@ def validate_query(query, request, config): if is_external_api and not query.get("api_key"): raise QueryParametersException("You need to enter an API key when using third-party models.") + allowed_models = LLMPrompter.get_model_library(config) + if query["model"] not in chain(*[v.values() for v in allowed_models.values()]): + raise QueryParametersException(f"The '{query['model']}' model is not currently available.") + # For media archive datasets, use_media won't be present in the query is_media_archive = "use_media" not in query diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 8d1d75359..00fad8cf2 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -249,6 +249,10 @@ def validate_query(query, request, config): :param config: :return: """ + allowed_models = LLMPrompter.get_model_library(config) + if query["model"] not in chain(*[v.values() for v in allowed_models.values()]): + raise QueryParametersException(f"The '{query['model']}' model is not currently available.") + if not query["model"].startswith("local") and not query.get("api_key"): raise QueryParametersException("You need to enter an API key when using third-party models.") From b1da7b5a1b48964a28d9666300a0d83a7767dc3d Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:47:58 +0200 Subject: [PATCH 37/44] Bump version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 006ffd9f6..94144784e 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ -1.54 +1.55 This file should not be modified. It is used by 4CAT to determine whether it needs to run migration scripts to e.g. update the database structure to a more From 3d725eacffce9a8ff688373beea5d6184fc400be Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:49:45 +0200 Subject: [PATCH 38/44] Fix default value for llm.providers --- common/lib/config_definition.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index 0c06d0203..88c015dac 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -584,15 +584,15 @@ }, "llm.providers": { "type": UserInput.OPTION_MULTI_OPTION, - "default": [ - { + "default": { + "thirdparty-models": { "name": "Third-party APIs (OpenAI, Google, Claude, Mistral, etc)", "type": "api", "url": "", "auth_header": "", "auth_key": "" } - ], + }, "global": True, "help": "LLM providers", "dict_key": lambda v: re.sub(r"[^0-9a-zA-Z ]", "", v["name"]).lower().replace(" ", "-") + (("-" + v["url"].split("/")[2].lower()) if "://" in v["url"] else ""), From 41c701dec9f1513b0efbd2957786db3e6a99a0e7 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:50:54 +0200 Subject: [PATCH 39/44] sdadasdasdads --- processors/machine_learning/prompt_compass.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 00fad8cf2..17f7326d8 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -1,6 +1,9 @@ """ Use a prompt from a preset list """ +import json +from itertools import chain + from backend.lib.preset import ProcessorPreset from common.lib.helpers import UserInput @@ -11,9 +14,6 @@ from processors.machine_learning.llm_prompter import LLMPrompter -import json - - class PromptCompassRunner(ProcessorPreset): """ Run processor pipeline to feed prompts to LLM Prompter From d46821acbfef87f2a955d6375306e946f0a6bb61 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 11:54:00 +0200 Subject: [PATCH 40/44] Update description of llm.access setting --- common/lib/config_definition.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index 88c015dac..0635a4c97 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -653,9 +653,10 @@ }, "llm.access": { "type": UserInput.OPTION_TOGGLE, - "help": "LLM Access", + "help": "Local LLM Access", "default": False, - "tooltip": "Use tags or individual users to allow access to the LLM server (or set True in global for all).", + "tooltip": "If disabled, can only use LLMs from the 'Third-party models' provider. Can be configured per user " + "or tag.", }, # TODO: add setting to restrict models per user/group? From 1a147e8c0ee0cab89f89955599776152bf6b33ac Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 12:25:53 +0200 Subject: [PATCH 41/44] Add filename and line no to test error output --- tests/test_modules.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/test_modules.py b/tests/test_modules.py index 87ffc06ad..3b9a82579 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -1,3 +1,4 @@ +import traceback import pytest import time import json @@ -218,7 +219,8 @@ def test_processors(logger, fourcat_modules, mock_job, mock_job_queue, mock_data processor_class.get_options(parent_dataset=mock_dataset, config=mock_basic_config) except Exception as e: # Log the failure and add it to the failures list - logger.error(f"Processor {processor_name} failed in get_options: {e}") + trace = traceback.TracebackException.from_exception(e).stack[-1] + logger.error(f"Processor {processor_name} failed in get_options: {e} (in {trace.filename.split('/')[-1]}:{trace.lineno})") failures.append((processor_name, str(e))) # Check if processor Class has "options" attribute @@ -230,11 +232,13 @@ def test_processors(logger, fourcat_modules, mock_job, mock_job_queue, mock_data try: processor_class(logger, job=mock_job, queue=mock_job_queue, manager=None, modules=fourcat_modules) except Exception as e: - logger.error(f"Processor {processor_name} failed in process(): {e}") + trace = traceback.TracebackException.from_exception(e).stack[-1] + logger.error(f"Processor {processor_name} failed in process(): {e} (in {trace.filename.split('/')[-1]}:{trace.lineno})") failures.append((processor_name, str(e))) except Exception as e: - logger.error(f"Processor {processor_name} failed while setting up: {e}") + trace = traceback.TracebackException.from_exception(e).stack[-1] + logger.error(f"Processor {processor_name} failed while setting up: {e} (in {trace.filename.split('/')[-1]}:{trace.lineno})") failures.append((processor_name, str(e))) From fc91a4435bf6975cb507934f7732947f5cbb3833 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 12:27:32 +0200 Subject: [PATCH 42/44] Fix init issues in LLM processors --- processors/machine_learning/llm_prompter.py | 2 +- processors/machine_learning/prompt_compass.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index 088fa6e75..5b4080e76 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -73,7 +73,7 @@ def get_queue_id(cls, remote_id, details, dataset) -> str: @classmethod def get_model_library(cls, config): - available_models = config.get("llm.available_models", []) + available_models = config.get("llm.available_models", {}) enabled_model_ids = config.get("llm.enabled_models", []) providers = config.get("llm.providers", {}) if not config.get("llm.access"): diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 17f7326d8..4f4c7d0d8 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -88,7 +88,7 @@ def get_options(cls, parent_dataset=None, config=None): :return: """ prompt_library = cls.get_prompt_library(config) - available_models = config.get("llm.available_models", []) + available_models = config.get("llm.available_models", {}) enabled_model_ids = config.get("llm.enabled_models", []) if not config.get("llm.access"): enabled_model_ids = [_ for _ in enabled_model_ids if _.startswith("api-")] @@ -101,7 +101,7 @@ def get_options(cls, parent_dataset=None, config=None): "help": "Model to use", "tooltip": "Third-party models require an API key to run.", "options": LLMPrompter.get_model_library(config), - "default": sorted(list(enabled_models.keys()), key=lambda k: not k.startswith("api"))[-1] + "default": sorted(list(enabled_models.keys()), key=lambda k: not k.startswith("api"))[-1] if enabled_models else "" }, } From 8c4b34a1f94521208d1f6066671fde2df88e276a Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 12:38:23 +0200 Subject: [PATCH 43/44] Fix "add model" panel show/hide on LLM page --- webtool/templates/controlpanel/llm-server.html | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index 691df0d42..04e29c80a 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -140,9 +140,7 @@

{# Pull a new model, if an ollama server is configured #} - {% set can_add_models = False %} - {% for provider_id, provider in providers.items() %}{% if provider.type == "ollama" %}{% set can_add_models = True %}{% endif %}{% endfor %} - {% if can_add_models %} + {% if providers.values()|selectattr("type", "equalto", "ollama")|list %}

Install new LLMs

Enter a model name (e.g. llama3:8b) to make it available via the configured provider. For From 46fee9fc8e4c6d854a79baac369f32108ff86823 Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Tue, 2 Jun 2026 12:41:37 +0200 Subject: [PATCH 44/44] the --- webtool/templates/controlpanel/llm-server.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html index 04e29c80a..3dc5b5034 100644 --- a/webtool/templates/controlpanel/llm-server.html +++ b/webtool/templates/controlpanel/llm-server.html @@ -144,7 +144,7 @@

Install new LLMs

Enter a model name (e.g. llama3:8b) to make it available via the configured provider. For - Ollama, model names can be found in + Ollama, model names can be found in the model library.

Pulling large models may take several minutes; the job runs in the background. Note that 4CAT cannot install models for all LLM providers; if your provider is not listed below, it may not be able to add additional @@ -166,7 +166,7 @@

Install new LLMs

- +