From 5bc3c55f85c218d45a016788f3385025750c941a Mon Sep 17 00:00:00 2001
From: Chris
Date: Mon, 1 Dec 2025 08:48:02 +0100
Subject: [PATCH 1/3] feat(backend): add authentication support to
 OpenAIHTTPBackend

Add api_key, bearer_token, and headers parameters to OpenAIHTTPBackend
to enable authentication with OpenAI-compatible servers that require it.

Changes:
- Add api_key, bearer_token, and headers params to __init__
- Read default values from settings.openai when not explicitly provided
- Apply auth headers to backend validation (/health endpoint)
- Apply auth headers to available_models() requests
- Merge default headers with request-specific headers in resolve()

This fixes issues when using GuideLLM with a LiteLLM proxy or other
OpenAI-compatible servers that require authentication on all endpoints,
including /health.

Usage:

    # Via environment variables (recommended)
    export GUIDELLM__OPENAI__API_KEY=sk-xxx
    guidellm benchmark run --target http://litellm:4000 ...

    # Via backend kwargs
    guidellm benchmark run --target http://litellm:4000 \
        --backend-kwargs '{"api_key": "sk-xxx"}' ...
---
 src/guidellm/backends/openai.py | 46 ++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/src/guidellm/backends/openai.py b/src/guidellm/backends/openai.py
index 57e2d95a6..d62badf86 100644
--- a/src/guidellm/backends/openai.py
+++ b/src/guidellm/backends/openai.py
@@ -52,6 +52,9 @@ def __init__(
         self,
         target: str,
         model: str = "",
+        api_key: str | None = None,
+        bearer_token: str | None = None,
+        headers: dict[str, str] | None = None,
         api_routes: dict[str, str] | None = None,
         response_handlers: dict[str, Any] | None = None,
         timeout: float = 60.0,
@@ -65,6 +68,9 @@ def __init__(
 
         :param target: Base URL of the OpenAI-compatible server
         :param model: Model identifier for generation requests
+        :param api_key: API key for authentication (used as Bearer token)
+        :param bearer_token: Bearer token for authentication (alternative to api_key)
+        :param headers: Additional headers to include in all requests
         :param api_routes: Custom API endpoint routes mapping
         :param response_handlers: Custom response handlers for different request types
         :param timeout: Request timeout in seconds
@@ -79,6 +85,29 @@ def __init__(
         self.target = target.rstrip("/").removesuffix("/v1")
         self.model = model
 
+        # Build default headers with authentication
+        from guidellm.settings import settings
+
+        self._default_headers: dict[str, str] = {}
+
+        # Merge headers from settings first (lowest priority)
+        if settings.openai.headers:
+            self._default_headers.update(settings.openai.headers)
+
+        # Add explicit headers parameter (medium priority)
+        if headers:
+            self._default_headers.update(headers)
+
+        # Resolve API key (highest priority): explicit param > settings
+        resolved_api_key = api_key or settings.openai.api_key
+        resolved_bearer_token = bearer_token or settings.openai.bearer_token
+
+        # Set Authorization header if we have credentials
+        if resolved_api_key:
+            self._default_headers["Authorization"] = f"Bearer {resolved_api_key}"
+        elif resolved_bearer_token:
+            self._default_headers["Authorization"] = f"Bearer {resolved_bearer_token}"
+
         # Store configuration
         self.api_routes = api_routes or {
             "health": "health",
@@ -184,7 +213,7 @@ async def available_models(self) -> list[str]:
             raise RuntimeError("Backend not started up for process.")
 
         target = f"{self.target}/{self.api_routes['models']}"
-        response = await self._async_client.get(target)
+        response = await self._async_client.get(target, headers=self._default_headers)
         response.raise_for_status()
 
         return [item["id"] for item in response.json()["data"]]
@@ -245,13 +274,19 @@ async def resolve(  # type: ignore[override]
             request.request_type, handler_overrides=self.response_handlers
         )
 
+        # Merge default headers with request-specific headers
+        merged_headers = {
+            **self._default_headers,
+            **(request.arguments.headers or {}),
+        }
+
         if not request.arguments.stream:
             request_info.timings.request_start = time.time()
             response = await self._async_client.request(
                 request.arguments.method or "POST",
                 request_url,
                 params=request.arguments.params,
-                headers=request.arguments.headers,
+                headers=merged_headers,
                 json=request_json,
                 data=request_data,
                 files=request_files,
@@ -269,7 +304,7 @@ async def resolve(  # type: ignore[override]
                 request.arguments.method or "POST",
                 request_url,
                 params=request.arguments.params,
-                headers=request.arguments.headers,
+                headers=merged_headers,
                 json=request_json,
                 data=request_data,
                 files=request_files,
@@ -331,4 +366,9 @@ def _resolve_validate_kwargs(
         if "method" not in validate_kwargs:
             validate_kwargs["method"] = "GET"
 
+        # Include default headers (with auth) in validation request
+        if self._default_headers:
+            existing_headers = validate_kwargs.get("headers", {})
+            validate_kwargs["headers"] = {**self._default_headers, **existing_headers}
+
        return validate_kwargs
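Note on the merge order implemented above: settings.openai.headers is applied
first, the explicit headers parameter overrides it, the Authorization header
derived from api_key (or bearer_token when api_key is unset) overrides both,
and request-specific headers win over all defaults in resolve(). A minimal
sketch of what this looks like from calling code, assuming OpenAIHTTPBackend
is importable from guidellm.backends.openai, that constructing it performs no
network I/O, and that nothing conflicting is set in settings.openai; the model
id and extra header are hypothetical:

    # Sketch only: explicit kwargs override settings.openai defaults.
    from guidellm.backends.openai import OpenAIHTTPBackend

    backend = OpenAIHTTPBackend(
        target="http://litellm:4000",
        model="my-model",            # hypothetical model id
        api_key="sk-xxx",            # becomes "Authorization: Bearer sk-xxx"
        headers={"X-Team": "perf"},  # hypothetical header, sent on every request
    )
    # Per the merge order above, every request now carries:
    #   X-Team: perf
    #   Authorization: Bearer sk-xxx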
From 300d6691937194bf38646207d5636a0f0506da93 Mon Sep 17 00:00:00 2001
From: Chris
Date: Tue, 2 Dec 2025 22:29:33 +0100
Subject: [PATCH 2/3] feat: add Docker support and hf: prefix for HuggingFace
 datasets

---
 Dockerfile                                     | 52 +++++++++++++++++++
 .../data/deserializers/huggingface.py          | 34 ++++++++----
 2 files changed, 76 insertions(+), 10 deletions(-)
 create mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..4f3384202
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,52 @@
+# GuideLLM with uv, CUDA 13, and embedded dataset
+FROM nvidia/cuda:13.0.0-base-ubuntu22.04
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
+    ffmpeg \
+    curl \
+    ca-certificates \
+    software-properties-common \
+    && add-apt-repository ppa:deadsnakes/ppa \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+    python3.12 \
+    python3.12-venv \
+    python3.12-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install uv
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
+
+# Create symlink for python command
+RUN ln -s /usr/bin/python3.12 /usr/bin/python
+
+# Create non-root user
+RUN useradd -m -s /bin/bash guidellm
+USER guidellm
+WORKDIR /home/guidellm
+
+# Copy source code
+COPY --chown=guidellm:guidellm . /home/guidellm/guidellm-src
+
+# Install guidellm with uv
+WORKDIR /home/guidellm/guidellm-src
+RUN uv venv --python python3.12 /home/guidellm/.venv && \
+    . /home/guidellm/.venv/bin/activate && \
+    uv pip install --no-cache -e .
+
+# Pre-download LibriSpeech dataset (parquet revision)
+RUN . /home/guidellm/.venv/bin/activate && \
+    python -c "from datasets import load_dataset; load_dataset('hf://datasets/distil-whisper/librispeech_asr@refs%2Fconvert%2Fparquet', 'clean', split='validation', streaming=False, cache_dir='/home/guidellm/.cache/huggingface')"
+
+# Add venv to PATH
+ENV PATH="/home/guidellm/.venv/bin:$PATH"
+ENV HF_HOME="/home/guidellm/.cache/huggingface"
+
+# Create results volume
+WORKDIR /home/guidellm
+VOLUME /results
+
+ENTRYPOINT ["guidellm"]
+CMD ["benchmark", "run"]
diff --git a/src/guidellm/data/deserializers/huggingface.py b/src/guidellm/data/deserializers/huggingface.py
index efe6882a5..95eb3a776 100644
--- a/src/guidellm/data/deserializers/huggingface.py
+++ b/src/guidellm/data/deserializers/huggingface.py
@@ -46,7 +46,20 @@ def __call__(
 
         load_error = None
 
-        if (
+        # Handle hf: prefix for explicit HuggingFace Hub datasets
+        if isinstance(data, str) and data.startswith("hf:"):
+            hub_id = data[3:]  # Remove "hf:" prefix
+            try:
+                return load_dataset(hub_id, **data_kwargs)
+            except (
+                FileNotFoundDatasetsError,
+                DatasetNotFoundError,
+                DataFilesNotFoundError,
+            ) as err:
+                load_error = err
+                # Fall through to raise error below
+
+        elif (
             isinstance(data, str | Path)
             and (path := Path(data)).exists()
             and ((path.is_file() and path.suffix == ".py") or path.is_dir())
@@ -71,15 +84,16 @@ def __call__(
             ) as err2:
                 load_error = err2
 
-        try:
-            # Handle dataset identifier from the Hugging Face Hub
-            return load_dataset(str(data), **data_kwargs)
-        except (
-            FileNotFoundDatasetsError,
-            DatasetNotFoundError,
-            DataFilesNotFoundError,
-        ) as err:
-            load_error = err
+        else:
+            try:
+                # Handle dataset identifier from the Hugging Face Hub
+                return load_dataset(str(data), **data_kwargs)
+            except (
+                FileNotFoundDatasetsError,
+                DatasetNotFoundError,
+                DataFilesNotFoundError,
+            ) as err:
+                load_error = err
 
         not_supported = DataNotSupportedError(
             "Unsupported data for HuggingFaceDatasetDeserializer, "
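Note on the hf: prefix added above: it is an explicit opt-in to the Hub. The
prefix is stripped and the remainder is passed straight to
datasets.load_dataset(), skipping the local-path checks entirely. A minimal
sketch of the equivalence, with the dataset id, config name, and split
borrowed from the Dockerfile's pre-download step:

    # Sketch only: "hf:org/name" resolves to load_dataset("org/name", **data_kwargs).
    from datasets import load_dataset

    data = "hf:distil-whisper/librispeech_asr"
    assert data.startswith("hf:")
    dataset = load_dataset(data[3:], "clean", split="validation")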
From 6dfd250c3d6675e89e44789741a01f9348c64dba Mon Sep 17 00:00:00 2001
From: Chris
Date: Tue, 2 Dec 2025 23:29:56 +0100
Subject: [PATCH 3/3] feat(docker): add .dockerignore and optimize Dockerfile
 to reduce image size

---
 .dockerignore | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 Dockerfile    | 10 +++++++---
 2 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 .dockerignore

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 000000000..2b1cd6ba4
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,49 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+.venv
+.venv_temp
+venv/
+ENV/
+env/
+
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# Git
+.git/
+.gitignore
+
+# Dataset cache (will be copied separately)
+dataset_cache/
+
+# Logs
+*.log
+
+# OS
+.DS_Store
+Thumbs.db
diff --git a/Dockerfile b/Dockerfile
index 4f3384202..b68dc53d5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,10 @@
 # GuideLLM with uv, CUDA 13, and embedded dataset
 FROM nvidia/cuda:13.0.0-base-ubuntu22.04
 
+# Configure timezone non-interactively
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=UTC
+
 # Install system dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
     git \
@@ -36,9 +40,9 @@ RUN uv venv --python python3.12 /home/guidellm/.venv && \
     . /home/guidellm/.venv/bin/activate && \
     uv pip install --no-cache -e .
 
-# Pre-download LibriSpeech dataset (parquet revision)
-RUN . /home/guidellm/.venv/bin/activate && \
-    python -c "from datasets import load_dataset; load_dataset('hf://datasets/distil-whisper/librispeech_asr@refs%2Fconvert%2Fparquet', 'clean', split='validation', streaming=False, cache_dir='/home/guidellm/.cache/huggingface')"
+# NOTE: Dataset will be downloaded at runtime to save image size
+# The LibriSpeech dataset (~500MB) would make the image too large
+# HuggingFace will cache it in HF_HOME=/home/guidellm/.cache/huggingface
 
 # Add venv to PATH
 ENV PATH="/home/guidellm/.venv/bin:$PATH"
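Note on the runtime download: with the pre-download step removed, the first
benchmark run of the image fetches LibriSpeech into HF_HOME. To avoid paying
that cost on every fresh container, the cache directory (for example, a
mounted volume at /home/guidellm/.cache/huggingface) can be pre-warmed; the
sketch below reuses the exact load_dataset call the removed RUN step executed
at build time:

    # Sketch only: pre-warm the cache the image expects at
    # HF_HOME=/home/guidellm/.cache/huggingface
    from datasets import load_dataset

    load_dataset(
        "hf://datasets/distil-whisper/librispeech_asr@refs%2Fconvert%2Fparquet",
        "clean",
        split="validation",
        streaming=False,
        cache_dir="/home/guidellm/.cache/huggingface",
    )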