feat(backend): add authentication support to OpenAIHTTPBackend #491
New file (ignore rules for the container build), hunk `@@ -0,0 +1,49 @@`:

```
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Virtual environments
.venv
.venv_temp
venv/
ENV/
env/

# IDEs
.vscode/
.idea/
*.swp
*.swo
*~

# Git
.git/
.gitignore

# Dataset cache (will be copied separately)
dataset_cache/

# Logs
*.log

# OS
.DS_Store
Thumbs.db
```
Collaborator: Drop this; we have an OCI-compliant Containerfile: https://github.com/vllm-project/guidellm/blob/main/Containerfile
New container build file (GuideLLM with uv and CUDA 13), hunk `@@ -0,0 +1,56 @@`:

```dockerfile
# GuideLLM with uv, CUDA 13, and embedded dataset
FROM nvidia/cuda:13.0.0-base-ubuntu22.04

# Configure timezone non-interactively
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=UTC

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    ffmpeg \
    curl \
    ca-certificates \
    software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
    python3.12 \
    python3.12-venv \
    python3.12-dev \
    && rm -rf /var/lib/apt/lists/*

# Install uv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Create symlink for python command
RUN ln -s /usr/bin/python3.12 /usr/bin/python

# Create non-root user
RUN useradd -m -s /bin/bash guidellm
USER guidellm
WORKDIR /home/guidellm

# Copy source code
COPY --chown=guidellm:guidellm . /home/guidellm/guidellm-src

# Install guidellm with uv
WORKDIR /home/guidellm/guidellm-src
RUN uv venv --python python3.12 /home/guidellm/.venv && \
    . /home/guidellm/.venv/bin/activate && \
    uv pip install --no-cache -e .

# NOTE: Dataset will be downloaded at runtime to save image size
# The LibriSpeech dataset (~500MB) would make the image too large
# HuggingFace will cache it in HF_HOME=/home/guidellm/.cache/huggingface

# Add venv to PATH
ENV PATH="/home/guidellm/.venv/bin:$PATH"
ENV HF_HOME="/home/guidellm/.cache/huggingface"

# Create results volume
WORKDIR /home/guidellm
VOLUME /results

ENTRYPOINT ["guidellm"]
CMD ["benchmark", "run"]
```
Changes to the OpenAI HTTP backend (`OpenAIHTTPBackend`):

```diff
@@ -52,6 +52,9 @@ def __init__(
         self,
         target: str,
         model: str = "",
+        api_key: str | None = None,
+        bearer_token: str | None = None,
+        headers: dict[str, str] | None = None,
         api_routes: dict[str, str] | None = None,
         response_handlers: dict[str, Any] | None = None,
         timeout: float = 60.0,
```
@@ -65,6 +68,9 @@ def __init__( | |
|
|
||
| :param target: Base URL of the OpenAI-compatible server | ||
| :param model: Model identifier for generation requests | ||
| :param api_key: API key for authentication (used as Bearer token) | ||
| :param bearer_token: Bearer token for authentication (alternative to api_key) | ||
| :param headers: Additional headers to include in all requests | ||
| :param api_routes: Custom API endpoint routes mapping | ||
| :param response_handlers: Custom response handlers for different request types | ||
| :param timeout: Request timeout in seconds | ||
|
|
@@ -79,6 +85,29 @@ def __init__( | |
| self.target = target.rstrip("/").removesuffix("/v1") | ||
| self.model = model | ||
|
|
||
| # Build default headers with authentication | ||
| from guidellm.settings import settings | ||
|
Collaborator: Don't late import; this should be in the import section at the top.
The same hunk continues:

```diff
+
+        self._default_headers: dict[str, str] = {}
+
+        # Merge headers from settings first (lowest priority)
+        if settings.openai.headers:
+            self._default_headers.update(settings.openai.headers)
+
+        # Add explicit headers parameter (medium priority)
+        if headers:
+            self._default_headers.update(headers)
+
+        # Resolve API key (highest priority): explicit param > settings
+        resolved_api_key = api_key or settings.openai.api_key
+        resolved_bearer_token = bearer_token or settings.openai.bearer_token
```
Collaborator: Can you actually remove all references to bearer_token? I don't think we need both.
The same hunk continues:

```diff
+
+        # Set Authorization header if we have credentials
+        if resolved_api_key:
+            self._default_headers["Authorization"] = f"Bearer {resolved_api_key}"
+        elif resolved_bearer_token:
+            self._default_headers["Authorization"] = f"Bearer {resolved_bearer_token}"
+
         # Store configuration
         self.api_routes = api_routes or {
             "health": "health",
```
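For orientation, a rough usage sketch of the precedence logic added above. This is not from the PR: the import path, URL, model name, and key are placeholder assumptions, and it assumes `settings.openai.headers` and `settings.openai.api_key` are unset.

```python
# Hypothetical sketch only: the import path and all values below are assumptions;
# only the parameter names (target, model, api_key, headers) come from this PR.
from guidellm.backend import OpenAIHTTPBackend  # import path assumed

backend = OpenAIHTTPBackend(
    target="http://localhost:8000/v1",         # placeholder server URL
    model="example-model",                     # placeholder model id
    api_key="sk-example",                      # resolved ahead of settings.openai.api_key
    headers={"X-Request-Source": "guidellm"},  # layered on top of settings.openai.headers
)

# With settings.openai.headers and settings.openai.api_key unset, the defaults
# built in __init__ would be:
#   {"X-Request-Source": "guidellm", "Authorization": "Bearer sk-example"}
print(backend._default_headers)
```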
@@ -184,7 +213,7 @@ async def available_models(self) -> list[str]: | |
| raise RuntimeError("Backend not started up for process.") | ||
|
|
||
| target = f"{self.target}/{self.api_routes['models']}" | ||
| response = await self._async_client.get(target) | ||
| response = await self._async_client.get(target, headers=self._default_headers) | ||
| response.raise_for_status() | ||
|
|
||
| return [item["id"] for item in response.json()["data"]] | ||
|
|
@@ -245,13 +274,19 @@ async def resolve( # type: ignore[override] | |
| request.request_type, handler_overrides=self.response_handlers | ||
| ) | ||
|
|
||
| # Merge default headers with request-specific headers | ||
| merged_headers = { | ||
| **self._default_headers, | ||
| **(request.arguments.headers or {}), | ||
| } | ||
|
|
||
| if not request.arguments.stream: | ||
| request_info.timings.request_start = time.time() | ||
| response = await self._async_client.request( | ||
| request.arguments.method or "POST", | ||
| request_url, | ||
| params=request.arguments.params, | ||
| headers=request.arguments.headers, | ||
| headers=merged_headers, | ||
| json=request_json, | ||
| data=request_data, | ||
| files=request_files, | ||
|
|
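As a plain-Python sanity check of the merge order used above (no guidellm imports involved): later keys win, so request-specific headers override the backend defaults while untouched defaults such as `Authorization` carry through.

```python
# Standalone illustration of the {**defaults, **per_request} merge semantics.
default_headers = {"Authorization": "Bearer sk-example", "X-Trace": "default"}
per_request_headers = {"X-Trace": "override"}

merged = {**default_headers, **per_request_headers}
assert merged == {"Authorization": "Bearer sk-example", "X-Trace": "override"}
```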
@@ -269,7 +304,7 @@ async def resolve( # type: ignore[override] | |
| request.arguments.method or "POST", | ||
| request_url, | ||
| params=request.arguments.params, | ||
| headers=request.arguments.headers, | ||
| headers=merged_headers, | ||
| json=request_json, | ||
| data=request_data, | ||
| files=request_files, | ||
|
|
@@ -331,4 +366,9 @@ def _resolve_validate_kwargs( | |
| if "method" not in validate_kwargs: | ||
| validate_kwargs["method"] = "GET" | ||
|
|
||
| # Include default headers (with auth) in validation request | ||
| if self._default_headers: | ||
| existing_headers = validate_kwargs.get("headers", {}) | ||
| validate_kwargs["headers"] = {**self._default_headers, **existing_headers} | ||
|
|
||
| return validate_kwargs | ||
|
Collaborator: Can you move these changes to a new PR? We have had some internal discussion on making arguments to …
Collaborator: Drop this; we have an OCI-compliant .containerignore: https://github.com/vllm-project/guidellm/blob/main/.containerignore