Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions .dockerignore
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Drop this we have an OCI compliant .containerignore:

https://github.com/vllm-project/guidellm/blob/main/.containerignore

Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Python build artifacts, bytecode caches, and packaging output
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Virtual environments (recreated inside the image; never copy host venvs)
.venv
.venv_temp
venv/
ENV/
env/

# IDE and editor state (workspace settings, vim swap/backup files)
.vscode/
.idea/
*.swp
*.swo
*~

# Git history and ignore rules (not needed in the build context)
.git/
.gitignore

# Dataset cache (will be copied separately)
dataset_cache/

# Log files from local runs
*.log

# OS-generated metadata files
.DS_Store
Thumbs.db
56 changes: 56 additions & 0 deletions Dockerfile
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Drop this we have an OCI compliant Containerfile:

https://github.com/vllm-project/guidellm/blob/main/Containerfile

Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# GuideLLM with uv, CUDA 13, and embedded dataset
FROM nvidia/cuda:13.0.0-base-ubuntu22.04

# Configure timezone non-interactively.
# Use a build-scoped ARG (not ENV) for DEBIAN_FRONTEND so the setting does
# not leak into the runtime environment of the final image or derived
# containers, where it would suppress interactive prompts unexpectedly.
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=UTC

# Install system dependencies:
# - git/curl/ca-certificates: fetching sources and HTTPS downloads
# - ffmpeg: audio processing for speech datasets
# - software-properties-common: provides add-apt-repository for the
#   deadsnakes PPA, which supplies Python 3.12 on Ubuntu 22.04
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    ffmpeg \
    curl \
    ca-certificates \
    software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
    python3.12 \
    python3.12-venv \
    python3.12-dev \
    && rm -rf /var/lib/apt/lists/*

# Install uv from the official distroless image.
# NOTE(review): :latest is not reproducible -- consider pinning a version
# tag (e.g. ghcr.io/astral-sh/uv:0.x.y) for deterministic builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Create symlink so the bare `python` command resolves to Python 3.12
RUN ln -s /usr/bin/python3.12 /usr/bin/python

# Create and switch to a non-root user for everything that follows
RUN useradd -m -s /bin/bash guidellm
USER guidellm
WORKDIR /home/guidellm

# Copy source code owned by the non-root user
COPY --chown=guidellm:guidellm . /home/guidellm/guidellm-src

# Install guidellm into a dedicated venv with uv.
# Editable install (-e) keeps the copied source tree as the package source.
WORKDIR /home/guidellm/guidellm-src
RUN uv venv --python python3.12 /home/guidellm/.venv && \
    . /home/guidellm/.venv/bin/activate && \
    uv pip install --no-cache -e .

# NOTE: Dataset will be downloaded at runtime to save image size
# The LibriSpeech dataset (~500MB) would make the image too large
# HuggingFace will cache it in HF_HOME=/home/guidellm/.cache/huggingface

# Put the venv on PATH so the `guidellm` entrypoint resolves, and point
# the HuggingFace cache at a user-writable location.
ENV PATH="/home/guidellm/.venv/bin:$PATH"
ENV HF_HOME="/home/guidellm/.cache/huggingface"

# Create results volume for benchmark output
WORKDIR /home/guidellm
VOLUME /results

ENTRYPOINT ["guidellm"]
CMD ["benchmark", "run"]
46 changes: 43 additions & 3 deletions src/guidellm/backends/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ def __init__(
self,
target: str,
model: str = "",
api_key: str | None = None,
bearer_token: str | None = None,
headers: dict[str, str] | None = None,
api_routes: dict[str, str] | None = None,
response_handlers: dict[str, Any] | None = None,
timeout: float = 60.0,
Expand All @@ -65,6 +68,9 @@ def __init__(

:param target: Base URL of the OpenAI-compatible server
:param model: Model identifier for generation requests
:param api_key: API key for authentication (used as Bearer token)
:param bearer_token: Bearer token for authentication (alternative to api_key)
:param headers: Additional headers to include in all requests
:param api_routes: Custom API endpoint routes mapping
:param response_handlers: Custom response handlers for different request types
:param timeout: Request timeout in seconds
Expand All @@ -79,6 +85,29 @@ def __init__(
self.target = target.rstrip("/").removesuffix("/v1")
self.model = model

# Build default headers with authentication
from guidellm.settings import settings
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't late import. This should be in import section at top.


self._default_headers: dict[str, str] = {}

# Merge headers from settings first (lowest priority)
if settings.openai.headers:
self._default_headers.update(settings.openai.headers)

# Add explicit headers parameter (medium priority)
if headers:
self._default_headers.update(headers)

# Resolve API key (highest priority): explicit param > settings
resolved_api_key = api_key or settings.openai.api_key
resolved_bearer_token = bearer_token or settings.openai.bearer_token
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you actually remove all references to bearer_token. I don't think we need both.


# Set Authorization header if we have credentials
if resolved_api_key:
self._default_headers["Authorization"] = f"Bearer {resolved_api_key}"
elif resolved_bearer_token:
self._default_headers["Authorization"] = f"Bearer {resolved_bearer_token}"

# Store configuration
self.api_routes = api_routes or {
"health": "health",
Expand Down Expand Up @@ -184,7 +213,7 @@ async def available_models(self) -> list[str]:
raise RuntimeError("Backend not started up for process.")

target = f"{self.target}/{self.api_routes['models']}"
response = await self._async_client.get(target)
response = await self._async_client.get(target, headers=self._default_headers)
response.raise_for_status()

return [item["id"] for item in response.json()["data"]]
Expand Down Expand Up @@ -245,13 +274,19 @@ async def resolve( # type: ignore[override]
request.request_type, handler_overrides=self.response_handlers
)

# Merge default headers with request-specific headers
merged_headers = {
**self._default_headers,
**(request.arguments.headers or {}),
}

if not request.arguments.stream:
request_info.timings.request_start = time.time()
response = await self._async_client.request(
request.arguments.method or "POST",
request_url,
params=request.arguments.params,
headers=request.arguments.headers,
headers=merged_headers,
json=request_json,
data=request_data,
files=request_files,
Expand All @@ -269,7 +304,7 @@ async def resolve( # type: ignore[override]
request.arguments.method or "POST",
request_url,
params=request.arguments.params,
headers=request.arguments.headers,
headers=merged_headers,
json=request_json,
data=request_data,
files=request_files,
Expand Down Expand Up @@ -331,4 +366,9 @@ def _resolve_validate_kwargs(
if "method" not in validate_kwargs:
validate_kwargs["method"] = "GET"

# Include default headers (with auth) in validation request
if self._default_headers:
existing_headers = validate_kwargs.get("headers", {})
validate_kwargs["headers"] = {**self._default_headers, **existing_headers}

return validate_kwargs
34 changes: 24 additions & 10 deletions src/guidellm/data/deserializers/huggingface.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you move these changes to a new PR? We have had some internal discussion on making arguments to --data more explicit.

Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,20 @@ def __call__(

load_error = None

if (
# Handle hf: prefix for explicit HuggingFace Hub datasets
if isinstance(data, str) and data.startswith("hf:"):
hub_id = data[3:] # Remove "hf:" prefix
try:
return load_dataset(hub_id, **data_kwargs)
except (
FileNotFoundDatasetsError,
DatasetNotFoundError,
DataFilesNotFoundError,
) as err:
load_error = err
# Fall through to raise error below

elif (
isinstance(data, str | Path)
and (path := Path(data)).exists()
and ((path.is_file() and path.suffix == ".py") or path.is_dir())
Expand All @@ -71,15 +84,16 @@ def __call__(
) as err2:
load_error = err2

try:
# Handle dataset identifier from the Hugging Face Hub
return load_dataset(str(data), **data_kwargs)
except (
FileNotFoundDatasetsError,
DatasetNotFoundError,
DataFilesNotFoundError,
) as err:
load_error = err
else:
try:
# Handle dataset identifier from the Hugging Face Hub
return load_dataset(str(data), **data_kwargs)
except (
FileNotFoundDatasetsError,
DatasetNotFoundError,
DataFilesNotFoundError,
) as err:
load_error = err

not_supported = DataNotSupportedError(
"Unsupported data for HuggingFaceDatasetDeserializer, "
Expand Down