Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions .github/scripts/cuda-setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env bash
# Shared CUDA environment setup for GPU CI and bench workflows.
# Finds the CUDA toolkit, detects the GPU's compute capability, and
# exports env vars that candle-kernels/build.rs needs to compile WMMA
# kernels to matching SASS.
#
# Requires:
#   $GITHUB_ENV, $GITHUB_PATH — files provided by the GitHub Actions runner
#   nvidia-smi                — must be on PATH
#
# Exports (via $GITHUB_ENV / $GITHUB_PATH):
#   PATH              — prepends <cuda>/bin
#   CUDA_PATH         — root of the CUDA installation
#   LIBRARY_PATH      — <cuda>/lib64 (followed by any previous value)
#   LD_LIBRARY_PATH   — <cuda>/lib64 (followed by any previous value)
#   INFERRS_WMMA_ARCH — numeric compute cap (e.g. 120)
#   CUDA_COMPUTE_CAP  — same value, used by bindgen_cuda
#
# Exits non-zero (with a ::error:: annotation) when no nvcc is found, when
# the compute capability or nvcc version cannot be determined, or when an
# sm_120+ GPU is paired with a CUDA toolkit older than 12.8.
set -euo pipefail

# Fail up front with a readable message instead of an "unbound variable"
# error in the middle of the script.
: "${GITHUB_ENV:?GITHUB_ENV must be set (run this script inside GitHub Actions)}"
: "${GITHUB_PATH:?GITHUB_PATH must be set (run this script inside GitHub Actions)}"

# ---------- Find CUDA installation ----------
CUDA_FOUND=""
for d in /usr/local/cuda /opt/cuda /usr; do
  if [ -x "$d/bin/nvcc" ]; then
    echo "Found CUDA at $d"
    echo "$d/bin" >> "$GITHUB_PATH"
    echo "CUDA_PATH=$d" >> "$GITHUB_ENV"
    # Append ":<previous value>" only when there is a previous value.
    # A trailing ':' would leave an empty path entry, which the dynamic
    # loader interprets as the current working directory.
    echo "LIBRARY_PATH=$d/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}" >> "$GITHUB_ENV"
    echo "LD_LIBRARY_PATH=$d/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> "$GITHUB_ENV"
    CUDA_FOUND="$d"
    break
  fi
done
if [ -z "$CUDA_FOUND" ]; then
  echo "::error::No CUDA installation found (checked /usr/local/cuda, /opt/cuda, /usr)"
  exit 1
fi

# Run nvcc once: print its banner for the log and keep it for the version
# check further down.
NVCC_VERSION_OUTPUT=$("$CUDA_FOUND/bin/nvcc" --version)
echo "$NVCC_VERSION_OUTPUT"

# ---------- Detect GPU compute capability ----------
# Use the first GPU only. awk reads all of its input (unlike `head -1`), so
# nvidia-smi cannot be killed by SIGPIPE and trip `pipefail`.
CAP_LINE=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | awk 'NR == 1')
# "12.0" -> "120": drop the dot and any stray whitespace / carriage return.
RAW=${CAP_LINE//[[:space:].]/}
if ! [[ "$RAW" =~ ^[0-9]+$ ]]; then
  # Refuse to export a non-numeric value; it would also make the numeric
  # comparison in the Blackwell check below error out and be skipped.
  echo "::error::Could not determine GPU compute capability from nvidia-smi (got '${CAP_LINE}')"
  exit 1
fi
echo "Detected GPU compute capability: sm_${RAW}"
echo "INFERRS_WMMA_ARCH=${RAW}" >> "$GITHUB_ENV"
echo "CUDA_COMPUTE_CAP=${RAW}" >> "$GITHUB_ENV"

# ---------- GPU info ----------
nvidia-smi
nvidia-smi --query-gpu=name,compute_cap,memory.total,driver_version --format=csv

# ---------- Blackwell nvcc version check ----------
# sm_120+ requires CUDA toolkit >= 12.8.
NVCC_VER=$(printf '%s\n' "$NVCC_VERSION_OUTPUT" \
  | sed -n 's/.*release \([0-9][0-9]*\.[0-9][0-9]*\).*/\1/p')
if [ -z "$NVCC_VER" ]; then
  echo "::error::Could not parse the CUDA release from '$CUDA_FOUND/bin/nvcc --version'"
  exit 1
fi
echo "nvcc version: ${NVCC_VER}"
if [ "$RAW" -ge 120 ]; then
  MAJOR=${NVCC_VER%%.*}
  MINOR=${NVCC_VER#*.}
  # `[ ... -lt ... ]` compares as decimal integers, so a zero-padded
  # component is not misread as octal.
  if [ "$MAJOR" -lt 12 ] || { [ "$MAJOR" -eq 12 ] && [ "$MINOR" -lt 8 ]; }; then
    echo "::error::sm_${RAW} (Blackwell) requires CUDA >= 12.8 but found ${NVCC_VER}"
    exit 1
  fi
fi
87 changes: 87 additions & 0 deletions .github/workflows/gpu-bench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Manually triggered benchmark of inferrs on the self-hosted CUDA runner.
# Optionally records an nsys trace and prints per-kernel statistics.
name: GPU Bench

on:
  workflow_dispatch:
    inputs:
      model:
        description: 'HuggingFace model ID or local path on runner'
        required: true
      prompt_len:
        description: 'Number of synthetic prompt tokens'
        required: false
        default: '128'
      runs:
        description: 'Number of timed benchmark runs'
        required: false
        default: '5'
      profile:
        description: 'Run nsys profiling (true/false)'
        required: false
        default: 'false'

env:
  CARGO_TERM_COLOR: always

jobs:
  bench:
    name: Benchmark (nvidia-runner-1)
    runs-on: [self-hosted, Linux, X64, cuda]
    timeout-minutes: 120
    steps:
      - uses: actions/checkout@v4

      - name: Install system dependencies
        run: |
          # Skip apt entirely when both packages are already installed.
          dpkg -s pkg-config libssl-dev &>/dev/null || \
            { sudo apt-get update -q && sudo apt-get install -y --no-install-recommends pkg-config libssl-dev; }

      - name: Set up CUDA + detect GPU
        run: bash .github/scripts/cuda-setup.sh

      - uses: dtolnay/rust-toolchain@stable

      - uses: Swatinem/rust-cache@v2
        with:
          key: gpu-bench

      - name: Build (release, CUDA backend)
        run: cargo build --release -p inferrs -p inferrs-backend-cuda

      - name: Run inferrs bench
        # Workflow inputs are user-controlled text. Pass them through the
        # environment and quote them in the script; interpolating
        # ${{ inputs.* }} directly into `run:` would let a crafted value
        # execute arbitrary shell commands on the self-hosted runner.
        env:
          BENCH_MODEL: ${{ inputs.model }}
          BENCH_RUNS: ${{ inputs.runs }}
          BENCH_PROMPT_LEN: ${{ inputs.prompt_len }}
        run: |
          cargo run --release -p inferrs -- bench \
            --model "$BENCH_MODEL" \
            --warmup 1 \
            --runs "$BENCH_RUNS" \
            --prompt-len "$BENCH_PROMPT_LEN"

      - name: Run nsys profile
        if: inputs.profile == 'true'
        # Same reasoning as above: inputs reach the shell only via env vars.
        env:
          BENCH_MODEL: ${{ inputs.model }}
          BENCH_PROMPT_LEN: ${{ inputs.prompt_len }}
        run: |
          nsys profile \
            --output /tmp/inferrs-bench-trace \
            --force-overwrite true \
            --trace cuda,nvtx \
            --sample none \
            ./target/release/inferrs bench \
              --model "$BENCH_MODEL" \
              --warmup 0 \
              --runs 1 \
              --prompt-len "$BENCH_PROMPT_LEN"

      - name: Upload nsys trace
        if: inputs.profile == 'true'
        uses: actions/upload-artifact@v4
        with:
          name: nsys-trace
          path: /tmp/inferrs-bench-trace.*
          retention-days: 14

      - name: Kernel-level stats (nsys stats)
        if: inputs.profile == 'true'
        run: |
          # Best effort: a failing `nsys stats` must not fail the job.
          nsys stats /tmp/inferrs-bench-trace.nsys-rep \
            --report gpukernsum \
            --format csv \
            --output /tmp/inferrs-kernel-stats || true
          cat /tmp/inferrs-kernel-stats*.csv 2>/dev/null || echo "nsys stats not available"
29 changes: 4 additions & 25 deletions .github/workflows/gpu-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,39 +20,18 @@ jobs:

- name: Install system dependencies
run: |
sudo apt-get update -q
sudo apt-get install -y --no-install-recommends \
pkg-config libssl-dev
dpkg -s pkg-config libssl-dev &>/dev/null || \
{ sudo apt-get update -q && sudo apt-get install -y --no-install-recommends pkg-config libssl-dev; }

- name: Set up CUDA environment
run: |
# Find the CUDA installation (common paths on self-hosted runners)
for d in /usr/local/cuda /opt/cuda /usr; do
if [ -x "$d/bin/nvcc" ]; then
echo "Found CUDA at $d"
echo "$d/bin" >> "$GITHUB_PATH"
echo "CUDA_PATH=$d" >> "$GITHUB_ENV"
echo "LIBRARY_PATH=$d/lib64:${LIBRARY_PATH:-}" >> "$GITHUB_ENV"
echo "LD_LIBRARY_PATH=$d/lib64:${LD_LIBRARY_PATH:-}" >> "$GITHUB_ENV"
NVCC="$d/bin/nvcc"
break
fi
done
"${NVCC:-nvcc}" --version
- name: Set up CUDA + detect GPU
run: bash .github/scripts/cuda-setup.sh

- uses: dtolnay/rust-toolchain@stable

- uses: Swatinem/rust-cache@v2
with:
key: gpu-ci

- name: Show GPU info
run: nvidia-smi

# Let bindgen_cuda auto-detect the GPU's compute capability via
# nvidia-smi. The WMMA .cu files now compile to no-op stubs on
# SM < 7.0 (via INFERRS_NO_WMMA), so this works on Pascal (6.1)
# as well as Volta+ GPUs.
- name: Build (with CUDA backend)
run: cargo build -p inferrs -p inferrs-backend-cuda

Expand Down
Loading