diff --git a/.github/scripts/cuda-setup.sh b/.github/scripts/cuda-setup.sh
new file mode 100644
index 00000000..c1722f73
--- /dev/null
+++ b/.github/scripts/cuda-setup.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+# Shared CUDA environment setup for GPU CI and bench workflows.
+# Finds the CUDA toolkit, detects the GPU's compute capability, and
+# exports env vars that candle-kernels/build.rs needs to compile WMMA
+# kernels to matching SASS.
+#
+# Exports (via $GITHUB_ENV / $GITHUB_PATH):
+#   PATH              — prepends $CUDA_PATH/bin
+#   CUDA_PATH         — root of the CUDA installation
+#   LIBRARY_PATH      — prepends $CUDA_PATH/lib64
+#   LD_LIBRARY_PATH   — prepends $CUDA_PATH/lib64
+#   INFERRS_WMMA_ARCH — numeric compute cap (e.g. 120)
+#   CUDA_COMPUTE_CAP  — same value, used by bindgen_cuda
+set -euo pipefail
+
+# ---------- Find CUDA installation ----------
+CUDA_FOUND=""
+for d in /usr/local/cuda /opt/cuda /usr; do
+  if [ -x "$d/bin/nvcc" ]; then
+    echo "Found CUDA at $d"
+    echo "$d/bin" >> "$GITHUB_PATH"
+    echo "CUDA_PATH=$d" >> "$GITHUB_ENV"
+    # Append the previous value only when it is non-empty: a trailing ':'
+    # is an empty entry, which the dynamic loader treats as the current dir.
+    echo "LIBRARY_PATH=$d/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}" >> "$GITHUB_ENV"
+    echo "LD_LIBRARY_PATH=$d/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> "$GITHUB_ENV"
+    CUDA_FOUND="$d"
+    break
+  fi
+done
+if [ -z "$CUDA_FOUND" ]; then
+  echo "::error::No CUDA installation found (checked /usr/local/cuda, /opt/cuda, /usr)"
+  exit 1
+fi
+"$CUDA_FOUND/bin/nvcc" --version
+
+# ---------- Detect GPU compute capability ----------
+# First GPU only; "12.0" -> "120". Whitespace is stripped and the result is
+# validated so nothing non-numeric is exported or compared as an integer.
+RAW=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -n 1 | tr -d '.[:space:]')
+if ! [[ "$RAW" =~ ^[0-9]+$ ]]; then
+  echo "::error::Could not parse GPU compute capability from nvidia-smi (got '${RAW}')"
+  exit 1
+fi
+echo "Detected GPU compute capability: sm_${RAW}"
+echo "INFERRS_WMMA_ARCH=${RAW}" >> "$GITHUB_ENV"
+echo "CUDA_COMPUTE_CAP=${RAW}" >> "$GITHUB_ENV"
+
+# ---------- GPU info ----------
+nvidia-smi
+nvidia-smi --query-gpu=name,compute_cap,memory.total,driver_version --format=csv
+
+# ---------- Blackwell nvcc version check ----------
+# sm_120+ requires CUDA toolkit >= 12.8.
+NVCC_VER=$("$CUDA_FOUND/bin/nvcc" --version | grep -o 'release [0-9]*\.[0-9]*' | awk '{print $2}')
+echo "nvcc version: ${NVCC_VER}"
+if [ "${RAW}" -ge 120 ]; then
+  MAJOR=$(echo "$NVCC_VER" | cut -d. -f1)
+  MINOR=$(echo "$NVCC_VER" | cut -d. -f2)
+  if [ "$MAJOR" -lt 12 ] || { [ "$MAJOR" -eq 12 ] && [ "$MINOR" -lt 8 ]; }; then
+    echo "::error::sm_${RAW} (Blackwell) requires CUDA >= 12.8 but found ${NVCC_VER}"
+    exit 1
+  fi
+fi
diff --git a/.github/workflows/gpu-bench.yml b/.github/workflows/gpu-bench.yml
new file mode 100644
index 00000000..68a83475
--- /dev/null
+++ b/.github/workflows/gpu-bench.yml
@@ -0,0 +1,96 @@
+name: GPU Bench
+
+on:
+  workflow_dispatch:
+    inputs:
+      model:
+        description: 'HuggingFace model ID or local path on runner'
+        required: true
+      prompt_len:
+        description: 'Number of synthetic prompt tokens'
+        required: false
+        default: '128'
+      runs:
+        description: 'Number of timed benchmark runs'
+        required: false
+        default: '5'
+      profile:
+        description: 'Run nsys profiling (true/false)'
+        required: false
+        default: 'false'
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  bench:
+    name: Benchmark (nvidia-runner-1)
+    runs-on: [self-hosted, Linux, X64, cuda]
+    timeout-minutes: 120
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install system dependencies
+        run: |
+          dpkg -s pkg-config libssl-dev &>/dev/null || \
+            { sudo apt-get update -q && sudo apt-get install -y --no-install-recommends pkg-config libssl-dev; }
+
+      - name: Set up CUDA + detect GPU
+        run: bash .github/scripts/cuda-setup.sh
+
+      - uses: dtolnay/rust-toolchain@stable
+
+      - uses: Swatinem/rust-cache@v2
+        with:
+          key: gpu-bench
+
+      - name: Run inferrs bench
+        # Workflow inputs are handed over as env vars instead of being
+        # interpolated into the script text, so they are never parsed as shell.
+        env:
+          BENCH_MODEL: ${{ inputs.model }}
+          BENCH_RUNS: ${{ inputs.runs }}
+          BENCH_PROMPT_LEN: ${{ inputs.prompt_len }}
+        run: |
+          cargo run --release -p inferrs -- bench \
+            --model "$BENCH_MODEL" \
+            --warmup 1 \
+            --runs "$BENCH_RUNS" \
+            --prompt-len "$BENCH_PROMPT_LEN"
+
+      - name: Run nsys profile
+        if: inputs.profile == 'true'
+        env:
+          BENCH_MODEL: ${{ inputs.model }}
+          BENCH_PROMPT_LEN: ${{ inputs.prompt_len }}
+        run: |
+          nsys profile \
+            --output /tmp/inferrs-bench-trace \
+            --force-overwrite true \
+            --trace cuda,nvtx \
+            --sample none \
+            ./target/release/inferrs bench \
+              --model "$BENCH_MODEL" \
+              --warmup 0 \
+              --runs 1 \
+              --prompt-len "$BENCH_PROMPT_LEN"
+
+      - name: Upload nsys trace
+        if: inputs.profile == 'true'
+        uses: actions/upload-artifact@v4
+        with:
+          name: nsys-trace
+          path: /tmp/inferrs-bench-trace.*
+          retention-days: 14
+
+      - name: Kernel-level stats (nsys stats)
+        if: inputs.profile == 'true'
+        run: |
+          nsys stats /tmp/inferrs-bench-trace.nsys-rep \
+            --report gpukernsum \
+            --format csv \
+            --output /tmp/inferrs-kernel-stats || true
+          cat /tmp/inferrs-kernel-stats*.csv 2>/dev/null || echo "nsys stats not available"
diff --git a/.github/workflows/gpu-ci.yml b/.github/workflows/gpu-ci.yml
index 6e563a12..de896a8b 100644
--- a/.github/workflows/gpu-ci.yml
+++ b/.github/workflows/gpu-ci.yml
@@ -20,25 +20,11 @@ jobs:
 
       - name: Install system dependencies
         run: |
-          sudo apt-get update -q
-          sudo apt-get install -y --no-install-recommends \
-            pkg-config libssl-dev
+          dpkg -s pkg-config libssl-dev &>/dev/null || \
+            { sudo apt-get update -q && sudo apt-get install -y --no-install-recommends pkg-config libssl-dev; }
 
-      - name: Set up CUDA environment
-        run: |
-          # Find the CUDA installation (common paths on self-hosted runners)
-          for d in /usr/local/cuda /opt/cuda /usr; do
-            if [ -x "$d/bin/nvcc" ]; then
-              echo "Found CUDA at $d"
-              echo "$d/bin" >> "$GITHUB_PATH"
-              echo "CUDA_PATH=$d" >> "$GITHUB_ENV"
-              echo "LIBRARY_PATH=$d/lib64:${LIBRARY_PATH:-}" >> "$GITHUB_ENV"
-              echo "LD_LIBRARY_PATH=$d/lib64:${LD_LIBRARY_PATH:-}" >> "$GITHUB_ENV"
-              NVCC="$d/bin/nvcc"
-              break
-            fi
-          done
-          "${NVCC:-nvcc}" --version
+      - name: Set up CUDA + detect GPU
+        run: bash .github/scripts/cuda-setup.sh
 
       - uses: dtolnay/rust-toolchain@stable
 
@@ -46,13 +32,6 @@ jobs:
         with:
           key: gpu-ci
 
-      - name: Show GPU info
-        run: nvidia-smi
-
-      # Let bindgen_cuda auto-detect the GPU's compute capability via
-      # nvidia-smi. The WMMA .cu files now compile to no-op stubs on
-      # SM < 7.0 (via INFERRS_NO_WMMA), so this works on Pascal (6.1)
-      # as well as Volta+ GPUs.
       - name: Build (with CUDA backend)
         run: cargo build -p inferrs -p inferrs-backend-cuda
 