-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathDockerfile.worker.gpu
More file actions
182 lines (156 loc) · 6.54 KB
/
Dockerfile.worker.gpu
File metadata and controls
182 lines (156 loc) · 6.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# VLog Remote Transcoding Worker with GPU Support (Rocky Linux 10)
# Containerized worker with NVIDIA NVENC and Intel VAAPI hardware encoding
#
# Build: docker build -f Dockerfile.worker.gpu -t vlog-worker-gpu .
# Run (NVIDIA): docker run --gpus all -e VLOG_WORKER_API_KEY=<key> vlog-worker-gpu
# Run (Intel): docker run --device /dev/dri -e VLOG_WORKER_API_KEY=<key> vlog-worker-gpu
# Run (Both): docker run --gpus all --device /dev/dri -e VLOG_WORKER_API_KEY=<key> vlog-worker-gpu
#
# Base: Rocky Linux 10 with RPM Fusion packages
# - FFmpeg 7.1.2 with nvenc, vaapi, qsv encoders (from RPM Fusion)
# - intel-media-driver 25.2.6 for Battlemage/Arc support (from RPM Fusion)
# - NVIDIA libraries injected at runtime via nvidia-container-toolkit
# Pin base image version for reproducible builds
# Update this periodically and test before deploying
# This value is substituted into both FROM lines of this file.
# NOTE(review): "10" looks like a major-version tag, which may move between
# point releases - pin a minor tag or digest if strict reproducibility matters.
ARG ROCKY_VERSION=10
# Code version for worker compatibility checking
# Set this during build: docker build --build-arg CODE_VERSION=$(git rev-parse --short HEAD) ...
# Declared before the first FROM, so CODE_VERSION and BUILD_TIMESTAMP are
# global-scope args; the runtime stage re-declares them before using them.
ARG CODE_VERSION=dev
ARG BUILD_TIMESTAMP=""
# =============================================================================
# Stage 1: Builder - Install build dependencies and Python packages
# =============================================================================
FROM rockylinux/rockylinux:${ROCKY_VERSION} AS builder

# Enable the EPEL and RPM Fusion (free + nonfree) repositories.
# The EL release number in the RPM Fusion URLs is derived from the base image
# via `rpm -E %rhel` instead of being hard-coded, so the URLs keep matching the
# image when ROCKY_VERSION is changed (it evaluates to 10 on Rocky Linux 10).
# NOTE(review): --nogpgcheck skips signature verification of the two release
# RPMs; consider importing the RPM Fusion keys first if that is not acceptable.
RUN dnf install -y epel-release \
    && dnf install -y --nogpgcheck \
        "https://mirrors.rpmfusion.org/free/el/rpmfusion-free-release-$(rpm -E %rhel).noarch.rpm" \
        "https://mirrors.rpmfusion.org/nonfree/el/rpmfusion-nonfree-release-$(rpm -E %rhel).noarch.rpm" \
    && dnf clean all
# Builder toolchain: Python 3.12 with its headers, plus gcc and libffi-devel,
# which are needed to compile packages with C extensions (argon2-cffi).
# Afterwards python3 and pip3 are registered as alternatives that point at the
# 3.12 binaries.
RUN dnf install -y \
        gcc \
        libffi-devel \
        python3.12 \
        python3.12-devel \
        python3.12-pip \
    && dnf clean all \
    && alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 \
    && alternatives --install /usr/bin/pip3 pip3 /usr/bin/pip3.12 1
WORKDIR /build

# Copy package files for dependency installation
COPY pyproject.toml ./

# Install Python dependencies under /install so a later stage can copy them
# from a single, well-known location.
# Every requirement is quoted on purpose: RUN uses the shell form, and /bin/sh
# parses an unquoted "pkg>=1.0" as the word "pkg" followed by a redirection of
# stdout to a file named "=1.0". That silently drops the version bound, hides
# pip's output and leaves stray "=<version>" files in the build directory.
# Note: filelock and jaraco-context are security patches for transitive dependencies
RUN pip3 install --no-cache-dir --prefix=/install \
        "fastapi>=0.100.0" \
        "uvicorn>=0.23.0" \
        "python-multipart>=0.0.6" \
        "asyncpg>=0.29.0" \
        "databases[postgresql]>=0.8.0" \
        "psycopg2-binary>=2.9.0" \
        "sqlalchemy>=2.0.0" \
        "alembic>=1.13.0" \
        "python-slugify>=8.0.0" \
        "aiofiles>=23.0.0" \
        "httpx>=0.25.0" \
        "watchdog>=3.0.0" \
        "slowapi>=0.1.9" \
        "limits>=3.0.0" \
        "filelock>=3.20.3" \
        "jaraco-context>=6.1.0"
# Bring the application sources into the build directory, then install the
# project itself into /install.
COPY config.py code_version.py ./
COPY api/ api/
COPY worker/ worker/
COPY cli/ cli/

# The constraints file holds the security-patched transitive dependencies
# (filelock, jaraco-context) at or above their fixed versions while pip
# resolves the project's requirements.
RUN printf '%s\n' \
        "filelock>=3.20.3" \
        "jaraco-context>=6.1.0" \
        > /tmp/constraints.txt \
    && pip3 install --no-cache-dir --prefix=/install -c /tmp/constraints.txt .
# =============================================================================
# Stage 2: Runtime - Minimal production image
# =============================================================================
FROM rockylinux/rockylinux:${ROCKY_VERSION} AS runtime

# Enable the EPEL and RPM Fusion (free + nonfree) repositories.
# The EL release number in the RPM Fusion URLs is derived from the base image
# via `rpm -E %rhel` instead of being hard-coded, so the URLs keep matching the
# image when ROCKY_VERSION is changed (it evaluates to 10 on Rocky Linux 10).
# NOTE(review): --nogpgcheck skips signature verification of the two release
# RPMs; consider importing the RPM Fusion keys first if that is not acceptable.
RUN dnf install -y epel-release \
    && dnf install -y --nogpgcheck \
        "https://mirrors.rpmfusion.org/free/el/rpmfusion-free-release-$(rpm -E %rhel).noarch.rpm" \
        "https://mirrors.rpmfusion.org/nonfree/el/rpmfusion-nonfree-release-$(rpm -E %rhel).noarch.rpm" \
    && dnf clean all
# FFmpeg (with hardware acceleration support) plus curl, which the container
# health check relies on.
# RPM Fusion's FFmpeg 7.1.2 includes nvenc, vaapi, and qsv encoders
# The remaining installed packages are updated in the same layer before the
# dnf caches are cleaned.
RUN dnf install -y \
        curl \
        ffmpeg \
        ffmpeg-libs \
    && dnf update -y \
    && dnf clean all
# Intel VAAPI driver for Arc/Battlemage GPUs.
# intel-media-driver 25.2.6 from RPM Fusion supports Battlemage (Arc B580)
# libva and mesa-dri-drivers are not listed here because they are pulled in as
# ffmpeg dependencies.
RUN dnf install -y intel-media-driver \
    && dnf clean all
# Python 3.12 interpreter and pip only - the -devel headers stay in the builder.
# python3 and pip3 are registered as alternatives pointing at the 3.12 binaries.
RUN dnf install -y \
        python3.12 \
        python3.12-pip \
    && dnf clean all \
    && alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 \
    && alternatives --install /usr/bin/pip3 pip3 /usr/bin/pip3.12 1
# Unprivileged account for the worker: UID 1000, bash login shell, with a home
# directory. It is then added to the "video" supplementary group.
# NOTE(review): presumably "video" membership is what grants access to the GPU
# device nodes - confirm against the host's device permissions.
RUN useradd --create-home --uid 1000 --shell /bin/bash vlog \
    && usermod --append --groups video vlog
WORKDIR /app

# Python packages that the builder stage installed under /install are placed
# under /usr/local in this image.
COPY --from=builder /install /usr/local

# Re-assert the minimum versions of the security-patched packages
# (dnf-installed Python packages may have brought in older transitive deps)
RUN pip3 install --no-cache-dir --upgrade \
        "filelock>=3.20.3" \
        "jaraco-context>=6.1.0" \
        "wheel>=0.46.2"
# Copy source code
# Each path is copied into the current working directory (/app) and is owned by
# the vlog user and group. pyproject.toml is shipped alongside the sources.
COPY --chown=vlog:vlog config.py ./
COPY --chown=vlog:vlog code_version.py ./
COPY --chown=vlog:vlog api/ api/
COPY --chown=vlog:vlog worker/ worker/
COPY --chown=vlog:vlog cli/ cli/
COPY --chown=vlog:vlog pyproject.toml ./
# Re-declare build args for use in this stage (args don't persist across stages)
# They feed VLOG_CODE_VERSION and VLOG_BUILD_TIMESTAMP in the ENV instruction
# further down; values passed with --build-arg override the defaults here.
ARG CODE_VERSION=dev
ARG BUILD_TIMESTAMP=""
# Switch to non-root user
# Every instruction after this point, and the container's main process, runs as vlog.
USER vlog
# Create work directory
# Runs as vlog, so the directory is owned by that user. The path is the same
# one assigned to VLOG_WORKER_WORK_DIR in the ENV instruction.
RUN mkdir -p /tmp/vlog-worker
# Default runtime environment for the worker.
# VLOG_CODE_VERSION and VLOG_BUILD_TIMESTAMP are used for worker compatibility checking
ENV VLOG_WORKER_API_URL=http://vlog-worker-api:9002 \
    VLOG_WORKER_WORK_DIR=/tmp/vlog-worker \
    VLOG_WORKER_HEARTBEAT_INTERVAL=30 \
    VLOG_WORKER_POLL_INTERVAL=10 \
    # Build identity, taken from the CODE_VERSION / BUILD_TIMESTAMP build args
    VLOG_CODE_VERSION=${CODE_VERSION} \
    VLOG_BUILD_TIMESTAMP=${BUILD_TIMESTAMP} \
    # Hardware acceleration: auto-detect the GPU type, prefer the AV1 codec
    VLOG_HWACCEL_TYPE=auto \
    VLOG_HWACCEL_PREFERRED_CODEC=av1 \
    # NVIDIA runtime (nvidia-container-toolkit injects libraries)
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,video,utility \
    # Intel VAAPI - select the iHD driver by default (can be overridden)
    LIBVA_DRIVER_NAME=iHD \
    # Python: unbuffered stdout/stderr
    PYTHONUNBUFFERED=1
# Container health probe against the worker's own health server.
# The worker exposes /health on port 8080 which verifies:
# - Worker process is alive
# - API connection is working
# - FFmpeg is available (including GPU codecs)
# Probing starts after a 60s grace period, then runs every 30s with a 10s
# timeout; three consecutive failures mark the container unhealthy.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl --fail http://localhost:8080/health || exit 1

# Default process: the remote transcoder module, started in exec form.
CMD ["python3", "-m", "worker.remote_transcoder"]