@@ -85,7 +85,7 @@ ARG GET_PIP_URL
8585RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
8686 && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
8787 && apt-get update -y \
88- && apt-get install -y ccache software-properties-common git curl sudo python3-pip \
88+ && apt-get install -y ccache software-properties-common git curl sudo python3-pip libibverbs-dev \
8989 && curl -LsSf https://astral.sh/uv/install.sh | sh \
9090 && $HOME/.local/bin/uv venv /opt/venv --python ${PYTHON_VERSION} \
9191 && rm -f /usr/bin/python3 /usr/bin/python3-config /usr/bin/pip \
@@ -224,6 +224,22 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
224224 python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
225225 fi
226226
227+ # Build DeepGEMM from source, writing its wheel(s) to /tmp/deepgemm/dist (--ref is passed only when DEEPGEMM_GIT_REF is set)
228+ ARG DEEPGEMM_GIT_REF
229+ COPY tools/install_deepgemm.sh /tmp/install_deepgemm.sh
230+ RUN --mount=type=cache,target=/root/.cache/uv \
231+ VLLM_DOCKER_BUILD_CONTEXT=1 TORCH_CUDA_ARCH_LIST="9.0a 10.0a" /tmp/install_deepgemm.sh --cuda-version "${CUDA_VERSION}" ${DEEPGEMM_GIT_REF:+--ref "$DEEPGEMM_GIT_REF" } --wheel-dir /tmp/deepgemm/dist
232+
233+ # Ensure the wheel dir exists so the later-stage bind mount of /tmp/deepgemm/dist won't fail when DeepGEMM is skipped (the marker file is created unconditionally)
234+ RUN mkdir -p /tmp/deepgemm/dist && touch /tmp/deepgemm/dist/.deepgemm_skipped
235+
236+ COPY tools/ep_kernels/install_python_libraries.sh /tmp/install_python_libraries.sh
237+ # Build EP kernels (pplx-kernels and DeepEP): run the script with the `wheel` argument in /tmp/ep_kernels_workspace, then delete the static archives (*.a) under its nvshmem directory
238+ RUN --mount=type=cache,target=/root/.cache/uv \
239+ export TORCH_CUDA_ARCH_LIST='9.0a 10.0a' && \
240+ /tmp/install_python_libraries.sh /tmp/ep_kernels_workspace wheel && \
241+ find /tmp/ep_kernels_workspace/nvshmem -name '*.a' -delete
242+
227243# Check the size of the wheel if RUN_WHEEL_CHECK is true
228244COPY .buildkite/check-wheel-size.py check-wheel-size.py
229245# sync the default value with .buildkite/check-wheel-size.py
@@ -289,7 +305,7 @@ RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \
289305RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
290306 && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
291307 && apt-get update -y \
292- && apt-get install -y ccache software-properties-common git curl wget sudo vim python3-pip \
308+ && apt-get install -y software-properties-common curl sudo python3-pip \
293309 && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
294310 && if [ ! -z ${DEADSNAKES_MIRROR_URL} ] ; then \
295311 if [ ! -z "${DEADSNAKES_GPGKEY_URL}" ] ; then \
@@ -356,36 +372,32 @@ RUN --mount=type=cache,target=/root/.cache/uv \
356372. /etc/environment && \
357373uv pip list
358374
359- # Even when we build Flashinfer with AOT mode, there's still
360- # some issues w.r.t. JIT compilation. Therefore we need to
361- # install build dependencies for JIT compilation.
362- # TODO: Remove this once FlashInfer AOT wheel is fixed
363- COPY requirements/build.txt requirements/build.txt
375+ # Install the DeepGEMM wheel(s) built in the `build` stage; if the bind-mounted dir holds no *.whl, print a message and skip
364376RUN --mount=type=cache,target=/root/.cache/uv \
365- uv pip install --system -r requirements/build.txt \
377+ --mount=type=bind,from=build,source=/tmp/deepgemm/dist,target=/tmp/deepgemm/dist,ro \
378+ sh -c 'if ls /tmp/deepgemm/dist/*.whl >/dev/null 2>&1; then \
379+ uv pip install --system /tmp/deepgemm/dist/*.whl; \
380+ else \
381+ echo "No DeepGEMM wheels to install; skipping."; \
382+ fi'
383+
384+ # PyTorch now installs NVSHMEM, setting LD_LIBRARY_PATH (https://github.com/pytorch/pytorch/blob/d38164a545b4a4e4e0cf73ce67173f70574890b6/.ci/manywheel/build_cuda.sh#L141C14-L141C36)
+ # Append the previous value only when it is non-empty: a trailing ':' would add an empty entry, which the dynamic loader treats as the current working directory.
385+ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
386+
387+ # Install the EP kernel wheels (pplx-kernels and DeepEP) built in the `build` stage; the relative ep_kernels/dist path assumes the working directory is /vllm-workspace (the bind-mount target's parent) - TODO confirm
388+ RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \
389+ --mount=type=cache,target=/root/.cache/uv \
390+ uv pip install --system ep_kernels/dist/*.whl --verbose \
368391 --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.' )
367392
368- # Install DeepGEMM from source
369- ARG DEEPGEMM_GIT_REF
370- COPY tools/install_deepgemm.sh /tmp/install_deepgemm.sh
371- RUN --mount=type=cache,target=/root/.cache/uv \
372- VLLM_DOCKER_BUILD_CONTEXT=1 TORCH_CUDA_ARCH_LIST="9.0a 10.0a" /tmp/install_deepgemm.sh --cuda-version "${CUDA_VERSION}" ${DEEPGEMM_GIT_REF:+--ref "$DEEPGEMM_GIT_REF" }
373-
374- COPY tools/install_gdrcopy.sh install_gdrcopy.sh
375- RUN set -eux; \
+ # Run tools/install_gdrcopy.sh (bind-mounted read-only instead of copied); UUARCH is derived from TARGETPLATFORM and any other platform aborts the build
393+ RUN --mount=type=bind,source=tools/install_gdrcopy.sh,target=/tmp/install_gdrcopy.sh,ro \
394+ set -eux; \
376395 case "${TARGETPLATFORM}" in \
377396 linux/arm64) UUARCH="aarch64" ;; \
378397 linux/amd64) UUARCH="x64" ;; \
379398 *) echo "Unsupported TARGETPLATFORM: ${TARGETPLATFORM}" >&2; exit 1 ;; \
380399 esac; \
381- ./install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "${GDRCOPY_CUDA_VERSION}" "${UUARCH}" ; \
382- rm ./install_gdrcopy.sh
383-
384- # Install EP kernels(pplx-kernels and DeepEP)
385- COPY tools/ep_kernels/install_python_libraries.sh install_python_libraries.sh
386- ENV CUDA_HOME=/usr/local/cuda
387- RUN export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-9.0a 10.0a+PTX}" \
388- && bash install_python_libraries.sh
400+ /tmp/install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "${GDRCOPY_CUDA_VERSION}" "${UUARCH}"
389401
390402# CUDA image changed from /usr/local/nvidia to /usr/local/cuda in 12.8 but will
391403# return to /usr/local/nvidia in 13.0 to allow container providers to mount drivers
@@ -415,6 +427,11 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
415427# Use copy mode to avoid hardlink failures with Docker cache mounts
416428ENV UV_LINK_MODE=copy
417429
+ # Install git in this stage: it was dropped from the runtime image's apt package list, and the development (testing) dependencies installed next presumably need it - TODO confirm
430+ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
431+ && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
432+ && apt-get update -y \
433+ && apt-get install -y git
434+
418435# install development dependencies (for testing)
419436RUN --mount=type=cache,target=/root/.cache/uv \
420437 CUDA_MAJOR="${CUDA_VERSION%%.*}" ; \
@@ -455,12 +472,11 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
455472# Reference: https://github.com/astral-sh/uv/pull/1694
456473ENV UV_HTTP_TIMEOUT=500
457474
458- COPY requirements/kv_connectors.txt requirements/kv_connectors.txt
459-
460475# install additional dependencies for openai api server
461476RUN --mount=type=cache,target=/root/.cache/uv \
477+ --mount=type=bind,source=requirements/kv_connectors.txt,target=/tmp/kv_connectors.txt,ro \
+ # Optional KV-connector dependencies: the requirements file is bind-mounted at /tmp/kv_connectors.txt by this RUN's --mount, so -r must be given that exact path (no space inside it)
462478 if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then \
463- uv pip install --system -r requirements /kv_connectors.txt; \
479+ uv pip install --system -r /tmp/kv_connectors.txt; \
464480 fi; \
465481 if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
466482 BITSANDBYTES_VERSION="0.42.0" ; \
0 commit comments