From 8e760101db1fb35f08237119cfa1171f6c6a6561 Mon Sep 17 00:00:00 2001
From: Weida Hong <wdhongtw@google.com>
Date: Thu, 4 Dec 2025 18:18:19 +0000
Subject: [PATCH] Reduce image size and enhance caching

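- Mount pip's cache directory (/root/.cache/pip) as a BuildKit cache
  mount on the RUN steps that invoke pip, so downloaded packages are
  reused across builds and are not stored in the image layers.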
- Drop --no-cache-dir from the test-dependency install so that pip can
  use the mounted cache.

Signed-off-by: Weida Hong <wdhongtw@google.com>
---
 docker/Dockerfile | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index b8f99e773..e449aaf61 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -23,12 +23,12 @@ RUN git clone $VLLM_REPO /workspace/vllm
 RUN if [ -n "$VLLM_COMMIT_HASH" ]; then \
         git checkout $VLLM_COMMIT_HASH; \
     fi
-RUN pip install -r requirements/tpu.txt --retries 3
-RUN VLLM_TARGET_DEVICE="tpu" pip install -e .
+RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements/tpu.txt --retries 3
+RUN --mount=type=cache,target=/root/.cache/pip VLLM_TARGET_DEVICE="tpu" pip install -e .
 
 # Install test dependencies
-RUN python3 -m pip install -e tests/vllm_test_utils
-RUN python3 -m pip install --no-cache-dir \
+RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install -e tests/vllm_test_utils
+RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install \
     git+https://github.com/thuml/depyf.git \
     pytest-asyncio \
     git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d#egg=lm-eval[api] \
@@ -39,18 +39,18 @@ RUN python3 -m pip install --no-cache-dir \
 WORKDIR /workspace/tpu_inference
 # Install requirements first and cache so we don't need to re-install on code change.
 COPY requirements.txt .
-RUN pip install -r requirements.txt --retries 3
+RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt --retries 3
 COPY requirements_benchmarking.txt .
 # These are needed for the E2E benchmarking tests (i.e. tests/e2e/benchmarking/mlperf.sh)
-RUN pip install -r requirements_benchmarking.txt --retries 3
+RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements_benchmarking.txt --retries 3
 COPY . .
-RUN pip install -e .
+RUN --mount=type=cache,target=/root/.cache/pip pip install -e .
 
 # TODO (jacobplatin): remove when v7x is supported in JAX/Libtpu officially
 # NOTE: it's important that this is done after installing tpu_inference above,
 # so that the v7x-specific dependencies can override any existing ones.
 COPY requirements_v7x.txt .
-RUN if [ "$IS_FOR_V7X" = "true" ]; then \
+RUN --mount=type=cache,target=/root/.cache/pip if [ "$IS_FOR_V7X" = "true" ]; then \
         pip install -r requirements_v7x.txt; \
     fi