From d7461048f90bf5149224af0e2b6889f8ecb220b7 Mon Sep 17 00:00:00 2001
From: Pooya Moradi <pooyam@google.com>
Date: Wed, 3 Dec 2025 01:29:30 +0000
Subject: [PATCH] Optimize Dockerfile to reduce image size and build time.

Signed-off-by: Pooya Moradi <pooyam@google.com>
---
 docker/Dockerfile | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index b8f99e773..d6eb82b1e 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -11,9 +11,12 @@ ARG IS_FOR_V7X="false"
 RUN pip uninstall -y torch torch_xla torchvision
 
 # Install some basic utilities
-RUN apt-get update && apt-get install -y \
+# Cache mounts keep apt archives and lists out of the image layers; no cleanup step needed.
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    apt-get update && apt-get install -y \
     git \
-    libopenblas-base libopenmpi-dev libomp-dev
+    libomp-dev libopenblas-base libopenmpi-dev
 
 # Build vLLM
 WORKDIR /workspace/vllm
@@ -23,12 +26,16 @@ RUN git clone $VLLM_REPO /workspace/vllm
 RUN if [ -n "$VLLM_COMMIT_HASH" ]; then \
         git checkout $VLLM_COMMIT_HASH; \
     fi
-RUN pip install -r requirements/tpu.txt --retries 3
-RUN VLLM_TARGET_DEVICE="tpu" pip install -e .
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install --retries 3 --requirement requirements/tpu.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    VLLM_TARGET_DEVICE="tpu" pip install --editable .
 
 # Install test dependencies
-RUN python3 -m pip install -e tests/vllm_test_utils
-RUN python3 -m pip install --no-cache-dir \
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python3 -m pip install --editable tests/vllm_test_utils
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python3 -m pip install \
     git+https://github.com/thuml/depyf.git \
     pytest-asyncio \
     git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d#egg=lm-eval[api] \
@@ -39,18 +46,22 @@ RUN python3 -m pip install --no-cache-dir \
 WORKDIR /workspace/tpu_inference
 # Install requirements first and cache so we don't need to re-install on code change.
 COPY requirements.txt .
-RUN pip install -r requirements.txt --retries 3
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements.txt --retries 3
 COPY requirements_benchmarking.txt .
 # These are needed for the E2E benchmarking tests (i.e. tests/e2e/benchmarking/mlperf.sh)
-RUN pip install -r requirements_benchmarking.txt --retries 3
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements_benchmarking.txt --retries 3
 COPY . .
-RUN pip install -e .
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -e .
 
 # TODO (jacobplatin): remove when v7x is supported in JAX/Libtpu officially
 # NOTE: it's important that this is done after installing tpu_inference above,
 # so that the v7x-specific dependencies can override any existing ones.
 COPY requirements_v7x.txt .
-RUN if [ "$IS_FOR_V7X" = "true" ]; then \
+RUN --mount=type=cache,target=/root/.cache/pip \
+    if [ "$IS_FOR_V7X" = "true" ]; then \
         pip install -r requirements_v7x.txt; \
     fi