Skip to content

Commit 55630b4

Browse files
committed
Split the JAR download command into two RUN layers for better caching
1 parent f87dc66 commit 55630b4

1 file changed

Lines changed: 20 additions & 18 deletions

File tree

dev/spark/Dockerfile

Lines changed: 20 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -41,25 +41,27 @@ COPY --chown=spark:spark spark-defaults.conf ${SPARK_HOME}/conf/
4141
RUN mkdir -p /home/iceberg/spark-events && \
4242
chown -R spark:spark /home/iceberg
4343

44-
# Required JAR dependencies
45-
ENV JARS_TO_DOWNLOAD="\
46-
org/apache/spark/spark-connect_${SCALA_VERSION}/${SPARK_VERSION}/spark-connect_${SCALA_VERSION}-${SPARK_VERSION}.jar \
47-
org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \
48-
org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar \
49-
org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar \
50-
software/amazon/awssdk/bundle/${AWS_SDK_VERSION}/bundle-${AWS_SDK_VERSION}.jar"
44+
# Download JARs that do not depend on the Iceberg version first (this layer stays cached across Iceberg version changes)
45+
RUN cd "${SPARK_HOME}/jars" && \
46+
curl -fsSL --retry 3 --retry-delay 5 \
47+
-o "spark-connect_${SCALA_VERSION}-${SPARK_VERSION}.jar" \
48+
"${MAVEN_MIRROR}/org/apache/spark/spark-connect_${SCALA_VERSION}/${SPARK_VERSION}/spark-connect_${SCALA_VERSION}-${SPARK_VERSION}.jar" && \
49+
curl -fsSL --retry 3 --retry-delay 5 \
50+
-o "hadoop-aws-${HADOOP_VERSION}.jar" \
51+
"${MAVEN_MIRROR}/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar" && \
52+
curl -fsSL --retry 3 --retry-delay 5 \
53+
-o "bundle-${AWS_SDK_VERSION}.jar" \
54+
"${MAVEN_MIRROR}/software/amazon/awssdk/bundle/${AWS_SDK_VERSION}/bundle-${AWS_SDK_VERSION}.jar" && \
55+
chown -R spark:spark "${SPARK_HOME}/jars"
5156

52-
# Download JARs with retry logic
53-
RUN set -e && \
54-
cd "${SPARK_HOME}/jars" && \
55-
for jar_path in ${JARS_TO_DOWNLOAD}; do \
56-
jar_name=$(basename "${jar_path}") && \
57-
echo "Downloading ${jar_name}..." && \
58-
curl -fsSL --retry 3 --retry-delay 5 \
59-
-o "${jar_name}" \
60-
"${MAVEN_MIRROR}/${jar_path}" && \
61-
echo "✓ Downloaded ${jar_name}"; \
62-
done && \
57+
# Download Iceberg-specific JARs (invalidated when ICEBERG_VERSION or ICEBERG_SPARK_RUNTIME_VERSION changes)
58+
RUN cd "${SPARK_HOME}/jars" && \
59+
curl -fsSL --retry 3 --retry-delay 5 \
60+
-o "iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar" \
61+
"${MAVEN_MIRROR}/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar" && \
62+
curl -fsSL --retry 3 --retry-delay 5 \
63+
-o "iceberg-aws-bundle-${ICEBERG_VERSION}.jar" \
64+
"${MAVEN_MIRROR}/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar" && \
6365
chown -R spark:spark "${SPARK_HOME}/jars"
6466

6567
USER spark

0 commit comments

Comments
 (0)