diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index beef33e0..2b37c41b 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -44,7 +44,7 @@ jobs:
         with:
           mode: start
           github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
-          ec2-image-id: ami-079a0c9425d45e778
+          ec2-image-id: ami-00bea05327004f12a
           ec2-instance-type: t3.2xlarge
           subnet-id: subnet-0c4a81ef57cf2ebe3
           security-group-id: sg-01cb8c0cb0de3ba00
diff --git a/images/datascience-notebook/Dockerfile b/images/datascience-notebook/Dockerfile
index f172251e..2aaa6b11 100644
--- a/images/datascience-notebook/Dockerfile
+++ b/images/datascience-notebook/Dockerfile
@@ -1,4 +1,4 @@
-# If you are building manually, pass PYTHON_VERSION/PY_VER_SHORT/JUPYTERHUB_VERSION with --build-arg
+# If you are building manually, pass PYTHON_VERSION/PY_VER_SHORT/JUPYTERHUB_VERSION with --build-arg 
 ARG PYTHON_VERSION=python-3.11.8
 ARG PY_VER_SHORT=3.11
 ARG JUPYTERHUB_VERSION=4.1.5
@@ -14,6 +14,12 @@ ENV DEBCONF_NOWARNINGS="yes"
 RUN apt-get update -y && \
     apt-get -qq install -y --no-install-recommends \
     git \
+    libgdal-dev \
+    libproj-dev \
+    libgeos-dev \
+    libudunits2-dev \
+    libgit2-dev \
+    pkg-config \
     curl \
     rsync \
     unzip \
@@ -64,35 +70,37 @@ USER jovyan
 
 # Python/Mamba Deps
 ## Package versions
-ARG JUPYTERSERVER_VERSION=2.14.2 NBGRADER_VERSION=0.9.3 JUPYTERLAB_VERSION=4.2.4 NBCONVERT_VERSION=7.16.4 NOTEBOOK_VERSION=7.2.1 NBCLASSIC_VERSION=1.1.0
+ARG JUPYTERHUB_VERSION=4.1.5 JUPYTERSERVER_VERSION=2.14.2 NBGRADER_VERSION=0.9.3 JUPYTERLAB_VERSION=4.2.4 NBCONVERT_VERSION=7.16.4 NOTEBOOK_VERSION=7.2.1 NBCLASSIC_VERSION=1.1.0
 ARG PANDAS_VERSION=2.2.3 STATSMODELS_VERSION=0.14.4 BOTTLENECK_VERSION=1.4.2 NUMEXPR_VERSION=2.10.2
 
+# Install uv for faster package installations
+RUN pip3 install --no-cache-dir --upgrade uv
+
 # Install essential+datascience pip packages
 ## mistune added for nbgrader issues
-RUN mamba install -c conda-forge pillow typing-extensions tzlocal appdirs gputil mock pytest umap-learn && \
-    mamba install -c conda-forge nltk statsmodels=$STATSMODELS_VERSION pandas=$PANDAS_VERSION mistune && \
-    mamba install -c conda-forge dpkt nose datascience pyarrow bottleneck=$BOTTLENECK_VERSION umap-learn numexpr=$NUMEXPR_VESION && \
+RUN uv pip install pillow typing-extensions tzlocal appdirs gputil mock pytest umap-learn --system && \
+    uv pip install nltk statsmodels==$STATSMODELS_VERSION pandas==$PANDAS_VERSION mistune --system && \
+    uv pip install dpkt nose datascience pyarrow bottleneck==$BOTTLENECK_VERSION umap-learn numexpr==$NUMEXPR_VERSION --system && \
     python -c 'import matplotlib.pyplot' && \
     fix-permissions $CONDA_DIR && \
    fix-permissions /home/$NB_USER && \
-    mamba clean --all
+    uv cache clean
 
 # Install jupyterlab+extensions
-RUN mamba install -c conda-forge jupyterhub=$JUPYTERHUB_VERSION jupyter_server=$JUPYTERSERVER_VERSION && \
-    mamba install -c conda-forge jupyterlab=$JUPYTERLAB_VERSION notebook=$NOTEBOOK_VERSION nbclassic=$NBCLASSIC_VERSION && \
-    # (TODO: Re-enable collab once RTC is fixed) mamba install -c conda-forge jupyterlab_rise jupyter_server_terminals jupyter-collaboration && \
-    mamba install -c conda-forge jupyterlab_rise jupyter_server_terminals && \
-    mamba install -c conda-forge jupyterlab-latex jupyterlab-git jupyterlab-fasta jupyterlab-geojson && \
-    mamba install -c conda-forge nbconvert=$NBCONVERT_VERSION nbgrader=$NBGRADER_VERSION && \
+RUN uv pip install jupyterhub==$JUPYTERHUB_VERSION jupyter_server==$JUPYTERSERVER_VERSION --system && \
+    uv pip install jupyterlab==$JUPYTERLAB_VERSION notebook==$NOTEBOOK_VERSION nbclassic==$NBCLASSIC_VERSION --system && \
+    # (TODO: Re-enable collab once RTC is fixed) uv pip install jupyterlab_rise jupyter_server_terminals jupyter-collaboration --system && \
+    uv pip install jupyterlab_rise jupyter_server_terminals --system && \
+    uv pip install jupyterlab-latex jupyterlab-git jupyterlab-fasta jupyterlab-geojson --system && \
+    uv pip install nbconvert==$NBCONVERT_VERSION nbgrader==$NBGRADER_VERSION --system && \
     fix-permissions $CONDA_DIR && \
     fix-permissions /home/$NB_USER && \
-    mamba clean --all
+    uv cache clean
 
 # Install R packages
-RUN mamba install -c conda-forge r-markdown r-covr r-git2r r-crosstalk r-dt -y && \
+RUN R -e "install.packages(c('sf', 'markdown', 'covr', 'git2r', 'crosstalk', 'DT'), repos='https://cloud.r-project.org')" && \
     fix-permissions $CONDA_DIR && \
-    fix-permissions /home/$NB_USER && \
-    mamba clean -a -y
+    fix-permissions /home/$NB_USER
 
 # Run install-python + cleanup
 RUN /usr/share/datahub/scripts/install-python-all.sh && \
@@ -107,7 +115,4 @@ RUN jupyter labextension disable @jupyterlab/extensionmanager-extension
 
 ENV NBGRADER_COURSEID="NA"
 ENV JUPYTERHUB_USER=${NB_USER}
-RUN pip cache purge
-RUN conda clean -t -p -i -y
-
 WORKDIR /home/jovyan
diff --git a/images/scipy-ml-notebook/Dockerfile b/images/scipy-ml-notebook/Dockerfile
index 17ec040f..e5cf5fa7 100644
--- a/images/scipy-ml-notebook/Dockerfile
+++ b/images/scipy-ml-notebook/Dockerfile
@@ -50,71 +50,42 @@ RUN chmod 777 /etc/datahub-profile.d/*.sh /tmp/activate.sh
 
 USER jovyan
 
-# CUDA setup w/mamba
-## TODO: Investigate this command, seems to duplicate cuda packages for nvidia (pypi + conda-forge).
-# cuda-toolkit is a skeleton package on CUDA 12, unlike CUDA <= 11
-RUN mamba install -c "nvidia/label/cuda-12.6.0" cuda-nvcc \
-    cuda-toolkit=$CUDA_VERSION \
-    # For CUDA 11: cudatoolkit=$CUDA_VERSION \
-    cuda-version=$CUDA_VERSION \
-    nccl \
-    -y && \
+RUN pip3 install --no-cache-dir --upgrade uv
+
+RUN uv pip install --system \
+    --extra-index-url https://pypi.nvidia.com \
+    --extra-index-url https://download.pytorch.org/whl/cu126 \
+    nvidia-cuda-nvcc-cu12 \
+    nvidia-nccl-cu12 \
+    cuda-python \
+    protobuf==$PROTOBUF_VERSION \
+    opencv-contrib-python-headless \
+    opencv-python \
+    PyQt5 \
+    pycocotools \
+    pillow \
+    scapy \
+    nvidia-cudnn-cu12==$CUDNN_VERSION \
+    torch==$TORCH_VERSION \
+    torchvision \
+    torchaudio \
+    tensorflow==$TENSORFLOW_VERSION \
+    tensorflow-datasets \
+    tensorrt==$TENSORRT_VERSION \
+    keras==$KERAS_VERSION \
+    tf-keras==$TF_KERAS_VERSION \
+    transformers \
+    datasets \
+    accelerate \
+    huggingface-hub \
+    timm \
+    && \
     fix-permissions $CONDA_DIR && \
     fix-permissions /home/$NB_USER && \
-    mamba clean -a -y
-
-# Install scipy pip packages
-## install protobuf to avoid weird base type error. seems like if we don't then it'll be installed twice.
-## https://github.com/spesmilo/electrum/issues/7825
-## pip cache purge didnt work here for some reason.
-RUN pip install --no-cache-dir protobuf==$PROTOBUF_VERSION
-## cuda-python installed to have parity with tensorflow and cudnn
-## Install pillow<7 due to dependency issue https://github.com/pytorch/vision/issues/1712
-## tensorrt installed to fix not having libnvinfer that has caused tensorflow issues.
-RUN pip install opencv-contrib-python-headless \
-    opencv-python && \
-    fix-permissions $CONDA_DIR && \
-    fix-permissions /home/$NB_USER && \
-    pip cache purge
-
-RUN mamba install -c conda-forge pyqt pycocotools pillow scapy && \
-    fix-permissions $CONDA_DIR && \
-    fix-permissions /home/$NB_USER && \
-    mamba clean --all
-
-# Install CUDA/Torch/Tensorflow/Keras w/pip
-## no purge required but no-cache-dir is used. pip purge will actually break the build here!
-## Beware of potentially needing to update these if we update the drivers.
-## Check tensorrt_env_vars.sh if you have to bump tensorrt!
-
-## tf-keras is keras 2.x for higher versions of tensorflow that would normally require keras 3
-RUN pip install nvidia-cudnn-cu12==$CUDNN_VERSION torch==$TORCH_VERSION torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126 && \
-    pip install tensorflow==$TENSORFLOW_VERSION tensorflow-datasets tensorrt==$TENSORRT_VERSION keras==$KERAS_VERSION tf-keras==$TF_KERAS_VERSION && \
-    fix-permissions $CONDA_DIR && \
-    fix-permissions /home/$NB_USER && \
-    mamba clean -a -y && \
-    pip cache purge
-
-RUN pip install transformers datasets accelerate huggingface-hub timm && \
-    fix-permissions $CONDA_DIR && \
-    fix-permissions /home/$NB_USER && \
-    mamba clean -a -y && \
-    pip cache purge
+    uv cache clean
 
 USER $NB_UID:$NB_GID
 
 ENV PATH=${PATH}:/usr/local/nvidia/bin:/opt/conda/bin
-# CUDA fixes for CONDA
-## Copy libdevice file to the required path
-RUN mkdir -p $CONDA_DIR/lib/nvvm/libdevice && \
-    cp $CONDA_DIR/nvvm/libdevice/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/
-    #CUDA 11: cp $CONDA_DIR/lib/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/
-
-# TensorRT fix for tensorflow
-## https://github.com/tensorflow/tensorflow/issues/61468 (could not find TensorRT)
-## This will most definitely have to be changed after 8.6.1...
-RUN ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.$TENSORRT_VERSION && \
-    ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.$TENSORRT_VERSION
-
 # Run datahub scripts
 RUN . /tmp/activate.sh
diff --git a/images/spec.yml b/images/spec.yml
index 6add41e5..c1b1eafc 100644
--- a/images/spec.yml
+++ b/images/spec.yml
@@ -22,10 +22,10 @@ images:
     integration_tests: false
     info_cmds: [PY_VER, PIP_LIST, CUDA_VERSION, CONDA_INFO, CONDA_LIST, APT_PKG_LIST]
     prune: false # comment if scipy-ml stops being the last container
-    #prepull: false #-- uncomment to disable prepulling behavior for scipy-ml. gives you space on machine in exchange for build time.
+    # prepull: false #-- uncomment to disable prepulling behavior for scipy-ml. gives you space on machine in exchange for build time.
 
 tag:
-  prefix: "2025.2"
+  prefix: "2025.3"
 
 all_info_cmds:
   PY_VER: