Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
7ac06fd
Changed datascience image to install packages using uv
kevinsun0904 May 21, 2025
7d03256
isntalled typing extensions using conda
kevinsun0904 May 21, 2025
dad1231
fixed typo
kevinsun0904 May 21, 2025
4a1a35a
changed versioning from conda to pip
kevinsun0904 May 21, 2025
eaa9894
fixed typo
kevinsun0904 May 21, 2025
5775285
Redeclare JUPYTERHUB_VERSION after FROM call
kevinsun0904 May 21, 2025
f161c86
Small fix
kevinsun0904 May 21, 2025
4025cf7
fixed typo
kevinsun0904 May 21, 2025
ff1cdb9
fixed typo
kevinsun0904 May 21, 2025
6df26d5
Migrated scipy-ml to uv
kevinsun0904 May 27, 2025
923d524
Removed commented out notebooks
kevinsun0904 May 27, 2025
b886b54
Changed pyqt to PyQt5
kevinsun0904 May 27, 2025
3e51d4f
removed prepull from all notebooks
kevinsun0904 May 27, 2025
dc915c4
Removed comments and migrated cuda to pip
kevinsun0904 May 28, 2025
781d494
Added index-url
kevinsun0904 May 28, 2025
5b76494
Commented out cuda conda fix
kevinsun0904 May 28, 2025
324c5ed
Removed comments
kevinsun0904 May 29, 2025
8284e2d
datascience-notebook: add r-sf; ensure modern sqlite for JupyterLab; …
yifanzhu1121 Aug 11, 2025
bb4583e
Update main.yml
RockfordMankiniUCSD Aug 13, 2025
8751623
Update Dockerfile
RockfordMankiniUCSD Aug 14, 2025
6b1edcb
Update Dockerfile
RockfordMankiniUCSD Aug 14, 2025
4cd836f
Update Dockerfile
RockfordMankiniUCSD Aug 14, 2025
97a1766
Update Dockerfile
N2akiti Oct 23, 2025
7633fc2
Update Dockerfile
N2akiti Oct 23, 2025
a74d4cb
Update Dockerfile
N2akiti Oct 23, 2025
d55f2b6
Update Dockerfile
N2akiti Oct 26, 2025
8ddea29
Update Dockerfile
N2akiti Oct 26, 2025
bec36b8
Update Dockerfile
N2akiti Oct 27, 2025
5feb0ce
Update Dockerfile
N2akiti Oct 27, 2025
5944b46
Update Dockerfile
N2akiti Nov 1, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
with:
mode: start
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
ec2-image-id: ami-079a0c9425d45e778
ec2-image-id: ami-00bea05327004f12a
ec2-instance-type: t3.2xlarge
subnet-id: subnet-0c4a81ef57cf2ebe3
security-group-id: sg-01cb8c0cb0de3ba00
Expand Down
43 changes: 24 additions & 19 deletions images/datascience-notebook/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# If you are building manually, pass PYTHON_VERSION/PY_VER_SHORT/JUPYTERHUB_VERSION with --build-arg
# If you are building manually, pass PYTHON_VERSION/PY_VER_SHORT/JUPYTERHUB_VERSION with --build-arg
ARG PYTHON_VERSION=python-3.11.8
ARG PY_VER_SHORT=3.11
ARG JUPYTERHUB_VERSION=4.1.5
Expand All @@ -14,6 +14,12 @@ ENV DEBCONF_NOWARNINGS="yes"
RUN apt-get update -y && \
apt-get -qq install -y --no-install-recommends \
git \
libgdal-dev \
libproj-dev \
libgeos-dev \
libudunits2-dev \
libgit2-dev \
pkg-config \
curl \
rsync \
unzip \
Expand Down Expand Up @@ -64,35 +70,37 @@ USER jovyan

# Python/Mamba Deps
## Package versions
ARG JUPYTERSERVER_VERSION=2.14.2 NBGRADER_VERSION=0.9.3 JUPYTERLAB_VERSION=4.2.4 NBCONVERT_VERSION=7.16.4 NOTEBOOK_VERSION=7.2.1 NBCLASSIC_VERSION=1.1.0
ARG JUPYTERHUB_VERSION=4.1.5 JUPYTERSERVER_VERSION=2.14.2 NBGRADER_VERSION=0.9.3 JUPYTERLAB_VERSION=4.2.4 NBCONVERT_VERSION=7.16.4 NOTEBOOK_VERSION=7.2.1 NBCLASSIC_VERSION=1.1.0
ARG PANDAS_VERSION=2.2.3 STATSMODELS_VERSION=0.14.4 BOTTLENECK_VERSION=1.4.2 NUMEXPR_VERSION=2.10.2

# Install uv for faster package installations
RUN pip3 install --no-cache-dir --upgrade uv

# Install essential+datascience pip packages
## mistune added for nbgrader issues
RUN mamba install -c conda-forge pillow typing-extensions tzlocal appdirs gputil mock pytest umap-learn && \
mamba install -c conda-forge nltk statsmodels=$STATSMODELS_VERSION pandas=$PANDAS_VERSION mistune && \
mamba install -c conda-forge dpkt nose datascience pyarrow bottleneck=$BOTTLENECK_VERSION umap-learn numexpr=$NUMEXPR_VESION && \
RUN uv pip install pillow typing-extensions tzlocal appdirs gputil mock pytest umap-learn --system && \
uv pip install nltk statsmodels==$STATSMODELS_VERSION pandas==$PANDAS_VERSION mistune --system && \
uv pip install dpkt nose datascience pyarrow bottleneck==$BOTTLENECK_VERSION umap-learn numexpr==$NUMEXPR_VERSION --system && \
python -c 'import matplotlib.pyplot' && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean --all
uv cache clean

# Install jupyterlab+extensions
RUN mamba install -c conda-forge jupyterhub=$JUPYTERHUB_VERSION jupyter_server=$JUPYTERSERVER_VERSION && \
mamba install -c conda-forge jupyterlab=$JUPYTERLAB_VERSION notebook=$NOTEBOOK_VERSION nbclassic=$NBCLASSIC_VERSION && \
# (TODO: Re-enable collab once RTC is fixed) mamba install -c conda-forge jupyterlab_rise jupyter_server_terminals jupyter-collaboration && \
mamba install -c conda-forge jupyterlab_rise jupyter_server_terminals && \
mamba install -c conda-forge jupyterlab-latex jupyterlab-git jupyterlab-fasta jupyterlab-geojson && \
mamba install -c conda-forge nbconvert=$NBCONVERT_VERSION nbgrader=$NBGRADER_VERSION && \
RUN uv pip install jupyterhub==$JUPYTERHUB_VERSION jupyter_server==$JUPYTERSERVER_VERSION --system && \
uv pip install jupyterlab==$JUPYTERLAB_VERSION notebook==$NOTEBOOK_VERSION nbclassic==$NBCLASSIC_VERSION --system && \
# (TODO: Re-enable collab once RTC is fixed) uv pip install jupyterlab_rise jupyter_server_terminals jupyter-collaboration --system && \
uv pip install jupyterlab_rise jupyter_server_terminals --system && \
uv pip install jupyterlab-latex jupyterlab-git jupyterlab-fasta jupyterlab-geojson --system && \
uv pip install nbconvert==$NBCONVERT_VERSION nbgrader==$NBGRADER_VERSION --system && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean --all
uv cache clean

# Install R packages
RUN mamba install -c conda-forge r-markdown r-covr r-git2r r-crosstalk r-dt -y && \
RUN R -e "install.packages(c('sf', 'markdown', 'covr', 'git2r', 'crosstalk', 'DT'), repos='https://cloud.r-project.org')" && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y
fix-permissions /home/$NB_USER

# Run install-python + cleanup
RUN /usr/share/datahub/scripts/install-python-all.sh && \
Expand All @@ -107,7 +115,4 @@ RUN jupyter labextension disable @jupyterlab/extensionmanager-extension
ENV NBGRADER_COURSEID="NA"
ENV JUPYTERHUB_USER=${NB_USER}

RUN pip cache purge
RUN conda clean -t -p -i -y

WORKDIR /home/jovyan
91 changes: 31 additions & 60 deletions images/scipy-ml-notebook/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -50,71 +50,42 @@ RUN chmod 777 /etc/datahub-profile.d/*.sh /tmp/activate.sh

USER jovyan

# CUDA setup w/mamba
## TODO: Investigate this command, seems to duplicate cuda packages for nvidia (pypi + conda-forge).
# cuda-toolkit is a skeleton package on CUDA 12, unlike CUDA <= 11
RUN mamba install -c "nvidia/label/cuda-12.6.0" cuda-nvcc \
cuda-toolkit=$CUDA_VERSION \
# For CUDA 11: cudatoolkit=$CUDA_VERSION \
cuda-version=$CUDA_VERSION \
nccl \
-y && \
RUN pip3 install --no-cache-dir --upgrade uv

RUN uv pip install --system \
--extra-index-url https://pypi.nvidia.com \
--extra-index-url https://download.pytorch.org/whl/cu126 \
nvidia-cuda-nvcc-cu12 \
nvidia-nccl-cu12 \
cuda-python \
protobuf==$PROTOBUF_VERSION \
opencv-contrib-python-headless \
opencv-python \
PyQt5 \
pycocotools \
pillow \
scapy \
nvidia-cudnn-cu12==$CUDNN_VERSION \
torch==$TORCH_VERSION \
torchvision \
torchaudio \
tensorflow==$TENSORFLOW_VERSION \
tensorflow-datasets \
tensorrt==$TENSORRT_VERSION \
keras==$KERAS_VERSION \
tf-keras==$TF_KERAS_VERSION \
transformers \
datasets \
accelerate \
huggingface-hub \
timm \
&& \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y

# Install scipy pip packages
## install protobuf to avoid weird base type error. seems like if we don't then it'll be installed twice.
## https://github.com/spesmilo/electrum/issues/7825
## pip cache purge didn't work here for some reason.
RUN pip install --no-cache-dir protobuf==$PROTOBUF_VERSION
## cuda-python installed to have parity with tensorflow and cudnn
## Install pillow<7 due to dependency issue https://github.com/pytorch/vision/issues/1712
## tensorrt installed to fix not having libnvinfer that has caused tensorflow issues.
RUN pip install opencv-contrib-python-headless \
opencv-python && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
pip cache purge

RUN mamba install -c conda-forge pyqt pycocotools pillow scapy && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean --all

# Install CUDA/Torch/Tensorflow/Keras w/pip
## no purge required but no-cache-dir is used. pip purge will actually break the build here!
## Beware of potentially needing to update these if we update the drivers.
## Check tensorrt_env_vars.sh if you have to bump tensorrt!

## tf-keras is keras 2.x for higher versions of tensorflow that would normally require keras 3
RUN pip install nvidia-cudnn-cu12==$CUDNN_VERSION torch==$TORCH_VERSION torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126 && \
pip install tensorflow==$TENSORFLOW_VERSION tensorflow-datasets tensorrt==$TENSORRT_VERSION keras==$KERAS_VERSION tf-keras==$TF_KERAS_VERSION && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y && \
pip cache purge

RUN pip install transformers datasets accelerate huggingface-hub timm && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y && \
pip cache purge
uv cache clean

USER $NB_UID:$NB_GID
ENV PATH=${PATH}:/usr/local/nvidia/bin:/opt/conda/bin

# CUDA fixes for CONDA
## Copy libdevice file to the required path
RUN mkdir -p $CONDA_DIR/lib/nvvm/libdevice && \
cp $CONDA_DIR/nvvm/libdevice/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/
#CUDA 11: cp $CONDA_DIR/lib/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/

# TensorRT fix for tensorflow
## https://github.com/tensorflow/tensorflow/issues/61468 (could not find TensorRT)
## This will most definitely have to be changed after 8.6.1...
RUN ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.$TENSORRT_VERSION && \
ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.$TENSORRT_VERSION

# Run datahub scripts
RUN . /tmp/activate.sh
4 changes: 2 additions & 2 deletions images/spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ images:
integration_tests: false
info_cmds: [PY_VER, PIP_LIST, CUDA_VERSION, CONDA_INFO, CONDA_LIST, APT_PKG_LIST]
prune: false # comment if scipy-ml stops being the last container
#prepull: false #-- uncomment to disable prepulling behavior for scipy-ml. gives you space on machine in exchange for build time.
# prepull: false #-- uncomment to disable prepulling behavior for scipy-ml. gives you space on machine in exchange for build time.

tag:
prefix: "2025.2"
prefix: "2025.3"

all_info_cmds:
PY_VER:
Expand Down
Loading