Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Git submodules pinned to forks under the EPFLiGHT organisation.
# NOTE(review): SSH URLs (git@github.com:...) require an SSH key with access
# to the org — confirm contributors without one don't need an HTTPS mirror.
[submodule "third-party/verl"]
path = third-party/verl
url = git@github.com:EPFLiGHT/verl.git
[submodule "third-party/sglang"]
path = third-party/sglang
url = git@github.com:EPFLiGHT/sglang.git
30 changes: 0 additions & 30 deletions config/helper/verl_hydra_gen.yaml

This file was deleted.

43 changes: 43 additions & 0 deletions config/rl/ds/config-baai-taco.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Preprocessing config for the BAAI/TACO code-reasoning dataset.
# Notice: TACO makes use of a dataset script that is no longer supported; you
# need to downgrade datasets to datasets<=3.6.0 to load the dataset.
hydra:
  searchpath:
    - pkg://multimeditron.config

defaults:
  - preprocess-ds
  - _self_

# Where the raw dataset is loaded from.
source:
  type: hf # Supported types: 'hf', 'jsonl'
  kwargs:
    path: BAAI/TACO
    split: train

# Tokenisation is disabled: rows are written out as plain text.
tokenizer:
  enable: false
  model: null
  use_fast: true
  attachment_token: <|reserved_special_token_0|>

output: /capstor/store/cscs/swissai/a127/meditron/multimediset/reasoning/taco.parquet
num_processes: 128

processes:
  # Keep only prompt/solution/checks; every other raw column is dropped.
  - type: python
    kwargs:
      remove_columns: [
        'question', 'solutions', 'starter_code', 'input_output', 'name', 'url', 'Expected Auxiliary Space', 'Expected Time Complexity',
        'raw_tags', 'skill_types', 'tags', 'source',
        'date', 'picture_num',
      ]
      imports: ['re']
      func: |
        {
          "prompt": data["question"],
          "solution": data["solutions"],
          "checks": data["input_output"],
        }
  # Deterministic shuffle so repeated runs produce identical parquet files.
  - type: shuffle
    kwargs:
      seed: 42
43 changes: 43 additions & 0 deletions config/rl/ds/config-math-shepherd.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Preprocessing config for the trl-lib/math_shepherd process-reward dataset.
hydra:
  searchpath:
    - pkg://multimeditron.config

defaults:
  - preprocess-ds
  - _self_

# Where the raw dataset is loaded from.
source:
  type: hf # Supported types: 'hf', 'jsonl'
  kwargs:
    path: trl-lib/math_shepherd
    split: train

# Tokenisation is disabled: rows are written out as plain text.
tokenizer:
  enable: false
  model: null
  use_fast: true
  attachment_token: <|reserved_special_token_0|>

output: /capstor/store/cscs/swissai/a127/meditron/multimediset/reasoning/math-shepherd.parquet
num_processes: 128

processes:
  # Keep only rows whose per-step labels are all truthy.
  - type: python-filter
    kwargs:
      func: |
        all(k for k in data["labels"])

  # Reshape into the trainer schema: chat-style prompt plus ground-truth
  # reward info, keyed by data_source for reward-function selection.
  - type: python
    kwargs:
      remove_columns: ['labels', 'completions']
      imports: ['re']
      func: |
        {
          "prompt": [{"content": data["prompt"], "role": "user"}],
          "reward_model": {"ground_truth": data["completions"]},
          "data_source": "math-shepherd",
        }

  # Deterministic shuffle so repeated runs produce identical parquet files.
  - type: shuffle
    kwargs:
      seed: 42
36 changes: 36 additions & 0 deletions config/rl/ds/config-nemotron-post-training.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Preprocessing config for the NVIDIA Llama-Nemotron post-training dataset
# (code split only).
hydra:
  searchpath:
    - pkg://multimeditron.config

defaults:
  - preprocess-ds
  - _self_

# Where the raw dataset is loaded from.
source:
  type: hf # Supported types: 'hf', 'jsonl'
  kwargs:
    path: nvidia/Llama-Nemotron-Post-Training-Dataset
    split: code

# Tokenisation is disabled: rows are written out as plain text.
tokenizer:
  enable: false
  model: null
  use_fast: true
  attachment_token: <|reserved_special_token_0|>

output: /capstor/store/cscs/swissai/a127/meditron/multimediset/reasoning/nemotron-post-training-code.parquet
num_processes: 32

processes:
  - type: python
    kwargs:
      remove_columns: ['input', 'output', 'category', 'license', 'reasoning', 'generator', 'used_in_training', 'version', 'system_prompt']
      imports: ['re']
      # func given as a list: assignment statements followed by a final
      # expression producing the new row (presumably executed in order —
      # confirm against the preprocess-ds runner).
      func:
        # Strip the <think>...</think> reasoning span from the model output.
        - output_p = re.sub(r"<think>(([^<]|<(?!\/think>))*)<\/think>", '', data["output"], flags=re.MULTILINE).strip()
        # Collect all ```python fenced code blocks from the cleaned output.
        - code_p = list(re.finditer(r"```python\n(([^`]|`(?!``))*)\n```", output_p))
        # Last code block becomes the response; fall back to the full cleaned text.
        - |
          {
            "prompt": data["input"][0]["content"],
            "response": code_p[-1].group(1).strip() if len(code_p) > 0 else output_p,
          }
18 changes: 18 additions & 0 deletions config/rl/grpo/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# GRPO smoke-test trainer config layered on top of the verl trainer defaults.
hydra:
  searchpath:
    - pkg://multimeditron.config
    - pkg://verl.trainer.config

defaults:
  - verl_trainer
  - _self_

data:
  train_files:
    - /capstor/store/cscs/swissai/a127/meditron/multimediset/reasoning/math-shepherd.parquet
    # - ./mock_dataset/mock_dataset.parquet
  val_files:
    - /capstor/store/cscs/swissai/a127/meditron/multimediset/reasoning/math-shepherd-val.parquet
    # - ./mock_dataset/mock_dataset.parquet
  prompt_key: prompt          # column holding the chat-style prompt
  reward_fn_key: data_source  # column used to pick the reward function
29 changes: 26 additions & 3 deletions docker/Dockerfile.verl
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,22 @@ RUN pip3 install --upgrade pip && \
'nvidia-cudnn-frontend>=1.13.0' 'nvidia-cudnn-cu12>=9.11.0.98'
RUN conda install -n py312 -y mpi4py

# Install nsjail *runtime* dependencies in the base image, so the final stage
# only needs the binary copied in from the build stage below.
RUN apt-get update -y && \
    apt-get install -y libc6 libstdc++6 libprotobuf32 libnl-route-3-200 && \
    rm -rf /var/lib/apt/lists/*

########################################################################
# Build nsjail in a separate stage: build tools stay out of the final image.
FROM base AS build-nsjail

RUN apt-get update -y && \
    apt-get install -y \
        autoconf bison flex gcc g++ libprotobuf-dev \
        libnl-route-3-dev libtool make pkg-config protobuf-compiler git
# Pinned to the nsjail 3.4 release tag.
RUN git clone https://github.com/google/nsjail.git --depth 1 -b 3.4 /nsjail && \
    cd /nsjail && make clean && make

########################################################################
# Download Z-Shell enhancements.
FROM docker.io/alpine/git:2.40.1 AS git-pure
Expand All @@ -78,9 +94,10 @@ RUN git clone --depth 1 ${ZSHS_URL} /opt/zsh/zsh-syntax-highlighting
# This layer can be distributed so that subsequent users

FROM base AS final

ENV HYDRA_FULL_ERROR=1

COPY --from=build-nsjail /nsjail/nsjail /bin

# A final record of the dependencies from pip freeze.
# RUN pip freeze > ${DEPENDENCIES_DIR}/requirements-freeze-final.txt
# RUN pip list --format freeze > ${DEPENDENCIES_DIR}/requirements-list-final.txt
Expand Down Expand Up @@ -125,7 +142,9 @@ RUN pip3 install --upgrade pip && pip3 install -U \
nvidia-ml-py flashinfer-python
RUN pip3 install --upgrade pip && pip3 install -v -U --no-build-isolation \
"sglang[test]==0.5.2" sgl_kernel
RUN pip3 uninstall -y pynvml
RUN pip3 uninstall -y pynvml datasets && \
pip3 install datasets


# Add code tunnel for remote code development directly on the docker image
RUN mkdir -p /tmp/code
Expand All @@ -136,6 +155,10 @@ RUN curl -Lk 'https://code.visualstudio.com/sha/download?build=stable&os=cli-alp
WORKDIR /
RUN rm -rf /tmp/code

# Cleanup duty to make the docker image as lightweight as possible
RUN rm -rf /var/lib/apt/lists/* && \
apt-get clean && \
pip cache purge

# Entrypoint command (zsh)
CMD ["/bin/zsh"]

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies = [
"click",
"ray",
"hydra-core",
"pydantic",
"rich",
"pydanclick",
"webdataset",
"transformers",
Expand Down
61 changes: 61 additions & 0 deletions scripts/download-datasets.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/bin/bash
# Download/preprocess RL datasets via the multimeditron `mm` CLI.
# Usage: <script> name0 name1 ... (if no names are given, provide a list of all datasets)
set -euo pipefail

# Check that the multimeditron scripts have been installed
if ! command -v mm &> /dev/null
then
    echo "mm command could not be found, please install the multimeditron package"
    exit 1
fi

# Resolve the repo config directory relative to this script's own location.
# Quoted throughout so paths containing spaces do not word-split.
BASE_CONFIG_PATH=$(realpath "$(dirname "$0")/../config")
echo "Base config path: $BASE_CONFIG_PATH"

# List of all of the datasets: each entry is "<name> <command to run>".
DATASETS=(
    "math-shepherd mm preprocess-ds -c $BASE_CONFIG_PATH/rl/ds/config-math-shepherd.yaml"
    "math-shepherd-val mm preprocess-ds -c $BASE_CONFIG_PATH/rl/ds/config-math-shepherd.yaml source.kwargs.split=test output=/capstor/store/cscs/swissai/a127/meditron/multimediset/reasoning/math-shepherd-val.parquet"
    "baai-taco mm preprocess-ds -c $BASE_CONFIG_PATH/rl/ds/config-baai-taco.yaml"
    "nemotron mm preprocess-ds -c $BASE_CONFIG_PATH/rl/ds/config-nemotron-post-training.yaml"
)

# Extract the names of the datasets (first word of each entry).
# Parameter expansion avoids the unquoted `echo | cut` subshell of the
# original, which would word-split entries and spawn two processes per loop.
ALL_DATASET_NAMES=()
for entry in "${DATASETS[@]}"; do
    ALL_DATASET_NAMES+=("${entry%% *}")
done

# If no arguments are given, display the list of all datasets and exit
if [ "$#" -eq 0 ]; then
    echo "No dataset names provided. Available datasets are:"
    for name in "${ALL_DATASET_NAMES[@]}"; do
        echo "  - $name"
    done
    exit 0
fi

# Download the specified datasets
for name in "$@"; do
    found=false
    for entry in "${DATASETS[@]}"; do
        entry_name="${entry%% *}"
        if [ "$name" == "$entry_name" ]; then
            found=true
            echo "Downloading dataset: $name"
            # Everything after the first space is the command to run.
            cmd="${entry#* }"
            echo "Running command: $cmd"
            # Intentionally unquoted: the stored command string must undergo
            # word splitting to become a command plus its arguments.
            $cmd
            echo "Finished downloading dataset: $name"
            break
        fi
    done
    if [ "$found" = false ]; then
        echo "Dataset name '$name' not recognized. Available datasets are:"
        for valid_name in "${ALL_DATASET_NAMES[@]}"; do
            echo "  - $valid_name"
        done
        exit 1
    fi
done
10 changes: 0 additions & 10 deletions scripts/generate_hydra_verl_config.py

This file was deleted.

1 change: 1 addition & 0 deletions src/multimeditron/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ def main_cli():

from .preprocess import *
from .verl import *
from .debug import *
from .train import *
44 changes: 44 additions & 0 deletions src/multimeditron/cli/debug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from multimeditron.cli import EPILOG, CONFIG_PATH, main_cli
from multimeditron.utils import get_torch_dtype
from datasets import load_dataset
import ray
from ray import serve
from fastapi import Request


@serve.deployment(num_replicas=2)  # scale horizontally if needed
class PyExecService:
    """Ray Serve deployment intended to run untrusted Python code in nsjail.

    Currently a stub: the sandboxed executor is not wired up yet, so every
    request with a non-empty ``code`` field gets ``{"not": "implemented"}``.
    """

    def __init__(self):
        # One NsJailExecutor actor per replica will live here once implemented:
        # self.executor = NsJailExecutor.remote()
        pass

    async def __call__(self, request: Request):
        """
        HTTP handler:
        - expects POST with JSON body {"code": "print('hello')", "timeout": 5}
        - runs code in nsjail
        - returns JSON result
        """
        data = await request.json()
        code = data.get("code", "")
        # timeout = data.get("timeout", 5)

        # Reject empty or whitespace-only payloads early.
        if not code.strip():
            return {"error": "No code provided"}

        # TODO: execute asynchronously via Ray once the executor exists:
        # result = await self.executor.execute.remote(code, wall_timeout=timeout)

        return {"not": "implemented"}

@main_cli.command("serve")
def _serve():
    """Deploy the (stub) PyExecService on the local Ray cluster and block.

    Raises if no Ray cluster is reachable: ``address="auto"`` attaches to an
    already-running cluster rather than starting one.
    """
    # Start ray if not already running
    ray.init(address="auto", namespace="serve")

    # Print the URL *before* deploying: serve.run(..., blocking=True) does not
    # return until the server shuts down, so a print placed after it (as in
    # the original) would never be seen during normal operation.
    print("🚀 Ray Serve running at http://127.0.0.1:8000/PyExecService")

    # Deploy service
    app = PyExecService.bind()
    serve.run(app, blocking=True)
Loading