Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/actionlint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ self-hosted-runner:
# Labels of self-hosted runner in array of strings.
labels:
- default
- arc-runner-ucm
- gpu
- "116"

38 changes: 30 additions & 8 deletions .github/workflows/e2e_test.yml
Original file line number Diff line number Diff line change
@@ -1,30 +1,52 @@
name: offline_inference_test
on:
workflow_dispatch:

on:
workflow_dispatch:

schedule:
- cron: '0 19 * * *'

jobs:
offline-inference:
runs-on: arc-runner-ucm
runs-on: [self-hosted, gpu]

steps:
- uses: actions/checkout@v4
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'schedule' && 'develop' || github.ref_name }}

- name: Show actual branch & commit
run: |
echo "== Working branch =="
echo "Latest commit: $(git log --oneline -1)"
echo "On branch : $(git branch --show-current || git describe --tags --exact-match 2>/dev/null || git rev-parse --short HEAD)"

- run: nvidia-smi

- name: Run offline_inference in container
run: |
docker run --rm \
--gpus all \
--network=host \
-v ${{ github.workspace }}:/workspace/unified-cache-management \
-v /home_116/models/Qwen2.5-1.5B-Instruct:/home/models/Qwen2.5-1.5B-Instruct \
-v /home/models/Qwen2.5-1.5B-Instruct:/home/models/Qwen2.5-1.5B-Instruct \
-w /workspace/unified-cache-management \
--entrypoint /bin/bash \
vllm/vllm-openai:v0.9.2 \
-c "
set -euo pipefail
set -euxo pipefail
export PLATFORM=cuda
export MODEL_PATH=/home/models/Qwen2.5-1.5B-Instruct
export http_proxy=http://172.80.0.1:7890
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
git config --global http.version HTTP/1.1
git config --global http.sslVerify false
git config --global http.proxy \$http_proxy
git config --global https.proxy \$http_proxy
pip install -v -e . --no-build-isolation
cd \$(pip show vllm | grep Location | awk '{print \$2}') &&
cd \$(pip show vllm | grep Location | awk '{print \$2}')
git apply /workspace/unified-cache-management/ucm/integration/vllm/patch/0.9.2/vllm-adapt.patch
cd /workspace/unified-cache-management
python3 examples/offline_inference.py
"
"