Skip to content

Commit 941e68f

Browse files
authored
[Docs] Add volcano engine startup docs and quick start (#1725)
Add volcano engine startup docs and quick start Signed-off-by: Jiaxin Shan <seedjeffwan@gmail.com>
1 parent a89f190 commit 941e68f

File tree

6 files changed

+350
-15
lines changed

6 files changed

+350
-15
lines changed

dist/chart/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ gpuOptimizer:
8686
imagePullSecrets: []
8787
container:
8888
image:
89-
repository: aibrix/runtime
89+
repository: aibrix/metadata-service
9090
tag: nightly
9191
resources:
9292
limits:

dist/chart/vke.yaml

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ controllerManager:
77
imagePullSecrets: []
88
container:
99
image:
10-
repository: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/controller-manager
11-
tag: v0.4.1
10+
repository: aibrix-public-release-cn-beijing.cr.volces.com/aibrix/controller-manager
11+
tag: v0.5.0-rc.2
1212
imagePullPolicy: IfNotPresent
1313
resources:
1414
limits:
@@ -38,12 +38,12 @@ gatewayPlugin:
3838
imagePullSecrets: []
3939
initContainer:
4040
image:
41-
repository: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/busybox
41+
repository: aibrix-public-release-cn-beijing.cr.volces.com/aibrix/busybox
4242
tag: stable
4343
container:
4444
image:
45-
repository: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/gateway-plugins
46-
tag: v0.4.1
45+
repository: aibrix-public-release-cn-beijing.cr.volces.com/aibrix/gateway-plugins
46+
tag: v0.5.0-rc.2
4747
imagePullPolicy: IfNotPresent
4848
resources:
4949
limits:
@@ -71,8 +71,8 @@ gpuOptimizer:
7171
imagePullSecrets: []
7272
container:
7373
image:
74-
repository: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/runtime
75-
tag: v0.4.1
74+
repository: aibrix-public-release-cn-beijing.cr.volces.com/aibrix/runtime
75+
tag: v0.5.0-rc.2
7676
resources:
7777
limits:
7878
cpu: 500m
@@ -93,7 +93,7 @@ gateway:
9393
tolerations: []
9494
container:
9595
envoy:
96-
image: aibrix-container-registry-cn-beijing.cr.volces.com/envoyproxy/envoy:v1.33.2
96+
image: aibrix-public-release-cn-beijing.cr.volces.com/envoyproxy/envoy:v1.33.2
9797
resources:
9898
requests:
9999
cpu: "1"
@@ -102,7 +102,7 @@ gateway:
102102
cpu: "1"
103103
memory: 1Gi
104104
shutdownManager:
105-
image: aibrix-container-registry-cn-beijing.cr.volces.com/envoyproxy/gateway:v1.2.8
105+
image: aibrix-public-release-cn-beijing.cr.volces.com/envoyproxy/gateway:v1.2.8
106106
resources:
107107
requests:
108108
cpu: 10m
@@ -122,12 +122,12 @@ metadata:
122122
imagePullSecrets: []
123123
initContainer:
124124
image:
125-
repository: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/busybox
125+
repository: aibrix-public-release-cn-beijing.cr.volces.com/aibrix/busybox
126126
tag: stable
127127
container:
128128
image:
129-
repository: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/metadata-service
130-
tag: v0.4.1
129+
repository: aibrix-public-release-cn-beijing.cr.volces.com/aibrix/metadata-service
130+
tag: v0.5.0-rc.2
131131
imagePullPolicy: IfNotPresent
132132
resources:
133133
limits:
@@ -146,7 +146,7 @@ metadata:
146146
imagePullSecrets: []
147147
container:
148148
image:
149-
repository: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/redis
149+
repository: aibrix-public-release-cn-beijing.cr.volces.com/aibrix/redis
150150
tag: "7.4"
151151
resources:
152152
requests:
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
.. _vke:
2+
3+
==============
4+
Volcano Engine
5+
==============
6+
7+
Introduction
8+
------------
9+
10+
This document describes how to deploy AIBrix on Volcano Engine Kubernetes Engine (VKE).
11+
12+
Steps
13+
-----
14+
15+
AIBrix Installation
16+
~~~~~~~~~~~~~~~~~~~
17+
18+
1. Make sure you already have a VKE cluster up and running.
19+
2. Install AIBrix on VKE
20+
21+
.. code-block:: console
22+
23+
kubectl apply -k config/overlays/vke/dependency --server-side
24+
25+
helm install aibrix dist/chart -f dist/chart/vke.yaml -n aibrix-system --create-namespace
26+
27+
3. Wait for all components to be up and running.
28+
29+
Download Model in TOS
30+
~~~~~~~~~~~~~~~~~~~~~
31+
32+
Make sure the model files are stored in TOS, then create the TOS credential secret in the cluster.
33+
34+
.. code-block:: console
35+
36+
kubectl create secret generic tos-credential --from-literal=TOS_ACCESS_KEY=<YOUR_ACCESS_KEY> --from-literal=TOS_SECRET_KEY=<YOUR_SECRET_KEY>
37+
38+
39+
Deploy base model
40+
~~~~~~~~~~~~~~~~~
41+
42+
Save the following YAML as ``model.yaml`` and run ``kubectl apply -f model.yaml``.
43+
44+
.. literalinclude:: ../../../../samples/quickstart/vke/model.yaml
45+
:language: yaml
46+
47+
Deploy Prefill-Decode (PD) Disaggregation Model
48+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
49+
50+
Save the following YAML as ``pd-model.yaml`` and run ``kubectl apply -f pd-model.yaml``.
51+
52+
.. literalinclude:: ../../../../samples/quickstart/vke/pd-model.yaml
53+
:language: yaml
54+
55+
56+
Inference
57+
~~~~~~~~~
58+
59+
Once the model is ready and running, you can test it by running:
60+
61+
.. code-block:: bash
62+
63+
LB_IP=$(kubectl get svc/envoy-aibrix-system-aibrix-eg-903790dc -n envoy-gateway-system -o=jsonpath='{.status.loadBalancer.ingress[0].ip}')
64+
ENDPOINT="${LB_IP}:80"
65+
# Set the routing-strategy header to `pd` if you deployed in disaggregation mode.
66+
curl http://${ENDPOINT}/v1/chat/completions \
67+
-H "Content-Type: application/json" \
68+
-H "routing-strategy: random" \
69+
-d '{
70+
"model": "deepseek-r1-distill-llama-8b",
71+
"messages": [
72+
{"role": "system", "content": "You are a helpful assistant."},
73+
{"role": "user", "content": "help me write a random generator in python"}
74+
]
75+
}'

hack/release/sync-dependency-images.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ TARGET_REGISTRY=$1
1212

1313
# List of images to sync in the format "source_image:tag new_repo_path"
1414
IMAGES=(
15-
"redis:latest ${TARGET_REGISTRY}/aibrix/redis:latest"
15+
"redis:7.4 ${TARGET_REGISTRY}/aibrix/redis:7.4"
1616
"envoyproxy/envoy:v1.33.2 ${TARGET_REGISTRY}/aibrix/envoy:v1.33.2"
1717
"envoyproxy/gateway:v1.2.8 ${TARGET_REGISTRY}/aibrix/gateway:v1.2.8"
1818
"aibrix/kuberay-operator:v1.2.1-patch-20250726 ${TARGET_REGISTRY}/aibrix/kuberay-operator:v1.2.1-patch-20250726"

samples/quickstart/vke/model.yaml

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: deepseek-r1-distill-llama-8b
5+
labels:
6+
model.aibrix.ai/name: deepseek-r1-distill-llama-8b
7+
model.aibrix.ai/port: "8000"
8+
spec:
9+
replicas: 1
10+
strategy:
11+
rollingUpdate:
12+
maxSurge: 1
13+
maxUnavailable: 1
14+
type: RollingUpdate
15+
selector:
16+
matchLabels:
17+
model.aibrix.ai/name: deepseek-r1-distill-llama-8b
18+
template:
19+
metadata:
20+
labels:
21+
model.aibrix.ai/name: deepseek-r1-distill-llama-8b
22+
annotations:
23+
prometheus.io/path: "/metrics"
24+
prometheus.io/port: "8000"
25+
prometheus.io/scrape: "true"
26+
spec:
27+
initContainers:
28+
- command:
29+
- aibrix_download
30+
- --model-uri
31+
- tos://aibrix-artifact-testing/models/DeepSeek-R1-Distill-Llama-8B/
32+
- --local-dir
33+
- /models/
34+
env:
35+
- name: DOWNLOADER_NUM_CONNECTIONS
36+
value: "16"
37+
- name: DOWNLOADER_NUM_THREADS
38+
value: "16"
39+
- name: DOWNLOADER_ALLOW_FILE_SUFFIX
40+
value: json, safetensors
41+
- name: TOS_ACCESS_KEY
42+
valueFrom:
43+
secretKeyRef:
44+
key: TOS_ACCESS_KEY
45+
name: tos-credential
46+
- name: TOS_SECRET_KEY
47+
valueFrom:
48+
secretKeyRef:
49+
key: TOS_SECRET_KEY
50+
name: tos-credential
51+
- name: TOS_ENDPOINT
52+
value: https://tos-s3-cn-beijing.ivolces.com
53+
- name: TOS_REGION
54+
value: cn-beijing
55+
image: aibrix-public-release-cn-beijing.cr.volces.com/aibrix/runtime:v0.5.0-rc.2
56+
name: init-model
57+
volumeMounts:
58+
- mountPath: /models
59+
name: model-hostpath
60+
containers:
61+
- name: vllm-openai
62+
image: aibrix-public-release-cn-beijing.cr.volces.com/vllm/vllm-openai:0.11.0
63+
imagePullPolicy: Always
64+
command:
65+
- python3
66+
- -m
67+
- vllm.entrypoints.openai.api_server
68+
- --port
69+
- "8000"
70+
- --uvicorn-log-level
71+
- warning
72+
- --model
73+
- /models/DeepSeek-R1-Distill-Llama-8B/
74+
- --trust-remote-code
75+
- --served-model-name
76+
- deepseek-r1-distill-llama-8b
77+
- --disable-fastapi-docs
78+
volumeMounts:
79+
- mountPath: /models
80+
name: model-hostpath
81+
resources:
82+
limits:
83+
nvidia.com/gpu: "1"
84+
cpu: "12"
85+
memory: "48G"
86+
requests:
87+
nvidia.com/gpu: "1"
88+
cpu: "12"
89+
memory: "48G"
90+
volumes:
91+
- name: model-hostpath
92+
hostPath:
93+
path: /root/models
94+
type: DirectoryOrCreate
95+
96+
---
97+
98+
apiVersion: v1
99+
kind: Service
100+
metadata:
101+
labels:
102+
model.aibrix.ai/name: deepseek-r1-distill-llama-8b
103+
prometheus-discovery: "true"
104+
annotations:
105+
prometheus.io/scrape: "true"
106+
prometheus.io/port: "8080"
107+
name: deepseek-r1-distill-llama-8b # Note: The Service name must match the label value `model.aibrix.ai/name` in the Deployment
108+
namespace: default
109+
spec:
110+
ports:
111+
- name: serve
112+
port: 8000
113+
protocol: TCP
114+
targetPort: 8000
115+
- name: http
116+
port: 8080
117+
protocol: TCP
118+
targetPort: 8080
119+
selector:
120+
model.aibrix.ai/name: deepseek-r1-distill-llama-8b
121+
type: ClusterIP

0 commit comments

Comments
 (0)