@@ -11,25 +11,21 @@ increment_version:
 TENANCY:=${TENANCY_NAME}
 CONTAINER_REGISTRY:=${REGION_KEY}.ocir.io

-TGI_INFERENCE_IMAGE:=${CONTAINER_REGISTRY}/${TENANCY}/text-generation-interface-odsc:0.9.3-v
-TGI_CONTAINER_NAME:=tgi-odsc
+TGI_INFERENCE_IMAGE:=${CONTAINER_REGISTRY}/${TENANCY}/text-generation-interface:0.9.3-v
+VLLM_INFERENCE_IMAGE:=${CONTAINER_REGISTRY}/${TENANCY}/vllm:0.2.2-v
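+# The image tags deliberately end in "-v"; the build, run, and push rules below
+# append the number stored in version.txt, producing tags like 0.9.3-v1.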

-VLLM_INFERENCE_IMAGE:=${CONTAINER_REGISTRY}/${TENANCY}/vllm-odsc:0.2.2-v
-VLLM_CONTAINER_NAME:=vllm-odsc
-
-SDXL_INFERENCE_IMAGE:=${CONTAINER_REGISTRY}/${TENANCY}/sdxl:1.0.
-
-MODEL_DIR:=${PWD}/hfdata
+# MODEL_DIR:=${PWD}/hfdata
+MODEL_DIR:=${PWD}
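+# MODEL_DIR is what gets mounted into the container at TARGET_DIR; the old
+# dedicated hfdata folder is kept above as a commented-out alternative.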
 TARGET_DIR:=/home/datascience
 HF_DIR=/home/datascience/.cache

-token:=${PWD}/token
-target_token:=/opt/ds/model/deployed_model/token
-model:=meta-llama/Llama-2-13b-chat-hf
-port:=8080
-params:="--max-batch-prefill-tokens 1024"
-local_model:=/opt/ds/model/deployed_model
-tensor_parallelism:=1
+TOKEN:=${PWD}/token
+TARGET_TOKEN:=/opt/ds/model/deployed_model/token
+MODEL:=meta-llama/Llama-2-7b-chat-hf
+PORT:=8080
+PARAMS:="--max-batch-prefill-tokens 1024"
+LOCAL_MODEL:=/opt/ds/model/deployed_model
+TENSOR_PARALLELISM:=1
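+# TOKEN presumably points at a file holding a Hugging Face access token on the
+# host; it is mounted at TARGET_TOKEN so gated models like Llama 2 can be pulled.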

 # Detect the architecture of the current machine
 ARCH := $(shell uname -m)
@@ -57,68 +53,51 @@ build.vllm: check-env init increment_version
 	-t ${VLLM_INFERENCE_IMAGE}$(shell cat version.txt) \
 	-f Dockerfile.vllm .

-build.sdxl: check-env init increment_version
-	$(DOCKER_BUILD_CMD) --network host \
-	-t ${SDXL_INFERENCE_IMAGE}$(shell cat version.txt) \
-	-f Dockerfile.sdxl .
-
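+# run.tgi.hf: launch TGI locally, letting it download ${MODEL} from Hugging
+# Face at startup (hence the mounted token file).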
 run.tgi.hf: check-env
-	docker run --rm -it --gpus all --shm-size 1g \
-	-p ${port}:${port} \
-	-e PORT=${port} \
-	-e TOKEN_FILE=${target_token} \
-	-e PARAMS=${params} \
-	-e MODEL=${model} \
-	-v ${MODEL_DIR}:${TARGET_DIR} \
-	-v ${token}:${target_token} \
-	--name ${TGI_CONTAINER_NAME} ${TGI_INFERENCE_IMAGE}
+	docker run --gpus all --shm-size 10gb \
+	-p ${PORT}:${PORT} \
+	-e TOKEN_FILE=${TARGET_TOKEN} \
+	-e PARAMS=${PARAMS} \
+	-e MODEL=${MODEL} \
+	-v ${MODEL_DIR}:${TARGET_DIR} \
+	-v ${TOKEN}:${TARGET_TOKEN} \
+	${TGI_INFERENCE_IMAGE}$(shell cat version.txt)

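+# run.tgi.oci: serve a model that is already on disk at LOCAL_MODEL (the OCI
+# Model Deployment layout), so no token mount is needed.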
 run.tgi.oci: check-env
-	docker run --rm -it --gpus all --shm-size 1g \
-	-p ${port}:${port} \
-	-e PORT=${port} \
-	-e PARAMS=${params} \
-	-e MODEL=${local_model} \
-	-v ${MODEL_DIR}:${TARGET_DIR} \
-	--name ${TGI_CONTAINER_NAME} ${TGI_INFERENCE_IMAGE}
+	docker run --gpus all --shm-size 10gb \
+	-p ${PORT}:${PORT} \
+	-e PARAMS=${PARAMS} \
+	-e MODEL=${LOCAL_MODEL} \
+	-v ${MODEL_DIR}:${TARGET_DIR} \
+	${TGI_INFERENCE_IMAGE}$(shell cat version.txt)

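+# run.vllm.hf: same Hugging Face download flow as run.tgi.hf, but with the
+# vLLM inference image.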
 run.vllm.hf: check-env
-	docker run --rm -it --gpus all --shm-size 1g \
-	-p ${port}:${port} \
-	-e PORT=${port} \
-	-e UVICORN_NO_USE_COLORS=1 \
-	-e TOKEN_FILE=${target_token} \
-	-e MODEL=${model} \
-	-e TENSOR_PARALLELISM=${tensor_parallelism} \
-	-e HUGGINGFACE_HUB_CACHE=${HF_DIR} \
-	-v ${MODEL_DIR}:${TARGET_DIR} \
-	-v ${token}:${target_token} \
-	--name ${VLLM_CONTAINER_NAME} ${VLLM_INFERENCE_IMAGE}
+	docker run --gpus all --shm-size 10gb \
+	-p ${PORT}:${PORT} \
+	-e TOKEN_FILE=${TARGET_TOKEN} \
+	-e MODEL=${MODEL} \
+	-v ${MODEL_DIR}:${TARGET_DIR} \
+	-v ${TOKEN}:${TARGET_TOKEN} \
+	${VLLM_INFERENCE_IMAGE}$(shell cat version.txt)

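+# run.vllm.oci: detached (-d) run against the pre-downloaded model at
+# LOCAL_MODEL; note that no -p port mapping is passed here.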
 run.vllm.oci: check-env
-	docker run --rm -d --gpus all --shm-size 1g \
-	-p ${port}:${port} \
-	-e PORT=${port} \
-	-e UVICORN_NO_USE_COLORS=1 \
-	-e MODEL=${local_model} \
-	-e TENSOR_PARALLELISM=${tensor_parallelism} \
-	-v ${MODEL_DIR}:${TARGET_DIR} \
-	--name ${VLLM_CONTAINER_NAME} ${VLLM_INFERENCE_IMAGE}
+	docker run --rm -d --gpus all --shm-size 10gb \
+	-e PORT=${PORT} \
+	-e MODEL=${LOCAL_MODEL} \
+	-v ${MODEL_DIR}:${TARGET_DIR} \
+	${VLLM_INFERENCE_IMAGE}$(shell cat version.txt)

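+# Caution: stop and remove act on every container on the host (docker ps -a -q),
+# not just the inference containers started above.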
-stop.tgi: check-env
-	docker stop ${TGI_CONTAINER_NAME}$(shell cat version.txt)
+stop:
+	docker stop $(shell docker ps -a -q)

-stop.vllm: check-env
-	docker stop ${VLLM_CONTAINER_NAME}$(shell cat version.txt)
+remove:
+	docker rm $(shell docker ps -a -q)

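+# Pushing assumes an existing docker login to ${CONTAINER_REGISTRY} (OCIR).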
 push.tgi: check-env
 	docker push ${TGI_INFERENCE_IMAGE}$(shell cat version.txt)

 push.vllm: check-env
 	docker push ${VLLM_INFERENCE_IMAGE}$(shell cat version.txt)

-push.sdxl: check-env
-	docker push ${SDXL_INFERENCE_IMAGE}$(shell cat version.txt)
-
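+# app: run the local Gradio client; note it still references the lowercase
+# ${model} variable, which was renamed to MODEL above, so it may need updating.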
 app:
 	MODEL=${model} gradio app.py