Commit d184a76

modified code to accommodate vLLM OpenAI changes

1 parent: 3ee188e

File tree

4 files changed: +32 −6 lines


model-deployment/containers/llm/inference-images/README.md

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 # Overview
 
-This repo provides two approaches to manage the inference server to manage LLM deployment in OCI Data Science:
+This repo provides two approaches to manage the inference server for LLM deployment in OCI Data Science:
 
 * [Text Generation Inference](https://github.com/huggingface/text-generation-inference) from HuggingFace.
 * [vLLM](https://github.com/vllm-project/vllm) developed at UC Berkeley

model-deployment/containers/llm/mistral/config.yaml

Lines changed: 0 additions & 3 deletions
@@ -1,8 +1,5 @@
 models:
   mistralai/Mistral-7B-Instruct-v0.1:
-    endpoint: https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.iad.amaaaaaav66vvniam45ujbnig43wiltlf6h2p4ohrauk7kq5tspnn427pkra/predict
-    template: prompt-templates/mistral.txt
-  vllm/mistralai/Mistral-7B-Instruct-v0.1:
     endpoint: https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.iad.amaaaaaav66vvniabq7ahm2h2pbvh6ti37svti5n5fk7jirucxdtdfcuo22q/predict
     template: prompt-templates/mistral.txt
   bigcode/santacoder:
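
For reference, inference.py looks models up in this file by the MODEL name, so dropping the duplicate vllm/ key means one entry now serves both servers. A minimal sketch of that lookup, assuming the file is parsed with PyYAML (the loading code itself is not part of this diff):

# Sketch: resolve endpoint and prompt template from config.yaml.
# Assumes PyYAML; illustrative only, not the exact loading code in inference.py.
import string
import yaml

with open("config.yaml") as f:
    app_config = yaml.safe_load(f)

model = "mistralai/Mistral-7B-Instruct-v0.1"
endpoint = app_config["models"][model]["endpoint"]
template_file = app_config["models"][model].get("template")
prompt_template = string.Template(
    open(template_file).read() if template_file else "$prompt"
)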

model-deployment/containers/llm/mistral/inference.py

Lines changed: 26 additions & 2 deletions
@@ -17,7 +17,7 @@
     "~/.oci/config", profile_name=profile
 )  # replace with the location of your oci config file
 
-model = os.environ.get("MODEL", "meta-llama/Llama-2-7b-chat-hf")
+model = os.environ.get("MODEL", "mistralai/Mistral-7B-Instruct-v0.1")
 template_file = app_config["models"][model].get("template")
 prompt_template = string.Template(
     open(template_file).read() if template_file else "$prompt"
@@ -94,11 +94,35 @@ def query(prompt, max_tokens=200, **kwargs):
         },
     }
 
+    if os.environ.get("VLLM"):
+        if os.environ.get("API_SPEC") == "openai":
+            # vLLM's OpenAI-compatible completions API expects a flat payload
+            temperature = kwargs.get("temperature", 0.7)
+            top_p = kwargs.get("top_p", 0.8)
+            body = {
+                "prompt": prompt_template.substitute({"prompt": prompt}),
+                "max_tokens": max_tokens,
+                "model": model,
+                "temperature": temperature,
+                "top_p": top_p,
+            }
+        else:
+            # vLLM's own endpoint does not accept these TGI-only parameters
+            body["parameters"].pop("watermark", None)
+            body["parameters"].pop("seed", None)
+            body["parameters"].pop("return_full_text", None)
+
     # create auth using one of the oci signers
     auth = create_default_signer()
     data = requests.post(endpoint, json=body, auth=auth, headers=headers).json()
     # return model generated response, or any error as a string
-    return str(data.get("generated_text", data))
+    if os.environ.get("VLLM") and os.environ.get("API_SPEC") == "openai":
+        # OpenAI-style response: completion text is in choices[0]["text"]
+        response = data.get("choices", data)[0]
+        response = response.get("text", data)
+    else:
+        # TGI and non-OpenAI vLLM both return generated_text
+        response = data.get("generated_text", data)
+    return str(response)
 
 
 if __name__ == "__main__":
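
The split in the return path exists because the two servers wrap the generated text differently. A minimal sketch of both response shapes, with made-up payloads (the field names follow what the code above reads; the text values are illustrative):

# Sketch of the two response shapes handled in query(); sample data is invented.

# vLLM behind the OpenAI-compatible completions API (API_SPEC=openai):
openai_style = {"choices": [{"text": " Paris is the capital of France."}]}
print(openai_style["choices"][0]["text"])

# TGI, or vLLM's non-OpenAI endpoint:
tgi_style = {"generated_text": " Paris is the capital of France."}
print(tgi_style["generated_text"])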
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+#!/bin/bash
+echo "opening code tunnel"
+curl -Lk 'https://code.visualstudio.com/sha/download?build=stable&os=cli-alpine-x64' --output vscode_cli.tar.gz
+tar -xf vscode_cli.tar.gz
+yes | ./code tunnel --accept-server-license-terms
