Skip to content

Commit 7001fa7

Browse files
Merge pull request #62 from oracle-samples/cdb-fix
Llama 3.2 / HF TGI
2 parents c6911aa + 360129f commit 7001fa7

File tree

5 files changed

+45
-3
lines changed

5 files changed

+45
-3
lines changed

app/src/modules/chatbot.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,10 @@ def langchain_rag(self, rag_params, chat_instr, context_instr, input, chat_histo
9999

100100
retrieved_documents = retriever.invoke(input)
101101
logger.debug("Retrieved %i documents", len(retrieved_documents))
102+
logger.info("Retrieved %i documents", len(retrieved_documents))
102103
# Retrieve documents for inspection (Use for debugging)
103-
# for i, doc in enumerate(retrieved_documents):
104-
# logger.debug("Document %i %s", i + 1, doc)
104+
for i, doc in enumerate(retrieved_documents):
105+
logger.info("Document %i %s", i + 1, doc)
105106

106107
# QA Chain
107108
context_messages = [("system", context_instr)]

app/src/modules/metadata.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,19 @@ def ll_models():
139139
"frequency_penalty": [0.0, 0.0, -1.0, 1.0],
140140
"presence_penalty": [0.0, 0.0, -2.0, 2.0],
141141
},
142+
"tgi": {
143+
"enabled": False,
144+
"api": "OpenAI",
145+
"url": "http://127.0.0.1:8080",
146+
"api_key": "",
147+
"openai_compat": True,
148+
"context_length": 127072,
149+
"temperature": [1.0, 1.0, 0.0, 2.0],
150+
"top_p": [0.99, .99, 0.0, 0.99],
151+
"max_tokens": [256, 256, 1, 8191],
152+
"frequency_penalty": [0.0, 0.0, -1.0, 1.0],
153+
"presence_penalty": [0.0, 0.0, -2.0, 2.0],
154+
},
142155
"gpt-4o": {
143156
"enabled": os.getenv("OPENAI_API_KEY") is not None,
144157
"api": "OpenAI",
@@ -192,6 +205,20 @@ def ll_models():
192205
"frequency_penalty": [0.0, 0.0, -2.0, 2.0],
193206
"presence_penalty": [0.0, 0.0, -2.0, 2.0],
194207
},
208+
# llama3.2-3b
209+
"llama3.2": {
210+
"enabled": os.getenv("ON_PREM_OLLAMA_URL") is not None,
211+
"api": "ChatOllama",
212+
"url": os.environ.get("ON_PREM_OLLAMA_URL", default="http://127.0.0.1:11434"),
213+
"api_key": "",
214+
"openai_compat": True,
215+
"context_length": 131072,
216+
"temperature": [1.0, 1.0, 0.0, 2.0],
217+
"top_p": [1.0, 1.0, 0.0, 1.0],
218+
"max_tokens": [256, 256, 1, 2048],
219+
"frequency_penalty": [0.0, 0.0, -2.0, 2.0],
220+
"presence_penalty": [0.0, 0.0, -2.0, 2.0],
221+
},
195222
}
196223
return ll_models_dict
197224

app/src/modules/utilities.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def get_ll_model(model, ll_models_config=None, giskarded=False):
110110
_client = OpenAI(api_key=giskard_key, base_url=f"{llm_url}/v1/")
111111
client = OpenAIClient(model=model, client=_client)
112112
elif llm_api == "OpenAI":
113-
client = ChatOpenAI(api_key=lm_params["api_key"], **common_params)
113+
client = ChatOpenAI(api_key=lm_params["api_key"],base_url=f"{llm_url}/v1/", **common_params)
114114
elif llm_api == "Cohere":
115115
client = ChatCohere(cohere_api_key=lm_params["api_key"], **common_params)
116116
elif llm_api == "ChatPerplexity":

spring_ai/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ ollama:
122122
number: 1
123123
models:
124124
- llama3.1
125+
- llama3.2
125126
- mxbai-embed-large
126127
- nomic-embed-text
127128
nodeSelector:

spring_ai/ollama-values.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
ollama:
2+
gpu:
3+
enabled: true
4+
type: 'nvidia'
5+
number: 1
6+
models:
7+
- llama3.1
8+
- llama3.2
9+
- mxbai-embed-large
10+
- nomic-embed-text
11+
nodeSelector:
12+
node.kubernetes.io/instance-type: VM.GPU.A10.1
13+

0 commit comments

Comments (0)