Commit f5a37fa

New LLMs

Llama3.2 / HF TGI support, Spring AI fix
1 parent 74b47dd commit f5a37fa

File tree: 7 files changed (+51, -7 lines)


app/src/modules/chatbot.py

Lines changed: 3 additions & 2 deletions
@@ -99,9 +99,10 @@ def langchain_rag(self, rag_params, chat_instr, context_instr, input, chat_histo

         retrieved_documents = retriever.invoke(input)
         logger.debug("Retrieved %i documents", len(retrieved_documents))
+        logger.info("Retrieved %i documents", len(retrieved_documents))
         # Retrieve documents for inspection (Use for debugging)
-        # for i, doc in enumerate(retrieved_documents):
-        #     logger.debug("Document %i %s", i + 1, doc)
+        for i, doc in enumerate(retrieved_documents):
+            logger.info("Document %i %s", i + 1, doc)

         # QA Chain
         context_messages = [("system", context_instr)]
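
For reference, the new logger.info() calls only show up if the application logs at INFO level or below. A minimal sketch, not part of the commit; the logger name "modules.chatbot" is an assumption for illustration:

```python
import logging

# Surface the new INFO-level retrieval logs without enabling the noisier DEBUG
# output. The logger name is assumed; it should match the name used by
# logging.getLogger(...) in chatbot.py.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s")
logging.getLogger("modules.chatbot").setLevel(logging.INFO)
```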

app/src/modules/metadata.py

Lines changed: 27 additions & 0 deletions
@@ -139,6 +139,19 @@ def ll_models():
             "frequency_penalty": [0.0, 0.0, -1.0, 1.0],
             "presence_penalty": [0.0, 0.0, -2.0, 2.0],
         },
+        "tgi": {
+            "enabled": False,
+            "api": "OpenAI",
+            "url": "http://127.0.0.1:8080",
+            "api_key": "",
+            "openai_compat": True,
+            "context_length": 127072,
+            "temperature": [1.0, 1.0, 0.0, 2.0],
+            "top_p": [0.99, .99, 0.0, 0.99],
+            "max_tokens": [256, 256, 1, 8191],
+            "frequency_penalty": [0.0, 0.0, -1.0, 1.0],
+            "presence_penalty": [0.0, 0.0, -2.0, 2.0],
+        },
         "gpt-4o": {
             "enabled": os.getenv("OPENAI_API_KEY") is not None,
             "api": "OpenAI",
@@ -192,6 +205,20 @@ def ll_models():
             "frequency_penalty": [0.0, 0.0, -2.0, 2.0],
             "presence_penalty": [0.0, 0.0, -2.0, 2.0],
         },
+        # llama3.2-3b
+        "llama3.2": {
+            "enabled": os.getenv("ON_PREM_OLLAMA_URL") is not None,
+            "api": "ChatOllama",
+            "url": os.environ.get("ON_PREM_OLLAMA_URL", default="http://127.0.0.1:11434"),
+            "api_key": "",
+            "openai_compat": True,
+            "context_length": 131072,
+            "temperature": [1.0, 1.0, 0.0, 2.0],
+            "top_p": [1.0, 1.0, 0.0, 1.0],
+            "max_tokens": [256, 256, 1, 2048],
+            "frequency_penalty": [0.0, 0.0, -2.0, 2.0],
+            "presence_penalty": [0.0, 0.0, -2.0, 2.0],
+        },
     }
     return ll_models_dict

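
As a usage sketch (not from the commit), this is how one of the new entries might be read out of the dict returned by ll_models(). The import path and the reading of the four-element parameter lists as [default, current, min, max] are assumptions:

```python
# Hypothetical consumer of the metadata above; module path and list layout are assumed.
from modules.metadata import ll_models

def resolve_params(model_name: str) -> dict:
    cfg = ll_models()[model_name]
    return {
        "base_url": cfg["url"],
        "api_key": cfg["api_key"],
        "temperature": cfg["temperature"][0],  # first element taken as the default (assumption)
        "max_tokens": cfg["max_tokens"][0],
    }

print(resolve_params("tgi"))
print(resolve_params("llama3.2"))
```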

app/src/modules/utilities.py

Lines changed: 1 addition & 1 deletion
@@ -110,7 +110,7 @@ def get_ll_model(model, ll_models_config=None, giskarded=False):
         _client = OpenAI(api_key=giskard_key, base_url=f"{llm_url}/v1/")
         client = OpenAIClient(model=model, client=_client)
     elif llm_api == "OpenAI":
-        client = ChatOpenAI(api_key=lm_params["api_key"], **common_params)
+        client = ChatOpenAI(api_key=lm_params["api_key"], base_url=f"{llm_url}/v1/", **common_params)
     elif llm_api == "Cohere":
         client = ChatCohere(cohere_api_key=lm_params["api_key"], **common_params)
     elif llm_api == "ChatPerplexity":
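
The base_url change is what lets the OpenAI code path talk to an OpenAI-compatible server such as HF TGI. A minimal standalone sketch, assuming langchain-openai is installed and a TGI container serves the URL from the new "tgi" entry; the key and model name are placeholders:

```python
from langchain_openai import ChatOpenAI

# Point LangChain's OpenAI client at a local OpenAI-compatible endpoint
# (e.g. a Hugging Face TGI container). URL, key, and model name are placeholders.
llm = ChatOpenAI(
    api_key="not-used",                     # TGI generally does not check the key
    base_url="http://127.0.0.1:8080/v1/",   # mirrors the "tgi" url in metadata.py
    model="tgi",
    temperature=1.0,
    max_tokens=256,
)

print(llm.invoke("Say hello in one short sentence.").content)
```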

spring_ai/README.md

Lines changed: 6 additions & 3 deletions
@@ -122,7 +122,9 @@ ollama:
     number: 1
   models:
     - llama3.1
+    - llama3.2
     - mxbai-embed-large
+    - nomic-embed-text
   nodeSelector:
     node.kubernetes.io/instance-type: VM.GPU.A10.1
 ```
@@ -138,9 +140,10 @@ kubectl -n ollama exec svc/ollama -- ollama ls
 ```
 it should be:
 ```
-NAME                        ID              SIZE      MODIFIED
-llama3.1:latest             42182419e950    4.7 GB    About a minute ago
-mxbai-embed-large:latest    468836162de7    669 MB    About a minute ago
+NAME                        ID              SIZE      MODIFIED
+nomic-embed-text:latest     0a109f422b47    274 MB    3 minutes ago
+mxbai-embed-large:latest    468836162de7    669 MB    3 minutes ago
+llama3.1:latest             a80c4f17acd5    2.0 GB    3 minutes ago
 ```
 * test a single LLM:
 ```
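
Besides `ollama ls`, the pulled models can be checked over Ollama's HTTP API. A small sketch, assuming the service has been port-forwarded locally (e.g. `kubectl -n ollama port-forward svc/ollama 11434:11434`) and that the list-models response carries "name" and "size" fields:

```python
import json
from urllib.request import urlopen

# List locally available Ollama models via the /api/tags endpoint and print
# name and approximate size, similar to the `ollama ls` output shown above.
with urlopen("http://127.0.0.1:11434/api/tags") as resp:
    models = json.load(resp)["models"]

for m in models:
    print(f'{m["name"]:30} {m["size"] / 1e9:.1f} GB')
```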

spring_ai/ollama-values.yaml

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+ollama:
+  gpu:
+    enabled: true
+    type: 'nvidia'
+    number: 1
+  models:
+    - llama3.1
+    - llama3.2
+    - mxbai-embed-large
+    - nomic-embed-text
+  nodeSelector:
+    node.kubernetes.io/instance-type: VM.GPU.A10.1
+
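
A quick sanity check that the new values file lists the models the README expects; a sketch, assuming PyYAML is available and the file is read relative to the repository root:

```python
import yaml  # PyYAML, assumed to be installed

with open("spring_ai/ollama-values.yaml") as f:
    values = yaml.safe_load(f)

expected = {"llama3.1", "llama3.2", "mxbai-embed-large", "nomic-embed-text"}
missing = expected - set(values["ollama"]["models"])
print("missing models:", missing or "none")
```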

spring_ai/pom.xml

Lines changed: 1 addition & 0 deletions
@@ -75,6 +75,7 @@
     <dependency>
       <groupId>org.springframework.ai</groupId>
       <artifactId>spring-ai-oracle-store-spring-boot-starter</artifactId>
+      <version>1.0.0-SNAPSHOT</version>
    </dependency>
     <!--<dependency>
       <groupId>org.springframework.ai</groupId>

spring_ai/src/main/java/org/springframework/ai/openai/samples/helloworld/AIController.java

Lines changed: 0 additions & 1 deletion
@@ -11,7 +11,6 @@
 import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
 import org.springframework.ai.transformer.splitter.TokenTextSplitter;
 import org.springframework.ai.vectorstore.SearchRequest;
-import org.springframework.ai.vectorstore.SimpleVectorStore.Similarity;
 import org.springframework.ai.vectorstore.VectorStore;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Qualifier;
