Skip to content

Commit 8beb8b7

Browse files
committed
vLLM support for LangChain MCP export
1 parent 8020ad2 commit 8beb8b7

File tree

5 files changed

+42
-15
lines changed

5 files changed

+42
-15
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,4 @@ spring_ai/drop.sql
6767
src/client/spring_ai/target/classes/*
6868
api_server_key
6969
.env
70-
testEmbeddings.py
70+

src/client/content/config/tabs/settings.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,9 @@ def spring_ai_conf_check(ll_model: dict, embed_model: dict) -> str:
161161

162162
ll_provider = ll_model.get("provider", "")
163163
embed_provider = embed_model.get("provider", "")
164-
164+
logger.info(f"llm chat:{ll_provider} - embeddings:{embed_provider}")
165+
if all("openai_compatible" in p for p in (ll_provider, embed_provider)):
166+
return "openai_compatible"
165167
if all("openai" in p for p in (ll_provider, embed_provider)):
166168
return "openai"
167169
if all("ollama" in p for p in (ll_provider, embed_provider)):
@@ -343,6 +345,8 @@ def display_settings():
343345
embed_config = {}
344346
spring_ai_conf = spring_ai_conf_check(ll_config, embed_config)
345347

348+
logger.info(f"config found:{spring_ai_conf}")
349+
346350
if spring_ai_conf == "hybrid":
347351
st.markdown(f"""
348352
The current configuration combination of embedding and language models
@@ -353,21 +357,23 @@ def display_settings():
353357
else:
354358
col_left, col_centre, _ = st.columns([3, 4, 3])
355359
with col_left:
356-
st.download_button(
357-
label="Download SpringAI",
358-
data=spring_ai_zip(spring_ai_conf, ll_config, embed_config), # Generate zip on the fly
359-
file_name="spring_ai.zip", # Zip file name
360-
mime="application/zip", # Mime type for zip file
361-
disabled=spring_ai_conf == "hybrid",
362-
)
363-
with col_centre:
364360
st.download_button(
365361
label="Download LangchainMCP",
366362
data=langchain_mcp_zip(settings), # Generate zip on the fly
367363
file_name="langchain_mcp.zip", # Zip file name
368364
mime="application/zip", # Mime type for zip file
369365
disabled=spring_ai_conf == "hybrid",
370366
)
367+
with col_centre:
368+
if (spring_ai_conf != "openai_compatible"):
369+
st.download_button(
370+
label="Download SpringAI",
371+
data=spring_ai_zip(spring_ai_conf, ll_config, embed_config), # Generate zip on the fly
372+
file_name="spring_ai.zip", # Zip file name
373+
mime="application/zip", # Mime type for zip file
374+
disabled=spring_ai_conf == "hybrid",
375+
)
376+
371377

372378

373379
if __name__ == "__main__":

src/client/mcp/rag/optimizer_utils/config.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,11 @@ def get_llm(data):
3939
llm = OllamaLLM(model=model, base_url=url)
4040
logger.info("Ollama LLM created")
4141
elif provider == "openai":
42-
llm = llm = ChatOpenAI(model=model, api_key=api_key)
42+
llm = ChatOpenAI(model=model, api_key=api_key)
4343
logger.info("OpenAI LLM created")
44+
elif provider =="openai_compatible":
45+
llm = ChatOpenAI(model=model, api_key=api_key,base_url=url)
46+
logger.info("OpenAI compatible LLM created")
4447
return llm
4548

4649

@@ -60,9 +63,13 @@ def get_embeddings(data):
6063
if provider == "ollama":
6164
embeddings = OllamaEmbeddings(model=model, base_url=url)
6265
logger.info("Ollama Embeddings connection successful")
63-
elif (provider == "openai") or (provider == "openai_compatible"):
66+
elif (provider == "openai"):
6467
embeddings = OpenAIEmbeddings(model=model, api_key=api_key)
6568
logger.info("OpenAI embeddings connection successful")
69+
elif (provider == "openai_compatible"):
70+
embeddings = OpenAIEmbeddings(model=model, api_key=api_key,base_url=url,check_embedding_ctx_length=False)
71+
logger.info("OpenAI compatible embeddings connection successful")
72+
6673
return embeddings
6774

6875

@@ -80,7 +87,7 @@ def get_vectorstore(data, embeddings):
8087
distance_metric=data["client_settings"]["vector_search"]["distance_metric"]
8188
index_type=data["client_settings"]["vector_search"]["index_type"]
8289

83-
db_table=(table_alias+"_"+model+"_"+chunk_size+"_"+chunk_overlap+"_"+distance_metric+"_"+index_type).upper().replace("-", "_")
90+
db_table=(table_alias+"_"+model+"_"+chunk_size+"_"+chunk_overlap+"_"+distance_metric+"_"+index_type).upper().replace("-", "_").replace("/", "_")
8491
logger.info(f"db_table:{db_table}")
8592

8693

src/client/mcp/rag/optimizer_utils/rag.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def rag_tool_base(question: str) -> str:
5454

5555
logger.info("rag_prompt:")
5656
logger.info(rag_prompt)
57-
template = """DOCUMENTS: {context} \n"""+rag_prompt+"""\nQuestion: {question} """
57+
template = rag_prompt+"""\n# DOCUMENTS :\n {context} \n"""+"""\n # Question: {question} """
5858
logger.info(template)
5959
logger.info(f"user_question: {user_question}")
6060
prompt = PromptTemplate.from_template(template)

src/server/bootstrap/models.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,20 @@ def main() -> list[Model]:
8888
"max_completion_tokens": 2048,
8989
"frequency_penalty": 0.0,
9090
},
91+
{
92+
# On-prem vLLM model; enabled only when ON_PREM_VLLM_URL is set (note: not last in the list — the "intentionally last" comment belongs to the llama3.1 entry below)
93+
"id": "meta-llama/Llama-3.2-1B-Instruct",
94+
"enabled": os.getenv("ON_PREM_VLLM_URL") is not None,
95+
"type": "ll",
96+
"provider": "openai_compatible",
97+
"api_key": "",
98+
"openai_compat": True,
99+
"url": os.environ.get("ON_PREM_VLLM_URL", default="http://gpu:8000/v1"),
100+
"context_length": 131072,
101+
"temperature": 1.0,
102+
"max_completion_tokens": 2048,
103+
"frequency_penalty": 0.0,
104+
},
91105
{
92106
# This is intentionally last to line up with docos
93107
"id": "llama3.1",
@@ -133,7 +147,7 @@ def main() -> list[Model]:
133147
"max_chunk_size": 512,
134148
},
135149
{
136-
"id": "text-embedding-nomic-embed-text-v1.5",
150+
"id": "nomic-ai/nomic-embed-text-v1",
137151
"enabled": False,
138152
"type": "embed",
139153
"provider": "openai_compatible",

0 commit comments

Comments (0)