Skip to content

Commit 8020ad2

Browse files
committed
Disable pre-tokenization for non-OpenAI embedding models
1 parent 66a93d4 commit 8020ad2

File tree

2 files changed

+14
-5
lines changed

2 files changed

+14
-5
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,4 @@ spring_ai/drop.sql
6767
src/client/spring_ai/target/classes/*
6868
api_server_key
6969
.env
70+
testEmbeddings.py

src/server/api/utils/models.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,19 @@ def get_client(model_config: dict, oci_config: schema.OracleCloudSettings, giska
126126

127127
if full_model_config["type"] == "embed" and not giskard:
128128
if provider != "oci":
129-
kwargs = {
130-
"provider": "openai" if provider == "openai_compatible" else provider,
131-
"model": full_model_config["id"],
132-
"base_url": full_model_config["url"],
133-
}
129+
if provider == "openai_compatible":
130+
kwargs = {
131+
"provider": "openai",
132+
"model": full_model_config["id"],
133+
"base_url": full_model_config["url"],
134+
"check_embedding_ctx_length":False #To avoid Tiktoken pre-transform on not OpenAI provided server
135+
}
136+
else:
137+
kwargs = {
138+
"provider": "openai" if provider == "openai_compatible" else provider,
139+
"model": full_model_config["id"],
140+
"base_url": full_model_config["url"],
141+
}
134142
if full_model_config.get("api_key"): # only add if set
135143
kwargs["api_key"] = full_model_config["api_key"]
136144
client = init_embeddings(**kwargs)

0 commit comments

Comments
 (0)