From cedb317b26cbfb5ce3c2b824b4d1f87483639cc6 Mon Sep 17 00:00:00 2001 From: SamYuan1990 Date: Thu, 4 Jun 2026 13:29:56 +0800 Subject: [PATCH 1/3] nit fix Signed-off-by: SamYuan1990 --- example/Readme.md | 80 ++++++++++++++++++++++++++------- scl/embeddings/embedding.py | 5 +-- scl/embeddings/web_embedding.py | 2 +- 3 files changed, 67 insertions(+), 20 deletions(-) diff --git a/example/Readme.md b/example/Readme.md index d49a94e..1a42b3d 100644 --- a/example/Readme.md +++ b/example/Readme.md @@ -1,24 +1,72 @@ -export EMBEDDING_LOCAL_MODEL_PATH=/Users/yuanyi/OpenSource/OBTest/StructuredContextLanguage/bge-m3 -export EMBEDDING_CACHE_PATH=/Users/yuanyi/OpenSource/OBTest/StructuredContextLanguage/embeddings_cache.json +## Steps for set up this example + +``` +uv sync +export EMBEDDING_LOCAL_MODEL_PATH=path_to_your_embedding_weight/bge-m3 +or +export EMBEDDING_BASE_URL=http://0.0.0.0:9080 +export EMBEDDING_API_KEY="any" python example/BFCL/gothroughfunctions.py +``` + +to run embedding service via restful way you can run code below in your own device. +``` +# server.py +import numpy as np +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +from typing import List, Union +from sentence_transformers import SentenceTransformer +import uvicorn +# 1. 加载模型(可换成你自己的路径或 HuggingFace 模型名) +model = SentenceTransformer("./bge-m3") -## Step 0, config Otel, server +app = FastAPI(title="Local Embedding Service (OpenAI format)") -## Step 1 maybe client able to rag procedure in a service or FAAS(function as a service) way +class EmbeddingRequest(BaseModel): + input: Union[str, List[str]] + model: str = "local-model" -# | Case number | File format | Context RAG | Memory | Function call | -# | 1 | pdf | Autonomy(enable by default) | n/A | Autonomy | -## case 2 test with memory(relative case) -# | 2 | n/A | Autonomy(enable by default) | Autonomy | Autonomy | -## todo here a feed back loop here to show the Autonomy of context RAG. +class EmbeddingResponse(BaseModel): + object: str = "list" + data: List[dict] + model: str + usage: dict -## case 3 test with memory(no relative case) -# | 3 | n/A | Autonomy(enable by default) | New Thread(Autonomy) | Autonomy | -## case 4 test with hit -# | 4 | n/A | Autonomy(enable by default) | by config | Autonomy | -## case 5 test with context hit -# | 5 | n/A | by config | n/A | Autonomy | +@app.post("/embeddings", response_model=EmbeddingResponse) +async def get_embeddings(req: EmbeddingRequest): + # 统一转为列表 + texts = [req.input] if isinstance(req.input, str) else req.input + + # 2. 编码 + embeddings = model.encode(texts, normalize_embeddings=True) # shape: (n, dim) + + # 3. 计算 token 数(用模型自带 tokenizer) + tokenizer = model.tokenizer + encoded = tokenizer(texts, padding=True, truncation=True, return_tensors="pt") + token_counts = encoded["attention_mask"].sum(dim=1).tolist() + + # 4. 构建 OpenAI 格式响应 + data = [] + for i, (emb, tc) in enumerate(zip(embeddings, token_counts)): + data.append({ + "object": "embedding", + "embedding": emb.tolist(), + "index": i + }) + + total_tokens = sum(token_counts) + return EmbeddingResponse( + data=data, + model=req.model, + usage={ + "prompt_tokens": total_tokens, + "total_tokens": total_tokens + } + ) -## Get result from server \ No newline at end of file +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=9080) +``` diff --git a/scl/embeddings/embedding.py b/scl/embeddings/embedding.py index 86b8030..079f73e 100644 --- a/scl/embeddings/embedding.py +++ b/scl/embeddings/embedding.py @@ -53,8 +53,7 @@ def __init__(self): # a backend that lacks a real configuration value. self._local_available = bool(getattr(config, "embedding_local_model_path", None)) self._web_available = bool( - getattr(config, "embedding_api_key", None) - and getattr(config, "embedding_base_url", None) + getattr(config, "embedding_base_url", None) ) if self._local_available: from scl.embeddings.local_embedding import LocalEmbeddingClient @@ -104,7 +103,7 @@ def embed(self, text): if self._web_available: self.logger.info("Using web API for embedding") embedding = self.web_client.embed(text) - self.cache.set(text, embedding) + #self.cache.set(text, embedding) self._counter.add(1, {"source": "web"}) return embedding diff --git a/scl/embeddings/web_embedding.py b/scl/embeddings/web_embedding.py index 24a6900..472831a 100644 --- a/scl/embeddings/web_embedding.py +++ b/scl/embeddings/web_embedding.py @@ -51,7 +51,7 @@ def _init_subclass(self): @tracer.start_as_current_span("embed") def embed(self, text): """Call the web API and return the embedding vector.""" - time.sleep(5) # rate limit / timeout guard + #time.sleep(5) # rate limit / timeout guard self.logger.info("Web embedding request (text length: %d)", len(text)) self.logger.debug("Full text: %s", text) From f846d0be90e6d3b394c7fa8aa291b210cb9eed69 Mon Sep 17 00:00:00 2001 From: SamYuan1990 Date: Thu, 4 Jun 2026 13:52:04 +0800 Subject: [PATCH 2/3] nit fix for lint Signed-off-by: SamYuan1990 --- scl/embeddings/web_embedding.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/scl/embeddings/web_embedding.py b/scl/embeddings/web_embedding.py index 472831a..a7f323b 100644 --- a/scl/embeddings/web_embedding.py +++ b/scl/embeddings/web_embedding.py @@ -7,9 +7,6 @@ Generic OpenAI‑compatible embedding client (used as the final fallback). Does NOT maintain a local cache – that is handled by the coordinator. """ - -import time - from openai import OpenAI from scl.embeddings.base_embedding import BaseEmbeddingClient From 9cfe75ca282c93612659db86db68addbd4967f49 Mon Sep 17 00:00:00 2001 From: SamYuan1990 Date: Thu, 4 Jun 2026 13:53:54 +0800 Subject: [PATCH 3/3] lint fix Signed-off-by: SamYuan1990 --- scl/embeddings/embedding.py | 6 ++---- scl/embeddings/web_embedding.py | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/scl/embeddings/embedding.py b/scl/embeddings/embedding.py index 079f73e..6f61646 100644 --- a/scl/embeddings/embedding.py +++ b/scl/embeddings/embedding.py @@ -52,9 +52,7 @@ def __init__(self): # We use getattr + bool check so a default-None attribute doesn't activate # a backend that lacks a real configuration value. self._local_available = bool(getattr(config, "embedding_local_model_path", None)) - self._web_available = bool( - getattr(config, "embedding_base_url", None) - ) + self._web_available = bool(getattr(config, "embedding_base_url", None)) if self._local_available: from scl.embeddings.local_embedding import LocalEmbeddingClient @@ -103,7 +101,7 @@ def embed(self, text): if self._web_available: self.logger.info("Using web API for embedding") embedding = self.web_client.embed(text) - #self.cache.set(text, embedding) + # self.cache.set(text, embedding) self._counter.add(1, {"source": "web"}) return embedding diff --git a/scl/embeddings/web_embedding.py b/scl/embeddings/web_embedding.py index a7f323b..17fd012 100644 --- a/scl/embeddings/web_embedding.py +++ b/scl/embeddings/web_embedding.py @@ -7,6 +7,7 @@ Generic OpenAI‑compatible embedding client (used as the final fallback). Does NOT maintain a local cache – that is handled by the coordinator. """ + from openai import OpenAI from scl.embeddings.base_embedding import BaseEmbeddingClient @@ -48,7 +49,7 @@ def _init_subclass(self): @tracer.start_as_current_span("embed") def embed(self, text): """Call the web API and return the embedding vector.""" - #time.sleep(5) # rate limit / timeout guard + # time.sleep(5) # rate limit / timeout guard self.logger.info("Web embedding request (text length: %d)", len(text)) self.logger.debug("Full text: %s", text)