From cedb317b26cbfb5ce3c2b824b4d1f87483639cc6 Mon Sep 17 00:00:00 2001
From: SamYuan1990 <yy19902439@126.com>
Date: Thu, 4 Jun 2026 13:29:56 +0800
Subject: [PATCH 1/3] nit fix

Signed-off-by: SamYuan1990 <yy19902439@126.com>
---
 example/Readme.md               | 80 ++++++++++++++++++++++++++-------
 scl/embeddings/embedding.py     |  5 +--
 scl/embeddings/web_embedding.py |  2 +-
 3 files changed, 67 insertions(+), 20 deletions(-)

diff --git a/example/Readme.md b/example/Readme.md
index d49a94e..1a42b3d 100644
--- a/example/Readme.md
+++ b/example/Readme.md
@@ -1,24 +1,72 @@
-export EMBEDDING_LOCAL_MODEL_PATH=/Users/yuanyi/OpenSource/OBTest/StructuredContextLanguage/bge-m3
-export EMBEDDING_CACHE_PATH=/Users/yuanyi/OpenSource/OBTest/StructuredContextLanguage/embeddings_cache.json
+## Steps to set up this example
+
+```
+uv sync
+export EMBEDDING_LOCAL_MODEL_PATH=path_to_your_embedding_weight/bge-m3
+# or, to use an OpenAI-compatible embedding service instead of a local model:
+export EMBEDDING_BASE_URL=http://0.0.0.0:9080
+export EMBEDDING_API_KEY="any"
 python example/BFCL/gothroughfunctions.py
+```
+
+To run the embedding service as a REST API, you can run the code below on your own device.
 
+```
+# server.py
+import numpy as np
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import List, Union
+from sentence_transformers import SentenceTransformer
+import uvicorn
 
+# 1. Load the model (replace with your own path or a HuggingFace model name)
+model = SentenceTransformer("./bge-m3")
 
-## Step 0, config Otel, server
+app = FastAPI(title="Local Embedding Service (OpenAI format)")
 
-## Step 1 maybe client able to rag procedure in a service or FAAS(function as a service) way
+class EmbeddingRequest(BaseModel):
+    input: Union[str, List[str]]
+    model: str = "local-model"
 
-# | Case number | File format | Context RAG | Memory | Function call | 
-# | 1 | pdf | Autonomy(enable by default) | n/A | Autonomy |
-## case 2 test with memory(relative case)
-# | 2 | n/A | Autonomy(enable by default) | Autonomy | Autonomy | 
-## todo here a feed back loop here to show the Autonomy of context RAG.
+class EmbeddingResponse(BaseModel):
+    object: str = "list"
+    data: List[dict]
+    model: str
+    usage: dict
 
-## case 3 test with memory(no relative case)
-# | 3 | n/A | Autonomy(enable by default) | New Thread(Autonomy) | Autonomy | 
-## case 4 test with hit
-# | 4 | n/A | Autonomy(enable by default) | by config | Autonomy |
-## case 5 test with context hit
-# | 5 | n/A | by config | n/A | Autonomy |
+@app.post("/embeddings", response_model=EmbeddingResponse)
+async def get_embeddings(req: EmbeddingRequest):
+    # Normalize the input to a list
+    texts = [req.input] if isinstance(req.input, str) else req.input
+    
+    # 2. Encode
+    embeddings = model.encode(texts, normalize_embeddings=True)  # shape: (n, dim)
+    
+    # 3. Count tokens (using the model's own tokenizer)
+    tokenizer = model.tokenizer
+    encoded = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
+    token_counts = encoded["attention_mask"].sum(dim=1).tolist()
+    
+    # 4. Build the OpenAI-format response
+    data = []
+    for i, (emb, tc) in enumerate(zip(embeddings, token_counts)):
+        data.append({
+            "object": "embedding",
+            "embedding": emb.tolist(),
+            "index": i
+        })
+    
+    total_tokens = sum(token_counts)
+    return EmbeddingResponse(
+        data=data,
+        model=req.model,
+        usage={
+            "prompt_tokens": total_tokens,
+            "total_tokens": total_tokens
+        }
+    )
 
-## Get result from server
\ No newline at end of file
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=9080)
+```
diff --git a/scl/embeddings/embedding.py b/scl/embeddings/embedding.py
index 86b8030..079f73e 100644
--- a/scl/embeddings/embedding.py
+++ b/scl/embeddings/embedding.py
@@ -53,8 +53,7 @@ def __init__(self):
         # a backend that lacks a real configuration value.
         self._local_available = bool(getattr(config, "embedding_local_model_path", None))
         self._web_available = bool(
-            getattr(config, "embedding_api_key", None)
-            and getattr(config, "embedding_base_url", None)
+            getattr(config, "embedding_base_url", None)
         )
         if self._local_available:
             from scl.embeddings.local_embedding import LocalEmbeddingClient
@@ -104,7 +103,7 @@ def embed(self, text):
         if self._web_available:
             self.logger.info("Using web API for embedding")
             embedding = self.web_client.embed(text)
-            self.cache.set(text, embedding)
+            #self.cache.set(text, embedding)
             self._counter.add(1, {"source": "web"})
             return embedding
 
diff --git a/scl/embeddings/web_embedding.py b/scl/embeddings/web_embedding.py
index 24a6900..472831a 100644
--- a/scl/embeddings/web_embedding.py
+++ b/scl/embeddings/web_embedding.py
@@ -51,7 +51,7 @@ def _init_subclass(self):
     @tracer.start_as_current_span("embed")
     def embed(self, text):
         """Call the web API and return the embedding vector."""
-        time.sleep(5)  # rate limit / timeout guard
+        #time.sleep(5)  # rate limit / timeout guard
         self.logger.info("Web embedding request (text length: %d)", len(text))
         self.logger.debug("Full text: %s", text)
 

From f846d0be90e6d3b394c7fa8aa291b210cb9eed69 Mon Sep 17 00:00:00 2001
From: SamYuan1990 <yy19902439@126.com>
Date: Thu, 4 Jun 2026 13:52:04 +0800
Subject: [PATCH 2/3] nit fix for lint

Signed-off-by: SamYuan1990 <yy19902439@126.com>
---
 scl/embeddings/web_embedding.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/scl/embeddings/web_embedding.py b/scl/embeddings/web_embedding.py
index 472831a..a7f323b 100644
--- a/scl/embeddings/web_embedding.py
+++ b/scl/embeddings/web_embedding.py
@@ -7,9 +7,6 @@
 Generic OpenAI‑compatible embedding client (used as the final fallback).
 Does NOT maintain a local cache – that is handled by the coordinator.
 """
-
-import time
-
 from openai import OpenAI
 
 from scl.embeddings.base_embedding import BaseEmbeddingClient

From 9cfe75ca282c93612659db86db68addbd4967f49 Mon Sep 17 00:00:00 2001
From: SamYuan1990 <yy19902439@126.com>
Date: Thu, 4 Jun 2026 13:53:54 +0800
Subject: [PATCH 3/3] lint fix

Signed-off-by: SamYuan1990 <yy19902439@126.com>
---
 scl/embeddings/embedding.py     | 6 ++----
 scl/embeddings/web_embedding.py | 3 ++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/scl/embeddings/embedding.py b/scl/embeddings/embedding.py
index 079f73e..6f61646 100644
--- a/scl/embeddings/embedding.py
+++ b/scl/embeddings/embedding.py
@@ -52,9 +52,7 @@ def __init__(self):
         # We use getattr + bool check so a default-None attribute doesn't activate
         # a backend that lacks a real configuration value.
         self._local_available = bool(getattr(config, "embedding_local_model_path", None))
-        self._web_available = bool(
-            getattr(config, "embedding_base_url", None)
-        )
+        self._web_available = bool(getattr(config, "embedding_base_url", None))
         if self._local_available:
             from scl.embeddings.local_embedding import LocalEmbeddingClient
 
@@ -103,7 +101,7 @@ def embed(self, text):
         if self._web_available:
             self.logger.info("Using web API for embedding")
             embedding = self.web_client.embed(text)
-            #self.cache.set(text, embedding)
+            # self.cache.set(text, embedding)
             self._counter.add(1, {"source": "web"})
             return embedding
 
diff --git a/scl/embeddings/web_embedding.py b/scl/embeddings/web_embedding.py
index a7f323b..17fd012 100644
--- a/scl/embeddings/web_embedding.py
+++ b/scl/embeddings/web_embedding.py
@@ -7,6 +7,7 @@
 Generic OpenAI‑compatible embedding client (used as the final fallback).
 Does NOT maintain a local cache – that is handled by the coordinator.
 """
+
 from openai import OpenAI
 
 from scl.embeddings.base_embedding import BaseEmbeddingClient
@@ -48,7 +49,7 @@ def _init_subclass(self):
     @tracer.start_as_current_span("embed")
     def embed(self, text):
         """Call the web API and return the embedding vector."""
-        #time.sleep(5)  # rate limit / timeout guard
+        # time.sleep(5)  # rate limit / timeout guard
         self.logger.info("Web embedding request (text length: %d)", len(text))
         self.logger.debug("Full text: %s", text)