From eecbef16d156509436717dc42986b001363bc5d0 Mon Sep 17 00:00:00 2001 From: daehee Date: Tue, 7 Apr 2026 18:11:53 +0900 Subject: [PATCH 01/14] =?UTF-8?q?feat:=20Layer=204=20RPC=20=ED=8C=A8?= =?UTF-8?q?=ED=84=B4=20=EA=B0=90=EC=A7=80=20+=20=EB=8F=99=EC=A0=81=20prefi?= =?UTF-8?q?x=20=EA=B0=90=EC=A7=80=20+=20UTF-8=20=EC=9D=B8=EC=BD=94?= =?UTF-8?q?=EB=94=A9=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - RPC-style API에서 verb-resource 기반 CRUD workflow 관계 감지 (Layer 4) - DTO 타입 매칭으로 cross-controller COMPLEMENTARY 관계 감지 - _group_by_resource: 하드코딩 대신 동적 prefix threshold로 버전/라우팅 prefix 자동 스킵 - _detect_rpc_patterns를 _detect_rpc_crud_workflows + _detect_rpc_dto_links로 분리 - serialization.py: save_graph에 encoding="utf-8" 추가 --- graph_tool_call/analyze/dependency.py | 257 +++++++++++++++++++++++++- graph_tool_call/serialization.py | 2 +- 2 files changed, 249 insertions(+), 10 deletions(-) diff --git a/graph_tool_call/analyze/dependency.py b/graph_tool_call/analyze/dependency.py index b6d42d7..552ebbe 100644 --- a/graph_tool_call/analyze/dependency.py +++ b/graph_tool_call/analyze/dependency.py @@ -79,6 +79,7 @@ def detect_dependencies( relations.extend(_detect_structural(tools, spec)) relations.extend(_detect_name_based(tools)) relations.extend(_detect_cross_resource(tools)) + relations.extend(_detect_rpc_patterns(tools)) relations = _deduplicate(relations) relations = [r for r in relations if r.confidence >= min_confidence] relations.sort(key=lambda r: r.confidence, reverse=True) @@ -131,17 +132,59 @@ def _is_single_resource_path(path: str) -> bool: def _group_by_resource(tools: list[ToolSchema]) -> dict[str, list[ToolSchema]]: """Group tools that have ``method`` and ``path`` metadata by their base resource. - The base resource is the first non-param path segment (e.g. ``/pets``). + The base resource is the first *meaningful* non-param path segment. 
+ A segment is considered a non-meaningful prefix when it groups more than + ``_PREFIX_THRESHOLD`` percent of all tools — this handles version prefixes + (``/v1``, ``/v2``), routing prefixes (``/api``, ``/rest``), etc. without + requiring a hardcoded list. """ + _PREFIX_THRESHOLD = 0.4 # if a segment covers >40% of tools, it's a prefix + + api_tools = [ + t for t in tools + if t.metadata.get("path") and t.metadata.get("method") + ] + if not api_tools: + return {} + + total = len(api_tools) + + # Collect static segments per tool + tool_segments: list[tuple[ToolSchema, list[str]]] = [] + for tool in api_tools: + segs = [s for s in tool.metadata["path"].split("/") if s and not s.startswith("{")] + tool_segments.append((tool, segs)) + + # Determine max depth to scan for prefixes (usually 1-2 levels) + max_depth = max((len(segs) for _, segs in tool_segments), default=1) + + # Find how many prefix levels to skip: + # walk from depth 0 and keep skipping while the segment at that depth + # covers >threshold of all tools + skip_depth = 0 + for depth in range(min(max_depth, 4)): # cap at 4 to avoid pathological cases + counter: dict[str, int] = {} + for _, segs in tool_segments: + if depth < len(segs): + counter.setdefault(segs[depth], 0) + counter[segs[depth]] += 1 + if not counter: + break + most_common_count = max(counter.values()) + if most_common_count / total > _PREFIX_THRESHOLD: + skip_depth = depth + 1 + else: + break + + # Group by the segment at skip_depth groups: dict[str, list[ToolSchema]] = {} - for tool in tools: - path = tool.metadata.get("path") - method = tool.metadata.get("method") - if not path or not method: - continue - # base resource = first static segment of the path - segments = [s for s in path.split("/") if s and not s.startswith("{")] - base = "/" + segments[0] if segments else "/" + for tool, segs in tool_segments: + if skip_depth < len(segs): + base = "/" + segs[skip_depth] + elif segs: + base = "/" + segs[-1] + else: + base = "/" 
groups.setdefault(base, []).append(tool) return groups @@ -611,6 +654,202 @@ def _detect_cross_resource(tools: list[ToolSchema]) -> list[DetectedRelation]: return relations +# --------------------------------------------------------------------------- +# Layer 4: RPC-style method name & DTO pattern detection +# --------------------------------------------------------------------------- + +# Maps leading verb in an RPC method name to a CRUD intent category. +_VERB_TO_INTENT: dict[str, str] = { + # read + "get": "read", "find": "read", "fetch": "read", "list": "read", + "search": "read", "select": "read", "load": "read", "read": "read", + "download": "read", + # write (create) + "save": "write", "create": "write", "add": "write", "insert": "write", + "register": "write", "regist": "write", + # update + "modify": "update", "update": "update", "edit": "update", + "change": "update", "patch": "update", + # delete + "delete": "delete", "remove": "delete", "cancel": "delete", + "withdraw": "delete", + # action (side-effect operations) + "process": "action", "execute": "action", "apply": "action", + "approve": "action", "reject": "action", "confirm": "action", + "accept": "action", "send": "action", "upload": "action", + "export": "action", +} + +# Trailing tokens in method names that describe the *view*, not the resource. +_NAME_SUFFIXES: frozenset[str] = frozenset({ + "list", "detail", "details", "info", "count", "excel", "popup", + "summary", "check", "data", "total", "all", "page", "download", +}) + +# Common DTO class-name suffixes that are not part of the resource identity. +_DTO_SUFFIXES: frozenset[str] = frozenset({ + "request", "response", "dto", "entity", "info", "base", + "api", "vo", "model", "form", "param", "result", "ml", +}) + +# CRUD workflow rules: (source_intent, target_intent, relation, same_ctrl_conf, cross_ctrl_conf) +# ``None`` for cross_ctrl_conf means the rule is skipped across controllers. 
+_WORKFLOW_RULES: list[tuple[str, str, RelationType, float, float | None]] = [ + ("read", "write", RelationType.REQUIRES, 0.9, 0.8), + ("update", "read", RelationType.REQUIRES, 0.85, 0.75), + ("delete", "read", RelationType.REQUIRES, 0.85, 0.75), + ("action", "read", RelationType.REQUIRES, 0.75, None), +] + + +def _same_controller(a: ToolSchema, b: ToolSchema) -> bool: + """Return True if both tools belong to the same (non-empty) controller.""" + ctrl_a = a.metadata.get("controller") or "" + ctrl_b = b.metadata.get("controller") or "" + return ctrl_a == ctrl_b != "" + + +def _extract_verb_and_resource(name: str) -> tuple[str, str]: + """Extract (verb, resource) from an RPC-style method name. + + ``getGoodsList`` → ``("get", "goods")`` + ``saveOptionCategoryList`` → ``("save", "optioncategory")`` + """ + tokens = _normalize_name(name) + if not tokens: + return "", "" + + verb = "" + resource_start = 0 + for i, tok in enumerate(tokens): + if tok in _VERB_TO_INTENT: + verb = tok + resource_start = i + 1 + break + + resource = "".join(t for t in tokens[resource_start:] if t not in _NAME_SUFFIXES) + return verb, resource + + +def _extract_dto_resource(type_name: str | None) -> str: + """Extract the resource root from a DTO class name. + + ``GoodsMgmtApiResponse`` → ``goodsmgmt`` + ``ClaimTargetRequest`` → ``claimtarget`` + """ + if not type_name: + return "" + tokens = _normalize_name(type_name) + return "".join(t for t in tokens if t not in _DTO_SUFFIXES) + + +def _detect_rpc_patterns(tools: list[ToolSchema]) -> list[DetectedRelation]: + """Detect relations for RPC-style APIs (Layer 4). + + Handles non-RESTful endpoints (e.g. ``/v1/goods/goodsMgmtApi/getGoodsList``) + where structural path analysis is ineffective. + + Two strategies: + 1. **Verb-resource grouping** — methods sharing the same resource token + form CRUD workflows with controller-scoped confidence. + 2. 
**DTO type matching** — methods sharing a request/response type across + controllers are marked COMPLEMENTARY. + """ + relations: list[DetectedRelation] = [] + relations.extend(_detect_rpc_crud_workflows(tools)) + relations.extend(_detect_rpc_dto_links(tools)) + return relations + + +def _detect_rpc_crud_workflows(tools: list[ToolSchema]) -> list[DetectedRelation]: + """Build CRUD workflow relations from verb-resource analysis.""" + relations: list[DetectedRelation] = [] + + # Group tools by extracted resource token. + resource_groups: dict[str, list[tuple[str, ToolSchema]]] = {} + for tool in tools: + verb, resource = _extract_verb_and_resource(tool.name) + if verb and resource: + resource_groups.setdefault(resource, []).append((verb, tool)) + + for resource, members in resource_groups.items(): + if len(members) < 2: + continue + + # Classify members by CRUD intent. + by_intent: dict[str, list[ToolSchema]] = {} + for verb, tool in members: + intent = _VERB_TO_INTENT.get(verb, "other") + by_intent.setdefault(intent, []).append(tool) + + # Apply workflow rules. + for src_intent, tgt_intent, rel_type, same_conf, cross_conf in _WORKFLOW_RULES: + for src in by_intent.get(src_intent, []): + for tgt in by_intent.get(tgt_intent, []): + if src.name == tgt.name: + continue + same = _same_controller(src, tgt) + if not same and cross_conf is None: + continue + relations.append(DetectedRelation( + source=src.name, + target=tgt.name, + relation_type=rel_type, + confidence=same_conf if same else cross_conf, # type: ignore[arg-type] + evidence=( + f"{src.name} ({src_intent}) → {tgt.name} ({tgt_intent})" + f" — resource '{resource}'" + ), + layer=4, + )) + + # Readers within same controller are SIMILAR_TO. 
+ readers = by_intent.get("read", []) + for i, r1 in enumerate(readers): + for r2 in readers[i + 1:]: + if r1.name != r2.name and _same_controller(r1, r2): + relations.append(DetectedRelation( + source=r1.name, + target=r2.name, + relation_type=RelationType.SIMILAR_TO, + confidence=0.8, + evidence=f"{r1.name} ↔ {r2.name} — similar reads for '{resource}'", + layer=4, + )) + + return relations + + +def _detect_rpc_dto_links(tools: list[ToolSchema]) -> list[DetectedRelation]: + """Link tools that share a DTO type across controllers (COMPLEMENTARY).""" + relations: list[DetectedRelation] = [] + + # Group tools by normalised DTO resource name. + dto_groups: dict[str, list[ToolSchema]] = {} + for tool in tools: + for type_name in (tool.metadata.get("request_type"), tool.metadata.get("response_type")): + dto_res = _extract_dto_resource(type_name) + if len(dto_res) >= 4: + dto_groups.setdefault(dto_res, []).append(tool) + + for dto_res, members in dto_groups.items(): + if not 2 <= len(members) <= 20: + continue + for i, a in enumerate(members): + for b in members[i + 1:]: + if a.name != b.name and not _same_controller(a, b): + relations.append(DetectedRelation( + source=a.name, + target=b.name, + relation_type=RelationType.COMPLEMENTARY, + confidence=0.75, + evidence=f"{a.name} ↔ {b.name} — shared DTO '{dto_res}'", + layer=4, + )) + + return relations + + # --------------------------------------------------------------------------- # De-duplication # --------------------------------------------------------------------------- diff --git a/graph_tool_call/serialization.py b/graph_tool_call/serialization.py index cfa56ea..cac1c00 100644 --- a/graph_tool_call/serialization.py +++ b/graph_tool_call/serialization.py @@ -52,7 +52,7 @@ def save_graph( path = Path(path) try: path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(json.dumps(data, indent=2, ensure_ascii=False, default=str)) + path.write_text(json.dumps(data, indent=2, ensure_ascii=False, default=str), 
encoding="utf-8") except PermissionError: msg = f"Permission denied: {path}. Check directory permissions." raise PermissionError(msg) from None From fd113089ecc8240fb7aea5fcdd877099be9a7ce7 Mon Sep 17 00:00:00 2001 From: daehee Date: Fri, 24 Apr 2026 10:38:01 +0900 Subject: [PATCH 02/14] =?UTF-8?q?feat:=20plan-and-execute=20=EC=95=84?= =?UTF-8?q?=ED=82=A4=ED=85=8D=EC=B2=98=20=EA=B8=B0=EB=B0=98=20=EB=A0=88?= =?UTF-8?q?=EC=9D=B4=EC=96=B4=20(L0=20+=20Stage=203)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase A — L0 Knowledge Base - graph_tool_call.ingest.io_contract: swagger schema → produces/consumes leaf 필드 결정론적 추출 - graph_tool_call.ontology.llm_provider: enrich_tool_semantics 메서드 (per-tool semantic 주석), ToolEnrichment / FieldSemantic / PairHint dataclass, max_tokens 명시, reference_tools 분리 - graph_tool_call.tool_graph: search_tools gateway 가 prerequisites / relations 를 LLM 에 노출 (retrieve_with_scores 사용) Phase B — Stage 3 Plan Runner - graph_tool_call.plan: Plan / PlanStep / ExecutionTrace 스키마 - graph_tool_call.plan.binding: BindingResolver (dotted + [N] + native type 보존) - graph_tool_call.plan.runner: PlanRunner (streaming + non-streaming) - tests: 29/29 pass (binding 21 + runner 8) docs/architecture-plan-and-execute.md: 5-layer 설계 문서. 
--- docs/architecture-plan-and-execute.md | 830 +++++++++++++++++++++++ graph_tool_call/execute/http_executor.py | 7 +- graph_tool_call/ingest/io_contract.py | 345 ++++++++++ graph_tool_call/ingest/openapi.py | 108 ++- graph_tool_call/langchain/gateway.py | 101 ++- graph_tool_call/net.py | 31 +- graph_tool_call/ontology/llm_provider.py | 242 ++++++- graph_tool_call/plan/__init__.py | 62 ++ graph_tool_call/plan/binding.py | 165 +++++ graph_tool_call/plan/runner.py | 342 ++++++++++ graph_tool_call/plan/schema.py | 80 +++ graph_tool_call/tool_graph.py | 215 +++++- 12 files changed, 2489 insertions(+), 39 deletions(-) create mode 100644 docs/architecture-plan-and-execute.md create mode 100644 graph_tool_call/ingest/io_contract.py create mode 100644 graph_tool_call/plan/__init__.py create mode 100644 graph_tool_call/plan/binding.py create mode 100644 graph_tool_call/plan/runner.py create mode 100644 graph_tool_call/plan/schema.py diff --git a/docs/architecture-plan-and-execute.md b/docs/architecture-plan-and-execute.md new file mode 100644 index 0000000..caca509 --- /dev/null +++ b/docs/architecture-plan-and-execute.md @@ -0,0 +1,830 @@ +# Plan-and-Execute Architecture + +> 작성: 2026-04-22, 업데이트: 2026-04-23 +> 상태: 확정 (설계) / 미구현 +> 범위: graph-tool-call 라이브러리 + xgen-workflow 통합 + +## 변경 이력 + +- **2026-04-23**: 설계 간소화 + - Ingest 시 embedding + Qdrant 저장 **삭제** (YAGNI). Field 이름 exact match 로 충분, cross-field synonym 은 LLM enrichment 가 해결 + - L0 에 **LLM per-tool enrichment (Pass 2)** 도입. graph-tool-call 이 이미 보유한 `OntologyLLM` 추상화 활용 + - Stage 1 retrieval 은 기존 BM25 + graph (graph-tool-call retrieval) 재사용. embedding prefilter 생략 + - Knowledge Base 가 **두 층** 으로 명확화: (A) 결정론적 파서 / (B) LLM semantic enrichment + +--- + +## 0. 한 쪽 요약 + +**문제:** 현재 LLM-as-orchestrator (ReAct) 는 요청당 15 iteration × ~15KB context = **30초, 225KB 토큰**. 비용·지연·품질 모두 구조적 한계. 
+ +**해결:** **사전 지식 (graph + schemas + ingest 시 LLM 의미 주석)** 을 최대한 활용하고, runtime LLM 은 자연어 ↔ 구조 변환에만 사용하는 **5-layer 아키텍처** (L0 Knowledge Base + Stage 1~4 Runtime). + +**기대 효과:** +- LLM 호출 15 → 2~3회 +- Context 225KB → ~2~3KB (**~75배 감소**) +- Latency 30초 → 2~5초 (**~10배 개선**) +- 실행 단계 재현성, 감사 가능성 확보 +- 확장 축 확보 (fan-out, template, interactive) + +--- + +## 1. 설계 원칙 + +| # | 원칙 | 의미 | +|---|---|---| +| 1 | 사전 지식 최대 활용 | graph, schemas, embeddings 는 offline 구축 후 영속. 요청 처리 시 재계산 금지 | +| 2 | LLM 은 semantic bridge 에만 | 자연어 이해 / 의미 추출 / 자연어 생성 — 그 외 결정론 | +| 3 | 결정 가능한 것은 결정론적으로 | 매칭·순서·바인딩은 알고리즘. LLM 폴백은 **실패한 결정론의 보완** | +| 4 | 각 단계는 독립 입출력 계약 | 테스트·캐싱·디버깅·부분 교체 가능 | +| 5 | 하드코딩은 "학습된 지식" 으로 대체 | synonym → embedding cluster, verb → intent classifier | +| 6 | Failure mode 관측 가능 | 어느 stage 에서 왜 실패했는지 항상 명확해야 함 | + +--- + +## 2. 시스템 개요 + +``` +╔═══════════════════════════════════════════════════════════════╗ +║ OFFLINE / INGEST TIME ║ +║ ┌─────────────────────────────────────────────────────────┐ ║ +║ │ L0. KNOWLEDGE BASE │ ║ +║ │ │ ║ +║ │ Swagger → ToolSchema + Tool Embeddings + │ ║ +║ │ IO Contract + Tool Graph │ ║ +║ │ │ ║ +║ │ 저장: api_tool_collections.graph (JSONB) │ ║ +║ │ api_tool_collections.embeddings (pgvector) │ ║ +║ │ api_tool_collections.io_contracts (JSONB) │ ║ +║ └─────────────────────────────────────────────────────────┘ ║ +╚═══════════════════════════════════════════════════════════════╝ + │ + ▼ (요청 도착) +╔═══════════════════════════════════════════════════════════════╗ +║ REQUEST TIME PIPELINE ║ +║ ║ +║ requirement (자연어) ║ +║ │ ║ +║ ▼ ║ +║ ┌──────────────────────────────────────────────────────┐ ║ +║ │ STAGE 1. RETRIEVAL + TARGET SELECTION │ ║ +║ │ (a) embedding prefilter: 108 → top-20 │ ║ +║ │ (b) LLM pick: 20개 catalog → target + entities │ ║ +║ │ context: ~1KB │ LLM: 1회 │ ║ +║ └────────────────┬─────────────────────────────────────┘ ║ +║ │ ║ +║ ▼ ║ +║ ┌──────────────────────────────────────────────────────┐ ║ +║ │ STAGE 2. 
PATH SYNTHESIZER │ ║ +║ │ (결정론) target 의 consumes → IO Contract 역추적 │ ║ +║ │ → DAG 구성 + argument bindings │ ║ +║ │ context: — │ LLM: 0회 │ ║ +║ └────────────────┬─────────────────────────────────────┘ ║ +║ │ ║ +║ ┌─────────┴─────────┐ ║ +║ │ │ ║ +║ 확정 plan 모호 (2+ 경로) ║ +║ │ │ ║ +║ │ ▼ ║ +║ │ ┌────────────────────────────────────────┐ ║ +║ │ │ (조건부) DISAMBIGUATION │ ║ +║ │ │ context: ~2KB (후보만) │ LLM: 1회 │ ║ +║ │ └────────────┬───────────────────────────┘ ║ +║ │ │ ║ +║ └───────────────────┘ ║ +║ │ ║ +║ ▼ ║ +║ ┌──────────────────────────────────────────────────────┐ ║ +║ │ STAGE 3. RUNNER │ ║ +║ │ (결정론) DAG topological 실행 │ ║ +║ │ JsonPath 치환 + tool_executor HTTP │ ║ +║ │ step 단위 streaming event │ ║ +║ │ context: — │ LLM: 0회 │ ║ +║ └────────────────┬─────────────────────────────────────┘ ║ +║ │ ║ +║ ▼ ║ +║ ┌──────────────────────────────────────────────────────┐ ║ +║ │ STAGE 4. RESPONSE SYNTHESIS │ ║ +║ │ execution trace (요약) → 자연어 응답 │ ║ +║ │ context: ~1KB │ LLM: 1회 │ ║ +║ └────────────────┬─────────────────────────────────────┘ ║ +║ │ ║ +║ ▼ ║ +║ 최종 답변 ║ +╚═══════════════════════════════════════════════════════════════╝ +``` + +**일반 케이스 예산:** LLM 2회, context ~2KB, 2~4초. +**모호 케이스:** LLM 3회, context ~4KB, 4~6초. + +--- + +## 3. L0 — Knowledge Base + +ingest 1회. 영속 저장. 요청 처리에서 재계산 금지. + +**두 층 구조:** +- **Pass 1 — Deterministic parser**: Swagger 의 구조적 사실 (schema, HTTP, dependency) 추출. LLM 금지. +- **Pass 2 — Semantic enrichment**: Description 등을 LLM 이 읽고 의미 주석 (언제 써, 무엇을 내놓는다, 누구와 쌍을 이룬다). graph-tool-call 의 `OntologyLLM` 추상화 재사용. + +### 3.1 ToolSchema (Pass 1, 기존 확장) + +기존 `tools` 테이블. 추가 필드는 아래 섹션들이 채움. 
+ +| 필드 | 설명 | 출처 | +|---|---|---| +| `function_id` | 컬렉션 범위 고유 slug | 파서 | +| `function_name` | 원본 operationId | 파서 | +| `description` | summary + description + tags | 파서 | +| `api_url`, `api_method`, `api_header`, `api_body` | 실행용 | 파서 | +| `metadata` | method/path/base_url/tags/response_schema/controller/request_type/response_type | 파서 | +| `ai_metadata` | canonical_action, primary_resource, when_to_use, pairs_well_with 등 | **Pass 2 (LLM)** | + +### 3.2 IO Contract (Pass 1, 결정론) + +각 tool 의 **필드 수준 produces/consumes** 를 swagger schema 에서 기계적으로 추출. + +**저장:** 신규 테이블 `tool_io_contracts`: +```sql +CREATE TABLE tool_io_contracts ( + tool_id VARCHAR(100) REFERENCES tools(function_id), + direction VARCHAR(10) CHECK (direction IN ('produces', 'consumes')), + json_path TEXT, -- $.body.goods[*].goodsNo (produces) + -- goodsNo (consumes) + field_name VARCHAR(100), -- goodsNo + field_type VARCHAR(40), -- integer, string, object + required BOOLEAN, -- consumes 에 한함 + semantic_tag VARCHAR(80) -- Pass 2 LLM 이 채움 (빈 값 허용) +); +``` + +**추출 프로세스 (LLM 없음):** +``` +for each tool in schemas: + request_leaves = walk_schema_leaves(tool.request_schema) + response_leaves = walk_schema_leaves(tool.response_schema) + + for each leaf in request_leaves: + insert consumes (field_name, type, required) + + for each leaf in response_leaves: + insert produces (json_path, field_name, type) +``` + +**1차 매칭: exact field name + type** — 동일 swagger 내 field 이름 규약 보통 일관. 이걸로 대부분의 엣지 생성. + +```python +# 결정론적 field match edge +for A in tools: + for p in A.produces: + for B in tools: + if A == B: continue + for c in B.consumes.required: + if p.field_name == c.field_name and p.type == c.type: + graph.add_edge(A, B, "produces_for", + binding={c.field_name: p.json_path}) +``` + +### 3.3 Semantic Enrichment (Pass 2, LLM) + +**목적:** Description 등의 비정형 정보를 LLM 이 해석해 의미 주석 추가. 하드코딩된 verb 사전 / synonym 테이블 **완전 대체**. 
+ +**인프라:** graph-tool-call 에 이미 있는 `OntologyLLM` 활용 ([graph_tool_call/ontology/llm_provider.py](graph_tool_call/ontology/llm_provider.py)). + +**이미 제공되는 메서드:** +- `infer_relations(tools)` — LLM 기반 관계 추론 +- `suggest_categories(tools)` — 카테고리 그룹핑 +- `verify_relations(relations, tools)` — 휴리스틱 엣지 검증 / 거르기 +- `suggest_missing(tools, existing)` — 빠진 엣지 제안 +- `enrich_keywords(tools)` — BM25 향상용 키워드 +- `generate_example_queries(tools)` — 임베딩 매칭용 예시 쿼리 + +**신규 메서드 (추가 구현):** +```python +class OntologyLLM: + def enrich_tool_semantics( + self, tools: list[ToolSummary], batch_size: int = 10, + ) -> dict[str, ToolEnrichment]: + """Per-tool 의미 주석 (action, resource, use-when, semantic tags, pairs).""" +``` + +**ToolEnrichment 스키마:** +```typescript +type ToolEnrichment = { + canonical_action: "search" | "read" | "create" | "update" | "delete" | "action"; + primary_resource: string; // 정규화 리소스명 (예: "product") + one_line_summary: string; // 한 줄 요약 (Stage 1 catalog 용) + when_to_use: string; // 언제 쓰는지 + when_not_to_use?: string; // 쓰면 안 되는 경우 + produces_semantics: Array<{ // 의미 태깅된 produces + semantic: string; // "product_id" 같은 canonical + json_path: string; // 실제 경로 + }>; + consumes_semantics: Array<{ + semantic: string; + field: string; + }>; + pairs_well_with: Array<{ // 함께 / 순서대로 쓰이는 도구들 + tool: string; + reason: string; + }>; +} +``` + +**Prompt 예시:** +``` +You are annotating an API tool for a planning system. + +Tool: seltSearchProduct +Summary: 상품 검색 +Description: 키워드로 상품을 검색하는 API입니다. ... +HTTP: GET /v1/search/product +Request fields: [searchWord, langCd, siteNo, sort, ...] +Response fields: [$.body.goods[*].goodsNo, $.body.goods[*].goodsName, ...] 
+ +Produce JSON with: +- canonical_action (search|read|create|update|delete|action) +- primary_resource (one word like "product", "order", "user") +- one_line_summary (Korean, within 40 chars) +- when_to_use (1~2 sentences) +- produces_semantics: map internal field names to semantic ids like "product_id" +- pairs_well_with: 2~3 related tools with brief reason + +Output JSON only. +``` + +**저장:** +- `tools.ai_metadata` JSONB 컬럼 (전체 enrichment 덤프) +- `tool_io_contracts.semantic_tag` (produces_semantics / consumes_semantics 의 semantic 을 해당 row 에 매핑) + +**재실행 조건:** swagger 변경, LLM 모델 업그레이드, 관리자 강제 재생성. 일상 요청 처리와 **분리**. + +### 3.4 Tool Graph (재정의) + +엣지 타입: + +| 엣지 | 근거 | 신뢰도 | 용도 | +|---|---|---|---| +| `produces_for` (exact) | Pass 1 — field name + type 일치 | high | Stage 2 주 신호 | +| `produces_for` (semantic) | Pass 2 — `semantic_tag` 일치 | medium | Pass 1 이 못 잡는 교차 명명 (cross-collection 등) | +| `pairs_with` | Pass 2 — `pairs_well_with` 에서 | medium | Stage 1 catalog 힌트, Stage 2 보조 | +| `similar_to` | 구조적 (같은 controller / tag / CRUD 역할) | low | Disambiguation 후보 확장 | +| `precedes` | 구조적 (POST → GET single 등) | low | 레거시 엣지, 보조 힌트 | + +**기존 하드코딩 반응성 패치 (selt, synonym clusters, *No/*Seq heuristic, search-bridge exception) 는 Pass 2 완성 시 모두 제거.** Pass 1 field exact match + Pass 2 LLM enrichment 가 그 역할을 대체. 
+ +### 3.5 Ingest 파이프라인 + +```python +# xgen-workflow 측 +def ingest_collection(collection_id, spec_source, llm_config): + from graph_tool_call.ontology.llm_provider import wrap_llm + from graph_tool_call.ingest.openapi import parse_operations + + # Pass 1: 결정론 + schemas = parse_operations(spec_source) + io_contracts = extract_io_contracts(schemas) # 3.2 + graph = build_structural_edges(schemas, io_contracts) # 3.4 + + # Pass 2: LLM (옵션) + if llm_config.enabled: + llm = wrap_llm(build_llm_spec(llm_config)) + enrichments = llm.enrich_tool_semantics(schemas) + apply_semantic_tags(io_contracts, enrichments) # semantic_tag 채움 + graph = augment_with_semantic_edges(graph, enrichments) + + store_all(schemas, io_contracts, graph, enrichments) +``` + +**옵션:** Pass 2 는 `llm_config.enabled=False` 로 **생략 가능**. Pass 1 만으로도 기본 동작은 가능 (품질은 낮음). + +### 3.6 xgen-workflow 통합 + +xgen 은 이미 agent 노드에서 provider/model/api_key 선택 지원. Ingest 시에도 동일 config 재사용: + +```python +# xgen-workflow: api_tool_collection/service.py +def refresh_with_enrichment(collection_id, llm_settings): + llm_spec = f"{llm_settings.provider}/{llm_settings.model}" + # "openai/gpt-4.1-mini" + + # api_key 는 env 또는 xgen secret store 에서 + os.environ["OPENAI_API_KEY"] = xgen_secret.get(user_id, "openai") + + ingest_collection(collection_id, spec_source, LLMConfig( + enabled=True, + spec=llm_spec, + )) +``` + +graph-tool-call 은 xgen 에 의존하지 않음. xgen 이 config 주는 쪽, graph-tool-call 이 받는 쪽. + +--- + +## 4. Stage 1 — Retrieval + Target Selection + +**입력:** `requirement: str` + +**출력:** +```json +{ + "target": "seltProductDetailInfo", + "confidence": 0.92, + "entities": { + "keyword": "quarzen 티셔츠", + "locale": "ko" + }, + "output_shape": "single", + "reasoning": "..." +} +``` + +### 4.1 알고리즘 + +**(a) Retrieval prefilter (결정론):** graph-tool-call 의 기존 `retrieve_with_scores()` 그대로 사용. +```python +candidates = tg.retrieve_with_scores(requirement, top_k=20) +# BM25 + graph + (optional) annotation 채널 +``` +embedding prefilter 는 생략. 
기존 BM25 + graph 가 top-20 recall 을 충분히 내는 것을 실측으로 확인 (x2bee `"product search"` → `seltSearchProduct` top-10 안에 들어옴). + +향후 recall 부족 증거가 나오면 embedding 채널을 **그때** 연결. 지금은 YAGNI. + +**(b) LLM structured pick:** +- 20개의 catalog 에 **ai_metadata 포함**: + ``` + { + function_name, + description[:80], + one_line_summary, // Pass 2 에서 생성 + when_to_use, // Pass 2 + pairs_well_with // Pass 2 (이름만) + } + ``` +- system prompt: "고른 target 1개와 추출한 entities 를 반환" +- OpenAI structured output (JSON schema 강제) + +**context 크기:** 20 × 200자 ≈ 4KB (ai_metadata 포함 확장). ai_metadata 없을 땐 20 × 100자 ≈ 2KB. + +### 4.2 오류 처리 + +- Retrieval 이 top-20 모두 low score 면 → "적합한 도구 없음" 에러. 사용자 재질의 유도. +- LLM 이 JSON schema 위반 시 → 1회 retry. 실패하면 fallback: top-1 retrieval 결과로 진행 (entities 는 빈 dict). + +### 4.3 Stage 1 의 성능 지표 +- Target 정확도 (샘플 요구사항 N개에 대해 "맞는 target 선정" 비율) +- Entity 추출 재현율 +- LLM 응답 latency p50/p95 + +--- + +## 5. Stage 2 — Path Synthesizer + +**입력:** Stage 1 output (`target`, `entities`) +**출력:** Plan (Plan 스키마는 §9 참조) OR "ambiguous" 플래그 (Disambiguation 발동) + +### 5.1 DAG 구성 알고리즘 (Bottom-up) + +```python +def synthesize(target, entities, collection_defaults): + plan = {"steps": [], "output_binding": None} + context = entities | collection_defaults # 이미 아는 값들 + + needed = target.consumes.required_only() # 필수 입력만 먼저 + resolved = {} # {field: source_step_id} + pending = list(needed) + visited = set() + + while pending: + field = pending.pop(0) + if field.semantic_tag in available_tags(context, resolved): + resolved[field.name] = bind_from_available(field, context, resolved) + continue + + # graph 에서 이 semantic 을 produces 하는 tool 찾기 + producers = graph.producers_of(field.semantic_tag) + if not producers: + raise UnsatisfiableFieldError(field) + + # 후보 여러 개면 "ambiguous" 로 분기 (Disambiguation LLM, §6) + if len(producers) > 1 and not strictly_better(producers): + return AmbiguousPlan(target, candidates=producers) + + # prerequisite 추가 (재귀) + producer = producers[0] + if producer.name in visited: + raise 
CyclicDependencyError + visited.add(producer.name) + + step = build_step(producer) + plan.steps.insert(0, step) # 앞쪽에 삽입 (위상 순서) + + # producer 의 consumes 를 다시 확인 + pending.extend(producer.consumes.required_only()) + + # target 을 마지막 step 으로 추가 + plan.steps.append(build_step(target, bindings=resolved)) + plan.output_binding = f"$.{target.step_id}.body" + + return plan +``` + +### 5.2 "strictly_better" 판단 + +여러 producer 후보 중: +- IO Contract confidence 높은 순 +- 경로 짧은 순 (재귀 depth) +- similar_to weight 높은 순 (requirement 와 가까운) +- 모두 비슷하면 → Ambiguous 플래그 + +### 5.3 초기 버전 범위 + +- **선형 chain** (각 step 1회 호출): 지원 +- **다중 참조** (한 step 이 이전 N개 step 의 출력 조합): 지원 +- **Fan-out** (배열 전체 loop): **초기 범위 밖** — §11 확장 포인트 +- **조건 분기** (if/else): **초기 범위 밖** + +### 5.4 실패 경로 + +| 케이스 | 반환 | +|---|---| +| 필수 field 해소 불가 | `UnsatisfiableFieldError` — Stage 4 에 그대로 reveal | +| 순환 의존 | `CyclicDependencyError` — 보고 | +| 복수 경로 | `AmbiguousPlan` — Disambiguation 발동 | + +--- + +## 6. Disambiguation (조건부) + +**발동 조건:** Stage 2 가 `AmbiguousPlan` 반환. + +**입력:** 후보 경로 2~N개 각각의 요약 +``` +후보 A: seltSearchProduct → seltProductDetailInfo +후보 B: getCategoryList → seltSearchProduct → seltProductDetailInfo +``` + +**LLM 호출:** +- system: "요구사항에 가장 맞는 경로 1개를 고르고 이유를 설명" +- user: requirement + 후보 경로 설명 +- structured output: `{"chosen": "A", "reason": "..."}` + +**context:** ~2KB + +--- + +## 7. 
Stage 3 — Runner + +**입력:** 확정 Plan + +**동작:** +```python +async def run(plan: Plan): + context = {} # step_id → result + trace = ExecutionTrace(plan=plan) + + for step in topological_order(plan.steps): + resolved_args = resolve_bindings(step.args, context) + + trace.emit("step.start", step_id=step.id, args=resolved_args) + + try: + result = await tool_executor.execute( + function_id=step.tool_function_id, + args=resolved_args, + timeout=step.timeout or 30, + ) + except ToolExecutionError as e: + trace.emit("step.error", step_id=step.id, error=str(e)) + return trace.fail(step.id, e) + + context[step.id] = result + trace.emit("step.done", step_id=step.id, output_preview=preview(result)) + + final = jsonpath_extract(context, plan.output_binding) + trace.emit("plan.done", output=final) + return trace.success(final) +``` + +### 7.1 Argument 바인딩 치환 + +바인딩 syntax: `${step_id.json_path}` — JsonPath 표준 사용 (jsonpath-ng 라이브러리). + +``` +args = {"goodsNo": "${s1.body.goods[0].goodsNo}", + "langCd": "ko"} +context = {"s1": {"body": {"goods": [{"goodsNo": 12345, ...}]}}} +→ resolved = {"goodsNo": 12345, "langCd": "ko"} +``` + +### 7.2 에러 / 재시도 정책 (초기 버전) + +| 에러 유형 | 동작 | +|---|---| +| HTTP 4xx | fail fast, trace 에 응답 body 포함 | +| HTTP 5xx | 최대 2회 재시도 (exponential backoff) | +| 타임아웃 | fail fast | +| JsonPath 미스 | fail fast — "step sX 의 bindings 가 실제 응답 구조와 불일치: [list of missing paths]" | +| Schema 검증 실패 | fail fast | + +**재계획 (re-plan) 은 v1 범위 밖.** 실패 시 Stage 4 가 사용자에게 설명. + +### 7.3 스트리밍 + +각 step 단위로 이벤트 emit. UI 는 step 단위 진행 상황 표시. + +--- + +## 8. Stage 4 — Response Synthesis + +**입력:** requirement + ExecutionTrace + +**동작:** +```python +def synthesize_response(requirement, trace): + if trace.success: + # 최종 output 의 관련 필드만 추림 (schema-aware projection) + relevant = project_relevant_fields(trace.output, requirement) + prompt = f""" + 요구사항: {requirement} + 실행 결과 요약: {relevant} + 사용자에게 자연스럽게 답변. 
+ """ + else: + prompt = f""" + 요구사항: {requirement} + 실행 중 실패: step={trace.failed_step}, 이유={trace.error} + 부분 결과: {trace.partial_results} + 사용자에게 무엇이 됐고 무엇이 안 됐는지 설명. + """ + return llm.complete(prompt) +``` + +**context:** 요약된 결과 기준 ~1KB. 전체 response 를 그대로 넘기지 않음 — `project_relevant_fields` 가 requirement 에 관련된 필드만 추림. + +--- + +## 9. 핵심 데이터 계약 + +### 9.1 Intent Schema (Stage 1 출력) + +```typescript +type Intent = { + target: string; // function_name + confidence: number; // 0.0 ~ 1.0 + entities: Record<string, any>; // {keyword: "...", locale: "ko", ...} + output_shape: "single" | "list" | "count"; + reasoning?: string; // 디버그용 +} +``` + +### 9.2 Plan Schema (Stage 2 출력) + +```typescript +type Plan = { + id: string; // uuid (캐시 키 포함) + goal: string; // Intent 의 요약 + steps: PlanStep[]; + output_binding: string; // JsonPath "$.s2.body" 등 + metadata: { + created_at: string; + target: string; + disambiguation_used: boolean; + }; +} + +type PlanStep = { + id: string; // "s1", "s2", ... + tool: string; // function_name + tool_function_id: string; // DB 룩업용 slug + args: Record<string, any>; // {"goodsNo": "${s1.body.goods[0].goodsNo}", ...} + timeout_ms?: number; + retryable?: boolean; + rationale?: string; // "검색 결과로 goodsNo 획득" +} +``` + +### 9.3 ExecutionTrace Schema (Stage 3 출력) + +```typescript +type ExecutionTrace = { + plan_id: string; + success: boolean; + steps: StepTrace[]; + output?: any; // 성공 시 + failed_step?: string; // 실패 시 + error?: ErrorDetail; // 실패 시 + duration_ms: number; + started_at: string; + ended_at: string; +} + +type StepTrace = { + id: string; + tool: string; + args: Record<string, any>; // resolved (바인딩 치환 후) + output?: any; + error?: ErrorDetail; + duration_ms: number; + retries: number; +} +``` + +--- + +## 10. 
하드코딩 제거 매핑표 + +| 현 하드코딩 | 제거 방법 | 대체 메커니즘 | +|---|---|---| +| `_SYNONYM_CLUSTERS` (goods↔product) | 제거 | Pass 2 `primary_resource` + `semantic_tag` (LLM per-tool enrichment) | +| `selt`, `sel` verb 특수 케이스 | 제거 | Pass 2 `canonical_action` (LLM 이 context 읽고 분류) | +| `*Id/*No/*Seq` 접미사 heuristic | 제거 | Pass 1 field name + type exact match (동일 swagger 안에선 충분) + 필요시 Pass 2 semantic_tag | +| `search-bridge` 예외 | 제거 | Pass 2 `pairs_well_with` + `canonical_action = search` | +| `_is_single_resource_path` 필터 | 제거 | IO Contract 의 produces/consumes 가 판단 | +| `_VERB_TO_INTENT` CRUD 사전 | **유지** (Pass 1 fallback) | Pass 2 가 LLM 으로 action 태깅 담당. Pass 2 생략 시 이 사전이 fallback | + +--- + +## 11. 확장 포인트 + +### 11.1 Fan-out (foreach) + +**시나리오:** "카트의 모든 상품 상세 보여줘" + +**Plan schema 확장:** +```typescript +type PlanStep = { + // ... 기존 필드 + foreach?: { + source: string; // "${s1.body.items[*]}" + item_alias: string; // "item" + }; + // args 안에서 `${item.goodsNo}` 참조 가능 +} +``` + +**Runner 확장:** foreach step 은 N회 호출 후 결과를 배열로 묶어 context 에 저장. + +### 11.2 조건 분기 (if/else) + +**Plan schema 확장:** step 에 `condition` 필드 (JsonPath 기반 부울 식). Runner 가 evaluate 후 skip/execute. + +### 11.3 Workflow Template Library + +- 성공한 Plan 을 `workflow_templates` 테이블에 승격 +- 새 requirement → embedding 기반 template match → 재사용 +- Stage 1~2 skip 가능 → 더 빠름 +- Intent 유사 판정 임계값 튜닝 필요 + +### 11.4 Interactive Refinement + +- Runner 가 특정 step 에서 `user_input_required` 이벤트 발행 +- UI 가 사용자에게 선택지 제시 +- 응답 받아 Runner 재개 (suspend/resume) +- 민감 액션 (결제, 삭제) 에 필수 + +### 11.5 Self-healing Re-plan + +- Runner 실패 시 ExecutionTrace + 에러를 Stage 1~2 에 다시 넘겨 1회 re-plan +- 예: "빈 배열 반환 → 검색 키워드 재조정" 같은 케이스 + +--- + +## 12. 마이그레이션 + +### 12.1 기존 자산 활용 + +- `graph_tool_call.analyze.dependency.detect_dependencies`: **유지**. IO Contract 가 못 잡는 구조적 엣지는 여전히 여기서. 단 반응성 패치 (`selt`, `_SYNONYM_CLUSTERS`, `*No/*Seq`, `search-bridge`) 는 Pass 2 enrichment 정착 시 **단계적 제거**. +- `graph_tool_call.retrieval`: **유지**. Stage 1 의 prefilter 로 그대로 활용 (BM25 + graph). 
+- `graph_tool_call.ontology.llm_provider`: **유지**. Pass 2 enrichment 의 `enrich_tool_semantics` 메서드 추가. +- `tool_executor.execute_collection_tool`: **유지**. Stage 3 Runner 가 호출. +- `APICollectionLoader` Canvas 노드: **유지** (그래프 + ai_metadata 로드 역할). +- `Agent Xgen` 노드: **유지** (범용 ReAct / 일반 채팅 용도). API collection 시나리오에 쓰일 땐 `Agent Planflow` 로 대체 권장. + +### 12.2 Canvas 노드 구성 변경 + +``` +기존: Input → APICollectionLoader → Agent Xgen → Output +신규: Input → APICollectionLoader → Agent Planflow → Output + (graph/ai_metadata/io_contracts 로드) (Stage 1~4 통합) +``` + +`Agent Planflow` 내부 구조: +``` +┌── Stage 1: retrieval + target pick (LLM 1회) +├── Stage 2: path synthesizer (결정론, DAG) +├── (conditional) disambiguation (LLM 조건부) +├── Stage 3: runner (streaming) (결정론, HTTP) +└── Stage 4: response synthesis (LLM 1회, streaming) +``` + +설정 UI 는 `Agent Xgen` 과 공용 컴포넌트 재사용 (provider/model/api_key/temperature/max_tokens). 전용 파라미터 (`enable_disambiguation`, `max_plan_steps`) 만 추가. + +### 12.3 점진 마이그레이션 전략 + +1. **Phase A:** L0 Knowledge Base 구축 — IO Contract 추출 (결정론) + `OntologyLLM.enrich_tool_semantics` 메서드 추가. 기존 graph 와 공존. +2. **Phase B:** Stage 3 Runner 독립 구현 (plan fixture 로 단위 테스트). +3. **Phase C:** Stage 2 Path Synthesizer — DAG + exact field match + semantic_tag 보강. +4. **Phase D:** Stage 1 + 4 LLM 호출 구현 (structured output). 기존 `retrieve_with_scores` 를 Stage 1 prefilter 로 연결. +5. **Phase E:** Canvas 노드 `Agent Planflow` 개발. 설정 UI 는 `Agent Xgen` 컴포넌트 재사용. +6. **Phase F:** 평가 세트로 A/B 측정. 안정화 후 기존 반응성 패치 (`selt`, synonym 등) 제거. + +--- + +## 13. 운영 리스크 및 완화 + +| 리스크 | 영향 | 완화 | +|---|---|---| +| IO Contract semantic_tag 오태깅 | Stage 2 가 틀린 path 생성 | ingest 시 LLM 태깅 → 관리자 UI 검수/오버라이드 | +| Stage 1 target 오선정 | 전혀 다른 도구 실행 | confidence threshold → 낮으면 disambiguation 강제 | +| Stage 2 Ambiguous 빈발 | 매 요청 LLM 추가 호출 | IO Contract 개선으로 장기적으로 완화. 
초기엔 허용 | +| Runner JsonPath miss | 실행 실패 | plan validate 단계에서 response schema 와 bindings 교차 검증 (Stage 2 출력 직후) | +| HTTP 외부 장애 | 사용자 체감 실패 | retry + 명확한 trace + Stage 4 에서 "일부 성공/실패" 구분 | +| Embedding API 비용 | ingest 비용↑ | ingest 시 1회만. 요청당 embed 는 requirement 1회만 | +| LLM structured output 깨짐 | Stage 1 파싱 실패 | 1회 retry → 실패 시 top-1 embedding 결과 fallback | + +--- + +## 14. 측정 지표 (성공 기준) + +### 14.1 성능 + +- Latency p50 / p95 (목표: p50 ≤ 3s, p95 ≤ 6s) +- LLM 호출 수 / 요청 (목표: ≤ 2.5 평균) +- Context 총량 / 요청 (목표: ≤ 3KB 평균) + +### 14.2 품질 + +평가 세트: 요구사항 20~50개 (각 collection 당). + +- **Stage 1 target 정확도:** 고른 target 이 사람 판단과 일치하는 비율 +- **Stage 2 path 정확도:** 생성된 plan 이 유효한 실행 시퀀스인 비율 +- **End-to-end 성공률:** 사용자 요구사항 → 의미 있는 답변까지 성공한 비율 +- **Ambiguity rate:** Disambiguation 발동 빈도 (낮을수록 graph 품질 좋음) + +### 14.3 비용 + +- OpenAI 토큰 소비 / 요청 (입력/출력 분리) +- Embedding 호출 수 (ingest + 요청별 1회) + +### 14.4 감사성 + +- 모든 Plan artifact 조회 가능 +- 실패 시 failed_step + error + partial_results 복원 가능 + +--- + +## 15. 비전과의 정합성 + +사용자가 그린 그림: + +> Swagger → tool list 정의 → 사전 graph 관계 구축 → +> 워크플로우에서 컬렉션 노드 연결 + 요구사항 입력 → +> 필요한 API 들 찾아 req/res 세팅 후 순서대로 호출 → 결과 반환 + +이 아키텍처의 대응: + +| 사용자 의도 | 이 설계에서 | +|---|---| +| "사전 graph 관계 구축" | L0 Knowledge Base (Pass 1 구조적 + Pass 2 LLM 의미 주석) | +| "요구사항 입력" | Stage 1 입력 | +| "필요한 API 찾기" | Stage 1 (retrieval + target pick) + Stage 2 (DAG 구성) | +| "req/res 세팅" | Stage 2 의 argument bindings (exact field match + semantic_tag) | +| "순서대로 호출" | Stage 3 Runner (DAG topological) | +| "결과 반환" | Stage 4 Response Synthesis | + +**정합성 완전.** LLM 은 의미 해석이 필요한 지점에만 최소한으로 사용: +- **Ingest 시 Pass 2** — description 을 읽고 의미 주석 (1회, 영속 저장) +- **Runtime Stage 1** — 사용자 자연어 → target tool + entities +- **Runtime Stage 4** — 실행 결과 → 자연어 응답 + +Request/response schema 는 LLM 이 일절 건드리지 않음 (swagger 가 source of truth). + +--- + +## 16. 
결정 사항 + +### 해결된 항목 (2026-04-23) + +| # | 주제 | 결정 | 근거 | +|---|---|---|---| +| 1 | Field semantic 매칭 방식 | **Pass 1 exact match (기본) + Pass 2 LLM semantic_tag (보강)**. embedding clustering 불필요 | 동일 swagger 안에선 field 이름 일관. cross-convention 은 LLM 이 해결 | +| 2 | LLM 모델 선택 | **xgen agent 노드 config 재사용**. Stage 1/4 는 사용자 노드 설정 상속. Pass 2 는 컬렉션별 별도 설정 (기본 gpt-4.1-mini) | UX 일관성, 기존 provider/key 관리 재사용 | +| 3 | Ingest embedding 모델 | **사용 안 함 (v1)**. 필요시 `text-embedding-3-small` 추후 연결 | BM25 + graph 가 Stage 1 top-20 recall 확보 (실측) | +| 4 | Plan / ExecutionTrace 영속성 | **로그 기반 (DB 테이블 없음)**. 구조화 JSON 이벤트로 plan 생명주기 기록 | YAGNI. 필요 기능 (history UI, template auto-promotion) 생길 때 해당 테이블 추가 | +| 5 | Canvas 노드 구성 | **신규 노드 `Agent Planflow`**. `Agent Xgen` 은 유지 (범용 ReAct), `Agent Planflow` 는 API collection 전용 Plan-and-Execute. 설정 UI 공용화 (provider/model/key) | 기존 자산 유지 + 특화 경로 분리. 코드 간결성 | +| 6 | Plan 실행 범위 (v1) | **선형 chain 만**. Fan-out / 조건 분기 / parallel / re-plan 은 v2+. Plan schema 는 optional 필드로 **확장 가능하게 설계** | v1 목표 (30s→5s + 정확도) 는 선형으로 달성. 복잡 케이스는 사용자에게 명시적 에러 | + +### 미결 항목 + +모두 해결됨 (2026-04-23). + +--- + +## 17. 참고 문서 + +- [pathfinder-plan.md](./pathfinder-plan.md) — 기존 로드맵 (이 문서 확정 후 섹션 3.7 업데이트 필요) +- [pathfinder-bug-analysis.md](./pathfinder-bug-analysis.md) — ingest 파이프라인 과거 이슈 +- [xgen-ai-chat-architecture.md](./xgen-ai-chat-architecture.md) — AI chat / 사이드패널 / canvas 통합 + +--- diff --git a/graph_tool_call/execute/http_executor.py b/graph_tool_call/execute/http_executor.py index 32859fa..55e5126 100644 --- a/graph_tool_call/execute/http_executor.py +++ b/graph_tool_call/execute/http_executor.py @@ -77,7 +77,12 @@ def build_request( for k, v in path_params.items(): path = path.replace(f"{{{k}}}", urllib.parse.quote(str(v), safe="")) - url = f"{self._base_url}{path}" + # tool 자체 base_url(spec.servers 유래)이 있으면 그쪽 우선 — 한 컬렉션에 + # 다른 호스트(common/product/member 등)의 source가 섞여 있을 때 source별 + # 호스트로 라우팅한다. 없으면 executor 기본 base_url 사용. 
+ tool_base = (metadata.get("base_url") or "").rstrip("/") + base = tool_base or self._base_url + url = f"{base}{path}" if query_params: url += "?" + urllib.parse.urlencode(query_params, doseq=True) diff --git a/graph_tool_call/ingest/io_contract.py b/graph_tool_call/ingest/io_contract.py new file mode 100644 index 0000000..1768a47 --- /dev/null +++ b/graph_tool_call/ingest/io_contract.py @@ -0,0 +1,345 @@ +"""Field-level IO contract extraction from OpenAPI / Swagger schemas. + +Used by L0 Knowledge Base — **Pass 1, deterministic**. Walks request and +response schemas and emits leaf field descriptors with JsonPath. The output +feeds: + + - Tool Graph: produces × consumes field-name match → ``produces_for`` edge + - Pass 2 enrichment: provides field list to LLM for ``semantic_tag`` assign + - Stage 3 Runner: bindings reference these json_paths + +This module assumes the input schema is **already $ref-resolved** (caller +runs ``_resolve_refs`` from ``graph_tool_call.ingest.openapi``). +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class FieldLeaf: + """A leaf field extracted from a JSON Schema. + + ``json_path`` is the dotted JSONPath from the schema root, with ``[*]`` + used as the array wildcard (for produces). For consumes, callers usually + flatten to ``field_name`` since binding keys by name not path. 
def extract_leaves(
    schema: Any,
    *,
    base_path: str = "$",
    parent_required: bool = False,
    max_depth: int = _DEFAULT_MAX_DEPTH,
    _depth: int = 0,
) -> list[FieldLeaf]:
    """Collect the leaf fields reachable from a JSON Schema node.

    Parameters
    ----------
    schema:
        JSON Schema dict whose ``$ref`` entries are already resolved. Any
        non-dict value yields no leaves.
    base_path:
        JSONPath of this node (``$`` for the root, ``$.body`` for a subtree).
    parent_required:
        Whether the parent marked this node as required; copied onto the
        emitted leaf and passed through array wrappers unchanged.
    max_depth:
        Recursion ceiling. Nodes deeper than this yield no leaves.

    Returns
    -------
    list[FieldLeaf]
        One descriptor per primitive leaf. Array levels contribute a ``[*]``
        suffix to the path.
    """
    too_deep = _depth > max_depth
    if too_deep or not isinstance(schema, dict):
        return []

    node = _resolve_combinators(schema)
    kind = _normalize_type(node.get("type"))

    # Anything that declares properties is walked as an object, even when the
    # explicit "type" keyword is missing.
    looks_like_object = kind == "object" or "properties" in node
    if looks_like_object:
        return _walk_object(node, base_path, max_depth, _depth)

    if kind != "array":
        # Primitive: the leaf is named after the trailing path segment.
        leaf_name = _last_path_segment(base_path)
        if leaf_name:
            return [
                FieldLeaf(
                    json_path=base_path,
                    field_name=leaf_name,
                    field_type=kind or "string",
                    required=parent_required,
                    description=str(node.get("description") or "")[:200],
                    enum=list(node.get("enum") or []),
                )
            ]
        # A bare primitive at the root has no name to report.
        return []

    # Array: descend into the element schema under a wildcard index.
    element_schema = node.get("items") or {}
    return extract_leaves(
        element_schema,
        base_path=base_path + "[*]",
        parent_required=parent_required,
        max_depth=max_depth,
        _depth=_depth + 1,
    )
+ leaves.append( + FieldLeaf( + json_path=child_path, + field_name=prop_name, + field_type=_schema_type(prop_schema) or "object", + required=is_required, + description=( + str(prop_schema.get("description") or "")[:200] + if isinstance(prop_schema, dict) + else "" + ), + ) + ) + return leaves + + +def _resolve_combinators(schema: dict[str, Any]) -> dict[str, Any]: + """Flatten ``allOf`` / pick first ``oneOf`` / ``anyOf``. + + v1 strategy: best-effort. Doesn't handle JSON Schema combinator semantics + fully — sufficient to surface field shapes for our planning use. + """ + if "allOf" in schema and isinstance(schema["allOf"], list): + merged_props: dict[str, Any] = dict(schema.get("properties") or {}) + merged_required: list[str] = list(schema.get("required") or []) + for sub in schema["allOf"]: + if not isinstance(sub, dict): + continue + merged_props.update(sub.get("properties") or {}) + for r in sub.get("required") or []: + if r not in merged_required: + merged_required.append(r) + out = dict(schema) + out["type"] = "object" + out["properties"] = merged_props + out["required"] = merged_required + return out + + for key in ("oneOf", "anyOf"): + candidates = schema.get(key) + if isinstance(candidates, list) and candidates: + first = next((c for c in candidates if isinstance(c, dict)), None) + if first is not None: + # Merge the candidate as a base, parent fields override + base = dict(first) + base.update({k: v for k, v in schema.items() if k != key}) + return base + return schema + + +def _normalize_type(t: Any) -> str: + """JSON Schema 'type' can be str or list. 
Pick first non-null.""" + if isinstance(t, list): + return next((x for x in t if x and x != "null"), "") + return t or "" + + +def _schema_type(schema: Any) -> str: + if not isinstance(schema, dict): + return "" + return _normalize_type(schema.get("type")) + + +def _last_path_segment(path: str) -> str: + """Extract trailing field name from a JsonPath like ``$.body.goods[*].goodsNo``.""" + if not path or path == "$": + return "" + last = path.rsplit(".", 1)[-1] + if last.endswith("[*]"): + last = last[:-3] + return last + + +# --------------------------------------------------------------------------- +# Operation-level extraction (combines body + parameters) +# --------------------------------------------------------------------------- + + +def extract_produces_for_operation( + operation: dict[str, Any], + *, + is_swagger2: bool = False, +) -> list[FieldLeaf]: + """Walk operation's success response schema → leaf produces with JsonPath.""" + response_schema = _pick_response_schema(operation, is_swagger2=is_swagger2) + if not response_schema: + return [] + return extract_leaves(response_schema, base_path="$") + + +def extract_consumes_for_operation( + operation: dict[str, Any], + path_item: dict[str, Any] | None = None, + *, + is_swagger2: bool = False, + required_only: bool = True, +) -> list[FieldLeaf]: + """Combine query/path/header parameters and request body into a flat + consume list. + + Body fields are flattened to field-name level (the LLM-visible name) — + binding keys by name in Stage 2/3, not by nested path. The original + nested structure for HTTP injection is handled separately via the + existing ``leaf_path_map`` mechanism on the tool row. 
+ """ + leaves: list[FieldLeaf] = [] + seen_names: set[str] = set() + + # query / path / header parameters + all_params = (operation.get("parameters") or []) + ( + (path_item or {}).get("parameters") or [] + ) + for p in all_params: + if not isinstance(p, dict) or "name" not in p: + continue + loc = p.get("in") + if loc not in ("query", "path", "header"): + continue + is_required = bool(p.get("required", loc == "path")) + if required_only and not is_required: + continue + if is_swagger2: + ftype = p.get("type") or "string" + else: + ftype = _schema_type(p.get("schema") or {}) or "string" + if p["name"] in seen_names: + continue + seen_names.add(p["name"]) + leaves.append( + FieldLeaf( + json_path=p["name"], # flat for consumes + field_name=p["name"], + field_type=ftype, + required=is_required, + description=str(p.get("description") or "")[:200], + ) + ) + + # request body (flattened) + body_schema = _pick_request_body_schema(operation, is_swagger2=is_swagger2) + if body_schema: + for leaf in extract_leaves(body_schema, base_path="$"): + if required_only and not leaf.required: + continue + if leaf.field_name in seen_names: + continue + seen_names.add(leaf.field_name) + leaves.append( + FieldLeaf( + json_path=leaf.field_name, # flat for consumes + field_name=leaf.field_name, + field_type=leaf.field_type, + required=leaf.required, + description=leaf.description, + enum=leaf.enum, + ) + ) + + return leaves + + +def _pick_response_schema( + operation: dict[str, Any], + *, + is_swagger2: bool = False, +) -> dict[str, Any] | None: + responses = operation.get("responses") or {} + for code in ("200", "201", "default"): + resp = responses.get(code) + if not isinstance(resp, dict): + continue + # Swagger 2.0 + if "schema" in resp: + return resp["schema"] + # OpenAPI 3.x + content = resp.get("content") or {} + if "application/json" in content: + return content["application/json"].get("schema") + return None + + +def _pick_request_body_schema( + operation: dict[str, Any], + *, 
+ is_swagger2: bool = False, +) -> dict[str, Any] | None: + if is_swagger2: + for p in operation.get("parameters") or []: + if isinstance(p, dict) and p.get("in") == "body": + return p.get("schema") + return None + body = operation.get("requestBody") or {} + content = body.get("content") or {} + if "application/json" in content: + return content["application/json"].get("schema") + if content: + first = next(iter(content.values())) + return first.get("schema") if isinstance(first, dict) else None + return None + + +__all__ = [ + "FieldLeaf", + "extract_leaves", + "extract_produces_for_operation", + "extract_consumes_for_operation", +] diff --git a/graph_tool_call/ingest/openapi.py b/graph_tool_call/ingest/openapi.py index 90399dd..41ffe7e 100644 --- a/graph_tool_call/ingest/openapi.py +++ b/graph_tool_call/ingest/openapi.py @@ -181,6 +181,60 @@ def _extract_params_swagger2( return params +def _summarize_object_schema(schema: dict[str, Any], *, max_depth: int = 2) -> str: + """Object/array schema의 nested properties를 사람/LLM이 읽기 좋게 요약. + + parameter type이 'object'/'array'인데 안의 필드명이 ToolParameter에 안 드러나면 + LLM이 필드명을 추측하게 된다. 이 함수는 properties + required + description을 + description 텍스트로 합쳐서 LLM 컨텍스트에 함께 노출되도록 한다. 
+ """ + if not isinstance(schema, dict): + return "" + + def _walk(s: dict[str, Any], depth: int, indent: int) -> list[str]: + if depth > max_depth or not isinstance(s, dict): + return [] + out: list[str] = [] + prefix = " " * indent + + # Unwrap array → items + if s.get("type") == "array": + items = s.get("items") or {} + out.append(f"{prefix}[array of:]") + out.extend(_walk(items, depth + 1, indent + 1)) + return out + + props = s.get("properties") or {} + if not props: + return out + required = set(s.get("required") or []) + for name, prop in props.items(): + if not isinstance(prop, dict): + continue + ptype = _schema_type(prop) + req = "*" if name in required else "" + desc = (prop.get("description") or "").strip() + example = prop.get("example") + line = f"{prefix}- {name}{req} ({ptype})" + if desc: + line += f": {desc}" + if example is not None and not desc: + line += f" e.g. {example}" + out.append(line) + # Nested object/array 1단계 더 펼치기 + if depth < max_depth: + if ptype == "object": + out.extend(_walk(prop, depth + 1, indent + 1)) + elif ptype == "array": + items = prop.get("items") or {} + if items.get("properties") or items.get("type") in ("object", "array"): + out.extend(_walk(items, depth + 1, indent + 1)) + return out + + lines = _walk(schema, 0, 0) + return "\n".join(lines) + + def _extract_params_openapi3( operation: dict[str, Any], resolved_spec: dict[str, Any], @@ -198,11 +252,18 @@ def _extract_params_openapi3( is_required = p.get("required", False) if required_only and not is_required: continue + desc = p.get("description", "") or "" + # object/array 타입이면 nested fields를 description에 펼쳐서 + # LLM이 정확한 필드명(예: searchWord)을 알 수 있게 한다. 
+ if _schema_type(schema) in ("object", "array"): + nested = _summarize_object_schema(schema) + if nested: + desc = (desc + "\nFields:\n" + nested).strip() if desc else f"Fields:\n{nested}" params.append( ToolParameter( name=p["name"], type=_schema_type(schema), - description=p.get("description", ""), + description=desc, required=is_required, enum=schema.get("enum"), ) @@ -218,11 +279,17 @@ def _extract_params_openapi3( is_required = prop_name in body_required if required_only and not is_required: continue + desc = (prop_schema.get("description") or "") + # nested object/array는 한 단계 더 펼치기 + if _schema_type(prop_schema) in ("object", "array"): + nested = _summarize_object_schema(prop_schema) + if nested: + desc = (desc + "\nFields:\n" + nested).strip() if desc else f"Fields:\n{nested}" params.append( ToolParameter( name=prop_name, type=_schema_type(prop_schema), - description=prop_schema.get("description", ""), + description=desc, required=is_required, ) ) @@ -304,6 +371,34 @@ def _enrich_description(description: str, method: str, path: str) -> str: return description +def _resolve_server_url( + operation: dict[str, Any], + path_item: dict[str, Any] | None, + spec: dict[str, Any], + *, + is_swagger2: bool = False, +) -> str | None: + """OpenAPI 우선순위: operation.servers > path.servers > spec.servers. + + Swagger 2.0은 ``host`` + ``basePath`` + ``schemes`` 조합으로 base_url 구성. 
+ """ + if is_swagger2: + host = spec.get("host") + if not host: + return None + scheme = (spec.get("schemes") or ["https"])[0] + base_path = spec.get("basePath") or "" + return f"{scheme}://{host}{base_path}".rstrip("/") + + for source in (operation, path_item or {}, spec): + servers = source.get("servers") if isinstance(source, dict) else None + if servers and isinstance(servers, list) and servers: + url = (servers[0] or {}).get("url") + if url: + return str(url).rstrip("/") + return None + + def _operation_to_tool( operation_id: str, operation: dict[str, Any], @@ -313,6 +408,7 @@ def _operation_to_tool( *, is_swagger2: bool = False, required_only: bool = False, + path_item: dict[str, Any] | None = None, ) -> ToolSchema: """Convert a single OpenAPI operation into a ToolSchema.""" description = operation.get("summary") or operation.get("description", "") @@ -357,6 +453,13 @@ def _operation_to_tool( if response_schema: metadata["response_schema"] = response_schema + # spec/path/operation 단위의 servers field → tool 자체 base_url 부여. + # 한 컬렉션에 다른 host를 가진 source들이 섞여 있을 때 executor가 tool마다 + # 알맞은 base_url로 호출할 수 있게 한다. + server_url = _resolve_server_url(operation, path_item, resolved_spec, is_swagger2=is_swagger2) + if server_url: + metadata["base_url"] = server_url + return ToolSchema( name=operation_id, description=description, @@ -459,6 +562,7 @@ def ingest_openapi( resolved_raw, is_swagger2=is_swagger2, required_only=required_only, + path_item=path_item, ) tools.append(tool) diff --git a/graph_tool_call/langchain/gateway.py b/graph_tool_call/langchain/gateway.py index cfde75e..1ad9e97 100644 --- a/graph_tool_call/langchain/gateway.py +++ b/graph_tool_call/langchain/gateway.py @@ -66,6 +66,93 @@ def _extract_parameters_info(tool: Any) -> list[dict[str, Any]] | None: return None +def _summarize_response_schema(schema: dict[str, Any]) -> str | None: + """Produce a one-line summary of an OpenAPI response schema for the LLM. 
+ + Lists top-level field names + types so the model can plan parameter + extraction for the next call. + """ + if not isinstance(schema, dict): + return None + + # Unwrap arrays + container = schema + is_array = False + if container.get("type") == "array" and isinstance(container.get("items"), dict): + container = container["items"] + is_array = True + + props = container.get("properties") + if not isinstance(props, dict) or not props: + # Fall back to a bare type description + t = container.get("type") + return f"array of {t}" if is_array and t else t + + fields = [] + for name, info in list(props.items())[:12]: + if not isinstance(info, dict): + fields.append(name) + continue + t = info.get("type") or info.get("$ref", "object").rsplit("/", 1)[-1] + fields.append(f"{name}:{t}") + summary = "{" + ", ".join(fields) + "}" + return f"array of {summary}" if is_array else summary + + +def _enrich_from_graph( + name: str, graph: Any | None +) -> dict[str, Any]: + """Pull source_label, method/path, response summary, and outgoing edges + from the underlying ToolGraph for *name*. Returns an empty dict if the + graph or tool is not available — callers should treat all keys as optional. 
+ """ + if graph is None: + return {} + + enrichment: dict[str, Any] = {} + + tool_schema = None + try: + tool_schema = graph.tools.get(name) + except Exception: + return enrichment + + if tool_schema is not None and getattr(tool_schema, "metadata", None): + meta = tool_schema.metadata + if meta.get("source_label"): + enrichment["source"] = meta["source_label"] + if meta.get("method") and meta.get("path"): + enrichment["http"] = f"{meta['method'].upper()} {meta['path']}" + rs = meta.get("response_schema") + if isinstance(rs, dict): + summary = _summarize_response_schema(rs) + if summary: + enrichment["returns"] = summary + + # Outgoing edges → chain hints + try: + engine = graph.graph + edges = engine.get_edges_from(name, direction="out") + chains: list[str] = [] + for _src, target, attrs in edges: + relation = attrs.get("relation") + relation_name = ( + relation.value if hasattr(relation, "value") else str(relation) + ) + # Skip purely structural BELONGS_TO edges + if relation_name in ("belongs_to", "BELONGS_TO"): + continue + chains.append(f"{relation_name}→{target}") + if len(chains) >= 5: + break + if chains: + enrichment["next_candidates"] = chains + except Exception: + pass + + return enrichment + + def create_gateway_tools( tools: list[Any], *, @@ -111,12 +198,15 @@ def create_gateway_tools( total = len(tool_map) call_history: list[str] = [] + underlying_graph = getattr(toolkit, "graph", None) + @langchain_tool def search_tools(query: str, top_k: int | None = None) -> str: """Search available tools by natural language query. Use this FIRST to find which tools are available for the task. - Returns tool names, descriptions, and required parameters. + Returns tool names, descriptions, parameters, response shape, and + ``next_candidates`` (related tools you may want to call afterwards). Args: query: Natural language search query (e.g. 
"cancel order", "send email") @@ -135,11 +225,12 @@ def search_tools(query: str, top_k: int | None = None) -> str: desc = t.get("description", "") entry: dict[str, Any] = { "name": name, - "description": desc[:200], + "description": desc[:300], } params = _extract_parameters_info(t) if params: entry["parameters"] = params + entry.update(_enrich_from_graph(name, underlying_graph)) matched.append(entry) output = { @@ -148,8 +239,10 @@ def search_tools(query: str, top_k: int | None = None) -> str: "total_tools": total, "tools": matched, "hint": ( - "Use call_tool to execute a tool. " - "Pass tool_name and arguments as a dict matching the parameters above." + "Use call_tool to execute a tool. Pass tool_name and arguments " + "as a dict matching the parameters above. The 'returns' field " + "shows the response shape — extract values from there to build " + "arguments for the next call (see 'next_candidates')." ), } diff --git a/graph_tool_call/net.py b/graph_tool_call/net.py index dfe1c35..ba46e26 100644 --- a/graph_tool_call/net.py +++ b/graph_tool_call/net.py @@ -44,8 +44,22 @@ def redirect_request( return super().redirect_request(req, fp, code, msg, headers, newurl) -def _open_url(request: urllib.request.Request | str, *, timeout: int, max_redirects: int) -> Any: - opener = urllib.request.build_opener(_LimitedRedirectHandler(max_redirects)) +def _open_url( + request: urllib.request.Request | str, + *, + timeout: int, + max_redirects: int, + verify_ssl: bool = True, +) -> Any: + """urllib opener — verify_ssl=False 시 self-signed/사내 CA 인증서 허용.""" + handlers: list[Any] = [_LimitedRedirectHandler(max_redirects)] + if not verify_ssl: + import ssl + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + handlers.append(urllib.request.HTTPSHandler(context=ctx)) + opener = urllib.request.build_opener(*handlers) return opener.open(request, timeout=timeout) @@ -128,13 +142,22 @@ def fetch_url_text( allowed_content_types: tuple[str, 
...] = _DEFAULT_ALLOWED_CONTENT_TYPES, allow_private_hosts: bool = False, max_redirects: int = _DEFAULT_MAX_REDIRECTS, + verify_ssl: bool | None = None, ) -> str: - """Fetch UTF-8 text from a remote URL with basic SSRF protections.""" + """Fetch UTF-8 text from a remote URL with basic SSRF protections. + + ``verify_ssl`` — None 이면 ``allow_private_hosts`` 값에 따라 자동 결정 + (사내망 hosts 는 self-signed CA 가 일반적이므로 verify off 가 기본). + """ validate_remote_url(url, allow_private_hosts=allow_private_hosts) + if verify_ssl is None: + # allow_private_hosts=True 사용자는 보통 사내망 hitting. 사내 CA 포용. + verify_ssl = not allow_private_hosts + req = urllib.request.Request(url, headers=headers or {}) try: - with _open_url(req, timeout=timeout, max_redirects=max_redirects) as resp: + with _open_url(req, timeout=timeout, max_redirects=max_redirects, verify_ssl=verify_ssl) as resp: final_url = url if hasattr(resp, "geturl"): candidate = resp.geturl() diff --git a/graph_tool_call/ontology/llm_provider.py b/graph_tool_call/ontology/llm_provider.py index 8748ac3..a34786d 100644 --- a/graph_tool_call/ontology/llm_provider.py +++ b/graph_tool_call/ontology/llm_provider.py @@ -5,7 +5,7 @@ import json import urllib.request from abc import ABC, abstractmethod -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any from graph_tool_call.ontology.schema import RelationType @@ -13,11 +13,20 @@ @dataclass class ToolSummary: - """Lightweight tool representation for LLM prompts.""" + """Lightweight tool representation for LLM prompts. + + The optional fields (``method``, ``path``, ``response_fields``) extend the + summary for semantic enrichment (``enrich_tool_semantics``). They are + ignored by methods that don't need them, preserving backward compat. 
+ """ name: str description: str parameters: list[str] # just parameter names + # Extended context for semantic enrichment (optional) + method: str = "" + path: str = "" + response_fields: list[str] = field(default_factory=list) @dataclass @@ -31,6 +40,49 @@ class InferredRelation: reason: str +@dataclass +class FieldSemantic: + """A field annotated with its semantic identifier. + + Used on both produces (what a tool outputs) and consumes (what it + requires). ``json_path`` is set on produces; ``field`` is set on consumes. + """ + + semantic: str + json_path: str = "" + field: str = "" + + +@dataclass +class PairHint: + """LLM-suggested tool that pairs with the current tool.""" + + tool: str + reason: str + + +@dataclass +class ToolEnrichment: + """Per-tool semantic annotation produced by ``enrich_tool_semantics``. + + This is the Pass 2 output of the Plan-and-Execute L0 knowledge base. + Used downstream by: + - Stage 1 target selection (``when_to_use`` in catalog) + - Stage 2 path synthesis (``produces_semantics`` / ``consumes_semantics`` + replace hardcoded synonym tables) + - Graph edges (``pairs_well_with`` becomes semantic edges) + """ + + canonical_action: str # search | read | create | update | delete | action + primary_resource: str # e.g. "product" + one_line_summary: str + when_to_use: str + when_not_to_use: str = "" + produces_semantics: list[FieldSemantic] = field(default_factory=list) + consumes_semantics: list[FieldSemantic] = field(default_factory=list) + pairs_well_with: list[PairHint] = field(default_factory=list) + + # --------------------------------------------------------------------------- # Prompt templates # --------------------------------------------------------------------------- @@ -122,6 +174,52 @@ class InferredRelation: [{{"source":"toolA","target":"toolB","relation":"PRECEDES","confidence":0.9,"reason":"..."}}]""" +_ENRICH_SEMANTICS_PROMPT = """\ +You are annotating API tools for a plan-and-execute planning system. 
+Produce structured metadata that downstream components use to (1) pick the +right tool for a user's goal, (2) synthesize execution plans, and (3) wire +one tool's output to another tool's input. + +AVAILABLE TOOLS IN THE COLLECTION (names + 1-line descriptions, for +pairs_well_with reference): +{all_tools_brief} + +TOOLS TO ANNOTATE (this batch): +{batch_detailed} + +For each tool in the batch, output a JSON object with these fields: + - canonical_action: one of "search" | "read" | "create" | "update" | "delete" | "action" + - primary_resource: one lowercase noun (e.g. "product", "order", "user", "shop", "category") + - one_line_summary: short natural-language summary (<=60 chars) + - when_to_use: 1-2 sentences describing the trigger condition + - when_not_to_use: optional 1 sentence (can be empty) — alternative tool cases + - produces_semantics: array of {{"semantic": "canonical_id", "json_path": "$.body..."}} + * Include only MEANINGFUL fields (IDs, names, key metrics). + * Skip pagination, headers, status codes. + * Use CONSISTENT semantic ids across tools. If two tools both return a + product identifier (one calls it "goodsNo", another "productId"), + use the same semantic like "product_id". + - consumes_semantics: array of {{"semantic": "canonical_id", "field": "paramName"}} + * REQUIRED inputs only. Skip optional filters, pagination. + * Same semantic id conventions as produces. + - pairs_well_with: array of {{"tool": "tool_name_from_available_list", + "reason": "brief reason"}} + * 2-4 tools that typically precede or follow this tool. + * Names MUST match the available list exactly. Do not invent. + +OUTPUT FORMAT (strict): +{{ + "tool_name_1": {{...fields...}}, + "tool_name_2": {{...fields...}} +}} + +STRICT RULES: + - You MUST produce one entry for EVERY tool in the batch. + - Do NOT skip tools with unclear descriptions — make your best guess. + - Keep fields concise (short sentences) so all tools fit in the output. + - Return JSON only. 
No markdown fences, no prose, no comments.""" + + def _format_tools_list(tools: list[ToolSummary]) -> str: lines = [] for i, t in enumerate(tools, 1): @@ -130,6 +228,81 @@ def _format_tools_list(tools: list[ToolSummary]) -> str: return "\n".join(lines) +def _format_tools_brief(tools: list[ToolSummary]) -> str: + """Compact name list for the ``pairs_well_with`` reference. + + Name-only (no descriptions) to keep prompt small — descriptions would + bloat the prompt by N× since every batch prompt contains this list. + Tool names like ``seltSearchProduct`` already encode intent. + """ + return "\n".join(f"- {t.name}" for t in tools) + + +def _format_tools_for_enrichment(tools: list[ToolSummary]) -> str: + """Detailed per-tool block for enrichment prompt input.""" + blocks = [] + for t in tools: + parts = [f"== {t.name} =="] + if t.method and t.path: + parts.append(f"HTTP: {t.method.upper()} {t.path}") + if t.description: + desc = t.description.strip()[:400] + parts.append(f"Description: {desc}") + if t.parameters: + params = ", ".join(t.parameters[:25]) + parts.append(f"Request fields: {params}") + if t.response_fields: + resp = ", ".join(t.response_fields[:25]) + parts.append(f"Response fields: {resp}") + blocks.append("\n".join(parts)) + return "\n\n".join(blocks) + + +def _parse_enrichment(data: Any) -> ToolEnrichment | None: + """Build a ToolEnrichment from LLM JSON output. 
Tolerant of missing keys.""" + if not isinstance(data, dict): + return None + try: + produces = [ + FieldSemantic( + semantic=str(p.get("semantic", "")).strip(), + json_path=str(p.get("json_path", "")).strip(), + ) + for p in (data.get("produces_semantics") or []) + if isinstance(p, dict) and str(p.get("semantic", "")).strip() + ] + consumes = [ + FieldSemantic( + semantic=str(c.get("semantic", "")).strip(), + field=str(c.get("field", "")).strip(), + ) + for c in (data.get("consumes_semantics") or []) + if isinstance(c, dict) and str(c.get("semantic", "")).strip() + ] + pairs = [ + PairHint( + tool=str(p.get("tool", "")).strip(), + reason=str(p.get("reason", "")).strip(), + ) + for p in (data.get("pairs_well_with") or []) + if isinstance(p, dict) and str(p.get("tool", "")).strip() + ] + action = str(data.get("canonical_action", "")).strip().lower() + resource = str(data.get("primary_resource", "")).strip().lower() + return ToolEnrichment( + canonical_action=action, + primary_resource=resource, + one_line_summary=str(data.get("one_line_summary", "")).strip(), + when_to_use=str(data.get("when_to_use", "")).strip(), + when_not_to_use=str(data.get("when_not_to_use", "")).strip(), + produces_semantics=produces, + consumes_semantics=consumes, + pairs_well_with=pairs, + ) + except (KeyError, TypeError, ValueError, AttributeError): + return None + + def _parse_relation_type(s: str) -> RelationType | None: mapping = { "REQUIRES": RelationType.REQUIRES, @@ -423,6 +596,62 @@ def generate_example_queries( return all_queries + def enrich_tool_semantics( + self, + tools: list[ToolSummary], + batch_size: int = 10, + *, + reference_tools: list[ToolSummary] | None = None, + ) -> dict[str, ToolEnrichment]: + """Per-tool semantic annotation for Plan-and-Execute architecture. + + ``tools`` = the batch (or batches) of tools to produce detailed + enrichment for. 
``reference_tools`` = the full catalog used only to + build ``all_tools_brief`` in the prompt (so LLM picks + ``pairs_well_with`` from valid names). If ``reference_tools`` is + None, falls back to ``tools``. + + Streaming callers typically pass one batch in ``tools`` + the full + collection in ``reference_tools`` + ``batch_size=len(tools)`` so the + internal loop runs once per caller invocation. + + Output is used by: + - Stage 1 (target selection) — ``one_line_summary`` + ``when_to_use`` + in tool catalog make LLM picks more accurate with smaller context. + - Stage 2 (path synthesis) — ``produces_semantics`` / + ``consumes_semantics`` carry canonical semantic ids so bindings + work across convention mismatches (e.g. ``goodsNo`` ≡ ``productId``) + without a hardcoded synonym table. + - Graph edges — ``pairs_well_with`` becomes optional semantic edges + that complement structural field-match edges. + """ + results: dict[str, ToolEnrichment] = {} + if not tools: + return results + + all_brief = _format_tools_brief(reference_tools or tools) + + for i in range(0, len(tools), batch_size): + batch = tools[i : i + batch_size] + prompt = _ENRICH_SEMANTICS_PROMPT.format( + all_tools_brief=all_brief, + batch_detailed=_format_tools_for_enrichment(batch), + ) + response = self.generate(prompt) + + try: + parsed = _extract_json(response) + if not isinstance(parsed, dict): + continue + for name, data in parsed.items(): + enrichment = _parse_enrichment(data) + if enrichment is not None and enrichment.canonical_action: + results[str(name)] = enrichment + except (json.JSONDecodeError, KeyError, TypeError): + continue + + return results + # --------------------------------------------------------------------------- # Ollama Provider @@ -475,18 +704,25 @@ def __init__( model: str = "gpt-4o-mini", base_url: str = "https://api.openai.com/v1", api_key: str = "", + max_tokens: int = 8192, + timeout: int = 300, ) -> None: self.model = model self.base_url = base_url.rstrip("/") 
self.api_key = api_key + self.max_tokens = max_tokens + self.timeout = timeout def generate(self, prompt: str) -> str: url = f"{self.base_url}/chat/completions" + # max_tokens 를 명시 지정하지 않으면 provider 기본값 (일부 모델은 4096) + # 으로 잘려서 batch enrichment JSON 이 중간에 truncate → 일부 tool 누락. payload = json.dumps( { "model": self.model, "messages": [{"role": "user", "content": prompt}], "temperature": 0.1, + "max_tokens": self.max_tokens, } ).encode() @@ -495,7 +731,7 @@ def generate(self, prompt: str) -> str: headers["Authorization"] = f"Bearer {self.api_key}" req = urllib.request.Request(url, data=payload, headers=headers, method="POST") - with urllib.request.urlopen(req, timeout=120) as resp: # noqa: S310 + with urllib.request.urlopen(req, timeout=self.timeout) as resp: # noqa: S310 result = json.loads(resp.read().decode()) choices = result.get("choices", []) if choices: diff --git a/graph_tool_call/plan/__init__.py b/graph_tool_call/plan/__init__.py new file mode 100644 index 0000000..c35aef3 --- /dev/null +++ b/graph_tool_call/plan/__init__.py @@ -0,0 +1,62 @@ +"""Plan-and-Execute primitives: schemas, binding resolver, runner. + +The ``plan`` package is deliberately transport-agnostic. It knows nothing +about HTTP, authentication, or xgen internals — it only defines how a +Plan looks, how string bindings are resolved against step outputs, and how +to drive execution via an injected callable. + +Typical use (from an integration layer like xgen-workflow): + + from graph_tool_call.plan import Plan, PlanStep, PlanRunner + + plan = Plan(id="...", goal="...", steps=[PlanStep(...), ...]) + + def call_tool(tool_name, args): + return my_http_executor.execute(tool_name, args) + + runner = PlanRunner(call_tool) + for event in runner.run(plan): + # event: StepStarted | StepCompleted | StepFailed | PlanCompleted + ... 
+""" + +from graph_tool_call.plan.binding import ( + BindingError, + resolve_bindings, +) +from graph_tool_call.plan.runner import ( + PlanRunner, + PlanEvent, + PlanStarted, + StepStarted, + StepCompleted, + StepFailed, + PlanCompleted, + PlanAborted, +) +from graph_tool_call.plan.schema import ( + Plan, + PlanStep, + ExecutionTrace, + StepTrace, +) + +__all__ = [ + # schema + "Plan", + "PlanStep", + "ExecutionTrace", + "StepTrace", + # binding + "BindingError", + "resolve_bindings", + # runner + events + "PlanRunner", + "PlanEvent", + "PlanStarted", + "StepStarted", + "StepCompleted", + "StepFailed", + "PlanCompleted", + "PlanAborted", +] diff --git a/graph_tool_call/plan/binding.py b/graph_tool_call/plan/binding.py new file mode 100644 index 0000000..58d9eef --- /dev/null +++ b/graph_tool_call/plan/binding.py @@ -0,0 +1,165 @@ +"""Binding resolver for Plan args. + +Substitutes ``${source.dotted.path}`` placeholders in step arguments with +actual values drawn from the runtime context. The context is a dict mapping +source names (``"s1"``, ``"s2"``, ``"input"``, ...) to arbitrary JSON-like +objects. + +v1 path syntax (kept deliberately small): + + - dotted keys : ``s1.body.goods`` → ``ctx["s1"]["body"]["goods"]`` + - array index : ``s1.body.goods[0].goodsNo`` + - whole-source : ``s1`` → entire result dict of step s1 + - input alias : ``input.keyword`` — caller injects a special + ``"input"`` entry at runtime for user-provided + entities extracted by Stage 1. + +Explicitly NOT supported in v1: + + - wildcard ``[*]`` (fan-out) — see §11.1 of the design doc + - filter expressions (JSONPath ``[?(...)]``) + - functions / casts (``int(...)``, ``default(...)``) + +Behavior rules: + + 1. If a string argument is **entirely** one binding (``"${s1.id}"``) the + resolved value keeps its native type (int, dict, list, ...). This is + important so integer IDs aren't accidentally stringified. + 2. 
class BindingError(ValueError):
    """Raised when a ``${...}`` expression cannot be resolved."""


# Matches one ``${...}`` placeholder. The body may be empty so that ``${}``
# reaches ``_lookup`` and fails with a clear BindingError instead of passing
# through as a literal. ``$``, ``{`` and ``}`` inside a binding are not
# supported in v1.
_BINDING_RE = re.compile(r"\$\{([^${}]*)\}")

# Upper bound on how many sibling keys a "key not found" message lists.
_MAX_KEYS_IN_ERROR = 8


def resolve_bindings(value: Any, context: dict[str, Any]) -> Any:
    """Recursively resolve ``${...}`` bindings in *value* against *context*.

    Dict values and list items are walked (dict keys are left untouched);
    strings are interpolated; every other scalar is returned unchanged.

    Args:
        value: Step arguments, or any nested part of them.
        context: Maps source names (the first path token of a binding) to
            the objects that bindings may reference.

    Returns:
        A structure of the same shape with all bindings substituted. Dicts
        and lists are rebuilt rather than mutated in place.

    Raises:
        BindingError: If any binding expression cannot be resolved.
    """
    if isinstance(value, dict):
        return {k: resolve_bindings(v, context) for k, v in value.items()}
    if isinstance(value, list):
        return [resolve_bindings(v, context) for v in value]
    if isinstance(value, str):
        return _resolve_string(value, context)
    return value


def _resolve_string(s: str, context: dict[str, Any]) -> Any:
    """Resolve the bindings contained in a single string.

    If the string (ignoring surrounding whitespace) is exactly one binding,
    the looked-up value is returned with its native type. Otherwise every
    binding is replaced by ``str(value)``; a ``None`` value interpolates as
    the empty string. Strings without bindings come back unchanged.
    """
    # Whole-string binding -> keep the native type (int ids stay ints).
    whole = _BINDING_RE.fullmatch(s.strip())
    if whole:
        return _lookup(whole.group(1).strip(), context)

    # Mixed / multi-binding -> plain string interpolation.
    def _sub(match: re.Match[str]) -> str:
        val = _lookup(match.group(1).strip(), context)
        return "" if val is None else str(val)

    return _BINDING_RE.sub(_sub, s)


def _describe_keys(keys: Any, limit: int | None = None) -> str:
    """Render *keys* as a sorted list for use inside an error message.

    Sorting goes through ``str`` so that a mapping with mixed key types
    (e.g. ``1`` and ``"b"``) cannot raise ``TypeError`` while a
    ``BindingError`` is being built. When *limit* is given and exceeded,
    the list is cut and suffixed with ``...``.
    """
    ordered = sorted(keys, key=str)
    if limit is not None and len(ordered) > limit:
        return f"{ordered[:limit]!r}..."
    return repr(ordered)


def _lookup(expr: str, context: dict[str, Any]) -> Any:
    """Walk a dotted path with optional ``[N]`` indices against *context*.

    Raises:
        BindingError: For an empty expression, an unknown source, a
            non-numeric or out-of-range index, indexing into a non-sequence,
            descending into a non-dict, or a missing key.
    """
    tokens = _tokenize(expr)
    if not tokens:
        raise BindingError(f"empty binding expression: {expr!r}")

    head = tokens[0]
    if head not in context:
        raise BindingError(
            f"unknown source {head!r} in binding ${{{expr}}}: "
            f"context has {_describe_keys(context)}"
        )
    node: Any = context[head]

    for tok in tokens[1:]:
        if tok.startswith("[") and tok.endswith("]"):
            # Array index; negative values count from the end.
            try:
                idx = int(tok[1:-1])
            except ValueError as exc:
                raise BindingError(
                    f"non-numeric array index {tok!r} in binding {expr!r}"
                ) from exc
            if not isinstance(node, (list, tuple)):
                raise BindingError(
                    f"indexing {tok} on non-list type {type(node).__name__} (expr={expr!r})"
                )
            try:
                node = node[idx]
            except IndexError as exc:
                raise BindingError(
                    f"index {idx} out of range in binding {expr!r}"
                ) from exc
        else:
            if not isinstance(node, dict):
                raise BindingError(
                    f"cannot descend into .{tok} on non-dict type {type(node).__name__} "
                    f"(expr={expr!r})"
                )
            if tok not in node:
                raise BindingError(
                    f"key {tok!r} not found in binding {expr!r} "
                    f"(available: {_describe_keys(node, _MAX_KEYS_IN_ERROR)})"
                )
            node = node[tok]

    return node


def _tokenize(expr: str) -> list[str]:
    """Split a dotted path with ``[N]`` indices into tokens.

    ``s1.body.goods[0].goodsNo`` -> ``["s1", "body", "goods", "[0]", "goodsNo"]``

    Index tokens keep their brackets so ``_lookup`` can tell them apart from
    keys. Empty segments (``a..b``, a trailing ``.``) are dropped silently.

    Raises:
        BindingError: If a ``[`` has no matching ``]``.
    """
    tokens: list[str] = []
    buf: list[str] = []
    i = 0
    while i < len(expr):
        ch = expr[i]
        if ch == ".":
            if buf:
                tokens.append("".join(buf))
                buf = []
        elif ch == "[":
            if buf:
                tokens.append("".join(buf))
                buf = []
            end = expr.find("]", i)
            if end == -1:
                raise BindingError(f"unclosed '[' in binding {expr!r}")
            tokens.append(expr[i:end + 1])
            i = end  # the shared ``i += 1`` below steps past the ``]``
        else:
            buf.append(ch)
        i += 1
    if buf:
        tokens.append("".join(buf))
    return tokens


__all__ = ["BindingError", "resolve_bindings"]
+""" + +from __future__ import annotations + +import time +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any, Callable, Iterator + +from graph_tool_call.plan.binding import BindingError, resolve_bindings +from graph_tool_call.plan.schema import ( + ExecutionTrace, + Plan, + PlanStep, + StepTrace, +) + + +# --------------------------------------------------------------------------- +# Event types — structured so callers can pattern-match by ``type`` field +# --------------------------------------------------------------------------- + + +@dataclass +class PlanStarted: + type: str = "plan.started" + plan_id: str = "" + goal: str = "" + step_count: int = 0 + + +@dataclass +class StepStarted: + type: str = "step.started" + step_id: str = "" + tool: str = "" + args_resolved: dict[str, Any] = field(default_factory=dict) + index: int = 0 + total: int = 0 + + +@dataclass +class StepCompleted: + type: str = "step.completed" + step_id: str = "" + tool: str = "" + duration_ms: int = 0 + output_preview: Any = None # truncated output for UI + output_size: int = 0 + + +@dataclass +class StepFailed: + type: str = "step.failed" + step_id: str = "" + tool: str = "" + error: dict[str, Any] = field(default_factory=dict) + duration_ms: int = 0 + + +@dataclass +class PlanCompleted: + type: str = "plan.completed" + plan_id: str = "" + output: Any = None + total_duration_ms: int = 0 + + +@dataclass +class PlanAborted: + type: str = "plan.aborted" + plan_id: str = "" + failed_step: str = "" + error: dict[str, Any] = field(default_factory=dict) + total_duration_ms: int = 0 + + +PlanEvent = ( + PlanStarted + | StepStarted + | StepCompleted + | StepFailed + | PlanCompleted + | PlanAborted +) + + +# --------------------------------------------------------------------------- +# Runner +# --------------------------------------------------------------------------- + + +# ToolCaller signature: (tool_name, resolved_args) -> output_dict 
# ToolCaller signature: (tool_name, resolved_args) -> tool output
ToolCaller = Callable[[str, dict[str, Any]], Any]


class PlanRunner:
    """Execute a Plan step-by-step using a caller-provided tool invoker.

    Usage::

        def call_tool(name: str, args: dict) -> dict:
            return my_http_executor.execute(name, args)

        runner = PlanRunner(call_tool)
        trace = runner.run(plan)            # run to completion, return trace
        # or, streaming:
        for event in runner.run_stream(plan):
            send_over_sse(event)
    """

    def __init__(
        self,
        call_tool: ToolCaller,
        *,
        output_preview_limit: int = 512,
        on_error: str = "abort",  # 'abort' only in v1
    ) -> None:
        """Store the tool invoker and preview limit.

        Raises:
            ValueError: If *on_error* is anything other than ``"abort"``.
        """
        if on_error != "abort":
            raise ValueError("v1 PlanRunner only supports on_error='abort'")
        self._call_tool = call_tool
        self._preview_limit = output_preview_limit

    # ----------------------------------------------------------------------
    # Streaming interface: yields PlanEvent instances
    # ----------------------------------------------------------------------

    def run_stream(
        self,
        plan: Plan,
        *,
        input_context: dict[str, Any] | None = None,
    ) -> Iterator[PlanEvent]:
        """Execute *plan* and yield events as each step progresses.

        ``input_context`` supplies values for ``${input.xxx}`` bindings.
        The stream always starts with ``PlanStarted`` and ends with either
        ``PlanCompleted`` or ``PlanAborted``.
        """
        yield from self._execute(plan, input_context, [])

    # ----------------------------------------------------------------------
    # Non-streaming interface: returns the final ExecutionTrace
    # ----------------------------------------------------------------------

    def run(
        self,
        plan: Plan,
        *,
        input_context: dict[str, Any] | None = None,
    ) -> ExecutionTrace:
        """Execute *plan* to completion and return an ExecutionTrace.

        The trace carries one ``StepTrace`` per step that was attempted
        (including the failing one), the resolved plan output on success,
        and the id of the failed step when the run was aborted.
        """
        started_at = _now_iso()
        started = time.monotonic()
        trace_steps: list[StepTrace] = []
        success = False
        failed_step: str | None = None
        output: Any = None

        # _execute fills trace_steps as a side effect while we drain events.
        for event in self._execute(plan, input_context, trace_steps):
            if isinstance(event, PlanCompleted):
                success = True
                output = event.output
            elif isinstance(event, PlanAborted):
                failed_step = event.failed_step

        return ExecutionTrace(
            plan_id=plan.id,
            success=success,
            steps=trace_steps,
            output=output,
            failed_step=failed_step,
            total_duration_ms=_ms_since(started),
            started_at=started_at,
            ended_at=_now_iso(),
        )

    # ----------------------------------------------------------------------
    # Shared execution core
    # ----------------------------------------------------------------------

    def _execute(
        self,
        plan: Plan,
        input_context: dict[str, Any] | None,
        trace_steps: list[StepTrace],
    ) -> Iterator[PlanEvent]:
        """Run the steps in order, yielding events and recording traces.

        *trace_steps* is an output parameter: a ``StepTrace`` is appended
        for every step that is attempted, so ``run`` can return per-step
        detail while ``run_stream`` simply ignores the list.
        """
        plan_start = time.monotonic()
        total = len(plan.steps)

        yield PlanStarted(plan_id=plan.id, goal=plan.goal, step_count=total)

        # step_id -> output (runtime context for binding resolution)
        context: dict[str, Any] = {}
        if input_context:
            context["input"] = dict(input_context)

        for idx, step in enumerate(plan.steps, start=1):
            step_trace = StepTrace(id=step.id, tool=step.tool)
            trace_steps.append(step_trace)
            step_start = time.monotonic()
            error: dict[str, Any] | None = None
            output: Any = None

            # 1. Resolve bindings against the outputs gathered so far.
            try:
                resolved = resolve_bindings(step.args, context)
            except BindingError as exc:
                error = {"kind": "binding", "message": str(exc)}

            if error is None:
                step_trace.args_resolved = resolved
                yield StepStarted(
                    step_id=step.id,
                    tool=step.tool,
                    args_resolved=resolved,
                    index=idx,
                    total=total,
                )

                # 2. Execute via the caller's tool invoker. Any exception is
                #    caller-defined, hence the broad catch.
                try:
                    output = self._call_tool(step.tool, resolved)
                except Exception as exc:  # noqa: BLE001
                    error = {
                        "kind": "tool",
                        "message": str(exc),
                        "exception_type": type(exc).__name__,
                    }

            if error is not None:
                # v1 policy: the first failure aborts the whole plan.
                step_trace.error = error
                step_trace.duration_ms = _ms_since(step_start)
                yield StepFailed(
                    step_id=step.id,
                    tool=step.tool,
                    error=error,
                    duration_ms=step_trace.duration_ms,
                )
                yield PlanAborted(
                    plan_id=plan.id,
                    failed_step=step.id,
                    error=error,
                    total_duration_ms=_ms_since(plan_start),
                )
                return

            step_trace.output = output
            step_trace.duration_ms = _ms_since(step_start)

            # 3. Store output in context for later bindings.
            context[step.id] = output

            yield StepCompleted(
                step_id=step.id,
                tool=step.tool,
                duration_ms=step_trace.duration_ms,
                output_preview=_preview(output, self._preview_limit),
                output_size=_output_size(output),
            )

        # 4. Resolve output_binding for the final answer; without one the
        #    last step's output (or None for an empty plan) is the answer.
        try:
            if plan.output_binding:
                final = resolve_bindings(plan.output_binding, context)
            elif plan.steps:
                final = context[plan.steps[-1].id]
            else:
                final = None
        except BindingError as exc:
            yield PlanAborted(
                plan_id=plan.id,
                failed_step="",
                error={"kind": "output_binding", "message": str(exc)},
                total_duration_ms=_ms_since(plan_start),
            )
            return

        yield PlanCompleted(
            plan_id=plan.id,
            output=final,
            total_duration_ms=_ms_since(plan_start),
        )


# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------


def _ms_since(start_monotonic: float) -> int:
    """Return whole milliseconds elapsed since a ``time.monotonic()`` reading."""
    return int((time.monotonic() - start_monotonic) * 1000)


def _now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string."""
    return datetime.now(timezone.utc).isoformat()


def _preview(value: Any, limit: int) -> Any:
    """Trim large outputs for UI previews; small values are returned intact.

    Dicts and lists are measured by their JSON rendering. When that exceeds
    *limit* characters, a ``{"_preview": ..., "_truncated": True}`` stand-in
    is returned; when the value cannot be rendered as JSON, a stand-in that
    names the container type is returned. Long strings are cut to *limit*
    characters plus an ellipsis.
    """
    if isinstance(value, (dict, list)):
        import json as _json

        try:
            rendered = _json.dumps(value, ensure_ascii=False)
        except (TypeError, ValueError):
            return {"_preview": f"<unserializable {type(value).__name__}>"}
        if len(rendered) <= limit:
            return value
        return {"_preview": rendered[:limit] + "…", "_truncated": True}
    if isinstance(value, str) and len(value) > limit:
        return value[:limit] + "…"
    return value


def _output_size(value: Any) -> int:
    """Return the UTF-8 byte size of *value* rendered as JSON.

    Returns 0 when the value cannot be serialized. Intended for
    observability only.
    """
    import json as _json

    try:
        # Encode before measuring: non-ASCII text takes several bytes per
        # character, so the string length would under-report the size.
        return len(_json.dumps(value, ensure_ascii=False).encode("utf-8"))
    except (TypeError, ValueError):
        return 0
Future versions can add optional +fields (``foreach``, ``condition``) on ``PlanStep``. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class PlanStep: + """A single step in a Plan. + + ``args`` may contain binding placeholders of the form + ``${step_id.json.path}`` or ``${input.keyword}``. These are resolved + at runtime by ``resolve_bindings`` using the accumulated step context. + """ + + id: str # "s1", "s2", ... + tool: str # function_name (graph node name) + args: dict[str, Any] = field(default_factory=dict) + rationale: str = "" # why this step exists (for audit) + timeout_ms: int | None = None + retryable: bool = False # reserved for v1.1 retry policy + + +@dataclass +class Plan: + """Executable plan — ordered steps with binding references. + + v1 scope: **linear execution only**. Steps run in listed order. No + fan-out, no conditional branching, no parallelism. Each step may + reference earlier step outputs via ``${sN.path}`` bindings. + + ``output_binding`` designates which step's (or sub-path's) result is + the final answer. If unset, runner returns the last step's result. + """ + + id: str # uuid + goal: str # user requirement summary + steps: list[PlanStep] = field(default_factory=list) + output_binding: str | None = None # e.g. 
"${s2.body}" + created_at: str = "" # ISO8601 + metadata: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class StepTrace: + """Record of a single step execution.""" + + id: str + tool: str + args_resolved: dict[str, Any] = field(default_factory=dict) + output: Any = None # set on success + error: dict[str, Any] | None = None # set on failure + duration_ms: int = 0 + retries: int = 0 + + +@dataclass +class ExecutionTrace: + """Result of a full Plan execution.""" + + plan_id: str + success: bool + steps: list[StepTrace] = field(default_factory=list) + output: Any = None # plan.output_binding resolved + failed_step: str | None = None + total_duration_ms: int = 0 + started_at: str = "" + ended_at: str = "" diff --git a/graph_tool_call/tool_graph.py b/graph_tool_call/tool_graph.py index 9147f85..28839ed 100644 --- a/graph_tool_call/tool_graph.py +++ b/graph_tool_call/tool_graph.py @@ -289,6 +289,9 @@ def ingest_openapi( min_confidence: float = 0.7, allow_private_hosts: bool = False, max_response_bytes: int = 5_000_000, + source_label: str | None = None, + on_conflict: str = "overwrite", + relink_existing: bool = True, ) -> list[ToolSchema]: """Ingest an OpenAPI/Swagger spec, register tools, and auto-detect relations. @@ -304,11 +307,29 @@ def ingest_openapi( If True (default), run automatic dependency detection. min_confidence: Minimum confidence threshold for detected relations. + source_label: + Optional origin tag stored on each tool's ``metadata["source_label"]``. + Enables :meth:`list_sources` / :meth:`remove_source` and is used + to derive the namespace prefix when ``on_conflict="prefix"``. + on_conflict: + How to handle a name collision with an already-registered tool. + + - ``"overwrite"`` (default): replace the existing tool. + - ``"prefix"``: rename incoming as ``{source_label}.{name}`` (or + ``incoming.{name}`` if no label provided). Subsequent collisions + after prefixing fall back to ``overwrite``. 
+ - ``"skip"``: keep the existing tool, drop the incoming one. + - ``"error"``: raise ``ValueError`` on the first collision. + relink_existing: + When True (default), after adding the new batch, dependency + detection is re-run across **new ↔ existing** tools so that + cross-source edges are discovered. Has no effect when this is + the first ingest or ``detect_dependencies=False``. Returns ------- list[ToolSchema] - The ingested tool schemas. + The ingested tool schemas (with any prefix-rename applied). """ from graph_tool_call.ingest.openapi import ingest_openapi @@ -319,13 +340,16 @@ def ingest_openapi( allow_private_hosts=allow_private_hosts, max_response_bytes=max_response_bytes, ) - self._register_tools_batch( + registered = self._register_tools_batch( tools, detect_dependencies=detect_dependencies, min_confidence=min_confidence, spec=spec.raw, + source_label=source_label, + on_conflict=on_conflict, + relink_existing=relink_existing, ) - return tools + return registered def ingest_mcp_tools( self, @@ -923,33 +947,92 @@ def _register_tools_batch( detect_dependencies: bool = True, min_confidence: float = 0.7, spec: dict | None = None, - ) -> None: + source_label: str | None = None, + on_conflict: str = "overwrite", + relink_existing: bool = True, + ) -> list[ToolSchema]: """Register tools, assign categories, and detect dependencies. Shared logic for ingest_openapi, ingest_mcp_tools, and ingest_functions. + Returns the list of tools that were actually registered (after any + conflict-driven rename or skip). 
""" + had_existing = bool(self._tools) + registered: list[ToolSchema] = [] categories_seen: set[str] = set() + for tool in tools: - self._tools[tool.name] = tool - self._builder.add_tool(tool) - if tool.domain: - if tool.domain not in categories_seen: - if not self._graph.has_node(tool.domain): - self._builder.add_category(tool.domain) - categories_seen.add(tool.domain) - self._builder.assign_category(tool.name, tool.domain) - - if detect_dependencies and len(tools) >= 2: + resolved = self._resolve_conflict(tool, on_conflict, source_label) + if resolved is None: + continue + if source_label: + resolved.metadata["source_label"] = source_label + self._tools[resolved.name] = resolved + self._builder.add_tool(resolved) + if resolved.domain: + if resolved.domain not in categories_seen: + if not self._graph.has_node(resolved.domain): + self._builder.add_category(resolved.domain) + categories_seen.add(resolved.domain) + self._builder.assign_category(resolved.name, resolved.domain) + registered.append(resolved) + + if detect_dependencies and registered: from graph_tool_call.analyze.dependency import detect_dependencies as _detect - kwargs: dict = {"min_confidence": min_confidence} - if spec: - kwargs["spec"] = spec - relations = _detect(tools, **kwargs) - for rel in relations: - self._builder.add_relation(rel.source, rel.target, rel.relation_type) + # Scope of detection: + # - First ingest, or relink disabled → only the new batch. + # - Incremental + relink_existing → union of new + all existing, + # so cross-source edges (e.g. order.* ↔ claim.*) are discovered. 
+ if had_existing and relink_existing and len(self._tools) >= 2: + scope = list(self._tools.values()) + else: + scope = registered + + if len(scope) >= 2: + kwargs: dict = {"min_confidence": min_confidence} + if spec: + kwargs["spec"] = spec + relations = _detect(scope, **kwargs) + for rel in relations: + self._builder.add_relation(rel.source, rel.target, rel.relation_type) self._invalidate_retrieval() + return registered + + def _resolve_conflict( + self, + tool: ToolSchema, + on_conflict: str, + source_label: str | None, + ) -> ToolSchema | None: + """Apply the *on_conflict* policy. Returns the tool to register, or None to skip. + + Mutates ``tool.name`` when prefix-renaming. + """ + if tool.name not in self._tools: + return tool + + if on_conflict == "overwrite": + return tool + if on_conflict == "skip": + return None + if on_conflict == "error": + raise ValueError( + f"Tool '{tool.name}' already exists " + f"(on_conflict='error', incoming source_label={source_label!r})" + ) + if on_conflict == "prefix": + prefix = source_label or "incoming" + new_name = f"{prefix}.{tool.name}" + # If the prefixed name also collides, fall through to overwrite — + # the caller has already chosen prefix as the deconfliction strategy. 
+ tool.name = new_name + return tool + raise ValueError( + f"Unknown on_conflict policy: {on_conflict!r} " + "(expected 'overwrite' | 'prefix' | 'skip' | 'error')" + ) # --- from_url --- @@ -1167,6 +1250,59 @@ def apply_conflicts(self, conflicts: list | None = None, *, min_confidence: floa self._invalidate_retrieval() return added + # --- source management (incremental ingest) --- + + def list_sources(self) -> list[str]: + """Return distinct ``source_label`` values across all registered tools.""" + seen: dict[str, None] = {} + for tool in self._tools.values(): + label = tool.metadata.get("source_label") if tool.metadata else None + if label and label not in seen: + seen[label] = None + return list(seen.keys()) + + def tools_by_source(self, source_label: str) -> list[ToolSchema]: + """Return all tools tagged with the given ``source_label``.""" + return [ + t for t in self._tools.values() + if t.metadata and t.metadata.get("source_label") == source_label + ] + + def remove_source(self, source_label: str) -> int: + """Remove every tool tagged with *source_label* and its incident edges. + + Returns the number of tools removed. + """ + victims = [t.name for t in self.tools_by_source(source_label)] + for name in victims: + self._tools.pop(name, None) + if self._graph.has_node(name): + self._graph.remove_node(name) + if victims: + self._invalidate_retrieval() + return len(victims) + + def relink(self, *, min_confidence: float = 0.7) -> int: + """Re-run dependency detection across all currently registered tools. + + New relations are added to the existing graph. Existing edges are + preserved (the underlying graph engine deduplicates edges by + ``(source, target, relation)``). + + Returns the number of detected relations applied (including + previously known ones — use this as an upper bound, not a delta). 
+ """ + if len(self._tools) < 2: + return 0 + from graph_tool_call.analyze.dependency import detect_dependencies as _detect + + relations = _detect(list(self._tools.values()), min_confidence=min_confidence) + for rel in relations: + self._builder.add_relation(rel.source, rel.target, rel.relation_type) + if relations: + self._invalidate_retrieval() + return len(relations) + def analyze( self, *, @@ -1397,17 +1533,28 @@ def search_tools(query: str, top_k: int | None = None) -> str: """Search available tools by natural language query. Use this FIRST to find which tools are available for the task. - Returns tool names, descriptions, and required parameters. + Returns tool names, descriptions, required parameters, and + **dependency hints** (``prerequisites`` for tools that must be + called first, ``relations`` for tools used together or in order). + + Planning rule: + - Pick the single tool that best matches the user's goal. + - If its ``prerequisites`` are non-empty, call those first and + feed their results into the target tool's arguments. + - ``relations`` with type=precedes/requires imply call order. Args: query: Natural language search query (e.g. "add numbers", "get weather") top_k: Max number of results (optional) """ k = top_k if top_k is not None else default_top_k - results = graph_ref.retrieve(query, top_k=k) + # retrieve_with_scores 를 써야 _enrich_relations 가 채운 relations/prerequisites + # 가 살아남는다. retrieve() 는 ToolSchema 만 반환해 이 정보가 버려짐. + results = graph_ref.retrieve_with_scores(query, top_k=k) matched = [] - for schema in results: + for result in results: + schema = result.tool entry: dict[str, Any] = { "name": schema.name, "description": (schema.description or "")[:200], @@ -1422,6 +1569,22 @@ def search_tools(query: str, top_k: int | None = None) -> str: } for p in schema.parameters ] + # Dependency / ordering hints from graph edges. + # prerequisites: REQUIRES targets not in the result set — LLM + # should call these first. 
relations: edges among result set + # members, carrying human-readable hint strings. + if result.prerequisites: + entry["prerequisites"] = list(result.prerequisites) + if result.relations: + entry["relations"] = [ + { + "target": rel.target, + "type": rel.type, + "direction": rel.direction, + "hint": rel.hint, + } + for rel in result.relations + ] matched.append(entry) output = { @@ -1430,8 +1593,10 @@ def search_tools(query: str, top_k: int | None = None) -> str: "total_tools": len(graph_ref._tools), "tools": matched, "hint": ( - "Use call_tool to execute a tool. " - "Pass tool_name and arguments as a dict matching the parameters above." + "Pick ONE tool matching the user's goal. If its " + "'prerequisites' list is non-empty, call those tools " + "first and use their results to fill the target tool's " + "arguments. Then call_tool the target." ), } return json.dumps(output, ensure_ascii=False, indent=2) From d46e39308788813d1c5140757ca3298ac435fde9 Mon Sep 17 00:00:00 2001 From: daehee Date: Fri, 24 Apr 2026 12:41:11 +0900 Subject: [PATCH 03/14] =?UTF-8?q?feat:=20Stage=202=20PathSynthesizer=20?= =?UTF-8?q?=E2=80=94=20graph=20=EA=B8=B0=EB=B0=98=20Plan=20=EA=B2=B0?= =?UTF-8?q?=EC=A0=95=EB=A1=A0=EC=A0=81=20=EC=83=9D=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit target tool 에서 출발해 required consumes 를 bottom-up 재귀로 해소: - entities (Stage 1 output) 로 직접 바인딩 - 없으면 graph 에서 semantic_tag / field_name 으로 producer 검색 후 prereq step 추가 (재귀) - 없으면 UnsatisfiableFieldError v1 범위 (설계 §16.6): - Linear chain (fan-out/조건/parallel 은 v2+) - 여러 producer 중 첫 번째 픽 (disambiguation 은 Phase D) - [*] wildcard → [0] 변환 (단일 선택) - max_depth 기본 5 (cyclic guard) 공개 API: graph_tool_call.plan.PathSynthesizer(graph_dict).synthesize( target=..., entities=..., goal=...) 
-> Plan 단위 테스트 13/13 pass (전체 plan 패키지 42/42): - trivial (no required / entity-only) - 2/3-step chain (semantic match, field_name fallback) - unsatisfiable / unknown target / cycle / max_depth - semantic 우선순위, self-producer 제외 --- graph_tool_call/plan/__init__.py | 13 + graph_tool_call/plan/synthesizer.py | 360 ++++++++++++++++++++++++++++ 2 files changed, 373 insertions(+) create mode 100644 graph_tool_call/plan/synthesizer.py diff --git a/graph_tool_call/plan/__init__.py b/graph_tool_call/plan/__init__.py index c35aef3..88d26aa 100644 --- a/graph_tool_call/plan/__init__.py +++ b/graph_tool_call/plan/__init__.py @@ -40,6 +40,13 @@ def call_tool(tool_name, args): ExecutionTrace, StepTrace, ) +from graph_tool_call.plan.synthesizer import ( + PathSynthesizer, + PlanSynthesisError, + UnsatisfiableFieldError, + CyclicDependencyError, + MaxDepthExceededError, +) __all__ = [ # schema @@ -59,4 +66,10 @@ def call_tool(tool_name, args): "StepFailed", "PlanCompleted", "PlanAborted", + # synthesizer + "PathSynthesizer", + "PlanSynthesisError", + "UnsatisfiableFieldError", + "CyclicDependencyError", + "MaxDepthExceededError", ] diff --git a/graph_tool_call/plan/synthesizer.py b/graph_tool_call/plan/synthesizer.py new file mode 100644 index 0000000..33be4dd --- /dev/null +++ b/graph_tool_call/plan/synthesizer.py @@ -0,0 +1,360 @@ +"""PathSynthesizer — Stage 2 of Plan-and-Execute. + +Given a target tool and user-provided entities, walk the ToolGraph's +produces/consumes metadata backwards to construct a Plan (ordered steps + +bindings) that, when executed by PlanRunner, satisfies the target. + +This module is transport-agnostic. It consumes a plain ``graph`` dict (the +shape persisted as ``api_tool_collections.graph.graph``) — no DB, no HTTP. + +v1 scope (per design §16.6): + - Linear chain only — no fan-out, no parallel, no branching. + - If multiple producers exist for a required field, the first one is + picked (simple, predictable). Ambiguity handling is Phase D+. 
+ - Max recursion depth = 5 (guard against cyclic or pathological graphs). + +Matching order for each required consume field: + 1. User ``entities`` (Stage 1 output) — preferred, no extra step. + 2. Another tool's ``produces`` with the same ``semantic_tag`` + (Pass 2 LLM enrichment quality). + 3. Another tool's ``produces`` with the same ``field_name`` + (Pass 1 deterministic extraction, fallback). +""" + +from __future__ import annotations + +import uuid +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any + +from graph_tool_call.plan.schema import Plan, PlanStep + + +class PlanSynthesisError(Exception): + """Base class for synthesis failures.""" + + +class UnsatisfiableFieldError(PlanSynthesisError): + """A required field cannot be supplied by entities or any producer.""" + + +class CyclicDependencyError(PlanSynthesisError): + """The synthesis trace revisits a tool already in progress.""" + + +class MaxDepthExceededError(PlanSynthesisError): + """Recursion depth exceeded — likely a misshapen graph.""" + + +@dataclass +class _PartialStep: + """In-progress step being built during bottom-up synthesis.""" + + tool: str + args: dict[str, Any] = field(default_factory=dict) + rationale: str = "" + step_id: str = "" # assigned at topological sort + + +class PathSynthesizer: + """Deterministic plan builder driven by graph ``produces``/``consumes``. 
+ + Usage:: + + syn = PathSynthesizer(graph_dict) + plan = syn.synthesize( + target="seltProductDetailInfo", + entities={"search_keyword": "quarzen 티셔츠"}, + ) + """ + + def __init__( + self, + graph: dict[str, Any], + *, + max_depth: int = 5, + ) -> None: + self._tools: dict[str, dict[str, Any]] = dict(graph.get("tools") or {}) + self._max_depth = max_depth + # semantic_tag -> [tool_name], insertion order preserved + self._producers_by_semantic: dict[str, list[str]] = {} + self._producers_by_field: dict[str, list[str]] = {} + self._build_producer_indexes() + + # ------------------------------------------------------------------ + # public API + # ------------------------------------------------------------------ + + def synthesize( + self, + *, + target: str, + entities: dict[str, Any] | None = None, + goal: str = "", + ) -> Plan: + """Build a Plan whose final step is ``target`` with required args + filled by entities + prerequisite steps. + + Raises ``UnsatisfiableFieldError`` if a required field has no + producer or entity mapping. 
+ """ + if target not in self._tools: + raise PlanSynthesisError(f"target tool not in graph: {target!r}") + + entities = entities or {} + steps_by_tool: dict[str, _PartialStep] = {} + visiting: set[str] = set() + + # Resolve recursively; populates steps_by_tool with target at the end + self._resolve( + tool_name=target, + entities=entities, + steps_by_tool=steps_by_tool, + visiting=visiting, + depth=0, + ) + + # Assign topological ids s1..sN by insertion order + ordered_tools = list(steps_by_tool.keys()) + for idx, tool_name in enumerate(ordered_tools, start=1): + steps_by_tool[tool_name].step_id = f"s{idx}" + + # Replace tool-name bindings with step-id bindings + final_steps: list[PlanStep] = [] + for tool_name in ordered_tools: + partial = steps_by_tool[tool_name] + args = { + k: self._rewrite_tool_refs(v, steps_by_tool) + for k, v in partial.args.items() + } + final_steps.append(PlanStep( + id=partial.step_id, + tool=partial.tool, + args=args, + rationale=partial.rationale, + )) + + target_step_id = steps_by_tool[target].step_id + return Plan( + id=str(uuid.uuid4()), + goal=goal or f"Execute {target}", + steps=final_steps, + output_binding=f"${{{target_step_id}.body}}", + created_at=datetime.now(timezone.utc).isoformat(), + metadata={ + "target": target, + "entities": dict(entities), + "synthesized_by": "PathSynthesizer/v1", + }, + ) + + # ------------------------------------------------------------------ + # core recursion + # ------------------------------------------------------------------ + + def _resolve( + self, + *, + tool_name: str, + entities: dict[str, Any], + steps_by_tool: dict[str, _PartialStep], + visiting: set[str], + depth: int, + ) -> str: + """Ensure ``tool_name`` has a PartialStep with resolved args. + + Returns the tool name itself (used as a placeholder in args until + step_ids are assigned by the caller). 
+ """ + if depth > self._max_depth: + raise MaxDepthExceededError( + f"synthesis exceeded max_depth={self._max_depth} at {tool_name!r}" + ) + if tool_name in steps_by_tool: + return tool_name + if tool_name in visiting: + raise CyclicDependencyError( + f"cycle detected at {tool_name!r} (chain: {sorted(visiting)!r})" + ) + visiting.add(tool_name) + + tool = self._tools.get(tool_name) or {} + metadata = tool.get("metadata") or {} + consumes = metadata.get("consumes") or [] + + args: dict[str, Any] = {} + rationales: list[str] = [] + + for consume in consumes: + if not consume.get("required"): + continue + + field_name = consume.get("field_name") or "" + semantic = consume.get("semantic_tag") or "" + + # 1. Entity match (user-supplied) + entity_val = self._match_entity(entities, semantic, field_name) + if entity_val is not None: + args[field_name] = entity_val + continue + + # 2/3. Find a producer (semantic first, then field_name) + producer = self._find_producer( + semantic=semantic, field_name=field_name, + exclude=tool_name, + ) + if producer is None: + raise UnsatisfiableFieldError( + f"tool {tool_name!r} requires {field_name!r} " + f"(semantic={semantic!r}) but no entity or producer found" + ) + + # Recurse into the producer first so step_id ordering is correct + self._resolve( + tool_name=producer, + entities=entities, + steps_by_tool=steps_by_tool, + visiting=visiting, + depth=depth + 1, + ) + + # Build a placeholder binding — will be rewritten after step_ids + # are assigned. 
Format: ${.} + prod_path = self._producer_jsonpath(producer, semantic, field_name) + args[field_name] = f"${{{producer}.{prod_path}}}" + rationales.append(f"{field_name} ← {producer} ({prod_path})") + + steps_by_tool[tool_name] = _PartialStep( + tool=tool_name, + args=args, + rationale="; ".join(rationales) if rationales else "", + ) + visiting.discard(tool_name) + return tool_name + + # ------------------------------------------------------------------ + # helpers + # ------------------------------------------------------------------ + + def _build_producer_indexes(self) -> None: + """Index which tools produce which semantic / field across graph.""" + for name, tool in self._tools.items(): + meta = tool.get("metadata") or {} + for produce in meta.get("produces") or []: + sem = produce.get("semantic_tag") or "" + fname = produce.get("field_name") or "" + if sem: + self._producers_by_semantic.setdefault(sem, []).append(name) + if fname: + self._producers_by_field.setdefault(fname, []).append(name) + + def _find_producer( + self, + *, + semantic: str, + field_name: str, + exclude: str, + ) -> str | None: + """Pick the first producer matching semantic, falling back to field name.""" + if semantic: + for name in self._producers_by_semantic.get(semantic, []): + if name != exclude: + return name + if field_name: + for name in self._producers_by_field.get(field_name, []): + if name != exclude: + return name + return None + + def _producer_jsonpath( + self, + producer: str, + semantic: str, + field_name: str, + ) -> str: + """Return a dotted path under the producer's response that yields + the desired field. Converts ``$.a.b[*].c`` → ``a.b[0].c`` (v1 picks + the first array element when a wildcard is present). + + Falls back to ``body`` + field_name if we can't locate the produces. 
+ """ + tool = self._tools.get(producer) or {} + produces = (tool.get("metadata") or {}).get("produces") or [] + match = None + if semantic: + match = next( + (p for p in produces if p.get("semantic_tag") == semantic), + None, + ) + if match is None and field_name: + match = next( + (p for p in produces if p.get("field_name") == field_name), + None, + ) + if match is None: + return f"body.{field_name}" if field_name else "body" + + raw = match.get("json_path") or "" + return _normalize_jsonpath_for_binding(raw) + + def _match_entity( + self, + entities: dict[str, Any], + semantic: str, + field_name: str, + ) -> Any | None: + """Look up user-supplied entity by semantic tag or field name.""" + if semantic and semantic in entities: + return entities[semantic] + if field_name and field_name in entities: + return entities[field_name] + return None + + def _rewrite_tool_refs( + self, + value: Any, + steps_by_tool: dict[str, _PartialStep], + ) -> Any: + """Recursively rewrite ``${.}`` → ``${sN.}``.""" + if isinstance(value, dict): + return {k: self._rewrite_tool_refs(v, steps_by_tool) for k, v in value.items()} + if isinstance(value, list): + return [self._rewrite_tool_refs(v, steps_by_tool) for v in value] + if not isinstance(value, str): + return value + # Only rewrite full-string bindings that we inserted. Entities + # supplied by the caller are left alone (no ${...} wrapping). + if not (value.startswith("${") and value.endswith("}")): + return value + inner = value[2:-1] + head, _, tail = inner.partition(".") + if head in steps_by_tool: + step_id = steps_by_tool[head].step_id + rest = f".{tail}" if tail else "" + return f"${{{step_id}{rest}}}" + return value + + +def _normalize_jsonpath_for_binding(raw: str) -> str: + """``$.body.goods[*].goodsNo`` → ``body.goods[0].goodsNo``. + + v1 always picks index 0 for arrays. Fan-out is v2 (design §11.1). 
+ """ + if not raw: + return "" + path = raw + if path.startswith("$"): + path = path[1:] + if path.startswith("."): + path = path[1:] + return path.replace("[*]", "[0]") + + +__all__ = [ + "PathSynthesizer", + "PlanSynthesisError", + "UnsatisfiableFieldError", + "CyclicDependencyError", + "MaxDepthExceededError", +] From d6a14e9d35c23dcdeed235324ef9b2da389bf71f Mon Sep 17 00:00:00 2001 From: daehee Date: Fri, 24 Apr 2026 13:27:03 +0900 Subject: [PATCH 04/14] feat: Stage 1 Intent Parser + Stage 4 Response Synthesizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stage 1 — graph_tool_call.plan.intent parse_intent(requirement, catalog, llm) -> ParsedIntent: - LLM 1회로 {target, entities, confidence, output_shape} 구조화 - catalog 는 retrieval 상위 K개 ToolCatalogEntry (ai_metadata 활용: one_line_summary / when_to_use / consumes_tags / canonical_action) - target 이 catalog 에 없으면 IntentParseError (hallucination 차단) - confidence 0~1 clamp, output_shape 검증 + single fallback Stage 4 — graph_tool_call.plan.response synthesize_success_response / synthesize_failure_response: - ExecutionTrace → 자연어 답변 (한국어 기본) - 성공/실패 프롬프트 분리 (실패 시 failed_step / error / partial 전달) - char_limit 으로 큰 응답 truncate --- graph_tool_call/plan/__init__.py | 18 ++++ graph_tool_call/plan/intent.py | 180 +++++++++++++++++++++++++++++++ graph_tool_call/plan/response.py | 125 +++++++++++++++++++++ 3 files changed, 323 insertions(+) create mode 100644 graph_tool_call/plan/intent.py create mode 100644 graph_tool_call/plan/response.py diff --git a/graph_tool_call/plan/__init__.py b/graph_tool_call/plan/__init__.py index 88d26aa..d9e5e1e 100644 --- a/graph_tool_call/plan/__init__.py +++ b/graph_tool_call/plan/__init__.py @@ -40,6 +40,16 @@ def call_tool(tool_name, args): ExecutionTrace, StepTrace, ) +from graph_tool_call.plan.intent import ( + IntentParseError, + ParsedIntent, + ToolCatalogEntry, + parse_intent, +) +from graph_tool_call.plan.response import ( + 
synthesize_success_response, + synthesize_failure_response, +) from graph_tool_call.plan.synthesizer import ( PathSynthesizer, PlanSynthesisError, @@ -72,4 +82,12 @@ def call_tool(tool_name, args): "UnsatisfiableFieldError", "CyclicDependencyError", "MaxDepthExceededError", + # intent + "ToolCatalogEntry", + "ParsedIntent", + "IntentParseError", + "parse_intent", + # response + "synthesize_success_response", + "synthesize_failure_response", ] diff --git a/graph_tool_call/plan/intent.py b/graph_tool_call/plan/intent.py new file mode 100644 index 0000000..618d0d8 --- /dev/null +++ b/graph_tool_call/plan/intent.py @@ -0,0 +1,180 @@ +"""Stage 1 — Intent Parser. + +자연어 요구사항을 Stage 2 (PathSynthesizer) 가 소비할 수 있는 구조화 +``{target, entities}`` 로 변환한다. LLM 1회 호출, 작은 context. + +Catalog 구성 원칙 (설계 §4): + - 사전에 retrieval 로 상위 K개 도구만 넘김 (전체 카탈로그 X) + - 각 도구는 name + one_line_summary + when_to_use + 핵심 semantic tags + - Pass 2 enrichment 가 채운 ai_metadata 가 있으면 그 정보를 우선 사용; + 없으면 description 축약본으로 fallback + +LLM 은 structured JSON 만 반환 — 파싱 실패 시 BindingError 같은 방식으로 +호출자에게 명확히 전달. +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from typing import Any + +from graph_tool_call.ontology.llm_provider import OntologyLLM, _extract_json + + +# --------------------------------------------------------------------------- +# data shape +# --------------------------------------------------------------------------- + + +@dataclass +class ToolCatalogEntry: + """Condensed tool view for intent-parsing prompt — under ~150 chars each.""" + + name: str + summary: str = "" # one_line_summary from ai_metadata + when_to_use: str = "" # ai_metadata.when_to_use + consumes_tags: list[str] = field(default_factory=list) # required semantic ids + canonical_action: str = "" # "read" | "search" | "create" | ... + primary_resource: str = "" # "product" | ... 
+ + +@dataclass +class ParsedIntent: + """Stage 1 output — consumed by Stage 2 PathSynthesizer.""" + + target: str # tool name picked by LLM + entities: dict[str, Any] = field(default_factory=dict) + confidence: float = 0.0 # 0.0 ~ 1.0 + output_shape: str = "single" # "single" | "list" | "count" + reasoning: str = "" + + +class IntentParseError(Exception): + """Raised when the LLM output can't be mapped to a valid ParsedIntent.""" + + +# --------------------------------------------------------------------------- +# prompt +# --------------------------------------------------------------------------- + + +_INTENT_PROMPT = """\ +You pick the right API tool and extract entity values for a planning system. + +User requirement: +{requirement} + +Candidate tools (shortlisted by retrieval): +{catalog} + +Rules: + - Pick exactly ONE tool (the final-goal tool). Do not plan the chain — + the downstream system will build prerequisite steps automatically. + - entities: extract values from the requirement and key them by semantic + id when known (e.g. "search_keyword", "product_id", "site_id"). + For free-text user inputs, prefer "search_keyword". + - output_shape: "single" for one-item answers, "list" for multiple, + "count" for aggregates. + - confidence: your certainty in the tool pick, 0.0~1.0. + - reasoning: one short sentence, for audit logs. + +Output JSON only — no markdown, no prose. Schema: +{{ + "target": "", + "entities": {{...}}, + "confidence": 0.0, + "output_shape": "single" | "list" | "count", + "reasoning": "..." +}} +""" + + +def _format_catalog(entries: list[ToolCatalogEntry]) -> str: + lines: list[str] = [] + for i, e in enumerate(entries, start=1): + parts = [f"{i}. 
{e.name}"] + if e.canonical_action or e.primary_resource: + parts.append(f"[{e.canonical_action}/{e.primary_resource}]".strip("[/]")) + if e.summary: + parts.append(f"— {e.summary}") + lines.append(" ".join(p for p in parts if p)) + if e.when_to_use: + lines.append(f" when: {e.when_to_use[:140]}") + if e.consumes_tags: + lines.append(f" needs: {', '.join(e.consumes_tags[:6])}") + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# public API +# --------------------------------------------------------------------------- + + +def parse_intent( + requirement: str, + catalog: list[ToolCatalogEntry], + llm: OntologyLLM, +) -> ParsedIntent: + """Call the LLM once to produce a ParsedIntent. + + ``catalog`` should be the retrieval-shortlisted candidate tools (keep + small — ~10 entries — to control prompt size). ``llm`` is any + OntologyLLM-compatible provider. + """ + if not catalog: + raise IntentParseError("empty catalog — cannot pick a target") + + prompt = _INTENT_PROMPT.format( + requirement=requirement.strip(), + catalog=_format_catalog(catalog), + ) + raw = llm.generate(prompt) + + try: + parsed = _extract_json(raw) + except json.JSONDecodeError as exc: + raise IntentParseError(f"LLM output not parseable JSON: {exc}") from exc + + if not isinstance(parsed, dict): + raise IntentParseError(f"expected JSON object, got {type(parsed).__name__}") + + target = str(parsed.get("target") or "").strip() + if not target: + raise IntentParseError("target missing from LLM output") + + # Validate target is in the catalog — guard against hallucinated names + allowed = {e.name for e in catalog} + if target not in allowed: + raise IntentParseError( + f"target {target!r} not in catalog (candidates: {sorted(allowed)[:5]!r}...)" + ) + + entities_raw = parsed.get("entities") + entities = entities_raw if isinstance(entities_raw, dict) else {} + + try: + confidence = float(parsed.get("confidence") or 0.0) + except (TypeError, 
ValueError): + confidence = 0.0 + confidence = max(0.0, min(1.0, confidence)) + + shape = str(parsed.get("output_shape") or "single").strip().lower() + if shape not in ("single", "list", "count"): + shape = "single" + + return ParsedIntent( + target=target, + entities=entities, + confidence=confidence, + output_shape=shape, + reasoning=str(parsed.get("reasoning") or "").strip(), + ) + + +__all__ = [ + "ToolCatalogEntry", + "ParsedIntent", + "IntentParseError", + "parse_intent", +] diff --git a/graph_tool_call/plan/response.py b/graph_tool_call/plan/response.py new file mode 100644 index 0000000..782ca1f --- /dev/null +++ b/graph_tool_call/plan/response.py @@ -0,0 +1,125 @@ +"""Stage 4 — Response Synthesizer. + +ExecutionTrace 를 사용자 친화적 자연어 응답으로 변환한다. LLM 1회 호출, +context 는 execution 결과 요약 + 원본 요구사항. + +성공 / 실패 두 경우 모두 다룸: + - 성공: plan.output (final step body) + 요구사항 → 답변 + - 실패: failed_step + error + 부분 결과 → 무엇이 됐고 무엇이 안 됐는지 + +실행 결과가 대형 JSON 일 수 있으므로 호출자가 미리 projection / 압축한 후 +넘기는 것을 권장 (본 모듈은 단순히 ``str(output)`` 사용). +""" + +from __future__ import annotations + +import json +from typing import Any + +from graph_tool_call.ontology.llm_provider import OntologyLLM + + +# --------------------------------------------------------------------------- +# prompts +# --------------------------------------------------------------------------- + + +_SUCCESS_PROMPT = """\ +You turn API execution results into a natural answer for the user. + +User asked: +{requirement} + +Execution result (from the last step): +{result} + +Respond in Korean unless the user's question is clearly in another language. +Keep it concise — 1~3 sentences for simple answers, short bullet list for +multi-item results. Do not invent data not present in the result. +""" + + +_FAILURE_PROMPT = """\ +You explain an API execution failure to the user. + +User asked: +{requirement} + +Plan aborted at step {failed_step!r}. 
+Error: {error} + +Partial results collected before the failure: +{partial} + +Tell the user clearly in Korean (unless the question is another language): + - what they asked for + - what was attempted + - where and why it failed (in plain language — do not dump stack traces) + - what they can try next, if obvious +Keep it short and helpful — 2~4 sentences. +""" + + +# --------------------------------------------------------------------------- +# public API +# --------------------------------------------------------------------------- + + +def synthesize_success_response( + *, + requirement: str, + result: Any, + llm: OntologyLLM, + result_char_limit: int = 2000, +) -> str: + """Success case — plan completed, convert output to NL answer.""" + prompt = _SUCCESS_PROMPT.format( + requirement=requirement.strip(), + result=_render(result, result_char_limit), + ) + return llm.generate(prompt).strip() + + +def synthesize_failure_response( + *, + requirement: str, + failed_step: str, + error: Any, + partial_results: Any = None, + llm: OntologyLLM, + partial_char_limit: int = 1000, +) -> str: + """Failure case — plan aborted, explain to user.""" + prompt = _FAILURE_PROMPT.format( + requirement=requirement.strip(), + failed_step=failed_step, + error=_render(error, 300), + partial=_render(partial_results, partial_char_limit) if partial_results else "(none)", + ) + return llm.generate(prompt).strip() + + +# --------------------------------------------------------------------------- +# helpers +# --------------------------------------------------------------------------- + + +def _render(value: Any, char_limit: int) -> str: + """Serialize *value* to a short string for prompt use.""" + if value is None: + return "(none)" + if isinstance(value, str): + return value[:char_limit] + ("…" if len(value) > char_limit else "") + try: + text = json.dumps(value, ensure_ascii=False, indent=2) + except (TypeError, ValueError): + text = str(value) + if len(text) <= char_limit: + return text + 
return text[:char_limit] + "…" + + +__all__ = [ + "synthesize_success_response", + "synthesize_failure_response", +] From f7e6b42418093d77997b3511e83f7749ba62730d Mon Sep 17 00:00:00 2001 From: daehee Date: Tue, 28 Apr 2026 18:17:03 +0900 Subject: [PATCH 05/14] feat(ontology): add `kind` (data|context) to consume FieldSemantic + enrichment prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PathSynthesizer 가 chain 결정 시 ``kind=data`` (비즈니스 값, producer chain 가능) 와 ``kind=context`` (ambient config, chain 거부 — entity 또는 collection default 만 사용) 를 분리해야 무관 chain (예: locale/siteNo 의 producer 까지 끌어오기) 을 막을 수 있다. enrichment prompt 에 두 분류 가이드를 명시해 LLM 이 새 도구의 consume 을 정확히 분류하게 한다. Co-Authored-By: Claude Opus 4.7 (1M context) --- graph_tool_call/ontology/llm_provider.py | 44 +++++++++++++++++++----- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/graph_tool_call/ontology/llm_provider.py b/graph_tool_call/ontology/llm_provider.py index a34786d..76e26bc 100644 --- a/graph_tool_call/ontology/llm_provider.py +++ b/graph_tool_call/ontology/llm_provider.py @@ -46,11 +46,24 @@ class FieldSemantic: Used on both produces (what a tool outputs) and consumes (what it requires). ``json_path`` is set on produces; ``field`` is set on consumes. + + ``kind`` (consumes only) distinguishes two roles: + - ``"data"`` — true data dependency (e.g. a business identifier + needed to address the operation). PathSynthesizer + will chain to a producer for this field. + - ``"context"`` — ambient config (locale, site, pagination). Must be + supplied as an entity or collection default; the + synthesizer will NOT build a prerequisite chain + just to fetch it. + + The default ``"data"`` matches pre-kind behavior (safe for tools whose + enrichment predates this schema change). """ semantic: str json_path: str = "" field: str = "" + kind: str = "data" @dataclass @@ -199,9 +212,19 @@ class ToolEnrichment: * Use CONSISTENT semantic ids across tools. 
If two tools both return a product identifier (one calls it "goodsNo", another "productId"), use the same semantic like "product_id". - - consumes_semantics: array of {{"semantic": "canonical_id", "field": "paramName"}} + - consumes_semantics: array of {{"semantic": "canonical_id", + "field": "paramName", + "kind": "data" | "context"}} * REQUIRED inputs only. Skip optional filters, pagination. * Same semantic id conventions as produces. + * kind="data" — business-data dependency: an identifier or value that + addresses a specific record (e.g. product_id, order_id, user_id, + search_keyword). A prior step in a plan normally produces it. + * kind="context" — ambient/environmental config shared across the + workflow (locale, site_no, tenant, pagination cursors, flag switches). + The user or the caller supplies it as a default — it is NOT produced + by a prior step. Use this for anything a plain UI user would set + once per session, not per request. - pairs_well_with: array of {{"tool": "tool_name_from_available_list", "reason": "brief reason"}} * 2-4 tools that typically precede or follow this tool. 
@@ -271,14 +294,19 @@ def _parse_enrichment(data: Any) -> ToolEnrichment | None: for p in (data.get("produces_semantics") or []) if isinstance(p, dict) and str(p.get("semantic", "")).strip() ] - consumes = [ - FieldSemantic( - semantic=str(c.get("semantic", "")).strip(), - field=str(c.get("field", "")).strip(), + consumes = [] + for c in (data.get("consumes_semantics") or []): + if not (isinstance(c, dict) and str(c.get("semantic", "")).strip()): + continue + raw_kind = str(c.get("kind", "data")).strip().lower() + kind = raw_kind if raw_kind in ("data", "context") else "data" + consumes.append( + FieldSemantic( + semantic=str(c.get("semantic", "")).strip(), + field=str(c.get("field", "")).strip(), + kind=kind, + ) ) - for c in (data.get("consumes_semantics") or []) - if isinstance(c, dict) and str(c.get("semantic", "")).strip() - ] pairs = [ PairHint( tool=str(p.get("tool", "")).strip(), From 37927e0c4b4393d8a52da8a45705970de10293e3 Mon Sep 17 00:00:00 2001 From: daehee Date: Tue, 28 Apr 2026 18:22:30 +0900 Subject: [PATCH 06/14] feat(plan/intent): vocab validation, multi-turn seed, enum mappings, hard constraints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stage 1 (parse_intent) 의 4가지 robustness 보강: * Vocabulary fuzzy validation — LLM 이 ``search_keyword`` 대신 ``search_keyword_name`` 같은 인접 표현을 만들면 ratio≥0.8 로 coerce, 그 외엔 drop. 잘못된 entity key 는 downstream cycle / unsatisfied field 로 이어져 silent 한 잘못된 plan 을 만든다. * Multi-turn ``seed_entities`` 인자 — popup-driven 흐름에서 직전 turn 의 결정값을 carry forward. prompt 에 명시 + 코드 안전망 (LLM 이 seed 무시 시에도 합쳐 줌). 새 turn 의 명시 entity 는 같은 키의 seed 를 override. * ``enum_mappings`` prompt section — 운영자가 등록한 ``{field: {code: label}}`` 를 catalog scope 안에서만 노출 (전체 노출 시 prompt 폭주). HC5 으로 enum field 는 코드 (left side) 만 entity 값으로 허용. * Hard constraint 강화 — ``do not force-fit`` 같은 약한 부정 대신 ``DO NOT`` 4개 (HC1-HC4) 명시: identifier 필드에 자연어 phrase 금지, vocab 외 키 발명 금지, 동일 값 여러 필드 금지, 값 변환 금지. 
--- graph_tool_call/plan/intent.py | 208 ++++++++++++++++++++++++++++++--- 1 file changed, 194 insertions(+), 14 deletions(-) diff --git a/graph_tool_call/plan/intent.py b/graph_tool_call/plan/intent.py index 618d0d8..74dd8b8 100644 --- a/graph_tool_call/plan/intent.py +++ b/graph_tool_call/plan/intent.py @@ -15,6 +15,7 @@ from __future__ import annotations +import difflib import json from dataclasses import dataclass, field from typing import Any @@ -22,6 +23,13 @@ from graph_tool_call.ontology.llm_provider import OntologyLLM, _extract_json +# Minimum SequenceMatcher ratio for treating an LLM-emitted entity key as +# a typo/expansion of a real vocab entry. 0.8 catches "search_keyword_name" +# vs "search_keyword" (~0.85) while rejecting unrelated pairs like +# "search_keyword" vs "search_query" (~0.54). +_VOCAB_FUZZY_CUTOFF = 0.8 + + # --------------------------------------------------------------------------- # data shape # --------------------------------------------------------------------------- @@ -65,19 +73,48 @@ class IntentParseError(Exception): User requirement: {requirement} -Candidate tools (shortlisted by retrieval): +Candidate tools (shortlisted by retrieval — includes the target's +prerequisite producers so every key you need should appear in some +tool's "needs:" line below): {catalog} - -Rules: - - Pick exactly ONE tool (the final-goal tool). Do not plan the chain — - the downstream system will build prerequisite steps automatically. - - entities: extract values from the requirement and key them by semantic - id when known (e.g. "search_keyword", "product_id", "site_id"). - For free-text user inputs, prefer "search_keyword". - - output_shape: "single" for one-item answers, "list" for multiple, - "count" for aggregates. - - confidence: your certainty in the tool pick, 0.0~1.0. - - reasoning: one short sentence, for audit logs. 
+{vocabulary_block}{enum_block}{seed_block} +HARD CONSTRAINTS — violating any of these is a planning error, not a +stylistic choice. Re-check the constraints before you emit JSON. + + HC1. DO NOT put a value into an identifier-style field (a field name + ending in "No" / "Id" / "Idx" / "Code" / "id") if the value + contains spaces, Korean/Chinese/Japanese letters, or category + words ("티셔츠", "신발", "shoes", a brand or model name). + Identifier fields accept short alphanumeric record locators + only ("G12345", "10293"). A descriptive phrase placed in such + a field is always wrong. + HC2. DO NOT invent field names. Every entity key MUST appear in one + of the candidate tools' "needs:" lines. If no listed field can + carry the user's value without violating HC1, omit the entity — + empty entities are fine; the downstream synthesizer chains + through a producer. + HC3. DO NOT put the same value into more than one field. Each value + goes into zero or exactly one field. + HC4. DO NOT translate, normalize, paraphrase, or expand the user's + value. Copy it byte-for-byte as written in the requirement. + HC5. For fields that have an enum mapping below, the entity value + MUST be one of the listed CODES (left side), never the label + (right side) and never the user's original phrase. Pick the + code whose label best matches the user's intent. If nothing + matches clearly, omit that entity. + +Selection guidance (apply only after the constraints hold): + - Pick exactly ONE tool — the final-goal tool. Do not plan the chain; + the downstream system builds prerequisite steps automatically. + - Free-text values (descriptive phrases like "quarzen 티셔츠", + "black hoodie") match fields named "searchWord", "query", + "keyword", or names ending in "Nm" / "Name". + - When several fields could carry the value without violating HC1, + prefer one a candidate's "needs:" line lists — that is a field a + tool you already considered actually accepts. 
+ - output_shape: "single" / "list" / "count". + - confidence: 0.0~1.0 — your certainty in the tool pick. + - reasoning: one short sentence for audit logs. Output JSON only — no markdown, no prose. Schema: {{ @@ -90,6 +127,107 @@ class IntentParseError(Exception): """ +def _coerce_entity_keys( + entities: dict[str, Any], + vocab: list[str], +) -> dict[str, Any]: + """Map LLM-emitted entity keys onto the vocabulary. + + Exact match → kept. Close match above ``_VOCAB_FUZZY_CUTOFF`` → coerced + to the canonical vocab entry. Otherwise the entry is dropped — silently + passing an invented key downstream causes producer-chain failures or + cycle detection (the vocab miss is the failure, not the symptom). + """ + vocab_set = set(vocab) + out: dict[str, Any] = {} + for key, value in entities.items(): + key_str = str(key) + if key_str in vocab_set: + out[key_str] = value + continue + match = difflib.get_close_matches( + key_str, vocab, n=1, cutoff=_VOCAB_FUZZY_CUTOFF, + ) + if match: + # If multiple LLM keys collapse onto the same vocab entry, the + # later one wins. Acceptable: same canonical key with two + # values is already a degenerate LLM output. + out[match[0]] = value + return out + + +def _format_seed_block(seed_entities: dict[str, Any] | None) -> str: + """Render a 'carry forward' section for entities the caller already + decided in a previous turn. + + Multi-turn flow: when a previous synthesize attempt asked the user to + pick a value (e.g. via a popup of enum options), the chosen pairs are + fed back as ``seed_entities``. The LLM should keep them as-is unless + the new requirement explicitly contradicts a value, and only EXTRACT + NEW entities to add. Empty / None ⇒ section omitted. 
+ """ + if not seed_entities: + return "" + lines = "\n".join( + f' - {k}: {json.dumps(v, ensure_ascii=False)}' + for k, v in seed_entities.items() + ) + return ( + "\n\nExisting entities (carried over from prior turns — keep these " + "values exactly unless the user's new requirement explicitly " + "overrides one. You only need to extract additional entities that " + "the new requirement introduces):\n" + f"{lines}" + ) + + +def _format_enum_block(enum_mappings: dict[str, dict[str, str]] | None) -> str: + """Render the optional enum-mapping section of the prompt. + + ``enum_mappings`` shape: ``{field_name: {code: label}}`` — operator- + registered code lookups for backend enum fields whose values aren't + in the swagger schema (e.g. "10" -> "비회원" for a basket type code). + The LLM picks the code whose label matches the user's natural-language + intent. Empty / None ⇒ section omitted entirely. + """ + if not enum_mappings: + return "" + lines: list[str] = [] + for field, codes in enum_mappings.items(): + if not isinstance(codes, dict) or not codes: + continue + lines.append(f" - {field}:") + for code, label in codes.items(): + lines.append(f' "{code}" → {label}') + if not lines: + return "" + body = "\n".join(lines) + return ( + "\n\nEnum code mappings (operator-registered — when one of these " + "fields needs a value, pick the CODE whose label matches the " + "user's intent):\n" + f"{body}" + ) + + +def _format_vocabulary_block(tags: list[str]) -> str: + """Render the optional vocabulary section of the prompt. + + Returns an empty string when no vocab is provided so the prompt + stays focused on ``catalog``. Callers that want LLM access to + field names beyond the catalog (e.g. when retrieval failed to pull + in producers) can pass a non-empty list. 
+ """ + if not tags: + return "" + lines = "\n".join(f" - {t}" for t in tags) + return ( + "\n\nAvailable entity field names — backup vocabulary used only when " + "no candidate tool's \"needs:\" line carries the user's value:\n" + f"{lines}" + ) + + def _format_catalog(entries: list[ToolCatalogEntry]) -> str: lines: list[str] = [] for i, e in enumerate(entries, start=1): @@ -115,19 +253,46 @@ def parse_intent( requirement: str, catalog: list[ToolCatalogEntry], llm: OntologyLLM, + *, + vocabulary: list[str] | None = None, + enum_mappings: dict[str, dict[str, str]] | None = None, + seed_entities: dict[str, Any] | None = None, ) -> ParsedIntent: """Call the LLM once to produce a ParsedIntent. ``catalog`` should be the retrieval-shortlisted candidate tools (keep - small — ~10 entries — to control prompt size). ``llm`` is any - OntologyLLM-compatible provider. + small — ~10 entries — to control prompt size). ``vocabulary`` is the + full set of ``kind=data`` semantic ids in the graph (so the LLM can + map free-text inputs to a search-style key even when the matching + producer wasn't retrieved). ``enum_mappings`` is operator-registered + ``{field_name: {code: label}}`` lookups for backend enum fields whose + values aren't in the swagger schema — exposed only when relevant + (caller should pre-filter to the catalog's consumes fields). + ``seed_entities`` carries entities decided in earlier turns of a + multi-turn flow (e.g. user clicked an option in a popup); the LLM + keeps them and only extracts additional ones from the new + ``requirement``. ``llm`` is any OntologyLLM-compatible provider. """ if not catalog: raise IntentParseError("empty catalog — cannot pick a target") + vocab = vocabulary or [] + if not vocab: + # Fallback: derive from catalog. Same-domain narrowing only — + # callers that supply the full graph vocab get better accuracy. 
+ seen: set[str] = set() + for e in catalog: + for tag in e.consumes_tags: + if tag and tag not in seen: + seen.add(tag) + vocab.append(tag) + prompt = _INTENT_PROMPT.format( requirement=requirement.strip(), catalog=_format_catalog(catalog), + vocabulary_block=_format_vocabulary_block(vocab), + enum_block=_format_enum_block(enum_mappings), + seed_block=_format_seed_block(seed_entities), ) raw = llm.generate(prompt) @@ -153,6 +318,21 @@ def parse_intent( entities_raw = parsed.get("entities") entities = entities_raw if isinstance(entities_raw, dict) else {} + # Validate entity keys against the vocabulary. The LLM regularly emits + # a slightly-elaborated key ("search_keyword_name" instead of + # "search_keyword") that nothing downstream can match — coerce the + # close ones, drop the rest. A wrong key triggers worse downstream + # behavior than no key. + if vocab and entities: + entities = _coerce_entity_keys(entities, vocab) + + # Multi-turn safety net: even if the LLM ignored the carry-forward + # instructions, prior-turn entities must persist. New entities from + # this turn override on conflict (later turn wins for explicit + # contradictions in the requirement). + if seed_entities: + entities = {**seed_entities, **entities} + try: confidence = float(parsed.get("confidence") or 0.0) except (TypeError, ValueError): From 9b14a7a1aa7f50ec4e6d56cf66ee81a71f22e94a Mon Sep 17 00:00:00 2001 From: daehee Date: Tue, 28 Apr 2026 18:22:55 +0900 Subject: [PATCH 07/14] feat(plan/runner): wrapper-agnostic envelope unwrap + response_root_keys hint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PlanRunner 가 step 응답을 context 에 저장하기 전에 한 단계 envelope 을 peel 하는 휴리스틱 추가. 흔한 backend 패턴 ``{code, message, payload: {...}, timestamp}`` 에서 swagger 가 envelope 을 안 적었을 때, downstream binding ``${s1.searchDataList[*].goodsNo}`` 가 ``payload`` 안의 데이터로 자연스럽게 풀리게 한다. 조건 (5가지 모두 충족 시에만 unwrap): 1. response 가 dict, root key 2개 이상 2. 
정확히 1개의 dict-typed root value (wrapper 후보) 3. 나머지 root value 모두 scalar / null 4. expected_root_keys (= produces[].json_path 의 first segment) 가 response root 에 하나도 없음 5. wrapper 안에 expected_root_keys 중 하나라도 존재 조건이 strict 해서 false unwrap 은 거의 없음. wrapper 이름은 안 봄 (``payload``/``data``/``result`` 모두 동일하게 동작 — backend fit 아님). PlanStep 에 ``response_root_keys: list[str]`` 필드 추가 — 합성 시점에 synthesizer 가 채워두면 runtime 에 unwrap detect 가 schema 비교 가능. 채워지지 않으면 unwrap skip (안전 default). --- graph_tool_call/plan/runner.py | 57 ++++++++++++++++++++++++++++++++++ graph_tool_call/plan/schema.py | 6 ++++ 2 files changed, 63 insertions(+) diff --git a/graph_tool_call/plan/runner.py b/graph_tool_call/plan/runner.py index 8b9fa27..141d500 100644 --- a/graph_tool_call/plan/runner.py +++ b/graph_tool_call/plan/runner.py @@ -220,6 +220,13 @@ def run_stream( ) return + # 2a. Unwrap a single-level envelope when the response shape + # diverges from the schema in the canonical "{code, message, + # : {...}, timestamp}" pattern. One detect per step, + # not per binding — every binding for this step then resolves + # against the unwrapped dict naturally. + output = _maybe_unwrap_envelope(output, step.response_root_keys) + step_trace.output = output step_trace.duration_ms = _ms_since(step_start) trace_steps.append(step_trace) @@ -333,6 +340,56 @@ def _preview(value: Any, limit: int) -> Any: return value +def _maybe_unwrap_envelope( + output: Any, + expected_root_keys: list[str], +) -> Any: + """Peel one envelope layer when the response shape diverges from schema. + + Conservative — unwraps only when ALL of these hold: + + 1. ``output`` is a dict with two or more root keys + (a bare ``{"payload": ...}`` is more likely real data than envelope). + 2. Exactly one root value is itself a dict — the wrapper candidate. + 3. Every other root value is scalar / null + (envelope siblings are status/code/message/timestamp — not + business collections). + 4. 
None of ``expected_root_keys`` appears at the response root + (otherwise the response is already in schema-shape). + 5. At least one ``expected_root_keys`` entry appears inside the + wrapper candidate (otherwise the dict-typed sibling is unrelated + business data — unwrapping would lose information). + + The wrapper *key name* is never inspected, so this works for + ``payload`` / ``data`` / ``result`` / any other convention. Without + ``expected_root_keys`` there's no schema signal to validate against, + so the output passes through unchanged. + """ + if not expected_root_keys or not isinstance(output, dict) or len(output) < 2: + return output + + dict_keys = [k for k, v in output.items() if isinstance(v, dict)] + if len(dict_keys) != 1: + return output + + wrapper_key = dict_keys[0] + for k, v in output.items(): + if k == wrapper_key: + continue + if isinstance(v, (dict, list)): + return output + + expected = set(expected_root_keys) + if expected & set(output.keys()): + return output + + wrapper = output[wrapper_key] + if not (expected & set(wrapper.keys())): + return output + + return wrapper + + def _output_size(value: Any) -> int: """Approximate serialized byte size (for observability).""" import json as _json diff --git a/graph_tool_call/plan/schema.py b/graph_tool_call/plan/schema.py index 8a18577..b07530f 100644 --- a/graph_tool_call/plan/schema.py +++ b/graph_tool_call/plan/schema.py @@ -31,6 +31,12 @@ class PlanStep: rationale: str = "" # why this step exists (for audit) timeout_ms: int | None = None retryable: bool = False # reserved for v1.1 retry policy + # Top-level keys the synthesizer expects in this tool's response, + # derived from ``produces[].json_path``. Used by PlanRunner to detect + # envelope wrappers (e.g. ``{code, message, payload: {...}}``) when the + # ingest captured the wrapped fields without the wrapper itself. Empty + # list means "no hint" — the runner then leaves the response untouched. 
+ response_root_keys: list[str] = field(default_factory=list) @dataclass From 2e679137ebb7e62d93bdc889058f0179a93d1bc9 Mon Sep 17 00:00:00 2001 From: daehee Date: Tue, 28 Apr 2026 18:23:29 +0900 Subject: [PATCH 08/14] feat(plan/synthesizer): context_defaults, chain eligibility, dynamic-option, enum popup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PathSynthesizer 가 ``produces`` 매칭만으로 chain 을 결정하던 흐름에 의미 신호를 단계적으로 도입. 핵심 변화 4가지: * ``context_defaults`` 인자 — collection-level ambient values (locale/site/tenant) 운영자가 한 번 등록 → ``kind=context`` 필드 자동 채움. swagger 가 optional 로 표시했지만 backend 가 사실상 필수로 요구하는 환경값을 cover. * Chain eligibility filter — producer 가 ``canonical_action ∈ {search, read}`` 이고 ``primary_resource`` 가 target 의 도메인 (target.primary_resource + consumes semantic prefix) 안일 때만 chain. ``produces`` 매칭으로 무관 도구 (예: claim_cost calculator) 가 끌려오는 false positive 방지. ai_metadata 부분 enriched / 미 enriched 케이스는 fallback 통과해 graph quality 가 sparse 한 collection 도 회귀 없이 동작. * Dynamic option detection — required-data field 의 producer 가 single-hop 으로 호출 가능 (모든 input 이 entity / context_defaults 로 채워짐) + canonical_action='read' + json_path 가 array 면 ``DynamicOptionRequired`` (UnsatisfiableField 의 subclass) 를 던져 호출자가 producer 를 부분 실행해 옵션 list 를 사용자한테 popup. chain 으로 임의로 [0] 인덱스 박는 패턴을 막고 사용자 의도 우선. * ``enum_field_names`` — 운영자가 enum 매핑 등록한 field 는 chain 안 만들고 즉시 ``UnsatisfiableField`` raise (popup 으로 풀어야). enum 코드 같은 환경 값을 chain 으로 풀 때 끌려오는 무관 도구 (응답에 같은 코드명 우연히 포함된 도구) 차단. 부수: ``response_root_keys`` 자동 채움 (PlanRunner 의 envelope unwrap 힌트) + producer ranking 정책 docstring 정리. 
--- graph_tool_call/plan/__init__.py | 2 + graph_tool_call/plan/synthesizer.py | 451 +++++++++++++++++++++++++++- 2 files changed, 438 insertions(+), 15 deletions(-) diff --git a/graph_tool_call/plan/__init__.py b/graph_tool_call/plan/__init__.py index d9e5e1e..8f2d9eb 100644 --- a/graph_tool_call/plan/__init__.py +++ b/graph_tool_call/plan/__init__.py @@ -56,6 +56,7 @@ def call_tool(tool_name, args): UnsatisfiableFieldError, CyclicDependencyError, MaxDepthExceededError, + DynamicOptionRequired, ) __all__ = [ @@ -82,6 +83,7 @@ def call_tool(tool_name, args): "UnsatisfiableFieldError", "CyclicDependencyError", "MaxDepthExceededError", + "DynamicOptionRequired", # intent "ToolCatalogEntry", "ParsedIntent", diff --git a/graph_tool_call/plan/synthesizer.py b/graph_tool_call/plan/synthesizer.py index 33be4dd..44696cf 100644 --- a/graph_tool_call/plan/synthesizer.py +++ b/graph_tool_call/plan/synthesizer.py @@ -9,8 +9,6 @@ v1 scope (per design §16.6): - Linear chain only — no fan-out, no parallel, no branching. - - If multiple producers exist for a required field, the first one is - picked (simple, predictable). Ambiguity handling is Phase D+. - Max recursion depth = 5 (guard against cyclic or pathological graphs). Matching order for each required consume field: @@ -19,6 +17,19 @@ (Pass 2 LLM enrichment quality). 3. Another tool's ``produces`` with the same ``field_name`` (Pass 1 deterministic extraction, fallback). + +Producer selection is ranked by Pass 2 metadata signals — no hardcoded +domain or field rules: + - Entity affinity: producer consumes an entity the user supplied, + so chaining through it actually uses that entity. + - Pair hint: target's ``pairs_well_with`` includes this producer. + - Action preference: ``canonical_action`` = search/read fits a + prerequisite role better than create/update/delete. + +``consumes[].kind`` ("data" | "context", set by Pass 2): + - "data" — chain to a producer if entity doesn't match. 
+ - "context" — ambient config (locale, site, tenant). Never chained; + must come from entity or skipped (runtime uses API default). """ from __future__ import annotations @@ -47,6 +58,38 @@ class MaxDepthExceededError(PlanSynthesisError): """Recursion depth exceeded — likely a misshapen graph.""" +class DynamicOptionRequired(UnsatisfiableFieldError): + """A required data field has a single-hop producer that can be called + immediately with the user's entities + context_defaults. Surface this + so the caller can fetch the option list (instead of weaving a chain) + and ask the user to pick — the popup-driven UX for fields like + ``itmNo`` (single-품목 option) where the choices are dynamic per + request. + + The exception carries enough metadata for the caller to: + * know which producer to call (``producer_name``) + * find the option array in the producer's response (``options_path``) + * pick a sensible label field next to each code (``label_field_hints``) + """ + + def __init__( + self, + message: str, + *, + field_name: str, + semantic_tag: str, + producer_name: str, + options_path: str, + label_field_hints: list[str], + ) -> None: + super().__init__(message) + self.field_name = field_name + self.semantic_tag = semantic_tag + self.producer_name = producer_name + self.options_path = options_path + self.label_field_hints = list(label_field_hints) + + @dataclass class _PartialStep: """In-progress step being built during bottom-up synthesis.""" @@ -74,9 +117,26 @@ def __init__( graph: dict[str, Any], *, max_depth: int = 5, + context_defaults: dict[str, Any] | None = None, + enum_field_names: set[str] | None = None, ) -> None: self._tools: dict[str, dict[str, Any]] = dict(graph.get("tools") or {}) self._max_depth = max_depth + # Collection-level ambient values (locale, tenant id, site id, ...) the + # operator registers once per collection. 
Filled into ``kind=context`` + # consume fields when the user's entities don't supply them — avoids + # repeating env-style args in every requirement and avoids leaking + # backend-specific defaults into library code. Lookup precedence: + # entities > context_defaults > skip. + self._context_defaults: dict[str, Any] = dict(context_defaults or {}) + # Field names the operator registered an enum mapping for. When a + # required-data field of this kind can't be filled by an entity, + # the synthesizer raises UnsatisfiableFieldError instead of + # producer-chaining — the caller (service layer) is expected to + # surface a popup to the user rather than weaving an awkward + # producer chain that pulls in unrelated tools just to source a + # code value. User intent (popup choice) wins over chain depth. + self._enum_field_names: set[str] = set(enum_field_names or ()) # semantic_tag -> [tool_name], insertion order preserved self._producers_by_semantic: dict[str, list[str]] = {} self._producers_by_field: dict[str, list[str]] = {} @@ -133,6 +193,7 @@ def synthesize( tool=partial.tool, args=args, rationale=partial.rationale, + response_root_keys=self._response_root_keys(tool_name), )) target_step_id = steps_by_tool[target].step_id @@ -140,7 +201,10 @@ def synthesize( id=str(uuid.uuid4()), goal=goal or f"Execute {target}", steps=final_steps, - output_binding=f"${{{target_step_id}.body}}", + # PlanRunner adapter 는 step ctx 에 응답 body 를 root 로 노출 → + # ``${sN}`` 만으로 전체 응답 dict 가 잡힌다 (과거 ``${sN.body}`` 는 + # adapter 가 ``{status, body}`` 을 그대로 흘릴 때의 흔적). 
+ output_binding=f"${{{target_step_id}}}", created_at=datetime.now(timezone.utc).isoformat(), metadata={ "target": target, @@ -187,22 +251,55 @@ def _resolve( rationales: list[str] = [] for consume in consumes: - if not consume.get("required"): - continue - field_name = consume.get("field_name") or "" semantic = consume.get("semantic_tag") or "" + kind = str(consume.get("kind") or "data").strip().lower() + is_required = bool(consume.get("required")) - # 1. Entity match (user-supplied) + # 1. Entity match (user-supplied) — applies to both data and + # context, both required and optional. The user's input + # always wins. entity_val = self._match_entity(entities, semantic, field_name) if entity_val is not None: args[field_name] = entity_val continue - # 2/3. Find a producer (semantic first, then field_name) + # 2. Context-kind: try collection-level defaults regardless of + # required flag. Context is never chained — ambient config + # must come from entity or operator-registered default + # (chaining through e.g. getSiteInfo would inflate the plan + # with steps that don't produce business value). + if kind == "context": + default = self._lookup_context_default(semantic, field_name) + if default is not None: + args[field_name] = default + continue + + # 3. Optional data field: leave out. The caller's backend will + # apply its own defaults — synthesizer has no business + # inventing values for optional business inputs. + if not is_required: + continue + + # 4. Enum-field popup priority. If the operator registered an + # enum mapping for this field, it's the kind of value the + # user should pick from a popup — NOT something to chain + # through a producer (which often drags in semantically + # unrelated tools just because their response happens to + # contain a code by the same name). Surface + # UnsatisfiableFieldError so the caller can yield a + # question.required event instead. 
+ if field_name in self._enum_field_names: + raise UnsatisfiableFieldError( + f"tool {tool_name!r} requires {field_name!r} " + f"(semantic={semantic!r}) — enum field, expects user " + f"selection (no producer chain attempted)" + ) + + # 5. Required data field → rank candidate producers and pick the best. producer = self._find_producer( semantic=semantic, field_name=field_name, - exclude=tool_name, + target_tool=tool_name, entities=entities, ) if producer is None: raise UnsatisfiableFieldError( @@ -210,6 +307,41 @@ def _resolve( f"(semantic={semantic!r}) but no entity or producer found" ) + # 5a. Dynamic-option popup priority. Detect "read-detail then + # pick one" patterns where the producer is a single-hop + # read of a product/record whose response carries a + # list of options the user must choose from (e.g. + # ``getProductInfo`` exposes ``$.itmInfo[*].itmNo`` — + # the available SKUs). In that case, defer to the caller + # to fetch options and pop up a question, instead of + # chaining the producer in and binding ``[0]`` blindly. + # + # Constrained to ``canonical_action='read'`` because + # ``search`` producers (e.g. seltSearchProduct → goodsNo) + # are exactly the chain idiom we DO want — pick the first + # hit and continue. Without this constraint legitimate + # search→detail chains turn into popups. 
+ producer_action = self._producer_action(producer) + if ( + producer_action == "read" + and self._is_producer_simple_callable(producer, entities) + ): + opt_path = self._produces_path_for( + producer, semantic=semantic, field_name=field_name, + ) + if opt_path and "[*]" in opt_path: + raise DynamicOptionRequired( + f"tool {tool_name!r} requires {field_name!r} " + f"(semantic={semantic!r}) — dynamic option from " + f"{producer!r}; caller should fetch options and " + f"prompt the user", + field_name=field_name, + semantic_tag=semantic, + producer_name=producer, + options_path=opt_path, + label_field_hints=self._label_hints_for(producer, opt_path), + ) + # Recurse into the producer first so step_id ordering is correct self._resolve( tool_name=producer, @@ -254,19 +386,269 @@ def _find_producer( *, semantic: str, field_name: str, - exclude: str, + target_tool: str, + entities: dict[str, Any], ) -> str | None: - """Pick the first producer matching semantic, falling back to field name.""" + """Pick the best-ranked producer for ``semantic`` (or ``field_name``). + + Candidates are gathered from both indexes (semantic first), then + ranked using Pass 2 metadata (``_rank_producers``) and finally + filtered by ``_is_chain_eligible`` — discards producers whose + ``canonical_action`` / ``primary_resource`` signal they're + unrelated to the target's domain (e.g. claim-cost calculator + showing up as a producer for a basket field just because a + ``produces`` entry happens to match). 
+ """ + candidates: list[str] = [] + seen: set[str] = set() if semantic: for name in self._producers_by_semantic.get(semantic, []): - if name != exclude: - return name + if name != target_tool and name not in seen: + candidates.append(name) + seen.add(name) if field_name: for name in self._producers_by_field.get(field_name, []): - if name != exclude: - return name + if name != target_tool and name not in seen: + candidates.append(name) + seen.add(name) + if not candidates: + return None + + ranked = self._rank_producers( + candidates, target_tool=target_tool, entities=entities, + ) + for cand in ranked: + if self._is_chain_eligible(cand, target_tool=target_tool): + return cand return None + def _producer_action(self, producer_name: str) -> str: + """Return the producer's ``ai_metadata.canonical_action`` (lowercased, + empty string if missing). Used to gate dynamic-option popups to + ``read`` producers — search producers are the chain idiom (pick + first hit), not popup candidates. + """ + tool = self._tools.get(producer_name) or {} + ai = (tool.get("metadata") or {}).get("ai_metadata") or {} + return str(ai.get("canonical_action") or "").strip().lower() + + def _is_producer_simple_callable( + self, + producer_name: str, + entities: dict[str, Any], + ) -> bool: + """True iff the producer can be called with only the user's entities + and the collection's context_defaults — i.e. no further producer + chain needed to source its inputs. + + Used to detect "single-hop dynamic option" cases: instead of + chaining the producer into the plan, the caller fetches it once + and pops up the resulting list to the user (e.g. itmNo from + getProductInfo when the user already supplied goodsNo). 
+ """ + producer = self._tools.get(producer_name) or {} + for c in (producer.get("metadata") or {}).get("consumes") or []: + if not isinstance(c, dict) or not c.get("required"): + continue + field = c.get("field_name") or "" + sem = c.get("semantic_tag") or "" + kind = str(c.get("kind") or "data").strip().lower() + if self._match_entity(entities, sem, field) is not None: + continue + if kind == "context" and self._lookup_context_default(sem, field) is not None: + continue + return False + return True + + def _produces_path_for( + self, + producer_name: str, + *, + semantic: str, + field_name: str, + ) -> str: + """Find the producer's json_path that emits the given field — the + location of the option array in the response (e.g. + ``$.itmInfo[*].itmNo``). Empty string if no match. + """ + producer = self._tools.get(producer_name) or {} + for p in (producer.get("metadata") or {}).get("produces") or []: + if not isinstance(p, dict): + continue + if semantic and p.get("semantic_tag") == semantic: + return str(p.get("json_path") or "") + # Fallback: match by field_name when semantic missing/mismatched + for p in (producer.get("metadata") or {}).get("produces") or []: + if not isinstance(p, dict): + continue + if field_name and p.get("field_name") == field_name: + return str(p.get("json_path") or "") + return "" + + def _label_hints_for( + self, + producer_name: str, + options_path: str, + ) -> list[str]: + """Return field names that look like human labels living next to + the option-code field in the producer's response. Heuristic: same + array prefix, name ending in ``Nm`` / ``Name`` / ``Label``. + + ``options_path`` looks like ``$.itmInfo[*].itmNo``; we walk the + producer's other produces entries that share the prefix + ``$.itmInfo[*].`` and pick the ones whose field_name suggests a + label. + """ + producer = self._tools.get(producer_name) or {} + # Compute the array prefix: everything up to the last "." + if "." 
not in options_path: + return [] + prefix = options_path.rsplit(".", 1)[0] + "." + hints: list[str] = [] + seen: set[str] = set() + for p in (producer.get("metadata") or {}).get("produces") or []: + if not isinstance(p, dict): + continue + jp = str(p.get("json_path") or "") + if not jp.startswith(prefix): + continue + field = str(p.get("field_name") or "") + if not field or field in seen: + continue + lower = field.lower() + if lower.endswith("nm") or lower.endswith("name") or lower.endswith("label"): + hints.append(field) + seen.add(field) + return hints + + def _is_chain_eligible(self, producer_name: str, *, target_tool: str) -> bool: + """Return True if ``producer_name`` may be added to the prerequisite + chain for ``target_tool``. + + Two signals from Pass 2 ``ai_metadata`` decide: + + 1. ``canonical_action`` ∈ {search, read} + create/update/delete/action are not prerequisite material — + they perform side effects, never just data lookup. + 2. ``primary_resource`` is in the target's domain set + (target's own resource + the prefix of every consume's + semantic_tag, e.g. ``product_id`` ⇒ ``product``). + + Either signal absent (sparse ``ai_metadata``) ⇒ pass through. + Operators that haven't enriched the graph yet keep the previous + behaviour; once enriched, the policy starts filtering. Also + reverts to pass-through if the target itself has no ``ai_metadata``, + because the "domain set" can't be computed. 
def _rank_producers(
    self,
    candidates: list[str],
    *,
    target_tool: str,
    entities: dict[str, Any],
) -> list[str]:
    """Rank producer candidates by their per-tool metadata signals.

    Each candidate gets a score tuple, compared lexicographically:
      1. Entity affinity — number of the candidate's ``consumes`` entries
         whose ``semantic_tag`` or ``field_name`` is a key of ``entities``
         (the chain then actually uses what the user supplied).
      2. Pair hint — 1 when the target's ``ai_metadata.pairs_well_with``
         names this candidate, else 0.
      3. Action preference — ``search`` (3) > ``read`` (2) > ``action`` (1)
         > anything else (0), read from ``ai_metadata.canonical_action``.

    Ties keep the input order (the sort is stable).

    Args:
        candidates: Producer tool names to order.
        target_tool: Name of the tool the producers would feed.
        entities: User-supplied values keyed by semantic tag or field
            name; ``None`` or empty is treated as "nothing supplied".

    Returns:
        A new list with the best candidate first.
    """
    target_meta = (self._tools.get(target_tool) or {}).get("metadata") or {}
    target_ai = target_meta.get("ai_metadata") or {}
    pair_names = {
        str(p.get("tool") or "").strip()
        for p in (target_ai.get("pairs_well_with") or [])
        if isinstance(p, dict)
    }
    pair_names.discard("")
    entity_keys = {str(k) for k in (entities or {})}

    action_score = {"search": 3, "read": 2, "action": 1}

    def _score(name: str) -> tuple[int, int, int]:
        tool = self._tools.get(name) or {}
        meta = tool.get("metadata") or {}
        ai = meta.get("ai_metadata") or {}

        affinity = 0
        for c in meta.get("consumes") or []:
            # Malformed (non-dict) consume entries are ignored, matching
            # the guard used by the other metadata walkers in this class.
            if not isinstance(c, dict):
                continue
            tag = c.get("semantic_tag") or ""
            fname = c.get("field_name") or ""
            if (tag and tag in entity_keys) or (fname and fname in entity_keys):
                affinity += 1

        pair_bonus = 1 if name in pair_names else 0
        action = str(ai.get("canonical_action") or "").strip().lower()
        return (affinity, pair_bonus, action_score.get(action, 0))

    # sorted() is stable even with reverse=True: equal scores keep their
    # original relative order.
    return sorted(candidates, key=_score, reverse=True)
def _jsonpath_head(raw: str) -> str:
    """Return the first dotted segment of a JSONPath.

    The leading ``$`` and ``.`` are removed and the segment ends at the
    first ``.`` or ``[``.

    ``$.payload.searchDataList[*].goodsNo`` → ``"payload"``.
    ``$.totalCount`` → ``"totalCount"``.

    Surrounding whitespace is ignored so a padded path such as
    ``" $.payload"`` does not produce a bogus head like ``" $"``.
    Returns ``""`` for empty, non-string, or root-indexed input
    (e.g. ``$[0].id``, which has no root key).
    """
    if not isinstance(raw, str):
        return ""
    path = raw.strip()
    if not path:
        return ""
    if path.startswith("$"):
        path = path[1:]
    if path.startswith("."):
        path = path[1:]
    # Cut at the first separator (``.`` or ``[``).
    for i, ch in enumerate(path):
        if ch in ".[":
            return path[:i]
    return path
@@ -357,4 +777,5 @@ def _normalize_jsonpath_for_binding(raw: str) -> str: "UnsatisfiableFieldError", "CyclicDependencyError", "MaxDepthExceededError", + "DynamicOptionRequired", ] From 285baa0958b47c2651555510a2f8d37e870f4234 Mon Sep 17 00:00:00 2001 From: daehee Date: Wed, 29 Apr 2026 21:37:24 +0900 Subject: [PATCH 09/14] feat(graphify): build-time-baked confidence labels + zero-vector retrieval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a graphify-mode pipeline alongside the existing retrieval engine — the graph itself carries the relationship signal, so search and plan synthesis need no embeddings and no per-query LLM calls. All decisions are decided at ingest time and persisted as edge attrs. Library - Confidence enum (EXTRACTED / INFERRED / AMBIGUOUS); add_relation now accepts confidence/conf_score/layer/evidence as optional kwargs (legacy callers unchanged). - New graph_tool_call.graphify subpackage: * ingest_openapi_graphify(schemas, raw_spec=None) buckets each DetectedRelation by layer + score and persists confidence per edge. * preserve_refs_for_detection rescues layer-1 shared-schema signal that ingest_openapi inlined (Spring/SpringDoc specs depend on this). * _apply_pair_hints derives graphify edges from each tool's ai_metadata.pairs_well_with (single source-of-truth: ai_metadata; edges re-derived on every rebuild). * retrieve_graphify: BM25-seeded confidence-weighted BFS, intent-aware relation weighting, render_subgraph_text packages results into a NODE/EDGE text block bounded by token_budget. - ingest/openapi.py: * Content-type fallback (application/*+json, */*, first available) for response/request body schemas — needed for SpringDoc-emitted */* APIs. * Wrapper-object/array query parameters (Spring @ModelAttribute) are expanded into their inner properties or dropped when those properties are already exposed as siblings. 
- plan/synthesizer.py: * _find_producer redesigned around combined graph + schema signals (semantic_exact 100, graph_EXTRACTED 50, field_exact 40, ...) — graph edges are first-class, not a fallback chain. * Echo-back filter excludes producers that merely relay an input field. * Loose field-name matching (case + separator folded) for cross-naming-convention coverage. * Cycle policy A: visiting set passed to _find_producer so cycle-prone candidates are skipped and the chain reroutes around them. * F2 + Cycle policy B: unmet required fields surface as ${user_input.} placeholders instead of raising; recursion failures (MaxDepth/Cyclic) on a producer fall through to the same placeholder so plan synthesis never aborts midway. * Plan.metadata.user_input_slots collects every placeholder for the runner / UI to prompt with. Tests passing across touched modules: ingest_openapi, dependency, retrieval, plan, graph_engine. --- graph_tool_call/graphify/__init__.py | 38 ++ graph_tool_call/graphify/ingest.py | 434 ++++++++++++++++++++ graph_tool_call/graphify/retrieval.py | 478 +++++++++++++++++++++++ graph_tool_call/ingest/openapi.py | 146 ++++++- graph_tool_call/ontology/builder.py | 31 +- graph_tool_call/ontology/llm_provider.py | 227 +++++++++-- graph_tool_call/ontology/schema.py | 20 + graph_tool_call/plan/synthesizer.py | 350 +++++++++++++++-- graph_tool_call/tool_graph.py | 24 +- scripts/__init__.py | 5 + 10 files changed, 1664 insertions(+), 89 deletions(-) create mode 100644 graph_tool_call/graphify/__init__.py create mode 100644 graph_tool_call/graphify/ingest.py create mode 100644 graph_tool_call/graphify/retrieval.py create mode 100644 scripts/__init__.py diff --git a/graph_tool_call/graphify/__init__.py b/graph_tool_call/graphify/__init__.py new file mode 100644 index 0000000..6785ee3 --- /dev/null +++ b/graph_tool_call/graphify/__init__.py @@ -0,0 +1,38 @@ +"""graphify-mode: deterministic edge extraction + zero-vector retrieval. 
+ +Inspired by the graphify project (https://github.com/safishamsi/graphify). +The core idea: every edge carries a Confidence label, retrieval is a +keyword-seeded BFS over confidence-weighted edges, and the result is a +token-budgeted text rendering of the matched subgraph — no embeddings, +no wRRF fusion, no MMR reranking. + +Public API: + - ingest_openapi_graphify(schemas) -> (ToolGraph, edge_stats) + - retrieve_graphify(tg, query, ...) -> {results, subgraph_text, intent, stats} + - render_subgraph_text(tg, nodes, edges, budget) -> str +""" + +from graph_tool_call.graphify.ingest import ( + DEFAULT_CONF_AMBIGUOUS, + DEFAULT_CONF_EXTRACTED, + DEFAULT_CONF_INFERRED, + _apply_pair_hints, + bucket_confidence, + ingest_openapi_graphify, + preserve_refs_for_detection, +) +from graph_tool_call.graphify.retrieval import ( + render_subgraph_text, + retrieve_graphify, +) + +__all__ = [ + "DEFAULT_CONF_AMBIGUOUS", + "DEFAULT_CONF_EXTRACTED", + "DEFAULT_CONF_INFERRED", + "bucket_confidence", + "ingest_openapi_graphify", + "preserve_refs_for_detection", + "render_subgraph_text", + "retrieve_graphify", +] diff --git a/graph_tool_call/graphify/ingest.py b/graph_tool_call/graphify/ingest.py new file mode 100644 index 0000000..48bc8d5 --- /dev/null +++ b/graph_tool_call/graphify/ingest.py @@ -0,0 +1,434 @@ +"""Deterministic ingest: ToolSchema list -> ToolGraph with confidence labels. + +Pipeline (no LLM, no embeddings): + 1. ``detect_dependencies`` runs all four layers (path-hierarchy, CRUD, + shared $ref, name/RPC/cross-resource) at threshold 0.0. + 2. Each ``DetectedRelation`` is bucketed by (layer, conf_score) into one of + EXTRACTED / INFERRED / AMBIGUOUS / dropped. + 3. Edges are added to a fresh ``ToolGraph`` with the bucket as ``confidence`` + attr, plus ``conf_score`` / ``layer`` / ``evidence`` for transparency. + 4. 
``edge_stats`` summarises bucket counts, per-relation counts, and the + count of cross-source edges (different ``source_label`` on each end — + the key signal that adding a new source linked into the existing graph). + +For specs that use a lot of $ref pointers (typical of Swagger/OpenAPI 3.x +generators like SpringDoc), pass the raw spec dict to +``preserve_refs_for_detection`` BEFORE calling ``ingest_openapi_graphify`` so +``detect_dependencies._detect_shared_schemas`` can fire — without this step +the library's ``ingest_openapi`` resolves refs inline and the shared-schema +signal is lost. ``ingest_openapi_graphify`` accepts the raw spec directly via +``raw_spec=`` and runs preservation automatically. + +This is the ONLY ingest path used by xgen-workflow. The legacy 14-stage +``RetrievalEngine`` plumbing in graph_tool_call.retrieval is left intact +for benchmark/example users but is not invoked from this module. +""" + +from __future__ import annotations + +from collections import Counter +from typing import Any + +from graph_tool_call.analyze.dependency import ( + DetectedRelation, + detect_dependencies, +) +from graph_tool_call.core.tool import ToolSchema +from graph_tool_call.ontology.schema import Confidence, RelationType +from graph_tool_call.tool_graph import ToolGraph + +# Thresholds — same numbers graphify uses for INFERRED vs AMBIGUOUS. +# EXTRACTED additionally requires layer == 1 (deterministic structural). +DEFAULT_CONF_EXTRACTED = 0.85 +DEFAULT_CONF_INFERRED = 0.85 +DEFAULT_CONF_AMBIGUOUS = 0.70 + + +def bucket_confidence( + layer: int, + conf_score: float, + *, + extracted_min: float = DEFAULT_CONF_EXTRACTED, + inferred_min: float = DEFAULT_CONF_INFERRED, + ambiguous_min: float = DEFAULT_CONF_AMBIGUOUS, +) -> Confidence | None: + """Bucket a (layer, conf_score) pair into a Confidence label. 
+ + layer == 1 (path/CRUD/$ref) AND conf >= extracted_min -> EXTRACTED + conf >= inferred_min -> INFERRED + ambiguous_min <= conf < inferred_min -> AMBIGUOUS + else -> None (dropped) + """ + if conf_score >= extracted_min and layer == 1: + return Confidence.EXTRACTED + if conf_score >= inferred_min: + return Confidence.INFERRED + if conf_score >= ambiguous_min: + return Confidence.AMBIGUOUS + return None + + +# --------------------------------------------------------------------------- +# $ref preservation +# +# Library ``ingest_openapi`` calls ``_resolve_refs`` which inlines every +# ``$ref`` pointer into its target schema. That makes life easier for runtime +# users (they get full schemas, no traversal needed) but it ERASES the signal +# ``_detect_shared_schemas`` relies on — that detector walks metadata looking +# for literal ``$ref`` strings to spot tools sharing a DTO. +# +# This helper rescans the raw spec, captures refs per operation BEFORE they're +# resolved, applies a frequency filter (drop common wrappers + singletons), +# and re-injects them as ``__refs__`` markers into each tool's metadata so +# ``_collect_refs`` finds them. Identical algorithm to xgen-workflow's +# ``swagger_tool_generator._collect_operation_refs``. 
# HTTP verbs that may hold an operation object under an OpenAPI path item.
_HTTP_METHODS = ("get", "post", "put", "patch", "delete", "head", "options")


def _scan_refs(obj: Any) -> set[str]:
    """Collect every ``$ref`` pointer string reachable from a schema fragment.

    Walks nested dicts and lists with an explicit stack, so nesting depth
    is not limited by the interpreter's recursion limit. A ``$ref`` key
    whose value is not a string is descended into like any other value.
    """
    refs: set[str] = set()
    pending: list[Any] = [obj]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            for key, value in node.items():
                if key == "$ref" and isinstance(value, str):
                    refs.add(value)
                else:
                    pending.append(value)
        elif isinstance(node, list):
            pending.extend(node)
    return refs


def preserve_refs_for_detection(
    tools: "list[ToolSchema]",
    raw_spec: dict[str, Any],
    *,
    min_freq: int = 2,
    max_freq_ratio: float = 0.3,
) -> int:
    """Inject ``__refs__`` markers into tool metadata so shared-schema detection fires.

    Scans ``raw_spec["paths"]`` for the ``$ref`` pointers each operation
    uses (request side: ``requestBody`` + ``parameters``; response side:
    ``responses``), keeps only refs whose usage count lies in
    ``[min_freq, max(min_freq, int(total_ops * max_freq_ratio))]``, and
    writes the survivors onto the matching tool:

    - response refs -> ``metadata["response_schema"]["__refs__"]``
      (only when ``response_schema`` is absent/empty or a dict; the dict
      is copied before the key is added)
    - request refs  -> ``metadata["request_body_refs"]``

    Why filter:
      - Refs used by nearly every operation (common wrappers) would link
        every tool to every other tool.
      - Refs used once cannot form an edge.

    Tools are matched to operations by lower-cased ``metadata["method"]``
    and exact ``metadata["path"]``.

    Args:
        tools: Tools to annotate; mutated in place.
        raw_spec: Spec dict that still contains ``$ref`` pointers.
        min_freq: Minimum number of operations that must use a ref.
        max_freq_ratio: Upper bound on usage, as a fraction of all
            operations that use at least one ref.

    Returns:
        The number of tools whose metadata actually received markers.
    """
    paths = raw_spec.get("paths") or {}
    if not isinstance(paths, dict):
        return 0

    raw_per_op: dict[tuple[str, str], tuple[set[str], set[str]]] = {}
    freq: Counter[str] = Counter()

    for path, item in paths.items():
        if not isinstance(item, dict):
            continue
        for method in _HTTP_METHODS:
            op = item.get(method)
            if not isinstance(op, dict):
                continue
            req = _scan_refs(op.get("requestBody")) | _scan_refs(op.get("parameters"))
            resp = _scan_refs(op.get("responses"))
            if not (req or resp):
                continue
            raw_per_op[(method, path)] = (req, resp)
            # One count per operation, even if a ref shows up on both sides.
            freq.update(req | resp)

    if not raw_per_op:
        return 0

    total_ops = len(raw_per_op)
    ceiling = max(min_freq, int(total_ops * max_freq_ratio))

    def _useful(r: str) -> bool:
        return min_freq <= freq[r] <= ceiling

    op_refs: dict[tuple[str, str], tuple[list[str], list[str]]] = {}
    for k, (req, resp) in raw_per_op.items():
        rq = sorted(r for r in req if _useful(r))
        rp = sorted(r for r in resp if _useful(r))
        if rq or rp:
            op_refs[k] = (rq, rp)

    updated = 0
    for tool in tools:
        md = tool.metadata or {}
        method = str(md.get("method") or "").lower()
        path = str(md.get("path") or "")
        refs = op_refs.get((method, path))
        if not refs:
            continue
        rq, rp = refs
        changed = False
        if rp:
            rs = md.get("response_schema") or {}
            if isinstance(rs, dict):
                rs = dict(rs)
                rs["__refs__"] = [{"$ref": r} for r in rp]
                md["response_schema"] = rs
                changed = True
        if rq:
            md["request_body_refs"] = [{"$ref": r} for r in rq]
            changed = True
        # Count a tool only when a marker was really written; a non-dict
        # response_schema with no request refs leaves the tool untouched.
        if changed:
            tool.metadata = md
            updated += 1

    return updated
On every rebuild we derive the +# corresponding workflow edges into the graphify graph so ``_find_producer`` +# can score them as a first-class signal — no separate lookup, no two-system +# sync drift. The frontend keeps reading ``ai_metadata.pairs_well_with`` +# directly (single read path, no UI churn). +# +# Confidence mapping reflects the trust we place in each source: +# PairHint.source == "manual" → EXTRACTED (operator deliberately curated) +# PairHint.source == "auto" → INFERRED (LLM Pass 2 high-confidence) +# anything else / missing → INFERRED (legacy entries default safe) +# +# Layer is set to 2 because pair hints are not structural (path/$ref/CRUD) +# even when curated — they encode workflow semantics, which sits one level +# above structural inference in the graphify confidence model. +# --------------------------------------------------------------------------- + + +def _apply_pair_hints( + tg: ToolGraph, + schemas: list[ToolSchema], +) -> dict[str, int]: + """Convert ``metadata.ai_metadata.pairs_well_with`` into graphify edges. + + Skips pairs whose target tool isn't in the current graph (cross-source + enrichment can list pairs that haven't been ingested yet) and self-pairs. + Skips when the same (src, tgt) pair already carries a structural relation + from ``detect_dependencies`` UNLESS the new pair is operator-curated + (``source="manual"``) — operator intent overrides automatic detection. 
+ """ + stats = {"manual": 0, "auto": 0, "skipped_target_missing": 0, + "skipped_self": 0, "skipped_existing_structural": 0} + tool_names = set(tg.tools.keys()) + + for s in schemas: + ai = (s.metadata or {}).get("ai_metadata") or {} + pairs = ai.get("pairs_well_with") or [] + if not isinstance(pairs, list): + continue + for p in pairs: + if not isinstance(p, dict): + continue + target = str(p.get("tool") or "").strip() + if not target: + continue + if target == s.name: + stats["skipped_self"] += 1 + continue + if target not in tool_names: + stats["skipped_target_missing"] += 1 + continue + + source = str(p.get("source") or "auto").strip().lower() + is_manual = source == "manual" + confidence = Confidence.EXTRACTED if is_manual else Confidence.INFERRED + reason = str(p.get("reason") or "")[:200] + + # Existing-edge policy: if detect_dependencies already produced + # an edge here we keep it unless the operator is overriding. + if tg.graph.has_edge(s.name, target): + if not is_manual: + stats["skipped_existing_structural"] += 1 + continue + + try: + tg.add_relation( + s.name, + target, + RelationType.COMPLEMENTARY, + confidence=confidence, + layer=2, + evidence=f"pair[{source}]: {reason}" if reason else f"pair[{source}]", + ) + stats["manual" if is_manual else "auto"] += 1 + except (KeyError, ValueError): + stats["skipped_target_missing"] += 1 + + return stats + + +def _source_label(schema: ToolSchema) -> str: + """Return the source label that distinguishes which OpenAPI spec a tool came from. + + xgen-workflow tags each tool with ``metadata.source_label`` (e.g. "order", + "claim"). When that's absent, fall back to the first path segment so + cross-source detection still works for libraries used outside xgen. 
def ingest_openapi_graphify(
    schemas: "list[ToolSchema]",
    *,
    extracted_min: float = DEFAULT_CONF_EXTRACTED,
    inferred_min: float = DEFAULT_CONF_INFERRED,
    ambiguous_min: float = DEFAULT_CONF_AMBIGUOUS,
    spec: dict[str, Any] | None = None,
    raw_spec: dict[str, Any] | None = None,
) -> "tuple[ToolGraph, dict[str, Any]]":
    """Build a graphify-style ToolGraph from a list of ToolSchemas.

    Steps:
      1. Add every schema as a tool node.
      2. Optionally re-inject ``$ref`` markers from ``raw_spec``.
      3. Run ``detect_dependencies`` at ``min_confidence=0.0`` and bucket
         each relation with ``bucket_confidence``; unbucketed relations
         are counted as ``dropped``.
      4. Derive extra edges from ``ai_metadata.pairs_well_with`` via
         ``_apply_pair_hints`` and fold their counts into the stats.

    Parameters
    ----------
    schemas:
        Tools to ingest. ``metadata.source_label`` (or, failing that, the
        first static path segment) is used for cross-source counting.
    extracted_min / inferred_min / ambiguous_min:
        Confidence bucket thresholds (see ``bucket_confidence``).
    spec:
        Optional spec dict, forwarded unchanged to ``detect_dependencies``.
    raw_spec:
        Optional spec dict that still contains ``$ref`` pointers. When
        supplied, ``preserve_refs_for_detection`` runs first and mutates
        the schemas' metadata in place.

    Returns
    -------
    (ToolGraph, edge_stats):
        ``edge_stats`` keys:
          EXTRACTED, INFERRED, AMBIGUOUS, dropped: int counts
          by_relation:    {relation_value: int}
          cross_source:   int  (edges whose endpoints carry different labels)
          tool_count, edge_count: int
          refs_preserved: int  (return value of preserve_refs_for_detection)
          pair_edges:     dict returned by ``_apply_pair_hints``
                          (absent when fewer than two schemas are given)
    """
    tg = ToolGraph()
    for s in schemas:
        tg.add_tool(s)

    label_by_name = {s.name: _source_label(s) for s in schemas}

    stats: dict[str, Any] = {
        "EXTRACTED": 0,
        "INFERRED": 0,
        "AMBIGUOUS": 0,
        "dropped": 0,
        "by_relation": {},
        "cross_source": 0,
        "tool_count": len(schemas),
        "edge_count": 0,
        "refs_preserved": 0,
    }

    # A single tool (or none) cannot have relations — return early.
    if len(schemas) < 2:
        return tg, stats

    def _is_cross_source(src: str, tgt: str) -> bool:
        src_label = label_by_name.get(src, "")
        tgt_label = label_by_name.get(tgt, "")
        return bool(src_label and tgt_label and src_label != tgt_label)

    # Optional: rescue the shared-schema signal before detection runs.
    if raw_spec is not None:
        stats["refs_preserved"] = preserve_refs_for_detection(schemas, raw_spec)

    # min_confidence=0.0 so we see every candidate; we re-bucket here.
    relations: list[DetectedRelation] = detect_dependencies(
        schemas, spec, min_confidence=0.0
    )

    seen: set[tuple[str, str, str]] = set()  # (src, tgt, relation_value)
    for rel in relations:
        bucket = bucket_confidence(
            rel.layer,
            rel.confidence,
            extracted_min=extracted_min,
            inferred_min=inferred_min,
            ambiguous_min=ambiguous_min,
        )
        if bucket is None:
            stats["dropped"] += 1
            continue

        rel_value = (
            rel.relation_type.value
            if hasattr(rel.relation_type, "value")
            else str(rel.relation_type)
        )
        key = (rel.source, rel.target, rel_value)
        if key in seen:
            # Defensive: skip an exact repeat of an already-added relation.
            continue
        seen.add(key)

        try:
            tg.add_relation(
                rel.source,
                rel.target,
                rel.relation_type,
                confidence=bucket,
                conf_score=rel.confidence,
                layer=rel.layer,
                evidence=rel.evidence,
            )
        except (KeyError, ValueError):
            # Relation could not be added (e.g. endpoint not in graph) — skip.
            stats["dropped"] += 1
            continue

        stats[bucket.value] += 1
        stats["by_relation"][rel_value] = stats["by_relation"].get(rel_value, 0) + 1

        if _is_cross_source(rel.source, rel.target):
            stats["cross_source"] += 1

    # Snapshot, BEFORE pair hints are applied, which hinted (src, tgt)
    # pairs are not linked yet. Only those can become genuinely new edges;
    # pairs that already had an edge were counted in the loop above, and a
    # repeated hint for the same pair must not be counted twice either.
    new_pair_candidates: list[tuple[str, str]] = []
    seen_pairs: set[tuple[str, str]] = set()
    for s in schemas:
        ai = (s.metadata or {}).get("ai_metadata") or {}
        pairs = ai.get("pairs_well_with") or []
        if not isinstance(pairs, list):
            continue
        for p in pairs:
            if not isinstance(p, dict):
                continue
            tgt = str(p.get("tool") or "").strip()
            if not tgt or tgt == s.name or tgt not in tg.tools:
                continue
            pair_key = (s.name, tgt)
            if pair_key in seen_pairs or tg.graph.has_edge(s.name, tgt):
                continue
            seen_pairs.add(pair_key)
            new_pair_candidates.append(pair_key)

    # Derive workflow edges from ai_metadata.pairs_well_with.
    pair_stats = _apply_pair_hints(tg, schemas)
    stats["pair_edges"] = pair_stats
    # Roll the pair edges into the global confidence/by_relation counters.
    # NOTE(review): a manual hint that overrides an existing edge is added
    # to EXTRACTED here without removing the earlier edge's bucket count —
    # whether add_relation replaces or adds is not visible from this block.
    manual_added = pair_stats.get("manual", 0)
    auto_added = pair_stats.get("auto", 0)
    stats["EXTRACTED"] += manual_added
    stats["INFERRED"] += auto_added
    if manual_added or auto_added:
        stats["by_relation"]["complementary"] = (
            stats["by_relation"].get("complementary", 0) + manual_added + auto_added
        )

    # Count cross-source only for pair edges that are new in the graph.
    for src, tgt in new_pair_candidates:
        if tg.graph.has_edge(src, tgt) and _is_cross_source(src, tgt):
            stats["cross_source"] += 1

    stats["edge_count"] = tg.graph.edge_count()
    return tg, stats
seed = top-5 of BM25(query) (substring fallback if BM25 returns empty) + 2. weights = INTENT_RELATION_WEIGHTS[dominant_intent] or DEFAULT + 3. score = rel_weight[rel] * CONF_FACTOR[confidence] * decay(depth) + CONF_FACTOR = {EXTRACTED: 1.0, INFERRED: 0.7, AMBIGUOUS: 0.4, None: 0.5} + decay(d) = 1 / (0.5*d + 1) + 4. BFS from seeds, depth=2, accumulate max score per neighbour + 5. history-aware demote (used tools * 0.6) + 6. render_subgraph_text(top_k nodes + edges, token_budget) + +Why this works without embeddings: + - The graph carries the semantic signal (CRUD chains, $ref data flow, + cross-resource matches) — once a relationship is in the graph, traversal + finds it. + - Confidence labels let the score down-weight guesses without dropping them; + AMBIGUOUS edges still appear, just behind EXTRACTED ones. + - Token-budgeted rendering means an LLM gets a compact, structured context + (not a list of tool JSON blobs) and can decide chains via the EDGE lines. +""" + +from __future__ import annotations + +import re +import unicodedata +from typing import Any + +from graph_tool_call.core.protocol import GraphEngine +from graph_tool_call.core.tool import ToolSchema +from graph_tool_call.ontology.schema import ( + DEFAULT_RELATION_WEIGHTS, + INTENT_RELATION_WEIGHTS, + NodeType, + RelationType, +) +from graph_tool_call.retrieval.intent import classify_intent +from graph_tool_call.tool_graph import ToolGraph + +# Score multiplier per confidence bucket. EXTRACTED edges are deterministic +# (path/CRUD/$ref) and trusted at 1.0; INFERRED is heuristic but still +# high-confidence; AMBIGUOUS gets a strong penalty so it's surfaced for +# review without dominating EXTRACTED chains. +# +# Edges added by callers without a confidence attr (e.g. legacy code paths) +# get the same weight as the no-bucket fallback (0.5) — neither rewarded +# nor heavily penalised. 
+CONF_FACTOR: dict[str | None, float] = { + "EXTRACTED": 1.0, + "INFERRED": 0.7, + "AMBIGUOUS": 0.4, + None: 0.5, +} + +_DEFAULT_DEPTH = 2 +_DEFAULT_TOP_K = 10 +_DEFAULT_BUDGET = 2000 +_HISTORY_DEMOTE = 0.6 + + +# --------------------------------------------------------------------------- +# Seed selection +# --------------------------------------------------------------------------- + + +def _strip_diacritics(text: str) -> str: + nfkd = unicodedata.normalize("NFKD", text) + return "".join(c for c in nfkd if not unicodedata.combining(c)) + + +def _substring_seeds( + tools: dict[str, ToolSchema], + query: str, + *, + limit: int = 5, +) -> list[tuple[str, float]]: + """Substring fallback when BM25 returns no hits (very short or non-Latin queries).""" + q = _strip_diacritics(query).lower() + terms = [t for t in re.split(r"[\s_\-/.,;:!?()]+", q) if t and len(t) > 1] + scored: list[tuple[str, float]] = [] + for name, tool in tools.items(): + nname = _strip_diacritics(name).lower() + ndesc = _strip_diacritics(tool.description or "").lower() + score = ( + sum(1.0 for t in terms if t in nname) + + 0.5 * sum(1.0 for t in terms if t in ndesc) + ) + if score > 0: + scored.append((name, score)) + scored.sort(key=lambda x: x[1], reverse=True) + return scored[:limit] + + +def _bm25_seeds(tg: ToolGraph, query: str, *, limit: int = 5) -> list[tuple[str, float]]: + """Top-N BM25 hits as seeds. 
def _bfs_from_seeds(
    graph: "GraphEngine",
    seed_scores: list[tuple[str, float]],
    *,
    depth: int,
    rel_weights: dict[str, float],
) -> tuple[dict[str, float], list[tuple[str, str]]]:
    """Confidence-weighted BFS. Returns (scores, edges_visited).

    Score policy:
      seeds: seed score divided by the largest seed score (top seed = 1.0)
      neighbour at depth d via edge of weight w and confidence c:
        score(neighbour) = max(prev, parent_score * w * CONF_FACTOR[c] * 1/(0.5*d + 1))

    The ``max`` is taken over every parent that reaches the neighbour at
    the depth where it is first discovered. Without this, the first
    parent to be iterated would fix the score and a stronger edge from a
    later parent at the same depth would be ignored, making the ranking
    depend on edge iteration order. Once a depth level is finished, a
    node's score is final; seeds are never rescored.

    Relations missing from ``rel_weights`` weigh 0.3; confidence labels
    missing from ``CONF_FACTOR`` use ``CONF_FACTOR[None]``.

    TOOL nodes are scored. CATEGORY/DOMAIN nodes are passthrough: they are
    traversed but not scored, so tools reached through them inherit a
    parent score of 0.0.

    Args:
        graph: Graph to traverse (``has_node``, ``get_edges_from``,
            ``get_node_attrs`` are used).
        seed_scores: ``(node, score)`` pairs; nodes absent from the graph
            are ignored.
        depth: Maximum number of hops from a seed.
        rel_weights: Weight per relation key.

    Returns:
        ``(scores, edges_visited)`` — score per seed/tool node reached,
        and the ``(src, tgt)`` edges that set or raised a tool's score.
    """
    if not seed_scores:
        return {}, []

    # ``or 1.0`` guards against dividing by a zero top score.
    max_seed = max((s for _, s in seed_scores), default=1.0) or 1.0
    scores: dict[str, float] = {
        n: s / max_seed
        for n, s in seed_scores
        if graph.has_node(n)
    }
    visited: set[str] = set(scores)
    frontier: list[str] = list(scores)
    edges_visited: list[tuple[str, str]] = []

    for d in range(1, depth + 1):
        decay = 1.0 / (0.5 * d + 1)
        next_frontier: list[str] = []
        # Tool nodes first reached at this depth; they may still be
        # improved by another parent of the same depth.
        found_this_depth: set[str] = set()
        for node in frontier:
            parent_score = scores.get(node, 0.0)
            try:
                edges = graph.get_edges_from(node, direction="both")
            except (KeyError, ValueError):
                continue
            for src, tgt, attrs in edges:
                neighbour = tgt if src == node else src
                first_visit = neighbour not in visited
                if not first_visit and neighbour not in found_this_depth:
                    # Settled at an earlier depth (or a seed) — keep as is.
                    continue
                neighbour_type = graph.get_node_attrs(neighbour).get("node_type")

                if neighbour_type == NodeType.TOOL:
                    rel_key = _normalize_relation_key(attrs.get("relation"))
                    rel_w = rel_weights.get(rel_key, 0.3)
                    conf_factor = CONF_FACTOR.get(
                        attrs.get("confidence"), CONF_FACTOR[None]
                    )
                    # Multiplying by the parent's score lets a strong seed
                    # lift its neighbours more than a weak seed does.
                    score = parent_score * rel_w * conf_factor * decay
                    previous = scores.get(neighbour, 0.0)
                    if first_visit:
                        scores[neighbour] = max(previous, score)
                        edges_visited.append((src, tgt))
                        next_frontier.append(neighbour)
                        visited.add(neighbour)
                        found_this_depth.add(neighbour)
                    elif score > previous:
                        # A better same-depth path: raise the score and
                        # record the edge that justifies it.
                        scores[neighbour] = score
                        edges_visited.append((src, tgt))
                elif first_visit and neighbour_type in (
                    NodeType.CATEGORY,
                    NodeType.DOMAIN,
                ):
                    # Passthrough — visit but don't score.
                    next_frontier.append(neighbour)
                    visited.add(neighbour)
        frontier = next_frontier
        if not frontier:
            break

    return scores, edges_visited
confidence in [], evidence in (...).""" + rel = attrs.get("relation") + rel_str = rel.value if hasattr(rel, "value") else str(rel) + conf = attrs.get("confidence", "") + conf_str = f" [{conf}]" if conf else "" + line = f"EDGE {u} --{rel_str}{conf_str}--> {v}" + evidence = attrs.get("evidence") + if evidence: + line += f" ({evidence})" + return line + + +def render_subgraph_text( + tg: ToolGraph, + nodes: set[str] | list[str], + edges: list[tuple[str, str]] | None = None, + *, + token_budget: int = _DEFAULT_BUDGET, + sort_by_score: dict[str, float] | None = None, +) -> str: + """Render the matched subgraph as ``NODE ...`` / ``EDGE ...`` lines. + + Approx 3 chars per token is the budget conversion. When the rendering + overflows the budget, the tail is cut and a ``... (truncated)`` line + is appended. + + sort_by_score: if provided, NODE lines are emitted in descending score + order so the LLM sees the most relevant tools first. + + edges: optional hint listing edges visited during BFS — purely for + ordering. Whether or not this is supplied, ALL graph edges between any + pair of chosen nodes are emitted so the LLM sees the full local + structure (matching graphify's behaviour). + """ + char_budget = token_budget * 3 + node_set: set[str] = set(nodes) + + # Order nodes: by retrieval score (desc) if known, else by name. + if sort_by_score: + node_order = sorted( + node_set, key=lambda n: (-sort_by_score.get(n, 0.0), n) + ) + else: + node_order = sorted(node_set) + + lines: list[str] = [] + for n in node_order: + if not tg.graph.has_node(n): + continue + attrs = tg.graph.get_node_attrs(n) + tool = tg.tools.get(n) + lines.append(_node_line(n, tool, attrs)) + + # Walk all graph edges between chosen nodes (not just BFS visited ones) + # so the LLM gets the complete local structure. BFS-visited edges naturally + # come first when we sort, ensuring no surprise gaps. 
+ seen_edges: set[tuple[str, str]] = set() + edge_lines: list[str] = [] + for u in node_order: + if not tg.graph.has_node(u): + continue + try: + outgoing = tg.graph.get_edges_from(u, direction="out") + except (KeyError, ValueError): + continue + for src, tgt, attrs in outgoing: + if tgt not in node_set: + continue + key = (src, tgt) + if key in seen_edges: + continue + seen_edges.add(key) + edge_lines.append(_edge_line(src, tgt, attrs)) + + lines.extend(edge_lines) + + output = "\n".join(lines) + if len(output) > char_budget: + # Cut at the last newline that fits, then append a marker. Keep the + # marker even if it pushes us slightly over the char budget — the + # token budget is a soft cap. + cut = output[:char_budget].rsplit("\n", 1)[0] + output = cut + f"\n... (truncated to ~{token_budget} token budget)" + return output + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +def retrieve_graphify( + tg: ToolGraph, + query: str, + *, + top_k: int = _DEFAULT_TOP_K, + depth: int = _DEFAULT_DEPTH, + token_budget: int = _DEFAULT_BUDGET, + history: list[str] | None = None, +) -> dict[str, Any]: + """Retrieve tools for a natural-language query using graph traversal only. + + Parameters + ---------- + tg: + A graphify-style ``ToolGraph``. Edges should carry ``confidence`` + attrs (EXTRACTED/INFERRED/AMBIGUOUS); edges without one get the + neutral 0.5 multiplier. + query: + Natural-language search. + top_k: + Maximum tools in the result set (and the rendered subgraph). + depth: + BFS depth from seeds. 2 is graphify's default and works for most + workflow chains (createX -> getX -> doSomethingWithX). + token_budget: + Char-budget for the rendered text (~3 chars/token). + history: + Tool names already called in this session — they are demoted (×0.6) + to encourage progress through a workflow rather than re-suggesting. 
+ + Returns + ------- + dict with keys: + - results: list of {name, score, tool: {...}} sorted desc. + - subgraph_text: the LLM-ready NODE/EDGE rendering. + - intent: {dominant: 'read'|'write'|'delete'|'neutral', read, write, delete} + - stats: {seeds: [...], visited_nodes: int, visited_edges: int} + + Note: prerequisite chain construction (e.g. listOrders → getOrder → cancelOrder) + is NOT this function's job — it lives in Stage 2 ``synthesize_plan`` which + consumes the graph this module produces. retrieve_graphify only finds the + primary candidates; chain assembly is downstream. + """ + if not query or not tg.tools: + return { + "results": [], + "subgraph_text": "", + "intent": {"dominant": "neutral", "read": 0.0, "write": 0.0, "delete": 0.0}, + "stats": {"seeds": [], "visited_nodes": 0, "visited_edges": 0}, + } + + # 1) Seeds + seeds_with_scores = _select_seeds(tg, query, limit=5) + seed_names = [s for s, _ in seeds_with_scores] + + if not seed_names: + return { + "results": [], + "subgraph_text": "", + "intent": {"dominant": "neutral", "read": 0.0, "write": 0.0, "delete": 0.0}, + "stats": {"seeds": [], "visited_nodes": 0, "visited_edges": 0}, + } + + # 2) Intent → relation weight map + rel_weights, dominant = _intent_weights(query) + from graph_tool_call.retrieval.intent import classify_intent # noqa: I001 (re-import OK) + + intent_obj = classify_intent(query) + + # 3) BFS — pass full (name, score) pairs so seed scores reflect BM25 ranking + scores, edges_visited = _bfs_from_seeds( + tg.graph, + seeds_with_scores, + depth=depth, + rel_weights=rel_weights, + ) + + # 4) History demote + if history: + for h in history: + if h in scores: + scores[h] *= _HISTORY_DEMOTE + + # 5) Filter to TOOL nodes only and rank + tool_scores: dict[str, float] = { + n: s for n, s in scores.items() if n in tg.tools + } + ranked = sorted(tool_scores.items(), key=lambda x: x[1], reverse=True)[:top_k] + chosen_names: set[str] = {n for n, _ in ranked} + + # 6) Render + subgraph_text = 
render_subgraph_text( + tg, + chosen_names, + edges_visited, + token_budget=token_budget, + sort_by_score=tool_scores, + ) + + results = [ + { + "name": name, + "score": round(score, 4), + "tool": tg.tools[name].to_dict() if name in tg.tools else None, + } + for name, score in ranked + ] + + return { + "results": results, + "subgraph_text": subgraph_text, + "intent": { + "dominant": dominant, + "read": round(intent_obj.read_intent, 3), + "write": round(intent_obj.write_intent, 3), + "delete": round(intent_obj.delete_intent, 3), + }, + "stats": { + "seeds": seed_names, + "visited_nodes": len(scores), + "visited_edges": len(edges_visited), + }, + } diff --git a/graph_tool_call/ingest/openapi.py b/graph_tool_call/ingest/openapi.py index 41ffe7e..f914fd4 100644 --- a/graph_tool_call/ingest/openapi.py +++ b/graph_tool_call/ingest/openapi.py @@ -134,6 +134,41 @@ def _schema_type(schema: dict[str, Any]) -> str: return _TYPE_MAP.get(schema.get("type", "string"), "string") +def _pick_content_schema(content: dict[str, Any]) -> dict[str, Any]: + """Pick a usable schema from an OpenAPI ``content`` object. + + OpenAPI 3.x lets a request body / response declare schemas under any + media-type key. The preferred order is: + + 1. ``application/json`` — most common + 2. ``application/*+json`` (e.g. hal+json) — JSON variants + 3. ``*/*`` — Spring/SpringDoc default when + the operation doesn't pin a + specific content type + 4. first available media-type — last resort + + Returning the schema dict (possibly empty). The earlier code only + looked at ``application/json`` and silently dropped everything else, + which produced empty ``response_schema`` for every Spring endpoint + that uses the default ``*/*`` (real-world failure: x2bee Order API, + where this caused PathSynthesizer to find zero producers). 
+ """ + if not isinstance(content, dict) or not content: + return {} + if "application/json" in content: + return (content["application/json"] or {}).get("schema") or {} + for ct, val in content.items(): + if isinstance(ct, str) and ct.endswith("+json"): + return (val or {}).get("schema") or {} + if "*/*" in content: + return (content["*/*"] or {}).get("schema") or {} + # Last resort: the first content type with a schema. + for val in content.values(): + if isinstance(val, dict) and val.get("schema"): + return val["schema"] + return {} + + # --------------------------------------------------------------------------- # Operation -> ToolSchema # --------------------------------------------------------------------------- @@ -241,39 +276,109 @@ def _extract_params_openapi3( *, required_only: bool = False, ) -> list[ToolParameter]: - """Extract parameters from an OpenAPI 3.x operation.""" + """Extract parameters from an OpenAPI 3.x operation. + + Spring/SpringDoc gotcha: when a controller takes a `@ModelAttribute` + DTO via query string, the spec sometimes lists BOTH the wrapper + object AND its inner fields as separate query parameters + (``regularOrderDetailRequest`` ``in=query`` ``type=object`` AND + ``rglrDeliNo`` ``in=query`` ``type=string``). Treating the wrapper + as a real input field poisons downstream producer matching: nothing + in the API ever returns a value named after the wrapper class, so + PathSynthesizer raises ``UnsatisfiableField`` on a phantom field. + + Strategy: drop wrapper parameters when their inner properties are + already exposed as siblings; otherwise expand the wrapper into its + leaf properties so callers see the real input names. + """ params: list[ToolParameter] = [] + raw_parameters = list(operation.get("parameters", [])) + # Pre-collect names from non-object parameters — used to detect when + # a wrapper's inner property is already exposed alongside it. 
+ sibling_names: set[str] = { + str(p.get("name") or "") + for p in raw_parameters + if isinstance(p, dict) and _schema_type(p.get("schema", {}) or {}) not in ("object",) + } + # Path / query / header / cookie parameters - for p in operation.get("parameters", []): + for p in raw_parameters: if "name" not in p: continue # skip malformed parameters (missing required 'name' field) schema = p.get("schema", {}) is_required = p.get("required", False) + ptype = _schema_type(schema) + + # Wrapper-object/array query parameter handling. + # type=object → wrapper itself (Spring @ModelAttribute style). + # type=array of objects → wrapper used to send a list of structured + # records (less common but seen in some Spring specs); we expand the + # element schema's properties. Primitive arrays (array of integers / + # strings) are real list inputs and are NOT expanded here — those + # belong to the caller as a single multi-value field. + if ptype in ("object", "array") and p.get("in") == "query": + wrapper_props: dict[str, Any] = {} + wrapper_required: set[str] = set() + if ptype == "object": + wrapper_props = (schema.get("properties") or {}) if isinstance(schema, dict) else {} + wrapper_required = set(schema.get("required") or []) + else: # array + items = (schema.get("items") or {}) if isinstance(schema, dict) else {} + if isinstance(items, dict) and items.get("type") == "object": + wrapper_props = items.get("properties") or {} + wrapper_required = set(items.get("required") or []) + # else: primitive-element array — don't expand, treat as real input + if wrapper_props: + # If every inner property is already a sibling parameter, + # drop the wrapper entirely (deduplication). + if all(prop in sibling_names for prop in wrapper_props): + continue + # Otherwise expand the wrapper into individual leaves so + # producer matching has real field names to chase. 
+ for prop_name, prop_schema in wrapper_props.items(): + if prop_name in sibling_names: + continue # don't double-list ones already exposed + inner_required = prop_name in wrapper_required + if required_only and not inner_required: + continue + inner_type = _schema_type(prop_schema or {}) + inner_desc = (prop_schema or {}).get("description", "") or "" + params.append( + ToolParameter( + name=prop_name, + type=inner_type, + description=inner_desc, + required=inner_required, + enum=(prop_schema or {}).get("enum"), + ) + ) + continue # wrapper itself is not added + if required_only and not is_required: continue desc = p.get("description", "") or "" # object/array 타입이면 nested fields를 description에 펼쳐서 # LLM이 정확한 필드명(예: searchWord)을 알 수 있게 한다. - if _schema_type(schema) in ("object", "array"): + if ptype in ("object", "array"): nested = _summarize_object_schema(schema) if nested: desc = (desc + "\nFields:\n" + nested).strip() if desc else f"Fields:\n{nested}" params.append( ToolParameter( name=p["name"], - type=_schema_type(schema), + type=ptype, description=desc, required=is_required, enum=schema.get("enum"), ) ) - # requestBody + # requestBody — pick the most specific schema across declared media types + # (Spring/SpringDoc commonly emits */* — see _pick_content_schema notes). request_body = operation.get("requestBody", {}) content = request_body.get("content", {}) - json_content = content.get("application/json", {}) - body_schema = json_content.get("schema", {}) + body_schema = _pick_content_schema(content) body_required = set(body_schema.get("required", [])) for prop_name, prop_schema in body_schema.get("properties", {}).items(): is_required = prop_name in body_required @@ -429,21 +534,24 @@ def _operation_to_tool( else: parameters = _extract_params_openapi3(operation, resolved_spec, required_only=required_only) - # Build response schema metadata + # Build response schema metadata. 
Walk responses in success-code order + # and use _pick_content_schema so we don't drop schemas declared under + # */*, application/*+json, or other non-JSON media types. responses = operation.get("responses", {}) response_schema: dict[str, Any] = {} for code in ("200", "201", "default"): - if code in responses: - resp = responses[code] - # Swagger 2.0 - if "schema" in resp: - response_schema = resp["schema"] - break - # OpenAPI 3.x - resp_content = resp.get("content", {}) - if "application/json" in resp_content: - response_schema = resp_content["application/json"].get("schema", {}) - break + if code not in responses: + continue + resp = responses[code] or {} + # Swagger 2.0 puts the schema directly on the response object. + if "schema" in resp and isinstance(resp.get("schema"), dict): + response_schema = resp["schema"] + break + # OpenAPI 3.x: inspect the content map. + picked = _pick_content_schema(resp.get("content") or {}) + if picked: + response_schema = picked + break metadata: dict[str, Any] = { "source": "openapi", diff --git a/graph_tool_call/ontology/builder.py b/graph_tool_call/ontology/builder.py index f6fb1a7..517d730 100644 --- a/graph_tool_call/ontology/builder.py +++ b/graph_tool_call/ontology/builder.py @@ -5,7 +5,7 @@ from graph_tool_call.core.dict_graph import DictGraph from graph_tool_call.core.protocol import GraphEngine from graph_tool_call.core.tool import ToolSchema -from graph_tool_call.ontology.schema import NodeType, RelationType +from graph_tool_call.ontology.schema import Confidence, NodeType, RelationType class OntologyBuilder: @@ -64,11 +64,36 @@ def add_relation( target: str, relation: str | RelationType, weight: float = 1.0, + *, + confidence: str | Confidence | None = None, + conf_score: float | None = None, + layer: int | None = None, + evidence: str | None = None, ) -> None: - """Add a directed relation between two nodes.""" + """Add a directed relation between two nodes. 
+ + Optional graphify-style attrs (all default None — existing callers + unaffected): + + confidence: Confidence label (EXTRACTED / INFERRED / AMBIGUOUS). + conf_score: Raw 0.0–1.0 score from the upstream detector. + layer: 1=structural (path/CRUD/$ref), 2=heuristic (name/RPC). + evidence: Human-readable reason; capped at 200 chars to avoid bloat. + """ if isinstance(relation, str): relation = RelationType(relation) - self._graph.add_edge(source, target, relation=relation, weight=weight) + if isinstance(confidence, Confidence): + confidence = confidence.value + attrs: dict = {"relation": relation, "weight": weight} + if confidence is not None: + attrs["confidence"] = confidence + if conf_score is not None: + attrs["conf_score"] = float(conf_score) + if layer is not None: + attrs["layer"] = int(layer) + if evidence: + attrs["evidence"] = evidence[:200] + self._graph.add_edge(source, target, **attrs) # --- queries --- diff --git a/graph_tool_call/ontology/llm_provider.py b/graph_tool_call/ontology/llm_provider.py index 76e26bc..6ee8b4e 100644 --- a/graph_tool_call/ontology/llm_provider.py +++ b/graph_tool_call/ontology/llm_provider.py @@ -68,10 +68,23 @@ class FieldSemantic: @dataclass class PairHint: - """LLM-suggested tool that pairs with the current tool.""" + """A tool that pairs with the current tool in a workflow. + + ``source`` distinguishes ownership so re-running auto enrichment doesn't + overwrite operator curation: + - ``"auto"`` — produced by Pass 2a (per-tool batch) or Pass 2b + (cross-batch). Replaced on every Pass 2b re-run. + - ``"manual"`` — added by an operator through the UI. Never overwritten + by automatic enrichment. + + Default ``"manual"`` is intentional: legacy data without a ``source`` + field gets the safer label, so a Pass 2b re-run does not silently delete + pre-existing entries that may have been hand-curated. 
+ """ tool: str - reason: str + reason: str = "" + source: str = "manual" @dataclass @@ -192,11 +205,7 @@ class ToolEnrichment: Produce structured metadata that downstream components use to (1) pick the right tool for a user's goal, (2) synthesize execution plans, and (3) wire one tool's output to another tool's input. - -AVAILABLE TOOLS IN THE COLLECTION (names + 1-line descriptions, for -pairs_well_with reference): -{all_tools_brief} - +{reference_block}{vocab_block} TOOLS TO ANNOTATE (this batch): {batch_detailed} @@ -243,6 +252,51 @@ class ToolEnrichment: - Return JSON only. No markdown fences, no prose, no comments.""" +# Pass 2b — cross-batch workflow pairing. +# +# Per-tool enrichment (Pass 2a) only sees one batch at a time, so it cannot +# spot pairs whose other half lives in a different batch. This prompt shows +# the entire collection's 1-line summaries so the LLM can suggest workflow +# successors that span resources. +# +# The output is batched (subset of tools per call) to stay within the +# response token budget — input stays full, output stays small. +_PAIRS_PROMPT = """\ +You are reviewing an API tool collection to suggest workflow pairs. + +For EACH tool in the OUTPUT BATCH, suggest 2-4 OTHER tools from the FULL +TOOL LIST that are commonly invoked just before or just after this tool in +a real-world workflow. Pairs SHOULD cross resource boundaries when there is +a natural business sequence (e.g. product detail → add to cart → checkout). + +Pair quality matters more than quantity — only suggest tools you are +confident about. If a tool has no good pair candidates, return an empty +array for it. + +FULL TOOL LIST (all available tools — pick pairs only from this list): +{full_list} + +OUTPUT BATCH (suggest pairs ONLY for these tools): +{batch_list} + +OUTPUT FORMAT (strict JSON): +{{ + "tool_name_1": [ + {{"tool": "other_tool_name", "reason": "short reason"}}, + ... + ], + "tool_name_2": [...], + ... 
+}} + +STRICT RULES: + - You MUST include one entry for EVERY tool in the OUTPUT BATCH (use + empty array if no good pairs). + - Pair tool names MUST exactly match a name in the FULL TOOL LIST. + - Do NOT pair a tool with itself. + - Return JSON only. No markdown fences, no prose, no comments.""" + + def _format_tools_list(tools: list[ToolSummary]) -> str: lines = [] for i, t in enumerate(tools, 1): @@ -261,6 +315,22 @@ def _format_tools_brief(tools: list[ToolSummary]) -> str: return "\n".join(f"- {t.name}" for t in tools) +def _format_tools_for_pairs(tools: list[ToolSummary]) -> str: + """Compact ``name: 1-line summary`` block for Pass 2b prompts. + + Uses ``description`` (mapped from ai_metadata.one_line_summary by the + caller for tools that have been Pass 2a annotated) so the LLM can pair + based on workflow meaning, not just tool names. + """ + lines = [] + for t in tools: + summary = (t.description or "").strip().replace("\n", " ") + if len(summary) > 100: + summary = summary[:97] + "..." + lines.append(f"- {t.name}: {summary}" if summary else f"- {t.name}") + return "\n".join(lines) + + def _format_tools_for_enrichment(tools: list[ToolSummary]) -> str: """Detailed per-tool block for enrichment prompt input.""" blocks = [] @@ -307,10 +377,15 @@ def _parse_enrichment(data: Any) -> ToolEnrichment | None: kind=kind, ) ) + # Pairs from per-tool enrichment are batch-scoped (LLM only sees the + # current batch), so quality is lower than cross-batch Pass 2b. + # Marked source="auto" so a Pass 2b run can replace them while + # preserving operator-curated source="manual" entries. 
pairs = [ PairHint( tool=str(p.get("tool", "")).strip(), reason=str(p.get("reason", "")).strip(), + source="auto", ) for p in (data.get("pairs_well_with") or []) if isinstance(p, dict) and str(p.get("tool", "")).strip() @@ -624,45 +699,129 @@ def generate_example_queries( return all_queries + def enrich_pairs( + self, + tools: list[ToolSummary], + batch_size: int = 30, + ) -> dict[str, list[PairHint]]: + """Pass 2b — cross-batch workflow pair suggestion. + + Unlike Pass 2a (``enrich_tool_semantics``) which sees only the + current batch, this pass shows the LLM the full collection's 1-line + summaries so it can suggest pairs that cross resource boundaries + (e.g. ``getProductDetail → addToCart`` even when the two tools live + in different swagger sources). + + Output is batched only on the OUTPUT axis: input list stays full + for every call, output covers ``batch_size`` tools per call. This + keeps the prompt short and avoids the 8k-token output limit + truncating long pair lists. + + Tools should arrive with ``description`` set to ai_metadata + ``one_line_summary`` when available (Pass 2a output) so pairing can + rely on workflow meaning, not just tool names. 
+ + Returns: {tool_name: [PairHint(source="auto"), ...]} + """ + results: dict[str, list[PairHint]] = {} + if not tools: + return results + + full_list = _format_tools_for_pairs(tools) + + for i in range(0, len(tools), batch_size): + batch = tools[i : i + batch_size] + batch_list = _format_tools_for_pairs(batch) + prompt = _PAIRS_PROMPT.format(full_list=full_list, batch_list=batch_list) + response = self.generate(prompt) + + try: + parsed = _extract_json(response) + if not isinstance(parsed, dict): + continue + for name, raw_pairs in parsed.items(): + if not isinstance(raw_pairs, list): + continue + pair_list: list[PairHint] = [] + for p in raw_pairs: + if not isinstance(p, dict): + continue + target = str(p.get("tool", "")).strip() + if not target or target == name: + continue + pair_list.append(PairHint( + tool=target, + reason=str(p.get("reason", "")).strip(), + source="auto", + )) + results[str(name)] = pair_list + except (json.JSONDecodeError, KeyError, TypeError): + continue + + return results + def enrich_tool_semantics( self, tools: list[ToolSummary], batch_size: int = 10, *, reference_tools: list[ToolSummary] | None = None, + existing_vocab: list[str] | None = None, + valid_tool_names: set[str] | None = None, ) -> dict[str, ToolEnrichment]: """Per-tool semantic annotation for Plan-and-Execute architecture. - ``tools`` = the batch (or batches) of tools to produce detailed - enrichment for. ``reference_tools`` = the full catalog used only to - build ``all_tools_brief`` in the prompt (so LLM picks - ``pairs_well_with`` from valid names). If ``reference_tools`` is - None, falls back to ``tools``. - - Streaming callers typically pass one batch in ``tools`` + the full - collection in ``reference_tools`` + ``batch_size=len(tools)`` so the - internal loop runs once per caller invocation. - - Output is used by: - - Stage 1 (target selection) — ``one_line_summary`` + ``when_to_use`` - in tool catalog make LLM picks more accurate with smaller context. 
- - Stage 2 (path synthesis) — ``produces_semantics`` / - ``consumes_semantics`` carry canonical semantic ids so bindings - work across convention mismatches (e.g. ``goodsNo`` ≡ ``productId``) - without a hardcoded synonym table. - - Graph edges — ``pairs_well_with`` becomes optional semantic edges - that complement structural field-match edges. + ``tools`` = the batch(es) to produce detailed enrichment for. + + ``reference_tools`` (optional, default ``None``) — when supplied, + rendered as a brief tool list in the prompt so the LLM can pick + ``pairs_well_with`` from valid names. **Streaming callers should + usually pass ``None``** — Pass 2b handles pairs in a separate + cross-batch call, and skipping the reference block saves ~50% + prompt tokens. The pair list emitted in this pass is post-validated + against ``valid_tool_names`` instead. + + ``existing_vocab`` (optional) — accumulated semantic ids decided in + previous batches of the same enrichment run. The LLM is asked to + reuse these labels when applicable, which keeps cross-batch vocab + consistent (avoids ``product_id`` vs ``productId`` divergence). + Streaming callers should pass the unique semantics seen so far. + + ``valid_tool_names`` (optional) — full set of tool names in the + collection. When supplied, ``pairs_well_with`` entries pointing to + tools outside this set are dropped silently (LLM hallucination + guard). When ``reference_tools`` is None the LLM only knows the + names in the current batch; without this guard it would invent + names for cross-batch pairs. 
""" results: dict[str, ToolEnrichment] = {} if not tools: return results - all_brief = _format_tools_brief(reference_tools or tools) + ref_block = "" + if reference_tools: + ref_block = ( + "\nAVAILABLE TOOLS IN THE COLLECTION (names + 1-line " + "descriptions, for pairs_well_with reference):\n" + + _format_tools_brief(reference_tools) + + "\n" + ) + + vocab_block = "" + if existing_vocab: + vocab_block = ( + "\nEXISTING SEMANTIC VOCABULARY (reuse these canonical ids " + "when the field has the same meaning — keeps cross-batch " + "labels consistent):\n" + + "\n".join(f"- {s}" for s in sorted(set(existing_vocab))) + + "\n" + ) for i in range(0, len(tools), batch_size): batch = tools[i : i + batch_size] prompt = _ENRICH_SEMANTICS_PROMPT.format( - all_tools_brief=all_brief, + reference_block=ref_block, + vocab_block=vocab_block, batch_detailed=_format_tools_for_enrichment(batch), ) response = self.generate(prompt) @@ -673,8 +832,16 @@ def enrich_tool_semantics( continue for name, data in parsed.items(): enrichment = _parse_enrichment(data) - if enrichment is not None and enrichment.canonical_action: - results[str(name)] = enrichment + if enrichment is None or not enrichment.canonical_action: + continue + # Hallucination guard for pairs_well_with — drop entries + # whose target name is not in the catalog. + if valid_tool_names is not None: + enrichment.pairs_well_with = [ + p for p in enrichment.pairs_well_with + if p.tool in valid_tool_names and p.tool != str(name) + ] + results[str(name)] = enrichment except (json.JSONDecodeError, KeyError, TypeError): continue diff --git a/graph_tool_call/ontology/schema.py b/graph_tool_call/ontology/schema.py index 04086fb..2a67290 100644 --- a/graph_tool_call/ontology/schema.py +++ b/graph_tool_call/ontology/schema.py @@ -24,6 +24,26 @@ class NodeType(str, Enum): DOMAIN = "domain" +class Confidence(str, Enum): + """Edge confidence label, graphify-style. 
+ + Every edge in a graphify-style ToolGraph carries one of three labels so + downstream consumers (LLM agents, retrieval scoring, UI) can distinguish + deterministic facts from heuristic guesses. + + EXTRACTED — derived deterministically from the spec (path hierarchy, + shared $ref, CRUD pattern). conf_score >= 0.85 AND layer == 1. + INFERRED — heuristic match (name-based, RPC pattern, cross-resource). + conf_score >= 0.85 but not strictly structural. + AMBIGUOUS — low-confidence heuristic (0.70 <= conf_score < 0.85). + Surface in UI for review; retrieval applies a score penalty. + """ + + EXTRACTED = "EXTRACTED" + INFERRED = "INFERRED" + AMBIGUOUS = "AMBIGUOUS" + + # Weights for relation types during retrieval scoring DEFAULT_RELATION_WEIGHTS: dict[str, float] = { RelationType.SIMILAR_TO: 0.8, diff --git a/graph_tool_call/plan/synthesizer.py b/graph_tool_call/plan/synthesizer.py index 44696cf..35858c4 100644 --- a/graph_tool_call/plan/synthesizer.py +++ b/graph_tool_call/plan/synthesizer.py @@ -90,6 +90,46 @@ def __init__( self.label_field_hints = list(label_field_hints) +def _normalize_field_name(name: str) -> str: + """Lowercase + strip separators for loose field-name matching. + + Conservative on purpose: + ``ordNo`` → ``ordno`` + ``ord_no`` → ``ordno`` + ``ORD-NO`` → ``ordno`` + BUT keeps token roots distinct: + ``ordNo`` ≠ ``orderNo`` (``ordno`` ≠ ``orderno``) + Token-level synonym mapping (``ord`` ↔ ``order``) is domain-specific + and not done here — the graph-edge fallback handles those cases. + """ + if not name: + return "" + out: list[str] = [] + for ch in name: + if ch.isalnum(): + out.append(ch.lower()) + return "".join(out) + + +def _normalize_field_name(name: str) -> str: + """Lowercase + strip non-alphanumerics for loose field-name matching. 
+ + Conservative on purpose: + ``ordNo`` → ``ordno`` ``ord_no`` → ``ordno`` ``ORD-NO`` → ``ordno`` + + Token roots stay distinct: + ``ordNo`` ≠ ``orderNo`` (``ordno`` ≠ ``orderno``) + + Token-level synonym mapping (``ord`` ↔ ``order``) is domain-specific + and intentionally NOT done here — that's the job of the graph-edge + fallback in ``_find_producer``, which uses path/$ref/CRUD signals + instead of name guessing. + """ + if not name: + return "" + return "".join(ch.lower() for ch in name if ch.isalnum()) + + @dataclass class _PartialStep: """In-progress step being built during bottom-up synthesis.""" @@ -140,6 +180,18 @@ def __init__( # semantic_tag -> [tool_name], insertion order preserved self._producers_by_semantic: dict[str, list[str]] = {} self._producers_by_field: dict[str, list[str]] = {} + # Loose-field index: normalised field name → [tool_name]. + # Lets ``ordNo`` match producers of ``ordno`` / ``ord_no`` / ``ORDNO``. + # Conservative — only normalises case + separators, never strips + # tokens (so ``ordNo`` ≠ ``orderNo`` — those need the graph fallback). + self._producers_by_loose_field: dict[str, list[str]] = {} + # graphify-mode adjacency: ``tool_name -> [edge_dict]`` for outgoing + # workflow edges (REQUIRES / PRECEDES / COMPLEMENTARY). Used as a + # fallback in ``_find_producer`` when neither semantic_tag nor + # field_name match — we walk the graph the user/extractor built + # rather than failing on field-name divergence. + self._workflow_edges_out: dict[str, list[dict[str, Any]]] = {} + self._index_workflow_edges(graph) self._build_producer_indexes() # ------------------------------------------------------------------ @@ -197,6 +249,23 @@ def synthesize( )) target_step_id = steps_by_tool[target].step_id + + # Collect user_input slots so the runner can prompt the caller in + # advance and the UI can render a single popup with all missing + # fields, instead of one popup per step. 
Each entry: which step + # needs which field, and (when known) the original semantic_tag + # so frontend can show the same enum/popup the operator + # registered for that field. + user_input_slots: list[dict[str, Any]] = [] + for step in final_steps: + for arg_name, arg_val in (step.args or {}).items(): + if isinstance(arg_val, str) and arg_val.startswith("${user_input."): + user_input_slots.append({ + "step_id": step.id, + "tool": step.tool, + "field_name": arg_name, + }) + return Plan( id=str(uuid.uuid4()), goal=goal or f"Execute {target}", @@ -210,6 +279,7 @@ def synthesize( "target": target, "entities": dict(entities), "synthesized_by": "PathSynthesizer/v1", + "user_input_slots": user_input_slots, }, ) @@ -297,15 +367,26 @@ def _resolve( ) # 5. Required data field → rank candidate producers and pick the best. + # Pass ``visiting`` as ``excluded`` so cycle-prone candidates are + # skipped here (Cycle policy A). The chain reroutes around the + # cycle when an alternative producer exists; only when none + # remains does the caller fall through to user-input slot (F2). producer = self._find_producer( semantic=semantic, field_name=field_name, target_tool=tool_name, entities=entities, + excluded=visiting, ) if producer is None: - raise UnsatisfiableFieldError( - f"tool {tool_name!r} requires {field_name!r} " - f"(semantic={semantic!r}) but no entity or producer found" - ) + # F2 + Cycle policy B: gracefully surface the field as a + # ``${user_input.<field_name>}`` placeholder rather than aborting + # the entire plan. The runner detects the placeholder at + # step-start and asks the user (or its surrounding agent) + # to supply the value. The plan's metadata records every + # such slot so the caller can pre-collect inputs. + placeholder = f"${{user_input.{field_name}}}" + args[field_name] = placeholder + rationales.append(f"{field_name} ← user_input") + continue # 5a. Dynamic-option popup priority.
Detect "read-detail then # pick one" patterns where the producer is a single-hop @@ -342,14 +423,27 @@ def _resolve( label_field_hints=self._label_hints_for(producer, opt_path), ) - # Recurse into the producer first so step_id ordering is correct - self._resolve( - tool_name=producer, - entities=entities, - steps_by_tool=steps_by_tool, - visiting=visiting, - depth=depth + 1, - ) + # Recurse into the producer first so step_id ordering is correct. + # Cycle policy B + F2: if the producer's own chain is too deep + # or cycles back, we don't abort the whole plan — we drop this + # producer and fall back to a user_input slot for the field. + # This keeps the surface tool callable when the prerequisite + # chain extends beyond what the synthesiser can flatten. + try: + self._resolve( + tool_name=producer, + entities=entities, + steps_by_tool=steps_by_tool, + visiting=visiting, + depth=depth + 1, + ) + except (MaxDepthExceededError, CyclicDependencyError) as exc: + placeholder = f"${{user_input.{field_name}}}" + args[field_name] = placeholder + rationales.append( + f"{field_name} ← user_input (chain unflattenable: {exc.__class__.__name__})" + ) + continue # Build a placeholder binding — will be rewritten after step_ids # are assigned. Format: ${.} @@ -370,16 +464,107 @@ def _resolve( # ------------------------------------------------------------------ def _build_producer_indexes(self) -> None: - """Index which tools produce which semantic / field across graph.""" + """Index which tools produce which semantic / field across the graph. + + Echo-back filter: a tool that takes ``ordNo`` as input and echoes it + back in its response is NOT a producer of ``ordNo`` in any useful + sense — it's just relaying the value the caller already supplied. We + skip those entries so the index reflects tools that actually CREATE + or DISCOVER the value (``listOrders``, ``createOrder``, + ``searchOrders`` etc.) rather than every endpoint that happens to + round-trip the field. 
+ + Same rule applied to ``semantic_tag`` for parity with the LLM Pass 2 + enrichment path. Empty consumes (no input fields) → never echo, so + all produces are real producers. + """ for name, tool in self._tools.items(): meta = tool.get("metadata") or {} + consumed_fields: set[str] = set() + consumed_semantics: set[str] = set() + for c in meta.get("consumes") or []: + if not isinstance(c, dict): + continue + cf = c.get("field_name") or "" + cs = c.get("semantic_tag") or "" + if cf: + consumed_fields.add(cf) + if cs: + consumed_semantics.add(cs) + for produce in meta.get("produces") or []: sem = produce.get("semantic_tag") or "" fname = produce.get("field_name") or "" + # Skip pure echo-back: the field came in, gets relayed out. + if fname and fname in consumed_fields: + continue + if sem and sem in consumed_semantics: + continue if sem: self._producers_by_semantic.setdefault(sem, []).append(name) if fname: self._producers_by_field.setdefault(fname, []).append(name) + loose = _normalize_field_name(fname) + if loose and loose != fname: + self._producers_by_loose_field.setdefault(loose, []).append(name) + + # ---- graphify edge indexing & traversal --------------------------------- + + _WORKFLOW_RELATIONS: frozenset[str] = frozenset( + {"requires", "precedes", "complementary"} + ) + _CONFIDENCE_RANK: dict[str, int] = { + "EXTRACTED": 0, + "INFERRED": 1, + "AMBIGUOUS": 2, + } + + def _index_workflow_edges(self, graph: dict[str, Any]) -> None: + """Bucket the graphify graph's outgoing workflow edges by source tool. + + Accepts the same graph dict the rest of the class consumes — looks + for ``graph.graph.edges`` (DictGraph.to_dict() output) or the + legacy NetworkX-style ``graph.graph.links`` if present. Edges + without a confidence label are kept (treated as fallback) so this + also works on graphs built before the graphify ingest landed. 
+ """ + graph_inner = graph.get("graph") or {} + edges = graph_inner.get("edges") or graph_inner.get("links") or [] + for e in edges: + if not isinstance(e, dict): + continue + src = e.get("source") or e.get("from") + tgt = e.get("target") or e.get("to") + rel = e.get("relation") + rel_str = ( + rel.value if hasattr(rel, "value") + else str(rel) if rel is not None else "" + ).lower() + if not src or not tgt or rel_str not in self._WORKFLOW_RELATIONS: + continue + self._workflow_edges_out.setdefault(src, []).append({ + "target": tgt, + "relation": rel_str, + "confidence": e.get("confidence"), + "conf_score": float(e.get("conf_score") or 0.0), + "evidence": e.get("evidence") or "", + }) + + # Producer-signal score weights. Higher = stronger signal that this + # candidate genuinely produces the value the target needs. Weights chosen + # so combined signals (e.g. graph EXTRACTED + field exact = 90) beat any + # single graph/field signal, and graph EXTRACTED alone (50) beats field exact alone + # (40) — Path/$ref/CRUD-derived edges are more reliable than coincidental + # field-name overlap. ``semantic_exact`` requires LLM Pass 2 enrichment; + # when present it's the strongest signal we have. + _SIGNAL_WEIGHTS: dict[str, int] = { + "semantic_exact": 100, + "graph_EXTRACTED": 50, + "field_exact": 40, + "graph_INFERRED": 20, + "field_loose": 10, + "graph_AMBIGUOUS": 5, + } def _find_producer( self, @@ -388,36 +573,133 @@ def _find_producer( field_name: str, target_tool: str, entities: dict[str, Any], + excluded: set[str] | None = None, ) -> str | None: - """Pick the best-ranked producer for ``semantic`` (or ``field_name``). - - Candidates are gathered from both indexes (semantic first), then - ranked using Pass 2 metadata (``_rank_producers``) and finally - filtered by ``_is_chain_eligible`` — discards producers whose - ``canonical_action`` / ``primary_resource`` signal they're - unrelated to the target's domain (e.g.
claim-cost calculator - showing up as a producer for a basket field just because a - ``produces`` entry happens to match). + """Pick the best producer using combined graph + schema signals. + + Producer matching is treated as the combination of two first-class + signals (NOT a fallback chain): + (a) Schema match — semantic_tag / field_name on ``produces``. + (b) Graph traversal — outgoing REQUIRES / PRECEDES / COMPLEMENTARY + edges from ``target_tool``, ranked by ``confidence``. + + A candidate accumulates one entry per matching signal. The signal + weights live in ``_SIGNAL_WEIGHTS`` and combine additively, so a + candidate matched by both graph EXTRACTED and field_exact (90) wins + over one matched only by field_exact (40). Tie-break uses the + existing Pass-2 ``_rank_producers`` (entity affinity, pair hint, + canonical action), and ``_is_chain_eligible`` still gates the final + pick — sparse Pass-2 metadata pass-throughs apply unchanged. + + ``excluded`` is the set of tools currently being resolved (the + caller's ``visiting`` set). Producer candidates in this set would + re-enter recursion and trigger ``CyclicDependencyError`` — we skip + them here so the second-best candidate gets a chance instead. This + is the "skip-this-branch" cycle policy: the chain reroutes around + the cycle when alternative producers exist; only when all candidates + cycle does the caller fall back to user-input slot handling. + + Returns the highest-scoring eligible candidate, or None if no + candidate has any signal (or all signals point to ``excluded`` tools). + """ - candidates: list[str] = [] - seen: set[str] = set() + excluded = excluded or set() + candidate_signals: dict[str, set[str]] = {} + + def _record(name: str, signal: str) -> None: + if name and name != target_tool: + candidate_signals.setdefault(name, set()).add(signal) + + # (a) schema-side: exact semantic / field_name (echo-back already + # filtered when the index was built).
if semantic: - for name in self._producers_by_semantic.get(semantic, []): - if name != target_tool and name not in seen: - candidates.append(name) - seen.add(name) + for n in self._producers_by_semantic.get(semantic, []): + _record(n, "semantic_exact") if field_name: - for name in self._producers_by_field.get(field_name, []): - if name != target_tool and name not in seen: - candidates.append(name) - seen.add(name) - if not candidates: + for n in self._producers_by_field.get(field_name, []): + _record(n, "field_exact") + + # (a') schema-side: loose field match — separator/case folded. + # ``ordNo`` won't match ``orderNo`` (different roots) but will match + # ``ord_no`` / ``ORDNO``. Cross-naming-convention safety net. + if field_name: + loose = _normalize_field_name(field_name) + if loose: + for n in self._producers_by_loose_field.get(loose, []): + if n in candidate_signals: + continue # already had a stronger signal + _record(n, "field_loose") + + # (b) graph-side: walk outgoing workflow edges, verify each + # candidate actually has a matching produces entry. 
+ edges = self._workflow_edges_out.get(target_tool) or [] + loose_target = _normalize_field_name(field_name) if field_name else "" + for e in edges: + cand = e.get("target") + if not cand or cand == target_tool: + continue + tool = self._tools.get(cand) + if not tool: + continue + cand_consumes_fields = { + (c or {}).get("field_name", "") + for c in (tool.get("metadata") or {}).get("consumes") or [] + if isinstance(c, dict) + } + cand_consumes_semantics = { + (c or {}).get("semantic_tag", "") + for c in (tool.get("metadata") or {}).get("consumes") or [] + if isinstance(c, dict) + } + for p in (tool.get("metadata") or {}).get("produces") or []: + if not isinstance(p, dict): + continue + p_sem = p.get("semantic_tag") or "" + p_fname = p.get("field_name") or "" + # Echo-back guard for the candidate itself — same rule as + # _build_producer_indexes, applied here so graph-edge + # discoveries don't sneak in a relayed value. + if p_fname and p_fname in cand_consumes_fields: + continue + if p_sem and p_sem in cand_consumes_semantics: + continue + + matched = False + if semantic and p_sem == semantic: + matched = True + elif field_name and p_fname == field_name: + matched = True + elif loose_target and _normalize_field_name(p_fname) == loose_target: + matched = True + if not matched: + continue + + conf = e.get("confidence") or "AMBIGUOUS" + _record(cand, f"graph_{conf}") + break # one signal per candidate per edge target is enough + + if not candidate_signals: return None + # Score and pre-rank by signal strength (stable for equal scores). + def _score(signals: set[str]) -> int: + return sum(self._SIGNAL_WEIGHTS.get(s, 0) for s in signals) + + scored = sorted( + candidate_signals.items(), + key=lambda item: (-_score(item[1]), item[0]), + ) + sorted_names = [n for n, _ in scored] + + # Pass 2 / chain-eligibility gate — pass-through when ai_metadata + # is sparse, identical behaviour to the previous implementation. 
+ # Cycle filter: skip candidates currently in the resolution stack so + # the synthesiser reroutes around the cycle instead of raising. ranked = self._rank_producers( - candidates, target_tool=target_tool, entities=entities, + sorted_names, target_tool=target_tool, entities=entities, ) for cand in ranked: + if cand in excluded: + continue if self._is_chain_eligible(cand, target_tool=target_tool): return cand return None diff --git a/graph_tool_call/tool_graph.py b/graph_tool_call/tool_graph.py index 28839ed..00c2353 100644 --- a/graph_tool_call/tool_graph.py +++ b/graph_tool_call/tool_graph.py @@ -16,7 +16,7 @@ from graph_tool_call.core.protocol import GraphEngine from graph_tool_call.core.tool import ToolSchema, normalize_tool, parse_tool from graph_tool_call.ontology.builder import OntologyBuilder -from graph_tool_call.ontology.schema import RelationType +from graph_tool_call.ontology.schema import Confidence, RelationType def _encode_spec_url(base: str, raw_url: str) -> str: @@ -488,9 +488,27 @@ def add_relation( target: str, relation: str | RelationType, weight: float = 1.0, + *, + confidence: str | Confidence | None = None, + conf_score: float | None = None, + layer: int | None = None, + evidence: str | None = None, ) -> None: - """Add a relation between two tools.""" - self._builder.add_relation(source, target, relation, weight) + """Add a relation between two tools. + + Optional graphify-style attrs are forwarded to ``OntologyBuilder``; + see ``OntologyBuilder.add_relation`` for semantics. 
+ """ + self._builder.add_relation( + source, + target, + relation, + weight, + confidence=confidence, + conf_score=conf_score, + layer=layer, + evidence=evidence, + ) self._invalidate_retrieval() def add_domain(self, domain: str, description: str = "") -> None: diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..ff68b3a --- /dev/null +++ b/scripts/__init__.py @@ -0,0 +1,5 @@ +"""Internal scripts package — referenced by tests/test_release_script.py. + +Empty marker so Python treats ``scripts/`` as an importable package. +Not included in the published wheel (see ``pyproject.toml`` ``packages``). +""" From eb101e594e530195c9017dbede000ba6589adba9 Mon Sep 17 00:00:00 2001 From: daehee <1998opening@gmail.com> Date: Sun, 3 May 2026 07:58:27 +0900 Subject: [PATCH 10/14] =?UTF-8?q?fix:=20ruff=20lint=20=ED=86=B5=EA=B3=BC?= =?UTF-8?q?=20+=20=EC=83=88=20docs/examples=20+=20io=5Fcontract=20?= =?UTF-8?q?=EB=AA=A8=EB=93=88=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lint: - ruff check . 23건 수정 (E501/F401/F841/F402/N806/N818/I001/UP035) - ruff format . 
전체 적용 - examples/xgen_workflow_gateway.py 구문 오류 수정 (lambda **kwargs 위치) - tests/test_dependency.py 중복 정의 제거 - tests/test_gateway_*.py 옵셔널 import 에 # noqa: E402 신규: - graph_tool_call/ingest/io_contract.py — extract_leaves (xgen-workflow 의존) - docs/* — api-reference / benchmarks / cli / integrations / roadmap - benchmarks/results/models/{README,bonsai-8b-q1_0}.md - .pre-commit-config.yaml - examples/test_bonsai_tool_calling.py 기타: - .gitignore — benchmarks/results/benchmark_*.json (자동 출력 무시) --- .gitignore | 3 + .pre-commit-config.yaml | 21 + benchmarks/results/models/README.md | 37 ++ benchmarks/results/models/bonsai-8b-q1_0.md | 147 +++++++ benchmarks/run_competitive.py | 34 +- docs/api-reference.md | 145 +++++++ docs/benchmarks.md | 174 +++++++++ docs/cli.md | 137 +++++++ docs/integrations/direct-api.md | 125 ++++++ docs/integrations/langchain.md | 111 ++++++ docs/integrations/mcp-proxy.md | 117 ++++++ docs/integrations/mcp-server.md | 100 +++++ docs/integrations/middleware.md | 65 ++++ docs/roadmap.md | 298 ++++++++++++++ examples/test_bonsai_tool_calling.py | 405 ++++++++++++++++++++ examples/xgen_workflow_agent.py | 98 +++-- examples/xgen_workflow_gateway.py | 33 +- graph_tool_call/analyze/dependency.py | 191 +++++---- graph_tool_call/graphify/__init__.py | 1 + graph_tool_call/graphify/ingest.py | 17 +- graph_tool_call/graphify/retrieval.py | 19 +- graph_tool_call/ingest/io_contract.py | 4 +- graph_tool_call/ingest/openapi.py | 2 +- graph_tool_call/langchain/agent.py | 17 +- graph_tool_call/langchain/gateway.py | 8 +- graph_tool_call/mcp_proxy.py | 3 +- graph_tool_call/net.py | 8 +- graph_tool_call/ontology/llm_provider.py | 35 +- graph_tool_call/plan/__init__.py | 36 +- graph_tool_call/plan/binding.py | 10 +- graph_tool_call/plan/intent.py | 29 +- graph_tool_call/plan/response.py | 1 - graph_tool_call/plan/runner.py | 53 +-- graph_tool_call/plan/schema.py | 22 +- graph_tool_call/plan/synthesizer.py | 83 ++-- graph_tool_call/retrieval/engine.py | 20 +- 
graph_tool_call/retrieval/graph_search.py | 64 +++- graph_tool_call/serialization.py | 5 +- graph_tool_call/tool_graph.py | 33 +- graph_tool_call/workflow.py | 85 ++-- tests/test_dependency.py | 10 - tests/test_gateway_e2e.py | 120 +++++- tests/test_gateway_token_saving.py | 130 ++++++- tests/test_gateway_xgen_workflow.py | 272 ++++++++++--- tests/test_langchain_agent.py | 5 +- tests/test_langchain_compatibility.py | 99 ++--- tests/test_langchain_gateway.py | 50 ++- tests/test_langchain_toolkit.py | 13 +- 48 files changed, 2939 insertions(+), 556 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 benchmarks/results/models/README.md create mode 100644 benchmarks/results/models/bonsai-8b-q1_0.md create mode 100644 docs/api-reference.md create mode 100644 docs/benchmarks.md create mode 100644 docs/cli.md create mode 100644 docs/integrations/direct-api.md create mode 100644 docs/integrations/langchain.md create mode 100644 docs/integrations/mcp-proxy.md create mode 100644 docs/integrations/mcp-server.md create mode 100644 docs/integrations/middleware.md create mode 100644 docs/roadmap.md create mode 100644 examples/test_bonsai_tool_calling.py diff --git a/.gitignore b/.gitignore index 4188784..670b4ff 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,6 @@ benchmarks/results/ # Personal memo memo/ + +# Benchmark output (timestamped, auto-generated) +benchmarks/results/benchmark_*.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..b086ba0 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,21 @@ +# Pre-commit hooks for graph-tool-call +# +# Install once per clone: +# pip install pre-commit +# pre-commit install +# +# Run manually on all files: +# pre-commit run --all-files +# +# These hooks mirror the CI lint job (.github/workflows/ci.yml). +# If they fail locally, CI will also fail — fix before committing. 
+ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.15.4 + hooks: + - id: ruff + name: ruff check + args: [--fix] + - id: ruff-format + name: ruff format diff --git a/benchmarks/results/models/README.md b/benchmarks/results/models/README.md new file mode 100644 index 0000000..21e00e6 --- /dev/null +++ b/benchmarks/results/models/README.md @@ -0,0 +1,37 @@ +# Model Benchmark Results + +graph-tool-call의 도구 검색 + LLM tool calling end-to-end 벤치마크 결과. + +## How to Run + +```bash +# Retrieval-only (LLM 불필요) +python -m benchmarks.run_benchmark --mode retrieval -v + +# E2E with Ollama +python -m benchmarks.run_benchmark --mode e2e -m qwen3:4b -v --save + +# E2E with OpenAI-compatible server (llama.cpp, vLLM 등) +python -m benchmarks.run_benchmark --mode e2e -m "Bonsai-8B.gguf" \ + --ollama-url "http://localhost:8080/v1" -v --save +``` + +## Model Comparison + +| Model | Size | Quant | Petstore (19t) | Mixed MCP (38t) | Retrieve Boost | +|-------|-----:|-------|-------:|--------:|------:| +| [Bonsai-8B](bonsai-8b-q1_0.md) | 1.1 GB | Q1_0 (1-bit) | BL 65% / RT 57% | BL 0% / RT 73% | **0% → 73%** | + +> **BL** = Baseline (all tools), **RT** = Retrieve (top-5 filtered) + +## Key Findings + +1. **소형 모델일수록 graph-tool-call 효과가 크다** — 도구 수가 context 한계를 넘으면 baseline이 0%로 무너지지만, retrieval 필터링으로 복구 가능 +2. **도구 20개 이하**에서는 baseline과 retrieve 차이가 작거나 역전될 수 있음 +3. **도구 30개 이상**에서는 retrieve 파이프라인이 필수적 + +## Adding a New Model + +1. 벤치마크 실행 후 JSON 결과 확인 (`benchmarks/results/`) +2. `models/` 디렉토리에 `{model-name}.md` 작성 (기존 문서 포맷 참고) +3. 
이 README의 Model Comparison 테이블에 행 추가 diff --git a/benchmarks/results/models/bonsai-8b-q1_0.md b/benchmarks/results/models/bonsai-8b-q1_0.md new file mode 100644 index 0000000..2a83a30 --- /dev/null +++ b/benchmarks/results/models/bonsai-8b-q1_0.md @@ -0,0 +1,147 @@ +# Bonsai-8B (1-bit Q1_0) + +> Tested: 2026-04-07 | Runtime: llama.cpp | API: OpenAI-compatible (`localhost:8080`) + +## Model Spec + +| Item | Value | +|------|-------| +| Parameters | 8B | +| Quantization | Q1_0 (1-bit) | +| File Size | 1.1 GB | +| Memory Usage | ~1.7 GB | +| Prompt Speed | 58 tok/s | +| Generation Speed | 31 tok/s | +| Context Window | 8192 (default) | + +## Results Summary + +| Dataset | Tools | Queries | Baseline Acc | Retrieve Acc | Delta | Token Reduction | Recall@5 | p-value | +|---------|------:|--------:|-------------:|-------------:|------:|----------------:|---------:|--------:| +| Petstore 3.0 | 19 | 23 | 65.2% | 56.5% | -8.7% | 68.4% | 98.6% | 0.3283 (ns) | +| Mixed MCP | 38 | 30 | 0.0% | 73.3% | **+73.3%** | N/A | 93.3% | <0.0001 (***) | + +## Petstore 3.0 (19 tools, 23 queries) + +### Metrics + +| Metric | Baseline | Retrieve (top-5) | +|--------|---------|-----------------| +| Tool Accuracy | 65.2% (15/23) | 56.5% (13/23) | +| Avg Input Tokens | 1,875 | 601 | +| Token Reduction | — | 68.4% | +| Token Efficiency | 0.35 | 0.95 | +| Avg Latency | 1,897 ms | 3,877 ms | + +### Per-Query Results + +| # | Query | Difficulty | Baseline | Retrieve | Notes | +|---|-------|-----------|----------|----------|-------| +| 1 | Find all available pets | easy | findPetsByStatus | findPetsByStatus | | +| 2 | Add a new dog to the pet store | easy | addPet | **None** | retrieve: tool call 미생성 | +| 3 | Get pet with ID 42 | easy | getPetById | getPetById | | +| 4 | Update the name of my pet | medium | **None** | **None** | 양쪽 실패 | +| 5 | Delete pet number 7 | easy | deletePet | deletePet | | +| 6 | Search pets by their tags | medium | findPetsByTags | findPetsByTags | | +| 7 | Upload a photo 
of my pet | medium | **None** | **None** | 양쪽 실패 | +| 8 | Check the store inventory | easy | getInventory | getInventory | | +| 9 | Place an order to buy a pet | easy | **None** | **None** | 양쪽 실패 | +| 10 | Look up order number 5 | easy | getOrderById | getOrderById | | +| 11 | Cancel my order | easy | **None** | **None** | 양쪽 실패 | +| 12 | Create a new user account | easy | createUser | **None** | retrieve: tool call 미생성 | +| 13 | Sign in with username and password | easy | loginUser | loginUser | | +| 14 | Log out of my account | easy | logoutUser | logoutUser | | +| 15 | View user profile for john123 | easy | getUserByName | getUserByName | | +| 16 | Change user email address | medium | **None** | **None** | 양쪽 실패 | +| 17 | Remove user john123 | easy | deleteUser | deleteUser | | +| 18 | Create multiple user accounts at once | medium | **None** | createUsersWithListInput | retrieve만 성공 | +| 19 | Show me sold pets | easy | findPetsByStatus | findPetsByStatus | | +| 20 | Adopt a pet (workflow) | hard | **None** | **None** | 양쪽 실패 | +| 21 | Update pet using form data | hard | updatePetWithForm | updatePetWithForm | | +| 22 | What pets are in the store? 
| medium | **None** | **None** | 양쪽 실패 | +| 23 | Remove a pet listing and delete order | hard | deletePet | **None** | retrieve: tool call 미생성 | + +## Mixed MCP Servers (38 tools, 30 queries) + +### Metrics + +| Metric | Baseline | Retrieve (top-5) | +|--------|---------|-----------------| +| Tool Accuracy | 0.0% (0/30) | 73.3% (22/30) | +| Avg Input Tokens | N/A (all failed) | 682 | +| Avg Latency | N/A | 4,509 ms | +| Token Efficiency | 0.00 | 1.06 | + +### Per-Query Results + +| # | Query | Difficulty | Retrieve | Notes | +|---|-------|-----------|----------|-------| +| 1 | Read the contents of config.yaml | easy | read_file | | +| 2 | Write a new configuration file | easy | write_file | | +| 3 | List all files in the src directory | easy | list_directory | | +| 4 | Create the output directory | easy | create_directory | | +| 5 | Find all Python files in the project | easy | search_files | | +| 6 | Move the old log file to archive | easy | move_file | | +| 7 | Check the file size and permissions | easy | get_file_info | | +| 8 | Show the directory tree structure | easy | directory_tree | | +| 9 | Edit the import statement in main.py | medium | edit_file | | +| 10 | Read multiple config files at once | medium | read_multiple_files | | +| 11 | Create a new issue for the bug | easy | **None** | retrieval OK, tool call 미생성 | +| 12 | Open a pull request for my changes | medium | **None** | retrieval miss (recall=0) | +| 13 | Search for repos about ML | easy | search_repositories | | +| 14 | Fork the upstream repository | medium | **None** | retrieval OK, tool call 미생성 | +| 15 | List all open issues with bug label | easy | list_issues | | +| 16 | Get the README from the GitHub repo | medium | get_file_contents | | +| 17 | Merge the feature branch PR | medium | **None** | retrieval OK, tool call 미생성 | +| 18 | Comment on the PR with review feedback | medium | **None** | retrieval miss (recall=0) | +| 19 | Create a new branch for the feature | easy | create_branch | | +| 
20 | Push the updated files to GitHub | medium | **None** | retrieval OK, tool call 미생성 | +| 21 | Search code for the function definition | medium | search_code | | +| 22 | Which directories can the file server access? | hard | list_allowed_directories | | +| 23 | Check details of PR number 55 | easy | get_pull_request | | +| 24 | Approve the pull request after review | medium | **None** | retrieval OK, tool call 미생성 | +| 25 | View the commit history | easy | list_commits | | +| 26 | Create a new GitHub repo and initialize it | easy | create_repository | | +| 27 | Update the issue title and close it | medium | update_issue | | +| 28 | See what files were changed in PR 10 | easy | get_pull_request_files | | +| 29 | Find all TypeScript files matching *.test.ts | easy | search_files | | +| 30 | Create a file on GitHub with deploy config | medium | create_repository | wrong tool (expected: create_or_update_file) | + +## Failure Analysis + +### 1. Tool Call 미생성 (None) — 가장 빈번한 실패 패턴 + +Bonsai-8B는 도구를 **잘못 고르는** 것이 아니라, tool call JSON을 **아예 생성하지 못하는** 경우가 대부분이다. 텍스트로 응답하거나 빈 응답을 반환한다. + +- Petstore baseline: 8/23 (34.8%) None +- Petstore retrieve: 10/23 (43.5%) None +- Mixed MCP baseline: 30/30 (100%) None +- Mixed MCP retrieve: 7/30 (23.3%) None + +### 2. Baseline 완전 실패 (Mixed MCP) + +38개 도구를 전부 context에 넣으면 input tokens가 과다해져 tool call 자체를 포기한다. 1-bit 양자화 모델의 long context 처리 한계. + +### 3. Write 작업 취약 + +tool call 미생성 실패가 write/create 계열에 집중: +- `placeOrder`, `addPet`, `uploadFile`, `fork_repository`, `push_files` 등 +- read 계열은 상대적으로 안정적 (getPetById, getInventory 등) + +### 4. Retrieve가 Baseline보다 낮은 Petstore + +19개 도구는 Bonsai-8B가 감당 가능한 수준이라 baseline이 소폭 우위 (65.2% vs 56.5%). +하지만 retrieve 모드에서 `createUsersWithListInput` 같은 세밀한 선택에 성공한 케이스도 있다. + +## Key Insight + +> **도구 수가 많아질수록 graph-tool-call의 retrieval 필터링은 필수적이다.** +> 38개 도구만으로도 Bonsai-8B baseline은 0%로 완전히 무너지지만, +> top-5 필터링 시 73.3%까지 복구된다. (p < 0.0001) +> +> 1-bit 양자화 소형 모델에서 graph-tool-call의 가치가 가장 극명하게 드러난다. 
+ +## Raw Data + +- Petstore: `benchmarks/results/benchmark_e2e_20260407_014809.json` +- Mixed MCP: `benchmarks/results/benchmark_e2e_20260407_015032.json` diff --git a/benchmarks/run_competitive.py b/benchmarks/run_competitive.py index 110ad37..fa11372 100644 --- a/benchmarks/run_competitive.py +++ b/benchmarks/run_competitive.py @@ -15,7 +15,7 @@ import argparse import json import time -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import datetime, timezone from benchmarks.config import DATASET_REGISTRY @@ -214,7 +214,10 @@ def print_comparison( def main() -> None: parser = argparse.ArgumentParser(description="Competitive retrieval benchmark") parser.add_argument( - "--datasets", "-d", nargs="+", default=None, + "--datasets", + "-d", + nargs="+", + default=None, help="Datasets to benchmark (default: all non-legacy)", ) parser.add_argument("--top-k", type=int, default=5) @@ -223,20 +226,15 @@ def main() -> None: parser.add_argument("--save", action="store_true", help="Save results as JSON") args = parser.parse_args() - dataset_names = args.datasets or [ - k for k, v in DATASET_REGISTRY.items() if not v.get("legacy") - ] + dataset_names = args.datasets or [k for k, v in DATASET_REGISTRY.items() if not v.get("legacy")] - active_strategies = [ - s for s in STRATEGIES - if not (args.no_embedding and s.embedding) - ] + active_strategies = [s for s in STRATEGIES if not (args.no_embedding and s.embedding)] - print(f"\n Competitive Retrieval Benchmark") + print("\n Competitive Retrieval Benchmark") print(f" Strategies: {len(active_strategies)}") print(f" Datasets: {len(dataset_names)}") if any(s.embedding for s in active_strategies): - print(f" Embedding: ollama/qwen3-embedding:0.6b") + print(" Embedding: ollama/qwen3-embedding:0.6b") print() all_results: dict[str, dict[str, StrategyResult]] = {} @@ -257,15 +255,21 @@ def main() -> None: for strategy in active_strategies: print(f" → {strategy.label}...", end="", flush=True) 
result = run_strategy( - tg_base, strategy, gt["queries"], - top_k=args.top_k, verbose=args.verbose, + tg_base, + strategy, + gt["queries"], + top_k=args.top_k, + verbose=args.verbose, ) ds_results[strategy.name] = result print(f" Recall={result.recall_5:.1%} MRR={result.mrr:.3f}") print_comparison( - gt["name"], gt.get("tool_count", len(tg_base.tools)), - len(gt["queries"]), ds_results, active_strategies, + gt["name"], + gt.get("tool_count", len(tg_base.tools)), + len(gt["queries"]), + ds_results, + active_strategies, ) all_results[ds_name] = ds_results diff --git a/docs/api-reference.md b/docs/api-reference.md new file mode 100644 index 0000000..b64babe --- /dev/null +++ b/docs/api-reference.md @@ -0,0 +1,145 @@ +# Python API Reference + +The primary entry point is `ToolGraph`. Most workflows are: ingest a spec → call `retrieve()`. + +```python +from graph_tool_call import ToolGraph + +tg = ToolGraph() +tg.ingest_openapi("api.json") +tools = tg.retrieve("create a pet", top_k=5) +``` + +--- + +## `ToolGraph` methods + +### Construction + +| Method | Description | +|---|---| +| `ToolGraph()` | Empty graph | +| `ToolGraph.from_url(url, cache=...)` | Build from Swagger UI or spec URL (auto-discovers spec groups) | +| `ToolGraph.load(path)` | Deserialize from JSON | + +### Ingestion + +| Method | Description | +|---|---| +| `add_tool(tool)` | Add a single tool (auto-detects format) | +| `add_tools(tools)` | Add multiple tools | +| `ingest_openapi(source)` | Ingest from OpenAPI / Swagger spec (file path, URL, or dict) | +| `ingest_mcp_tools(tools)` | Ingest from MCP tool list | +| `ingest_mcp_server(url)` | Fetch and ingest from an MCP HTTP server | +| `ingest_functions(fns)` | Ingest from Python callables (uses type hints + docstrings) | +| `ingest_arazzo(source)` | Ingest Arazzo 1.0.0 workflow spec | +| `add_relation(src, tgt, type)` | Add a manual relation between two tools | + +### Retrieval + +| Method | Description | +|---|---| +| `retrieve(query, top_k=10)` | 
Search and return tool list | +| `retrieve_with_scores(query, top_k=10)` | Search and return tools with confidence scores and relation hints | +| `plan_workflow(query)` | Build an ordered execution plan | +| `suggest_next(tool, history=...)` | Suggest next tools based on graph relations | +| `validate_tool_call(call)` | Validate and auto-correct a tool call | +| `assess_tool_call(call)` | Return `allow` / `confirm` / `deny` decision based on annotations | + +### Configuration + +| Method | Description | +|---|---| +| `enable_embedding(provider)` | Enable hybrid embedding search (Ollama, OpenAI, vLLM, sentence-transformers, callable) | +| `enable_reranker(model)` | Enable cross-encoder reranking | +| `enable_diversity(lambda_)` | Enable MMR diversity | +| `set_weights(keyword=, graph=, embedding=, annotation=)` | Tune wRRF fusion weights | +| `auto_organize(llm=...)` | Auto-categorize tools (rule-based or LLM-enhanced) | +| `build_ontology(llm=...)` | Build complete ontology | + +### Analysis + +| Method | Description | +|---|---| +| `find_duplicates(threshold)` | Find duplicate tools across sources | +| `merge_duplicates(pairs)` | Merge detected duplicates | +| `apply_conflicts()` | Detect and add `CONFLICTS_WITH` edges | +| `analyze()` | Build operational analysis summary | + +### Persistence + +| Method | Description | +|---|---| +| `save(path)` | Serialize to JSON (preserves embeddings + weights when set) | +| `ToolGraph.load(path)` | Deserialize and restore retrieval state | + +### Export & visualization + +| Method | Description | +|---|---| +| `export_html(path, progressive=True)` | Interactive HTML (vis.js) | +| `export_graphml(path)` | GraphML for Gephi / yEd | +| `export_cypher(path)` | Neo4j Cypher statements | +| `dashboard_app()` | Build Dash Cytoscape app object | +| `dashboard(port=8050)` | Launch interactive dashboard | + +### Execution + +| Method | Description | +|---|---| +| `execute(name, params, base_url=...)` | Execute an OpenAPI tool directly 
| + +--- + +## Top-level helpers + +| Function | Description | +|---|---| +| `filter_tools(tools, query, top_k=5)` | One-shot filter on any tool list (LangChain, OpenAI, MCP, Anthropic, callables) | +| `GraphToolkit(tools, top_k=5)` | Reusable toolkit — build graph once, filter per query | + +## Middleware + +| Function | Description | +|---|---| +| `patch_openai(client, graph, top_k=5)` | Auto-filter tools on OpenAI client | +| `patch_anthropic(client, graph, top_k=5)` | Auto-filter tools on Anthropic client | + +## LangChain + +| Function | Description | +|---|---| +| `create_gateway_tools(tools, top_k=10)` | Convert N tools → 2 gateway meta-tools | +| `create_agent(llm, tools, top_k=5)` | Auto-filtering LangGraph agent | +| `GraphToolRetriever(tool_graph, top_k=5)` | LangChain `BaseRetriever` returning `Document` objects | +| `tool_schema_to_openai_function(tool)` | Convert `ToolSchema` → OpenAI function dict | + +--- + +## Embedding provider strings + +`enable_embedding()` accepts: + +| Form | Example | +|---|---| +| `"ollama/<model>"` | `"ollama/qwen3-embedding:0.6b"` | +| `"openai/<model>"` | `"openai/text-embedding-3-large"` | +| `"vllm/<model>"` | `"vllm/Qwen/Qwen3-Embedding-0.6B"` | +| `"vllm/<model>@<url>"` | `"vllm/model@http://gpu-box:8000/v1"` | +| `"llamacpp/<model>@<url>"` | `"llamacpp/model@http://192.168.1.10:8080/v1"` | +| `"<url>@<model>"` | `"http://localhost:8000/v1@my-model"` | +| `"sentence-transformers/<model>"` | `"sentence-transformers/all-MiniLM-L6-v2"` | +| `callable` | `lambda texts: my_embed_fn(texts)` | + +## Ontology LLM inputs + +`auto_organize(llm=...)` accepts: + +| Input | Wrapped as | +|---|---| +| `OntologyLLM` instance | Pass-through | +| `callable(str) -> str` | `CallableOntologyLLM` | +| OpenAI client (has `chat.completions`) | `OpenAIClientOntologyLLM` | +| `"ollama/model"` | `OllamaOntologyLLM` | +| `"openai/model"` | `OpenAICompatibleOntologyLLM` | +| `"litellm/model"` | litellm.completion wrapper | diff --git a/docs/benchmarks.md b/docs/benchmarks.md new file mode 100644 index
0000000..2e1d39d --- /dev/null +++ b/docs/benchmarks.md @@ -0,0 +1,174 @@ +# Benchmark Results + +Detailed benchmark data for graph-tool-call. The README contains a 3-row summary; this document contains the full pipeline, retrieval-only, competitive, large-scale, and LangChain agent results. + +- **Model used (LLM benchmarks)**: `qwen3:4b` (4-bit, Ollama), unless noted +- **Pipelines compared**: `baseline` (all tools), `retrieve-k3 / k5 / k10`, plus `+ embedding`, `+ ontology` +- **Reproduce**: see [Reproduce](#reproduce) at the bottom + +--- + +## What we measure + +graph-tool-call verifies two things. + +1. Can performance be **maintained or improved** by giving the LLM only a subset of retrieved tools? +2. Does the **retriever itself** rank the correct tools within the top K? + +These are different questions. A retriever that achieves high `Gold Tool Recall@K` does not automatically translate to high end-to-end accuracy — the LLM still has to pick the right tool from the candidate set. + +### Metrics + +- **End-to-end Accuracy** — did the LLM ultimately succeed in selecting the correct tool / performing the correct workflow? +- **Gold Tool Recall@K** — was the canonical gold tool included in the top K at the retrieval stage? +- **Avg tokens** — average tokens passed to the LLM +- **Token reduction** — token savings vs. baseline + +> The two accuracy metrics often diverge. Evaluations that accept **alternative tools** or **equivalent workflows** as correct may show End-to-end Accuracy that doesn't exactly match Gold Tool Recall@K. `baseline` has no retrieval stage, so Gold Tool Recall@K does not apply. + +--- + +## 1. 
Full pipeline comparison + +| Dataset | Tools | Pipeline | End-to-end Accuracy | Gold Tool Recall@K | Avg tokens | Token reduction | +|---|---:|---|---:|---:|---:|---:| +| Petstore | 19 | baseline | 100.0% | — | 1,239 | — | +| Petstore | 19 | retrieve-k3 | 90.0% | 93.3% | 305 | 75.4% | +| Petstore | 19 | retrieve-k5 | 95.0% | 98.3% | 440 | 64.4% | +| Petstore | 19 | retrieve-k10 | 100.0% | 98.3% | 720 | 41.9% | +| GitHub | 50 | baseline | 100.0% | — | 3,302 | — | +| GitHub | 50 | retrieve-k3 | 85.0% | 87.5% | 289 | 91.3% | +| GitHub | 50 | retrieve-k5 | 87.5% | 87.5% | 398 | 87.9% | +| GitHub | 50 | retrieve-k10 | 90.0% | 92.5% | 662 | 79.9% | +| Mixed MCP | 38 | baseline | 96.7% | — | 2,741 | — | +| Mixed MCP | 38 | retrieve-k3 | 86.7% | 93.3% | 328 | 88.0% | +| Mixed MCP | 38 | retrieve-k5 | 90.0% | 96.7% | 461 | 83.2% | +| Mixed MCP | 38 | retrieve-k10 | 96.7% | 100.0% | 826 | 69.9% | +| Kubernetes core/v1 | 248 | baseline | 12.0% | — | 8,192 | — | +| Kubernetes core/v1 | 248 | retrieve-k5 | 78.0% | 91.0% | 1,613 | 80.3% | +| Kubernetes core/v1 | 248 | retrieve-k5 + embedding | 80.0% | 94.0% | 1,728 | 78.9% | +| Kubernetes core/v1 | 248 | retrieve-k5 + ontology | **82.0%** | 96.0% | 1,699 | 79.3% | +| Kubernetes core/v1 | 248 | retrieve-k5 + embedding + ontology | **82.0%** | **98.0%** | 1,924 | 76.5% | + +### Key insights + +- **Small/medium APIs (19~50 tools)** — baseline is already strong. graph-tool-call's main value here is **64~91% token savings** with little accuracy loss. +- **Large APIs (248 tools)** — baseline collapses to **12%** due to context overload. graph-tool-call recovers performance to **78~82%** by narrowing candidates through retrieval. At this scale it's not an optimization — it's closer to a required retrieval layer. +- **`retrieve-k5` is the best default**. Good token/accuracy tradeoff. On large datasets, adding embedding/ontology yields further gains. + +--- + +## 2. 
Retrieval quality (BM25 + graph only) + +The table below measures retrieval quality **before the LLM stage**. Only BM25 + graph traversal — no embedding or ontology. + +| Dataset | Tools | Gold Tool Recall@3 | Gold Tool Recall@5 | Gold Tool Recall@10 | +|---|---:|---:|---:|---:| +| Petstore | 19 | 93.3% | **98.3%** | 98.3% | +| GitHub | 50 | 87.5% | **87.5%** | 92.5% | +| Mixed MCP | 38 | 93.3% | **96.7%** | 100.0% | +| Kubernetes core/v1 | 248 | 82.0% | **91.0%** | 92.0% | + +### How to read + +- **Gold Tool Recall@K** measures the retriever's ability to include the correct tool in the candidate set, **not** final LLM accuracy. +- On small datasets, `k=5` already achieves high recall. +- On large datasets, increasing `k` raises recall but also increases tokens passed to the LLM — consider both. + +### Insights + +- **Petstore / Mixed MCP** — `k=5` alone includes nearly all correct tools. +- **GitHub** — there's a recall gap between `k=5` and `k=10`; choose `k=10` if recall matters more than tokens. +- **Kubernetes core/v1** — even with 248 tools, `k=5` already achieves **91.0%** gold recall. The retrieval stage alone compresses the candidate set dramatically while retaining most correct tools. + +--- + +## 3. When do embedding and ontology help? + +Comparison on the largest dataset (Kubernetes core/v1, 248 tools), all on top of `retrieve-k5`. + +| Pipeline | End-to-end Accuracy | Gold Tool Recall@5 | Interpretation | +|---|---:|---:|---| +| retrieve-k5 | 78.0% | 91.0% | BM25 + graph alone is a strong baseline | +| + embedding | 80.0% | 94.0% | Recovers semantically-similar but differently-worded queries | +| + ontology | **82.0%** | 96.0% | LLM-generated keywords/example queries significantly improve retrieval | +| + embedding + ontology | **82.0%** | **98.0%** | Accuracy maintained, gold recall at its highest | + +- **Embedding** compensates for **semantic similarity** that BM25 misses. 
+- **Ontology** **expands the searchable representation itself** when descriptions are short or non-standard. +- Using both together yields limited extra end-to-end gains, but **gold recall reaches its highest**. + +--- + +## 4. Competitive benchmark (retrieval strategies) + +Compared 6 retrieval strategies across 9 datasets (19–1068 tools): + +| Strategy | Recall@5 | MRR | Latency | +|---|:---:|:---:|:---:| +| Vector Only (≈bigtool) | 96.8% | 0.897 | 176ms | +| BM25 Only | 91.6% | 0.819 | 1.5ms | +| BM25 + Graph (default) | 91.6% | 0.819 | 14ms | +| Full Pipeline (with embedding) | 96.8% | 0.897 | 172ms | + +**Key finding** — without embedding, BM25+Graph achieves 91.6% Recall, competitive with vector search at **65× faster speed**. With embedding enabled, performance matches pure vector search. + +--- + +## 5. Scale test: 1068 tools (GitHub full API) + +| Strategy | Recall@5 | MRR | Miss% | +|---|:---:|:---:|:---:| +| Vector Only | 88.0% | 0.761 | 12.0% | +| BM25 + Graph | 78.0% | 0.643 | 22.0% | +| Full Pipeline | 88.0% | 0.761 | 12.0% | + +At 1068 tools, baseline (passing all definitions) is impractical due to context size — graph-tool-call provides a working retrieval layer where vector-only and full pipeline tie. + +--- + +## 6. LangChain agent benchmark (200 tools) + +End-to-end accuracy when **200 simple tools** are registered and invoked through a LangChain agent. 
+ +- **Direct (D)** — all 200 tool definitions passed to the LLM at once +- **Graph (G)** — tools managed via graph-tool-call gateway (search → call, 2 turns) + +| Model | D-Acc | G-Acc | D-Turns | G-Turns | D-Tokens | G-Tokens | Savings | D-Time | G-Time | +|---|---:|---:|---:|---:|---:|---:|---:|---:|---:| +| gpt-4.1 | 60.0% | 80.0% | 1.0 | 2.0 | 52,587 | 6,639 | 87.4% | 15.5s | 17.6s | +| gpt-5.2 | 60.0% | **100.0%** | 1.0 | 2.0 | 53,645 | 10,508 | 80.4% | 20.5s | 17.1s | +| gpt-5.4 | 60.0% | **100.0%** | 1.0 | 2.0 | 60,035 | 14,049 | 76.6% | 18.2s | 17.0s | +| claude-sonnet-4-20250514 | 100.0% | 100.0% | 1.0 | 2.0 | 196,183 | 17,349 | 91.2% | 58.2s | 49.4s | +| claude-sonnet-4-6 | 100.0% | 100.0% | 1.0 | 2.0 | 198,665 | 20,074 | 89.9% | 67.0s | 69.4s | +| claude-haiku-4-5 | 100.0% | 100.0% | 1.0 | 2.0 | 197,845 | 19,714 | 90.0% | 23.7s | 22.8s | + +> Acc = accuracy, Turns = average agent turns, Tokens = total tokens, Savings = token reduction (D→G), Time = wall-clock. + +### Key findings + +- GPT-series models drop to **60% accuracy** when all 200 tools are passed directly; graph-tool-call recovers to **80–100%**. +- Claude-series models maintain 100% accuracy either way, but graph-tool-call delivers **89–91% token savings**. +- Graph mode adds 1 extra turn (search → call) but total latency stays comparable or decreases thanks to smaller context. +- Across all models, token reduction ranges from **76.6% to 91.2%**. 
+ +--- + +## Reproduce + +```bash +# Retrieval quality only (fast, no LLM needed) +python -m benchmarks.run_benchmark +python -m benchmarks.run_benchmark -d k8s -v + +# Pipeline benchmark (LLM comparison) +python -m benchmarks.run_benchmark --mode pipeline -m qwen3:4b +python -m benchmarks.run_benchmark --mode pipeline \ + --pipelines baseline retrieve-k3 retrieve-k5 retrieve-k10 + +# Save baseline and compare across runs +python -m benchmarks.run_benchmark --mode pipeline --save-baseline +python -m benchmarks.run_benchmark --mode pipeline --diff +``` + +See [`benchmarks/`](../benchmarks/) for dataset definitions, ground truth, and the runner source. diff --git a/docs/cli.md b/docs/cli.md new file mode 100644 index 0000000..bbda320 --- /dev/null +++ b/docs/cli.md @@ -0,0 +1,137 @@ +# CLI Reference + +```bash +pip install graph-tool-call # core CLI +pip install graph-tool-call[mcp] # + MCP server / proxy commands +``` + +## Commands at a glance + +| Command | Purpose | +|---|---| +| `search` | One-liner: ingest + retrieve in one step | +| `serve` | Run as MCP server | +| `proxy` | Run as MCP proxy (aggregates multiple MCP backends) | +| `ingest` | Build a graph from a spec and save | +| `retrieve` | Search a pre-built graph | +| `analyze` | Print operational analysis (duplicates, conflicts, orphans) | +| `visualize` | Export graph to HTML / GraphML / Cypher | +| `info` | Print graph statistics | +| `dashboard` | Launch interactive Dash Cytoscape UI | + +--- + +## `search` — one-liner search + +```bash +# Ingest + retrieve in one step +graph-tool-call search "cancel order" \ + --source https://api.example.com/openapi.json + +graph-tool-call search "delete user" \ + --source ./openapi.json --scores --json +``` + +Useful for quick exploration without saving the graph.
+ +--- + +## `serve` — MCP server + +```bash +# Single source +graph-tool-call serve --source https://api.example.com/openapi.json + +# Pre-built graph +graph-tool-call serve --graph prebuilt.json + +# Multiple sources +graph-tool-call serve \ + -s https://api1.com/spec.json \ + -s https://api2.com/spec.json + +# Remote (SSE / streamable HTTP) +graph-tool-call serve --source api.json --transport sse --host 0.0.0.0 --port 8000 +graph-tool-call serve --source api.json --transport streamable-http --port 8000 +``` + +See [MCP Server integration guide](integrations/mcp-server.md) for client configuration. + +--- + +## `proxy` — MCP proxy + +```bash +graph-tool-call proxy --config ~/backends.json +graph-tool-call proxy --config backends.json --transport sse --port 8000 +graph-tool-call proxy --config backends.json --passthrough-threshold 50 +``` + +See [MCP Proxy integration guide](integrations/mcp-proxy.md) for `backends.json` format. + +--- + +## `ingest` — build and save a graph + +```bash +graph-tool-call ingest https://api.example.com/openapi.json -o graph.json +graph-tool-call ingest ./spec.yaml --embedding --organize +``` + +Flags: +- `-o, --output PATH` — output graph file (JSON) +- `--embedding` — enable embedding-based hybrid search +- `--organize` — auto-categorize tools into ontology + +--- + +## `retrieve` — search a pre-built graph + +```bash +graph-tool-call retrieve "query" -g graph.json -k 10 +``` + +Flags: +- `-g, --graph PATH` — pre-built graph file +- `-k, --top-k N` — number of results +- `--scores` — print scores +- `--json` — JSON output + +--- + +## `analyze` — operational analysis + +```bash +graph-tool-call analyze graph.json --duplicates --conflicts +``` + +Prints duplicate tools, conflict pairs, orphan tools, category coverage. 
+ +--- + +## `visualize` — export to HTML / GraphML / Cypher + +```bash +graph-tool-call visualize graph.json -f html # interactive HTML +graph-tool-call visualize graph.json -f graphml # Gephi/yEd +graph-tool-call visualize graph.json -f cypher # Neo4j +``` + +--- + +## `info` — graph statistics + +```bash +graph-tool-call info graph.json +# → ToolGraph(tools=248, nodes=251, edges=1024) +``` + +--- + +## `dashboard` — interactive UI + +```bash +graph-tool-call dashboard graph.json --port 8050 +``` + +Launches the Dash Cytoscape interactive dashboard for graph inspection and retrieval testing. Requires `pip install graph-tool-call[dashboard]`. diff --git a/docs/integrations/direct-api.md b/docs/integrations/direct-api.md new file mode 100644 index 0000000..8b408df --- /dev/null +++ b/docs/integrations/direct-api.md @@ -0,0 +1,125 @@ +# Direct API Integration + +Use `retrieve()` to search, then convert results to your provider's tool format. Works with **any OpenAI-compatible API** (OpenAI, Azure, Ollama, vLLM, llama.cpp) and Anthropic. + +## OpenAI / OpenAI-compatible + +```python +from openai import OpenAI +from graph_tool_call import ToolGraph +from graph_tool_call.langchain.tools import tool_schema_to_openai_function + +# Build graph from any source +tg = ToolGraph.from_url( + "https://petstore3.swagger.io/api/v3/openapi.json", + cache="petstore.json", +) + +# Retrieve only the relevant tools for a query +tools = tg.retrieve("create a new pet", top_k=5) + +# Convert to OpenAI function-calling format +openai_tools = [ + {"type": "function", "function": tool_schema_to_openai_function(t)} + for t in tools +] + +# Use with any provider — OpenAI, Azure, Ollama, vLLM, llama.cpp, etc.
+client = OpenAI() +# Or for Ollama: OpenAI(base_url="http://localhost:11434/v1") + +response = client.chat.completions.create( + model="gpt-4o", + tools=openai_tools, # only 5 relevant tools instead of all 248 + messages=[{"role": "user", "content": "create a new pet"}], +) +``` + +## Anthropic Claude + +```python +from anthropic import Anthropic +from graph_tool_call import ToolGraph + +tg = ToolGraph.from_url("https://api.example.com/openapi.json") +tools = tg.retrieve("cancel an order", top_k=5) + +# Convert to Anthropic tool format +anthropic_tools = [ + { + "name": t.name, + "description": t.description, + "input_schema": { + "type": "object", + "properties": { + p.name: {"type": p.type, "description": p.description} + for p in t.parameters + }, + "required": [p.name for p in t.parameters if p.required], + }, + } + for t in tools +] + +client = Anthropic() +response = client.messages.create( + model="claude-sonnet-4-20250514", + tools=anthropic_tools, + messages=[{"role": "user", "content": "cancel my order"}], + max_tokens=1024, +) +``` + +## Wrap any tool list (no graph needed) + +If you already have a list of tools in any format (LangChain `BaseTool`, OpenAI dicts, MCP dicts, Anthropic dicts, plain Python functions), use `filter_tools` directly — **no extra dependencies**: + +```python +from graph_tool_call import filter_tools + +filtered = filter_tools(all_tools, "send an email to John", top_k=5) +# → only the 5 most relevant tools, original objects preserved +``` + +### Reusable toolkit + +Build the graph once, filter per query: + +```python +from graph_tool_call import GraphToolkit + +toolkit = GraphToolkit(tools=all_tools, top_k=5) + +tools_a = toolkit.get_tools("cancel my order") +tools_b = toolkit.get_tools("check the weather") + +# Access the underlying ToolGraph for advanced config +toolkit.graph.enable_embedding("ollama/qwen3-embedding:0.6b") +``` + +## Workflow planning + +Beyond per-query filtering, `plan_workflow()` returns ordered execution 
chains with prerequisites — reducing agent round-trips from 3-4 to 1. + +```python +from graph_tool_call import ToolGraph + +tg = ToolGraph.from_url("https://api.example.com/openapi.json") + +plan = tg.plan_workflow("process a refund") +for step in plan.steps: + print(f"{step.order}. {step.tool.name} — {step.reason}") +# 1. getOrder — prerequisite for requestRefund +# 2. requestRefund — primary action + +# Edit the workflow +plan.remove_step("listOrders") +plan.insert_step(0, "getOrder", tools=tg.tools, reason="need order ID") +plan.set_param_mapping("requestRefund", "order_id", "getOrder.response.id") + +# Visual editor (opens in browser) +plan.open_editor(tools=tg.tools) + +# Save / Load +plan.save("refund_workflow.json") +``` diff --git a/docs/integrations/langchain.md b/docs/integrations/langchain.md new file mode 100644 index 0000000..bcffbea --- /dev/null +++ b/docs/integrations/langchain.md @@ -0,0 +1,111 @@ +# LangChain / LangGraph Integration + +```bash +pip install graph-tool-call[langchain] langgraph +``` + +Three integration patterns — pick the one that fits your architecture. + +| Pattern | Best for | How it works | +|---|---|---| +| **Gateway** | 50+ tools, existing agents | LLM explicitly searches → calls | +| **Auto-filter** | New agents, simple setup | Transparent per-turn tool swap | +| **Manual** | Full control | You call `filter_tools()` yourself | + +--- + +## 1. Gateway Tools (recommended for large tool sets) + +Convert 50~500+ tools into **2 meta-tools** (`search_tools` + `call_tool`). The LLM searches first, then calls — no tool definitions bloat in context. + +```python +from graph_tool_call.langchain import create_gateway_tools + +# 62 tools from Slack, GitHub, Jira, MS365, custom APIs... 
+all_tools = slack_tools + github_tools + jira_tools + ms365_tools + api_tools + +# Convert to 2 gateway meta-tools +gateway = create_gateway_tools(all_tools, top_k=10) +# → [search_tools, call_tool] + +# Use with any LangChain agent — only 2 tools in context +agent = create_react_agent(model=llm, tools=gateway) +result = agent.invoke({ + "messages": [("user", "PROJ-123 이슈를 Done으로 변경해줘")] +}) +``` + +### How it works + +```text +User: "Cancel order #500" + ↓ +LLM calls search_tools(query="cancel order") + → returns: cancel_order, get_order, process_refund (with parameter info) + ↓ +LLM calls call_tool(tool_name="cancel_order", arguments={"order_id": 500}) + → returns: {"order_id": 500, "status": "cancelled"} + ↓ +LLM: "Order #500 has been cancelled." +``` + +### Token impact + +| | All tools bound | Gateway (2 tools) | +|---|:---:|:---:| +| **62 tools** | ~6,090 tokens/turn | ~475 tokens/turn | +| **Token reduction** | — | **92%** | +| **Accuracy** (qwen3.5:4b) | — | 70% (100% with GPT-4o) | + +> Works with **any existing LangChain agent setup**. Just replace `tools=all_tools` with `tools=create_gateway_tools(all_tools)`. + +See the [200-tool LangChain agent benchmark](../benchmarks.md#6-langchain-agent-benchmark-200-tools) for results across GPT and Claude models. + +--- + +## 2. Auto-filtering Agent (transparent per-turn filtering) + +The agent automatically filters tools each turn — the LLM never sees the full list. + +```python +from graph_tool_call.langchain import create_agent + +# 200 tools go in — LLM sees only ~5 relevant ones each turn +agent = create_agent(llm, tools=all_200_tools, top_k=5) + +result = agent.invoke({"messages": [("user", "cancel my order")]}) +# Turn 1: LLM sees [cancel_order, get_order, process_refund, ...] +# Turn 2: LLM sees [next relevant tools based on conversation] +``` + +--- + +## 3. 
Manual filtering + +```python +from graph_tool_call import filter_tools +from langgraph.prebuilt import create_react_agent + +filtered = filter_tools(langchain_tools, "cancel order", top_k=5) +agent = create_react_agent(llm, filtered) +``` + +--- + +## LangChain Retriever (returns Documents) + +If you want to use graph-tool-call as a regular retriever returning `Document` objects (e.g., for a chain that doesn't use tool-calling): + +```python +from graph_tool_call import ToolGraph +from graph_tool_call.langchain import GraphToolRetriever + +tg = ToolGraph.from_url("https://api.example.com/openapi.json") + +retriever = GraphToolRetriever(tool_graph=tg, top_k=5) +docs = retriever.invoke("cancel an order") + +for doc in docs: + print(doc.page_content) # "cancelOrder: Cancel an existing order" + print(doc.metadata["tags"]) # ["order"] +``` diff --git a/docs/integrations/mcp-proxy.md b/docs/integrations/mcp-proxy.md new file mode 100644 index 0000000..572c6b6 --- /dev/null +++ b/docs/integrations/mcp-proxy.md @@ -0,0 +1,117 @@ +# MCP Proxy + +When you have many MCP servers, their tool names pile up in every LLM turn. **MCP Proxy** bundles them behind a single server: **172 tools → 3 meta-tools**, saving ~1,200 tokens per turn. + +## How it works + +```text + ┌─────────────────────────────┐ +Claude ──▶ │ graph-tool-call MCP Proxy │ + │ ┌───────────────────────┐ │ ┌──────────────┐ + │ │ search_tools │ │ ──▶ │ playwright │ + │ │ get_tool_schema │ │ ──▶ │ filesystem │ + │ │ call_backend_tool │ │ ──▶ │ my-api │ + │ └───────────────────────┘ │ ──▶ │ ... │ + └─────────────────────────────┘ └──────────────┘ + 3 meta-tools N backends +``` + +The proxy starts each backend, indexes all tools into a `ToolGraph`, and exposes only 3 meta-tools to the LLM. After `search_tools`, matched tools are **dynamically injected** so the LLM can call them directly in 1 hop. 
+ +## Setup + +### Step 1 — Create `backends.json` + +```jsonc +// ~/backends.json +{ + "backends": { + "playwright": { + "command": "npx", + "args": ["@playwright/mcp", "--headless"] + }, + "filesystem": { + "command": "npx", + "args": ["-y", "@anthropic/mcp-filesystem", "/home"] + }, + "my-api": { + "command": "uvx", + "args": ["some-mcp-server"], + "env": { "API_KEY": "sk-..." } + } + }, + "top_k": 10, + "cache_path": "~/.cache/mcp-proxy-cache.json" +} +``` + +> **Embedding is optional.** Add `"embedding": "ollama/qwen3-embedding:0.6b"` for cross-language search (requires Ollama running). Without it, BM25 keyword search still works. + +### Step 2 — Register the proxy with Claude Code + +```bash +claude mcp add -s user tool-proxy -- \ + uvx "graph-tool-call[mcp]" proxy --config ~/backends.json +``` + +### Step 3 — Remove the original individual servers + +```bash +claude mcp remove playwright -s user +claude mcp remove filesystem -s user +claude mcp remove my-api -s user +``` + +### Step 4 — Restart Claude Code and verify + +```bash +claude mcp list +# tool-proxy: ... - ✓ Connected +# (individual servers should be gone) +``` + +## Remote transport + +```bash +graph-tool-call proxy --config backends.json --transport sse --port 8000 +``` + +## Passthrough mode (few tools) + +When total tools across all backends is **≤ 30**, the proxy **skips the graph layer entirely** and exposes every backend tool directly. Zero overhead, no meta-tools, original tool names and schemas preserved. + +This is useful when you want a **single MCP entry point** for several small servers without paying the search/meta-tool tax. + +```bash +# Explicitly set the threshold (default: 30) +graph-tool-call proxy --config backends.json --passthrough-threshold 50 +``` + +Or in `backends.json`: + +```jsonc +{ + "backends": { ... 
}, + "passthrough_threshold": 50 // ≤ 50 → passthrough, > 50 → gateway +} +``` + +| Mode | When | Exposed tools | +|---|---|---| +| **gateway** (default) | total tools > threshold | `search_tools` + `get_tool_schema` + `call_backend_tool` | +| **passthrough** | total tools ≤ threshold | All backend tools directly (original names/schemas) | + +## Alternative: `.mcp.json` config + +```jsonc +// .mcp.json (project-level or global) +{ + "mcpServers": { + "tool-proxy": { + "command": "uvx", + "args": ["graph-tool-call[mcp]", "proxy", + "--config", "/path/to/backends.json"] + } + } +} +``` diff --git a/docs/integrations/mcp-server.md b/docs/integrations/mcp-server.md new file mode 100644 index 0000000..78a99af --- /dev/null +++ b/docs/integrations/mcp-server.md @@ -0,0 +1,100 @@ +# MCP Server + +Run graph-tool-call as an MCP server. Any MCP-compatible agent (Claude Code, Cursor, Windsurf, etc.) can use tool search with just a config entry. + +## Quick start + +```jsonc +// .mcp.json +{ + "mcpServers": { + "tool-search": { + "command": "uvx", + "args": ["graph-tool-call[mcp]", "serve", + "--source", "https://api.example.com/openapi.json"] + } + } +} +``` + +## Remote deployment (SSE / Streamable HTTP) + +The MCP server supports remote transports for shared deployments: + +```bash +# SSE transport +graph-tool-call serve --source api.json --transport sse --host 0.0.0.0 --port 8000 + +# Streamable HTTP +graph-tool-call serve --source api.json --transport streamable-http --port 8000 +``` + +Client config for a remote MCP server: + +```json +{ + "mcpServers": { + "tool-search": { + "url": "http://tool-search.internal:8000/sse" + } + } +} +``` + +## Exposed tools + +The MCP server exposes 6 tools: + +| Tool | Purpose | +|---|---| +| `search_tools` | Hybrid search across the tool graph | +| `get_tool_schema` | Fetch the full schema for a specific tool | +| `execute_tool` | Execute an OpenAPI tool directly | +| `list_categories` | List ontology categories | +| `graph_info` | 
Graph statistics (nodes, edges, relations) | +| `load_source` | Hot-load a new source into the running server | + +## Search results include workflow guidance + +Search results contain **relations** between tools and a **suggested execution order**: + +```json +{ + "tools": [ + { + "name": "createOrder", + "relations": [ + {"target": "getOrder", "type": "precedes", + "hint": "Call this tool before getOrder"} + ] + }, + {"name": "getOrder", "prerequisites": ["createOrder"]} + ], + "workflow": {"suggested_order": ["createOrder", "getOrder", "updateOrderStatus"]} +} +``` + +This lets the agent plan multi-step calls in one turn instead of round-tripping per tool. + +## Multiple sources + +Pass `-s` multiple times to merge several specs into one graph: + +```bash +graph-tool-call serve \ + -s https://api1.example.com/openapi.json \ + -s https://api2.example.com/openapi.json +``` + +Cross-source duplicate detection automatically dedupes tools that appear in multiple specs. + +## Pre-built graph + +Build the graph once, serve it many times: + +```bash +graph-tool-call ingest https://api.example.com/openapi.json -o graph.json +graph-tool-call serve --graph graph.json +``` + +See the [CLI reference](../cli.md) for the full `serve` flag list. diff --git a/docs/integrations/middleware.md b/docs/integrations/middleware.md new file mode 100644 index 0000000..b1a1716 --- /dev/null +++ b/docs/integrations/middleware.md @@ -0,0 +1,65 @@ +# SDK Middleware + +Already have tool-calling code? Add **one line** to automatically filter tools through graph-tool-call. Existing code stays unchanged. 
+ +## OpenAI + +```python +from openai import OpenAI +from graph_tool_call import ToolGraph +from graph_tool_call.middleware import patch_openai + +client = OpenAI() +tg = ToolGraph.from_url("https://api.example.com/openapi.json") + +patch_openai(client, graph=tg, top_k=5) # ← add this line + +# Existing code unchanged — 248 tools go in, only 5 relevant ones are sent +response = client.chat.completions.create( + model="gpt-4o", + tools=all_248_tools, + messages=messages, +) +``` + +## Anthropic + +```python +from anthropic import Anthropic +from graph_tool_call import ToolGraph +from graph_tool_call.middleware import patch_anthropic + +client = Anthropic() +tg = ToolGraph.from_url("https://api.example.com/openapi.json") + +patch_anthropic(client, graph=tg, top_k=5) # ← add this line + +# Existing code unchanged +response = client.messages.create( + model="claude-sonnet-4-20250514", + tools=all_248_tools, + messages=messages, + max_tokens=1024, +) +``` + +## How it works + +The middleware monkey-patches `chat.completions.create` (OpenAI) or `messages.create` (Anthropic) so that whenever `tools=...` is passed, it: + +1. Reads the latest user message +2. Calls `graph.retrieve(query, top_k=top_k)` +3. Replaces `tools=` with the filtered subset +4. Forwards the request + +The original tool list never reaches the model. There's no change to the SDK return type, streaming, or async behavior. + +## When to use + +| Use middleware when... | Use direct API when... | +|---|---| +| You have working tool-calling code already | You're starting from scratch | +| You don't want to refactor for retrieval | You want explicit control over which tools are sent | +| Tool list comes from a runtime registry | Tool list is static and known | + +For explicit retrieval control, see [Direct API integration](direct-api.md). 
diff --git a/docs/roadmap.md b/docs/roadmap.md new file mode 100644 index 0000000..ce613fc --- /dev/null +++ b/docs/roadmap.md @@ -0,0 +1,298 @@ +# graph-tool-call Roadmap + +> 작성일: 2026-04-09 +> 상태: Phase 0~4.5 완료 (255+ tests). 다음 6~9개월 고도화 방향. +> +> 관련 문서: +> - [memo/differentiation-analysis.md](../memo/differentiation-analysis.md) — 학술 차별화 9개 분석 +> - [docs/wbs/README.md](wbs/README.md) — Phase 0~4.5 WBS +> - [docs/benchmarks.md](benchmarks.md) — 현재 벤치마크 결과 + +--- + +## 요약 + +현재 graph-tool-call은 **"도구 검색 라이브러리"**로 완성도 높음. 다음 단계는 **"도구 검색 + 실행 + 거버넌스 레이어"**로 카테고리 확장. + +고도화 후보 14개를 2축 — **거버넌스**(다른 작업의 전제 + 안정성/보안/관측성)와 **효과**(사용자 체감 + 학술 임팩트) — 로 평가해 우선순위를 매겼다. 1번과 2번 우선순위는 의존성이 명확하므로 순서 고정, 3번 이후는 시간/리소스 제약에 따라 3가지 시나리오로 분기. + +--- + +## 1. 배경 — 현재 gap + +### 1.1 README가 약속한 것 vs 실제 구현 + +README Quick Start는 `plan_workflow()`에 대해 이렇게 쓰여 있다: + +> `plan_workflow()` returns ordered execution chains with prerequisites — **reducing agent round-trips from 3-4 to 1**. + +하지만 현재 `graph_tool_call/workflow.py`에는 **계획 작성/편집 메서드만 있고 실행 메서드가 없다** (`execute` / `run` / `invoke` 키워드 0개). 단일 tool 실행은 `ToolGraph.execute()`(tool_graph.py:629)에 있지만, 이를 chain으로 묶는 orchestration 레이어가 부재. 즉 "round-trip 1회" 약속은 **절반만 지켜진 상태**. 
+ +### 1.2 학술 차별화 9개 중 1개만 구현 + +[memo/differentiation-analysis.md](../memo/differentiation-analysis.md)는 9개의 학술 차별화 후보를 정리해두었다: + +| # | 후보 | Tier | 구현 상태 | +|---|---|:---:|:---:| +| 3.1 | MCP Annotation-Aware Retrieval | 1 | ✅ (Phase 2.5) | +| 3.2 | Execution Trace → Causal Tool Graph | 1 | ❌ | +| 3.3 | Token-Budget Constrained Graph Selection | 1 | ❌ | +| 3.4 | Dynamic Tool Graph | 2 | 부분 | +| 3.5 | Cross-Server Tool Dependency | 2 | ❌ | +| 3.6 | Tool Name Disambiguation | 2 | ❌ (prefix 회피만) | +| 3.7 | Cross-Primitive Retrieval | 3 | ❌ | +| 3.8 | Failure-Aware Closed-Loop Retrieval | 3 | ❌ | +| 3.9 | Stateful Session-Aware Retrieval | 3 | 부분 (history 파라미터만) | + +### 1.3 코드 베이스 분석으로 발견한 추가 gap + +학술 차별화 분석에 **없는** 실용 gap 7개: + +| 코드 | 위치 | gap | +|---|---|---| +| `graph_tool_call/workflow.py` | `WorkflowPlan` | `execute_plan()` 부재 | +| `graph_tool_call/mcp_proxy.py:164-211` | backend tool 수집 | schema 검증/provenance/ACL 부재 | +| `graph_tool_call/retrieval/engine.py:111-188` | 5개 scorer | Score Provider 플러그인 인터페이스 부재 | +| `graph_tool_call/__init__.py` | public API | trace export / debug API 부재 | +| `graph_tool_call/retrieval/graph_search.py:38-100` | `_get_category_index()` | 매 query마다 재구축 | +| `graph_tool_call/ingest/` | 단일 spec | spec 경계를 넘는 federation 부재 | +| `graph_tool_call/ingest/` | 6종 format | 새 format 추가 시 adapter interface 부재 | + +--- + +## 2. 후보 14개 간략 설명 + +후보는 **그룹 1 (사용자 즉시 체감)**, **그룹 2 (시스템 견고성)**, **그룹 3 (검색 품질 / 학술)** 으로 분류. + +### 그룹 1 — 사용자 즉시 체감 + +#### A. Workflow Execution Engine +**현상**: `plan_workflow()`는 계획만 생성, 실행은 사용자/LLM 책임. +**작업**: `execute_plan(goal, initial_args)` 추가. `params_from` path expression parser로 step 간 데이터 자동 전달. 실패 시 skip / rollback / abort 정책. dry-run 모드. +**효과**: README 약속("round-trip 3-4회 → 1회") 완성. LLM agent가 `execute_plan` 1번 호출로 multi-step workflow 처리. 다른 학술 후보 5개(P4, P5, B enforcement, C trace, end-to-end 벤치마크)의 전제. +**Effort**: 1.5~2주 (HTTP 실행 인프라는 이미 존재, chain orchestration만 추가) + +#### B. 
Tool Poisoning Defense +**현상**: MCP backend tool schema를 그대로 신뢰. 악성 서버가 description에 prompt injection 삽입 가능 (Invariant Labs 보고). +**작업**: Schema 해시 기반 mutation detection / tool provenance 추적 / annotation 기반 ACL (`readOnlyHint=true`만 unprivileged 노출) / prompt injection 패턴 탐지. +**효과**: 회사 도입 시 보안팀 차단 사유 제거. USENIX Security / IEEE S&P 같은 보안 학회 논문 타겟 가능 (Tool Poisoning을 retrieval layer에서 막는 first work). +**Effort**: 2~3주 + +#### C. Observability + Trace Export +**현상**: 기본 logging만. "왜 이 도구가 검색되지 않았지?" 디버그 불가. +**작업**: `RetrievalEngine`에 `TraceContext` 추가 — 단계별 점수 + 최종 순위 캡처. OpenTelemetry span export. CLI `--trace-out trace.json`. +**효과**: 사용자가 자기 데이터로 튜닝 가능. P4/P5의 데이터 소스 겸용. 도입 전 검증 가능성 ↑. +**Effort**: 1주 + +#### F. Cross-Spec Federation +**현상**: 여러 OpenAPI spec ingest 시 단순 union. spec A의 `getUser` 출력과 spec B의 `userId` 파라미터가 연결되지 않음. +**작업**: spec 경계를 넘는 parameter schema 매칭. provenance metadata. +**효과**: 회사 internal API 여러 개 통합 케이스 — 실제 도입 시나리오에서 가장 흔한 요구. +**Effort**: 2주 (P1과 인프라 공유) + +### 그룹 2 — 시스템 견고성 (인프라) + +#### D. Pluggable Score Provider SPI +**현상**: `retrieval/engine.py`에 5개 scorer가 hardcoded. 새 score 추가하려면 engine 직접 수정. +**작업**: `ScoreProvider` Protocol + `register_score_provider()` API. 기존 5개를 SPI에 맞춰 refactor. +**효과**: 학술 후보 P3/P4/P5의 score 통합이 모두 plug-in. 외부 기여자가 새 score (popularity, latency, user feedback 등)를 추가 가능. +**Effort**: 1주 + +#### E. Incremental Re-indexing +**현상**: tool 추가 시 BM25 index와 embedding을 처음부터 재구축. `_get_category_index()`는 매 query마다 재구축 (1068 tools × tokenization = O(n)). +**작업**: BM25/embedding add-remove API. category index lazy invalidation. +**효과**: 1068+ tools 환경에서 latency 급감. MCP server를 동적으로 추가/제거하는 환경(Cursor, Claude Code)에서 직접 이득. +**Effort**: 1주 + +#### G. Anti-Corruption Adapter Layer +**현상**: ingest format 6종(OpenAPI/MCP tools/MCP server/Python fn/manual/Arazzo). 새 format 추가 시 ingest 모듈에 산발적 코드. +**작업**: `IngestAdapter` 추상 interface. 기존 6종 refactor. gRPC + GraphQL adapter PoC. +**효과**: 외부 기여 진입 장벽 ↓. 
신규 format 200~300 LOC로 추가 가능. +**Effort**: 2주 + +### 그룹 3 — 검색 품질 / 학술 차별화 + +#### P1. Cross-Server Tool Dependency +**현상**: 같은 spec 내 의존성만 감지. cross-server 흐름(Slack → Jira → GitHub)은 별개로 취급. +**작업**: `mcp_proxy.py` cross-backend ingest 시 parameter schema 매칭. 새 edge type 불필요 (`REQUIRES`로 충분), backend metadata만 추가. +**효과**: MCP-Bench retrieval 오류의 50%를 차지하는 cross-server 문제 직접 해결. 논문 1편 핵심 contribution. +**Effort**: 2~3주 + +#### P2. Tool Name Disambiguation +**현상**: MCP 생태계 59% 이름 충돌("search" 32개 서버). 현재는 `serverName__toolName` prefix로 회피. +**작업**: 동일 이름 도구를 (signature + ontology context)로 자동 구분. disambiguation key 생성. +**효과**: LLM이 prefix 신경 쓰지 않아도 됨. 논문 1편의 sub-contribution으로 포함 가능. +**Effort**: 2주 + +#### P3. Stateful Session-Aware Retrieval +**현상**: history 파라미터로 "이미 호출한 도구"를 demote만 함. 다음 도구 예측 없음. +**작업**: Markov chain on tool graph로 다음 도구 확률 계산. `RetrievalEngine`에 새 score source. +**효과**: multi-turn 대화 retrieval 정확도 ↑. 짧은 후속 질문("이제 그거 취소해") 처리력 강화. +**Effort**: 3~4주 (평가 인프라 포함) + +#### P4. Failure-Aware Closed-Loop Retrieval +**현상**: 도구 실패해도 retrieval 순위에 영향 없음. +**작업**: 실행 trace → edge weight online learning. 최근 실패율 높은 도구 자동 강등. `SIMILAR_TO` fallback 제안. +**효과**: self-healing. 운영 중 안 쓰이는 도구 자동 정리. +**전제**: **A 필요** (실행 trace가 있어야 학습 가능) +**Effort**: 3주 + +#### P5. Execution Trace → Causal Tool Graph +**현상**: 의존성 그래프가 정적 metadata(OpenAPI/CRUD)에만 의존. +**작업**: 실행 trace에서 interventional causal discovery로 인과적 의존성 자동 발견. +**효과**: Tier 1 단독 contribution (causal discovery + tool learning 교차점, 미개척). 사용할수록 똑똑해지는 시스템. +**전제**: **A 필요** +**Effort**: 6~8주 (이론 + 평가) + +#### P6. Token-Budget Constrained Graph Selection +**현상**: top-k=5 고정. 도구 토큰 비용 예측 불가. +**작업**: dependency-constrained knapsack ILP formulation. DAG 활용 근사 알고리즘. approximation ratio 증명. +**효과**: Cursor 40 한도, OpenAI 권장 20개 제약 해결. 비용 예측 가능. ICML/NeurIPS 타겟. +**Effort**: 8주 (이론 작업 중심) + +#### P7. Cross-Primitive Retrieval +**현상**: MCP의 3대 primitive 중 Tools만 검색. +**작업**: Resources/Prompts 노드 추가. 
새 edge type(`PROVIDES_CONTEXT`, `TEMPLATES`). heterogeneous graph 검색. +**효과**: MCP 잠재력 100% 활용. +**블로커**: Resources/Prompts 사용하는 실제 MCP 서버가 적어 평가 데이터셋 부족 +**Effort**: 6주 (+ 평가 데이터 수집) + +--- + +## 3. 우선순위 매트릭스 + +거버넌스 = 다른 작업의 전제 + 안정성/보안/관측성 +효과 = 사용자 체감 + 잠금 해제 + 학술/시장 임팩트 + +| 순위 | 후보 | 거버넌스 | 효과 | 합계 | 전제 | +|:---:|---|:---:|:---:|:---:|---| +| 1 | **A. Workflow Execution** | ★★★★★ | ★★★★★ | 10 | 없음 | +| 2 | **B. Tool Poisoning Defense** | ★★★★★ | ★★★★ | 9 | 없음 | +| 3 | **D. Score Provider SPI** | ★★★★★ | ★★★ | 8 | 없음 | +| 4 | **C. Observability + Trace** | ★★★★ | ★★★ | 7 | 없음 | +| 5 | **P6. Token-Budget Knapsack** | ★★★ | ★★★★ | 7 | 없음 | +| 6 | **P1. Cross-Server Dependency** | ★★ | ★★★★ | 6 | 없음 | +| 7 | **F. Cross-Spec Federation** | ★★ | ★★★★ | 6 | P1 인프라 공유 | +| 8 | **G. Adapter Layer** | ★★★★ | ★★ | 6 | 없음 | +| 9 | **E. Incremental Re-indexing** | ★★★ | ★★★ | 6 | 없음 | +| 10 | P5. Causal Tool Graph | ★★★ | ★★★★★ | 8* | **A 필요** | +| 11 | P4. Failure-Aware | ★★★ | ★★★ | 6* | **A 필요** | +| 12 | P2. Name Disambiguation | ★★ | ★★★ | 5 | 없음 | +| 13 | P7. Cross-Primitive | ★★ | ★★★ | 5 | 없음 | +| 14 | P3. Stateful Session | ★ | ★★★ | 4 | 없음 | + +`*` 전제 작업(A) 미완료 시 진입 불가 + +--- + +## 4. 마일스톤 + +### 권장 진행 (6~8개월, 시나리오 C) + +``` +┌─────────────────────────────────────────────────────────┐ +│ Week 1 D. Score Provider SPI [1w] │ +│ Week 2-4 A. Workflow Execution [3w] │ +│ Week 5-6 C. Observability + Trace [2w] │ +│ Week 7-9 B. Tool Poisoning Defense [3w] │ +├─────────────────────────────────────────────────────────┤ +│ 마일스톤 1 (~9주) — v0.5 Release │ +│ ✓ Workflow 완성 (README 약속 성립) │ +│ ✓ Security defense │ +│ ✓ Observability + debug │ +│ ✓ Score provider plugin SPI │ +└─────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────┐ +│ Week 10-12 P1. Cross-Server Dependency [3w] │ +│ Week 13-14 F. Cross-Spec Federation [2w] │ +│ Week 15-16 P2. 
Name Disambiguation [2w] │ +├─────────────────────────────────────────────────────────┤ +│ 마일스톤 2 (~16주) — 논문 1 초안 │ +│ "MCP-Native Graph Tool Retrieval: │ +│ Cross-Server Dependency + Name Disambiguation + │ +│ Annotation-Aware Defense" │ +│ 타겟: EMNLP Workshop / ACL Findings │ +└─────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────┐ +│ Week 17-24 P5. Causal Tool Graph [8w] │ +│ Week 25-32 P6. Token-Budget Knapsack [8w] │ +├─────────────────────────────────────────────────────────┤ +│ 마일스톤 3 (~32주) — 논문 2 │ +│ "Token-Constrained Tool Selection as │ +│ Graph Optimization" │ +│ 타겟: ICML / NeurIPS │ +└─────────────────────────────────────────────────────────┘ +``` + +### 대안 시나리오 + +#### 시나리오 A — 학술 우선 (4개월, 논문 1편) + +시간이 제한적이고 논문 1편을 빨리 받고 싶은 경우. + +``` +Week 1 D. Score Provider SPI [1w] +Week 2-4 P1. Cross-Server Dependency [3w] +Week 5-6 P2. Name Disambiguation [2w] +Week 7-9 B. Tool Poisoning Defense [3w] +Week 10-16 논문 1 작성 (LiveMCPBench + ToolBench 평가) +``` + +**리스크**: A가 뒤로 밀려서 P4/P5가 영구적으로 막힘. 논문 2 불가능. + +#### 시나리오 B — 프로덕션 우선 (2개월, 사용자 확보) + +학술 의도 없이 실사용자 확보가 목표인 경우. + +``` +Week 1-3 A. Workflow Execution [3w] +Week 4 C. Observability + Trace [1w] +Week 5 E. Incremental Re-indexing [1w] +Week 6-7 F. Cross-Spec Federation [2w] +Week 8 블로그 + LangChain community 등록 (Phase 4 잔여) +``` + +**산출물**: PyPI 다운로드 ↑, real-world adopter case 1~2개, blog 기반 유입. + +--- + +## 5. Phase 4 잔여 작업 + +시나리오와 무관하게 마무리할 가치 있음. 마일스톤 사이사이에 끼워 진행. + +- [ ] 4-1d. 관계 검증 UI (confirm/reject) +- [ ] 4-2. LangChain community package 등록 +- [ ] 4-3. 블로그: "Why Graph > Vector for Tool Retrieval" +- [ ] 4-4. (선택) LAPIS 포맷 출력 +- [ ] 4-5. (선택) Rust (PyO3+petgraph) 최적화 + +--- + +## 6. 의사결정 포인트 + +다음 작업에 들어가기 전 확정해야 할 것: + +1. **시나리오 선택** (A/B/C) + - 논문 마감 있음 → A + - 학술 의도 없음 → B + - 시간 여유 있음 → C (권장) + +2. **첫 작업 확정** + - 시나리오 C → D (1주) 후 A (3주) + - 시나리오 A → D (1주) 후 P1 (3주) + - 시나리오 B → A (3주) + +3. 
**Phase 4 잔여 끼워넣기 여부** + - 4-2, 4-3은 시나리오 B 마지막에 통합 + - 4-1d는 Workflow Editor와 함께 A 작업 시 처리 가능 + +--- + +## 참고 + +- [memo/differentiation-analysis.md](../memo/differentiation-analysis.md) — 학술 차별화 9개 후보 상세 +- [docs/wbs/README.md](wbs/README.md) — Phase 0~4.5 완료 내역 +- [docs/benchmarks.md](benchmarks.md) — 현재 벤치마크 (Petstore 19 / GitHub 50 / MCP 38 / k8s 248 / GitHub full 1068) +- [docs/design/benchmark.md](design/benchmark.md) — 벤치마크 설계 근거 diff --git a/examples/test_bonsai_tool_calling.py b/examples/test_bonsai_tool_calling.py new file mode 100644 index 0000000..c33831e --- /dev/null +++ b/examples/test_bonsai_tool_calling.py @@ -0,0 +1,405 @@ +"""Bonsai-8B (1-bit Q1_0) tool calling 능력 테스트. + +3가지 테스트: +1. SearchLLM 통합 (query expansion / intent decomposition) +2. OpenAI function calling format (tools parameter) +3. graph-tool-call retrieve + LLM 조합 +""" +# ruff: noqa: E501 + +from __future__ import annotations + +import json +import time +import urllib.request + +BASE_URL = "http://localhost:8080/v1" +MODEL = "Bonsai-8B.gguf" + +# ── 테스트용 도구 정의 ────────────────────────────────────────────── + +TOOLS = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a city", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string", "description": "City name"}, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "Temperature unit", + }, + }, + "required": ["city"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "send_email", + "description": "Send an email to a recipient", + "parameters": { + "type": "object", + "properties": { + "to": {"type": "string", "description": "Recipient email address"}, + "subject": {"type": "string", "description": "Email subject line"}, + "body": {"type": "string", "description": "Email body content"}, + }, + "required": ["to", "subject", "body"], + }, + }, + }, + { + "type": "function", + "function": { + "name": 
"search_products", + "description": "Search for products in the catalog", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "Search query"}, + "category": {"type": "string", "description": "Product category filter"}, + "max_price": {"type": "number", "description": "Maximum price filter"}, + }, + "required": ["query"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "create_order", + "description": "Create a new order for a product", + "parameters": { + "type": "object", + "properties": { + "product_id": {"type": "string", "description": "Product ID to order"}, + "quantity": {"type": "integer", "description": "Number of items"}, + "shipping_address": {"type": "string", "description": "Delivery address"}, + }, + "required": ["product_id", "quantity"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "cancel_order", + "description": "Cancel an existing order", + "parameters": { + "type": "object", + "properties": { + "order_id": {"type": "string", "description": "Order ID to cancel"}, + "reason": {"type": "string", "description": "Cancellation reason"}, + }, + "required": ["order_id"], + }, + }, + }, +] + + +def chat(messages: list[dict], tools: list[dict] | None = None, **kwargs) -> dict: + """OpenAI 호환 API 호출.""" + payload: dict = { + "model": MODEL, + "messages": messages, + "temperature": 0.1, + "max_tokens": 512, + **kwargs, + } + if tools: + payload["tools"] = tools + payload["tool_choice"] = "auto" + + data = json.dumps(payload).encode() + req = urllib.request.Request( + f"{BASE_URL}/chat/completions", + data=data, + headers={"Content-Type": "application/json"}, + method="POST", + ) + with urllib.request.urlopen(req, timeout=120) as resp: # noqa: S310 + return json.loads(resp.read().decode()) + + +def print_header(title: str) -> None: + print(f"\n{'=' * 60}") + print(f" {title}") + print(f"{'=' * 60}") + + +def print_result(label: str, passed: bool, detail: str = "") -> 
None: + status = "PASS" if passed else "FAIL" + mark = "[v]" if passed else "[x]" + print(f" {mark} {label}: {status}") + if detail: + print(f" -> {detail}") + + +# ── TEST 1: SearchLLM 통합 ────────────────────────────────────────── + + +def test_search_llm(): + """graph-tool-call의 OpenAICompatibleSearchLLM으로 query expansion 테스트.""" + print_header("TEST 1: SearchLLM Query Expansion & Intent Decomposition") + + from graph_tool_call.retrieval.search_llm import OpenAICompatibleSearchLLM + + llm = OpenAICompatibleSearchLLM( + model=MODEL, + base_url=BASE_URL, + api_key="none", + ) + + # 1a. Query Expansion + print("\n [Query Expansion]") + queries = [ + "파일을 읽고 내용을 수정해서 저장하고 싶어", + "search for cheap laptops and order one", + "주문 취소하고 환불 처리해줘", + ] + for q in queries: + t0 = time.time() + result = llm.expand_query(q) + elapsed = time.time() - t0 + has_keywords = len(result.keywords) > 0 + print_result( + f"expand '{q[:30]}...'", + has_keywords, + f"keywords={result.keywords}, synonyms={result.synonyms}, " + f"english={result.english_terms} ({elapsed:.1f}s)", + ) + + # 1b. 
Intent Decomposition + print("\n [Intent Decomposition]") + multi_queries = [ + "Find a laptop under $1000, order it, and send confirmation email", + "날씨 확인하고 이메일로 알려줘", + ] + for q in multi_queries: + t0 = time.time() + intents = llm.decompose_intents(q) + elapsed = time.time() - t0 + has_intents = len(intents) > 0 + intent_strs = [f"{i.action}({i.target})" for i in intents] + print_result( + f"decompose '{q[:35]}...'", + has_intents, + f"intents={intent_strs} ({elapsed:.1f}s)", + ) + + +# ── TEST 2: OpenAI Function Calling ───────────────────────────────── + + +def test_function_calling(): + """직접 tool calling format으로 호출하여 도구 선택 능력 테스트.""" + print_header("TEST 2: OpenAI Function Calling Format") + + test_cases = [ + { + "name": "단일 도구 - 날씨", + "message": "What's the weather like in Seoul?", + "expected_tool": "get_weather", + "expected_args": ["city"], + }, + { + "name": "단일 도구 - 이메일", + "message": "Send an email to john@example.com saying hello", + "expected_tool": "send_email", + "expected_args": ["to"], + }, + { + "name": "단일 도구 - 상품 검색", + "message": "Find me laptops under $500", + "expected_tool": "search_products", + "expected_args": ["query"], + }, + { + "name": "단일 도구 - 주문 취소", + "message": "Cancel order ORD-12345 because I changed my mind", + "expected_tool": "cancel_order", + "expected_args": ["order_id"], + }, + { + "name": "도구 불필요 - 일반 대화", + "message": "Hello, how are you?", + "expected_tool": None, + "expected_args": [], + }, + ] + + for tc in test_cases: + t0 = time.time() + try: + result = chat( + messages=[{"role": "user", "content": tc["message"]}], + tools=TOOLS, + ) + elapsed = time.time() - t0 + choice = result["choices"][0] + msg = choice["message"] + + tool_calls = msg.get("tool_calls", []) + finish_reason = choice.get("finish_reason", "") + + if tc["expected_tool"] is None: + # 도구 호출하지 않아야 하는 케이스 + passed = len(tool_calls) == 0 + detail = f"finish={finish_reason}, tool_calls={len(tool_calls)}" + else: + if tool_calls: + called = 
tool_calls[0] + func_name = called.get("function", {}).get("name", "") + func_args_raw = called.get("function", {}).get("arguments", "{}") + try: + func_args = ( + json.loads(func_args_raw) + if isinstance(func_args_raw, str) + else func_args_raw + ) + except json.JSONDecodeError: + func_args = {} + + name_match = func_name == tc["expected_tool"] + args_present = all(k in func_args for k in tc["expected_args"]) + passed = name_match and args_present + + detail = ( + f"called={func_name}, args={json.dumps(func_args, ensure_ascii=False)}" + f" ({elapsed:.1f}s)" + ) + else: + passed = False + content_preview = msg.get("content", "")[:80] + detail = f"NO tool call, got text: '{content_preview}...' ({elapsed:.1f}s)" + + except Exception as e: + passed = False + detail = f"ERROR: {e}" + elapsed = time.time() - t0 + + print_result(tc["name"], passed, detail) + + +# ── TEST 3: graph-tool-call retrieve + LLM 조합 ───────────────────── + + +def test_retrieve_with_llm(): + """ToolGraph retrieve 후 LLM에게 도구 선택시키는 end-to-end 테스트.""" + print_header("TEST 3: ToolGraph Retrieve + LLM Tool Selection (E2E)") + + from graph_tool_call import ToolGraph + + tg = ToolGraph() + tg.add_tools(TOOLS) + + # 관계 추가 + tg.add_relation("search_products", "create_order", "requires") + tg.add_relation("create_order", "cancel_order", "complementary") + tg.add_relation("get_weather", "send_email", "complementary") + + test_queries = [ + { + "query": "I want to buy a laptop", + "expected_retrieval": ["search_products", "create_order"], + "expected_tool": "search_products", + }, + { + "query": "Cancel my order ORD-999", + "expected_retrieval": ["cancel_order"], + "expected_tool": "cancel_order", + }, + { + "query": "Check Seoul weather and email it to me", + "expected_retrieval": ["get_weather", "send_email"], + "expected_tool": "get_weather", # 첫 번째로 호출할 도구 + }, + ] + + for tc in test_queries: + # Step 1: graph-tool-call로 관련 도구 검색 + retrieved = tg.retrieve(tc["query"], top_k=3) + retrieved_names = [t.name 
for t in retrieved] + + retrieval_hit = any(e in retrieved_names for e in tc["expected_retrieval"]) + print_result( + f"retrieve '{tc['query'][:30]}...'", + retrieval_hit, + f"got={retrieved_names}", + ) + + # Step 2: 검색된 도구만 LLM에 전달 + filtered_tools = [t for t in TOOLS if t["function"]["name"] in retrieved_names] + + t0 = time.time() + try: + result = chat( + messages=[{"role": "user", "content": tc["query"]}], + tools=filtered_tools, + ) + elapsed = time.time() - t0 + choice = result["choices"][0] + tool_calls = choice["message"].get("tool_calls", []) + + if tool_calls: + func_name = tool_calls[0].get("function", {}).get("name", "") + func_args_raw = tool_calls[0].get("function", {}).get("arguments", "{}") + try: + func_args = ( + json.loads(func_args_raw) + if isinstance(func_args_raw, str) + else func_args_raw + ) + except json.JSONDecodeError: + func_args = {} + passed = func_name == tc["expected_tool"] + detail = f"LLM chose={func_name}, args={json.dumps(func_args, ensure_ascii=False)} ({elapsed:.1f}s)" + else: + passed = False + content = choice["message"].get("content", "")[:80] + detail = f"NO tool call: '{content}' ({elapsed:.1f}s)" + except Exception as e: + passed = False + detail = f"ERROR: {e}" + + print_result(f" LLM select '{tc['expected_tool']}'", passed, detail) + + +# ── MAIN ───────────────────────────────────────────────────────────── + +if __name__ == "__main__": + print("\n" + "#" * 60) + print(" Bonsai-8B (1-bit Q1_0) Tool Calling Benchmark") + print(" Model: localhost:8080 | OpenAI-compatible API") + print("#" * 60) + + results = {} + + # Test 1 + try: + test_search_llm() + except Exception as e: + print(f" [!] Test 1 failed: {e}") + + # Test 2 + try: + test_function_calling() + except Exception as e: + print(f" [!] Test 2 failed: {e}") + + # Test 3 + try: + test_retrieve_with_llm() + except Exception as e: + print(f" [!] 
Test 3 failed: {e}") + + print(f"\n{'=' * 60}") + print(" Done!") + print(f"{'=' * 60}\n") diff --git a/examples/xgen_workflow_agent.py b/examples/xgen_workflow_agent.py index cd8da37..2031410 100644 --- a/examples/xgen_workflow_agent.py +++ b/examples/xgen_workflow_agent.py @@ -101,12 +101,19 @@ def upload_file(file_path: str, bucket: str) -> str: @tool def query_database(sql: str) -> str: """Execute a SQL query on the analytics database.""" - return f"Query result: 42 rows" + return "Query result: 42 rows" all_tools = [ - search_products, get_order_detail, cancel_order, create_refund, - send_notification, get_user_profile, update_inventory, - generate_report, upload_file, query_database, + search_products, + get_order_detail, + cancel_order, + create_refund, + send_notification, + get_user_profile, + update_inventory, + generate_report, + upload_file, + query_database, ] # -- 핵심: filter_tools 적용 (2줄) -- @@ -199,11 +206,9 @@ def send_notification(user_id: str, message: str) -> str: """Send push notification to a user.""" return f"Sent to {user_id}" - all_tools = [search_products, get_order_detail, cancel_order, - create_refund, send_notification] + _all_tools = [search_products, get_order_detail, cancel_order, create_refund, send_notification] # -- 핵심: create_agent 교체 -- - from graph_tool_call.langchain import create_agent as create_filtered_agent # query_mode="message": 기본값, 추가 LLM 호출 없음 (빠름) # query_mode="llm": 대화 컨텍스트에서 검색 쿼리 생성 (멀티턴 강함) @@ -284,27 +289,70 @@ def pattern_c_gateway(): tools (50~500개) → create_gateway_tools() → 2개 meta-tool → agent_core """ import json - from langchain_core.tools import tool # -- 시뮬레이션: DB에서 가져온 사용자 등록 tool 50개 -- tools = [] tool_categories = { - "order": ["create_order", "get_order", "cancel_order", "update_order", "list_orders", - "get_order_status", "track_shipment", "confirm_delivery", "return_order", - "exchange_order"], - "product": ["search_products", "get_product", "create_product", "update_product", - "delete_product", 
"get_product_reviews", "add_product_review", - "get_product_inventory", "update_price", "get_categories"], - "user": ["get_user", "create_user", "update_user", "delete_user", "list_users", - "get_user_orders", "get_user_wishlist", "add_to_wishlist", - "get_user_notifications", "update_preferences"], - "payment": ["process_payment", "create_refund", "get_payment_status", - "list_transactions", "get_invoice", "send_receipt", - "validate_coupon", "apply_discount", "get_billing_info", - "update_payment_method"], - "admin": ["generate_report", "get_analytics", "export_data", "import_data", - "get_system_status", "clear_cache", "send_notification", - "create_announcement", "get_audit_log", "manage_permissions"], + "order": [ + "create_order", + "get_order", + "cancel_order", + "update_order", + "list_orders", + "get_order_status", + "track_shipment", + "confirm_delivery", + "return_order", + "exchange_order", + ], + "product": [ + "search_products", + "get_product", + "create_product", + "update_product", + "delete_product", + "get_product_reviews", + "add_product_review", + "get_product_inventory", + "update_price", + "get_categories", + ], + "user": [ + "get_user", + "create_user", + "update_user", + "delete_user", + "list_users", + "get_user_orders", + "get_user_wishlist", + "add_to_wishlist", + "get_user_notifications", + "update_preferences", + ], + "payment": [ + "process_payment", + "create_refund", + "get_payment_status", + "list_transactions", + "get_invoice", + "send_receipt", + "validate_coupon", + "apply_discount", + "get_billing_info", + "update_payment_method", + ], + "admin": [ + "generate_report", + "get_analytics", + "export_data", + "import_data", + "get_system_status", + "clear_cache", + "send_notification", + "create_announcement", + "get_audit_log", + "manage_permissions", + ], } for category, tool_names in tool_categories.items(): @@ -410,7 +458,7 @@ def bonus_multiturn_scenario(): top_name = results[0].name if results else "없음" match = "✓" if 
top_name == s["expected"] else "✗" - print(f" 턴 {s['turn']}: \"{s['message']}\"") + print(f' 턴 {s["turn"]}: "{s["message"]}"') print(f" → message 모드 Top-1: {top_name} {match}") if "note" in s: print(f" ※ {s['note']}") diff --git a/examples/xgen_workflow_gateway.py b/examples/xgen_workflow_gateway.py index 1f0ac71..37a356a 100644 --- a/examples/xgen_workflow_gateway.py +++ b/examples/xgen_workflow_gateway.py @@ -24,13 +24,13 @@ import asyncio import json - # ===================================================================== # 방법 1: MCP 서버에서 tool 수집 → gateway # ===================================================================== # 실제 MCP 서버(Slack, GitHub, Jira 등)에서 tool을 가져와서 # gateway 2개로 축약하는 패턴. xgen-workflow 실 적용 코드와 동일. + async def example_mcp_gateway(): """MCP 서버에서 tool 수집 후 gateway agent 구성.""" from langchain_openai import ChatOpenAI @@ -66,8 +66,9 @@ async def example_mcp_gateway(): # MCP tool → LangChain StructuredTool 변환 for t in response.tools: from langchain_core.tools import StructuredTool + tool = StructuredTool.from_function( - func=lambda **kwargs, _s=session, _n=t.name: asyncio.run( + func=lambda _s=session, _n=t.name, **kwargs: asyncio.run( _s.call_tool(_n, kwargs) ), name=t.name, @@ -97,6 +98,7 @@ async def example_mcp_gateway(): # Swagger/OpenAPI에서 tool을 자동 생성하고 gateway에 넣는 패턴. # 사내 API가 OpenAPI spec으로 문서화되어 있을 때 유용. + def example_openapi_gateway(): """OpenAPI spec에서 tool 생성 후 gateway agent 구성.""" from langchain_openai import ChatOpenAI @@ -104,7 +106,6 @@ def example_openapi_gateway(): from graph_tool_call import ToolGraph from graph_tool_call.langchain import create_gateway_tools - from graph_tool_call.langchain.tools import tool_schema_to_openai_function # ── 1. 
OpenAPI spec에서 ToolGraph 구축 ─────────────────────── # URL 또는 파일 경로 모두 가능 @@ -127,12 +128,15 @@ def example_openapi_gateway(): def make_fn(tool_schema=schema): def fn(**kwargs): req = build_request(tool_schema, kwargs) - return json.dumps({ - "method": req.method, - "url": req.url, - "body": req.body, - "note": "실제 환경에서는 requests.request()로 호출", - }) + return json.dumps( + { + "method": req.method, + "url": req.url, + "body": req.body, + "note": "실제 환경에서는 requests.request()로 호출", + } + ) + return fn tool = StructuredTool.from_function( @@ -163,6 +167,7 @@ def fn(**kwargs): # 이미 LangChain tool이 있는 프로젝트에서 gateway 또는 filter를 # 한 줄로 적용하는 패턴. 기존 코드 변경 최소화. + def example_langchain_integration(): """기존 LangChain tool에 gateway/filter 적용.""" from langchain_community.tools import DuckDuckGoSearchRun @@ -214,8 +219,14 @@ def send_slack_message(channel: str, message: str) -> str: return json.dumps({"ok": True, "channel": channel}) all_tools = [ - search, get_weather, send_email, create_calendar_event, - list_files, read_file, query_database, create_jira_issue, + search, + get_weather, + send_email, + create_calendar_event, + list_files, + read_file, + query_database, + create_jira_issue, send_slack_message, ] print(f" 기존 tool {len(all_tools)}개") diff --git a/graph_tool_call/analyze/dependency.py b/graph_tool_call/analyze/dependency.py index 552ebbe..e21481d 100644 --- a/graph_tool_call/analyze/dependency.py +++ b/graph_tool_call/analyze/dependency.py @@ -134,16 +134,13 @@ def _group_by_resource(tools: list[ToolSchema]) -> dict[str, list[ToolSchema]]: The base resource is the first *meaningful* non-param path segment. A segment is considered a non-meaningful prefix when it groups more than - ``_PREFIX_THRESHOLD`` percent of all tools — this handles version prefixes + ``prefix_threshold`` percent of all tools — this handles version prefixes (``/v1``, ``/v2``), routing prefixes (``/api``, ``/rest``), etc. without requiring a hardcoded list. 
""" - _PREFIX_THRESHOLD = 0.4 # if a segment covers >40% of tools, it's a prefix + prefix_threshold = 0.4 # if a segment covers >40% of tools, it's a prefix - api_tools = [ - t for t in tools - if t.metadata.get("path") and t.metadata.get("method") - ] + api_tools = [t for t in tools if t.metadata.get("path") and t.metadata.get("method")] if not api_tools: return {} @@ -171,7 +168,7 @@ def _group_by_resource(tools: list[ToolSchema]) -> dict[str, list[ToolSchema]]: if not counter: break most_common_count = max(counter.values()) - if most_common_count / total > _PREFIX_THRESHOLD: + if most_common_count / total > prefix_threshold: skip_depth = depth + 1 else: break @@ -296,12 +293,8 @@ def _detect_crud_patterns(group: list[ToolSchema]) -> list[DetectedRelation]: posts = by_role.get("post_collection", []) gets_single = by_role.get("get_single", []) gets_collection = by_role.get("get_collection", []) - puts = by_role.get("put_single", []) - patches = by_role.get("patch_single", []) deletes = by_role.get("delete_single", []) - updates = puts + patches - # --- Focused CRUD relations --- # Only create relations that represent real data dependencies, # not every possible CRUD combination. 
@@ -323,7 +316,10 @@ def _detect_crud_patterns(group: list[ToolSchema]) -> list[DetectedRelation]: target=post.name, relation_type=RelationType.REQUIRES, confidence=0.9, - evidence=f"{get_s.name} (GET single) requires {post.name} (POST) — same resource '{post_resource}'", + evidence=( + f"{get_s.name} (GET single) requires {post.name} (POST) — " + f"same resource '{post_resource}'" + ), layer=1, ) ) @@ -524,7 +520,8 @@ def _detect_name_based(tools: list[ToolSchema]) -> list[DetectedRelation]: # Require strong evidence: 2+ shared tokens, or the token # appears in a parameter ending with "id" (e.g., "orderId") has_id_param = any( - tok in p.name.lower() for p in tool_b.parameters + tok in p.name.lower() + for p in tool_b.parameters for tok in shared if "id" in p.name.lower() ) @@ -575,7 +572,8 @@ def _detect_cross_resource(tools: list[ToolSchema]) -> list[DetectedRelation]: name_tokens = _normalize_name(tool.name) # Remove verb prefix resource_tokens = [ - t for t in name_tokens + t + for t in name_tokens if t not in ("get", "list", "create", "add", "post", "read", "find") ] for tok in resource_tokens: @@ -617,16 +615,11 @@ def _detect_cross_resource(tools: list[ToolSchema]) -> list[DetectedRelation]: # Prefer GET single over GET list/POST provider_method = (provider.metadata.get("method") or "").upper() provider_path = provider.metadata.get("path", "") - is_get_single = ( - provider_method == "GET" - and _is_single_resource_path(provider_path) - ) + is_get_single = provider_method == "GET" and _is_single_resource_path(provider_path) # Only create cross-resource link if provider is from # a DIFFERENT resource category than the consumer - consumer_resource = _extract_resource( - tool.metadata.get("path", "") - ) + consumer_resource = _extract_resource(tool.metadata.get("path", "")) provider_resource = _extract_resource(provider_path) if consumer_resource == provider_resource: continue # same resource — handled by structural detection @@ -661,44 +654,92 @@ def 
_detect_cross_resource(tools: list[ToolSchema]) -> list[DetectedRelation]: # Maps leading verb in an RPC method name to a CRUD intent category. _VERB_TO_INTENT: dict[str, str] = { # read - "get": "read", "find": "read", "fetch": "read", "list": "read", - "search": "read", "select": "read", "load": "read", "read": "read", + "get": "read", + "find": "read", + "fetch": "read", + "list": "read", + "search": "read", + "select": "read", + "load": "read", + "read": "read", "download": "read", # write (create) - "save": "write", "create": "write", "add": "write", "insert": "write", - "register": "write", "regist": "write", + "save": "write", + "create": "write", + "add": "write", + "insert": "write", + "register": "write", + "regist": "write", # update - "modify": "update", "update": "update", "edit": "update", - "change": "update", "patch": "update", + "modify": "update", + "update": "update", + "edit": "update", + "change": "update", + "patch": "update", # delete - "delete": "delete", "remove": "delete", "cancel": "delete", + "delete": "delete", + "remove": "delete", + "cancel": "delete", "withdraw": "delete", # action (side-effect operations) - "process": "action", "execute": "action", "apply": "action", - "approve": "action", "reject": "action", "confirm": "action", - "accept": "action", "send": "action", "upload": "action", + "process": "action", + "execute": "action", + "apply": "action", + "approve": "action", + "reject": "action", + "confirm": "action", + "accept": "action", + "send": "action", + "upload": "action", "export": "action", } # Trailing tokens in method names that describe the *view*, not the resource. 
-_NAME_SUFFIXES: frozenset[str] = frozenset({ - "list", "detail", "details", "info", "count", "excel", "popup", - "summary", "check", "data", "total", "all", "page", "download", -}) +_NAME_SUFFIXES: frozenset[str] = frozenset( + { + "list", + "detail", + "details", + "info", + "count", + "excel", + "popup", + "summary", + "check", + "data", + "total", + "all", + "page", + "download", + } +) # Common DTO class-name suffixes that are not part of the resource identity. -_DTO_SUFFIXES: frozenset[str] = frozenset({ - "request", "response", "dto", "entity", "info", "base", - "api", "vo", "model", "form", "param", "result", "ml", -}) +_DTO_SUFFIXES: frozenset[str] = frozenset( + { + "request", + "response", + "dto", + "entity", + "info", + "base", + "api", + "vo", + "model", + "form", + "param", + "result", + "ml", + } +) # CRUD workflow rules: (source_intent, target_intent, relation, same_ctrl_conf, cross_ctrl_conf) # ``None`` for cross_ctrl_conf means the rule is skipped across controllers. _WORKFLOW_RULES: list[tuple[str, str, RelationType, float, float | None]] = [ - ("read", "write", RelationType.REQUIRES, 0.9, 0.8), - ("update", "read", RelationType.REQUIRES, 0.85, 0.75), - ("delete", "read", RelationType.REQUIRES, 0.85, 0.75), - ("action", "read", RelationType.REQUIRES, 0.75, None), + ("read", "write", RelationType.REQUIRES, 0.9, 0.8), + ("update", "read", RelationType.REQUIRES, 0.85, 0.75), + ("delete", "read", RelationType.REQUIRES, 0.85, 0.75), + ("action", "read", RelationType.REQUIRES, 0.75, None), ] @@ -791,31 +832,35 @@ def _detect_rpc_crud_workflows(tools: list[ToolSchema]) -> list[DetectedRelation same = _same_controller(src, tgt) if not same and cross_conf is None: continue - relations.append(DetectedRelation( - source=src.name, - target=tgt.name, - relation_type=rel_type, - confidence=same_conf if same else cross_conf, # type: ignore[arg-type] - evidence=( - f"{src.name} ({src_intent}) → {tgt.name} ({tgt_intent})" - f" — resource '{resource}'" - ), - 
layer=4, - )) + relations.append( + DetectedRelation( + source=src.name, + target=tgt.name, + relation_type=rel_type, + confidence=same_conf if same else cross_conf, # type: ignore[arg-type] + evidence=( + f"{src.name} ({src_intent}) → {tgt.name} ({tgt_intent})" + f" — resource '{resource}'" + ), + layer=4, + ) + ) # Readers within same controller are SIMILAR_TO. readers = by_intent.get("read", []) for i, r1 in enumerate(readers): - for r2 in readers[i + 1:]: + for r2 in readers[i + 1 :]: if r1.name != r2.name and _same_controller(r1, r2): - relations.append(DetectedRelation( - source=r1.name, - target=r2.name, - relation_type=RelationType.SIMILAR_TO, - confidence=0.8, - evidence=f"{r1.name} ↔ {r2.name} — similar reads for '{resource}'", - layer=4, - )) + relations.append( + DetectedRelation( + source=r1.name, + target=r2.name, + relation_type=RelationType.SIMILAR_TO, + confidence=0.8, + evidence=f"{r1.name} ↔ {r2.name} — similar reads for '{resource}'", + layer=4, + ) + ) return relations @@ -836,16 +881,18 @@ def _detect_rpc_dto_links(tools: list[ToolSchema]) -> list[DetectedRelation]: if not 2 <= len(members) <= 20: continue for i, a in enumerate(members): - for b in members[i + 1:]: + for b in members[i + 1 :]: if a.name != b.name and not _same_controller(a, b): - relations.append(DetectedRelation( - source=a.name, - target=b.name, - relation_type=RelationType.COMPLEMENTARY, - confidence=0.75, - evidence=f"{a.name} ↔ {b.name} — shared DTO '{dto_res}'", - layer=4, - )) + relations.append( + DetectedRelation( + source=a.name, + target=b.name, + relation_type=RelationType.COMPLEMENTARY, + confidence=0.75, + evidence=f"{a.name} ↔ {b.name} — shared DTO '{dto_res}'", + layer=4, + ) + ) return relations diff --git a/graph_tool_call/graphify/__init__.py b/graph_tool_call/graphify/__init__.py index 6785ee3..98bbbce 100644 --- a/graph_tool_call/graphify/__init__.py +++ b/graph_tool_call/graphify/__init__.py @@ -30,6 +30,7 @@ "DEFAULT_CONF_AMBIGUOUS", 
"DEFAULT_CONF_EXTRACTED", "DEFAULT_CONF_INFERRED", + "_apply_pair_hints", "bucket_confidence", "ingest_openapi_graphify", "preserve_refs_for_detection", diff --git a/graph_tool_call/graphify/ingest.py b/graph_tool_call/graphify/ingest.py index 48bc8d5..afa23f3 100644 --- a/graph_tool_call/graphify/ingest.py +++ b/graph_tool_call/graphify/ingest.py @@ -124,7 +124,7 @@ def preserve_refs_for_detection( Returns the number of tools whose metadata was updated. Mutates ``tools`` in place. """ - paths = (raw_spec.get("paths") or {}) + paths = raw_spec.get("paths") or {} if not isinstance(paths, dict): return 0 @@ -218,8 +218,13 @@ def _apply_pair_hints( from ``detect_dependencies`` UNLESS the new pair is operator-curated (``source="manual"``) — operator intent overrides automatic detection. """ - stats = {"manual": 0, "auto": 0, "skipped_target_missing": 0, - "skipped_self": 0, "skipped_existing_structural": 0} + stats = { + "manual": 0, + "auto": 0, + "skipped_target_missing": 0, + "skipped_self": 0, + "skipped_existing_structural": 0, + } tool_names = set(tg.tools.keys()) for s in schemas: @@ -350,9 +355,7 @@ def ingest_openapi_graphify( stats["refs_preserved"] = preserve_refs_for_detection(schemas, raw_spec) # min_confidence=0.0 so we see every candidate; we re-bucket here. - relations: list[DetectedRelation] = detect_dependencies( - schemas, spec, min_confidence=0.0 - ) + relations: list[DetectedRelation] = detect_dependencies(schemas, spec, min_confidence=0.0) seen: set[tuple[str, str, str]] = set() # (src, tgt, relation_value) for rel in relations: @@ -419,7 +422,7 @@ def ingest_openapi_graphify( # cross_source also re-counted on these new edges for completeness. 
for s in schemas: ai = (s.metadata or {}).get("ai_metadata") or {} - for p in (ai.get("pairs_well_with") or []): + for p in ai.get("pairs_well_with") or []: if not isinstance(p, dict): continue tgt = str(p.get("tool") or "").strip() diff --git a/graph_tool_call/graphify/retrieval.py b/graph_tool_call/graphify/retrieval.py index 55e659b..f15e4bc 100644 --- a/graph_tool_call/graphify/retrieval.py +++ b/graph_tool_call/graphify/retrieval.py @@ -81,10 +81,7 @@ def _substring_seeds( for name, tool in tools.items(): nname = _strip_diacritics(name).lower() ndesc = _strip_diacritics(tool.description or "").lower() - score = ( - sum(1.0 for t in terms if t in nname) - + 0.5 * sum(1.0 for t in terms if t in ndesc) - ) + score = sum(1.0 for t in terms if t in nname) + 0.5 * sum(1.0 for t in terms if t in ndesc) if score > 0: scored.append((name, score)) scored.sort(key=lambda x: x[1], reverse=True) @@ -182,11 +179,7 @@ def _bfs_from_seeds( return {}, [] max_seed = max((s for _, s in seed_scores), default=1.0) or 1.0 - scores: dict[str, float] = { - n: s / max_seed - for n, s in seed_scores - if graph.has_node(n) - } + scores: dict[str, float] = {n: s / max_seed for n, s in seed_scores if graph.has_node(n)} visited: set[str] = set(scores) frontier: list[str] = list(scores) edges_visited: list[tuple[str, str]] = [] @@ -301,9 +294,7 @@ def render_subgraph_text( # Order nodes: by retrieval score (desc) if known, else by name. 
if sort_by_score: - node_order = sorted( - node_set, key=lambda n: (-sort_by_score.get(n, 0.0), n) - ) + node_order = sorted(node_set, key=lambda n: (-sort_by_score.get(n, 0.0), n)) else: node_order = sorted(node_set) @@ -437,9 +428,7 @@ def retrieve_graphify( scores[h] *= _HISTORY_DEMOTE # 5) Filter to TOOL nodes only and rank - tool_scores: dict[str, float] = { - n: s for n, s in scores.items() if n in tg.tools - } + tool_scores: dict[str, float] = {n: s for n, s in scores.items() if n in tg.tools} ranked = sorted(tool_scores.items(), key=lambda x: x[1], reverse=True)[:top_k] chosen_names: set[str] = {n for n, _ in ranked} diff --git a/graph_tool_call/ingest/io_contract.py b/graph_tool_call/ingest/io_contract.py index 1768a47..7748bb5 100644 --- a/graph_tool_call/ingest/io_contract.py +++ b/graph_tool_call/ingest/io_contract.py @@ -245,9 +245,7 @@ def extract_consumes_for_operation( seen_names: set[str] = set() # query / path / header parameters - all_params = (operation.get("parameters") or []) + ( - (path_item or {}).get("parameters") or [] - ) + all_params = (operation.get("parameters") or []) + ((path_item or {}).get("parameters") or []) for p in all_params: if not isinstance(p, dict) or "name" not in p: continue diff --git a/graph_tool_call/ingest/openapi.py b/graph_tool_call/ingest/openapi.py index f914fd4..8f93dea 100644 --- a/graph_tool_call/ingest/openapi.py +++ b/graph_tool_call/ingest/openapi.py @@ -384,7 +384,7 @@ def _extract_params_openapi3( is_required = prop_name in body_required if required_only and not is_required: continue - desc = (prop_schema.get("description") or "") + desc = prop_schema.get("description") or "" # nested object/array는 한 단계 더 펼치기 if _schema_type(prop_schema) in ("object", "array"): nested = _summarize_object_schema(prop_schema) diff --git a/graph_tool_call/langchain/agent.py b/graph_tool_call/langchain/agent.py index 9215e6c..533183e 100644 --- a/graph_tool_call/langchain/agent.py +++ b/graph_tool_call/langchain/agent.py @@ 
-118,9 +118,7 @@ def _generate_query_with_llm( # Include a sample of tool names to help the LLM understand the domain sample_tools = ", ".join(tool_names[:20]) user_prompt = ( - f"Available tools include: {sample_tools}\n\n" - f"Conversation:\n{conversation}\n\n" - f"Search query:" + f"Available tools include: {sample_tools}\n\nConversation:\n{conversation}\n\nSearch query:" ) try: @@ -129,10 +127,12 @@ def _generate_query_with_llm( if hasattr(model, "bound_tools"): # If model has tools bound, get the underlying model base_model = model - response = base_model.invoke([ - SystemMessage(content=_QUERY_GEN_SYSTEM), - HumanMessage(content=user_prompt), - ]) + response = base_model.invoke( + [ + SystemMessage(content=_QUERY_GEN_SYSTEM), + HumanMessage(content=user_prompt), + ] + ) query = response.content.strip().strip('"').strip("'") if query: logger.debug("LLM-generated query: %s", query) @@ -187,8 +187,7 @@ def create_agent( from langgraph.prebuilt import create_react_agent except ImportError: raise ImportError( - "langgraph is required for create_agent(). " - "Install with: pip install langgraph" + "langgraph is required for create_agent(). Install with: pip install langgraph" ) from graph_tool_call import ToolGraph diff --git a/graph_tool_call/langchain/gateway.py b/graph_tool_call/langchain/gateway.py index 1ad9e97..a570589 100644 --- a/graph_tool_call/langchain/gateway.py +++ b/graph_tool_call/langchain/gateway.py @@ -99,9 +99,7 @@ def _summarize_response_schema(schema: dict[str, Any]) -> str | None: return f"array of {summary}" if is_array else summary -def _enrich_from_graph( - name: str, graph: Any | None -) -> dict[str, Any]: +def _enrich_from_graph(name: str, graph: Any | None) -> dict[str, Any]: """Pull source_label, method/path, response summary, and outgoing edges from the underlying ToolGraph for *name*. Returns an empty dict if the graph or tool is not available — callers should treat all keys as optional. 
@@ -136,9 +134,7 @@ def _enrich_from_graph( chains: list[str] = [] for _src, target, attrs in edges: relation = attrs.get("relation") - relation_name = ( - relation.value if hasattr(relation, "value") else str(relation) - ) + relation_name = relation.value if hasattr(relation, "value") else str(relation) # Skip purely structural BELONGS_TO edges if relation_name in ("belongs_to", "BELONGS_TO"): continue diff --git a/graph_tool_call/mcp_proxy.py b/graph_tool_call/mcp_proxy.py index 7cb9c71..7f2669c 100644 --- a/graph_tool_call/mcp_proxy.py +++ b/graph_tool_call/mcp_proxy.py @@ -786,9 +786,10 @@ async def app_lifespan(app: Any) -> Any: # type: ignore[override] yield task.cancel_scope.cancel() - import anyio from contextlib import asynccontextmanager + import anyio + @asynccontextmanager async def lifespan(app: Any) -> Any: # type: ignore[override] async with transport.connect() as (read_stream, write_stream): diff --git a/graph_tool_call/net.py b/graph_tool_call/net.py index ba46e26..466ae30 100644 --- a/graph_tool_call/net.py +++ b/graph_tool_call/net.py @@ -55,6 +55,7 @@ def _open_url( handlers: list[Any] = [_LimitedRedirectHandler(max_redirects)] if not verify_ssl: import ssl + ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE @@ -157,7 +158,12 @@ def fetch_url_text( req = urllib.request.Request(url, headers=headers or {}) try: - with _open_url(req, timeout=timeout, max_redirects=max_redirects, verify_ssl=verify_ssl) as resp: + with _open_url( + req, + timeout=timeout, + max_redirects=max_redirects, + verify_ssl=verify_ssl, + ) as resp: final_url = url if hasattr(resp, "geturl"): candidate = resp.geturl() diff --git a/graph_tool_call/ontology/llm_provider.py b/graph_tool_call/ontology/llm_provider.py index 6ee8b4e..eb29850 100644 --- a/graph_tool_call/ontology/llm_provider.py +++ b/graph_tool_call/ontology/llm_provider.py @@ -99,8 +99,9 @@ class ToolEnrichment: - Graph edges (``pairs_well_with`` becomes semantic edges) 
""" - canonical_action: str # search | read | create | update | delete | action - primary_resource: str # e.g. "product" + # canonical_action: search | read | create | update | delete | action + canonical_action: str + primary_resource: str # e.g. "product" one_line_summary: str when_to_use: str when_not_to_use: str = "" @@ -119,8 +120,10 @@ class ToolEnrichment: Example: Tools: createUser, getUserProfile, deleteUser Answer: [ - {{"source":"getUserProfile","target":"createUser","relation":"REQUIRES","confidence":0.9,"reason":"need user to exist"}}, - {{"source":"createUser","target":"deleteUser","relation":"PRECEDES","confidence":0.8,"reason":"create before delete"}} + {{"source":"getUserProfile","target":"createUser","relation":"REQUIRES","confidence":0.9, + "reason":"need user to exist"}}, + {{"source":"createUser","target":"deleteUser","relation":"PRECEDES","confidence":0.8, + "reason":"create before delete"}} ] Relation types: @@ -365,7 +368,7 @@ def _parse_enrichment(data: Any) -> ToolEnrichment | None: if isinstance(p, dict) and str(p.get("semantic", "")).strip() ] consumes = [] - for c in (data.get("consumes_semantics") or []): + for c in data.get("consumes_semantics") or []: if not (isinstance(c, dict) and str(c.get("semantic", "")).strip()): continue raw_kind = str(c.get("kind", "data")).strip().lower() @@ -430,6 +433,7 @@ def _extract_json(text: str) -> Any: # Remove ... 
blocks (qwen3 thinking mode) import re as _re + text = _re.sub(r"[\s\S]*?", "", text).strip() # Remove markdown code blocks @@ -564,8 +568,7 @@ def verify_relations( for i in range(0, len(relations), batch_size): batch = relations[i : i + batch_size] rels_text = "\n".join( - f"- {r.source} {r.relation_type.name} {r.target} ({r.reason[:60]})" - for r in batch + f"- {r.source} {r.relation_type.name} {r.target} ({r.reason[:60]})" for r in batch ) prompt = _VERIFY_RELATIONS_PROMPT.format( relations_list=rels_text, @@ -610,8 +613,7 @@ def suggest_missing( """ tools_text = _format_tools_list(tools[:30]) existing_text = "\n".join( - f"- {r.source} {r.relation_type.name} {r.target}" - for r in existing_relations[:30] + f"- {r.source} {r.relation_type.name} {r.target}" for r in existing_relations[:30] ) prompt = _SUGGEST_MISSING_PROMPT.format( tools_list=tools_text, @@ -749,11 +751,13 @@ def enrich_pairs( target = str(p.get("tool", "")).strip() if not target or target == name: continue - pair_list.append(PairHint( - tool=target, - reason=str(p.get("reason", "")).strip(), - source="auto", - )) + pair_list.append( + PairHint( + tool=target, + reason=str(p.get("reason", "")).strip(), + source="auto", + ) + ) results[str(name)] = pair_list except (json.JSONDecodeError, KeyError, TypeError): continue @@ -838,7 +842,8 @@ def enrich_tool_semantics( # whose target name is not in the catalog. 
if valid_tool_names is not None: enrichment.pairs_well_with = [ - p for p in enrichment.pairs_well_with + p + for p in enrichment.pairs_well_with if p.tool in valid_tool_names and p.tool != str(name) ] results[str(name)] = enrichment diff --git a/graph_tool_call/plan/__init__.py b/graph_tool_call/plan/__init__.py index 8f2d9eb..dbab1f3 100644 --- a/graph_tool_call/plan/__init__.py +++ b/graph_tool_call/plan/__init__.py @@ -24,39 +24,39 @@ def call_tool(tool_name, args): BindingError, resolve_bindings, ) +from graph_tool_call.plan.intent import ( + IntentParseError, + ParsedIntent, + ToolCatalogEntry, + parse_intent, +) +from graph_tool_call.plan.response import ( + synthesize_failure_response, + synthesize_success_response, +) from graph_tool_call.plan.runner import ( - PlanRunner, + PlanAborted, + PlanCompleted, PlanEvent, + PlanRunner, PlanStarted, - StepStarted, StepCompleted, StepFailed, - PlanCompleted, - PlanAborted, + StepStarted, ) from graph_tool_call.plan.schema import ( + ExecutionTrace, Plan, PlanStep, - ExecutionTrace, StepTrace, ) -from graph_tool_call.plan.intent import ( - IntentParseError, - ParsedIntent, - ToolCatalogEntry, - parse_intent, -) -from graph_tool_call.plan.response import ( - synthesize_success_response, - synthesize_failure_response, -) from graph_tool_call.plan.synthesizer import ( + CyclicDependencyError, + DynamicOptionRequired, + MaxDepthExceededError, PathSynthesizer, PlanSynthesisError, UnsatisfiableFieldError, - CyclicDependencyError, - MaxDepthExceededError, - DynamicOptionRequired, ) __all__ = [ diff --git a/graph_tool_call/plan/binding.py b/graph_tool_call/plan/binding.py index 58d9eef..2ae6a50 100644 --- a/graph_tool_call/plan/binding.py +++ b/graph_tool_call/plan/binding.py @@ -102,9 +102,7 @@ def _lookup(expr: str, context: dict[str, Any]) -> Any: try: idx = int(tok[1:-1]) except ValueError as exc: - raise BindingError( - f"non-numeric array index {tok!r} in binding {expr!r}" - ) from exc + raise 
BindingError(f"non-numeric array index {tok!r} in binding {expr!r}") from exc if not isinstance(node, (list, tuple)): raise BindingError( f"indexing {tok} on non-list type {type(node).__name__} (expr={expr!r})" @@ -112,9 +110,7 @@ def _lookup(expr: str, context: dict[str, Any]) -> Any: try: node = node[idx] except IndexError as exc: - raise BindingError( - f"index {idx} out of range in binding {expr!r}" - ) from exc + raise BindingError(f"index {idx} out of range in binding {expr!r}") from exc else: if not isinstance(node, dict): raise BindingError( @@ -152,7 +148,7 @@ def _tokenize(expr: str) -> list[str]: end = expr.find("]", i) if end == -1: raise BindingError(f"unclosed '[' in binding {expr!r}") - tokens.append(expr[i:end + 1]) + tokens.append(expr[i : end + 1]) i = end else: buf.append(ch) diff --git a/graph_tool_call/plan/intent.py b/graph_tool_call/plan/intent.py index 74dd8b8..c62d396 100644 --- a/graph_tool_call/plan/intent.py +++ b/graph_tool_call/plan/intent.py @@ -22,7 +22,6 @@ from graph_tool_call.ontology.llm_provider import OntologyLLM, _extract_json - # Minimum SequenceMatcher ratio for treating an LLM-emitted entity key as # a typo/expansion of a real vocab entry. 0.8 catches "search_keyword_name" # vs "search_keyword" (~0.85) while rejecting unrelated pairs like @@ -40,21 +39,21 @@ class ToolCatalogEntry: """Condensed tool view for intent-parsing prompt — under ~150 chars each.""" name: str - summary: str = "" # one_line_summary from ai_metadata - when_to_use: str = "" # ai_metadata.when_to_use - consumes_tags: list[str] = field(default_factory=list) # required semantic ids - canonical_action: str = "" # "read" | "search" | "create" | ... - primary_resource: str = "" # "product" | ... + summary: str = "" # one_line_summary from ai_metadata + when_to_use: str = "" # ai_metadata.when_to_use + consumes_tags: list[str] = field(default_factory=list) # required semantic ids + canonical_action: str = "" # "read" | "search" | "create" | ... 
+ primary_resource: str = "" # "product" | ... @dataclass class ParsedIntent: """Stage 1 output — consumed by Stage 2 PathSynthesizer.""" - target: str # tool name picked by LLM + target: str # tool name picked by LLM entities: dict[str, Any] = field(default_factory=dict) - confidence: float = 0.0 # 0.0 ~ 1.0 - output_shape: str = "single" # "single" | "list" | "count" + confidence: float = 0.0 # 0.0 ~ 1.0 + output_shape: str = "single" # "single" | "list" | "count" reasoning: str = "" @@ -146,7 +145,10 @@ def _coerce_entity_keys( out[key_str] = value continue match = difflib.get_close_matches( - key_str, vocab, n=1, cutoff=_VOCAB_FUZZY_CUTOFF, + key_str, + vocab, + n=1, + cutoff=_VOCAB_FUZZY_CUTOFF, ) if match: # If multiple LLM keys collapse onto the same vocab entry, the @@ -169,8 +171,7 @@ def _format_seed_block(seed_entities: dict[str, Any] | None) -> str: if not seed_entities: return "" lines = "\n".join( - f' - {k}: {json.dumps(v, ensure_ascii=False)}' - for k, v in seed_entities.items() + f" - {k}: {json.dumps(v, ensure_ascii=False)}" for k, v in seed_entities.items() ) return ( "\n\nExisting entities (carried over from prior turns — keep these " @@ -193,10 +194,10 @@ def _format_enum_block(enum_mappings: dict[str, dict[str, str]] | None) -> str: if not enum_mappings: return "" lines: list[str] = [] - for field, codes in enum_mappings.items(): + for field_name, codes in enum_mappings.items(): if not isinstance(codes, dict) or not codes: continue - lines.append(f" - {field}:") + lines.append(f" - {field_name}:") for code, label in codes.items(): lines.append(f' "{code}" → {label}') if not lines: diff --git a/graph_tool_call/plan/response.py b/graph_tool_call/plan/response.py index 782ca1f..714b5d4 100644 --- a/graph_tool_call/plan/response.py +++ b/graph_tool_call/plan/response.py @@ -18,7 +18,6 @@ from graph_tool_call.ontology.llm_provider import OntologyLLM - # --------------------------------------------------------------------------- # prompts # 
--------------------------------------------------------------------------- diff --git a/graph_tool_call/plan/runner.py b/graph_tool_call/plan/runner.py index 141d500..3b70f77 100644 --- a/graph_tool_call/plan/runner.py +++ b/graph_tool_call/plan/runner.py @@ -15,19 +15,18 @@ from __future__ import annotations import time +from collections.abc import Callable, Iterator from dataclasses import dataclass, field from datetime import datetime, timezone -from typing import Any, Callable, Iterator +from typing import Any from graph_tool_call.plan.binding import BindingError, resolve_bindings from graph_tool_call.plan.schema import ( ExecutionTrace, Plan, - PlanStep, StepTrace, ) - # --------------------------------------------------------------------------- # Event types — structured so callers can pattern-match by ``type`` field # --------------------------------------------------------------------------- @@ -57,7 +56,7 @@ class StepCompleted: step_id: str = "" tool: str = "" duration_ms: int = 0 - output_preview: Any = None # truncated output for UI + output_preview: Any = None # truncated output for UI output_size: int = 0 @@ -87,14 +86,7 @@ class PlanAborted: total_duration_ms: int = 0 -PlanEvent = ( - PlanStarted - | StepStarted - | StepCompleted - | StepFailed - | PlanCompleted - | PlanAborted -) +PlanEvent = PlanStarted | StepStarted | StepCompleted | StepFailed | PlanCompleted | PlanAborted # --------------------------------------------------------------------------- @@ -126,7 +118,7 @@ def __init__( call_tool: ToolCaller, *, output_preview_limit: int = 512, - on_error: str = "abort", # 'abort' only in v1 + on_error: str = "abort", # 'abort' only in v1 ) -> None: self._call_tool = call_tool self._preview_limit = output_preview_limit @@ -148,7 +140,6 @@ def run_stream( ``input_context`` supplies values for ``${input.xxx}`` bindings — typically the entities extracted by Stage 1 (intent parser). 
""" - started = _now_iso() plan_start = time.monotonic() yield PlanStarted( @@ -180,11 +171,14 @@ def run_stream( step_trace.duration_ms = _ms_since(step_start) trace_steps.append(step_trace) yield StepFailed( - step_id=step.id, tool=step.tool, - error=err, duration_ms=step_trace.duration_ms, + step_id=step.id, + tool=step.tool, + error=err, + duration_ms=step_trace.duration_ms, ) yield PlanAborted( - plan_id=plan.id, failed_step=step.id, + plan_id=plan.id, + failed_step=step.id, error=err, total_duration_ms=_ms_since(plan_start), ) @@ -192,15 +186,17 @@ def run_stream( step_trace.args_resolved = resolved yield StepStarted( - step_id=step.id, tool=step.tool, + step_id=step.id, + tool=step.tool, args_resolved=resolved, - index=idx, total=len(plan.steps), + index=idx, + total=len(plan.steps), ) # 2. Execute via caller's tool invoker try: output = self._call_tool(step.tool, resolved) - except Exception as exc: # noqa: BLE001 — caller-defined + except Exception as exc: # noqa: BLE001 — caller-defined err = { "kind": "tool", "message": str(exc), @@ -210,11 +206,14 @@ def run_stream( step_trace.duration_ms = _ms_since(step_start) trace_steps.append(step_trace) yield StepFailed( - step_id=step.id, tool=step.tool, - error=err, duration_ms=step_trace.duration_ms, + step_id=step.id, + tool=step.tool, + error=err, + duration_ms=step_trace.duration_ms, ) yield PlanAborted( - plan_id=plan.id, failed_step=step.id, + plan_id=plan.id, + failed_step=step.id, error=err, total_duration_ms=_ms_since(plan_start), ) @@ -235,7 +234,8 @@ def run_stream( context[step.id] = output yield StepCompleted( - step_id=step.id, tool=step.tool, + step_id=step.id, + tool=step.tool, duration_ms=step_trace.duration_ms, output_preview=_preview(output, self._preview_limit), output_size=_output_size(output), @@ -251,7 +251,8 @@ def run_stream( except BindingError as exc: err = {"kind": "output_binding", "message": str(exc)} yield PlanAborted( - plan_id=plan.id, failed_step="", + plan_id=plan.id, + 
failed_step="", error=err, total_duration_ms=_ms_since(plan_start), ) @@ -328,6 +329,7 @@ def _preview(value: Any, limit: int) -> Any: """Trim large outputs for UI previews. Keep small values intact.""" if isinstance(value, (dict, list)): import json as _json + try: rendered = _json.dumps(value, ensure_ascii=False) except (TypeError, ValueError): @@ -393,6 +395,7 @@ def _maybe_unwrap_envelope( def _output_size(value: Any) -> int: """Approximate serialized byte size (for observability).""" import json as _json + try: return len(_json.dumps(value, ensure_ascii=False)) except (TypeError, ValueError): diff --git a/graph_tool_call/plan/schema.py b/graph_tool_call/plan/schema.py index b07530f..9fff497 100644 --- a/graph_tool_call/plan/schema.py +++ b/graph_tool_call/plan/schema.py @@ -25,12 +25,12 @@ class PlanStep: at runtime by ``resolve_bindings`` using the accumulated step context. """ - id: str # "s1", "s2", ... - tool: str # function_name (graph node name) + id: str # "s1", "s2", ... + tool: str # function_name (graph node name) args: dict[str, Any] = field(default_factory=dict) - rationale: str = "" # why this step exists (for audit) + rationale: str = "" # why this step exists (for audit) timeout_ms: int | None = None - retryable: bool = False # reserved for v1.1 retry policy + retryable: bool = False # reserved for v1.1 retry policy # Top-level keys the synthesizer expects in this tool's response, # derived from ``produces[].json_path``. Used by PlanRunner to detect # envelope wrappers (e.g. ``{code, message, payload: {...}}``) when the @@ -51,11 +51,11 @@ class Plan: the final answer. If unset, runner returns the last step's result. """ - id: str # uuid - goal: str # user requirement summary + id: str # uuid + goal: str # user requirement summary steps: list[PlanStep] = field(default_factory=list) - output_binding: str | None = None # e.g. "${s2.body}" - created_at: str = "" # ISO8601 + output_binding: str | None = None # e.g. 
"${s2.body}" + created_at: str = "" # ISO8601 metadata: dict[str, Any] = field(default_factory=dict) @@ -66,8 +66,8 @@ class StepTrace: id: str tool: str args_resolved: dict[str, Any] = field(default_factory=dict) - output: Any = None # set on success - error: dict[str, Any] | None = None # set on failure + output: Any = None # set on success + error: dict[str, Any] | None = None # set on failure duration_ms: int = 0 retries: int = 0 @@ -79,7 +79,7 @@ class ExecutionTrace: plan_id: str success: bool steps: list[StepTrace] = field(default_factory=list) - output: Any = None # plan.output_binding resolved + output: Any = None # plan.output_binding resolved failed_step: str | None = None total_duration_ms: int = 0 started_at: str = "" diff --git a/graph_tool_call/plan/synthesizer.py b/graph_tool_call/plan/synthesizer.py index 35858c4..4942b7e 100644 --- a/graph_tool_call/plan/synthesizer.py +++ b/graph_tool_call/plan/synthesizer.py @@ -58,7 +58,7 @@ class MaxDepthExceededError(PlanSynthesisError): """Recursion depth exceeded — likely a misshapen graph.""" -class DynamicOptionRequired(UnsatisfiableFieldError): +class DynamicOptionRequired(UnsatisfiableFieldError): # noqa: N818 """A required data field has a single-hop producer that can be called immediately with the user's entities + context_defaults. 
Surface this so the caller can fetch the option list (instead of weaving a chain) @@ -137,7 +137,7 @@ class _PartialStep: tool: str args: dict[str, Any] = field(default_factory=dict) rationale: str = "" - step_id: str = "" # assigned at topological sort + step_id: str = "" # assigned at topological sort class PathSynthesizer: @@ -236,17 +236,16 @@ def synthesize( final_steps: list[PlanStep] = [] for tool_name in ordered_tools: partial = steps_by_tool[tool_name] - args = { - k: self._rewrite_tool_refs(v, steps_by_tool) - for k, v in partial.args.items() - } - final_steps.append(PlanStep( - id=partial.step_id, - tool=partial.tool, - args=args, - rationale=partial.rationale, - response_root_keys=self._response_root_keys(tool_name), - )) + args = {k: self._rewrite_tool_refs(v, steps_by_tool) for k, v in partial.args.items()} + final_steps.append( + PlanStep( + id=partial.step_id, + tool=partial.tool, + args=args, + rationale=partial.rationale, + response_root_keys=self._response_root_keys(tool_name), + ) + ) target_step_id = steps_by_tool[target].step_id @@ -260,11 +259,13 @@ def synthesize( for step in final_steps: for arg_name, arg_val in (step.args or {}).items(): if isinstance(arg_val, str) and arg_val.startswith("${user_input."): - user_input_slots.append({ - "step_id": step.id, - "tool": step.tool, - "field_name": arg_name, - }) + user_input_slots.append( + { + "step_id": step.id, + "tool": step.tool, + "field_name": arg_name, + } + ) return Plan( id=str(uuid.uuid4()), @@ -372,8 +373,10 @@ def _resolve( # cycle when an alternative producer exists; only when none # remains does the caller fall through to user-input slot (F2). producer = self._find_producer( - semantic=semantic, field_name=field_name, - target_tool=tool_name, entities=entities, + semantic=semantic, + field_name=field_name, + target_tool=tool_name, + entities=entities, excluded=visiting, ) if producer is None: @@ -403,12 +406,11 @@ def _resolve( # hit and continue. 
Without this constraint legitimate # search→detail chains turn into popups. producer_action = self._producer_action(producer) - if ( - producer_action == "read" - and self._is_producer_simple_callable(producer, entities) - ): + if producer_action == "read" and self._is_producer_simple_callable(producer, entities): opt_path = self._produces_path_for( - producer, semantic=semantic, field_name=field_name, + producer, + semantic=semantic, + field_name=field_name, ) if opt_path and "[*]" in opt_path: raise DynamicOptionRequired( @@ -510,9 +512,7 @@ def _build_producer_indexes(self) -> None: # ---- graphify edge indexing & traversal --------------------------------- - _WORKFLOW_RELATIONS: frozenset[str] = frozenset( - {"requires", "precedes", "complementary"} - ) + _WORKFLOW_RELATIONS: frozenset[str] = frozenset({"requires", "precedes", "complementary"}) _CONFIDENCE_RANK: dict[str, int] = { "EXTRACTED": 0, "INFERRED": 1, @@ -537,18 +537,19 @@ def _index_workflow_edges(self, graph: dict[str, Any]) -> None: tgt = e.get("target") or e.get("to") rel = e.get("relation") rel_str = ( - rel.value if hasattr(rel, "value") - else str(rel) if rel is not None else "" + rel.value if hasattr(rel, "value") else str(rel) if rel is not None else "" ).lower() if not src or not tgt or rel_str not in self._WORKFLOW_RELATIONS: continue - self._workflow_edges_out.setdefault(src, []).append({ - "target": tgt, - "relation": rel_str, - "confidence": e.get("confidence"), - "conf_score": float(e.get("conf_score") or 0.0), - "evidence": e.get("evidence") or "", - }) + self._workflow_edges_out.setdefault(src, []).append( + { + "target": tgt, + "relation": rel_str, + "confidence": e.get("confidence"), + "conf_score": float(e.get("conf_score") or 0.0), + "evidence": e.get("evidence") or "", + } + ) # Producer-signal score weights. Higher = stronger signal that this # candidate genuinely produces the value the target needs. 
Weights chosen @@ -695,7 +696,9 @@ def _score(signals: set[str]) -> int: # Cycle filter: skip candidates currently in the resolution stack so # the synthesiser reroutes around the cycle instead of raising. ranked = self._rank_producers( - sorted_names, target_tool=target_tool, entities=entities, + sorted_names, + target_tool=target_tool, + entities=entities, ) for cand in ranked: if cand in excluded: @@ -845,7 +848,7 @@ def _is_chain_eligible(self, producer_name: str, *, target_tool: str) -> bool: if "_" in t_resource: related.add(t_resource.split("_", 1)[0]) - for c in (t_meta_full.get("consumes") or []): + for c in t_meta_full.get("consumes") or []: if not isinstance(c, dict): continue sem = str(c.get("semantic_tag") or "").strip().lower() @@ -897,7 +900,7 @@ def _score(name: str) -> tuple[int, int, int]: ai = meta.get("ai_metadata") or {} affinity = 0 - for c in (meta.get("consumes") or []): + for c in meta.get("consumes") or []: tag = c.get("semantic_tag") or "" fname = c.get("field_name") or "" if (tag and tag in entity_keys) or (fname and fname in entity_keys): diff --git a/graph_tool_call/retrieval/engine.py b/graph_tool_call/retrieval/engine.py index 73e1b1d..c785912 100644 --- a/graph_tool_call/retrieval/engine.py +++ b/graph_tool_call/retrieval/engine.py @@ -455,7 +455,8 @@ def _inject_graph_candidates( # Only consider graph candidates not already found by primary channels new_candidates = { - name: score for name, score in graph_scores.items() + name: score + for name, score in graph_scores.items() if name not in final_scores and name in self._tools } if not new_candidates: @@ -497,7 +498,6 @@ def _boost_method_intent(self, query_intent: Any, scores: dict[str, float]) -> N elif query_intent.delete_intent > 0.5 and method == "DELETE": scores[name] *= 1.15 - def _boost_embedding_rerank(self, query: str, scores: dict[str, float]) -> None: """Rerank top candidates using embedding description similarity.""" if self._embedding_index is None or 
self._embedding_index._provider is None: @@ -809,8 +809,12 @@ async def aretrieve( return await loop.run_in_executor( None, lambda: self.retrieve( - query, top_k=top_k, max_graph_depth=max_graph_depth, - mode=mode, llm=llm, history=history, + query, + top_k=top_k, + max_graph_depth=max_graph_depth, + mode=mode, + llm=llm, + history=history, ), ) @@ -830,8 +834,12 @@ async def aretrieve_with_scores( return await loop.run_in_executor( None, lambda: self.retrieve_with_scores( - query, top_k=top_k, max_graph_depth=max_graph_depth, - mode=mode, llm=llm, history=history, + query, + top_k=top_k, + max_graph_depth=max_graph_depth, + mode=mode, + llm=llm, + history=history, ), ) diff --git a/graph_tool_call/retrieval/graph_search.py b/graph_tool_call/retrieval/graph_search.py index cefe1de..1ecc3e9 100644 --- a/graph_tool_call/retrieval/graph_search.py +++ b/graph_tool_call/retrieval/graph_search.py @@ -121,8 +121,25 @@ def resource_first_search( cat_index = self._get_category_index() query_lower = query.lower() query_tokens = set(re.split(r"[\s_\-/.,;:!?()]+", query_lower)) - query_tokens -= {"a", "an", "the", "of", "for", "to", "in", "by", "is", "and", "or", "my", - "all", "this", "that", "with", "from"} + query_tokens -= { + "a", + "an", + "the", + "of", + "for", + "to", + "in", + "by", + "is", + "and", + "or", + "my", + "all", + "this", + "that", + "with", + "from", + } query_tokens.discard("") if not query_tokens: @@ -188,9 +205,7 @@ def resource_first_search( return dict(ranked[:max_results]) @staticmethod - def _compute_intent_boost( - intent: Any | None, tool_node: str, tools: dict | None - ) -> float: + def _compute_intent_boost(intent: Any | None, tool_node: str, tools: dict | None) -> float: """Score boost based on query intent vs tool's HTTP method/name.""" if not intent or intent.is_neutral or not tools: return 1.0 @@ -206,18 +221,35 @@ def _compute_intent_boost( if intent.write_intent > 0.5: if method in ("POST", "PUT", "PATCH"): boost = 1.8 - for verb in 
("create", "add", "set", "update", "enable", - "register", "upload", "submit", "request", - "fork", "star", "follow", "lock", "merge", - "close", "open", "transfer", "approve", - "checkout", "cancel", "clear"): + for verb in ( + "create", + "add", + "set", + "update", + "enable", + "register", + "upload", + "submit", + "request", + "fork", + "star", + "follow", + "lock", + "merge", + "close", + "open", + "transfer", + "approve", + "checkout", + "cancel", + "clear", + ): if verb in name_lower: boost = max(boost, 1.5) elif intent.read_intent > 0.5: if method == "GET": boost = 1.5 - for verb in ("get", "list", "check", "download", "search", - "validate", "calculate"): + for verb in ("get", "list", "check", "download", "search", "validate", "calculate"): if verb in name_lower: boost = max(boost, 1.3) elif intent.delete_intent > 0.5: @@ -230,9 +262,7 @@ def _compute_intent_boost( return boost @staticmethod - def _compute_desc_boost( - query_tokens: set[str], tool_node: str, tools: dict | None - ) -> float: + def _compute_desc_boost(query_tokens: set[str], tool_node: str, tools: dict | None) -> float: """Boost tools whose description contains query keywords.""" if not tools: return 1.0 @@ -299,9 +329,7 @@ def _expand_chains( # Decayed score: prerequisites get 60% at depth 1, 36% at depth 2 decay = 0.6 ** (depth + 1) chain_score = base_score * decay - chain_scores[neighbor] = max( - chain_scores.get(neighbor, 0), chain_score - ) + chain_scores[neighbor] = max(chain_scores.get(neighbor, 0), chain_score) queue.append((neighbor, depth + 1)) return chain_scores diff --git a/graph_tool_call/serialization.py b/graph_tool_call/serialization.py index cac1c00..81e56b6 100644 --- a/graph_tool_call/serialization.py +++ b/graph_tool_call/serialization.py @@ -52,7 +52,10 @@ def save_graph( path = Path(path) try: path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(json.dumps(data, indent=2, ensure_ascii=False, default=str), encoding="utf-8") + path.write_text( + 
json.dumps(data, indent=2, ensure_ascii=False, default=str), + encoding="utf-8", + ) except PermissionError: msg = f"Permission denied: {path}. Check directory permissions." raise PermissionError(msg) from None diff --git a/graph_tool_call/tool_graph.py b/graph_tool_call/tool_graph.py index 00c2353..e415368 100644 --- a/graph_tool_call/tool_graph.py +++ b/graph_tool_call/tool_graph.py @@ -1282,7 +1282,8 @@ def list_sources(self) -> list[str]: def tools_by_source(self, source_label: str) -> list[ToolSchema]: """Return all tools tagged with the given ``source_label``.""" return [ - t for t in self._tools.values() + t + for t in self._tools.values() if t.metadata and t.metadata.get("source_label") == source_label ] @@ -1631,17 +1632,21 @@ def call_tool(tool_name: str, arguments: dict[str, Any] | None = None) -> str: """ schema = graph_ref._tools.get(tool_name) if schema is None: - return json.dumps({ - "error": f"Tool '{tool_name}' not found.", - "hint": "Use search_tools to find the correct tool name.", - }) + return json.dumps( + { + "error": f"Tool '{tool_name}' not found.", + "hint": "Use search_tools to find the correct tool name.", + } + ) callable_ = schema.get_callable() if callable_ is None: - return json.dumps({ - "error": f"Tool '{tool_name}' is not callable.", - "hint": "This tool was registered without a callable implementation.", - }) + return json.dumps( + { + "error": f"Tool '{tool_name}' is not callable.", + "hint": "This tool was registered without a callable implementation.", + } + ) args: dict[str, Any] = {} if arguments is not None: @@ -1665,10 +1670,12 @@ def call_tool(tool_name: str, arguments: dict[str, Any] | None = None) -> str: return result return json.dumps(result, ensure_ascii=False, default=str) except Exception as e: - return json.dumps({ - "error": str(e), - "tool_name": tool_name, - }) + return json.dumps( + { + "error": str(e), + "tool_name": tool_name, + } + ) return [search_tools, call_tool] diff --git a/graph_tool_call/workflow.py 
b/graph_tool_call/workflow.py index 1e64ca2..c440a65 100644 --- a/graph_tool_call/workflow.py +++ b/graph_tool_call/workflow.py @@ -25,7 +25,7 @@ import json import re from collections import defaultdict, deque -from dataclasses import asdict, dataclass, field +from dataclasses import dataclass, field from pathlib import Path from typing import Any @@ -118,9 +118,7 @@ def reorder(self, tool_names: list[str]) -> WorkflowPlan: self.confidence = "manual" return self - def set_param_mapping( - self, tool_name: str, param: str, source: str - ) -> WorkflowPlan: + def set_param_mapping(self, tool_name: str, param: str, source: str) -> WorkflowPlan: """Set a parameter mapping for a step. Example:: @@ -162,7 +160,6 @@ def open_editor(self, tools: dict[str, ToolSchema] | None = None) -> None: plan.open_editor(tools=tg.tools) """ import tempfile - import urllib.parse import webbrowser # Build editor data: workflow + tool catalog @@ -208,9 +205,7 @@ def open_editor(self, tools: dict[str, ToolSchema] | None = None) -> None: webbrowser.open(f"file://{f.name}") @classmethod - def load( - cls, path: str | Path, *, tools: dict[str, ToolSchema] - ) -> WorkflowPlan: + def load(cls, path: str | Path, *, tools: dict[str, ToolSchema]) -> WorkflowPlan: """Load workflow from JSON file.""" data = json.loads(Path(path).read_text(encoding="utf-8")) steps = [] @@ -218,12 +213,14 @@ def load( tool = tools.get(s["tool"]) if not tool: continue - steps.append(WorkflowStep( - order=s.get("order", 0), - tool=tool, - reason=s.get("reason", ""), - params_from=s.get("params_from", {}), - )) + steps.append( + WorkflowStep( + order=s.get("order", 0), + tool=tool, + reason=s.get("reason", ""), + params_from=s.get("params_from", {}), + ) + ) plan = cls( goal=data.get("goal", ""), steps=steps, @@ -300,8 +297,25 @@ def plan( def _pick_primary(self, goal: str, scores: dict[str, float]) -> str: """Pick the best primary tool by combining graph score + name relevance.""" tokens = 
set(re.split(r"[\s_\-/.,;:!?()]+", goal.lower())) - tokens -= {"a", "an", "the", "of", "for", "to", "in", "by", "is", - "and", "or", "my", "all", "this", "that", "with", "from"} + tokens -= { + "a", + "an", + "the", + "of", + "for", + "to", + "in", + "by", + "is", + "and", + "or", + "my", + "all", + "this", + "that", + "with", + "from", + } tokens.discard("") def _relevance(name: str) -> float: @@ -338,9 +352,7 @@ def _name_match(self, goal: str) -> dict[str, float]: scores[name] = overlap return scores - def _build_chain( - self, target: str, max_steps: int - ) -> dict[str, set[str]]: + def _build_chain(self, target: str, max_steps: int) -> dict[str, set[str]]: """Build a prerequisite chain for the target tool. Follows REQUIRES edges to find data providers: @@ -356,8 +368,6 @@ def _build_chain( if not self._graph.has_node(target): return dict(predecessors) - target_category = self._get_category(target) - # BFS with max depth 2 — follow REQUIRES to find prerequisites visited: set[str] = {target} queue: deque[tuple[str, int]] = deque([(target, 0)]) @@ -393,9 +403,7 @@ def _build_chain( if "REQUIRES" in relation and src == node: # Accept GET as data provider (same or cross-resource) - if n_method == "GET" or any( - v in neighbor.lower() for v in ("get", "list") - ): + if n_method == "GET" or any(v in neighbor.lower() for v in ("get", "list")): accepted = True elif "PRECEDES" in relation and tgt == node: @@ -403,8 +411,7 @@ def _build_chain( neighbor_category = self._get_category(neighbor) node_category = self._get_category(node) same_cat = ( - node_category and neighbor_category - and node_category == neighbor_category + node_category and neighbor_category and node_category == neighbor_category ) is_creator = n_method == "POST" or any( v in neighbor.lower() for v in ("create", "add") @@ -422,7 +429,7 @@ def _build_chain( # Trim to max_steps if len(predecessors) > max_steps: # Keep target + closest prerequisites - direct_preds = list(predecessors[target])[:max_steps - 
1] + direct_preds = list(predecessors[target])[: max_steps - 1] trimmed: dict[str, set[str]] = {target: set(direct_preds)} for p in direct_preds: trimmed[p] = predecessors.get(p, set()) & set(direct_preds) @@ -460,9 +467,7 @@ def _topo_sort(self, predecessors: dict[str, set[str]]) -> list[str]: result.append(n) return result - def _infer_reason( - self, tool_name: str, primary: str, chain: dict[str, set[str]] - ) -> str: + def _infer_reason(self, tool_name: str, primary: str, chain: dict[str, set[str]]) -> str: if tool_name == primary: return "primary action" dependents = [n for n, p in chain.items() if tool_name in p] @@ -470,9 +475,7 @@ def _infer_reason( return f"prerequisite for {', '.join(dependents)}" return "related" - def _enhance_with_llm( - self, plan: WorkflowPlan, llm: Any, max_steps: int - ) -> WorkflowPlan: + def _enhance_with_llm(self, plan: WorkflowPlan, llm: Any, max_steps: int) -> WorkflowPlan: """Use LLM to fill cross-resource gaps and add parameter mappings.""" current_chain = [s.tool.name for s in plan.steps] available = [] @@ -498,7 +501,8 @@ def _enhance_with_llm( {chr(10).join(available[:60])} Return JSON: -{{"steps": [{{"tool": "name", "reason": "why", "params_from": {{"param": "step.response.field"}}}}]}} +{{"steps": [{{"tool": "name", "reason": "why", + "params_from": {{"param": "step.response.field"}}}}]}} Rules: - Keep existing chain steps unless clearly wrong @@ -521,11 +525,14 @@ def _enhance_with_llm( tool = self._tools.get(s.get("tool", "")) if not tool: continue - new_steps.append(WorkflowStep( - order=i + 1, tool=tool, - reason=s.get("reason", ""), - params_from=s.get("params_from", {}), - )) + new_steps.append( + WorkflowStep( + order=i + 1, + tool=tool, + reason=s.get("reason", ""), + params_from=s.get("params_from", {}), + ) + ) if new_steps: plan.steps = new_steps plan.confidence = "graph+llm" diff --git a/tests/test_dependency.py b/tests/test_dependency.py index f9a77de..3c452e9 100644 --- a/tests/test_dependency.py +++ 
b/tests/test_dependency.py @@ -60,16 +60,6 @@ def _find_relation( # --------------------------------------------------------------------------- -def test_crud_requires(): - """POST → GET/{id} should produce REQUIRES.""" - tools = _pet_tools() - relations = detect_dependencies(tools) - rel = _find_relation(relations, "getPet", "createPet", RelationType.REQUIRES) - assert rel is not None, "GET single should REQUIRE POST" - assert rel.confidence >= 0.9 - assert rel.layer == 1 - - def test_crud_complementary(): """POST and PUT are no longer marked COMPLEMENTARY (removed to reduce noise).""" tools = _pet_tools() diff --git a/tests/test_gateway_e2e.py b/tests/test_gateway_e2e.py index 15525af..0142d7c 100644 --- a/tests/test_gateway_e2e.py +++ b/tests/test_gateway_e2e.py @@ -9,247 +9,294 @@ import time from langchain_core.tools import tool + pytest = __import__("pytest") ChatOllama = pytest.importorskip("langchain_ollama").ChatOllama -from langgraph.prebuilt import create_react_agent - -from graph_tool_call.langchain.gateway import create_gateway_tools +from langgraph.prebuilt import create_react_agent # noqa: E402 +from graph_tool_call.langchain.gateway import create_gateway_tools # noqa: E402 # --------------------------------------------------------------------------- # 50 tools (same as test_create_agent_e2e.py) # --------------------------------------------------------------------------- + @tool def create_user(username: str, email: str) -> str: """Create a new user account with username and email.""" return json.dumps({"id": 1, "username": username, "email": email}) + @tool def get_user(user_id: int) -> str: """Get user profile by user ID.""" return json.dumps({"id": user_id, "username": "john", "email": "john@example.com"}) + @tool def update_user(user_id: int, email: str) -> str: """Update user profile information.""" return json.dumps({"id": user_id, "email": email}) + @tool def delete_user(user_id: int) -> str: """Delete a user account permanently.""" return 
json.dumps({"deleted": True, "id": user_id}) + @tool def list_users(page: int = 1) -> str: """List all users with pagination.""" return json.dumps({"users": [{"id": 1, "username": "john"}], "page": page}) + @tool def search_users(query: str) -> str: """Search users by name or email.""" return json.dumps({"results": [{"id": 1, "username": query}]}) + @tool def reset_password(user_id: int) -> str: """Send password reset email to user.""" return json.dumps({"sent": True, "user_id": user_id}) + @tool def ban_user(user_id: int, reason: str) -> str: """Ban a user account with a reason.""" return json.dumps({"banned": True, "user_id": user_id, "reason": reason}) + @tool def create_order(product_id: int, quantity: int) -> str: """Create a new order for a product.""" return json.dumps({"order_id": 100, "product_id": product_id, "quantity": quantity}) + @tool def get_order(order_id: int) -> str: """Get order details by order ID.""" return json.dumps({"order_id": order_id, "status": "pending", "total": 99.99}) + @tool def cancel_order(order_id: int) -> str: """Cancel an existing order.""" return json.dumps({"order_id": order_id, "status": "cancelled"}) + @tool def list_orders(user_id: int) -> str: """List all orders for a user.""" return json.dumps({"orders": [{"order_id": 100, "status": "pending"}]}) + @tool def update_order_status(order_id: int, status: str) -> str: """Update order status (pending, shipped, delivered).""" return json.dumps({"order_id": order_id, "status": status}) + @tool def process_refund(order_id: int) -> str: """Process a refund for a cancelled order.""" return json.dumps({"order_id": order_id, "refunded": True, "amount": 99.99}) + @tool def track_shipment(order_id: int) -> str: """Track shipment status for an order.""" return json.dumps({"order_id": order_id, "tracking": "1Z999AA10123456784"}) + @tool def create_product(name: str, price: float) -> str: """Create a new product listing.""" return json.dumps({"product_id": 1, "name": name, "price": 
price}) + @tool def get_product(product_id: int) -> str: """Get product details by product ID.""" return json.dumps({"product_id": product_id, "name": "Widget", "price": 29.99}) + @tool def update_product(product_id: int, price: float) -> str: """Update product price.""" return json.dumps({"product_id": product_id, "price": price}) + @tool def delete_product(product_id: int) -> str: """Delete a product listing.""" return json.dumps({"deleted": True, "product_id": product_id}) + @tool def list_products(category: str = "all") -> str: """List products by category.""" return json.dumps({"products": [{"id": 1, "name": "Widget", "category": category}]}) + @tool def search_products(query: str) -> str: """Search products by name or description.""" return json.dumps({"results": [{"id": 1, "name": query}]}) + @tool def charge_card(amount: float, card_token: str) -> str: """Charge a credit card.""" return json.dumps({"charge_id": "ch_123", "amount": amount, "status": "succeeded"}) + @tool def get_payment(payment_id: str) -> str: """Get payment details.""" return json.dumps({"payment_id": payment_id, "amount": 99.99, "status": "succeeded"}) + @tool def list_payments(user_id: int) -> str: """List payment history for a user.""" return json.dumps({"payments": [{"id": "ch_123", "amount": 99.99}]}) + @tool def create_invoice(order_id: int) -> str: """Generate an invoice for an order.""" return json.dumps({"invoice_id": "inv_123", "order_id": order_id}) + @tool def send_email(to: str, subject: str, body: str) -> str: """Send an email to a recipient.""" return json.dumps({"sent": True, "to": to, "subject": subject}) + @tool def send_sms(phone: str, message: str) -> str: """Send an SMS text message.""" return json.dumps({"sent": True, "phone": phone}) + @tool def send_push_notification(user_id: int, title: str, message: str) -> str: """Send a push notification to a user's device.""" return json.dumps({"sent": True, "user_id": user_id, "title": title}) + @tool def 
list_notifications(user_id: int) -> str: """List all notifications for a user.""" return json.dumps({"notifications": [{"id": 1, "title": "Order shipped"}]}) + @tool def upload_file(filename: str, content_type: str) -> str: """Upload a file to storage.""" return json.dumps({"file_id": "f_123", "filename": filename}) + @tool def download_file(file_id: str) -> str: """Download a file from storage.""" return json.dumps({"file_id": file_id, "url": "https://storage.example.com/f_123"}) + @tool def delete_file(file_id: str) -> str: """Delete a file from storage.""" return json.dumps({"deleted": True, "file_id": file_id}) + @tool def list_files(folder: str = "/") -> str: """List files in a folder.""" return json.dumps({"files": [{"id": "f_123", "name": "report.pdf"}]}) + @tool def get_dashboard_stats() -> str: """Get overview dashboard statistics.""" return json.dumps({"total_users": 1000, "total_orders": 5000, "revenue": 150000}) + @tool def get_sales_report(start_date: str, end_date: str) -> str: """Generate sales report for a date range.""" return json.dumps({"start": start_date, "end": end_date, "total": 50000}) + @tool def get_user_activity(user_id: int) -> str: """Get activity log for a user.""" return json.dumps({"user_id": user_id, "actions": ["login", "view_product", "checkout"]}) + @tool def get_conversion_rate(period: str = "monthly") -> str: """Get conversion rate analytics.""" return json.dumps({"period": period, "rate": 0.032}) + @tool def get_weather(city: str) -> str: """Get current weather for a city.""" return json.dumps({"city": city, "temp": 22, "condition": "sunny"}) + @tool def get_forecast(city: str, days: int = 7) -> str: """Get weather forecast for next N days.""" return json.dumps({"city": city, "days": days, "forecast": [{"day": 1, "temp": 22}]}) + @tool def create_event(title: str, date: str) -> str: """Create a calendar event.""" return json.dumps({"event_id": "e_123", "title": title, "date": date}) + @tool def list_events(date: str) -> str: 
"""List calendar events for a date.""" return json.dumps({"events": [{"id": "e_123", "title": "Meeting"}]}) + @tool def delete_event(event_id: str) -> str: """Delete a calendar event.""" return json.dumps({"deleted": True, "event_id": event_id}) + @tool def get_settings() -> str: """Get current application settings.""" return json.dumps({"theme": "dark", "language": "en", "notifications": True}) + @tool def update_settings(key: str, value: str) -> str: """Update an application setting.""" return json.dumps({"key": key, "value": value, "updated": True}) + @tool def translate_text(text: str, target_lang: str) -> str: """Translate text to a target language.""" return json.dumps({"original": text, "translated": f"[{target_lang}] {text}"}) + @tool def generate_report(report_type: str) -> str: """Generate a system report (daily, weekly, monthly).""" return json.dumps({"type": report_type, "generated": True}) + @tool def health_check() -> str: """Check system health status.""" @@ -257,20 +304,53 @@ def health_check() -> str: ALL_TOOLS = [ - create_user, get_user, update_user, delete_user, list_users, - search_users, reset_password, ban_user, - create_order, get_order, cancel_order, list_orders, - update_order_status, process_refund, track_shipment, - create_product, get_product, update_product, delete_product, - list_products, search_products, - charge_card, get_payment, list_payments, create_invoice, - send_email, send_sms, send_push_notification, list_notifications, - upload_file, download_file, delete_file, list_files, - get_dashboard_stats, get_sales_report, get_user_activity, get_conversion_rate, - get_weather, get_forecast, - create_event, list_events, delete_event, - get_settings, update_settings, - translate_text, generate_report, health_check, + create_user, + get_user, + update_user, + delete_user, + list_users, + search_users, + reset_password, + ban_user, + create_order, + get_order, + cancel_order, + list_orders, + update_order_status, + process_refund, + 
track_shipment, + create_product, + get_product, + update_product, + delete_product, + list_products, + search_products, + charge_card, + get_payment, + list_payments, + create_invoice, + send_email, + send_sms, + send_push_notification, + list_notifications, + upload_file, + download_file, + delete_file, + list_files, + get_dashboard_stats, + get_sales_report, + get_user_activity, + get_conversion_rate, + get_weather, + get_forecast, + create_event, + list_events, + delete_event, + get_settings, + update_settings, + translate_text, + generate_report, + health_check, ] @@ -309,7 +389,7 @@ def health_check() -> str: def main(): print(f"Total tools: {len(ALL_TOOLS)}") - print(f"Gateway tool 2개로 변환 → LLM이 search_tools + call_tool 사용") + print("Gateway tool 2개로 변환 → LLM이 search_tools + call_tool 사용") print("=" * 70) llm = ChatOllama(model="qwen3.5:4b", temperature=0) @@ -374,8 +454,8 @@ def main(): print(f" [ERROR] {e}") print(f"\n{'=' * 70}") - print(f"RESULT: {passed}/{total} ({passed/total*100:.0f}%)") - print(f" - LLM에 노출된 tool 수: 2 (search_tools, call_tool)") + print(f"RESULT: {passed}/{total} ({passed / total * 100:.0f}%)") + print(" - LLM에 노출된 tool 수: 2 (search_tools, call_tool)") print(f" - 실제 backend tool 수: {len(ALL_TOOLS)}") print("=" * 70) diff --git a/tests/test_gateway_token_saving.py b/tests/test_gateway_token_saving.py index 5703545..ef9564f 100644 --- a/tests/test_gateway_token_saving.py +++ b/tests/test_gateway_token_saving.py @@ -9,244 +9,291 @@ import json from langchain_core.tools import tool + pytest = __import__("pytest") ChatOllama = pytest.importorskip("langchain_ollama").ChatOllama -from graph_tool_call.langchain.gateway import create_gateway_tools - +from graph_tool_call.langchain.gateway import create_gateway_tools # noqa: E402 # --- Same 47 tools as e2e test --- + @tool def create_user(username: str, email: str) -> str: """Create a new user account with username and email.""" return json.dumps({"id": 1, "username": username, "email": 
email}) + @tool def get_user(user_id: int) -> str: """Get user profile by user ID.""" return json.dumps({"id": user_id, "username": "john"}) + @tool def update_user(user_id: int, email: str) -> str: """Update user profile information.""" return json.dumps({"id": user_id, "email": email}) + @tool def delete_user(user_id: int) -> str: """Delete a user account permanently.""" return json.dumps({"deleted": True, "id": user_id}) + @tool def list_users(page: int = 1) -> str: """List all users with pagination.""" return json.dumps({"users": [{"id": 1}], "page": page}) + @tool def search_users(query: str) -> str: """Search users by name or email.""" return json.dumps({"results": [{"id": 1, "username": query}]}) + @tool def reset_password(user_id: int) -> str: """Send password reset email to user.""" return json.dumps({"sent": True, "user_id": user_id}) + @tool def ban_user(user_id: int, reason: str) -> str: """Ban a user account with a reason.""" return json.dumps({"banned": True, "user_id": user_id}) + @tool def create_order(product_id: int, quantity: int) -> str: """Create a new order for a product.""" return json.dumps({"order_id": 100, "product_id": product_id}) + @tool def get_order(order_id: int) -> str: """Get order details by order ID.""" return json.dumps({"order_id": order_id, "status": "pending"}) + @tool def cancel_order(order_id: int) -> str: """Cancel an existing order.""" return json.dumps({"order_id": order_id, "status": "cancelled"}) + @tool def list_orders(user_id: int) -> str: """List all orders for a user.""" return json.dumps({"orders": [{"order_id": 100}]}) + @tool def update_order_status(order_id: int, status: str) -> str: """Update order status (pending, shipped, delivered).""" return json.dumps({"order_id": order_id, "status": status}) + @tool def process_refund(order_id: int) -> str: """Process a refund for a cancelled order.""" return json.dumps({"order_id": order_id, "refunded": True}) + @tool def track_shipment(order_id: int) -> str: """Track 
shipment status for an order.""" return json.dumps({"order_id": order_id, "tracking": "1Z999"}) + @tool def create_product(name: str, price: float) -> str: """Create a new product listing.""" return json.dumps({"product_id": 1, "name": name, "price": price}) + @tool def get_product(product_id: int) -> str: """Get product details by product ID.""" return json.dumps({"product_id": product_id, "name": "Widget"}) + @tool def update_product(product_id: int, price: float) -> str: """Update product price.""" return json.dumps({"product_id": product_id, "price": price}) + @tool def delete_product(product_id: int) -> str: """Delete a product listing.""" return json.dumps({"deleted": True, "product_id": product_id}) + @tool def list_products(category: str = "all") -> str: """List products by category.""" return json.dumps({"products": [{"id": 1, "category": category}]}) + @tool def search_products(query: str) -> str: """Search products by name or description.""" return json.dumps({"results": [{"id": 1, "name": query}]}) + @tool def charge_card(amount: float, card_token: str) -> str: """Charge a credit card.""" return json.dumps({"charge_id": "ch_123", "amount": amount}) + @tool def get_payment(payment_id: str) -> str: """Get payment details.""" return json.dumps({"payment_id": payment_id, "amount": 99.99}) + @tool def list_payments(user_id: int) -> str: """List payment history for a user.""" return json.dumps({"payments": [{"id": "ch_123"}]}) + @tool def create_invoice(order_id: int) -> str: """Generate an invoice for an order.""" return json.dumps({"invoice_id": "inv_123", "order_id": order_id}) + @tool def send_email(to: str, subject: str, body: str) -> str: """Send an email to a recipient.""" return json.dumps({"sent": True, "to": to, "subject": subject}) + @tool def send_sms(phone: str, message: str) -> str: """Send an SMS text message.""" return json.dumps({"sent": True, "phone": phone}) + @tool def send_push_notification(user_id: int, title: str, message: str) -> str: 
"""Send a push notification to a user's device.""" return json.dumps({"sent": True, "user_id": user_id}) + @tool def list_notifications(user_id: int) -> str: """List all notifications for a user.""" return json.dumps({"notifications": [{"id": 1}]}) + @tool def upload_file(filename: str, content_type: str) -> str: """Upload a file to storage.""" return json.dumps({"file_id": "f_123", "filename": filename}) + @tool def download_file(file_id: str) -> str: """Download a file from storage.""" return json.dumps({"file_id": file_id, "url": "https://example.com/f"}) + @tool def delete_file(file_id: str) -> str: """Delete a file from storage.""" return json.dumps({"deleted": True, "file_id": file_id}) + @tool def list_files(folder: str = "/") -> str: """List files in a folder.""" return json.dumps({"files": [{"id": "f_123", "name": "report.pdf"}]}) + @tool def get_dashboard_stats() -> str: """Get overview dashboard statistics.""" return json.dumps({"total_users": 1000, "revenue": 150000}) + @tool def get_sales_report(start_date: str, end_date: str) -> str: """Generate sales report for a date range.""" return json.dumps({"start": start_date, "end": end_date, "total": 50000}) + @tool def get_user_activity(user_id: int) -> str: """Get activity log for a user.""" return json.dumps({"user_id": user_id, "actions": ["login"]}) + @tool def get_conversion_rate(period: str = "monthly") -> str: """Get conversion rate analytics.""" return json.dumps({"period": period, "rate": 0.032}) + @tool def get_weather(city: str) -> str: """Get current weather for a city.""" return json.dumps({"city": city, "temp": 22, "condition": "sunny"}) + @tool def get_forecast(city: str, days: int = 7) -> str: """Get weather forecast for next N days.""" return json.dumps({"city": city, "days": days}) + @tool def create_event(title: str, date: str) -> str: """Create a calendar event.""" return json.dumps({"event_id": "e_123", "title": title, "date": date}) + @tool def list_events(date: str) -> str: """List 
calendar events for a date.""" return json.dumps({"events": [{"id": "e_123", "title": "Meeting"}]}) + @tool def delete_event(event_id: str) -> str: """Delete a calendar event.""" return json.dumps({"deleted": True, "event_id": event_id}) + @tool def get_settings() -> str: """Get current application settings.""" return json.dumps({"theme": "dark", "language": "en"}) + @tool def update_settings(key: str, value: str) -> str: """Update an application setting.""" return json.dumps({"key": key, "value": value, "updated": True}) + @tool def translate_text(text: str, target_lang: str) -> str: """Translate text to a target language.""" return json.dumps({"original": text, "translated": f"[{target_lang}] {text}"}) + @tool def generate_report(report_type: str) -> str: """Generate a system report (daily, weekly, monthly).""" return json.dumps({"type": report_type, "generated": True}) + @tool def health_check() -> str: """Check system health status.""" @@ -254,20 +301,53 @@ def health_check() -> str: ALL_TOOLS = [ - create_user, get_user, update_user, delete_user, list_users, - search_users, reset_password, ban_user, - create_order, get_order, cancel_order, list_orders, - update_order_status, process_refund, track_shipment, - create_product, get_product, update_product, delete_product, - list_products, search_products, - charge_card, get_payment, list_payments, create_invoice, - send_email, send_sms, send_push_notification, list_notifications, - upload_file, download_file, delete_file, list_files, - get_dashboard_stats, get_sales_report, get_user_activity, get_conversion_rate, - get_weather, get_forecast, - create_event, list_events, delete_event, - get_settings, update_settings, - translate_text, generate_report, health_check, + create_user, + get_user, + update_user, + delete_user, + list_users, + search_users, + reset_password, + ban_user, + create_order, + get_order, + cancel_order, + list_orders, + update_order_status, + process_refund, + track_shipment, + create_product, 
+ get_product, + update_product, + delete_product, + list_products, + search_products, + charge_card, + get_payment, + list_payments, + create_invoice, + send_email, + send_sms, + send_push_notification, + list_notifications, + upload_file, + download_file, + delete_file, + list_files, + get_dashboard_stats, + get_sales_report, + get_user_activity, + get_conversion_rate, + get_weather, + get_forecast, + create_event, + list_events, + delete_event, + get_settings, + update_settings, + translate_text, + generate_report, + health_check, ] @@ -280,7 +360,7 @@ def _count_tool_schema_chars(tools: list) -> int: "function": { "name": t.name, "description": t.description, - } + }, } if hasattr(t, "args_schema") and t.args_schema: try: @@ -316,7 +396,7 @@ def main(): print(f" Tool calls: {[tc['name'] for tc in (result_all.tool_calls or [])]}") # --- Method 2: Gateway 2 tools --- - print(f"\n[2] Gateway 2 tools bound to LLM") + print("\n[2] Gateway 2 tools bound to LLM") gateway = create_gateway_tools(ALL_TOOLS, top_k=10) llm_gw = llm.bind_tools(gateway) gw_chars = _count_tool_schema_chars(gateway) @@ -333,12 +413,20 @@ def main(): # --- Comparison --- print(f"\n{'=' * 70}") print("COMPARISON") - print(f" Tool schema: {all_chars:,} → {gw_chars:,} chars ({(1 - gw_chars/all_chars)*100:.0f}% reduction)") - print(f" Estimated tokens: ~{all_tokens_est:,} → ~{gw_tokens_est:,} ({(1 - gw_tokens_est/all_tokens_est)*100:.0f}% reduction)") + char_reduction = (1 - gw_chars / all_chars) * 100 + print(f" Tool schema: {all_chars:,} → {gw_chars:,} chars ({char_reduction:.0f}% reduction)") + token_reduction = (1 - gw_tokens_est / all_tokens_est) * 100 + print( + f" Estimated tokens: ~{all_tokens_est:,} → ~{gw_tokens_est:,} " + f"({token_reduction:.0f}% reduction)" + ) if isinstance(prompt_all, int) and isinstance(prompt_gw, int): actual_reduction = (1 - prompt_gw / prompt_all) * 100 - print(f" Actual prompt_tokens: {prompt_all:,} → {prompt_gw:,} ({actual_reduction:.0f}% reduction)") + print( 
+ f" Actual prompt_tokens: {prompt_all:,} → {prompt_gw:,} " + f"({actual_reduction:.0f}% reduction)" + ) saved = prompt_all - prompt_gw print(f" Tokens saved per turn: {saved:,}") else: diff --git a/tests/test_gateway_xgen_workflow.py b/tests/test_gateway_xgen_workflow.py index eaa0e12..59fa02e 100644 --- a/tests/test_gateway_xgen_workflow.py +++ b/tests/test_gateway_xgen_workflow.py @@ -16,42 +16,50 @@ import time from langchain_core.tools import tool + pytest = __import__("pytest") ChatOllama = pytest.importorskip("langchain_ollama").ChatOllama -from langgraph.prebuilt import create_react_agent - -from graph_tool_call.langchain.gateway import create_gateway_tools +from langgraph.prebuilt import create_react_agent # noqa: E402 +from graph_tool_call.langchain.gateway import create_gateway_tools # noqa: E402 # =================================================================== # Slack MCP Tools (6) # =================================================================== + @tool def slack_get_channel_id(channel_name: str) -> str: """Get the ID of a Slack channel by name.""" return json.dumps({"channel_id": "C01234", "name": channel_name}) + @tool def slack_send_message(channel_id: str, message: str) -> str: """Send a message to a Slack channel.""" return json.dumps({"ok": True, "channel": channel_id, "ts": "1234567890.123456"}) + @tool def slack_list_channels() -> str: """List all Slack channels in the workspace.""" - return json.dumps({"channels": [{"id": "C01", "name": "general"}, {"id": "C02", "name": "dev"}]}) + return json.dumps( + {"channels": [{"id": "C01", "name": "general"}, {"id": "C02", "name": "dev"}]} + ) + @tool def slack_list_users() -> str: """List all users in the Slack workspace.""" return json.dumps({"users": [{"id": "U01", "name": "alice"}, {"id": "U02", "name": "bob"}]}) + @tool def slack_search_conversations(query: str) -> str: """Search Slack conversations by keyword.""" return json.dumps({"messages": [{"text": f"Found: {query}", "channel": 
"C01"}]}) + @tool def slack_get_message_link(channel_id: str, message_ts: str) -> str: """Get a permalink to a specific Slack message.""" @@ -62,141 +70,185 @@ def slack_get_message_link(channel_id: str, message_ts: str) -> str: # GitHub MCP Tools (8) # =================================================================== + @tool def github_get_file(path: str, repo: str = "main-repo") -> str: """Get the contents of a file from a GitHub repository.""" return json.dumps({"path": path, "content": "file content here", "sha": "abc123"}) + @tool def github_get_issues(repo: str, state: str = "open") -> str: """Get all issues from a GitHub repository.""" return json.dumps({"issues": [{"number": 1, "title": "Bug fix", "state": state}]}) + @tool def github_search_issues(query: str) -> str: """Search issues across GitHub repositories.""" return json.dumps({"items": [{"number": 42, "title": query, "state": "open"}]}) + @tool def github_create_issue(title: str, body: str, repo: str = "main-repo") -> str: """Create a new issue in a GitHub repository.""" - return json.dumps({"number": 100, "title": title, "html_url": "https://github.com/repo/issues/100"}) + return json.dumps( + {"number": 100, "title": title, "html_url": "https://github.com/repo/issues/100"} + ) + @tool def github_create_pull_request(title: str, body: str, head: str, base: str = "main") -> str: """Create a new pull request in a GitHub repository.""" return json.dumps({"number": 50, "title": title, "html_url": "https://github.com/repo/pull/50"}) + @tool def github_comment_on_issue(issue_number: int, comment: str) -> str: """Add a comment to a GitHub issue.""" return json.dumps({"id": 999, "issue_number": issue_number, "body": comment}) + @tool def github_list_pull_requests(repo: str, state: str = "open") -> str: """List pull requests in a GitHub repository.""" return json.dumps({"pull_requests": [{"number": 50, "title": "Feature PR", "state": state}]}) + @tool def github_get_pull_request(pull_number: int) -> str: 
"""Get details of a specific pull request.""" - return json.dumps({"number": pull_number, "title": "Feature", "mergeable": True, "additions": 50}) + return json.dumps( + {"number": pull_number, "title": "Feature", "mergeable": True, "additions": 50} + ) # =================================================================== # Atlassian — Jira Tools (19) # =================================================================== + @tool def jira_search_issues(jql: str, max_results: int = 50) -> str: """Search Jira issues using JQL query language.""" - return json.dumps({"issues": [{"key": "PROJ-123", "summary": "Sample issue", "status": "Open"}], "total": 1}) + return json.dumps( + {"issues": [{"key": "PROJ-123", "summary": "Sample issue", "status": "Open"}], "total": 1} + ) + @tool def jira_get_issue(issue_key: str) -> str: """Get details of a single Jira issue by key.""" - return json.dumps({"key": issue_key, "summary": "Bug in login", "status": "In Progress", "assignee": "alice"}) + return json.dumps( + {"key": issue_key, "summary": "Bug in login", "status": "In Progress", "assignee": "alice"} + ) + @tool def jira_create_issue(project_key: str, summary: str, issue_type: str = "Task") -> str: """Create a new Jira issue or sub-task.""" return json.dumps({"key": f"{project_key}-999", "summary": summary, "type": issue_type}) + @tool def jira_update_issue(issue_key: str, fields: str) -> str: """Update fields of a Jira issue.""" return json.dumps({"key": issue_key, "updated": True}) + @tool def jira_get_transitions(issue_key: str) -> str: """Get available status transitions for a Jira issue.""" - return json.dumps({"transitions": [{"id": "31", "name": "Done"}, {"id": "21", "name": "In Progress"}]}) + return json.dumps( + {"transitions": [{"id": "31", "name": "Done"}, {"id": "21", "name": "In Progress"}]} + ) + @tool def jira_transition_issue(issue_key: str, transition_id: str) -> str: """Change the status of a Jira issue via transition.""" return json.dumps({"key": issue_key, 
"transitioned": True, "transition_id": transition_id}) + @tool def jira_add_comment(issue_key: str, comment_body: str) -> str: """Add a comment to a Jira issue.""" return json.dumps({"id": "10001", "issue_key": issue_key, "body": comment_body}) + @tool def jira_get_comments(issue_key: str) -> str: """Get all comments from a Jira issue.""" return json.dumps({"comments": [{"id": "10001", "body": "Working on it", "author": "alice"}]}) + @tool def jira_list_projects() -> str: """List all Jira projects accessible to the user.""" - return json.dumps({"projects": [{"key": "PROJ", "name": "Main Project"}, {"key": "DEV", "name": "Development"}]}) + return json.dumps( + { + "projects": [ + {"key": "PROJ", "name": "Main Project"}, + {"key": "DEV", "name": "Development"}, + ] + } + ) + @tool def jira_get_project(project_key: str) -> str: """Get details of a specific Jira project.""" return json.dumps({"key": project_key, "name": "Main Project", "lead": "alice"}) + @tool def jira_assign_issue(issue_key: str, assignee: str) -> str: """Assign a Jira issue to a user.""" return json.dumps({"key": issue_key, "assignee": assignee}) + @tool def jira_add_worklog(issue_key: str, time_spent: str, comment: str = "") -> str: """Log time spent on a Jira issue.""" return json.dumps({"issue_key": issue_key, "time_spent": time_spent, "logged": True}) + @tool def jira_search_users(query: str) -> str: """Search for Jira users by name or email.""" return json.dumps({"users": [{"name": "alice", "email": "alice@example.com"}]}) + @tool def jira_delete_issue(issue_key: str) -> str: """Delete a Jira issue permanently.""" return json.dumps({"key": issue_key, "deleted": True}) + @tool def jira_get_boards() -> str: """Get all Scrum/Kanban boards in Jira.""" return json.dumps({"boards": [{"id": 1, "name": "Sprint Board", "type": "scrum"}]}) + @tool def jira_get_sprints(board_id: int) -> str: """Get sprints from a Jira board.""" return json.dumps({"sprints": [{"id": 10, "name": "Sprint 5", "state": 
"active"}]}) + @tool def jira_link_issues(inward_key: str, outward_key: str, link_type: str = "Relates") -> str: """Link two Jira issues together.""" return json.dumps({"linked": True, "inward": inward_key, "outward": outward_key}) + @tool def jira_get_attachments(issue_key: str) -> str: """Get attachments from a Jira issue.""" return json.dumps({"attachments": [{"filename": "screenshot.png", "size": 102400}]}) + @tool def jira_add_attachment(issue_key: str, filename: str) -> str: """Add an attachment to a Jira issue.""" @@ -207,46 +259,66 @@ def jira_add_attachment(issue_key: str, filename: str) -> str: # Atlassian — Confluence Tools (9) # =================================================================== + @tool def confluence_search(cql: str, limit: int = 25) -> str: """Search Confluence content using CQL query language.""" - return json.dumps({"results": [{"id": "123", "title": "API Guide", "type": "page"}], "total": 1}) + return json.dumps( + {"results": [{"id": "123", "title": "API Guide", "type": "page"}], "total": 1} + ) + @tool def confluence_get_page(page_id: str) -> str: """Get a Confluence page by ID.""" return json.dumps({"id": page_id, "title": "API Guide", "body": "Page content here..."}) + @tool def confluence_create_page(space_key: str, title: str, body: str) -> str: """Create a new Confluence page in a space.""" return json.dumps({"id": "456", "title": title, "space": space_key}) + @tool def confluence_update_page(page_id: str, title: str, body: str) -> str: """Update an existing Confluence page.""" return json.dumps({"id": page_id, "title": title, "updated": True}) + @tool def confluence_delete_page(page_id: str) -> str: """Delete a Confluence page.""" return json.dumps({"id": page_id, "deleted": True}) + @tool def confluence_get_spaces(limit: int = 25) -> str: """List all Confluence spaces.""" - return json.dumps({"spaces": [{"key": "DEV", "name": "Development"}, {"key": "HR", "name": "Human Resources"}]}) + return json.dumps( + { + "spaces": [ 
+ {"key": "DEV", "name": "Development"}, + {"key": "HR", "name": "Human Resources"}, + ] + } + ) + @tool def confluence_get_pages_in_space(space_key: str) -> str: """Get all pages in a Confluence space.""" - return json.dumps({"pages": [{"id": "123", "title": "API Guide"}, {"id": "124", "title": "Setup Guide"}]}) + return json.dumps( + {"pages": [{"id": "123", "title": "API Guide"}, {"id": "124", "title": "Setup Guide"}]} + ) + @tool def confluence_add_comment(page_id: str, body: str) -> str: """Add a comment to a Confluence page.""" return json.dumps({"id": "789", "page_id": page_id, "body": body}) + @tool def confluence_get_page_comments(page_id: str) -> str: """Get all comments from a Confluence page.""" @@ -257,110 +329,161 @@ def confluence_get_page_comments(page_id: str) -> str: # MS365 MCP Tools (15) # =================================================================== + @tool def ms365_list_mails(folder: str = "inbox", top: int = 10) -> str: """List emails from Outlook mailbox.""" - return json.dumps({"emails": [{"id": "m1", "subject": "Meeting tomorrow", "from": "boss@company.com"}]}) + return json.dumps( + {"emails": [{"id": "m1", "subject": "Meeting tomorrow", "from": "boss@company.com"}]} + ) + @tool def ms365_read_mail(message_id: str) -> str: """Read a specific email from Outlook.""" - return json.dumps({"id": message_id, "subject": "Meeting", "body": "Please join at 3pm", "from": "boss@company.com"}) + return json.dumps( + { + "id": message_id, + "subject": "Meeting", + "body": "Please join at 3pm", + "from": "boss@company.com", + } + ) + @tool def ms365_send_email(to: str, subject: str, body: str) -> str: """Send an email via Outlook.""" return json.dumps({"sent": True, "to": to, "subject": subject}) + @tool def ms365_reply_to_email(message_id: str, body: str) -> str: """Reply to an email in Outlook.""" return json.dumps({"replied": True, "message_id": message_id}) + @tool def ms365_list_calendar_events(start_date: str, end_date: str) -> str: 
"""List calendar events within a date range.""" - return json.dumps({"events": [{"subject": "Team standup", "start": start_date, "location": "Room A"}]}) + return json.dumps( + {"events": [{"subject": "Team standup", "start": start_date, "location": "Room A"}]} + ) + @tool def ms365_create_event(subject: str, start: str, end: str, attendees: str = "") -> str: """Create a new calendar event in Outlook.""" return json.dumps({"id": "e1", "subject": subject, "start": start, "end": end}) + @tool def ms365_delete_event(event_id: str) -> str: """Delete a calendar event.""" return json.dumps({"deleted": True, "event_id": event_id}) + @tool def ms365_list_teams() -> str: """List all Microsoft Teams the user belongs to.""" - return json.dumps({"teams": [{"id": "t1", "name": "Engineering"}, {"id": "t2", "name": "Design"}]}) + return json.dumps( + {"teams": [{"id": "t1", "name": "Engineering"}, {"id": "t2", "name": "Design"}]} + ) + @tool def ms365_list_team_channels(team_id: str) -> str: """List channels in a Microsoft Teams team.""" - return json.dumps({"channels": [{"id": "ch1", "name": "General"}, {"id": "ch2", "name": "Dev"}]}) + return json.dumps( + {"channels": [{"id": "ch1", "name": "General"}, {"id": "ch2", "name": "Dev"}]} + ) + @tool def ms365_send_team_message(team_id: str, channel_id: str, message: str) -> str: """Send a message to a Microsoft Teams channel.""" return json.dumps({"sent": True, "team_id": team_id, "channel_id": channel_id}) + @tool def ms365_list_files(folder_path: str = "/") -> str: """List files in OneDrive.""" - return json.dumps({"files": [{"name": "report.xlsx", "size": 51200}, {"name": "notes.docx", "size": 10240}]}) + return json.dumps( + {"files": [{"name": "report.xlsx", "size": 51200}, {"name": "notes.docx", "size": 10240}]} + ) + @tool def ms365_create_task(title: str, due_date: str = "") -> str: """Create a task in Microsoft To Do / Planner.""" return json.dumps({"id": "task1", "title": title, "due_date": due_date, "status": 
"notStarted"}) + @tool def ms365_list_tasks(plan_id: str = "default") -> str: """List tasks from Microsoft Planner.""" return json.dumps({"tasks": [{"id": "task1", "title": "Review PR", "status": "inProgress"}]}) + @tool def ms365_list_contacts(top: int = 10) -> str: """List contacts from Outlook.""" return json.dumps({"contacts": [{"name": "Alice Kim", "email": "alice@company.com"}]}) + @tool def ms365_get_contact(contact_id: str) -> str: """Get a specific contact from Outlook.""" - return json.dumps({"id": contact_id, "name": "Alice Kim", "email": "alice@company.com", "phone": "+82-10-1234-5678"}) + return json.dumps( + { + "id": contact_id, + "name": "Alice Kim", + "email": "alice@company.com", + "phone": "+82-10-1234-5678", + } + ) # =================================================================== # API Tool Loader Tools (5) — custom REST API tools # =================================================================== + @tool def api_get_product_inventory(product_code: str) -> str: """조회: 상품 코드로 재고 수량을 조회합니다. Query product inventory by product code.""" return json.dumps({"product_code": product_code, "quantity": 150, "warehouse": "Seoul-01"}) + @tool def api_create_purchase_order(supplier_id: str, items: str) -> str: """생성: 공급업체에 발주서를 생성합니다. Create a purchase order to a supplier.""" return json.dumps({"po_number": "PO-2024-001", "supplier_id": supplier_id, "status": "created"}) + @tool def api_get_customer_info(customer_id: str) -> str: """조회: 고객 ID로 고객 상세 정보를 조회합니다. Get customer details by customer ID.""" - return json.dumps({"customer_id": customer_id, "name": "Kim Corp", "grade": "VIP", "credit_limit": 50000000}) + return json.dumps( + {"customer_id": customer_id, "name": "Kim Corp", "grade": "VIP", "credit_limit": 50000000} + ) + @tool def api_submit_approval(document_id: str, action: str) -> str: - """결재: 문서 결재를 승인 또는 반려합니다. Approve or reject a document in the approval workflow.""" + """결재: 문서 결재를 승인 또는 반려합니다. 
+ + Approve or reject a document in the approval workflow. + """ return json.dumps({"document_id": document_id, "action": action, "result": "processed"}) + @tool def api_get_sales_dashboard(period: str = "monthly") -> str: """대시보드: 매출 현황 대시보드 데이터를 조회합니다. Get sales dashboard data.""" - return json.dumps({"period": period, "total_sales": 1250000000, "orders": 3400, "growth": "+12.5%"}) + return json.dumps( + {"period": period, "total_sales": 1250000000, "orders": 3400, "growth": "+12.5%"} + ) # =================================================================== @@ -369,32 +492,73 @@ def api_get_sales_dashboard(period: str = "monthly") -> str: ALL_TOOLS = [ # Slack (6) - slack_get_channel_id, slack_send_message, slack_list_channels, - slack_list_users, slack_search_conversations, slack_get_message_link, + slack_get_channel_id, + slack_send_message, + slack_list_channels, + slack_list_users, + slack_search_conversations, + slack_get_message_link, # GitHub (8) - github_get_file, github_get_issues, github_search_issues, - github_create_issue, github_create_pull_request, github_comment_on_issue, - github_list_pull_requests, github_get_pull_request, + github_get_file, + github_get_issues, + github_search_issues, + github_create_issue, + github_create_pull_request, + github_comment_on_issue, + github_list_pull_requests, + github_get_pull_request, # Jira (19) - jira_search_issues, jira_get_issue, jira_create_issue, jira_update_issue, - jira_get_transitions, jira_transition_issue, jira_add_comment, - jira_get_comments, jira_list_projects, jira_get_project, - jira_assign_issue, jira_add_worklog, jira_search_users, - jira_delete_issue, jira_get_boards, jira_get_sprints, - jira_link_issues, jira_get_attachments, jira_add_attachment, + jira_search_issues, + jira_get_issue, + jira_create_issue, + jira_update_issue, + jira_get_transitions, + jira_transition_issue, + jira_add_comment, + jira_get_comments, + jira_list_projects, + jira_get_project, + jira_assign_issue, + 
jira_add_worklog, + jira_search_users, + jira_delete_issue, + jira_get_boards, + jira_get_sprints, + jira_link_issues, + jira_get_attachments, + jira_add_attachment, # Confluence (9) - confluence_search, confluence_get_page, confluence_create_page, - confluence_update_page, confluence_delete_page, confluence_get_spaces, - confluence_get_pages_in_space, confluence_add_comment, confluence_get_page_comments, + confluence_search, + confluence_get_page, + confluence_create_page, + confluence_update_page, + confluence_delete_page, + confluence_get_spaces, + confluence_get_pages_in_space, + confluence_add_comment, + confluence_get_page_comments, # MS365 (15) - ms365_list_mails, ms365_read_mail, ms365_send_email, ms365_reply_to_email, - ms365_list_calendar_events, ms365_create_event, ms365_delete_event, - ms365_list_teams, ms365_list_team_channels, ms365_send_team_message, - ms365_list_files, ms365_create_task, ms365_list_tasks, - ms365_list_contacts, ms365_get_contact, + ms365_list_mails, + ms365_read_mail, + ms365_send_email, + ms365_reply_to_email, + ms365_list_calendar_events, + ms365_create_event, + ms365_delete_event, + ms365_list_teams, + ms365_list_team_channels, + ms365_send_team_message, + ms365_list_files, + ms365_create_task, + ms365_list_tasks, + ms365_list_contacts, + ms365_get_contact, # API Tools (5) - api_get_product_inventory, api_create_purchase_order, - api_get_customer_info, api_submit_approval, api_get_sales_dashboard, + api_get_product_inventory, + api_create_purchase_order, + api_get_customer_info, + api_submit_approval, + api_get_sales_dashboard, ] @@ -481,15 +645,15 @@ def _count_tool_schema_chars(tools: list) -> int: def main(): print(f"{'=' * 70}") - print(f"xgen-workflow Gateway E2E Test") + print("xgen-workflow Gateway E2E Test") print(f"{'=' * 70}") - print(f"Tool breakdown:") - print(f" Slack MCP: 6 tools") - print(f" GitHub MCP: 8 tools") - print(f" Jira MCP: 19 tools") - print(f" Confluence MCP: 9 tools") - print(f" MS365 MCP: 15 tools") - 
print(f" API Loader: 5 tools") + print("Tool breakdown:") + print(" Slack MCP: 6 tools") + print(" GitHub MCP: 8 tools") + print(" Jira MCP: 19 tools") + print(" Confluence MCP: 9 tools") + print(" MS365 MCP: 15 tools") + print(" API Loader: 5 tools") print(f" Total: {len(ALL_TOOLS)} tools → gateway 2 tools") print(f"{'=' * 70}") @@ -499,7 +663,7 @@ def main(): gw_chars = _count_tool_schema_chars(gateway) reduction = (1 - gw_chars / all_chars) * 100 print(f"\nToken savings: {all_chars:,} → {gw_chars:,} chars ({reduction:.0f}% reduction)") - print(f" ~{all_chars//4:,} → ~{gw_chars//4:,} tokens per turn") + print(f" ~{all_chars // 4:,} → ~{gw_chars // 4:,} tokens per turn") # LLM test llm = ChatOllama(model="qwen3.5:4b", temperature=0) @@ -549,7 +713,7 @@ def main(): print(f" [ERROR] {e} ({elapsed:.1f}s)") print(f"\n{'=' * 70}") - print(f"RESULT: {passed}/{total} ({passed/total*100:.0f}%)") + print(f"RESULT: {passed}/{total} ({passed / total * 100:.0f}%)") print(f" Tools: {len(ALL_TOOLS)} → 2 (gateway)") print(f" Token reduction: {reduction:.0f}%") print(f"{'=' * 70}") diff --git a/tests/test_langchain_agent.py b/tests/test_langchain_agent.py index e3167cc..14f26ce 100644 --- a/tests/test_langchain_agent.py +++ b/tests/test_langchain_agent.py @@ -2,7 +2,6 @@ from __future__ import annotations -from typing import Any from unittest.mock import MagicMock, patch import pytest @@ -87,7 +86,7 @@ def cancel_order(order_id: str) -> str: from graph_tool_call.langchain.agent import create_agent - agent = create_agent( + create_agent( mock_model, tools=[get_weather, send_email, cancel_order], top_k=2, @@ -156,7 +155,7 @@ def search_users(query: str) -> str: state = {"messages": [HumanMessage(content="what's the weather in Seoul")]} runtime = MagicMock() - result = model_factory(state, runtime) + model_factory(state, runtime) # bind_tools should have been called with a filtered subset mock_model.bind_tools.assert_called_once() diff --git a/tests/test_langchain_compatibility.py 
b/tests/test_langchain_compatibility.py index b567c84..313c5c4 100644 --- a/tests/test_langchain_compatibility.py +++ b/tests/test_langchain_compatibility.py @@ -10,9 +10,6 @@ from dataclasses import dataclass, field from typing import Any -import pytest - - # --------------------------------------------------------------------------- # Fake LangChain tool stubs (same pattern as test_langchain_gateway.py) # --------------------------------------------------------------------------- @@ -62,9 +59,7 @@ def _make_math_tools() -> list[FakeTool]: ) return [ FakeTool(name="add", description="Add two numbers together", args_schema=add_schema), - FakeTool( - name="multiply", description="Multiply two numbers", args_schema=multiply_schema - ), + FakeTool(name="multiply", description="Multiply two numbers", args_schema=multiply_schema), ] @@ -232,10 +227,12 @@ def _get_call_tool(self, tools): def test_call_existing_tool(self): call = self._get_call_tool(_make_diverse_tools()) - result = call.invoke({ - "tool_name": "cancel_order", - "arguments": {"order_id": "123"}, - }) + result = call.invoke( + { + "tool_name": "cancel_order", + "arguments": {"order_id": "123"}, + } + ) assert "cancel_order" in result assert "123" in result @@ -243,10 +240,12 @@ def test_call_existing_tool(self): def test_call_nonexistent_tool(self): call = self._get_call_tool(_make_diverse_tools()) - result = call.invoke({ - "tool_name": "nonexistent_tool", - "arguments": {}, - }) + result = call.invoke( + { + "tool_name": "nonexistent_tool", + "arguments": {}, + } + ) data = json.loads(result) assert "error" in data @@ -255,19 +254,23 @@ def test_call_nonexistent_tool(self): def test_call_with_none_arguments(self): call = self._get_call_tool(_make_diverse_tools()) - result = call.invoke({ - "tool_name": "get_weather", - "arguments": None, - }) + result = call.invoke( + { + "tool_name": "get_weather", + "arguments": None, + } + ) assert "get_weather" in result def test_call_with_missing_arguments(self): 
call = self._get_call_tool(_make_diverse_tools()) - result = call.invoke({ - "tool_name": "get_weather", - }) + result = call.invoke( + { + "tool_name": "get_weather", + } + ) assert "get_weather" in result @@ -387,10 +390,12 @@ def test_search_then_call(self): assert any(t["name"] == "send_email" for t in search_result["tools"]) # Step 2: Call - call_result = call.invoke({ - "tool_name": "send_email", - "arguments": {"to": "user@example.com", "body": "hello"}, - }) + call_result = call.invoke( + { + "tool_name": "send_email", + "arguments": {"to": "user@example.com", "body": "hello"}, + } + ) assert "send_email" in call_result assert "executed" in call_result @@ -410,10 +415,12 @@ def test_search_then_call_via_iter(self): search_result = json.loads(search.invoke({"query": "weather"})) assert any(t["name"] == "get_weather" for t in search_result["tools"]) - call_result = call.invoke({ - "tool_name": "get_weather", - "arguments": {"city": "Seoul"}, - }) + call_result = call.invoke( + { + "tool_name": "get_weather", + "arguments": {"city": "Seoul"}, + } + ) assert "get_weather" in call_result def test_user_example_scenario(self): @@ -425,9 +432,7 @@ def test_user_example_scenario(self): search_documents = FakeTool( name="search_documents", description="Search documents by query string" ) - get_weather = FakeTool( - name="get_weather", description="Get current weather for a city" - ) + get_weather = FakeTool(name="get_weather", description="Get current weather for a city") # ToolGraph creation and tool registration (user's pattern) tg_tool = ToolGraph() @@ -451,10 +456,12 @@ def test_user_example_scenario(self): # Verify call works call = next(t for t in tools if t.name == "call_tool") - call_result = call.invoke({ - "tool_name": "add", - "arguments": {"a": 1, "b": 2}, - }) + call_result = call.invoke( + { + "tool_name": "add", + "arguments": {"a": 1, "b": 2}, + } + ) assert "add" in call_result assert "executed" in call_result @@ -481,10 +488,12 @@ def 
test_add_tool_after_gateway_creation(self): assert result["total_tools"] == 2 # And the new tool is callable - call_result = call.invoke({ - "tool_name": "multiply", - "arguments": {"a": 3, "b": 4}, - }) + call_result = call.invoke( + { + "tool_name": "multiply", + "arguments": {"a": 3, "b": 4}, + } + ) assert "multiply" in call_result def test_add_tools_batch(self): @@ -546,10 +555,12 @@ def test_call_with_dict_arguments(self): gateway = tg.as_tools() call = next(t for t in gateway if t.name == "call_tool") - result = call.invoke({ - "tool_name": "add", - "arguments": {"a": 1, "b": 2}, - }) + result = call.invoke( + { + "tool_name": "add", + "arguments": {"a": 1, "b": 2}, + } + ) assert "add" in result assert "executed" in result diff --git a/tests/test_langchain_gateway.py b/tests/test_langchain_gateway.py index 220328b..1d7b9cf 100644 --- a/tests/test_langchain_gateway.py +++ b/tests/test_langchain_gateway.py @@ -6,8 +6,6 @@ from dataclasses import dataclass from typing import Any -import pytest - @dataclass class FakeTool: @@ -134,10 +132,12 @@ def test_call_existing_tool(self): tools = _make_tools() call = self._get_call_tool(tools) - result = call.invoke({ - "tool_name": "cancel_order", - "arguments": {"order_id": "123"}, - }) + result = call.invoke( + { + "tool_name": "cancel_order", + "arguments": {"order_id": "123"}, + } + ) assert "cancel_order" in result assert "123" in result @@ -146,10 +146,12 @@ def test_call_nonexistent_tool(self): tools = _make_tools() call = self._get_call_tool(tools) - result = call.invoke({ - "tool_name": "nonexistent_tool", - "arguments": {}, - }) + result = call.invoke( + { + "tool_name": "nonexistent_tool", + "arguments": {}, + } + ) data = json.loads(result) assert "error" in data @@ -159,9 +161,11 @@ def test_call_with_empty_arguments(self): tools = _make_tools() call = self._get_call_tool(tools) - result = call.invoke({ - "tool_name": "get_weather", - }) + result = call.invoke( + { + "tool_name": "get_weather", + } + ) 
assert "get_weather" in result @@ -169,10 +173,12 @@ def test_call_with_none_arguments(self): tools = _make_tools() call = self._get_call_tool(tools) - result = call.invoke({ - "tool_name": "get_weather", - "arguments": None, - }) + result = call.invoke( + { + "tool_name": "get_weather", + "arguments": None, + } + ) assert "get_weather" in result @@ -194,10 +200,12 @@ def test_search_then_call(self): assert any(t["name"] == "send_email" for t in search_result["tools"]) # Step 2: Call - call_result = call.invoke({ - "tool_name": "send_email", - "arguments": {"to": "user@example.com", "body": "hello"}, - }) + call_result = call.invoke( + { + "tool_name": "send_email", + "arguments": {"to": "user@example.com", "body": "hello"}, + } + ) assert "send_email" in call_result assert "executed" in call_result diff --git a/tests/test_langchain_toolkit.py b/tests/test_langchain_toolkit.py index 9e091ac..1e9d19a 100644 --- a/tests/test_langchain_toolkit.py +++ b/tests/test_langchain_toolkit.py @@ -2,11 +2,9 @@ from __future__ import annotations -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Any -import pytest - @dataclass class FakeTool: @@ -61,8 +59,7 @@ def test_filter_tools_preserves_original_objects(): def test_filter_tools_with_prebuilt_graph(): - from graph_tool_call import ToolGraph - from graph_tool_call import filter_tools + from graph_tool_call import ToolGraph, filter_tools tools = _make_tools() tg = ToolGraph() @@ -114,8 +111,7 @@ def test_toolkit_all_tools(): def test_toolkit_graph_accessible(): - from graph_tool_call import ToolGraph - from graph_tool_call import GraphToolkit + from graph_tool_call import GraphToolkit, ToolGraph tools = _make_tools() toolkit = GraphToolkit(tools=tools) @@ -125,8 +121,7 @@ def test_toolkit_graph_accessible(): def test_toolkit_with_prebuilt_graph(): - from graph_tool_call import ToolGraph - from graph_tool_call import GraphToolkit + from graph_tool_call import GraphToolkit, ToolGraph 
tg = ToolGraph() tools = _make_tools(5) From 59b4b7f6c478ff970b7e99dfcf76fb72fd8dc942 Mon Sep 17 00:00:00 2001 From: daehee <1998opening@gmail.com> Date: Sun, 3 May 2026 18:00:11 +0900 Subject: [PATCH 11/14] =?UTF-8?q?fix:=20=EC=BD=94=EB=93=9C=20=EB=A6=AC?= =?UTF-8?q?=EB=B7=B0=20=EA=B2=B0=ED=95=A8=20=EB=B0=98=EC=98=81=20(CRITICAL?= =?UTF-8?q?=20#1/#2=20+=20=EB=8B=A8=EC=9C=84=20=ED=85=8C=EC=8A=A4=ED=8A=B8?= =?UTF-8?q?=20+=20=EC=86=8C=ED=95=AD=EB=AA=A9)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CRITICAL #1 — ${user_input.x} ↔ context["input"] 이름 불일치 해소 - PlanRunner 가 input_context 를 'input' / 'user_input' 두 키로 alias 등록. - synthesizer 의 F2/Cycle-policy fallback (${user_input.}) 이 정상 resolve. - 영향: F2 fallback 으로 합성된 plan 이 첫 step 부터 abort 되던 회귀 fix. CRITICAL #2 — ExecutionTrace.steps 가 항상 빈 리스트 - PlanCompleted / PlanAborted 이벤트에 trace_steps 필드 추가. - run_stream 이 종결 이벤트에 누적된 StepTrace 를 실어 보냄. - run() 은 종결 이벤트의 trace_steps 를 추출해 ExecutionTrace.steps 채움. - 영향: run_stream 안 쓰는 caller 도 step 단위 trace 받을 수 있음. 단위 테스트 신규 추가 (42 PASS) — plan/graphify 모듈 cover - tests/test_plan_runner.py — CRITICAL #1, #2 회귀 테스트 + 핵심 동작 - tests/test_plan_synthesizer.py — 합성/체이닝/F2 fallback/normalize 등 - tests/test_plan_binding.py — placeholder resolution + 에러 동작 - tests/test_io_contract.py — extract_leaves + query/path enum 추출 회귀 - tests/test_dependency_verbs.py — _VERB_TO_INTENT 'reg' 매핑 기타 - synthesizer.py: _normalize_field_name 중복 정의 제거 (첫 정의가 dead code 였음). - dependency.py: _VERB_TO_INTENT 에 'reg' 추가 (regGoodsApprove → write). - io_contract.py: query/path/header parameter 의 enum 추출 (이전엔 body 만). 
--- graph_tool_call/analyze/dependency.py | 1 + graph_tool_call/ingest/io_contract.py | 8 +- graph_tool_call/plan/runner.py | 44 ++++-- graph_tool_call/plan/synthesizer.py | 21 --- tests/test_dependency_verbs.py | 25 +++ tests/test_io_contract.py | 170 ++++++++++++++++++++ tests/test_plan_binding.py | 70 ++++++++ tests/test_plan_runner.py | 220 ++++++++++++++++++++++++++ tests/test_plan_synthesizer.py | 172 ++++++++++++++++++++ 9 files changed, 693 insertions(+), 38 deletions(-) create mode 100644 tests/test_dependency_verbs.py create mode 100644 tests/test_io_contract.py create mode 100644 tests/test_plan_binding.py create mode 100644 tests/test_plan_runner.py create mode 100644 tests/test_plan_synthesizer.py diff --git a/graph_tool_call/analyze/dependency.py b/graph_tool_call/analyze/dependency.py index 533f344..28864fa 100644 --- a/graph_tool_call/analyze/dependency.py +++ b/graph_tool_call/analyze/dependency.py @@ -670,6 +670,7 @@ def _detect_cross_resource(tools: list[ToolSchema]) -> list[DetectedRelation]: "insert": "write", "register": "write", "regist": "write", + "reg": "write", # camelCase 약어 (regGoodsApprove 등) # update "modify": "update", "update": "update", diff --git a/graph_tool_call/ingest/io_contract.py b/graph_tool_call/ingest/io_contract.py index 7748bb5..90bf308 100644 --- a/graph_tool_call/ingest/io_contract.py +++ b/graph_tool_call/ingest/io_contract.py @@ -257,8 +257,13 @@ def extract_consumes_for_operation( continue if is_swagger2: ftype = p.get("type") or "string" + # Swagger 2.0 — enum lives directly on the parameter object. + enum_vals = p.get("enum") or [] else: - ftype = _schema_type(p.get("schema") or {}) or "string" + param_schema = p.get("schema") or {} + ftype = _schema_type(param_schema) or "string" + # OpenAPI 3.x — enum lives under ``schema``. 
+ enum_vals = param_schema.get("enum") or [] if isinstance(param_schema, dict) else [] if p["name"] in seen_names: continue seen_names.add(p["name"]) @@ -269,6 +274,7 @@ def extract_consumes_for_operation( field_type=ftype, required=is_required, description=str(p.get("description") or "")[:200], + enum=list(enum_vals), ) ) diff --git a/graph_tool_call/plan/runner.py b/graph_tool_call/plan/runner.py index 3b70f77..73038de 100644 --- a/graph_tool_call/plan/runner.py +++ b/graph_tool_call/plan/runner.py @@ -75,6 +75,8 @@ class PlanCompleted: plan_id: str = "" output: Any = None total_duration_ms: int = 0 + # 누적 step traces — 비-스트리밍 ``run()`` 이 ExecutionTrace.steps 채울 때 사용. + trace_steps: list[StepTrace] = field(default_factory=list) @dataclass @@ -84,6 +86,7 @@ class PlanAborted: failed_step: str = "" error: dict[str, Any] = field(default_factory=dict) total_duration_ms: int = 0 + trace_steps: list[StepTrace] = field(default_factory=list) PlanEvent = PlanStarted | StepStarted | StepCompleted | StepFailed | PlanCompleted | PlanAborted @@ -137,8 +140,12 @@ def run_stream( ) -> Iterator[PlanEvent]: """Execute *plan* and yield events as each step progresses. - ``input_context`` supplies values for ``${input.xxx}`` bindings — - typically the entities extracted by Stage 1 (intent parser). + ``input_context`` supplies values for ``${input.xxx}`` and + ``${user_input.xxx}`` bindings (both keys resolve to the same dict, + kept as aliases because the synthesizer emits ``user_input`` for + F2/Cycle-policy fallbacks and historical entity-injection paths use + ``input``). Typically the entities extracted by Stage 1 (intent + parser) plus any operator-supplied seed values. """ plan_start = time.monotonic() @@ -148,10 +155,14 @@ def run_stream( step_count=len(plan.steps), ) - # step_id -> output (runtime context for binding resolution) + # step_id -> output (runtime context for binding resolution). 
+ # ``input`` and ``user_input`` are aliases — same dict, both names — + # so binding ``${input.x}`` and ``${user_input.x}`` both resolve. context: dict[str, Any] = {} if input_context: - context["input"] = dict(input_context) + input_dict = dict(input_context) + context["input"] = input_dict + context["user_input"] = input_dict trace_steps: list[StepTrace] = [] @@ -181,6 +192,7 @@ def run_stream( failed_step=step.id, error=err, total_duration_ms=_ms_since(plan_start), + trace_steps=list(trace_steps), ) return @@ -216,6 +228,7 @@ def run_stream( failed_step=step.id, error=err, total_duration_ms=_ms_since(plan_start), + trace_steps=list(trace_steps), ) return @@ -255,6 +268,7 @@ def run_stream( failed_step="", error=err, total_duration_ms=_ms_since(plan_start), + trace_steps=list(trace_steps), ) return @@ -262,6 +276,7 @@ def run_stream( plan_id=plan.id, output=final, total_duration_ms=_ms_since(plan_start), + trace_steps=list(trace_steps), ) # ---------------------------------------------------------------------- @@ -274,7 +289,12 @@ def run( *, input_context: dict[str, Any] | None = None, ) -> ExecutionTrace: - """Execute *plan* and return an ExecutionTrace aggregating events.""" + """Execute *plan* and return an ExecutionTrace aggregating events. + + ``trace_steps`` 는 종결 이벤트 (``PlanCompleted`` / ``PlanAborted``) 가 + 실어 보내는 것을 그대로 사용 — run_stream 안에서 step 단위로 누적된 + StepTrace 가 그대로 ExecutionTrace.steps 에 들어간다. 
+ """ started_at = _now_iso() started = time.monotonic() trace_steps: list[StepTrace] = [] @@ -282,24 +302,16 @@ def run( failed_step: str | None = None output: Any = None - last_step_output: dict[str, Any] = {} - for event in self.run_stream(plan, input_context=input_context): etype = event.type - if etype == "step.completed": - # step trace built progressively — simpler: derive from events - pass - elif etype == "plan.completed": + if etype == "plan.completed": success = True output = event.output # type: ignore[union-attr] + trace_steps = list(event.trace_steps) # type: ignore[union-attr] elif etype == "plan.aborted": failed_step = event.failed_step # type: ignore[union-attr] + trace_steps = list(event.trace_steps) # type: ignore[union-attr] - # Recompute trace_steps by re-running the stream? No — we already - # lost events. Instead the run_stream implementation should also - # surface StepTrace. For v1 keep trace minimal (plan-level only) — - # callers that need per-step detail should use run_stream. - _ = last_step_output # (placeholder to satisfy future extension) return ExecutionTrace( plan_id=plan.id, success=success, diff --git a/graph_tool_call/plan/synthesizer.py b/graph_tool_call/plan/synthesizer.py index 4942b7e..ac8f2de 100644 --- a/graph_tool_call/plan/synthesizer.py +++ b/graph_tool_call/plan/synthesizer.py @@ -90,27 +90,6 @@ def __init__( self.label_field_hints = list(label_field_hints) -def _normalize_field_name(name: str) -> str: - """Lowercase + strip separators for loose field-name matching. - - Conservative on purpose: - ``ordNo`` → ``ordno`` - ``ord_no`` → ``ordno`` - ``ORD-NO`` → ``ordno`` - BUT keeps token roots distinct: - ``ordNo`` ≠ ``orderNo`` (``ordno`` ≠ ``orderno``) - Token-level synonym mapping (``ord`` ↔ ``order``) is domain-specific - and not done here — the graph-edge fallback handles those cases. 
- """ - if not name: - return "" - out: list[str] = [] - for ch in name: - if ch.isalnum(): - out.append(ch.lower()) - return "".join(out) - - def _normalize_field_name(name: str) -> str: """Lowercase + strip non-alphanumerics for loose field-name matching. diff --git a/tests/test_dependency_verbs.py b/tests/test_dependency_verbs.py new file mode 100644 index 0000000..e583d05 --- /dev/null +++ b/tests/test_dependency_verbs.py @@ -0,0 +1,25 @@ +"""Unit tests for ``graph_tool_call.analyze.dependency`` verb mapping. + +특히 'reg' 약어가 'write' intent 로 분류되는지 확인 (리뷰 🟢 항목). +""" +from __future__ import annotations + +from graph_tool_call.analyze.dependency import _VERB_TO_INTENT + + +def test_reg_abbrev_maps_to_write(): + """``regGoodsApprove`` 같은 camelCase 약어를 위해 'reg' 도 write 로 잡아야.""" + assert _VERB_TO_INTENT.get("reg") == "write" + + +def test_register_full_form_still_maps_to_write(): + assert _VERB_TO_INTENT.get("register") == "write" + assert _VERB_TO_INTENT.get("regist") == "write" + + +def test_basic_verbs_unchanged(): + """기존 verb mapping 회귀 방지.""" + assert _VERB_TO_INTENT.get("get") == "read" + assert _VERB_TO_INTENT.get("create") == "write" + assert _VERB_TO_INTENT.get("update") == "update" + assert _VERB_TO_INTENT.get("delete") == "delete" diff --git a/tests/test_io_contract.py b/tests/test_io_contract.py new file mode 100644 index 0000000..b9b9b84 --- /dev/null +++ b/tests/test_io_contract.py @@ -0,0 +1,170 @@ +"""Unit tests for ``graph_tool_call.ingest.io_contract``. + +특히 query/path parameter 의 enum 추출 (리뷰에서 빠뜨려진 부분) 확인. 
+""" +from __future__ import annotations + +from graph_tool_call.ingest.io_contract import ( + extract_consumes_for_operation, + extract_leaves, + extract_produces_for_operation, +) + +# ─── extract_leaves ── + + +def test_extract_leaves_object_with_primitives(): + schema = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + "required": ["name"], + } + leaves = extract_leaves(schema, base_path="$") + by_name = {leaf.field_name: leaf for leaf in leaves} + assert by_name["name"].required is True + assert by_name["name"].field_type == "string" + assert by_name["age"].required is False + + +def test_extract_leaves_array_of_objects(): + schema = { + "type": "array", + "items": { + "type": "object", + "properties": {"id": {"type": "string"}}, + }, + } + leaves = extract_leaves(schema, base_path="$.body") + paths = {leaf.json_path for leaf in leaves} + assert any("[*]" in p for p in paths), "array → [*] wildcard 경로" + + +def test_extract_leaves_captures_enum(): + schema = { + "type": "object", + "properties": { + "status": {"type": "string", "enum": ["pending", "shipped"]}, + }, + } + leaves = extract_leaves(schema, base_path="$") + status = next(leaf for leaf in leaves if leaf.field_name == "status") + assert status.enum == ["pending", "shipped"] + + +# ─── consumes — enum 추출 회귀 (리뷰 🟢 항목) ── + + +def test_query_param_enum_extracted_openapi3(): + """OpenAPI 3.x query param 의 schema.enum 이 FieldLeaf.enum 에 들어가야.""" + operation = { + "parameters": [ + { + "name": "sort", + "in": "query", + "required": True, + "schema": {"type": "string", "enum": ["asc", "desc"]}, + }, + ], + "responses": {"200": {"description": "OK"}}, + } + leaves = extract_consumes_for_operation(operation) + by_name = {leaf.field_name: leaf for leaf in leaves} + assert "sort" in by_name + assert by_name["sort"].enum == ["asc", "desc"] + + +def test_query_param_enum_extracted_swagger2(): + """Swagger 2.0 query param 의 enum (parameter level) 도 잡아야.""" + 
operation = { + "parameters": [ + { + "name": "type", + "in": "query", + "required": True, + "type": "string", + "enum": ["A", "B", "C"], + }, + ], + "responses": {"200": {"description": "OK"}}, + } + leaves = extract_consumes_for_operation(operation, is_swagger2=True) + type_leaf = next(leaf for leaf in leaves if leaf.field_name == "type") + assert type_leaf.enum == ["A", "B", "C"] + + +def test_path_param_enum_extracted(): + """Path param 의 enum 도 동일.""" + operation = { + "parameters": [ + { + "name": "kind", + "in": "path", + "required": True, + "schema": {"type": "string", "enum": ["x", "y"]}, + }, + ], + "responses": {"200": {"description": "OK"}}, + } + leaves = extract_consumes_for_operation(operation) + kind = next(leaf for leaf in leaves if leaf.field_name == "kind") + assert kind.enum == ["x", "y"] + + +def test_param_without_enum_has_empty_list(): + """enum 없는 일반 param 은 enum=[] 으로 들어가야 (None 아님).""" + operation = { + "parameters": [ + {"name": "page", "in": "query", "schema": {"type": "integer"}}, + ], + "responses": {"200": {"description": "OK"}}, + } + leaves = extract_consumes_for_operation(operation, required_only=False) + page = next(leaf for leaf in leaves if leaf.field_name == "page") + assert page.enum == [] + + +# ─── produces ── + + +def test_extract_produces_walks_response_body(): + operation = { + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "data": { + "type": "object", + "properties": { + "id": {"type": "string"}, + }, + }, + }, + }, + }, + }, + }, + }, + } + leaves = extract_produces_for_operation(operation) + paths = {leaf.json_path for leaf in leaves} + assert "$.data.id" in paths + + +def test_consumes_skips_optional_when_required_only(): + operation = { + "parameters": [ + {"name": "must", "in": "query", "required": True, "schema": {"type": "string"}}, + {"name": "maybe", "in": "query", "required": False, "schema": {"type": "string"}}, + ], + "responses": 
{"200": {"description": "OK"}}, + } + leaves = extract_consumes_for_operation(operation) + names = {leaf.field_name for leaf in leaves} + assert "must" in names + assert "maybe" not in names diff --git a/tests/test_plan_binding.py b/tests/test_plan_binding.py new file mode 100644 index 0000000..139860e --- /dev/null +++ b/tests/test_plan_binding.py @@ -0,0 +1,70 @@ +"""Unit tests for ``graph_tool_call.plan.binding``. + +binding placeholder resolution + error 동작. +""" +from __future__ import annotations + +import pytest + +from graph_tool_call.plan.binding import BindingError, resolve_bindings + + +def test_literal_passes_through(): + assert resolve_bindings("hello", {}) == "hello" + assert resolve_bindings(42, {}) == 42 + assert resolve_bindings(None, {}) is None + + +def test_simple_lookup(): + ctx = {"s1": {"foo": "BAR"}} + assert resolve_bindings("${s1.foo}", ctx) == "BAR" + + +def test_full_step_object(): + ctx = {"s1": {"a": 1, "b": 2}} + assert resolve_bindings("${s1}", ctx) == {"a": 1, "b": 2} + + +def test_array_index(): + ctx = {"s1": {"items": [{"id": "A"}, {"id": "B"}]}} + assert resolve_bindings("${s1.items[0].id}", ctx) == "A" + assert resolve_bindings("${s1.items[1].id}", ctx) == "B" + + +def test_array_negative_index(): + ctx = {"s1": [10, 20, 30]} + assert resolve_bindings("${s1[-1]}", ctx) == 30 + + +def test_unknown_source_raises(): + with pytest.raises(BindingError, match="unknown source"): + resolve_bindings("${ghost.x}", {"s1": {}}) + + +def test_dict_walks_recursively(): + ctx = {"s1": {"v": 9}} + out = resolve_bindings( + {"a": "${s1.v}", "b": "literal", "nested": {"c": "${s1.v}"}}, + ctx, + ) + assert out == {"a": 9, "b": "literal", "nested": {"c": 9}} + + +def test_list_walks_recursively(): + ctx = {"s1": {"v": "X"}} + out = resolve_bindings(["${s1.v}", "lit", {"k": "${s1.v}"}], ctx) + assert out == ["X", "lit", {"k": "X"}] + + +def test_oob_index_raises(): + ctx = {"s1": [1, 2]} + with pytest.raises(BindingError, match="out of range"): + 
resolve_bindings("${s1[5]}", ctx) + + +def test_input_alias_lookup(): + """input / user_input 둘 다 같은 값 가리키도록 caller 가 등록한 케이스.""" + shared = {"keyword": "shoes"} + ctx = {"input": shared, "user_input": shared} + assert resolve_bindings("${input.keyword}", ctx) == "shoes" + assert resolve_bindings("${user_input.keyword}", ctx) == "shoes" diff --git a/tests/test_plan_runner.py b/tests/test_plan_runner.py new file mode 100644 index 0000000..923522d --- /dev/null +++ b/tests/test_plan_runner.py @@ -0,0 +1,220 @@ +"""Unit tests for ``graph_tool_call.plan.runner``. + +리뷰 CRITICAL #1, #2 회귀 방지 + 핵심 동작 cover. +""" +from __future__ import annotations + +from typing import Any + +import pytest + +from graph_tool_call.plan import ( + Plan, + PlanRunner, + PlanStep, +) +from graph_tool_call.plan.runner import ( + PlanAborted, + PlanCompleted, +) + + +def _echo(name: str, args: dict[str, Any]) -> dict[str, Any]: + return {"echoed": args, "tool": name} + + +# ─── CRITICAL #1: input_context 가 ${user_input.x} / ${input.x} 둘 다 resolve ── + + +def test_user_input_alias_resolves(): + """``${user_input.foo}`` 가 input_context["foo"] 로 resolve 되어야 한다. + + 이전엔 synthesizer 가 ${user_input.x} 만들고 runner 가 context["input"] 에만 + 심어서 첫 step 부터 BindingError 로 abort 됐던 케이스. 
+ """ + plan = Plan( + id="t", + goal="g", + steps=[ + PlanStep(id="s1", tool="echo", args={"foo": "${user_input.foo}"}), + ], + output_binding="${s1}", + ) + trace = PlanRunner(_echo).run(plan, input_context={"foo": "BAR"}) + assert trace.success, f"plan should succeed, got: {trace.failed_step}" + assert trace.steps[0].args_resolved == {"foo": "BAR"} + + +def test_input_alias_resolves_too(): + """``${input.foo}`` 도 동일 dict 가리켜야 한다 (backward compat).""" + plan = Plan( + id="t", + goal="g", + steps=[ + PlanStep(id="s1", tool="echo", args={"foo": "${input.foo}"}), + ], + output_binding="${s1}", + ) + trace = PlanRunner(_echo).run(plan, input_context={"foo": "BAR"}) + assert trace.success + assert trace.steps[0].args_resolved == {"foo": "BAR"} + + +def test_mixed_input_user_input_in_same_step(): + """한 step 에 ${input.x} 와 ${user_input.y} 가 섞여 있어도 둘 다 resolve.""" + plan = Plan( + id="t", + goal="g", + steps=[ + PlanStep( + id="s1", + tool="echo", + args={"a": "${input.x}", "b": "${user_input.y}"}, + ), + ], + ) + trace = PlanRunner(_echo).run(plan, input_context={"x": "X", "y": "Y"}) + assert trace.success + assert trace.steps[0].args_resolved == {"a": "X", "b": "Y"} + + +# ─── CRITICAL #2: ExecutionTrace.steps 가 누적 ── + + +def test_execution_trace_accumulates_steps(): + """run() 의 ExecutionTrace.steps 가 빈 리스트가 아니어야 한다. + + 이전엔 runner.py:289 의 pass 때문에 항상 [] 였던 케이스. 
+ """ + plan = Plan( + id="t", + goal="g", + steps=[ + PlanStep(id="s1", tool="echo", args={"x": "hello"}), + PlanStep(id="s2", tool="echo", args={"y": "${s1.echoed.x}"}), + ], + output_binding="${s2}", + ) + trace = PlanRunner(_echo).run(plan) + assert trace.success + assert len(trace.steps) == 2, "두 step 모두 trace 에 누적돼야 함" + assert trace.steps[0].id == "s1" + assert trace.steps[1].id == "s2" + assert trace.steps[0].output == {"echoed": {"x": "hello"}, "tool": "echo"} + assert trace.steps[1].args_resolved == {"y": "hello"}, "이전 step 출력 binding" + + +def test_execution_trace_includes_failed_step(): + """실패해도 실패한 step + 그 이전 step 이 trace 에 포함.""" + def flaky(name: str, args: dict[str, Any]) -> dict[str, Any]: + if name == "boom": + raise RuntimeError("simulated") + return {"ok": True} + + plan = Plan( + id="t", + goal="g", + steps=[ + PlanStep(id="s1", tool="ok"), + PlanStep(id="s2", tool="boom"), + PlanStep(id="s3", tool="never_called"), + ], + ) + trace = PlanRunner(flaky).run(plan) + assert trace.success is False + assert trace.failed_step == "s2" + assert len(trace.steps) == 2, "실패까지의 step 만 누적 (s3 는 도달 안 함)" + assert trace.steps[0].id == "s1" + assert trace.steps[0].error is None + assert trace.steps[1].id == "s2" + assert trace.steps[1].error is not None + assert "simulated" in trace.steps[1].error["message"] + + +# ─── 일반 동작 ── + + +def test_run_stream_yields_expected_events_in_order(): + plan = Plan( + id="t", + goal="g", + steps=[PlanStep(id="s1", tool="echo", args={"x": "hi"})], + ) + events = list(PlanRunner(_echo).run_stream(plan)) + types = [e.type for e in events] + assert types[0] == "plan.started" + assert types[-1] == "plan.completed" + assert "step.started" in types + assert "step.completed" in types + + +def test_plan_completed_carries_trace_steps(): + """run_stream 의 PlanCompleted 가 trace_steps 를 실어 보내야 run() 이 읽을 수 있음.""" + plan = Plan( + id="t", + goal="g", + steps=[PlanStep(id="s1", tool="echo", args={"x": "hi"})], + ) + completed = next( + e 
for e in PlanRunner(_echo).run_stream(plan) + if isinstance(e, PlanCompleted) + ) + assert len(completed.trace_steps) == 1 + assert completed.trace_steps[0].id == "s1" + + +def test_plan_aborted_carries_trace_steps(): + """abort 시에도 PlanAborted 가 그때까지의 trace_steps 를 실어 보내야 함.""" + def fail(name: str, args: dict[str, Any]) -> dict[str, Any]: + raise RuntimeError("boom") + + plan = Plan(id="t", goal="g", steps=[PlanStep(id="s1", tool="x")]) + aborted = next( + e for e in PlanRunner(fail).run_stream(plan) + if isinstance(e, PlanAborted) + ) + assert len(aborted.trace_steps) == 1 + assert aborted.trace_steps[0].error is not None + + +def test_binding_to_unknown_source_aborts(): + """존재하지 않는 step id 참조 → BindingError → abort.""" + plan = Plan( + id="t", + goal="g", + steps=[PlanStep(id="s1", tool="echo", args={"x": "${ghost.foo}"})], + ) + trace = PlanRunner(_echo).run(plan) + assert trace.success is False + assert trace.failed_step == "s1" + assert trace.steps[0].error["kind"] == "binding" + + +def test_output_binding_resolves_nested_path(): + """output_binding 이 step 응답 안의 nested path 를 가리킬 수 있어야.""" + plan = Plan( + id="t", + goal="g", + steps=[PlanStep(id="s1", tool="echo", args={"v": 42})], + output_binding="${s1.echoed.v}", + ) + trace = PlanRunner(_echo).run(plan) + assert trace.success + assert trace.output == 42 + + +def test_no_input_context_works_when_plan_has_no_input_binding(): + """input_context 안 줘도 ${input.x} 안 쓰면 동작.""" + plan = Plan( + id="t", + goal="g", + steps=[PlanStep(id="s1", tool="echo", args={"x": "literal"})], + ) + trace = PlanRunner(_echo).run(plan) + assert trace.success + + +def test_v1_only_supports_abort_on_error(): + """v1 PlanRunner 는 on_error='abort' 만 허용 — 다른 값은 ValueError.""" + with pytest.raises(ValueError): + PlanRunner(_echo, on_error="continue") diff --git a/tests/test_plan_synthesizer.py b/tests/test_plan_synthesizer.py new file mode 100644 index 0000000..7ad4717 --- /dev/null +++ b/tests/test_plan_synthesizer.py @@ -0,0 +1,172 
@@ +"""Unit tests for ``graph_tool_call.plan.synthesizer``. + +핵심 합성 시나리오 + Cycle/F2 fallback 의 user_input placeholder 출력. +""" +from __future__ import annotations + +import pytest + +from graph_tool_call.plan.synthesizer import ( + PathSynthesizer, + PlanSynthesisError, + _normalize_field_name, +) + + +def _basic_graph() -> dict: + """포함: + - 'searchProduct': 입력=keyword, 출력=goodsNo (semantic=goods.id) + - 'getProductDetail': 입력=goodsNo (semantic=goods.id) → 의존 + """ + return { + "tools": { + "searchProduct": { + "metadata": { + "method": "GET", + "path": "/api/v1/products", + "consumes": [ + {"field_name": "keyword", "kind": "data", "required": True} + ], + "produces": [ + { + "field_name": "goodsNo", + "json_path": "$.body.items[*].goodsNo", + "semantic_tag": "goods.id", + } + ], + "ai_metadata": { + "canonical_action": "search", + "primary_resource": "product", + }, + }, + }, + "getProductDetail": { + "metadata": { + "method": "GET", + "path": "/api/v1/products/{goodsNo}", + "consumes": [ + { + "field_name": "goodsNo", + "semantic_tag": "goods.id", + "kind": "data", + "required": True, + } + ], + "produces": [ + {"field_name": "name", "json_path": "$.body.name"} + ], + "ai_metadata": { + "canonical_action": "read", + "primary_resource": "product", + }, + }, + }, + }, + } + + +# ─── normalize_field_name ── + + +def test_normalize_field_name_collapses_separators(): + assert _normalize_field_name("ord_no") == "ordno" + assert _normalize_field_name("ORD-NO") == "ordno" + assert _normalize_field_name("ordNo") == "ordno" + + +def test_normalize_field_name_keeps_token_roots_distinct(): + """ord ≠ order — token-level synonym mapping은 안 함.""" + assert _normalize_field_name("ordNo") != _normalize_field_name("orderNo") + + +def test_normalize_field_name_empty(): + assert _normalize_field_name("") == "" + assert _normalize_field_name(None) == "" # type: ignore[arg-type] + + +# ─── synthesizer 핵심 동작 ── + + +def test_synthesize_uses_entity_when_available(): + """user 가 
keyword 를 entity 로 줬으면 검색 step 1개로 끝나야.""" + syn = PathSynthesizer(_basic_graph()) + plan = syn.synthesize(target="searchProduct", entities={"keyword": "shoes"}) + assert len(plan.steps) == 1 + assert plan.steps[0].tool == "searchProduct" + assert plan.steps[0].args == {"keyword": "shoes"} + + +def test_synthesize_chains_producer_when_entity_missing(): + """getProductDetail 호출하려면 goodsNo 가 필요 — searchProduct 가 producer. + + keyword 만 entity 로 주면 chain: searchProduct → getProductDetail. + 합성 후 step 이름은 ``s1``/``s2`` 로 정렬되고, binding 도 그에 맞게 rewrite 됨. + """ + syn = PathSynthesizer(_basic_graph()) + plan = syn.synthesize( + target="getProductDetail", entities={"keyword": "shoes"}, + ) + assert len(plan.steps) == 2, "검색 + 상세조회 2-step chain" + assert plan.steps[0].tool == "searchProduct" + assert plan.steps[1].tool == "getProductDetail" + binding = plan.steps[1].args.get("goodsNo", "") + # step_id 순서 정렬 후 binding 은 ${s1...} 로 rewrite — 첫 step 의 출력 가리킴 + assert binding.startswith("${"), "binding placeholder 형식이어야" + assert "s1" in binding, f"첫 step (s1) 출력 binding 이어야, got {binding}" + assert "goodsNo" in binding, "produces 필드 경로 포함" + + +def test_synthesize_falls_back_to_user_input_placeholder(): + """필수 field 인데 entity 도 없고 producer 도 없으면 ``${user_input.X}`` 로 fallback. + + F2 + Cycle policy B 의 핵심 동작 — abort 대신 caller 에게 슬롯을 surface. + runner 가 input_context 에 ``user_input`` 별칭으로 등록하므로 + plan 자체는 합성되고, 실행 시 caller 가 값을 공급하면 작동한다. 
+ """ + g = { + "tools": { + "needsX": { + "metadata": { + "consumes": [ + {"field_name": "mysteryField", "kind": "data", "required": True} + ], + "produces": [], + "ai_metadata": {"canonical_action": "read"}, + }, + }, + }, + } + syn = PathSynthesizer(g) + plan = syn.synthesize(target="needsX", entities={}) + assert len(plan.steps) == 1 + assert plan.steps[0].args == {"mysteryField": "${user_input.mysteryField}"} + + +def test_synthesize_unknown_target_raises(): + syn = PathSynthesizer(_basic_graph()) + with pytest.raises(PlanSynthesisError): + syn.synthesize(target="ghostTool", entities={}) + + +def test_synthesize_context_field_uses_collection_default(): + """kind=context 인 필드는 entity 없으면 context_defaults 에서 채움.""" + g = { + "tools": { + "needsLocale": { + "metadata": { + "consumes": [ + { + "field_name": "locale", + "kind": "context", + "required": True, + } + ], + "produces": [], + "ai_metadata": {"canonical_action": "read"}, + }, + }, + }, + } + syn = PathSynthesizer(g, context_defaults={"locale": "ko_KR"}) + plan = syn.synthesize(target="needsLocale", entities={}) + assert plan.steps[0].args == {"locale": "ko_KR"} From 20245604209250c88092a79904dc40517bfc7339 Mon Sep 17 00:00:00 2001 From: daehee <1998opening@gmail.com> Date: Sun, 3 May 2026 19:16:40 +0900 Subject: [PATCH 12/14] =?UTF-8?q?fix(core/tool):=20=5FANNOTATION=5FBY=5FVE?= =?UTF-8?q?RB=20=EC=97=90=20register/regist/reg/insert=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 코드 리뷰의 _VERB_TO_INTENT['reg'] 누락과 동일 패턴 — sibling vocabulary 인 _ANNOTATION_BY_VERB (MCP annotation 추론용) 도 register 계열이 통째로 빠져 있었다. 동작 자체는 망가지지 않지만 registerUser / insertOrder 같은 도구가 MCP 클라이언트에 read_only_hint / destructive_hint 힌트 못 받음. 회귀 테스트 추가: 두 dict 간 register 계열 커버리지 일관성 검증. 
--- graph_tool_call/core/tool.py | 20 ++++++++++++++++++++ tests/test_dependency_verbs.py | 18 ++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/graph_tool_call/core/tool.py b/graph_tool_call/core/tool.py index 25df150..b3e9d71 100644 --- a/graph_tool_call/core/tool.py +++ b/graph_tool_call/core/tool.py @@ -408,6 +408,26 @@ def parse_tool(tool: Any) -> ToolSchema: destructive_hint=False, idempotent_hint=False, ), + "insert": MCPAnnotations( + read_only_hint=False, + destructive_hint=False, + idempotent_hint=False, + ), + "register": MCPAnnotations( + read_only_hint=False, + destructive_hint=False, + idempotent_hint=False, + ), + "regist": MCPAnnotations( # 일부 코드베이스 약어 (regUser, registOrder) + read_only_hint=False, + destructive_hint=False, + idempotent_hint=False, + ), + "reg": MCPAnnotations( # camelCase 짧은 약어 (regGoodsApprove) + read_only_hint=False, + destructive_hint=False, + idempotent_hint=False, + ), # update verbs "update": MCPAnnotations( read_only_hint=False, diff --git a/tests/test_dependency_verbs.py b/tests/test_dependency_verbs.py index e583d05..ccca65f 100644 --- a/tests/test_dependency_verbs.py +++ b/tests/test_dependency_verbs.py @@ -23,3 +23,21 @@ def test_basic_verbs_unchanged(): assert _VERB_TO_INTENT.get("create") == "write" assert _VERB_TO_INTENT.get("update") == "update" assert _VERB_TO_INTENT.get("delete") == "delete" + + +# ─── _ANNOTATION_BY_VERB sibling 일관성 (잠복 결함) ── + + +def test_annotation_by_verb_covers_register_family(): + """``_ANNOTATION_BY_VERB`` 도 register 계열 커버해야 — _VERB_TO_INTENT 와 sibling. + + ``registerUser`` / ``insertOrder`` / ``regGoodsApprove`` 같은 도구가 MCP + annotation 을 받을 수 있어야 한다 (read_only_hint=False, ...). 
+ """ + from graph_tool_call.core.tool import _ANNOTATION_BY_VERB + for verb in ("register", "regist", "reg", "insert"): + assert verb in _ANNOTATION_BY_VERB, ( + f"verb {verb!r} 누락 — _VERB_TO_INTENT 와 sibling vocabulary 불일치" + ) + assert _ANNOTATION_BY_VERB[verb].read_only_hint is False + assert _ANNOTATION_BY_VERB[verb].destructive_hint is False From 3d37a5ce70e16a2987c77e5fac9282504391d91e Mon Sep 17 00:00:00 2001 From: daehee <1998opening@gmail.com> Date: Sun, 3 May 2026 19:50:15 +0900 Subject: [PATCH 13/14] =?UTF-8?q?style:=20ruff=20format=20=EC=A0=81?= =?UTF-8?q?=EC=9A=A9=20=E2=80=94=20=EC=8B=A0=EA=B7=9C=20=ED=85=8C=EC=8A=A4?= =?UTF-8?q?=ED=8A=B8=205=EA=B0=9C=20=ED=8C=8C=EC=9D=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI 의 'ruff format --check .' 실패 해소. 동작 변경 없음. --- tests/test_dependency_verbs.py | 2 ++ tests/test_io_contract.py | 1 + tests/test_plan_binding.py | 1 + tests/test_plan_runner.py | 13 +++++-------- tests/test_plan_synthesizer.py | 20 ++++++++------------ 5 files changed, 17 insertions(+), 20 deletions(-) diff --git a/tests/test_dependency_verbs.py b/tests/test_dependency_verbs.py index ccca65f..756e8a4 100644 --- a/tests/test_dependency_verbs.py +++ b/tests/test_dependency_verbs.py @@ -2,6 +2,7 @@ 특히 'reg' 약어가 'write' intent 로 분류되는지 확인 (리뷰 🟢 항목). """ + from __future__ import annotations from graph_tool_call.analyze.dependency import _VERB_TO_INTENT @@ -35,6 +36,7 @@ def test_annotation_by_verb_covers_register_family(): annotation 을 받을 수 있어야 한다 (read_only_hint=False, ...). 
""" from graph_tool_call.core.tool import _ANNOTATION_BY_VERB + for verb in ("register", "regist", "reg", "insert"): assert verb in _ANNOTATION_BY_VERB, ( f"verb {verb!r} 누락 — _VERB_TO_INTENT 와 sibling vocabulary 불일치" diff --git a/tests/test_io_contract.py b/tests/test_io_contract.py index b9b9b84..865b646 100644 --- a/tests/test_io_contract.py +++ b/tests/test_io_contract.py @@ -2,6 +2,7 @@ 특히 query/path parameter 의 enum 추출 (리뷰에서 빠뜨려진 부분) 확인. """ + from __future__ import annotations from graph_tool_call.ingest.io_contract import ( diff --git a/tests/test_plan_binding.py b/tests/test_plan_binding.py index 139860e..eee0ae9 100644 --- a/tests/test_plan_binding.py +++ b/tests/test_plan_binding.py @@ -2,6 +2,7 @@ binding placeholder resolution + error 동작. """ + from __future__ import annotations import pytest diff --git a/tests/test_plan_runner.py b/tests/test_plan_runner.py index 923522d..a4cf216 100644 --- a/tests/test_plan_runner.py +++ b/tests/test_plan_runner.py @@ -2,6 +2,7 @@ 리뷰 CRITICAL #1, #2 회귀 방지 + 핵심 동작 cover. 
""" + from __future__ import annotations from typing import Any @@ -106,6 +107,7 @@ def test_execution_trace_accumulates_steps(): def test_execution_trace_includes_failed_step(): """실패해도 실패한 step + 그 이전 step 이 trace 에 포함.""" + def flaky(name: str, args: dict[str, Any]) -> dict[str, Any]: if name == "boom": raise RuntimeError("simulated") @@ -155,24 +157,19 @@ def test_plan_completed_carries_trace_steps(): goal="g", steps=[PlanStep(id="s1", tool="echo", args={"x": "hi"})], ) - completed = next( - e for e in PlanRunner(_echo).run_stream(plan) - if isinstance(e, PlanCompleted) - ) + completed = next(e for e in PlanRunner(_echo).run_stream(plan) if isinstance(e, PlanCompleted)) assert len(completed.trace_steps) == 1 assert completed.trace_steps[0].id == "s1" def test_plan_aborted_carries_trace_steps(): """abort 시에도 PlanAborted 가 그때까지의 trace_steps 를 실어 보내야 함.""" + def fail(name: str, args: dict[str, Any]) -> dict[str, Any]: raise RuntimeError("boom") plan = Plan(id="t", goal="g", steps=[PlanStep(id="s1", tool="x")]) - aborted = next( - e for e in PlanRunner(fail).run_stream(plan) - if isinstance(e, PlanAborted) - ) + aborted = next(e for e in PlanRunner(fail).run_stream(plan) if isinstance(e, PlanAborted)) assert len(aborted.trace_steps) == 1 assert aborted.trace_steps[0].error is not None diff --git a/tests/test_plan_synthesizer.py b/tests/test_plan_synthesizer.py index 7ad4717..d1793b9 100644 --- a/tests/test_plan_synthesizer.py +++ b/tests/test_plan_synthesizer.py @@ -2,6 +2,7 @@ 핵심 합성 시나리오 + Cycle/F2 fallback 의 user_input placeholder 출력. 
""" + from __future__ import annotations import pytest @@ -15,8 +16,8 @@ def _basic_graph() -> dict: """포함: - - 'searchProduct': 입력=keyword, 출력=goodsNo (semantic=goods.id) - - 'getProductDetail': 입력=goodsNo (semantic=goods.id) → 의존 + - 'searchProduct': 입력=keyword, 출력=goodsNo (semantic=goods.id) + - 'getProductDetail': 입력=goodsNo (semantic=goods.id) → 의존 """ return { "tools": { @@ -24,9 +25,7 @@ def _basic_graph() -> dict: "metadata": { "method": "GET", "path": "/api/v1/products", - "consumes": [ - {"field_name": "keyword", "kind": "data", "required": True} - ], + "consumes": [{"field_name": "keyword", "kind": "data", "required": True}], "produces": [ { "field_name": "goodsNo", @@ -52,9 +51,7 @@ def _basic_graph() -> dict: "required": True, } ], - "produces": [ - {"field_name": "name", "json_path": "$.body.name"} - ], + "produces": [{"field_name": "name", "json_path": "$.body.name"}], "ai_metadata": { "canonical_action": "read", "primary_resource": "product", @@ -104,7 +101,8 @@ def test_synthesize_chains_producer_when_entity_missing(): """ syn = PathSynthesizer(_basic_graph()) plan = syn.synthesize( - target="getProductDetail", entities={"keyword": "shoes"}, + target="getProductDetail", + entities={"keyword": "shoes"}, ) assert len(plan.steps) == 2, "검색 + 상세조회 2-step chain" assert plan.steps[0].tool == "searchProduct" @@ -127,9 +125,7 @@ def test_synthesize_falls_back_to_user_input_placeholder(): "tools": { "needsX": { "metadata": { - "consumes": [ - {"field_name": "mysteryField", "kind": "data", "required": True} - ], + "consumes": [{"field_name": "mysteryField", "kind": "data", "required": True}], "produces": [], "ai_metadata": {"canonical_action": "read"}, }, From 5c2370c015e1f4cec6bbd88b0e6167a1197ba669 Mon Sep 17 00:00:00 2001 From: daehee <1998opening@gmail.com> Date: Wed, 6 May 2026 01:13:52 +0900 Subject: [PATCH 14/14] =?UTF-8?q?feat(plan):=20Stage=204=20prompt=20count?= =?UTF-8?q?=20=EC=A0=95=ED=99=95=EC=84=B1=20+=20path=20param=20required=20?= 
=?UTF-8?q?=EA=B0=95=EC=A0=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - response.py: _SUCCESS_PROMPT에 count/total 처리 지침 추가 — totalCount 등 명시적 total 필드가 있으면 사용하고, 없으면 "N개 등록" 같은 단정 금지. result_char_limit 2000→4000으로 늘려 list 응답 truncate 완화. - ingest/openapi.py: Swagger 2 / OpenAPI 3 둘 다 path 파라미터를 무조건 required=True로 마킹. 많은 spec이 명시 안 해도 URL placeholder라 호출 시 반드시 값 필요. synthesizer가 빈 entity로 plan 생성하던 회귀 차단. --- graph_tool_call/ingest/openapi.py | 10 ++++++++++ graph_tool_call/plan/response.py | 14 +++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/graph_tool_call/ingest/openapi.py b/graph_tool_call/ingest/openapi.py index 8f93dea..8f53173 100644 --- a/graph_tool_call/ingest/openapi.py +++ b/graph_tool_call/ingest/openapi.py @@ -202,6 +202,11 @@ def _extract_params_swagger2( ) else: is_required = p.get("required", False) + # OpenAPI 3.x / Swagger 2.0: path 파라미터는 본질적으로 required. + # 많은 spec이 명시 안 해도 URL placeholder라 호출 시 반드시 값이 있어야 함. + # synthesizer가 required 안 보고 빈 entity로 plan 생성 → HTTP 호출 실패 케이스 차단. + if location == "path": + is_required = True if required_only and not is_required: continue params.append( @@ -308,6 +313,11 @@ def _extract_params_openapi3( continue # skip malformed parameters (missing required 'name' field) schema = p.get("schema", {}) is_required = p.get("required", False) + # OpenAPI 3.x: path 파라미터는 본질적으로 required (URL placeholder 채우려면 필수). + # 많은 spec이 명시 안 해도 강제로 required 처리해야 synthesizer가 빈 entity를 + # UnsatisfiableFieldError로 raise → question.required popup으로 사용자에게 묻는다. + if p.get("in") == "path": + is_required = True ptype = _schema_type(schema) # Wrapper-object/array query parameter handling. 
diff --git a/graph_tool_call/plan/response.py b/graph_tool_call/plan/response.py index 714b5d4..4eefdfc 100644 --- a/graph_tool_call/plan/response.py +++ b/graph_tool_call/plan/response.py @@ -35,6 +35,18 @@ Respond in Korean unless the user's question is clearly in another language. Keep it concise — 1~3 sentences for simple answers, short bullet list for multi-item results. Do not invent data not present in the result. + +CRITICAL — count/total claims: +- The result above may be **truncated** for length. The list you see is NOT + necessarily the complete list. +- If the result contains an explicit total field (e.g. ``totalCount``, + ``totalElements``, ``total``, ``count``, ``size`` at top-level or inside + ``payload`` / ``data``), USE THAT NUMBER as the actual count and say + "총 N개 중 일부" or similar. +- If no total field exists, do NOT claim a specific count. Avoid phrases like + "현재 1개 등록되어 있습니다" — instead say "조회된 리뷰" or + "응답에 포함된 항목". Counting visible list items as the absolute total + is forbidden. """ @@ -69,7 +81,7 @@ def synthesize_success_response( requirement: str, result: Any, llm: OntologyLLM, - result_char_limit: int = 2000, + result_char_limit: int = 4000, ) -> str: """Success case — plan completed, convert output to NL answer.""" prompt = _SUCCESS_PROMPT.format(