feat: rank and filter discovery by curated manifest relevance

seanphan · seanphan · commit a685fd5234f4 · 2026-02-21T11:27:24.000-08:00
diff --git a/src/agenticflow_cli/main.py b/src/agenticflow_cli/main.py
@@ -8,6 +8,7 @@
 import os
 import sys
 import re
+from functools import lru_cache
 from time import perf_counter
 from pathlib import Path
 from typing import Any, Callable, Mapping
@@ -49,6 +50,9 @@
 CODE_SEARCH_SCHEMA_VERSION = "agenticflow.code.search.v1"
 CODE_EXECUTE_SCHEMA_VERSION = "agenticflow.code.execute.v1"
 CLI_CONFIG_DIR_ENV_VAR = "AGENTICFLOW_CLI_DIR"
+CURATED_MANIFEST_PATH = Path(__file__).resolve().parent / "public_ops_manifest.json"
+SUPPORT_SCOPE_EXECUTED = "supported-executed"
+SUPPORT_SCOPE_BLOCKED = "supported-blocked-policy"
 
 
 def _add_common_call_flags(parser: argparse.ArgumentParser) -> None:
@@ -643,18 +647,81 @@ def _catalog_operation_item(operation: Any) -> dict[str, Any]:
     }
 
 
+@lru_cache(maxsize=1)
+def _manifest_scope_by_operation_id() -> dict[str, str]:
+    """Load the curated manifest and map each operation id to its support scope.
+
+    The file at ``CURATED_MANIFEST_PATH`` is read as UTF-8 JSON. Only a
+    top-level list is accepted; within it, only mapping entries that carry a
+    non-empty string ``operation_id`` and a string ``support_scope`` are kept.
+
+    Returns:
+        A mapping of operation id to support scope. It is empty when the
+        manifest cannot be read or decoded, or when its top-level value is
+        not a list.
+
+    Note:
+        The result is memoized with ``lru_cache``, so every caller receives
+        the same dict object (including an empty one produced by a failed
+        load). Treat it as read-only.
+    """
+    try:
+        raw = json.loads(CURATED_MANIFEST_PATH.read_text(encoding="utf-8"))
+    except (OSError, ValueError, RecursionError):
+        # Best-effort load. OSError covers a missing or unreadable file,
+        # ValueError covers invalid UTF-8 and malformed JSON (JSONDecodeError
+        # is a ValueError), and RecursionError covers pathologically nested
+        # JSON. Other exceptions indicate a programming error and are allowed
+        # to surface instead of being silently hidden.
+        return {}
+    if not isinstance(raw, list):
+        return {}
+
+    scopes: dict[str, str] = {}
+    for item in raw:
+        if not isinstance(item, Mapping):
+            continue
+        operation_id = item.get("operation_id")
+        support_scope = item.get("support_scope")
+        if isinstance(operation_id, str) and operation_id and isinstance(support_scope, str):
+            scopes[operation_id] = support_scope
+    return scopes
+
+
+def _should_use_curated_manifest(spec_file: Path, public_only: bool) -> bool:
+    """Report whether curated-manifest filtering applies to this invocation.
+
+    Filtering applies only when ``public_only`` is truthy and ``spec_file``
+    resolves to the same path as ``default_spec_path()``. Any exception
+    raised while resolving or comparing the two paths yields ``False``.
+    """
+    use_manifest = False
+    if public_only:
+        try:
+            use_manifest = spec_file.resolve() == default_spec_path().resolve()
+        except Exception:
+            # Deliberate best-effort: an unresolvable path means "not the
+            # default spec", so curated filtering stays off.
+            use_manifest = False
+    return use_manifest
+
+
+def _apply_curated_manifest_filter(
+    operations: list[Any],
+    *,
+    public_only: bool,
+    spec_file: Path,
+) -> list[Any]:
+    """Keep only the operations listed in the curated manifest, when it applies.
+
+    The input list itself is returned unchanged when curated filtering is not
+    in effect for ``spec_file``/``public_only`` or when the manifest yields no
+    entries. Otherwise a new list is returned, in the original order, holding
+    the operations whose ``operation_id`` attribute is a manifest key.
+    """
+    curated_scopes = (
+        _manifest_scope_by_operation_id()
+        if _should_use_curated_manifest(spec_file, public_only)
+        else None
+    )
+    if not curated_scopes:
+        # Either filtering is disabled or the manifest is empty/unavailable.
+        return operations
+
+    curated_ids = frozenset(curated_scopes)
+    kept: list[Any] = []
+    for candidate in operations:
+        if getattr(candidate, "operation_id", None) in curated_ids:
+            kept.append(candidate)
+    return kept
+
+
 def _catalog_records(
-    registry: OperationRegistry, public_only: bool
+    registry: OperationRegistry,
+    public_only: bool,
+    spec_file: Path,
 ) -> list[dict[str, Any]]:
+    """Return catalog records for the registry's operations in a stable order.
+
+    Operations are listed with ``tag=None``, passed through the curated
+    manifest filter for ``spec_file``/``public_only``, converted with
+    ``_catalog_operation_item``, and sorted by path, method, and operation id.
+    """
     operations = _list_operations(registry, public_only=public_only, tag=None)
+    operations = _apply_curated_manifest_filter(
+        operations,
+        public_only=public_only,
+        spec_file=spec_file,
+    )
     records = [_catalog_operation_item(operation) for operation in operations]
     return sorted(records, key=lambda item: (item["path"], item["method"], item["operation_id"]))
 
 
 def _catalog_operations(
-    registry: OperationRegistry, public_only: bool
+    registry: OperationRegistry,
+    public_only: bool,
+    spec_file: Path,
 ) -> list[Any]:
     operations = _list_operations(registry, public_only=public_only, tag=None)
+    operations = _apply_curated_manifest_filter(
+        operations,
+        public_only=public_only,
+        spec_file=spec_file,
+    )
     return sorted(
         operations,
         key=lambda item: (item.path, item.method, item.operation_id),
@@ -711,6 +778,7 @@ def _rank_catalog_operations(
     task: str,
     max_cost: float | None = None,
     max_latency_ms: float | None = None,
+    manifest_scope_by_operation_id: Mapping[str, str] | None = None,
 ) -> list[dict[str, Any]]:
     task_terms = _tokenize_catalog_text(task)
     if not task_terms:
@@ -736,13 +804,64 @@ def _rank_catalog_operations(
         if max_latency_ms is not None and latency > max_latency_ms:
             continue
 
-        score = round((relevance * 10) - cost - (latency / 200), 3)
+        support_scope = None
+        if manifest_scope_by_operation_id is not None:
+            support_scope = manifest_scope_by_operation_id.get(
+                operation_record["operation_id"]
+            )
+        scope_bonus = 0.0
+        if support_scope == SUPPORT_SCOPE_EXECUTED:
+            scope_bonus = 4.0
+        elif support_scope == SUPPORT_SCOPE_BLOCKED:
+            scope_bonus = 2.0
+
+        dependency_bonus = 0.0
+        builder_terms = {
+            "build",
+            "builder",
+            "create",
+            "workflow",
+            "workflows",
+            "agent",
+            "agents",
+            "workforce",
+            "dependencies",
+            "dependency",
+        }
+        if task_terms.intersection(builder_terms):
+            dependency_tokens = {
+                "node",
+                "nodes",
+                "connection",
+                "connections",
+                "provider",
+                "providers",
+                "template",
+                "templates",
+                "validate",
+                "schema",
+            }
+            dependency_bonus = float(
+                min(3, len(operation_tokens.intersection(dependency_tokens)))
+            )
+
+        score = round(
+            (relevance * 10)
+            - cost
+            - (latency / 200)
+            + scope_bonus
+            + dependency_bonus,
+            3,
+        )
         ranked.append(
             {
                 **operation_record,
                 "relevance": relevance,
                 "cost": cost,
                 "estimated_latency_ms": latency,
+                "support_scope": support_scope,
+                "scope_bonus": scope_bonus,
+                "dependency_bonus": dependency_bonus,
                 "score": score,
             }
         )
@@ -1406,6 +1525,11 @@ def _invoke_sdk_operation(
 def _run_ops_command(args: argparse.Namespace, registry: OperationRegistry) -> int:
     if args.ops_command == "list":
         operations = _list_operations(registry, args.public_only, args.tag)
+        operations = _apply_curated_manifest_filter(
+            operations,
+            public_only=args.public_only,
+            spec_file=args.spec_file,
+        )
         for operation in operations:
             operation_id = getattr(operation, "operation_id", "")
             method = getattr(operation, "method", "")
@@ -1456,7 +1580,11 @@ def _run_catalog_command(
     args: argparse.Namespace, registry: OperationRegistry
 ) -> int:
     if args.catalog_command == "export":
-        items = _catalog_records(registry, public_only=args.public_only)
+        items = _catalog_records(
+            registry,
+            public_only=args.public_only,
+            spec_file=args.spec_file,
+        )
         if args.json:
             payload = {
                 "schema_version": CATALOG_EXPORT_SCHEMA_VERSION,
@@ -1474,12 +1602,22 @@ def _run_catalog_command(
         return 0
 
     if args.catalog_command == "rank":
-        operations = _catalog_operations(registry, public_only=args.public_only)
+        operations = _catalog_operations(
+            registry,
+            public_only=args.public_only,
+            spec_file=args.spec_file,
+        )
+        manifest_scope = (
+            _manifest_scope_by_operation_id()
+            if _should_use_curated_manifest(args.spec_file, args.public_only)
+            else None
+        )
         ranked = _rank_catalog_operations(
             operations,
             task=args.task,
             max_cost=args.max_cost,
             max_latency_ms=args.max_latency_ms,
+            manifest_scope_by_operation_id=manifest_scope,
         )
         if args.json:
             payload = {
@@ -2191,11 +2329,21 @@ def _run_code_search_command(
     registry: OperationRegistry,
     sdk_client: AgenticFlowSDK,
 ) -> int:
+    manifest_scope = (
+        _manifest_scope_by_operation_id()
+        if _should_use_curated_manifest(args.spec_file, args.public_only)
+        else None
+    )
     ranked = _rank_catalog_operations(
-        _catalog_operations(registry, public_only=args.public_only),
+        _catalog_operations(
+            registry,
+            public_only=args.public_only,
+            spec_file=args.spec_file,
+        ),
         task=args.task,
         max_cost=args.max_cost,
         max_latency_ms=args.max_latency_ms,
+        manifest_scope_by_operation_id=manifest_scope,
     )
 
     if args.limit is not None and args.limit > 0:
diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py
@@ -148,6 +148,32 @@ def _write_catalog_spec(path: Path) -> None:
     )
 
 
+def _write_rank_scope_spec(path: Path) -> None:
+    """Write an OpenAPI 3.1 spec with two identically tagged GET operations.
+
+    ``op_a`` (``/v1/public/a``) and ``op_b`` (``/v1/public/b``) differ only in
+    path and operation id.
+    """
+
+    def _public_get(operation_id: str) -> dict[str, object]:
+        # Path item with a single GET operation carrying the shared tags.
+        return {
+            "get": {
+                "operationId": operation_id,
+                "tags": ["public", "workflow", "nodes"],
+                "responses": {"200": {"description": "ok"}},
+            },
+        }
+
+    spec = {
+        "openapi": "3.1.0",
+        "paths": {
+            "/v1/public/a": _public_get("op_a"),
+            "/v1/public/b": _public_get("op_b"),
+        },
+    }
+    path.write_text(json.dumps(spec))
+
+
 def _snapshot_operation_ids() -> set[str]:
     registry = OperationRegistry.from_spec(load_openapi_spec(default_spec_path()))
     return {op.operation_id for op in registry.list_operations(public_only=False)}
@@ -247,6 +273,32 @@ def test_ops_list_public_only_outputs_only_public_operations(capsys, tmp_path: P
     assert "admin_items" not in out
 
 
+def test_ops_list_public_only_applies_curated_manifest_filter_when_enabled(
+    capsys,
+    tmp_path: Path,
+    monkeypatch,
+) -> None:
+    """``ops list --public-only`` omits operations absent from the manifest."""
+    spec_path = tmp_path / "openapi.json"
+    _write_rank_scope_spec(spec_path)
+
+    # Force curated filtering on for the temporary spec and stub the manifest
+    # so that it lists only ``op_a``.
+    curated_scopes = {"op_a": "supported-executed"}
+    monkeypatch.setattr(
+        main_module,
+        "_should_use_curated_manifest",
+        lambda *_args, **_kwargs: True,
+    )
+    monkeypatch.setattr(
+        main_module,
+        "_manifest_scope_by_operation_id",
+        lambda: dict(curated_scopes),
+    )
+
+    argv = ["--spec-file", str(spec_path), "ops", "list", "--public-only"]
+    rc = run_cli(argv)
+    out = capsys.readouterr().out
+
+    assert rc == 0
+    assert "op_a" in out
+    assert "op_b" not in out
+
+
 def test_call_dry_run_by_operation_id(capsys, tmp_path: Path) -> None:
     spec_file = tmp_path / "openapi.json"
     _write_spec(spec_file)
@@ -728,6 +780,48 @@ def test_catalog_rank_json_applies_relevance_heuristic(
     assert payload["heuristic"]["formula"] == "score = relevance*10 - cost - latency/200"
 
 
+def test_catalog_rank_prefers_executed_manifest_scope_for_builder_tasks(
+    capsys,
+    tmp_path: Path,
+    monkeypatch,
+) -> None:
+    """``catalog rank`` puts the executed-scope operation ahead of the blocked one."""
+    spec_path = tmp_path / "openapi.json"
+    _write_rank_scope_spec(spec_path)
+
+    # Force curated ranking on for the temporary spec and give the two
+    # operations different support scopes.
+    curated_scopes = {
+        "op_a": "supported-executed",
+        "op_b": "supported-blocked-policy",
+    }
+    monkeypatch.setattr(
+        main_module,
+        "_should_use_curated_manifest",
+        lambda *_args, **_kwargs: True,
+    )
+    monkeypatch.setattr(
+        main_module,
+        "_manifest_scope_by_operation_id",
+        lambda: dict(curated_scopes),
+    )
+
+    argv = [
+        "--spec-file",
+        str(spec_path),
+        "catalog",
+        "rank",
+        "--public-only",
+        "--task",
+        "build workflow",
+        "--json",
+    ]
+    rc = run_cli(argv)
+    payload = json.loads(capsys.readouterr().out)
+    ranked = payload["items"]
+    top, runner_up = ranked[0], ranked[1]
+
+    assert rc == 0
+    assert top["operation_id"] == "op_a"
+    assert top["support_scope"] == "supported-executed"
+    assert top["scope_bonus"] > runner_up["scope_bonus"]
+
+
 def test_workflow_run_routes_to_expected_operation(capsys, monkeypatch) -> None:
     captured: dict[str, object] = {}