From aeb07a53004426824853ef7a0d3096b737a8a2c8 Mon Sep 17 00:00:00 2001
From: m-peko
Date: Tue, 24 Mar 2026 10:40:05 +0100
Subject: [PATCH] Expose prompts to private client

---
 examples/get_benchmark_prompts.py             |  48 +++++
 .../resources/benchmarks/benchmarks.py        | 174 ++++++++++++++++++
 tests/resources/test_benchmarks.py            | 154 +++++++++++++++
 3 files changed, 376 insertions(+)
 create mode 100644 examples/get_benchmark_prompts.py

diff --git a/examples/get_benchmark_prompts.py b/examples/get_benchmark_prompts.py
new file mode 100644
index 0000000..f366cd8
--- /dev/null
+++ b/examples/get_benchmark_prompts.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+"""Fetch prompts from a benchmark (custom or public)."""
+
+from layerlens import Stratix
+
+
+def main():
+    client = Stratix()
+
+    # Find a benchmark with prompts (`or []` keeps next() safe if get() yields nothing)
+    benchmarks = client.benchmarks.get() or []
+    benchmark = next((b for b in benchmarks if b.prompt_count and b.prompt_count > 0), None)
+    if benchmark is None:
+        print("No benchmarks with prompts found.")
+        return
+
+    print(f"Benchmark: {benchmark.name} ({benchmark.key})")
+    print(f"Total prompts: {benchmark.prompt_count}\n")
+
+    # --- Get a single page of prompts
+    page = client.benchmarks.get_prompts(benchmark.id, page=1, page_size=5)
+    if page:
+        print(f"Page 1 ({len(page.prompts)} of {page.count}):")
+        for p in page.prompts:
+            inp = str(p.input)[:80]
+            print(f" [{p.id}] {inp}")
+
+    # --- Get all prompts (auto-paginated)
+    all_prompts = client.benchmarks.get_all_prompts(benchmark.id)
+    print(f"\nAll prompts fetched: {len(all_prompts)}")
+
+    # --- Search and sort
+    results = client.benchmarks.get_prompts(
+        benchmark.id,
+        search_field="truth",
+        search_value="the",
+        sort_by="id",
+        sort_order="asc",
+        page_size=3,
+    )
+    if results:
+        print(f"\nSearch results ({results.count} matches):")
+        for p in results.prompts:
+            print(f" [{p.id}] truth: {str(p.truth)[:60]}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/layerlens/resources/benchmarks/benchmarks.py b/src/layerlens/resources/benchmarks/benchmarks.py
index fca94c6..47fa24d 100644
--- a/src/layerlens/resources/benchmarks/benchmarks.py
+++ b/src/layerlens/resources/benchmarks/benchmarks.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import os
+import math
 import mimetypes
 from typing import Any, Dict, List, Literal, Optional
 
@@ -8,14 +9,18 @@
 
 from ...models import (
     Benchmark,
+    BenchmarkPrompt,
     CustomBenchmark,
     PublicBenchmark,
     BenchmarksResponse,
+    BenchmarkPromptsData,
     CreateBenchmarkResponse,
 )
 from ..._resource import SyncAPIResource, AsyncAPIResource
 from ..._constants import DEFAULT_TIMEOUT
 
+DEFAULT_PROMPTS_PAGE_SIZE = 100
+
 MAX_UPLOAD_SIZE = 50 * 1024 * 1024  # 50 MB
 
 
@@ -163,6 +168,99 @@ def remove(
         new_ids = [b.id for b in current if b.id not in remove_set]
         return self._patch_project_benchmarks(new_ids, timeout)
 
+    def get_prompts(
+        self,
+        benchmark_id: str,
+        *,
+        page: Optional[int] = None,
+        page_size: Optional[int] = None,
+        search_field: Optional[Literal["id", "input", "truth"]] = None,
+        search_value: Optional[str] = None,
+        sort_by: Optional[Literal["id", "input", "truth"]] = None,
+        sort_order: Optional[Literal["asc", "desc"]] = None,
+        timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
+    ) -> Optional[BenchmarkPromptsData]:
+        """Fetch a page of prompts for a benchmark.
+
+        Uses the org-scoped endpoint:
+            GET /organizations/{org}/projects/{proj}/benchmarks/{id}/prompts
+
+        Args:
+            benchmark_id: The benchmark / dataset ID.
+            page: Page number (1-based).
+            page_size: Number of prompts per page.
+            search_field: Field to search in.
+            search_value: Value to search for.
+            sort_by: Field to sort by.
+            sort_order: Sort direction.
+            timeout: Request timeout override.
+
+        Returns:
+            BenchmarkPromptsData with prompts list and count, or None on failure.
+        """
+        params: Dict[str, str] = {}
+        if page is not None:
+            params["page"] = str(page)
+        if page_size is not None:
+            params["page_size"] = str(page_size)
+        if search_field:
+            params["search"] = search_field
+        if search_value:
+            params["search_value"] = search_value
+        if sort_by:
+            params["sort_by"] = sort_by
+        if sort_order:
+            params["sort_order"] = sort_order
+
+        url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/benchmarks/{benchmark_id}/prompts"
+        resp = self._get(
+            url,
+            params=params,
+            timeout=timeout,
+            cast_to=dict,
+        )
+
+        # Unwrap {"status": ..., "data": {...}} envelope if present
+        if isinstance(resp, dict) and "data" in resp and "status" in resp:
+            resp = resp["data"]
+
+        if not isinstance(resp, dict):  # also a null / non-object "data" payload
+            return None
+
+        return BenchmarkPromptsData.model_validate(resp)
+
+    def get_all_prompts(
+        self,
+        benchmark_id: str,
+        *,
+        timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
+    ) -> List[BenchmarkPrompt]:
+        """Fetch all prompts for a benchmark, automatically paginating."""
+        all_prompts: List[BenchmarkPrompt] = []
+        page = 1
+        page_size = DEFAULT_PROMPTS_PAGE_SIZE
+
+        while True:
+            resp = self.get_prompts(
+                benchmark_id,
+                page=page,
+                page_size=page_size,
+                timeout=timeout,
+            )
+            if resp is None or not resp.prompts:
+                break
+
+            all_prompts.extend(resp.prompts)
+
+            total_count = resp.count  # last page index is derived from the reported total
+            total_pages = math.ceil(total_count / page_size) if total_count > 0 else 0
+            if page >= total_pages:
+                break
+
+            page += 1
+
+        return all_prompts
+
     def _patch_project_benchmarks(
         self,
         dataset_ids: List[str],
@@ -452,6 +550,82 @@ async def remove(
         new_ids = [b.id for b in current if b.id not in remove_set]
         return await self._patch_project_benchmarks(new_ids, timeout)
 
+    async def get_prompts(
+        self,
+        benchmark_id: str,
+        *,
+        page: Optional[int] = None,
+        page_size: Optional[int] = None,
+        search_field: Optional[Literal["id", "input", "truth"]] = None,
+        search_value: Optional[str] = None,
+        sort_by: Optional[Literal["id", "input", "truth"]] = None,
+        sort_order: Optional[Literal["asc", "desc"]] = None,
+        timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
+    ) -> Optional[BenchmarkPromptsData]:
+        """Fetch a page of prompts for a benchmark; None if the payload is not an object."""
+        params: Dict[str, str] = {}
+        if page is not None:
+            params["page"] = str(page)
+        if page_size is not None:
+            params["page_size"] = str(page_size)
+        if search_field:
+            params["search"] = search_field
+        if search_value:
+            params["search_value"] = search_value
+        if sort_by:
+            params["sort_by"] = sort_by
+        if sort_order:
+            params["sort_order"] = sort_order
+
+        url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/benchmarks/{benchmark_id}/prompts"
+        resp = await self._get(
+            url,
+            params=params,
+            timeout=timeout,
+            cast_to=dict,
+        )
+
+        # Unwrap {"status": ..., "data": {...}} envelope if present
+        if isinstance(resp, dict) and "data" in resp and "status" in resp:
+            resp = resp["data"]
+
+        if not isinstance(resp, dict):  # also a null / non-object "data" payload
+            return None
+
+        return BenchmarkPromptsData.model_validate(resp)
+
+    async def get_all_prompts(
+        self,
+        benchmark_id: str,
+        *,
+        timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
+    ) -> List[BenchmarkPrompt]:
+        """Fetch all prompts for a benchmark, automatically paginating."""
+        all_prompts: List[BenchmarkPrompt] = []
+        page = 1
+        page_size = DEFAULT_PROMPTS_PAGE_SIZE
+
+        while True:
+            resp = await self.get_prompts(
+                benchmark_id,
+                page=page,
+                page_size=page_size,
+                timeout=timeout,
+            )
+            if resp is None or not resp.prompts:
+                break
+
+            all_prompts.extend(resp.prompts)
+
+            total_count = resp.count  # last page index is derived from the reported total
+            total_pages = math.ceil(total_count / page_size) if total_count > 0 else 0
+            if page >= total_pages:
+                break
+
+            page += 1
+
+        return all_prompts
+
     async def _patch_project_benchmarks(
         self,
         dataset_ids: List[str],
diff --git a/tests/resources/test_benchmarks.py b/tests/resources/test_benchmarks.py
index 53843c0..e736a18 100644
--- a/tests/resources/test_benchmarks.py
+++ b/tests/resources/test_benchmarks.py
@@ -5,9 +5,11 @@
 
 from layerlens.models import (
     Benchmark,
+    BenchmarkPrompt,
     CustomBenchmark,
     PublicBenchmark,
     BenchmarksResponse,
+    BenchmarkPromptsData,
     CreateBenchmarkResponse,
 )
 from layerlens._constants import DEFAULT_TIMEOUT
@@ -1152,3 +1154,155 @@ def test_filter_case_insensitive(
 
         assert len(result) == 1
         assert result[0].key == "mmlu"
+
+
+class TestBenchmarksGetPrompts:
+    """Test Benchmarks.get_prompts() and get_all_prompts() methods."""
+
+    @pytest.fixture
+    def mock_client(self):
+        client = Mock()
+        client.organization_id = "org-123"
+        client.project_id = "proj-456"
+        return client
+
+    @pytest.fixture
+    def benchmarks_resource(self, mock_client):
+        return Benchmarks(mock_client)
+
+    @pytest.fixture
+    def sample_prompts_response(self):
+        return {
+            "prompts": [
+                {"id": "p1", "input": [{"role": "user", "content": "What is 2+2?"}], "truth": "4"},
+                {"id": "p2", "input": "Translate hello", "truth": "Bonjour"},
+            ],
+            "count": 2,
+        }
+
+    def test_get_prompts_success(self, benchmarks_resource, sample_prompts_response):
+        """get_prompts() returns BenchmarkPromptsData from org-scoped endpoint."""
+        benchmarks_resource._get.return_value = sample_prompts_response
+
+        result = benchmarks_resource.get_prompts("bench-123")
+
+        assert isinstance(result, BenchmarkPromptsData)
+        assert len(result.prompts) == 2
+        assert result.count == 2
+        assert result.prompts[0].id == "p1"
+        assert result.prompts[1].truth == "Bonjour"
+
+    def test_get_prompts_uses_org_scoped_url(self, benchmarks_resource, sample_prompts_response):
+        """get_prompts() calls the org/project-scoped endpoint."""
+        benchmarks_resource._get.return_value = sample_prompts_response
+
+        benchmarks_resource.get_prompts("bench-123")
+
+        call_args = benchmarks_resource._get.call_args
+        assert call_args[0][0] == "/organizations/org-123/projects/proj-456/benchmarks/bench-123/prompts"
+
+    def test_get_prompts_passes_query_params(self, benchmarks_resource, sample_prompts_response):
+        """get_prompts() passes pagination and search params."""
+        benchmarks_resource._get.return_value = sample_prompts_response
+
+        benchmarks_resource.get_prompts(
+            "bench-123",
+            page=2,
+            page_size=50,
+            search_field="input",
+            search_value="hello",
+            sort_by="truth",
+            sort_order="desc",
+        )
+
+        call_kwargs = benchmarks_resource._get.call_args.kwargs
+        assert call_kwargs["params"] == {
+            "page": "2",
+            "page_size": "50",
+            "search": "input",
+            "search_value": "hello",
+            "sort_by": "truth",
+            "sort_order": "desc",
+        }
+
+    def test_get_prompts_returns_none_on_invalid_response(self, benchmarks_resource):
+        """get_prompts() returns None when response is not a dict."""
+        benchmarks_resource._get.return_value = "not-a-dict"
+
+        result = benchmarks_resource.get_prompts("bench-123")
+
+        assert result is None
+
+    def test_get_prompts_returns_none_on_null_envelope_data(self, benchmarks_resource):
+        """get_prompts() returns None when the envelope carries no data object."""
+        benchmarks_resource._get.return_value = {"status": "error", "data": None}
+
+        result = benchmarks_resource.get_prompts("bench-123")
+
+        assert result is None
+
+    def test_get_prompts_omits_unset_params(self, benchmarks_resource, sample_prompts_response):
+        """get_prompts() only sends params that are explicitly set."""
+        benchmarks_resource._get.return_value = sample_prompts_response
+
+        benchmarks_resource.get_prompts("bench-123", page=1)
+
+        call_kwargs = benchmarks_resource._get.call_args.kwargs
+        assert call_kwargs["params"] == {"page": "1"}
+
+    def test_get_all_prompts_single_page(self, benchmarks_resource):
+        """get_all_prompts() returns all prompts when they fit in one page."""
+        benchmarks_resource.get_prompts = Mock(
+            return_value=BenchmarkPromptsData(
+                prompts=[
+                    BenchmarkPrompt(id="p1", input="Q1", truth="A1"),
+                    BenchmarkPrompt(id="p2", input="Q2", truth="A2"),
+                ],
+                count=2,
+            )
+        )
+
+        result = benchmarks_resource.get_all_prompts("bench-123")
+
+        assert len(result) == 2
+        assert result[0].id == "p1"
+
+    def test_get_all_prompts_paginates(self, benchmarks_resource):
+        """get_all_prompts() fetches multiple pages until all prompts are collected."""
+        page1 = BenchmarkPromptsData(
+            prompts=[BenchmarkPrompt(id=f"p{i}", input=f"Q{i}", truth=f"A{i}") for i in range(100)],
+            count=150,
+        )
+        page2 = BenchmarkPromptsData(
+            prompts=[BenchmarkPrompt(id=f"p{i}", input=f"Q{i}", truth=f"A{i}") for i in range(100, 150)],
+            count=150,
+        )
+
+        benchmarks_resource.get_prompts = Mock(side_effect=[page1, page2])
+
+        result = benchmarks_resource.get_all_prompts("bench-123")
+
+        assert len(result) == 150
+        assert benchmarks_resource.get_prompts.call_count == 2
+
+    def test_get_all_prompts_empty(self, benchmarks_resource):
+        """get_all_prompts() returns empty list when no prompts exist."""
+        benchmarks_resource.get_prompts = Mock(return_value=None)
+
+        result = benchmarks_resource.get_all_prompts("bench-123")
+
+        assert result == []
+
+    def test_get_all_prompts_stops_on_empty_page(self, benchmarks_resource):
+        """get_all_prompts() stops when a page returns no prompts."""
+        page1 = BenchmarkPromptsData(
+            prompts=[BenchmarkPrompt(id="p1", input="Q", truth="A")],
+            count=250,  # overstated, so only the empty page can end the loop
+        )
+        page2 = BenchmarkPromptsData(prompts=[], count=250)
+        benchmarks_resource.get_prompts = Mock(side_effect=[page1, page2])
+
+        result = benchmarks_resource.get_all_prompts("bench-123")
+
+        assert len(result) == 1
+        assert benchmarks_resource.get_prompts.call_count == 2