From aeb07a53004426824853ef7a0d3096b737a8a2c8 Mon Sep 17 00:00:00 2001
From: m-peko <marinpeko5@gmail.com>
Date: Tue, 24 Mar 2026 10:40:05 +0100
Subject: [PATCH] Expose prompts to private client

---
 examples/get_benchmark_prompts.py             |  48 +++++
 .../resources/benchmarks/benchmarks.py        | 174 ++++++++++++++++++
 tests/resources/test_benchmarks.py            | 146 +++++++++++++++
 3 files changed, 368 insertions(+)
 create mode 100644 examples/get_benchmark_prompts.py

diff --git a/examples/get_benchmark_prompts.py b/examples/get_benchmark_prompts.py
new file mode 100644
index 0000000..f366cd8
--- /dev/null
+++ b/examples/get_benchmark_prompts.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+"""Fetch prompts from a benchmark (custom or public)."""
+
+from layerlens import Stratix
+
+
+def main():
+    """Demo: fetch one page, all pages, and a filtered page of prompts for a benchmark."""
+    client = Stratix()
+
+    # Find a benchmark with prompts (`or []` keeps the scan safe if nothing is returned)
+    benchmarks = client.benchmarks.get() or []
+    benchmark = next((b for b in benchmarks if b.prompt_count and b.prompt_count > 0), None)
+    if benchmark is None:
+        print("No benchmarks with prompts found.")
+        return
+
+    print(f"Benchmark: {benchmark.name} ({benchmark.key})")
+    print(f"Total prompts: {benchmark.prompt_count}\n")
+
+    # --- Get a single page of prompts
+    page = client.benchmarks.get_prompts(benchmark.id, page=1, page_size=5)
+    if page:
+        print(f"Page 1 ({len(page.prompts)} of {page.count}):")
+        for p in page.prompts:
+            print(f"  [{p.id}] {str(p.input)[:80]}")  # input may be a string or a message list
+
+    # --- Get all prompts (auto-paginated)
+    all_prompts = client.benchmarks.get_all_prompts(benchmark.id)
+    print(f"\nAll prompts fetched: {len(all_prompts)}")
+
+    # --- Search and sort
+    results = client.benchmarks.get_prompts(
+        benchmark.id,
+        search_field="truth",
+        search_value="the",
+        sort_by="id",
+        sort_order="asc",
+        page_size=3,
+    )
+    if results:
+        print(f"\nSearch results ({results.count} matches):")
+        for p in results.prompts:
+            print(f"  [{p.id}] truth: {str(p.truth)[:60]}")  # stringify before slicing, like input
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/layerlens/resources/benchmarks/benchmarks.py b/src/layerlens/resources/benchmarks/benchmarks.py
index fca94c6..47fa24d 100644
--- a/src/layerlens/resources/benchmarks/benchmarks.py
+++ b/src/layerlens/resources/benchmarks/benchmarks.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import os
+import math
 import mimetypes
 from typing import Any, Dict, List, Literal, Optional
 
@@ -8,14 +9,18 @@
 
 from ...models import (
     Benchmark,
+    BenchmarkPrompt,
     CustomBenchmark,
     PublicBenchmark,
     BenchmarksResponse,
+    BenchmarkPromptsData,
     CreateBenchmarkResponse,
 )
 from ..._resource import SyncAPIResource, AsyncAPIResource
 from ..._constants import DEFAULT_TIMEOUT
 
+DEFAULT_PROMPTS_PAGE_SIZE = 100  # prompts requested per page by get_all_prompts()
+
 MAX_UPLOAD_SIZE = 50 * 1024 * 1024  # 50 MB
 
 
@@ -163,6 +168,99 @@ def remove(
         new_ids = [b.id for b in current if b.id not in remove_set]
         return self._patch_project_benchmarks(new_ids, timeout)
 
+    def get_prompts(
+        self,
+        benchmark_id: str,
+        *,
+        page: Optional[int] = None,
+        page_size: Optional[int] = None,
+        search_field: Optional[Literal["id", "input", "truth"]] = None,
+        search_value: Optional[str] = None,
+        sort_by: Optional[Literal["id", "input", "truth"]] = None,
+        sort_order: Optional[Literal["asc", "desc"]] = None,
+        timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
+    ) -> Optional[BenchmarkPromptsData]:
+        """Fetch a page of prompts for a benchmark.
+
+        Uses the org-scoped endpoint:
+        GET /organizations/{org}/projects/{proj}/benchmarks/{id}/prompts
+
+        Args:
+            benchmark_id: The benchmark / dataset ID.
+            page: Page number (1-based).
+            page_size: Number of prompts per page.
+            search_field: Field to search in.
+            search_value: Value to search for.
+            sort_by: Field to sort by.
+            sort_order: Sort direction.
+            timeout: Request timeout override.
+
+        Returns:
+            BenchmarkPromptsData with prompts and count; None if the response or its "data" payload is not a dict.
+        """
+        params: Dict[str, str] = {}
+        if page is not None:
+            params["page"] = str(page)
+        if page_size is not None:
+            params["page_size"] = str(page_size)
+        if search_field:
+            params["search"] = search_field
+        if search_value:
+            params["search_value"] = search_value
+        if sort_by:
+            params["sort_by"] = sort_by
+        if sort_order:
+            params["sort_order"] = sort_order
+
+        url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/benchmarks/{benchmark_id}/prompts"
+        resp = self._get(
+            url,
+            params=params,
+            timeout=timeout,
+            cast_to=dict,
+        )
+
+        if not isinstance(resp, dict):
+            return None
+        # Unwrap {"status": ..., "data": {...}} envelope if present
+        if "data" in resp and "status" in resp:
+            resp = resp["data"]
+            if not isinstance(resp, dict):  # e.g. "data": null inside an error envelope
+                return None
+        return BenchmarkPromptsData.model_validate(resp)
+
+    def get_all_prompts(
+        self,
+        benchmark_id: str,
+        *,
+        timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
+    ) -> List[BenchmarkPrompt]:
+        """Fetch all prompts for a benchmark, automatically paginating.
+
+        Pages are requested sequentially, DEFAULT_PROMPTS_PAGE_SIZE prompts at a time,
+        and `timeout` applies to each page request. Returns every prompt collected
+        (an empty list if the first page is missing or empty).
+        """
+        all_prompts: List[BenchmarkPrompt] = []
+        page = 1
+        page_size = DEFAULT_PROMPTS_PAGE_SIZE
+        while True:
+            resp = self.get_prompts(benchmark_id, page=page, page_size=page_size, timeout=timeout)
+            if resp is None or not resp.prompts:
+                break
+            all_prompts.extend(resp.prompts)
+
+            total_count = resp.count
+            total_pages = math.ceil(total_count / page_size) if total_count > 0 else 0
+            # Stop only when the page arithmetic says we are done AND the reported total
+            # has actually been collected; a server that returns shorter pages than
+            # requested would otherwise make us drop the tail silently.
+            if page >= total_pages and len(all_prompts) >= total_count:
+                break
+            page += 1
+
+        return all_prompts
+
     def _patch_project_benchmarks(
         self,
         dataset_ids: List[str],
@@ -452,6 +550,82 @@ async def remove(
         new_ids = [b.id for b in current if b.id not in remove_set]
         return await self._patch_project_benchmarks(new_ids, timeout)
 
+    async def get_prompts(
+        self,
+        benchmark_id: str,
+        *,
+        page: Optional[int] = None,
+        page_size: Optional[int] = None,
+        search_field: Optional[Literal["id", "input", "truth"]] = None,
+        search_value: Optional[str] = None,
+        sort_by: Optional[Literal["id", "input", "truth"]] = None,
+        sort_order: Optional[Literal["asc", "desc"]] = None,
+        timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
+    ) -> Optional[BenchmarkPromptsData]:
+        """Fetch a page of prompts for a benchmark (async counterpart of the sync method).
+
+        Options left as None (or empty, for the string options) are not sent.
+        Returns BenchmarkPromptsData, or None when the response or its unwrapped
+        "data" payload is not a dict.
+        """
+        params: Dict[str, str] = {}
+        if page is not None:
+            params["page"] = str(page)
+        if page_size is not None:
+            params["page_size"] = str(page_size)
+        if search_field:
+            params["search"] = search_field
+        if search_value:
+            params["search_value"] = search_value
+        if sort_by:
+            params["sort_by"] = sort_by
+        if sort_order:
+            params["sort_order"] = sort_order
+
+        url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/benchmarks/{benchmark_id}/prompts"
+        resp = await self._get(url, params=params, timeout=timeout, cast_to=dict)
+
+        if not isinstance(resp, dict):
+            return None
+        # Unwrap {"status": ..., "data": {...}} envelope if present
+        if "data" in resp and "status" in resp:
+            resp = resp["data"]
+            if not isinstance(resp, dict):  # e.g. "data": null inside an error envelope
+                return None
+        return BenchmarkPromptsData.model_validate(resp)
+
+    async def get_all_prompts(
+        self,
+        benchmark_id: str,
+        *,
+        timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
+    ) -> List[BenchmarkPrompt]:
+        """Fetch all prompts for a benchmark, automatically paginating.
+
+        Pages are awaited one after another, DEFAULT_PROMPTS_PAGE_SIZE prompts at a time,
+        and `timeout` applies to each page request. Returns every prompt collected
+        (an empty list if the first page is missing or empty).
+        """
+        all_prompts: List[BenchmarkPrompt] = []
+        page = 1
+        page_size = DEFAULT_PROMPTS_PAGE_SIZE
+        while True:
+            resp = await self.get_prompts(benchmark_id, page=page, page_size=page_size, timeout=timeout)
+            if resp is None or not resp.prompts:
+                break
+            all_prompts.extend(resp.prompts)
+
+            total_count = resp.count
+            total_pages = math.ceil(total_count / page_size) if total_count > 0 else 0
+            # Stop only when the page arithmetic says we are done AND the reported total
+            # has actually been collected; a server that returns shorter pages than
+            # requested would otherwise make us drop the tail silently.
+            if page >= total_pages and len(all_prompts) >= total_count:
+                break
+            page += 1
+
+        return all_prompts
+
     async def _patch_project_benchmarks(
         self,
         dataset_ids: List[str],
diff --git a/tests/resources/test_benchmarks.py b/tests/resources/test_benchmarks.py
index 53843c0..e736a18 100644
--- a/tests/resources/test_benchmarks.py
+++ b/tests/resources/test_benchmarks.py
@@ -5,9 +5,11 @@
 
 from layerlens.models import (
     Benchmark,
+    BenchmarkPrompt,
     CustomBenchmark,
     PublicBenchmark,
     BenchmarksResponse,
+    BenchmarkPromptsData,
     CreateBenchmarkResponse,
 )
 from layerlens._constants import DEFAULT_TIMEOUT
@@ -1152,3 +1154,147 @@ def test_filter_case_insensitive(
 
         assert len(result) == 1
         assert result[0].key == "mmlu"
+
+
+class TestBenchmarksGetPrompts:
+    """Test Benchmarks.get_prompts() and get_all_prompts() methods."""
+
+    @pytest.fixture
+    def mock_client(self):
+        client = Mock()
+        client.organization_id = "org-123"
+        client.project_id = "proj-456"
+        return client
+
+    @pytest.fixture
+    def benchmarks_resource(self, mock_client):
+        return Benchmarks(mock_client)
+
+    @pytest.fixture
+    def sample_prompts_response(self):
+        return {
+            "prompts": [
+                {"id": "p1", "input": [{"role": "user", "content": "What is 2+2?"}], "truth": "4"},
+                {"id": "p2", "input": "Translate hello", "truth": "Bonjour"},
+            ],
+            "count": 2,
+        }
+
+    def test_get_prompts_success(self, benchmarks_resource, sample_prompts_response):
+        """get_prompts() returns BenchmarkPromptsData from org-scoped endpoint."""
+        benchmarks_resource._get.return_value = sample_prompts_response
+
+        result = benchmarks_resource.get_prompts("bench-123")
+
+        assert isinstance(result, BenchmarkPromptsData)
+        assert len(result.prompts) == 2
+        assert result.count == 2
+        assert result.prompts[0].id == "p1"
+        assert result.prompts[1].truth == "Bonjour"
+
+    def test_get_prompts_uses_org_scoped_url(self, benchmarks_resource, sample_prompts_response):
+        """get_prompts() calls the org/project-scoped endpoint."""
+        benchmarks_resource._get.return_value = sample_prompts_response
+
+        benchmarks_resource.get_prompts("bench-123")
+
+        call_args = benchmarks_resource._get.call_args
+        assert call_args[0][0] == "/organizations/org-123/projects/proj-456/benchmarks/bench-123/prompts"
+
+    def test_get_prompts_passes_query_params(self, benchmarks_resource, sample_prompts_response):
+        """get_prompts() passes pagination and search params."""
+        benchmarks_resource._get.return_value = sample_prompts_response
+
+        benchmarks_resource.get_prompts(
+            "bench-123",
+            page=2,
+            page_size=50,
+            search_field="input",
+            search_value="hello",
+            sort_by="truth",
+            sort_order="desc",
+        )
+
+        call_kwargs = benchmarks_resource._get.call_args.kwargs
+        assert call_kwargs["params"] == {
+            "page": "2",
+            "page_size": "50",
+            "search": "input",
+            "search_value": "hello",
+            "sort_by": "truth",
+            "sort_order": "desc",
+        }
+
+    def test_get_prompts_returns_none_on_invalid_response(self, benchmarks_resource):
+        """get_prompts() returns None when response is not a dict."""
+        benchmarks_resource._get.return_value = "not-a-dict"
+
+        result = benchmarks_resource.get_prompts("bench-123")
+
+        assert result is None
+
+    def test_get_prompts_omits_unset_params(self, benchmarks_resource, sample_prompts_response):
+        """get_prompts() only sends params that are explicitly set."""
+        benchmarks_resource._get.return_value = sample_prompts_response
+
+        benchmarks_resource.get_prompts("bench-123", page=1)
+
+        call_kwargs = benchmarks_resource._get.call_args.kwargs
+        assert call_kwargs["params"] == {"page": "1"}
+
+    def test_get_all_prompts_single_page(self, benchmarks_resource):
+        """get_all_prompts() returns all prompts when they fit in one page."""
+        benchmarks_resource.get_prompts = Mock(
+            return_value=BenchmarkPromptsData(
+                prompts=[
+                    BenchmarkPrompt(id="p1", input="Q1", truth="A1"),
+                    BenchmarkPrompt(id="p2", input="Q2", truth="A2"),
+                ],
+                count=2,
+            )
+        )
+
+        result = benchmarks_resource.get_all_prompts("bench-123")
+
+        assert len(result) == 2
+        assert result[0].id == "p1"
+
+    def test_get_all_prompts_paginates(self, benchmarks_resource):
+        """get_all_prompts() fetches multiple pages until all prompts are collected."""
+        page1 = BenchmarkPromptsData(
+            prompts=[BenchmarkPrompt(id=f"p{i}", input=f"Q{i}", truth=f"A{i}") for i in range(100)],
+            count=150,
+        )
+        page2 = BenchmarkPromptsData(
+            prompts=[BenchmarkPrompt(id=f"p{i}", input=f"Q{i}", truth=f"A{i}") for i in range(100, 150)],
+            count=150,
+        )
+
+        benchmarks_resource.get_prompts = Mock(side_effect=[page1, page2])
+
+        result = benchmarks_resource.get_all_prompts("bench-123")
+
+        assert len(result) == 150
+        assert benchmarks_resource.get_prompts.call_count == 2
+
+    def test_get_all_prompts_empty(self, benchmarks_resource):
+        """get_all_prompts() returns empty list when no prompts exist."""
+        benchmarks_resource.get_prompts = Mock(return_value=None)
+
+        result = benchmarks_resource.get_all_prompts("bench-123")
+
+        assert result == []
+
+    def test_get_all_prompts_stops_on_empty_page(self, benchmarks_resource):
+        """get_all_prompts() stops when a page returns no prompts."""
+        page1 = BenchmarkPromptsData(
+            prompts=[BenchmarkPrompt(id="p1", input="Q", truth="A")],
+            count=150,  # reported total promises more pages than are actually served
+        )
+        empty_page = BenchmarkPromptsData(prompts=[], count=150)
+        benchmarks_resource.get_prompts = Mock(side_effect=[page1, empty_page])
+
+        result = benchmarks_resource.get_all_prompts("bench-123")
+
+        assert len(result) == 1
+        assert benchmarks_resource.get_prompts.call_count == 2