Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 26 additions & 23 deletions docs/SUMMARY.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,35 @@
* [Quick Start Guide](getting-started/quickstart.md)

## API Reference
* [Client Configuration](api-reference/client.md)
* [Public Client](api-reference/public-client.md)
* [Evaluations](api-reference/evaluations.md)
* [Results](api-reference/results.md)
* [Models & Benchmarks](api-reference/models-benchmarks.md)
* [Judges](api-reference/judges.md)
* [Traces](api-reference/traces.md)
* [Trace Evaluations](api-reference/trace-evaluations.md)
* [Judge Optimizations](api-reference/judge-optimizations.md)
* [Error Handling](api-reference/errors.md)
* [Overview](api-reference/README.md)
* [Client Configuration](api-reference/client.md)
* [Public Client](api-reference/public-client.md)
* [Evaluations](api-reference/evaluations.md)
* [Results](api-reference/results.md)
* [Models & Benchmarks](api-reference/models-benchmarks.md)
* [Judges](api-reference/judges.md)
* [Traces](api-reference/traces.md)
* [Trace Evaluations](api-reference/trace-evaluations.md)
* [Judge Optimizations](api-reference/judge-optimizations.md)
* [Error Handling](api-reference/errors.md)

## Code Examples
* [Creating Evaluations](examples/creating-evaluations.md)
* [Retrieving Results](examples/retrieving-results.md)
* [Models and Benchmarks](examples/models-and-benchmarks.md)
* [Judges and Traces](examples/judges-and-traces.md)
* [Public API](examples/public-api.md)
* [Overview](examples/README.md)
* [Creating Evaluations](examples/creating-evaluations.md)
* [Retrieving Results](examples/retrieving-results.md)
* [Models and Benchmarks](examples/models-and-benchmarks.md)
* [Judges and Traces](examples/judges-and-traces.md)
* [Public API](examples/public-api.md)

## Troubleshooting
* [Common Issues](troubleshooting/common-issues.md)
* [Authentication Problems](troubleshooting/authentication.md)
* [Error Codes Reference](troubleshooting/error-codes.md)
* [Overview](troubleshooting/README.md)
* [Common Issues](troubleshooting/common-issues.md)
* [Authentication Problems](troubleshooting/authentication.md)
* [Error Codes Reference](troubleshooting/error-codes.md)

## Security Best Practices
* [API Key Management](security/api-key-management.md)
* [Environment Variables](security/environment-variables.md)
* [Rate Limiting](security/rate-limiting.md)
* [Data Privacy](security/data-privacy.md)

* [Overview](security/README.md)
* [API Key Management](security/api-key-management.md)
* [Environment Variables](security/environment-variables.md)
* [Rate Limiting](security/rate-limiting.md)
* [Data Privacy](security/data-privacy.md)
6 changes: 3 additions & 3 deletions examples/evaluation_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ async def main():

# --- Get evaluations sorted by submission date (newest first)
response = await client.evaluations.get_many(
sort_by="submittedAt",
sort_by="submitted_at",
order="desc",
page_size=5,
)
Expand All @@ -34,7 +34,7 @@ async def main():

# --- Get evaluations sorted by average duration (fastest first)
response = await client.evaluations.get_many(
sort_by="averageDuration",
sort_by="average_duration",
order="asc",
page_size=5,
)
Expand Down Expand Up @@ -66,7 +66,7 @@ async def main():
# Replace with actual benchmark IDs from your organization
response = await client.evaluations.get_many(
benchmark_ids=["your-benchmark-id"],
sort_by="submittedAt",
sort_by="submitted_at",
order="desc",
)
if response:
Expand Down
2 changes: 1 addition & 1 deletion examples/public_evaluations.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def main():
response = client.evaluations.get_many(
page=1,
page_size=5,
sort_by="submittedAt",
sort_by="submitted_at",
order="desc",
)
if response:
Expand Down
2 changes: 1 addition & 1 deletion examples/public_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def main():
print(f"\nFound {response.total_count} open-source models")

# --- Sort by release date (newest first)
response = client.models.get(sort_by="releasedAt", order="desc", page_size=5)
response = client.models.get(sort_by="released_at", order="desc", page_size=5)
print(f"\nNewest 5 models:")
for model in response.models:
print(f" - {model.name} (released_at={model.released_at})")
Expand Down
2 changes: 1 addition & 1 deletion src/layerlens/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "1.3.2"
__version__ = "1.3.3"

# Will be templated during the build
__git_commit__ = "__GIT_COMMIT__"
5 changes: 4 additions & 1 deletion src/layerlens/models/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

from typing import List, Optional

from pydantic import Field, BaseModel
from pydantic import Field, BaseModel, ConfigDict


class Benchmark(BaseModel):
model_config = ConfigDict(extra="allow")

id: str
key: str
name: str
Expand All @@ -32,6 +34,7 @@ def type(self) -> str:
class PublicBenchmark(Benchmark):
description: Optional[str] = Field(None, alias="full_description")
language: Optional[str] = None
categories: Optional[List[str]] = None
prompt_count: Optional[int] = None
deprecated: Optional[bool] = None

Expand Down
4 changes: 3 additions & 1 deletion src/layerlens/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

from typing import Optional

from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict


class Model(BaseModel):
model_config = ConfigDict(extra="allow")

id: str
key: str
name: str
Expand Down
18 changes: 18 additions & 0 deletions src/layerlens/resources/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def get(
type: Literal["custom", "public"] | None = None,
name: Optional[str] = None,
key: Optional[str] = None,
categories: Optional[List[str]] = None,
languages: Optional[List[str]] = None,
) -> Optional[List[Benchmark]]:
base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/benchmarks"

Expand All @@ -49,6 +51,10 @@ def fetch(bench_type: str) -> BenchmarksResponse | None:
params["name"] = name
if key:
params["key"] = key
if categories:
params["categories"] = ",".join(categories)
if languages:
params["languages"] = ",".join(languages)

resp = self._get(
base_url,
Expand Down Expand Up @@ -77,6 +83,9 @@ def cast_benchmark(b: Benchmark, bench_type: str) -> Benchmark:
if resp:
benchmarks.extend([cast_benchmark(b, type) for b in resp.data.benchmarks])

if name:
benchmarks = [b for b in benchmarks if name.lower() in b.name.lower()]

return benchmarks

def get_by_id(self, id: str, *, timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT) -> Optional[Benchmark]:
Expand Down Expand Up @@ -304,6 +313,8 @@ async def get(
type: Literal["custom", "public"] | None = None,
name: Optional[str] = None,
key: Optional[str] = None,
categories: Optional[List[str]] = None,
languages: Optional[List[str]] = None,
) -> Optional[List[Benchmark]]:
base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/benchmarks"

Expand All @@ -313,6 +324,10 @@ async def fetch(bench_type: str) -> Optional[BenchmarksResponse]:
params["name"] = name
if key:
params["key"] = key
if categories:
params["categories"] = ",".join(categories)
if languages:
params["languages"] = ",".join(languages)

resp = await self._get(
base_url,
Expand Down Expand Up @@ -341,6 +356,9 @@ def cast_benchmark(b: Benchmark, bench_type: str) -> Benchmark:
if resp:
benchmarks.extend([cast_benchmark(b, type) for b in resp.data.benchmarks])

if name:
benchmarks = [b for b in benchmarks if name.lower() in b.name.lower()]

return benchmarks

async def get_by_id(
Expand Down
16 changes: 8 additions & 8 deletions src/layerlens/resources/comparisons/comparisons.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ def compare(
if page is not None:
params["page"] = str(page)
if page_size is not None:
params["pageSize"] = str(page_size)
params["page_size"] = str(page_size)
if outcome_filter:
params["outcomeFilter"] = outcome_filter
params["outcome_filter"] = outcome_filter
if search:
params["search"] = search

Expand Down Expand Up @@ -79,7 +79,7 @@ def compare_models(
model_ids=[model_id_1],
benchmark_ids=[benchmark_id],
status=EvaluationStatus.SUCCESS,
sort_by="submittedAt",
sort_by="submitted_at",
order="desc",
page_size=1,
timeout=timeout,
Expand All @@ -90,7 +90,7 @@ def compare_models(
model_ids=[model_id_2],
benchmark_ids=[benchmark_id],
status=EvaluationStatus.SUCCESS,
sort_by="submittedAt",
sort_by="submitted_at",
order="desc",
page_size=1,
timeout=timeout,
Expand Down Expand Up @@ -127,9 +127,9 @@ async def compare(
if page is not None:
params["page"] = str(page)
if page_size is not None:
params["pageSize"] = str(page_size)
params["page_size"] = str(page_size)
if outcome_filter:
params["outcomeFilter"] = outcome_filter
params["outcome_filter"] = outcome_filter
if search:
params["search"] = search

Expand Down Expand Up @@ -169,7 +169,7 @@ async def compare_models(
model_ids=[model_id_1],
benchmark_ids=[benchmark_id],
status=EvaluationStatus.SUCCESS,
sort_by="submittedAt",
sort_by="submitted_at",
order="desc",
page_size=1,
timeout=timeout,
Expand All @@ -180,7 +180,7 @@ async def compare_models(
model_ids=[model_id_2],
benchmark_ids=[benchmark_id],
status=EvaluationStatus.SUCCESS,
sort_by="submittedAt",
sort_by="submitted_at",
order="desc",
page_size=1,
timeout=timeout,
Expand Down
28 changes: 14 additions & 14 deletions src/layerlens/resources/evaluations/evaluations.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def get_many(
*,
page: Optional[int] = None,
page_size: Optional[int] = None,
sort_by: Optional[Literal["submittedAt", "accuracy", "averageDuration"]] = None,
sort_by: Optional[Literal["submitted_at", "accuracy", "average_duration"]] = None,
order: Optional[Literal["asc", "desc"]] = None,
model_ids: Optional[List[str]] = None,
benchmark_ids: Optional[List[str]] = None,
Expand All @@ -94,7 +94,7 @@ def get_many(
Args:
page: Page number for pagination (1-based, defaults to 1 if not provided)
page_size: Number of evaluations per page (default: 100, optional)
sort_by: Sort evaluations by field (submittedAt, accuracy, averageDuration)
sort_by: Sort evaluations by field (submitted_at, accuracy, average_duration)
order: Sort order (asc or desc)
model_ids: Filter by model IDs
benchmark_ids: Filter by benchmark/dataset IDs
Expand All @@ -105,18 +105,18 @@ def get_many(
EvaluationsResponse object or None
"""
params = {
"organizationID": self._client.organization_id,
"projectID": self._client.project_id,
"organization_id": self._client.organization_id,
"project_id": self._client.project_id,
}

effective_page_size = min(max(page_size, 1), MAX_PAGE_SIZE) if page_size is not None else DEFAULT_PAGE_SIZE
effective_page = page if page is not None else DEFAULT_PAGE

params["page"] = str(effective_page)
params["pageSize"] = str(effective_page_size)
params["page_size"] = str(effective_page_size)

if sort_by:
params["sortBy"] = sort_by
params["sort_by"] = sort_by
if order:
params["order"] = order
if model_ids:
Expand Down Expand Up @@ -154,7 +154,7 @@ def get_many(

try:
return EvaluationsResponse.model_validate(resp_with_pagination)
except Exception:
except (ValueError, KeyError):
return None

def wait_for_completion(
Expand Down Expand Up @@ -236,7 +236,7 @@ async def get_many(
*,
page: Optional[int] = None,
page_size: Optional[int] = None,
sort_by: Optional[Literal["submittedAt", "accuracy", "averageDuration"]] = None,
sort_by: Optional[Literal["submitted_at", "accuracy", "average_duration"]] = None,
order: Optional[Literal["asc", "desc"]] = None,
model_ids: Optional[List[str]] = None,
benchmark_ids: Optional[List[str]] = None,
Expand All @@ -249,7 +249,7 @@ async def get_many(
Args:
page: Page number for pagination (1-based, defaults to 1 if not provided)
page_size: Number of evaluations per page (default: 100, optional)
sort_by: Sort evaluations by field (submittedAt, accuracy, averageDuration)
sort_by: Sort evaluations by field (submitted_at, accuracy, average_duration)
order: Sort order (asc or desc)
model_ids: Filter by model IDs
benchmark_ids: Filter by benchmark/dataset IDs
Expand All @@ -260,18 +260,18 @@ async def get_many(
EvaluationsResponse object or None
"""
params = {
"organizationID": self._client.organization_id,
"projectID": self._client.project_id,
"organization_id": self._client.organization_id,
"project_id": self._client.project_id,
}

effective_page_size = min(max(page_size, 1), MAX_PAGE_SIZE) if page_size is not None else DEFAULT_PAGE_SIZE
effective_page = page if page is not None else DEFAULT_PAGE

params["page"] = str(effective_page)
params["pageSize"] = str(effective_page_size)
params["page_size"] = str(effective_page_size)

if sort_by:
params["sortBy"] = sort_by
params["sort_by"] = sort_by
if order:
params["order"] = order
if model_ids:
Expand Down Expand Up @@ -309,7 +309,7 @@ async def get_many(

try:
return EvaluationsResponse.model_validate(resp_with_pagination)
except Exception:
except (ValueError, KeyError):
return None

async def wait_for_completion(
Expand Down
4 changes: 2 additions & 2 deletions src/layerlens/resources/judges/judges.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def get_many(
effective_page = page if page is not None else DEFAULT_PAGE

params["page"] = str(effective_page)
params["pageSize"] = str(effective_page_size)
params["page_size"] = str(effective_page_size)

resp = self._get(
self._base_url(),
Expand Down Expand Up @@ -221,7 +221,7 @@ async def get_many(
effective_page = page if page is not None else DEFAULT_PAGE

params["page"] = str(effective_page)
params["pageSize"] = str(effective_page_size)
params["page_size"] = str(effective_page_size)

resp = await self._get(
self._base_url(),
Expand Down
Loading
Loading