From 515e907e935977570303debb1b177c57f018b6e1 Mon Sep 17 00:00:00 2001 From: m-peko Date: Wed, 27 Aug 2025 15:42:41 +0200 Subject: [PATCH 1/3] Rename atlas to layerlens --- examples/all_results_no_pagination.py | 2 +- examples/async_client.py | 2 +- examples/async_client_simple.py | 2 +- examples/async_run_evaluations.py | 2 +- examples/client.py | 2 +- examples/client_simple.py | 2 +- examples/fetch_results_async.py | 2 +- examples/get_benchmarks.py | 2 +- examples/get_evaluation.py | 2 +- examples/get_models.py | 2 +- examples/paginated_results.py | 2 +- pyproject.toml | 8 +- scripts/get_version.sh | 2 +- scripts/lint | 2 +- scripts/test_coverage | 2 +- src/{atlas => layerlens}/__init__.py | 0 src/{atlas => layerlens}/_base_client.py | 0 src/{atlas => layerlens}/_client.py | 0 src/{atlas => layerlens}/_constants.py | 0 src/{atlas => layerlens}/_exceptions.py | 0 src/{atlas => layerlens}/_resource.py | 0 src/{atlas => layerlens}/_utils.py | 0 src/{atlas => layerlens}/_version.py | 0 src/{atlas => layerlens}/models/__init__.py | 0 src/{atlas => layerlens}/models/api.py | 0 src/{atlas => layerlens}/models/benchmark.py | 0 src/{atlas => layerlens}/models/evaluation.py | 0 src/{atlas => layerlens}/models/model.py | 0 .../models/organization.py | 0 .../resources/benchmarks/__init__.py | 0 .../resources/benchmarks/benchmarks.py | 0 .../resources/evaluations/__init__.py | 0 .../resources/evaluations/evaluations.py | 0 .../resources/models/__init__.py | 0 .../resources/models/models.py | 0 .../resources/results/__init__.py | 0 .../resources/results/results.py | 0 tests/resources/test_benchmarks.py | 14 ++-- tests/resources/test_evaluations.py | 12 +-- tests/resources/test_models_resource.py | 14 ++-- tests/resources/test_results.py | 18 ++-- tests/test_base_client.py | 32 +++++-- tests/test_client.py | 84 +++++++++---------- tests/test_exceptions.py | 10 ++- tests/test_integration.py | 30 +++---- tests/test_models.py | 2 +- tests/test_resource.py | 12 ++- 
tests/test_utils.py | 2 +- 48 files changed, 147 insertions(+), 119 deletions(-) rename src/{atlas => layerlens}/__init__.py (100%) rename src/{atlas => layerlens}/_base_client.py (100%) rename src/{atlas => layerlens}/_client.py (100%) rename src/{atlas => layerlens}/_constants.py (100%) rename src/{atlas => layerlens}/_exceptions.py (100%) rename src/{atlas => layerlens}/_resource.py (100%) rename src/{atlas => layerlens}/_utils.py (100%) rename src/{atlas => layerlens}/_version.py (100%) rename src/{atlas => layerlens}/models/__init__.py (100%) rename src/{atlas => layerlens}/models/api.py (100%) rename src/{atlas => layerlens}/models/benchmark.py (100%) rename src/{atlas => layerlens}/models/evaluation.py (100%) rename src/{atlas => layerlens}/models/model.py (100%) rename src/{atlas => layerlens}/models/organization.py (100%) rename src/{atlas => layerlens}/resources/benchmarks/__init__.py (100%) rename src/{atlas => layerlens}/resources/benchmarks/benchmarks.py (100%) rename src/{atlas => layerlens}/resources/evaluations/__init__.py (100%) rename src/{atlas => layerlens}/resources/evaluations/evaluations.py (100%) rename src/{atlas => layerlens}/resources/models/__init__.py (100%) rename src/{atlas => layerlens}/resources/models/models.py (100%) rename src/{atlas => layerlens}/resources/results/__init__.py (100%) rename src/{atlas => layerlens}/resources/results/results.py (100%) diff --git a/examples/all_results_no_pagination.py b/examples/all_results_no_pagination.py index efc93d3..3a91e6b 100644 --- a/examples/all_results_no_pagination.py +++ b/examples/all_results_no_pagination.py @@ -2,7 +2,7 @@ import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def main(): diff --git a/examples/async_client.py b/examples/async_client.py index 571e8cc..b2d6fb1 100644 --- a/examples/async_client.py +++ b/examples/async_client.py @@ -2,7 +2,7 @@ import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def 
main(): diff --git a/examples/async_client_simple.py b/examples/async_client_simple.py index f6e081c..ed2a94e 100644 --- a/examples/async_client_simple.py +++ b/examples/async_client_simple.py @@ -2,7 +2,7 @@ import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def main(): diff --git a/examples/async_run_evaluations.py b/examples/async_run_evaluations.py index b850d7b..c997e2f 100644 --- a/examples/async_run_evaluations.py +++ b/examples/async_run_evaluations.py @@ -2,7 +2,7 @@ import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def create_and_run_evaluation(client, model, benchmark, eval_number): diff --git a/examples/client.py b/examples/client.py index 9661215..58b6ee7 100644 --- a/examples/client.py +++ b/examples/client.py @@ -1,6 +1,6 @@ #!/usr/bin/env -S poetry run python -from atlas import Atlas +from layerlens import Atlas # Construct sync client (API key from env or inline) client = Atlas() diff --git a/examples/client_simple.py b/examples/client_simple.py index 7560658..28b264b 100644 --- a/examples/client_simple.py +++ b/examples/client_simple.py @@ -1,6 +1,6 @@ #!/usr/bin/env -S poetry run python -from atlas import Atlas +from layerlens import Atlas # Construct sync client (API key from env or inline) client = Atlas() diff --git a/examples/fetch_results_async.py b/examples/fetch_results_async.py index cc04232..5d276c2 100644 --- a/examples/fetch_results_async.py +++ b/examples/fetch_results_async.py @@ -2,7 +2,7 @@ import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def fetch_evaluation_results(client, evaluation_id): diff --git a/examples/get_benchmarks.py b/examples/get_benchmarks.py index 6e5c6ef..344d89b 100644 --- a/examples/get_benchmarks.py +++ b/examples/get_benchmarks.py @@ -2,7 +2,7 @@ import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def main(): diff --git a/examples/get_evaluation.py 
b/examples/get_evaluation.py index 8af3940..0d841fc 100644 --- a/examples/get_evaluation.py +++ b/examples/get_evaluation.py @@ -2,7 +2,7 @@ import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def main(): diff --git a/examples/get_models.py b/examples/get_models.py index 6059f90..a4d9d1c 100644 --- a/examples/get_models.py +++ b/examples/get_models.py @@ -2,7 +2,7 @@ import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def main(): diff --git a/examples/paginated_results.py b/examples/paginated_results.py index d70defc..7be00d7 100644 --- a/examples/paginated_results.py +++ b/examples/paginated_results.py @@ -2,7 +2,7 @@ import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def main(): diff --git a/pyproject.toml b/pyproject.toml index fef0e24..b5ec9a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "atlas" +name = "layerlens" version = "1.0.2" description = "The official Python library for the LayerLens Atlas API" license = "Apache-2.0" @@ -28,7 +28,7 @@ Homepage = "https://github.com/LayerLens/atlas-python" Repository = "https://github.com/LayerLens/atlas-python" [project.scripts] -atlas = "atlas.cli:main" +layerlens = "layerlens.cli:main" [tool.rye] @@ -57,7 +57,7 @@ format = { chain = [ "check:ruff" = "ruff check ." "fix:ruff" = "ruff check --fix ." -"check:importable" = "python -c 'import atlas'" +"check:importable" = "python -c 'import layerlens'" # Type checking for production code only (excludes tests) "typecheck:src" = { chain = ["typecheck:pyright:src", "typecheck:mypy:src"] } @@ -67,7 +67,7 @@ typecheck = { chain = ["typecheck:pyright", "typecheck:mypy"] } "typecheck:pyright" = "pyright" "typecheck:pyright:src" = "pyright src" -"typecheck:verify-types" = "pyright --verifytypes atlas --ignoreexternal" +"typecheck:verify-types" = "pyright --verifytypes layerlens --ignoreexternal" "typecheck:mypy" = "mypy ." 
"typecheck:mypy:src" = "mypy src" diff --git a/scripts/get_version.sh b/scripts/get_version.sh index 32971ce..42caa27 100755 --- a/scripts/get_version.sh +++ b/scripts/get_version.sh @@ -4,7 +4,7 @@ set -e ROOT_DIR=$(git rev-parse --show-toplevel) -VERSION_FILE="$ROOT_DIR/src/atlas/_version.py" +VERSION_FILE="$ROOT_DIR/src/layerlens/_version.py" echo "Debug: ROOT_DIR=$ROOT_DIR" >&2 echo "Debug: VERSION_FILE=$VERSION_FILE" >&2 diff --git a/scripts/lint b/scripts/lint index 2a3c99e..505f5a2 100755 --- a/scripts/lint +++ b/scripts/lint @@ -8,4 +8,4 @@ echo "==> Running lints" rye run lint echo "==> Making sure it imports" -rye run python -c 'import atlas' \ No newline at end of file +rye run python -c 'import layerlens' \ No newline at end of file diff --git a/scripts/test_coverage b/scripts/test_coverage index c22dd6e..5797d36 100755 --- a/scripts/test_coverage +++ b/scripts/test_coverage @@ -5,4 +5,4 @@ set -e cd "$(dirname "$0")/.." echo "==> Running tests" -rye run pytest tests/ --cov=src/atlas --cov-report=term --tb=no \ No newline at end of file +rye run pytest tests/ --cov=src/layerlens --cov-report=term --tb=no \ No newline at end of file diff --git a/src/atlas/__init__.py b/src/layerlens/__init__.py similarity index 100% rename from src/atlas/__init__.py rename to src/layerlens/__init__.py diff --git a/src/atlas/_base_client.py b/src/layerlens/_base_client.py similarity index 100% rename from src/atlas/_base_client.py rename to src/layerlens/_base_client.py diff --git a/src/atlas/_client.py b/src/layerlens/_client.py similarity index 100% rename from src/atlas/_client.py rename to src/layerlens/_client.py diff --git a/src/atlas/_constants.py b/src/layerlens/_constants.py similarity index 100% rename from src/atlas/_constants.py rename to src/layerlens/_constants.py diff --git a/src/atlas/_exceptions.py b/src/layerlens/_exceptions.py similarity index 100% rename from src/atlas/_exceptions.py rename to src/layerlens/_exceptions.py diff --git 
a/src/atlas/_resource.py b/src/layerlens/_resource.py similarity index 100% rename from src/atlas/_resource.py rename to src/layerlens/_resource.py diff --git a/src/atlas/_utils.py b/src/layerlens/_utils.py similarity index 100% rename from src/atlas/_utils.py rename to src/layerlens/_utils.py diff --git a/src/atlas/_version.py b/src/layerlens/_version.py similarity index 100% rename from src/atlas/_version.py rename to src/layerlens/_version.py diff --git a/src/atlas/models/__init__.py b/src/layerlens/models/__init__.py similarity index 100% rename from src/atlas/models/__init__.py rename to src/layerlens/models/__init__.py diff --git a/src/atlas/models/api.py b/src/layerlens/models/api.py similarity index 100% rename from src/atlas/models/api.py rename to src/layerlens/models/api.py diff --git a/src/atlas/models/benchmark.py b/src/layerlens/models/benchmark.py similarity index 100% rename from src/atlas/models/benchmark.py rename to src/layerlens/models/benchmark.py diff --git a/src/atlas/models/evaluation.py b/src/layerlens/models/evaluation.py similarity index 100% rename from src/atlas/models/evaluation.py rename to src/layerlens/models/evaluation.py diff --git a/src/atlas/models/model.py b/src/layerlens/models/model.py similarity index 100% rename from src/atlas/models/model.py rename to src/layerlens/models/model.py diff --git a/src/atlas/models/organization.py b/src/layerlens/models/organization.py similarity index 100% rename from src/atlas/models/organization.py rename to src/layerlens/models/organization.py diff --git a/src/atlas/resources/benchmarks/__init__.py b/src/layerlens/resources/benchmarks/__init__.py similarity index 100% rename from src/atlas/resources/benchmarks/__init__.py rename to src/layerlens/resources/benchmarks/__init__.py diff --git a/src/atlas/resources/benchmarks/benchmarks.py b/src/layerlens/resources/benchmarks/benchmarks.py similarity index 100% rename from src/atlas/resources/benchmarks/benchmarks.py rename to 
src/layerlens/resources/benchmarks/benchmarks.py diff --git a/src/atlas/resources/evaluations/__init__.py b/src/layerlens/resources/evaluations/__init__.py similarity index 100% rename from src/atlas/resources/evaluations/__init__.py rename to src/layerlens/resources/evaluations/__init__.py diff --git a/src/atlas/resources/evaluations/evaluations.py b/src/layerlens/resources/evaluations/evaluations.py similarity index 100% rename from src/atlas/resources/evaluations/evaluations.py rename to src/layerlens/resources/evaluations/evaluations.py diff --git a/src/atlas/resources/models/__init__.py b/src/layerlens/resources/models/__init__.py similarity index 100% rename from src/atlas/resources/models/__init__.py rename to src/layerlens/resources/models/__init__.py diff --git a/src/atlas/resources/models/models.py b/src/layerlens/resources/models/models.py similarity index 100% rename from src/atlas/resources/models/models.py rename to src/layerlens/resources/models/models.py diff --git a/src/atlas/resources/results/__init__.py b/src/layerlens/resources/results/__init__.py similarity index 100% rename from src/atlas/resources/results/__init__.py rename to src/layerlens/resources/results/__init__.py diff --git a/src/atlas/resources/results/results.py b/src/layerlens/resources/results/results.py similarity index 100% rename from src/atlas/resources/results/results.py rename to src/layerlens/resources/results/results.py diff --git a/tests/resources/test_benchmarks.py b/tests/resources/test_benchmarks.py index e65c14b..d9b1c5b 100644 --- a/tests/resources/test_benchmarks.py +++ b/tests/resources/test_benchmarks.py @@ -3,14 +3,14 @@ import httpx import pytest -from atlas.models import ( +from layerlens.models import ( Benchmark, CustomBenchmark, PublicBenchmark, BenchmarksResponse, ) -from atlas._constants import DEFAULT_TIMEOUT -from atlas.resources.benchmarks.benchmarks import Benchmarks +from layerlens._constants import DEFAULT_TIMEOUT +from 
layerlens.resources.benchmarks.benchmarks import Benchmarks class TestBenchmarks: @@ -318,7 +318,7 @@ def benchmarks_resource(self, mock_client): def test_get_benchmarks_handles_api_error(self, benchmarks_resource): """get method propagates API errors.""" - from atlas._exceptions import APIStatusError + from layerlens._exceptions import APIStatusError mock_response = Mock() mock_response.status_code = 404 @@ -332,7 +332,7 @@ def test_get_benchmarks_handles_api_error(self, benchmarks_resource): def test_get_benchmarks_handles_auth_error(self, benchmarks_resource): """get method propagates authentication errors.""" - from atlas._exceptions import AuthenticationError + from layerlens._exceptions import AuthenticationError mock_response = Mock() mock_response.status_code = 401 @@ -346,7 +346,7 @@ def test_get_benchmarks_handles_auth_error(self, benchmarks_resource): def test_get_benchmarks_handles_connection_error(self, benchmarks_resource): """get method propagates connection errors.""" - from atlas._exceptions import APIConnectionError + from layerlens._exceptions import APIConnectionError mock_request = Mock() connection_error = APIConnectionError(request=mock_request) @@ -357,7 +357,7 @@ def test_get_benchmarks_handles_connection_error(self, benchmarks_resource): def test_get_benchmarks_handles_timeout_error(self, benchmarks_resource): """get method propagates timeout errors.""" - from atlas._exceptions import APITimeoutError + from layerlens._exceptions import APITimeoutError mock_request = Mock() timeout_error = APITimeoutError(mock_request) diff --git a/tests/resources/test_evaluations.py b/tests/resources/test_evaluations.py index 94aa776..4cffcb9 100644 --- a/tests/resources/test_evaluations.py +++ b/tests/resources/test_evaluations.py @@ -3,14 +3,14 @@ import httpx import pytest -from atlas.models import ( +from layerlens.models import ( Evaluation, EvaluationStatus, EvaluationsResponse, CreateEvaluationsResponse, ) -from atlas._constants import 
DEFAULT_TIMEOUT -from atlas.resources.evaluations.evaluations import Evaluations +from layerlens._constants import DEFAULT_TIMEOUT +from layerlens.resources.evaluations.evaluations import Evaluations class TestEvaluations: @@ -339,7 +339,7 @@ def evaluations_resource(self, mock_client): def test_create_evaluation_handles_api_error(self, evaluations_resource): """create method propagates API errors.""" - from atlas._exceptions import APIStatusError + from layerlens._exceptions import APIStatusError mock_model = Mock() mock_model.id = "invalid-model" @@ -359,7 +359,7 @@ def test_create_evaluation_handles_api_error(self, evaluations_resource): def test_create_evaluation_handles_connection_error(self, evaluations_resource): """create method propagates connection errors.""" - from atlas._exceptions import APIConnectionError + from layerlens._exceptions import APIConnectionError mock_model = Mock() mock_model.id = "invalid-model" @@ -376,7 +376,7 @@ def test_create_evaluation_handles_connection_error(self, evaluations_resource): def test_create_evaluation_handles_timeout_error(self, evaluations_resource): """create method propagates timeout errors.""" - from atlas._exceptions import APITimeoutError + from layerlens._exceptions import APITimeoutError mock_model = Mock() mock_model.id = "invalid-model" diff --git a/tests/resources/test_models_resource.py b/tests/resources/test_models_resource.py index 47e8d56..d9718b1 100644 --- a/tests/resources/test_models_resource.py +++ b/tests/resources/test_models_resource.py @@ -3,9 +3,9 @@ import httpx import pytest -from atlas.models import CustomModel, PublicModel, ModelsResponse -from atlas._constants import DEFAULT_TIMEOUT -from atlas.resources.models.models import Models +from layerlens.models import CustomModel, PublicModel, ModelsResponse +from layerlens._constants import DEFAULT_TIMEOUT +from layerlens.resources.models.models import Models class TestModels: @@ -388,7 +388,7 @@ def models_resource(self, mock_client): def 
test_get_models_handles_api_error(self, models_resource): """get method propagates API errors.""" - from atlas._exceptions import APIStatusError + from layerlens._exceptions import APIStatusError mock_response = Mock() mock_response.status_code = 500 @@ -402,7 +402,7 @@ def test_get_models_handles_api_error(self, models_resource): def test_get_models_handles_forbidden_error(self, models_resource): """get method propagates permission errors.""" - from atlas._exceptions import PermissionDeniedError + from layerlens._exceptions import PermissionDeniedError mock_response = Mock() mock_response.status_code = 403 @@ -416,7 +416,7 @@ def test_get_models_handles_forbidden_error(self, models_resource): def test_get_models_handles_connection_error(self, models_resource): """get method propagates connection errors.""" - from atlas._exceptions import APIConnectionError + from layerlens._exceptions import APIConnectionError mock_request = Mock() connection_error = APIConnectionError(request=mock_request) @@ -427,7 +427,7 @@ def test_get_models_handles_connection_error(self, models_resource): def test_get_models_handles_timeout_error(self, models_resource): """get method propagates timeout errors.""" - from atlas._exceptions import APITimeoutError + from layerlens._exceptions import APITimeoutError mock_request = Mock() timeout_error = APITimeoutError(mock_request) diff --git a/tests/resources/test_results.py b/tests/resources/test_results.py index 6dd9517..99ec14f 100644 --- a/tests/resources/test_results.py +++ b/tests/resources/test_results.py @@ -4,9 +4,9 @@ import httpx import pytest -from atlas.models import Result, Pagination, ResultMetrics, ResultsResponse -from atlas._constants import DEFAULT_TIMEOUT -from atlas.resources.results.results import Results +from layerlens.models import Result, Pagination, ResultMetrics, ResultsResponse +from layerlens._constants import DEFAULT_TIMEOUT +from layerlens.resources.results.results import Results class TestResults: @@ -284,7 
+284,7 @@ def results_resource(self, mock_client): def test_get_results_handles_not_found_error(self, results_resource): """get method propagates not found errors.""" - from atlas._exceptions import NotFoundError + from layerlens._exceptions import NotFoundError mock_response = Mock() mock_response.status_code = 404 @@ -298,7 +298,7 @@ def test_get_results_handles_not_found_error(self, results_resource): def test_get_results_handles_auth_error(self, results_resource): """get method propagates authentication errors.""" - from atlas._exceptions import AuthenticationError + from layerlens._exceptions import AuthenticationError mock_response = Mock() mock_response.status_code = 401 @@ -312,7 +312,7 @@ def test_get_results_handles_auth_error(self, results_resource): def test_get_results_handles_permission_error(self, results_resource): """get method propagates permission errors.""" - from atlas._exceptions import PermissionDeniedError + from layerlens._exceptions import PermissionDeniedError mock_response = Mock() mock_response.status_code = 403 @@ -326,7 +326,7 @@ def test_get_results_handles_permission_error(self, results_resource): def test_get_results_handles_server_error(self, results_resource): """get method propagates server errors.""" - from atlas._exceptions import InternalServerError + from layerlens._exceptions import InternalServerError mock_response = Mock() mock_response.status_code = 500 @@ -340,7 +340,7 @@ def test_get_results_handles_server_error(self, results_resource): def test_get_results_handles_connection_error(self, results_resource): """get method propagates connection errors.""" - from atlas._exceptions import APIConnectionError + from layerlens._exceptions import APIConnectionError mock_request = Mock() connection_error = APIConnectionError(request=mock_request) @@ -351,7 +351,7 @@ def test_get_results_handles_connection_error(self, results_resource): def test_get_results_handles_timeout_error(self, results_resource): """get method propagates 
timeout errors.""" - from atlas._exceptions import APITimeoutError + from layerlens._exceptions import APITimeoutError mock_request = Mock() timeout_error = APITimeoutError(mock_request) diff --git a/tests/test_base_client.py b/tests/test_base_client.py index 878a391..179d23d 100644 --- a/tests/test_base_client.py +++ b/tests/test_base_client.py @@ -4,8 +4,8 @@ import httpx import pytest -from atlas import _exceptions -from atlas._base_client import BaseClient +from layerlens import _exceptions +from layerlens._base_client import BaseClient @dataclass @@ -61,7 +61,9 @@ def test_default_headers_structure(self, client): def test_default_headers_includes_auth(self, client): """default_headers merges auth_headers.""" with patch.object( - type(client), "auth_headers", new_callable=lambda: property(lambda _: {"Authorization": "Bearer token"}) + type(client), + "auth_headers", + new_callable=lambda: property(lambda _: {"Authorization": "Bearer token"}), ): headers = client.default_headers @@ -77,7 +79,11 @@ def test_request_cast_without_cast_to(self, mock_request, client, mock_response) assert result is mock_response mock_request.assert_called_once_with( - method="GET", url="/test", json=None, params=None, headers=client.default_headers + method="GET", + url="/test", + json=None, + params=None, + headers=client.default_headers, ) @patch("httpx.Client.request") @@ -115,7 +121,11 @@ def test_request_cast_with_body_and_params(self, mock_request, client, mock_resp client._request_cast("POST", "/test", body=body, params=params) mock_request.assert_called_once_with( - method="POST", url="/test", json=body, params=params, headers=client.default_headers + method="POST", + url="/test", + json=body, + params=params, + headers=client.default_headers, ) @patch("httpx.Client.request") @@ -149,7 +159,11 @@ def test_get_cast_delegates_correctly(self, mock_request, client, mock_response) assert isinstance(result, ResponseModel) mock_request.assert_called_once_with( - method="GET", 
url="/test", json=None, params=params, headers={**client.default_headers, **headers} + method="GET", + url="/test", + json=None, + params=params, + headers={**client.default_headers, **headers}, ) @patch("httpx.Client.request") @@ -163,7 +177,11 @@ def test_post_cast_delegates_correctly(self, mock_request, client, mock_response assert isinstance(result, ResponseModel) mock_request.assert_called_once_with( - method="POST", url="/test", json=body, params=None, headers={**client.default_headers, **headers} + method="POST", + url="/test", + json=body, + params=None, + headers={**client.default_headers, **headers}, ) def test_make_status_error_from_response_with_json(self, client): diff --git a/tests/test_client.py b/tests/test_client.py index af34dfe..558494a 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -2,8 +2,8 @@ import pytest -from atlas import Atlas -from atlas._exceptions import AtlasError +from layerlens import Atlas +from layerlens._exceptions import AtlasError class TestAtlasClientInitialization: @@ -19,7 +19,7 @@ def mock_org(self): def test_none_values_fallback_to_env(self, mock_env_vars, mock_org): """None values explicitly passed fallback to environment.""" _ = mock_env_vars # Fixture used for side effects - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key=None) assert client.api_key == "test-api-key" @@ -27,7 +27,7 @@ def test_none_values_fallback_to_env(self, mock_env_vars, mock_org): @pytest.mark.parametrize("base_url", ["https://custom.api.com", "https://staging.layerlens.ai/api/v1"]) def test_custom_base_url(self, base_url, mock_org): """Client accepts custom base URL.""" - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key", base_url=base_url) assert str(client.base_url).rstrip("/") == 
base_url.rstrip("/") @@ -36,14 +36,14 @@ def test_custom_timeout(self, mock_org): """Client accepts custom timeout.""" import httpx - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key", timeout=30.0) assert isinstance(client.timeout, httpx.Timeout) def test_auth_headers_with_api_key(self, mock_org): """auth_headers property returns correct headers when API key is set.""" - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-api-key") headers = client.auth_headers @@ -52,7 +52,7 @@ def test_auth_headers_with_api_key(self, mock_org): def test_auth_headers_without_api_key(self, mock_org): """auth_headers property returns empty dict when no API key.""" - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): with pytest.raises( AtlasError, match="The api_key client option must be set either by passing api_key to the client or by setting the LAYERLENS_ATLAS_API_KEY environment variable", @@ -61,7 +61,7 @@ def test_auth_headers_without_api_key(self, mock_org): def test_auth_headers_with_empty_api_key(self, mock_org): """auth_headers property returns empty dict when API key is empty string.""" - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): with pytest.raises( AtlasError, match="The api_key client option must be set either by passing api_key to the client or by setting the LAYERLENS_ATLAS_API_KEY environment variable", @@ -70,14 +70,14 @@ def test_auth_headers_with_empty_api_key(self, mock_org): def test_copy_method(self, mock_org): """copy method creates new client with overridden parameters.""" - with patch("atlas.Atlas._get_organization", 
return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): original_client = Atlas( api_key="original-key", base_url="https://original.api.com", timeout=10.0, ) - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): new_client = original_client.copy(api_key="new-key", timeout=20.0) # Check overridden values @@ -91,20 +91,20 @@ def test_copy_method(self, mock_org): def test_copy_method_partial_override(self, mock_org): """copy method allows partial parameter override.""" - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): original_client = Atlas(api_key="original-key") - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): new_client = original_client.copy(api_key="new-key") assert new_client.api_key == "new-key" def test_with_options_alias(self, mock_org): """with_options is an alias for copy method.""" - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): original_client = Atlas(api_key="original-key") - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): new_client = original_client.with_options(api_key="new-key") assert new_client.api_key == "new-key" @@ -113,10 +113,10 @@ def test_with_options_alias(self, mock_org): def test_copy_method_timeout_override(self, mock_org): """copy method properly overrides timeout when original is None.""" # Create a client with no explicit timeout (uses default) - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): original_client = Atlas(api_key="original-key") 
- with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): new_client = original_client.copy(timeout=30.0) import httpx @@ -147,9 +147,9 @@ def _create_mock_response(self, status_code): def test_make_status_error_bad_request(self, mock_org): """_make_status_error creates BadRequestError for 400 status.""" - from atlas._exceptions import BadRequestError + from layerlens._exceptions import BadRequestError - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key") mock_response = self._create_mock_response(400) mock_body = {"error": "Bad request"} @@ -161,9 +161,9 @@ def test_make_status_error_bad_request(self, mock_org): def test_make_status_error_unauthorized(self, mock_org): """_make_status_error creates AuthenticationError for 401 status.""" - from atlas._exceptions import AuthenticationError + from layerlens._exceptions import AuthenticationError - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key") mock_response = self._create_mock_response(401) mock_body = {"error": "Unauthorized"} @@ -175,9 +175,9 @@ def test_make_status_error_unauthorized(self, mock_org): def test_make_status_error_forbidden(self, mock_org): """_make_status_error creates PermissionDeniedError for 403 status.""" - from atlas._exceptions import PermissionDeniedError + from layerlens._exceptions import PermissionDeniedError - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key") mock_response = self._create_mock_response(403) mock_body = {"error": "Forbidden"} @@ -189,9 +189,9 @@ def test_make_status_error_forbidden(self, mock_org): def 
test_make_status_error_not_found(self, mock_org): """_make_status_error creates NotFoundError for 404 status.""" - from atlas._exceptions import NotFoundError + from layerlens._exceptions import NotFoundError - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key") mock_response = self._create_mock_response(404) mock_body = {"error": "Not found"} @@ -203,9 +203,9 @@ def test_make_status_error_not_found(self, mock_org): def test_make_status_error_conflict(self, mock_org): """_make_status_error creates ConflictError for 409 status.""" - from atlas._exceptions import ConflictError + from layerlens._exceptions import ConflictError - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key") mock_response = self._create_mock_response(409) mock_body = {"error": "Conflict"} @@ -217,9 +217,9 @@ def test_make_status_error_conflict(self, mock_org): def test_make_status_error_unprocessable_entity(self, mock_org): """_make_status_error creates UnprocessableEntityError for 422 status.""" - from atlas._exceptions import UnprocessableEntityError + from layerlens._exceptions import UnprocessableEntityError - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key") mock_response = self._create_mock_response(422) mock_body = {"error": "Unprocessable entity"} @@ -231,9 +231,9 @@ def test_make_status_error_unprocessable_entity(self, mock_org): def test_make_status_error_rate_limit(self, mock_org): """_make_status_error creates RateLimitError for 429 status.""" - from atlas._exceptions import RateLimitError + from layerlens._exceptions import RateLimitError - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with 
patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key") mock_response = self._create_mock_response(429) mock_body = {"error": "Rate limited"} @@ -245,9 +245,9 @@ def test_make_status_error_rate_limit(self, mock_org): def test_make_status_error_internal_server_error(self, mock_org): """_make_status_error creates InternalServerError for 500+ status.""" - from atlas._exceptions import InternalServerError + from layerlens._exceptions import InternalServerError - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key") mock_response = self._create_mock_response(500) mock_body = {"error": "Internal server error"} @@ -259,9 +259,9 @@ def test_make_status_error_internal_server_error(self, mock_org): def test_make_status_error_gateway_timeout(self, mock_org): """_make_status_error creates InternalServerError for 502 status.""" - from atlas._exceptions import InternalServerError + from layerlens._exceptions import InternalServerError - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key") mock_response = self._create_mock_response(502) mock_body = {"error": "Gateway timeout"} @@ -273,9 +273,9 @@ def test_make_status_error_gateway_timeout(self, mock_org): def test_make_status_error_unknown_status(self, mock_org): """_make_status_error creates generic APIStatusError for unknown status codes.""" - from atlas._exceptions import APIStatusError + from layerlens._exceptions import APIStatusError - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key") mock_response = self._create_mock_response(418) # I'm a teapot mock_body = {"error": "Unknown error"} @@ -287,9 +287,9 @@ def 
test_make_status_error_unknown_status(self, mock_org): def test_make_status_error_with_non_mapping_body(self, mock_org): """_make_status_error handles non-mapping body correctly.""" - from atlas._exceptions import NotFoundError + from layerlens._exceptions import NotFoundError - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key") mock_response = self._create_mock_response(404) mock_body = "Simple string error" @@ -301,9 +301,9 @@ def test_make_status_error_with_non_mapping_body(self, mock_org): def test_make_status_error_with_none_body(self, mock_org): """_make_status_error handles None body correctly.""" - from atlas._exceptions import BadRequestError + from layerlens._exceptions import BadRequestError - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key") mock_response = self._create_mock_response(400) @@ -314,9 +314,9 @@ def test_make_status_error_with_none_body(self, mock_org): def test_make_status_error_with_complex_body(self, mock_org): """_make_status_error extracts error from complex body structure.""" - from atlas._exceptions import AuthenticationError + from layerlens._exceptions import AuthenticationError - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="test-key") mock_response = self._create_mock_response(401) mock_body = { diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index 4471471..5a34150 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -4,7 +4,7 @@ import httpx import pytest -from atlas._exceptions import ( +from layerlens._exceptions import ( APIError, AtlasError, ConflictError, @@ -304,7 +304,13 @@ def test_exception_str_representation(self): 
def test_exception_with_complex_body(self): """Exception handles complex body structures.""" mock_request = Mock(spec=httpx.Request) - body = {"error": {"code": "VALIDATION_ERROR", "details": ["Field 'name' is required"]}, "request_id": "req-456"} + body = { + "error": { + "code": "VALIDATION_ERROR", + "details": ["Field 'name' is required"], + }, + "request_id": "req-456", + } error = APIError("Validation failed", mock_request, body=body) assert isinstance(error.body, dict) diff --git a/tests/test_integration.py b/tests/test_integration.py index dede2de..71eb122 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -4,8 +4,8 @@ import httpx import pytest -from atlas import Atlas -from atlas.models import ( +from layerlens import Atlas +from layerlens.models import ( Model, Result, Benchmark, @@ -104,7 +104,7 @@ def atlas_client(self): mock_org.id = "org-123" mock_org.projects = [Mock(id="proj-456")] - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): return Atlas(api_key="workflow-test-key") def test_complete_evaluation_workflow(self, atlas_client): @@ -213,7 +213,7 @@ def test_complete_evaluation_workflow(self, atlas_client): def test_workflow_with_error_handling(self, atlas_client): """Test workflow handles errors gracefully.""" - from atlas._exceptions import NotFoundError + from layerlens._exceptions import NotFoundError mock_response = Mock() mock_response.status_code = 404 @@ -274,7 +274,7 @@ def atlas_client(self): mock_org.id = "org-123" mock_org.projects = [Mock(id="proj-456")] - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): return Atlas(api_key="interaction-test-key") def test_evaluation_creation_with_model_and_benchmark_objects(self, atlas_client): @@ -433,7 +433,7 @@ def mock_org(self): def test_client_has_all_resource_properties(self, mock_org): 
"""Atlas client exposes all resource properties.""" - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="property-test-key") # Verify available resource properties exist @@ -441,15 +441,15 @@ def test_client_has_all_resource_properties(self, mock_org): assert hasattr(client, "results") # Verify they are the correct types - from atlas.resources.results import Results - from atlas.resources.evaluations import Evaluations + from layerlens.resources.results import Results + from layerlens.resources.evaluations import Evaluations assert isinstance(client.evaluations, Evaluations) assert isinstance(client.results, Results) def test_resource_properties_share_same_client(self, mock_org): """All resource properties share the same client instance.""" - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="shared-client-test") # Verify all resources use the same client @@ -477,10 +477,10 @@ def mock_org2(self): def test_multiple_atlas_clients_independent(self, mock_org1, mock_org2): """Multiple Atlas client instances operate independently.""" - with patch("atlas.Atlas._get_organization", return_value=mock_org1): + with patch("layerlens.Atlas._get_organization", return_value=mock_org1): client1 = Atlas(api_key="client-1-key") - with patch("atlas.Atlas._get_organization", return_value=mock_org2): + with patch("layerlens.Atlas._get_organization", return_value=mock_org2): client2 = Atlas(api_key="client-2-key") # Verify clients are independent @@ -493,10 +493,10 @@ def test_multiple_atlas_clients_independent(self, mock_org1, mock_org2): def test_resource_operations_isolated(self, mock_org1, mock_org2): """Operations on different client resources are isolated.""" - with patch("atlas.Atlas._get_organization", return_value=mock_org1): + with 
patch("layerlens.Atlas._get_organization", return_value=mock_org1): client1 = Atlas(api_key="iso-test-1") - with patch("atlas.Atlas._get_organization", return_value=mock_org2): + with patch("layerlens.Atlas._get_organization", return_value=mock_org2): client2 = Atlas(api_key="iso-test-2") result_data = { @@ -566,7 +566,7 @@ def mock_org(self): def test_evaluation_workflow_error_propagation(self, mock_org): """Errors in evaluation workflow are properly propagated.""" - from atlas._exceptions import APIStatusError, APIConnectionError + from layerlens._exceptions import APIStatusError, APIConnectionError # Create model and benchmark objects model_data = { @@ -601,7 +601,7 @@ def test_evaluation_workflow_error_propagation(self, mock_org): model = Model(**model_data) benchmark = Benchmark(**benchmark_data) - with patch("atlas.Atlas._get_organization", return_value=mock_org): + with patch("layerlens.Atlas._get_organization", return_value=mock_org): client = Atlas(api_key="error-test-key") mock_response = Mock() diff --git a/tests/test_models.py b/tests/test_models.py index 95470cf..af733f4 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -3,7 +3,7 @@ import pytest from pydantic import ValidationError -from atlas.models import ( +from layerlens.models import ( Result, Evaluation, Pagination, diff --git a/tests/test_resource.py b/tests/test_resource.py index 254d016..7502c54 100644 --- a/tests/test_resource.py +++ b/tests/test_resource.py @@ -2,7 +2,7 @@ import pytest -from atlas._resource import SyncAPIResource +from layerlens._resource import SyncAPIResource class TestSyncAPIResource: @@ -243,7 +243,7 @@ def resource_instance(self, mock_client): def test_resource_propagates_get_errors(self, resource_instance, mock_client): """Resource propagates errors from _get calls.""" - from atlas._exceptions import APIStatusError + from layerlens._exceptions import APIStatusError mock_response = Mock() mock_response.status_code = 404 @@ -257,7 +257,7 @@ def 
test_resource_propagates_get_errors(self, resource_instance, mock_client): def test_resource_propagates_post_errors(self, resource_instance, mock_client): """Resource propagates errors from _post calls.""" - from atlas._exceptions import APIConnectionError + from layerlens._exceptions import APIConnectionError mock_request = Mock() connection_error = APIConnectionError(request=mock_request) @@ -345,7 +345,11 @@ def get_with_retry(self, url: str, max_retries: int = 3): mock_client = Mock() # First two calls fail, third succeeds - mock_client.get_cast.side_effect = [Exception("First failure"), Exception("Second failure"), {"success": True}] + mock_client.get_cast.side_effect = [ + Exception("First failure"), + Exception("Second failure"), + {"success": True}, + ] resource = RetryableResource(mock_client) diff --git a/tests/test_utils.py b/tests/test_utils.py index 85b438b..1be7f06 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,7 +4,7 @@ import pytest -from atlas._utils import ( +from layerlens._utils import ( SENSITIVE_HEADERS, SensitiveHeadersFilter, is_dict, From 30c13ebecf0a38f5103a737632e02b798cdd2a38 Mon Sep 17 00:00:00 2001 From: m-peko Date: Wed, 27 Aug 2025 16:18:20 +0200 Subject: [PATCH 2/3] Update package name in docs --- docs/README.md | 7 +- docs/api-reference/client.md | 8 +- docs/api-reference/errors.md | 172 ++++++++++++------------ docs/api-reference/evaluations.md | 162 +++++++++++----------- docs/api-reference/models-benchmarks.md | 57 ++++---- docs/api-reference/results.md | 116 ++++++++-------- docs/examples/creating-evaluations.md | 27 ++-- docs/security/api-key-management.md | 4 +- docs/security/environment-variables.md | 2 +- docs/security/rate-limiting.md | 12 +- src/README.md | 24 ++-- 11 files changed, 292 insertions(+), 299 deletions(-) diff --git a/docs/README.md b/docs/README.md index 82a083c..9bfbc6e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -9,7 +9,9 @@ Atlas is an evaluation platform that allows you to 
benchmark AI models against v ## Quick Start ### Install LayerLens python sdk + Install the layerlens python sdk using the following command + ```bash pip install layerlens --index-url https://sdk.layerlens.ai ``` @@ -31,7 +33,7 @@ Before triggering an evaluation using the sdk, login to your organization at [ap #### Using synchronous client ```python -from atlas import Atlas +from layerlens import Atlas # Construct sync client client = Atlas() @@ -59,12 +61,11 @@ from atlas import Atlas ) ``` - #### Using Async Client ```python import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def run_evaluation_async(): # Construct async client diff --git a/docs/api-reference/client.md b/docs/api-reference/client.md index 100512d..30a57c7 100644 --- a/docs/api-reference/client.md +++ b/docs/api-reference/client.md @@ -7,7 +7,7 @@ The `Atlas` (syncronous) and `AsyncAtlas` (asyncronous) classes are the main ent ### Syncronous Client ```python -from atlas import Atlas +from layerlens import Atlas # Construct syncronous client # Loads for api key from the "LAYERLENS_ATLAS_API_KEY" enviornment variable @@ -21,7 +21,7 @@ client = Atlas(api_key="your_api_key") ```python import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas # Construct async client # Loads for api key from the "LAYERLENS_ATLAS_API_KEY" enviornment variable @@ -56,7 +56,7 @@ LAYERLENS_ATLAS_API_KEY="your_api_key_here" ### Simple Timeout ```python -from atlas import Atlas +from layerlens import Atlas # 30-second timeout for all requests client = Atlas(timeout=30.0) @@ -88,4 +88,4 @@ evaluation = client.with_options(timeout=120.0).evaluations.create( model=model, benchmark=benchmark ) -``` \ No newline at end of file +``` diff --git a/docs/api-reference/errors.md b/docs/api-reference/errors.md index 7fec705..c590d3b 100644 --- a/docs/api-reference/errors.md +++ b/docs/api-reference/errors.md @@ -32,12 +32,12 @@ AtlasError Base exception for all Atlas-related errors. 
```python -import atlas +import layerlens try: - client = atlas.Atlas() + client = layerlens.Atlas() evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") -except atlas.AtlasError as e: +except layerlens.AtlasError as e: print(f"Atlas error occurred: {e}") ``` @@ -52,12 +52,12 @@ Base exception for all API-related errors. Contains additional context about the - `body`: Response body (if available) ```python -import atlas +import layerlens try: - client = atlas.Atlas() + client = layerlens.Atlas() evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") -except atlas.APIError as e: +except layerlens.APIError as e: print(f"API error: {e.message}") print(f"Request URL: {e.request.url}") print(f"Response body: {e.body}") @@ -77,12 +77,12 @@ Raised when the client cannot connect to the API server. - Firewall blocking requests ```python -import atlas +import layerlens try: - client = atlas.Atlas() + client = layerlens.Atlas() evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") -except atlas.APIConnectionError as e: +except layerlens.APIConnectionError as e: print("Connection failed - check your network connection") print(f"Error details: {e}") ``` @@ -92,12 +92,12 @@ except atlas.APIConnectionError as e: Raised when a request times out. ```python -import atlas +import layerlens try: - client = atlas.Atlas(timeout=0.2) # Very short timeout + client = layerlens.Atlas(timeout=0.2) # Very short timeout evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") -except atlas.APITimeoutError: +except layerlens.APITimeoutError: print("Request timed out - try increasing timeout or check network") ``` @@ -116,13 +116,13 @@ All HTTP status errors inherit from `APIStatusError` and include additional prop Request was malformed or contained invalid parameters. 
```python -import atlas +import layerlens try: - client = atlas.Atlas() + client = layerlens.Atlas() # Invalid parameters evaluation = client.evaluations.create(model="", benchmark="") -except atlas.BadRequestError as e: +except layerlens.BadRequestError as e: print(f"Bad request: {e}") print(f"Status code: {e.status_code}") ``` @@ -132,12 +132,12 @@ except atlas.BadRequestError as e: API key is missing, invalid, or expired. ```python -import atlas +import layerlens try: - client = atlas.Atlas(api_key="invalid_key") + client = layerlens.Atlas(api_key="invalid_key") evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") -except atlas.AuthenticationError: +except layerlens.AuthenticationError: print("Authentication failed - check your API key") print("Make sure LAYERLENS_ATLAS_API_KEY is set correctly") ``` @@ -147,12 +147,12 @@ except atlas.AuthenticationError: Valid API key but insufficient permissions for the requested operation. ```python -import atlas +import layerlens try: - client = atlas.Atlas() + client = layerlens.Atlas() evaluation = client.evaluations.create(model="restricted-model", benchmark="mmlu") -except atlas.PermissionDeniedError: +except layerlens.PermissionDeniedError: print("Permission denied - check your organization/project access") print("Contact your administrator for access to this resource") ``` @@ -162,12 +162,12 @@ except atlas.PermissionDeniedError: Requested resource (model, benchmark, evaluation) does not exist. ```python -import atlas +import layerlens try: - client = atlas.Atlas() + client = layerlens.Atlas() evaluation = client.evaluations.create(model="nonexistent-model", benchmark="mmlu") -except atlas.NotFoundError: +except layerlens.NotFoundError: print("Model or benchmark not found") print("Check available models and benchmarks in the Atlas dashboard") ``` @@ -177,13 +177,13 @@ except atlas.NotFoundError: Request conflicts with current resource state. 
```python -import atlas +import layerlens try: - client = atlas.Atlas() + client = layerlens.Atlas() # Some operation that conflicts with current state evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") -except atlas.ConflictError: +except layerlens.ConflictError: print("Request conflicts with current state") ``` @@ -192,12 +192,12 @@ except atlas.ConflictError: Request parameters are valid but cannot be processed. ```python -import atlas +import layerlens try: - client = atlas.Atlas() + client = layerlens.Atlas() evaluation = client.evaluations.create(model="gpt-4", benchmark="invalid-benchmark") -except atlas.UnprocessableEntityError as e: +except layerlens.UnprocessableEntityError as e: print(f"Cannot process request: {e}") print("Parameters are valid but operation cannot be completed") ``` @@ -207,13 +207,13 @@ except atlas.UnprocessableEntityError as e: Too many requests sent in a given time period. ```python -import atlas +import layerlens import time try: - client = atlas.Atlas() + client = layerlens.Atlas() evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") -except atlas.RateLimitError as e: +except layerlens.RateLimitError as e: print("Rate limit exceeded") # Extract retry-after header if available retry_after = e.response.headers.get('retry-after') @@ -230,12 +230,12 @@ except atlas.RateLimitError as e: Server-side error occurred. ```python -import atlas +import layerlens try: - client = atlas.Atlas() + client = layerlens.Atlas() evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") -except atlas.InternalServerError as e: +except layerlens.InternalServerError as e: print(f"Server error: {e.status_code}") print("This is a server-side issue - try again later") print(f"Request ID: {e.request_id}") # For support tickets @@ -246,9 +246,9 @@ except atlas.InternalServerError as e: ### 1. 
Handle Specific Exceptions ```python -import atlas +import layerlens import time -from atlas import Atlas +from layerlens import Atlas def robust_create_evaluation(model: str, benchmark: str, max_retries: int = 3): client = Atlas() @@ -258,25 +258,25 @@ def robust_create_evaluation(model: str, benchmark: str, max_retries: int = 3): evaluation = client.evaluations.create(model=model, benchmark=benchmark) return evaluation - except atlas.AuthenticationError: + except layerlens.AuthenticationError: print("❌ Authentication failed - check your API key") break # Don't retry auth errors - except atlas.PermissionDeniedError: + except layerlens.PermissionDeniedError: print("❌ Permission denied - contact your administrator") break # Don't retry permission errors - except atlas.NotFoundError: + except layerlens.NotFoundError: print(f"❌ Model '{model}' or benchmark '{benchmark}' not found") break # Don't retry not found errors - except atlas.RateLimitError as e: + except layerlens.RateLimitError as e: retry_after = e.response.headers.get('retry-after', 60) print(f"⏳ Rate limited - waiting {retry_after} seconds...") time.sleep(int(retry_after)) continue # Retry after waiting - except atlas.InternalServerError: + except layerlens.InternalServerError: if attempt < max_retries - 1: wait_time = 2 ** attempt # Exponential backoff print(f"🔄 Server error - retrying in {wait_time}s (attempt {attempt + 1})") @@ -286,7 +286,7 @@ def robust_create_evaluation(model: str, benchmark: str, max_retries: int = 3): print("❌ Server error - max retries exceeded") break - except atlas.APIConnectionError: + except layerlens.APIConnectionError: if attempt < max_retries - 1: wait_time = 2 ** attempt print(f"🔄 Connection error - retrying in {wait_time}s (attempt {attempt + 1})") @@ -296,7 +296,7 @@ def robust_create_evaluation(model: str, benchmark: str, max_retries: int = 3): print("❌ Connection failed - check your network") break - except atlas.APIError as e: + except layerlens.APIError as e: 
print(f"❌ Unexpected API error: {e}") break @@ -306,8 +306,8 @@ def robust_create_evaluation(model: str, benchmark: str, max_retries: int = 3): ### 2. Graceful Degradation ```python -import atlas -from atlas import Atlas +import layerlens +from layerlens import Atlas def get_evaluation_results_with_fallback(evaluation_id: str): client = Atlas() @@ -320,16 +320,16 @@ def get_evaluation_results_with_fallback(evaluation_id: str): else: return {"success": False, "data": None, "message": "No results found"} - except atlas.NotFoundError: + except layerlens.NotFoundError: return {"success": False, "data": None, "message": "Evaluation not found"} - except atlas.AuthenticationError: + except layerlens.AuthenticationError: return {"success": False, "data": None, "message": "Authentication required"} - except atlas.APIConnectionError: + except layerlens.APIConnectionError: return {"success": False, "data": None, "message": "Service temporarily unavailable"} - except atlas.APIError as e: + except layerlens.APIError as e: return {"success": False, "data": None, "message": f"Service error: {e}"} # Usage @@ -344,8 +344,8 @@ else: ```python import logging -import atlas -from atlas import Atlas +import layerlens +from layerlens import Atlas # Configure logging logging.basicConfig(level=logging.INFO) @@ -365,23 +365,23 @@ def monitored_api_call(): logger.warning("Evaluation creation returned None") return None - except atlas.RateLimitError as e: + except layerlens.RateLimitError as e: logger.warning(f"Rate limited - request ID: {e.request_id}") raise - except atlas.AuthenticationError: + except layerlens.AuthenticationError: logger.error("Authentication failed - check API key configuration") raise - except atlas.APIConnectionError: + except layerlens.APIConnectionError: logger.error("Network connection failed") raise - except atlas.InternalServerError as e: + except layerlens.InternalServerError as e: logger.error(f"Server error: {e.status_code} - request ID: {e.request_id}") raise 
- except atlas.APIError as e: + except layerlens.APIError as e: logger.error(f"Unexpected API error: {e} - request ID: {getattr(e, 'request_id', 'N/A')}") raise ``` @@ -389,9 +389,9 @@ def monitored_api_call(): ### 4. Context Managers for Resource Management ```python -import atlas +import layerlens from contextlib import contextmanager -from atlas import Atlas +from layerlens import Atlas @contextmanager def atlas_client(): @@ -400,10 +400,10 @@ def atlas_client(): try: client = Atlas() yield client - except atlas.AuthenticationError: + except layerlens.AuthenticationError: print("Authentication failed") raise - except atlas.APIConnectionError: + except layerlens.APIConnectionError: print("Connection failed") raise finally: @@ -415,7 +415,7 @@ try: with atlas_client() as client: evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") results = client.results.get(evaluation_id=evaluation.id) -except atlas.AtlasError: +except layerlens.AtlasError: print("Atlas operation failed") ``` @@ -424,13 +424,13 @@ except atlas.AtlasError: ### Status Error Properties ```python -import atlas -from atlas import Atlas +import layerlens +from layerlens import Atlas try: client = Atlas() evaluation = client.evaluations.create(model="invalid", benchmark="invalid") -except atlas.APIStatusError as e: +except layerlens.APIStatusError as e: print(f"Status Code: {e.status_code}") print(f"Request ID: {e.request_id}") print(f"Response Headers: {dict(e.response.headers)}") @@ -442,10 +442,10 @@ except atlas.APIStatusError as e: ### Extracting Useful Information ```python -import atlas -from atlas import Atlas +import layerlens +from layerlens import Atlas -def extract_error_info(error: atlas.APIError): +def extract_error_info(error: layerlens.APIError): info = { "type": type(error).__name__, "message": str(error), @@ -468,7 +468,7 @@ def extract_error_info(error: atlas.APIError): try: client = Atlas() evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") 
-except atlas.APIError as e: +except layerlens.APIError as e: error_info = extract_error_info(e) print(f"Error details: {error_info}") ``` @@ -477,20 +477,20 @@ except atlas.APIError as e: ```python import pytest -import atlas +import layerlens from unittest.mock import Mock, patch -from atlas import Atlas +from layerlens import Atlas def test_authentication_error_handling(): """Test that authentication errors are handled properly""" - with patch('atlas.Atlas') as mock_atlas: - mock_atlas.side_effect = atlas.AuthenticationError( + with patch('layerlens.Atlas') as mock_atlas: + mock_atlas.side_effect = layerlens.AuthenticationError( "Invalid API key", request=Mock(), response=Mock() ) - with pytest.raises(atlas.AuthenticationError): + with pytest.raises(layerlens.AuthenticationError): client = Atlas() client.evaluations.create(model="gpt-4", benchmark="mmlu") @@ -508,7 +508,7 @@ def test_rate_limit_retry(): # Missing API key try: client = Atlas(api_key=None) -except atlas.AtlasError as e: +except layerlens.AtlasError as e: print(f"Configuration error: {e}") ``` @@ -519,14 +519,14 @@ except atlas.AtlasError as e: try: client = Atlas(timeout=0.1) # Very short timeout evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") -except atlas.APITimeoutError: +except layerlens.APITimeoutError: print("Request timed out") # Network connectivity try: # Simulate network issues evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") -except atlas.APIConnectionError: +except layerlens.APIConnectionError: print("Network connectivity issue") ``` @@ -537,15 +537,15 @@ except atlas.APIConnectionError: ```python import time import random -import atlas -from atlas import Atlas +import layerlens +from layerlens import Atlas def exponential_backoff_retry(func, max_retries=3, base_delay=1): """Retry function with exponential backoff""" for attempt in range(max_retries): try: return func() - except (atlas.InternalServerError, atlas.APIConnectionError) as e: + 
except (layerlens.InternalServerError, layerlens.APIConnectionError) as e: if attempt == max_retries - 1: raise @@ -566,8 +566,8 @@ evaluation = exponential_backoff_retry(create_evaluation) ```python import time from enum import Enum -from atlas import Atlas -import atlas +from layerlens import Atlas +import layerlens class CircuitState(Enum): CLOSED = "closed" @@ -585,7 +585,7 @@ class CircuitBreaker: def call(self, func, *args, **kwargs): if self.state == CircuitState.OPEN: if time.time() - self.last_failure_time < self.timeout: - raise atlas.APIConnectionError(message="Circuit breaker is OPEN") + raise layerlens.APIConnectionError(message="Circuit breaker is OPEN") else: self.state = CircuitState.HALF_OPEN @@ -593,7 +593,7 @@ class CircuitBreaker: result = func(*args, **kwargs) self.on_success() return result - except (atlas.InternalServerError, atlas.APIConnectionError) as e: + except (layerlens.InternalServerError, layerlens.APIConnectionError) as e: self.on_failure() raise @@ -617,6 +617,6 @@ try: model="gpt-4", benchmark="mmlu" ) -except atlas.APIError as e: +except layerlens.APIError as e: print(f"Circuit breaker prevented call or operation failed: {e}") ``` diff --git a/docs/api-reference/evaluations.md b/docs/api-reference/evaluations.md index 46234b1..ce75bbc 100644 --- a/docs/api-reference/evaluations.md +++ b/docs/api-reference/evaluations.md @@ -6,7 +6,6 @@ The `evaluations` resource on the atlas client allows you to create and manage e An evaluation runs a specified model against a benchmark dataset and returns comprehensive metrics. - The below example trigger evaluations using `gpt-4o` against `simpleQA`. > Before running the below examples ensure the model and benchmark being run are present on your organiztion. @@ -16,7 +15,7 @@ The below example trigger evaluations using `gpt-4o` against `simpleQA`. Below is an example showing how to trigger an evaluation, waiting for it to complete and finally fetching the evaluations results. 
```python -from atlas import Atlas +from layerlens import Atlas # Construct sync client (API key from env or inline) client = Atlas() @@ -63,7 +62,7 @@ if evaluation.is_success: ```python import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def main(): @@ -104,11 +103,11 @@ Creates a new evaluation for the specified model and benchmark. #### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `model` | `Model` | Yes | The model to evaluate | -| `benchmark` | `Benchmark` | Yes | The benchmark to evaluate | -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| ----------- | -------------------------------- | -------- | ------------------------- | +| `model` | `Model` | Yes | The model to evaluate | +| `benchmark` | `Benchmark` | Yes | The benchmark to evaluate | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns @@ -120,27 +119,26 @@ Polls an evaluation until it completes (success, failure, or timeout) or the spe #### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `evaluation` | `Evaluation` | Yes | The evaluation object to monitor | -| `interval_seconds` | `int` | No | Polling interval in seconds (default: 30) | -| `timeout_seconds` | `int \| None` | No | Maximum time to wait in seconds (no limit if None) | +| Parameter | Type | Required | Description | +| ------------------ | ------------- | -------- | -------------------------------------------------- | +| `evaluation` | `Evaluation` | Yes | The evaluation object to monitor | +| `interval_seconds` | `int` | No | Polling interval in seconds (default: 30) | +| `timeout_seconds` | `int \| None` | No | Maximum time to wait in seconds (no limit if None) | #### Returns Returns the updated `Evaluation` object when completed, or `None` if polling fails. 
- ### `get_by_id(evaluation_id, timeout=None)` Retrieves an existing evaluation by its unique identifier. #### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `evaluation_id` | `str` | Yes | The unique evaluation identifier | -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| --------------- | -------------------------------- | -------- | -------------------------------- | +| `evaluation_id` | `str` | Yes | The unique evaluation identifier | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns @@ -149,7 +147,7 @@ Returns an `Evaluation` object if found, `None` if the evaluation does not exist #### Example ```python -from atlas import Atlas +from layerlens import Atlas client = Atlas() @@ -161,12 +159,12 @@ evaluation = client.evaluations.get_by_id(evaluation_id) #### Async Usage ```python -from atlas import AsyncAtlas +from layerlens import AsyncAtlas import asyncio async def get_evaluation(): client = AsyncAtlas() - + evaluation = await client.evaluations.get_by_id("eval_abc123xyz") if evaluation: print(f"Found evaluation: {evaluation.id}") @@ -185,32 +183,32 @@ Retrieves multiple evaluations with optional pagination support. 
#### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `page` | `int \| None` | No | Page number for pagination (1-based, defaults to 1) | -| `page_size` | `int \| None` | No | Number of evaluations per page (default: 100, max: 500) | -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| ----------- | -------------------------------- | -------- | ------------------------------------------------------- | +| `page` | `int \| None` | No | Page number for pagination (1-based, defaults to 1) | +| `page_size` | `int \| None` | No | Number of evaluations per page (default: 100, max: 500) | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns Returns an `EvaluationsResponse` object containing: + - `evaluations`: List of `Evaluation` objects - `pagination`: Pagination metadata with `page`, `page_size`, `total_pages`, and `total_count` Returns `None` if the request fails. - ### `get_results(page=None, page_size=None, timeout=None)` Fetches results for this evaluation with pagination support. This is a synchronous method. 
#### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `page` | `int \| None` | No | Page number for pagination (1-based, defaults to 1) | -| `page_size` | `int \| None` | No | Number of results per page (default: 100, max: 500) | -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| ----------- | -------------------------------- | -------- | --------------------------------------------------- | +| `page` | `int \| None` | No | Page number for pagination (1-based, defaults to 1) | +| `page_size` | `int \| None` | No | Number of results per page (default: 100, max: 500) | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns @@ -219,7 +217,7 @@ Returns a `ResultsResponse` object containing results and pagination metadata, o #### Example ```python -from atlas import Atlas +from layerlens import Atlas client = Atlas() @@ -236,9 +234,9 @@ Fetches all results for this evaluation by automatically handling pagination. Th #### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| --------- | -------------------------------- | -------- | ------------------------ | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns @@ -247,7 +245,7 @@ Returns a list of `Result` objects containing all results for the evaluation. 
#### Example ```python -from atlas import Atlas +from layerlens import Atlas client = Atlas() @@ -264,19 +262,17 @@ if evaluation: | `"success"` | Evaluation finished successfully | | `"failure"` | Evaluation failed due to an error | - - ### `get_results_async(page=None, page_size=None, timeout=None)` Asynchronously fetches results for this evaluation with pagination support. This requires an async client to be attached. #### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `page` | `int \| None` | No | Page number for pagination (1-based, defaults to 1) | -| `page_size` | `int \| None` | No | Number of results per page (default: 100, max: 500) | -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| ----------- | -------------------------------- | -------- | --------------------------------------------------- | +| `page` | `int \| None` | No | Page number for pagination (1-based, defaults to 1) | +| `page_size` | `int \| None` | No | Number of results per page (default: 100, max: 500) | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns @@ -285,35 +281,36 @@ Returns a `ResultsResponse` object containing results and pagination metadata, o #### Example ```python -from atlas import AsyncAtlas +from layerlens import AsyncAtlas import asyncio async def fetch_evaluation_results(): client = AsyncAtlas() - + # Get an evaluation evaluation = await client.evaluations.get_by_id("eval_12345") - + if evaluation: # Fetch first page of results asynchronously first_page_results = await evaluation.get_results_async(page=1, page_size=50) - + if first_page_results: return first_page_results - + return [] results = asyncio.run(fetch_evaluation_results()) ``` ### `get_all_results(timeout=None)` + Fetches all results for this evaluation by automatically handling pagination. This is a synchronous method. 
#### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| --------- | -------------------------------- | -------- | ------------------------ | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns @@ -322,7 +319,7 @@ Returns a list of `Result` objects containing all results for the evaluation. #### Example ```python -from atlas import Atlas +from layerlens import Atlas client = Atlas() @@ -332,20 +329,19 @@ evaluation = client.evaluations.get_by_id("eval_12345") if evaluation: # Fetch all results (handles pagination automatically) all_results = evaluation.get_all_results() - + print(f"Retrieved {len(all_results)} total results") ``` - ### `get_all_results_async(timeout=None)` Asynchronously fetches all results for this evaluation by automatically handling pagination. #### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| --------- | -------------------------------- | -------- | ------------------------ | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns @@ -354,59 +350,57 @@ Returns a list of `Result` objects containing all results for the evaluation. 
#### Example ```python -from atlas import AsyncAtlas +from layerlens import AsyncAtlas import asyncio async def fetch_all_evaluation_results(): client = AsyncAtlas() - + # Get an evaluation evaluation = await client.evaluations.get_by_id("eval_12345") - + if evaluation: # Fetch all results asynchronously (handles pagination automatically) all_results = await evaluation.get_all_results_async() - + print(f"Retrieved {len(all_results)} total results") - + return all_results - + return None results = asyncio.run(fetch_all_evaluation_results()) ``` - ## Response Objects The `create`, `get_by_id` and `get_many` method returns an `Evaluation` objects with the following properties: ### Evaluation Object Properties -| Property | Type | Description | -|----------|------|-------------| -| `id` | `str` | Unique evaluation identifier | -| `status` | `EvaluationStatus` | Current evaluation status (enum) | -| `submitted_at` | `int` | Unix timestamp when evaluation was submitted | -| `finished_at` | `int` | Unix timestamp when evaluation finished | -| `model_id` | `str` | ID of the model used in the evaluation | -| `benchmark_id` | `str` | ID of the benchmark used (aliased as "dataset_id" in API) | -| `average_duration` | `int` | Average response time in milliseconds | -| `accuracy` | `float` | Overall accuracy score (0.0 to 1.0) | - +| Property | Type | Description | +| ------------------ | ------------------ | --------------------------------------------------------- | +| `id` | `str` | Unique evaluation identifier | +| `status` | `EvaluationStatus` | Current evaluation status (enum) | +| `submitted_at` | `int` | Unix timestamp when evaluation was submitted | +| `finished_at` | `int` | Unix timestamp when evaluation finished | +| `model_id` | `str` | ID of the model used in the evaluation | +| `benchmark_id` | `str` | ID of the benchmark used (aliased as "dataset_id" in API) | +| `average_duration` | `int` | Average response time in milliseconds | +| `accuracy` | `float` | 
Overall accuracy score (0.0 to 1.0) | #### Evaluation Status The `status` field is an `EvaluationStatus` enum with the following values: -| Status | Description | -|--------|-------------| -| `"pending"` | Evaluation queued but not yet started | -| `"in-progress"` | Evaluation currently in progress | -| `"paused"` | Evaluation has been paused | -| `"success"` | Evaluation finished successfully | -| `"failure"` | Evaluation failed due to an error | - +| Status | Description | +| --------------- | ------------------------------------- | +| `"pending"` | Evaluation queued but not yet started | +| `"in-progress"` | Evaluation currently in progress | +| `"paused"` | Evaluation has been paused | +| `"success"` | Evaluation finished successfully | +| `"failure"` | Evaluation failed due to an error | ## Next Steps + - Explore [code examples](../examples/retrieving-results.md) for common analysis patterns diff --git a/docs/api-reference/models-benchmarks.md b/docs/api-reference/models-benchmarks.md index ba6a433..eb45fe3 100644 --- a/docs/api-reference/models-benchmarks.md +++ b/docs/api-reference/models-benchmarks.md @@ -11,6 +11,7 @@ Running evaluations on the atlas platform require a model and benchmark to be se ### Finding Available Models and Benchmarks #### 1. Using the Atlas Dashboard + The most reliable way to find available models and benchmarks: 1. Log into your Atlas dashboard. @@ -20,7 +21,7 @@ The most reliable way to find available models and benchmarks: #### 2. Using the python sdk ```python -from atlas import Atlas +from layerlens import Atlas # Construct sync client (API key from env or inline) client = Atlas() @@ -32,7 +33,6 @@ models = client.models.get() benchmarks = client.benchmarks.get() ``` - ## Models ### `get(type=None, name=None, companies=None, regions=None, licenses=None, timeout=None)` @@ -41,33 +41,29 @@ Retrieves a list of available models with optional filtering parameters. 
Both th #### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `type` | `Literal["custom", "public"] \| None` | No | Filter by model type. If `None`, returns both custom and public models | -| `name` | `str \| None` | No | Filter models by name (partial match search) | -| `companies` | `List[str] \| None` | No | Filter by model companies/providers | -| `regions` | `List[str] \| None` | No | Filter by supported regions | -| `licenses` | `List[str] \| None` | No | Filter by license types | -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| ----------- | ------------------------------------- | -------- | ---------------------------------------------------------------------- | +| `type` | `Literal["custom", "public"] \| None` | No | Filter by model type. If `None`, returns both custom and public models | +| `name` | `str \| None` | No | Filter models by name (partial match search) | +| `companies` | `List[str] \| None` | No | Filter by model companies/providers | +| `regions` | `List[str] \| None` | No | Filter by supported regions | +| `licenses` | `List[str] \| None` | No | Filter by license types | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns Returns an `Optional[List[Model]]` - a list of `Model` objects that match the filter criteria. Returns an empty list `[]` if no models match the criteria, or `None` if there's an error. 
- #### Model Object Properties Each `Model` object in the returned list contains: -| Property | Type | Description | -|----------|------|-------------| -| `id` | `str` | Unique model identifier for use in evaluations | -| `name` | `str` | Human-readable model name | -| `key` | `str` | Unique model key/identifier that is similar to the name | -| `description` | `str` | Text description of the model | - - - +| Property | Type | Description | +| ------------- | ----- | ------------------------------------------------------- | +| `id` | `str` | Unique model identifier for use in evaluations | +| `name` | `str` | Human-readable model name | +| `key` | `str` | Unique model key/identifier that is similar to the name | +| `description` | `str` | Text description of the model | ## Benchmarks @@ -77,11 +73,11 @@ Retrieves a list of available benchmarks with optional filtering parameters. Bot #### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `type` | `Literal["custom", "public"] \| None` | No | Filter by benchmark type. If `None`, returns both custom and public benchmarks | -| `name` | `str \| None` | No | Filter benchmarks by name (partial match search) | -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| --------- | ------------------------------------- | -------- | ------------------------------------------------------------------------------ | +| `type` | `Literal["custom", "public"] \| None` | No | Filter by benchmark type. 
If `None`, returns both custom and public benchmarks | +| `name` | `str \| None` | No | Filter benchmarks by name (partial match search) | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns @@ -89,13 +85,12 @@ Returns a `List[Benchmark]` containing available benchmarks that match the filte Returns `Optional[List[Benchmark]]` - a list of `Benchmark` objects that match the filter criteria. Returns an empty list `[]` if no benchmarks match the criteria, or `None` if there's an error. - #### Benchmark Object Properties Each `Benchmark` object in the returned list contains: -| Property | Type | Description | -|----------|------|-------------| -| `id` | `str` | Unique benchmark identifier for use in evaluations | -| `key` | `str` | Unique benchmark key/identifier that is similar to the name | -| `name` | `str` | Human-readable benchmark name | \ No newline at end of file +| Property | Type | Description | +| -------- | ----- | ----------------------------------------------------------- | +| `id` | `str` | Unique benchmark identifier for use in evaluations | +| `key` | `str` | Unique benchmark key/identifier that is similar to the name | +| `name` | `str` | Human-readable benchmark name | diff --git a/docs/api-reference/results.md b/docs/api-reference/results.md index 59840fc..33ed755 100644 --- a/docs/api-reference/results.md +++ b/docs/api-reference/results.md @@ -2,7 +2,6 @@ The `results` resource allows you to retrieve detailed results from completed or partially completed evaluations. This provides granular insight into how your model performed on individual test cases. - ## Overview Results contain detailed information about each test case in an evaluation. 
@@ -17,10 +16,10 @@ Retrieves all results for a specific evaluation by automatically iterating throu #### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `evaluation` | `Evaluation` | Yes | The evaluation object to get results for | -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| ------------ | -------------------------------- | -------- | ---------------------------------------- | +| `evaluation` | `Evaluation` | Yes | The evaluation object to get results for | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns @@ -29,7 +28,7 @@ Returns a `List[Result]` containing all result objects across all pages. Returns #### Example ```python -from atlas import Atlas +from layerlens import Atlas client = Atlas() @@ -40,29 +39,29 @@ if not evaluation: else: # Get all results at once all_results = client.results.get_all(evaluation=evaluation) - + print(f"Retrieved {len(all_results)} total results") ``` #### Async Usage ```python -from atlas import AsyncAtlas +from layerlens import AsyncAtlas import asyncio async def get_all_results(): client = AsyncAtlas() - + # Get evaluation first evaluation = await client.evaluations.get_by_id("eval_12345") if not evaluation: print("Evaluation not found") return - + # Get all results asynchronously all_results = await client.results.get_all(evaluation=evaluation) print(f"Retrieved {len(all_results)} total results asynchronously") - + return all_results # Run the async fetching of results @@ -75,10 +74,10 @@ Retrieves all results for a specific evaluation by evaluation ID, automatically #### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `evaluation_id` | `str` | Yes | The evaluation identifier to get results for | -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| 
Parameter | Type | Required | Description | +| --------------- | -------------------------------- | -------- | -------------------------------------------- | +| `evaluation_id` | `str` | Yes | The evaluation identifier to get results for | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns @@ -87,7 +86,7 @@ Returns a `List[Result]` containing all result objects across all pages. Returns #### Example ```python -from atlas import Atlas +from layerlens import Atlas client = Atlas() @@ -98,18 +97,18 @@ all_results = client.results.get_all_by_id(evaluation_id="eval_12345") #### Async Usage ```python -from atlas import AsyncAtlas +from layerlens import AsyncAtlas import asyncio async def get_all_results(): client = AsyncAtlas() - + # Get all results asynchronously all_results = await client.results.get_all_by_id(evaluation_id="eval_12345") - + if all_results: print(f"Retrieved {len(all_results)} total results") - + else: print("No results found") @@ -123,18 +122,19 @@ Retrieves detailed results for a specific evaluation with optional pagination su #### Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `evaluation_id` | `str` | Yes | The evaluation identifier to get results for | -| `page` | `int \| None` | No | Page number for pagination. If not provided, returns first page is returned by default | -| `page_size` | `int \| None` | No | Number of results per page (default: 100). Maximum allowed page_size is 500 | -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| --------------- | -------------------------------- | -------- | -------------------------------------------------------------------------------------- | +| `evaluation_id` | `str` | Yes | The evaluation identifier to get results for | +| `page` | `int \| None` | No | Page number for pagination. 
If not provided, the first page is returned by default | +| `page_size` | `int \| None` | No | Number of results per page (default: 100). Maximum allowed page_size is 500 | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns Returns a `ResultsResponse` object containing results, evaluation metadata, and pagination information if successful, `None` if no results are found or the evaluation doesn't exist. The `ResultsResponse` object includes: + - `results`: List of `Result` objects for the current page - `evaluation_id`: The evaluation ID - `pagination`: Pagination metadata (total_count, page_size, total_pages) @@ -142,8 +142,9 @@ The `ResultsResponse` object includes: #### Examples ##### Basic Usage (All Results) + ```python -from atlas import Atlas +from layerlens import Atlas client = Atlas() @@ -156,7 +157,7 @@ if results_data: print(f"Total available: {results_data.pagination.total_count}") print(f"Page size: {results_data.pagination.page_size}") print(f"Total pages: {results_data.pagination.total_pages}") - + # Access individual results for i, result in enumerate(results_data.results[:3]): # Show first 3 print(f"\nResult {i+1}:") @@ -168,6 +169,7 @@ else: ``` ##### Paginated Access + ```python # Get specific page with custom page size results_data = client.results.get( @@ -179,7 +181,7 @@ results_data = client.results.get( if results_data: print(f"Page 2 of {results_data.pagination.total_pages}") print(f"Showing {len(results_data.results)} of {results_data.pagination.total_count} total results") - + # Process current page for result in results_data.results: # Process each result @@ -187,6 +189,7 @@ if results_data: ``` ##### Iterating Through All Pages + ```python # Process all results by iterating through pages evaluation_id = "eval_12345" @@ -199,17 +202,17 @@ while True: page=page, page_size=page_size ) - + if not results_data or not results_data.results: break - + print(f"Processing page
{page}/{results_data.pagination.total_pages}") - + # Process current page results for result in results_data.results: # Your processing logic here pass - + # Move to next page if page >= results_data.pagination.total_pages: break @@ -218,7 +221,6 @@ while True: print("Finished processing all results") ``` - ## Pagination Information The `pagination` object in the response provides detailed pagination metadata: @@ -228,18 +230,18 @@ results_data = client.results.get(evaluation_id="eval_12345", page=1, page_size= if results_data: pagination = results_data.pagination - + print(f"Current page info:") print(f" Total results available: {pagination.total_count}") print(f" Results per page: {pagination.page_size}") print(f" Total pages: {pagination.total_pages}") print(f" Results on current page: {len(results_data.results)}") - + # Calculate current page number (if needed) # Page number isn't stored in pagination object, so track it yourself current_page = 1 # You would track this in your code print(f" Current page: {current_page}") - + # Check if there are more pages has_more_pages = current_page < pagination.total_pages print(f" Has more pages: {has_more_pages}") @@ -247,11 +249,11 @@ if results_data: ### Pagination Properties -| Property | Type | Description | -|----------|------|-------------| -| `total_count` | `int` | Total number of results available across all pages | -| `page_size` | `int` | Number of results per page (as requested or default) | -| `total_pages` | `int` | Total number of pages available | +| Property | Type | Description | +| ------------- | ----- | ---------------------------------------------------- | +| `total_count` | `int` | Total number of results available across all pages | +| `page_size` | `int` | Number of results per page (as requested or default) | +| `total_pages` | `int` | Total number of pages available | ## Result Object @@ -259,21 +261,21 @@ Each `Result` object contains the following properties: ### Core Properties -| Property | 
Type | Description | -|----------|------|-------------| -| `subset` | `str` | The benchmark subset or category this test case belongs to | -| `prompt` | `str` | The input prompt given to the model | -| `result` | `str` | The model's response/output | -| `truth` | `str` | The expected or correct answer | -| `score` | `float` | Individual score for this test case (typically 0.0 to 1.0) | -| `duration` | `timedelta` | Time taken for the model to respond | -| `metrics` | `Dict[str, float]` | Additional metrics specific to this test case | +| Property | Type | Description | +| ---------- | ------------------ | ---------------------------------------------------------- | +| `subset` | `str` | The benchmark subset or category this test case belongs to | +| `prompt` | `str` | The input prompt given to the model | +| `result` | `str` | The model's response/output | +| `truth` | `str` | The expected or correct answer | +| `score` | `float` | Individual score for this test case (typically 0.0 to 1.0) | +| `duration` | `timedelta` | Time taken for the model to respond | +| `metrics` | `Dict[str, float]` | Additional metrics specific to this test case | ### Understanding Properties - **`subset`**: Groups related test cases (e.g., "elementary_mathematics", "world_history") - **`prompt`**: The exact input sent to the model -- **`result`**: The model's actual response +- **`result`**: The model's actual response - **`truth`**: The ground truth or expected answer for comparison - **`score`**: Individual test case score, usually binary (0.0 or 1.0) for correctness - **`duration`**: Response latency as a Python `timedelta` object @@ -282,21 +284,22 @@ Each `Result` object contains the following properties: ## Working with Large Result Sets ### Fetching results async + Results can contain thousands of individual test cases. 
Consider using the async client to load results asynchronously: ```python import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def fetch_results_async(): async_client = AsyncAtlas() - + # Get evaluation first evaluation = await async_client.evaluations.get_by_id("eval_12345") if not evaluation: print("Evaluation not found") return None - + # async results fetching all pages of results results = await async_client.results.get_all(evaluation=evaluation) if results: @@ -309,4 +312,5 @@ asyncio.run(fetch_results_async()) ``` ## Next Steps -- Explore [code examples](../examples/retrieving-results.md) for common analysis patterns \ No newline at end of file + +- Explore [code examples](../examples/retrieving-results.md) for common analysis patterns diff --git a/docs/examples/creating-evaluations.md b/docs/examples/creating-evaluations.md index 9364602..5370bc6 100644 --- a/docs/examples/creating-evaluations.md +++ b/docs/examples/creating-evaluations.md @@ -1,6 +1,6 @@ # Creating Evaluations -Examples for creating evaluations on the Atlas platform using the Layerlens python sdk. +Examples for creating evaluations on the Atlas platform using the Layerlens python sdk. > Before running the below examples ensure the model and benchmark being run are present on your organiztion. @@ -11,7 +11,7 @@ Examples for creating evaluations on the Atlas platform using the Layerlens pyth Below is an example showing how to trigger an evaluation, waiting for it to complete and finally fetching the evaluations results. 
```python -from atlas import Atlas +from layerlens import Atlas # Construct sync client (API key from env or inline) client = Atlas() @@ -58,7 +58,7 @@ if evaluation.is_success: ```python import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def main(): @@ -89,29 +89,28 @@ if __name__ == "__main__": asyncio.run(main()) ``` - ## Error Handling ```python -from atlas import Atlas -import atlas +from layerlens import Atlas +import layerlens client = Atlas() try: models = client.models.get() benchmarks = client.benchmarks.get() - + evaluation = client.evaluations.create( model=models[0], benchmark=benchmarks[0] ) - -except atlas.AuthenticationError: + +except layerlens.AuthenticationError: print("Check your API key") -except atlas.NotFoundError: +except layerlens.NotFoundError: print("Model or benchmark not found") -except atlas.APIError as e: +except layerlens.APIError as e: print(f"API error: {e}") ``` @@ -120,7 +119,7 @@ except atlas.APIError as e: ```python import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def create_and_run_evaluation(client, model, benchmark, eval_number): @@ -187,7 +186,7 @@ if __name__ == "__main__": import asyncio -from atlas import AsyncAtlas +from layerlens import AsyncAtlas async def fetch_evaluation_results(client, evaluation_id): @@ -228,4 +227,4 @@ async def main(): if __name__ == "__main__": asyncio.run(main()) -``` \ No newline at end of file +``` diff --git a/docs/security/api-key-management.md b/docs/security/api-key-management.md index 8a27d83..bf5242f 100644 --- a/docs/security/api-key-management.md +++ b/docs/security/api-key-management.md @@ -32,7 +32,7 @@ API keys are sensitive credentials that provide access to your Atlas organizatio ```python import os -from atlas import Atlas +from layerlens import Atlas # Secure: Load from environment variables client = Atlas( @@ -80,7 +80,7 @@ import os # Load environment variables from .env file load_dotenv() -from atlas 
import Atlas +from layerlens import Atlas # Now environment variables are available client = Atlas() diff --git a/docs/security/environment-variables.md b/docs/security/environment-variables.md index 1236903..2830e42 100644 --- a/docs/security/environment-variables.md +++ b/docs/security/environment-variables.md @@ -121,7 +121,7 @@ load_dotenv('.env.development') load_dotenv('/path/to/your/.env') # Verify variables are loaded -from atlas import Atlas +from layerlens import Atlas try: client = Atlas() # Will use environment variables diff --git a/docs/security/rate-limiting.md b/docs/security/rate-limiting.md index bb4eab9..8761266 100644 --- a/docs/security/rate-limiting.md +++ b/docs/security/rate-limiting.md @@ -9,20 +9,20 @@ This guide covers how to handle rate limiting when using the Atlas Python SDK, i When you exceed rate limits, the API returns a `429 Too Many Requests` status: ```python -import atlas -from atlas import Atlas +import layerlens +from layerlens import Atlas try: client = Atlas() - + # Making too many requests quickly for i in range(100): evaluation = client.evaluations.create( - model="gpt-4", + model="gpt-4", benchmark="mmlu" ) - -except atlas.RateLimitError as e: + +except layerlens.RateLimitError as e: print(f"Rate limited: {e}") print(f"Status code: {e.status_code}") # 429 print(f"Response headers: {dict(e.response.headers)}") diff --git a/src/README.md b/src/README.md index ece6659..2ae26e0 100644 --- a/src/README.md +++ b/src/README.md @@ -13,7 +13,7 @@ pip install atlas --index-url https://sdk.layerlens.ai ```python import os -from atlas import Atlas +from layerlens import Atlas client = Atlas( # This is the default and can be omitted @@ -30,15 +30,15 @@ print(response.output_text) ## Handling errors -When the library is unable to connect to the API (for example, due to network connection problems or a timeout), a subclass of `atlas.APIConnectionError` is raised. 
+When the library is unable to connect to the API (for example, due to network connection problems or a timeout), a subclass of `layerlens.APIConnectionError` is raised. -When the API returns a non-success status code (that is, 4xx or 5xx response), a subclass of `atlas.APIStatusError` is raised, containing `status_code` and `response` properties. +When the API returns a non-success status code (that is, 4xx or 5xx response), a subclass of `layerlens.APIStatusError` is raised, containing `status_code` and `response` properties. -All errors inherit from `atlas.APIError`. +All errors inherit from `layerlens.APIError`. ```python -import atlas -from atlas import Atlas +import layerlens +from layerlens import Atlas client = Atlas() @@ -47,12 +47,12 @@ try: model="random-model", benchmark="random-benchmark", ) -except atlas.APIConnectionError as e: +except layerlens.APIConnectionError as e: print("The server could not be reached") print(e.__cause__) # an underlying Exception, likely raised within httpx. -except atlas.RateLimitError as e: +except layerlens.RateLimitError as e: print("A 429 status code was received; we should back off a bit.") -except atlas.APIStatusError as e: +except layerlens.APIStatusError as e: print("Another non-200-range status code was received") print(e.status_code) print(e.response) @@ -76,7 +76,7 @@ Error codes are as follows: By default requests time out after 10 minutes. 
You can configure this with a `timeout` option, which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/timeouts/#fine-tuning-the-configuration) object: ```python -from atlas import Atlas +from layerlens import Atlas # Configure the default for all requests: client = Atlas( @@ -109,8 +109,8 @@ If you've upgraded to the latest version but aren't seeing any new features you You can determine the version that is being used at runtime with: ```py -import atlas -print(atlas.__version__) +import layerlens +print(layerlens.__version__) ``` ## Requirements From a71f369fec97b87af8040d7c0f132ddc07760cb8 Mon Sep 17 00:00:00 2001 From: Robert Leonard Date: Wed, 27 Aug 2025 10:39:20 -0400 Subject: [PATCH 3/3] Update docs with get_by_id and get_by_key --- docs/README.md | 38 +++++++--------- docs/api-reference/models-benchmarks.md | 60 +++++++++++++++++++++++++ docs/examples/creating-evaluations.md | 38 ++++++++-------- 3 files changed, 95 insertions(+), 41 deletions(-) diff --git a/docs/README.md b/docs/README.md index 9bfbc6e..43a5907 100644 --- a/docs/README.md +++ b/docs/README.md @@ -38,21 +38,17 @@ from layerlens import Atlas # Construct sync client client = Atlas() - # --- Models replace with the model name you want to run - models = client.models.get(type="public", name="gpt-4o") + # --- Models replace with the model key you want to run + model = client.models.get_by_key("openai/gpt-4o") - if not models: - print("gpt-4o not found on organization, exiting") - - model = models[0] + if not model: + print("Model not found") # --- Benchmarks replace with the benchmark name you want to run - benchmarks = client.benchmarks.get(type="public", name="simpleQA") - - if not benchmarks: - print("SimpleQA benchmark not found on organization, exiting") + benchmark = client.benchmarks.get_by_key("aime2024") - benchmark = benchmarks[0] + if not benchmark: + print("benchmark not found") # --- Create evaluation evaluation = client.evaluations.create( @@ -71,24 
+67,20 @@ async def run_evaluation_async(): # Construct async client client = AsyncAtlas() - # --- Models replace with the model name you want to run - models = await client.models.get(type="public",name="gpt-4o") - print(f"Models found: {models}") + # --- Model to use + model = await client.models.get_by_key("openai/gpt-4o") - if not models: - print("gpt-4o not found, exiting") + if not model: + print("Model not found") return - model = models[0] - # --- Benchmarks replace with the benchmark name you want to run - benchmarks = await client.benchmarks.get(type="public", name="simpleQA") + # --- Benchmark to use + benchmark = await client.benchmarks.get_by_key("aime2024") - if not benchmarks: - print("SimpleQA benchmark not found, exiting") + if not benchmark: + print("benchmark not found") return - benchmark = benchmarks[0] - # --- Create evaluation evaluation = await client.evaluations.create( model=model, diff --git a/docs/api-reference/models-benchmarks.md b/docs/api-reference/models-benchmarks.md index eb45fe3..6844f12 100644 --- a/docs/api-reference/models-benchmarks.md +++ b/docs/api-reference/models-benchmarks.md @@ -65,6 +65,36 @@ Each `Model` object in the returned list contains: | `key` | `str` | Unique model key/identifier that is similar to the name | | `description` | `str` | Text description of the model | +### `get_by_id(id, timeout=None)` + +Retrieves a specific model by its unique identifier. Both the `Atlas` and `AsyncAtlas` clients have this method. + +#### Parameters + +| Parameter | Type | Required | Description | +| --------- | -------------------------------- | -------- | ---------------------------- | +| `id` | `str` | Yes | Unique model identifier | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | + +#### Returns + +Returns an `Optional[Model]` - a single `Model` object if found, or `None` if the model doesn't exist or there's an error. 
+ +### `get_by_key(key, timeout=None)` + +Retrieves a specific model by its unique key. Both the `Atlas` and `AsyncAtlas` clients have this method. + +#### Parameters + +| Parameter | Type | Required | Description | +| --------- | -------------------------------- | -------- | ---------------------------- | +| `key` | `str` | Yes | Unique model key identifier | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | + +#### Returns + +Returns an `Optional[Model]` - a single `Model` object if found, or `None` if the model doesn't exist or there's an error. + ## Benchmarks ### `get(type=None, name=None, timeout=None)` @@ -94,3 +124,33 @@ Each `Benchmark` object in the returned list contains: | `id` | `str` | Unique benchmark identifier for use in evaluations | | `key` | `str` | Unique benchmark key/identifier that is similar to the name | | `name` | `str` | Human-readable benchmark name | + +### `get_by_id(id, timeout=None)` + +Retrieves a specific benchmark by its unique identifier. Both the `Atlas` and `AsyncAtlas` clients have this method. + +#### Parameters + +| Parameter | Type | Required | Description | +| --------- | -------------------------------- | -------- | ------------------------------- | +| `id` | `str` | Yes | Unique benchmark identifier | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | + +#### Returns + +Returns an `Optional[Benchmark]` - a single `Benchmark` object if found, or `None` if the benchmark doesn't exist or there's an error. + +### `get_by_key(key, timeout=None)` + +Retrieves a specific benchmark by its unique key. Both the `Atlas` and `AsyncAtlas` clients have this method. 
+ +#### Parameters + +| Parameter | Type | Required | Description | +| --------- | -------------------------------- | -------- | ---------------------------------- | +| `key` | `str` | Yes | Unique benchmark key identifier | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | + +#### Returns + +Returns an `Optional[Benchmark]` - a single `Benchmark` object if found, or `None` if the benchmark doesn't exist or there's an error. diff --git a/docs/examples/creating-evaluations.md b/docs/examples/creating-evaluations.md index 5370bc6..8b4c9e6 100644 --- a/docs/examples/creating-evaluations.md +++ b/docs/examples/creating-evaluations.md @@ -16,21 +16,17 @@ from layerlens import Atlas # Construct sync client (API key from env or inline) client = Atlas() -# --- Models -models = client.models.get(type="public", name="gpt-4o") +# --- Models replace with the model key you want to run +model = client.models.get_by_key("openai/gpt-4o") -if not models: - print("gpt-4o not found") +if not model: + print("Model not found") -model = models[0] +# --- Benchmarks replace with the benchmark key you want to run +benchmark = client.benchmarks.get_by_key("aime2024") -# --- Benchmarks -benchmarks = client.benchmarks.get(type="public", name="simpleQA") - -if not benchmarks: - print("SimpleQA benchmark not found, exiting") - -benchmark = benchmarks[0] +if not benchmark: + print("benchmark not found") # --- Create evaluation evaluation = client.evaluations.create( @@ -65,13 +61,19 @@ async def main(): # Construct async client client = AsyncAtlas() - # --- Models - models = await client.models.get(type="public", name="gpt-4o") - model = models[0] + # --- Model to use + model = await client.models.get_by_key("openai/gpt-4o") - # --- Benchmarks - benchmarks = await client.benchmarks.get(type="public", name="simpleQA") - benchmark = benchmarks[0] + if not model: + print("Model not found") + return + + # --- Benchmark to use + benchmark = await
client.benchmarks.get_by_key("aime2024") + + if not benchmark: + print("benchmark not found") + return # --- Create evaluation