diff --git a/.github/workflows/check-format.yaml b/.github/workflows/check-format.yaml new file mode 100644 index 0000000..be3cd04 --- /dev/null +++ b/.github/workflows/check-format.yaml @@ -0,0 +1,37 @@ +name: Check Format + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + format: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v5 + + - name: Install Rye + uses: eifinger/setup-rye@v4 + with: + version: latest + + - name: Sync Rye environment + run: rye sync + + - name: Run format script and check output + run: | + set -e # exit on any command failure + OUTPUT=$(./scripts/format 2>&1) + echo "$OUTPUT" + + # Fail only if "reformatted" exists + if echo "$OUTPUT" | grep -q "reformatted"; then + echo "Some files were reformatted. Please run './scripts/format' locally and commit changes." + exit 1 + fi diff --git a/.github/workflows/check-lint.yaml b/.github/workflows/check-lint.yaml new file mode 100644 index 0000000..55ce38e --- /dev/null +++ b/.github/workflows/check-lint.yaml @@ -0,0 +1,28 @@ +name: Check Lint + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + format: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v5 + + - name: Install Rye + uses: eifinger/setup-rye@v4 + with: + version: latest + + - name: Sync Rye environment + run: rye sync + + - name: Check lint + run: ./scripts/lint diff --git a/.gitignore b/.gitignore index efd18f1..10ac96c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ node_modules/ +venv/ .prism.log _dev diff --git a/examples/async_run_evaluations.py b/examples/async_run_evaluations.py index d71acff..b850d7b 100644 --- a/examples/async_run_evaluations.py +++ b/examples/async_run_evaluations.py @@ -9,7 +9,7 @@ async def create_and_run_evaluation(client, model, benchmark, eval_number): """Create and run a single evaluation, tracking progress.""" try: print(f"Starting evaluation 
#{eval_number}...") - + # Create evaluation evaluation = await client.evaluations.create(model=model, benchmark=benchmark) print(f"✓ Created evaluation #{eval_number}: {evaluation.id}, status={evaluation.status}") @@ -18,7 +18,7 @@ async def create_and_run_evaluation(client, model, benchmark, eval_number): evaluation = await client.evaluations.wait_for_completion( evaluation, interval_seconds=10, - timeout_seconds=600 # 10 minutes + timeout_seconds=600, # 10 minutes ) print(f"✓ Evaluation #{eval_number} ({evaluation.id}) finished with status={evaluation.status}") @@ -30,7 +30,7 @@ async def create_and_run_evaluation(client, model, benchmark, eval_number): else: print(f"✗ Evaluation #{eval_number} did not succeed") return eval_number, evaluation.id, 0, False - + except Exception as e: print(f"✗ Error in evaluation #{eval_number}: {e}") return eval_number, None, 0, False @@ -51,7 +51,7 @@ async def main(): # Use first model and benchmark for all evaluations target_model = models[0] target_benchmark = benchmarks[0] - + print(f"Using model: {target_model}") print(f"Using benchmark: {target_benchmark}") print("=" * 80) @@ -59,21 +59,18 @@ async def main(): # Create 3 evaluation tasks num_evaluations = 3 print(f"Starting {num_evaluations} evaluations in parallel...") - - tasks = [ - create_and_run_evaluation(client, target_model, target_benchmark, i + 1) - for i in range(num_evaluations) - ] + + tasks = [create_and_run_evaluation(client, target_model, target_benchmark, i + 1) for i in range(num_evaluations)] # Execute all evaluations concurrently results = await asyncio.gather(*tasks, return_exceptions=True) - + # Summary print("=" * 80) print("SUMMARY:") successful = 0 total_results = 0 - + for result in results: if isinstance(result, Exception): print(f"Exception occurred: {result}") @@ -85,7 +82,7 @@ async def main(): print(f"Evaluation #{eval_num} ({eval_id}): SUCCESS - {result_count} results") else: print(f"Evaluation #{eval_num} ({eval_id}): FAILED") - + 
print(f"\nOverall: {successful}/{num_evaluations} evaluations succeeded") print(f"Total results collected: {total_results}") diff --git a/examples/fetch_results_async.py b/examples/fetch_results_async.py index b434411..cc04232 100644 --- a/examples/fetch_results_async.py +++ b/examples/fetch_results_async.py @@ -11,13 +11,13 @@ async def fetch_evaluation_results(client, evaluation_id): print(f"Fetching evaluation {evaluation_id}...") evaluation = await client.evaluations.get_by_id(evaluation_id) print(f"Found evaluation {evaluation.id}, status={evaluation.status}") - + # Get all results for this evaluation results = await client.results.get_all(evaluation=evaluation) print(f"Loaded {len(results)} results for evaluation {evaluation_id}") print(f"Results for {evaluation_id}: {results}") print("-" * 80) - + return evaluation_id, results except Exception as e: print(f"Error fetching evaluation {evaluation_id}: {e}") @@ -30,23 +30,17 @@ async def main(): # List of evaluation IDs to fetch exmple - evaluation_ids = [ - "68a65a3de7ad047fb5d8e7d4", - "688a254c673f6b2835cc7278" - ] + evaluation_ids = ["68a65a3de7ad047fb5d8e7d4", "688a254c673f6b2835cc7278"] print(f"Starting async fetch for {len(evaluation_ids)} evaluations...") print("=" * 80) # Create tasks for concurrent execution - tasks = [ - fetch_evaluation_results(client, eval_id) - for eval_id in evaluation_ids - ] + tasks = [fetch_evaluation_results(client, eval_id) for eval_id in evaluation_ids] # Execute all tasks concurrently and print results as they complete results = await asyncio.gather(*tasks, return_exceptions=True) - + print("=" * 80) print("Summary:") successful = sum(1 for _, result in results if result is not None and not isinstance(result, Exception)) diff --git a/examples/get_benchmarks.py b/examples/get_benchmarks.py index bdbb9d1..6e5c6ef 100644 --- a/examples/get_benchmarks.py +++ b/examples/get_benchmarks.py @@ -21,5 +21,6 @@ async def main(): print(f"Found {len(benchmarks)} benchmarks with type 
{benchmark_type}") print(benchmarks) + if __name__ == "__main__": asyncio.run(main()) diff --git a/examples/get_evaluation.py b/examples/get_evaluation.py index 615e232..8af3940 100644 --- a/examples/get_evaluation.py +++ b/examples/get_evaluation.py @@ -15,5 +15,6 @@ async def main(): print(f"Found evaluation {evaluation.id}") print(evaluation) + if __name__ == "__main__": asyncio.run(main()) diff --git a/examples/get_models.py b/examples/get_models.py index 83061d0..6059f90 100644 --- a/examples/get_models.py +++ b/examples/get_models.py @@ -33,5 +33,6 @@ async def main(): print(f"Found {len(models)} models with type {model_type}") print(models) + if __name__ == "__main__": asyncio.run(main()) diff --git a/examples/paginated_results.py b/examples/paginated_results.py index 52e6cf9..d70defc 100644 --- a/examples/paginated_results.py +++ b/examples/paginated_results.py @@ -16,7 +16,7 @@ async def main(): # --- Benchmarks benchmarks = await client.benchmarks.get() print(f"Found {len(benchmarks)} benchmarks") - + # --- Create evaluation evaluation = await client.evaluations.create( model=models[0], @@ -37,58 +37,54 @@ async def main(): # --- Results with pagination if evaluation.is_success: print("Fetching all results with pagination...") - + all_results = [] page = 1 page_size = 50 - + while True: print(f"Fetching page {page} (page size: {page_size})...") - + # Get results for current page - results_data = await client.results.get_by_id( - evaluation_id=evaluation.id, - page=page, - page_size=page_size - ) - + results_data = await client.results.get_by_id(evaluation_id=evaluation.id, page=page, page_size=page_size) + if not results_data or not results_data.results: print("No more results to fetch") break - + # Add current page results to our collection all_results.extend(results_data.results) - + # Show progress if page == 1: total_count = results_data.pagination.total_count total_pages = results_data.pagination.total_pages print(f"Total results: {total_count:,}") 
print(f"Total pages: {total_pages}") - + print(f"Page {page}: Retrieved {len(results_data.results)} results") print(f"Running total: {len(all_results):,} results") - + # Check if we've reached the last page if page >= results_data.pagination.total_pages: print("Reached last page") break - + page += 1 - + # Summary of all results print(f"\n=== PAGINATION COMPLETE ===") print(f"Total results collected: {len(all_results):,}") - + if all_results: # Calculate some basic statistics correct_answers = sum(1 for r in all_results if r.score > 0.5) accuracy = correct_answers / len(all_results) avg_score = sum(r.score for r in all_results) / len(all_results) - + print(f"Overall accuracy: {accuracy:.1%} ({correct_answers:,}/{len(all_results):,})") print(f"Average score: {avg_score:.3f}") - + # Show a few example results print(f"\nFirst 3 results:") for i, result in enumerate(all_results[:3], 1): @@ -96,7 +92,7 @@ async def main(): print(f" Prompt: {result.prompt[:100]}...") print(f" Response: {result.result[:100]}...") print() - + else: print("Evaluation did not succeed, no results to show.") diff --git a/src/atlas/_client.py b/src/atlas/_client.py index 587fc7a..a1c2a6b 100644 --- a/src/atlas/_client.py +++ b/src/atlas/_client.py @@ -54,7 +54,7 @@ def __init__( if base_url is None: base_url = os.environ.get("LAYERLENS_ATLAS_BASE_URL") if base_url is None: - base_url = "https://8bg48mbhyi.execute-api.us-east-1.amazonaws.com/prod/api/v1" + base_url = "https://api.layerlens.ai/api/v1" super().__init__( base_url=base_url, @@ -196,7 +196,7 @@ def __init__( if base_url is None: base_url = os.environ.get("LAYERLENS_ATLAS_BASE_URL") if base_url is None: - base_url = "https://8bg48mbhyi.execute-api.us-east-1.amazonaws.com/prod/api/v1" + base_url = "https://api.layerlens.ai/api/v1" super().__init__(base_url=base_url, timeout=timeout) diff --git a/src/atlas/models/__init__.py b/src/atlas/models/__init__.py index f522371..e793879 100644 --- a/src/atlas/models/__init__.py +++ 
b/src/atlas/models/__init__.py @@ -6,6 +6,7 @@ BenchmarksResponse, EvaluationsResponse, OrganizationResponse, + CreateEvaluationsResponse, ) from .model import Model, CustomModel, PublicModel from .benchmark import Benchmark, CustomBenchmark, PublicBenchmark @@ -13,22 +14,23 @@ from .organization import Project, Organization __all__ = [ - "BenchmarksResponse", - "EvaluationsResponse", - "ModelsResponse", - "OrganizationResponse", - "ResultsResponse", "Benchmark", + "BenchmarksResponse", + "CreateEvaluationsResponse", "CustomBenchmark", - "PublicBenchmark", + "CustomModel", "Evaluation", "EvaluationStatus", - "Pagination", - "Result", - "ResultMetrics", + "EvaluationsResponse", "Model", - "CustomModel", - "PublicModel", + "ModelsResponse", "Organization", + "OrganizationResponse", + "Pagination", "Project", + "PublicBenchmark", + "PublicModel", + "Result", + "ResultMetrics", + "ResultsResponse", ] diff --git a/src/atlas/models/api.py b/src/atlas/models/api.py index bbb86ae..eba00db 100644 --- a/src/atlas/models/api.py +++ b/src/atlas/models/api.py @@ -19,10 +19,15 @@ class Data(BaseModel): data: Data -class EvaluationsResponse(BaseModel): +class CreateEvaluationsResponse(BaseModel): data: List[Evaluation] +class EvaluationsResponse(BaseModel): + evaluations: List[Evaluation] + pagination: Pagination + + class ModelsResponse(BaseModel): class Data(BaseModel): models: List[Model] diff --git a/src/atlas/resources/benchmarks/benchmarks.py b/src/atlas/resources/benchmarks/benchmarks.py index 33be623..d6239ab 100644 --- a/src/atlas/resources/benchmarks/benchmarks.py +++ b/src/atlas/resources/benchmarks/benchmarks.py @@ -16,7 +16,7 @@ def get( timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, type: Literal["custom", "public"] | None = None, name: Optional[str] = None, - ) -> List[Benchmark] | None: + ) -> Optional[List[Benchmark]]: base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/benchmarks" def fetch(bench_type: str) -> 
BenchmarksResponse | None: @@ -61,7 +61,7 @@ async def get( timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, type: Literal["custom", "public"] | None = None, name: Optional[str] = None, - ) -> List[Benchmark] | None: + ) -> Optional[List[Benchmark]]: base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/benchmarks" async def fetch(bench_type: str) -> Optional[BenchmarksResponse]: diff --git a/src/atlas/resources/evaluations/evaluations.py b/src/atlas/resources/evaluations/evaluations.py index c2664fe..c97162e 100644 --- a/src/atlas/resources/evaluations/evaluations.py +++ b/src/atlas/resources/evaluations/evaluations.py @@ -1,5 +1,6 @@ from __future__ import annotations +import math import time import asyncio from typing import Optional @@ -13,10 +14,15 @@ CustomModel, CustomBenchmark, EvaluationsResponse, + CreateEvaluationsResponse, ) from ..._resource import SyncAPIResource, AsyncAPIResource from ..._constants import DEFAULT_TIMEOUT +DEFAULT_PAGE = 1 +DEFAULT_PAGE_SIZE = 100 +MAX_PAGE_SIZE = 500 + class Evaluations(SyncAPIResource): def create( @@ -37,9 +43,9 @@ def create( } ], timeout=timeout, - cast_to=EvaluationsResponse, + cast_to=CreateEvaluationsResponse, ) - if isinstance(evaluations, EvaluationsResponse) and len(evaluations.data) > 0: + if isinstance(evaluations, CreateEvaluationsResponse) and len(evaluations.data) > 0: evaluation = evaluations.data[0] evaluation.attach_client(self._client) return evaluation @@ -69,6 +75,66 @@ def get_by_id( return evaluation return None + def get_many( + self, + *, + page: Optional[int] = None, + page_size: Optional[int] = None, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + ) -> Optional[EvaluationsResponse]: + """ + Get evaluations with optional pagination. 
+ + Args: + page: Page number for pagination (1-based, defaults to 1 if not provided) + page_size: Number of evaluations per page (defaults to 100; values are clamped to the range 1-500) + timeout: Request timeout + + Returns: + EvaluationsResponse object or None + """ + params = { + "organizationID": self._client.organization_id, + "projectID": self._client.project_id, + } + + effective_page_size = min(max(page_size, 1), MAX_PAGE_SIZE) if page_size is not None else DEFAULT_PAGE_SIZE + effective_page = page if page is not None else DEFAULT_PAGE + + params["page"] = str(effective_page) + params["pageSize"] = str(effective_page_size) + + resp = self._get( + f"/evaluations", + params=params, + timeout=timeout, + cast_to=dict, + ) + if not resp or not isinstance(resp, dict): + return None + + evaluations = [e if isinstance(e, Evaluation) else Evaluation(**e) for e in resp.get("evaluations", [])] + for e in evaluations: + e.attach_client(self._client) + + total_count = resp.get("total_count", 0) + total_pages = math.ceil(total_count / effective_page_size) if total_count > 0 and effective_page_size > 0 else 0 + + resp_with_pagination = { + "evaluations": evaluations, + "pagination": { + "page": effective_page, + "page_size": effective_page_size, + "total_pages": total_pages, + "total_count": total_count, + }, + } + + try: + return EvaluationsResponse.model_validate(resp_with_pagination) + except Exception: + return None + def wait_for_completion( self, evaluation: Evaluation, @@ -111,9 +177,9 @@ async def create( } ], timeout=timeout, - cast_to=EvaluationsResponse, + cast_to=CreateEvaluationsResponse, ) - if isinstance(evaluations, EvaluationsResponse) and len(evaluations.data) > 0: + if isinstance(evaluations, CreateEvaluationsResponse) and len(evaluations.data) > 0: evaluation = evaluations.data[0] evaluation.attach_client(self._client) return evaluation @@ -143,6 +209,66 @@ async def get_by_id( return evaluation return None + async def get_many( + self, + *, + page: Optional[int] = None, + 
page_size: Optional[int] = None, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + ) -> Optional[EvaluationsResponse]: + """ + Get evaluations with optional pagination. + + Args: + page: Page number for pagination (1-based, defaults to 1 if not provided) + page_size: Number of evaluations per page (defaults to 100; values are clamped to the range 1-500) + timeout: Request timeout + + Returns: + EvaluationsResponse object or None + """ + params = { + "organizationID": self._client.organization_id, + "projectID": self._client.project_id, + } + + effective_page_size = min(max(page_size, 1), MAX_PAGE_SIZE) if page_size is not None else DEFAULT_PAGE_SIZE + effective_page = page if page is not None else DEFAULT_PAGE + + params["page"] = str(effective_page) + params["pageSize"] = str(effective_page_size) + + resp = await self._get( + f"/evaluations", + params=params, + timeout=timeout, + cast_to=dict, + ) + if not resp or not isinstance(resp, dict): + return None + + evaluations = [e if isinstance(e, Evaluation) else Evaluation(**e) for e in resp.get("evaluations", [])] + for e in evaluations: + e.attach_client(self._client) + + total_count = resp.get("total_count", 0) + total_pages = math.ceil(total_count / effective_page_size) if total_count > 0 and effective_page_size > 0 else 0 + + resp_with_pagination = { + "evaluations": evaluations, + "pagination": { + "page": effective_page, + "page_size": effective_page_size, + "total_pages": total_pages, + "total_count": total_count, + }, + } + + try: + return EvaluationsResponse.model_validate(resp_with_pagination) + except Exception: + return None + async def wait_for_completion( self, evaluation: Evaluation, diff --git a/src/atlas/resources/models/models.py b/src/atlas/resources/models/models.py index af55ab6..d4d3ad3 100644 --- a/src/atlas/resources/models/models.py +++ b/src/atlas/resources/models/models.py @@ -19,7 +19,7 @@ def get( companies: Optional[List[str]] = None, regions: Optional[List[str]] = None, licenses: Optional[List[str]] = 
None, - ) -> List[Model] | None: + ) -> Optional[List[Model]]: base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/models" def fetch(model_type: str) -> ModelsResponse | None: @@ -73,7 +73,7 @@ async def get( companies: Optional[List[str]] = None, regions: Optional[List[str]] = None, licenses: Optional[List[str]] = None, - ) -> List[Model] | None: + ) -> Optional[List[Model]]: base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/models" async def fetch(model_type: str) -> ModelsResponse | None: diff --git a/src/atlas/resources/results/results.py b/src/atlas/resources/results/results.py index b970992..7d8e87b 100644 --- a/src/atlas/resources/results/results.py +++ b/src/atlas/resources/results/results.py @@ -49,7 +49,7 @@ def get_by_id( page: Optional[int] = None, page_size: Optional[int] = None, timeout: Optional[float | httpx.Timeout] = DEFAULT_TIMEOUT, - ) -> ResultsResponse | None: + ) -> Optional[ResultsResponse]: """ Get evaluation results with optional pagination. @@ -201,7 +201,7 @@ async def get_by_id( page: Optional[int] = None, page_size: Optional[int] = None, timeout: Optional[float | httpx.Timeout] = DEFAULT_TIMEOUT, - ) -> ResultsResponse | None: + ) -> Optional[ResultsResponse]: """ Get evaluation results with optional pagination. 
diff --git a/tests/resources/test_evaluations.py b/tests/resources/test_evaluations.py index d510d02..94aa776 100644 --- a/tests/resources/test_evaluations.py +++ b/tests/resources/test_evaluations.py @@ -3,7 +3,12 @@ import httpx import pytest -from atlas.models import Evaluation, EvaluationStatus, EvaluationsResponse +from atlas.models import ( + Evaluation, + EvaluationStatus, + EvaluationsResponse, + CreateEvaluationsResponse, +) from atlas._constants import DEFAULT_TIMEOUT from atlas.resources.evaluations.evaluations import Evaluations @@ -61,9 +66,9 @@ def sample_evaluation_data(self): @pytest.fixture def mock_evaluations_response(self, sample_evaluation_data): - """Mock EvaluationsResponse response.""" + """Mock CreateEvaluationsResponse response.""" evaluation = Evaluation(**sample_evaluation_data) - return EvaluationsResponse(data=[evaluation]) + return CreateEvaluationsResponse(data=[evaluation]) def test_evaluations_initialization(self, mock_client): """Evaluations resource initializes correctly.""" @@ -113,7 +118,7 @@ def test_create_evaluation_request_parameters( } ], timeout=DEFAULT_TIMEOUT, - cast_to=EvaluationsResponse, + cast_to=CreateEvaluationsResponse, ) def test_create_evaluation_with_custom_timeout( @@ -158,7 +163,7 @@ def test_create_evaluation_with_httpx_timeout( def test_create_evaluation_empty_response(self, mock_model, mock_benchmark, evaluations_resource): """create method returns None when no evaluations in response.""" - empty_response = EvaluationsResponse(data=[]) + empty_response = CreateEvaluationsResponse(data=[]) evaluations_resource._post.return_value = empty_response result = evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) @@ -174,7 +179,7 @@ def test_create_evaluation_none_response(self, mock_model, mock_benchmark, evalu assert result is None def test_create_evaluation_invalid_response_type(self, mock_model, mock_benchmark, evaluations_resource): - """create method handles non-EvaluationsResponse 
response gracefully.""" + """create method handles non-CreateEvaluationsResponse response gracefully.""" evaluations_resource._post.return_value = "invalid-response" result = evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) @@ -190,7 +195,7 @@ def test_create_evaluation_multiple_evaluations_returns_first( eval2_data["id"] = "eval-456" eval2 = Evaluation(**eval2_data) - response = EvaluationsResponse(data=[eval1, eval2]) + response = CreateEvaluationsResponse(data=[eval1, eval2]) evaluations_resource._post.return_value = response result = evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) @@ -251,7 +256,7 @@ def test_create_evaluation_cast_to_parameter( evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) call_args = evaluations_resource._post.call_args - assert call_args.kwargs["cast_to"] is EvaluationsResponse + assert call_args.kwargs["cast_to"] is CreateEvaluationsResponse def test_create_evaluation_timeout_default( self, @@ -283,6 +288,37 @@ def test_create_evaluation_with_none_timeout( call_args = evaluations_resource._post.call_args assert call_args.kwargs["timeout"] is None + def test_get_all_returns_evaluations(self, evaluations_resource, mock_client, sample_evaluation_data): + """get_many returns an EvaluationsResponse when response is valid.""" + evaluation = Evaluation(**sample_evaluation_data) + response = {"evaluations": [evaluation], "total_count": 1} + evaluations_resource._get.return_value = response + + result = evaluations_resource.get_many() + + assert isinstance(result, EvaluationsResponse) + assert result.evaluations[0].id == "eval-123" + evaluations_resource._get.assert_called_once_with( + "/evaluations", + params={ + "organizationID": mock_client.organization_id, + "projectID": mock_client.project_id, + "page": "1", + "pageSize": "100", + }, + timeout=DEFAULT_TIMEOUT, + cast_to=dict, + ) + assert result.evaluations[0]._client is mock_client + + def 
test_get_all_returns_none_on_invalid_response(self, evaluations_resource): + """get_many returns None when the response is an invalid type.""" + evaluations_resource._get.return_value = "not-a-response" + + result = evaluations_resource.get_many() + + assert result is None + class TestEvaluationsErrorHandling: """Test error handling in Evaluations resource.""" @@ -390,7 +426,7 @@ def test_create_evaluation_end_to_end_flow(self): } evaluation = Evaluation(**evaluation_data) - response = EvaluationsResponse(data=[evaluation]) + response = CreateEvaluationsResponse(data=[evaluation]) mock_client.post_cast.return_value = response # Test the resource diff --git a/tests/test_integration.py b/tests/test_integration.py index 0ba831f..dede2de 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -11,7 +11,7 @@ Benchmark, Evaluation, EvaluationStatus, - EvaluationsResponse, + CreateEvaluationsResponse, ) @@ -172,7 +172,7 @@ def test_complete_evaluation_workflow(self, atlas_client): result = Result(**result_data) # Mock responses - evaluations_response = EvaluationsResponse(data=[evaluation]) + evaluations_response = CreateEvaluationsResponse(data=[evaluation]) with patch.object(atlas_client, "get_cast") as mock_get, patch.object(atlas_client, "post_cast") as mock_post: # Configure mocks for the workflow @@ -329,7 +329,7 @@ def test_evaluation_creation_with_model_and_benchmark_objects(self, atlas_client benchmark = Benchmark(**benchmark_data) evaluation = Evaluation(**evaluation_data) - evaluations_response = EvaluationsResponse(data=[evaluation]) + evaluations_response = CreateEvaluationsResponse(data=[evaluation]) with patch.object(atlas_client, "post_cast") as mock_post: mock_post.return_value = evaluations_response diff --git a/tests/test_models.py b/tests/test_models.py index 52bccdc..95470cf 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -16,7 +16,7 @@ ResultsResponse, EvaluationStatus, BenchmarksResponse, - EvaluationsResponse, + 
CreateEvaluationsResponse, ) @@ -108,7 +108,7 @@ def evaluation_data(self): def test_evaluations_with_list_of_evaluations(self, evaluation_data): """Evaluations model accepts list of Evaluation objects.""" evaluations_data = {"data": [evaluation_data, evaluation_data]} - evaluations = EvaluationsResponse(**evaluations_data) + evaluations = CreateEvaluationsResponse(**evaluations_data) assert len(evaluations.data) == 2 assert all(isinstance(eval, Evaluation) for eval in evaluations.data) @@ -116,7 +116,7 @@ def test_evaluations_with_list_of_evaluations(self, evaluation_data): def test_evaluations_empty_list(self): """Evaluations model accepts empty list.""" - evaluations = EvaluationsResponse(data=[]) + evaluations = CreateEvaluationsResponse(data=[]) assert evaluations.data == [] assert isinstance(evaluations.data, list) @@ -124,7 +124,7 @@ def test_evaluations_empty_list(self): def test_evaluations_invalid_data_structure(self): """Evaluations model validates data structure.""" with pytest.raises(ValidationError): - EvaluationsResponse(data="not-a-list") # type: ignore[arg-type] + CreateEvaluationsResponse(data="not-a-list") # type: ignore[arg-type] class TestResult: