diff --git a/.github/workflows/check-format.yaml b/.github/workflows/check-format.yaml
new file mode 100644
index 0000000..be3cd04
--- /dev/null
+++ b/.github/workflows/check-format.yaml
@@ -0,0 +1,37 @@
+name: Check Format
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  format:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v5
+
+      - name: Install Rye
+        uses: eifinger/setup-rye@v4
+        with:
+          version: latest
+
+      - name: Sync Rye environment
+        run: rye sync
+
+      - name: Run format script and check output
+        run: |
+          # Record the exit status instead of aborting so the output below is always printed.
+          STATUS=0
+          OUTPUT=$(./scripts/format 2>&1) || STATUS=$?
+          echo "$OUTPUT"
+          [ "$STATUS" -eq 0 ] || exit "$STATUS"
+          if echo "$OUTPUT" | grep -q "reformatted"; then
+            echo "Some files were reformatted. Please run './scripts/format' locally and commit changes."
+            exit 1
+          fi
diff --git a/.github/workflows/check-lint.yaml b/.github/workflows/check-lint.yaml
new file mode 100644
index 0000000..55ce38e
--- /dev/null
+++ b/.github/workflows/check-lint.yaml
@@ -0,0 +1,28 @@
+name: Check Lint
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  lint:  # job id is used as the check name when no `name` is set; keep it distinct from "format"
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v5
+
+      - name: Install Rye
+        uses: eifinger/setup-rye@v4
+        with:
+          version: latest
+
+      - name: Sync Rye environment
+        run: rye sync
+
+      - name: Check lint
+        run: ./scripts/lint
diff --git a/.gitignore b/.gitignore
index efd18f1..10ac96c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 node_modules/
+venv/
 
 .prism.log
 _dev
diff --git a/examples/async_run_evaluations.py b/examples/async_run_evaluations.py
index d71acff..b850d7b 100644
--- a/examples/async_run_evaluations.py
+++ b/examples/async_run_evaluations.py
@@ -9,7 +9,7 @@ async def create_and_run_evaluation(client, model, benchmark, eval_number):
     """Create and run a single evaluation, tracking progress."""
     try:
         print(f"Starting evaluation #{eval_number}...")
-        
+
         # Create evaluation
         evaluation = await client.evaluations.create(model=model, benchmark=benchmark)
         print(f"✓ Created evaluation #{eval_number}: {evaluation.id}, status={evaluation.status}")
@@ -18,7 +18,7 @@ async def create_and_run_evaluation(client, model, benchmark, eval_number):
         evaluation = await client.evaluations.wait_for_completion(
             evaluation,
             interval_seconds=10,
-            timeout_seconds=600  # 10 minutes
+            timeout_seconds=600,  # 10 minutes
         )
         print(f"✓ Evaluation #{eval_number} ({evaluation.id}) finished with status={evaluation.status}")
 
@@ -30,7 +30,7 @@ async def create_and_run_evaluation(client, model, benchmark, eval_number):
         else:
             print(f"✗ Evaluation #{eval_number} did not succeed")
             return eval_number, evaluation.id, 0, False
-            
+
     except Exception as e:
         print(f"✗ Error in evaluation #{eval_number}: {e}")
         return eval_number, None, 0, False
@@ -51,7 +51,7 @@ async def main():
     # Use first model and benchmark for all evaluations
     target_model = models[0]
     target_benchmark = benchmarks[0]
-    
+
     print(f"Using model: {target_model}")
     print(f"Using benchmark: {target_benchmark}")
     print("=" * 80)
@@ -59,21 +59,18 @@ async def main():
     # Create 3 evaluation tasks
     num_evaluations = 3
     print(f"Starting {num_evaluations} evaluations in parallel...")
-    
-    tasks = [
-        create_and_run_evaluation(client, target_model, target_benchmark, i + 1)
-        for i in range(num_evaluations)
-    ]
+
+    tasks = [create_and_run_evaluation(client, target_model, target_benchmark, i + 1) for i in range(num_evaluations)]
 
     # Execute all evaluations concurrently
     results = await asyncio.gather(*tasks, return_exceptions=True)
-    
+
     # Summary
     print("=" * 80)
     print("SUMMARY:")
     successful = 0
     total_results = 0
-    
+
     for result in results:
         if isinstance(result, Exception):
             print(f"Exception occurred: {result}")
@@ -85,7 +82,7 @@ async def main():
                 print(f"Evaluation #{eval_num} ({eval_id}): SUCCESS - {result_count} results")
             else:
                 print(f"Evaluation #{eval_num} ({eval_id}): FAILED")
-    
+
     print(f"\nOverall: {successful}/{num_evaluations} evaluations succeeded")
     print(f"Total results collected: {total_results}")
 
diff --git a/examples/fetch_results_async.py b/examples/fetch_results_async.py
index b434411..cc04232 100644
--- a/examples/fetch_results_async.py
+++ b/examples/fetch_results_async.py
@@ -11,13 +11,13 @@ async def fetch_evaluation_results(client, evaluation_id):
         print(f"Fetching evaluation {evaluation_id}...")
         evaluation = await client.evaluations.get_by_id(evaluation_id)
         print(f"Found evaluation {evaluation.id}, status={evaluation.status}")
-        
+
         # Get all results for this evaluation
         results = await client.results.get_all(evaluation=evaluation)
         print(f"Loaded {len(results)} results for evaluation {evaluation_id}")
         print(f"Results for {evaluation_id}: {results}")
         print("-" * 80)
-        
+
         return evaluation_id, results
     except Exception as e:
         print(f"Error fetching evaluation {evaluation_id}: {e}")
@@ -30,23 +30,17 @@ async def main():
 
     # List of evaluation IDs to fetch exmple
 
-    evaluation_ids = [
-        "68a65a3de7ad047fb5d8e7d4",
-        "688a254c673f6b2835cc7278"
-    ]
+    evaluation_ids = ["68a65a3de7ad047fb5d8e7d4", "688a254c673f6b2835cc7278"]
 
     print(f"Starting async fetch for {len(evaluation_ids)} evaluations...")
     print("=" * 80)
 
     # Create tasks for concurrent execution
-    tasks = [
-        fetch_evaluation_results(client, eval_id) 
-        for eval_id in evaluation_ids
-    ]
+    tasks = [fetch_evaluation_results(client, eval_id) for eval_id in evaluation_ids]
 
     # Execute all tasks concurrently and print results as they complete
     results = await asyncio.gather(*tasks, return_exceptions=True)
-    
+
     print("=" * 80)
     print("Summary:")
     successful = sum(1 for _, result in results if result is not None and not isinstance(result, Exception))
diff --git a/examples/get_benchmarks.py b/examples/get_benchmarks.py
index bdbb9d1..6e5c6ef 100644
--- a/examples/get_benchmarks.py
+++ b/examples/get_benchmarks.py
@@ -21,5 +21,6 @@ async def main():
     print(f"Found {len(benchmarks)} benchmarks with type {benchmark_type}")
     print(benchmarks)
 
+
 if __name__ == "__main__":
     asyncio.run(main())
diff --git a/examples/get_evaluation.py b/examples/get_evaluation.py
index 615e232..8af3940 100644
--- a/examples/get_evaluation.py
+++ b/examples/get_evaluation.py
@@ -15,5 +15,6 @@ async def main():
     print(f"Found evaluation {evaluation.id}")
     print(evaluation)
 
+
 if __name__ == "__main__":
     asyncio.run(main())
diff --git a/examples/get_models.py b/examples/get_models.py
index 83061d0..6059f90 100644
--- a/examples/get_models.py
+++ b/examples/get_models.py
@@ -33,5 +33,6 @@ async def main():
     print(f"Found {len(models)} models with type {model_type}")
     print(models)
 
+
 if __name__ == "__main__":
     asyncio.run(main())
diff --git a/examples/paginated_results.py b/examples/paginated_results.py
index 52e6cf9..d70defc 100644
--- a/examples/paginated_results.py
+++ b/examples/paginated_results.py
@@ -16,7 +16,7 @@ async def main():
     # --- Benchmarks
     benchmarks = await client.benchmarks.get()
     print(f"Found {len(benchmarks)} benchmarks")
-    
+
     # --- Create evaluation
     evaluation = await client.evaluations.create(
         model=models[0],
@@ -37,58 +37,54 @@ async def main():
     # --- Results with pagination
     if evaluation.is_success:
         print("Fetching all results with pagination...")
-        
+
         all_results = []
         page = 1
         page_size = 50
-        
+
         while True:
             print(f"Fetching page {page} (page size: {page_size})...")
-            
+
             # Get results for current page
-            results_data = await client.results.get_by_id(
-                evaluation_id=evaluation.id,
-                page=page,
-                page_size=page_size
-            )
-            
+            results_data = await client.results.get_by_id(evaluation_id=evaluation.id, page=page, page_size=page_size)
+
             if not results_data or not results_data.results:
                 print("No more results to fetch")
                 break
-            
+
             # Add current page results to our collection
             all_results.extend(results_data.results)
-            
+
             # Show progress
             if page == 1:
                 total_count = results_data.pagination.total_count
                 total_pages = results_data.pagination.total_pages
                 print(f"Total results: {total_count:,}")
                 print(f"Total pages: {total_pages}")
-            
+
             print(f"Page {page}: Retrieved {len(results_data.results)} results")
             print(f"Running total: {len(all_results):,} results")
-            
+
             # Check if we've reached the last page
             if page >= results_data.pagination.total_pages:
                 print("Reached last page")
                 break
-            
+
             page += 1
-        
+
         # Summary of all results
         print(f"\n=== PAGINATION COMPLETE ===")
         print(f"Total results collected: {len(all_results):,}")
-        
+
         if all_results:
             # Calculate some basic statistics
             correct_answers = sum(1 for r in all_results if r.score > 0.5)
             accuracy = correct_answers / len(all_results)
             avg_score = sum(r.score for r in all_results) / len(all_results)
-            
+
             print(f"Overall accuracy: {accuracy:.1%} ({correct_answers:,}/{len(all_results):,})")
             print(f"Average score: {avg_score:.3f}")
-            
+
             # Show a few example results
             print(f"\nFirst 3 results:")
             for i, result in enumerate(all_results[:3], 1):
@@ -96,7 +92,7 @@ async def main():
                 print(f"     Prompt: {result.prompt[:100]}...")
                 print(f"     Response: {result.result[:100]}...")
                 print()
-        
+
     else:
         print("Evaluation did not succeed, no results to show.")
 
diff --git a/src/atlas/_client.py b/src/atlas/_client.py
index 587fc7a..a1c2a6b 100644
--- a/src/atlas/_client.py
+++ b/src/atlas/_client.py
@@ -54,7 +54,7 @@ def __init__(
         if base_url is None:
             base_url = os.environ.get("LAYERLENS_ATLAS_BASE_URL")
         if base_url is None:
-            base_url = "https://8bg48mbhyi.execute-api.us-east-1.amazonaws.com/prod/api/v1"
+            base_url = "https://api.layerlens.ai/api/v1"
 
         super().__init__(
             base_url=base_url,
@@ -196,7 +196,7 @@ def __init__(
         if base_url is None:
             base_url = os.environ.get("LAYERLENS_ATLAS_BASE_URL")
         if base_url is None:
-            base_url = "https://8bg48mbhyi.execute-api.us-east-1.amazonaws.com/prod/api/v1"
+            base_url = "https://api.layerlens.ai/api/v1"
 
         super().__init__(base_url=base_url, timeout=timeout)
 
diff --git a/src/atlas/models/__init__.py b/src/atlas/models/__init__.py
index f522371..e793879 100644
--- a/src/atlas/models/__init__.py
+++ b/src/atlas/models/__init__.py
@@ -6,6 +6,7 @@
     BenchmarksResponse,
     EvaluationsResponse,
     OrganizationResponse,
+    CreateEvaluationsResponse,
 )
 from .model import Model, CustomModel, PublicModel
 from .benchmark import Benchmark, CustomBenchmark, PublicBenchmark
@@ -13,22 +14,23 @@
 from .organization import Project, Organization
 
 __all__ = [
-    "BenchmarksResponse",
-    "EvaluationsResponse",
-    "ModelsResponse",
-    "OrganizationResponse",
-    "ResultsResponse",
     "Benchmark",
+    "BenchmarksResponse",
+    "CreateEvaluationsResponse",
     "CustomBenchmark",
-    "PublicBenchmark",
+    "CustomModel",
     "Evaluation",
     "EvaluationStatus",
-    "Pagination",
-    "Result",
-    "ResultMetrics",
+    "EvaluationsResponse",
     "Model",
-    "CustomModel",
-    "PublicModel",
+    "ModelsResponse",
     "Organization",
+    "OrganizationResponse",
+    "Pagination",
     "Project",
+    "PublicBenchmark",
+    "PublicModel",
+    "Result",
+    "ResultMetrics",
+    "ResultsResponse",
 ]
diff --git a/src/atlas/models/api.py b/src/atlas/models/api.py
index bbb86ae..eba00db 100644
--- a/src/atlas/models/api.py
+++ b/src/atlas/models/api.py
@@ -19,10 +19,15 @@ class Data(BaseModel):
     data: Data
 
 
-class EvaluationsResponse(BaseModel):
+class CreateEvaluationsResponse(BaseModel):  # cast_to target for the request made by Evaluations.create
     data: List[Evaluation]
 
 
+class EvaluationsResponse(BaseModel):  # one page of evaluations, as returned by get_many
+    evaluations: List[Evaluation]
+    pagination: Pagination  # page / page_size / total_pages / total_count for that page
+
+
 class ModelsResponse(BaseModel):
     class Data(BaseModel):
         models: List[Model]
diff --git a/src/atlas/resources/benchmarks/benchmarks.py b/src/atlas/resources/benchmarks/benchmarks.py
index 33be623..d6239ab 100644
--- a/src/atlas/resources/benchmarks/benchmarks.py
+++ b/src/atlas/resources/benchmarks/benchmarks.py
@@ -16,7 +16,7 @@ def get(
         timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
         type: Literal["custom", "public"] | None = None,
         name: Optional[str] = None,
-    ) -> List[Benchmark] | None:
+    ) -> Optional[List[Benchmark]]:
         base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/benchmarks"
 
         def fetch(bench_type: str) -> BenchmarksResponse | None:
@@ -61,7 +61,7 @@ async def get(
         timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
         type: Literal["custom", "public"] | None = None,
         name: Optional[str] = None,
-    ) -> List[Benchmark] | None:
+    ) -> Optional[List[Benchmark]]:
         base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/benchmarks"
 
         async def fetch(bench_type: str) -> Optional[BenchmarksResponse]:
diff --git a/src/atlas/resources/evaluations/evaluations.py b/src/atlas/resources/evaluations/evaluations.py
index c2664fe..c97162e 100644
--- a/src/atlas/resources/evaluations/evaluations.py
+++ b/src/atlas/resources/evaluations/evaluations.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import math
 import time
 import asyncio
 from typing import Optional
@@ -13,10 +14,15 @@
     CustomModel,
     CustomBenchmark,
     EvaluationsResponse,
+    CreateEvaluationsResponse,
 )
 from ..._resource import SyncAPIResource, AsyncAPIResource
 from ..._constants import DEFAULT_TIMEOUT
 
+DEFAULT_PAGE = 1  # page sent by get_many when the caller passes none (pages are 1-based)
+DEFAULT_PAGE_SIZE = 100  # page size sent by get_many when the caller passes none
+MAX_PAGE_SIZE = 500  # upper bound get_many applies to a caller-supplied page_size
+
 
 class Evaluations(SyncAPIResource):
     def create(
@@ -37,9 +43,9 @@ def create(
                 }
             ],
             timeout=timeout,
-            cast_to=EvaluationsResponse,
+            cast_to=CreateEvaluationsResponse,
         )
-        if isinstance(evaluations, EvaluationsResponse) and len(evaluations.data) > 0:
+        if isinstance(evaluations, CreateEvaluationsResponse) and len(evaluations.data) > 0:
             evaluation = evaluations.data[0]
             evaluation.attach_client(self._client)
             return evaluation
@@ -69,6 +75,66 @@ def get_by_id(
             return evaluation
         return None
 
+    def get_many(
+        self,
+        *,
+        page: Optional[int] = None,
+        page_size: Optional[int] = None,
+        timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
+    ) -> Optional[EvaluationsResponse]:
+        """
+        Get one page of evaluations for the client's organization and project.
+
+        Args:
+            page: 1-based page number (default: 1; values below 1 are raised to 1)
+            page_size: Number of evaluations per page (default: 100; clamped to 1..500)
+            timeout: Request timeout
+
+        Returns:
+            EvaluationsResponse, or None if the response is empty, not a dict, or fails validation
+        """
+        effective_page_size = min(max(page_size, 1), MAX_PAGE_SIZE) if page_size is not None else DEFAULT_PAGE_SIZE
+        effective_page = max(page, 1) if page is not None else DEFAULT_PAGE
+
+        params = {
+            "organizationID": self._client.organization_id,
+            "projectID": self._client.project_id,
+            "page": str(effective_page),
+            "pageSize": str(effective_page_size),
+        }
+
+        resp = self._get(
+            "/evaluations",
+            params=params,
+            timeout=timeout,
+            cast_to=dict,
+        )
+        if not resp or not isinstance(resp, dict):
+            return None
+
+        # `or` rather than a .get() default, so keys that are present but null are handled too.
+        evaluations = [e if isinstance(e, Evaluation) else Evaluation(**e) for e in resp.get("evaluations") or []]
+        for e in evaluations:
+            e.attach_client(self._client)
+
+        total_count = resp.get("total_count") or 0
+        total_pages = math.ceil(total_count / effective_page_size) if total_count > 0 and effective_page_size > 0 else 0
+
+        resp_with_pagination = {
+            "evaluations": evaluations,
+            "pagination": {
+                "page": effective_page,
+                "page_size": effective_page_size,
+                "total_pages": total_pages,
+                "total_count": total_count,
+            },
+        }
+
+        try:
+            return EvaluationsResponse.model_validate(resp_with_pagination)
+        except Exception:
+            return None
+
     def wait_for_completion(
         self,
         evaluation: Evaluation,
@@ -111,9 +177,9 @@ async def create(
                 }
             ],
             timeout=timeout,
-            cast_to=EvaluationsResponse,
+            cast_to=CreateEvaluationsResponse,
         )
-        if isinstance(evaluations, EvaluationsResponse) and len(evaluations.data) > 0:
+        if isinstance(evaluations, CreateEvaluationsResponse) and len(evaluations.data) > 0:
             evaluation = evaluations.data[0]
             evaluation.attach_client(self._client)
             return evaluation
@@ -143,6 +209,66 @@ async def get_by_id(
             return evaluation
         return None
 
+    async def get_many(
+        self,
+        *,
+        page: Optional[int] = None,
+        page_size: Optional[int] = None,
+        timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
+    ) -> Optional[EvaluationsResponse]:
+        """
+        Get one page of evaluations for the client's organization and project.
+
+        Args:
+            page: 1-based page number (default: 1; values below 1 are raised to 1)
+            page_size: Number of evaluations per page (default: 100; clamped to 1..500)
+            timeout: Request timeout
+
+        Returns:
+            EvaluationsResponse, or None if the response is empty, not a dict, or fails validation
+        """
+        effective_page_size = min(max(page_size, 1), MAX_PAGE_SIZE) if page_size is not None else DEFAULT_PAGE_SIZE
+        effective_page = max(page, 1) if page is not None else DEFAULT_PAGE
+
+        params = {
+            "organizationID": self._client.organization_id,
+            "projectID": self._client.project_id,
+            "page": str(effective_page),
+            "pageSize": str(effective_page_size),
+        }
+
+        resp = await self._get(
+            "/evaluations",
+            params=params,
+            timeout=timeout,
+            cast_to=dict,
+        )
+        if not resp or not isinstance(resp, dict):
+            return None
+
+        # `or` rather than a .get() default, so keys that are present but null are handled too.
+        evaluations = [e if isinstance(e, Evaluation) else Evaluation(**e) for e in resp.get("evaluations") or []]
+        for e in evaluations:
+            e.attach_client(self._client)
+
+        total_count = resp.get("total_count") or 0
+        total_pages = math.ceil(total_count / effective_page_size) if total_count > 0 and effective_page_size > 0 else 0
+
+        resp_with_pagination = {
+            "evaluations": evaluations,
+            "pagination": {
+                "page": effective_page,
+                "page_size": effective_page_size,
+                "total_pages": total_pages,
+                "total_count": total_count,
+            },
+        }
+
+        try:
+            return EvaluationsResponse.model_validate(resp_with_pagination)
+        except Exception:
+            return None
+
     async def wait_for_completion(
         self,
         evaluation: Evaluation,
diff --git a/src/atlas/resources/models/models.py b/src/atlas/resources/models/models.py
index af55ab6..d4d3ad3 100644
--- a/src/atlas/resources/models/models.py
+++ b/src/atlas/resources/models/models.py
@@ -19,7 +19,7 @@ def get(
         companies: Optional[List[str]] = None,
         regions: Optional[List[str]] = None,
         licenses: Optional[List[str]] = None,
-    ) -> List[Model] | None:
+    ) -> Optional[List[Model]]:
         base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/models"
 
         def fetch(model_type: str) -> ModelsResponse | None:
@@ -73,7 +73,7 @@ async def get(
         companies: Optional[List[str]] = None,
         regions: Optional[List[str]] = None,
         licenses: Optional[List[str]] = None,
-    ) -> List[Model] | None:
+    ) -> Optional[List[Model]]:
         base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/models"
 
         async def fetch(model_type: str) -> ModelsResponse | None:
diff --git a/src/atlas/resources/results/results.py b/src/atlas/resources/results/results.py
index b970992..7d8e87b 100644
--- a/src/atlas/resources/results/results.py
+++ b/src/atlas/resources/results/results.py
@@ -49,7 +49,7 @@ def get_by_id(
         page: Optional[int] = None,
         page_size: Optional[int] = None,
         timeout: Optional[float | httpx.Timeout] = DEFAULT_TIMEOUT,
-    ) -> ResultsResponse | None:
+    ) -> Optional[ResultsResponse]:
         """
         Get evaluation results with optional pagination.
 
@@ -201,7 +201,7 @@ async def get_by_id(
         page: Optional[int] = None,
         page_size: Optional[int] = None,
         timeout: Optional[float | httpx.Timeout] = DEFAULT_TIMEOUT,
-    ) -> ResultsResponse | None:
+    ) -> Optional[ResultsResponse]:
         """
         Get evaluation results with optional pagination.
 
diff --git a/tests/resources/test_evaluations.py b/tests/resources/test_evaluations.py
index d510d02..94aa776 100644
--- a/tests/resources/test_evaluations.py
+++ b/tests/resources/test_evaluations.py
@@ -3,7 +3,12 @@
 import httpx
 import pytest
 
-from atlas.models import Evaluation, EvaluationStatus, EvaluationsResponse
+from atlas.models import (
+    Evaluation,
+    EvaluationStatus,
+    EvaluationsResponse,
+    CreateEvaluationsResponse,
+)
 from atlas._constants import DEFAULT_TIMEOUT
 from atlas.resources.evaluations.evaluations import Evaluations
 
@@ -61,9 +66,9 @@ def sample_evaluation_data(self):
 
     @pytest.fixture
     def mock_evaluations_response(self, sample_evaluation_data):
-        """Mock EvaluationsResponse response."""
+        """Mock CreateEvaluationsResponse response."""
         evaluation = Evaluation(**sample_evaluation_data)
-        return EvaluationsResponse(data=[evaluation])
+        return CreateEvaluationsResponse(data=[evaluation])
 
     def test_evaluations_initialization(self, mock_client):
         """Evaluations resource initializes correctly."""
@@ -113,7 +118,7 @@ def test_create_evaluation_request_parameters(
                 }
             ],
             timeout=DEFAULT_TIMEOUT,
-            cast_to=EvaluationsResponse,
+            cast_to=CreateEvaluationsResponse,
         )
 
     def test_create_evaluation_with_custom_timeout(
@@ -158,7 +163,7 @@ def test_create_evaluation_with_httpx_timeout(
 
     def test_create_evaluation_empty_response(self, mock_model, mock_benchmark, evaluations_resource):
         """create method returns None when no evaluations in response."""
-        empty_response = EvaluationsResponse(data=[])
+        empty_response = CreateEvaluationsResponse(data=[])
         evaluations_resource._post.return_value = empty_response
 
         result = evaluations_resource.create(model=mock_model, benchmark=mock_benchmark)
@@ -174,7 +179,7 @@ def test_create_evaluation_none_response(self, mock_model, mock_benchmark, evalu
         assert result is None
 
     def test_create_evaluation_invalid_response_type(self, mock_model, mock_benchmark, evaluations_resource):
-        """create method handles non-EvaluationsResponse response gracefully."""
+        """create method handles non-CreateEvaluationsResponse response gracefully."""
         evaluations_resource._post.return_value = "invalid-response"
 
         result = evaluations_resource.create(model=mock_model, benchmark=mock_benchmark)
@@ -190,7 +195,7 @@ def test_create_evaluation_multiple_evaluations_returns_first(
         eval2_data["id"] = "eval-456"
         eval2 = Evaluation(**eval2_data)
 
-        response = EvaluationsResponse(data=[eval1, eval2])
+        response = CreateEvaluationsResponse(data=[eval1, eval2])
         evaluations_resource._post.return_value = response
 
         result = evaluations_resource.create(model=mock_model, benchmark=mock_benchmark)
@@ -251,7 +256,7 @@ def test_create_evaluation_cast_to_parameter(
         evaluations_resource.create(model=mock_model, benchmark=mock_benchmark)
 
         call_args = evaluations_resource._post.call_args
-        assert call_args.kwargs["cast_to"] is EvaluationsResponse
+        assert call_args.kwargs["cast_to"] is CreateEvaluationsResponse
 
     def test_create_evaluation_timeout_default(
         self,
@@ -283,6 +288,37 @@ def test_create_evaluation_with_none_timeout(
         call_args = evaluations_resource._post.call_args
         assert call_args.kwargs["timeout"] is None
 
+    def test_get_many_returns_evaluations(self, evaluations_resource, mock_client, sample_evaluation_data):
+        """get_many returns an EvaluationsResponse with the client attached when the response is valid."""
+        evaluation = Evaluation(**sample_evaluation_data)
+        response = {"evaluations": [evaluation], "total_count": 1}
+        evaluations_resource._get.return_value = response
+
+        result = evaluations_resource.get_many()
+
+        assert isinstance(result, EvaluationsResponse)
+        assert result.evaluations[0].id == "eval-123"
+        evaluations_resource._get.assert_called_once_with(
+            "/evaluations",
+            params={
+                "organizationID": mock_client.organization_id,
+                "projectID": mock_client.project_id,
+                "page": "1",
+                "pageSize": "100",
+            },
+            timeout=DEFAULT_TIMEOUT,
+            cast_to=dict,
+        )
+        assert result.evaluations[0]._client is mock_client
+
+    def test_get_many_returns_none_on_invalid_response(self, evaluations_resource):
+        """get_many returns None when the response is not a dict."""
+        evaluations_resource._get.return_value = "not-a-response"
+
+        result = evaluations_resource.get_many()
+
+        assert result is None
+
 
 class TestEvaluationsErrorHandling:
     """Test error handling in Evaluations resource."""
@@ -390,7 +426,7 @@ def test_create_evaluation_end_to_end_flow(self):
         }
 
         evaluation = Evaluation(**evaluation_data)
-        response = EvaluationsResponse(data=[evaluation])
+        response = CreateEvaluationsResponse(data=[evaluation])
         mock_client.post_cast.return_value = response
 
         # Test the resource
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 0ba831f..dede2de 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -11,7 +11,7 @@
     Benchmark,
     Evaluation,
     EvaluationStatus,
-    EvaluationsResponse,
+    CreateEvaluationsResponse,
 )
 
 
@@ -172,7 +172,7 @@ def test_complete_evaluation_workflow(self, atlas_client):
         result = Result(**result_data)
 
         # Mock responses
-        evaluations_response = EvaluationsResponse(data=[evaluation])
+        evaluations_response = CreateEvaluationsResponse(data=[evaluation])
 
         with patch.object(atlas_client, "get_cast") as mock_get, patch.object(atlas_client, "post_cast") as mock_post:
             # Configure mocks for the workflow
@@ -329,7 +329,7 @@ def test_evaluation_creation_with_model_and_benchmark_objects(self, atlas_client
         benchmark = Benchmark(**benchmark_data)
         evaluation = Evaluation(**evaluation_data)
 
-        evaluations_response = EvaluationsResponse(data=[evaluation])
+        evaluations_response = CreateEvaluationsResponse(data=[evaluation])
 
         with patch.object(atlas_client, "post_cast") as mock_post:
             mock_post.return_value = evaluations_response
diff --git a/tests/test_models.py b/tests/test_models.py
index 52bccdc..95470cf 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -16,7 +16,7 @@
     ResultsResponse,
     EvaluationStatus,
     BenchmarksResponse,
-    EvaluationsResponse,
+    CreateEvaluationsResponse,
 )
 
 
@@ -108,7 +108,7 @@ def evaluation_data(self):
     def test_evaluations_with_list_of_evaluations(self, evaluation_data):
         """Evaluations model accepts list of Evaluation objects."""
         evaluations_data = {"data": [evaluation_data, evaluation_data]}
-        evaluations = EvaluationsResponse(**evaluations_data)
+        evaluations = CreateEvaluationsResponse(**evaluations_data)
 
         assert len(evaluations.data) == 2
         assert all(isinstance(eval, Evaluation) for eval in evaluations.data)
@@ -116,7 +116,7 @@ def test_evaluations_with_list_of_evaluations(self, evaluation_data):
 
     def test_evaluations_empty_list(self):
         """Evaluations model accepts empty list."""
-        evaluations = EvaluationsResponse(data=[])
+        evaluations = CreateEvaluationsResponse(data=[])
 
         assert evaluations.data == []
         assert isinstance(evaluations.data, list)
@@ -124,7 +124,7 @@ def test_evaluations_empty_list(self):
     def test_evaluations_invalid_data_structure(self):
         """Evaluations model validates data structure."""
         with pytest.raises(ValidationError):
-            EvaluationsResponse(data="not-a-list")  # type: ignore[arg-type]
+            CreateEvaluationsResponse(data="not-a-list")  # type: ignore[arg-type]
 
 
 class TestResult: