diff --git a/README.md b/README.md index cfdd197..224e580 100644 --- a/README.md +++ b/README.md @@ -18,8 +18,6 @@ from atlas import Atlas client = Atlas( # This is the default and can be omitted api_key=os.environ.get("LAYERLENS_ATLAS_API_KEY"), - organization_id=os.environ.get("LAYERLENS_ATLAS_ORG_ID"), - project_id=os.environ.get("LAYERLENS_ATLAS_PROJECT_ID"), ) evaluation = client.evaluations.create( diff --git a/docs/README.md b/docs/README.md index ced013e..a57f613 100644 --- a/docs/README.md +++ b/docs/README.md @@ -24,8 +24,6 @@ from atlas import Atlas # Initialize the client client = Atlas( api_key=os.environ.get("LAYERLENS_ATLAS_API_KEY"), - organization_id=os.environ.get("LAYERLENS_ATLAS_ORG_ID"), - project_id=os.environ.get("LAYERLENS_ATLAS_PROJECT_ID"), ) # Create an evaluation @@ -60,4 +58,4 @@ if evaluation: ## License -This SDK is released under the MIT License. \ No newline at end of file +This SDK is released under the MIT License. diff --git a/docs/api-reference/client.md b/docs/api-reference/client.md index a2720bb..155b659 100644 --- a/docs/api-reference/client.md +++ b/docs/api-reference/client.md @@ -11,26 +11,20 @@ from atlas import Atlas client = Atlas() # Explicit configuration -client = Atlas( - api_key="your_api_key", - organization_id="your_org_id", - project_id="your_project_id" -) +client = Atlas(api_key="your_api_key") ``` ## Constructor Parameters -### `Atlas(api_key, organization_id, project_id, base_url, timeout)` +### `Atlas(api_key, base_url, timeout)` -| Parameter | Type | Required | Default | Description | -|-----------|------|----------|---------|-------------| -| `api_key` | `str \| None` | Yes* | `None` | Your LayerLens Atlas API key | -| `organization_id` | `str \| None` | Yes* | `None` | Your organization identifier | -| `project_id` | `str \| None` | Yes* | `None` | The project you want to work with | -| `base_url` | `str \| httpx.URL \| None` | No | Atlas API URL | Custom API base URL | -| `timeout` | `float \| httpx.Timeout \| None` | No | 10 minutes | Request timeout configuration | +| Parameter | Type | Required | Default | Description | +| ---------- | -------------------------------- | -------- | ------------- | ----------------------------- | +| `api_key` | `str \| None` | Yes\* | `None` | Your LayerLens Atlas API key | +| `base_url` | `str \| httpx.URL \| None` | No | Atlas API URL | Custom API base URL | +| `timeout` | `float \| httpx.Timeout \| None` | No | 10 minutes | Request timeout configuration | -*Required unless set via environment variables +\*Required unless set via environment variables ## Environment Variable Configuration @@ -38,8 +32,6 @@ The client automatically loads configuration from these environment variables: ```bash LAYERLENS_ATLAS_API_KEY="your_api_key_here" -LAYERLENS_ATLAS_ORG_ID="your_org_id_here" -LAYERLENS_ATLAS_PROJECT_ID="your_project_id_here" LAYERLENS_ATLAS_BASE_URL="https://custom-endpoint.com/api/v1" # Optional ``` @@ -63,7 +55,7 @@ from atlas import Atlas client = Atlas( timeout=httpx.Timeout( connect=5.0, # Connection timeout: 5 seconds - read=60.0, # Read timeout: 60 seconds + read=60.0, # Read timeout: 60 seconds write=30.0, # Write timeout: 30 seconds pool=10.0 # Connection pool timeout: 10 seconds ) @@ -90,10 +82,7 @@ Create a new client instance with modified configuration: ```python # Base client -client = Atlas(api_key="key1", organization_id="org1") - -# Create a copy with different project -project_client = client.copy(project_id="different_project") +client = Atlas(api_key="key1") # Create a copy with different timeout slow_client = client.copy(timeout=300.0) # 5 minutes @@ -108,7 +97,7 @@ client = Atlas() # Use different timeout for this request only evaluation = client.with_options(timeout=60.0).evaluations.create( - model="gpt-4", + model="gpt-4", benchmark="mmlu" ) @@ -126,11 +115,12 @@ client = Atlas() # Access evaluations resource client.evaluations.create(model="gpt-4", benchmark="mmlu") -# Access results resource +# Access results resource client.results.get(evaluation_id="eval_123") ``` Available resources: + - `client.evaluations` - Create and manage evaluations - `client.results` - Retrieve evaluation results - More resources coming soon... @@ -151,7 +141,7 @@ except atlas.AuthenticationError: # 401 - Invalid API key print("Authentication failed") except atlas.PermissionDeniedError: - # 403 - Valid API key, insufficient permissions + # 403 - Valid API key, insufficient permissions print("Permission denied") except atlas.NotFoundError: # 404 - Resource not found @@ -184,9 +174,11 @@ You don't need to manually handle authentication headers. ## Base URL Configuration ### Default Base URL + The client uses the default LayerLens Atlas API endpoint unless overridden. ### Custom Base URL + For enterprise or self-hosted deployments: ```python @@ -204,6 +196,7 @@ client = Atlas() # Will use custom base URL from environment ## Best Practices ### 1. Use Environment Variables + ```python # ✅ Good - secure and flexible client = Atlas() @@ -213,6 +206,7 @@ client = Atlas(api_key="hardcoded_key") ``` ### 2. Configure Appropriate Timeouts + ```python # ✅ Good - reasonable timeout for evaluation creation client = Atlas(timeout=120.0) # 2 minutes @@ -222,6 +216,7 @@ client = Atlas(timeout=5.0) # 5 seconds might be too short ``` ### 3. Handle Errors Gracefully + ```python # ✅ Good - specific error handling try: @@ -235,6 +230,7 @@ except atlas.APIError as e: ``` ### 4. Reuse Client Instances + ```python # ✅ Good - reuse the same client client = Atlas() @@ -274,4 +270,4 @@ for model in ["gpt-4", "claude-3", "llama-2"]: for thread in threads: thread.join() -``` \ No newline at end of file +``` diff --git a/docs/api-reference/errors.md b/docs/api-reference/errors.md index ae172d4..7fec705 100644 --- a/docs/api-reference/errors.md +++ b/docs/api-reference/errors.md @@ -28,6 +28,7 @@ AtlasError ### Base Exceptions #### `AtlasError` + Base exception for all Atlas-related errors. ```python @@ -41,9 +42,11 @@ except atlas.AtlasError as e: ``` #### `APIError` + Base exception for all API-related errors. Contains additional context about the request. **Properties:** + - `message`: Error message - `request`: The HTTP request that caused the error - `body`: Response body (if available) @@ -63,9 +66,11 @@ except atlas.APIError as e: ### Connection Errors #### `APIConnectionError` + Raised when the client cannot connect to the API server. **Common causes:** + - Network connectivity issues - DNS resolution problems - Server is down @@ -83,6 +88,7 @@ except atlas.APIConnectionError as e: ``` #### `APITimeoutError` + Raised when a request times out. ```python @@ -100,11 +106,13 @@ except atlas.APITimeoutError: All HTTP status errors inherit from `APIStatusError` and include additional properties: **Properties:** + - `status_code`: HTTP status code - `response`: Full HTTP response object - `request_id`: Request ID for tracking (if provided by server) #### `BadRequestError` (400) + Request was malformed or contained invalid parameters. ```python @@ -120,6 +128,7 @@ except atlas.BadRequestError as e: ``` #### `AuthenticationError` (401) + API key is missing, invalid, or expired. ```python @@ -134,6 +143,7 @@ except atlas.AuthenticationError: ``` #### `PermissionDeniedError` (403) + Valid API key but insufficient permissions for the requested operation. ```python @@ -148,6 +158,7 @@ except atlas.PermissionDeniedError: ``` #### `NotFoundError` (404) + Requested resource (model, benchmark, evaluation) does not exist. ```python @@ -162,6 +173,7 @@ except atlas.NotFoundError: ``` #### `ConflictError` (409) + Request conflicts with current resource state. ```python @@ -176,6 +188,7 @@ except atlas.ConflictError: ``` #### `UnprocessableEntityError` (422) + Request parameters are valid but cannot be processed. ```python @@ -190,6 +203,7 @@ except atlas.UnprocessableEntityError as e: ``` #### `RateLimitError` (429) + Too many requests sent in a given time period. ```python @@ -212,6 +226,7 @@ except atlas.RateLimitError as e: ``` #### `InternalServerError` (500+) + Server-side error occurred. ```python @@ -237,30 +252,30 @@ from atlas import Atlas def robust_create_evaluation(model: str, benchmark: str, max_retries: int = 3): client = Atlas() - + for attempt in range(max_retries): try: evaluation = client.evaluations.create(model=model, benchmark=benchmark) return evaluation - + except atlas.AuthenticationError: print("❌ Authentication failed - check your API key") break # Don't retry auth errors - + except atlas.PermissionDeniedError: print("❌ Permission denied - contact your administrator") break # Don't retry permission errors - + except atlas.NotFoundError: print(f"❌ Model '{model}' or benchmark '{benchmark}' not found") break # Don't retry not found errors - + except atlas.RateLimitError as e: retry_after = e.response.headers.get('retry-after', 60) print(f"⏳ Rate limited - waiting {retry_after} seconds...") time.sleep(int(retry_after)) continue # Retry after waiting - + except atlas.InternalServerError: if attempt < max_retries - 1: wait_time = 2 ** attempt # Exponential backoff @@ -270,7 +285,7 @@ def robust_create_evaluation(model: str, benchmark: str, max_retries: int = 3): else: print("❌ Server error - max retries exceeded") break - + except atlas.APIConnectionError: if attempt < max_retries - 1: wait_time = 2 ** attempt @@ -280,11 +295,11 @@ def robust_create_evaluation(model: str, benchmark: str, max_retries: int = 3): else: print("❌ Connection failed - check your network") break - + except atlas.APIError as e: print(f"❌ Unexpected API error: {e}") break - + return None ``` @@ -296,24 +311,24 @@ from atlas import Atlas def get_evaluation_results_with_fallback(evaluation_id: str): client = Atlas() - + try: results = client.results.get(evaluation_id=evaluation_id) - + if results: return {"success": True, "data": results, "message": "Results retrieved successfully"} else: return {"success": False, "data": None, "message": "No results found"} - + except atlas.NotFoundError: return {"success": False, "data": None, "message": "Evaluation not found"} - + except atlas.AuthenticationError: return {"success": False, "data": None, "message": "Authentication required"} - + except atlas.APIConnectionError: return {"success": False, "data": None, "message": "Service temporarily unavailable"} - + except atlas.APIError as e: return {"success": False, "data": None, "message": f"Service error: {e}"} @@ -338,34 +353,34 @@ logger = logging.getLogger(__name__) def monitored_api_call(): client = Atlas() - + try: logger.info("Creating evaluation...") evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") - + if evaluation: logger.info(f"Evaluation created successfully: {evaluation.id}") return evaluation else: logger.warning("Evaluation creation returned None") return None - + except atlas.RateLimitError as e: logger.warning(f"Rate limited - request ID: {e.request_id}") raise - + except atlas.AuthenticationError: logger.error("Authentication failed - check API key configuration") raise - + except atlas.APIConnectionError: logger.error("Network connection failed") raise - + except atlas.InternalServerError as e: logger.error(f"Server error: {e.status_code} - request ID: {e.request_id}") raise - + except atlas.APIError as e: logger.error(f"Unexpected API error: {e} - request ID: {getattr(e, 'request_id', 'N/A')}") raise @@ -437,16 +452,16 @@ def extract_error_info(error: atlas.APIError): "request_url": error.request.url if hasattr(error, 'request') else None, "request_method": error.request.method if hasattr(error, 'request') else None, } - + if hasattr(error, 'status_code'): info["status_code"] = error.status_code - + if hasattr(error, 'request_id'): info["request_id"] = error.request_id - + if hasattr(error, 'response'): info["response_headers"] = dict(error.response.headers) - + return info # Usage @@ -470,11 +485,11 @@ def test_authentication_error_handling(): """Test that authentication errors are handled properly""" with patch('atlas.Atlas') as mock_atlas: mock_atlas.side_effect = atlas.AuthenticationError( - "Invalid API key", - request=Mock(), + "Invalid API key", + request=Mock(), response=Mock() ) - + with pytest.raises(atlas.AuthenticationError): client = Atlas() client.evaluations.create(model="gpt-4", benchmark="mmlu") @@ -495,13 +510,6 @@ try: client = Atlas(api_key=None) except atlas.AtlasError as e: print(f"Configuration error: {e}") - -# Invalid organization/project -try: - client = Atlas(organization_id="invalid", project_id="invalid") - evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") -except atlas.PermissionDeniedError: - print("Invalid organization or project ID") ``` ### Network Issues @@ -540,7 +548,7 @@ def exponential_backoff_retry(func, max_retries=3, base_delay=1): except (atlas.InternalServerError, atlas.APIConnectionError) as e: if attempt == max_retries - 1: raise - + delay = base_delay * (2 ** attempt) + random.uniform(0, 1) print(f"Attempt {attempt + 1} failed, retrying in {delay:.2f}s...") time.sleep(delay) @@ -573,14 +581,14 @@ class CircuitBreaker: self.failure_count = 0 self.last_failure_time = None self.state = CircuitState.CLOSED - + def call(self, func, *args, **kwargs): if self.state == CircuitState.OPEN: if time.time() - self.last_failure_time < self.timeout: raise atlas.APIConnectionError(message="Circuit breaker is OPEN") else: self.state = CircuitState.HALF_OPEN - + try: result = func(*args, **kwargs) self.on_success() @@ -588,11 +596,11 @@ class CircuitBreaker: except (atlas.InternalServerError, atlas.APIConnectionError) as e: self.on_failure() raise - + def on_success(self): self.failure_count = 0 self.state = CircuitState.CLOSED - + def on_failure(self): self.failure_count += 1 self.last_failure_time = time.time() @@ -605,8 +613,8 @@ client = Atlas() try: evaluation = breaker.call( - client.evaluations.create, - model="gpt-4", + client.evaluations.create, + model="gpt-4", benchmark="mmlu" ) except atlas.APIError as e: diff --git a/docs/examples/advanced-usage.md b/docs/examples/advanced-usage.md index 95695f0..d50383c 100644 --- a/docs/examples/advanced-usage.md +++ b/docs/examples/advanced-usage.md @@ -20,9 +20,8 @@ client = Atlas() ``` Required environment variables: + - `LAYERLENS_ATLAS_API_KEY` - Your Atlas API key -- `LAYERLENS_ATLAS_ORG_ID` - Your organization ID -- `LAYERLENS_ATLAS_PROJECT_ID` - Your project ID ## Pagination Best Practices @@ -36,36 +35,36 @@ from atlas import Atlas def understand_pagination(evaluation_id: str): """Understand pagination metadata""" client = Atlas() - + # Get first page results_data = client.results.get(evaluation_id=evaluation_id) - + if results_data: pagination = results_data.pagination - + print(f" Pagination Overview:") print(f" Total results: {pagination.total_count:,}") print(f" Page size: {pagination.page_size}") print(f" Total pages: {pagination.total_pages}") print(f" Current page has: {len(results_data.results)} results") - + # Calculate some useful info is_paginated = pagination.total_pages > 1 results_per_page = pagination.page_size last_page_size = pagination.total_count % pagination.page_size or pagination.page_size - + print(f"\n Analysis:") print(f" Is paginated: {is_paginated}") print(f" Results per page: {results_per_page}") print(f" Last page size: {last_page_size}") - + if is_paginated: print(f"\n To access all {pagination.total_count:,} results:") print(f" - Iterate through {pagination.total_pages} pages") print(f" - Or use batch processing patterns") - + return pagination - + return None # Usage @@ -79,52 +78,52 @@ def efficient_pagination_strategies(): """Demonstrate different pagination approaches""" client = Atlas() evaluation_id = "eval_12345" - + # Strategy 1: Small pages for real-time processing print(" Strategy 1: Small pages for real-time feedback") page_size = 25 page = 1 - + while True: results_data = client.results.get( evaluation_id=evaluation_id, page=page, page_size=page_size ) - + if not results_data or not results_data.results: break - + print(f" Processing page {page}: {len(results_data.results)} results") - + # Process immediately for result in results_data.results: # Real-time processing logic pass - + if page >= results_data.pagination.total_pages: break page += 1 - + print("\n Strategy 2: Large pages for batch processing") page_size = 200 # Larger pages page = 1 - + while True: results_data = client.results.get( evaluation_id=evaluation_id, page=page, page_size=page_size ) - + if not results_data or not results_data.results: break - + print(f" Batch processing page {page}: {len(results_data.results)} results") - + # Batch process entire page process_batch(results_data.results) - + if page >= results_data.pagination.total_pages: break page += 1 @@ -150,23 +149,23 @@ import atlas def run_evaluation_batch(models, benchmarks): """Run evaluations for multiple model-benchmark combinations""" client = Atlas() - + results = {'successful': [], 'failed': []} - + for model in models: for benchmark in benchmarks: print(f"Creating evaluation: {model} on {benchmark}") - + try: evaluation = client.evaluations.create( model=model, benchmark=benchmark ) - + if evaluation: results['successful'].append({ 'model': model, - 'benchmark': benchmark, + 'benchmark': benchmark, 'evaluation_id': evaluation.id }) print(f" Created: {evaluation.id}") @@ -176,21 +175,21 @@ def run_evaluation_batch(models, benchmarks): 'benchmark': benchmark, 'error': 'No evaluation returned' }) - + except atlas.RateLimitError: print("Rate limited, waiting 60 seconds...") time.sleep(60) - + except atlas.APIError as e: print(f" Failed: {e}") results['failed'].append({ 'model': model, - 'benchmark': benchmark, + 'benchmark': benchmark, 'error': str(e) }) - + time.sleep(2) - + return results # Usage @@ -214,18 +213,18 @@ import atlas def create_evaluation_with_retries(model, benchmark, max_retries=3): """Create evaluation with automatic retries""" client = Atlas() - + for attempt in range(max_retries): try: evaluation = client.evaluations.create( model=model, benchmark=benchmark ) - + if evaluation: print(f" Success on attempt {attempt + 1}") return evaluation - + except atlas.RateLimitError as e: print(f"Rate limited on attempt {attempt + 1}") if attempt < max_retries - 1: @@ -236,22 +235,22 @@ def create_evaluation_with_retries(model, benchmark, max_retries=3): time.sleep(wait_time) else: raise - + except atlas.NotFoundError: print(f" Model '{model}' or benchmark '{benchmark}' not found") return None - + except atlas.AuthenticationError: print(" Authentication failed - check your API key") raise - + except atlas.APIError as e: print(f" API error on attempt {attempt + 1}: {e}") if attempt < max_retries - 1: time.sleep(2 ** attempt) # Exponential backoff else: raise - + return None # Usage @@ -270,13 +269,13 @@ from typing import Dict, List def analyze_evaluation_results(evaluation_id: str) -> Dict: """Analyze results from an evaluation""" client = Atlas() - + try: results = client.results.get(evaluation_id=evaluation_id) - + if not results: return {"error": "No results found"} - + # Basic analysis analysis = { "total_results": len(results), @@ -284,10 +283,10 @@ def analyze_evaluation_results(evaluation_id: str) -> Dict: "overall_accuracy": 0, "avg_duration": 0 } - + total_score = 0 total_duration = 0 - + for result in results: # Track by subset if result.subset not in analysis["subsets"]: @@ -296,23 +295,23 @@ def analyze_evaluation_results(evaluation_id: str) -> Dict: "total_score": 0, "accuracy": 0 } - + analysis["subsets"][result.subset]["count"] += 1 analysis["subsets"][result.subset]["total_score"] += result.score - + total_score += result.score total_duration += result.duration.total_seconds() - + # Calculate averages analysis["overall_accuracy"] = total_score / len(results) analysis["avg_duration"] = total_duration / len(results) - + # Calculate subset accuracies for subset_data in analysis["subsets"].values(): subset_data["accuracy"] = subset_data["total_score"] / subset_data["count"] - + return analysis - + except atlas.APIError as e: return {"error": str(e)} @@ -323,7 +322,7 @@ if "error" not in analysis: print(f" Total results: {analysis['total_results']}") print(f" Overall accuracy: {analysis['overall_accuracy']:.2%}") print(f" Average duration: {analysis['avg_duration']:.2f}s") - + print(f" By subset:") for subset, data in analysis['subsets'].items(): print(f" {subset}: {data['accuracy']:.2%} ({data['count']} results)") @@ -344,7 +343,7 @@ dev_client = Atlas(timeout=30.0) # 30 seconds # Production: More patient prod_client = Atlas(timeout=600.0) # 10 minutes -# Long-running batch jobs: Very patient +# Long-running batch jobs: Very patient batch_client = Atlas(timeout=1800.0) # 30 minutes def adaptive_timeout_client(operation_type="default"): @@ -355,7 +354,7 @@ def adaptive_timeout_client(operation_type="default"): "batch": 1800.0, # For batch processing "patient": 3600.0 # For very long evaluations } - + timeout = timeouts.get(operation_type, timeouts["default"]) return Atlas(timeout=timeout) @@ -384,18 +383,18 @@ logger = logging.getLogger('atlas-client') def create_evaluation_with_logging(model, benchmark): """Create evaluation with comprehensive logging""" client = Atlas() - + logger.info(f"Creating evaluation: {model} on {benchmark}") start_time = time.time() - + try: evaluation = client.evaluations.create( model=model, benchmark=benchmark ) - + duration = time.time() - start_time - + if evaluation: logger.info( f"Evaluation created successfully: {evaluation.id} " @@ -408,7 +407,7 @@ def create_evaluation_with_logging(model, benchmark): f"(duration: {duration:.2f}s)" ) return None - + except atlas.APIError as e: duration = time.time() - start_time logger.error( @@ -433,11 +432,11 @@ def check_atlas_health(): """Simple health check for Atlas service""" try: client = Atlas(timeout=10.0) # Short timeout for health check - + # Try to create a test evaluation (will fail but tests connectivity) try: client.evaluations.create( - model="__health_check__", + model="__health_check__", benchmark="__health_check__" ) except atlas.NotFoundError: @@ -446,10 +445,10 @@ def check_atlas_health(): except atlas.BadRequestError: # Also expected - invalid parameters return {"status": "healthy", "message": "API is reachable"} - + except atlas.AuthenticationError: return { - "status": "unhealthy", + "status": "unhealthy", "error": "Authentication failed - check API key" } except atlas.APIConnectionError: @@ -459,7 +458,7 @@ def check_atlas_health(): } except atlas.APITimeoutError: return { - "status": "unhealthy", + "status": "unhealthy", "error": "Health check timed out" } except Exception as e: @@ -504,17 +503,17 @@ def create_evaluation(): data = request.get_json() model = data.get('model') benchmark = data.get('benchmark') - + if not model or not benchmark: return jsonify({ "error": "Missing required fields: model, benchmark" }), 400 - + evaluation = atlas_client.evaluations.create( model=model, benchmark=benchmark ) - + if evaluation: return jsonify({ "success": True, @@ -526,16 +525,16 @@ def create_evaluation(): "success": False, "error": "Failed to create evaluation" }), 500 - + except atlas.NotFoundError: return jsonify({ "success": False, "error": "Model or benchmark not found" }), 404 - + except atlas.APIError as e: return jsonify({ - "success": False, + "success": False, "error": str(e) }), 500 @@ -544,7 +543,7 @@ def get_results(evaluation_id): """Get evaluation results endpoint""" try: results = atlas_client.results.get(evaluation_id=evaluation_id) - + if results: return jsonify({ "success": True, @@ -563,7 +562,7 @@ def get_results(evaluation_id): "success": False, "error": "No results found" }), 404 - + except atlas.APIError as e: return jsonify({ "success": False, diff --git a/docs/examples/creating-evaluations.md b/docs/examples/creating-evaluations.md index 37da6bf..78e2900 100644 --- a/docs/examples/creating-evaluations.md +++ b/docs/examples/creating-evaluations.md @@ -37,11 +37,7 @@ Using explicit client configuration instead of environment variables: from atlas import Atlas # Explicit configuration -client = Atlas( - api_key="your_api_key_here", - organization_id="your_org_id", - project_id="your_project_id" -) +client = Atlas(api_key="your_api_key_here") evaluation = client.evaluations.create( model="claude-3-opus", @@ -67,16 +63,16 @@ def compare_models_on_benchmark(models: list, benchmark: str): """Create evaluations for multiple models on the same benchmark""" client = Atlas() evaluations = [] - + print(f"🔄 Creating evaluations for {len(models)} models on {benchmark}") - + for model in models: try: evaluation = client.evaluations.create( model=model, benchmark=benchmark ) - + if evaluation: evaluations.append({ "model": model, @@ -87,19 +83,19 @@ def compare_models_on_benchmark(models: list, benchmark: str): print(f"✅ {model}: {evaluation.id}") else: print(f"❌ Failed to create evaluation for {model}") - + except Exception as e: print(f"❌ Error creating evaluation for {model}: {e}") - + # Brief pause between requests to avoid rate limits time.sleep(0.5) - + return evaluations # Usage models_to_compare = [ "gpt-4", - "gpt-3.5-turbo", + "gpt-3.5-turbo", "claude-3-opus", "claude-3-sonnet", "llama-2-70b" @@ -125,16 +121,16 @@ def evaluate_model_on_benchmarks(model: str, benchmarks: list): """Evaluate a single model across multiple benchmarks""" client = Atlas() evaluations = [] - + print(f"🔄 Evaluating {model} on {len(benchmarks)} benchmarks") - + for benchmark in benchmarks: try: evaluation = client.evaluations.create( model=model, benchmark=benchmark ) - + if evaluation: evaluations.append({ "benchmark": benchmark, @@ -145,18 +141,18 @@ def evaluate_model_on_benchmarks(model: str, benchmarks: list): print(f"✅ {benchmark}: {evaluation.id}") else: print(f"❌ Failed to create evaluation for {benchmark}") - + except Exception as e: print(f"❌ Error evaluating on {benchmark}: {e}") - + time.sleep(0.5) - + return evaluations # Usage benchmarks_to_test = [ "mmlu", - "hellaswag", + "hellaswag", "arc-challenge", "truthfulqa", "gsm8k" @@ -183,22 +179,22 @@ def create_evaluation_matrix(models: list, benchmarks: list, delay: float = 1.0) client = Atlas() results = {} total_combinations = len(models) * len(benchmarks) - + print(f"🔄 Creating {total_combinations} evaluations...") - + for i, (model, benchmark) in enumerate(itertools.product(models, benchmarks), 1): print(f"\n[{i}/{total_combinations}] {model} + {benchmark}") - + try: evaluation = client.evaluations.create( model=model, benchmark=benchmark ) - + if evaluation: if model not in results: results[model] = {} - + results[model][benchmark] = { "evaluation_id": evaluation.id, "model_name": evaluation.model_name, @@ -209,7 +205,7 @@ def create_evaluation_matrix(models: list, benchmarks: list, delay: float = 1.0) print(f"✅ Success: {evaluation.id}") else: print(f"❌ Failed: No evaluation created") - + except Exception as e: print(f"❌ Error: {e}") if model not in results: @@ -218,11 +214,11 @@ def create_evaluation_matrix(models: list, benchmarks: list, delay: float = 1.0) "error": str(e), "success": False } - + # Rate limiting if i < total_combinations: time.sleep(delay) - + return results # Usage @@ -261,36 +257,36 @@ import time import random def create_evaluation_with_retry( - model: str, - benchmark: str, + model: str, + benchmark: str, max_retries: int = 3, base_delay: float = 1.0 ): """Create evaluation with exponential backoff retry logic""" client = Atlas() - + for attempt in range(max_retries): try: print(f"🔄 Attempt {attempt + 1}/{max_retries}: Creating evaluation...") - + evaluation = client.evaluations.create( model=model, benchmark=benchmark, timeout=120.0 # 2-minute timeout ) - + if evaluation: print(f"✅ Success on attempt {attempt + 1}: {evaluation.id}") return evaluation else: print(f"❌ Evaluation creation returned None on attempt {attempt + 1}") - + except atlas.RateLimitError as e: retry_after = e.response.headers.get('retry-after', base_delay * (2 ** attempt)) print(f"⏳ Rate limited, waiting {retry_after}s...") time.sleep(float(retry_after)) continue - + except atlas.InternalServerError: if attempt < max_retries - 1: delay = base_delay * (2 ** attempt) + random.uniform(0, 1) @@ -300,7 +296,7 @@ def create_evaluation_with_retry( else: print("❌ Server error - max retries exceeded") break - + except atlas.APIConnectionError: if attempt < max_retries - 1: delay = base_delay * (2 ** attempt) @@ -310,23 +306,23 @@ def create_evaluation_with_retry( else: print("❌ Connection failed - max retries exceeded") break - + except atlas.AuthenticationError: print("❌ Authentication failed - check your API key") break - + except atlas.NotFoundError: print(f"❌ Model '{model}' or benchmark '{benchmark}' not found") break - + except atlas.PermissionDeniedError: print("❌ Permission denied - check your access rights") break - + except atlas.APIError as e: print(f"❌ API error: {e}") break - + return None # Usage @@ -351,25 +347,25 @@ from atlas import Atlas def validate_and_create_evaluation(model: str, benchmark: str): """Validate model and benchmark before creating evaluation""" client = Atlas() - + # Pre-validation checks if not model or not model.strip(): print("❌ Model cannot be empty") return None - + if not benchmark or not benchmark.strip(): print("❌ Benchmark cannot be empty") return None - + print(f"🔍 Validating {model} + {benchmark}...") - + try: # Attempt to create the evaluation evaluation = client.evaluations.create( model=model.strip(), benchmark=benchmark.strip() ) - + if evaluation: print(f"✅ Validation successful!") print(f" Evaluation ID: {evaluation.id}") @@ -380,7 +376,7 @@ def validate_and_create_evaluation(model: str, benchmark: str): else: print("❌ Validation failed: No evaluation returned") return None - + except atlas.NotFoundError: print(f"❌ Validation failed: Model '{model}' or benchmark '{benchmark}' not found") print("💡 Suggestions:") @@ -388,17 +384,17 @@ def validate_and_create_evaluation(model: str, benchmark: str): print(" • Verify available options in Atlas dashboard") print(" • Ensure your organization has access to these resources") return None - + except atlas.AuthenticationError: print("❌ Authentication failed") print("💡 Check your API key configuration") return None - + except atlas.PermissionDeniedError: print("❌ Permission denied") print("💡 Contact your administrator for access") return None - + except atlas.APIError as e: print(f"❌ Validation failed: {e}") return None @@ -414,7 +410,7 @@ test_combinations = [ for model, benchmark in test_combinations: print(f"\n{'='*50}") evaluation = validate_and_create_evaluation(model, benchmark) - + if evaluation: print(f"Ready to monitor evaluation: {evaluation.id}") ``` @@ -429,13 +425,13 @@ import httpx def create_evaluations_with_custom_timeouts(): """Demonstrate different timeout configurations""" - + # Quick timeout for testing connectivity quick_client = Atlas(timeout=30.0) # 30 seconds - - # Standard timeout for regular evaluations + + # Standard timeout for regular evaluations standard_client = Atlas(timeout=300.0) # 5 minutes - + # Long timeout for complex evaluations patient_client = Atlas( timeout=httpx.Timeout( @@ -445,7 +441,7 @@ def create_evaluations_with_custom_timeouts(): pool=30.0 # 30s for connection pool ) ) - + # Test connectivity with quick client print("🔍 Testing connectivity...") try: @@ -460,7 +456,7 @@ def create_evaluations_with_custom_timeouts(): except atlas.APIError as e: print(f"❌ API error during connectivity test: {e}") return - + # Create standard evaluation print("\n🔄 Creating standard evaluation...") try: @@ -472,7 +468,7 @@ def create_evaluations_with_custom_timeouts(): print(f"✅ Standard evaluation created: {standard_eval.id}") except atlas.APITimeoutError: print("❌ Standard evaluation timed out") - + # Create complex evaluation with patient timeout print("\n🔄 Creating complex evaluation...") try: @@ -497,9 +493,9 @@ from atlas import Atlas def create_evaluation_with_override_timeout(): """Override timeout for specific requests""" client = Atlas(timeout=60.0) # Default 1-minute timeout - + evaluations = [] - + # Quick evaluation with short timeout print("🔄 Quick evaluation (30s timeout)...") try: @@ -512,7 +508,7 @@ def create_evaluation_with_override_timeout(): print(f"✅ Quick: {quick_eval.id}") except atlas.APITimeoutError: print("❌ Quick evaluation timed out") - + # Standard evaluation (uses default timeout) print("\n🔄 Standard evaluation (default 60s timeout)...") try: @@ -525,7 +521,7 @@ def create_evaluation_with_override_timeout(): print(f"✅ Standard: {standard_eval.id}") except atlas.APITimeoutError: print("❌ Standard evaluation timed out") - + # Long evaluation with extended timeout print("\n🔄 Long evaluation (5min timeout)...") try: @@ -538,7 +534,7 @@ def create_evaluation_with_override_timeout(): print(f"✅ Long: {long_eval.id}") except atlas.APITimeoutError: print("❌ Long evaluation timed out") - + return evaluations evaluations = create_evaluation_with_override_timeout() @@ -570,21 +566,21 @@ def create_evaluation_with_logging(model: str, benchmark: str, context: dict = N """Create evaluation with comprehensive logging""" client = Atlas() context = context or {} - + logger.info(f"Starting evaluation creation: {model} + {benchmark}") logger.info(f"Context: {context}") - + start_time = datetime.now() - + try: evaluation = client.evaluations.create( model=model, benchmark=benchmark ) - + end_time = datetime.now() duration = (end_time - start_time).total_seconds() - + if evaluation: logger.info(f"✅ Evaluation created successfully in {duration:.2f}s") logger.info(f" ID: {evaluation.id}") @@ -592,7 +588,7 @@ def create_evaluation_with_logging(model: str, benchmark: str, context: dict = N logger.info(f" Benchmark: {evaluation.dataset_name}") logger.info(f" Status: {evaluation.status}") logger.info(f" Submitted at: {evaluation.submitted_at}") - + return { "success": True, "evaluation": evaluation, @@ -607,23 +603,23 @@ def create_evaluation_with_logging(model: str, benchmark: str, context: dict = N "duration": duration, "timestamp": start_time.isoformat() } - + except atlas.RateLimitError as e: logger.warning(f"⏳ Rate limited - request ID: {getattr(e, 'request_id', 'N/A')}") return {"success": False, "error": "rate_limited", "retry_after": e.response.headers.get('retry-after')} - + except atlas.AuthenticationError: logger.error("❌ Authentication failed - check API key") return {"success": False, "error": "authentication_failed"} - + except atlas.NotFoundError: logger.error(f"❌ Model '{model}' or benchmark '{benchmark}' not found") return {"success": False, "error": "not_found", "model": model, "benchmark": benchmark} - + except atlas.APIError as e: logger.error(f"❌ API error: {e}") return {"success": False, "error": str(e), "error_type": type(e).__name__} - + except Exception as e: logger.error(f"❌ Unexpected error: {e}") return {"success": False, "error": f"unexpected: {e}"} @@ -639,7 +635,7 @@ results = [] for config in evaluation_configs: result = create_evaluation_with_logging(**config) results.append(result) - + if not result["success"]: logger.error(f"Failed to create evaluation: {config}") @@ -666,76 +662,76 @@ import atlas class EvaluationStrategy(ABC): """Abstract base class for evaluation strategies""" - + @abstractmethod def get_model_benchmark_pairs(self) -> List[tuple]: pass - + @abstractmethod def get_description(self) -> str: pass class GeneralIntelligenceStrategy(EvaluationStrategy): """Strategy for general intelligence assessment""" - + def get_model_benchmark_pairs(self) -> List[tuple]: models = ["gpt-4", "claude-3-opus", "llama-2-70b"] benchmarks = ["mmlu", "arc-challenge", "hellaswag"] return [(m, b) for m in models for b in benchmarks] - + def get_description(self) -> str: return "General intelligence assessment across major benchmarks" class CodeGenerationStrategy(EvaluationStrategy): """Strategy for code generation assessment""" - + def get_model_benchmark_pairs(self) -> List[tuple]: models = ["gpt-4", "code-llama-34b", "claude-3-sonnet"] benchmarks = ["humaneval", "mbpp"] return [(m, b) for m in models for b in benchmarks] - + def get_description(self) -> str: return "Code generation capability assessment" class MathReasoningStrategy(EvaluationStrategy): """Strategy for mathematical reasoning assessment""" - + def get_model_benchmark_pairs(self) -> List[tuple]: models = ["gpt-4", "claude-3-opus", "minerva-62b"] benchmarks = ["gsm8k", "math"] return [(m, b) for m in models for b in benchmarks] - + def get_description(self) -> str: return "Mathematical reasoning and problem-solving assessment" class EvaluationFactory: """Factory for creating evaluations based on strategies""" - + def __init__(self): self.client = Atlas() - + def execute_strategy(self, strategy: EvaluationStrategy) -> Dict[str, Any]: """Execute an evaluation strategy""" pairs = strategy.get_model_benchmark_pairs() description = strategy.get_description() - + print(f"🔄 Executing strategy: {description}") print(f"📊 Creating {len(pairs)} evaluations...") - + results = { "strategy": description, "evaluations": [], "errors": [], "summary": {"total": len(pairs), "successful": 0, "failed": 0} } - + for model, benchmark in pairs: try: evaluation = self.client.evaluations.create( model=model, benchmark=benchmark ) - + if evaluation: results["evaluations"].append({ "model": model, @@ -755,7 +751,7 @@ class EvaluationFactory: }) results["summary"]["failed"] += 1 print(f"❌ {model} + {benchmark}: Failed") - + except atlas.APIError as e: results["errors"].append({ "model": model, @@ -765,7 +761,7 @@ class EvaluationFactory: }) results["summary"]["failed"] += 1 print(f"❌ {model} + {benchmark}: {e}") - + return results # Usage @@ -782,7 +778,7 @@ all_results = [] for strategy in strategies: result = factory.execute_strategy(strategy) all_results.append(result) - + print(f"\n📈 Strategy Results: {result['strategy']}") print(f" Successful: {result['summary']['successful']}") print(f" Failed: {result['summary']['failed']}") diff --git a/docs/getting-started/authentication.md b/docs/getting-started/authentication.md index 5658bb4..61adcc5 100644 --- a/docs/getting-started/authentication.md +++ b/docs/getting-started/authentication.md @@ -7,7 +7,7 @@ The Atlas Python SDK uses API key authentication to securely access the LayerLen You need three pieces of information to use the Atlas SDK: 1. **API Key** - Your secret API key for authentication -2. **Organization ID** - Your organization identifier +2. **Organization ID** - Your organization identifier 3. **Project ID** - The project you want to work with ## Getting Your Credentials @@ -24,24 +24,21 @@ The most secure way to configure authentication is using environment variables: ### Setting Environment Variables **Linux/macOS:** + ```bash export LAYERLENS_ATLAS_API_KEY="your_api_key_here" -export LAYERLENS_ATLAS_ORG_ID="your_org_id_here" -export LAYERLENS_ATLAS_PROJECT_ID="your_project_id_here" ``` **Windows (Command Prompt):** + ```cmd set LAYERLENS_ATLAS_API_KEY=your_api_key_here -set LAYERLENS_ATLAS_ORG_ID=your_org_id_here -set LAYERLENS_ATLAS_PROJECT_ID=your_project_id_here ``` **Windows (PowerShell):** + ```powershell $env:LAYERLENS_ATLAS_API_KEY="your_api_key_here" -$env:LAYERLENS_ATLAS_ORG_ID="your_org_id_here" -$env:LAYERLENS_ATLAS_PROJECT_ID="your_project_id_here" ``` ### Using a `.env` File @@ -50,8 +47,6 @@ Create a `.env` file in your project root: ```bash LAYERLENS_ATLAS_API_KEY=your_api_key_here -LAYERLENS_ATLAS_ORG_ID=your_org_id_here -LAYERLENS_ATLAS_PROJECT_ID=your_project_id_here ``` Then load it in your Python code: @@ -90,11 +85,7 @@ You can also pass credentials directly to the client: ```python from atlas import Atlas -client = Atlas( - api_key="your_api_key_here", - organization_id="your_org_id_here", - project_id="your_project_id_here" -) +client = Atlas(api_key="your_api_key_here") ``` ### Mixed Configuration @@ -105,11 +96,7 @@ You can mix environment variables with explicit parameters: import os from atlas import Atlas -client = Atlas( - api_key=os.environ.get("LAYERLENS_ATLAS_API_KEY"), - organization_id="override_org_id", # Override from environment - project_id=os.environ.get("LAYERLENS_ATLAS_PROJECT_ID") -) +client = Atlas(api_key=os.environ.get("LAYERLENS_ATLAS_API_KEY")) ``` ## Advanced Configuration @@ -166,4 +153,4 @@ except atlas.AtlasError as e: ## Next Steps -Once authentication is configured, proceed to the [Quick Start Guide](quickstart.md) to make your first API call. \ No newline at end of file +Once authentication is configured, proceed to the [Quick Start Guide](quickstart.md) to make your first API call. diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index e47aaf6..1c34cde 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -21,8 +21,6 @@ from atlas import Atlas # Initialize the client (uses environment variables) client = Atlas( api_key=os.environ.get("LAYERLENS_ATLAS_API_KEY"), - organization_id=os.environ.get("LAYERLENS_ATLAS_ORG_ID"), - project_id=os.environ.get("LAYERLENS_ATLAS_PROJECT_ID") ) # Create an evaluation @@ -71,10 +69,10 @@ Once your evaluation is complete, you can retrieve detailed results: # Wait for evaluation to complete, then get results if evaluation and evaluation.status == "completed": results = client.results.get(evaluation_id=evaluation.id) - + if results: print(f"📊 Retrieved {len(results)} results") - + # Examine the first result first_result = results[0] print(f"\nFirst Result:") @@ -99,44 +97,44 @@ from atlas import Atlas def main(): # Initialize client client = Atlas() - + print("🚀 Creating evaluation...") - + try: # Create evaluation evaluation = client.evaluations.create( model="gpt-3.5-turbo", benchmark="mmlu" ) - + if not evaluation: print("❌ Failed to create evaluation") return - + print(f"✅ Evaluation created: {evaluation.id}") print(f" Status: {evaluation.status}") - + # Poll for completion (in a real app, use webhooks instead) print("\n⏳ Waiting for evaluation to complete...") - + while evaluation.status not in ["completed", "failed", "cancelled"]: time.sleep(30) # Wait 30 seconds - + # In practice, you'd re-fetch the evaluation status # This is a simplified example print(f" Status: {evaluation.status}") - + if evaluation.status == "completed": print(f"🎉 Evaluation completed!") print(f" Accuracy: {evaluation.accuracy:.2%}") - + # Get detailed results results = client.results.get(evaluation_id=evaluation.id) print(f"📊 Retrieved {len(results) if results else 0} detailed results") - + else: print(f"❌ Evaluation failed with status: {evaluation.status}") - + except Exception as e: print(f"❌ Error: {e}") @@ -194,4 +192,4 @@ Now that you've successfully made your first API call: - **Documentation**: Browse the complete [API Reference](../api-reference/) - **Examples**: Check out more [Code Examples](../examples/) - **Support**: Contact LayerLens support through your dashboard for technical assistance -- **Status**: Check [status.layerlens.com](https://status.layerlens.com) for service updates \ No newline at end of file +- **Status**: Check [status.layerlens.com](https://status.layerlens.com) for service updates diff --git a/docs/security/api-key-management.md b/docs/security/api-key-management.md index ca89c19..bc55cb0 100644 --- a/docs/security/api-key-management.md +++ b/docs/security/api-key-management.md @@ -9,6 +9,7 @@ This guide covers best practices for securely managing your Atlas API keys throu API keys are sensitive credentials that provide access to your Atlas organization and projects. They should be treated with the same level of security as passwords or other authentication tokens. **Risks of compromised API keys**: + - Unauthorized access to your evaluations and data - Unintended usage charges on your account - Potential data breaches or intellectual property theft @@ -28,6 +29,7 @@ API keys are sensitive credentials that provide access to your Atlas organizatio ### Environment Variables (Recommended) **✅ Good - Using environment variables**: + ```python import os from atlas import Atlas @@ -35,47 +37,42 @@ from atlas import Atlas # Secure: Load from environment variables client = Atlas( api_key=os.getenv('LAYERLENS_ATLAS_API_KEY'), - organization_id=os.getenv('LAYERLENS_ATLAS_ORG_ID'), - project_id=os.getenv('LAYERLENS_ATLAS_PROJECT_ID') ) ``` + ### Setting Environment Variables Securely **Linux/macOS**: + ```bash # Add to your shell profile (.bashrc, .zshrc, etc.) export LAYERLENS_ATLAS_API_KEY="sk-your-key-here" -export LAYERLENS_ATLAS_ORG_ID="org-your-org-here" -export LAYERLENS_ATLAS_PROJECT_ID="proj-your-project-here" # Reload your shell configuration source ~/.bashrc # or ~/.zshrc ``` **Windows**: + ```cmd # Command Prompt (persistent) setx LAYERLENS_ATLAS_API_KEY "sk-your-key-here" -setx LAYERLENS_ATLAS_ORG_ID "org-your-org-here" -setx LAYERLENS_ATLAS_PROJECT_ID "proj-your-project-here" # PowerShell (session-only) $env:LAYERLENS_ATLAS_API_KEY="sk-your-key-here" -$env:LAYERLENS_ATLAS_ORG_ID="org-your-org-here" -$env:LAYERLENS_ATLAS_PROJECT_ID="proj-your-project-here" ``` ### Using .env Files **Create a .env file** (never commit this to version control): + ```bash # .env LAYERLENS_ATLAS_API_KEY=sk-your-key-here -LAYERLENS_ATLAS_ORG_ID=org-your-org-here -LAYERLENS_ATLAS_PROJECT_ID=proj-your-project-here ``` **Load .env file in Python**: + ```python from dotenv import load_dotenv import os @@ -90,6 +87,7 @@ client = Atlas() ``` **Important**: Add `.env` to your `.gitignore` file: + ```bash # .gitignore .env @@ -103,6 +101,7 @@ client = Atlas() #### Using External Secret Managers **AWS Secrets Manager**: + ```python import boto3 import json @@ -112,15 +111,13 @@ def get_atlas_credentials_from_aws(): """Retrieve Atlas credentials from AWS Secrets Manager""" session = boto3.session.Session() client = session.client('secretsmanager', region_name='us-east-1') - + try: response = client.get_secret_value(SecretId='layerlens/atlas/credentials') secrets = json.loads(response['SecretString']) - + return { 'api_key': secrets['api_key'], - 'organization_id': secrets['organization_id'], - 'project_id': secrets['project_id'] } except Exception as e: print(f"Error retrieving secrets: {e}") @@ -131,6 +128,7 @@ credentials = get_atlas_credentials_from_aws() if credentials: client = Atlas(**credentials) ``` + ## Environment-Specific Key Management ### Separating Development and Production Keys @@ -144,25 +142,19 @@ from atlas import Atlas def get_atlas_client(): """Get Atlas client based on environment""" environment = os.getenv('ATLAS_ENV', 'development') - + if environment == 'development': return Atlas( api_key=os.getenv('DEV_ATLAS_API_KEY'), - organization_id=os.getenv('DEV_ATLAS_ORG_ID'), - project_id=os.getenv('DEV_ATLAS_PROJECT_ID'), base_url=os.getenv('DEV_ATLAS_BASE_URL') # Dev server if applicable ) elif environment == 'staging': return Atlas( api_key=os.getenv('STAGING_ATLAS_API_KEY'), - organization_id=os.getenv('STAGING_ATLAS_ORG_ID'), - project_id=os.getenv('STAGING_ATLAS_PROJECT_ID') ) elif environment == 'production': return Atlas( api_key=os.getenv('PROD_ATLAS_API_KEY'), - organization_id=os.getenv('PROD_ATLAS_ORG_ID'), - project_id=os.getenv('PROD_ATLAS_PROJECT_ID') ) else: raise ValueError(f"Unknown environment: {environment}") @@ -172,48 +164,39 @@ client = get_atlas_client() ``` **Environment-specific .env files**: + ```bash # .env.development DEV_ATLAS_API_KEY=sk-dev-key-here -DEV_ATLAS_ORG_ID=dev-org-id -DEV_ATLAS_PROJECT_ID=dev-project-id DEV_ATLAS_BASE_URL=https://dev-api.layerlens.com # .env.production PROD_ATLAS_API_KEY=sk-prod-key-here -PROD_ATLAS_ORG_ID=prod-org-id -PROD_ATLAS_PROJECT_ID=prod-project-id ``` ### Container and Deployment Security **Docker Secrets**: + ```yaml # docker-compose.yml -version: '3.8' +version: "3.8" services: atlas-app: image: your-app:latest secrets: - atlas_api_key - - atlas_org_id - - atlas_project_id environment: - LAYERLENS_ATLAS_API_KEY_FILE=/run/secrets/atlas_api_key - - LAYERLENS_ATLAS_ORG_ID_FILE=/run/secrets/atlas_org_id - - LAYERLENS_ATLAS_PROJECT_ID_FILE=/run/secrets/atlas_project_id secrets: atlas_api_key: file: ./secrets/atlas_api_key.txt - atlas_org_id: - file: ./secrets/atlas_org_id.txt - atlas_project_id: - file: ./secrets/atlas_project_id.txt ``` **Reading Docker secrets in Python**: + ```python import os from atlas import Atlas @@ -230,29 +213,15 @@ def read_docker_secret(secret_name): def get_atlas_client_from_docker_secrets(): """Initialize Atlas client using Docker secrets""" # Try Docker secrets first, fall back to environment variables - api_key = (read_docker_secret('atlas_api_key') or + api_key = (read_docker_secret('atlas_api_key') or os.getenv('LAYERLENS_ATLAS_API_KEY')) - - org_id = (read_docker_secret('atlas_org_id') or - os.getenv('LAYERLENS_ATLAS_ORG_ID')) - - project_id = (read_docker_secret('atlas_project_id') or - os.getenv('LAYERLENS_ATLAS_PROJECT_ID')) - - if not all([api_key, org_id, project_id]): - raise ValueError("Missing required Atlas credentials") - - return Atlas( - api_key=api_key, - organization_id=org_id, - project_id=project_id - ) + + return Atlas(api_key=api_key) # Usage client = get_atlas_client_from_docker_secrets() ``` - ## Security Checklist ### Development Security Checklist diff --git a/docs/security/environment-variables.md b/docs/security/environment-variables.md index 437864e..b7b6f5e 100644 --- a/docs/security/environment-variables.md +++ b/docs/security/environment-variables.md @@ -10,61 +10,51 @@ Environment variables provide a secure way to configure your Atlas SDK without h The Atlas SDK uses these primary environment variables: -| Variable | Description | Required | Example | -|----------|-------------|----------|---------| -| `LAYERLENS_ATLAS_API_KEY` | Your Atlas API key | Yes | `sk-abc123...` | -| `LAYERLENS_ATLAS_ORG_ID` | Organization identifier | Yes | `org-abc123` | -| `LAYERLENS_ATLAS_PROJECT_ID` | Project identifier | Yes | `proj-xyz789` | +| Variable | Description | Required | Example | +| ------------------------- | ------------------ | -------- | -------------- | +| `LAYERLENS_ATLAS_API_KEY` | Your Atlas API key | Yes | `sk-abc123...` | ## Setting Environment Variables ### Development Environment **Linux/macOS (Bash/Zsh)**: + ```bash # Set for current session export LAYERLENS_ATLAS_API_KEY="sk-your-key-here" -export LAYERLENS_ATLAS_ORG_ID="org-your-org-here" -export LAYERLENS_ATLAS_PROJECT_ID="proj-your-project-here" # Add to shell profile for persistence (.bashrc, .zshrc, etc.) echo 'export LAYERLENS_ATLAS_API_KEY="sk-your-key-here"' >> ~/.bashrc -echo 'export LAYERLENS_ATLAS_ORG_ID="org-your-org-here"' >> ~/.bashrc -echo 'export LAYERLENS_ATLAS_PROJECT_ID="proj-your-project-here"' >> ~/.bashrc # Reload shell configuration source ~/.bashrc ``` **Windows Command Prompt**: + ```cmd # Set for current session set LAYERLENS_ATLAS_API_KEY=sk-your-key-here -set LAYERLENS_ATLAS_ORG_ID=org-your-org-here -set LAYERLENS_ATLAS_PROJECT_ID=proj-your-project-here # Set permanently (requires admin rights) setx LAYERLENS_ATLAS_API_KEY "sk-your-key-here" -setx LAYERLENS_ATLAS_ORG_ID "org-your-org-here" -setx LAYERLENS_ATLAS_PROJECT_ID "proj-your-project-here" ``` **Windows PowerShell**: + ```powershell # Set for current session $env:LAYERLENS_ATLAS_API_KEY="sk-your-key-here" -$env:LAYERLENS_ATLAS_ORG_ID="org-your-org-here" -$env:LAYERLENS_ATLAS_PROJECT_ID="proj-your-project-here" # Set permanently for current user [Environment]::SetEnvironmentVariable("LAYERLENS_ATLAS_API_KEY", "sk-your-key-here", "User") -[Environment]::SetEnvironmentVariable("LAYERLENS_ATLAS_ORG_ID", "org-your-org-here", "User") -[Environment]::SetEnvironmentVariable("LAYERLENS_ATLAS_PROJECT_ID", "proj-your-project-here", "User") ``` ### Verification **Check if variables are set correctly**: + ```python import os @@ -72,17 +62,15 @@ def verify_atlas_environment(): """Verify Atlas environment variables are configured""" required_vars = { 'LAYERLENS_ATLAS_API_KEY': 'API Key', - 'LAYERLENS_ATLAS_ORG_ID': 'Organization ID', - 'LAYERLENS_ATLAS_PROJECT_ID': 'Project ID' } - + print("🔍 Atlas Environment Variable Check") print("=" * 40) - + all_set = True for var_name, description in required_vars.items(): value = os.getenv(var_name) - + if value: # Don't print the full value for security masked_value = f"{value[:8]}..." if len(value) > 8 else "***" @@ -90,13 +78,13 @@ def verify_atlas_environment(): else: print(f"❌ {description}: Not set") all_set = False - - + + if all_set: print(f"\n🎉 All required variables are set!") else: print(f"\n⚠️ Some required variables are missing") - + return all_set # Run verification @@ -108,17 +96,17 @@ verify_atlas_environment() ### Creating .env Files **.env file for development**: + ```bash # .env LAYERLENS_ATLAS_API_KEY=sk-development-key-here -LAYERLENS_ATLAS_ORG_ID=org-dev-12345 -LAYERLENS_ATLAS_PROJECT_ID=proj-dev-67890 # Optional: Set environment name ATLAS_ENV=development ``` **Loading .env files in Python**: + ```python from dotenv import load_dotenv import os @@ -147,27 +135,25 @@ except Exception as e: **Create separate files for each environment**: **.env.development**: + ```bash LAYERLENS_ATLAS_API_KEY=sk-dev-key-here -LAYERLENS_ATLAS_ORG_ID=org-dev-12345 -LAYERLENS_ATLAS_PROJECT_ID=proj-dev-67890 ``` **.env.staging**: + ```bash LAYERLENS_ATLAS_API_KEY=sk-staging-key-here -LAYERLENS_ATLAS_ORG_ID=org-staging-12345 -LAYERLENS_ATLAS_PROJECT_ID=proj-staging-67890 ``` **.env.production**: + ```bash LAYERLENS_ATLAS_API_KEY=sk-prod-key-here -LAYERLENS_ATLAS_ORG_ID=org-prod-12345 -LAYERLENS_ATLAS_PROJECT_ID=proj-prod-67890 ``` **Load environment-specific configuration**: + ```python import os from dotenv import load_dotenv @@ -177,10 +163,10 @@ def load_environment_config(): """Load environment-specific configuration""" # Determine environment env = os.getenv('ATLAS_ENV', 'development') - + # Load base .env file first load_dotenv('.env') - + # Override with environment-specific file env_file = f'.env.{env}' if os.path.exists(env_file): @@ -188,21 +174,21 @@ def load_environment_config(): print(f"📄 Loaded configuration from {env_file}") else: print(f"⚠️ Environment file {env_file} not found, using base configuration") - + return env def get_atlas_client(): """Get Atlas client with environment-specific configuration""" env = load_environment_config() - + # Create client with loaded environment variables client = Atlas() - + # Log configuration (without sensitive data) print(f"🌍 Environment: {env}") print(f"🔗 Base URL: {client.base_url}") print(f"⏱️ Timeout: {client.timeout}s") - + return client # Usage diff --git a/docs/troubleshooting/authentication.md b/docs/troubleshooting/authentication.md index fbf3ee6..958ca27 100644 --- a/docs/troubleshooting/authentication.md +++ b/docs/troubleshooting/authentication.md @@ -17,11 +17,11 @@ The Atlas SDK uses API key-based authentication with three required components: **Error**: `AuthenticationError: Invalid API key` **Symptoms**: + - 401 Unauthorized responses - "Invalid API key" error messages - Authentication fails immediately - ### Missing Required Configuration **Error**: `AtlasError: The api_key client option must be set either by passing api_key to the client or by setting the LAYERLENS_ATLAS_API_KEY environment variable` @@ -29,46 +29,39 @@ The Atlas SDK uses API key-based authentication with three required components: **Solutions**: 1. **Check all required environment variables**: + ```bash # Linux/macOS echo $LAYERLENS_ATLAS_API_KEY - echo $LAYERLENS_ATLAS_ORG_ID - echo $LAYERLENS_ATLAS_PROJECT_ID - + # Windows echo %LAYERLENS_ATLAS_API_KEY% - echo %LAYERLENS_ATLAS_ORG_ID% - echo %LAYERLENS_ATLAS_PROJECT_ID% ``` 2. **Set environment variables properly**: + ```bash # Linux/macOS - in your shell profile (.bashrc, .zshrc, etc.) export LAYERLENS_ATLAS_API_KEY="sk-..." - export LAYERLENS_ATLAS_ORG_ID="org-..." - export LAYERLENS_ATLAS_PROJECT_ID="proj-..." - + # Windows - persistently setx LAYERLENS_ATLAS_API_KEY "sk-..." - setx LAYERLENS_ATLAS_ORG_ID "org-..." - setx LAYERLENS_ATLAS_PROJECT_ID "proj-..." ``` 3. **Use .env file**: + ```bash # Create .env file in your project root LAYERLENS_ATLAS_API_KEY=sk-your-key-here - LAYERLENS_ATLAS_ORG_ID=org-your-org-here - LAYERLENS_ATLAS_PROJECT_ID=proj-your-project-here ``` - + ```python # Load .env file in your Python code from dotenv import load_dotenv import os - + load_dotenv() - + from atlas import Atlas client = Atlas() ``` @@ -78,20 +71,22 @@ The Atlas SDK uses API key-based authentication with three required components: **Error**: `PermissionDeniedError: 403 Forbidden` **Symptoms**: + - Valid API key but still get 403 errors - Can authenticate but cannot create evaluations - Access denied to specific models or benchmarks **Diagnosis**: + ```python import atlas from atlas import Atlas def diagnose_permissions(): client = Atlas() - + print("🔍 Permission Diagnosis:") - + # Test basic access try: # This should fail with specific error types @@ -108,16 +103,16 @@ def diagnose_permissions(): print(" ✅ Authentication works (model/benchmark not found is normal)") except Exception as e: print(f" ❓ Unexpected error: {e}") - + # Test with common models/benchmarks test_combinations = [ ("gpt-3.5-turbo", "mmlu"), ("gpt-4", "hellaswag"), ("claude-3-sonnet", "arc-challenge") ] - + print("\n Testing access to specific resources:") - + for model, benchmark in test_combinations: try: evaluation = client.evaluations.create(model=model, benchmark=benchmark) @@ -138,11 +133,13 @@ diagnose_permissions() **Problem**: Valid API key but wrong organization or project **Symptoms**: + - Authentication succeeds - Cannot access expected models or benchmarks - Permission errors for resources you should have access to **Diagnosis**: + ```python import os from atlas import Atlas @@ -151,28 +148,15 @@ import atlas def verify_org_project_access(): # Test with different org/project combinations api_key = os.getenv('LAYERLENS_ATLAS_API_KEY') - + if not api_key: print("❌ No API key found") return - - # Test current configuration - current_org = os.getenv('LAYERLENS_ATLAS_ORG_ID') - current_project = os.getenv('LAYERLENS_ATLAS_PROJECT_ID') - - print(f"Testing current configuration:") - print(f" Organization: {current_org}") - print(f" Project: {current_project}") - + try: - client = Atlas( - api_key=api_key, - organization_id=current_org, - project_id=current_project - ) - + client = Atlas(api_key=api_key) evaluation = client.evaluations.create(model="test", benchmark="test") - + except atlas.AuthenticationError: print(" ❌ Authentication failed") except atlas.PermissionDeniedError: @@ -183,4 +167,4 @@ def verify_org_project_access(): print(f" ❓ Error: {e}") verify_org_project_access() -``` \ No newline at end of file +``` diff --git a/docs/troubleshooting/common-issues.md b/docs/troubleshooting/common-issues.md index 1df848e..558a315 100644 --- a/docs/troubleshooting/common-issues.md +++ b/docs/troubleshooting/common-issues.md @@ -11,12 +11,14 @@ This guide covers the most frequently encountered issues when using the Atlas Py **Solutions**: 1. **Check Python version compatibility**: + ```bash python --version # Atlas requires Python 3.8+ ``` 2. **Update pip and try again**: + ```bash python -m pip install --upgrade pip pip install atlas @@ -34,53 +36,45 @@ This guide covers the most frequently encountered issues when using the Atlas Py **Problem**: `AtlasError: The api_key client option must be set` **Diagnosis**: + ```python import os print(f"API Key: {os.getenv('LAYERLENS_ATLAS_API_KEY', 'NOT SET')}") -print(f"Org ID: {os.getenv('LAYERLENS_ATLAS_ORG_ID', 'NOT SET')}") -print(f"Project ID: {os.getenv('LAYERLENS_ATLAS_PROJECT_ID', 'NOT SET')}") ``` **Solutions**: 1. **Set environment variables**: + ```bash # Linux/macOS export LAYERLENS_ATLAS_API_KEY="your_api_key_here" - export LAYERLENS_ATLAS_ORG_ID="your_org_id_here" - export LAYERLENS_ATLAS_PROJECT_ID="your_project_id_here" - + # Windows set LAYERLENS_ATLAS_API_KEY=your_api_key_here - set LAYERLENS_ATLAS_ORG_ID=your_org_id_here - set LAYERLENS_ATLAS_PROJECT_ID=your_project_id_here ``` 2. **Use .env file**: + ```bash # Create .env file LAYERLENS_ATLAS_API_KEY=your_api_key_here - LAYERLENS_ATLAS_ORG_ID=your_org_id_here - LAYERLENS_ATLAS_PROJECT_ID=your_project_id_here ``` - + ```python from dotenv import load_dotenv load_dotenv() - + from atlas import Atlas client = Atlas() ``` 3. **Pass explicitly to client**: + ```python from atlas import Atlas - - client = Atlas( - api_key="your_api_key_here", - organization_id="your_org_id_here", - project_id="your_project_id_here" - ) + + client = Atlas(api_key="your_api_key_here") ``` ### Where to Get Help @@ -96,17 +90,19 @@ Include this information when reporting issues: 1. **Environment details** (from debug info above) 2. **Complete error message** with stack trace 3. **Minimal reproducible example**: + ```python from atlas import Atlas - + client = Atlas() - + # Minimal code that demonstrates the problem evaluation = client.evaluations.create( model="gpt-4", benchmark="mmlu" ) ``` + 4. **Expected vs actual behavior** 5. **Steps to reproduce** 6. **Workarounds attempted** diff --git a/docs/troubleshooting/error-codes.md b/docs/troubleshooting/error-codes.md index 227b16f..fc27dc0 100644 --- a/docs/troubleshooting/error-codes.md +++ b/docs/troubleshooting/error-codes.md @@ -26,11 +26,13 @@ AtlasError (Base exception) ### 400 - Bad Request (`BadRequestError`) **When it occurs**: + - Invalid request parameters - Missing required fields - Malformed request data **Common causes**: + ```python # Empty or invalid parameters client.evaluations.create(model="", benchmark="") # Empty strings @@ -41,6 +43,7 @@ client.evaluations.create(model=123, benchmark="mmlu") # Wrong type ``` **Example error**: + ```python import atlas from atlas import Atlas @@ -55,7 +58,9 @@ except atlas.BadRequestError as e: ``` **Solutions**: + 1. **Validate parameters before making requests**: + ```python def validate_evaluation_params(model, benchmark): if not model or not isinstance(model, str): @@ -63,17 +68,18 @@ except atlas.BadRequestError as e: if not benchmark or not isinstance(benchmark, str): raise ValueError("Benchmark must be a non-empty string") return True - + if validate_evaluation_params(model, benchmark): evaluation = client.evaluations.create(model=model, benchmark=benchmark) ``` 2. **Check parameter format requirements**: + ```python # Ensure parameters meet expected format model = model.strip() if model else "" benchmark = benchmark.strip() if benchmark else "" - + if len(model) < 2 or len(benchmark) < 2: raise ValueError("Model and benchmark names must be at least 2 characters") ``` @@ -81,11 +87,13 @@ except atlas.BadRequestError as e: ### 401 - Unauthorized (`AuthenticationError`) **When it occurs**: + - Missing API key - Invalid or expired API key - API key format issues **Common causes**: + ```python # Missing API key client = Atlas(api_key=None) @@ -98,6 +106,7 @@ client = Atlas(api_key="sk-old-expired-key") ``` **Example error**: + ```python import atlas from atlas import Atlas @@ -112,10 +121,12 @@ except atlas.AuthenticationError as e: ``` **Solutions**: + 1. **Verify API key configuration**: + ```python import os - + api_key = os.getenv('LAYERLENS_ATLAS_API_KEY') if not api_key: print("❌ API key not found in environment variables") @@ -126,12 +137,14 @@ except atlas.AuthenticationError as e: ``` 2. **Regenerate API key**: + - Log into Atlas dashboard - Go to Settings > API Keys - Generate new API key - Update environment variables 3. **Test authentication separately**: + ```python def test_authentication(api_key): try: @@ -144,7 +157,7 @@ except atlas.AuthenticationError as e: return True, "Authentication successful (test resources not found is expected)" except Exception as e: return False, f"Unexpected error: {e}" - + is_valid, message = test_authentication(your_api_key) print(f"Authentication test: {message}") ``` @@ -152,11 +165,13 @@ except atlas.AuthenticationError as e: ### 403 - Forbidden (`PermissionDeniedError`) **When it occurs**: + - Valid API key but insufficient permissions - No access to specific models or benchmarks - Organization/project access issues **Example error**: + ```python import atlas from atlas import Atlas @@ -171,22 +186,14 @@ except atlas.PermissionDeniedError as e: ``` **Solutions**: -1. **Check organization and project IDs**: - ```python - import os - - print(f"Organization ID: {os.getenv('LAYERLENS_ATLAS_ORG_ID')}") - print(f"Project ID: {os.getenv('LAYERLENS_ATLAS_PROJECT_ID')}") - - # Verify these match your Atlas dashboard settings - ``` -2. **Test access to different resources**: +1. **Test access to different resources**: + ```python def test_resource_access(models, benchmarks): client = Atlas() access_matrix = {} - + for model in models: access_matrix[model] = {} for benchmark in benchmarks: @@ -199,17 +206,17 @@ except atlas.PermissionDeniedError as e: access_matrix[model][benchmark] = "❓ Resource not found" except Exception as e: access_matrix[model][benchmark] = f"❓ {type(e).__name__}" - + return access_matrix - + # Test common resources models = ["gpt-3.5-turbo", "gpt-4", "claude-3-sonnet"] benchmarks = ["mmlu", "hellaswag", "arc-easy"] - + access = test_resource_access(models, benchmarks) ``` -3. **Contact administrator for access**: +2. **Contact administrator for access**: - Request access to specific models or benchmarks - Verify project membership - Check organization-level permissions @@ -217,12 +224,14 @@ except atlas.PermissionDeniedError as e: ### 404 - Not Found (`NotFoundError`) **When it occurs**: + - Model ID doesn't exist - Benchmark ID doesn't exist - Evaluation ID not found (for results) - Resource doesn't exist in your organization **Example error**: + ```python import atlas from atlas import Atlas @@ -236,20 +245,22 @@ except atlas.NotFoundError as e: ``` **Solutions**: + 1. **Verify resource names**: + ```python def find_available_models(): """Try common model names to find available ones""" client = Atlas() - + common_models = [ "gpt-4", "gpt-3.5-turbo", "gpt-4-turbo", "claude-3-opus", "claude-3-sonnet", "claude-3-haiku", "llama-2-70b", "llama-2-13b", "mistral-7b" ] - + available_models = [] - + for model in common_models: try: # Test with common benchmark @@ -265,19 +276,20 @@ except atlas.NotFoundError as e: except Exception: # Other errors - model might exist available_models.append(f"{model} (unknown status)") - + return available_models - + available = find_available_models() print(f"Available models: {available}") ``` 2. **Check spelling and case sensitivity**: + ```python # Common mistakes correct_names = { "GPT-4": "gpt-4", - "GPT4": "gpt-4", + "GPT4": "gpt-4", "MMLU": "mmlu", "HellaSwag": "hellaswag", "arc_challenge": "arc-challenge" # Underscore vs hyphen @@ -292,11 +304,13 @@ except atlas.NotFoundError as e: ### 409 - Conflict (`ConflictError`) **When it occurs**: + - Resource already exists - Conflicting operation in progress - State conflict (e.g., trying to modify completed evaluation) **Example error**: + ```python import atlas from atlas import Atlas @@ -311,6 +325,7 @@ except atlas.ConflictError as e: ``` **Solutions**: + 1. **Check current resource state** 2. **Wait for ongoing operations to complete** 3. **Use different resource identifiers** @@ -318,11 +333,13 @@ except atlas.ConflictError as e: ### 422 - Unprocessable Entity (`UnprocessableEntityError`) **When it occurs**: + - Valid request format but business logic prevents processing - Parameter combinations that don't make sense - Resource constraints exceeded **Example error**: + ```python import atlas from atlas import Atlas @@ -337,6 +354,7 @@ except atlas.UnprocessableEntityError as e: ``` **Solutions**: + 1. **Check business logic constraints** 2. **Verify parameter combinations are valid** 3. **Review API documentation for limitations** @@ -344,22 +362,24 @@ except atlas.UnprocessableEntityError as e: ### 429 - Rate Limited (`RateLimitError`) **When it occurs**: + - Too many requests in short time period - API rate limits exceeded - Organization-level quotas reached **Example error**: + ```python import atlas from atlas import Atlas try: client = Atlas() - + # Making too many requests quickly for i in range(100): evaluation = client.evaluations.create(model="gpt-4", benchmark="mmlu") - + except atlas.RateLimitError as e: print(f"Rate limited: {e}") print(f"Status code: {e.status_code}") # 429 @@ -367,50 +387,53 @@ except atlas.RateLimitError as e: ``` **Solutions**: + 1. **Implement retry with backoff**: + ```python import time import atlas from atlas import Atlas - + def create_evaluation_with_rate_limit_handling(model, benchmark, max_retries=3): client = Atlas() - + for attempt in range(max_retries): try: return client.evaluations.create(model=model, benchmark=benchmark) - + except atlas.RateLimitError as e: retry_after = e.response.headers.get('retry-after') - + if retry_after: wait_time = int(retry_after) print(f"Rate limited. Waiting {wait_time}s as requested...") else: wait_time = (2 ** attempt) * 60 # Exponential backoff print(f"Rate limited. Waiting {wait_time}s...") - + if attempt < max_retries - 1: time.sleep(wait_time) else: raise # Re-raise on final attempt - + return None - + evaluation = create_evaluation_with_rate_limit_handling("gpt-4", "mmlu") ``` 2. **Add delays between requests**: + ```python import time - + evaluations = [] models = ["gpt-4", "claude-3-opus", "llama-2-70b"] - + for model in models: evaluation = client.evaluations.create(model=model, benchmark="mmlu") evaluations.append(evaluation) - + # Wait between requests to avoid rate limits time.sleep(2) # 2-second delay ``` @@ -427,11 +450,13 @@ except atlas.RateLimitError as e: ### 500+ - Server Errors (`InternalServerError`) **When it occurs**: + - Atlas API server errors - Temporary service unavailability - Infrastructure issues **Example error**: + ```python import atlas from atlas import Atlas @@ -446,24 +471,26 @@ except atlas.InternalServerError as e: ``` **Solutions**: + 1. **Implement retry logic**: + ```python import time import atlas from atlas import Atlas - + def create_evaluation_with_server_error_handling(model, benchmark): client = Atlas() max_retries = 3 base_delay = 5 # seconds - + for attempt in range(max_retries): try: return client.evaluations.create(model=model, benchmark=benchmark) - + except atlas.InternalServerError as e: print(f"Server error on attempt {attempt + 1}: {e}") - + if attempt < max_retries - 1: # Exponential backoff with jitter delay = base_delay * (2 ** attempt) + random.uniform(0, 2) @@ -472,11 +499,12 @@ except atlas.InternalServerError as e: else: print(f"All {max_retries} attempts failed. Request ID: {e.request_id}") raise - + return None ``` 2. **Check service status**: + - Visit LayerLens status page - Check for ongoing incidents - Monitor Atlas service announcements @@ -491,12 +519,14 @@ except atlas.InternalServerError as e: ### `APIConnectionError` **When it occurs**: + - Network connectivity issues - DNS resolution failures - Firewall blocking requests - Proxy configuration problems **Example**: + ```python import atlas from atlas import Atlas @@ -510,7 +540,9 @@ except atlas.APIConnectionError as e: ``` **Solutions**: + 1. **Test basic connectivity**: + ```bash ping api.layerlens.com curl -I https://api.layerlens.com @@ -522,11 +554,13 @@ except atlas.APIConnectionError as e: ### `APITimeoutError` **When it occurs**: + - Request takes longer than configured timeout - Network latency issues - Server processing delays **Example**: + ```python import atlas from atlas import Atlas @@ -539,16 +573,19 @@ except atlas.APITimeoutError as e: ``` **Solutions**: + 1. **Increase timeout**: + ```python client = Atlas(timeout=600.0) # 10 minutes ``` 2. **Use appropriate timeouts for operation type**: + ```python # Quick operations quick_client = Atlas(timeout=60.0) - + # Long-running evaluations patient_client = Atlas(timeout=1800.0) # 30 minutes ``` @@ -569,62 +606,62 @@ logger = logging.getLogger(__name__) def robust_create_evaluation(model: str, benchmark: str): """Create evaluation with comprehensive error handling""" client = Atlas() - + try: evaluation = client.evaluations.create(model=model, benchmark=benchmark) - + if evaluation: logger.info(f"✅ Evaluation created: {evaluation.id}") return evaluation else: logger.warning("⚠️ Evaluation creation returned None") return None - + except atlas.BadRequestError as e: logger.error(f"❌ Bad request - check parameters: {e}") logger.error(f" Model: '{model}', Benchmark: '{benchmark}'") return None - + except atlas.AuthenticationError as e: logger.error(f"❌ Authentication failed: {e}") logger.error(" Check API key configuration") return None - + except atlas.PermissionDeniedError as e: logger.error(f"❌ Permission denied: {e}") logger.error(f" No access to model '{model}' or benchmark '{benchmark}'") return None - + except atlas.NotFoundError as e: logger.error(f"❌ Resource not found: {e}") logger.error(f" Model '{model}' or benchmark '{benchmark}' doesn't exist") return None - + except atlas.RateLimitError as e: retry_after = e.response.headers.get('retry-after', 60) logger.warning(f"⏳ Rate limited - retry after {retry_after}s") return None # Could implement retry logic here - + except atlas.InternalServerError as e: logger.error(f"❌ Server error: {e}") logger.error(f" Request ID: {e.request_id} (include in support requests)") return None - + except atlas.APITimeoutError as e: logger.error(f"⏰ Request timed out: {e}") logger.error(" Consider increasing timeout or checking network") return None - + except atlas.APIConnectionError as e: logger.error(f"🔌 Connection error: {e}") logger.error(" Check network connectivity and proxy settings") return None - + except atlas.APIError as e: logger.error(f"❌ Unexpected API error: {e}") logger.error(f" Type: {type(e).__name__}") return None - + except Exception as e: logger.error(f"❌ Unexpected error: {e}") logger.error(f" Type: {type(e).__name__}") @@ -644,10 +681,10 @@ import random class AtlasErrorRecovery: """Implement various error recovery patterns""" - + def __init__(self, client: Atlas): self.client = client - + def exponential_backoff_retry(self, operation, max_retries=3, base_delay=1): """Retry with exponential backoff""" for attempt in range(max_retries): @@ -656,18 +693,18 @@ class AtlasErrorRecovery: except (atlas.InternalServerError, atlas.APIConnectionError, atlas.APITimeoutError) as e: if attempt == max_retries - 1: raise # Last attempt - re-raise the error - + delay = base_delay * (2 ** attempt) + random.uniform(0, 1) print(f"Attempt {attempt + 1} failed: {e}") print(f"Retrying in {delay:.1f}s...") time.sleep(delay) - + def circuit_breaker(self, operation, failure_threshold=5, recovery_time=60): """Implement circuit breaker pattern""" # This would be a more complex implementation # See advanced-usage.md for full implementation pass - + def fallback_strategy(self, primary_operation, fallback_operation): """Try primary operation, fall back to alternative""" try: diff --git a/examples/demo.py b/examples/demo.py index 4b63f6b..56dc8e6 100644 --- a/examples/demo.py +++ b/examples/demo.py @@ -2,10 +2,8 @@ from atlas import Atlas -# gets API key, organization ID and project ID from environment variables: +# gets API key from environment variable: # - LAYERLENS_ATLAS_API_KEY -# - LAYERLENS_ATLAS_ORG_ID -# - LAYERLENS_ATLAS_PROJECT_ID client = Atlas() # Evaluations diff --git a/scripts/test b/scripts/test old mode 100644 new mode 100755 diff --git a/src/atlas/_client.py b/src/atlas/_client.py index dae3153..7e70403 100644 --- a/src/atlas/_client.py +++ b/src/atlas/_client.py @@ -2,7 +2,7 @@ import os from http import HTTPStatus -from typing import TYPE_CHECKING, Any, Union, Mapping +from typing import TYPE_CHECKING, Any, Union, Mapping, Optional from functools import cached_property from typing_extensions import Self, override @@ -10,12 +10,15 @@ from . import _exceptions from ._utils import is_mapping +from .models import Organization from ._constants import DEFAULT_TIMEOUT from ._exceptions import AtlasError, APIStatusError from ._base_client import BaseClient if TYPE_CHECKING: + from .resources.models import Models from .resources.results import Results + from .resources.benchmarks import Benchmarks from .resources.evaluations import Evaluations @@ -31,8 +34,6 @@ def __init__( self, *, api_key: str | None = None, - organization_id: str | None = None, - project_id: str | None = None, base_url: str | httpx.URL | None = None, timeout: Union[float, httpx.Timeout, None] = DEFAULT_TIMEOUT, ) -> None: @@ -40,8 +41,6 @@ def __init__( This automatically infers the following arguments from their corresponding environment variables if they are not provided: - `api_key` from `LAYERLENS_ATLAS_API_KEY` - - `organization_id` from `LAYERLENS_ATLAS_ORG_ID` - - `project_id` from `LAYERLENS_ATLAS_PROJECT_ID` """ if api_key is None: api_key = os.environ.get("LAYERLENS_ATLAS_API_KEY") @@ -51,30 +50,45 @@ def __init__( ) self.api_key = api_key - if organization_id is None: - organization_id = os.environ.get("LAYERLENS_ATLAS_ORG_ID") - self.organization_id = organization_id - - if project_id is None: - project_id = os.environ.get("LAYERLENS_ATLAS_PROJECT_ID") - self.project_id = project_id - if base_url is None: base_url = os.environ.get("LAYERLENS_ATLAS_BASE_URL") if base_url is None: - base_url = "https://8bg48mbhyi.execute-api.us-east-1.amazonaws.com/prod/api/v1/key" + base_url = "https://8bg48mbhyi.execute-api.us-east-1.amazonaws.com/prod/api/v1/dgklmnr" super().__init__( base_url=base_url, timeout=timeout, ) + organization = self._get_organization() + if organization is None: + raise AtlasError(f"Organization could not be fetched. Please contact LayerLens Atlas support.") + self.organization_id = organization.id + + if organization.projects is None or len(organization.projects) == 0: + raise AtlasError( + f"Organization {self.organization_id} is missing project. Please contact LayerLens Atlas support." + ) + self.project_id = organization.projects[0].id + + @cached_property + def benchmarks(self) -> Benchmarks: + from .resources.benchmarks import Benchmarks + + return Benchmarks(self) + @cached_property def evaluations(self) -> Evaluations: from .resources.evaluations import Evaluations return Evaluations(self) + @cached_property + def models(self) -> Models: + from .resources.models import Models + + return Models(self) + @cached_property def results(self) -> Results: from .resources.results import Results @@ -93,8 +107,6 @@ def copy( self, *, api_key: str | None = None, - organization_id: str | None = None, - project_id: str | None = None, base_url: str | httpx.URL | None = None, timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, _extra_kwargs: Mapping[str, Any] = {}, @@ -104,8 +116,6 @@ def copy( """ return self.__class__( api_key=api_key or self.api_key, - organization_id=organization_id or self.organization_id, - project_id=project_id or self.project_id, base_url=base_url or self.base_url, timeout=self.timeout or timeout, **_extra_kwargs, @@ -150,5 +160,15 @@ def _make_status_error( return APIStatusError(err_msg, response=response, body=data) + def _get_organization(self) -> Optional[Organization]: + organization = super().get_cast( + f"/organizations", + timeout=30, + cast_to=Organization, + ) + if isinstance(organization, Organization): + return organization + return None + Client = Atlas diff --git a/src/atlas/_models.py b/src/atlas/_models.py deleted file mode 100644 index ac87d2b..0000000 --- a/src/atlas/_models.py +++ /dev/null @@ -1,123 +0,0 @@ -from __future__ import annotations - -from typing import Dict, List, Union, Optional -from datetime import timedelta - -from pydantic import Field, BaseModel, ConfigDict - - -class Evaluation(BaseModel): - id: str - status: str - status_description: str - submitted_at: int - finished_at: int - model_id: str - model_name: str - model_key: str - model_company: str - dataset_id: str - dataset_name: str - average_duration: int - readability_score: float - toxicity_score: float - ethics_score: float - accuracy: float - - -class Evaluations(BaseModel): - data: List[Evaluation] - - -class Result(BaseModel): - subset: str - prompt: str - result: str - truth: str - duration: timedelta - score: float - metrics: Dict[str, Optional[float]] - - -class ResultMetrics(BaseModel): - total_count: int - - -class Pagination(BaseModel): - total_count: int - page_size: int - total_pages: int - - -class Results(BaseModel): - evaluation_id: str - results: List[Result] - metrics: ResultMetrics - pagination: Pagination - - -class Model(BaseModel): - id: str - key: str - name: str - company: str - description: str - released_at: int - parameters: float - modality: str - context_length: int - architecture_type: str - license: str - open_weights: bool - region: str - deprecated: bool - - -class CustomModel(BaseModel): - id: str - key: str - name: str - description: str - max_tokens: int - api_url: str - disabled: bool - - -class Models(BaseModel): - models: List[Union[Model, CustomModel]] - - -class Benchmark(BaseModel): - id: str - key: str - name: str - full_description: str - language: str - categories: List[str] - subsets: List[str] - prompt_count: int - deprecated: bool - - -class CustomBenchmark(BaseModel): - id: str - key: str - name: str - description: str - system_prompt: Optional[str] - subsets: List[str] - prompt_count: int - version_count: int - regex_pattern: Optional[str] - llm_judge_model_id: str - custom_instructions: str - scoring_metric: Optional[str] - metrics: List[str] - files: List[str] - disabled: bool - - -class Benchmarks(BaseModel): - model_config = ConfigDict(populate_by_name=True) - - benchmarks: List[Union[Benchmark, CustomBenchmark]] = Field(..., alias="datasets") diff --git a/src/atlas/models/__init__.py b/src/atlas/models/__init__.py new file mode 100644 index 0000000..d596ffb --- /dev/null +++ b/src/atlas/models/__init__.py @@ -0,0 +1,25 @@ +from .api import Models, Results, Benchmarks, Pagination, Evaluations, ResultMetrics +from .model import Model, CustomModel, PublicModel +from .benchmark import Benchmark, CustomBenchmark, PublicBenchmark +from .evaluation import Result, Evaluation, EvaluationStatus +from .organization import Project, Organization + +__all__ = [ + "Benchmarks", + "Evaluations", + "Models", + "Results", + "Benchmark", + "CustomBenchmark", + "PublicBenchmark", + "Evaluation", + "EvaluationStatus", + "Pagination", + "Result", + "ResultMetrics", + "Model", + "CustomModel", + "PublicModel", + "Organization", + "Project", +] diff --git a/src/atlas/models/api.py b/src/atlas/models/api.py new file mode 100644 index 0000000..62de046 --- /dev/null +++ b/src/atlas/models/api.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import List + +from pydantic import Field, BaseModel, ConfigDict + +from .model import Model +from .benchmark import Benchmark +from .evaluation import Result, Evaluation + + +class Benchmarks(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + benchmarks: List[Benchmark] = Field(..., alias="datasets") + + +class Evaluations(BaseModel): + data: List[Evaluation] + + +class Models(BaseModel): + models: List[Model] + + +class ResultMetrics(BaseModel): + total_count: int + + +class Pagination(BaseModel): + total_count: int + page_size: int + total_pages: int + + +class Results(BaseModel): + evaluation_id: str + results: List[Result] + metrics: ResultMetrics + pagination: Pagination diff --git a/src/atlas/models/benchmark.py b/src/atlas/models/benchmark.py new file mode 100644 index 0000000..0405937 --- /dev/null +++ b/src/atlas/models/benchmark.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from typing import List, Optional + +from pydantic import Field, BaseModel + + +class Benchmark(BaseModel): + id: str + key: str + name: str + + +class CustomBenchmark(Benchmark): + description: str + system_prompt: Optional[str] + prompt_count: int + version_count: int + regex_pattern: Optional[str] + llm_judge_model_id: str + custom_instructions: str + scoring_metric: Optional[str] + metrics: List[str] + files: List[str] + disabled: bool + + +class PublicBenchmark(Benchmark): + description: str = Field(..., alias="full_description") + language: str + prompt_count: int + deprecated: bool diff --git a/src/atlas/models/evaluation.py b/src/atlas/models/evaluation.py new file mode 100644 index 0000000..f7435a9 --- /dev/null +++ b/src/atlas/models/evaluation.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from enum import Enum +from typing import Dict, Optional +from datetime import timedelta + +from pydantic import Field, BaseModel, ConfigDict + + +class EvaluationStatus(str, Enum): + PENDING = "pending" + FAILURE = "failure" + IN_PROGRESS = "in-progress" + PAUSED = "paused" + SUCCESS = "success" + TIMEOUT = "timeout" + + +class Evaluation(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + id: str + status: EvaluationStatus + submitted_at: int + finished_at: int + model_id: str + benchmark_id: str = Field(..., alias="dataset_id") + average_duration: int + accuracy: float + + +class Result(BaseModel): + subset: str + prompt: str + result: str + truth: str + duration: timedelta + score: float + metrics: Dict[str, Optional[float]] diff --git a/src/atlas/models/model.py b/src/atlas/models/model.py new file mode 100644 index 0000000..ad63814 --- /dev/null +++ b/src/atlas/models/model.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from pydantic import BaseModel + + +class Model(BaseModel): + id: str + key: str + name: str + description: str + + +class CustomModel(Model): + max_tokens: int + api_url: str + disabled: bool + + +class PublicModel(Model): + company: str + released_at: int + parameters: float + modality: str + context_length: int + architecture_type: str + license: str + open_weights: bool + region: str + deprecated: bool diff --git a/src/atlas/models/organization.py b/src/atlas/models/organization.py new file mode 100644 index 0000000..cacc6b3 --- /dev/null +++ b/src/atlas/models/organization.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from typing import List, Optional + +from pydantic import BaseModel + + +class Organization(BaseModel): + id: str + name: str + projects: Optional[List[Project]] = None + + +class Project(BaseModel): + id: str + name: str diff --git a/src/atlas/resources/benchmarks/benchmarks.py b/src/atlas/resources/benchmarks/benchmarks.py index 93a7400..fd69f96 100644 --- a/src/atlas/resources/benchmarks/benchmarks.py +++ b/src/atlas/resources/benchmarks/benchmarks.py @@ -1,10 +1,10 @@ from __future__ import annotations -from typing import List, Literal +from typing import List, Literal, Optional import httpx -from ..._models import Benchmark, Benchmarks as BenchmarksData, CustomBenchmark +from ...models import Benchmark, Benchmarks as BenchmarksResponse from ..._resource import SyncAPIResource from ..._constants import DEFAULT_TIMEOUT @@ -13,17 +13,35 @@ class Benchmarks(SyncAPIResource): def get( self, *, - type: Literal["public"] | Literal["custom"], timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, - ) -> List[Benchmark | CustomBenchmark] | None: - benchmarks = self._get( - f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/benchmarks", - params={ - "type": type, - }, - timeout=timeout, - cast_to=BenchmarksData, - ) - if isinstance(benchmarks, BenchmarksData): - return benchmarks.benchmarks - return None + type: Literal["custom", "public"] | None = None, + name: Optional[str] = None, + ) -> List[Benchmark] | None: + base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/benchmarks" + + def fetch(bench_type: str) -> BenchmarksResponse | None: + params = {"type": bench_type} + if name: + params["query"] = name + + resp = self._get( + base_url, + params=params, + timeout=timeout, + cast_to=BenchmarksResponse, + ) + return resp if isinstance(resp, BenchmarksResponse) else None + + benchmarks: List[Benchmark] = [] + + if type is None: + for t in ["custom", "public"]: + resp = fetch(t) + if resp: + benchmarks.extend(resp.benchmarks) + else: # fetch only one type + resp = fetch(type) + if resp: + benchmarks.extend(resp.benchmarks) + + return benchmarks diff --git a/src/atlas/resources/evaluations/evaluations.py b/src/atlas/resources/evaluations/evaluations.py index ec33da9..2506ff7 100644 --- a/src/atlas/resources/evaluations/evaluations.py +++ b/src/atlas/resources/evaluations/evaluations.py @@ -2,7 +2,7 @@ import httpx -from ..._models import Evaluation, Evaluations as EvaluationsData +from ...models import Model, Benchmark, Evaluation, Evaluations as EvaluationsResponse from ..._resource import SyncAPIResource from ..._constants import DEFAULT_TIMEOUT @@ -11,23 +11,23 @@ class Evaluations(SyncAPIResource): def create( self, *, - model: str, - benchmark: str, + model: Model, + benchmark: Benchmark, timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, ) -> Evaluation | None: evaluations = self._post( f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/evaluations", body=[ { - "model_id": model, - "dataset_id": benchmark, + "model_id": model.id, + "dataset_id": benchmark.id, "is_custom_model": False, "is_custom_dataset": False, } ], timeout=timeout, - cast_to=EvaluationsData, + cast_to=EvaluationsResponse, ) - if isinstance(evaluations, EvaluationsData) and len(evaluations.data) > 0: + if isinstance(evaluations, EvaluationsResponse) and len(evaluations.data) > 0: return evaluations.data[0] return None diff --git a/src/atlas/resources/models/models.py b/src/atlas/resources/models/models.py index a9aca0c..f76f188 100644 --- a/src/atlas/resources/models/models.py +++ b/src/atlas/resources/models/models.py @@ -1,10 +1,10 @@ from __future__ import annotations -from typing import List, Literal +from typing import List, Literal, Optional import httpx -from ..._models import Model, Models as ModelsData, CustomModel +from ...models import Model, Models as ModelsResponse from ..._resource import SyncAPIResource from ..._constants import DEFAULT_TIMEOUT @@ -13,17 +13,44 @@ class Models(SyncAPIResource): def get( self, *, - type: Literal["public"] | Literal["custom"], timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, - ) -> List[Model | CustomModel] | None: - models = self._get( - f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/models", - params={ - "type": type, - }, - timeout=timeout, - cast_to=ModelsData, - ) - if isinstance(models, ModelsData): - return models.models - return None + type: Literal["custom", "public"] | None = None, + name: Optional[str] = None, + companies: Optional[List[str]] = None, + regions: Optional[List[str]] = None, + licenses: Optional[List[str]] = None, + ) -> List[Model] | None: + base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/models" + + def fetch(model_type: str) -> ModelsResponse | None: + params = {"type": model_type} + if name: + params["query"] = name + if companies: + params["companies"] = ",".join(companies) + if regions: + params["regions"] = ",".join(regions) + if licenses: + params["licenses"] = ",".join(licenses) + + resp = self._get( + base_url, + params=params, + timeout=timeout, + cast_to=ModelsResponse, + ) + return resp if isinstance(resp, ModelsResponse) else None + + models: List[Model] = [] + + if type is None: # fetch both + for t in ["custom", "public"]: + resp = fetch(t) + if resp: + models.extend(resp.models) + else: # fetch only one type + resp = fetch(type) + if resp: + models.extend(resp.models) + + return models diff --git a/src/atlas/resources/results/results.py b/src/atlas/resources/results/results.py index 82f84a6..66b18f0 100644 --- a/src/atlas/resources/results/results.py +++ b/src/atlas/resources/results/results.py @@ -5,9 +5,9 @@ import httpx -from ..._models import Results as ResultsData from ..._resource import SyncAPIResource from ..._constants import DEFAULT_TIMEOUT +from ...models.api import Results as ResultsData DEFAULT_PAGE_SIZE = 100 @@ -69,7 +69,11 @@ def get( # Add pagination to the response response_with_pagination = { **response_data, - "pagination": {"total_count": total_count, "page_size": effective_page_size, "total_pages": total_pages}, + "pagination": { + "total_count": total_count, + "page_size": effective_page_size, + "total_pages": total_pages, + }, } try: diff --git a/tests/conftest.py b/tests/conftest.py index 54d61a8..89eae24 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,9 @@ @pytest.fixture def env_vars(): """Clean environment variables for testing.""" - env_keys = ["LAYERLENS_ATLAS_API_KEY", "LAYERLENS_ATLAS_ORG_ID", "LAYERLENS_ATLAS_PROJECT_ID"] + env_keys = [ + "LAYERLENS_ATLAS_API_KEY", + ] original_values = {key: os.environ.get(key) for key in env_keys} # Clear environment variables @@ -32,8 +34,6 @@ def mock_env_vars(): os.environ, { "LAYERLENS_ATLAS_API_KEY": "test-api-key", - "LAYERLENS_ATLAS_ORG_ID": "test-org-id", - "LAYERLENS_ATLAS_PROJECT_ID": "test-project-id", }, ): yield diff --git a/tests/resources/test_benchmarks.py b/tests/resources/test_benchmarks.py index 2ffbdf5..292438d 100644 --- a/tests/resources/test_benchmarks.py +++ b/tests/resources/test_benchmarks.py @@ -1,9 +1,14 @@ -from unittest.mock import Mock +from unittest.mock import Mock, call import httpx import pytest -from atlas._models import Benchmark, Benchmarks as BenchmarksData, CustomBenchmark +from atlas.models import ( + Benchmark, + Benchmarks as BenchmarksData, + CustomBenchmark, + PublicBenchmark, +) from atlas._constants import DEFAULT_TIMEOUT from atlas.resources.benchmarks.benchmarks import Benchmarks @@ -62,16 +67,12 @@ def sample_custom_benchmark_data(self): } @pytest.fixture - def mock_public_benchmarks_response(self, sample_benchmark_data): + def mock_benchmarks_response(self, sample_benchmark_data, sample_custom_benchmark_data): """Mock BenchmarksData response with public benchmarks.""" - benchmark = Benchmark(**sample_benchmark_data) - return BenchmarksData(datasets=[benchmark]) - - @pytest.fixture - def mock_custom_benchmarks_response(self, sample_custom_benchmark_data): - """Mock BenchmarksData response with custom benchmarks.""" + public_benchmark = Benchmark(**sample_benchmark_data) custom_benchmark = CustomBenchmark(**sample_custom_benchmark_data) - return BenchmarksData(datasets=[custom_benchmark]) + + return BenchmarksData(datasets=[public_benchmark, custom_benchmark]) def test_benchmarks_initialization(self, mock_client): """Benchmarks resource initializes correctly.""" @@ -80,101 +81,87 @@ def test_benchmarks_initialization(self, mock_client): assert benchmarks._client is mock_client assert benchmarks._get is mock_client.get_cast - def test_get_public_benchmarks_success(self, benchmarks_resource, mock_public_benchmarks_response): - """get method returns public benchmarks successfully.""" - benchmarks_resource._get.return_value = mock_public_benchmarks_response + def test_get_benchmarks_success(self, benchmarks_resource, mock_benchmarks_response): + """get method returns benchmarks successfully.""" + benchmarks_resource._get.side_effect = lambda *_, **kwargs: ( + mock_benchmarks_response + if kwargs.get("params", {}).get("type") == "public" + else BenchmarksData(benchmarks=[]) + ) - result = benchmarks_resource.get(type="public") + result = benchmarks_resource.get() assert isinstance(result, list) - assert len(result) == 1 + assert len(result) == 2 + assert isinstance(result[0], Benchmark) assert result[0].name == "MMLU" assert result[0].key == "mmlu" - def test_get_custom_benchmarks_success(self, benchmarks_resource, mock_custom_benchmarks_response): - """get method returns custom benchmarks successfully.""" - benchmarks_resource._get.return_value = mock_custom_benchmarks_response - - result = benchmarks_resource.get(type="custom") - - assert isinstance(result, list) - assert len(result) == 1 - assert isinstance(result[0], CustomBenchmark) - assert result[0].name == "My Custom Benchmark" - assert result[0].key == "my-benchmark" - - def test_get_benchmarks_request_parameters_public(self, benchmarks_resource, mock_public_benchmarks_response): - """get method makes correct API request for public benchmarks.""" - benchmarks_resource._get.return_value = mock_public_benchmarks_response - - benchmarks_resource.get(type="public") - - benchmarks_resource._get.assert_called_once_with( - "/organizations/org-123/projects/proj-456/benchmarks", - params={"type": "public"}, - timeout=DEFAULT_TIMEOUT, - cast_to=BenchmarksData, - ) - - def test_get_benchmarks_request_parameters_custom(self, benchmarks_resource, mock_custom_benchmarks_response): - """get method makes correct API request for custom benchmarks.""" - benchmarks_resource._get.return_value = mock_custom_benchmarks_response - - benchmarks_resource.get(type="custom") - - benchmarks_resource._get.assert_called_once_with( - "/organizations/org-123/projects/proj-456/benchmarks", - params={"type": "custom"}, - timeout=DEFAULT_TIMEOUT, - cast_to=BenchmarksData, - ) - - def test_get_benchmarks_with_custom_timeout(self, benchmarks_resource, mock_public_benchmarks_response): + assert isinstance(result[1], Benchmark) + assert result[1].name == "My Custom Benchmark" + assert result[1].key == "my-benchmark" + + def test_get_benchmarks_request_parameters(self, benchmarks_resource, mock_benchmarks_response): + """get method makes correct API request for benchmarks.""" + benchmarks_resource._get.return_value = mock_benchmarks_response + + benchmarks_resource.get() + + expected_calls = [ + call( + "/organizations/org-123/projects/proj-456/benchmarks", + params={"type": "custom"}, + timeout=DEFAULT_TIMEOUT, + cast_to=BenchmarksData, + ), + call( + "/organizations/org-123/projects/proj-456/benchmarks", + params={"type": "public"}, + timeout=DEFAULT_TIMEOUT, + cast_to=BenchmarksData, + ), + ] + + benchmarks_resource._get.assert_has_calls(expected_calls) + + def test_get_benchmarks_with_custom_timeout(self, benchmarks_resource, mock_benchmarks_response): """get method accepts custom timeout.""" - benchmarks_resource._get.return_value = mock_public_benchmarks_response + benchmarks_resource._get.return_value = mock_benchmarks_response custom_timeout = 45.0 - benchmarks_resource.get(type="public", timeout=custom_timeout) + benchmarks_resource.get(timeout=custom_timeout) call_args = benchmarks_resource._get.call_args assert call_args.kwargs["timeout"] == custom_timeout - def test_get_benchmarks_with_httpx_timeout(self, benchmarks_resource, mock_public_benchmarks_response): + def test_get_benchmarks_with_httpx_timeout(self, benchmarks_resource, mock_benchmarks_response): """get method accepts httpx.Timeout object.""" - benchmarks_resource._get.return_value = mock_public_benchmarks_response + benchmarks_resource._get.return_value = mock_benchmarks_response custom_timeout = httpx.Timeout(45.0) - benchmarks_resource.get(type="public", timeout=custom_timeout) + benchmarks_resource.get(timeout=custom_timeout) call_args = benchmarks_resource._get.call_args assert call_args.kwargs["timeout"] is custom_timeout - def test_get_benchmarks_none_response(self, benchmarks_resource): - """get method returns None when response is None.""" - benchmarks_resource._get.return_value = None - - result = benchmarks_resource.get(type="public") - - assert result is None - - def test_get_benchmarks_invalid_response_type(self, benchmarks_resource): - """get method handles non-BenchmarksData response gracefully.""" - benchmarks_resource._get.return_value = "invalid-response" + @pytest.mark.parametrize( + "mock_return, expected", + [ + (None, []), + ("invalid-response", []), + (BenchmarksData(datasets=[]), []), + ], + ids=["none_response", "invalid_type", "empty_response"], + ) + def test_get_benchmarks_various_responses(self, benchmarks_resource, mock_return, expected): + benchmarks_resource._get.return_value = mock_return - result = benchmarks_resource.get(type="public") + result = benchmarks_resource.get() - assert result is None - - def test_get_benchmarks_empty_response(self, benchmarks_resource): - """get method returns empty list when no benchmarks in response.""" - empty_response = BenchmarksData(datasets=[]) - benchmarks_resource._get.return_value = empty_response - - result = benchmarks_resource.get(type="public") - - assert result == [] - assert isinstance(result, list) + assert result == expected + if expected == []: + assert isinstance(result, list) def test_get_benchmarks_multiple_items( self, benchmarks_resource, sample_benchmark_data, sample_custom_benchmark_data @@ -191,59 +178,51 @@ def test_get_benchmarks_multiple_items( benchmark2 = Benchmark(**benchmark2_data) response = BenchmarksData(datasets=[benchmark, benchmark2]) - benchmarks_resource._get.return_value = response + benchmarks_resource._get.side_effect = lambda *_, **kwargs: ( + response if kwargs.get("params", {}).get("type") == "public" else BenchmarksData(benchmarks=[]) + ) - result = benchmarks_resource.get(type="public") + result = benchmarks_resource.get() assert len(result) == 2 assert result[0].key == "mmlu" assert result[1].key == "hellaswag" - def test_get_benchmarks_url_construction(self, benchmarks_resource, mock_public_benchmarks_response): + def test_get_benchmarks_url_construction(self, benchmarks_resource, mock_benchmarks_response): """get method constructs URL correctly with org and project IDs.""" benchmarks_resource._client.organization_id = "custom-org" benchmarks_resource._client.project_id = "custom-project" - benchmarks_resource._get.return_value = mock_public_benchmarks_response + benchmarks_resource._get.return_value = mock_benchmarks_response - benchmarks_resource.get(type="public") + benchmarks_resource.get() expected_url = "/organizations/custom-org/projects/custom-project/benchmarks" call_args = benchmarks_resource._get.call_args assert call_args[0][0] == expected_url - @pytest.mark.parametrize("benchmark_type", ["public", "custom"]) - def test_get_benchmarks_type_parameter(self, benchmarks_resource, benchmark_type): - """get method accepts both public and custom types.""" - benchmarks_resource._get.return_value = BenchmarksData(datasets=[]) - - benchmarks_resource.get(type=benchmark_type) - - call_args = benchmarks_resource._get.call_args - assert call_args.kwargs["params"]["type"] == benchmark_type - - def test_get_benchmarks_cast_to_parameter(self, benchmarks_resource, mock_public_benchmarks_response): + def test_get_benchmarks_cast_to_parameter(self, benchmarks_resource, mock_benchmarks_response): """get method specifies correct cast_to parameter.""" - benchmarks_resource._get.return_value = mock_public_benchmarks_response + benchmarks_resource._get.return_value = mock_benchmarks_response - benchmarks_resource.get(type="public") + benchmarks_resource.get() call_args = benchmarks_resource._get.call_args assert call_args.kwargs["cast_to"] is BenchmarksData - def test_get_benchmarks_timeout_default(self, benchmarks_resource, mock_public_benchmarks_response): + def test_get_benchmarks_timeout_default(self, benchmarks_resource, mock_benchmarks_response): """get method uses DEFAULT_TIMEOUT when no timeout specified.""" - benchmarks_resource._get.return_value = mock_public_benchmarks_response + benchmarks_resource._get.return_value = mock_benchmarks_response - benchmarks_resource.get(type="public") + benchmarks_resource.get() call_args = benchmarks_resource._get.call_args assert call_args.kwargs["timeout"] is DEFAULT_TIMEOUT - def test_get_benchmarks_with_none_timeout(self, benchmarks_resource, mock_public_benchmarks_response): + def test_get_benchmarks_with_none_timeout(self, benchmarks_resource, mock_benchmarks_response): """get method accepts None timeout.""" - benchmarks_resource._get.return_value = mock_public_benchmarks_response + benchmarks_resource._get.return_value = mock_benchmarks_response - benchmarks_resource.get(type="public", timeout=None) + benchmarks_resource.get(timeout=None) call_args = benchmarks_resource._get.call_args assert call_args.kwargs["timeout"] is None @@ -278,7 +257,7 @@ def test_get_benchmarks_handles_api_error(self, benchmarks_resource): benchmarks_resource._get.side_effect = api_error with pytest.raises(APIStatusError): - benchmarks_resource.get(type="public") + benchmarks_resource.get() def test_get_benchmarks_handles_auth_error(self, benchmarks_resource): """get method propagates authentication errors.""" @@ -292,7 +271,7 @@ def test_get_benchmarks_handles_auth_error(self, benchmarks_resource): benchmarks_resource._get.side_effect = auth_error with pytest.raises(AuthenticationError): - benchmarks_resource.get(type="custom") + benchmarks_resource.get() def test_get_benchmarks_handles_connection_error(self, benchmarks_resource): """get method propagates connection errors.""" @@ -303,7 +282,7 @@ def test_get_benchmarks_handles_connection_error(self, benchmarks_resource): benchmarks_resource._get.side_effect = connection_error with pytest.raises(APIConnectionError): - benchmarks_resource.get(type="public") + benchmarks_resource.get() def test_get_benchmarks_handles_timeout_error(self, benchmarks_resource): """get method propagates timeout errors.""" @@ -314,7 +293,7 @@ def test_get_benchmarks_handles_timeout_error(self, benchmarks_resource): benchmarks_resource._get.side_effect = timeout_error with pytest.raises(APITimeoutError): - benchmarks_resource.get(type="public", timeout=1.0) + benchmarks_resource.get(timeout=1.0) class TestBenchmarksTyping: @@ -338,13 +317,13 @@ def test_get_benchmarks_return_type_consistency(self, benchmarks_resource): """get method returns consistent types.""" # Test that the method returns either a list or None benchmarks_resource._get.return_value = None - result = benchmarks_resource.get(type="public") - assert result is None + result = benchmarks_resource.get() + assert result == [] # Test that it returns a list when successful benchmarks_resource._get.return_value = BenchmarksData(datasets=[]) - result = benchmarks_resource.get(type="public") - assert isinstance(result, list) + result = benchmarks_resource.get() + assert result == [] def test_get_benchmarks_mixed_benchmark_types(self, benchmarks_resource): """get method can handle mixed benchmark types in response.""" @@ -379,16 +358,19 @@ def test_get_benchmarks_mixed_benchmark_types(self, benchmarks_resource): "disabled": False, } - public_benchmark = Benchmark(**public_data) + public_benchmark = PublicBenchmark(**public_data) custom_benchmark = CustomBenchmark(**custom_data) - response = BenchmarksData(datasets=[public_benchmark, custom_benchmark]) - benchmarks_resource._get.return_value = response + benchmarks_resource._get.side_effect = lambda *_, **kwargs: ( + BenchmarksData(benchmarks=[public_benchmark]) + if kwargs.get("params", {}).get("type") == "public" + else BenchmarksData(benchmarks=[custom_benchmark]) + ) - result = benchmarks_resource.get(type="public") # Type doesn't matter for this test + result = benchmarks_resource.get() # Type doesn't matter for this test assert len(result) == 2 - assert isinstance(result[0], Benchmark) - assert isinstance(result[1], CustomBenchmark) - assert result[0].key == "mmlu" - assert result[1].key == "my-bench" + assert isinstance(result[0], CustomBenchmark) + assert isinstance(result[1], PublicBenchmark) + assert result[0].key == "my-bench" + assert result[1].key == "mmlu" diff --git a/tests/resources/test_evaluations.py b/tests/resources/test_evaluations.py index a4e9652..a5cf899 100644 --- a/tests/resources/test_evaluations.py +++ b/tests/resources/test_evaluations.py @@ -3,7 +3,7 @@ import httpx import pytest -from atlas._models import Evaluation, Evaluations as EvaluationsData +from atlas.models import Evaluation, Evaluations as EvaluationsData, EvaluationStatus from atlas._constants import DEFAULT_TIMEOUT from atlas.resources.evaluations.evaluations import Evaluations @@ -21,6 +21,24 @@ def mock_client(self): client.post_cast = Mock() return client + @pytest.fixture + def mock_benchmark(self): + """Mock benchmark.""" + benchmark = Mock() + benchmark.id = "benchmark-789" + benchmark.key = "mmlu" + benchmark.name = "MMLU" + return benchmark + + @pytest.fixture + def mock_model(self): + """Mock model.""" + model = Mock() + model.id = "model-123" + model.key = "gpt-4" + model.name = "GPT-4" + return model + @pytest.fixture def evaluations_resource(self, mock_client): """Evaluations resource instance.""" @@ -31,20 +49,13 @@ def sample_evaluation_data(self): """Sample evaluation data for testing.""" return { "id": "eval-123", - "status": "completed", + "status": "success", "status_description": "Evaluation completed successfully", "submitted_at": 1640995200, "finished_at": 1640995800, "model_id": "model-456", - "model_name": "GPT-4", - "model_key": "gpt-4", - "model_company": "OpenAI", - "dataset_id": "dataset-789", - "dataset_name": "MMLU", + "dataset_id": "benchmark-789", "average_duration": 2500, - "readability_score": 0.85, - "toxicity_score": 0.02, - "ethics_score": 0.92, "accuracy": 0.89, } @@ -62,29 +73,41 @@ def test_evaluations_initialization(self, mock_client): assert evaluations._get is mock_client.get_cast assert evaluations._post is mock_client.post_cast - def test_create_evaluation_success(self, evaluations_resource, mock_evaluations_response): + def test_create_evaluation_success( + self, + mock_model, + mock_benchmark, + evaluations_resource, + mock_evaluations_response, + ): """create method returns first evaluation on success.""" evaluations_resource._post.return_value = mock_evaluations_response - result = evaluations_resource.create(model="gpt-4", benchmark="mmlu") + result = evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) assert isinstance(result, Evaluation) assert result.id == "eval-123" - assert result.model_name == "GPT-4" - assert result.dataset_name == "MMLU" - - def test_create_evaluation_request_parameters(self, evaluations_resource, mock_evaluations_response): + assert result.model_id == "model-456" + assert result.benchmark_id == "benchmark-789" + + def test_create_evaluation_request_parameters( + self, + mock_model, + mock_benchmark, + evaluations_resource, + mock_evaluations_response, + ): """create method makes correct API request.""" evaluations_resource._post.return_value = mock_evaluations_response - evaluations_resource.create(model="gpt-4", benchmark="mmlu") + evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) evaluations_resource._post.assert_called_once_with( "/organizations/org-123/projects/proj-456/evaluations", body=[ { - "model_id": "gpt-4", - "dataset_id": "mmlu", + "model_id": "model-123", + "dataset_id": "benchmark-789", "is_custom_model": False, "is_custom_dataset": False, } @@ -93,52 +116,74 @@ def test_create_evaluation_request_parameters(self, evaluations_resource, mock_e cast_to=EvaluationsData, ) - def test_create_evaluation_with_custom_timeout(self, evaluations_resource, mock_evaluations_response): + def test_create_evaluation_with_custom_timeout( + self, + mock_model, + mock_benchmark, + evaluations_resource, + mock_evaluations_response, + ): """create method accepts custom timeout.""" evaluations_resource._post.return_value = mock_evaluations_response custom_timeout = 30.0 - evaluations_resource.create(model="gpt-4", benchmark="mmlu", timeout=custom_timeout) + evaluations_resource.create( + model=mock_model, + benchmark=mock_benchmark, + timeout=custom_timeout, + ) call_args = evaluations_resource._post.call_args assert call_args.kwargs["timeout"] == custom_timeout - def test_create_evaluation_with_httpx_timeout(self, evaluations_resource, mock_evaluations_response): + def test_create_evaluation_with_httpx_timeout( + self, + mock_model, + mock_benchmark, + evaluations_resource, + mock_evaluations_response, + ): """create method accepts httpx.Timeout object.""" evaluations_resource._post.return_value = mock_evaluations_response custom_timeout = httpx.Timeout(30.0) - evaluations_resource.create(model="gpt-4", benchmark="mmlu", timeout=custom_timeout) + evaluations_resource.create( + model=mock_model, + benchmark=mock_benchmark, + timeout=custom_timeout, + ) call_args = evaluations_resource._post.call_args assert call_args.kwargs["timeout"] is custom_timeout - def test_create_evaluation_empty_response(self, evaluations_resource): + def test_create_evaluation_empty_response(self, mock_model, mock_benchmark, evaluations_resource): """create method returns None when no evaluations in response.""" empty_response = EvaluationsData(data=[]) evaluations_resource._post.return_value = empty_response - result = evaluations_resource.create(model="gpt-4", benchmark="mmlu") + result = evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) assert result is None - def test_create_evaluation_none_response(self, evaluations_resource): + def test_create_evaluation_none_response(self, mock_model, mock_benchmark, evaluations_resource): """create method returns None when response is None.""" evaluations_resource._post.return_value = None - result = evaluations_resource.create(model="gpt-4", benchmark="mmlu") + result = evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) assert result is None - def test_create_evaluation_invalid_response_type(self, evaluations_resource): + def test_create_evaluation_invalid_response_type(self, mock_model, mock_benchmark, evaluations_resource): """create method handles non-EvaluationsData response gracefully.""" evaluations_resource._post.return_value = "invalid-response" - result = evaluations_resource.create(model="gpt-4", benchmark="mmlu") + result = evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) assert result is None - def test_create_evaluation_multiple_evaluations_returns_first(self, evaluations_resource, sample_evaluation_data): + def test_create_evaluation_multiple_evaluations_returns_first( + self, mock_model, mock_benchmark, evaluations_resource, sample_evaluation_data + ): """create method returns first evaluation when multiple exist.""" eval1 = Evaluation(**sample_evaluation_data) eval2_data = sample_evaluation_data.copy() @@ -148,85 +193,92 @@ def test_create_evaluation_multiple_evaluations_returns_first(self, evaluations_ response = EvaluationsData(data=[eval1, eval2]) evaluations_resource._post.return_value = response - result = evaluations_resource.create(model="gpt-4", benchmark="mmlu") + result = evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) assert result.id == "eval-123" # First evaluation assert result is not eval2 - def test_create_evaluation_url_construction(self, evaluations_resource, mock_evaluations_response): + def test_create_evaluation_url_construction( + self, + mock_model, + mock_benchmark, + evaluations_resource, + mock_evaluations_response, + ): """create method constructs URL correctly with org and project IDs.""" evaluations_resource._client.organization_id = "custom-org" evaluations_resource._client.project_id = "custom-project" evaluations_resource._post.return_value = mock_evaluations_response - evaluations_resource.create(model="test-model", benchmark="test-benchmark") + evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) expected_url = "/organizations/custom-org/projects/custom-project/evaluations" call_args = evaluations_resource._post.call_args assert call_args[0][0] == expected_url - def test_create_evaluation_request_body_structure(self, evaluations_resource, mock_evaluations_response): + def test_create_evaluation_request_body_structure( + self, + mock_model, + mock_benchmark, + evaluations_resource, + mock_evaluations_response, + ): """create method sends correct request body structure.""" evaluations_resource._post.return_value = mock_evaluations_response - evaluations_resource.create(model="custom-model", benchmark="custom-benchmark") + evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) call_args = evaluations_resource._post.call_args body = call_args.kwargs["body"] assert isinstance(body, list) assert len(body) == 1 - assert body[0]["model_id"] == "custom-model" - assert body[0]["dataset_id"] == "custom-benchmark" + assert body[0]["model_id"] == mock_model.id + assert body[0]["dataset_id"] == mock_benchmark.id assert body[0]["is_custom_model"] is False assert body[0]["is_custom_dataset"] is False - @pytest.mark.parametrize( - "model_name,benchmark_name", - [ - ("gpt-3.5-turbo", "hellaswag"), - ("claude-3-opus", "arc-challenge"), - ("llama-2-70b", "truthfulqa"), - ("custom-model-123", "custom-benchmark-456"), - ], - ) - def test_create_evaluation_with_different_parameters( - self, evaluations_resource, mock_evaluations_response, model_name, benchmark_name + def test_create_evaluation_cast_to_parameter( + self, + mock_model, + mock_benchmark, + evaluations_resource, + mock_evaluations_response, ): - """create method works with various model and benchmark combinations.""" - evaluations_resource._post.return_value = mock_evaluations_response - - result = evaluations_resource.create(model=model_name, benchmark=benchmark_name) - - assert isinstance(result, Evaluation) - call_args = evaluations_resource._post.call_args - body = call_args.kwargs["body"][0] - assert body["model_id"] == model_name - assert body["dataset_id"] == benchmark_name - - def test_create_evaluation_cast_to_parameter(self, evaluations_resource, mock_evaluations_response): """create method specifies correct cast_to parameter.""" evaluations_resource._post.return_value = mock_evaluations_response - evaluations_resource.create(model="gpt-4", benchmark="mmlu") + evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) call_args = evaluations_resource._post.call_args assert call_args.kwargs["cast_to"] is EvaluationsData - def test_create_evaluation_timeout_default(self, evaluations_resource, mock_evaluations_response): + def test_create_evaluation_timeout_default( + self, + mock_model, + mock_benchmark, + evaluations_resource, + mock_evaluations_response, + ): """create method uses DEFAULT_TIMEOUT when no timeout specified.""" evaluations_resource._post.return_value = mock_evaluations_response - evaluations_resource.create(model="gpt-4", benchmark="mmlu") + evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) call_args = evaluations_resource._post.call_args assert call_args.kwargs["timeout"] is DEFAULT_TIMEOUT - def test_create_evaluation_with_none_timeout(self, evaluations_resource, mock_evaluations_response): + def test_create_evaluation_with_none_timeout( + self, + mock_model, + mock_benchmark, + evaluations_resource, + mock_evaluations_response, + ): """create method accepts None timeout.""" evaluations_resource._post.return_value = mock_evaluations_response - evaluations_resource.create(model="gpt-4", benchmark="mmlu", timeout=None) + evaluations_resource.create(model=mock_model, benchmark=mock_benchmark, timeout=None) call_args = evaluations_resource._post.call_args assert call_args.kwargs["timeout"] is None @@ -253,6 +305,12 @@ def test_create_evaluation_handles_api_error(self, evaluations_resource): """create method propagates API errors.""" from atlas._exceptions import APIStatusError + mock_model = Mock() + mock_model.id = "invalid-model" + + mock_benchmark = Mock() + mock_benchmark.id = "invalid-benchmark" + mock_response = Mock() mock_response.status_code = 400 mock_response.headers = {} @@ -261,29 +319,41 @@ def test_create_evaluation_handles_api_error(self, evaluations_resource): evaluations_resource._post.side_effect = api_error with pytest.raises(APIStatusError): - evaluations_resource.create(model="invalid-model", benchmark="invalid-benchmark") + evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) def test_create_evaluation_handles_connection_error(self, evaluations_resource): """create method propagates connection errors.""" from atlas._exceptions import APIConnectionError + mock_model = Mock() + mock_model.id = "invalid-model" + + mock_benchmark = Mock() + mock_benchmark.id = "invalid-benchmark" + mock_request = Mock() connection_error = APIConnectionError(request=mock_request) evaluations_resource._post.side_effect = connection_error with pytest.raises(APIConnectionError): - evaluations_resource.create(model="gpt-4", benchmark="mmlu") + evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) def test_create_evaluation_handles_timeout_error(self, evaluations_resource): """create method propagates timeout errors.""" from atlas._exceptions import APITimeoutError + mock_model = Mock() + mock_model.id = "invalid-model" + + mock_benchmark = Mock() + mock_benchmark.id = "invalid-benchmark" + mock_request = Mock() timeout_error = APITimeoutError(mock_request) evaluations_resource._post.side_effect = timeout_error with pytest.raises(APITimeoutError): - evaluations_resource.create(model="gpt-4", benchmark="mmlu", timeout=1.0) + evaluations_resource.create(model=mock_model, benchmark=mock_benchmark, timeout=1.0) class TestEvaluationsResourceIntegration: @@ -296,23 +366,26 @@ def test_create_evaluation_end_to_end_flow(self): mock_client.organization_id = "test-org" mock_client.project_id = "test-project" + mock_benchmark = Mock() + mock_benchmark.id = "benchmark-789" + mock_benchmark.key = "mmlu" + mock_benchmark.name = "MMLU" + + mock_model = Mock() + mock_model.id = "model-123" + mock_model.key = "gpt-4" + mock_model.name = "GPT-4" + # Create sample evaluation data evaluation_data = { "id": "eval-integration-test", - "status": "submitted", + "status": "in-progress", "status_description": "Evaluation submitted", "submitted_at": 1640995200, "finished_at": 0, - "model_id": "integration-model", - "model_name": "Integration Test Model", - "model_key": "integration-model", - "model_company": "TestCorp", - "dataset_id": "integration-dataset", - "dataset_name": "Integration Test Dataset", + "model_id": mock_model.id, + "dataset_id": mock_benchmark.id, "average_duration": 0, - "readability_score": 0.0, - "toxicity_score": 0.0, - "ethics_score": 0.0, "accuracy": 0.0, } @@ -322,18 +395,18 @@ def test_create_evaluation_end_to_end_flow(self): # Test the resource evaluations_resource = Evaluations(mock_client) - result = evaluations_resource.create(model="integration-model", benchmark="integration-dataset") + result = evaluations_resource.create(model=mock_model, benchmark=mock_benchmark) # Verify the complete flow assert result is not None assert result.id == "eval-integration-test" - assert result.model_id == "integration-model" - assert result.dataset_id == "integration-dataset" - assert result.status == "submitted" + assert result.model_id == mock_model.id + assert result.benchmark_id == mock_benchmark.id + assert result.status == EvaluationStatus.IN_PROGRESS # Verify the API call was made correctly mock_client.post_cast.assert_called_once() call_args = mock_client.post_cast.call_args assert "/organizations/test-org/projects/test-project/evaluations" in call_args[0][0] - assert call_args.kwargs["body"][0]["model_id"] == "integration-model" - assert call_args.kwargs["body"][0]["dataset_id"] == "integration-dataset" + assert call_args.kwargs["body"][0]["model_id"] == mock_model.id + assert call_args.kwargs["body"][0]["dataset_id"] == mock_benchmark.id diff --git a/tests/resources/test_models_resource.py b/tests/resources/test_models_resource.py index 94ba5f2..7cac60b 100644 --- a/tests/resources/test_models_resource.py +++ b/tests/resources/test_models_resource.py @@ -1,9 +1,9 @@ -from unittest.mock import Mock +from unittest.mock import Mock, call import httpx import pytest -from atlas._models import Model, Models as ModelsData, CustomModel +from atlas.models import Models as ModelsData, CustomModel, PublicModel from atlas._constants import DEFAULT_TIMEOUT from atlas.resources.models.models import Models @@ -61,7 +61,7 @@ def sample_custom_model_data(self): @pytest.fixture def mock_public_models_response(self, sample_model_data): """Mock ModelsData response with public models.""" - model = Model(**sample_model_data) + model = PublicModel(**sample_model_data) return ModelsData(models=[model]) @pytest.fixture @@ -79,22 +79,26 @@ def test_models_initialization(self, mock_client): def test_get_public_models_success(self, models_resource, mock_public_models_response): """get method returns public models successfully.""" - models_resource._get.return_value = mock_public_models_response + models_resource._get.side_effect = lambda *_, **kwargs: ( + mock_public_models_response if kwargs.get("params", {}).get("type") == "public" else ModelsData(models=[]) + ) - result = models_resource.get(type="public") + result = models_resource.get() assert isinstance(result, list) assert len(result) == 1 - assert isinstance(result[0], Model) + assert isinstance(result[0], PublicModel) assert result[0].name == "GPT-4" assert result[0].key == "gpt-4" assert result[0].company == "OpenAI" def test_get_custom_models_success(self, models_resource, mock_custom_models_response): """get method returns custom models successfully.""" - models_resource._get.return_value = mock_custom_models_response + models_resource._get.side_effect = lambda *_, **kwargs: ( + mock_custom_models_response if kwargs.get("params", {}).get("type") == "custom" else ModelsData(models=[]) + ) - result = models_resource.get(type="custom") + result = models_resource.get() assert isinstance(result, list) assert len(result) == 1 @@ -107,34 +111,54 @@ def test_get_models_request_parameters_public(self, models_resource, mock_public """get method makes correct API request for public models.""" models_resource._get.return_value = mock_public_models_response - models_resource.get(type="public") - - models_resource._get.assert_called_once_with( - "/organizations/org-123/projects/proj-456/models", - params={"type": "public"}, - timeout=DEFAULT_TIMEOUT, - cast_to=ModelsData, - ) + models_resource.get() + + expected_calls = [ + call( + "/organizations/org-123/projects/proj-456/models", + params={"type": "custom"}, + timeout=DEFAULT_TIMEOUT, + cast_to=ModelsData, + ), + call( + "/organizations/org-123/projects/proj-456/models", + params={"type": "public"}, + timeout=DEFAULT_TIMEOUT, + cast_to=ModelsData, + ), + ] + + models_resource._get.assert_has_calls(expected_calls) def test_get_models_request_parameters_custom(self, models_resource, mock_custom_models_response): """get method makes correct API request for custom models.""" models_resource._get.return_value = mock_custom_models_response - models_resource.get(type="custom") - - models_resource._get.assert_called_once_with( - "/organizations/org-123/projects/proj-456/models", - params={"type": "custom"}, - timeout=DEFAULT_TIMEOUT, - cast_to=ModelsData, - ) + models_resource.get() + + expected_calls = [ + call( + "/organizations/org-123/projects/proj-456/models", + params={"type": "custom"}, + timeout=DEFAULT_TIMEOUT, + cast_to=ModelsData, + ), + call( + "/organizations/org-123/projects/proj-456/models", + params={"type": "public"}, + timeout=DEFAULT_TIMEOUT, + cast_to=ModelsData, + ), + ] + + models_resource._get.assert_has_calls(expected_calls) def test_get_models_with_custom_timeout(self, models_resource, mock_public_models_response): """get method accepts custom timeout.""" models_resource._get.return_value = mock_public_models_response custom_timeout = 60.0 - models_resource.get(type="public", timeout=custom_timeout) + models_resource.get(timeout=custom_timeout) call_args = models_resource._get.call_args assert call_args.kwargs["timeout"] == custom_timeout @@ -144,40 +168,32 @@ def test_get_models_with_httpx_timeout(self, models_resource, mock_public_models models_resource._get.return_value = mock_public_models_response custom_timeout = httpx.Timeout(60.0) - models_resource.get(type="public", timeout=custom_timeout) + models_resource.get(timeout=custom_timeout) call_args = models_resource._get.call_args assert call_args.kwargs["timeout"] is custom_timeout - def test_get_models_none_response(self, models_resource): - """get method returns None when response is None.""" - models_resource._get.return_value = None + @pytest.mark.parametrize( + "mock_response, expected", + [ + (None, []), # None response + ("invalid-response", []), # Invalid type + (ModelsData(models=[]), []), # Empty ModelsData + ], + ) + def test_get_models_responses(self, models_resource, mock_response, expected): + """get method handles different types of responses correctly.""" + models_resource._get.return_value = mock_response - result = models_resource.get(type="public") + result = models_resource.get() - assert result is None - - def test_get_models_invalid_response_type(self, models_resource): - """get method handles non-ModelsData response gracefully.""" - models_resource._get.return_value = "invalid-response" - - result = models_resource.get(type="public") - - assert result is None - - def test_get_models_empty_response(self, models_resource): - """get method returns empty list when no models in response.""" - empty_response = ModelsData(models=[]) - models_resource._get.return_value = empty_response - - result = models_resource.get(type="public") - - assert result == [] - assert isinstance(result, list) + assert result == expected + if isinstance(mock_response, ModelsData): + assert isinstance(result, list) def test_get_models_multiple_items(self, models_resource, sample_model_data): """get method returns multiple models correctly.""" - model1 = Model(**sample_model_data) + model1 = PublicModel(**sample_model_data) # Create second model with different data model2_data = sample_model_data.copy() @@ -185,12 +201,15 @@ def test_get_models_multiple_items(self, models_resource, sample_model_data): model2_data["key"] = "gpt-3.5-turbo" model2_data["name"] = "GPT-3.5 Turbo" model2_data["parameters"] = 1.75e11 - model2 = Model(**model2_data) + model2 = PublicModel(**model2_data) response = ModelsData(models=[model1, model2]) - models_resource._get.return_value = response - result = models_resource.get(type="public") + models_resource._get.side_effect = lambda *_, **kwargs: ( + response if kwargs.get("params", {}).get("type") == "public" else ModelsData(models=[]) + ) + + result = models_resource.get() assert len(result) == 2 assert result[0].key == "gpt-4" @@ -204,27 +223,19 @@ def test_get_models_url_construction(self, models_resource, mock_public_models_r models_resource._client.project_id = "custom-project" models_resource._get.return_value = mock_public_models_response - models_resource.get(type="public") + models_resource.get() expected_url = "/organizations/custom-org/projects/custom-project/models" call_args = models_resource._get.call_args assert call_args[0][0] == expected_url - @pytest.mark.parametrize("model_type", ["public", "custom"]) - def test_get_models_type_parameter(self, models_resource, model_type): - """get method accepts both public and custom types.""" - models_resource._get.return_value = ModelsData(models=[]) - - models_resource.get(type=model_type) - - call_args = models_resource._get.call_args - assert call_args.kwargs["params"]["type"] == model_type - def test_get_models_cast_to_parameter(self, models_resource, mock_public_models_response): """get method specifies correct cast_to parameter.""" - models_resource._get.return_value = mock_public_models_response + models_resource._get.side_effect = lambda *_, **kwargs: ( + mock_public_models_response if kwargs.get("params", {}).get("type") == "public" else ModelsData(models=[]) + ) - models_resource.get(type="public") + models_resource.get() call_args = models_resource._get.call_args assert call_args.kwargs["cast_to"] is ModelsData @@ -233,7 +244,7 @@ def test_get_models_timeout_default(self, models_resource, mock_public_models_re """get method uses DEFAULT_TIMEOUT when no timeout specified.""" models_resource._get.return_value = mock_public_models_response - models_resource.get(type="public") + models_resource.get() call_args = models_resource._get.call_args assert call_args.kwargs["timeout"] is DEFAULT_TIMEOUT @@ -242,7 +253,7 @@ def test_get_models_with_none_timeout(self, models_resource, mock_public_models_ """get method accepts None timeout.""" models_resource._get.return_value = mock_public_models_response - models_resource.get(type="public", timeout=None) + models_resource.get(timeout=None) call_args = models_resource._get.call_args assert call_args.kwargs["timeout"] is None @@ -251,7 +262,7 @@ def test_get_models_model_attributes(self, models_resource, mock_public_models_r """get method preserves all model attributes correctly.""" models_resource._get.return_value = mock_public_models_response - result = models_resource.get(type="public") + result = models_resource.get() model = result[0] assert model.context_length == 8192 @@ -266,7 +277,7 @@ def test_get_models_custom_model_attributes(self, models_resource, mock_custom_m """get method preserves all custom model attributes correctly.""" models_resource._get.return_value = mock_custom_models_response - result = models_resource.get(type="custom") + result = models_resource.get() custom_model = result[0] assert custom_model.max_tokens == 4096 @@ -303,7 +314,7 @@ def test_get_models_handles_api_error(self, models_resource): models_resource._get.side_effect = api_error with pytest.raises(APIStatusError): - models_resource.get(type="public") + models_resource.get() def test_get_models_handles_forbidden_error(self, models_resource): """get method propagates permission errors.""" @@ -317,7 +328,7 @@ def test_get_models_handles_forbidden_error(self, models_resource): models_resource._get.side_effect = permission_error with pytest.raises(PermissionDeniedError): - models_resource.get(type="custom") + models_resource.get() def test_get_models_handles_connection_error(self, models_resource): """get method propagates connection errors.""" @@ -328,7 +339,7 @@ def test_get_models_handles_connection_error(self, models_resource): models_resource._get.side_effect = connection_error with pytest.raises(APIConnectionError): - models_resource.get(type="public") + models_resource.get() def test_get_models_handles_timeout_error(self, models_resource): """get method propagates timeout errors.""" @@ -339,7 +350,7 @@ def test_get_models_handles_timeout_error(self, models_resource): models_resource._get.side_effect = timeout_error with pytest.raises(APITimeoutError): - models_resource.get(type="public", timeout=5.0) + models_resource.get(timeout=5.0) class TestModelsTyping: @@ -359,17 +370,20 @@ def models_resource(self, mock_client): """Models resource instance.""" return Models(mock_client) - def test_get_models_return_type_consistency(self, models_resource): + @pytest.mark.parametrize( + "mock_response, expected_type", + [ + (None, list), # None response + (ModelsData(models=[]), list), # Empty ModelsData + ], + ) + def test_get_models_return_type_consistency(self, models_resource, mock_response, expected_type): """get method returns consistent types.""" - # Test that the method returns either a list or None - models_resource._get.return_value = None - result = models_resource.get(type="public") - assert result is None - - # Test that it returns a list when successful - models_resource._get.return_value = ModelsData(models=[]) - result = models_resource.get(type="public") - assert isinstance(result, list) + models_resource._get.return_value = mock_response + + result = models_resource.get() + + assert isinstance(result, expected_type) def test_get_models_mixed_model_types(self, models_resource): """get method can handle mixed model types in response.""" @@ -401,21 +415,24 @@ def test_get_models_mixed_model_types(self, models_resource): "disabled": False, } - public_model = Model(**public_data) + public_model = PublicModel(**public_data) custom_model = CustomModel(**custom_data) - response = ModelsData(models=[public_model, custom_model]) - models_resource._get.return_value = response + models_resource._get.side_effect = lambda *_, **kwargs: ( + ModelsData(models=[public_model]) + if kwargs.get("params", {}).get("type") == "public" + else ModelsData(models=[custom_model]) + ) - result = models_resource.get(type="public") # Type doesn't matter for this test + result = models_resource.get() # Type doesn't matter for this test assert len(result) == 2 - assert isinstance(result[0], Model) - assert isinstance(result[1], CustomModel) - assert result[0].key == "gpt-4" - assert result[1].key == "my-model" - assert hasattr(result[0], "parameters") # Model-specific attribute - assert hasattr(result[1], "max_tokens") # CustomModel-specific attribute + assert isinstance(result[0], CustomModel) + assert isinstance(result[1], PublicModel) + assert result[0].key == "my-model" + assert result[1].key == "gpt-4" + assert hasattr(result[0], "max_tokens") # CustomModel-specific attribute + assert hasattr(result[1], "parameters") # PublicModel-specific attribute def test_get_models_large_parameters_handling(self, models_resource): """get method handles large parameter numbers correctly.""" @@ -436,11 +453,13 @@ def test_get_models_large_parameters_handling(self, models_resource): "deprecated": False, } - large_model = Model(**large_model_data) + large_model = PublicModel(**large_model_data) response = ModelsData(models=[large_model]) - models_resource._get.return_value = response + models_resource._get.side_effect = lambda *_, **kwargs: ( + response if kwargs.get("params", {}).get("type") == "public" else ModelsData(models=[]) + ) - result = models_resource.get(type="public") + result = models_resource.get() assert len(result) == 1 assert result[0].parameters == 1.3e14 diff --git a/tests/resources/test_results.py b/tests/resources/test_results.py index 2529670..db05b4f 100644 --- a/tests/resources/test_results.py +++ b/tests/resources/test_results.py @@ -4,7 +4,7 @@ import httpx import pytest -from atlas._models import Result, Results as ResultsData, Pagination, ResultMetrics +from atlas.models import Result, Results as ResultsData, Pagination, ResultMetrics from atlas._constants import DEFAULT_TIMEOUT from atlas.resources.results.results import Results @@ -684,7 +684,12 @@ def test_get_results_pagination_metadata_calculation(self, results_resource, sam ], ) def test_pagination_total_pages_calculation( - self, results_resource, sample_result_data, total_count, page_size, expected_pages + self, + results_resource, + sample_result_data, + total_count, + page_size, + expected_pages, ): """get method correctly calculates total_pages for various scenarios.""" api_response = { diff --git a/tests/test_client.py b/tests/test_client.py index 84b386d..af4a995 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1,148 +1,122 @@ +from unittest.mock import Mock, patch + import pytest from atlas import Atlas -from atlas._exceptions import AtlasError class TestAtlasClientInitialization: """Test Atlas client initialization and configuration.""" - def test_init_with_explicit_params(self): - """Client initializes correctly with explicit parameters.""" - client = Atlas(api_key="explicit-key", organization_id="explicit-org", project_id="explicit-project") - - assert client.api_key == "explicit-key" - assert client.organization_id == "explicit-org" - assert client.project_id == "explicit-project" - - def test_init_from_environment(self, mock_env_vars): - """Client initializes from environment variables.""" - _ = mock_env_vars # Fixture used for side effects - client = Atlas() + @pytest.fixture + def mock_org(self): + org = Mock() + org.id = "org-123" + org.projects = [Mock(id="proj-456")] + return org - assert client.api_key == "test-api-key" - assert client.organization_id == "test-org-id" - assert client.project_id == "test-project-id" - - def test_explicit_params_override_env(self, mock_env_vars): - """Explicit parameters override environment variables.""" - _ = mock_env_vars # Fixture used for side effects - client = Atlas(api_key="override-key", organization_id="override-org") - - assert client.api_key == "override-key" - assert client.organization_id == "override-org" - assert client.project_id == "test-project-id" - - def test_missing_api_key_raises_error(self, env_vars): - """Missing API key raises AtlasError.""" - _ = env_vars # Fixture used for side effects - with pytest.raises(AtlasError, match="api_key client option must be set"): - Atlas() - - def test_none_values_fallback_to_env(self, mock_env_vars): + def test_none_values_fallback_to_env(self, mock_env_vars, mock_org): """None values explicitly passed fallback to environment.""" _ = mock_env_vars # Fixture used for side effects - client = Atlas(api_key=None, organization_id=None, project_id=None) + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key=None) assert client.api_key == "test-api-key" - assert client.organization_id == "test-org-id" - assert client.project_id == "test-project-id" - - def test_optional_params_can_be_none(self): - """Organization and project IDs can be None.""" - client = Atlas(api_key="test-key") - - assert client.api_key == "test-key" - assert client.organization_id is None - assert client.project_id is None @pytest.mark.parametrize("base_url", ["https://custom.api.com", "https://staging.layerlens.ai/api/v1"]) - def test_custom_base_url(self, base_url): + def test_custom_base_url(self, base_url, mock_org): """Client accepts custom base URL.""" - client = Atlas(api_key="test-key", base_url=base_url) + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key", base_url=base_url) assert str(client.base_url).rstrip("/") == base_url.rstrip("/") - def test_custom_timeout(self): + def test_custom_timeout(self, mock_org): """Client accepts custom timeout.""" import httpx - client = Atlas(api_key="test-key", timeout=30.0) + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key", timeout=30.0) assert isinstance(client.timeout, httpx.Timeout) - def test_auth_headers_with_api_key(self): + def test_auth_headers_with_api_key(self, mock_org): """auth_headers property returns correct headers when API key is set.""" - client = Atlas(api_key="test-api-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-api-key") headers = client.auth_headers assert headers == {"x-api-key": "test-api-key"} - def test_auth_headers_without_api_key(self): + def test_auth_headers_without_api_key(self, mock_org): """auth_headers property returns empty dict when no API key.""" - client = Atlas(api_key="") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="") headers = client.auth_headers assert headers == {} - def test_auth_headers_with_empty_api_key(self): + def test_auth_headers_with_empty_api_key(self, mock_org): """auth_headers property returns empty dict when API key is empty string.""" - client = Atlas(api_key="") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="") headers = client.auth_headers assert headers == {} - def test_copy_method(self): + def test_copy_method(self, mock_org): """copy method creates new client with overridden parameters.""" - original_client = Atlas( - api_key="original-key", - organization_id="original-org", - project_id="original-project", - base_url="https://original.api.com", - timeout=10.0, - ) + with patch("atlas.Atlas._get_organization", return_value=mock_org): + original_client = Atlas( + api_key="original-key", + base_url="https://original.api.com", + timeout=10.0, + ) - new_client = original_client.copy(api_key="new-key", organization_id="new-org", timeout=20.0) + with patch("atlas.Atlas._get_organization", return_value=mock_org): + new_client = original_client.copy(api_key="new-key", timeout=20.0) # Check overridden values assert new_client.api_key == "new-key" - assert new_client.organization_id == "new-org" # The copy method uses 'or' logic, so timeout=20.0 won't override the existing timeout # Let's check that the timeout is still the original value assert new_client.timeout == original_client.timeout # Should remain the original timeout # Check unchanged values - assert new_client.project_id == "original-project" assert str(new_client.base_url) == "https://original.api.com" - def test_copy_method_partial_override(self): + def test_copy_method_partial_override(self, mock_org): """copy method allows partial parameter override.""" - original_client = Atlas(api_key="original-key", organization_id="original-org", project_id="original-project") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + original_client = Atlas(api_key="original-key") - new_client = original_client.copy(api_key="new-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + new_client = original_client.copy(api_key="new-key") assert new_client.api_key == "new-key" - assert new_client.organization_id == "original-org" - assert new_client.project_id == "original-project" - def test_with_options_alias(self): + def test_with_options_alias(self, mock_org): """with_options is an alias for copy method.""" - original_client = Atlas(api_key="original-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + original_client = Atlas(api_key="original-key") - new_client = original_client.with_options(api_key="new-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + new_client = original_client.with_options(api_key="new-key") assert new_client.api_key == "new-key" assert new_client is not original_client - def test_copy_method_timeout_override(self): + def test_copy_method_timeout_override(self, mock_org): """copy method properly overrides timeout when original is None.""" # Create a client with no explicit timeout (uses default) - original_client = Atlas(api_key="original-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + original_client = Atlas(api_key="original-key") - new_client = original_client.copy(timeout=30.0) + with patch("atlas.Atlas._get_organization", return_value=mock_org): + new_client = original_client.copy(timeout=30.0) import httpx @@ -154,16 +128,28 @@ def test_copy_method_timeout_override(self): class TestAtlasClientErrorHandling: """Test error handling in Atlas client.""" + @pytest.fixture + def mock_org(self): + org = Mock() + org.id = "org-123" + org.projects = [Mock(id="proj-456")] + return org + def _create_mock_response(self, status_code): """Helper to create a mock response with all required attributes.""" mock_request = type("MockRequest", (), {})() - return type("MockResponse", (), {"status_code": status_code, "request": mock_request, "headers": {}})() + return type( + "MockResponse", + (), + {"status_code": status_code, "request": mock_request, "headers": {}}, + )() - def test_make_status_error_bad_request(self): + def test_make_status_error_bad_request(self, mock_org): """_make_status_error creates BadRequestError for 400 status.""" from atlas._exceptions import BadRequestError - client = Atlas(api_key="test-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key") mock_response = self._create_mock_response(400) mock_body = {"error": "Bad request"} @@ -172,11 +158,12 @@ def test_make_status_error_bad_request(self): assert isinstance(error, BadRequestError) assert error.message == "Bad request" - def test_make_status_error_unauthorized(self): + def test_make_status_error_unauthorized(self, mock_org): """_make_status_error creates AuthenticationError for 401 status.""" from atlas._exceptions import AuthenticationError - client = Atlas(api_key="test-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key") mock_response = self._create_mock_response(401) mock_body = {"error": "Unauthorized"} @@ -185,11 +172,12 @@ def test_make_status_error_unauthorized(self): assert isinstance(error, AuthenticationError) assert error.message == "Unauthorized" - def test_make_status_error_forbidden(self): + def test_make_status_error_forbidden(self, mock_org): """_make_status_error creates PermissionDeniedError for 403 status.""" from atlas._exceptions import PermissionDeniedError - client = Atlas(api_key="test-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key") mock_response = self._create_mock_response(403) mock_body = {"error": "Forbidden"} @@ -198,11 +186,12 @@ def test_make_status_error_forbidden(self): assert isinstance(error, PermissionDeniedError) assert error.message == "Forbidden" - def test_make_status_error_not_found(self): + def test_make_status_error_not_found(self, mock_org): """_make_status_error creates NotFoundError for 404 status.""" from atlas._exceptions import NotFoundError - client = Atlas(api_key="test-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key") mock_response = self._create_mock_response(404) mock_body = {"error": "Not found"} @@ -211,11 +200,12 @@ def test_make_status_error_not_found(self): assert isinstance(error, NotFoundError) assert error.message == "Not found" - def test_make_status_error_conflict(self): + def test_make_status_error_conflict(self, mock_org): """_make_status_error creates ConflictError for 409 status.""" from atlas._exceptions import ConflictError - client = Atlas(api_key="test-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key") mock_response = self._create_mock_response(409) mock_body = {"error": "Conflict"} @@ -224,11 +214,12 @@ def test_make_status_error_conflict(self): assert isinstance(error, ConflictError) assert error.message == "Conflict" - def test_make_status_error_unprocessable_entity(self): + def test_make_status_error_unprocessable_entity(self, mock_org): """_make_status_error creates UnprocessableEntityError for 422 status.""" from atlas._exceptions import UnprocessableEntityError - client = Atlas(api_key="test-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key") mock_response = self._create_mock_response(422) mock_body = {"error": "Unprocessable entity"} @@ -237,11 +228,12 @@ def test_make_status_error_unprocessable_entity(self): assert isinstance(error, UnprocessableEntityError) assert error.message == "Unprocessable entity" - def test_make_status_error_rate_limit(self): + def test_make_status_error_rate_limit(self, mock_org): """_make_status_error creates RateLimitError for 429 status.""" from atlas._exceptions import RateLimitError - client = Atlas(api_key="test-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key") mock_response = self._create_mock_response(429) mock_body = {"error": "Rate limited"} @@ -250,11 +242,12 @@ def test_make_status_error_rate_limit(self): assert isinstance(error, RateLimitError) assert error.message == "Rate limited" - def test_make_status_error_internal_server_error(self): + def test_make_status_error_internal_server_error(self, mock_org): """_make_status_error creates InternalServerError for 500+ status.""" from atlas._exceptions import InternalServerError - client = Atlas(api_key="test-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key") mock_response = self._create_mock_response(500) mock_body = {"error": "Internal server error"} @@ -263,11 +256,12 @@ def test_make_status_error_internal_server_error(self): assert isinstance(error, InternalServerError) assert error.message == "Internal server error" - def test_make_status_error_gateway_timeout(self): + def test_make_status_error_gateway_timeout(self, mock_org): """_make_status_error creates InternalServerError for 502 status.""" from atlas._exceptions import InternalServerError - client = Atlas(api_key="test-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key") mock_response = self._create_mock_response(502) mock_body = {"error": "Gateway timeout"} @@ -276,11 +270,12 @@ def test_make_status_error_gateway_timeout(self): assert isinstance(error, InternalServerError) assert error.message == "Gateway timeout" - def test_make_status_error_unknown_status(self): + def test_make_status_error_unknown_status(self, mock_org): """_make_status_error creates generic APIStatusError for unknown status codes.""" from atlas._exceptions import APIStatusError - client = Atlas(api_key="test-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key") mock_response = self._create_mock_response(418) # I'm a teapot mock_body = {"error": "Unknown error"} @@ -289,11 +284,12 @@ def test_make_status_error_unknown_status(self): assert isinstance(error, APIStatusError) assert error.message == "Unknown error" - def test_make_status_error_with_non_mapping_body(self): + def test_make_status_error_with_non_mapping_body(self, mock_org): """_make_status_error handles non-mapping body correctly.""" from atlas._exceptions import NotFoundError - client = Atlas(api_key="test-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key") mock_response = self._create_mock_response(404) mock_body = "Simple string error" @@ -302,11 +298,12 @@ def test_make_status_error_with_non_mapping_body(self): assert isinstance(error, NotFoundError) assert error.body == "Simple string error" - def test_make_status_error_with_none_body(self): + def test_make_status_error_with_none_body(self, mock_org): """_make_status_error handles None body correctly.""" from atlas._exceptions import BadRequestError - client = Atlas(api_key="test-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key") mock_response = self._create_mock_response(400) error = client._make_status_error("Bad request", body=None, response=mock_response) @@ -314,13 +311,17 @@ def test_make_status_error_with_none_body(self): assert isinstance(error, BadRequestError) assert error.body is None - def test_make_status_error_with_complex_body(self): + def test_make_status_error_with_complex_body(self, mock_org): """_make_status_error extracts error from complex body structure.""" from atlas._exceptions import AuthenticationError - client = Atlas(api_key="test-key") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="test-key") mock_response = self._create_mock_response(401) - mock_body = {"error": {"message": "Invalid API key", "code": "AUTH_ERROR"}, "timestamp": "2023-01-01T00:00:00Z"} + mock_body = { + "error": {"message": "Invalid API key", "code": "AUTH_ERROR"}, + "timestamp": "2023-01-01T00:00:00Z", + } error = client._make_status_error("Authentication failed", body=mock_body, response=mock_response) diff --git a/tests/test_integration.py b/tests/test_integration.py index d91c87e..2a26d10 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -5,15 +5,14 @@ import pytest from atlas import Atlas -from atlas._models import ( +from atlas.models import ( Model, - Models as ModelsData, Result, Results as ResultsData, Benchmark, - Benchmarks as BenchmarksData, Evaluation, Evaluations as EvaluationsData, + EvaluationStatus, ) @@ -23,7 +22,7 @@ class TestAtlasIntegration: @pytest.fixture def atlas_client(self): """Create Atlas client with mocked dependencies.""" - return Atlas(api_key="test-api-key", organization_id="test-org", project_id="test-project") + return Atlas(api_key="test-api-key") @pytest.fixture def sample_model_data(self): @@ -65,7 +64,7 @@ def sample_evaluation_data(self): """Sample evaluation data for testing.""" return { "id": "eval-12345", - "status": "completed", + "status": "success", "status_description": "Evaluation completed successfully", "submitted_at": 1640995200, "finished_at": 1640995800, @@ -102,7 +101,12 @@ class TestCompleteEvaluationWorkflow: @pytest.fixture def atlas_client(self): """Atlas client for workflow testing.""" - return Atlas(api_key="workflow-test-key", organization_id="workflow-org", project_id="workflow-project") + mock_org = Mock() + mock_org.id = "org-123" + mock_org.projects = [Mock(id="proj-456")] + + with patch("atlas.Atlas._get_organization", return_value=mock_org): + return Atlas(api_key="workflow-test-key") def test_complete_evaluation_workflow(self, atlas_client): """Test complete workflow: get models/benchmarks -> create evaluation -> get results.""" @@ -139,16 +143,12 @@ def test_complete_evaluation_workflow(self, atlas_client): evaluation_data = { "id": "eval-789", - "status": "completed", + "status": "success", "status_description": "Done", "submitted_at": 1640995200, "finished_at": 1640995800, "model_id": "model-123", - "model_name": "GPT-4", - "model_key": "gpt-4", - "model_company": "OpenAI", "dataset_id": "bench-456", - "dataset_name": "MMLU", "average_duration": 2500, "readability_score": 0.85, "toxicity_score": 0.02, @@ -173,8 +173,6 @@ def test_complete_evaluation_workflow(self, atlas_client): result = Result(**result_data) # Mock responses - models_response = ModelsData(models=[model]) - benchmarks_response = BenchmarksData(datasets=[benchmark]) evaluations_response = EvaluationsData(data=[evaluation]) results_response = ResultsData( evaluation_id="eval-789", @@ -209,9 +207,9 @@ def test_complete_evaluation_workflow(self, atlas_client): mock_post.return_value = evaluations_response # Create evaluation # Step 1: Create evaluation directly (Atlas client doesn't expose models/benchmarks resources) - created_evaluation = atlas_client.evaluations.create(model="gpt-4", benchmark="mmlu") + created_evaluation = atlas_client.evaluations.create(model=model, benchmark=benchmark) assert created_evaluation.id == "eval-789" - assert created_evaluation.status == "completed" + assert created_evaluation.status == EvaluationStatus.SUCCESS # Step 2: Get evaluation results results = atlas_client.results.get(evaluation_id=created_evaluation.id) @@ -306,9 +304,12 @@ class TestResourceInteraction: @pytest.fixture def atlas_client(self): """Atlas client for resource interaction testing.""" - return Atlas( - api_key="interaction-test-key", organization_id="interaction-org", project_id="interaction-project" - ) + mock_org = Mock() + mock_org.id = "org-123" + mock_org.projects = [Mock(id="proj-456")] + + with patch("atlas.Atlas._get_organization", return_value=mock_org): + return Atlas(api_key="interaction-test-key") def test_evaluation_creation_with_model_and_benchmark_objects(self, atlas_client): """Test creating evaluation using model and benchmark objects.""" @@ -345,16 +346,12 @@ def test_evaluation_creation_with_model_and_benchmark_objects(self, atlas_client evaluation_data = { "id": "eval-interaction", - "status": "submitted", + "status": "in-progress", "status_description": "Submitted", "submitted_at": 1640995200, "finished_at": 0, "model_id": "model-abc", - "model_name": "Claude 3", - "model_key": "claude-3", - "model_company": "Anthropic", "dataset_id": "bench-xyz", - "dataset_name": "HellaSwag", "average_duration": 0, "readability_score": 0.0, "toxicity_score": 0.0, @@ -372,17 +369,17 @@ def test_evaluation_creation_with_model_and_benchmark_objects(self, atlas_client mock_post.return_value = evaluations_response # Create evaluation using model and benchmark keys - created_evaluation = atlas_client.evaluations.create(model=model.key, benchmark=benchmark.key) + created_evaluation = atlas_client.evaluations.create(model=model, benchmark=benchmark) assert created_evaluation.id == "eval-interaction" - assert created_evaluation.model_key == model.key - assert created_evaluation.dataset_id == benchmark.id + assert created_evaluation.model_id == model.id + assert created_evaluation.benchmark_id == benchmark.id # Verify API call call_args = mock_post.call_args body = call_args.kwargs["body"][0] - assert body["model_id"] == model.key - assert body["dataset_id"] == benchmark.key + assert body["model_id"] == model.id + assert body["dataset_id"] == benchmark.id def test_results_analysis_workflow(self, atlas_client): """Test analyzing results from multiple evaluations.""" @@ -477,9 +474,17 @@ def test_results_analysis_workflow(self, atlas_client): class TestAtlasClientProperties: """Test Atlas client resource properties and access.""" - def test_client_has_all_resource_properties(self): + @pytest.fixture + def mock_org(self): + org = Mock() + org.id = "org-123" + org.projects = [Mock(id="proj-456")] + return org + + def test_client_has_all_resource_properties(self, mock_org): """Atlas client exposes all resource properties.""" - client = Atlas(api_key="property-test-key", organization_id="property-org", project_id="property-project") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="property-test-key") # Verify available resource properties exist assert hasattr(client, "evaluations") @@ -492,56 +497,57 @@ def test_client_has_all_resource_properties(self): assert isinstance(client.evaluations, Evaluations) assert isinstance(client.results, Results) - def test_resource_properties_share_same_client(self): + def test_resource_properties_share_same_client(self, mock_org): """All resource properties share the same client instance.""" - client = Atlas(api_key="shared-client-test", organization_id="shared-org", project_id="shared-project") + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="shared-client-test") # Verify all resources use the same client assert client.evaluations._client is client assert client.results._client is client - def test_client_configuration_propagates_to_resources(self): - """Client configuration (org_id, project_id) propagates to resources.""" - org_id = "config-test-org" - project_id = "config-test-project" - - client = Atlas(api_key="config-test-key", organization_id=org_id, project_id=project_id) - - # Verify configuration is available to resources - assert client.organization_id == org_id - assert client.project_id == project_id - - # Resources should have access to client configuration - assert client.evaluations._client.organization_id == org_id - assert client.evaluations._client.project_id == project_id - assert client.results._client.organization_id == org_id - assert client.results._client.project_id == project_id - class TestConcurrentOperations: """Test concurrent operations and resource independence.""" - def test_multiple_atlas_clients_independent(self): + @pytest.fixture + def mock_org1(self): + org = Mock() + org.id = "org-123" + org.projects = [Mock(id="proj-456")] + return org + + @pytest.fixture + def mock_org2(self): + org = Mock() + org.id = "org-456" + org.projects = [Mock(id="proj-123")] + return org + + def test_multiple_atlas_clients_independent(self, mock_org1, mock_org2): """Multiple Atlas client instances operate independently.""" - client1 = Atlas(api_key="client-1-key", organization_id="org-1", project_id="project-1") + with patch("atlas.Atlas._get_organization", return_value=mock_org1): + client1 = Atlas(api_key="client-1-key") - client2 = Atlas(api_key="client-2-key", organization_id="org-2", project_id="project-2") + with patch("atlas.Atlas._get_organization", return_value=mock_org2): + client2 = Atlas(api_key="client-2-key") # Verify clients are independent assert client1.api_key != client2.api_key - assert client1.organization_id != client2.organization_id - assert client1.project_id != client2.project_id # Verify resources are independent assert client1.evaluations._client is not client2.evaluations._client assert client1.results._client is not client2.results._client - def test_resource_operations_isolated(self): + def test_resource_operations_isolated(self, mock_org1, mock_org2): """Operations on different client resources are isolated.""" - client1 = Atlas(api_key="iso-test-1", organization_id="org-1", project_id="proj-1") - client2 = Atlas(api_key="iso-test-2", organization_id="org-2", project_id="proj-2") + with patch("atlas.Atlas._get_organization", return_value=mock_org1): + client1 = Atlas(api_key="iso-test-1") + + with patch("atlas.Atlas._get_organization", return_value=mock_org2): + client2 = Atlas(api_key="iso-test-2") result_data = { "subset": "test", @@ -618,11 +624,52 @@ def test_resource_operations_isolated(self): class TestErrorPropagation: """Test error propagation through full workflows.""" - def test_evaluation_workflow_error_propagation(self): + @pytest.fixture + def mock_org(self): + org = Mock() + org.id = "org-123" + org.projects = [Mock(id="proj-456")] + return org + + def test_evaluation_workflow_error_propagation(self, mock_org): """Errors in evaluation workflow are properly propagated.""" from atlas._exceptions import APIStatusError, APIConnectionError - client = Atlas(api_key="error-test-key", organization_id="error-org", project_id="error-project") + # Create model and benchmark objects + model_data = { + "id": "model-abc", + "key": "claude-3", + "name": "Claude 3", + "company": "Anthropic", + "description": "Claude 3", + "released_at": 1709251200, + "parameters": 5e11, + "modality": "text", + "context_length": 100000, + "architecture_type": "transformer", + "license": "proprietary", + "open_weights": False, + "region": "us-west-2", + "deprecated": False, + } + + benchmark_data = { + "id": "bench-xyz", + "key": "hellaswag", + "name": "HellaSwag", + "full_description": "HellaSwag benchmark", + "language": "english", + "categories": ["reasoning"], + "subsets": ["commonsense"], + "prompt_count": 10042, + "deprecated": False, + } + + model = Model(**model_data) + benchmark = Benchmark(**benchmark_data) + + with patch("atlas.Atlas._get_organization", return_value=mock_org): + client = Atlas(api_key="error-test-key") mock_response = Mock() mock_response.status_code = 500 @@ -641,7 +688,7 @@ def test_evaluation_workflow_error_propagation(self): # Test connection error in evaluations.create mock_post.side_effect = connection_error with pytest.raises(APIConnectionError): - client.evaluations.create(model="gpt-4", benchmark="mmlu") + client.evaluations.create(model=model, benchmark=benchmark) # Verify errors didn't interfere with each other assert mock_get.called diff --git a/tests/test_models.py b/tests/test_models.py index 4d26157..a3eb0d6 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -3,19 +3,20 @@ import pytest from pydantic import ValidationError -from atlas._models import ( - Model, +from atlas.models import ( Models, Result, Results, - Benchmark, Benchmarks, Evaluation, Pagination, CustomModel, Evaluations, + PublicModel, ResultMetrics, CustomBenchmark, + PublicBenchmark, + EvaluationStatus, ) @@ -27,20 +28,12 @@ def valid_evaluation_data(self): """Valid evaluation data for testing.""" return { "id": "eval-123", - "status": "completed", - "status_description": "Evaluation completed successfully", + "status": "success", "submitted_at": 1640995200, "finished_at": 1640995800, "model_id": "model-456", - "model_name": "GPT-4", - "model_key": "gpt-4", - "model_company": "OpenAI", "dataset_id": "dataset-789", - "dataset_name": "MMLU", "average_duration": 2500, - "readability_score": 0.85, - "toxicity_score": 0.02, - "ethics_score": 0.92, "accuracy": 0.89, } @@ -49,10 +42,9 @@ def test_evaluation_creation_with_valid_data(self, valid_evaluation_data): evaluation = Evaluation(**valid_evaluation_data) assert evaluation.id == "eval-123" - assert evaluation.status == "completed" - assert evaluation.model_name == "GPT-4" + assert evaluation.status == EvaluationStatus.SUCCESS + assert evaluation.model_id == "model-456" assert evaluation.accuracy == 0.89 - assert evaluation.readability_score == 0.85 def test_evaluation_field_types(self, valid_evaluation_data): """Evaluation model enforces correct field types.""" @@ -60,7 +52,6 @@ def test_evaluation_field_types(self, valid_evaluation_data): assert isinstance(evaluation.id, str) assert isinstance(evaluation.submitted_at, int) - assert isinstance(evaluation.readability_score, float) assert isinstance(evaluation.accuracy, float) def test_evaluation_validation_errors(self, valid_evaluation_data): @@ -105,20 +96,12 @@ def evaluation_data(self): """Sample evaluation data.""" return { "id": "eval-1", - "status": "completed", - "status_description": "Done", + "status": "success", "submitted_at": 1640995200, "finished_at": 1640995800, "model_id": "model-1", - "model_name": "Test Model", - "model_key": "test-model", - "model_company": "TestCorp", "dataset_id": "dataset-1", - "dataset_name": "Test Dataset", "average_duration": 1000, - "readability_score": 0.8, - "toxicity_score": 0.1, - "ethics_score": 0.9, "accuracy": 0.85, } @@ -447,7 +430,7 @@ def valid_model_data(self): def test_model_creation(self, valid_model_data): """Model creates with valid data.""" - model = Model(**valid_model_data) + model = PublicModel(**valid_model_data) assert model.id == "model-123" assert model.name == "GPT-4" @@ -457,7 +440,7 @@ def test_model_creation(self, valid_model_data): def test_model_boolean_fields(self, valid_model_data): """Model handles boolean fields correctly.""" - model = Model(**valid_model_data) + model = PublicModel(**valid_model_data) assert isinstance(model.open_weights, bool) assert isinstance(model.deprecated, bool) @@ -465,7 +448,7 @@ def test_model_boolean_fields(self, valid_model_data): def test_model_numeric_fields(self, valid_model_data): """Model validates numeric fields.""" - model = Model(**valid_model_data) + model = PublicModel(**valid_model_data) assert isinstance(model.parameters, float) assert isinstance(model.context_length, int) @@ -477,13 +460,13 @@ def test_model_field_validation(self, valid_model_data): invalid_data = valid_model_data.copy() invalid_data["parameters"] = "not-a-number" with pytest.raises(ValidationError): - Model(**invalid_data) + PublicModel(**invalid_data) # Test int field validation invalid_data = valid_model_data.copy() invalid_data["context_length"] = "not-an-int" with pytest.raises(ValidationError): - Model(**invalid_data) + PublicModel(**invalid_data) class TestCustomModel: @@ -551,10 +534,10 @@ def test_models_with_mixed_model_types(self): "disabled": False, } - models = Models(models=[model_data, custom_model_data]) # type: ignore[arg-type] + models = Models(models=[PublicModel(**model_data), CustomModel(**custom_model_data)]) # type: ignore[arg-type] assert len(models.models) == 2 - assert isinstance(models.models[0], Model) + assert isinstance(models.models[0], PublicModel) assert isinstance(models.models[1], CustomModel) @@ -570,31 +553,18 @@ def valid_benchmark_data(self): "name": "MMLU", "full_description": "Massive Multitask Language Understanding", "language": "english", - "categories": ["reasoning", "knowledge"], - "subsets": ["math", "science", "history"], "prompt_count": 15908, "deprecated": False, } def test_benchmark_creation(self, valid_benchmark_data): """Benchmark creates with valid data.""" - benchmark = Benchmark(**valid_benchmark_data) + benchmark = PublicBenchmark(**valid_benchmark_data) assert benchmark.id == "bench-123" assert benchmark.name == "MMLU" - assert len(benchmark.categories) == 2 - assert len(benchmark.subsets) == 3 assert benchmark.prompt_count == 15908 - def test_benchmark_list_fields(self, valid_benchmark_data): - """Benchmark handles list fields correctly.""" - benchmark = Benchmark(**valid_benchmark_data) - - assert isinstance(benchmark.categories, list) - assert isinstance(benchmark.subsets, list) - assert "reasoning" in benchmark.categories - assert "math" in benchmark.subsets - class TestCustomBenchmark: """Test CustomBenchmark with optional fields.""" @@ -608,7 +578,6 @@ def valid_custom_benchmark_data(self): "name": "My Benchmark", "description": "Custom benchmark", "system_prompt": "You are a helpful assistant", - "subsets": ["subset1", "subset2"], "prompt_count": 100, "version_count": 1, "regex_pattern": r"Answer: (.+)", @@ -637,7 +606,6 @@ def test_custom_benchmark_optional_fields(self): "name": "Test", "description": "Test desc", "system_prompt": None, - "subsets": ["test"], "prompt_count": 10, "version_count": 1, "regex_pattern": None, @@ -667,17 +635,14 @@ def test_benchmarks_with_datasets_alias(self): "name": "Test", "full_description": "Test benchmark", "language": "english", - "categories": ["test"], - "subsets": ["test"], "prompt_count": 10, "deprecated": False, } # Using the alias 'datasets' - benchmarks = Benchmarks(datasets=[benchmark_data]) # type: ignore[arg-type] + benchmarks = Benchmarks(datasets=[PublicBenchmark(**benchmark_data)]) # type: ignore[arg-type] - assert len(benchmarks.benchmarks) == 1 - assert isinstance(benchmarks.benchmarks[0], Benchmark) + assert isinstance(benchmarks.benchmarks[0], PublicBenchmark) def test_benchmarks_field_validation(self): """Benchmarks validates field structure correctly.""" @@ -688,8 +653,6 @@ def test_benchmarks_field_validation(self): "name": "Test", "full_description": "Test benchmark", "language": "english", - "categories": ["test"], - "subsets": ["test"], "prompt_count": 10, "deprecated": False, } @@ -706,20 +669,12 @@ def test_round_trip_serialization(self): """Models can be serialized and deserialized correctly.""" original_data = { "id": "eval-123", - "status": "completed", - "status_description": "Done", + "status": "success", "submitted_at": 1640995200, "finished_at": 1640995800, "model_id": "model-456", - "model_name": "GPT-4", - "model_key": "gpt-4", - "model_company": "OpenAI", "dataset_id": "dataset-789", - "dataset_name": "MMLU", "average_duration": 2500, - "readability_score": 0.85, - "toxicity_score": 0.02, - "ethics_score": 0.92, "accuracy": 0.89, } @@ -753,10 +708,10 @@ def test_json_compatibility(self): "deprecated": False, } - model = Model(**model_data) + model = PublicModel(**model_data) json_str = json.dumps(model.model_dump()) parsed_data = json.loads(json_str) - reconstructed = Model(**parsed_data) + reconstructed = PublicModel(**parsed_data) assert reconstructed.name == model.name assert reconstructed.parameters == model.parameters