LayerLens · Lzok · Aug 8, 2025 · Aug 5, 2025 · Aug 5, 2025 · Aug 5, 2025
diff --git a/.coveragerc b/.coveragerc
@@ -0,0 +1,7 @@
+[report]
+omit =
+    */tests/*
+    */__init__.py
+show_missing = false
+skip_covered = true
+include = *
diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml
@@ -0,0 +1,53 @@
+name: Run Tests
+
+on:
+  pull_request:
+    branches: [main]
+  push:
+    branches: [main]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install Rye
+        uses: eifinger/setup-rye@v4
+        with:
+          enable-cache: true
+
+      - name: Set up Python ${{ matrix.python-version }}
+        run: |
+          rye pin ${{ matrix.python-version }}
+          rye sync
+
+      - name: Run lints
+        run: rye run lint
+
+      - name: Run tests
+        run: rye run pytest
+
+  test-build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install Rye
+        uses: eifinger/setup-rye@v4
+        with:
+          enable-cache: true
+
+      - name: Build package
+        run: rye build
+
+      - name: Check build artifacts
+        run: |
+          ls -la dist/
+          # Verify wheel and source distribution exist; `ls` (unlike `test -f`) copes with a glob matching several files
+          ls dist/*.whl
+          ls dist/*.tar.gz
diff --git a/.gitignore b/.gitignore
@@ -16,3 +16,4 @@ codegen.log
 Brewfile.lock.json
 
 .DS_Store
+.coverage
diff --git a/examples/demo.py b/examples/demo.py
@@ -9,7 +9,7 @@
 client = Atlas()
 
 # Evaluations
-evaluation = client.evaluations.create(model="random", benchmark="random")
+evaluation = client.evaluations.create(model="random_model_id", benchmark="random_benchmark_id")
 
 # Results
 if evaluation is not None:

diff --git a/pyproject.toml b/pyproject.toml
@@ -3,13 +3,8 @@ name = "atlas"
 version = "1.0.0"
 description = "The official Python library for the LayerLens Atlas API"
 license = "Apache-2.0"
-authors = [
-{ name = "LayerLens", email = "support@layerlens.ai" },
-]
-dependencies = [
-    "httpx>=0.23.0, <1",
-    "pydantic>=1.9.0, <3",
-]
+authors = [{ name = "LayerLens", email = "support@layerlens.ai" }]
+dependencies = ["httpx>=0.23.0, <1", "pydantic>=1.9.0, <3"]
 requires-python = ">= 3.8"
 classifiers = [
   "Typing :: Typed",
@@ -25,7 +20,7 @@ classifiers = [
   "Operating System :: MacOS",
   "Operating System :: POSIX :: Linux",
   "Operating System :: Microsoft :: Windows",
-  "Topic :: Software Development :: Libraries :: Python Modules"
+  "Topic :: Software Development :: Libraries :: Python Modules",
 ]
 
 [project.urls]
@@ -40,10 +35,11 @@ atlas = "atlas.cli:main"
 managed = true
 # version pins are in requirements-dev.lock
 dev-dependencies = [
-    "pyright==1.1.399",
-    "mypy",
-    "pytest",
-    "ruff",
+  "pyright==1.1.399",
+  "mypy",
+  "pytest",
+  "ruff",
+  "pytest-cov>=6.2.1",
 ]
 
 [tool.rye.scripts]
@@ -52,26 +48,27 @@ format = { chain = [
   "fix:ruff",
   # run formatting again to fix any inconsistencies when imports are stripped
   "format:ruff",
-]}
+] }
 "format:ruff" = "ruff format"
 
-"lint" = { chain = [
-  "check:ruff",
-  "typecheck",
-  "check:importable",
-]}
+"lint" = { chain = ["check:ruff", "typecheck:src", "check:importable"] }
+"lint:all" = { chain = ["check:ruff", "typecheck", "check:importable"] }
 "check:ruff" = "ruff check ."
 "fix:ruff" = "ruff check --fix ."
 
 "check:importable" = "python -c 'import atlas'"
 
-typecheck = { chain = [
-  "typecheck:pyright",
-  "typecheck:mypy"
-]}
+# Type checking for production code only (excludes tests)
+"typecheck:src" = { chain = ["typecheck:pyright:src", "typecheck:mypy:src"] }
+
+# Type checking for all code including tests
+typecheck = { chain = ["typecheck:pyright", "typecheck:mypy"] }
+
 "typecheck:pyright" = "pyright"
+"typecheck:pyright:src" = "pyright src"
 "typecheck:verify-types" = "pyright --verifytypes atlas --ignoreexternal"
 "typecheck:mypy" = "mypy ."
+"typecheck:mypy:src" = "mypy src"
 
 [tool.ruff]
 line-length = 120
@@ -125,4 +122,14 @@ known-first-party = ["openai", "tests"]
 "bin/**.py" = ["T201", "T203"]
 "scripts/**.py" = ["T201", "T203"]
 "tests/**.py" = ["T201", "T203"]
-"examples/**.py" = ["T201", "T203"]
+"examples/**.py" = ["T201", "T203"]
+
+[tool.pyright]
+include = ["src", "tests"]
+exclude = ["**/__pycache__"]
+reportMissingTypeStubs = false
+
+# Less strict settings for tests
+executionEnvironments = [
+  { root = "tests", reportGeneralTypeIssues = false, reportOptionalSubscript = false, reportOptionalMemberAccess = false, reportUntypedFunctionDecorator = false, reportUnknownArgumentType = false, reportUnknownMemberType = false, reportUnknownVariableType = false, reportUnnecessaryIsInstance = false, reportUnnecessaryComparison = false, reportArgumentType = false, reportCallIssue = false },
+]
diff --git a/requirements-dev.lock b/requirements-dev.lock
@@ -17,6 +17,8 @@ anyio==4.9.0
 certifi==2025.7.14
     # via httpcore
     # via httpx
+coverage==7.10.2
+    # via pytest-cov
 exceptiongroup==1.3.0
     # via anyio
     # via pytest
@@ -42,6 +44,7 @@ pathspec==0.12.1
     # via mypy
 pluggy==1.6.0
     # via pytest
+    # via pytest-cov
 pydantic==2.11.7
     # via atlas
 pydantic-core==2.33.2
@@ -50,10 +53,13 @@ pygments==2.19.2
     # via pytest
 pyright==1.1.399
 pytest==8.4.1
+    # via pytest-cov
+pytest-cov==6.2.1
 ruff==0.12.7
 sniffio==1.3.1
     # via anyio
 tomli==2.2.1
+    # via coverage
     # via mypy
     # via pytest
 typing-extensions==4.14.1

diff --git a/scripts/test b/scripts/test
diff --git a/src/atlas/_models.py b/src/atlas/_models.py
@@ -3,7 +3,7 @@
 from typing import Dict, List, Union, Optional
 from datetime import timedelta
 
-from pydantic import Field, BaseModel
+from pydantic import Field, BaseModel, ConfigDict
 
 
 class Evaluation(BaseModel):
@@ -105,7 +105,6 @@ class CustomBenchmark(BaseModel):
 
 
 class Benchmarks(BaseModel):
+    model_config = ConfigDict(populate_by_name=True)
+
     benchmarks: List[Union[Benchmark, CustomBenchmark]] = Field(..., alias="datasets")
-
-    class Config:
-        validate_by_name = True
diff --git a/src/atlas/resources/models/models.py b/src/atlas/resources/models/models.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import List, Union, Literal
+from typing import List, Literal
 
 import httpx
 
@@ -15,7 +15,7 @@ def get(
         *,
         type: Literal["public"] | Literal["custom"],
         timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
-    ) -> List[Union[Model | CustomModel]] | None:
+    ) -> List[Model | CustomModel] | None:
         models = self._get(
             f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/models",
             params={

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,36 @@
+import os
+from unittest import mock
+
+import pytest
+
+
+@pytest.fixture
+def env_vars():
+    """Run the test with the Atlas credential variables removed from ``os.environ``.
+
+    Whatever values were present beforehand are put back on teardown.
+    """
+    names = ("LAYERLENS_ATLAS_API_KEY", "LAYERLENS_ATLAS_ORG_ID", "LAYERLENS_ATLAS_PROJECT_ID")
+    # ``pop`` both removes the variable and hands back the value to restore later
+    # (``None`` when it was not set).
+    saved = {name: os.environ.pop(name, None) for name in names}
+
+    yield
+
+    for name, previous in saved.items():
+        if previous is None:
+            # Was unset before the test: drop anything the test may have set.
+            os.environ.pop(name, None)
+        else:
+            os.environ[name] = previous
+
+
+@pytest.fixture
+def mock_env_vars():
+    """Patch ``os.environ`` with fixed test values for the three Atlas variables while the test runs."""
+    with mock.patch.dict(os.environ, {
+        "LAYERLENS_ATLAS_API_KEY": "test-api-key",
+        "LAYERLENS_ATLAS_ORG_ID": "test-org-id",
+        "LAYERLENS_ATLAS_PROJECT_ID": "test-project-id"
+    }):
+        yield
Original file line number	Diff line number	Diff line change
Expand Up		@@ -16,3 +16,4 @@ codegen.log
		Brewfile.lock.json

		.DS_Store
		.coverage