Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/atlas/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from . import _exceptions
from ._utils import is_mapping
from .models import Organization
from .models import Organization, OrganizationResponse
from ._constants import DEFAULT_TIMEOUT
from ._exceptions import AtlasError, APIStatusError
from ._base_client import BaseClient
Expand Down Expand Up @@ -53,7 +53,7 @@ def __init__(
if base_url is None:
base_url = os.environ.get("LAYERLENS_ATLAS_BASE_URL")
if base_url is None:
base_url = "https://8bg48mbhyi.execute-api.us-east-1.amazonaws.com/prod/api/v1/dgklmnr"
base_url = "https://8bg48mbhyi.execute-api.us-east-1.amazonaws.com/prod/api/v1"

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Were we able to set up the api.layerlens.ai DNS?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, we need to set up the production API URL. This is the dev env one.


super().__init__(
base_url=base_url,
Expand Down Expand Up @@ -164,10 +164,10 @@ def _get_organization(self) -> Optional[Organization]:
organization = super().get_cast(
f"/organizations",
timeout=30,
cast_to=Organization,
cast_to=OrganizationResponse,
)
if isinstance(organization, Organization):
return organization
if isinstance(organization, OrganizationResponse):
return organization.data
return None


Expand Down
19 changes: 14 additions & 5 deletions src/atlas/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
from .api import Models, Results, Benchmarks, Pagination, Evaluations, ResultMetrics
from .api import (
Pagination,
ResultMetrics,
ModelsResponse,
ResultsResponse,
BenchmarksResponse,
EvaluationsResponse,
OrganizationResponse,
)
from .model import Model, CustomModel, PublicModel
from .benchmark import Benchmark, CustomBenchmark, PublicBenchmark
from .evaluation import Result, Evaluation, EvaluationStatus
from .organization import Project, Organization

__all__ = [
"Benchmarks",
"Evaluations",
"Models",
"Results",
"BenchmarksResponse",
"EvaluationsResponse",
"ModelsResponse",
"OrganizationResponse",
"ResultsResponse",
"Benchmark",
"CustomBenchmark",
"PublicBenchmark",
Expand Down
25 changes: 18 additions & 7 deletions src/atlas/models/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,31 @@
from .model import Model
from .benchmark import Benchmark
from .evaluation import Result, Evaluation
from .organization import Organization


class Benchmarks(BaseModel):
model_config = ConfigDict(populate_by_name=True)
class BenchmarksResponse(BaseModel):
class Data(BaseModel):
model_config = ConfigDict(populate_by_name=True)

benchmarks: List[Benchmark] = Field(..., alias="datasets")
benchmarks: List[Benchmark] = Field(..., alias="datasets")

data: Data

class Evaluations(BaseModel):

class EvaluationsResponse(BaseModel):
data: List[Evaluation]


class Models(BaseModel):
models: List[Model]
class ModelsResponse(BaseModel):
class Data(BaseModel):
models: List[Model]

data: Data


class OrganizationResponse(BaseModel):
data: Organization


class ResultMetrics(BaseModel):
Expand All @@ -33,7 +44,7 @@ class Pagination(BaseModel):
total_pages: int


class Results(BaseModel):
class ResultsResponse(BaseModel):
evaluation_id: str
results: List[Result]
metrics: ResultMetrics
Expand Down
30 changes: 15 additions & 15 deletions src/atlas/models/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,21 @@ class Benchmark(BaseModel):


class CustomBenchmark(Benchmark):
description: str
system_prompt: Optional[str]
prompt_count: int
version_count: int
regex_pattern: Optional[str]
llm_judge_model_id: str
custom_instructions: str
scoring_metric: Optional[str]
metrics: List[str]
files: List[str]
disabled: bool
description: Optional[str] = None
system_prompt: Optional[str] = None
prompt_count: Optional[int] = None
version_count: Optional[int] = None
regex_pattern: Optional[str] = None
llm_judge_model_id: Optional[str] = None
custom_instructions: Optional[str] = None
scoring_metric: Optional[str] = None
metrics: Optional[List[str]] = None
files: Optional[List[str]] = None
disabled: Optional[bool] = None


class PublicBenchmark(Benchmark):
description: str = Field(..., alias="full_description")
language: str
prompt_count: int
deprecated: bool
description: Optional[str] = Field(None, alias="full_description")
language: Optional[str] = None
prompt_count: Optional[int] = None
deprecated: Optional[bool] = None
28 changes: 15 additions & 13 deletions src/atlas/models/model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

from typing import Optional

from pydantic import BaseModel


Expand All @@ -11,19 +13,19 @@ class Model(BaseModel):


class CustomModel(Model):
max_tokens: int
api_url: str
disabled: bool
max_tokens: Optional[int] = None
api_url: Optional[str] = None
disabled: Optional[bool] = None


class PublicModel(Model):
company: str
released_at: int
parameters: float
modality: str
context_length: int
architecture_type: str
license: str
open_weights: bool
region: str
deprecated: bool
company: Optional[str] = None
released_at: Optional[int] = None
parameters: Optional[float] = None
modality: Optional[str] = None
context_length: Optional[int] = None
architecture_type: Optional[str] = None
license: Optional[str] = None
open_weights: Optional[bool] = None
region: Optional[str] = None
deprecated: Optional[bool] = None
13 changes: 10 additions & 3 deletions src/atlas/resources/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import httpx

from ...models import Benchmark, Benchmarks as BenchmarksResponse
from ...models import Benchmark, CustomBenchmark, PublicBenchmark, BenchmarksResponse
from ..._resource import SyncAPIResource
from ..._constants import DEFAULT_TIMEOUT

Expand Down Expand Up @@ -34,14 +34,21 @@ def fetch(bench_type: str) -> BenchmarksResponse | None:

benchmarks: List[Benchmark] = []

def cast_benchmark(b: Benchmark, bench_type: str) -> Benchmark:
    """Rebuild ``b`` as the benchmark subclass that matches ``bench_type``.

    Args:
        b: Benchmark instance to convert.
        bench_type: ``"custom"`` selects ``CustomBenchmark`` and ``"public"``
            selects ``PublicBenchmark``.

    Returns:
        A new subclass instance constructed from ``b.model_dump()``, or ``b``
        itself when ``bench_type`` is neither of the two known values.
    """
    if bench_type == "custom":
        target_cls = CustomBenchmark
    elif bench_type == "public":
        target_cls = PublicBenchmark
    else:
        # Unknown type: hand back the base-class instance unchanged.
        return b
    return target_cls(**b.model_dump())

if type is None:
for t in ["custom", "public"]:
resp = fetch(t)
if resp:
benchmarks.extend(resp.benchmarks)
benchmarks.extend([cast_benchmark(b, t) for b in resp.data.benchmarks])
else: # fetch only one type
resp = fetch(type)
if resp:
benchmarks.extend(resp.benchmarks)
benchmarks.extend([cast_benchmark(b, type) for b in resp.data.benchmarks])

return benchmarks
13 changes: 10 additions & 3 deletions src/atlas/resources/evaluations/evaluations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,14 @@

import httpx

from ...models import Model, Benchmark, Evaluation, Evaluations as EvaluationsResponse
from ...models import (
Model,
Benchmark,
Evaluation,
CustomModel,
CustomBenchmark,
EvaluationsResponse,
)
from ..._resource import SyncAPIResource
from ..._constants import DEFAULT_TIMEOUT

Expand All @@ -21,8 +28,8 @@ def create(
{
"model_id": model.id,
"dataset_id": benchmark.id,
"is_custom_model": False,
"is_custom_dataset": False,
"is_custom_model": isinstance(model, CustomModel),
"is_custom_dataset": isinstance(benchmark, CustomBenchmark),
}
],
timeout=timeout,
Expand Down
13 changes: 10 additions & 3 deletions src/atlas/resources/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import httpx

from ...models import Model, Models as ModelsResponse
from ...models import Model, CustomModel, PublicModel, ModelsResponse
from ..._resource import SyncAPIResource
from ..._constants import DEFAULT_TIMEOUT

Expand Down Expand Up @@ -43,14 +43,21 @@ def fetch(model_type: str) -> ModelsResponse | None:

models: List[Model] = []

def cast_model(m: Model, model_type: str) -> Model:
    """Rebuild ``m`` as the model subclass that matches ``model_type``.

    Args:
        m: Model instance to convert.
        model_type: ``"custom"`` selects ``CustomModel`` and ``"public"``
            selects ``PublicModel``.

    Returns:
        A new subclass instance constructed from ``m.model_dump()``, or ``m``
        itself when ``model_type`` is neither of the two known values.
    """
    if model_type == "custom":
        target_cls = CustomModel
    elif model_type == "public":
        target_cls = PublicModel
    else:
        # Unknown type: hand back the base-class instance unchanged.
        return m
    return target_cls(**m.model_dump())

if type is None: # fetch both
for t in ["custom", "public"]:
resp = fetch(t)
if resp:
models.extend(resp.models)
models.extend([cast_model(m, t) for m in resp.data.models])
else: # fetch only one type
resp = fetch(type)
if resp:
models.extend(resp.models)
models.extend([cast_model(m, type) for m in resp.data.models])

return models
8 changes: 4 additions & 4 deletions src/atlas/resources/results/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from ..._resource import SyncAPIResource
from ..._constants import DEFAULT_TIMEOUT
from ...models.api import Results as ResultsData
from ...models.api import ResultsResponse

DEFAULT_PAGE_SIZE = 100

Expand All @@ -20,7 +20,7 @@ def get(
page: Optional[int] = None,
page_size: Optional[int] = None,
timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
) -> ResultsData | None:
) -> ResultsResponse | None:
"""
Get evaluation results with optional pagination.

Expand All @@ -31,7 +31,7 @@ def get(
timeout: Request timeout

Returns:
ResultsData object containing:
ResultsResponse object containing:
- evaluation_id: The evaluation ID
- results: List of Result objects for the current page
- metrics: Contains total_count and score ranges
Expand Down Expand Up @@ -77,6 +77,6 @@ def get(
}

try:
return ResultsData.model_validate(response_with_pagination)
return ResultsResponse.model_validate(response_with_pagination)
except Exception:
return None
Loading
Loading