diff --git a/.gitignore b/.gitignore index 16a79ef..efd18f1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +node_modules/ + .prism.log _dev diff --git a/.husky/pre-commit b/.husky/pre-commit new file mode 100644 index 0000000..ebdf6c8 --- /dev/null +++ b/.husky/pre-commit @@ -0,0 +1,5 @@ +#!/bin/sh + +npx lint-staged + +./scripts/test || { echo "❌ Tests failed"; exit 1; } diff --git a/examples/async_client.py b/examples/async_client.py new file mode 100644 index 0000000..5686f6f --- /dev/null +++ b/examples/async_client.py @@ -0,0 +1,44 @@ +#!/usr/bin/env -S poetry run python + +import asyncio + +from atlas import AsyncAtlas + + +async def main(): + # Construct async client + client = await AsyncAtlas.create() + + # --- Models + models = await client.models.get() + print(f"Found {len(models)} models") + + # --- Benchmarks + benchmarks = await client.benchmarks.get() + print(f"Found {len(benchmarks)} benchmarks") + + # --- Create evaluation + evaluation = await client.evaluations.create( + model=models[0], + benchmark=benchmarks[0], + ) + print(f"Created evaluation {evaluation.id}, status={evaluation.status}") + + # --- Wait for completion + evaluation = await client.evaluations.wait_for_completion( + evaluation, + interval_seconds=10, + timeout=600, # 10 minutes + ) + print(f"Evaluation {evaluation.id} finished with status={evaluation.status}") + + # --- Results + if evaluation.is_success: + results = await client.results.get(evaluation=evaluation) + print("Results:", results) + else: + print("Evaluation did not succeed, no results to show.") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/async_client_simple.py b/examples/async_client_simple.py new file mode 100644 index 0000000..b1bd85c --- /dev/null +++ b/examples/async_client_simple.py @@ -0,0 +1,38 @@ +#!/usr/bin/env -S poetry run python + +import asyncio + +from atlas import AsyncAtlas + + +async def main(): + # Construct async client + client = await AsyncAtlas.create() + + # --- Models + models = await client.models.get() + print(f"Found {len(models)} models") + + # --- Benchmarks + benchmarks = await client.benchmarks.get() + print(f"Found {len(benchmarks)} benchmarks") + + # --- Create evaluation + evaluation = await client.evaluations.create(model=models[0], benchmark=benchmarks[0]) + + print(f"Created evaluation {evaluation.id}, status={evaluation.status}") + + # --- Wait for completion + await evaluation.wait_for_completion_async(interval_seconds=10, timeout=600) + print(f"Evaluation {evaluation.id} finished with status={evaluation.status}") + + # --- Results + if evaluation.is_success: + results = await evaluation.get_results_async() + print("Results:", results) + else: + print("Evaluation did not succeed, no results to show.") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/client.py b/examples/client.py new file mode 100644 index 0000000..4bdc076 --- /dev/null +++ b/examples/client.py @@ -0,0 +1,36 @@ +#!/usr/bin/env -S poetry run python + +from atlas import Atlas + +# Construct sync client (API key from env or inline) +client = Atlas() + +# --- Models +models = client.models.get() +print(f"Found {len(models)} models") + +# --- Benchmarks +benchmarks = client.benchmarks.get() +print(f"Found {len(benchmarks)} benchmarks") + +# --- Create evaluation +evaluation = client.evaluations.create( + model=models[0], + benchmark=benchmarks[0], +) +print(f"Created evaluation {evaluation.id}, status={evaluation.status}") + +# --- Wait for completion +evaluation = client.evaluations.wait_for_completion( + evaluation, + interval_seconds=10, + timeout=600, # 10 minutes +) +print(f"Evaluation {evaluation.id} finished with status={evaluation.status}") + +# --- Results +if evaluation.is_success: + results = client.results.get(evaluation=evaluation) + print("Results:", results) +else: + print("Evaluation did not succeed, no results to show.") diff --git a/examples/client_simple.py b/examples/client_simple.py new file mode 100644 index 0000000..007c850 --- /dev/null +++ b/examples/client_simple.py @@ -0,0 +1,36 @@ +#!/usr/bin/env -S poetry run python + +from atlas import Atlas + +# Construct sync client (API key from env or inline) +client = Atlas() + +# --- Models +models = client.models.get() +print(f"Found {len(models)} models") + +# --- Benchmarks +benchmarks = client.benchmarks.get() +print(f"Found {len(benchmarks)} benchmarks") + +# --- Create evaluation +evaluation = client.evaluations.create( + model=models[0], + benchmark=benchmarks[0], +) + +print(f"Created evaluation {evaluation.id}, status={evaluation.status}") + +# --- Wait for completion +evaluation.wait_for_completion( + interval_seconds=10, + timeout=600, # 10 minutes +) +print(f"Evaluation {evaluation.id} finished with status={evaluation.status}") + +# --- Results +if evaluation.is_success: + results = evaluation.get_results() + print("Results:", results) +else: + print("Evaluation did not succeed, no results to show.") diff --git a/examples/demo.py b/examples/demo.py deleted file mode 100644 index 56dc8e6..0000000 --- a/examples/demo.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env -S poetry run python - -from atlas import Atlas - -# gets API key from environment variable: -# - LAYERLENS_ATLAS_API_KEY -client = Atlas() - -# Evaluations -evaluation = client.evaluations.create(model="random_model_id", benchmark="random_benchmark_id") - -# Results -if evaluation is not None: - results = client.results.get(evaluation_id=evaluation.id) diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..9aadb01 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,543 @@ +{ + "name": "atlas-python", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "devDependencies": { + "husky": "^9.1.7", + "lint-staged": "^16.1.5" + } + }, + "node_modules/ansi-escapes": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-7.0.0.tgz", + "integrity": "sha512-GdYO7a61mR0fOlAsvC9/rIHf7L96sBc6dEWzeOu+KAea5bZyQRPIpojrVoI4AXGJS/ycu/fBTdLrUkA4ODrvjw==", + "dev": true, + "dependencies": { + "environment": "^1.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/ansi-regex": { + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.0.tgz", + "integrity": "sha512-TKY5pyBkHyADOPYlRT9Lx6F544mPl0vS5Ew7BJ45hA08Q+t3GjbueLliBWN3sMICk6+y7HdyxSzC4bWS8baBdg==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/ansi-styles": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", + "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "dev": true, + "dependencies": { + "fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/chalk": { + "version": "5.6.0", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.6.0.tgz", + "integrity": "sha512-46QrSQFyVSEyYAgQ22hQ+zDa60YHA4fBstHmtSApj1Y5vKtG27fWowW03jCk5KcbXEWPZUIR894aARCA/G1kfQ==", + "dev": true, + "engines": { + "node": "^12.17.0 || ^14.13 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/cli-cursor": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-5.0.0.tgz", + "integrity": "sha512-aCj4O5wKyszjMmDT4tZj93kxyydN/K5zPWSCe6/0AV/AA1pqe5ZBIw0a2ZfPQV7lL5/yb5HsUreJ6UFAF1tEQw==", + "dev": true, + "dependencies": { + "restore-cursor": "^5.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/cli-truncate": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/cli-truncate/-/cli-truncate-4.0.0.tgz", + "integrity": "sha512-nPdaFdQ0h/GEigbPClz11D0v/ZJEwxmeVZGeMo3Z5StPtUTkA9o1lD6QwoirYiSDzbcwn2XcjwmCp68W1IS4TA==", + "dev": true, + "dependencies": { + "slice-ansi": "^5.0.0", + "string-width": "^7.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/colorette": { + "version": "2.0.20", + "resolved": "https://registry.npmjs.org/colorette/-/colorette-2.0.20.tgz", + "integrity": "sha512-IfEDxwoWIjkeXL1eXcDiow4UbKjhLdq6/EuSVR9GMN7KVH3r9gQ83e73hsz1Nd1T3ijd5xv1wcWRYO+D6kCI2w==", + "dev": true + }, + "node_modules/commander": { + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-14.0.0.tgz", + "integrity": "sha512-2uM9rYjPvyq39NwLRqaiLtWHyDC1FvryJDa2ATTVims5YAS4PupsEQsDvP14FqhFr0P49CYDugi59xaxJlTXRA==", + "dev": true, + "engines": { + "node": ">=20" + } + }, + "node_modules/debug": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", + "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", + "dev": true, + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/emoji-regex": { + "version": "10.4.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.4.0.tgz", + "integrity": "sha512-EC+0oUMY1Rqm4O6LLrgjtYDvcVYTy7chDnM4Q7030tP4Kwj3u/pR6gP9ygnp2CJMK5Gq+9Q2oqmrFJAz01DXjw==", + "dev": true + }, + "node_modules/environment": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/environment/-/environment-1.1.0.tgz", + "integrity": "sha512-xUtoPkMggbz0MPyPiIWr1Kp4aeWJjDZ6SMvURhimjdZgsRuDplF5/s9hcgGhyXMhs+6vpnuoiZ2kFiu3FMnS8Q==", + "dev": true, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/eventemitter3": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-5.0.1.tgz", + "integrity": "sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==", + "dev": true + }, + "node_modules/fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "dev": true, + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/get-east-asian-width": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.3.0.tgz", + "integrity": "sha512-vpeMIQKxczTD/0s2CdEWHcb0eeJe6TFjxb+J5xgX7hScxqrGuyjmv4c1D4A/gelKfyox0gJJwIHF+fLjeaM8kQ==", + "dev": true, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/husky": { + "version": "9.1.7", + "resolved": "https://registry.npmjs.org/husky/-/husky-9.1.7.tgz", + "integrity": "sha512-5gs5ytaNjBrh5Ow3zrvdUUY+0VxIuWVL4i9irt6friV+BqdCfmV11CQTWMiBYWHbXhco+J1kHfTOUkePhCDvMA==", + "dev": true, + "bin": { + "husky": "bin.js" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/typicode" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-4.0.0.tgz", + "integrity": "sha512-O4L094N2/dZ7xqVdrXhh9r1KODPJpFms8B5sGdJLPy664AgvXsreZUyCQQNItZRDlYug4xStLjNp/sz3HvBowQ==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true, + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/lilconfig": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz", + "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==", + "dev": true, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/antonk52" + } + }, + "node_modules/lint-staged": { + "version": "16.1.5", + "resolved": "https://registry.npmjs.org/lint-staged/-/lint-staged-16.1.5.tgz", + "integrity": "sha512-uAeQQwByI6dfV7wpt/gVqg+jAPaSp8WwOA8kKC/dv1qw14oGpnpAisY65ibGHUGDUv0rYaZ8CAJZ/1U8hUvC2A==", + "dev": true, + "dependencies": { + "chalk": "^5.5.0", + "commander": "^14.0.0", + "debug": "^4.4.1", + "lilconfig": "^3.1.3", + "listr2": "^9.0.1", + "micromatch": "^4.0.8", + "nano-spawn": "^1.0.2", + "pidtree": "^0.6.0", + "string-argv": "^0.3.2", + "yaml": "^2.8.1" + }, + "bin": { + "lint-staged": "bin/lint-staged.js" + }, + "engines": { + "node": ">=20.17" + }, + "funding": { + "url": "https://opencollective.com/lint-staged" + } + }, + "node_modules/listr2": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/listr2/-/listr2-9.0.1.tgz", + "integrity": "sha512-SL0JY3DaxylDuo/MecFeiC+7pedM0zia33zl0vcjgwcq1q1FWWF1To9EIauPbl8GbMCU0R2e0uJ8bZunhYKD2g==", + "dev": true, + "dependencies": { + "cli-truncate": "^4.0.0", + "colorette": "^2.0.20", + "eventemitter3": "^5.0.1", + "log-update": "^6.1.0", + "rfdc": "^1.4.1", + "wrap-ansi": "^9.0.0" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/log-update": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/log-update/-/log-update-6.1.0.tgz", + "integrity": "sha512-9ie8ItPR6tjY5uYJh8K/Zrv/RMZ5VOlOWvtZdEHYSTFKZfIBPQa9tOAEeAWhd+AnIneLJ22w5fjOYtoutpWq5w==", + "dev": true, + "dependencies": { + "ansi-escapes": "^7.0.0", + "cli-cursor": "^5.0.0", + "slice-ansi": "^7.1.0", + "strip-ansi": "^7.1.0", + "wrap-ansi": "^9.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/log-update/node_modules/is-fullwidth-code-point": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-5.0.0.tgz", + "integrity": "sha512-OVa3u9kkBbw7b8Xw5F9P+D/T9X+Z4+JruYVNapTjPYZYUznQ5YfWeFkOj606XYYW8yugTfC8Pj0hYqvi4ryAhA==", + "dev": true, + "dependencies": { + "get-east-asian-width": "^1.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/log-update/node_modules/slice-ansi": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-7.1.0.tgz", + "integrity": "sha512-bSiSngZ/jWeX93BqeIAbImyTbEihizcwNjFoRUIY/T1wWQsfsm2Vw1agPKylXvQTU7iASGdHhyqRlqQzfz+Htg==", + "dev": true, + "dependencies": { + "ansi-styles": "^6.2.1", + "is-fullwidth-code-point": "^5.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/chalk/slice-ansi?sponsor=1" + } + }, + "node_modules/micromatch": { + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", + "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", + "dev": true, + "dependencies": { + "braces": "^3.0.3", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/mimic-function": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/mimic-function/-/mimic-function-5.0.1.tgz", + "integrity": "sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA==", + "dev": true, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true + }, + "node_modules/nano-spawn": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/nano-spawn/-/nano-spawn-1.0.2.tgz", + "integrity": "sha512-21t+ozMQDAL/UGgQVBbZ/xXvNO10++ZPuTmKRO8k9V3AClVRht49ahtDjfY8l1q6nSHOrE5ASfthzH3ol6R/hg==", + "dev": true, + "engines": { + "node": ">=20.17" + }, + "funding": { + "url": "https://github.com/sindresorhus/nano-spawn?sponsor=1" + } + }, + "node_modules/onetime": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/onetime/-/onetime-7.0.0.tgz", + "integrity": "sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ==", + "dev": true, + "dependencies": { + "mimic-function": "^5.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/picomatch": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "dev": true, + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/pidtree": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/pidtree/-/pidtree-0.6.0.tgz", + "integrity": "sha512-eG2dWTVw5bzqGRztnHExczNxt5VGsE6OwTeCG3fdUf9KBsZzO3R5OIIIzWR+iZA0NtZ+RDVdaoE2dK1cn6jH4g==", + "dev": true, + "bin": { + "pidtree": "bin/pidtree.js" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/restore-cursor": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-5.1.0.tgz", + "integrity": "sha512-oMA2dcrw6u0YfxJQXm342bFKX/E4sG9rbTzO9ptUcR/e8A33cHuvStiYOwH7fszkZlZ1z/ta9AAoPk2F4qIOHA==", + "dev": true, + "dependencies": { + "onetime": "^7.0.0", + "signal-exit": "^4.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/rfdc": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/rfdc/-/rfdc-1.4.1.tgz", + "integrity": "sha512-q1b3N5QkRUWUl7iyylaaj3kOpIT0N2i9MqIEQXP73GVsN9cw3fdx8X63cEmWhJGi2PPCF23Ijp7ktmd39rawIA==", + "dev": true + }, + "node_modules/signal-exit": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", + "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", + "dev": true, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/slice-ansi": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-5.0.0.tgz", + "integrity": "sha512-FC+lgizVPfie0kkhqUScwRu1O/lF6NOgJmlCgK+/LYxDCTk8sGelYaHDhFcDN+Sn3Cv+3VSa4Byeo+IMCzpMgQ==", + "dev": true, + "dependencies": { + "ansi-styles": "^6.0.0", + "is-fullwidth-code-point": "^4.0.0" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/slice-ansi?sponsor=1" + } + }, + "node_modules/string-argv": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/string-argv/-/string-argv-0.3.2.tgz", + "integrity": "sha512-aqD2Q0144Z+/RqG52NeHEkZauTAUWJO8c6yTftGJKO3Tja5tUgIfmIl6kExvhtxSDP7fXB6DvzkfMpCd/F3G+Q==", + "dev": true, + "engines": { + "node": ">=0.6.19" + } + }, + "node_modules/string-width": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz", + "integrity": "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ==", + "dev": true, + "dependencies": { + "emoji-regex": "^10.3.0", + "get-east-asian-width": "^1.0.0", + "strip-ansi": "^7.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/strip-ansi": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", + "dev": true, + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/wrap-ansi": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-9.0.0.tgz", + "integrity": "sha512-G8ura3S+3Z2G+mkgNRq8dqaFZAuxfsxpBB8OCTGRTCtp+l/v9nbFNmCUP1BZMts3G1142MsZfn6eeUKrr4PD1Q==", + "dev": true, + "dependencies": { + "ansi-styles": "^6.2.1", + "string-width": "^7.0.0", + "strip-ansi": "^7.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/yaml": { + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.1.tgz", + "integrity": "sha512-lcYcMxX2PO9XMGvAJkJ3OsNMw+/7FKes7/hgerGUYWIoWu5j/+YQqcZr5JnPZWzOsEBgMbSbiSTn/dv/69Mkpw==", + "dev": true, + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14.6" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..17fca9e --- /dev/null +++ b/package.json @@ -0,0 +1,15 @@ +{ + "devDependencies": { + "husky": "^9.1.7", + "lint-staged": "^16.1.5" + }, + "scripts": { + "prepare": "husky" + }, + "lint-staged": { + "*.py": [ + "./scripts/format", + "./scripts/lint" + ] + } +} diff --git a/src/atlas/__init__.py b/src/atlas/__init__.py index d7c8205..c229409 100644 --- a/src/atlas/__init__.py +++ b/src/atlas/__init__.py @@ -1,3 +1,3 @@ -from ._client import Atlas, Client +from ._client import Atlas, Client, AsyncAtlas, AsyncClient -__all__ = ["Atlas", "Client"] +__all__ = ["Atlas", "AsyncAtlas", "Client", "AsyncClient"] diff --git a/src/atlas/_base_client.py b/src/atlas/_base_client.py index 6286361..8af1bb3 100644 --- a/src/atlas/_base_client.py +++ b/src/atlas/_base_client.py @@ -119,3 +119,107 @@ def _make_status_error( response: httpx.Response, ) -> _exceptions.APIStatusError: raise NotImplementedError() + + +class BaseAsyncClient(httpx.AsyncClient): + def __init__( + self, + *, + base_url: URL | str, + headers: Optional[Dict[str, str]] = None, + timeout: Union[float, httpx.Timeout, None] = None, + **kwargs: Any, + ): + super().__init__(base_url=base_url, headers=headers, timeout=timeout, **kwargs) + + @property + def auth_headers(self) -> dict[str, str]: + return {} + + @property + def default_headers(self) -> dict[str, str]: + return { + "Accept": "application/json", + "Content-Type": "application/json", + **self.auth_headers, + } + + async def _request_cast( + self, + method: str, + url: str, + *, + cast_to: Optional[Type[ResponseT]] = None, + body: Optional[Any] = None, + params: Optional[Dict[str, Any]] = None, + headers: Optional[Dict[str, str]] = None, + **kwargs: Any, + ) -> Union[ResponseT, httpx.Response]: + combined_headers = {**self.default_headers, **(headers or {})} + + response = await super().request( + method=method, + url=url, + json=body, + params=params, + headers=combined_headers, + **kwargs, + ) + + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: + log.debug("Encountered httpx.HTTPStatusError", exc_info=True) + log.debug("Re-raising status error") + raise self._make_status_error_from_response(err.response) from None + + if cast_to: + data = response.json() + return cast_to(**data) + return response + + async def get_cast( + self, + url: str, + *, + cast_to: Optional[Type[ResponseT]] = None, + params: Optional[Dict[str, Any]] = None, + headers: Optional[Dict[str, str]] = None, + **kwargs: Any, + ) -> Union[ResponseT, httpx.Response]: + return await self._request_cast("GET", url, cast_to=cast_to, params=params, headers=headers, **kwargs) + + async def post_cast( + self, + url: str, + *, + cast_to: Optional[Type[ResponseT]] = None, + body: Optional[Any] = None, + headers: Optional[Dict[str, str]] = None, + **kwargs: Any, + ) -> Union[ResponseT, httpx.Response]: + return await self._request_cast("POST", url, cast_to=cast_to, body=body, headers=headers, **kwargs) + + def _make_status_error_from_response( + self, + response: httpx.Response, + ) -> _exceptions.APIStatusError: + err_text = response.text.strip() + body = err_text + + try: + body = json.loads(err_text) + err_msg = f"Error code: {response.status_code} - {body}" + except Exception: + err_msg = err_text or f"Error code: {response.status_code}" + + return self._make_status_error(err_msg, body=body, response=response) + + def _make_status_error( + self, + err_msg: str, + *, + body: object, + response: httpx.Response, + ) -> _exceptions.APIStatusError: + raise NotImplementedError() diff --git a/src/atlas/_client.py b/src/atlas/_client.py index 64c7e42..881cfc0 100644 --- a/src/atlas/_client.py +++ b/src/atlas/_client.py @@ -13,13 +13,13 @@ from .models import Organization, OrganizationResponse from ._constants import DEFAULT_TIMEOUT from ._exceptions import AtlasError, APIStatusError -from ._base_client import BaseClient +from ._base_client import BaseClient, BaseAsyncClient if TYPE_CHECKING: - from .resources.models import Models - from .resources.results import Results - from .resources.benchmarks import Benchmarks - from .resources.evaluations import Evaluations + from .resources.models import Models, AsyncModels + from .resources.results import Results, AsyncResults + from .resources.benchmarks import Benchmarks, AsyncBenchmarks + from .resources.evaluations import Evaluations, AsyncEvaluations __all__ = ["Atlas", "Client"] @@ -171,4 +171,149 @@ def _get_organization(self) -> Optional[Organization]: return None +class AsyncAtlas(BaseAsyncClient): + api_key: str + organization_id: str | None + project_id: str | None + + def __init__( + self, + *, + api_key: str | None = None, + base_url: str | httpx.URL | None = None, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + ) -> None: + """Construct a new asynchronous Atlas client instance. + + This automatically infers the following arguments from their corresponding environment variables if they are not provided: + - `api_key` from `LAYERLENS_ATLAS_API_KEY` + """ + if api_key is None: + api_key = os.environ.get("LAYERLENS_ATLAS_API_KEY") + if api_key is None: + raise AtlasError( + "The api_key client option must be set either by passing api_key to the client " + "or by setting the LAYERLENS_ATLAS_API_KEY environment variable" + ) + self.api_key = api_key + + if base_url is None: + base_url = os.environ.get("LAYERLENS_ATLAS_BASE_URL") + if base_url is None: + base_url = "https://8bg48mbhyi.execute-api.us-east-1.amazonaws.com/prod/api/v1" + + super().__init__(base_url=base_url, timeout=timeout) + + # org/project must be fetched asynchronously + self.organization_id = None + self.project_id = None + + @classmethod + async def create( + cls, + *, + api_key: str | None = None, + base_url: str | httpx.URL | None = None, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + ) -> "AsyncAtlas": + """Async factory that combines __init__ and ainit into one call.""" + self = cls(api_key=api_key, base_url=base_url, timeout=timeout) + organization = await self._get_organization() + if organization is None: + raise AtlasError("Organization could not be fetched. Please contact LayerLens Atlas support.") + self.organization_id = organization.id + + if not organization.projects: + raise AtlasError( + f"Organization {self.organization_id} is missing project. Please contact LayerLens Atlas support." + ) + self.project_id = organization.projects[0].id + return self + + @cached_property + def benchmarks(self) -> AsyncBenchmarks: + from .resources.benchmarks import AsyncBenchmarks + + return AsyncBenchmarks(self) + + @cached_property + def evaluations(self) -> AsyncEvaluations: + from .resources.evaluations import AsyncEvaluations + + return AsyncEvaluations(self) + + @cached_property + def models(self) -> AsyncModels: + from .resources.models import AsyncModels + + return AsyncModels(self) + + @cached_property + def results(self) -> AsyncResults: + from .resources.results import AsyncResults + + return AsyncResults(self) + + @property + def auth_headers(self) -> dict[str, str]: + return {"x-api-key": self.api_key} if self.api_key else {} + + def copy( + self, + *, + api_key: str | None = None, + base_url: str | httpx.URL | None = None, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + _extra_kwargs: Mapping[str, Any] = {}, + ) -> Self: + return self.__class__( + api_key=api_key or self.api_key, + base_url=base_url or self.base_url, + timeout=self.timeout or timeout, + **_extra_kwargs, + ) + + # Alias for nicer inline usage + with_options = copy + + def _make_status_error( + self, + err_msg: str, + *, + body: object, + response: httpx.Response, + ) -> APIStatusError: + data = body.get("error", body) if is_mapping(body) else body + + if response.status_code == HTTPStatus.BAD_REQUEST: + return _exceptions.BadRequestError(err_msg, response=response, body=data) + if response.status_code == HTTPStatus.UNAUTHORIZED: + return _exceptions.AuthenticationError(err_msg, response=response, body=data) + if response.status_code == HTTPStatus.FORBIDDEN: + return _exceptions.PermissionDeniedError(err_msg, response=response, body=data) + if response.status_code == HTTPStatus.NOT_FOUND: + return _exceptions.NotFoundError(err_msg, response=response, body=data) + if response.status_code == HTTPStatus.CONFLICT: + return _exceptions.ConflictError(err_msg, response=response, body=data) + if response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY: + return _exceptions.UnprocessableEntityError(err_msg, response=response, body=data) + if response.status_code == HTTPStatus.TOO_MANY_REQUESTS: + return _exceptions.RateLimitError(err_msg, response=response, body=data) + if response.status_code >= HTTPStatus.INTERNAL_SERVER_ERROR: + return _exceptions.InternalServerError(err_msg, response=response, body=data) + + return APIStatusError(err_msg, response=response, body=data) + + async def _get_organization(self) -> Optional[Organization]: + organization = await super().get_cast( + "/organizations", + timeout=30, + cast_to=OrganizationResponse, + ) + if isinstance(organization, OrganizationResponse): + return organization.data + return None + + Client = Atlas +AsyncClient = AsyncAtlas diff --git a/src/atlas/_resource.py b/src/atlas/_resource.py index 46f00f9..c9c8c06 100644 --- a/src/atlas/_resource.py +++ b/src/atlas/_resource.py @@ -1,10 +1,11 @@ from __future__ import annotations import time +import asyncio from typing import TYPE_CHECKING if TYPE_CHECKING: - from ._client import Atlas + from ._client import Atlas, AsyncAtlas class SyncAPIResource: @@ -17,3 +18,15 @@ def __init__(self, client: Atlas) -> None: def _sleep(self, seconds: float) -> None: time.sleep(seconds) + + +class AsyncAPIResource: + _client: AsyncAtlas + + def __init__(self, client: AsyncAtlas) -> None: + self._client = client + self._get = client.get_cast + self._post = client.post_cast + + async def _sleep(self, seconds: float) -> None: + await asyncio.sleep(seconds) diff --git a/src/atlas/models/evaluation.py b/src/atlas/models/evaluation.py index f7435a9..0603b48 100644 --- a/src/atlas/models/evaluation.py +++ b/src/atlas/models/evaluation.py @@ -1,11 +1,16 @@ from __future__ import annotations from enum import Enum -from typing import Dict, Optional +from typing import TYPE_CHECKING, Dict, Optional from datetime import timedelta +import httpx from pydantic import Field, BaseModel, ConfigDict +if TYPE_CHECKING: + from .api import ResultsResponse + from .._client import Atlas, AsyncAtlas + class EvaluationStatus(str, Enum): PENDING = "pending" @@ -28,6 +33,104 @@ class Evaluation(BaseModel): average_duration: int accuracy: float + _client: "Optional[Atlas | AsyncAtlas]" = None + + def attach_client(self, client: "Atlas | AsyncAtlas") -> "Evaluation": + self._client = client + return self + + @property + def is_finished(self) -> bool: + """Return True if evaluation is done (success, failure, or timeout).""" + return self.status in { + EvaluationStatus.SUCCESS, + EvaluationStatus.FAILURE, + EvaluationStatus.TIMEOUT, + } + + @property + def is_success(self) -> bool: + """Return True if evaluation completed successfully.""" + return self.status == EvaluationStatus.SUCCESS + + def get_results( + self, + *, + page: Optional[int] = None, + page_size: Optional[int] = None, + timeout: float | httpx.Timeout | None = None, + ) -> Optional[ResultsResponse]: + """Fetch results synchronously if a sync client is attached.""" + from .._client import AsyncAtlas + + if self._client is None: + raise ValueError("No client attached") + if isinstance(self._client, AsyncAtlas): + raise RuntimeError("Use `await get_results_async()` with an async client") + + return self._client.results.get(evaluation=self, page=page, page_size=page_size, timeout=timeout) + + async def get_results_async( + self, + *, + page: Optional[int] = None, + page_size: Optional[int] = None, + timeout: float | httpx.Timeout | None = None, + ) -> Optional[ResultsResponse]: + """Fetch results asynchronously if an async client is attached.""" + from .._client import AsyncAtlas + + if self._client is None: + raise ValueError("No client attached") + if not isinstance(self._client, AsyncAtlas): + raise RuntimeError("Use `get_results()` with a sync client") + + return await self._client.results.get(evaluation=self, page=page, page_size=page_size, timeout=timeout) + + def wait_for_completion( + self, *, interval_seconds: int = 30, timeout: Optional[float] = None + ) -> Optional["Evaluation"]: + """Sync polling using a sync client.""" + from .._client import AsyncAtlas + + if self._client is None: + raise ValueError("No client attached") + if isinstance(self._client, AsyncAtlas): + raise RuntimeError("Use `wait_for_completion_async()` with an async client") + + evaluation = self._client.evaluations.wait_for_completion( + self, interval_seconds=interval_seconds, timeout=timeout + ) + if evaluation: + self.status = evaluation.status + self.finished_at = evaluation.finished_at + self.average_duration = evaluation.average_duration + self.accuracy = evaluation.accuracy + + return self + + async def wait_for_completion_async( + self, *, interval_seconds: int = 30, timeout: Optional[float] = None + ) -> Optional["Evaluation"]: + """Async polling using an async client.""" + from .._client import AsyncAtlas + + if self._client is None: + raise ValueError("No client attached") + if not isinstance(self._client, AsyncAtlas): + raise RuntimeError("Use `wait_for_completion()` with a sync client") + + evaluation = await self._client.evaluations.wait_for_completion( + self, interval_seconds=interval_seconds, timeout=timeout + ) + if evaluation: + self.status = evaluation.status + self.finished_at = evaluation.finished_at + self.average_duration = evaluation.average_duration + self.accuracy = evaluation.accuracy + + return self + class Result(BaseModel): subset: str diff --git a/src/atlas/resources/benchmarks/__init__.py b/src/atlas/resources/benchmarks/__init__.py index eb44971..76009a1 100644 --- a/src/atlas/resources/benchmarks/__init__.py +++ b/src/atlas/resources/benchmarks/__init__.py @@ -1,3 +1,3 @@ -from .benchmarks import Benchmarks +from .benchmarks import Benchmarks, AsyncBenchmarks -__all__ = ["Benchmarks"] +__all__ = ["Benchmarks", "AsyncBenchmarks"] diff --git a/src/atlas/resources/benchmarks/benchmarks.py b/src/atlas/resources/benchmarks/benchmarks.py index 9e13225..33be623 100644 --- a/src/atlas/resources/benchmarks/benchmarks.py +++ b/src/atlas/resources/benchmarks/benchmarks.py @@ -5,7 +5,7 @@ import httpx from ...models import Benchmark, CustomBenchmark, PublicBenchmark, BenchmarksResponse -from ..._resource import SyncAPIResource +from ..._resource import SyncAPIResource, AsyncAPIResource from ..._constants import DEFAULT_TIMEOUT @@ -52,3 +52,48 @@ def cast_benchmark(b: Benchmark, bench_type: str) -> Benchmark: benchmarks.extend([cast_benchmark(b, type) for b in resp.data.benchmarks]) return benchmarks + + +class AsyncBenchmarks(AsyncAPIResource): + async def get( + self, + *, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + type: Literal["custom", "public"] | None = None, + name: Optional[str] = None, + ) -> List[Benchmark] | None: + base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/benchmarks" + + async def fetch(bench_type: str) -> Optional[BenchmarksResponse]: + params = {"type": bench_type} + if name: + params["query"] = name + + resp = await self._get( + base_url, + params=params, + timeout=timeout, + cast_to=BenchmarksResponse, + ) + return resp if isinstance(resp, BenchmarksResponse) else None + + def cast_benchmark(b: Benchmark, bench_type: str) -> Benchmark: + if bench_type == "custom": + return CustomBenchmark(**b.model_dump()) + elif bench_type == "public": + return PublicBenchmark(**b.model_dump()) + return b # fallback to base class + + benchmarks: List[Benchmark] = [] + + if type is None: # fetch both custom + public + for t in ["custom", "public"]: + resp = await fetch(t) + if resp: + benchmarks.extend([cast_benchmark(b, t) for b in resp.data.benchmarks]) + else: # fetch only one type + resp = await fetch(type) + if resp: + benchmarks.extend([cast_benchmark(b, type) for b in resp.data.benchmarks]) + + return benchmarks diff --git a/src/atlas/resources/evaluations/__init__.py b/src/atlas/resources/evaluations/__init__.py index 2a096e7..b309bc2 100644 --- a/src/atlas/resources/evaluations/__init__.py +++ b/src/atlas/resources/evaluations/__init__.py @@ -1,3 +1,3 @@ -from .evaluations import Evaluations +from .evaluations import Evaluations, AsyncEvaluations -__all__ = ["Evaluations"] +__all__ = ["Evaluations", "AsyncEvaluations"] diff --git a/src/atlas/resources/evaluations/evaluations.py b/src/atlas/resources/evaluations/evaluations.py index 4b779ae..192272c 100644 --- a/src/atlas/resources/evaluations/evaluations.py +++ b/src/atlas/resources/evaluations/evaluations.py @@ -1,5 +1,9 @@ from __future__ import annotations +import time +import asyncio +from typing import Optional + import httpx from ...models import ( @@ -10,7 +14,7 @@ CustomBenchmark, EvaluationsResponse, ) -from ..._resource import SyncAPIResource +from ..._resource import SyncAPIResource, AsyncAPIResource from ..._constants import DEFAULT_TIMEOUT @@ -21,7 +25,7 @@ def create( model: Model, benchmark: Benchmark, timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, - ) -> Evaluation | None: + ) -> Optional[Evaluation]: evaluations = self._post( f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/evaluations", body=[ @@ -36,5 +40,123 @@ def create( cast_to=EvaluationsResponse, ) if isinstance(evaluations, EvaluationsResponse) and len(evaluations.data) > 0: - return evaluations.data[0] + evaluation = evaluations.data[0] + evaluation.attach_client(self._client) + return evaluation + return None + + def get( + self, + evaluation: Evaluation, + *, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + ) -> Optional[Evaluation]: + return self.get_by_id(evaluation.id, timeout=timeout) + + def get_by_id( + self, + evaluation_id: str, + *, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + ) -> Optional[Evaluation]: + evaluation = self._get( + f"/evaluations/{evaluation_id}", + timeout=timeout, + cast_to=Evaluation, + ) + if isinstance(evaluation, Evaluation): + evaluation.attach_client(self._client) + return evaluation + return None + + def wait_for_completion( + self, + evaluation: Evaluation, + *, + interval_seconds: int = 30, + timeout: float | None = None, + ) -> Optional[Evaluation]: + """Poll until the evaluation finishes or timeout is reached.""" + start = time.time() + + updated_evaluation: Optional[Evaluation] = self.get(evaluation) + while updated_evaluation and not updated_evaluation.is_finished: + if timeout and (time.time() - start) > timeout: + raise TimeoutError(f"Evaluation {updated_evaluation.id} did not complete within {timeout} seconds") + + time.sleep(interval_seconds) + updated_evaluation = self.get(updated_evaluation) + + return updated_evaluation + + +class AsyncEvaluations(AsyncAPIResource): + async def create( + self, + *, + model: Model, + benchmark: Benchmark, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + ) -> Optional[Evaluation]: + evaluations = await self._post( + f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/evaluations", + body=[ + { + "model_id": model.id, + "dataset_id": benchmark.id, + "is_custom_model": isinstance(model, CustomModel), + "is_custom_dataset": isinstance(benchmark, CustomBenchmark), + } + ], + timeout=timeout, + cast_to=EvaluationsResponse, + ) + if isinstance(evaluations, EvaluationsResponse) and len(evaluations.data) > 0: + evaluation = evaluations.data[0] + evaluation.attach_client(self._client) + return evaluation + return None + + async def get( + self, + evaluation: Evaluation, + *, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + ) -> Optional[Evaluation]: + return await self.get_by_id(evaluation.id, timeout=timeout) + + async def get_by_id( + self, + evaluation_id: str, + *, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + ) -> Optional[Evaluation]: + evaluation = await self._get( + f"/evaluations/{evaluation_id}", + timeout=timeout, + cast_to=Evaluation, + ) + if isinstance(evaluation, Evaluation): + evaluation.attach_client(self._client) + return evaluation return None + + async def wait_for_completion( + self, + evaluation: Evaluation, + *, + interval_seconds: int = 30, + timeout: Optional[float] = None, + ) -> Optional[Evaluation]: + """Poll asynchronously until the evaluation finishes or timeout is reached.""" + start = asyncio.get_event_loop().time() + + updated_evaluation: Optional[Evaluation] = await self.get(evaluation) + while updated_evaluation and not updated_evaluation.is_finished: + if timeout and (asyncio.get_event_loop().time() - start) > timeout: + raise TimeoutError(f"Evaluation {updated_evaluation.id} did not complete within {timeout} seconds") + + await asyncio.sleep(interval_seconds) + updated_evaluation = await self.get(updated_evaluation) + + return updated_evaluation diff --git a/src/atlas/resources/models/__init__.py b/src/atlas/resources/models/__init__.py index 437f5c6..3b75e4b 100644 --- a/src/atlas/resources/models/__init__.py +++ b/src/atlas/resources/models/__init__.py @@ -1,3 +1,3 @@ -from .models import Models +from .models import Models, AsyncModels -__all__ = ["Models"] +__all__ = ["Models", "AsyncModels"] diff --git a/src/atlas/resources/models/models.py b/src/atlas/resources/models/models.py index 3ec3bea..af55ab6 100644 --- a/src/atlas/resources/models/models.py +++ b/src/atlas/resources/models/models.py @@ -5,7 +5,7 @@ import httpx from ...models import Model, CustomModel, PublicModel, ModelsResponse -from ..._resource import SyncAPIResource +from ..._resource import SyncAPIResource, AsyncAPIResource from ..._constants import DEFAULT_TIMEOUT @@ -61,3 +61,57 @@ def cast_model(m: Model, model_type: str) -> Model: models.extend([cast_model(m, type) for m in resp.data.models]) return models + + +class AsyncModels(AsyncAPIResource): + async def get( + self, + *, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + type: Literal["custom", "public"] | None = None, + name: Optional[str] = None, + companies: Optional[List[str]] = None, + regions: Optional[List[str]] = None, + licenses: Optional[List[str]] = None, + ) -> List[Model] | None: + base_url = f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/models" + + async def fetch(model_type: str) -> ModelsResponse | None: + params = {"type": model_type} + if name: + params["query"] = name + if companies: + params["companies"] = ",".join(companies) + if regions: + params["regions"] = ",".join(regions) + if licenses: + params["licenses"] = ",".join(licenses) + + resp = await self._get( + base_url, + params=params, + timeout=timeout, + cast_to=ModelsResponse, + ) + return resp if isinstance(resp, ModelsResponse) else None + + models: List[Model] = [] + + def cast_model(m: Model, model_type: str) -> Model: + if model_type == "custom": + return CustomModel(**m.model_dump()) + elif model_type == "public": + return PublicModel(**m.model_dump()) + return m # fallback, just base class + + if type is None: # fetch both + for t in ["custom", "public"]: + resp = await fetch(t) + if resp: + models.extend([cast_model(m, t) for m in resp.data.models]) + else: # fetch only one type + resp = await fetch(type) + if resp: + models.extend([cast_model(m, type) for m in resp.data.models]) + + return models diff --git a/src/atlas/resources/results/__init__.py b/src/atlas/resources/results/__init__.py index 8515229..087e72d 100644 --- a/src/atlas/resources/results/__init__.py +++ b/src/atlas/resources/results/__init__.py @@ -1,3 +1,3 @@ -from .results import Results +from .results import Results, AsyncResults -__all__ = ["Results"] +__all__ = ["Results", "AsyncResults"] diff --git a/src/atlas/resources/results/results.py b/src/atlas/resources/results/results.py index 6a16cad..45c894d 100644 --- a/src/atlas/resources/results/results.py +++ b/src/atlas/resources/results/results.py @@ -5,15 +5,26 @@ import httpx -from ..._resource import SyncAPIResource +from ...models import Evaluation, ResultsResponse +from ..._resource import SyncAPIResource, AsyncAPIResource from ..._constants import DEFAULT_TIMEOUT -from ...models.api import ResultsResponse +DEFAULT_PAGE = 1 DEFAULT_PAGE_SIZE = 100 class Results(SyncAPIResource): def get( + self, + *, + evaluation: Evaluation, + page: Optional[int] = None, + page_size: Optional[int] = None, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + ) -> ResultsResponse | None: + return self.get_by_id(evaluation_id=evaluation.id, page=page, page_size=page_size, timeout=timeout) + + def get_by_id( self, *, evaluation_id: str, @@ -25,7 +36,7 @@ def get( Get evaluation results with optional pagination. Args: - evaluation_id: The ID of the evaluation to get results for + evaluation: Evaluation to get the results for page: Page number for pagination (1-based, defaults to 1 if not provided) page_size: Number of results per page (default: 100, optional) timeout: Request timeout @@ -40,35 +51,109 @@ def get( """ params = {"evaluation_id": evaluation_id} - # Set default page_size if not provided effective_page_size = page_size if page_size is not None else DEFAULT_PAGE_SIZE + effective_page = page if page is not None else DEFAULT_PAGE + + params["page"] = str(effective_page) + if page_size is not None: + params["pageSize"] = str(page_size) + + # Get the response with cast_to to get parsed data + resp = self._get( + f"/results", + params=params, + timeout=timeout, + cast_to=dict, + ) + + if not resp or not isinstance(resp, dict): + return None + + # Calculate pagination info + metrics = resp.get("metrics", {}) + total_count = metrics.get("total_count", 0) + total_pages = math.ceil(total_count / effective_page_size) if total_count > 0 and effective_page_size > 0 else 0 + + # Add pagination to the response + resp_with_pagination = { + **resp, + "pagination": { + "total_count": total_count, + "page_size": effective_page_size, + "total_pages": total_pages, + }, + } + + try: + return ResultsResponse.model_validate(resp_with_pagination) + except Exception: + return None + + +class AsyncResults(AsyncAPIResource): + async def get( + self, + *, + evaluation: Evaluation, + page: Optional[int] = None, + page_size: Optional[int] = None, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + ) -> ResultsResponse | None: + return await self.get_by_id(evaluation_id=evaluation.id, page=page, page_size=page_size, timeout=timeout) - # Set default page to 1 if not provided - effective_page = page if page is not None else 1 + async def get_by_id( + self, + *, + evaluation_id: str, + page: Optional[int] = None, + page_size: Optional[int] = None, + timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, + ) -> ResultsResponse | None: + """ + Get evaluation results with optional pagination. + + Args: + evaluation: Evaluation to get the results for + page: Page number for pagination (1-based, defaults to 1 if not provided) + page_size: Number of results per page (default: 100, optional) + timeout: Request timeout + + Returns: + ResultsResponse object containing: + - evaluation_id: The evaluation ID + - results: List of Result objects for the current page + - metrics: Contains total_count and score ranges + - pagination: Calculated pagination info (total_count, page_size, total_pages) + or None if the request fails + """ + params = {"evaluation_id": evaluation_id} + + effective_page_size = page_size if page_size is not None else DEFAULT_PAGE_SIZE + effective_page = page if page is not None else DEFAULT_PAGE params["page"] = str(effective_page) if page_size is not None: params["pageSize"] = str(page_size) # Get the response with cast_to to get parsed data - response_data = self._get( + resp = await self._get( f"/results", params=params, timeout=timeout, cast_to=dict, ) - if not response_data or not isinstance(response_data, dict): + if not resp or not isinstance(resp, dict): return None # Calculate pagination info - metrics = response_data.get("metrics", {}) + metrics = resp.get("metrics", {}) total_count = metrics.get("total_count", 0) total_pages = math.ceil(total_count / effective_page_size) if total_count > 0 and effective_page_size > 0 else 0 # Add pagination to the response - response_with_pagination = { - **response_data, + resp_with_pagination = { + **resp, "pagination": { "total_count": total_count, "page_size": effective_page_size, @@ -77,6 +162,6 @@ def get( } try: - return ResultsResponse.model_validate(response_with_pagination) + return ResultsResponse.model_validate(resp_with_pagination) except Exception: return None diff --git a/tests/resources/test_results.py b/tests/resources/test_results.py index c3031b3..a22d9ae 100644 --- a/tests/resources/test_results.py +++ b/tests/resources/test_results.py @@ -63,7 +63,7 @@ def test_get_results_success(self, results_resource, mock_results_response): """get method returns ResultsResponse successfully.""" results_resource._get.return_value = mock_results_response - result = results_resource.get(evaluation_id="eval-123") + result = results_resource.get_by_id(evaluation_id="eval-123") assert isinstance(result, ResultsResponse) assert result.evaluation_id == "eval-123" @@ -83,7 +83,7 @@ def test_get_results_request_parameters(self, results_resource, mock_results_res """get method makes correct API request.""" results_resource._get.return_value = mock_results_response - results_resource.get(evaluation_id="eval-456") + results_resource.get_by_id(evaluation_id="eval-456") results_resource._get.assert_called_once_with( "/results", @@ -97,7 +97,7 @@ def test_get_results_with_custom_timeout(self, results_resource, mock_results_re results_resource._get.return_value = mock_results_response custom_timeout = 120.0 - results_resource.get(evaluation_id="eval-123", timeout=custom_timeout) + results_resource.get_by_id(evaluation_id="eval-123", timeout=custom_timeout) call_args = results_resource._get.call_args assert call_args.kwargs["timeout"] == custom_timeout @@ -107,7 +107,7 @@ def test_get_results_with_httpx_timeout(self, results_resource, mock_results_res results_resource._get.return_value = mock_results_response custom_timeout = httpx.Timeout(120.0) - results_resource.get(evaluation_id="eval-123", timeout=custom_timeout) + results_resource.get_by_id(evaluation_id="eval-123", timeout=custom_timeout) call_args = results_resource._get.call_args assert call_args.kwargs["timeout"] is custom_timeout @@ -116,7 +116,7 @@ def test_get_results_none_response(self, results_resource): """get method returns None when response is None.""" results_resource._get.return_value = None - result = results_resource.get(evaluation_id="eval-123") + result = results_resource.get_by_id(evaluation_id="eval-123") assert result is None @@ -124,7 +124,7 @@ def test_get_results_invalid_response_type(self, results_resource): """get method handles non-ResultsResponse response gracefully.""" results_resource._get.return_value = "invalid-response" - result = results_resource.get(evaluation_id="eval-123") + result = results_resource.get_by_id(evaluation_id="eval-123") assert result is None @@ -143,7 +143,7 @@ def test_get_results_empty_response(self, results_resource): } results_resource._get.return_value = empty_response - result = results_resource.get(evaluation_id="eval-123") + result = results_resource.get_by_id(evaluation_id="eval-123") assert isinstance(result, ResultsResponse) assert result.evaluation_id == "eval-123" @@ -178,7 +178,7 @@ def test_get_results_multiple_items(self, results_resource, sample_result_data): } results_resource._get.return_value = response - result = results_resource.get(evaluation_id="eval-123") + result = results_resource.get_by_id(evaluation_id="eval-123") assert isinstance(result, ResultsResponse) assert len(result.results) == 2 @@ -192,7 +192,7 @@ def test_get_results_url_construction(self, results_resource, mock_results_respo """get method uses correct URL endpoint.""" results_resource._get.return_value = mock_results_response - results_resource.get(evaluation_id="eval-123") + results_resource.get_by_id(evaluation_id="eval-123") call_args = results_resource._get.call_args assert call_args[0][0] == "/results" @@ -201,7 +201,7 @@ def test_get_results_evaluation_id_parameter(self, results_resource, mock_result """get method correctly passes evaluation_id parameter.""" results_resource._get.return_value = mock_results_response - results_resource.get(evaluation_id="test-eval-789") + results_resource.get_by_id(evaluation_id="test-eval-789") call_args = results_resource._get.call_args assert call_args.kwargs["params"]["evaluation_id"] == "test-eval-789" @@ -210,7 +210,7 @@ def test_get_results_cast_to_parameter(self, results_resource, mock_results_resp """get method specifies correct cast_to parameter.""" results_resource._get.return_value = mock_results_response - results_resource.get(evaluation_id="eval-123") + results_resource.get_by_id(evaluation_id="eval-123") call_args = results_resource._get.call_args assert call_args.kwargs["cast_to"] is dict @@ -219,7 +219,7 @@ def test_get_results_timeout_default(self, results_resource, mock_results_respon """get method uses DEFAULT_TIMEOUT when no timeout specified.""" results_resource._get.return_value = mock_results_response - results_resource.get(evaluation_id="eval-123") + results_resource.get_by_id(evaluation_id="eval-123") call_args = results_resource._get.call_args assert call_args.kwargs["timeout"] is DEFAULT_TIMEOUT @@ -228,7 +228,7 @@ def test_get_results_with_none_timeout(self, results_resource, mock_results_resp """get method accepts None timeout.""" results_resource._get.return_value = mock_results_response - results_resource.get(evaluation_id="eval-123", timeout=None) + results_resource.get_by_id(evaluation_id="eval-123", timeout=None) call_args = results_resource._get.call_args assert call_args.kwargs["timeout"] is None @@ -237,7 +237,7 @@ def test_get_results_preserves_result_attributes(self, results_resource, mock_re """get method preserves all result attributes correctly.""" results_resource._get.return_value = mock_results_response - result = results_resource.get(evaluation_id="eval-123") + result = results_resource.get_by_id(evaluation_id="eval-123") result_item = result.results[0] assert isinstance(result_item.duration, timedelta) @@ -260,7 +260,7 @@ def test_get_results_with_different_evaluation_ids(self, results_resource, mock_ """get method works with various evaluation ID formats.""" results_resource._get.return_value = mock_results_response - result = results_resource.get(evaluation_id=evaluation_id) + result = results_resource.get_by_id(evaluation_id=evaluation_id) assert isinstance(result, ResultsResponse) call_args = results_resource._get.call_args @@ -294,7 +294,7 @@ def test_get_results_handles_not_found_error(self, results_resource): results_resource._get.side_effect = not_found_error with pytest.raises(NotFoundError): - results_resource.get(evaluation_id="nonexistent-eval") + results_resource.get_by_id(evaluation_id="nonexistent-eval") def test_get_results_handles_auth_error(self, results_resource): """get method propagates authentication errors.""" @@ -308,7 +308,7 @@ def test_get_results_handles_auth_error(self, results_resource): results_resource._get.side_effect = auth_error with pytest.raises(AuthenticationError): - results_resource.get(evaluation_id="eval-123") + results_resource.get_by_id(evaluation_id="eval-123") def test_get_results_handles_permission_error(self, results_resource): """get method propagates permission errors.""" @@ -322,7 +322,7 @@ def test_get_results_handles_permission_error(self, results_resource): results_resource._get.side_effect = permission_error with pytest.raises(PermissionDeniedError): - results_resource.get(evaluation_id="restricted-eval") + results_resource.get_by_id(evaluation_id="restricted-eval") def test_get_results_handles_server_error(self, results_resource): """get method propagates server errors.""" @@ -336,7 +336,7 @@ def test_get_results_handles_server_error(self, results_resource): results_resource._get.side_effect = server_error with pytest.raises(InternalServerError): - results_resource.get(evaluation_id="eval-123") + results_resource.get_by_id(evaluation_id="eval-123") def test_get_results_handles_connection_error(self, results_resource): """get method propagates connection errors.""" @@ -347,7 +347,7 @@ def test_get_results_handles_connection_error(self, results_resource): results_resource._get.side_effect = connection_error with pytest.raises(APIConnectionError): - results_resource.get(evaluation_id="eval-123") + results_resource.get_by_id(evaluation_id="eval-123") def test_get_results_handles_timeout_error(self, results_resource): """get method propagates timeout errors.""" @@ -358,7 +358,7 @@ def test_get_results_handles_timeout_error(self, results_resource): results_resource._get.side_effect = timeout_error with pytest.raises(APITimeoutError): - results_resource.get(evaluation_id="eval-123", timeout=1.0) + results_resource.get_by_id(evaluation_id="eval-123", timeout=1.0) class TestResultsDataHandling: @@ -414,7 +414,7 @@ def test_get_results_handles_complex_metrics(self, results_resource): } results_resource._get.return_value = response - result = results_resource.get(evaluation_id="eval-complex") + result = results_resource.get_by_id(evaluation_id="eval-complex") assert isinstance(result, ResultsResponse) assert len(result.results) == 1 @@ -462,7 +462,7 @@ def test_get_results_handles_different_durations(self, results_resource): } results_resource._get.return_value = response - result = results_resource.get(evaluation_id="eval-durations") + result = results_resource.get_by_id(evaluation_id="eval-durations") assert isinstance(result, ResultsResponse) assert len(result.results) == 5 @@ -497,7 +497,7 @@ def test_get_results_handles_empty_metrics(self, results_resource): } results_resource._get.return_value = response - result = results_resource.get(evaluation_id="eval-minimal") + result = results_resource.get_by_id(evaluation_id="eval-minimal") assert isinstance(result, ResultsResponse) assert len(result.results) == 1 @@ -508,7 +508,7 @@ def test_get_results_return_type_consistency(self, results_resource): """get method returns consistent types.""" # Test that the method returns either a ResultsResponse object or None results_resource._get.return_value = None - result = results_resource.get(evaluation_id="eval-123") + result = results_resource.get_by_id(evaluation_id="eval-123") assert result is None # Test that it returns a ResultsResponse object when successful @@ -524,7 +524,7 @@ def test_get_results_return_type_consistency(self, results_resource): }, } results_resource._get.return_value = empty_response - result = results_resource.get(evaluation_id="eval-123") + result = results_resource.get_by_id(evaluation_id="eval-123") assert isinstance(result, ResultsResponse) @@ -571,7 +571,7 @@ def test_get_results_with_pagination_parameters(self, results_resource, sample_r } results_resource._get.return_value = mock_response - result_data = results_resource.get( + result_data = results_resource.get_by_id( evaluation_id="eval-paginated", page=2, page_size=50, @@ -611,7 +611,7 @@ def test_get_results_pagination_parameter_conversion(self, results_resource, sam } results_resource._get.return_value = mock_response - results_resource.get(evaluation_id="eval-123", page=3, page_size=25) + results_resource.get_by_id(evaluation_id="eval-123", page=3, page_size=25) call_args = results_resource._get.call_args params = call_args.kwargs["params"] @@ -637,7 +637,7 @@ def test_get_results_default_page_parameter(self, results_resource, sample_resul } results_resource._get.return_value = mock_response - results_resource.get(evaluation_id="eval-123") + results_resource.get_by_id(evaluation_id="eval-123") call_args = results_resource._get.call_args params = call_args.kwargs["params"] @@ -660,7 +660,7 @@ def test_get_results_pagination_metadata_calculation(self, results_resource, sam } results_resource._get.return_value = api_response - result = results_resource.get(evaluation_id="eval-math", page=3, page_size=50) + result = results_resource.get_by_id(evaluation_id="eval-math", page=3, page_size=50) # Should have calculated pagination correctly assert isinstance(result, ResultsResponse) @@ -705,7 +705,7 @@ def test_pagination_total_pages_calculation( } results_resource._get.return_value = api_response - result = results_resource.get(evaluation_id="eval-calc", page_size=page_size) + result = results_resource.get_by_id(evaluation_id="eval-calc", page_size=page_size) assert result.pagination.total_count == total_count assert result.pagination.page_size == page_size @@ -737,7 +737,7 @@ def test_get_results_invalid_api_response(self, results_resource): } results_resource._get.return_value = invalid_response - result = results_resource.get(evaluation_id="eval-123") + result = results_resource.get_by_id(evaluation_id="eval-123") # Should return None when response structure is invalid assert result is None @@ -757,7 +757,7 @@ def test_get_results_with_zero_total_count_in_metrics(self, results_resource): } results_resource._get.return_value = invalid_response - result = results_resource.get(evaluation_id="eval-123") + result = results_resource.get_by_id(evaluation_id="eval-123") # Should handle zero total_count gracefully assert isinstance(result, ResultsResponse) @@ -768,7 +768,7 @@ def test_get_results_non_dict_response(self, results_resource): """get method handles non-dict API response.""" results_resource._get.return_value = "invalid-string-response" - result = results_resource.get(evaluation_id="eval-123") + result = results_resource.get_by_id(evaluation_id="eval-123") assert result is None @@ -784,7 +784,7 @@ def test_get_results_pydantic_validation_error(self, results_resource): } results_resource._get.return_value = invalid_response - result = results_resource.get(evaluation_id="eval-123") + result = results_resource.get_by_id(evaluation_id="eval-123") assert result is None @@ -803,7 +803,7 @@ def test_get_results_extreme_pagination_values(self, results_resource): } results_resource._get.return_value = extreme_response - result = results_resource.get(evaluation_id="eval-extreme", page_size=1) + result = results_resource.get_by_id(evaluation_id="eval-extreme", page_size=1) assert isinstance(result, ResultsResponse) assert result.pagination.total_count == 999999 @@ -826,7 +826,7 @@ def test_get_results_zero_page_size_edge_case(self, results_resource): results_resource._get.return_value = response # Pass 0 as page_size - result = results_resource.get(evaluation_id="eval-123", page_size=0) + result = results_resource.get_by_id(evaluation_id="eval-123", page_size=0) assert isinstance(result, ResultsResponse) # Should use 0 as provided (though this might cause division by zero, it's handled) @@ -848,7 +848,7 @@ def test_get_results_negative_page_values(self, results_resource): results_resource._get.return_value = response # Test with negative page and page_size - result = results_resource.get(evaluation_id="eval-123", page=-1, page_size=-50) + result = results_resource.get_by_id(evaluation_id="eval-123", page=-1, page_size=-50) # Should still make the API call and process response call_args = results_resource._get.call_args diff --git a/tests/test_integration.py b/tests/test_integration.py index 5a7b65e..0ba831f 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -10,7 +10,6 @@ Result, Benchmark, Evaluation, - ResultsResponse, EvaluationStatus, EvaluationsResponse, ) @@ -174,22 +173,6 @@ def test_complete_evaluation_workflow(self, atlas_client): # Mock responses evaluations_response = EvaluationsResponse(data=[evaluation]) - results_response = ResultsResponse( - evaluation_id="eval-789", - results=[result], - metrics={ - "total_count": 1, - "min_toxicity_score": 0.02, - "max_toxicity_score": 0.02, - "min_readability_score": 0.85, - "max_readability_score": 0.85, - }, - pagination={ - "total_count": 1, - "page_size": 100, - "total_pages": 1, - }, - ) with patch.object(atlas_client, "get_cast") as mock_get, patch.object(atlas_client, "post_cast") as mock_post: # Configure mocks for the workflow @@ -212,7 +195,7 @@ def test_complete_evaluation_workflow(self, atlas_client): assert created_evaluation.status == EvaluationStatus.SUCCESS # Step 2: Get evaluation results - results = atlas_client.results.get(evaluation_id=created_evaluation.id) + results = atlas_client.results.get(evaluation=created_evaluation) assert len(results.results) == 1 assert results.results[0].score == 1.0 assert results.results[0].subset == "math" @@ -243,7 +226,7 @@ def test_workflow_with_error_handling(self, atlas_client): # Verify error is propagated with pytest.raises(NotFoundError): - atlas_client.results.get(evaluation_id="test-eval") + atlas_client.results.get_by_id(evaluation_id="test-eval") def test_workflow_with_custom_timeouts(self, atlas_client): """Test workflow respects custom timeout settings.""" @@ -257,23 +240,6 @@ def test_workflow_with_custom_timeouts(self, atlas_client): "metrics": {"accuracy": 1.0}, } - results_response = ResultsResponse( - evaluation_id="test-eval", - results=[Result(**result_data)], - metrics={ - "total_count": 1, - "min_toxicity_score": 0.0, - "max_toxicity_score": 0.1, - "min_readability_score": 0.8, - "max_readability_score": 0.9, - }, - pagination={ - "total_count": 1, - "page_size": 100, - "total_pages": 1, - }, - ) - with patch.object(atlas_client, "get_cast") as mock_get: mock_get.return_value = { "evaluation_id": "test-eval", @@ -289,7 +255,7 @@ def test_workflow_with_custom_timeouts(self, atlas_client): # Test with custom timeout custom_timeout = httpx.Timeout(30.0) - results = atlas_client.results.get(evaluation_id="test-eval", timeout=custom_timeout) + results = atlas_client.results.get_by_id(evaluation_id="test-eval", timeout=custom_timeout) assert len(results.results) == 1 @@ -416,22 +382,6 @@ def test_results_analysis_workflow(self, atlas_client): ] results = [Result(**data) for data in results_data] - results_response = ResultsResponse( - evaluation_id="test-eval", - results=results, - metrics={ - "total_count": 3, - "min_toxicity_score": 0.0, - "max_toxicity_score": 0.1, - "min_readability_score": 0.7, - "max_readability_score": 0.9, - }, - pagination={ - "total_count": 3, - "page_size": 100, - "total_pages": 1, - }, - ) with patch.object(atlas_client, "get_cast") as mock_get: mock_get.return_value = { @@ -447,7 +397,7 @@ def test_results_analysis_workflow(self, atlas_client): } # Get results - evaluation_results = atlas_client.results.get(evaluation_id="test-eval") + evaluation_results = atlas_client.results.get_by_id(evaluation_id="test-eval") # Analyze results math_results = [r for r in evaluation_results.results if r.subset == "math"] @@ -559,23 +509,6 @@ def test_resource_operations_isolated(self, mock_org1, mock_org2): "metrics": {"accuracy": 1.0}, } - results_response = ResultsResponse( - evaluation_id="test-eval", - results=[Result(**result_data)], - metrics={ - "total_count": 1, - "min_toxicity_score": 0.0, - "max_toxicity_score": 0.1, - "min_readability_score": 0.8, - "max_readability_score": 0.9, - }, - pagination={ - "total_count": 1, - "page_size": 100, - "total_pages": 1, - }, - ) - with patch.object(client1, "get_cast") as mock_get1, patch.object(client2, "get_cast") as mock_get2: mock_get1.return_value = { "evaluation_id": "test-eval", @@ -601,8 +534,8 @@ def test_resource_operations_isolated(self, mock_org1, mock_org2): } # Make calls on both clients - results1 = client1.results.get(evaluation_id="eval-1") - results2 = client2.results.get(evaluation_id="eval-2") + results1 = client1.results.get_by_id(evaluation_id="eval-1") + results2 = client2.results.get_by_id(evaluation_id="eval-2") # Verify both calls succeeded assert results1 is not None @@ -683,7 +616,7 @@ def test_evaluation_workflow_error_propagation(self, mock_org): # Test API error in results.get mock_get.side_effect = api_error with pytest.raises(APIStatusError): - client.results.get(evaluation_id="test-eval") + client.results.get_by_id(evaluation_id="test-eval") # Test connection error in evaluations.create mock_post.side_effect = connection_error