Examples for browsing, filtering, creating, and managing models and benchmarks using the LayerLens Python SDK.
import asyncio
from layerlens import AsyncStratix
async def main():
    """Call ``client.models.get`` once per filter keyword and print each result count.

    Every query is awaited on its own, and ``len()`` of the returned
    collection is reported before the next query runs.
    """
    stratix = AsyncStratix()

    # --- Filter by name
    wanted_name = "gpt-4o"
    by_name = await stratix.models.get(name=wanted_name)
    print(f"Found {len(by_name)} models with name {wanted_name}")

    # --- Filter by company
    wanted_companies = ["openai", "anthropic"]
    by_company = await stratix.models.get(companies=wanted_companies)
    print(f"Found {len(by_company)} models with companies {wanted_companies}")

    # --- Filter by region
    wanted_regions = ["usa"]
    by_region = await stratix.models.get(regions=wanted_regions)
    print(f"Found {len(by_region)} models with regions {wanted_regions}")

    # --- Filter by categories
    wanted_categories = ["Open-Source"]
    by_category = await stratix.models.get(categories=wanted_categories)
    print(f"Found {len(by_category)} open-source models")

    # --- Filter by key
    wanted_key = "gpt-4"
    by_key = await stratix.models.get(key=wanted_key)
    print(f"Found {len(by_key)} models matching key '{wanted_key}'")

    # --- Filter by license
    wanted_licenses = ["apache-2.0"]
    by_license = await stratix.models.get(licenses=wanted_licenses)
    print(f"Found {len(by_license)} models with license {wanted_licenses}")

    # --- Filter by type
    wanted_type = "public"
    by_type = await stratix.models.get(type=wanted_type)
    print(f"Found {len(by_type)} models with type {wanted_type}")
if __name__ == "__main__":
    asyncio.run(main())

import asyncio
from layerlens import AsyncStratix
async def main():
    """Call ``client.benchmarks.get`` once per filter keyword and print each result count."""
    stratix = AsyncStratix()

    # --- Filter by name
    name_filter = "mmlu"
    found = await stratix.benchmarks.get(name=name_filter)
    print(f"Found {len(found)} benchmarks with name {name_filter}")

    # --- Filter by categories
    category_filter = ["reasoning"]
    found = await stratix.benchmarks.get(categories=category_filter)
    print(f"Found {len(found)} benchmarks with categories {category_filter}")

    # --- Filter by language
    language_filter = ["english"]
    found = await stratix.benchmarks.get(languages=language_filter)
    print(f"Found {len(found)} english benchmarks")

    # --- Filter by key
    key_filter = "mmlu"
    found = await stratix.benchmarks.get(key=key_filter)
    print(f"Found {len(found)} benchmarks matching key '{key_filter}'")

    # --- Filter by type
    type_filter = "public"
    found = await stratix.benchmarks.get(type=type_filter)
    print(f"Found {len(found)} benchmarks with type {type_filter}")
if __name__ == "__main__":
    asyncio.run(main())

Source:
samples/core/custom_model.py
Custom models let you evaluate any model accessible via an OpenAI-compatible chat completions endpoint.
import os
from layerlens import Stratix
def main():
    """Register a custom model, report the outcome, then list the custom models.

    The provider API key is read from the ``MY_PROVIDER_API_KEY`` environment
    variable; ``os.environ[...]`` raises ``KeyError`` when it is unset.
    """
    client = Stratix()

    created = client.models.create_custom(
        name="My Custom Model",
        key="my-org/custom-model-v1",
        description="Custom fine-tuned model served via vLLM",
        api_url="https://my-model-endpoint.example.com/v1/chat/completions",
        api_key=os.environ["MY_PROVIDER_API_KEY"],
        max_tokens=4096,
    )
    print(
        f"Custom model created: {created.model_id}"
        if created
        else "Failed to create custom model"
    )

    # Verify the model was added (this runs whether or not creation succeeded).
    custom_models = client.models.get(type="custom")
    if not custom_models:
        return
    print(f"\nCustom models in project ({len(custom_models)}):")
    for model in custom_models:
        print(f" - {model.name} (id={model.id}, key={model.key})")
if __name__ == "__main__":
    main()

Use update_custom when your model's endpoint URL changes — for example, when serving a vLLM instance behind a cloudflared tunnel that rotates its hostname between sessions.
from layerlens import Stratix
def main():
    """Create a custom model, then point it at a new endpoint URL.

    Raises:
        RuntimeError: If ``create_custom`` returns ``None``, because there
            is then no ``model_id`` to pass to ``update_custom``.
    """
    client = Stratix()

    result = client.models.create_custom(
        name="My Tunnel-backed Model",
        key="my-org/tunnel-model-v1",
        description="vLLM served behind a cloudflared tunnel",
        api_url="https://tunnel-1.example.com/v1/chat/completions",
        # Placeholder value; substitute your provider's real key.
        api_key="my-provider-api-key",
        max_tokens=4096,
    )
    # Check explicitly instead of using `assert`: assertions are removed under
    # `python -O`, which would let a failed creation fall through to an
    # AttributeError on `result.model_id` below.
    if result is None:
        raise RuntimeError("Failed to create custom model")

    # Later, when the tunnel URL changes:
    client.models.update_custom(
        result.model_id,
        api_url="https://tunnel-2.example.com/v1/chat/completions",
    )
    # Run evaluations as usual — the model now points at the new endpoint.
if __name__ == "__main__":
    main()

delete_custom releases the model's name so it can be reused. This is useful for replacing a misconfigured model without picking a new name.
from layerlens import Stratix
def main():
    """Delete a custom model by id, then create a new one under the same name."""
    client = Stratix()

    # Tear down the old version
    client.models.delete_custom("old-model-id")

    # Recreate with the same name (now free)
    replacement = dict(
        name="My Custom Model",
        key="my-org/custom-model-v2",
        description="Replacement after schema migration",
        api_url="https://my-endpoint.example.com/v1/chat/completions",
        api_key="my-provider-api-key",
        max_tokens=4096,
    )
    client.models.create_custom(**replacement)
if __name__ == "__main__":
    main()

Source:
samples/core/custom_benchmark.py
Custom benchmarks are created from JSONL files with input and truth fields.
from layerlens import Stratix
def main():
    """Create two custom benchmarks from a JSONL file, then list the custom benchmarks.

    The first call passes only name, description, and file path; the second
    also passes ``additional_metrics`` and ``input_type``.
    """
    client = Stratix()
    jsonl_path = "path/to/benchmark.jsonl"

    # Basic custom benchmark
    basic = client.benchmarks.create_custom(
        name="My Custom Benchmark",
        description="A simple test benchmark for QA evaluation",
        file_path=jsonl_path,
    )
    if basic:
        print(f"Custom benchmark created: {basic.benchmark_id}")

    # With additional metrics and input type
    advanced = client.benchmarks.create_custom(
        name="Advanced Benchmark",
        description="Benchmark with toxicity and readability scoring",
        file_path=jsonl_path,
        additional_metrics=["toxicity", "readability"],
        input_type="messages",
    )
    if advanced:
        print(f"Advanced benchmark created: {advanced.benchmark_id}")

    # Verify
    custom_benchmarks = client.benchmarks.get(type="custom")
    if not custom_benchmarks:
        return
    print(f"\nCustom benchmarks in project ({len(custom_benchmarks)}):")
    for benchmark in custom_benchmarks:
        print(f" - {benchmark.name} (id={benchmark.id})")
if __name__ == "__main__":
    main()

Each line of the JSONL file should be a JSON object:
{"input": "What is 2+2?", "truth": "4"}
{"input": "Capital of France?", "truth": "Paris"}

Optional field: subset (for grouping prompts into categories).
Source:
samples/core/custom_benchmark.py
Smart benchmarks use AI to automatically generate benchmark prompts from uploaded documents. Supported file types: .txt, .pdf, .html, .docx, .csv, .json, .jsonl, .parquet.
from layerlens import Stratix
def main():
    """Request a smart benchmark built from two documents and print the outcome."""
    client = Stratix()

    # Instruction text passed as `system_prompt` to create_smart.
    generation_prompt = (
        "Generate question-answer pairs that test understanding of the "
        "product features, capabilities, and limitations described in "
        "the provided documents. Each question should have a clear, "
        "factual answer derived from the source material."
    )
    source_documents = [
        "path/to/product_docs.pdf",
        "path/to/faq.txt",
    ]

    result = client.benchmarks.create_smart(
        name="Product Knowledge Benchmark",
        description="Evaluates model knowledge of our product documentation",
        system_prompt=generation_prompt,
        file_paths=source_documents,
        metrics=["hallucination"],
    )

    if not result:
        print("Failed to create smart benchmark")
        return

    print(f"Smart benchmark created: {result.benchmark_id}")
    print("The benchmark is being generated asynchronously.")
    print("Check the dashboard for progress.")
if __name__ == "__main__":
    main()

Add and remove public models and benchmarks from your project.
from layerlens import Stratix
def main():
    """Add and remove models and benchmarks by id, then list the project's contents."""
    client = Stratix()

    def outcome(ok):
        # Map the truthiness of an add/remove return value to the label printed below.
        return "success" if ok else "failed"

    # --- Add public models to the project
    models_added = client.models.add("model-id-1", "model-id-2")
    print(f"Add models: {outcome(models_added)}")

    # --- Remove a model from the project
    model_removed = client.models.remove("model-id-1")
    print(f"Remove model: {outcome(model_removed)}")

    # --- Add public benchmarks to the project
    benchmark_added = client.benchmarks.add("benchmark-id-1")
    print(f"Add benchmark: {outcome(benchmark_added)}")

    # --- Remove a benchmark from the project
    benchmark_removed = client.benchmarks.remove("benchmark-id-1")
    print(f"Remove benchmark: {outcome(benchmark_removed)}")

    # --- List current models and benchmarks
    project_models = client.models.get()
    if project_models:
        print(f"\nModels in project ({len(project_models)}):")
        for model in project_models:
            print(f" - {model.name} (id={model.id})")

    project_benchmarks = client.benchmarks.get()
    if project_benchmarks:
        print(f"\nBenchmarks in project ({len(project_benchmarks)}):")
        for benchmark in project_benchmarks:
            print(f" - {benchmark.name} (id={benchmark.id})")


if __name__ == "__main__":
    main()