JudgmentLabs
diff --git a/src/e2etests/conftest.py b/src/e2etests/conftest.py
Lines changed: 4 additions & 0 deletions
diff --git a/src/e2etests/test_prompts.py b/src/e2etests/test_prompts.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/judgeval/__init__.py b/src/judgeval/__init__.py
Lines changed: 2 additions & 0 deletions
diff --git a/src/judgeval/api/api_types.py b/src/judgeval/api/api_types.py
Lines changed: 21 additions & 15 deletions
diff --git a/src/judgeval/data/judgment_types.py b/src/judgeval/data/judgment_types.py
Lines changed: 21 additions & 20 deletions
diff --git a/src/judgeval/dataset/__init__.py b/src/judgeval/dataset/__init__.py
Lines changed: 11 additions & 2 deletions
diff --git a/src/judgeval/env.py b/src/judgeval/env.py
Lines changed: 2 additions & 11 deletions
diff --git a/src/judgeval/evaluation/__init__.py b/src/judgeval/evaluation/__init__.py
Lines changed: 4 additions & 0 deletions
@@ -39,6 +39,10 @@ def project_name():
 def client(project_name: str) -> JudgmentClient:
     """Create a single JudgmentClient instance for all tests."""
     # Setup
+    if not API_KEY or not ORGANIZATION_ID:
+        pytest.skip(
+            "JUDGMENT_API_KEY or JUDGMENT_ORG_ID not set", allow_module_level=True
+        )
     client = JudgmentClient(api_key=API_KEY, organization_id=ORGANIZATION_ID)
     create_project(project_name=project_name)
     yield client
 
@@ -5,7 +5,7 @@
 import uuid
 import pytest
 from judgeval import JudgmentClient
-from judgeval.prompts.prompt import Prompt
+from judgeval.prompt import Prompt
 from judgeval.exceptions import JudgmentAPIError
 
 
 
@@ -146,6 +146,8 @@ def upload_custom_scorer(
                 requirements_text = f.read()
 
         try:
+            if not self.api_key or not self.organization_id:
+                raise ValueError("Judgment API key and organization ID are required")
             client = JudgmentSyncClient(
                 api_key=self.api_key,
                 organization_id=self.organization_id,
 
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  .openapi.json
-#   timestamp: 2025-10-21T01:37:42+00:00
+#   timestamp: 2025-10-25T22:30:20+00:00
 
 from __future__ import annotations
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -24,15 +24,6 @@ class DatasetsFetch(TypedDict):
     project_name: str
 
 
-class DatasetsTableRow(TypedDict):
-    dataset_id: str
-    name: str
-    created_at: str
-    kind: Literal["trace", "example"]
-    entries: int
-    creator: str
-
-
 class ProjectAdd(TypedDict):
     project_name: str
 
@@ -67,13 +58,9 @@ class SavePromptScorerRequest(TypedDict):
     description: NotRequired[Optional[str]]
 
 
-class SavePromptScorerResponse(TypedDict):
-    message: str
-    name: str
-
-
 class FetchPromptScorersRequest(TypedDict):
     names: NotRequired[Optional[List[str]]]
+    is_trace: NotRequired[Optional[bool]]
 
 
 class CustomScorerUploadPayload(TypedDict):
@@ -193,6 +180,9 @@ class UsageInfo(TypedDict):
 
 
 class PromptScorer(TypedDict):
+    id: str
+    user_id: str
+    organization_id: str
     name: str
     prompt: str
     threshold: float
@@ -202,6 +192,7 @@ class PromptScorer(TypedDict):
     created_at: NotRequired[Optional[str]]
     updated_at: NotRequired[Optional[str]]
     is_trace: NotRequired[Optional[bool]]
+    is_bucket_rubric: NotRequired[Optional[bool]]
 
 
 class PromptCommitInfo(TypedDict):
@@ -292,6 +283,7 @@ class TraceEvaluationRun(TypedDict):
     created_at: NotRequired[str]
     trace_and_span_ids: List[TraceAndSpanId]
     is_offline: NotRequired[bool]
+    is_bucket_run: NotRequired[bool]
 
 
 class DatasetInsertExamples(TypedDict):
@@ -300,6 +292,15 @@ class DatasetInsertExamples(TypedDict):
     project_name: str
 
 
+class DatasetInfo(TypedDict):
+    dataset_id: str
+    name: str
+    created_at: str
+    kind: DatasetKind
+    entries: int
+    creator: str
+
+
 class DatasetCreate(TypedDict):
     name: str
     dataset_kind: DatasetKind
@@ -308,6 +309,10 @@ class DatasetCreate(TypedDict):
     overwrite: bool
 
 
+class SavePromptScorerResponse(TypedDict):
+    scorer_response: PromptScorer
+
+
 class FetchPromptScorersResponse(TypedDict):
     scorers: List[PromptScorer]
 
@@ -342,6 +347,7 @@ class OtelTraceListItem(TypedDict):
     llm_cost: NotRequired[Optional[float]]
     error: NotRequired[str]
     scores: NotRequired[List[OtelSpanListItemScores]]
+    rules_invoked: NotRequired[List[str]]
     customer_id: NotRequired[Optional[str]]
     input: NotRequired[Optional[str]]
     output: NotRequired[Optional[str]]
 
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  .openapi.json
-#   timestamp: 2025-10-21T01:37:41+00:00
+#   timestamp: 2025-10-25T22:30:19+00:00
 
 from __future__ import annotations
 from typing import Annotated, Any, Dict, List, Optional, Union
@@ -26,20 +26,6 @@ class DatasetsFetch(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]
 
 
-class Kind(Enum):
-    trace = "trace"
-    example = "example"
-
-
-class DatasetsTableRow(BaseModel):
-    dataset_id: Annotated[str, Field(title="Dataset Id")]
-    name: Annotated[str, Field(title="Name")]
-    created_at: Annotated[str, Field(title="Created At")]
-    kind: Annotated[Kind, Field(title="Kind")]
-    entries: Annotated[int, Field(title="Entries")]
-    creator: Annotated[str, Field(title="Creator")]
-
-
 class ProjectAdd(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]
 
@@ -74,13 +60,9 @@ class SavePromptScorerRequest(BaseModel):
     description: Annotated[Optional[str], Field(title="Description")] = None
 
 
-class SavePromptScorerResponse(BaseModel):
-    message: Annotated[str, Field(title="Message")]
-    name: Annotated[str, Field(title="Name")]
-
-
 class FetchPromptScorersRequest(BaseModel):
     names: Annotated[Optional[List[str]], Field(title="Names")] = None
+    is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = None
 
 
 class CustomScorerUploadPayload(BaseModel):
@@ -211,6 +193,9 @@ class DatasetKind(Enum):
 
 
 class PromptScorer(BaseModel):
+    id: Annotated[str, Field(title="Id")]
+    user_id: Annotated[str, Field(title="User Id")]
+    organization_id: Annotated[str, Field(title="Organization Id")]
     name: Annotated[str, Field(title="Name")]
     prompt: Annotated[str, Field(title="Prompt")]
     threshold: Annotated[float, Field(title="Threshold")]
@@ -220,6 +205,7 @@ class PromptScorer(BaseModel):
     created_at: Annotated[Optional[AwareDatetime], Field(title="Created At")] = None
     updated_at: Annotated[Optional[AwareDatetime], Field(title="Updated At")] = None
     is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
+    is_bucket_rubric: Annotated[Optional[bool], Field(title="Is Bucket Rubric")] = None
 
 
 class PromptCommitInfo(BaseModel):
@@ -326,6 +312,7 @@ class TraceEvaluationRun(BaseModel):
         List[TraceAndSpanId], Field(title="Trace And Span Ids")
     ]
     is_offline: Annotated[Optional[bool], Field(title="Is Offline")] = False
+    is_bucket_run: Annotated[Optional[bool], Field(title="Is Bucket Run")] = False
 
 
 class DatasetInsertExamples(BaseModel):
@@ -334,6 +321,15 @@ class DatasetInsertExamples(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]
 
 
+class DatasetInfo(BaseModel):
+    dataset_id: Annotated[str, Field(title="Dataset Id")]
+    name: Annotated[str, Field(title="Name")]
+    created_at: Annotated[str, Field(title="Created At")]
+    kind: DatasetKind
+    entries: Annotated[int, Field(title="Entries")]
+    creator: Annotated[str, Field(title="Creator")]
+
+
 class DatasetCreate(BaseModel):
     name: Annotated[str, Field(title="Name")]
     dataset_kind: DatasetKind
@@ -342,6 +338,10 @@ class DatasetCreate(BaseModel):
     overwrite: Annotated[bool, Field(title="Overwrite")]
 
 
+class SavePromptScorerResponse(BaseModel):
+    scorer_response: PromptScorer
+
+
 class FetchPromptScorersResponse(BaseModel):
     scorers: Annotated[List[PromptScorer], Field(title="Scorers")]
 
@@ -380,6 +380,7 @@ class OtelTraceListItem(BaseModel):
     scores: Annotated[
         Optional[List[OtelSpanListItemScores]], Field(title="Scores")
     ] = []
+    rules_invoked: Annotated[Optional[List[str]], Field(title="Rules Invoked")] = []
     customer_id: Annotated[Optional[str], Field(title="Customer Id")] = None
     input: Annotated[Optional[str], Field(title="Input")] = None
     output: Annotated[Optional[str], Field(title="Output")] = None
 
@@ -32,15 +32,17 @@ class Dataset:
     dataset_kind: DatasetKind = DatasetKind.example
     examples: Optional[List[Example]] = None
     traces: Optional[List[Trace]] = None
-    judgment_api_key: str = JUDGMENT_API_KEY or ""
-    organization_id: str = JUDGMENT_ORG_ID or ""
+    judgment_api_key: str | None = JUDGMENT_API_KEY
+    organization_id: str | None = JUDGMENT_ORG_ID
 
     @classmethod
     def get(
         cls,
         name: str,
         project_name: str,
     ):
+        if not cls.judgment_api_key or not cls.organization_id:
+            raise ValueError("Judgment API key and organization ID are required")
         client = JudgmentSyncClient(cls.judgment_api_key, cls.organization_id)
         dataset = client.datasets_pull_for_judgeval(
             {
@@ -102,6 +104,8 @@ def create(
         examples: List[Example] = [],
         overwrite: bool = False,
     ):
+        if not cls.judgment_api_key or not cls.organization_id:
+            raise ValueError("Judgment API key and organization ID are required")
         if not examples:
             examples = []
 
@@ -125,6 +129,8 @@ def create(
 
     @classmethod
     def list(cls, project_name: str):
+        if not cls.judgment_api_key or not cls.organization_id:
+            raise ValueError("Judgment API key and organization ID are required")
         client = JudgmentSyncClient(cls.judgment_api_key, cls.organization_id)
         datasets = client.datasets_pull_all_for_judgeval({"project_name": project_name})
 
@@ -173,6 +179,9 @@ def add_examples(self, examples: List[Example]) -> None:
         if not isinstance(examples, list):
             raise TypeError("examples must be a list")
 
+        if not self.judgment_api_key or not self.organization_id:
+            raise ValueError("Judgment API key and organization ID are required")
+
         client = JudgmentSyncClient(self.judgment_api_key, self.organization_id)
         client.datasets_insert_examples_for_judgeval(
             {
 
@@ -19,17 +19,8 @@ def optional_env_var(var_name: str, default: str | None = None) -> str | None:
     return os.getenv(var_name, default)
 
 
-def required_env_var(var_name: str) -> str:
-    value = os.getenv(var_name)
-    if value is None:
-        raise EnvironmentError(
-            f"Environment variable '{var_name}' is required but not set."
-        )
-    return value
-
-
-JUDGMENT_API_KEY = required_env_var("JUDGMENT_API_KEY")
-JUDGMENT_ORG_ID = required_env_var("JUDGMENT_ORG_ID")
+JUDGMENT_API_KEY = optional_env_var("JUDGMENT_API_KEY")
+JUDGMENT_ORG_ID = optional_env_var("JUDGMENT_ORG_ID")
 JUDGMENT_API_URL = optional_env_var("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
 
 JUDGMENT_DEFAULT_GPT_MODEL = optional_env_var("JUDGMENT_DEFAULT_GPT_MODEL", "gpt-5")
 
@@ -112,6 +112,8 @@ def _poll_evaluation_until_complete(
 
     poll_count = 0
     exception_count = 0
+    if not JUDGMENT_API_KEY or not JUDGMENT_ORG_ID:
+        raise ValueError("Judgment API key and organization ID are required")
     api_client = JudgmentSyncClient(JUDGMENT_API_KEY, JUDGMENT_ORG_ID)
     while poll_count < max_poll_count:
         poll_count += 1
@@ -222,6 +224,8 @@ def run_eval(
         )
         t.start()
         try:
+            if not JUDGMENT_API_KEY or not JUDGMENT_ORG_ID:
+                raise ValueError("Judgment API key and organization ID are required")
             api_client = JudgmentSyncClient(JUDGMENT_API_KEY, JUDGMENT_ORG_ID)
             response = api_client.add_to_run_eval_queue_examples(
                 evaluation_run.model_dump(warnings=False)  # type: ignore