From 5e230c23de09fd81a591a64ceb3ddefa0346b578 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Fri, 8 May 2026 12:35:55 +0530 Subject: [PATCH 1/3] Added answer relevance validator --- .../008_add_answer_relevance_prompt.py | 53 ++++++++ backend/app/api/main.py | 2 + .../api/routes/answer_relevance_prompts.py | 83 ++++++++++++ backend/app/api/routes/guardrails.py | 14 ++ backend/app/core/enum.py | 1 + .../validators/answer_relevance_custom_llm.py | 85 ++++++++++++ ...ance_custom_llm_safety_validator_config.py | 30 +++++ backend/app/crud/answer_relevance_prompt.py | 126 ++++++++++++++++++ .../models/config/answer_relevance_prompt.py | 68 ++++++++++ .../app/schemas/answer_relevance_prompt.py | 71 ++++++++++ backend/app/schemas/guardrail_config.py | 4 + 11 files changed, 537 insertions(+) create mode 100644 backend/app/alembic/versions/008_add_answer_relevance_prompt.py create mode 100644 backend/app/api/routes/answer_relevance_prompts.py create mode 100644 backend/app/core/validators/answer_relevance_custom_llm.py create mode 100644 backend/app/core/validators/config/answer_relevance_custom_llm_safety_validator_config.py create mode 100644 backend/app/crud/answer_relevance_prompt.py create mode 100644 backend/app/models/config/answer_relevance_prompt.py create mode 100644 backend/app/schemas/answer_relevance_prompt.py diff --git a/backend/app/alembic/versions/008_add_answer_relevance_prompt.py b/backend/app/alembic/versions/008_add_answer_relevance_prompt.py new file mode 100644 index 0000000..acd409d --- /dev/null +++ b/backend/app/alembic/versions/008_add_answer_relevance_prompt.py @@ -0,0 +1,53 @@ +"""Add answer_relevance_prompt table + +Revision ID: 008 +Revises: 007 +Create Date: 2026-05-08 00:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "008" +down_revision = "007" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "answer_relevance_prompt", + sa.Column("id", sa.Uuid(), nullable=False), + sa.Column("organization_id", sa.Integer(), nullable=False), + sa.Column("project_id", sa.Integer(), nullable=False), + sa.Column("name", sa.String(), nullable=False), + sa.Column("description", sa.String(), nullable=False), + sa.Column("prompt_template", sa.Text(), nullable=False), + sa.Column("is_active", sa.Boolean(), nullable=False, server_default=sa.true()), + sa.Column("created_at", sa.DateTime(), nullable=False), + sa.Column("updated_at", sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + + op.create_index( + "idx_answer_relevance_prompt_org", + "answer_relevance_prompt", + ["organization_id"], + ) + op.create_index( + "idx_answer_relevance_prompt_project", + "answer_relevance_prompt", + ["project_id"], + ) + op.create_index( + "idx_answer_relevance_prompt_is_active", + "answer_relevance_prompt", + ["is_active"], + ) + + +def downgrade() -> None: + op.drop_table("answer_relevance_prompt") diff --git a/backend/app/api/main.py b/backend/app/api/main.py index f3c4543..40c8d40 100644 --- a/backend/app/api/main.py +++ b/backend/app/api/main.py @@ -1,6 +1,7 @@ from fastapi import APIRouter from app.api.routes import ( + answer_relevance_prompts, ban_lists, guardrails, topic_relevance_configs, @@ -9,6 +10,7 @@ ) api_router = APIRouter() +api_router.include_router(answer_relevance_prompts.router) api_router.include_router(ban_lists.router) api_router.include_router(guardrails.router) 
api_router.include_router(topic_relevance_configs.router) diff --git a/backend/app/api/routes/answer_relevance_prompts.py b/backend/app/api/routes/answer_relevance_prompts.py new file mode 100644 index 0000000..e0490ac --- /dev/null +++ b/backend/app/api/routes/answer_relevance_prompts.py @@ -0,0 +1,83 @@ +from typing import Annotated +from uuid import UUID + +from fastapi import APIRouter, Query + +from app.api.deps import MultitenantAuthDep, SessionDep +from app.crud.answer_relevance_prompt import answer_relevance_prompt_crud +from app.schemas.answer_relevance_prompt import ( + AnswerRelevancePromptCreate, + AnswerRelevancePromptResponse, + AnswerRelevancePromptUpdate, +) +from app.utils import APIResponse + +router = APIRouter( + prefix="/guardrails/answer_relevance_prompts", + tags=["Answer Relevance Prompts"], +) + + +@router.post("/", response_model=APIResponse[AnswerRelevancePromptResponse]) +def create_answer_relevance_prompt( + payload: AnswerRelevancePromptCreate, + session: SessionDep, + auth: MultitenantAuthDep, +) -> APIResponse[AnswerRelevancePromptResponse]: + obj = answer_relevance_prompt_crud.create( + session, payload, auth.organization_id, auth.project_id + ) + return APIResponse.success_response(data=obj) + + +@router.get("/", response_model=APIResponse[list[AnswerRelevancePromptResponse]]) +def list_answer_relevance_prompts( + session: SessionDep, + auth: MultitenantAuthDep, + offset: Annotated[int, Query(ge=0)] = 0, + limit: Annotated[int | None, Query(ge=1, le=100)] = None, +) -> APIResponse[list[AnswerRelevancePromptResponse]]: + objs = answer_relevance_prompt_crud.list( + session, auth.organization_id, auth.project_id, offset, limit + ) + return APIResponse.success_response(data=objs) + + +@router.get("/{id}", response_model=APIResponse[AnswerRelevancePromptResponse]) +def get_answer_relevance_prompt( + id: UUID, + session: SessionDep, + auth: MultitenantAuthDep, +) -> APIResponse[AnswerRelevancePromptResponse]: + obj = answer_relevance_prompt_crud.get( + session, id, auth.organization_id, auth.project_id + ) + return APIResponse.success_response(data=obj) + + +@router.patch("/{id}", response_model=APIResponse[AnswerRelevancePromptResponse]) +def update_answer_relevance_prompt( + id: UUID, + payload: AnswerRelevancePromptUpdate, + session: SessionDep, + auth: MultitenantAuthDep, +) -> APIResponse[AnswerRelevancePromptResponse]: + obj = answer_relevance_prompt_crud.update( + session, id, auth.organization_id, auth.project_id, payload + ) + return APIResponse.success_response(data=obj) + + +@router.delete("/{id}", response_model=APIResponse[dict]) +def delete_answer_relevance_prompt( + id: UUID, + session: SessionDep, + auth: MultitenantAuthDep, +) -> APIResponse[dict]: + obj = answer_relevance_prompt_crud.get( + session, id, auth.organization_id, auth.project_id + ) + answer_relevance_prompt_crud.delete(session, obj) + return APIResponse.success_response( + data={"message": "Answer relevance prompt deleted successfully"} + ) diff --git a/backend/app/api/routes/guardrails.py b/backend/app/api/routes/guardrails.py index dd195d8..93ffc29 100644 --- a/backend/app/api/routes/guardrails.py +++ b/backend/app/api/routes/guardrails.py @@ -19,10 +19,14 @@ from app.core.validators.config.ban_list_safety_validator_config import ( BanListSafetyValidatorConfig, ) +from app.crud.answer_relevance_prompt import answer_relevance_prompt_crud from app.crud.ban_list import ban_list_crud from app.crud.topic_relevance import topic_relevance_crud from app.crud.request_log import 
RequestLogCrud from app.crud.validator_log import ValidatorLogCrud +from app.core.validators.config.answer_relevance_custom_llm_safety_validator_config import ( + AnswerRelevanceCustomLLMSafetyValidatorConfig, +) from app.core.validators.config.topic_relevance_safety_validator_config import ( TopicRelevanceSafetyValidatorConfig, ) @@ -126,6 +130,16 @@ def _resolve_validator_configs(payload: GuardrailRequest, session: Session) -> N validator.configuration = config.configuration validator.prompt_schema_version = config.prompt_schema_version + elif isinstance(validator, AnswerRelevanceCustomLLMSafetyValidatorConfig): + if validator.custom_prompt_id is not None: + prompt_config = answer_relevance_prompt_crud.get( + session=session, + id=validator.custom_prompt_id, + organization_id=payload.organization_id, + project_id=payload.project_id, + ) + validator.prompt_template = prompt_config.prompt_template + def _validate_with_guard( payload: GuardrailRequest, diff --git a/backend/app/core/enum.py b/backend/app/core/enum.py index ff653c5..efbe4af 100644 --- a/backend/app/core/enum.py +++ b/backend/app/core/enum.py @@ -36,3 +36,4 @@ class ValidatorType(Enum): LlamaGuard7B = "llamaguard_7b" ProfanityFree = "profanity_free" NSFWText = "nsfw_text" + AnswerRelevanceCustomLLM = "answer_relevance_custom_llm" diff --git a/backend/app/core/validators/answer_relevance_custom_llm.py b/backend/app/core/validators/answer_relevance_custom_llm.py new file mode 100644 index 0000000..4f20704 --- /dev/null +++ b/backend/app/core/validators/answer_relevance_custom_llm.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +import json +from typing import Callable, Optional + +from guardrails import OnFailAction +from guardrails.validators import ( + FailResult, + PassResult, + ValidationResult, + Validator, + register_validator, +) + +DEFAULT_PROMPT_TEMPLATE = ( + "Query: {query}\n" + "Answer: {answer}\n\n" + "Does the answer fully satisfy the query and constraints?\n" + "Answer only YES or NO." +) + + +@register_validator(name="answer-relevance-custom-llm", data_type="string") +class AnswerRelevanceCustomLLM(Validator): + """ + Validates whether an LLM answer is relevant to the user query. + + Expects `value` to be a JSON string: {"query": "...", "answer": "..."}. + Uses a configurable prompt template with {query} and {answer} placeholders. + Returns PassResult for YES, FailResult for NO. + """ + + def __init__( + self, + prompt_template: str = DEFAULT_PROMPT_TEMPLATE, + llm_callable: str = "gpt-4o-mini", + on_fail: Optional[Callable] = OnFailAction.NOOP, + ): + super().__init__(on_fail=on_fail) + self.prompt_template = prompt_template + self.llm_callable = llm_callable + + def _validate(self, value: str, metadata: dict = None) -> ValidationResult: + try: + data = json.loads(value) + query = data.get("query", "") + answer = data.get("answer", "") + except (json.JSONDecodeError, TypeError): + return FailResult( + error_message="Input must be a JSON string with 'query' and 'answer' fields." + ) + + if not query.strip() or not answer.strip(): + return FailResult( + error_message="Both 'query' and 'answer' fields must be non-empty." 
+ ) + + try: + prompt = self.prompt_template.format(query=query, answer=answer) + except KeyError as e: + return FailResult(error_message=f"Prompt template missing placeholder: {e}") + + try: + from litellm import completion + + response = completion( + model=self.llm_callable, + messages=[{"role": "user", "content": prompt}], + max_tokens=10, + ) + response_text = response.choices[0].message.content.strip().upper() + except Exception as e: + return FailResult(error_message=f"LLM call failed: {e}") + + if response_text.startswith("YES"): + return PassResult(value=value) + + if response_text.startswith("NO"): + return FailResult( + error_message="The answer is not relevant to the query.", + ) + + return FailResult( + error_message=f"Unexpected LLM response for relevance check: {response_text}" + ) diff --git a/backend/app/core/validators/config/answer_relevance_custom_llm_safety_validator_config.py b/backend/app/core/validators/config/answer_relevance_custom_llm_safety_validator_config.py new file mode 100644 index 0000000..ab1d7db --- /dev/null +++ b/backend/app/core/validators/config/answer_relevance_custom_llm_safety_validator_config.py @@ -0,0 +1,30 @@ +from typing import Literal, Optional +from uuid import UUID + +from app.core.config import settings +from app.core.validators.answer_relevance_custom_llm import AnswerRelevanceCustomLLM +from app.core.validators.config.base_validator_config import BaseValidatorConfig + + +class AnswerRelevanceCustomLLMSafetyValidatorConfig(BaseValidatorConfig): + type: Literal["answer_relevance_custom_llm"] + llm_callable: str = "gpt-4o-mini" + # Inline prompt template with {query} and {answer} placeholders. + # If None, the validator uses its built-in default. + prompt_template: Optional[str] = None + # Reference to a stored custom prompt; resolved to prompt_template before build(). + custom_prompt_id: Optional[UUID] = None + + def build(self): + if not settings.OPENAI_API_KEY: + raise ValueError( + "OPENAI_API_KEY is not configured. " + "Answer relevance validation requires an OpenAI API key." 
+ ) + kwargs = dict( + llm_callable=self.llm_callable, + on_fail=self.resolve_on_fail(), + ) + if self.prompt_template: + kwargs["prompt_template"] = self.prompt_template + return AnswerRelevanceCustomLLM(**kwargs) diff --git a/backend/app/crud/answer_relevance_prompt.py b/backend/app/crud/answer_relevance_prompt.py new file mode 100644 index 0000000..5bce1f6 --- /dev/null +++ b/backend/app/crud/answer_relevance_prompt.py @@ -0,0 +1,126 @@ +from typing import List +from uuid import UUID + +from fastapi import HTTPException +from sqlalchemy.exc import IntegrityError +from sqlmodel import Session, select + +from app.models.config.answer_relevance_prompt import AnswerRelevancePrompt +from app.schemas.answer_relevance_prompt import ( + AnswerRelevancePromptCreate, + AnswerRelevancePromptUpdate, +) +from app.utils import now + + +class AnswerRelevancePromptCrud: + def create( + self, + session: Session, + payload: AnswerRelevancePromptCreate, + organization_id: int, + project_id: int, + ) -> AnswerRelevancePrompt: + obj = AnswerRelevancePrompt( + **payload.model_dump(), + organization_id=organization_id, + project_id=project_id, + ) + session.add(obj) + try: + session.commit() + except IntegrityError: + session.rollback() + raise HTTPException( + 400, + "Answer relevance prompt with the same configuration already exists", + ) + except Exception: + session.rollback() + raise + + session.refresh(obj) + return obj + + def get( + self, + session: Session, + id: UUID, + organization_id: int, + project_id: int, + ) -> AnswerRelevancePrompt: + query = select(AnswerRelevancePrompt).where( + AnswerRelevancePrompt.id == id, + AnswerRelevancePrompt.organization_id == organization_id, + AnswerRelevancePrompt.project_id == project_id, + ) + obj = session.exec(query).first() + if not obj: + raise HTTPException(404, "Answer relevance prompt not found") + return obj + + def list( + self, + session: Session, + organization_id: int, + project_id: int, + offset: int = 0, + limit: int | None = None, + ) -> List[AnswerRelevancePrompt]: + query = ( + select(AnswerRelevancePrompt) + .where( + AnswerRelevancePrompt.organization_id == organization_id, + AnswerRelevancePrompt.project_id == project_id, + ) + .order_by(AnswerRelevancePrompt.created_at, AnswerRelevancePrompt.id) + ) + + if offset: + query = query.offset(offset) + if limit: + query = query.limit(limit) + + return list(session.exec(query).all()) + + def update( + self, + session: Session, + id: UUID, + organization_id: int, + project_id: int, + payload: AnswerRelevancePromptUpdate, + ) -> AnswerRelevancePrompt: + obj = self.get(session, id, organization_id, project_id) + + update_data = payload.model_dump(exclude_unset=True) + for key, value in update_data.items(): + setattr(obj, key, value) + + obj.updated_at = now() + session.add(obj) + try: + session.commit() + except IntegrityError: + session.rollback() + raise HTTPException( + 400, + "Answer relevance prompt with the same configuration already exists", + ) + except Exception: + session.rollback() + raise + + session.refresh(obj) + return obj + + def delete(self, session: Session, obj: AnswerRelevancePrompt) -> None: + session.delete(obj) + try: + session.commit() + except Exception: + session.rollback() + raise + + +answer_relevance_prompt_crud = AnswerRelevancePromptCrud() diff --git a/backend/app/models/config/answer_relevance_prompt.py b/backend/app/models/config/answer_relevance_prompt.py new file mode 100644 index 0000000..0b8281f --- /dev/null +++ 
b/backend/app/models/config/answer_relevance_prompt.py @@ -0,0 +1,68 @@ +from datetime import datetime +from uuid import UUID, uuid4 + +from sqlmodel import Field, SQLModel + +from app.utils import now + + +class AnswerRelevancePrompt(SQLModel, table=True): + __tablename__ = "answer_relevance_prompt" + + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the prompt config"}, + ) + + organization_id: int = Field( + nullable=False, + index=True, + sa_column_kwargs={"comment": "Identifier for the organization"}, + ) + + project_id: int = Field( + nullable=False, + index=True, + sa_column_kwargs={"comment": "Identifier for the project"}, + ) + + name: str = Field( + nullable=False, + sa_column_kwargs={"comment": "Human-readable name for this prompt config"}, + ) + + description: str = Field( + nullable=False, + sa_column_kwargs={"comment": "Description of what this prompt evaluates"}, + ) + + # Must contain {query} and {answer} placeholders. + prompt_template: str = Field( + nullable=False, + sa_column_kwargs={ + "comment": "Prompt template with {query} and {answer} placeholders" + }, + ) + + is_active: bool = Field( + default=True, + index=True, + nullable=False, + sa_column_kwargs={"comment": "Whether this prompt config is active"}, + ) + + created_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the entry was created"}, + ) + + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={ + "comment": "Timestamp when the entry was last updated", + "onupdate": now, + }, + ) diff --git a/backend/app/schemas/answer_relevance_prompt.py b/backend/app/schemas/answer_relevance_prompt.py new file mode 100644 index 0000000..09d2b59 --- /dev/null +++ b/backend/app/schemas/answer_relevance_prompt.py @@ -0,0 +1,71 @@ +from datetime import datetime +from typing import Annotated, Optional +from uuid import UUID + +from pydantic import StringConstraints, field_validator +from sqlmodel import Field, SQLModel + +MAX_NAME_LENGTH = 100 +MAX_DESCRIPTION_LENGTH = 500 + +PromptName = Annotated[ + str, + StringConstraints(strip_whitespace=True, min_length=1, max_length=MAX_NAME_LENGTH), +] + +PromptDescription = Annotated[ + str, + StringConstraints( + strip_whitespace=True, min_length=1, max_length=MAX_DESCRIPTION_LENGTH + ), +] + +PromptTemplate = Annotated[ + str, + StringConstraints(strip_whitespace=True, min_length=1), +] + + +def _validate_placeholders(value: str) -> str: + missing = [p for p in ("{query}", "{answer}") if p not in value] + if missing: + raise ValueError( + f"prompt_template must contain the placeholders: {', '.join(missing)}" + ) + return value + + +class AnswerRelevancePromptBase(SQLModel): + name: PromptName + description: PromptDescription + prompt_template: PromptTemplate + + @field_validator("prompt_template") + @classmethod + def check_placeholders(cls, v: str) -> str: + return _validate_placeholders(v) + + +class AnswerRelevancePromptCreate(AnswerRelevancePromptBase): + pass + + +class AnswerRelevancePromptUpdate(SQLModel): + name: Optional[PromptName] = None + description: Optional[PromptDescription] = None + prompt_template: Optional[PromptTemplate] = None + is_active: Optional[bool] = None + + @field_validator("prompt_template") + @classmethod + def check_placeholders(cls, v: Optional[str]) -> Optional[str]: + if v is not None: + return _validate_placeholders(v) + return v + + +class 
AnswerRelevancePromptResponse(AnswerRelevancePromptBase): + id: UUID + is_active: bool + created_at: datetime + updated_at: datetime diff --git a/backend/app/schemas/guardrail_config.py b/backend/app/schemas/guardrail_config.py index 968c260..84b6446 100644 --- a/backend/app/schemas/guardrail_config.py +++ b/backend/app/schemas/guardrail_config.py @@ -33,9 +33,13 @@ from app.core.validators.config.profanity_free_safety_validator_config import ( ProfanityFreeSafetyValidatorConfig, ) +from app.core.validators.config.answer_relevance_custom_llm_safety_validator_config import ( + AnswerRelevanceCustomLLMSafetyValidatorConfig, +) ValidatorConfigItem = Annotated[ Union[ + AnswerRelevanceCustomLLMSafetyValidatorConfig, BanListSafetyValidatorConfig, GenderAssumptionBiasSafetyValidatorConfig, LexicalSlurSafetyValidatorConfig, From 474245b146e7893582e91c99763887de74562648 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Fri, 8 May 2026 12:41:11 +0530 Subject: [PATCH 2/3] Added documentation --- backend/app/api/API_USAGE.md | 94 +++++++++++++++++-- .../answer_relevance_prompts/create_prompt.md | 43 +++++++++ .../answer_relevance_prompts/delete_prompt.md | 10 ++ .../answer_relevance_prompts/get_prompt.md | 9 ++ .../answer_relevance_prompts/list_prompts.md | 12 +++ .../answer_relevance_prompts/update_prompt.md | 12 +++ .../app/api/docs/guardrails/run_guardrails.md | 1 + .../api/routes/answer_relevance_prompts.py | 32 +++++-- backend/app/core/validators/README.md | 54 ++++++++++- 9 files changed, 254 insertions(+), 13 deletions(-) create mode 100644 backend/app/api/docs/answer_relevance_prompts/create_prompt.md create mode 100644 backend/app/api/docs/answer_relevance_prompts/delete_prompt.md create mode 100644 backend/app/api/docs/answer_relevance_prompts/get_prompt.md create mode 100644 backend/app/api/docs/answer_relevance_prompts/list_prompts.md create mode 100644 backend/app/api/docs/answer_relevance_prompts/update_prompt.md diff --git a/backend/app/api/API_USAGE.md b/backend/app/api/API_USAGE.md index 55392b0..19982a9 100644 --- a/backend/app/api/API_USAGE.md +++ b/backend/app/api/API_USAGE.md @@ -7,6 +7,7 @@ This guide explains how to use the current API surface for: - Guardrail execution - Ban list CRUD for multi-tenant projects - Topic relevance config CRUD for multi-tenant projects +- Answer relevance prompt config CRUD for multi-tenant projects ## Base URL and Version @@ -184,8 +185,8 @@ Request fields: Important: - Runtime validators use `on_fail`. - If you pass objects from config APIs, server normalization supports `on_fail_action` and strips non-runtime fields. -- For `topic_relevance`, pass `topic_relevance_config_id` only. -- The API resolves `configuration` + `prompt_schema_version` in `guardrails.py` before validator execution, so the validator always executes with both values. +- For `topic_relevance`, pass `topic_relevance_config_id` only. The API resolves `configuration` + `prompt_schema_version` in `guardrails.py` before validator execution. +- For `answer_relevance_custom_llm`, `input` must be a JSON string `{"query": "...", "answer": "..."}`. Pass `custom_prompt_id` to use a stored tenant prompt, or omit to use the built-in default prompt. 
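+
+A quick sketch of building that `input` value on the client side (illustrative only; the query and answer strings below are placeholders):
+
+```python
+import json
+
+# `input` is a JSON *string*, not a nested JSON object.
+answer_relevance_input = json.dumps(
+    {
+        "query": "What foods should I avoid during pregnancy?",
+        "answer": "Avoid unpasteurised dairy, raw fish, and undercooked meat.",
+    }
+)
+```
+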
Example: @@ -421,7 +422,84 @@ curl -X DELETE "http://localhost:8001/api/v1/guardrails/topic_relevance_configs/ -H "X-API-KEY: " ``` -## 7) End-to-End Usage Pattern +## 7) Answer Relevance Prompt APIs (multi-tenant) + +These endpoints manage tenant-scoped custom prompt templates for the `answer_relevance_custom_llm` validator and use `X-API-KEY` auth. + +Base path: +- `/api/v1/guardrails/answer_relevance_prompts` + +## 7.1 Create answer relevance prompt + +Endpoint: +- `POST /api/v1/guardrails/answer_relevance_prompts/` + +Example: + +```bash +curl -X POST "http://localhost:8001/api/v1/guardrails/answer_relevance_prompts/" \ + -H "X-API-KEY: " \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Maternal Health Relevance", + "description": "Checks if LLM answer addresses a maternal health query", + "prompt_template": "You are evaluating a maternal health assistant.\nQuery: {query}\nAnswer: {answer}\n\nDoes the answer directly address the maternal health query with accurate information?\nAnswer only YES or NO." + }' +``` + +## 7.2 List answer relevance prompts + +Endpoint: +- `GET /api/v1/guardrails/answer_relevance_prompts/?offset=0&limit=20` + +Example: + +```bash +curl -X GET "http://localhost:8001/api/v1/guardrails/answer_relevance_prompts/?offset=0&limit=20" \ + -H "X-API-KEY: " +``` + +## 7.3 Get answer relevance prompt by id + +Endpoint: +- `GET /api/v1/guardrails/answer_relevance_prompts/{id}` + +Example: + +```bash +curl -X GET "http://localhost:8001/api/v1/guardrails/answer_relevance_prompts/" \ + -H "X-API-KEY: " +``` + +## 7.4 Update answer relevance prompt + +Endpoint: +- `PATCH /api/v1/guardrails/answer_relevance_prompts/{id}` + +Example: + +```bash +curl -X PATCH "http://localhost:8001/api/v1/guardrails/answer_relevance_prompts/" \ + -H "X-API-KEY: " \ + -H "Content-Type: application/json" \ + -d '{ + "prompt_template": "Query: {query}\nAnswer: {answer}\n\nIs this answer helpful and relevant?\nAnswer only YES or NO." + }' +``` + +## 7.5 Delete answer relevance prompt + +Endpoint: +- `DELETE /api/v1/guardrails/answer_relevance_prompts/{id}` + +Example: + +```bash +curl -X DELETE "http://localhost:8001/api/v1/guardrails/answer_relevance_prompts/" \ + -H "X-API-KEY: " +``` + +## 8) End-to-End Usage Pattern Recommended request flow: 1. Create/update validator configs via `/guardrails/validators/configs`. @@ -431,15 +509,16 @@ Recommended request flow: 5. If `rephrase_needed=true`, ask user to rephrase. 6. For `ban_list` validators without inline `banned_words`, create/manage a ban list first and pass `ban_list_id`. 7. For `topic_relevance`, create/manage a topic relevance config and pass `topic_relevance_config_id` at runtime. The server resolves the configuration string internally. +8. For `answer_relevance_custom_llm`, format `input` as `{"query": "...", "answer": "..."}`. Optionally create a custom prompt via the Answer Relevance Prompt APIs and pass `custom_prompt_id`. If no `custom_prompt_id` is given, the built-in default prompt is used. -## 8) Common Errors +## 9) Common Errors - `401 Missing Authorization header` - Add `Authorization: Bearer `. - `401 Invalid authorization token` - Verify plaintext token matches server-side hash. - `401 Missing X-API-KEY header` - - Add `X-API-KEY: ` for ban list and topic relevance config endpoints. + - Add `X-API-KEY: ` for ban list, topic relevance config, and answer relevance prompt endpoints. - `401 Invalid API key` - Verify the API key is valid in the upstream Kaapi auth service. 
- `Invalid request_id` @@ -450,8 +529,10 @@ Recommended request flow: - Confirm `id`, `organization_id`, and `project_id` match. - `Topic relevance preset not found` - Confirm topic relevance config `id` exists within your tenant scope. +- `Answer relevance prompt not found` + - Confirm the answer relevance prompt `id` exists within your tenant scope. -## 9) Current Validator Types +## 10) Current Validator Types From `validators.json`: - `uli_slur_match` @@ -463,6 +544,7 @@ From `validators.json`: - `llamaguard_7b` - `profanity_free` - `nsfw_text` +- `answer_relevance_custom_llm` Source of truth: - `backend/app/core/validators/validators.json` diff --git a/backend/app/api/docs/answer_relevance_prompts/create_prompt.md b/backend/app/api/docs/answer_relevance_prompts/create_prompt.md new file mode 100644 index 0000000..2816024 --- /dev/null +++ b/backend/app/api/docs/answer_relevance_prompts/create_prompt.md @@ -0,0 +1,43 @@ +Creates an answer relevance prompt config for the tenant resolved from `X-API-KEY`. + +Behavior notes: +- Stores a custom prompt template used by the `answer_relevance_custom_llm` validator to evaluate whether an LLM answer is relevant to a user query. +- Tenant scope is enforced from the API key context. +- `prompt_template` must contain both `{query}` and `{answer}` placeholders; the server rejects templates missing either. + +Common failure cases: +- Missing or invalid API key. +- Payload schema validation errors. +- `prompt_template` is missing `{query}` or `{answer}` placeholder. + +## Field glossary + +**`prompt_template`** +A string with `{query}` and `{answer}` placeholders. At validation time, the guardrail substitutes the user's query and the LLM's answer, then asks the model to respond `YES` (relevant) or `NO` (not relevant). + +Default template used when no custom prompt is configured: +``` +Query: {query} +Answer: {answer} + +Does the answer fully satisfy the query and constraints? +Answer only YES or NO. +``` + +NGOs can customise this to add domain-specific constraints, language preferences, or stricter relevance criteria for their use case. + +Example custom template: +``` +You are evaluating a maternal health assistant. +Query: {query} +Answer: {answer} + +Does the answer directly address the maternal health query with accurate information? +Answer only YES or NO. +``` + +**`name`** +Human-readable label for this prompt config (max 100 characters). + +**`description`** +What this prompt evaluates (max 500 characters). diff --git a/backend/app/api/docs/answer_relevance_prompts/delete_prompt.md b/backend/app/api/docs/answer_relevance_prompts/delete_prompt.md new file mode 100644 index 0000000..8d337a4 --- /dev/null +++ b/backend/app/api/docs/answer_relevance_prompts/delete_prompt.md @@ -0,0 +1,10 @@ +Deletes an answer relevance prompt config by id for the tenant resolved from `X-API-KEY`. + +Behavior notes: +- Tenant scope is enforced from the API key context. +- Deletion is permanent; any guardrail configs referencing this `custom_prompt_id` will fail to resolve at runtime after deletion. + +Common failure cases: +- Missing or invalid API key. +- Prompt config not found in tenant's scope. +- Invalid id format. 
diff --git a/backend/app/api/docs/answer_relevance_prompts/get_prompt.md b/backend/app/api/docs/answer_relevance_prompts/get_prompt.md new file mode 100644 index 0000000..d0f9b90 --- /dev/null +++ b/backend/app/api/docs/answer_relevance_prompts/get_prompt.md @@ -0,0 +1,9 @@ +Fetches a single answer relevance prompt config by id for the tenant resolved from `X-API-KEY`. + +Behavior notes: +- Tenant scope is enforced: only configs belonging to the resolved `organization_id` and `project_id` are accessible. + +Common failure cases: +- Missing or invalid API key. +- Prompt config not found in tenant's scope. +- Invalid id format. diff --git a/backend/app/api/docs/answer_relevance_prompts/list_prompts.md b/backend/app/api/docs/answer_relevance_prompts/list_prompts.md new file mode 100644 index 0000000..5c0d8d4 --- /dev/null +++ b/backend/app/api/docs/answer_relevance_prompts/list_prompts.md @@ -0,0 +1,12 @@ +Lists answer relevance prompt configs for the tenant resolved from `X-API-KEY`. + +Behavior notes: +- Returns all prompt configs scoped to the tenant's `organization_id` and `project_id`. +- Supports pagination via `offset` and `limit`. +- `offset` defaults to `0`. +- `limit` is optional; when omitted, no limit is applied. +- Results are ordered by `created_at` ascending, then `id`. + +Common failure cases: +- Missing or invalid API key. +- Invalid pagination values. diff --git a/backend/app/api/docs/answer_relevance_prompts/update_prompt.md b/backend/app/api/docs/answer_relevance_prompts/update_prompt.md new file mode 100644 index 0000000..8cfe214 --- /dev/null +++ b/backend/app/api/docs/answer_relevance_prompts/update_prompt.md @@ -0,0 +1,12 @@ +Partially updates an answer relevance prompt config by id for the tenant resolved from `X-API-KEY`. + +Behavior notes: +- Supports patch-style updates; omitted fields remain unchanged. +- Tenant scope is enforced from the API key context. +- If `prompt_template` is updated, it must still contain both `{query}` and `{answer}` placeholders. + +Common failure cases: +- Missing or invalid API key. +- Prompt config not found in tenant's scope. +- Payload schema validation errors. +- Updated `prompt_template` is missing `{query}` or `{answer}` placeholder. diff --git a/backend/app/api/docs/guardrails/run_guardrails.md b/backend/app/api/docs/guardrails/run_guardrails.md index d0f1c7f..4b7eb27 100644 --- a/backend/app/api/docs/guardrails/run_guardrails.md +++ b/backend/app/api/docs/guardrails/run_guardrails.md @@ -8,6 +8,7 @@ Behavior notes: - For `ban_list`, `ban_list_id` can be resolved to `banned_words` from tenant ban list configs. - For `topic_relevance`, `topic_relevance_config_id` is required and is resolved to `configuration` + `prompt_schema_version` from tenant topic relevance configs. Requires `OPENAI_API_KEY` to be configured; returns a validation failure with an explicit error if missing. - For `llm_critic`, `OPENAI_API_KEY` must be configured; returns `success=false` with an explicit error if missing. +- For `answer_relevance_custom_llm`, `input` must be a JSON string `{"query": "...", "answer": "..."}`. Pass `custom_prompt_id` to use a tenant-stored prompt template, or `prompt_template` inline. Requires `OPENAI_API_KEY`. - For `llamaguard_7b`, `policies` accepts human-readable policy names (see table below). If omitted, all policies are enforced by default. 
| `policies` value | Policy enforced | diff --git a/backend/app/api/routes/answer_relevance_prompts.py b/backend/app/api/routes/answer_relevance_prompts.py index e0490ac..5b51cdd 100644 --- a/backend/app/api/routes/answer_relevance_prompts.py +++ b/backend/app/api/routes/answer_relevance_prompts.py @@ -10,7 +10,7 @@ AnswerRelevancePromptResponse, AnswerRelevancePromptUpdate, ) -from app.utils import APIResponse +from app.utils import APIResponse, load_description router = APIRouter( prefix="/guardrails/answer_relevance_prompts", @@ -18,7 +18,11 @@ ) -@router.post("/", response_model=APIResponse[AnswerRelevancePromptResponse]) +@router.post( + "/", + description=load_description("answer_relevance_prompts/create_prompt.md"), + response_model=APIResponse[AnswerRelevancePromptResponse], +) def create_answer_relevance_prompt( payload: AnswerRelevancePromptCreate, session: SessionDep, @@ -30,7 +34,11 @@ def create_answer_relevance_prompt( return APIResponse.success_response(data=obj) -@router.get("/", response_model=APIResponse[list[AnswerRelevancePromptResponse]]) +@router.get( + "/", + description=load_description("answer_relevance_prompts/list_prompts.md"), + response_model=APIResponse[list[AnswerRelevancePromptResponse]], +) def list_answer_relevance_prompts( session: SessionDep, auth: MultitenantAuthDep, @@ -43,7 +51,11 @@ def list_answer_relevance_prompts( return APIResponse.success_response(data=objs) -@router.get("/{id}", response_model=APIResponse[AnswerRelevancePromptResponse]) +@router.get( + "/{id}", + description=load_description("answer_relevance_prompts/get_prompt.md"), + response_model=APIResponse[AnswerRelevancePromptResponse], +) def get_answer_relevance_prompt( id: UUID, session: SessionDep, @@ -55,7 +67,11 @@ def get_answer_relevance_prompt( return APIResponse.success_response(data=obj) -@router.patch("/{id}", response_model=APIResponse[AnswerRelevancePromptResponse]) +@router.patch( + "/{id}", + description=load_description("answer_relevance_prompts/update_prompt.md"), + response_model=APIResponse[AnswerRelevancePromptResponse], +) def update_answer_relevance_prompt( id: UUID, payload: AnswerRelevancePromptUpdate, @@ -68,7 +84,11 @@ def update_answer_relevance_prompt( return APIResponse.success_response(data=obj) -@router.delete("/{id}", response_model=APIResponse[dict]) +@router.delete( + "/{id}", + description=load_description("answer_relevance_prompts/delete_prompt.md"), + response_model=APIResponse[dict], +) def delete_answer_relevance_prompt( id: UUID, session: SessionDep, diff --git a/backend/app/core/validators/README.md b/backend/app/core/validators/README.md index f843d8e..9210aea 100644 --- a/backend/app/core/validators/README.md +++ b/backend/app/core/validators/README.md @@ -15,6 +15,7 @@ Current validator manifest: - `llamaguard_7b` (source: `hub://guardrails/llamaguard_7b`) - `profanity_free` (source: `hub://guardrails/profanity_free`) - `nsfw_text` (source: `hub://guardrails/nsfw_text`) +- `answer_relevance_custom_llm` (source: `local`) ## Configuration Model @@ -483,6 +484,54 @@ Notes / limitations: - No programmatic fix is applied — with `on_fail=fix`, `safe_text` will be `""` and the response `metadata.reason` will identify this validator as the cause. - English-focused; cross-lingual profanity may not be detected. 
+### 10) Answer Relevance Custom LLM Validator (`answer_relevance_custom_llm`) + +Code: + +- Config: `backend/app/core/validators/config/answer_relevance_custom_llm_safety_validator_config.py` +- Runtime validator: `backend/app/core/validators/answer_relevance_custom_llm.py` + +What it does: + +- Evaluates whether an LLM's answer is relevant to the user's query by asking a configurable LLM to respond YES or NO. +- Accepts `input` as a JSON string `{"query": "...", "answer": "..."}`. +- Uses a customizable prompt template with `{query}` and `{answer}` placeholders; falls back to a built-in default prompt if none is provided. +- Supports per-tenant custom prompts stored via the Answer Relevance Prompt APIs and referenced by `custom_prompt_id`. + +Why this is used: + +- Detects hallucinated or off-topic LLM responses before they are shown to users. +- Each NGO can tune the relevance criteria via a custom prompt without code changes (e.g. stricter domain constraints, language-specific phrasing). + +Recommendation: + +- primarily `output` + - Why `output`: answer relevance is a property of the LLM's generated response relative to the user's query. + +Parameters / customization: + +- `llm_callable: str` (default: `gpt-4o-mini`) — model identifier passed to LiteLLM for the YES/NO evaluation +- `prompt_template: str` (optional) — inline prompt with `{query}` and `{answer}` placeholders +- `custom_prompt_id: UUID` (optional) — reference to a tenant-stored prompt config; resolved to `prompt_template` before execution +- `on_fail` + +Default prompt: +``` +Query: {query} +Answer: {answer} + +Does the answer fully satisfy the query and constraints? +Answer only YES or NO. +``` + +Notes / limitations: + +- **Requires `OPENAI_API_KEY` to be set in environment variables.** +- `input` to the guardrail endpoint must be a JSON string: `{"query": "...", "answer": "..."}`. Both fields must be non-empty. +- LLM-judge responses can vary; YES/NO parsing uses prefix matching. +- `on_fail=fix` has no programmatic fix for irrelevant answers — `safe_text` will be `""` and `metadata.reason` will identify this validator. +- If `custom_prompt_id` is deleted after being referenced, the guardrail will return a 404 at resolution time. 
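+
+A minimal local usage sketch (illustrative; it assumes `OPENAI_API_KEY` is exported since the validator calls the model through LiteLLM, the query/answer strings are placeholders, and `_validate` is invoked directly only to show the input/output contract):
+
+```python
+import json
+
+from app.core.validators.answer_relevance_custom_llm import AnswerRelevanceCustomLLM
+
+validator = AnswerRelevanceCustomLLM(
+    prompt_template=(
+        "Query: {query}\nAnswer: {answer}\n\n"
+        "Is the answer relevant to the query? Answer only YES or NO."
+    ),
+    llm_callable="gpt-4o-mini",
+)
+
+# The validator expects a JSON string with both fields non-empty.
+payload = json.dumps(
+    {
+        "query": "What foods should I avoid during pregnancy?",
+        "answer": "Avoid unpasteurised dairy, raw fish, and undercooked meat.",
+    }
+)
+
+result = validator._validate(payload, {})  # PassResult for YES, FailResult for NO
+```
+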
+ ## Example Config Payloads Example: create validator config (stored shape) @@ -514,7 +563,7 @@ Example: runtime guardrail validator object (execution shape) Default stage strategy: - Input guardrails: `pii_remover`, `uli_slur_match`, `ban_list`, `topic_relevance` (when scope enforcement is needed), `profanity_free`, `llamaguard_7b` -- Output guardrails: `pii_remover`, `uli_slur_match`, `gender_assumption_bias`, `ban_list`, `profanity_free`, `llamaguard_7b` +- Output guardrails: `pii_remover`, `uli_slur_match`, `gender_assumption_bias`, `ban_list`, `profanity_free`, `llamaguard_7b`, `answer_relevance_custom_llm` (when answer quality must be verified) Tuning strategy: @@ -534,5 +583,8 @@ Tuning strategy: - `backend/app/core/validators/config/llamaguard_7b_safety_validator_config.py` - `backend/app/core/validators/config/nsfw_text_safety_validator_config.py` - `backend/app/core/validators/config/profanity_free_safety_validator_config.py` +- `backend/app/core/validators/config/answer_relevance_custom_llm_safety_validator_config.py` +- `backend/app/core/validators/answer_relevance_custom_llm.py` +- `backend/app/models/config/answer_relevance_prompt.py` - `backend/app/schemas/guardrail_config.py` - `backend/app/schemas/validator_config.py` From 64e40aa9bbfbf29f0e0fc21c4e23cdea27a8bbc5 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Fri, 8 May 2026 12:52:23 +0530 Subject: [PATCH 3/3] added tests --- .../validators/answer_relevance_custom_llm.py | 3 +- .../test_answer_relevance_prompts_api.py | 165 ++++++++ ...nswer_relevance_prompts_api_integration.py | 360 ++++++++++++++++++ backend/app/tests/test_llm_validators.py | 95 +++++ backend/app/tests/test_validate_with_guard.py | 78 ++++ .../test_answer_relevance_custom_llm.py | 237 ++++++++++++ 6 files changed, 936 insertions(+), 2 deletions(-) create mode 100644 backend/app/tests/test_answer_relevance_prompts_api.py create mode 100644 backend/app/tests/test_answer_relevance_prompts_api_integration.py create mode 100644 backend/app/tests/validators/test_answer_relevance_custom_llm.py diff --git a/backend/app/core/validators/answer_relevance_custom_llm.py b/backend/app/core/validators/answer_relevance_custom_llm.py index 4f20704..305de28 100644 --- a/backend/app/core/validators/answer_relevance_custom_llm.py +++ b/backend/app/core/validators/answer_relevance_custom_llm.py @@ -3,6 +3,7 @@ import json from typing import Callable, Optional +from litellm import completion from guardrails import OnFailAction from guardrails.validators import ( FailResult, @@ -61,8 +62,6 @@ def _validate(self, value: str, metadata: dict = None) -> ValidationResult: return FailResult(error_message=f"Prompt template missing placeholder: {e}") try: - from litellm import completion - response = completion( model=self.llm_callable, messages=[{"role": "user", "content": prompt}], diff --git a/backend/app/tests/test_answer_relevance_prompts_api.py b/backend/app/tests/test_answer_relevance_prompts_api.py new file mode 100644 index 0000000..e21bea9 --- /dev/null +++ b/backend/app/tests/test_answer_relevance_prompts_api.py @@ -0,0 +1,165 @@ +from unittest.mock import MagicMock, patch +from uuid import UUID + +import pytest +from sqlmodel import Session + +from app.api.deps import TenantContext +from app.api.routes.answer_relevance_prompts import ( + create_answer_relevance_prompt, + delete_answer_relevance_prompt, + get_answer_relevance_prompt, + list_answer_relevance_prompts, + update_answer_relevance_prompt, +) +from app.schemas.answer_relevance_prompt import ( + 
AnswerRelevancePromptCreate, + AnswerRelevancePromptUpdate, +) + +PROMPT_TEST_ID = UUID("aaaabbbb-cccc-dddd-eeee-ffffffffffff") +PROMPT_TEST_ORG_ID = 5 +PROMPT_TEST_PROJECT_ID = 50 +VALID_TEMPLATE = "Query: {query}\nAnswer: {answer}\nRelevant? YES or NO." + + +@pytest.fixture +def mock_session(): + return MagicMock(spec=Session) + + +@pytest.fixture +def sample_prompt(): + obj = MagicMock() + obj.id = PROMPT_TEST_ID + obj.name = "Health Relevance" + obj.description = "Checks answer relevance for health queries" + obj.prompt_template = VALID_TEMPLATE + obj.is_active = True + obj.organization_id = PROMPT_TEST_ORG_ID + obj.project_id = PROMPT_TEST_PROJECT_ID + return obj + + +@pytest.fixture +def create_payload(): + return AnswerRelevancePromptCreate( + name="Health Relevance", + description="Checks answer relevance for health queries", + prompt_template=VALID_TEMPLATE, + ) + + +@pytest.fixture +def auth_context(): + return TenantContext( + organization_id=PROMPT_TEST_ORG_ID, + project_id=PROMPT_TEST_PROJECT_ID, + ) + + +def test_create_calls_crud(mock_session, create_payload, sample_prompt, auth_context): + with patch( + "app.api.routes.answer_relevance_prompts.answer_relevance_prompt_crud" + ) as crud: + crud.create.return_value = sample_prompt + + result = create_answer_relevance_prompt( + payload=create_payload, + session=mock_session, + auth=auth_context, + ) + + crud.create.assert_called_once_with( + mock_session, + create_payload, + PROMPT_TEST_ORG_ID, + PROMPT_TEST_PROJECT_ID, + ) + assert result.data == sample_prompt + + +def test_list_returns_data(mock_session, sample_prompt, auth_context): + with patch( + "app.api.routes.answer_relevance_prompts.answer_relevance_prompt_crud" + ) as crud: + crud.list.return_value = [sample_prompt] + + result = list_answer_relevance_prompts( + session=mock_session, + auth=auth_context, + ) + + crud.list.assert_called_once_with( + mock_session, + PROMPT_TEST_ORG_ID, + PROMPT_TEST_PROJECT_ID, + 0, + None, + ) + assert len(result.data) == 1 + + +def test_get_success(mock_session, sample_prompt, auth_context): + with patch( + "app.api.routes.answer_relevance_prompts.answer_relevance_prompt_crud" + ) as crud: + crud.get.return_value = sample_prompt + + result = get_answer_relevance_prompt( + id=PROMPT_TEST_ID, + session=mock_session, + auth=auth_context, + ) + + crud.get.assert_called_once_with( + mock_session, + PROMPT_TEST_ID, + PROMPT_TEST_ORG_ID, + PROMPT_TEST_PROJECT_ID, + ) + assert result.data == sample_prompt + + +def test_update_success(mock_session, sample_prompt, auth_context): + with patch( + "app.api.routes.answer_relevance_prompts.answer_relevance_prompt_crud" + ) as crud: + crud.update.return_value = sample_prompt + + result = update_answer_relevance_prompt( + id=PROMPT_TEST_ID, + payload=AnswerRelevancePromptUpdate(name="updated"), + session=mock_session, + auth=auth_context, + ) + + crud.update.assert_called_once() + args, _ = crud.update.call_args + assert args[0] == mock_session + assert args[1] == PROMPT_TEST_ID + assert args[2] == PROMPT_TEST_ORG_ID + assert args[3] == PROMPT_TEST_PROJECT_ID + assert result.data == sample_prompt + + +def test_delete_success(mock_session, sample_prompt, auth_context): + with patch( + "app.api.routes.answer_relevance_prompts.answer_relevance_prompt_crud" + ) as crud: + crud.get.return_value = sample_prompt + + result = delete_answer_relevance_prompt( + id=PROMPT_TEST_ID, + session=mock_session, + auth=auth_context, + ) + + crud.get.assert_called_once_with( + mock_session, + PROMPT_TEST_ID, + 
PROMPT_TEST_ORG_ID, + PROMPT_TEST_PROJECT_ID, + ) + crud.delete.assert_called_once_with(mock_session, sample_prompt) + assert result.success is True diff --git a/backend/app/tests/test_answer_relevance_prompts_api_integration.py b/backend/app/tests/test_answer_relevance_prompts_api_integration.py new file mode 100644 index 0000000..14d5ccf --- /dev/null +++ b/backend/app/tests/test_answer_relevance_prompts_api_integration.py @@ -0,0 +1,360 @@ +import uuid + +import pytest + +from app.schemas.answer_relevance_prompt import MAX_DESCRIPTION_LENGTH, MAX_NAME_LENGTH + +pytestmark = pytest.mark.integration + +BASE_URL = "/api/v1/guardrails/answer_relevance_prompts/" +DEFAULT_API_KEY = "org1_project1" +ALT_API_KEY = "org999_project999" + +VALID_TEMPLATE = "Query: {query}\nAnswer: {answer}\nIs the answer relevant? YES or NO." +CUSTOM_TEMPLATE = ( + "You are evaluating a health assistant.\n" + "Query: {query}\n" + "Answer: {answer}\n" + "Does the answer address the health query? YES or NO." +) + + +class BaseAnswerRelevancePromptTest: + def _headers(self, api_key=DEFAULT_API_KEY): + return {"X-API-Key": api_key} + + def create(self, client, api_key=DEFAULT_API_KEY, **overrides): + payload = { + "name": "Health Relevance", + "description": "Checks LLM answer relevance for health queries", + "prompt_template": VALID_TEMPLATE, + **overrides, + } + return client.post(BASE_URL, json=payload, headers=self._headers(api_key)) + + def list(self, client, api_key=DEFAULT_API_KEY, **filters): + return client.get(BASE_URL, params=filters, headers=self._headers(api_key)) + + def get(self, client, id, api_key=DEFAULT_API_KEY): + return client.get(f"{BASE_URL}{id}", headers=self._headers(api_key)) + + def update(self, client, id, payload, api_key=DEFAULT_API_KEY): + return client.patch( + f"{BASE_URL}{id}", + json=payload, + headers=self._headers(api_key), + ) + + def delete(self, client, id, api_key=DEFAULT_API_KEY): + return client.delete(f"{BASE_URL}{id}", headers=self._headers(api_key)) + + +class TestCreateAnswerRelevancePrompt(BaseAnswerRelevancePromptTest): + def test_create_success(self, integration_client, clear_database): + response = self.create(integration_client) + + assert response.status_code == 200 + data = response.json()["data"] + assert data["name"] == "Health Relevance" + assert "{query}" in data["prompt_template"] + assert "{answer}" in data["prompt_template"] + assert data["is_active"] is True + assert "id" in data + assert "created_at" in data + assert "updated_at" in data + + def test_create_with_custom_template(self, integration_client, clear_database): + response = self.create( + integration_client, + name="Custom Health Prompt", + prompt_template=CUSTOM_TEMPLATE, + ) + + assert response.status_code == 200 + data = response.json()["data"] + assert "health assistant" in data["prompt_template"] + + def test_create_validation_error_missing_required_fields( + self, integration_client, clear_database + ): + response = integration_client.post( + BASE_URL, + json={"name": "incomplete"}, + headers=self._headers(), + ) + + assert response.status_code == 422 + + def test_create_validation_error_template_missing_query_placeholder( + self, integration_client, clear_database + ): + response = self.create( + integration_client, + prompt_template="Answer: {answer}\nRelevant? 
YES or NO.", + ) + + assert response.status_code == 422 + + def test_create_validation_error_template_missing_answer_placeholder( + self, integration_client, clear_database + ): + response = self.create( + integration_client, + prompt_template="Query: {query}\nRelevant? YES or NO.", + ) + + assert response.status_code == 422 + + def test_create_validation_error_template_missing_both_placeholders( + self, integration_client, clear_database + ): + response = self.create( + integration_client, + prompt_template="Is this relevant? YES or NO.", + ) + + assert response.status_code == 422 + + def test_create_validation_error_name_too_long( + self, integration_client, clear_database + ): + response = self.create( + integration_client, + name="n" * (MAX_NAME_LENGTH + 1), + ) + + assert response.status_code == 422 + + def test_create_validation_error_description_too_long( + self, integration_client, clear_database + ): + response = self.create( + integration_client, + description="d" * (MAX_DESCRIPTION_LENGTH + 1), + ) + + assert response.status_code == 422 + + def test_create_validation_error_empty_name( + self, integration_client, clear_database + ): + response = self.create(integration_client, name="") + + assert response.status_code == 422 + + +class TestListAnswerRelevancePrompts(BaseAnswerRelevancePromptTest): + def test_list_success(self, integration_client, clear_database): + assert self.create(integration_client, name="Prompt 1").status_code == 200 + assert self.create(integration_client, name="Prompt 2").status_code == 200 + assert self.create(integration_client, name="Prompt 3").status_code == 200 + + response = self.list(integration_client) + + assert response.status_code == 200 + data = response.json()["data"] + assert len(data) == 3 + + def test_list_empty(self, integration_client, clear_database): + response = self.list(integration_client) + + assert response.status_code == 200 + assert response.json()["data"] == [] + + def test_list_pagination_with_limit(self, integration_client, clear_database): + for i in range(4): + self.create(integration_client, name=f"Prompt {i}") + + response = self.list(integration_client, limit=2) + + assert response.status_code == 200 + assert len(response.json()["data"]) == 2 + + def test_list_pagination_with_offset_and_limit( + self, integration_client, clear_database + ): + for i in range(4): + self.create(integration_client, name=f"Prompt {i}") + + full_data = self.list(integration_client).json()["data"] + response = self.list(integration_client, offset=2, limit=2) + + assert response.status_code == 200 + paged_data = response.json()["data"] + assert len(paged_data) == 2 + assert [item["id"] for item in paged_data] == [ + item["id"] for item in full_data[2:4] + ] + + def test_list_is_tenant_scoped(self, integration_client, clear_database): + self.create(integration_client, name="Tenant1 prompt") + + response = self.list(integration_client, api_key=ALT_API_KEY) + + assert response.status_code == 200 + assert response.json()["data"] == [] + + +class TestGetAnswerRelevancePrompt(BaseAnswerRelevancePromptTest): + def test_get_success(self, integration_client, clear_database): + create_resp = self.create(integration_client) + prompt_id = create_resp.json()["data"]["id"] + + response = self.get(integration_client, prompt_id) + + assert response.status_code == 200 + data = response.json()["data"] + assert data["id"] == prompt_id + assert "{query}" in data["prompt_template"] + assert "{answer}" in data["prompt_template"] + + def test_get_not_found(self, 
integration_client, clear_database):
+        response = self.get(integration_client, uuid.uuid4())
+        body = response.json()
+
+        assert response.status_code == 404
+        assert body["success"] is False
+        assert "Answer relevance prompt not found" in body["error"]
+
+    def test_get_other_tenant_not_found(self, integration_client, clear_database):
+        create_resp = self.create(integration_client)
+        prompt_id = create_resp.json()["data"]["id"]
+
+        response = self.get(integration_client, prompt_id, api_key=ALT_API_KEY)
+        body = response.json()
+
+        assert response.status_code == 404
+        assert body["success"] is False
+        assert "Answer relevance prompt not found" in body["error"]
+
+
+class TestUpdateAnswerRelevancePrompt(BaseAnswerRelevancePromptTest):
+    def test_update_success(self, integration_client, clear_database):
+        create_resp = self.create(integration_client)
+        prompt_id = create_resp.json()["data"]["id"]
+
+        response = self.update(
+            integration_client,
+            prompt_id,
+            {"name": "Updated Name"},
+        )
+
+        assert response.status_code == 200
+        assert response.json()["data"]["name"] == "Updated Name"
+
+    def test_update_prompt_template(self, integration_client, clear_database):
+        create_resp = self.create(integration_client)
+        prompt_id = create_resp.json()["data"]["id"]
+
+        new_template = "Q: {query}\nA: {answer}\nAnswer YES or NO."
+        response = self.update(
+            integration_client,
+            prompt_id,
+            {"prompt_template": new_template},
+        )
+
+        assert response.status_code == 200
+        assert response.json()["data"]["prompt_template"] == new_template
+
+    def test_update_is_active_false(self, integration_client, clear_database):
+        create_resp = self.create(integration_client)
+        prompt_id = create_resp.json()["data"]["id"]
+
+        response = self.update(integration_client, prompt_id, {"is_active": False})
+
+        assert response.status_code == 200
+        assert response.json()["data"]["is_active"] is False
+
+    def test_partial_update_preserves_other_fields(
+        self, integration_client, clear_database
+    ):
+        create_resp = self.create(integration_client)
+        original = create_resp.json()["data"]
+        prompt_id = original["id"]
+
+        self.update(integration_client, prompt_id, {"name": "New Name"})
+        response = self.get(integration_client, prompt_id)
+        data = response.json()["data"]
+
+        assert data["name"] == "New Name"
+        assert data["prompt_template"] == original["prompt_template"]
+        assert data["description"] == original["description"]
+
+    def test_update_validation_error_template_missing_placeholder(
+        self, integration_client, clear_database
+    ):
+        create_resp = self.create(integration_client)
+        prompt_id = create_resp.json()["data"]["id"]
+
+        response = self.update(
+            integration_client,
+            prompt_id,
+            {"prompt_template": "No placeholders at all."},
+        )
+
+        assert response.status_code == 422
+
+    def test_update_not_found(self, integration_client, clear_database):
+        response = self.update(integration_client, uuid.uuid4(), {"name": "x"})
+        body = response.json()
+
+        assert response.status_code == 404
+        assert body["success"] is False
+        assert "Answer relevance prompt not found" in body["error"]
+
+    def test_update_other_tenant_not_found(self, integration_client, clear_database):
+        create_resp = self.create(integration_client)
+        prompt_id = create_resp.json()["data"]["id"]
+
+        response = self.update(
+            integration_client,
+            prompt_id,
+            {"name": "other-tenant-update"},
+            api_key=ALT_API_KEY,
+        )
+        body = response.json()
+
+        assert response.status_code == 404
+        assert body["success"] is False
+        assert "Answer relevance prompt not found" in body["error"]
+
+
+class TestDeleteAnswerRelevancePrompt(BaseAnswerRelevancePromptTest):
+    def test_delete_success(self, integration_client, clear_database):
+        create_resp = self.create(integration_client)
+        prompt_id = create_resp.json()["data"]["id"]
+
+        response = self.delete(integration_client, prompt_id)
+
+        assert response.status_code == 200
+        assert response.json()["success"] is True
+        assert "deleted" in response.json()["data"]["message"].lower()
+
+    def test_delete_removes_from_list(self, integration_client, clear_database):
+        create_resp = self.create(integration_client)
+        prompt_id = create_resp.json()["data"]["id"]
+
+        self.delete(integration_client, prompt_id)
+
+        response = self.list(integration_client)
+        ids = [item["id"] for item in response.json()["data"]]
+        assert prompt_id not in ids
+
+    def test_delete_not_found(self, integration_client, clear_database):
+        response = self.delete(integration_client, uuid.uuid4())
+        body = response.json()
+
+        assert response.status_code == 404
+        assert body["success"] is False
+        assert "Answer relevance prompt not found" in body["error"]
+
+    def test_delete_other_tenant_not_found(self, integration_client, clear_database):
+        create_resp = self.create(integration_client)
+        prompt_id = create_resp.json()["data"]["id"]
+
+        response = self.delete(integration_client, prompt_id, api_key=ALT_API_KEY)
+        body = response.json()
+
+        assert response.status_code == 404
+        assert body["success"] is False
+        assert "Answer relevance prompt not found" in body["error"]
diff --git a/backend/app/tests/test_llm_validators.py b/backend/app/tests/test_llm_validators.py
index 5834843..52cc9d9 100644
--- a/backend/app/tests/test_llm_validators.py
+++ b/backend/app/tests/test_llm_validators.py
@@ -3,6 +3,10 @@
 import pytest
 from guardrails.validators import FailResult
 
+from app.core.validators.answer_relevance_custom_llm import DEFAULT_PROMPT_TEMPLATE
+from app.core.validators.config.answer_relevance_custom_llm_safety_validator_config import (
+    AnswerRelevanceCustomLLMSafetyValidatorConfig,
+)
 from app.core.validators.config.topic_relevance_safety_validator_config import (
     TopicRelevanceSafetyValidatorConfig,
 )
@@ -118,3 +122,94 @@ def test__normalize_llm_critic_error_passes_through_unknown_messages():
         _normalize_llm_critic_error(raw)
         == "The query did not meet the required quality criteria."
     )
+
+
+# ---------------------------------------------------------------------------
+# AnswerRelevanceCustomLLMSafetyValidatorConfig
+# ---------------------------------------------------------------------------
+
+_ANSWER_RELEVANCE_SETTINGS_PATH = (
+    "app.core.validators.config"
+    ".answer_relevance_custom_llm_safety_validator_config.settings"
+)
+
+_SAMPLE_ANSWER_RELEVANCE_CONFIG = dict(type="answer_relevance_custom_llm")
+
+
+def test_answer_relevance_build_raises_when_openai_key_missing():
+    config = AnswerRelevanceCustomLLMSafetyValidatorConfig(
+        **_SAMPLE_ANSWER_RELEVANCE_CONFIG
+    )
+
+    with patch(_ANSWER_RELEVANCE_SETTINGS_PATH) as mock_settings:
+        mock_settings.OPENAI_API_KEY = None
+
+        with pytest.raises(ValueError) as exc:
+            config.build()
+
+    assert "OPENAI_API_KEY" in str(exc.value)
+    assert "not configured" in str(exc.value)
+
+
+def test_answer_relevance_build_proceeds_when_openai_key_present():
+    config = AnswerRelevanceCustomLLMSafetyValidatorConfig(
+        **_SAMPLE_ANSWER_RELEVANCE_CONFIG
+    )
+
+    with patch(_ANSWER_RELEVANCE_SETTINGS_PATH) as mock_settings, patch(
+        "app.core.validators.config"
+        ".answer_relevance_custom_llm_safety_validator_config.AnswerRelevanceCustomLLM"
+    ) as mock_validator:
+        mock_settings.OPENAI_API_KEY = "sk-test-key"
+        config.build()
+
+    mock_validator.assert_called_once()
+
+
+def test_answer_relevance_build_uses_default_prompt_when_none():
+    config = AnswerRelevanceCustomLLMSafetyValidatorConfig(
+        **_SAMPLE_ANSWER_RELEVANCE_CONFIG
+    )
+
+    with patch(_ANSWER_RELEVANCE_SETTINGS_PATH) as mock_settings, patch(
+        "app.core.validators.config"
+        ".answer_relevance_custom_llm_safety_validator_config.AnswerRelevanceCustomLLM"
+    ) as mock_validator:
+        mock_settings.OPENAI_API_KEY = "sk-test-key"
+        config.build()
+
+    _, kwargs = mock_validator.call_args
+    assert "prompt_template" not in kwargs
+
+
+def test_answer_relevance_build_passes_inline_prompt_template():
+    custom = "Q: {query}\nA: {answer}\nYES or NO."
+    config = AnswerRelevanceCustomLLMSafetyValidatorConfig(
+        **{**_SAMPLE_ANSWER_RELEVANCE_CONFIG, "prompt_template": custom}
+    )
+
+    with patch(_ANSWER_RELEVANCE_SETTINGS_PATH) as mock_settings, patch(
+        "app.core.validators.config"
+        ".answer_relevance_custom_llm_safety_validator_config.AnswerRelevanceCustomLLM"
+    ) as mock_validator:
+        mock_settings.OPENAI_API_KEY = "sk-test-key"
+        config.build()
+
+    _, kwargs = mock_validator.call_args
+    assert kwargs["prompt_template"] == custom
+
+
+def test_answer_relevance_build_passes_llm_callable():
+    config = AnswerRelevanceCustomLLMSafetyValidatorConfig(
+        **{**_SAMPLE_ANSWER_RELEVANCE_CONFIG, "llm_callable": "gpt-4o"}
+    )
+
+    with patch(_ANSWER_RELEVANCE_SETTINGS_PATH) as mock_settings, patch(
+        "app.core.validators.config"
+        ".answer_relevance_custom_llm_safety_validator_config.AnswerRelevanceCustomLLM"
+    ) as mock_validator:
+        mock_settings.OPENAI_API_KEY = "sk-test-key"
+        config.build()
+
+    _, kwargs = mock_validator.call_args
+    assert kwargs["llm_callable"] == "gpt-4o"
diff --git a/backend/app/tests/test_validate_with_guard.py b/backend/app/tests/test_validate_with_guard.py
index 2956512..082de72 100644
--- a/backend/app/tests/test_validate_with_guard.py
+++ b/backend/app/tests/test_validate_with_guard.py
@@ -270,6 +270,84 @@ def test_resolve_validator_configs_uses_inline_topic_relevance_without_lookup():
     mock_get.assert_not_called()
 
 
+def test_resolve_validator_configs_answer_relevance_from_custom_prompt_id():
+    custom_prompt_id = str(uuid4())
+    payload = GuardrailRequest(
+        request_id=str(uuid4()),
+        organization_id=VALIDATOR_TEST_ORGANIZATION_ID,
+        project_id=VALIDATOR_TEST_PROJECT_ID,
+        input="{}",
+        validators=[
+            {
+                "type": "answer_relevance_custom_llm",
+                "custom_prompt_id": custom_prompt_id,
+            }
+        ],
+    )
+    mock_session = MagicMock()
+
+    with patch(
+        "app.api.routes.guardrails.answer_relevance_prompt_crud.get"
+    ) as mock_get:
+        mock_get.return_value = MagicMock(
+            prompt_template="Q: {query}\nA: {answer}\nYES or NO."
+        )
+        _resolve_validator_configs(payload, mock_session)
+
+    validator = payload.validators[0]
+    assert validator.prompt_template == "Q: {query}\nA: {answer}\nYES or NO."
+    mock_get.assert_called_once_with(
+        session=mock_session,
+        id=validator.custom_prompt_id,
+        organization_id=VALIDATOR_TEST_ORGANIZATION_ID,
+        project_id=VALIDATOR_TEST_PROJECT_ID,
+    )
+
+
+def test_resolve_validator_configs_skips_answer_relevance_lookup_when_no_prompt_id():
+    payload = GuardrailRequest(
+        request_id=str(uuid4()),
+        organization_id=VALIDATOR_TEST_ORGANIZATION_ID,
+        project_id=VALIDATOR_TEST_PROJECT_ID,
+        input="{}",
+        validators=[{"type": "answer_relevance_custom_llm"}],
+    )
+    mock_session = MagicMock()
+
+    with patch(
+        "app.api.routes.guardrails.answer_relevance_prompt_crud.get"
+    ) as mock_get:
+        _resolve_validator_configs(payload, mock_session)
+
+    mock_get.assert_not_called()
+
+
+def test_resolve_validator_configs_uses_inline_answer_relevance_prompt_without_lookup():
+    inline_template = "Query: {query}\nAnswer: {answer}\nYES or NO."
+    payload = GuardrailRequest(
+        request_id=str(uuid4()),
+        organization_id=VALIDATOR_TEST_ORGANIZATION_ID,
+        project_id=VALIDATOR_TEST_PROJECT_ID,
+        input="{}",
+        validators=[
+            {
+                "type": "answer_relevance_custom_llm",
+                "prompt_template": inline_template,
+            }
+        ],
+    )
+    mock_session = MagicMock()
+
+    with patch(
+        "app.api.routes.guardrails.answer_relevance_prompt_crud.get"
+    ) as mock_get:
+        _resolve_validator_configs(payload, mock_session)
+
+    validator = payload.validators[0]
+    assert validator.prompt_template == inline_template
+    mock_get.assert_not_called()
+
+
 def _build_mock_guard_with_fail_result(validator_name: str, error_message: str):
     mock_log = MagicMock()
     mock_log.validator_name = validator_name
diff --git a/backend/app/tests/validators/test_answer_relevance_custom_llm.py b/backend/app/tests/validators/test_answer_relevance_custom_llm.py
new file mode 100644
index 0000000..9427003
--- /dev/null
+++ b/backend/app/tests/validators/test_answer_relevance_custom_llm.py
@@ -0,0 +1,237 @@
+import json
+from unittest.mock import MagicMock, patch
+
+import pytest
+from guardrails.validators import FailResult, PassResult
+
+from app.core.validators.answer_relevance_custom_llm import (
+    DEFAULT_PROMPT_TEMPLATE,
+    AnswerRelevanceCustomLLM,
+)
+
+VALID_INPUT = json.dumps(
+    {"query": "What causes fever?", "answer": "Infections cause fever."}
+)
+VALID_INPUT_YES = VALID_INPUT
+VALID_INPUT_NO = json.dumps(
+    {"query": "What causes fever?", "answer": "The sky is blue."}
+)
+
+
+def _make_llm_response(text: str):
+    choice = MagicMock()
+    choice.message.content = text
+    result = MagicMock()
+    result.choices = [choice]
+    return result
+
+
+@pytest.fixture
+def validator():
+    return AnswerRelevanceCustomLLM()
+
+
+# ---------------------------------------------------------------------------
+# Default prompt template shape
+# ---------------------------------------------------------------------------
+
+
+def test_default_prompt_template_has_query_placeholder():
+    assert "{query}" in DEFAULT_PROMPT_TEMPLATE
+
+
+def test_default_prompt_template_has_answer_placeholder():
+    assert "{answer}" in DEFAULT_PROMPT_TEMPLATE
+
+
+# ---------------------------------------------------------------------------
+# PassResult on YES
+# ---------------------------------------------------------------------------
+
+
+def test_passes_when_llm_returns_yes(validator):
+    with patch(
+        "app.core.validators.answer_relevance_custom_llm.completion"
+    ) as mock_llm:
+        mock_llm.return_value = _make_llm_response("YES")
+        result = validator._validate(VALID_INPUT_YES)
+
+    assert isinstance(result, PassResult)
+
+
+def test_passes_when_llm_returns_yes_lowercase(validator):
+    with patch(
+        "app.core.validators.answer_relevance_custom_llm.completion"
+    ) as mock_llm:
+        mock_llm.return_value = _make_llm_response("yes")
+        result = validator._validate(VALID_INPUT_YES)
+
+    assert isinstance(result, PassResult)
+
+
+def test_passes_when_llm_returns_yes_with_trailing_text(validator):
+    with patch(
+        "app.core.validators.answer_relevance_custom_llm.completion"
+    ) as mock_llm:
+        mock_llm.return_value = _make_llm_response("YES.")
+        result = validator._validate(VALID_INPUT_YES)
+
+    assert isinstance(result, PassResult)
+
+
+# ---------------------------------------------------------------------------
+# FailResult on NO
+# ---------------------------------------------------------------------------
+
+
+def test_fails_when_llm_returns_no(validator):
+    with patch(
+        "app.core.validators.answer_relevance_custom_llm.completion"
+    ) as mock_llm:
+        mock_llm.return_value = _make_llm_response("NO")
+        result = validator._validate(VALID_INPUT_NO)
+
+    assert isinstance(result, FailResult)
+    assert "not relevant" in result.error_message
+
+
+def test_fails_when_llm_returns_no_lowercase(validator):
+    with patch(
+        "app.core.validators.answer_relevance_custom_llm.completion"
+    ) as mock_llm:
+        mock_llm.return_value = _make_llm_response("no")
+        result = validator._validate(VALID_INPUT_NO)
+
+    assert isinstance(result, FailResult)
+
+
+# ---------------------------------------------------------------------------
+# Input parsing errors
+# ---------------------------------------------------------------------------
+
+
+def test_fails_with_non_json_input(validator):
+    result = validator._validate("this is not json")
+
+    assert isinstance(result, FailResult)
+    assert "JSON" in result.error_message
+
+
+def test_fails_with_empty_query(validator):
+    value = json.dumps({"query": "", "answer": "Some answer."})
+    result = validator._validate(value)
+
+    assert isinstance(result, FailResult)
+    assert "non-empty" in result.error_message
+
+
+def test_fails_with_whitespace_only_query(validator):
+    value = json.dumps({"query": " ", "answer": "Some answer."})
+    result = validator._validate(value)
+
+    assert isinstance(result, FailResult)
+
+
+def test_fails_with_empty_answer(validator):
+    value = json.dumps({"query": "What is fever?", "answer": ""})
+    result = validator._validate(value)
+
+    assert isinstance(result, FailResult)
+    assert "non-empty" in result.error_message
+
+
+def test_fails_with_missing_query_key(validator):
+    value = json.dumps({"answer": "Some answer."})
+    result = validator._validate(value)
+
+    assert isinstance(result, FailResult)
+
+
+def test_fails_with_missing_answer_key(validator):
+    value = json.dumps({"query": "What is fever?"})
+    result = validator._validate(value)
+
+    assert isinstance(result, FailResult)
+
+
+# ---------------------------------------------------------------------------
+# Custom prompt template
+# ---------------------------------------------------------------------------
+
+
+def test_custom_prompt_template_is_used():
+    custom_template = "Q: {query}\nA: {answer}\nRelevant? YES or NO."
+    validator = AnswerRelevanceCustomLLM(prompt_template=custom_template)
+
+    with patch(
+        "app.core.validators.answer_relevance_custom_llm.completion"
+    ) as mock_llm:
+        mock_llm.return_value = _make_llm_response("YES")
+        validator._validate(VALID_INPUT_YES)
+
+    call_args = mock_llm.call_args
+    prompt_sent = call_args.kwargs["messages"][0]["content"]
+
+    assert "Q: What causes fever?" in prompt_sent
+    assert "A: Infections cause fever." in prompt_sent
+
+
+def test_custom_prompt_with_unknown_placeholder_returns_fail_result():
+    # str.format() raises KeyError for *unknown* keys, not for missing {answer}/{query}.
+    bad_template = "Query: {query} Answer: {answer} Extra: {unknown_field}"
+    validator = AnswerRelevanceCustomLLM(prompt_template=bad_template)
+
+    result = validator._validate(VALID_INPUT_YES)
+
+    assert isinstance(result, FailResult)
+    assert "placeholder" in result.error_message
+
+
+# ---------------------------------------------------------------------------
+# LLM call failure
+# ---------------------------------------------------------------------------
+
+
+def test_fails_when_llm_raises(validator):
+    with patch(
+        "app.core.validators.answer_relevance_custom_llm.completion"
+    ) as mock_llm:
+        mock_llm.side_effect = Exception("network error")
+        result = validator._validate(VALID_INPUT_YES)
+
+    assert isinstance(result, FailResult)
+    assert "LLM call failed" in result.error_message
+
+
+# ---------------------------------------------------------------------------
+# Unexpected LLM response
+# ---------------------------------------------------------------------------
+
+
+def test_fails_on_unexpected_llm_response(validator):
+    with patch(
+        "app.core.validators.answer_relevance_custom_llm.completion"
+    ) as mock_llm:
+        mock_llm.return_value = _make_llm_response("MAYBE")
+        result = validator._validate(VALID_INPUT_YES)
+
+    assert isinstance(result, FailResult)
+    assert "Unexpected" in result.error_message
+
+
+# ---------------------------------------------------------------------------
+# llm_callable is forwarded
+# ---------------------------------------------------------------------------
+
+
+def test_llm_callable_is_forwarded():
+    validator = AnswerRelevanceCustomLLM(llm_callable="gpt-4o")
+
+    with patch(
+        "app.core.validators.answer_relevance_custom_llm.completion"
+    ) as mock_llm:
+        mock_llm.return_value = _make_llm_response("YES")
+        validator._validate(VALID_INPUT_YES)
+
+    call_args = mock_llm.call_args
+    assert call_args.kwargs["model"] == "gpt-4o"