-
Notifications
You must be signed in to change notification settings - Fork 10
TTS Evaluation: Metric #681
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f03e860
f9ecdb7
33db59e
bdc8133
029bb48
d586eb6
05ff2c7
97e4f82
ad328d5
dad9226
586f728
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| Update an STT sample's language and/or ground truth transcription. | ||
|
|
||
| Only the provided fields will be updated. Fields set to `null` in the request will not modify the existing value. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,20 @@ | ||
| Update human feedback on a TTS synthesis result. | ||
| Update human feedback and score on a TTS synthesis result. | ||
|
|
||
| Only the provided fields will be updated. Fields omitted from the request will not modify the existing value. Sending a field as `null` will clear its value. | ||
|
|
||
| Fields: | ||
| - **is_correct**: Whether the synthesized audio quality is acceptable (null to clear) | ||
| - **comment**: Optional feedback comment | ||
| - **score**: Evaluation metrics for the synthesized audio | ||
|
|
||
| **Example request:** | ||
| ```json | ||
| { | ||
| "is_correct": true, | ||
| "comment": "string", | ||
| "score": { | ||
| "Speech Naturalness": "low | medium | high", | ||
| "Pronunciation Accuracy": "low | medium | high" | ||
| } | ||
| } | ||
| ``` | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,7 @@ | |
|
|
||
| from app.api.deps import AuthContextDep, SessionDep | ||
| from app.api.permissions import Permission, require_permission | ||
| from app.core.cloud import get_cloud_storage | ||
| from app.crud.language import get_language_by_id | ||
| from app.crud.tts_evaluations import ( | ||
| get_tts_dataset_by_id, | ||
|
|
@@ -38,13 +39,7 @@ def create_dataset( | |
| """Create a TTS evaluation dataset.""" | ||
| # Validate language_id if provided | ||
| if dataset_create.language_id is not None: | ||
| language = get_language_by_id( | ||
| session=session, language_id=dataset_create.language_id | ||
| ) | ||
| if not language: | ||
| raise HTTPException( | ||
| status_code=400, detail="Invalid language_id: language not found" | ||
| ) | ||
| get_language_by_id(session=session, language_id=dataset_create.language_id) | ||
|
|
||
| dataset = upload_tts_dataset( | ||
| session=session, | ||
|
|
@@ -71,6 +66,9 @@ def list_datasets( | |
| auth_context: AuthContextDep, | ||
| limit: int = Query(50, ge=1, le=100, description="Maximum results to return"), | ||
| offset: int = Query(0, ge=0, description="Number of results to skip"), | ||
| include_signed_url: bool = Query( | ||
| False, description="Include signed URL for dataset files" | ||
| ), | ||
| ) -> APIResponse[list[TTSDatasetPublic]]: | ||
| """List TTS evaluation datasets.""" | ||
| datasets, total = list_tts_datasets( | ||
|
|
@@ -81,8 +79,21 @@ def list_datasets( | |
| offset=offset, | ||
| ) | ||
|
|
||
| storage = None | ||
| if include_signed_url: | ||
| storage = get_cloud_storage( | ||
| session=session, project_id=auth_context.project_.id | ||
| ) | ||
|
|
||
| data = [] | ||
| for dataset in datasets: | ||
| signed_url = None | ||
| if storage and dataset.object_store_url: | ||
| signed_url = storage.get_signed_url(dataset.object_store_url) | ||
| data.append(TTSDatasetPublic.from_model(dataset, signed_url=signed_url)) | ||
|
Comment on lines
+89
to
+93
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Handle signed URL generation failures explicitly.
Suggested hardening:
@@
- for dataset in datasets:
+ for dataset in datasets:
signed_url = None
if storage and dataset.object_store_url:
- signed_url = storage.get_signed_url(dataset.object_store_url)
+ try:
+ signed_url = storage.get_signed_url(dataset.object_store_url)
+ except Exception as err:
+ logger.warning(
+ f"[list_datasets] Signed URL generation failed | dataset_id={dataset.id} | error={err}"
+ )
data.append(TTSDatasetPublic.from_model(dataset, signed_url=signed_url))
@@
signed_url = None
if include_signed_url and dataset.object_store_url:
storage = get_cloud_storage(
session=session, project_id=auth_context.project_.id
)
- signed_url = storage.get_signed_url(dataset.object_store_url)
+ try:
+ signed_url = storage.get_signed_url(dataset.object_store_url)
+ except Exception as err:
+ logger.error(
+ f"[get_dataset] Signed URL generation failed | dataset_id={dataset_id} | error={err}",
+ exc_info=True,
+ )
+ raise HTTPException(
+ status_code=502, detail="Failed to generate signed URL"
+ ) from err
Also applies to: 133-138 🤖 Prompt for AI Agents |
||
|
|
||
| return APIResponse.success_response( | ||
| data=datasets, | ||
| data=data, | ||
| metadata={"total": total, "limit": limit, "offset": offset}, | ||
| ) | ||
|
|
||
|
|
@@ -98,6 +109,9 @@ def get_dataset( | |
| session: SessionDep, | ||
| auth_context: AuthContextDep, | ||
| dataset_id: int, | ||
| include_signed_url: bool = Query( | ||
| False, description="Include signed URL for dataset file" | ||
| ), | ||
| ) -> APIResponse[TTSDatasetPublic]: | ||
| """Get a TTS evaluation dataset.""" | ||
| dataset = get_tts_dataset_by_id( | ||
|
|
@@ -110,8 +124,15 @@ def get_dataset( | |
| if not dataset: | ||
| raise HTTPException(status_code=404, detail="Dataset not found") | ||
|
|
||
| signed_url = None | ||
| if include_signed_url and dataset.object_store_url: | ||
| storage = get_cloud_storage( | ||
| session=session, project_id=auth_context.project_.id | ||
| ) | ||
| signed_url = storage.get_signed_url(dataset.object_store_url) | ||
|
|
||
| return APIResponse.success_response( | ||
| data=TTSDatasetPublic.from_model(dataset), | ||
| data=TTSDatasetPublic.from_model(dataset, signed_url=signed_url), | ||
| metadata={ | ||
| "sample_count": (dataset.dataset_metadata or {}).get("sample_count", 0) | ||
| }, | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -209,7 +209,7 @@ def update_traces_with_cosine_scores( | |||||
| try: | ||||||
| langfuse.score( | ||||||
| trace_id=trace_id, | ||||||
| name="cosine_similarity", | ||||||
| name="Cosine Similarity", | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use a machine-stable metric key when writing Langfuse scores. Line 212 sets `name` to the display label "Cosine Similarity" instead of the previous key "cosine_similarity". Suggested patch:
- name="Cosine Similarity",
+ name="cosine_similarity",
📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||
| value=cosine_score, | ||||||
| comment=( | ||||||
| "Cosine similarity between generated output and " | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -392,7 +392,7 @@ async def process_completed_embedding_batch( | |||||
| eval_run.score = { | ||||||
| "summary_scores": [ | ||||||
| { | ||||||
| "name": "cosine_similarity", | ||||||
| "name": "Cosine Similarity", | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Keep score identifiers stable to avoid breaking clients. Line 395 changes the summary score `name` from "cosine_similarity" to "Cosine Similarity". Suggested patch:
- "name": "Cosine Similarity",
+ "name": "cosine_similarity",
📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||
| "avg": round(float(similarity_stats["cosine_similarity_avg"]), 2), | ||||||
| "std": round(float(similarity_stats["cosine_similarity_std"]), 2), | ||||||
| "total_pairs": similarity_stats["total_pairs"], | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use concrete enum values in the JSON example.
On Line 16 and Line 17, `"low | medium | high"` reads like a literal payload value and can lead to invalid requests when copied directly.
Suggested doc tweak:
"score": {
- "Speech Naturalness": "low | medium | high",
- "Pronunciation Accuracy": "low | medium | high"
+ "Speech Naturalness": "medium",
+ "Pronunciation Accuracy": "high"
}
📝 Committable suggestion
🤖 Prompt for AI Agents