Skip to content
Open
2 changes: 1 addition & 1 deletion sdk/batch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ from speechmatics.batch import (
AsyncClient,
JobConfig,
JobType,
OperatingPoint,
Model,
TranscriptionConfig,
TranslationConfig,
SummarizationConfig
Expand Down
53 changes: 41 additions & 12 deletions sdk/batch/speechmatics/batch/_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,15 @@ class OperatingPoint(str, Enum):

ENHANCED = "enhanced"
STANDARD = "standard"
MELIA_1 = "melia-1"


class Model(str, Enum):
    """Model options for transcription.

    Members subclass ``str``, so each one compares equal to its raw string
    value (for example ``Model.MELIA_1 == "melia-1"``). The values match the
    ``OperatingPoint`` members of the same names.
    """

    ENHANCED = "enhanced"
    STANDARD = "standard"
    MELIA_1 = "melia-1"


class NotificationContents(str, Enum):
Expand Down Expand Up @@ -115,6 +117,15 @@ class TranscriptionConfig:
defaults to None.
audio_filtering_config: Configuration for limiting the transcription of quiet audio.
Defaults to None.
language_hints: List of languages that are most likely to appear in your audio.
This improves accuracy by biasing recognition toward the specified languages.
Use ``language_hints_strict`` to control whether other languages can also be detected.
Applicable only for the next-gen models. Support for the next-gen models is coming soon.
language_hints_strict: Controls how strictly language hints are applied.
When ``True``, the transcript will only contain languages specified in ``language_hints``.
When ``False``, recognition is biased toward the specified languages while still allowing other
languages to be detected if present.
Applicable only for the next-gen models. Support for the next-gen models is coming soon.
"""

language: str = "en"
Expand All @@ -133,6 +144,8 @@ class TranscriptionConfig:
transcript_filtering_config: Optional[TranscriptFilteringConfig] = None
audio_filtering_config: Optional[AudioFilteringConfig] = None
operating_point: Optional[OperatingPoint] = None
language_hints: Optional[list[str]] = None
Comment thread
rakeshv247 marked this conversation as resolved.
language_hints_strict: Optional[bool] = None

def __post_init__(self) -> None:
if self.model is not _UNSET and self.operating_point is not None:
Expand All @@ -148,6 +161,8 @@ def __post_init__(self) -> None:

def to_dict(self) -> dict[str, Any]:
result: dict[str, Any] = {k: v for k, v in asdict(self).items() if v is not None}
if self.model is _UNSET:
result.pop("model", None)
if self.transcript_filtering_config is not None:
result["transcript_filtering_config"] = self.transcript_filtering_config.to_dict()
if self.audio_filtering_config is not None:
Expand Down Expand Up @@ -811,6 +826,9 @@ class Transcript:
audio_event_summary: Optional audio event statistics.
"""

_LANG_PACK_WORD_DELIMITER_KEY = "word_delimiter"
_LANG_PACK_PER_LANG_DELIMITERS_KEY = "per_language_word_delimiters"

format: str
job: JobInfo
metadata: RecognitionMetadata
Expand Down Expand Up @@ -840,14 +858,23 @@ def transcript_text(self) -> str:
return ""

# Get language pack info for word delimiter
word_delimiter = " " # Default
if self.metadata and self.metadata.language_pack_info and "word_delimiter" in self.metadata.language_pack_info:
word_delimiter = self.metadata.language_pack_info["word_delimiter"]
default_word_delimiter = " " # Default
# Applicable only for the next gen models
per_lang_word_delimiters: dict = {}
if self.metadata and self.metadata.language_pack_info:
if self._LANG_PACK_WORD_DELIMITER_KEY in self.metadata.language_pack_info:
default_word_delimiter = self.metadata.language_pack_info[self._LANG_PACK_WORD_DELIMITER_KEY]

if self._LANG_PACK_PER_LANG_DELIMITERS_KEY in self.metadata.language_pack_info:
per_lang_word_delimiters = self.metadata.language_pack_info[self._LANG_PACK_PER_LANG_DELIMITERS_KEY]

# Group results by speaker and process
transcript_parts = []
current_speaker = None
current_group: list[str] = []
# Each entry is (word, delimiter), where delimiter is looked up from per_language_word_delimiters
# using the word's language code, falling back to the default word delimiter.
# For example, [("hello", " "), ("world", " ")]
current_group: list[tuple[str, str]] = []
Comment thread
rakeshv247 marked this conversation as resolved.

for result in self.results:
if not result.alternatives:
Expand All @@ -856,12 +883,15 @@ def transcript_text(self) -> str:
alternative = result.alternatives[0]
content = alternative.content
speaker = alternative.speaker
word_delimiter = default_word_delimiter
if alternative.language and alternative.language in per_lang_word_delimiters:
word_delimiter = per_lang_word_delimiters[alternative.language]

# Handle speaker changes
if speaker != current_speaker:
# Process accumulated group for previous speaker
if current_group:
text = self._join_content_items(current_group, word_delimiter)
text = self._join_content_items(current_group)
if current_speaker:
transcript_parts.append(f"SPEAKER {current_speaker}: {text}") # type: ignore[unreachable]
else:
Expand All @@ -870,27 +900,26 @@ def transcript_text(self) -> str:

current_speaker = speaker

# Add content to current group
# Add content to current group with its word delimiter
if content:
current_group.append(content)
current_group.append((content, word_delimiter))

# Process final group
if current_group:
text = self._join_content_items(current_group, word_delimiter)
text = self._join_content_items(current_group)
if current_speaker:
transcript_parts.append(f"SPEAKER {current_speaker}: {text}")
else:
transcript_parts.append(text)

return "\n".join(transcript_parts)

def _join_content_items(self, content_items: list[str], word_delimiter: str) -> str:
def _join_content_items(self, content_items: list[tuple[str, str]]) -> str:
"""
Join content items with appropriate spacing and punctuation handling.

Args:
content_items: List of content strings to join.
word_delimiter: Delimiter to use between words.
content_items: List of (content, word_delimiter) pairs to join.

Returns:
Properly formatted text string.
Expand All @@ -900,7 +929,7 @@ def _join_content_items(self, content_items: list[str], word_delimiter: str) ->

result: list[str] = []

for i, content in enumerate(content_items):
for i, (content, word_delimiter) in enumerate(content_items):
if not content:
continue

Expand Down
94 changes: 93 additions & 1 deletion tests/batch/test_models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from speechmatics.batch._models import JobConfig, TranscriptFilteringConfig, TranscriptionConfig
import json

import pytest

from speechmatics.batch._models import JobConfig, Model, OperatingPoint, TranscriptFilteringConfig, TranscriptionConfig


class TestTranscriptFilteringConfigToDict:
Expand Down Expand Up @@ -127,3 +131,91 @@ def test_absent_output_config_is_none(self):
data = {"type": "transcription"}
job_config = JobConfig.from_dict(data)
assert job_config.output_config is None


class TestModelToDict:
    """Checks how ``TranscriptionConfig.to_dict`` emits ``model`` and ``operating_point``."""

    def test_model_gets_serialized(self):
        # An explicitly chosen model is emitted and no operating_point key appears.
        serialized = TranscriptionConfig(model=Model.MELIA_1).to_dict()
        assert serialized["model"] == Model.MELIA_1
        assert "operating_point" not in serialized

    def test_operating_point_gets_serialized(self):
        # Choosing an operating point emits it and leaves the model key out.
        serialized = TranscriptionConfig(operating_point=OperatingPoint.STANDARD).to_dict()
        assert serialized["operating_point"] == OperatingPoint.STANDARD
        assert "model" not in serialized

    def test_default_model_is_enhanced(self):
        # With neither option given, the serialized model is expected to be ENHANCED.
        serialized = TranscriptionConfig().to_dict()
        assert serialized["model"] == Model.ENHANCED
        assert "operating_point" not in serialized

    def test_model_and_operating_point_raises(self):
        # Supplying both options at once is rejected at construction time.
        with pytest.raises(ValueError):
            TranscriptionConfig(model=Model.STANDARD, operating_point=OperatingPoint.STANDARD)


class TestLanguageHintsToDict:
    """Checks how ``TranscriptionConfig.to_dict`` emits the language-hint fields."""

    def test_language_hints_serializes_correctly(self):
        # Hints alone are emitted; the unset strict flag is left out.
        config = TranscriptionConfig(language_hints=["en", "fr"])
        result = config.to_dict()
        assert result["language_hints"] == ["en", "fr"]
        assert "language_hints_strict" not in result

    def test_language_hints_strict_true_serializes_correctly(self):
        config = TranscriptionConfig(language_hints=["en"], language_hints_strict=True)
        result = config.to_dict()
        assert result["language_hints"] == ["en"]
        assert result["language_hints_strict"] is True

    def test_language_hints_strict_false_serializes_correctly(self):
        config = TranscriptionConfig(language_hints=["en"], language_hints_strict=False)
        result = config.to_dict()
        assert result["language_hints"] == ["en"]
        # An explicit False must still be present in the output: only None
        # values are meant to be dropped, not every falsy value.
        assert "language_hints_strict" in result
        assert result["language_hints_strict"] is False

    def test_language_hints_absent_when_none(self):
        # Neither key appears when both fields keep their None defaults.
        config = TranscriptionConfig()
        result = config.to_dict()
        assert "language_hints" not in result
        assert "language_hints_strict" not in result


class TestLanguageHintsFromDict:
    """Checks that ``JobConfig.from_dict`` restores the language-hint fields."""

    def test_language_hints_deserializes_correctly(self):
        data = {
            "type": "transcription",
            "transcription_config": {
                "language": "en",
                "language_hints": ["en", "fr"],
            },
        }
        job_config = JobConfig.from_dict(data)
        assert job_config.transcription_config is not None
        assert job_config.transcription_config.language_hints == ["en", "fr"]

    def test_language_hints_strict_deserializes_correctly(self):
        data = {
            "type": "transcription",
            "transcription_config": {
                "language": "en",
                "language_hints": ["en"],
                "language_hints_strict": True,
            },
        }
        job_config = JobConfig.from_dict(data)
        assert job_config.transcription_config is not None
        assert job_config.transcription_config.language_hints_strict is True

    def test_absent_fields_are_none(self):
        # Keys missing from the payload leave both fields at None.
        data = {
            "type": "transcription",
            "transcription_config": {"language": "en"},
        }
        job_config = JobConfig.from_dict(data)
        # Same explicit not-None guard as the other tests in this class.
        assert job_config.transcription_config is not None
        assert job_config.transcription_config.language_hints is None
        assert job_config.transcription_config.language_hints_strict is None
Loading