-
Notifications
You must be signed in to change notification settings - Fork 0
Quick Start
Behnam Ebrahimi edited this page Mar 29, 2026
·
1 revision
from whisper_mlx import LightningWhisperMLX
# Initialize with batched decoding for maximum speed
whisper = LightningWhisperMLX(model="distil-large-v3", batch_size=12)
# Transcribe audio
result = whisper.transcribe("audio.mp3")
print(result["text"])

from whisper_mlx import LightningWhisperMLX
whisper = LightningWhisperMLX(model="distil-large-v3", batch_size=12)
# With word-level timestamps
result = whisper.transcribe("audio.mp3", language="en", word_timestamps=True)
# Access word timings
for segment in result["segments"]:
    for word in segment.get("words", []):
        print(f" [{word['start']:.2f} -> {word['end']:.2f}] {word['word']}")

from whisper_mlx import transcribe
result = transcribe(
"audio.mp3",
path_or_hf_repo="mlx-community/whisper-turbo",
batch_size=6,
language="en",
word_timestamps=True,
)
print(result["text"])
for segment in result["segments"]:
    print(f"[{segment['start']:.2f} -> {segment['end']:.2f}] {segment['text']}")

# Basic transcription
vayu audio.mp3
# Batched decoding (3-5x faster)
vayu audio.mp3 --batch-size 12
# Specify model and output format
vayu audio.mp3 --model mlx-community/distil-whisper-large-v3 --output-format srt
# Multiple files
vayu audio1.mp3 audio2.mp3 --output-dir ./transcripts
# Word-level timestamps
vayu audio.mp3 --word-timestamps True
# Translate to English
vayu audio.mp3 --task translate

Both the Python API and CLI return results in this structure:
{
    "text": "Full transcription text...",
    "segments": [
        {
            "id": 0,
            "seek": 0,
            "start": 0.0,
            "end": 5.2,
            "text": " Segment text",
            "tokens": [50364, 1025, ...],
            "temperature": 0.0,
            "avg_logprob": -0.42,
            "compression_ratio": 1.8,
            "no_speech_prob": 0.05,
            "words": [ # Only with word_timestamps=True
                {"word": "Segment", "start": 0.1, "end": 0.5, "probability": 0.98},
                {"word": "text", "start": 0.5, "end": 0.9, "probability": 0.95}
            ]
        }
    ],
    "language": "en"
}