diff --git a/README.md b/README.md index b7c1f24..5cbc85b 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,17 @@ bash swift/build.sh uv sync --extra all ``` +When installed from source, the `ownscribe` command lives inside the project's +virtual environment, so run it with `uv run` from the repo directory: + +```bash +uv run ownscribe # equivalent to the bare `ownscribe` used below +``` + +Alternatively, activate the environment once (`source .venv/bin/activate`) and +then call `ownscribe` directly. The examples in [Usage](#usage) use the bare +`ownscribe` form, which works after activation or via `uvx ownscribe`. + ## Usage ### Record, transcribe, and summarize a meeting @@ -147,14 +158,16 @@ ownscribe --silence-timeout 0 # disable silence auto-stop ownscribe devices # list audio devices (uses native CoreAudio when available) ownscribe apps # list running apps with PIDs for use with --pid ownscribe warmup # prefetch WhisperX/pyannote models before a meeting -ownscribe transcribe recording.wav # transcribe an audio file (saves alongside the input) +ownscribe transcribe recording.wav # transcribe an audio or video file: wav/mp3/mp4/mov/mkv (saved alongside) ownscribe summarize transcript.md # summarize a transcript (saves alongside the input) -ownscribe resume ./2026-02-20_1736 # resume a failed/partial pipeline in a directory +ownscribe resume ./2026-02-20_1736 # resume a partial run, or process a folder's audio/video recording ownscribe ask "question" # search your meetings with a natural-language question ownscribe config # open config file in $EDITOR ownscribe cleanup # remove ownscribe data from disk ``` +> **Video files work too.** Anywhere ownscribe accepts an audio file it also accepts a video container (mp4, mov, mkv, m4v) — it extracts the audio track via ffmpeg. To turn a recording into full notes, drop it in a folder and run `ownscribe resume ./that-folder/` (transcript + summary); use `ownscribe transcribe meeting.mp4` for a transcript only. 
+ Use `warmup` ahead of time to avoid first-run model download delays while recording: ```bash @@ -209,6 +222,7 @@ enabled = true backend = "local" # "local" (built-in, no server needed), "ollama", or "openai" model = "phi-4-mini" # local: "phi-4-mini", path to GGUF, or hf:owner/repo/file.gguf; ollama/openai: model name # host = "http://localhost:11434" # only for ollama/openai backends +# api_key = "" # only for openai backend; required by servers like oMLX (or set OPENAI_API_KEY) # template = "meeting" # "meeting", "lecture", "brief", or a custom name # context_size = 0 # 0 = auto-detect from model; set manually for OpenAI-compatible backends @@ -223,7 +237,7 @@ format = "markdown" # "markdown" or "json" keep_recording = true # false = auto-delete WAV after transcription ``` -**Precedence:** CLI flags > environment variables (`HF_TOKEN`, `OLLAMA_HOST`) > config file > defaults. +**Precedence:** CLI flags > environment variables (`HF_TOKEN`, `OLLAMA_HOST`, `OPENAI_API_KEY`) > config file > defaults. 
## Summarization Templates diff --git a/src/ownscribe/config.py b/src/ownscribe/config.py index ec6d13a..f5b0885 100644 --- a/src/ownscribe/config.py +++ b/src/ownscribe/config.py @@ -38,6 +38,7 @@ backend = "local" # "local" (built-in, no server needed), "ollama", or "openai" model = "phi-4-mini" # local: "phi-4-mini", path to GGUF, or hf:owner/repo/file.gguf; ollama/openai: model name # host = "http://localhost:11434" # only for ollama/openai backends +# api_key = "" # only for openai backend; required by servers like oMLX (or set OPENAI_API_KEY) # template = "meeting" # built-in: "meeting", "lecture", or "brief" # context_size = 0 # 0 = auto-detect from model; set manually for OpenAI-compatible backends @@ -87,6 +88,7 @@ class SummarizationConfig: backend: str = "local" model: str = "phi-4-mini" host: str = "http://localhost:11434" + api_key: str = "" template: str = "" context_size: int = 0 @@ -132,6 +134,8 @@ def load(cls) -> Config: config.diarization.hf_token = hf_token if ollama_host := os.environ.get("OLLAMA_HOST"): config.summarization.host = ollama_host + if api_key := os.environ.get("OPENAI_API_KEY"): + config.summarization.api_key = api_key return config diff --git a/src/ownscribe/pipeline.py b/src/ownscribe/pipeline.py index f0fe2e9..58ed4ca 100644 --- a/src/ownscribe/pipeline.py +++ b/src/ownscribe/pipeline.py @@ -508,15 +508,18 @@ def _do_transcribe_and_summarize( _AUDIO_EXTENSIONS = {".wav", ".mp3", ".m4a", ".flac", ".ogg", ".webm"} +# Video containers whose audio track ffmpeg/WhisperX can decode (e.g. meeting recordings). 
+_VIDEO_EXTENSIONS = {".mp4", ".m4v", ".mov", ".mkv"} +_MEDIA_EXTENSIONS = _AUDIO_EXTENSIONS | _VIDEO_EXTENSIONS def _find_audio(directory: Path) -> Path | None: - """Find an audio file in directory, preferring 'recording.wav'.""" + """Find an audio or video file in directory, preferring 'recording.wav'.""" recording = directory / "recording.wav" if recording.exists(): return recording for f in directory.iterdir(): - if f.is_file() and f.suffix.lower() in _AUDIO_EXTENSIONS: + if f.is_file() and f.suffix.lower() in _MEDIA_EXTENSIONS: return f return None diff --git a/src/ownscribe/summarization/openai_summarizer.py b/src/ownscribe/summarization/openai_summarizer.py index 9bf771b..fd8c667 100644 --- a/src/ownscribe/summarization/openai_summarizer.py +++ b/src/ownscribe/summarization/openai_summarizer.py @@ -15,11 +15,13 @@ class OpenAISummarizer(Summarizer): def __init__(self, config: SummarizationConfig, templates: dict | None = None) -> None: self._config = config self._templates = templates or {} - # For local servers, no API key needed — use a dummy base_url = config.host if not base_url.endswith("/v1"): base_url = base_url.rstrip("/") + "/v1" - self._client = openai.OpenAI(base_url=base_url, api_key="not-needed") + # Most local servers ignore the key; for servers that require auth (e.g. oMLX) + # it is taken from config.api_key, which the OPENAI_API_KEY env var overrides. 
+ api_key = config.api_key or "not-needed" + self._client = openai.OpenAI(base_url=base_url, api_key=api_key) def chat( self, system_prompt: str, user_prompt: str, diff --git a/tests/test_find_audio.py b/tests/test_find_audio.py new file mode 100644 index 0000000..fd9ae1f --- /dev/null +++ b/tests/test_find_audio.py @@ -0,0 +1,36 @@ +"""Tests for media-file discovery in _find_audio (audio + video containers).""" + +from __future__ import annotations + +from ownscribe.pipeline import _find_audio + + +def _touch(directory, *names): + for name in names: + (directory / name).write_bytes(b"") + + +class TestFindAudioVideo: + def test_finds_mp4_when_no_recording_wav(self, tmp_path): + _touch(tmp_path, "meeting.mp4") + assert _find_audio(tmp_path) == tmp_path / "meeting.mp4" + + def test_finds_mov(self, tmp_path): + _touch(tmp_path, "meeting.mov") + assert _find_audio(tmp_path) == tmp_path / "meeting.mov" + + def test_finds_mkv(self, tmp_path): + _touch(tmp_path, "meeting.mkv") + assert _find_audio(tmp_path) == tmp_path / "meeting.mkv" + + def test_prefers_recording_wav_over_video(self, tmp_path): + _touch(tmp_path, "meeting.mp4", "recording.wav") + assert _find_audio(tmp_path) == tmp_path / "recording.wav" + + def test_case_insensitive_extension(self, tmp_path): + _touch(tmp_path, "Meeting.MP4") + assert _find_audio(tmp_path) == tmp_path / "Meeting.MP4" + + def test_ignores_unrelated_files(self, tmp_path): + _touch(tmp_path, "notes.txt", "slides.pdf") + assert _find_audio(tmp_path) is None