Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,17 @@ bash swift/build.sh
uv sync --extra all
```

When installed from source, the `ownscribe` command lives inside the project's
virtual environment, so run it with `uv run` from the repo directory:

```bash
uv run ownscribe # equivalent to the bare `ownscribe` used below
```

Alternatively, activate the environment once per shell session (`source .venv/bin/activate`) and
then call `ownscribe` directly. The examples in [Usage](#usage) use the bare
`ownscribe` form, which works after activation; with `uvx`, replace `ownscribe` with `uvx ownscribe`.

## Usage

### Record, transcribe, and summarize a meeting
Expand Down Expand Up @@ -147,14 +158,16 @@ ownscribe --silence-timeout 0 # disable silence auto-stop
ownscribe devices # list audio devices (uses native CoreAudio when available)
ownscribe apps # list running apps with PIDs for use with --pid
ownscribe warmup # prefetch WhisperX/pyannote models before a meeting
ownscribe transcribe recording.wav # transcribe an audio file (saves alongside the input)
ownscribe transcribe recording.wav # transcribe an audio or video file: wav/mp3/mp4/mov/mkv (saved alongside)
ownscribe summarize transcript.md # summarize a transcript (saves alongside the input)
ownscribe resume ./2026-02-20_1736 # resume a failed/partial pipeline in a directory
ownscribe resume ./2026-02-20_1736 # resume a partial run, or process a folder's audio/video recording
ownscribe ask "question" # search your meetings with a natural-language question
ownscribe config # open config file in $EDITOR
ownscribe cleanup # remove ownscribe data from disk
```

> **Video files work too.** Anywhere ownscribe accepts an audio file it also accepts a video container (mp4, mov, mkv, m4v) — it extracts the audio track via ffmpeg. To turn a recording into full notes, place it in its own folder and run `ownscribe resume ./that-folder/` (transcript + summary); use `ownscribe transcribe meeting.mp4` for a transcript only.

Use `warmup` ahead of time to avoid first-run model download delays while recording:

```bash
Expand Down Expand Up @@ -209,6 +222,7 @@ enabled = true
backend = "local" # "local" (built-in, no server needed), "ollama", or "openai"
model = "phi-4-mini" # local: "phi-4-mini", path to GGUF, or hf:owner/repo/file.gguf; ollama/openai: model name
# host = "http://localhost:11434" # only for ollama/openai backends
# api_key = "" # only for openai backend; required by servers like oMLX (or set OPENAI_API_KEY)
# template = "meeting" # "meeting", "lecture", "brief", or a custom name
# context_size = 0 # 0 = auto-detect from model; set manually for OpenAI-compatible backends

Expand All @@ -223,7 +237,7 @@ format = "markdown" # "markdown" or "json"
keep_recording = true # false = auto-delete WAV after transcription
```

**Precedence:** CLI flags > environment variables (`HF_TOKEN`, `OLLAMA_HOST`) > config file > defaults.
**Precedence:** CLI flags > environment variables (`HF_TOKEN`, `OLLAMA_HOST`, `OPENAI_API_KEY`) > config file > defaults.

## Summarization Templates

Expand Down
4 changes: 4 additions & 0 deletions src/ownscribe/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
backend = "local" # "local" (built-in, no server needed), "ollama", or "openai"
model = "phi-4-mini" # local: "phi-4-mini", path to GGUF, or hf:owner/repo/file.gguf; ollama/openai: model name
# host = "http://localhost:11434" # only for ollama/openai backends
# api_key = "" # only for openai backend; required by servers like oMLX (or set OPENAI_API_KEY)
# template = "meeting" # built-in: "meeting", "lecture", or "brief"
# context_size = 0 # 0 = auto-detect from model; set manually for OpenAI-compatible backends

Expand Down Expand Up @@ -87,6 +88,7 @@ class SummarizationConfig:
backend: str = "local"
model: str = "phi-4-mini"
host: str = "http://localhost:11434"
api_key: str = ""
template: str = ""
context_size: int = 0

Expand Down Expand Up @@ -132,6 +134,8 @@ def load(cls) -> Config:
config.diarization.hf_token = hf_token
if ollama_host := os.environ.get("OLLAMA_HOST"):
config.summarization.host = ollama_host
if api_key := os.environ.get("OPENAI_API_KEY"):
config.summarization.api_key = api_key

return config

Expand Down
7 changes: 5 additions & 2 deletions src/ownscribe/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,15 +508,18 @@ def _do_transcribe_and_summarize(


_AUDIO_EXTENSIONS = {".wav", ".mp3", ".m4a", ".flac", ".ogg", ".webm"}
# Video containers whose audio track ffmpeg/WhisperX can decode (e.g. meeting recordings).
_VIDEO_EXTENSIONS = {".mp4", ".m4v", ".mov", ".mkv"}
_MEDIA_EXTENSIONS = _AUDIO_EXTENSIONS | _VIDEO_EXTENSIONS


def _find_audio(directory: Path) -> Path | None:
"""Find an audio file in directory, preferring 'recording.wav'."""
"""Find an audio or video file in directory, preferring 'recording.wav'."""
recording = directory / "recording.wav"
if recording.exists():
return recording
for f in directory.iterdir():
if f.is_file() and f.suffix.lower() in _AUDIO_EXTENSIONS:
if f.is_file() and f.suffix.lower() in _MEDIA_EXTENSIONS:
return f
return None

Expand Down
6 changes: 4 additions & 2 deletions src/ownscribe/summarization/openai_summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ class OpenAISummarizer(Summarizer):
def __init__(self, config: SummarizationConfig, templates: dict | None = None) -> None:
    """Create a client for an OpenAI-compatible chat endpoint.

    Args:
        config: Summarization settings; ``host`` and ``api_key`` are read here.
        templates: Optional template mapping; defaults to an empty dict.
    """
    self._config = config
    self._templates = templates or {}
    # Strip trailing slashes before the suffix check so a host written as
    # ".../v1/" is not turned into ".../v1/v1".
    base_url = config.host.rstrip("/")
    if not base_url.endswith("/v1"):
        base_url += "/v1"
    # Most local servers ignore the key; servers that require auth (e.g. oMLX)
    # read it from config.api_key or the OPENAI_API_KEY env var.
    api_key = config.api_key or "not-needed"
    self._client = openai.OpenAI(base_url=base_url, api_key=api_key)

def chat(
self, system_prompt: str, user_prompt: str,
Expand Down
36 changes: 36 additions & 0 deletions tests/test_find_audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Tests for media-file discovery in _find_audio (audio + video containers)."""

from __future__ import annotations

from ownscribe.pipeline import _find_audio


def _touch(directory, *names: str) -> None:
    """Create an empty file inside *directory* for each entry in *names*."""
    for name in names:
        (directory / name).write_bytes(b"")


class TestFindAudioVideo:
    """Discovery of audio and video inputs by ``_find_audio``."""

    def test_finds_mp4_when_no_recording_wav(self, tmp_path):
        _touch(tmp_path, "meeting.mp4")
        assert _find_audio(tmp_path) == tmp_path / "meeting.mp4"

    def test_finds_mov(self, tmp_path):
        _touch(tmp_path, "meeting.mov")
        assert _find_audio(tmp_path) == tmp_path / "meeting.mov"

    def test_finds_mkv(self, tmp_path):
        _touch(tmp_path, "meeting.mkv")
        assert _find_audio(tmp_path) == tmp_path / "meeting.mkv"

    def test_finds_m4v(self, tmp_path):
        # .m4v is a supported video container but had no coverage.
        _touch(tmp_path, "meeting.m4v")
        assert _find_audio(tmp_path) == tmp_path / "meeting.m4v"

    def test_prefers_recording_wav_over_video(self, tmp_path):
        _touch(tmp_path, "meeting.mp4", "recording.wav")
        assert _find_audio(tmp_path) == tmp_path / "recording.wav"

    def test_case_insensitive_extension(self, tmp_path):
        _touch(tmp_path, "Meeting.MP4")
        assert _find_audio(tmp_path) == tmp_path / "Meeting.MP4"

    def test_ignores_unrelated_files(self, tmp_path):
        _touch(tmp_path, "notes.txt", "slides.pdf")
        assert _find_audio(tmp_path) is None

    def test_returns_none_for_empty_directory(self, tmp_path):
        assert _find_audio(tmp_path) is None

    def test_ignores_directory_with_media_suffix(self, tmp_path):
        # Only regular files count; a folder named like a video is skipped.
        (tmp_path / "clips.mp4").mkdir()
        assert _find_audio(tmp_path) is None
Loading