diff --git a/docs/concepts/data-attributes.mdx b/docs/concepts/data-attributes.mdx
index ed1caaff1..2ee08dd49 100644
--- a/docs/concepts/data-attributes.mdx
+++ b/docs/concepts/data-attributes.mdx
@@ -20,6 +20,35 @@ Hyperframes uses HTML data attributes to control timing, media playback, and [co
| `data-media-start` | `"2"` | Media playback offset / trim point in seconds. Default: `0` |
| `data-volume` | `"0.8"` | Audio/video volume, 0 to 1 |
| `data-has-audio` | `"true"` | Indicates video has an audio track |
+| `data-role` | `"voice"` | Marks narration or dialogue that should trigger ducking |
+| `data-duck` | `"-12dB"` | Lowers this track while audible `data-role="voice"` clips overlap it |
+| `data-duck-fade` | `"0.3"` | Ducking ramp duration in seconds. Default: `0.3` |
+
+## Music Ducking
+
+Use `data-duck` on background music and `data-role="voice"` on narration or dialogue. Hyperframes computes the overlap windows from clip timing and renders the result through the same volume automation path used for authored fades.
+
+```html index.html
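+<audio id="music" src="music.mp3" data-start="0" data-duration="30" data-track-index="1" data-volume="0.8" data-duck="-12dB" data-duck-fade="0.3"></audio>
+<audio id="narration" src="narration.mp3" data-start="2" data-duration="8" data-track-index="2" data-role="voice"></audio>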
+```
+
+`data-duck` accepts dB values (`"-12dB"` or `"-12"`) and linear gains (`"0.25"`). `data-duck-fade` controls the ramp into and out of the ducked level. Runtime-triggered audio that is not represented as timed `<audio>` or audible `<video>` clips is not part of the compile-time duck calculation.
## Composition Attributes
diff --git a/docs/docs.json b/docs/docs.json
index c8a1caac6..5c4b2566f 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -85,6 +85,7 @@
"guides/hyperframes-vs-remotion",
"guides/gsap-animation",
"guides/keyframes",
+ "guides/music-voiceover",
"guides/rendering",
"guides/remove-background",
"guides/hdr",
diff --git a/docs/guides/music-voiceover.mdx b/docs/guides/music-voiceover.mdx
new file mode 100644
index 000000000..41b891c77
--- /dev/null
+++ b/docs/guides/music-voiceover.mdx
@@ -0,0 +1,32 @@
+---
+title: Music + Voiceover
+description: "Balance background music under narration with declarative audio ducking."
+---
+
+Use `data-duck` when a music bed should automatically drop under narration or dialogue. Mark the narration with `data-role="voice"` and put the duck amount on the music track.
+
+```html index.html
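+<audio id="music" src="music.mp3" data-start="0" data-duration="30" data-track-index="1" data-volume="0.8" data-duck="-12dB" data-duck-fade="0.3"></audio>
+<audio id="intro-vo" src="intro.mp3" data-start="2" data-duration="4" data-track-index="2" data-role="voice"></audio>
+<audio id="outro-vo" src="outro.mp3" data-start="6.4" data-duration="5" data-track-index="2" data-role="voice"></audio>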
+```
+
+`data-duck="-12dB"` lowers the music by 12 dB while the voice clip is audible. `data-duck-fade="0.3"` ramps into the lower level before the voice starts and ramps back out after it ends. If several voice clips are close together, short gaps are merged so the music does not pump between words.
+
+The ducking pass uses resolved clip timing, so it works best for timed `<audio>` clips and audible `<video>` clips. Audio started imperatively from script is outside this compile-time calculation.
diff --git a/docs/reference/html-schema.mdx b/docs/reference/html-schema.mdx
index dd6d0e70c..feee87073 100644
--- a/docs/reference/html-schema.mdx
+++ b/docs/reference/html-schema.mdx
@@ -56,6 +56,9 @@ Common sizes:
| `data-track-index` | All | Yes | Timeline track number. Controls z-ordering (higher = in front). Clips on the same track cannot overlap. |
| `data-media-start` | video, audio | No | Playback offset / trim point in source file (seconds). Default: `0`. See [Data Attributes](/concepts/data-attributes). |
| `data-volume` | audio, video | No | Volume level from `0` to `1`. Default: `1`. |
+| `data-role` | audio, video | No | Semantic audio role. Set `data-role="voice"` on narration or dialogue tracks that should trigger music ducking. |
+| `data-duck` | audio, video | No | Duck this track while any audible `data-role="voice"` clip overlaps it. Accepts dB (`"-12dB"` or `"-12"`) or a linear gain (`"0.25"`). |
+| `data-duck-fade` | audio, video | No | Ducking ramp duration in seconds. Default: `0.3`. |
| `data-composition-id` | div | On compositions | Unique composition ID. Must match the key used in `window.__timelines`. |
| `data-composition-src` | div | No | Path to external composition HTML file (for [nested compositions](#composition-clips)). |
| `data-variable-values` | div | No | JSON object of values passed to a nested composition. The framework carries the values through, but your composition script must read and apply them manually. |
@@ -129,8 +132,30 @@ Common sizes:
- `data-duration` is **optional** — defaults to the remaining duration of the source file from `data-media-start`
- Audio clips are invisible — do not add `class="clip"` (there is nothing to show/hide)
- `data-volume` controls volume — use `"0.5"` for background music at 50% volume
+ - `data-duck` lowers background music under clips marked `data-role="voice"`
- `data-media-start` trims the beginning of the audio source, just like video
- Multiple audio clips can overlap on different tracks for layered sound design
+
+ ```html
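+  <audio id="music" src="music.mp3" data-start="0" data-duration="30" data-track-index="1" data-volume="0.5" data-duck="-12dB"></audio>
+  <audio id="narration" src="narration.mp3" data-start="2" data-duration="8" data-track-index="2" data-role="voice"></audio>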
+ ```
diff --git a/packages/core/src/compiler/htmlCompiler.ts b/packages/core/src/compiler/htmlCompiler.ts
index 264f4ec11..113670047 100644
--- a/packages/core/src/compiler/htmlCompiler.ts
+++ b/packages/core/src/compiler/htmlCompiler.ts
@@ -1,6 +1,7 @@
import { resolve } from "path";
import {
compileTimingAttrs,
+ compileAudioDucking,
injectDurations,
extractResolvedMedia,
clampDurations,
@@ -37,7 +38,7 @@ export async function compileHtml(
const { html: staticCompiled, unresolved } = compileTimingAttrs(rawHtml);
let html = staticCompiled;
- if (!probeMediaDuration) return html;
+ if (!probeMediaDuration) return compileAudioDucking(html);
// Phase 1: Resolve missing durations
const mediaUnresolved = unresolved.filter(
@@ -86,5 +87,5 @@ export async function compileHtml(
html = clampDurations(html, clampList);
}
- return html;
+ return compileAudioDucking(html);
}
diff --git a/packages/core/src/compiler/index.ts b/packages/core/src/compiler/index.ts
index d18803c92..5ad5b9c1f 100644
--- a/packages/core/src/compiler/index.ts
+++ b/packages/core/src/compiler/index.ts
@@ -1,6 +1,7 @@
// Timing compiler (browser-safe)
export {
compileTimingAttrs,
+ compileAudioDucking,
injectDurations,
extractResolvedMedia,
clampDurations,
diff --git a/packages/core/src/compiler/timingCompiler.test.ts b/packages/core/src/compiler/timingCompiler.test.ts
index b7dcf044c..2156d8b4a 100644
--- a/packages/core/src/compiler/timingCompiler.test.ts
+++ b/packages/core/src/compiler/timingCompiler.test.ts
@@ -1,10 +1,19 @@
import { describe, it, expect } from "vitest";
import {
compileTimingAttrs,
+ compileAudioDucking,
injectDurations,
extractResolvedMedia,
clampDurations,
} from "./timingCompiler.js";
+import { parseVolumeKeyframesAttribute } from "../runtime/mediaVolumeEnvelope.js";
+
+function duckKeyframesFor(html: string, id: string) { // test helper: parsed duck keyframes of the <audio> tag with this id
+  const match = html.match(new RegExp(`<audio\\b[^>]*id=["']${id}["'][^>]*>`, "i")); // opening <audio> tag only
+  if (!match) throw new Error(`Missing audio tag ${id}`);
+  const attr = match[0].match(/data-hf-duck-keyframes='([^']+)'/); // the compiler writes the JSON single-quoted
+  return parseVolumeKeyframesAttribute(attr?.[1]); // a missing attribute parses to []
+}
describe("compileTimingAttrs", () => {
it("adds data-end when data-start and data-duration are present on a video", () => {
@@ -185,3 +194,53 @@ describe("clampDurations", () => {
expect(result).toContain('data-end="7"');
});
});
+
+describe("compileAudioDucking", () => {
+  it("emits duck keyframes around voice overlaps", () => {
+    const html = compileTimingAttrs(`
+      <audio id="music" src="music.mp3" data-start="0" data-duration="10" data-volume="0.8" data-duck="-12dB" data-duck-fade="0.5"></audio>
+      <audio id="voice" src="voice.mp3" data-start="2" data-duration="2" data-role="voice"></audio>
+    `).html;
+
+    const keyframes = duckKeyframesFor(compileAudioDucking(html), "music");
+
+    expect(keyframes.map((kf) => kf.time)).toEqual([1.5, 2, 4, 4.5]);
+    expect(keyframes[0]?.volume).toBe(0.8);
+    expect(keyframes[1]?.volume).toBeCloseTo(0.20095, 5); // 0.8 * 10^(-12/20)
+    expect(keyframes[2]?.volume).toBeCloseTo(0.20095, 5);
+    expect(keyframes[3]?.volume).toBe(0.8);
+  });
+
+  it("merges voice gaps shorter than two fades", () => {
+    const html = compileTimingAttrs(`
+      <audio id="music" src="music.mp3" data-start="0" data-duration="5" data-duck="0.25" data-duck-fade="0.25"></audio>
+      <audio id="voice-a" src="voice-a.mp3" data-start="1" data-duration="0.6" data-role="voice"></audio>
+      <audio id="voice-b" src="voice-b.mp3" data-start="2" data-duration="1" data-role="voice"></audio>
+    `).html;
+
+    const keyframes = duckKeyframesFor(compileAudioDucking(html), "music");
+
+    expect(keyframes).toEqual([
+      { time: 0.75, volume: 1 },
+      { time: 1, volume: 0.25 },
+      { time: 3, volume: 0.25 },
+      { time: 3.25, volume: 1 },
+    ]);
+  });
+
+  it("uses resolved timeline duration for playback-rate voice clips", () => {
+    const html = compileTimingAttrs(`
+      <audio id="music" src="music.mp3" data-start="0" data-duration="10" data-duck="0.5" data-duck-fade="0.5"></audio>
+      <audio id="voice" src="voice.mp3" data-start="1" data-duration="3" data-playback-rate="0.5" data-end="7" data-role="voice"></audio>
+    `).html;
+
+    const keyframes = duckKeyframesFor(compileAudioDucking(html), "music");
+
+    expect(keyframes).toEqual([
+      { time: 0.5, volume: 1 },
+      { time: 1, volume: 0.5 },
+      { time: 7, volume: 0.5 }, // resolved data-end wins over data-start + data-duration (= 4)
+      { time: 7.5, volume: 1 },
+    ]);
+  });
+});
diff --git a/packages/core/src/compiler/timingCompiler.ts b/packages/core/src/compiler/timingCompiler.ts
index 169c37b29..047de7de7 100644
--- a/packages/core/src/compiler/timingCompiler.ts
+++ b/packages/core/src/compiler/timingCompiler.ts
@@ -50,6 +50,23 @@ export interface CompilationResult {
// ffprobe precision can differ slightly across local and CI media stacks.
const MEDIA_DURATION_CLAMP_EPSILON_SECONDS = 0.05;
+const DUCK_KEYFRAMES_ATTR = "data-hf-duck-keyframes";
+const DEFAULT_DUCK_FADE_SECONDS = 0.3;
+
+interface Interval {
+ start: number;
+ end: number;
+}
+
+interface MediaTimingClip extends Interval {
+ id: string;
+ volume: number;
+ muted: boolean;
+ hasAudio: boolean;
+ role: string | null;
+ duckGain: number | null;
+ duckFade: number;
+}
export function shouldClampMediaDuration(declaredDuration: number, maxDuration: number): boolean {
return declaredDuration > maxDuration + MEDIA_DURATION_CLAMP_EPSILON_SECONDS;
@@ -58,8 +75,8 @@ export function shouldClampMediaDuration(declaredDuration: number, maxDuration:
// ── Helpers ──────────────────────────────────────────────────────────────
function getAttr(tag: string, attr: string): string | null {
- const match = tag.match(new RegExp(`${attr}=["']([^"']+)["']`));
- return match ? (match[1] ?? null) : null;
+ const match = tag.match(new RegExp(`(?:^|\\s)${attr}\\s*=\\s*(["'])(.*?)\\1`, "i"));
+ return match ? (match[2] ?? null) : null;
}
function hasAttr(tag: string, attr: string): boolean {
@@ -70,6 +87,160 @@ function injectAttr(tag: string, attr: string, value: string): string {
return tag.replace(/>$/, ` ${attr}="${value}">`);
}
+function setAttr(tag: string, attr: string, value: string): string { // set or replace `attr` on an opening tag, single-quoted
+  const serialized = value.replace(/&/g, "&amp;").replace(/'/g, "&#39;"); // escape so the value cannot end the quoted attribute
+  const pattern = new RegExp(`(\\s${attr}\\s*=\\s*)(["'])(.*?)\\2`, "i"); // an existing attr="…" or attr='…'
+  if (pattern.test(tag)) {
+    return tag.replace(pattern, (_match, prefix: string) => `${prefix}'${serialized}'`);
+  }
+  return tag.replace(/>$/, () => ` ${attr}='${serialized}'>`); // function replacer: `$` in the value stays literal
+}
+
+function removeAttr(tag: string, attr: string): string {
+ return tag.replace(new RegExp(`\\s${attr}(?:\\s*=\\s*(["']).*?\\1)?`, "i"), "");
+}
+
+function parseFiniteNumber(raw: string | null): number | null {
+ if (raw === null) return null;
+ const parsed = Number.parseFloat(raw);
+ return Number.isFinite(parsed) ? parsed : null;
+}
+
+function parseSeconds(raw: string | null, fallback: number): number { // parse "0.3", "0.3s" or "300ms" into seconds
+  if (raw === null) return fallback;
+  const text = raw.trim().toLowerCase();
+  const parsed = Number.parseFloat(text) / (text.endsWith("ms") ? 1000 : 1); // parseFloat ignores the trailing unit; "ms" is scaled to seconds
+  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback; // negative or unparsable input uses the fallback
+}
+
+function clampVolume(volume: number): number {
+ if (!Number.isFinite(volume)) return 1;
+ return Math.max(0, Math.min(1, volume));
+}
+
+function parseDuckGain(raw: string | null): number | null {
+ if (raw === null) return null;
+ const trimmed = raw.trim().toLowerCase();
+ if (!trimmed) return null;
+ if (trimmed.endsWith("db")) {
+ const db = Number.parseFloat(trimmed.slice(0, -2));
+ return Number.isFinite(db) ? Math.pow(10, db / 20) : null;
+ }
+ const value = Number.parseFloat(trimmed);
+ if (!Number.isFinite(value)) return null;
+ return value < 0 ? Math.pow(10, value / 20) : value;
+}
+
+function clipEndFromAttrs(tag: string, start: number): number | null {
+ const end = parseFiniteNumber(getAttr(tag, "data-end"));
+ if (end !== null) return end;
+ const duration = parseFiniteNumber(getAttr(tag, "data-duration"));
+ return duration !== null ? start + duration : null;
+}
+
+function readMediaTimingClip(tag: string): MediaTimingClip | null {
+ const id = getAttr(tag, "id");
+ if (!id) return null;
+ const start = parseFiniteNumber(getAttr(tag, "data-start"));
+ if (start === null) return null;
+ const end = clipEndFromAttrs(tag, start);
+ if (end === null || end <= start) return null;
+
+ const tagMatch = tag.match(/^<(audio|video)\b/i);
+ const tagName = tagMatch?.[1]?.toLowerCase();
+ if (tagName !== "audio" && tagName !== "video") return null;
+
+ const volume = clampVolume(parseFiniteNumber(getAttr(tag, "data-volume")) ?? 1);
+ const muted = hasAttr(tag, "muted");
+ const hasAudio = tagName === "audio" || getAttr(tag, "data-has-audio") === "true";
+ const role = getAttr(tag, "data-role")?.trim().toLowerCase() ?? null;
+ const duckGain = parseDuckGain(getAttr(tag, "data-duck"));
+
+ return {
+ id,
+ start,
+ end,
+ volume,
+ muted,
+ hasAudio,
+ role,
+ duckGain,
+ duckFade: parseSeconds(getAttr(tag, "data-duck-fade"), DEFAULT_DUCK_FADE_SECONDS),
+ };
+}
+
+function isAudibleClip(clip: MediaTimingClip): boolean {
+ return clip.hasAudio && !clip.muted && clip.volume > 0;
+}
+
+function mergeIntervals(intervals: Interval[], maxGap: number): Interval[] {
+ const sorted = intervals
+ .filter((interval) => interval.end > interval.start)
+ .sort((a, b) => a.start - b.start);
+ const merged: Interval[] = [];
+ for (const interval of sorted) {
+ const previous = merged.at(-1);
+ if (previous && interval.start <= previous.end + maxGap) {
+ previous.end = Math.max(previous.end, interval.end);
+ } else {
+ merged.push({ ...interval });
+ }
+ }
+ return merged;
+}
+
+function intersectIntervals(track: Interval, intervals: Interval[]): Interval[] {
+ const overlaps: Interval[] = [];
+ for (const interval of intervals) {
+ const start = Math.max(track.start, interval.start);
+ const end = Math.min(track.end, interval.end);
+ if (end > start) overlaps.push({ start, end });
+ }
+ return overlaps;
+}
+
+function roundedKeyframe(time: number, volume: number): { time: number; volume: number } {
+ return {
+ time: Number(time.toFixed(6)),
+ volume: Number(clampVolume(volume).toFixed(6)),
+ };
+}
+
+function createDuckKeyframes(
+ track: MediaTimingClip,
+ voiceIntervals: Interval[],
+): { time: number; volume: number }[] {
+ if (track.duckGain === null) return [];
+ const duckVolume = clampVolume(track.volume * track.duckGain);
+ if (duckVolume >= track.volume - 0.000001) return [];
+
+ const overlaps = mergeIntervals(intersectIntervals(track, voiceIntervals), track.duckFade * 2);
+ const keyframes: { time: number; volume: number }[] = [];
+ const add = (time: number, volume: number) => {
+ const point = roundedKeyframe(time, volume);
+ const previous = keyframes.at(-1);
+ if (previous && Math.abs(previous.time - point.time) < 0.000001) {
+ previous.volume = point.volume;
+ } else {
+ keyframes.push(point);
+ }
+ };
+
+ for (const overlap of overlaps) {
+ const duration = overlap.end - overlap.start;
+ const fade = Math.min(track.duckFade, duration / 2);
+ const rampStart = Math.max(track.start, overlap.start - fade);
+ const rampEnd = Math.min(track.end, overlap.end + fade);
+
+ if (rampStart < overlap.start) add(rampStart, track.volume);
+ add(overlap.start, duckVolume);
+ add(overlap.end, duckVolume);
+ if (rampEnd > overlap.end) add(rampEnd, track.volume);
+ }
+
+ return keyframes;
+}
+
// ── Core compilation ─────────────────────────────────────────────────────
function compileTag(
@@ -177,6 +348,42 @@ export function compileTimingAttrs(html: string): CompilationResult {
return { html, unresolved };
}
+/**
+ * Compile declarative audio ducking into an internal volume multiplier envelope.
+ *
+ * `data-duck` stays authored source-of-truth. The generated
+ * `data-hf-duck-keyframes` attribute is intentionally internal so repeated
+ * compilation can replace it without multiplying the duck curve twice.
+ */
+export function compileAudioDucking(html: string): string {
+  const mediaTags = Array.from(html.matchAll(/<(?:audio|video)\b[^>]*>/gi), (match) => match[0]);
+  const clips = mediaTags
+    .map((tag) => readMediaTimingClip(tag))
+    .filter((clip): clip is MediaTimingClip => clip !== null); // tags without id or resolved timing are skipped
+  const voiceIntervals = mergeIntervals(
+    clips
+      .filter((clip) => clip.role === "voice" && isAudibleClip(clip))
+      .map((clip) => ({ start: clip.start, end: clip.end })),
+    0, // only join touching/overlapping voice clips here; gap merging depends on each track's fade
+  );
+
+  const duckKeyframesById = new Map<string, { time: number; volume: number }[]>();
+  for (const clip of clips) {
+    if (clip.duckGain === null || !isAudibleClip(clip)) continue; // only audible tracks that declare data-duck
+    const keyframes = createDuckKeyframes(clip, voiceIntervals);
+    if (keyframes.length > 0) duckKeyframesById.set(clip.id, keyframes);
+  }
+
+  return html.replace(/<(?:audio|video)\b[^>]*>/gi, (tag) => {
+    const id = getAttr(tag, "id");
+    const keyframes = id ? duckKeyframesById.get(id) : undefined;
+    if (keyframes && keyframes.length > 0) {
+      return setAttr(tag, DUCK_KEYFRAMES_ATTR, JSON.stringify(keyframes));
+    }
+    return hasAttr(tag, DUCK_KEYFRAMES_ATTR) ? removeAttr(tag, DUCK_KEYFRAMES_ATTR) : tag; // drop a stale envelope from an earlier compile
+  });
+}
+
/**
* Inject resolved durations into compiled HTML.
*
diff --git a/packages/core/src/index.test.ts b/packages/core/src/index.test.ts
index 55ab677e0..1e703e4dc 100644
--- a/packages/core/src/index.test.ts
+++ b/packages/core/src/index.test.ts
@@ -128,6 +128,7 @@ describe("@hyperframes/core public API exports", () => {
describe("compiler exports", () => {
it("exports compiler functions", () => {
expect(typeof core.compileTimingAttrs).toBe("function");
+ expect(typeof core.compileAudioDucking).toBe("function");
expect(typeof core.injectDurations).toBe("function");
expect(typeof core.extractResolvedMedia).toBe("function");
expect(typeof core.clampDurations).toBe("function");
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 747e32abf..4187410b9 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -115,6 +115,7 @@ export type {
export {
compileTimingAttrs,
+ compileAudioDucking,
injectDurations,
extractResolvedMedia,
clampDurations,
diff --git a/packages/core/src/runtime/mediaVolumeEnvelope.ts b/packages/core/src/runtime/mediaVolumeEnvelope.ts
index 69d55ea0c..ac6680ef1 100644
--- a/packages/core/src/runtime/mediaVolumeEnvelope.ts
+++ b/packages/core/src/runtime/mediaVolumeEnvelope.ts
@@ -12,6 +12,133 @@ export interface VolumeKeyframe {
volume: number;
}
+function clampVolume(volume: number): number {
+ if (!Number.isFinite(volume)) return 1;
+ return Math.max(0, Math.min(1, volume));
+}
+
+function readNumberProperty(value: object, key: "time" | "volume"): number | null {
+ const descriptor = Object.getOwnPropertyDescriptor(value, key);
+ return typeof descriptor?.value === "number" && Number.isFinite(descriptor.value)
+ ? descriptor.value
+ : null;
+}
+
+function roundedPoint(time: number, volume: number): VolumeKeyframe {
+ return {
+ time: Number(time.toFixed(6)),
+ volume: Number(clampVolume(volume).toFixed(6)),
+ };
+}
+
+function compactVolumeKeyframes(keyframes: VolumeKeyframe[]): VolumeKeyframe[] {
+ const sorted = keyframes
+ .filter((kf) => Number.isFinite(kf.time) && Number.isFinite(kf.volume))
+ .map((kf) => roundedPoint(kf.time, kf.volume))
+ .sort((a, b) => a.time - b.time);
+
+ const deduped: VolumeKeyframe[] = [];
+ for (const point of sorted) {
+ const previous = deduped.at(-1);
+ if (previous && Math.abs(previous.time - point.time) < 0.000001) {
+ previous.volume = point.volume;
+ } else {
+ deduped.push(point);
+ }
+ }
+
+ if (deduped.length < 3) return deduped;
+
+ const compacted: VolumeKeyframe[] = [];
+ for (const point of deduped) {
+ compacted.push(point);
+ while (compacted.length >= 3) {
+ const c = compacted.at(-1)!;
+ const b = compacted.at(-2)!;
+ const a = compacted.at(-3)!;
+ const span = c.time - a.time;
+ const expected =
+ span <= 0 ? a.volume : a.volume + ((c.volume - a.volume) * (b.time - a.time)) / span;
+ if (Math.abs(expected - b.volume) > 0.000001) break;
+ compacted.splice(compacted.length - 2, 1);
+ }
+ }
+ return compacted;
+}
+
+export function parseVolumeKeyframesAttribute(raw: string | null | undefined): VolumeKeyframe[] {
+ if (!raw) return [];
+
+ let parsed: unknown;
+ try {
+ parsed = JSON.parse(raw);
+ } catch {
+ return [];
+ }
+
+ if (!Array.isArray(parsed)) return [];
+
+ const keyframes: VolumeKeyframe[] = [];
+ for (const item of parsed) {
+ if (typeof item !== "object" || item === null) continue;
+ const time = readNumberProperty(item, "time");
+ const volume = readNumberProperty(item, "volume");
+ if (time === null || volume === null) continue;
+ keyframes.push({ time, volume });
+ }
+ return compactVolumeKeyframes(keyframes);
+}
+
+export function serializeVolumeKeyframesAttribute(keyframes: VolumeKeyframe[]): string {
+ return JSON.stringify(compactVolumeKeyframes(keyframes));
+}
+
+export function multiplyVolumeKeyframeEnvelopes(params: {
+ sourceKeyframes: VolumeKeyframe[];
+ multiplierKeyframes: VolumeKeyframe[];
+ trackStart: number;
+ trackEnd: number;
+ baseVolume: number;
+}): VolumeKeyframe[] {
+ const multiplier = compactVolumeKeyframes(params.multiplierKeyframes);
+ if (multiplier.length === 0) return compactVolumeKeyframes(params.sourceKeyframes);
+
+ const source = compactVolumeKeyframes(params.sourceKeyframes);
+ if (source.length === 0) return multiplier;
+
+ const baseVolume = clampVolume(params.baseVolume);
+ const sourceEnvelope = normaliseEnvelope(source, params.trackStart, baseVolume);
+ const multiplierEnvelope = normaliseEnvelope(multiplier, params.trackStart, baseVolume);
+ if (sourceEnvelope.length === 0) return multiplier;
+ if (multiplierEnvelope.length === 0) return source;
+
+ const duration = Math.max(0, params.trackEnd - params.trackStart);
+ const times = new Set([0, duration]);
+ for (const point of sourceEnvelope) {
+ if (point.time >= 0 && point.time <= duration) times.add(point.time);
+ }
+ for (const point of multiplierEnvelope) {
+ if (point.time >= 0 && point.time <= duration) times.add(point.time);
+ }
+
+ const sortedTimes = Array.from(times).sort((a, b) => a - b);
+ const sampleTimes = new Set(sortedTimes);
+ for (let i = 0; i < sortedTimes.length - 1; i += 1) {
+ const a = sortedTimes[i]!;
+ const b = sortedTimes[i + 1]!;
+ if (b > a) sampleTimes.add((a + b) / 2);
+ }
+
+ const combined: VolumeKeyframe[] = [];
+ for (const relativeTime of Array.from(sampleTimes).sort((a, b) => a - b)) {
+ const sourceVolume = interpolateVolumeGain(sourceEnvelope, relativeTime);
+ const multiplierVolume = interpolateVolumeGain(multiplierEnvelope, relativeTime);
+ const multiplierGain = baseVolume > 0 ? multiplierVolume / baseVolume : 0;
+ combined.push(roundedPoint(params.trackStart + relativeTime, sourceVolume * multiplierGain));
+ }
+ return compactVolumeKeyframes(combined);
+}
+
/**
* Normalise raw keyframes to track-relative seconds: subtract `trackStart`,
* clamp to [0,1], sort, de-duplicate, and prepend a `baseVolume` anchor at
@@ -29,7 +156,7 @@ export function normaliseEnvelope(
.filter((k) => Number.isFinite(k.time) && Number.isFinite(k.volume))
.map((k) => ({
time: Math.max(0, k.time - trackStart),
- volume: Math.max(0, Math.min(1, k.volume)),
+ volume: clampVolume(k.volume),
}))
.sort((a, b) => a.time - b.time);
@@ -45,7 +172,7 @@ export function normaliseEnvelope(
if (deduped.length === 0) return deduped;
if (deduped[0]!.time > 0) {
- deduped.unshift({ time: 0, volume: Math.max(0, Math.min(1, baseVolume)) });
+ deduped.unshift({ time: 0, volume: clampVolume(baseVolume) });
}
return deduped;
}
@@ -98,7 +225,7 @@ export function probeElementVolumeKeyframes(
: compositionDuration;
const staticAttr = Number.parseFloat(el.dataset.volume ?? "");
- const staticVolume = Number.isFinite(staticAttr) ? Math.max(0, Math.min(1, staticAttr)) : 1;
+ const staticVolume = Number.isFinite(staticAttr) ? clampVolume(staticAttr) : 1;
// Reset to data-volume so GSAP captures the correct FROM value.
el.volume = staticVolume;
@@ -113,7 +240,7 @@ export function probeElementVolumeKeyframes(
seekTimeline(bounded);
const raw = Number(el.volume);
if (!Number.isFinite(raw)) continue;
- const volume = Math.max(0, Math.min(1, raw));
+ const volume = clampVolume(raw);
const last = keyframes.at(-1);
if (!last || Math.abs(last.volume - volume) > 0.0001 || bounded === sampleEnd) {
keyframes.push({ time: Number(bounded.toFixed(6)), volume: Number(volume.toFixed(6)) });
@@ -140,11 +267,24 @@ export function probeAndCacheElementVolume(
compositionDuration: number,
cache: WeakMap,
): void {
- if (!timeline) return;
if (!(mediaEl instanceof HTMLAudioElement) && !(mediaEl instanceof HTMLVideoElement)) return;
- if (compositionDuration <= 0) return;
+
+ const start = Number.parseFloat(mediaEl.dataset.start ?? "0") || 0;
+ const endAttr = Number.parseFloat(mediaEl.dataset.end ?? "");
+ const durationAttr = Number.parseFloat(mediaEl.dataset.duration ?? "");
+ const end =
+ Number.isFinite(endAttr) && endAttr > start
+ ? endAttr
+ : Number.isFinite(durationAttr) && durationAttr > 0
+ ? start + durationAttr
+ : compositionDuration;
+ const staticAttr = Number.parseFloat(mediaEl.dataset.volume ?? "");
+ const staticVolume = Number.isFinite(staticAttr) ? clampVolume(staticAttr) : 1;
+ const declarativeKeyframes = parseVolumeKeyframesAttribute(mediaEl.dataset.volumeKeyframes);
+ const duckKeyframes = parseVolumeKeyframesAttribute(mediaEl.dataset.hfDuckKeyframes);
const seekFn = (t: number) => {
+ if (!timeline) return;
try {
if (typeof timeline.totalTime === "function") {
timeline.totalTime(t, true);
@@ -156,8 +296,20 @@ export function probeAndCacheElementVolume(
}
};
- const keyframes = probeElementVolumeKeyframes(mediaEl, seekFn, compositionDuration, 60);
- if (keyframes) {
- cache.set(mediaEl, keyframes);
+ const probedKeyframes =
+ timeline && compositionDuration > 0
+ ? probeElementVolumeKeyframes(mediaEl, seekFn, compositionDuration, 60)
+ : null;
+
+ let keyframes = probedKeyframes ?? declarativeKeyframes;
+ if (duckKeyframes.length > 0) {
+ keyframes = multiplyVolumeKeyframeEnvelopes({
+ sourceKeyframes: keyframes,
+ multiplierKeyframes: duckKeyframes,
+ trackStart: start,
+ trackEnd: end,
+ baseVolume: staticVolume,
+ });
}
+ if (keyframes.length > 0) cache.set(mediaEl, keyframes);
}
diff --git a/packages/engine/src/services/audioElementParser.ts b/packages/engine/src/services/audioElementParser.ts
new file mode 100644
index 000000000..9db710d11
--- /dev/null
+++ b/packages/engine/src/services/audioElementParser.ts
@@ -0,0 +1,69 @@
+import { parseHTML } from "linkedom";
+import {
+ multiplyVolumeKeyframeEnvelopes,
+ parseVolumeKeyframesAttribute,
+} from "@hyperframes/core/media-volume-envelope";
+import { unwrapTemplate } from "../utils/htmlTemplate.js";
+import type { AudioElement } from "./audioMixer.types.js";
+
+function parseElementVolumeKeyframes(
+ el: Element,
+ start: number,
+ end: number,
+ baseVolume: number,
+): AudioElement["volumeKeyframes"] {
+ const authored = parseVolumeKeyframesAttribute(el.getAttribute("data-volume-keyframes"));
+ const duck = parseVolumeKeyframesAttribute(el.getAttribute("data-hf-duck-keyframes"));
+ if (duck.length === 0) return authored.length > 0 ? authored : undefined;
+ return multiplyVolumeKeyframeEnvelopes({
+ sourceKeyframes: authored,
+ multiplierKeyframes: duck,
+ trackStart: start,
+ trackEnd: end,
+ baseVolume,
+ });
+}
+
+function readAudioElement(el: Element, type: "audio" | "video"): AudioElement | null {
+ const id = el.getAttribute("id");
+ const src = el.getAttribute("src");
+ if (!id || !src) return null;
+
+ const startAttr = el.getAttribute("data-start");
+ const endAttr = el.getAttribute("data-end");
+ const mediaStartAttr = el.getAttribute("data-media-start");
+ const layerAttr = el.getAttribute("data-layer");
+ const volumeAttr = el.getAttribute("data-volume");
+ const start = startAttr ? parseFloat(startAttr) : 0;
+ const end = endAttr ? parseFloat(endAttr) : 0;
+ const volume = volumeAttr ? parseFloat(volumeAttr) : 1.0;
+
+ return {
+ id: type === "video" ? `${id}-audio` : id,
+ src,
+ start,
+ end,
+ mediaStart: mediaStartAttr ? parseFloat(mediaStartAttr) : 0,
+ layer: layerAttr ? parseInt(layerAttr) : 0,
+ volume,
+ volumeKeyframes: parseElementVolumeKeyframes(el, start, end, volume),
+ type,
+ };
+}
+
+export function parseAudioElements(html: string): AudioElement[] {
+ const elements: AudioElement[] = [];
+ const { document } = parseHTML(unwrapTemplate(html));
+
+ for (const el of document.querySelectorAll("audio[id][src]")) {
+ const audio = readAudioElement(el, "audio");
+ if (audio) elements.push(audio);
+ }
+
+ for (const el of document.querySelectorAll('video[id][src][data-has-audio="true"]')) {
+ const audio = readAudioElement(el, "video");
+ if (audio) elements.push(audio);
+ }
+
+ return elements;
+}
diff --git a/packages/engine/src/services/audioMixer.test.ts b/packages/engine/src/services/audioMixer.test.ts
index 6f6eb4756..4310d92d2 100644
--- a/packages/engine/src/services/audioMixer.test.ts
+++ b/packages/engine/src/services/audioMixer.test.ts
@@ -16,7 +16,7 @@ vi.mock("../utils/runFfmpeg.js", () => ({
runFfmpeg: runFfmpegMock,
}));
-import { processCompositionAudio } from "./audioMixer.js";
+import { parseAudioElements, processCompositionAudio } from "./audioMixer.js";
describe("processCompositionAudio", () => {
const tempDirs: string[] = [];
@@ -28,6 +28,44 @@ describe("processCompositionAudio", () => {
}
});
+  it("parses compiled duck keyframes as volume automation", () => {
+    const elements = parseAudioElements(`
+      <audio id="music" src="music.wav" data-start="0" data-end="4" data-volume="0.8" data-hf-duck-keyframes='[{"time":1,"volume":0.2},{"time":3,"volume":0.2},{"time":3.5,"volume":0.8}]'></audio>
+    `);
+
+    expect(elements).toHaveLength(1);
+    expect(elements[0]?.volumeKeyframes).toEqual([
+      { time: 1, volume: 0.2 },
+      { time: 3, volume: 0.2 },
+      { time: 3.5, volume: 0.8 },
+    ]);
+  });
+
+  it("multiplies compiled ducking over authored volume keyframes", () => {
+    const elements = parseAudioElements(`
+      <audio id="music" src="music.wav" data-start="0" data-end="4" data-volume="0.8" data-volume-keyframes='[{"time":0,"volume":0.5},{"time":2,"volume":0.6},{"time":4,"volume":0.5}]' data-hf-duck-keyframes='[{"time":1,"volume":0.2},{"time":3,"volume":0.2},{"time":3.5,"volume":0.8}]'></audio>
+    `);
+
+    const volumes = elements[0]?.volumeKeyframes?.map((kf) => kf.volume);
+    expect(Math.min(...(volumes ?? [1]))).toBeLessThan(0.2); // authored <= 0.6 times duck gain 0.2 / 0.8
+    expect(elements[0]?.volumeKeyframes?.at(-1)).toEqual({ time: 4, volume: 0.5 });
+  });
+
it("preserves muted tracks and uses unity master gain by default", async () => {
const baseDir = mkdtempSync(join(tmpdir(), "hf-audio-base-"));
const workDir = mkdtempSync(join(tmpdir(), "hf-audio-work-"));
diff --git a/packages/engine/src/services/audioMixer.ts b/packages/engine/src/services/audioMixer.ts
index b47d6171c..44a81fe80 100644
--- a/packages/engine/src/services/audioMixer.ts
+++ b/packages/engine/src/services/audioMixer.ts
@@ -6,17 +6,16 @@
import { existsSync, mkdirSync, rmSync } from "fs";
import { isAbsolute, join, dirname } from "path";
-import { parseHTML } from "linkedom";
import { extractAudioMetadata } from "../utils/ffprobe.js";
import { downloadToTemp, isHttpUrl } from "../utils/urlDownloader.js";
import { DEFAULT_CONFIG, type EngineConfig } from "../config.js";
import { runFfmpeg } from "../utils/runFfmpeg.js";
-import { unwrapTemplate } from "../utils/htmlTemplate.js";
import { resolveProjectRelativeSrc } from "./videoFrameExtractor.js";
import type { AudioElement, AudioTrack, MixResult } from "./audioMixer.types.js";
import { applyVolumeEnvelopeToWav } from "./audioVolumeEnvelope.js";
export type { AudioElement, MixResult } from "./audioMixer.types.js";
+export { parseAudioElements } from "./audioElementParser.js";
function clampVolume(volume: number): number {
if (!Number.isFinite(volume)) return 1;
@@ -169,63 +168,6 @@ interface ExtractResult {
error?: string;
}
-export function parseAudioElements(html: string): AudioElement[] {
- const elements: AudioElement[] = [];
- const { document } = parseHTML(unwrapTemplate(html));
-
- // Parse <audio> elements
- const audioEls = document.querySelectorAll("audio[id][src]");
- for (const el of audioEls) {
- const id = el.getAttribute("id");
- const src = el.getAttribute("src");
- if (!id || !src) continue;
-
- const startAttr = el.getAttribute("data-start");
- const endAttr = el.getAttribute("data-end");
- const mediaStartAttr = el.getAttribute("data-media-start");
- const layerAttr = el.getAttribute("data-layer");
- const volumeAttr = el.getAttribute("data-volume");
-
- elements.push({
- id,
- src,
- start: startAttr ? parseFloat(startAttr) : 0,
- end: endAttr ? parseFloat(endAttr) : 0,
- mediaStart: mediaStartAttr ? parseFloat(mediaStartAttr) : 0,
- layer: layerAttr ? parseInt(layerAttr) : 0,
- volume: volumeAttr ? parseFloat(volumeAttr) : 1.0,
- type: "audio",
- });
- }
-
- // Parse <video> elements with data-has-audio="true"
- const videoEls = document.querySelectorAll('video[id][src][data-has-audio="true"]');
- for (const el of videoEls) {
- const id = el.getAttribute("id");
- const src = el.getAttribute("src");
- if (!id || !src) continue;
-
- const startAttr = el.getAttribute("data-start");
- const endAttr = el.getAttribute("data-end");
- const mediaStartAttr = el.getAttribute("data-media-start");
- const layerAttr = el.getAttribute("data-layer");
- const volumeAttr = el.getAttribute("data-volume");
-
- elements.push({
- id: `${id}-audio`,
- src,
- start: startAttr ? parseFloat(startAttr) : 0,
- end: endAttr ? parseFloat(endAttr) : 0,
- mediaStart: mediaStartAttr ? parseFloat(mediaStartAttr) : 0,
- layer: layerAttr ? parseInt(layerAttr) : 0,
- volume: volumeAttr ? parseFloat(volumeAttr) : 1.0,
- type: "video",
- });
- }
-
- return elements;
-}
-
async function extractAudioFromVideo(
videoPath: string,
outputPath: string,
diff --git a/packages/producer/src/services/htmlCompiler.test.ts b/packages/producer/src/services/htmlCompiler.test.ts
index 6b6ff7fa6..66e72769a 100644
--- a/packages/producer/src/services/htmlCompiler.test.ts
+++ b/packages/producer/src/services/htmlCompiler.test.ts
@@ -15,6 +15,7 @@ import {
localizeRemoteFontFaces,
recompileWithResolutions,
} from "./htmlCompiler.js";
+import type { AudioVolumeKeyframe } from "@hyperframes/engine";
// ── collectExternalAssets ──────────────────────────────────────────────────
@@ -523,6 +524,56 @@ describe("detectShaderTransitionUsage", () => {
});
});
+/**
+ * Samples a piecewise-linear volume envelope at `time`.
+ *
+ * - An empty envelope yields 1.
+ * - Times before the first keyframe or after the last one are clamped to that
+ *   keyframe's volume.
+ * - A segment whose span is zero or negative resolves to its left keyframe.
+ *
+ * @param keyframes envelope points; the search assumes ascending `time`
+ * @param time sample position, in the same unit as the keyframe times
+ * @returns the interpolated volume at `time`
+ */
+function interpolateVolume(keyframes: AudioVolumeKeyframe[], time: number): number {
+  if (keyframes.length === 0) return 1;
+
+  // Find the segment containing `time`. The search never moves past the
+  // second-to-last keyframe, so a right-hand neighbour exists whenever the
+  // envelope has at least two points.
+  let index = 0;
+  for (let next = 1; next < keyframes.length - 1; next += 1) {
+    if (!(time >= keyframes[next]!.time)) break;
+    index = next;
+  }
+
+  const left = keyframes[index]!;
+  // A single-keyframe envelope has no neighbour; reuse the same point.
+  const right = keyframes[index + 1] ?? left;
+  const span = right.time - left.time;
+  const progress = span <= 0 ? 0 : Math.min(1, Math.max(0, (time - left.time) / span));
+  return left.volume + (right.volume - left.volume) * progress;
+}
+
+/**
+ * Root-mean-square of the envelope volume over the window `[start, end]`,
+ * sampled every 0.05 time units with both ends inclusive.
+ *
+ * Sample times are derived from an integer index rather than by repeatedly
+ * adding the step, so accumulated floating-point error cannot decide whether
+ * the sample at `end` is included.
+ *
+ * @param keyframes envelope passed through to `interpolateVolume`
+ * @param start first sample time
+ * @param end last sample time (inclusive when it lies on the sampling grid)
+ * @returns the RMS of the sampled volumes
+ * @throws RangeError when either bound is not finite or `end < start`
+ */
+function rmsVolume(keyframes: AudioVolumeKeyframe[], start: number, end: number): number {
+  const step = 0.05;
+  // An inverted window would produce 0/0 = NaN, and a non-finite end would
+  // never terminate; reject both explicitly.
+  if (!Number.isFinite(start) || !Number.isFinite(end) || end < start) {
+    throw new RangeError(`Invalid RMS window: [${start}, ${end}]`);
+  }
+
+  // The small epsilon absorbs representation error in (end - start) / step so
+  // that a window spanning a whole number of steps keeps its final sample.
+  const sampleCount = Math.floor((end - start) / step + 1e-9) + 1;
+  let sumSquares = 0;
+  for (let i = 0; i < sampleCount; i += 1) {
+    const volume = interpolateVolume(keyframes, start + i * step);
+    sumSquares += volume * volume;
+  }
+  return Math.sqrt(sumSquares / sampleCount);
+}
+
+// Compiles a temporary project through compileForRender and checks the
+// resulting volume envelope of the audio entry with id "music".
+describe("audio ducking compilation", () => {
+ it("passes a lower render-side RMS envelope during voice windows", async () => {
+ // Fresh temp project directory holding a single index.html fixture.
+ // NOTE(review): the fixture markup inside the template literal is not
+ // visible in this view, and the temp directory is not removed in this
+ // block; confirm both against the original test file.
+ const projectDir = mkdtempSync(join(tmpdir(), "hf-audio-duck-"));
+ writeFileSync(
+ join(projectDir, "index.html"),
+ `
+
+`,
+ );
+
+ const compiled = await compileForRender(projectDir, join(projectDir, "index.html"), projectDir);
+ const music = compiled.audios.find((audio) => audio.id === "music");
+ // Fail with a descriptive message when the compiler emitted no keyframes.
+ if (!music?.volumeKeyframes) throw new Error("Missing music duck keyframes");
+
+ // RMS of the envelope over three windows: 0.1-0.6, 1.1-1.9 and 3.0-3.5.
+ // Presumably these fall before, inside and after the voice clip; verify
+ // against the fixture timing.
+ const beforeVoice = rmsVolume(music.volumeKeyframes, 0.1, 0.6);
+ const duringVoice = rmsVolume(music.volumeKeyframes, 1.1, 1.9);
+ const afterVoice = rmsVolume(music.volumeKeyframes, 3.0, 3.5);
+
+ // The middle window must sit below 35% of the level on either side.
+ expect(duringVoice).toBeLessThan(beforeVoice * 0.35);
+ expect(duringVoice).toBeLessThan(afterVoice * 0.35);
+ });
+});
+
describe("template-wrapped sub-composition media offsets", () => {
function writeTemplateWrappedProject(
hostAttrs: string,
diff --git a/packages/producer/src/services/htmlCompiler.ts b/packages/producer/src/services/htmlCompiler.ts
index 17dc05bcb..0233e1711 100644
--- a/packages/producer/src/services/htmlCompiler.ts
+++ b/packages/producer/src/services/htmlCompiler.ts
@@ -14,6 +14,7 @@ import { join, dirname, resolve, basename } from "path";
import { parseHTML } from "linkedom";
import {
compileTimingAttrs,
+ compileAudioDucking,
injectDurations,
extractResolvedMedia,
clampDurations,
@@ -258,6 +259,8 @@ async function compileHtmlFile(
compiledHtml = clampDurations(compiledHtml, clampList);
}
+ compiledHtml = compileAudioDucking(compiledHtml);
+
// Strip crossorigin from video elements: the render pipeline replaces them with
// injected frame images, so the browser never needs to load the source.
// Without this, videos with crossorigin="anonymous" targeting CORS-restricted
@@ -1884,7 +1887,7 @@ export async function recompileWithResolutions(
): Promise {
if (resolutions.length === 0) return compiled;
- const html = injectDurations(compiled.html, resolutions);
+ const html = compileAudioDucking(injectDurations(compiled.html, resolutions));
// Re-parse sub-compositions with the updated parent bounds
const {
diff --git a/packages/producer/src/services/render/stages/probeStage.ts b/packages/producer/src/services/render/stages/probeStage.ts
index e1a0fb907..36064fde2 100644
--- a/packages/producer/src/services/render/stages/probeStage.ts
+++ b/packages/producer/src/services/render/stages/probeStage.ts
@@ -38,6 +38,7 @@ import {
initializeSession,
} from "@hyperframes/engine";
import { fpsToNumber } from "@hyperframes/core";
+import { multiplyVolumeKeyframeEnvelopes } from "@hyperframes/core/media-volume-envelope";
import type { CompiledComposition } from "../../htmlCompiler.js";
import {
discoverMediaFromBrowser,
@@ -367,9 +368,18 @@ export async function runProbeStage(input: ProbeStageInput): Promise 0
+ ? multiplyVolumeKeyframeEnvelopes({
+ sourceKeyframes: keyframes,
+ multiplierKeyframes: audio.volumeKeyframes,
+ trackStart: audio.start,
+ trackEnd: audio.end,
+ baseVolume: audio.volume ?? 1,
+ })
+ : keyframes;
log.info(`[Probe] Runtime audio volume automation: ${audio.id}`, {
- keyframeCount: keyframes.length,
+ keyframeCount: audio.volumeKeyframes.length,
});
}
}