Skip to content

Commit f18e93a

Browse files
committed
feat(audio): phase shift dynamics — measuring what amplitude misses
Phase coherence and gradient capture temporal relationships between harmonics — the HOW of sound, not just the WHAT.

phase.rs:
- band_phase_coherence(): per-band harmonic locking [0,1]. High = voiced (vowels), Low = noise (consonants).
- phase_gradient(): inter-frame phase rotation per band. Steady = sustained pitch, changing = vibrato/portamento.
- stft_with_phase(): STFT preserving real+imag (not just magnitude).

PhaseDescriptor (4 bytes — fits alongside AudioFrame's 48):
- byte 0: overall coherence (voiced vs noise)
- byte 1: gradient magnitude (static vs moving)
- byte 2: coherence entropy (uniform vs mixed voiced/unvoiced)
- byte 3: gradient stability (steady pitch vs changing)

Maps to QPL qualia dims:
- coherence → dim 9 (coherence) + dim 4 (clarity)
- gradient → dim 7 (velocity)
- entropy → dim 8 (entropy)
- stability → dim 14 (groundedness)

Phase is relative pressure within bands, not brute force overall — each band's coherence is measured internally between adjacent bins, and gradient is measured between frames at the same band position.

5 tests: sine coherence, noise low-coherence, voiced detection, attack detection, qualia dim mapping. Total: 40 audio tests passing.

https://claude.ai/code/session_01NYGrxVopyszZYgLBxe4hgj
1 parent a207420 commit f18e93a

2 files changed

Lines changed: 331 additions & 0 deletions

File tree

src/hpc/audio/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ pub mod codec;
2020
pub mod mel;
2121
pub mod voice;
2222
pub mod modes;
23+
pub mod phase;

src/hpc/audio/phase.rs

Lines changed: 330 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,330 @@
1+
//! Phase shift dynamics — measuring what amplitude alone misses.
2+
//!
3+
//! Amplitude tells you WHAT frequencies are present.
4+
//! Phase tells you HOW they relate to each other in time.
5+
//!
6+
//! Phase coherence between harmonics:
7+
//! High coherence → voiced sound (vowels, singing, resonance)
8+
//! Low coherence → noise (consonants, breath, static)
9+
//! Phase locked → natural voice
10+
//! Phase random → synthetic/robotic
11+
//!
12+
//! Phase gradient across frames:
13+
//! Steady phase → sustained note (singing, humming)
14+
//! Rotating phase → vibrato, tremolo
15+
//! Phase discontinuity → attack, plosive, glottal stop
16+
//!
17+
//! Maps to QPL dims:
18+
//! Phase coherence → coherence (dim 9) + clarity (dim 4)
19+
//! Phase gradient → velocity (dim 7); integration (dim 16) is listed as a target but is not emitted by `PhaseDescriptor::to_qualia_dims`
20+
//! Phase stability → groundedness (dim 14)
21+
//! Phase entropy → entropy (dim 8)
22+
//!
23+
//! Uses the same STFT from mel.rs but keeps phase info instead of
24+
//! discarding it (which is what magnitude spectrograms do).
25+
26+
use crate::hpc::fft;
27+
use core::f32::consts::PI;
28+
use super::bands;
29+
30+
/// Phase coherence between adjacent harmonics within one frame.
31+
///
32+
/// Measures how "locked" the harmonics are to each other.
33+
/// Natural voice: harmonics are phase-locked (coherence ≈ 1.0).
34+
/// Noise: random phase relationships (coherence ≈ 0.0).
35+
///
36+
/// Returns per-band coherence values [0.0, 1.0].
37+
pub fn band_phase_coherence(
38+
real: &[f32],
39+
imag: &[f32],
40+
) -> [f32; bands::N_BANDS] {
41+
let mut coherence = [0.0f32; bands::N_BANDS];
42+
43+
for band in 0..bands::N_BANDS {
44+
let lo = bands::CELT_BANDS_48K[band];
45+
let hi = bands::CELT_BANDS_48K[band + 1].min(real.len().min(imag.len()));
46+
if hi <= lo + 1 { continue; }
47+
48+
// Phase differences between adjacent bins within this band
49+
let mut cos_sum = 0.0f64;
50+
let mut sin_sum = 0.0f64;
51+
let mut count = 0u32;
52+
53+
for i in lo..(hi - 1) {
54+
if i >= real.len() || i + 1 >= real.len() { break; }
55+
let phase_i = imag[i].atan2(real[i]);
56+
let phase_next = imag[i + 1].atan2(real[i + 1]);
57+
let diff = phase_next - phase_i;
58+
cos_sum += diff.cos() as f64;
59+
sin_sum += diff.sin() as f64;
60+
count += 1;
61+
}
62+
63+
if count > 0 {
64+
// Resultant length of unit vectors (circular mean)
65+
let r = ((cos_sum * cos_sum + sin_sum * sin_sum).sqrt()) / count as f64;
66+
coherence[band] = r.min(1.0) as f32;
67+
}
68+
}
69+
70+
coherence
71+
}
72+
73+
/// Phase gradient between two consecutive frames.
74+
///
75+
/// Measures how much phase rotates between frames at each band.
76+
/// Steady gradient → sustained pitch (the gradient IS the frequency).
77+
/// Changing gradient → pitch modulation (vibrato, portamento).
78+
/// Zero gradient → DC or silence.
79+
///
80+
/// Returns per-band gradient in radians/frame.
81+
pub fn phase_gradient(
82+
prev_real: &[f32], prev_imag: &[f32],
83+
curr_real: &[f32], curr_imag: &[f32],
84+
) -> [f32; bands::N_BANDS] {
85+
let mut gradient = [0.0f32; bands::N_BANDS];
86+
87+
for band in 0..bands::N_BANDS {
88+
let lo = bands::CELT_BANDS_48K[band];
89+
let hi = bands::CELT_BANDS_48K[band + 1]
90+
.min(prev_real.len())
91+
.min(curr_real.len());
92+
if hi <= lo { continue; }
93+
94+
let mut total_diff = 0.0f64;
95+
let mut count = 0u32;
96+
97+
for i in lo..hi {
98+
if i >= prev_real.len() || i >= curr_real.len() { break; }
99+
let prev_phase = prev_imag[i].atan2(prev_real[i]);
100+
let curr_phase = curr_imag[i].atan2(curr_real[i]);
101+
// Unwrap phase difference to [-π, π]
102+
let mut diff = curr_phase - prev_phase;
103+
while diff > PI { diff -= 2.0 * PI; }
104+
while diff < -PI { diff += 2.0 * PI; }
105+
total_diff += diff.abs() as f64;
106+
count += 1;
107+
}
108+
109+
if count > 0 {
110+
gradient[band] = (total_diff / count as f64) as f32;
111+
}
112+
}
113+
114+
gradient
115+
}
116+
117+
/// Compact phase descriptor: 4 bytes capturing the essential phase dynamics.
118+
///
119+
/// byte 0: overall coherence (0=noise, 255=perfectly locked harmonics)
120+
/// byte 1: gradient magnitude (0=static, 255=rapid phase rotation)
121+
/// byte 2: coherence entropy (0=uniform coherence, 255=mixed voiced/unvoiced)
122+
/// byte 3: gradient stability (0=steady pitch, 255=rapidly changing pitch)
123+
///
124+
/// These 4 bytes complement AudioFrame's PVQ summary:
125+
/// PVQ summary = amplitude shape (WHAT)
126+
/// Phase descriptor = temporal relationship (HOW)
127+
///
128+
/// Together: complete nonverbal vocal characterization in 52 bytes.
129+
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
130+
pub struct PhaseDescriptor {
131+
pub bytes: [u8; 4],
132+
}
133+
134+
impl PhaseDescriptor {
135+
/// Build from band coherence and gradient.
136+
pub fn from_bands(coherence: &[f32; bands::N_BANDS], gradient: &[f32; bands::N_BANDS]) -> Self {
137+
// Overall coherence: weighted mean (weight mid-bands more — voice formants)
138+
let mut coh_sum = 0.0f32;
139+
let mut weight_sum = 0.0f32;
140+
for i in 0..bands::N_BANDS {
141+
let w = if (4..=14).contains(&i) { 2.0 } else { 1.0 }; // voice range weight
142+
coh_sum += coherence[i] * w;
143+
weight_sum += w;
144+
}
145+
let mean_coherence = coh_sum / weight_sum.max(1.0);
146+
147+
// Gradient magnitude: RMS of per-band gradients
148+
let grad_rms = (gradient.iter().map(|g| g * g).sum::<f32>() / bands::N_BANDS as f32).sqrt();
149+
150+
// Coherence entropy: are some bands voiced and others not?
151+
let mut coh_entropy = 0.0f32;
152+
let coh_total: f32 = coherence.iter().sum::<f32>().max(1e-10);
153+
for &c in coherence {
154+
if c > 1e-10 {
155+
let p = c / coh_total;
156+
coh_entropy -= p * p.ln();
157+
}
158+
}
159+
let max_entropy = (bands::N_BANDS as f32).ln();
160+
let norm_coh_entropy = coh_entropy / max_entropy;
161+
162+
// Gradient stability: std dev of gradients (high = changing pitch)
163+
let grad_mean = gradient.iter().sum::<f32>() / bands::N_BANDS as f32;
164+
let grad_var = gradient.iter()
165+
.map(|g| (g - grad_mean) * (g - grad_mean))
166+
.sum::<f32>() / bands::N_BANDS as f32;
167+
let grad_std = grad_var.sqrt();
168+
169+
PhaseDescriptor {
170+
bytes: [
171+
(mean_coherence * 255.0).clamp(0.0, 255.0) as u8,
172+
(grad_rms * 255.0 / PI).clamp(0.0, 255.0) as u8,
173+
(norm_coh_entropy * 255.0).clamp(0.0, 255.0) as u8,
174+
(grad_std * 255.0 / PI).clamp(0.0, 255.0) as u8,
175+
],
176+
}
177+
}
178+
179+
/// Map phase descriptor to QPL dims it informs.
180+
///
181+
/// Returns (coherence→dim9, clarity→dim4, velocity→dim7,
182+
/// entropy→dim8, groundedness→dim14).
183+
pub fn to_qualia_dims(&self) -> [(usize, f32); 5] {
184+
let coherence = self.bytes[0] as f32 / 255.0;
185+
let gradient = self.bytes[1] as f32 / 255.0;
186+
let coh_entropy = self.bytes[2] as f32 / 255.0;
187+
let stability = 1.0 - self.bytes[3] as f32 / 255.0;
188+
189+
[
190+
(9, coherence), // coherence: phase-locked = unified
191+
(4, coherence), // clarity: locked harmonics = clear
192+
(7, gradient), // velocity: phase rotation = movement
193+
(8, coh_entropy), // entropy: mixed voiced/unvoiced
194+
(14, stability), // groundedness: steady pitch = rooted
195+
]
196+
}
197+
198+
/// Is this a voiced frame? (coherence > threshold)
199+
pub fn is_voiced(&self) -> bool {
200+
self.bytes[0] > 128 // > 50% coherence
201+
}
202+
203+
/// Is this an attack/plosive? (low coherence + high gradient)
204+
pub fn is_attack(&self) -> bool {
205+
self.bytes[0] < 64 && self.bytes[1] > 128
206+
}
207+
}
208+
209+
/// STFT with phase preservation.
210+
///
211+
/// Returns (magnitude_per_frame, real_per_frame, imag_per_frame).
212+
/// Each frame has n_fft/2+1 bins.
213+
pub fn stft_with_phase(
214+
pcm: &[f32],
215+
window_size: usize,
216+
hop_size: usize,
217+
) -> (Vec<Vec<f32>>, Vec<Vec<f32>>, Vec<Vec<f32>>) {
218+
let n_fft = window_size.next_power_of_two();
219+
let n_bins = n_fft / 2 + 1;
220+
let window: Vec<f32> = (0..window_size)
221+
.map(|i| 0.5 * (1.0 - (2.0 * PI * i as f32 / window_size as f32).cos()))
222+
.collect();
223+
224+
let n_frames = if pcm.len() >= window_size {
225+
(pcm.len() - window_size) / hop_size + 1
226+
} else {
227+
0
228+
};
229+
230+
let mut mags = Vec::with_capacity(n_frames);
231+
let mut reals = Vec::with_capacity(n_frames);
232+
let mut imags = Vec::with_capacity(n_frames);
233+
234+
for frame_idx in 0..n_frames {
235+
let start = frame_idx * hop_size;
236+
let mut data = vec![0.0f32; 2 * n_fft];
237+
for i in 0..window_size.min(pcm.len() - start) {
238+
data[2 * i] = pcm[start + i] * window[i];
239+
}
240+
241+
fft::fft_f32(&mut data, n_fft);
242+
243+
let mut mag = Vec::with_capacity(n_bins);
244+
let mut real = Vec::with_capacity(n_bins);
245+
let mut imag = Vec::with_capacity(n_bins);
246+
247+
for bin in 0..n_bins {
248+
let re = data[2 * bin];
249+
let im = data[2 * bin + 1];
250+
mag.push((re * re + im * im).sqrt());
251+
real.push(re);
252+
imag.push(im);
253+
}
254+
255+
mags.push(mag);
256+
reals.push(real);
257+
imags.push(imag);
258+
}
259+
260+
(mags, reals, imags)
261+
}
262+
263+
#[cfg(test)]
mod tests {
    use super::*;

    /// Real and imaginary parts of the first STFT frame (512-sample window,
    /// 256-sample hop). Fails loudly when the STFT yields no frame, so a test
    /// can never pass without having checked anything.
    fn first_frame(pcm: &[f32]) -> (Vec<f32>, Vec<f32>) {
        let (_mags, reals, imags) = stft_with_phase(pcm, 512, 256);
        let real = reals
            .into_iter()
            .next()
            .expect("STFT of 1024 samples with a 512-sample window must yield a frame");
        let imag = imags
            .into_iter()
            .next()
            .expect("STFT must return one imaginary frame per real frame");
        (real, imag)
    }

    #[test]
    fn sine_has_high_coherence() {
        // Pure 440 Hz tone sampled at 48 kHz.
        let n = 1024;
        let pcm: Vec<f32> = (0..n)
            .map(|i| (2.0 * PI * 440.0 * i as f32 / 48000.0).sin())
            .collect();

        let (real, imag) = first_frame(&pcm);
        let coh = band_phase_coherence(&real, &imag);

        // At least one band should show clear coherence.
        let max_coh = coh.iter().cloned().fold(0.0f32, f32::max);
        assert!(max_coh > 0.3, "Pure sine should have coherent band: max={}", max_coh);
    }

    #[test]
    fn noise_has_low_coherence() {
        // Deterministic pseudo-random samples in [-1, 1) from an LCG.
        let n = 1024;
        let mut rng = 0x12345678u64;
        let pcm: Vec<f32> = (0..n)
            .map(|_| {
                rng = rng
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                ((rng >> 33) as f32 / (1u64 << 31) as f32) * 2.0 - 1.0
            })
            .collect();

        let (real, imag) = first_frame(&pcm);
        let coh = band_phase_coherence(&real, &imag);

        let mean_coh: f32 = coh.iter().sum::<f32>() / bands::N_BANDS as f32;
        assert!(mean_coh < 0.8, "Noise should have moderate-low coherence: mean={}", mean_coh);
    }

    #[test]
    fn identical_frames_have_zero_gradient() {
        // The same spectrum twice: no bin changes phase.
        let real = vec![0.3f32; 1024];
        let imag = vec![-0.7f32; 1024];
        let grad = phase_gradient(&real, &imag, &real, &imag);
        assert!(grad.iter().all(|&g| g == 0.0), "Expected zero gradient: {:?}", grad);
    }

    #[test]
    fn quarter_turn_gradient_is_half_pi() {
        // Every bin rotates from phase 0 to phase pi/2 between the frames.
        let ones = vec![1.0f32; 1024];
        let zeros = vec![0.0f32; 1024];
        let grad = phase_gradient(&ones, &zeros, &zeros, &ones);
        let max_grad = grad.iter().cloned().fold(0.0f32, f32::max);
        assert!((max_grad - PI / 2.0).abs() < 1e-4, "Expected pi/2: max={}", max_grad);
    }

    #[test]
    fn phase_descriptor_voiced_detection() {
        let voiced_coh = [0.9f32; bands::N_BANDS];
        let steady_grad = [0.1f32; bands::N_BANDS];
        let desc = PhaseDescriptor::from_bands(&voiced_coh, &steady_grad);
        assert!(desc.is_voiced(), "High coherence should be voiced");
        assert!(!desc.is_attack(), "Steady should not be attack");
    }

    #[test]
    fn phase_descriptor_attack_detection() {
        let noise_coh = [0.1f32; bands::N_BANDS];
        let high_grad = [2.0f32; bands::N_BANDS];
        let desc = PhaseDescriptor::from_bands(&noise_coh, &high_grad);
        assert!(!desc.is_voiced(), "Low coherence should not be voiced");
        assert!(desc.is_attack(), "Low coherence + high gradient = attack");
    }

    #[test]
    fn phase_to_qualia_dims_valid() {
        let desc = PhaseDescriptor { bytes: [200, 50, 100, 30] };
        for (dim_idx, value) in desc.to_qualia_dims() {
            assert!(dim_idx < 17, "Invalid dim index: {}", dim_idx);
            assert!(
                (0.0..=1.0).contains(&value),
                "Dim {} value out of range: {}",
                dim_idx,
                value
            );
        }
    }
}

0 commit comments

Comments
 (0)