Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added bin_samples/Trumpet.bin
Binary file not shown.
4 changes: 3 additions & 1 deletion include/wavetable.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#pragma once
#include <stdlib.h>

typedef enum { WAVEFORM_SINE, WAVEFORM_SAW, WAVEFORM_SQUARE, WAVEFORM_TRIANGLE } Waveform;
typedef enum { WAVEFORM_SINE, WAVEFORM_SAW, WAVEFORM_SQUARE, WAVEFORM_TRIANGLE, WAVEFORM_CUSTOM } Waveform;

typedef struct {
float *data;
Expand All @@ -11,3 +11,5 @@ typedef struct {

Wavetable *Wavetable_create(Waveform type, size_t length);
void Wavetable_destroy(Wavetable *wt);

int Wavetable_load(Wavetable *wt, const char *filename);
Binary file added report/wavetable_trumpet.wav
Binary file not shown.
161 changes: 161 additions & 0 deletions resampler/resampler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import numpy as np
import librosa
import librosa.display
import scipy.signal
import argparse
import struct
import os
import matplotlib.pyplot as plt

def normalize_audio(audio):
    """
    Scales the audio so that its maximum absolute value is 1.

    Parameters:
    audio (array-like): Audio samples.

    Returns:
    np.ndarray: The samples divided by their peak absolute value. Empty or
    all-zero input is returned unscaled, because there is no peak to
    normalize against.
    """
    audio = np.asarray(audio)

    # np.max raises on a zero-size array, so bail out before looking for the peak.
    if audio.size == 0:
        return audio

    max_val = np.max(np.abs(audio))
    if max_val > 0:
        return audio / max_val
    return audio

def extract_wavetable(audio_file, target_freq=440.0, table_size=1024):
    """
    Loads an audio file, normalizes it, trims silence, extracts one period of a tone at the
    target frequency, and resamples that cycle into a wavetable of the given table_size.

    The cycle is taken from the middle of the trimmed audio; its length is derived from
    target_freq, not from a pitch estimate of the recording.

    Parameters:
    audio_file (str): Path to the input audio file.
    target_freq (float): Expected frequency of the tone in Hz. Must be positive.
    table_size (int): Desired wavetable size. Must be at least 1.

    Returns:
    wavetable (np.ndarray): The wavetable with table_size samples.
    sr (int): The sample rate of the audio file.

    Raises:
    ValueError: If target_freq or table_size is not positive, if target_freq is too
    high for the file's sample rate, or if the trimmed audio is shorter than one period.
    """
    # Validate before any file I/O. `not x > 0` also rejects NaN.
    if not target_freq > 0:
        raise ValueError("target_freq must be a positive frequency in Hz.")
    if table_size < 1:
        raise ValueError("table_size must be at least 1.")

    # Load the audio file (mono, native sample rate) and normalize immediately
    y, sr = librosa.load(audio_file, sr=None, mono=True)
    y = normalize_audio(y)

    # Trim silence from beginning and end (adjust top_db if needed)
    y_trimmed, _ = librosa.effects.trim(y, top_db=20)

    # Calculate the period length in samples for the target frequency
    period_samples = int(round(sr / target_freq))
    if period_samples < 2:
        # Fewer than two samples per period cannot describe a waveform cycle.
        raise ValueError("target_freq is too high for the sample rate of the audio file.")

    # Extract a cycle from the center of the trimmed audio
    center = len(y_trimmed) // 2
    start = center - period_samples // 2
    end = start + period_samples

    if start < 0 or end > len(y_trimmed):
        raise ValueError("Not enough samples in the trimmed audio to extract one period.")

    cycle = y_trimmed[start:end]

    # Resample the extracted cycle to the desired table size
    wavetable = scipy.signal.resample(cycle, table_size)

    # Resampling can overshoot the original peak, so normalize again
    wavetable = normalize_audio(wavetable)

    return wavetable, sr

def save_wavetable_to_binary(wavetable, filename):
    """
    Saves the wavetable to a binary file.

    The file format is:
    - 4 bytes: unsigned int (little-endian) representing the number of samples.
    - 4 bytes per sample: float32 samples (little-endian).

    Parameters:
    wavetable (array-like): The one-dimensional wavetable samples.
    filename (str): The output binary file name.

    Raises:
    ValueError: If wavetable is not one-dimensional.
    """
    # Force little-endian float32 so the samples use the same byte order as the
    # "<I" header regardless of the machine writing the file.
    samples = np.asarray(wavetable, dtype="<f4")
    if samples.ndim != 1:
        # The header stores a single sample count; it must match the payload.
        raise ValueError("wavetable must be a one-dimensional array.")

    with open(filename, "wb") as f:
        # First thing is the table length
        f.write(struct.pack("<I", samples.size))
        # Next is the samples
        f.write(samples.tobytes())

def plot_audio_analysis(y, sr):
    """
    Plots a time series graph of the sound, its spectrogram, and the estimated
    fundamental frequency (f0) over time using librosa's pyin.

    Parameters:
    y (np.ndarray): Mono audio samples.
    sr (int): Sample rate of y in Hz.

    The figure is displayed with plt.show(); nothing is returned.
    """
    fig, axs = plt.subplots(3, 1, figsize=(12, 12))

    # Subplot 1: Time Series of Original Sound
    # Sample n occurs at n / sr seconds.
    t = np.arange(len(y)) / sr
    axs[0].plot(t, y)
    axs[0].set_title("Time Series of Normalized Sound")
    axs[0].set_xlabel("Time (s)")
    axs[0].set_ylabel("Amplitude")

    # Subplot 2: Spectrogram
    S = np.abs(librosa.stft(y))
    S_db = librosa.amplitude_to_db(S, ref=np.max)
    img = librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='hz', ax=axs[1])
    axs[1].set_title("Spectrogram of Normalized Sound")
    fig.colorbar(img, ax=axs[1], format="%+2.0f dB")

    # Subplot 3: Perceived Frequency (f0)
    # Define a reasonable pitch range for estimation.
    fmin = librosa.note_to_hz('C2')
    fmax = librosa.note_to_hz('C7')
    # pyin must be given the real sample rate; otherwise it assumes its own
    # default and the estimated frequencies are scaled incorrectly.
    f0, _, _ = librosa.pyin(y, fmin=fmin, fmax=fmax, sr=sr)
    times = librosa.times_like(f0, sr=sr)
    axs[2].plot(times, f0, label="f0", color="b")
    axs[2].set_title("Perceived Frequency (f0) Over Time")
    axs[2].set_xlabel("Time (s)")
    axs[2].set_ylabel("Frequency (Hz)")
    axs[2].legend()

    plt.tight_layout()
    plt.show()

def main():
    """
    Command-line entry point.

    Parses the arguments, plots an analysis of the input audio, extracts a
    wavetable from it, writes the wavetable to a binary file, and plots the
    resulting table. If extraction fails, the error is printed and nothing
    is written.
    """
    parser = argparse.ArgumentParser(
        description="Convert an audio file into a wavetable, save as a binary file, and plot analysis graphs."
    )
    parser.add_argument("audio_file", type=str, help="Path to the audio file (WAV, MP3, etc.).")
    parser.add_argument("--freq", type=float, default=440.0, help="Target frequency in Hz (default: 440 Hz).")
    parser.add_argument("--table_size", type=int, default=1024, help="Wavetable size (default: 1024).")
    parser.add_argument("--output", type=str, default=None,
                        help="Output binary file. If not specified, uses input filename with .bin extension.")
    args = parser.parse_args()

    # If no output filename is provided, convert the input filename to .bin
    if not args.output:
        base, _ = os.path.splitext(os.path.basename(args.audio_file))
        args.output = os.path.join("bin_samples", base + ".bin")

    # Plot analysis of the original audio
    y_orig, sr_orig = librosa.load(args.audio_file, sr=None, mono=True)
    y_orig = normalize_audio(y_orig)
    plot_audio_analysis(y_orig, sr_orig)

    # Extract the wavetable
    try:
        wavetable, _ = extract_wavetable(args.audio_file, target_freq=args.freq, table_size=args.table_size)
    except Exception as e:
        print("Error processing file:", e)
        return

    # Make sure the destination directory exists; open() does not create it.
    output_dir = os.path.dirname(args.output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the wavetable to a binary file
    save_wavetable_to_binary(wavetable, args.output)
    print("Binary wavetable saved as", args.output)

    plt.figure(figsize=(10, 4))
    plt.plot(wavetable)
    plt.title("Extracted Wavetable (Normalized)")
    plt.xlabel("Sample Index")
    plt.ylabel("Amplitude")
    plt.grid(True)
    plt.show()

# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Binary file added samples/Oboe.wav
Binary file not shown.
Binary file added samples/Trumpet.wav
Binary file not shown.
4 changes: 3 additions & 1 deletion src/state.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ State *State_create(void) {
state->wts[WAVEFORM_SINE] = *Wavetable_create(WAVEFORM_SINE, TABLE_SIZE);
state->wts[WAVEFORM_SAW] = *Wavetable_create(WAVEFORM_SAW, TABLE_SIZE);
state->wts[WAVEFORM_SQUARE] = *Wavetable_create(WAVEFORM_SQUARE, TABLE_SIZE);
state->wts[WAVEFORM_TRIANGLE] = *Wavetable_create(WAVEFORM_TRIANGLE, TABLE_SIZE);
// state->wts[WAVEFORM_TRIANGLE] = *Wavetable_create(WAVEFORM_TRIANGLE, TABLE_SIZE);
state->wts[WAVEFORM_TRIANGLE] = *Wavetable_create(WAVEFORM_CUSTOM, TABLE_SIZE);
Wavetable_load(&state->wts[WAVEFORM_TRIANGLE], "Trumpet.bin");

Lowpass_init(&state->lpf);
return state;
Expand Down
24 changes: 24 additions & 0 deletions src/wavetable.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

Wavetable *Wavetable_create(Waveform type, size_t length) {
assert(length > 0);
Expand Down Expand Up @@ -45,3 +46,26 @@ void Wavetable_destroy(Wavetable *wt) {
free(wt->data);
free(wt);
}

/*
 * Replace the contents of an existing wavetable with samples read from a
 * binary file.
 *
 * File layout: a uint32_t sample count followed by that many float samples.
 * Both are read in the host's native byte order.
 *
 * wt       - table to fill. wt->data must be NULL or a buffer obtained from
 *            malloc (as Wavetable_destroy frees it with free()); it is
 *            released and replaced on success.
 * filename - path of the file to read.
 *
 * Returns 0 on success, -1 on any failure (bad arguments, file cannot be
 * opened, zero or oversized sample count, allocation failure, short read).
 * On failure wt is left exactly as it was, so the caller can keep using the
 * previous table.
 */
int Wavetable_load(Wavetable *wt, const char *filename) {
    if (!wt || !filename) return -1;

    FILE *f = fopen(filename, "rb");
    if (!f) return -1;

    uint32_t length;
    if (fread(&length, sizeof(uint32_t), 1, f) != 1 ||
        length == 0 ||
        length > SIZE_MAX / sizeof(float)) { /* guard the size multiplication */
        fclose(f);
        return -1;
    }

    /* Read into a temporary buffer so a failed load never corrupts wt. */
    float *samples = (float *)malloc((size_t)length * sizeof(float));
    if (!samples) {
        fclose(f);
        return -1;
    }

    size_t got = fread(samples, sizeof(float), length, f);
    fclose(f);
    if (got != length) {
        free(samples);
        return -1;
    }

    /* Commit: release the previous samples and install the new ones. */
    free(wt->data);
    wt->data = samples;
    wt->length = length;
    return 0;
}
Loading