From faca46b0c765f399d4d66f23ec0acd666abd9385 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Tue, 26 Aug 2025 17:04:08 +0200 Subject: [PATCH 01/37] Updates to FFT --- modules/yup_core/maths/yup_MathsFunctions.h | 1 + .../yup_dsp/frequency/yup_FFTProcessor.cpp | 48 ++++++++++++------- modules/yup_dsp/frequency/yup_FFTProcessor.h | 6 ++- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/modules/yup_core/maths/yup_MathsFunctions.h b/modules/yup_core/maths/yup_MathsFunctions.h index 9e3b8ebe1..a9015671c 100644 --- a/modules/yup_core/maths/yup_MathsFunctions.h +++ b/modules/yup_core/maths/yup_MathsFunctions.h @@ -751,6 +751,7 @@ constexpr int nextPowerOfTwo (int n) noexcept n |= (n >> 4); n |= (n >> 8); n |= (n >> 16); + return n + 1; } diff --git a/modules/yup_dsp/frequency/yup_FFTProcessor.cpp b/modules/yup_dsp/frequency/yup_FFTProcessor.cpp index 141853da6..45aac82a7 100644 --- a/modules/yup_dsp/frequency/yup_FFTProcessor.cpp +++ b/modules/yup_dsp/frequency/yup_FFTProcessor.cpp @@ -700,7 +700,17 @@ FFTProcessor& FFTProcessor::operator= (FFTProcessor&& other) noexcept } //============================================================================== -// Public interface + +void FFTProcessor::setScaling (FFTScaling newScaling) noexcept +{ + if (scaling != newScaling) + { + scaling = newScaling; + + updateScalingFactor(); + } +} + void FFTProcessor::setSize (int newSize) { jassert (isPowerOfTwo (newSize) && newSize >= 64 && newSize <= 65536); @@ -709,6 +719,8 @@ void FFTProcessor::setSize (int newSize) { fftSize = newSize; + updateScalingFactor(); + if (engine) engine->initialize (fftSize); } @@ -720,6 +732,7 @@ void FFTProcessor::performRealFFTForward (const float* realInput, float* complex jassert (engine != nullptr); engine->performRealFFTForward (realInput, complexOutput); + applyScaling (complexOutput, fftSize * 2, true); } @@ -729,6 +742,7 @@ void FFTProcessor::performRealFFTInverse (const float* complexInput, float* real jassert (engine != nullptr); 
engine->performRealFFTInverse (complexInput, realOutput); + applyScaling (realOutput, fftSize, false); } @@ -738,6 +752,7 @@ void FFTProcessor::performComplexFFTForward (const float* complexInput, float* c jassert (engine != nullptr); engine->performComplexFFTForward (complexInput, complexOutput); + applyScaling (complexOutput, fftSize * 2, true); } @@ -747,6 +762,7 @@ void FFTProcessor::performComplexFFTInverse (const float* complexInput, float* c jassert (engine != nullptr); engine->performComplexFFTInverse (complexInput, complexOutput); + applyScaling (complexOutput, fftSize * 2, false); } @@ -756,25 +772,25 @@ String FFTProcessor::getBackendName() const } //============================================================================== -// Private implementation -void FFTProcessor::applyScaling (float* data, int numElements, bool isForward) + +void FFTProcessor::updateScalingFactor() { - if (scaling == FFTScaling::none) - return; + if (scaling == FFTScaling::unitary) + scalingFactor = 1.0f / std::sqrt (static_cast (fftSize)); - float scale = 1.0f; + else if (scaling == FFTScaling::asymmetric) + scalingFactor = 1.0f / static_cast (fftSize); - if (scaling == FFTScaling::unitary) - { - scale = 1.0f / std::sqrt (static_cast (fftSize)); - } - else if (scaling == FFTScaling::asymmetric && ! isForward) - { - scale = 1.0f / static_cast (fftSize); - } + else + scalingFactor = 1.0f; +} + +void FFTProcessor::applyScaling (float* data, int numElements, bool isForward) const +{ + if (scaling == FFTScaling::none || (scaling == FFTScaling::asymmetric && ! 
isForward)) + return; - if (scale != 1.0f) - FloatVectorOperations::multiply (data, scale, numElements); + FloatVectorOperations::multiply (data, scalingFactor, numElements); } } // namespace yup diff --git a/modules/yup_dsp/frequency/yup_FFTProcessor.h b/modules/yup_dsp/frequency/yup_FFTProcessor.h index 5ecaa5d9f..976ec9351 100644 --- a/modules/yup_dsp/frequency/yup_FFTProcessor.h +++ b/modules/yup_dsp/frequency/yup_FFTProcessor.h @@ -89,7 +89,7 @@ class FFTProcessor int getSize() const noexcept { return fftSize; } /** Sets the FFT scaling mode */ - void setScaling (FFTScaling newScaling) noexcept { scaling = newScaling; } + void setScaling (FFTScaling newScaling) noexcept; /** Gets the current scaling mode */ FFTScaling getScaling() const noexcept { return scaling; } @@ -139,11 +139,13 @@ class FFTProcessor private: //============================================================================== - void applyScaling (float* data, int numElements, bool isForward); + void updateScalingFactor (); + void applyScaling (float* data, int numElements, bool isForward) const; //============================================================================== int fftSize = -1; FFTScaling scaling = FFTScaling::none; + float scalingFactor = 1.0f; std::unique_ptr engine; From 1a307098c668f75f40ead1c42608a91bb6ea7fd9 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Tue, 26 Aug 2025 17:04:32 +0200 Subject: [PATCH 02/37] Initial work on the convolution --- .../graphics/data/ir_e112_g12_dyn_us_6v6.wav | Bin 0 -> 4488 bytes .../source/examples/ConvolutionDemo.h | 475 ++++++++++++++ examples/graphics/source/main.cpp | 2 + .../convolution/yup_PartitionedConvolver.cpp | 593 ++++++++++++++++++ .../convolution/yup_PartitionedConvolver.h | 160 +++++ modules/yup_dsp/yup_dsp.cpp | 3 + modules/yup_dsp/yup_dsp.h | 3 + 7 files changed, 1236 insertions(+) create mode 100644 examples/graphics/data/ir_e112_g12_dyn_us_6v6.wav create mode 100644 examples/graphics/source/examples/ConvolutionDemo.h create 
mode 100644 modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp create mode 100644 modules/yup_dsp/convolution/yup_PartitionedConvolver.h diff --git a/examples/graphics/data/ir_e112_g12_dyn_us_6v6.wav b/examples/graphics/data/ir_e112_g12_dyn_us_6v6.wav new file mode 100644 index 0000000000000000000000000000000000000000..be8463d3aeab399d5330665e53987852d9df2c59 GIT binary patch literal 4488 zcmXX~30zdw8vo9nJ2MOnAflkAf)r|ki5n_mMk%7=^1|E`F_+3Mdt#QQ=u6EtcguV# z_FPhgA{7;v;^7)4TBt0FfGY|j2+qvC_k8a^{NC^U#+kWu?m6H0-_FH%2F1pv_z{hc z88u+qta-srh)7`cPb2zzj39-YQ2d~Tk*#oj(6HFygA!s#`rvbX()?*Xd%xDB&ud*H zB4|p|yrd|sqKI(Qaa&-Z43CNLF^3l~nCbhL5%^}Eb}is!UPjre79~Eef%fv|2U?D8 zk=yLHU$xH-k2vGzlgA|!3+9v*SG9HxcSSdBsdYSVRuNaa^X_*~4{J`#1mB5GUukl| z^R6k=XzT1!H?y+(4&OXeaJ#7TL5Ra_-R?K8x!uptdQdx2bKri5J4Z?*DrP*Wc|5x& zx$fBW%=!lnha1wLm(;ea?NhtIE~jCtYqZgqzoH%Di1>pO_4ggqpFgabUwyjj!>7~g zN*$MEp=rCNmnGPAM3%ZjoE6Sg-AsQgh3Yf&esf#3laffK@=s%k5n*H+{dub>GOaSl zTQ-`#O(Qr{o9?W3yzV-wuV!!MnW;|oG|yERnx-rLL>O)0vs^%Tm1^@k+hzMkpLCxL zZ_VqNt-tkcbEMiP~g9TA_|GE0%mqob?s!HjBfYY_3xy z)hN?oB~2_8$HW+AqG_x8rFoNir@C8_{F(8WwpsgA@523+S>{S>n9bkz%-Yuys}4}+ z6R}19V))BY&Z8fdKB{WYQWu-dN+ES77l%*^-NNWY4`nZ7gBI!P=lszz&9T^Fa@IH_ zwe5P2@rC?V?v!2R3L{q!(9^YjS`+=g{?JI1CVnj6mrIRxdaf3&d1w*Zer>p3ug^4& z8hOTAqe1Vkr)X`oewsszFtX%(R3*Gj<4oDgc(IXxHca{zR}WXHYlCZ|HcW3~%r)j1 z9gHkJUJuq|^yT^){kncZU!>P*y|t;XbO6G z)`N{@hPON_C-EI_L%~$f$9NUb<+u2jjFnrAB0Wf7s=ccDY3;S4TB0^kYo~>4+q4P# zV56I?l@ob7hwvG>K>kzim4=+fh1`ioPz;$Vm4kSnd{2&&+hic;@ERH~%*q^Pld?#O zRT{)L(OcZ5o%8|4(GdECcGFL^j9#IWJd0yEn7uiWqj>@J){R!uMRHIJ;VsV5Kd1l| zc4sfXBQxbUa*KQ_cXKkmB0dpu;v`jZYhEeG$=z~1PowoBL)l<*nl78#n-Y{QB3Sr~ zS;AA3{jSeWSYtcyhXe}L}EA$ubq9{5IOv~hH zc}s@z5}>8^BCyp*Ygf=2+g%21nC7KxrrqqyaRB7Sln>qawOQTXEHg z@_7>{azBpXFz&%)cpH~+6Y4~-l1jhvXFQbO;nn<*M^YwgtH-@xQ!KqiUgS@a^f9H; zA5=(>$zKc>Ux{o{DZCVa#U-wYJ>pX_Ku}gd-Qp7m0cNEoJ zp~`1dto7tg4Y3Pqo6jb>8}*;_de%=e~Im^l~x zZ-%l1#cc7D$OXr}#V(S(h9~jIe27(g9~_iYIb~Bay-ZiYP7vooi(krxa=F|i&&#Xw zkQ^`TjdjK#W1#W1ao32Hi)Fri3HwjrDM0;m-pH9;hQyme8B|N1z)_3{5;tiDFlk3G 
z_y%9$d+ZH8PJsirL$90Z3z~#qhtN3GITnfen7`$|T!WR%WK*!X9f{G2=7F&bbb~5@ zPA%zV6Rm(&wCE-*A{T4)C2y+Z8rGPo0ecifYnAxif;v(NQuqXT?8q+p2%}0mr8m^j z9op>0{#-84$}IW2td>oy#h!Mk`Ls>tWN;4p{=A%{|EGIb$4F)F^^hcmW zI~(Zly3BzlPsnVHKcpr*!>{q^Vg(=O(|n$b_yx3Kf&zT$On_bPyIck+cVgXj(DdI@UUj|}~ZexR?wTW`4VH8?gJwG9H3QRIG-a$_Hb zd`P6JGy(khBQFb3brGAP-tXx;cyv+&-J`R}uc_#7JhZnND`!B9X;A%4>W{Pth90}2 zrjfX-E2_8;6jwuW-MJ+bs=fzZc~fifGny7a z%}e0pq3An;df|OG>i-_~{u5QU1D`fj*&I0*gvvWmd*EQ8>z{cg6fptGl!QF`7CaPk z6*K(Ylsw_?yZkr*j@JwB3LLWNKF*;UpmdD3;Cm9#83lC=#(pD!MH19=5WgDKTm%Xe z&bJ)+X&%s?j5-z}f#zV&SgsOzD%YK^fOYp9FA)x!bBV4L?U0L)2J!~?5~Drd*TEx2llsE=a<3QC}0!= z1w2Q8zd~x|+3WNu66cjlF-p;{&PmpW(p#O__CfLK9;g(e7_C7Fm ziqAo<7oeJ}{9k12T3!fmeT>?dqmv`xrX0KaQZwYBH}(vJGXltitQbU2u%?g&^_g+a z8?)M=x9;e$1Li-&j_1L`Z{WWgsZCS^mQG{crJT(3;N0~%InH9|2Vlv+QHA}1fZHLpSiP3g>bm zs>ug;B^Y-wu7QsXq)jRNf#LVD<7{vp34N=iu@lOD@qfhasJbomAAmmV(f4(%^B3-` z!YnrBHf8O+Y%I z0b6c_OBz>q*IC}Uo)b>(2pvuY9{++9W`Z+!Vzvb@0T?d<-Hza(Cq@^1YKvsD!ha92 z&UyHKKeBW;&cxIB{*1lwoY)nkBkr?7GuNTUJake6)SRfk41HY&b~kaw3v+_OTqqa| zz?w!QCiP(01(m))b;Y>%6w)Xid36HXIuA_Cv5NzD-@&G|T0mIDj7THI?~z}~e;km{h)^N$gRzI z68RqK^>L#EELF&a-go#79n+D0c{KxU@HxksSw0@T+O_;o^eDpZ?~s@<7b z0S0a0IRN?P2Mn8_V>cqtaK5?c_k~B|;DxRWNWXKt&tD*e>kg2`szex(_n&HlBFSBV$gX zm(!^6HqdeBR0KvxILC_rhwz5RqM_2McuxBQK3xUXeuNy2148Z-bu6+x1=&0gX&wc= zdXf%J5wH(~vIilLh67J`s%hW2bnGs0e<0>YLKl6Z>`8++ih2cFrgz(pnSG{9XeXut>S)u4}yc +#include +#include +#include + +#include +#include +#include +#include + +//============================================================================== + +class ConvolutionDemo + : public yup::Component + , public yup::AudioIODeviceCallback + , public yup::Timer +{ +public: + ConvolutionDemo() + : wetGainSlider (yup::Slider::LinearHorizontal) + , dryGainSlider (yup::Slider::LinearHorizontal) + , loadIRButton ("Load IR...") + { + formatManager.registerDefaultFormats(); + + // Load default audio files + loadAudioFile(); + loadDefaultImpulseResponse(); + + // Audio device manager + 
audioDeviceManager.initialiseWithDefaultDevices (0, 2); + + // Initialize smoothed values + wetGain.reset (44100, 0.02); + dryGain.reset (44100, 0.02); + wetGain.setCurrentAndTargetValue (1.0f); + dryGain.setCurrentAndTargetValue (0.3f); + + // Configure convolver with typical layout + convolver.setTypicalLayout (256, {256, 1024, 4096}); + + // Create UI + createUI(); + + // Start timer for waveform updates + startTimerHz (30); + } + + ~ConvolutionDemo() override + { + audioDeviceManager.removeAudioCallback (this); + audioDeviceManager.closeAudioDevice(); + } + + void resized() override + { + auto bounds = getLocalBounds().reduced (10); + + // Top controls + auto topControls = bounds.removeFromTop (120); + + // IR loading section + auto irSection = topControls.removeFromTop (60); + loadIRButton.setBounds (irSection.removeFromTop (30).reduced (5, 0)); + irInfoLabel.setBounds (irSection.removeFromTop (25)); + + // Control sliders section + auto controlsSection = topControls; + auto wetSection = controlsSection.removeFromLeft (controlsSection.getWidth() / 2); + wetGainLabel.setBounds (wetSection.removeFromTop (25)); + wetGainSlider.setBounds (wetSection.removeFromTop (30).reduced (5, 0)); + + dryGainLabel.setBounds (controlsSection.removeFromTop (25)); + dryGainSlider.setBounds (controlsSection.removeFromTop (30).reduced (5, 0)); + + // IR waveform display takes remaining space + irWaveformDisplay.setBounds (bounds); + } + + void visibilityChanged() override + { + if (! 
isVisible()) + audioDeviceManager.removeAudioCallback (this); + else + audioDeviceManager.addAudioCallback (this); + } + + void audioDeviceAboutToStart (yup::AudioIODevice* device) override + { + auto sampleRate = device->getCurrentSampleRate(); + + // Update smoothed values + wetGain.reset (sampleRate, 0.02); + dryGain.reset (sampleRate, 0.02); + + // Reset convolver + convolver.reset(); + } + + void audioDeviceStopped() override + { + } + + void audioDeviceIOCallbackWithContext (const float* const* inputChannelData, + int numInputChannels, + float* const* outputChannelData, + int numOutputChannels, + int numSamples, + const yup::AudioIODeviceCallbackContext& context) override + { + // Clear outputs + for (int ch = 0; ch < numOutputChannels; ++ch) + { + if (outputChannelData[ch] != nullptr) + yup::FloatVectorOperations::clear (outputChannelData[ch], numSamples); + } + + if (numOutputChannels < 2 || audioBuffer.getNumSamples() == 0) + return; + + // Prepare buffers for processing + tempDryBuffer.resize (static_cast (numSamples)); + tempWetBuffer.resize (static_cast (numSamples)); + + // Process samples + const int totalSamples = audioBuffer.getNumSamples(); + const int numChannels = audioBuffer.getNumChannels(); + + for (int i = 0; i < numSamples; ++i) + { + // Get the audio sample from the loaded file (mono to stereo if needed) + float audioSample = 0.0f; + + if (numChannels == 1) + { + // Mono file + audioSample = audioBuffer.getSample (0, readPosition) * 0.5f; + } + else + { + // Stereo or multichannel - mix to mono + for (int ch = 0; ch < yup::jmin (2, numChannels); ++ch) + audioSample += audioBuffer.getSample (ch, readPosition) * 0.5f; + audioSample /= yup::jmin (2, numChannels); + } + + // Increment read position and wrap around for looping + readPosition++; + if (readPosition >= totalSamples) + readPosition = 0; + + // Store dry signal + tempDryBuffer[static_cast (i)] = audioSample; + } + + // Process through convolver if IR is loaded + std::fill 
(tempWetBuffer.begin(), tempWetBuffer.end(), 0.0f); + if (hasImpulseResponse) + convolver.process (tempDryBuffer.data(), tempWetBuffer.data(), static_cast (numSamples)); + + // Mix dry and wet signals with gains + for (int i = 0; i < numSamples; ++i) + { + float wetGainValue = wetGain.getNextValue(); + float dryGainValue = dryGain.getNextValue(); + + float drySignal = tempDryBuffer[static_cast (i)] * dryGainValue; + float wetSignal = tempWetBuffer[static_cast (i)] * wetGainValue; + float mixedSignal = drySignal + wetSignal; + + // Output to both channels (mono to stereo) + outputChannelData[0][i] = mixedSignal; + outputChannelData[1][i] = mixedSignal; + } + } + + void timerCallback() override + { + // Update waveform display if needed + repaint(); + } + +private: + void loadAudioFile() + { + // Create the path to the audio file + auto dataDir = yup::File (__FILE__) + .getParentDirectory() + .getParentDirectory() + .getParentDirectory() + .getChildFile ("data"); + + yup::File audioFile = dataDir.getChildFile ("break_boomblastic_92bpm.wav"); + if (! 
audioFile.existsAsFile()) + { + std::cerr << "Could not find break_boomblastic_92bpm.wav" << std::endl; + return; + } + + // Load the audio file + yup::AudioFormatManager formatManager; + formatManager.registerDefaultFormats(); + + if (auto reader = formatManager.createReaderFor (audioFile)) + { + audioBuffer.setSize ((int) reader->numChannels, (int) reader->lengthInSamples); + reader->read (&audioBuffer, 0, (int) reader->lengthInSamples, 0, true, true); + + std::cout << "Loaded audio file: " << audioFile.getFileName() << std::endl; + std::cout << "Sample rate: " << reader->sampleRate << " Hz" << std::endl; + std::cout << "Channels: " << reader->numChannels << std::endl; + std::cout << "Length: " << reader->lengthInSamples << " samples" << std::endl; + } + else + { + std::cerr << "Failed to create reader for audio file" << std::endl; + } + } + + void loadDefaultImpulseResponse() + { + // Create the path to the default impulse response file + auto dataDir = yup::File (__FILE__) + .getParentDirectory() + .getParentDirectory() + .getParentDirectory() + .getChildFile ("data"); + + yup::File irFile = dataDir.getChildFile ("ir_e112_g12_dyn_us_6v6.wav"); + loadImpulseResponseFromFile (irFile); + } + + void loadImpulseResponseFromFile (const yup::File& file) + { + if (! 
file.existsAsFile()) + { + std::cerr << "Could not find impulse response file: " << file.getFullPathName() << std::endl; + updateIRInfo ("No IR loaded"); + return; + } + + // Load the impulse response file + if (auto reader = formatManager.createReaderFor (file)) + { + impulseResponseBuffer.setSize ((int) reader->numChannels, (int) reader->lengthInSamples); + reader->read (&impulseResponseBuffer, 0, (int) reader->lengthInSamples, 0, true, true); + + // Convert to mono if stereo + if (impulseResponseBuffer.getNumChannels() > 1) + { + for (int i = 0; i < impulseResponseBuffer.getNumSamples(); ++i) + { + float monoSample = 0.0f; + for (int ch = 0; ch < impulseResponseBuffer.getNumChannels(); ++ch) + monoSample += impulseResponseBuffer.getSample (ch, i); + monoSample /= static_cast (impulseResponseBuffer.getNumChannels()); + impulseResponseBuffer.setSample (0, i, monoSample); + } + impulseResponseBuffer.setSize (1, impulseResponseBuffer.getNumSamples(), true); + } + + // Extract samples for convolver and normalize + const int numSamples = impulseResponseBuffer.getNumSamples(); + impulseResponseData.resize (static_cast (numSamples)); + + // Normalize IR to prevent clipping (very aggressive scaling for testing) + float normalizationGain = 1.0f; + for (int i = 0; i < numSamples; ++i) + impulseResponseData[static_cast (i)] = impulseResponseBuffer.getSample (0, i) * normalizationGain; + + // Set impulse response in convolver + convolver.setImpulseResponse (impulseResponseData); + hasImpulseResponse = true; + + std::cout << "Loaded impulse response: " << file.getFileName() << std::endl; + std::cout << "Sample rate: " << reader->sampleRate << " Hz" << std::endl; + std::cout << "Length: " << reader->lengthInSamples << " samples" << std::endl; + + // Update UI + updateIRInfo (file.getFileName()); + updateWaveformDisplay(); + } + else + { + std::cerr << "Failed to create reader for impulse response file" << std::endl; + updateIRInfo ("Failed to load IR"); + } + } + + void 
createUI() + { + setOpaque (false); + + // Get fonts + auto labelFont = yup::ApplicationTheme::getGlobalTheme()->getDefaultFont().withHeight (12.0f); + auto buttonFont = yup::ApplicationTheme::getGlobalTheme()->getDefaultFont().withHeight (14.0f); + + // Load IR button + // loadIRButton.setFont (buttonFont); + loadIRButton.onClick = [this] + { + auto chooser = yup::FileChooser::create ("Load Impulse Response", + yup::File(), + "*.wav;*.aiff;*.aif"); + chooser->browseForFileToOpen ([this] (bool success, const yup::Array& results) + { + if (success && results.size() > 0) + { + loadImpulseResponseFromFile (results[0]); + } + }); + }; + addAndMakeVisible (loadIRButton); + + // IR info label + irInfoLabel.setText ("Loading default IR...", yup::NotificationType::dontSendNotification); + irInfoLabel.setFont (labelFont); + irInfoLabel.setJustification (yup::Justification::center); + addAndMakeVisible (irInfoLabel); + + // Wet gain slider + wetGainLabel.setText ("Wet Gain", yup::NotificationType::dontSendNotification); + wetGainLabel.setFont (labelFont); + addAndMakeVisible (wetGainLabel); + + wetGainSlider.setRange (0.0, 2.0); + wetGainSlider.setValue (1.0); + wetGainSlider.onValueChanged = [this] (float value) + { + wetGain.setTargetValue (value); + }; + addAndMakeVisible (wetGainSlider); + + // Dry gain slider + dryGainLabel.setText ("Dry Gain", yup::NotificationType::dontSendNotification); + dryGainLabel.setFont (labelFont); + addAndMakeVisible (dryGainLabel); + + dryGainSlider.setRange (0.0, 2.0); + dryGainSlider.setValue (0.3); + dryGainSlider.onValueChanged = [this] (float value) + { + dryGain.setTargetValue (value); + }; + addAndMakeVisible (dryGainSlider); + + // Configure IR waveform display + setupWaveformDisplay(); + addAndMakeVisible (irWaveformDisplay); + } + + void setupWaveformDisplay() + { + // Configure the CartesianPlane for waveform display + irWaveformDisplay.setTitle ("Impulse Response Waveform"); + + // Set linear axes + irWaveformDisplay.setXRange 
(0.0, 1.0); + irWaveformDisplay.setXScaleType (yup::CartesianPlane::AxisScaleType::linear); + irWaveformDisplay.setYRange (-1.0, 1.0); + irWaveformDisplay.setYScaleType (yup::CartesianPlane::AxisScaleType::linear); + + // Set margins + irWaveformDisplay.setMargins (25, 25, 25, 25); + + // Add grid lines + irWaveformDisplay.setVerticalGridLines ({ 0.0, 1.0 }); + irWaveformDisplay.setHorizontalGridLines ({ -1.0, -0.5, 0.5, 1.0 }); + irWaveformDisplay.addHorizontalGridLine (0.0, yup::Color (0xFF666666), 1.0f, true); + + irWaveformDisplay.clearXAxisLabels(); + irWaveformDisplay.setYAxisLabels ({ -1.0, -0.5, 0.5, 1.0 }); + + // Add waveform signal + waveformSignalIndex = irWaveformDisplay.addSignal ("IR", yup::Color (0xFF44AA44), 1.5f); + + // Configure legend + irWaveformDisplay.setLegendVisible (false); + } + + void updateWaveformDisplay() + { + if (impulseResponseData.empty()) + return; + + // Create waveform data points + const size_t numPoints = std::min (static_cast (2048), impulseResponseData.size()); + const size_t stride = impulseResponseData.size() / numPoints; + + std::vector> waveformData; + waveformData.reserve (numPoints); + + for (size_t i = 0; i < numPoints; ++i) + { + size_t sampleIndex = i * stride; + if (sampleIndex >= impulseResponseData.size()) + sampleIndex = impulseResponseData.size() - 1; + + double normalizedTime = static_cast (i) / static_cast (numPoints - 1); + double amplitude = static_cast (impulseResponseData[sampleIndex]); + + waveformData.emplace_back (normalizedTime, amplitude); + } + + // Update the display + irWaveformDisplay.updateSignalData (waveformSignalIndex, waveformData); + + // Update X axis range to show time + double lengthInSeconds = static_cast (impulseResponseData.size()) / 44100.0; // Assume 44.1kHz + irWaveformDisplay.setXRange (0.0, lengthInSeconds); + + // Update X axis labels to show time + std::vector timeLabels; + for (int i = 0; i <= 4; ++i) + timeLabels.push_back (lengthInSeconds * static_cast (i) / 4.0); + 
irWaveformDisplay.setXAxisLabels (timeLabels); + } + + void updateIRInfo (const yup::String& info) + { + irInfoLabel.setText (info, yup::NotificationType::dontSendNotification); + } + + // Audio + yup::AudioFormatManager formatManager; + yup::AudioDeviceManager audioDeviceManager; + yup::AudioBuffer audioBuffer; + yup::AudioBuffer impulseResponseBuffer; + std::vector impulseResponseData; + int readPosition = 0; + std::atomic hasImpulseResponse = false; + + // Processing + yup::PartitionedConvolver convolver; + std::vector tempDryBuffer; + std::vector tempWetBuffer; + + // Smoothed parameters + yup::SmoothedValue wetGain, dryGain; + + // UI + yup::TextButton loadIRButton; + yup::Label irInfoLabel; + yup::Label wetGainLabel; + yup::Slider wetGainSlider; + yup::Label dryGainLabel; + yup::Slider dryGainSlider; + yup::CartesianPlane irWaveformDisplay; + + // Display + int waveformSignalIndex = -1; +}; diff --git a/examples/graphics/source/main.cpp b/examples/graphics/source/main.cpp index 34e405208..f6d4d6084 100644 --- a/examples/graphics/source/main.cpp +++ b/examples/graphics/source/main.cpp @@ -40,6 +40,7 @@ #include "examples/Artboard.h" #include "examples/Audio.h" #include "examples/CrossoverDemo.h" +#include "examples/ConvolutionDemo.h" #include "examples/FilterDemo.h" #include "examples/LayoutFonts.h" #include "examples/FileChooser.h" @@ -107,6 +108,7 @@ class CustomWindow registerDemo ("FFT Analyzer", counter++); registerDemo ("Filter Demo", counter++); registerDemo ("Crossover Demo", counter++); + registerDemo ("Convolution Demo", counter++); registerDemo ("Layout Fonts", counter++); registerDemo ("Variable Fonts", counter++); registerDemo ("Paths", counter++); diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp new file mode 100644 index 000000000..09319eba6 --- /dev/null +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -0,0 +1,593 @@ +/* + 
============================================================================== + + This file is part of the YUP library. + Copyright (c) 2025 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#include +#include + +namespace yup +{ + +//============================================================================== +// Complex multiply-accumulate for interleaved real/imaginary format +// Y += A * B (complex multiplication) +//============================================================================== +static void complexMultiplyAccumulate (const float* A, const float* B, float* Y, int complexPairs) +{ + for (int i = 0; i < complexPairs; ++i) + { + const int ri = i * 2; + const int ii = ri + 1; + const float ar = A[ri]; + const float ai = A[ii]; + const float br = B[ri]; + const float bi = B[ii]; + // (ar + j*ai) * (br + j*bi) = (ar*br - ai*bi) + j*(ar*bi + ai*br) + Y[ri] += ar * br - ai * bi; + Y[ii] += ar * bi + ai * br; + } +} + +//============================================================================== +// DirectFIR - Brute-force FIR implementation for early taps +//============================================================================== +class PartitionedConvolver::DirectFIR +{ +public: + DirectFIR() = default; + + void setTaps (std::vector taps, float scaling) + { + taps_ = std::move (taps); + 
FloatVectorOperations::multiply (taps_.data(), scaling, taps_.size()); + + history_.assign (taps_.size(), 0.0f); + + writeIndex_ = 0; + } + + void reset() + { + std::fill (history_.begin(), history_.end(), 0.0f); + writeIndex_ = 0; + } + + void process (const float* input, float* output, std::size_t numSamples) + { + const std::size_t numTaps = taps_.size(); + if (numTaps == 0) return; + + for (std::size_t i = 0; i < numSamples; ++i) + { + history_[writeIndex_] = input[i]; + + // Convolution: y[n] = sum(h[m] * x[n-m]) + float sum = 0.0f; + std::size_t readIndex = writeIndex_; + for (std::size_t m = 0; m < numTaps; ++m) + { + sum += taps_[m] * history_[readIndex]; + if (readIndex == 0) + readIndex = numTaps - 1; + else + --readIndex; + } + + output[i] += sum; + + // Advance circular buffer + if (++writeIndex_ == numTaps) + writeIndex_ = 0; + } + } + + std::size_t getNumTaps() const { return taps_.size(); } + +private: + std::vector taps_; + std::vector history_; + std::size_t writeIndex_ = 0; +}; + +//============================================================================== +// FFTLayer - Single uniform-partitioned OLA layer +//============================================================================== +class PartitionedConvolver::FFTLayer +{ +public: + FFTLayer() = default; + ~FFTLayer() = default; + + FFTLayer (FFTLayer&& other) = default; + FFTLayer& operator= (FFTLayer&& other) = default; + + void configure (int hopSize) + { + hopSize_ = hopSize; + fftSize_ = hopSize * 2; + + fftProcessor_.setSize (fftSize_); + fftProcessor_.setScaling (FFTProcessor::FFTScaling::asymmetric); + + overlapBuffer_.assign (static_cast (hopSize_), 0.0f); + timeBuffer_.assign (static_cast (fftSize_), 0.0f); + frequencyBuffer_.assign (static_cast (fftSize_) * 2, 0.0f); + tempBuffer_.assign (static_cast (fftSize_) * 2, 0.0f); // Must hold complex data for in-place FFT + + fdlIndex_ = 0; + configured_ = true; + } + + int getHopSize() const { return hopSize_; } + int getFFTSize() 
const { return fftSize_; } + bool isConfigured() const { return configured_; } + + std::size_t setImpulseResponse (const float* impulseResponse, std::size_t length, float scaling) + { + jassert (configured_); + + if (fftSize_ <= 0 || hopSize_ <= 0) + { + resetState(); + return 0; + } + + frequencyPartitions_.clear(); + frequencyDelayLine_.clear(); + + if (length == 0 || impulseResponse == nullptr) + { + resetState(); + return 0; + } + + const auto numPartitions = (length + static_cast (hopSize_) - 1) / static_cast (hopSize_); + if (numPartitions == 0) + { + resetState(); + return 0; + } + + std::size_t processedSamples = 0; + frequencyPartitions_.reserve (numPartitions); + + for (std::size_t p = 0; p < numPartitions; ++p) + { + std::vector partition; + partition.resize (static_cast (fftSize_) * 2); + + std::fill (tempBuffer_.begin(), tempBuffer_.end(), 0.0f); + + const std::size_t offset = p * static_cast (hopSize_); + const std::size_t copyCount = std::min (static_cast (hopSize_), length - offset); + + if (copyCount > 0 && offset < length) + { + for (std::size_t i = 0; i < copyCount && offset + i < length; ++i) + tempBuffer_[i] = impulseResponse[offset + i] * scaling; + } + + fftProcessor_.performRealFFTForward (tempBuffer_.data(), partition.data()); + + frequencyPartitions_.push_back (std::move (partition)); + + processedSamples += copyCount; + } + + frequencyDelayLine_.assign (numPartitions, std::vector (static_cast (fftSize_) * 2, 0.0f)); + fdlIndex_ = 0; + + resetState(); + + return processedSamples; + } + + void resetState() + { + fdlIndex_ = 0; + + for (auto& partition : frequencyDelayLine_) + std::fill (partition.begin(), partition.end(), 0.0f); + + std::fill (overlapBuffer_.begin(), overlapBuffer_.end(), 0.0f); + std::fill (timeBuffer_.begin(), timeBuffer_.end(), 0.0f); + std::fill (frequencyBuffer_.begin(), frequencyBuffer_.end(), 0.0f); + } + + void processHop (const float* inputHop, float* outputAccumulator) + { + jassert (configured_); + + if 
(frequencyPartitions_.empty()) + return; + + // 1) Transform current input hop to frequency domain + std::fill (tempBuffer_.begin(), tempBuffer_.end(), 0.0f); + for (int i = 0; i < hopSize_; ++i) + tempBuffer_[i] = inputHop[i]; + + fftProcessor_.performRealFFTForward (tempBuffer_.data(), tempBuffer_.data()); + + // 2) Store in frequency delay line (circular buffer) - copy full complex buffer + fdlIndex_ = (fdlIndex_ == 0) ? static_cast (frequencyDelayLine_.size()) - 1 : fdlIndex_ - 1; + std::copy (tempBuffer_.begin(), tempBuffer_.begin() + (fftSize_ * 2), frequencyDelayLine_[static_cast (fdlIndex_)].begin()); + + // 3) Frequency domain convolution: Y = sum(X[k-p] * H[p]) + std::fill (frequencyBuffer_.data(), frequencyBuffer_.data() + (fftSize_ * 2), 0.0f); + + int xIndex = fdlIndex_; + for (std::size_t p = 0; p < frequencyPartitions_.size(); ++p) + { + const float* X = frequencyDelayLine_[static_cast (xIndex)].data(); + const float* H = frequencyPartitions_[p].data(); + + // fftSize_/2 gives the number of complex pairs for real FFT + complexMultiplyAccumulate (X, H, frequencyBuffer_.data(), fftSize_ / 2); + + // Move to next older spectrum + xIndex++; + if (xIndex >= static_cast (frequencyDelayLine_.size())) + xIndex = 0; + } + + // 4) Inverse FFT back to time domain + fftProcessor_.performRealFFTInverse (frequencyBuffer_.data(), timeBuffer_.data()); + + // 5) Overlap-Add: output first hopSize_ samples, store last hopSize_ as overlap + for (int i = 0; i < hopSize_; ++i) + { + outputAccumulator[i] += timeBuffer_[i] + overlapBuffer_[i]; + overlapBuffer_[i] = timeBuffer_[i + hopSize_]; + } + } + + bool hasImpulseResponse() const { return !frequencyPartitions_.empty(); } + +private: + int hopSize_ = 0; + int fftSize_ = 0; + + FFTProcessor fftProcessor_; + + // IR partitions in frequency domain + std::vector> frequencyPartitions_; + + // Frequency Delay Line (most recent at fdlIndex_) + std::vector> frequencyDelayLine_; + int fdlIndex_ = 0; + + // Processing buffers + 
std::vector overlapBuffer_; + std::vector timeBuffer_; + std::vector frequencyBuffer_; + std::vector tempBuffer_; + + bool configured_ = false; +}; + +//============================================================================== +// PartitionedConvolver::Impl - Implementation details +//============================================================================== +class PartitionedConvolver::Impl +{ +public: + Impl() = default; + ~Impl() = default; + + void configureLayers (std::size_t directFIRTaps, const std::vector& layers) + { + directFIRTapCount_ = directFIRTaps; + layers_.clear(); + layers_.resize (layers.size()); + + std::size_t maximumHopSize = 0; + + baseHopSize_ = layers.empty() ? 0 : layers.front().hopSize; + for (std::size_t i = 0; i < layers.size(); ++i) + { + layers_[i].configure (layers[i].hopSize); + if (i == 0) + baseHopSize_ = layers[i].hopSize; + else + baseHopSize_ = std::min (baseHopSize_, layers[i].hopSize); + + maximumHopSize = std::max (maximumHopSize, layers[i].hopSize); + } + + // Prepare staging buffers + inputStaging_.clear(); + outputStaging_.assign (static_cast (baseHopSize_), 0.0f); + inputCarry_.clear(); + + // Prepare per-layer accumulators + layerInputAccumulators_.assign (layers.size(), std::vector()); + layerOutputCarries_.assign (layers.size(), std::vector()); + layerTempOutput_.resize (maximumHopSize); + } + + void setImpulseResponse (const float* impulseResponse, std::size_t length, const PartitionedConvolver::IRLoadOptions& options) + { + DirectFIR newFIR; + std::vector newLayers (layers_.size()); + + // Safety check + if (impulseResponse != nullptr && length > 0) + { + // Always apply peak headroom + float headroomScale = std::pow (10.0f, options.headroomDb / 20.0f); + if (options.normalize) + { + const auto minMax = FloatVectorOperations::findMinAndMax (impulseResponse, length); + + const float peak = std::max (std::abs (minMax.getStart()), std::abs (minMax.getEnd())); + if (peak > 0.0f) + headroomScale /= peak; + } + + 
// Update DirectFIR in-place + std::vector directTaps; + + const auto directTapsCount = std::min (directFIRTapCount_, length); + if (directTapsCount > 0) + { + directTaps.reserve (directTapsCount); + directTaps.assign (impulseResponse, impulseResponse + directTapsCount); + } + + newFIR.setTaps (std::move (directTaps), headroomScale); + + // Update FFT layers + std::size_t consumed = directTapsCount; + for (std::size_t i = 0; i < newLayers.size(); ++i) + { + auto& layer = newLayers[i]; + layer.configure (layers_[i].getHopSize()); + + const std::size_t remaining = (consumed < length) ? (length - consumed) : 0; + if (remaining == 0) + { + layer.setImpulseResponse (nullptr, 0, headroomScale); + continue; + } + + consumed += layer.setImpulseResponse (impulseResponse + consumed, remaining, headroomScale); + } + } + + { + SpinLock::ScopedLockType lock (processingLock_); + + directFIR_ = std::move (newFIR); + layers_ = std::move (newLayers); + + resetStateUnsafe(); + } + } + + void reset() + { + SpinLock::ScopedLockType lock (processingLock_); + + resetStateUnsafe(); + } + + void process (const float* input, float* output, std::size_t numSamples) + { + if (numSamples == 0) return; + + SpinLock::ScopedLockType lock (processingLock_); + + processUnsafe (input, output, numSamples); + } + +private: + void resetStateUnsafe() + { + directFIR_.reset(); + inputStaging_.clear(); + std::fill (outputStaging_.begin(), outputStaging_.end(), 0.0f); + inputCarry_.clear(); + + for (auto& acc : layerInputAccumulators_) + acc.clear(); + + for (auto& carry : layerOutputCarries_) + carry.clear(); + + for (auto& layer : layers_) + layer.resetState(); + } + + void processUnsafe (const float* input, float* output, std::size_t numSamples) + { + ensureWorkingBuffers (numSamples); // TODO - move outside of process + + FloatVectorOperations::copy (workingInput_.data(), input, numSamples); + FloatVectorOperations::clear (workingOutput_.data(), numSamples); + + // Process direct FIR (no block size 
constraints) + directFIR_.process (workingInput_.data(), workingOutput_.data(), numSamples); + if (layers_.empty()) + { + FloatVectorOperations::add (output, workingOutput_.data(), numSamples); + return; + } + + // Process FFT layers with hop-based processing + appendToBuffer (inputCarry_, workingInput_.data(), numSamples); + + std::size_t outputSamplesProduced = 0; + while (inputCarry_.size() >= static_cast (baseHopSize_)) + { + const std::size_t hopSize = static_cast (baseHopSize_); + inputStaging_.assign (inputCarry_.begin(), inputCarry_.begin() + hopSize); + + FloatVectorOperations::clear (outputStaging_.data(), outputStaging_.size()); + + for (std::size_t layerIndex = 0; layerIndex < layers_.size(); ++layerIndex) + { + auto& layer = layers_[layerIndex]; + const int layerHopSize = layer.getHopSize(); + + appendToBuffer (layerInputAccumulators_[layerIndex], inputStaging_.data(), hopSize); + + while (layerInputAccumulators_[layerIndex].size() >= static_cast (layerHopSize)) + { + tempLayerHop_.assign (layerInputAccumulators_[layerIndex].begin(), + layerInputAccumulators_[layerIndex].begin() + layerHopSize); + + FloatVectorOperations::clear (layerTempOutput_.data(), layerHopSize); + + if (layer.hasImpulseResponse()) + layer.processHop (tempLayerHop_.data(), layerTempOutput_.data()); + + appendToBuffer (layerOutputCarries_[layerIndex], layerTempOutput_.data(), static_cast (layerHopSize)); + + layerInputAccumulators_[layerIndex].erase ( + layerInputAccumulators_[layerIndex].begin(), + layerInputAccumulators_[layerIndex].begin() + layerHopSize); + } + + if (layerOutputCarries_[layerIndex].size() >= hopSize) + { + FloatVectorOperations::add (outputStaging_.data(), layerOutputCarries_[layerIndex].data(), hopSize); + + layerOutputCarries_[layerIndex].erase ( + layerOutputCarries_[layerIndex].begin(), + layerOutputCarries_[layerIndex].begin() + hopSize); + } + } + + // Add staging output to main output + const std::size_t samplesToWrite = std::min (hopSize, numSamples - 
outputSamplesProduced); + FloatVectorOperations::add (workingOutput_.data() + outputSamplesProduced, outputStaging_.data(), samplesToWrite); + outputSamplesProduced += samplesToWrite; + + // Remove processed input from carry buffer + inputCarry_.erase (inputCarry_.begin(), inputCarry_.begin() + hopSize); + } + + // Copy final result to output (accumulate) + FloatVectorOperations::add (output, workingOutput_.data(), numSamples); + } + +private: + void ensureWorkingBuffers (std::size_t numSamples) + { + if (workingInput_.size() < numSamples) + workingInput_.resize (numSamples); + + if (workingOutput_.size() < numSamples) + workingOutput_.resize (numSamples); + } + + void appendToBuffer (std::vector& buffer, const float* data, std::size_t numSamples) + { + const std::size_t oldSize = buffer.size(); + + buffer.resize (oldSize + numSamples); + + std::copy (data, data + numSamples, buffer.begin() + oldSize); + } + + std::size_t directFIRTapCount_ = 0; + int baseHopSize_ = 0; + + DirectFIR directFIR_; + std::vector layers_; + + // Working buffers + std::vector workingInput_; + std::vector workingOutput_; + + // Staging for hop-based processing + std::vector inputCarry_; + std::vector inputStaging_; + std::vector outputStaging_; + + // Per-layer buffering + std::vector> layerInputAccumulators_; + std::vector> layerOutputCarries_; + std::vector tempLayerHop_; + std::vector layerTempOutput_; + + mutable SpinLock processingLock_; +}; + +//============================================================================== +// PartitionedConvolver implementation +//============================================================================== + +PartitionedConvolver::PartitionedConvolver() + : pImpl (std::make_unique()) +{ +} + +PartitionedConvolver::~PartitionedConvolver() = default; + +PartitionedConvolver::PartitionedConvolver (PartitionedConvolver&& other) noexcept + : pImpl (std::move (other.pImpl)) +{ +} + +PartitionedConvolver& PartitionedConvolver::operator= 
(PartitionedConvolver&& other) noexcept +{ + if (this != &other) + pImpl = std::move (other.pImpl); + return *this; +} + +void PartitionedConvolver::configureLayers (std::size_t directFIRTaps, const std::vector& layers) +{ + pImpl->configureLayers (directFIRTaps, layers); +} + +void PartitionedConvolver::setTypicalLayout (std::size_t directTaps, const std::vector& hops) +{ + std::vector layerSpecs; + layerSpecs.reserve (hops.size()); + + for (int hop : hops) + layerSpecs.push_back ({hop}); + + configureLayers (directTaps, layerSpecs); +} + +void PartitionedConvolver::setImpulseResponse (const float* impulseResponse, std::size_t length, const IRLoadOptions& options) +{ + pImpl->setImpulseResponse (impulseResponse, length, options); +} + +void PartitionedConvolver::setImpulseResponse (const std::vector& impulseResponse, const IRLoadOptions& options) +{ + setImpulseResponse (impulseResponse.data(), impulseResponse.size(), options); +} + +void PartitionedConvolver::reset() +{ + pImpl->reset(); +} + +void PartitionedConvolver::process (const float* input, float* output, std::size_t numSamples) +{ + pImpl->process (input, output, numSamples); +} + +} // namespace yup diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.h b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h new file mode 100644 index 000000000..69c384ca1 --- /dev/null +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h @@ -0,0 +1,160 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2025 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. 
Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#pragma once + +namespace yup +{ + +//============================================================================== +/** + Layered partitioned convolution engine optimized for real-time audio processing. + + Combines multiple processing strategies for efficient convolution: + - Direct FIR computation for early taps (low latency) + - One or more FFT-based Overlap-Add layers with uniform partitioning per layer + + The engine uses YUP's FFTProcessor for real FFT operations and supports: + - Arbitrary input/output block sizes with internal buffering + - Real-time safe processing (no heap allocations during process()) + - Configurable layer hierarchy for optimal CPU/latency trade-off + + Example usage: + @code + PartitionedConvolver convolver; + + // Configure layers: 256 direct taps + FFT layers with hops 256, 1024, 4096 + convolver.setTypicalLayout(256, {256, 1024, 4096}); + + // Set impulse response (e.g., reverb IR) + std::vector impulseResponse = loadImpulseResponse(); + convolver.setImpulseResponse(impulseResponse); + + // In audio callback (accumulates into output): + convolver.process(inputBuffer, outputBuffer, numSamples); + @endcode + + @note The process() method accumulates results into the output buffer. + Clear the output buffer first if overwrite behavior is desired. 
+*/ +class PartitionedConvolver +{ +public: + //============================================================================== + /** Configuration for a single FFT-based convolution layer */ + struct LayerSpec + { + int hopSize; /**< Partition size L (FFT size will be 2*L) */ + }; + + //============================================================================== + /** Default constructor */ + PartitionedConvolver(); + + /** Destructor */ + ~PartitionedConvolver(); + + // Non-copyable but movable + PartitionedConvolver (PartitionedConvolver&& other) noexcept; + PartitionedConvolver& operator= (PartitionedConvolver&& other) noexcept; + + //============================================================================== + /** + Configure the convolution layers before setting the impulse response. + + @param directFIRTaps Number of early taps to process with direct FIR (for low latency) + @param layers Vector of layer specifications with increasing hop sizes + (e.g., {{256}, {1024}, {4096}} for 256→1024→4096 progression) + */ + void configureLayers (std::size_t directFIRTaps, const std::vector& layers); + + /** + Convenience method to set a typical late-reverb configuration. + + @param directTaps Number of direct FIR taps for early reflections + @param hops Vector of hop sizes for FFT layers (geometrically increasing recommended) + */ + void setTypicalLayout (std::size_t directTaps, const std::vector& hops); + + //============================================================================== + + struct IRLoadOptions + { + IRLoadOptions() + : normalize (true) + , headroomDb (-12.0f) + { + } + + bool normalize; + float headroomDb; + }; + + /** + Set the impulse response for convolution. + + @param impulseResponse Pointer to impulse response samples + @param length Number of samples in the impulse response + + @note This method is not real-time safe and should be called during initialization + or from a background thread when audio is paused. 
+ */ + void setImpulseResponse (const float* impulseResponse, std::size_t length, const IRLoadOptions& options = {}); + + /** + Set the impulse response from a vector. + + @param impulseResponse Vector containing impulse response samples + */ + void setImpulseResponse (const std::vector& impulseResponse, const IRLoadOptions& options = {}); + + //============================================================================== + /** + Reset all internal processing state (clears delay lines, overlap buffers). + Impulse response partitions are preserved. + */ + void reset(); + + /** + Process audio samples through the convolver. + + @param input Input audio buffer + @param output Output audio buffer (results are accumulated) + @param numSamples Number of samples to process + + @note Results are accumulated into the output buffer. Clear it first if needed. + @note This method is real-time safe with no heap allocations. + */ + void process (const float* input, float* output, std::size_t numSamples); + +private: + //============================================================================== + class DirectFIR; + class FFTLayer; + class Impl; + + std::unique_ptr pImpl; + + //============================================================================== + YUP_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (PartitionedConvolver) +}; + +} // namespace yup diff --git a/modules/yup_dsp/yup_dsp.cpp b/modules/yup_dsp/yup_dsp.cpp index e7c1087c4..ced8951b1 100644 --- a/modules/yup_dsp/yup_dsp.cpp +++ b/modules/yup_dsp/yup_dsp.cpp @@ -40,3 +40,6 @@ //============================================================================== #include "designers/yup_FilterDesigner.cpp" + +//============================================================================== +#include "convolution/yup_PartitionedConvolver.cpp" diff --git a/modules/yup_dsp/yup_dsp.h b/modules/yup_dsp/yup_dsp.h index b6eca3c79..e9ab7c795 100644 --- a/modules/yup_dsp/yup_dsp.h +++ b/modules/yup_dsp/yup_dsp.h @@ -141,4 +141,7 @@ 
// Dynamics processors #include "dynamics/yup_SoftClipper.h" +// Convolution processors +#include "convolution/yup_PartitionedConvolver.h" + //============================================================================== From 7e9b1a86dd10d5038b3cb98c550786bffe9ac141 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Tue, 26 Aug 2025 17:19:01 +0200 Subject: [PATCH 03/37] More work --- .../source/examples/ConvolutionDemo.h | 16 ++- .../convolution/yup_PartitionedConvolver.cpp | 101 ++++++++++++++---- .../convolution/yup_PartitionedConvolver.h | 16 ++- 3 files changed, 111 insertions(+), 22 deletions(-) diff --git a/examples/graphics/source/examples/ConvolutionDemo.h b/examples/graphics/source/examples/ConvolutionDemo.h index a5dc7256b..8885d8ae9 100644 --- a/examples/graphics/source/examples/ConvolutionDemo.h +++ b/examples/graphics/source/examples/ConvolutionDemo.h @@ -60,7 +60,7 @@ class ConvolutionDemo dryGain.setCurrentAndTargetValue (0.3f); // Configure convolver with typical layout - convolver.setTypicalLayout (256, {256, 1024, 4096}); + convolver.setTypicalLayout (128, {128, 256, 512, 1024, 4096}); // Create UI createUI(); @@ -118,6 +118,7 @@ class ConvolutionDemo // Reset convolver convolver.reset(); + convolver.prepare (static_cast (device->getCurrentBufferSizeSamples())); } void audioDeviceStopped() override @@ -387,7 +388,7 @@ class ConvolutionDemo irWaveformDisplay.setMargins (25, 25, 25, 25); // Add grid lines - irWaveformDisplay.setVerticalGridLines ({ 0.0, 1.0 }); + irWaveformDisplay.setVerticalGridLines ({ 0.0, 1.0 }); irWaveformDisplay.setHorizontalGridLines ({ -1.0, -0.5, 0.5, 1.0 }); irWaveformDisplay.addHorizontalGridLine (0.0, yup::Color (0xFF666666), 1.0f, true); @@ -406,6 +407,13 @@ class ConvolutionDemo if (impulseResponseData.empty()) return; + // Always apply peak headroom + float headroomScale = std::pow (10.0f, -12.0f / 20.0f); + const auto minMax = yup::FloatVectorOperations::findMinAndMax (impulseResponseData.data(), 
impulseResponseData.size()); + const float peak = std::max (std::abs (minMax.getStart()), std::abs (minMax.getEnd())); + if (peak > 0.0f) + headroomScale /= peak; + // Create waveform data points const size_t numPoints = std::min (static_cast (2048), impulseResponseData.size()); const size_t stride = impulseResponseData.size() / numPoints; @@ -420,7 +428,7 @@ class ConvolutionDemo sampleIndex = impulseResponseData.size() - 1; double normalizedTime = static_cast (i) / static_cast (numPoints - 1); - double amplitude = static_cast (impulseResponseData[sampleIndex]); + double amplitude = static_cast (impulseResponseData[sampleIndex] * headroomScale); waveformData.emplace_back (normalizedTime, amplitude); } @@ -431,11 +439,13 @@ class ConvolutionDemo // Update X axis range to show time double lengthInSeconds = static_cast (impulseResponseData.size()) / 44100.0; // Assume 44.1kHz irWaveformDisplay.setXRange (0.0, lengthInSeconds); + irWaveformDisplay.setVerticalGridLines ({ 0.0, lengthInSeconds }); // Update X axis labels to show time std::vector timeLabels; for (int i = 0; i <= 4; ++i) timeLabels.push_back (lengthInSeconds * static_cast (i) / 4.0); + irWaveformDisplay.setXAxisLabels (timeLabels); } diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index 09319eba6..01ee58d1f 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -308,18 +308,64 @@ class PartitionedConvolver::Impl else baseHopSize_ = std::min (baseHopSize_, layers[i].hopSize); - maximumHopSize = std::max (maximumHopSize, layers[i].hopSize); + maximumHopSize = std::max (maximumHopSize, static_cast (layers[i].hopSize)); } + maxHopSize_ = maximumHopSize; + + // Clear staging buffers - will be allocated in prepare() + inputStaging_.clear(); + outputStaging_.clear(); + inputCarry_.clear(); + + // Clear per-layer accumulators - will be allocated in 
prepare() + layerInputAccumulators_.assign (layers.size(), std::vector()); + layerOutputCarries_.assign (layers.size(), std::vector()); + + layerTempOutput_.clear(); + tempLayerHop_.clear(); + + // Clear working buffers - will be allocated in prepare() + workingInput_.clear(); + workingOutput_.clear(); + + isPrepared_ = false; + } + + void prepare (std::size_t maxBlockSize) + { + maxBlockSize_ = maxBlockSize; + + // Calculate buffer sizes based on block size and hop configurations + const std::size_t maxBufferSize = std::max (maxBlockSize * 4, maxHopSize_ * 16); + // Prepare staging buffers inputStaging_.clear(); outputStaging_.assign (static_cast (baseHopSize_), 0.0f); inputCarry_.clear(); + inputCarry_.reserve (maxBufferSize); // Prepare per-layer accumulators - layerInputAccumulators_.assign (layers.size(), std::vector()); - layerOutputCarries_.assign (layers.size(), std::vector()); - layerTempOutput_.resize (maximumHopSize); + for (std::size_t i = 0; i < layerInputAccumulators_.size(); ++i) + { + layerInputAccumulators_[i].clear(); + layerInputAccumulators_[i].reserve (maxBufferSize); + layerOutputCarries_[i].clear(); + layerOutputCarries_[i].reserve (maxBufferSize); + } + + // Allocate temp buffers + if (maxHopSize_ > 0) + { + layerTempOutput_.resize (maxHopSize_); + tempLayerHop_.reserve (maxHopSize_); + } + + // Allocate working buffers + workingInput_.resize (maxBlockSize); + workingOutput_.resize (maxBlockSize); + + isPrepared_ = true; } void setImpulseResponse (const float* impulseResponse, std::size_t length, const PartitionedConvolver::IRLoadOptions& options) @@ -417,7 +463,12 @@ class PartitionedConvolver::Impl void processUnsafe (const float* input, float* output, std::size_t numSamples) { - ensureWorkingBuffers (numSamples); // TODO - move outside of process + // Ensure prepare() was called + jassert (isPrepared_); + jassert (numSamples <= maxBlockSize_); + + if (!isPrepared_ || numSamples > maxBlockSize_) + return; // Fail gracefully in release 
builds FloatVectorOperations::copy (workingInput_.data(), input, numSamples); FloatVectorOperations::clear (workingOutput_.data(), numSamples); @@ -431,7 +482,7 @@ class PartitionedConvolver::Impl } // Process FFT layers with hop-based processing - appendToBuffer (inputCarry_, workingInput_.data(), numSamples); + safeAppendToBuffer (inputCarry_, workingInput_.data(), numSamples); std::size_t outputSamplesProduced = 0; while (inputCarry_.size() >= static_cast (baseHopSize_)) @@ -446,7 +497,7 @@ class PartitionedConvolver::Impl auto& layer = layers_[layerIndex]; const int layerHopSize = layer.getHopSize(); - appendToBuffer (layerInputAccumulators_[layerIndex], inputStaging_.data(), hopSize); + safeAppendToBuffer (layerInputAccumulators_[layerIndex], inputStaging_.data(), hopSize); while (layerInputAccumulators_[layerIndex].size() >= static_cast (layerHopSize)) { @@ -458,7 +509,7 @@ class PartitionedConvolver::Impl if (layer.hasImpulseResponse()) layer.processHop (tempLayerHop_.data(), layerTempOutput_.data()); - appendToBuffer (layerOutputCarries_[layerIndex], layerTempOutput_.data(), static_cast (layerHopSize)); + safeAppendToBuffer (layerOutputCarries_[layerIndex], layerTempOutput_.data(), static_cast (layerHopSize)); layerInputAccumulators_[layerIndex].erase ( layerInputAccumulators_[layerIndex].begin(), @@ -489,26 +540,35 @@ class PartitionedConvolver::Impl } private: - void ensureWorkingBuffers (std::size_t numSamples) - { - if (workingInput_.size() < numSamples) - workingInput_.resize (numSamples); - - if (workingOutput_.size() < numSamples) - workingOutput_.resize (numSamples); - } - void appendToBuffer (std::vector& buffer, const float* data, std::size_t numSamples) + void safeAppendToBuffer (std::vector& buffer, const float* data, std::size_t numSamples) { const std::size_t oldSize = buffer.size(); + const std::size_t newSize = oldSize + numSamples; - buffer.resize (oldSize + numSamples); + // Ensure we never exceed the reserved capacity to avoid allocations 
+ jassert (newSize <= buffer.capacity()); + if (newSize > buffer.capacity()) + { + // Truncate to prevent allocation - this is a safety measure + const std::size_t maxSamples = buffer.capacity() - oldSize; + if (maxSamples > 0) + { + buffer.resize (buffer.capacity()); + std::copy (data, data + maxSamples, buffer.begin() + oldSize); + } + return; + } + buffer.resize (newSize); std::copy (data, data + numSamples, buffer.begin() + oldSize); } std::size_t directFIRTapCount_ = 0; int baseHopSize_ = 0; + std::size_t maxHopSize_ = 0; + std::size_t maxBlockSize_ = 0; + bool isPrepared_ = false; DirectFIR directFIR_; std::vector layers_; @@ -580,6 +640,11 @@ void PartitionedConvolver::setImpulseResponse (const std::vector& impulse setImpulseResponse (impulseResponse.data(), impulseResponse.size(), options); } +void PartitionedConvolver::prepare (std::size_t maxBlockSize) +{ + pImpl->prepare (maxBlockSize); +} + void PartitionedConvolver::reset() { pImpl->reset(); diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.h b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h index 69c384ca1..5931e7ece 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.h +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h @@ -44,12 +44,15 @@ namespace yup // Configure layers: 256 direct taps + FFT layers with hops 256, 1024, 4096 convolver.setTypicalLayout(256, {256, 1024, 4096}); + // Prepare for processing with maximum block size (must be called before process) + convolver.prepare(512); // Maximum 512 samples per process() call + // Set impulse response (e.g., reverb IR) std::vector impulseResponse = loadImpulseResponse(); convolver.setImpulseResponse(impulseResponse); // In audio callback (accumulates into output): - convolver.process(inputBuffer, outputBuffer, numSamples); + convolver.process(inputBuffer, outputBuffer, numSamples); // numSamples <= 512 @endcode @note The process() method accumulates results into the output buffer. 
@@ -127,6 +130,17 @@ class PartitionedConvolver void setImpulseResponse (const std::vector& impulseResponse, const IRLoadOptions& options = {}); //============================================================================== + /** + Prepare the convolver for processing with a specific maximum block size. + + @param maxBlockSize Maximum number of samples that will be passed to process() + + @note This method is not real-time safe and should be called during initialization + or when audio processing is paused. It pre-allocates all internal buffers + to handle the specified block size without further allocations. + */ + void prepare (std::size_t maxBlockSize); + /** Reset all internal processing state (clears delay lines, overlap buffers). Impulse response partitions are preserved. From 29ba67e05a69b9f712cab8c17e4a49594163aa32 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Tue, 26 Aug 2025 21:40:58 +0200 Subject: [PATCH 04/37] Fix typo --- modules/yup_dsp/frequency/yup_FFTProcessor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/yup_dsp/frequency/yup_FFTProcessor.cpp b/modules/yup_dsp/frequency/yup_FFTProcessor.cpp index 45aac82a7..c85263290 100644 --- a/modules/yup_dsp/frequency/yup_FFTProcessor.cpp +++ b/modules/yup_dsp/frequency/yup_FFTProcessor.cpp @@ -787,7 +787,7 @@ void FFTProcessor::updateScalingFactor() void FFTProcessor::applyScaling (float* data, int numElements, bool isForward) const { - if (scaling == FFTScaling::none || (scaling == FFTScaling::asymmetric && ! isForward)) + if (scaling == FFTScaling::none || (scaling == FFTScaling::asymmetric && ! 
isForward)) return; FloatVectorOperations::multiply (data, scalingFactor, numElements); From fd316bf0e732526c1242ceeb82654c8790da7d47 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Tue, 26 Aug 2025 22:15:15 +0200 Subject: [PATCH 05/37] Improved convolver --- modules/yup_audio_basics/yup_audio_basics.h | 9 + .../convolution/yup_PartitionedConvolver.cpp | 376 ++++++++++++++---- .../convolution/yup_PartitionedConvolver.h | 55 +-- modules/yup_dsp/yup_dsp.cpp | 4 + 4 files changed, 330 insertions(+), 114 deletions(-) diff --git a/modules/yup_audio_basics/yup_audio_basics.h b/modules/yup_audio_basics/yup_audio_basics.h index 944dd6ec6..fb69411df 100644 --- a/modules/yup_audio_basics/yup_audio_basics.h +++ b/modules/yup_audio_basics/yup_audio_basics.h @@ -69,11 +69,20 @@ //============================================================================== #ifndef YUP_USE_SSE_INTRINSICS +#if defined (__SSE__) #define YUP_USE_SSE_INTRINSICS 1 #endif +#endif + +#ifndef YUP_USE_AVX_INTRINSICS +#if defined (__AVX2__) +#define YUP_USE_AVX_INTRINSICS 1 +#endif +#endif #if ! YUP_INTEL #undef YUP_USE_SSE_INTRINSICS +#undef YUP_USE_AVX_INTRINSICS #endif #if __ARM_NEON__ && ! 
(YUP_USE_VDSP_FRAMEWORK || defined(YUP_USE_ARM_NEON)) diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index 01ee58d1f..ef98c9a2f 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -19,26 +19,127 @@ ============================================================================== */ -#include -#include - namespace yup { //============================================================================== -// Complex multiply-accumulate for interleaved real/imaginary format -// Y += A * B (complex multiplication) -//============================================================================== -static void complexMultiplyAccumulate (const float* A, const float* B, float* Y, int complexPairs) + +/** Performs Y += A * B (complex multiply accumulate) where A, B, and Y + are arrays of interleaved complex values [real, imag, real, imag...]. + + @param A pointer to input complex array + @param B pointer to input complex array + @param Y pointer to output complex array (accumulated) + @param complexPairs number of complex pairs (not number of floats!) 
+*/ +static void complexMultiplyAccumulate (const float* A, const float* B, float* Y, int complexPairs) noexcept { - for (int i = 0; i < complexPairs; ++i) + int i = 0; + +#if YUP_USE_AVX_INTRINSICS + constexpr int simdWidth = 4; // AVX2 path: process 4 complex pairs (8 floats) at a time + for (; i <= complexPairs - simdWidth; i += simdWidth) + { + const int idx = i * 2; + + __m256 a = _mm256_loadu_ps(A + idx); + __m256 b = _mm256_loadu_ps(B + idx); + __m256 y = _mm256_loadu_ps(Y + idx); + + // a = [ar0 ai0 ar1 ai1 ar2 ai2 ar3 ai3] + // b = [br0 bi0 br1 bi1 br2 bi2 br3 bi3] + + // separate real and imag for a and b + const __m256 a_shuffled = _mm256_permute_ps(a, _MM_SHUFFLE(2, 3, 0, 1)); + const __m256 b_shuffled = _mm256_permute_ps(b, _MM_SHUFFLE(2, 3, 0, 1)); + + // real = ar*br - ai*bi + __m256 realPart = _mm256_fmsub_ps(a, b, _mm256_mul_ps(a_shuffled, b_shuffled)); + + // imag = ar*bi + ai*br + __m256 imagPart = _mm256_fmadd_ps(a, b_shuffled, _mm256_mul_ps(a_shuffled, b)); + + // interleave real/imag back + const __m256 interleaved = _mm256_blend_ps(realPart, imagPart, 0b10101010); + + y = _mm256_add_ps(y, interleaved); + _mm256_storeu_ps(Y + idx, y); + } + +#elif YUP_USE_SSE_INTRINSICS + constexpr int simdWidth = 2; // SSE path: process 2 complex pairs (4 floats) at a time + for (; i <= complexPairs - simdWidth; i += simdWidth) + { + const int idx = i * 2; + + __m128 a = _mm_loadu_ps(A + idx); + __m128 b = _mm_loadu_ps(B + idx); + __m128 y = _mm_loadu_ps(Y + idx); + + // separate real and imag for a and b + const __m128 a_shuffled = _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 3, 0, 1)); + const __m128 b_shuffled = _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 3, 0, 1)); + + // real = ar*br - ai*bi + __m128 realPart = _mm_sub_ps(_mm_mul_ps(a, b), _mm_mul_ps(a_shuffled, b_shuffled)); + + // imag = ar*bi + ai*br + __m128 imagPart = _mm_add_ps(_mm_mul_ps(a, b_shuffled), _mm_mul_ps(a_shuffled, b)); + + // interleave real/imag back + const __m128 interleaved = 
_mm_unpacklo_ps(realPart, imagPart); + + y = _mm_add_ps(y, interleaved); + _mm_storeu_ps(Y + idx, y); + } + +#elif YUP_USE_ARM_NEON + constexpr int simdWidth = 2; // NEON path: process 2 complex pairs (4 floats) at a time + for (; i <= complexPairs - simdWidth; i += simdWidth) + { + const int idx = i * 2; + + float32x4_t a = vld1q_f32(A + idx); // [ar0, ai0, ar1, ai1] + float32x4_t b = vld1q_f32(B + idx); // [br0, bi0, br1, bi1] + float32x4_t y = vld1q_f32(Y + idx); + + // Shuffle a and b to get swapped real/imag for cross-multiplication + float32x4_t a_shuf = vrev64q_f32(a); // [ai0, ar0, ai1, ar1] + float32x4_t b_shuf = vrev64q_f32(b); // [bi0, br0, bi1, br1] + + // real = ar*br - ai*bi + float32x4_t realPart = vsubq_f32(vmulq_f32(a, b), vmulq_f32(a_shuf, b_shuf)); + + // imag = ar*bi + ai*br + float32x4_t imagPart = vaddq_f32(vmulq_f32(a, b_shuf), vmulq_f32(a_shuf, b)); + + // Interleave real and imag: [real0, imag0, real1, imag1] + float32x2_t realLow = vget_low_f32(realPart); + float32x2_t imagLow = vget_low_f32(imagPart); + float32x2x2_t zippedLow = vzip_f32(realLow, imagLow); + + float32x2_t realHigh = vget_high_f32(realPart); + float32x2_t imagHigh = vget_high_f32(imagPart); + float32x2x2_t zippedHigh = vzip_f32(realHigh, imagHigh); + + float32x4_t interleaved = vcombine_f32(zippedLow.val[0], zippedHigh.val[0]); + + y = vaddq_f32(y, interleaved); + vst1q_f32(Y + idx, y); + } + +#endif + + for (; i < complexPairs; ++i) { const int ri = i * 2; const int ii = ri + 1; + const float ar = A[ri]; const float ai = A[ii]; const float br = B[ri]; const float bi = B[ii]; + // (ar + j*ai) * (br + j*bi) = (ar*br - ai*bi) + j*(ar*bi + ai*br) Y[ri] += ar * br - ai * bi; Y[ii] += ar * bi + ai * br; @@ -46,8 +147,7 @@ static void complexMultiplyAccumulate (const float* A, const float* B, float* Y, } //============================================================================== -// DirectFIR - Brute-force FIR implementation for early taps 
-//============================================================================== + class PartitionedConvolver::DirectFIR { public: @@ -107,8 +207,7 @@ class PartitionedConvolver::DirectFIR }; //============================================================================== -// FFTLayer - Single uniform-partitioned OLA layer -//============================================================================== + class PartitionedConvolver::FFTLayer { public: @@ -283,8 +382,100 @@ class PartitionedConvolver::FFTLayer }; //============================================================================== -// PartitionedConvolver::Impl - Implementation details + +class PartitionedConvolver::CircularBuffer +{ +public: + CircularBuffer() = default; + + void resize (std::size_t size) + { + buffer_.resize (size); + clear(); + } + + void clear() + { + std::fill (buffer_.begin(), buffer_.end(), 0.0f); + writeIndex_ = 0; + readIndex_ = 0; + availableForRead_ = 0; + } + + std::size_t getAvailableForRead() const { return availableForRead_; } + std::size_t getAvailableForWrite() const { return buffer_.size() - availableForRead_; } + std::size_t getSize() const { return buffer_.size(); } + + void write (const float* data, std::size_t numSamples) + { + jassert (numSamples <= getAvailableForWrite()); + numSamples = std::min (numSamples, getAvailableForWrite()); + + if (numSamples == 0) return; + + const std::size_t beforeWrap = std::min (numSamples, buffer_.size() - writeIndex_); + const std::size_t afterWrap = numSamples - beforeWrap; + + std::copy (data, data + beforeWrap, buffer_.begin() + writeIndex_); + if (afterWrap > 0) + std::copy (data + beforeWrap, data + numSamples, buffer_.begin()); + + writeIndex_ = (writeIndex_ + numSamples) % buffer_.size(); + availableForRead_ += numSamples; + } + + void read (float* data, std::size_t numSamples) + { + jassert (numSamples <= getAvailableForRead()); + numSamples = std::min (numSamples, getAvailableForRead()); + + if (numSamples == 0) return; 
+ + const std::size_t beforeWrap = std::min (numSamples, buffer_.size() - readIndex_); + const std::size_t afterWrap = numSamples - beforeWrap; + + std::copy (buffer_.begin() + readIndex_, buffer_.begin() + readIndex_ + beforeWrap, data); + if (afterWrap > 0) + std::copy (buffer_.begin(), buffer_.begin() + afterWrap, data + beforeWrap); + + readIndex_ = (readIndex_ + numSamples) % buffer_.size(); + availableForRead_ -= numSamples; + } + + void peek (float* data, std::size_t numSamples, std::size_t offset = 0) const + { + jassert (numSamples + offset <= getAvailableForRead()); + numSamples = std::min (numSamples, getAvailableForRead() - offset); + + if (numSamples == 0) return; + + const std::size_t startIndex = (readIndex_ + offset) % buffer_.size(); + const std::size_t beforeWrap = std::min (numSamples, buffer_.size() - startIndex); + const std::size_t afterWrap = numSamples - beforeWrap; + + std::copy (buffer_.begin() + startIndex, buffer_.begin() + startIndex + beforeWrap, data); + if (afterWrap > 0) + std::copy (buffer_.begin(), buffer_.begin() + afterWrap, data + beforeWrap); + } + + void skip (std::size_t numSamples) + { + jassert (numSamples <= getAvailableForRead()); + numSamples = std::min (numSamples, getAvailableForRead()); + + readIndex_ = (readIndex_ + numSamples) % buffer_.size(); + availableForRead_ -= numSamples; + } + +private: + std::vector buffer_; + std::size_t writeIndex_ = 0; + std::size_t readIndex_ = 0; + std::size_t availableForRead_ = 0; +}; + //============================================================================== + class PartitionedConvolver::Impl { public: @@ -294,6 +485,7 @@ class PartitionedConvolver::Impl void configureLayers (std::size_t directFIRTaps, const std::vector& layers) { directFIRTapCount_ = directFIRTaps; + layers_.clear(); layers_.resize (layers.size()); @@ -316,11 +508,10 @@ class PartitionedConvolver::Impl // Clear staging buffers - will be allocated in prepare() inputStaging_.clear(); outputStaging_.clear(); - 
inputCarry_.clear(); - // Clear per-layer accumulators - will be allocated in prepare() - layerInputAccumulators_.assign (layers.size(), std::vector()); - layerOutputCarries_.assign (layers.size(), std::vector()); + // Resize per-layer circular buffers - will be allocated in prepare() + layerInputBuffers_.resize (layers.size()); + layerOutputBuffers_.resize (layers.size()); layerTempOutput_.clear(); tempLayerHop_.clear(); @@ -336,29 +527,26 @@ class PartitionedConvolver::Impl { maxBlockSize_ = maxBlockSize; - // Calculate buffer sizes based on block size and hop configurations - const std::size_t maxBufferSize = std::max (maxBlockSize * 4, maxHopSize_ * 16); - - // Prepare staging buffers - inputStaging_.clear(); + // Calculate buffer sizes - generous but fixed allocation + const std::size_t inputBufferSize = maxBlockSize; // Input staging for all layers + const std::size_t outputBufferSize = maxHopSize_; // Output buffering for all layers + + // Prepare main input staging + inputStaging_.resize (inputBufferSize); outputStaging_.assign (static_cast (baseHopSize_), 0.0f); - inputCarry_.clear(); - inputCarry_.reserve (maxBufferSize); - // Prepare per-layer accumulators - for (std::size_t i = 0; i < layerInputAccumulators_.size(); ++i) + // Prepare per-layer circular buffers + for (std::size_t i = 0; i < layerInputBuffers_.size(); ++i) { - layerInputAccumulators_[i].clear(); - layerInputAccumulators_[i].reserve (maxBufferSize); - layerOutputCarries_[i].clear(); - layerOutputCarries_[i].reserve (maxBufferSize); + layerInputBuffers_[i].resize (inputBufferSize); + layerOutputBuffers_[i].resize (outputBufferSize); } // Allocate temp buffers if (maxHopSize_ > 0) { layerTempOutput_.resize (maxHopSize_); - tempLayerHop_.reserve (maxHopSize_); + tempLayerHop_.resize (maxHopSize_); } // Allocate working buffers @@ -447,15 +635,16 @@ class PartitionedConvolver::Impl void resetStateUnsafe() { directFIR_.reset(); - inputStaging_.clear(); + inputStagingReadIndex_ = 0; + 
inputStagingWriteIndex_ = 0; + inputStagingAvailable_ = 0; std::fill (outputStaging_.begin(), outputStaging_.end(), 0.0f); - inputCarry_.clear(); - for (auto& acc : layerInputAccumulators_) - acc.clear(); + for (auto& buffer : layerInputBuffers_) + buffer.clear(); - for (auto& carry : layerOutputCarries_) - carry.clear(); + for (auto& buffer : layerOutputBuffers_) + buffer.clear(); for (auto& layer : layers_) layer.resetState(); @@ -481,48 +670,49 @@ class PartitionedConvolver::Impl return; } - // Process FFT layers with hop-based processing - safeAppendToBuffer (inputCarry_, workingInput_.data(), numSamples); + // Add input to main input staging buffer using circular buffer logic + writeToInputStaging (workingInput_.data(), numSamples); std::size_t outputSamplesProduced = 0; - while (inputCarry_.size() >= static_cast (baseHopSize_)) + while (getInputStagingAvailable() >= static_cast (baseHopSize_)) { const std::size_t hopSize = static_cast (baseHopSize_); - inputStaging_.assign (inputCarry_.begin(), inputCarry_.begin() + hopSize); - + + // Read hop from input staging + readFromInputStaging (tempLayerHop_.data(), hopSize); + FloatVectorOperations::clear (outputStaging_.data(), outputStaging_.size()); for (std::size_t layerIndex = 0; layerIndex < layers_.size(); ++layerIndex) { auto& layer = layers_[layerIndex]; const int layerHopSize = layer.getHopSize(); + auto& inputBuffer = layerInputBuffers_[layerIndex]; + auto& outputBuffer = layerOutputBuffers_[layerIndex]; - safeAppendToBuffer (layerInputAccumulators_[layerIndex], inputStaging_.data(), hopSize); + // Write input hop to layer's input buffer + inputBuffer.write (tempLayerHop_.data(), hopSize); - while (layerInputAccumulators_[layerIndex].size() >= static_cast (layerHopSize)) + // Process complete layer hops + while (inputBuffer.getAvailableForRead() >= static_cast (layerHopSize)) { - tempLayerHop_.assign (layerInputAccumulators_[layerIndex].begin(), - layerInputAccumulators_[layerIndex].begin() + 
layerHopSize); - + // Read a full hop for this layer + inputBuffer.read (tempLayerHop_.data(), static_cast (layerHopSize)); + FloatVectorOperations::clear (layerTempOutput_.data(), layerHopSize); if (layer.hasImpulseResponse()) layer.processHop (tempLayerHop_.data(), layerTempOutput_.data()); - safeAppendToBuffer (layerOutputCarries_[layerIndex], layerTempOutput_.data(), static_cast (layerHopSize)); - - layerInputAccumulators_[layerIndex].erase ( - layerInputAccumulators_[layerIndex].begin(), - layerInputAccumulators_[layerIndex].begin() + layerHopSize); + // Write output to layer's output buffer + outputBuffer.write (layerTempOutput_.data(), static_cast (layerHopSize)); } - if (layerOutputCarries_[layerIndex].size() >= hopSize) + // Mix available output from this layer + if (outputBuffer.getAvailableForRead() >= hopSize) { - FloatVectorOperations::add (outputStaging_.data(), layerOutputCarries_[layerIndex].data(), hopSize); - - layerOutputCarries_[layerIndex].erase ( - layerOutputCarries_[layerIndex].begin(), - layerOutputCarries_[layerIndex].begin() + hopSize); + outputBuffer.read (layerTempOutput_.data(), hopSize); + FloatVectorOperations::add (outputStaging_.data(), layerTempOutput_.data(), hopSize); } } @@ -530,9 +720,6 @@ class PartitionedConvolver::Impl const std::size_t samplesToWrite = std::min (hopSize, numSamples - outputSamplesProduced); FloatVectorOperations::add (workingOutput_.data() + outputSamplesProduced, outputStaging_.data(), samplesToWrite); outputSamplesProduced += samplesToWrite; - - // Remove processed input from carry buffer - inputCarry_.erase (inputCarry_.begin(), inputCarry_.begin() + hopSize); } // Copy final result to output (accumulate) @@ -540,29 +727,42 @@ class PartitionedConvolver::Impl } private: - - void safeAppendToBuffer (std::vector& buffer, const float* data, std::size_t numSamples) + void writeToInputStaging (const float* data, std::size_t numSamples) { - const std::size_t oldSize = buffer.size(); - const std::size_t 
newSize = oldSize + numSamples; - - // Ensure we never exceed the reserved capacity to avoid allocations - jassert (newSize <= buffer.capacity()); - if (newSize > buffer.capacity()) - { - // Truncate to prevent allocation - this is a safety measure - const std::size_t maxSamples = buffer.capacity() - oldSize; - if (maxSamples > 0) - { - buffer.resize (buffer.capacity()); - std::copy (data, data + maxSamples, buffer.begin() + oldSize); - } - return; - } - - buffer.resize (newSize); - std::copy (data, data + numSamples, buffer.begin() + oldSize); + const std::size_t available = inputStaging_.size() - inputStagingAvailable_; + jassert (numSamples <= available); + numSamples = std::min (numSamples, available); + if (numSamples == 0) return; + + const std::size_t beforeWrap = std::min (numSamples, inputStaging_.size() - inputStagingWriteIndex_); + const std::size_t afterWrap = numSamples - beforeWrap; + + std::copy (data, data + beforeWrap, inputStaging_.begin() + inputStagingWriteIndex_); + if (afterWrap > 0) + std::copy (data + beforeWrap, data + numSamples, inputStaging_.begin()); + + inputStagingWriteIndex_ = (inputStagingWriteIndex_ + numSamples) % inputStaging_.size(); + inputStagingAvailable_ += numSamples; + } + + void readFromInputStaging (float* data, std::size_t numSamples) + { + jassert (numSamples <= inputStagingAvailable_); + numSamples = std::min (numSamples, inputStagingAvailable_); + if (numSamples == 0) return; + + const std::size_t beforeWrap = std::min (numSamples, inputStaging_.size() - inputStagingReadIndex_); + const std::size_t afterWrap = numSamples - beforeWrap; + + std::copy (inputStaging_.begin() + inputStagingReadIndex_, inputStaging_.begin() + inputStagingReadIndex_ + beforeWrap, data); + if (afterWrap > 0) + std::copy (inputStaging_.begin(), inputStaging_.begin() + afterWrap, data + beforeWrap); + + inputStagingReadIndex_ = (inputStagingReadIndex_ + numSamples) % inputStaging_.size(); + inputStagingAvailable_ -= numSamples; } + + 
std::size_t getInputStagingAvailable() const { return inputStagingAvailable_; } std::size_t directFIRTapCount_ = 0; int baseHopSize_ = 0; @@ -577,14 +777,16 @@ class PartitionedConvolver::Impl std::vector workingInput_; std::vector workingOutput_; - // Staging for hop-based processing - std::vector inputCarry_; + // Input staging with circular buffer management std::vector inputStaging_; + std::size_t inputStagingReadIndex_ = 0; + std::size_t inputStagingWriteIndex_ = 0; + std::size_t inputStagingAvailable_ = 0; std::vector outputStaging_; - // Per-layer buffering - std::vector> layerInputAccumulators_; - std::vector> layerOutputCarries_; + // Per-layer circular buffering + std::vector layerInputBuffers_; + std::vector layerOutputBuffers_; std::vector tempLayerHop_; std::vector layerTempOutput_; diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.h b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h index 5931e7ece..a4e4d4d5b 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.h +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h @@ -27,34 +27,34 @@ namespace yup //============================================================================== /** Layered partitioned convolution engine optimized for real-time audio processing. 
- + Combines multiple processing strategies for efficient convolution: - Direct FIR computation for early taps (low latency) - One or more FFT-based Overlap-Add layers with uniform partitioning per layer - + The engine uses YUP's FFTProcessor for real FFT operations and supports: - Arbitrary input/output block sizes with internal buffering - Real-time safe processing (no heap allocations during process()) - Configurable layer hierarchy for optimal CPU/latency trade-off - + Example usage: @code PartitionedConvolver convolver; - + // Configure layers: 256 direct taps + FFT layers with hops 256, 1024, 4096 convolver.setTypicalLayout(256, {256, 1024, 4096}); - + // Prepare for processing with maximum block size (must be called before process) convolver.prepare(512); // Maximum 512 samples per process() call - + // Set impulse response (e.g., reverb IR) std::vector impulseResponse = loadImpulseResponse(); convolver.setImpulseResponse(impulseResponse); - + // In audio callback (accumulates into output): convolver.process(inputBuffer, outputBuffer, numSamples); // numSamples <= 512 @endcode - + @note The process() method accumulates results into the output buffer. Clear the output buffer first if overwrite behavior is desired. */ @@ -71,7 +71,7 @@ class PartitionedConvolver //============================================================================== /** Default constructor */ PartitionedConvolver(); - + /** Destructor */ ~PartitionedConvolver(); @@ -82,23 +82,23 @@ class PartitionedConvolver //============================================================================== /** Configure the convolution layers before setting the impulse response. 
- + @param directFIRTaps Number of early taps to process with direct FIR (for low latency) @param layers Vector of layer specifications with increasing hop sizes (e.g., {{256}, {1024}, {4096}} for 256→1024→4096 progression) */ void configureLayers (std::size_t directFIRTaps, const std::vector& layers); - + /** Convenience method to set a typical late-reverb configuration. - + @param directTaps Number of direct FIR taps for early reflections @param hops Vector of hop sizes for FFT layers (geometrically increasing recommended) */ void setTypicalLayout (std::size_t directTaps, const std::vector& hops); //============================================================================== - + /** Impulse response loading options. */ struct IRLoadOptions { IRLoadOptions() @@ -113,10 +113,10 @@ class PartitionedConvolver /** Set the impulse response for convolution. - + @param impulseResponse Pointer to impulse response samples @param length Number of samples in the impulse response - + @note This method is not real-time safe and should be called during initialization or from a background thread when audio is paused. */ @@ -124,7 +124,7 @@ class PartitionedConvolver /** Set the impulse response from a vector. - + @param impulseResponse Vector containing impulse response samples */ void setImpulseResponse (const std::vector& impulseResponse, const IRLoadOptions& options = {}); @@ -132,39 +132,40 @@ class PartitionedConvolver //============================================================================== /** Prepare the convolver for processing with a specific maximum block size. - + @param maxBlockSize Maximum number of samples that will be passed to process() - + @note This method is not real-time safe and should be called during initialization or when audio processing is paused. It pre-allocates all internal buffers to handle the specified block size without further allocations. 
*/ void prepare (std::size_t maxBlockSize); - /** - Reset all internal processing state (clears delay lines, overlap buffers). - Impulse response partitions are preserved. - */ - void reset(); - /** Process audio samples through the convolver. - + @param input Input audio buffer @param output Output audio buffer (results are accumulated) @param numSamples Number of samples to process - + @note Results are accumulated into the output buffer. Clear it first if needed. @note This method is real-time safe with no heap allocations. */ void process (const float* input, float* output, std::size_t numSamples); + /** + Reset all internal processing state (clears delay lines, overlap buffers). + Impulse response partitions are preserved. + */ + void reset(); + private: //============================================================================== class DirectFIR; class FFTLayer; + class CircularBuffer; class Impl; - + std::unique_ptr pImpl; //============================================================================== diff --git a/modules/yup_dsp/yup_dsp.cpp b/modules/yup_dsp/yup_dsp.cpp index ced8951b1..ec420b409 100644 --- a/modules/yup_dsp/yup_dsp.cpp +++ b/modules/yup_dsp/yup_dsp.cpp @@ -30,6 +30,10 @@ #include "yup_dsp.h" +//============================================================================== +#include +#include + //============================================================================== #include "frequency/yup_FFTProcessor.cpp" #include "frequency/yup_SpectrumAnalyzerState.cpp" From 897da5183d5229da31484cd2ea4e0b105f5afce6 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Tue, 26 Aug 2025 22:16:06 +0200 Subject: [PATCH 06/37] Formatting --- .../convolution/yup_PartitionedConvolver.cpp | 183 ++++++++++-------- .../convolution/yup_PartitionedConvolver.h | 4 +- 2 files changed, 99 insertions(+), 88 deletions(-) diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index 
ef98c9a2f..d4ecc2e64 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -42,28 +42,28 @@ static void complexMultiplyAccumulate (const float* A, const float* B, float* Y, { const int idx = i * 2; - __m256 a = _mm256_loadu_ps(A + idx); - __m256 b = _mm256_loadu_ps(B + idx); - __m256 y = _mm256_loadu_ps(Y + idx); + __m256 a = _mm256_loadu_ps (A + idx); + __m256 b = _mm256_loadu_ps (B + idx); + __m256 y = _mm256_loadu_ps (Y + idx); // a = [ar0 ai0 ar1 ai1 ar2 ai2 ar3 ai3] // b = [br0 bi0 br1 bi1 br2 bi2 br3 bi3] // separate real and imag for a and b - const __m256 a_shuffled = _mm256_permute_ps(a, _MM_SHUFFLE(2, 3, 0, 1)); - const __m256 b_shuffled = _mm256_permute_ps(b, _MM_SHUFFLE(2, 3, 0, 1)); + const __m256 a_shuffled = _mm256_permute_ps (a, _MM_SHUFFLE (2, 3, 0, 1)); + const __m256 b_shuffled = _mm256_permute_ps (b, _MM_SHUFFLE (2, 3, 0, 1)); // real = ar*br - ai*bi - __m256 realPart = _mm256_fmsub_ps(a, b, _mm256_mul_ps(a_shuffled, b_shuffled)); + __m256 realPart = _mm256_fmsub_ps (a, b, _mm256_mul_ps (a_shuffled, b_shuffled)); // imag = ar*bi + ai*br - __m256 imagPart = _mm256_fmadd_ps(a, b_shuffled, _mm256_mul_ps(a_shuffled, b)); + __m256 imagPart = _mm256_fmadd_ps (a, b_shuffled, _mm256_mul_ps (a_shuffled, b)); // interleave real/imag back - const __m256 interleaved = _mm256_blend_ps(realPart, imagPart, 0b10101010); + const __m256 interleaved = _mm256_blend_ps (realPart, imagPart, 0b10101010); - y = _mm256_add_ps(y, interleaved); - _mm256_storeu_ps(Y + idx, y); + y = _mm256_add_ps (y, interleaved); + _mm256_storeu_ps (Y + idx, y); } #elif YUP_USE_SSE_INTRINSICS @@ -72,25 +72,25 @@ static void complexMultiplyAccumulate (const float* A, const float* B, float* Y, { const int idx = i * 2; - __m128 a = _mm_loadu_ps(A + idx); - __m128 b = _mm_loadu_ps(B + idx); - __m128 y = _mm_loadu_ps(Y + idx); + __m128 a = _mm_loadu_ps (A + idx); + __m128 b = _mm_loadu_ps (B + idx); + __m128 y = 
_mm_loadu_ps (Y + idx); // separate real and imag for a and b - const __m128 a_shuffled = _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 3, 0, 1)); - const __m128 b_shuffled = _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 3, 0, 1)); + const __m128 a_shuffled = _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 3, 0, 1)); + const __m128 b_shuffled = _mm_shuffle_ps (b, b, _MM_SHUFFLE (2, 3, 0, 1)); // real = ar*br - ai*bi - __m128 realPart = _mm_sub_ps(_mm_mul_ps(a, b), _mm_mul_ps(a_shuffled, b_shuffled)); + __m128 realPart = _mm_sub_ps (_mm_mul_ps (a, b), _mm_mul_ps (a_shuffled, b_shuffled)); // imag = ar*bi + ai*br - __m128 imagPart = _mm_add_ps(_mm_mul_ps(a, b_shuffled), _mm_mul_ps(a_shuffled, b)); + __m128 imagPart = _mm_add_ps (_mm_mul_ps (a, b_shuffled), _mm_mul_ps (a_shuffled, b)); // interleave real/imag back - const __m128 interleaved = _mm_unpacklo_ps(realPart, imagPart); + const __m128 interleaved = _mm_unpacklo_ps (realPart, imagPart); - y = _mm_add_ps(y, interleaved); - _mm_storeu_ps(Y + idx, y); + y = _mm_add_ps (y, interleaved); + _mm_storeu_ps (Y + idx, y); } #elif YUP_USE_ARM_NEON @@ -99,33 +99,33 @@ static void complexMultiplyAccumulate (const float* A, const float* B, float* Y, { const int idx = i * 2; - float32x4_t a = vld1q_f32(A + idx); // [ar0, ai0, ar1, ai1] - float32x4_t b = vld1q_f32(B + idx); // [br0, bi0, br1, bi1] - float32x4_t y = vld1q_f32(Y + idx); + float32x4_t a = vld1q_f32 (A + idx); // [ar0, ai0, ar1, ai1] + float32x4_t b = vld1q_f32 (B + idx); // [br0, bi0, br1, bi1] + float32x4_t y = vld1q_f32 (Y + idx); // Shuffle a and b to get swapped real/imag for cross-multiplication - float32x4_t a_shuf = vrev64q_f32(a); // [ai0, ar0, ai1, ar1] - float32x4_t b_shuf = vrev64q_f32(b); // [bi0, br0, bi1, br1] + float32x4_t a_shuf = vrev64q_f32 (a); // [ai0, ar0, ai1, ar1] + float32x4_t b_shuf = vrev64q_f32 (b); // [bi0, br0, bi1, br1] // real = ar*br - ai*bi - float32x4_t realPart = vsubq_f32(vmulq_f32(a, b), vmulq_f32(a_shuf, b_shuf)); + float32x4_t realPart = vsubq_f32 
(vmulq_f32 (a, b), vmulq_f32 (a_shuf, b_shuf)); // imag = ar*bi + ai*br - float32x4_t imagPart = vaddq_f32(vmulq_f32(a, b_shuf), vmulq_f32(a_shuf, b)); + float32x4_t imagPart = vaddq_f32 (vmulq_f32 (a, b_shuf), vmulq_f32 (a_shuf, b)); // Interleave real and imag: [real0, imag0, real1, imag1] - float32x2_t realLow = vget_low_f32(realPart); - float32x2_t imagLow = vget_low_f32(imagPart); - float32x2x2_t zippedLow = vzip_f32(realLow, imagLow); + float32x2_t realLow = vget_low_f32 (realPart); + float32x2_t imagLow = vget_low_f32 (imagPart); + float32x2x2_t zippedLow = vzip_f32 (realLow, imagLow); - float32x2_t realHigh = vget_high_f32(realPart); - float32x2_t imagHigh = vget_high_f32(imagPart); - float32x2x2_t zippedHigh = vzip_f32(realHigh, imagHigh); + float32x2_t realHigh = vget_high_f32 (realPart); + float32x2_t imagHigh = vget_high_f32 (imagPart); + float32x2x2_t zippedHigh = vzip_f32 (realHigh, imagHigh); - float32x4_t interleaved = vcombine_f32(zippedLow.val[0], zippedHigh.val[0]); + float32x4_t interleaved = vcombine_f32 (zippedLow.val[0], zippedHigh.val[0]); - y = vaddq_f32(y, interleaved); - vst1q_f32(Y + idx, y); + y = vaddq_f32 (y, interleaved); + vst1q_f32 (Y + idx, y); } #endif @@ -172,7 +172,8 @@ class PartitionedConvolver::DirectFIR void process (const float* input, float* output, std::size_t numSamples) { const std::size_t numTaps = taps_.size(); - if (numTaps == 0) return; + if (numTaps == 0) + return; for (std::size_t i = 0; i < numSamples; ++i) { @@ -228,14 +229,16 @@ class PartitionedConvolver::FFTLayer overlapBuffer_.assign (static_cast (hopSize_), 0.0f); timeBuffer_.assign (static_cast (fftSize_), 0.0f); frequencyBuffer_.assign (static_cast (fftSize_) * 2, 0.0f); - tempBuffer_.assign (static_cast (fftSize_) * 2, 0.0f); // Must hold complex data for in-place FFT + tempBuffer_.assign (static_cast (fftSize_) * 2, 0.0f); // Must hold complex data for in-place FFT fdlIndex_ = 0; configured_ = true; } int getHopSize() const { return hopSize_; } + int 
getFFTSize() const { return fftSize_; } + bool isConfigured() const { return configured_; } std::size_t setImpulseResponse (const float* impulseResponse, std::size_t length, float scaling) @@ -357,7 +360,7 @@ class PartitionedConvolver::FFTLayer } } - bool hasImpulseResponse() const { return !frequencyPartitions_.empty(); } + bool hasImpulseResponse() const { return ! frequencyPartitions_.empty(); } private: int hopSize_ = 0; @@ -387,13 +390,13 @@ class PartitionedConvolver::CircularBuffer { public: CircularBuffer() = default; - + void resize (std::size_t size) { buffer_.resize (size); clear(); } - + void clear() { std::fill (buffer_.begin(), buffer_.end(), 0.0f); @@ -401,68 +404,73 @@ class PartitionedConvolver::CircularBuffer readIndex_ = 0; availableForRead_ = 0; } - + std::size_t getAvailableForRead() const { return availableForRead_; } + std::size_t getAvailableForWrite() const { return buffer_.size() - availableForRead_; } + std::size_t getSize() const { return buffer_.size(); } - + void write (const float* data, std::size_t numSamples) { jassert (numSamples <= getAvailableForWrite()); numSamples = std::min (numSamples, getAvailableForWrite()); - - if (numSamples == 0) return; - + + if (numSamples == 0) + return; + const std::size_t beforeWrap = std::min (numSamples, buffer_.size() - writeIndex_); const std::size_t afterWrap = numSamples - beforeWrap; - + std::copy (data, data + beforeWrap, buffer_.begin() + writeIndex_); if (afterWrap > 0) std::copy (data + beforeWrap, data + numSamples, buffer_.begin()); - + writeIndex_ = (writeIndex_ + numSamples) % buffer_.size(); availableForRead_ += numSamples; } - + void read (float* data, std::size_t numSamples) { jassert (numSamples <= getAvailableForRead()); numSamples = std::min (numSamples, getAvailableForRead()); - - if (numSamples == 0) return; - + + if (numSamples == 0) + return; + const std::size_t beforeWrap = std::min (numSamples, buffer_.size() - readIndex_); const std::size_t afterWrap = numSamples - 
beforeWrap; - + std::copy (buffer_.begin() + readIndex_, buffer_.begin() + readIndex_ + beforeWrap, data); if (afterWrap > 0) std::copy (buffer_.begin(), buffer_.begin() + afterWrap, data + beforeWrap); - + readIndex_ = (readIndex_ + numSamples) % buffer_.size(); availableForRead_ -= numSamples; } - + void peek (float* data, std::size_t numSamples, std::size_t offset = 0) const { jassert (numSamples + offset <= getAvailableForRead()); numSamples = std::min (numSamples, getAvailableForRead() - offset); - - if (numSamples == 0) return; - + + if (numSamples == 0) + return; + const std::size_t startIndex = (readIndex_ + offset) % buffer_.size(); const std::size_t beforeWrap = std::min (numSamples, buffer_.size() - startIndex); const std::size_t afterWrap = numSamples - beforeWrap; - + std::copy (buffer_.begin() + startIndex, buffer_.begin() + startIndex + beforeWrap, data); if (afterWrap > 0) std::copy (buffer_.begin(), buffer_.begin() + afterWrap, data + beforeWrap); } - + void skip (std::size_t numSamples) { jassert (numSamples <= getAvailableForRead()); numSamples = std::min (numSamples, getAvailableForRead()); - + readIndex_ = (readIndex_ + numSamples) % buffer_.size(); availableForRead_ -= numSamples; } @@ -504,7 +512,7 @@ class PartitionedConvolver::Impl } maxHopSize_ = maximumHopSize; - + // Clear staging buffers - will be allocated in prepare() inputStaging_.clear(); outputStaging_.clear(); @@ -512,21 +520,21 @@ class PartitionedConvolver::Impl // Resize per-layer circular buffers - will be allocated in prepare() layerInputBuffers_.resize (layers.size()); layerOutputBuffers_.resize (layers.size()); - + layerTempOutput_.clear(); tempLayerHop_.clear(); - + // Clear working buffers - will be allocated in prepare() workingInput_.clear(); workingOutput_.clear(); - + isPrepared_ = false; } void prepare (std::size_t maxBlockSize) { maxBlockSize_ = maxBlockSize; - + // Calculate buffer sizes - generous but fixed allocation const std::size_t inputBufferSize = 
maxBlockSize; // Input staging for all layers const std::size_t outputBufferSize = maxHopSize_; // Output buffering for all layers @@ -541,18 +549,18 @@ class PartitionedConvolver::Impl layerInputBuffers_[i].resize (inputBufferSize); layerOutputBuffers_[i].resize (outputBufferSize); } - + // Allocate temp buffers if (maxHopSize_ > 0) { layerTempOutput_.resize (maxHopSize_); tempLayerHop_.resize (maxHopSize_); } - + // Allocate working buffers workingInput_.resize (maxBlockSize); workingOutput_.resize (maxBlockSize); - + isPrepared_ = true; } @@ -624,7 +632,8 @@ class PartitionedConvolver::Impl void process (const float* input, float* output, std::size_t numSamples) { - if (numSamples == 0) return; + if (numSamples == 0) + return; SpinLock::ScopedLockType lock (processingLock_); @@ -655,8 +664,8 @@ class PartitionedConvolver::Impl // Ensure prepare() was called jassert (isPrepared_); jassert (numSamples <= maxBlockSize_); - - if (!isPrepared_ || numSamples > maxBlockSize_) + + if (! isPrepared_ || numSamples > maxBlockSize_) return; // Fail gracefully in release builds FloatVectorOperations::copy (workingInput_.data(), input, numSamples); @@ -677,10 +686,10 @@ class PartitionedConvolver::Impl while (getInputStagingAvailable() >= static_cast (baseHopSize_)) { const std::size_t hopSize = static_cast (baseHopSize_); - + // Read hop from input staging readFromInputStaging (tempLayerHop_.data(), hopSize); - + FloatVectorOperations::clear (outputStaging_.data(), outputStaging_.size()); for (std::size_t layerIndex = 0; layerIndex < layers_.size(); ++layerIndex) @@ -698,7 +707,7 @@ class PartitionedConvolver::Impl { // Read a full hop for this layer inputBuffer.read (tempLayerHop_.data(), static_cast (layerHopSize)); - + FloatVectorOperations::clear (layerTempOutput_.data(), layerHopSize); if (layer.hasImpulseResponse()) @@ -732,36 +741,38 @@ class PartitionedConvolver::Impl const std::size_t available = inputStaging_.size() - inputStagingAvailable_; jassert (numSamples <= 
available); numSamples = std::min (numSamples, available); - if (numSamples == 0) return; - + if (numSamples == 0) + return; + const std::size_t beforeWrap = std::min (numSamples, inputStaging_.size() - inputStagingWriteIndex_); const std::size_t afterWrap = numSamples - beforeWrap; - + std::copy (data, data + beforeWrap, inputStaging_.begin() + inputStagingWriteIndex_); if (afterWrap > 0) std::copy (data + beforeWrap, data + numSamples, inputStaging_.begin()); - + inputStagingWriteIndex_ = (inputStagingWriteIndex_ + numSamples) % inputStaging_.size(); inputStagingAvailable_ += numSamples; } - + void readFromInputStaging (float* data, std::size_t numSamples) { jassert (numSamples <= inputStagingAvailable_); numSamples = std::min (numSamples, inputStagingAvailable_); - if (numSamples == 0) return; - + if (numSamples == 0) + return; + const std::size_t beforeWrap = std::min (numSamples, inputStaging_.size() - inputStagingReadIndex_); const std::size_t afterWrap = numSamples - beforeWrap; - + std::copy (inputStaging_.begin() + inputStagingReadIndex_, inputStaging_.begin() + inputStagingReadIndex_ + beforeWrap, data); if (afterWrap > 0) std::copy (inputStaging_.begin(), inputStaging_.begin() + afterWrap, data + beforeWrap); - + inputStagingReadIndex_ = (inputStagingReadIndex_ + numSamples) % inputStaging_.size(); inputStagingAvailable_ -= numSamples; } - + std::size_t getInputStagingAvailable() const { return inputStagingAvailable_; } std::size_t directFIRTapCount_ = 0; @@ -827,7 +838,7 @@ void PartitionedConvolver::setTypicalLayout (std::size_t directTaps, const std:: layerSpecs.reserve (hops.size()); for (int hop : hops) - layerSpecs.push_back ({hop}); + layerSpecs.push_back ({ hop }); configureLayers (directTaps, layerSpecs); } diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.h b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h index a4e4d4d5b..78f3cb176 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.h +++ 
b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h @@ -65,7 +65,7 @@ class PartitionedConvolver /** Configuration for a single FFT-based convolution layer */ struct LayerSpec { - int hopSize; /**< Partition size L (FFT size will be 2*L) */ + int hopSize; /**< Partition size L (FFT size will be 2*L) */ }; //============================================================================== @@ -153,7 +153,7 @@ class PartitionedConvolver */ void process (const float* input, float* output, std::size_t numSamples); - /** + /** Reset all internal processing state (clears delay lines, overlap buffers). Impulse response partitions are preserved. */ From 230f934989211297ec53b26df832e096e0178278 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Tue, 26 Aug 2025 22:54:39 +0200 Subject: [PATCH 07/37] More tweaks --- .../convolution/yup_PartitionedConvolver.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index d4ecc2e64..88ea1c489 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -321,10 +321,7 @@ class PartitionedConvolver::FFTLayer return; // 1) Transform current input hop to frequency domain - std::fill (tempBuffer_.begin(), tempBuffer_.end(), 0.0f); - for (int i = 0; i < hopSize_; ++i) - tempBuffer_[i] = inputHop[i]; - + FloatVectorOperations::copy (tempBuffer_.data(), inputHop, hopSize_); fftProcessor_.performRealFFTForward (tempBuffer_.data(), tempBuffer_.data()); // 2) Store in frequency delay line (circular buffer) - copy full complex buffer @@ -332,7 +329,7 @@ class PartitionedConvolver::FFTLayer std::copy (tempBuffer_.begin(), tempBuffer_.begin() + (fftSize_ * 2), frequencyDelayLine_[static_cast (fdlIndex_)].begin()); // 3) Frequency domain convolution: Y = sum(X[k-p] * H[p]) - std::fill (frequencyBuffer_.data(), frequencyBuffer_.data() 
+ (fftSize_ * 2), 0.0f); + FloatVectorOperations::clear (frequencyBuffer_.data(), fftSize_ * 2); int xIndex = fdlIndex_; for (std::size_t p = 0; p < frequencyPartitions_.size(); ++p) @@ -661,12 +658,10 @@ class PartitionedConvolver::Impl void processUnsafe (const float* input, float* output, std::size_t numSamples) { - // Ensure prepare() was called jassert (isPrepared_); jassert (numSamples <= maxBlockSize_); - if (! isPrepared_ || numSamples > maxBlockSize_) - return; // Fail gracefully in release builds + return; FloatVectorOperations::copy (workingInput_.data(), input, numSamples); FloatVectorOperations::clear (workingOutput_.data(), numSamples); @@ -689,7 +684,6 @@ class PartitionedConvolver::Impl // Read hop from input staging readFromInputStaging (tempLayerHop_.data(), hopSize); - FloatVectorOperations::clear (outputStaging_.data(), outputStaging_.size()); for (std::size_t layerIndex = 0; layerIndex < layers_.size(); ++layerIndex) @@ -707,7 +701,6 @@ class PartitionedConvolver::Impl { // Read a full hop for this layer inputBuffer.read (tempLayerHop_.data(), static_cast (layerHopSize)); - FloatVectorOperations::clear (layerTempOutput_.data(), layerHopSize); if (layer.hasImpulseResponse()) From 15d5ce4c3e184c4bd1ce8bd6e8cef1a29d255240 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Tue, 26 Aug 2025 23:30:47 +0200 Subject: [PATCH 08/37] More work on Convolution --- .../source/examples/ConvolutionDemo.h | 2 +- modules/yup_audio_basics/yup_audio_basics.h | 7 + .../convolution/yup_PartitionedConvolver.cpp | 146 ++++++++++++++---- modules/yup_dsp/yup_dsp.cpp | 17 ++ 4 files changed, 142 insertions(+), 30 deletions(-) diff --git a/examples/graphics/source/examples/ConvolutionDemo.h b/examples/graphics/source/examples/ConvolutionDemo.h index 8885d8ae9..4b1abc252 100644 --- a/examples/graphics/source/examples/ConvolutionDemo.h +++ b/examples/graphics/source/examples/ConvolutionDemo.h @@ -60,7 +60,7 @@ class ConvolutionDemo dryGain.setCurrentAndTargetValue (0.3f); // 
Configure convolver with typical layout - convolver.setTypicalLayout (128, {128, 256, 512, 1024, 4096}); + convolver.setTypicalLayout (128, {128, 512, 2048}); // Create UI createUI(); diff --git a/modules/yup_audio_basics/yup_audio_basics.h b/modules/yup_audio_basics/yup_audio_basics.h index fb69411df..79fee813d 100644 --- a/modules/yup_audio_basics/yup_audio_basics.h +++ b/modules/yup_audio_basics/yup_audio_basics.h @@ -80,9 +80,16 @@ #endif #endif +#ifndef YUP_USE_FMA_INTRINSICS +#if defined (__FMA__) +#define YUP_USE_FMA_INTRINSICS 1 +#endif +#endif + #if ! YUP_INTEL #undef YUP_USE_SSE_INTRINSICS #undef YUP_USE_AVX_INTRINSICS +#undef YUP_USE_FMA_INTRINSICS #endif #if __ARM_NEON__ && ! (YUP_USE_VDSP_FRAMEWORK || defined(YUP_USE_ARM_NEON)) diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index 88ea1c489..03166fa30 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -155,11 +155,16 @@ class PartitionedConvolver::DirectFIR void setTaps (std::vector taps, float scaling) { - taps_ = std::move (taps); - FloatVectorOperations::multiply (taps_.data(), scaling, taps_.size()); + FloatVectorOperations::multiply (taps.data(), scaling, taps.size()); - history_.assign (taps_.size(), 0.0f); + tapsReversed_ = std::move (taps); + std::reverse (tapsReversed_.begin(), tapsReversed_.end()); + numTaps_ = tapsReversed_.size(); + paddedLen_ = (numTaps_ + 3u) & ~3u; + tapsReversed_.resize (paddedLen_, 0.0f); + + history_.assign (2 * numTaps_, 0.0f); writeIndex_ = 0; } @@ -169,41 +174,120 @@ class PartitionedConvolver::DirectFIR writeIndex_ = 0; } - void process (const float* input, float* output, std::size_t numSamples) + void process (const float* input, float* output, std::size_t numSamples) noexcept { - const std::size_t numTaps = taps_.size(); - if (numTaps == 0) + const std::size_t M = numTaps_; + if (M == 0) return; + 
const float* h = tapsReversed_.data(); for (std::size_t i = 0; i < numSamples; ++i) { - history_[writeIndex_] = input[i]; + const float x = input[i]; + + history_[writeIndex_] = x; + history_[writeIndex_ + M] = x; + + const float* w = history_.data() + writeIndex_ + 1; - // Convolution: y[n] = sum(h[m] * x[n-m]) float sum = 0.0f; - std::size_t readIndex = writeIndex_; - for (std::size_t m = 0; m < numTaps; ++m) - { - sum += taps_[m] * history_[readIndex]; - if (readIndex == 0) - readIndex = numTaps - 1; - else - --readIndex; - } + +#if YUP_ENABLE_VDSP + vDSP_dotpr (w, 1, h, 1, &sum, M); +#else + sum = dotProduct (w, h, M, paddedLen_); +#endif output[i] += sum; - // Advance circular buffer - if (++writeIndex_ == numTaps) + if (++writeIndex_ == M) writeIndex_ = 0; } } - std::size_t getNumTaps() const { return taps_.size(); } + std::size_t getNumTaps() const + { + return numTaps_; + } private: - std::vector taps_; + static float dotProduct (const float* __restrict a, const float* __restrict b, std::size_t len, std::size_t paddedLen) noexcept + { + float acc = 0.0f; + std::size_t i = 0; + +#if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS + // 8-wide AVX2 FMA path + __m256 vacc = _mm256_setzero_ps(); + for (; i + 8 <= len; i += 8) + { + __m256 va = _mm256_loadu_ps (a + i); + __m256 vb = _mm256_loadu_ps (b + i); + vacc = _mm256_fmadd_ps (va, vb, vacc); + } + // horizontal add + __m128 low = _mm256_castps256_ps128 (vacc); + __m128 high = _mm256_extractf128_ps (vacc, 1); + __m128 vsum = _mm_add_ps (low, high); + vsum = _mm_hadd_ps (vsum, vsum); + vsum = _mm_hadd_ps (vsum, vsum); + acc += _mm_cvtss_f32 (vsum); + +#elif YUP_USE_SSE_INTRINSICS + __m128 vacc = _mm_setzero_ps(); + std::size_t i = 0; +#if YUP_USE_FMA_INTRINSICS + for (; i + 4 <= len; i += 4) + { + __m128 va = _mm_loadu_ps (a + i); + __m128 vb = _mm_loadu_ps (b + i); + vacc = _mm_fmadd_ps (va, vb, vacc); + } +#else + for (; i + 4 <= len; i += 4) + { + __m128 va = _mm_loadu_ps (a + i); + __m128 vb = 
_mm_loadu_ps (b + i); + vacc = _mm_add_ps (vacc, _mm_mul_ps (va, vb)); + } +#endif + // horizontal add + __m128 shuf = _mm_movehdup_ps (vacc); + __m128 sums = _mm_add_ps (vacc, shuf); + shuf = _mm_movehl_ps (shuf, sums); + sums = _mm_add_ss (sums, shuf); + acc += _mm_cvtss_f32 (sums); + +#elif YUP_USE_ARM_NEON + float32x4_t vacc = vdupq_n_f32 (0.0f); + for (; i + 4 <= len; i += 4) + { + float32x4_t va = vld1q_f32 (a + i); + float32x4_t vb = vld1q_f32 (b + i); + vacc = vmlaq_f32 (vacc, va, vb); + } +#if YUP_64BIT + acc += vaddvq_f32 (vacc); +#else + float32x2_t vlow = vget_low_f32 (vacc); + float32x2_t vhigh = vget_high_f32 (vacc); + float32x2_t vsum2 = vpadd_f32 (vlow, vhigh); + vsum2 = vpadd_f32 (vsum2, vsum2); + acc += vget_lane_f32 (vsum2, 0); +#endif + +#endif + + for (; i < len; ++i) + acc += a[i] * b[i]; + + return acc; + } + + std::vector tapsReversed_; std::vector history_; + std::size_t numTaps_ = 0; + std::size_t paddedLen_ = 0; std::size_t writeIndex_ = 0; }; @@ -532,19 +616,23 @@ class PartitionedConvolver::Impl { maxBlockSize_ = maxBlockSize; - // Calculate buffer sizes - generous but fixed allocation - const std::size_t inputBufferSize = maxBlockSize; // Input staging for all layers - const std::size_t outputBufferSize = maxHopSize_; // Output buffering for all layers - // Prepare main input staging - inputStaging_.resize (inputBufferSize); + inputStaging_.resize (maxBlockSize); outputStaging_.assign (static_cast (baseHopSize_), 0.0f); - // Prepare per-layer circular buffers + // Prepare per-layer circular buffers with layer-specific sizing for (std::size_t i = 0; i < layerInputBuffers_.size(); ++i) { - layerInputBuffers_[i].resize (inputBufferSize); - layerOutputBuffers_[i].resize (outputBufferSize); + const std::size_t layerHopSize = static_cast (layers_[i].getHopSize()); + + // Input buffer: needs to accumulate up to layerHopSize samples plus incoming block + const std::size_t layerInputBufferSize = layerHopSize + maxBlockSize; + 
layerInputBuffers_[i].resize (layerInputBufferSize); + + // Output buffer: needs to handle bursts of layerHopSize samples + // Size it to handle multiple hops since read rate (baseHopSize) may be much smaller than write rate (layerHopSize) + const std::size_t layerOutputBufferSize = layerHopSize * ((layerHopSize / static_cast (baseHopSize_)) + 2); + layerOutputBuffers_[i].resize (layerOutputBufferSize); } // Allocate temp buffers diff --git a/modules/yup_dsp/yup_dsp.cpp b/modules/yup_dsp/yup_dsp.cpp index ec420b409..4d7e9a73e 100644 --- a/modules/yup_dsp/yup_dsp.cpp +++ b/modules/yup_dsp/yup_dsp.cpp @@ -30,6 +30,23 @@ #include "yup_dsp.h" +//============================================================================== +#if YUP_USE_AVX_INTRINSICS || YUP_USE_FMA_INTRINSICS +#include +#endif + +#if YUP_USE_SSE_INTRINSICS +#include +#endif + +#if YUP_USE_ARM_NEON +#include +#endif + +#if (YUP_MAC || YUP_IOS) && YUP_ENABLE_VDSP +#include +#endif + //============================================================================== #include #include From c870ab3c7fe83bb0147fc5bbcabc60238d2e9421 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Tue, 26 Aug 2025 23:45:03 +0200 Subject: [PATCH 09/37] Remove unused variable --- modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index 03166fa30..d627ad42f 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -195,7 +195,7 @@ class PartitionedConvolver::DirectFIR #if YUP_ENABLE_VDSP vDSP_dotpr (w, 1, h, 1, &sum, M); #else - sum = dotProduct (w, h, M, paddedLen_); + sum = dotProduct (w, h, M); #endif output[i] += sum; @@ -211,7 +211,7 @@ class PartitionedConvolver::DirectFIR } private: - static float dotProduct (const float* __restrict a, const float* __restrict b, 
std::size_t len, std::size_t paddedLen) noexcept + static float dotProduct (const float* __restrict a, const float* __restrict b, std::size_t len) noexcept { float acc = 0.0f; std::size_t i = 0; From 7f92cd3115638d3850719bb7c8398691b6c9692f Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 00:08:42 +0200 Subject: [PATCH 10/37] More tweaks --- .../yup_dsp/convolution/yup_PartitionedConvolver.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index d627ad42f..078707bc4 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -616,8 +616,9 @@ class PartitionedConvolver::Impl { maxBlockSize_ = maxBlockSize; - // Prepare main input staging - inputStaging_.resize (maxBlockSize); + // Prepare main input staging - needs to accumulate up to baseHopSize samples plus incoming block + const std::size_t inputStagingSize = static_cast (baseHopSize_) + maxBlockSize; + inputStaging_.resize (inputStagingSize); outputStaging_.assign (static_cast (baseHopSize_), 0.0f); // Prepare per-layer circular buffers with layer-specific sizing @@ -777,6 +778,9 @@ class PartitionedConvolver::Impl for (std::size_t layerIndex = 0; layerIndex < layers_.size(); ++layerIndex) { auto& layer = layers_[layerIndex]; + if (! 
layer.hasImpulseResponse()) + continue; + const int layerHopSize = layer.getHopSize(); auto& inputBuffer = layerInputBuffers_[layerIndex]; auto& outputBuffer = layerOutputBuffers_[layerIndex]; @@ -791,8 +795,8 @@ class PartitionedConvolver::Impl inputBuffer.read (tempLayerHop_.data(), static_cast (layerHopSize)); FloatVectorOperations::clear (layerTempOutput_.data(), layerHopSize); - if (layer.hasImpulseResponse()) - layer.processHop (tempLayerHop_.data(), layerTempOutput_.data()); + // Process hop + layer.processHop (tempLayerHop_.data(), layerTempOutput_.data()); // Write output to layer's output buffer outputBuffer.write (layerTempOutput_.data(), static_cast (layerHopSize)); From 09166e549f3fa8de1080d0a218fd6f6189248fd8 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 00:26:15 +0200 Subject: [PATCH 11/37] Less copying --- examples/graphics/source/examples/ConvolutionDemo.h | 2 +- modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/examples/graphics/source/examples/ConvolutionDemo.h b/examples/graphics/source/examples/ConvolutionDemo.h index 4b1abc252..a984b57eb 100644 --- a/examples/graphics/source/examples/ConvolutionDemo.h +++ b/examples/graphics/source/examples/ConvolutionDemo.h @@ -60,7 +60,7 @@ class ConvolutionDemo dryGain.setCurrentAndTargetValue (0.3f); // Configure convolver with typical layout - convolver.setTypicalLayout (128, {128, 512, 2048}); + convolver.setTypicalLayout (256, {256, 1024, 4096}); // Create UI createUI(); diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index 078707bc4..9eab19c50 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -606,7 +606,6 @@ class PartitionedConvolver::Impl tempLayerHop_.clear(); // Clear working buffers - will be allocated in prepare() - 
workingInput_.clear(); workingOutput_.clear(); isPrepared_ = false; @@ -644,7 +643,6 @@ class PartitionedConvolver::Impl } // Allocate working buffers - workingInput_.resize (maxBlockSize); workingOutput_.resize (maxBlockSize); isPrepared_ = true; @@ -752,11 +750,10 @@ class PartitionedConvolver::Impl if (! isPrepared_ || numSamples > maxBlockSize_) return; - FloatVectorOperations::copy (workingInput_.data(), input, numSamples); FloatVectorOperations::clear (workingOutput_.data(), numSamples); // Process direct FIR (no block size constraints) - directFIR_.process (workingInput_.data(), workingOutput_.data(), numSamples); + directFIR_.process (input, workingOutput_.data(), numSamples); if (layers_.empty()) { FloatVectorOperations::add (output, workingOutput_.data(), numSamples); @@ -764,7 +761,7 @@ class PartitionedConvolver::Impl } // Add input to main input staging buffer using circular buffer logic - writeToInputStaging (workingInput_.data(), numSamples); + writeToInputStaging (input, numSamples); std::size_t outputSamplesProduced = 0; while (getInputStagingAvailable() >= static_cast (baseHopSize_)) @@ -870,7 +867,6 @@ class PartitionedConvolver::Impl std::vector layers_; // Working buffers - std::vector workingInput_; std::vector workingOutput_; // Input staging with circular buffer management From e8cefe637297b0e879ce4c5f6f926e16baf5a3ce Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 00:27:49 +0200 Subject: [PATCH 12/37] Some tests --- tests/yup_dsp/yup_PartitionedConvolver.cpp | 838 +++++++++++++++++++++ 1 file changed, 838 insertions(+) create mode 100644 tests/yup_dsp/yup_PartitionedConvolver.cpp diff --git a/tests/yup_dsp/yup_PartitionedConvolver.cpp b/tests/yup_dsp/yup_PartitionedConvolver.cpp new file mode 100644 index 000000000..393a01a2f --- /dev/null +++ b/tests/yup_dsp/yup_PartitionedConvolver.cpp @@ -0,0 +1,838 @@ +/* + ============================================================================== + + This file is part of the YUP 
library. + Copyright (c) 2025 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#include + +#include + +#include +#include + +namespace yup::test +{ + +//============================================================================== +class PartitionedConvolverTest : public ::testing::Test +{ +protected: + void SetUp() override + { + generator.seed (42); // Fixed seed for reproducible tests + } + + float randomFloat (float min = -1.0f, float max = 1.0f) + { + std::uniform_real_distribution dist (min, max); + return dist (generator); + } + + void fillWithRandomData (std::vector& buffer) + { + for (auto& sample : buffer) + sample = randomFloat(); + } + + void fillWithSine (std::vector& buffer, float frequency, float sampleRate) + { + for (size_t i = 0; i < buffer.size(); ++i) + buffer[i] = std::sin (2.0f * MathConstants::pi * frequency * static_cast (i) / sampleRate); + } + + void clearBuffer (std::vector& buffer) + { + std::fill (buffer.begin(), buffer.end(), 0.0f); + } + + float calculateRMS (const std::vector& buffer) + { + if (buffer.empty()) + return 0.0f; + + float sum = 0.0f; + for (float sample : buffer) + sum += sample * sample; + + return std::sqrt (sum / static_cast (buffer.size())); + } + + float findPeak (const std::vector& buffer) + { + if (buffer.empty()) + return 0.0f; + + float peak = 0.0f; + for 
(float sample : buffer) + peak = std::max (peak, std::abs (sample)); + + return peak; + } + + std::mt19937 generator; +}; + +//============================================================================== +// Basic API Tests +//============================================================================== + +TEST_F (PartitionedConvolverTest, DefaultConstruction) +{ + PartitionedConvolver convolver; + // Should not crash - basic construction test +} + +TEST_F (PartitionedConvolverTest, MoveSemantics) +{ + PartitionedConvolver convolver1; + convolver1.setTypicalLayout (64, { 64, 256 }); + convolver1.prepare (512); + + // Move constructor + PartitionedConvolver convolver2 = std::move (convolver1); + + // Move assignment + PartitionedConvolver convolver3; + convolver3 = std::move (convolver2); + + // Should not crash +} + +TEST_F (PartitionedConvolverTest, BasicConfiguration) +{ + PartitionedConvolver convolver; + + // Test typical layout configuration + convolver.setTypicalLayout (128, { 128, 512, 2048 }); + + // Should not crash + convolver.prepare (512); + + // Test reset + convolver.reset(); +} + +TEST_F (PartitionedConvolverTest, ConfigureLayers) +{ + PartitionedConvolver convolver; + + std::vector layers = { + { 64 }, { 256 }, { 1024 } + }; + + convolver.configureLayers (32, layers); + convolver.prepare (256); + + // Should not crash +} + +//============================================================================== +// Impulse Response Tests +//============================================================================== + +TEST_F (PartitionedConvolverTest, SetImpulseResponseVector) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (64, { 64, 256 }); + convolver.prepare (512); + + // Create simple impulse response + std::vector ir (1000); + fillWithRandomData (ir); + + convolver.setImpulseResponse (ir); + + // Should not crash +} + +TEST_F (PartitionedConvolverTest, SetImpulseResponsePointer) +{ + PartitionedConvolver convolver; + 
convolver.setTypicalLayout (64, { 64, 256 }); + convolver.prepare (512); + + // Create simple impulse response + std::vector ir (1000); + fillWithRandomData (ir); + + convolver.setImpulseResponse (ir.data(), ir.size()); + + // Should not crash +} + +TEST_F (PartitionedConvolverTest, SetImpulseResponseWithOptions) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (64, { 64, 256 }); + convolver.prepare (512); + + std::vector ir (1000); + fillWithRandomData (ir); + + PartitionedConvolver::IRLoadOptions options; + options.normalize = true; + options.headroomDb = -6.0f; + + convolver.setImpulseResponse (ir, options); + + // Should not crash +} + +TEST_F (PartitionedConvolverTest, EmptyImpulseResponse) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (64, { 64, 256 }); + convolver.prepare (512); + + std::vector emptyIR; + convolver.setImpulseResponse (emptyIR); + + // Processing with empty IR should work + std::vector input (256); + std::vector output (256); + fillWithRandomData (input); + clearBuffer (output); + + convolver.process (input.data(), output.data(), input.size()); + + // Output should remain zero + for (float sample : output) + EXPECT_FLOAT_EQ (sample, 0.0f); +} + +//============================================================================== +// Audio Processing Tests +//============================================================================== + +TEST_F (PartitionedConvolverTest, ImpulseResponseTest) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (64, { 64, 256 }); + convolver.prepare (512); + + // Create unit impulse response + std::vector ir (256, 0.0f); + ir[0] = 1.0f; // Unit impulse at start + ir[10] = 0.5f; // Delayed impulse + ir[50] = 0.25f; // Another delayed impulse + + convolver.setImpulseResponse (ir); + + // Test with unit impulse input + std::vector input (512, 0.0f); + input[0] = 1.0f; // Unit impulse + + std::vector output (512); + clearBuffer (output); + + convolver.process 
(input.data(), output.data(), input.size()); + + // Output should contain the impulse response (with some latency) + // Check for non-zero output + float outputRMS = calculateRMS (output); + EXPECT_GT (outputRMS, 0.01f); +} + +TEST_F (PartitionedConvolverTest, SineWaveConvolution) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (128, { 128, 512 }); + convolver.prepare (2048); + + // Create simple lowpass IR (moving average) + const size_t irLength = 32; + std::vector ir (irLength); + for (size_t i = 0; i < irLength; ++i) + ir[i] = 1.0f / static_cast (irLength); + + convolver.setImpulseResponse (ir); + + // Test with sine wave + const float sampleRate = 44100.0f; + const float frequency = 1000.0f; + std::vector input (2048); + fillWithSine (input, frequency, sampleRate); + + std::vector output (2048); + clearBuffer (output); + + convolver.process (input.data(), output.data(), input.size()); + + // Output should have significant energy (lowpass filtered sine) + float outputRMS = calculateRMS (output); + EXPECT_GT (outputRMS, 0.1f); +} + +TEST_F (PartitionedConvolverTest, AccumulativeOutput) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (64, { 64, 256 }); + convolver.prepare (256); + + // Simple IR + std::vector ir (100, 0.1f); + convolver.setImpulseResponse (ir); + + std::vector input (256); + fillWithRandomData (input); + + // Pre-populate output buffer + std::vector output (256); + fillWithRandomData (output); + std::vector originalOutput = output; + + convolver.process (input.data(), output.data(), input.size()); + + // Output should contain original data plus convolution result + bool hasAccumulated = false; + for (size_t i = 0; i < output.size(); ++i) + { + if (std::abs (output[i] - originalOutput[i]) > 0.001f) + { + hasAccumulated = true; + break; + } + } + EXPECT_TRUE (hasAccumulated); +} + +//============================================================================== +// Latency Tests 
+//============================================================================== + +TEST_F (PartitionedConvolverTest, LatencyMeasurement) +{ + // Test different configurations and measure latency + std::vector>> configs = { + { 64, { 64 } }, + { 128, { 128 } }, + { 64, { 64, 256 } }, + { 128, { 128, 512 } }, + { 256, { 256, 1024 } } + }; + + for (const auto& [directTaps, hops] : configs) + { + PartitionedConvolver convolver; + convolver.setTypicalLayout (directTaps, hops); + convolver.prepare (1024); + + // Unit impulse response + std::vector ir (1000, 0.0f); + ir[0] = 1.0f; + convolver.setImpulseResponse (ir); + + // Unit impulse input + std::vector input (1024, 0.0f); + input[0] = 1.0f; + + std::vector output (1024); + clearBuffer (output); + + convolver.process (input.data(), output.data(), input.size()); + + // Find first non-zero sample in output + size_t latencySamples = 0; + for (size_t i = 0; i < output.size(); ++i) + { + if (std::abs (output[i]) > 0.001f) + { + latencySamples = i; + break; + } + } + + // Latency should be reasonable (less than largest hop size) + const int maxHop = *std::max_element (hops.begin(), hops.end()); + EXPECT_LE (latencySamples, static_cast (maxHop * 2)); + + // With direct FIR, latency should be minimal + if (directTaps > 0) + EXPECT_LE (latencySamples, directTaps); + } +} + +//============================================================================== +// Partition Size Tests (Fixed) +//============================================================================== + +TEST_F (PartitionedConvolverTest, VariousPartitionSizes) +{ + // Test various partition configurations - all with direct taps for immediate response + std::vector, size_t>> testConfigs = { + // (directTaps, hops, maxBlockSize) + { 64, { 64 }, 512 }, + { 32, { 64 }, 512 }, + { 64, { 64, 256 }, 512 }, + { 128, { 128, 512 }, 1024 }, + { 128, { 128, 512, 2048 }, 2048 }, + { 256, { 256, 1024, 4096 }, 4096 }, + { 64, { 128, 256, 512 }, 1024 }, + { 48, { 32, 128, 512 
}, 1024 }, + { 24, { 32, 64, 128 }, 1024 }, + }; + + for (const auto& item : testConfigs) + { + const auto& directTaps = std::get<0> (item); + const auto& hops = std::get<1> (item); + const auto& maxBlockSize = std::get<2> (item); + + SCOPED_TRACE (testing::Message() << "Config: directTaps=" << directTaps << " hops=[" << [&]() + { + std::string hopStr; + for (size_t i = 0; i < hops.size(); ++i) + { + if (i > 0) + hopStr += ","; + hopStr += std::to_string (hops[i]); + } + return hopStr; + }() << "] maxBlockSize=" << maxBlockSize); + + PartitionedConvolver convolver; + + // Configure and verify setup + EXPECT_NO_THROW (convolver.setTypicalLayout (directTaps, hops)); + EXPECT_NO_THROW (convolver.prepare (maxBlockSize)); + + // Create a simple known impulse response + std::vector ir (std::min (static_cast (500), maxBlockSize), 0.0f); + ir[0] = 1.0f; // Unit impulse at start + if (ir.size() > 100) + ir[100] = 0.5f; // Delayed impulse for verification + EXPECT_NO_THROW (convolver.setImpulseResponse (ir)); + + // Test with unit impulse to verify convolution correctness + std::vector deltaInput (maxBlockSize, 0.0f); + deltaInput[0] = 1.0f; // Unit impulse + std::vector deltaOutput (maxBlockSize); + clearBuffer (deltaOutput); + + EXPECT_NO_THROW (convolver.process (deltaInput.data(), deltaOutput.data(), maxBlockSize)); + + // Should produce significant output + float outputRMS = calculateRMS (deltaOutput); + EXPECT_GT (outputRMS, 0.003f) << "No significant convolution output detected"; + + // Verify we get immediate response from direct FIR + EXPECT_GT (findPeak (deltaOutput), 0.1f) << "No immediate response detected"; + + // Process various realistic block sizes + std::vector blockSizes = { 64, 128, 256, maxBlockSize }; + + for (size_t blockSize : blockSizes) + { + if (blockSize > maxBlockSize) + continue; + + SCOPED_TRACE (testing::Message() << "BlockSize=" << blockSize); + + std::vector input (blockSize); + std::vector output (blockSize); + fillWithRandomData (input); + 
clearBuffer (output); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), blockSize)); + + // Verify audio processing quality + for (float sample : output) + { + EXPECT_TRUE (std::isfinite (sample)) << "Non-finite output detected"; + EXPECT_LT (std::abs (sample), 100.0f) << "Output amplitude too large"; + } + + // With direct taps, should get output for reasonable input + float inputRMS = calculateRMS (input); + float outputRMS = calculateRMS (output); + + if (inputRMS > 0.01f) + { + EXPECT_GT (outputRMS, 0.001f) << "Output unexpectedly quiet for significant input"; + } + } + } +} + +//============================================================================== +// Stress Test (Fixed) +//============================================================================== + +TEST_F (PartitionedConvolverTest, StressTestDifferentBlockSizes) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (128, { 128, 512, 2048 }); + convolver.prepare (2048); + + // Create a simple, well-behaved impulse response + std::vector ir (1024, 0.0f); + // Simple decaying impulse response + for (size_t i = 0; i < 200; ++i) + { + ir[i] = std::exp (-static_cast (i) / 50.0f) * std::cos (2.0f * MathConstants::pi * i / 16.0f); + } + + // Normalize to prevent overflow + float peak = *std::max_element (ir.begin(), ir.end(), [] (float a, float b) + { + return std::abs (a) < std::abs (b); + }); + if (peak > 0.0f) + { + for (auto& sample : ir) + sample /= (peak * 2.0f); // Extra headroom + } + + convolver.setImpulseResponse (ir); + + // Test reasonable block sizes first + std::vector blockSizes = { 32, 64, 128, 256, 512, 1024 }; + + float totalInputEnergy = 0.0f; + float totalOutputEnergy = 0.0f; + + for (size_t blockSize : blockSizes) + { + SCOPED_TRACE (testing::Message() << "Processing blockSize=" << blockSize); + + std::vector input (blockSize); + std::vector output (blockSize); + fillWithRandomData (input); + clearBuffer (output); + + EXPECT_NO_THROW (convolver.process 
(input.data(), output.data(), blockSize)); + + // Most critical: no non-finite values + for (float sample : output) + { + EXPECT_TRUE (std::isfinite (sample)) << "Non-finite output in blockSize=" << blockSize; + } + + float inputRMS = calculateRMS (input); + float outputRMS = calculateRMS (output); + + if (std::isfinite (outputRMS)) + { + totalInputEnergy += inputRMS * inputRMS * blockSize; + totalOutputEnergy += outputRMS * outputRMS * blockSize; + } + + // Verify reasonable levels + float peak = findPeak (output); + EXPECT_LT (peak, 50.0f) << "Output peak too large for blockSize=" << blockSize; + + // With direct taps, expect output for reasonable input + if (inputRMS > 0.01f) + { + EXPECT_GT (outputRMS, 0.0001f) << "No output for significant input, blockSize=" << blockSize; + EXPECT_LT (outputRMS, inputRMS * 5.0f) << "Output unreasonably high for blockSize=" << blockSize; + } + } + + // Test challenging small block sizes + std::vector smallBlockSizes = { 1, 7, 15 }; + + for (size_t blockSize : smallBlockSizes) + { + SCOPED_TRACE (testing::Message() << "Processing small blockSize=" << blockSize); + + std::vector input (blockSize); + std::vector output (blockSize); + fillWithRandomData (input); + clearBuffer (output); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), blockSize)); + + // Critical: no non-finite values + for (float sample : output) + { + EXPECT_TRUE (std::isfinite (sample)) << "Non-finite output in small blockSize=" << blockSize; + } + + // Reasonable bounds + float peak = findPeak (output); + EXPECT_LT (peak, 50.0f) << "Output peak too large for small blockSize=" << blockSize; + } + + // Energy conservation check + if (totalInputEnergy > 0.0f && totalOutputEnergy > 0.0f) + { + EXPECT_GT (totalOutputEnergy, totalInputEnergy * 0.01f) << "Total output energy too low"; + EXPECT_LT (totalOutputEnergy, totalInputEnergy * 10.0f) << "Total output energy too high"; + } +} + 
+//============================================================================== +// Remaining Tests (These were passing) +//============================================================================== + +TEST_F (PartitionedConvolverTest, RandomizedFuzzing) +{ + // Generate random configurations and test them + std::uniform_int_distribution hopDist (32, 2048); + std::uniform_int_distribution directTapsDist (32, 512); // Always have some direct taps + std::uniform_int_distribution blockSizeDist (32, 1024); + + for (int trial = 0; trial < 10; ++trial) // Reduce trials for stability + { + SCOPED_TRACE (testing::Message() << "Fuzzing trial " << trial); + + // Generate random configuration + const size_t directTaps = directTapsDist (generator); + const size_t numLayers = 1 + (generator() % 3); // 1-3 layers + + std::vector hops; + int prevHop = 32; + for (size_t i = 0; i < numLayers; ++i) + { + int hop = std::max (prevHop, hopDist (generator)); + // Ensure power-of-2 for valid FFT sizes + hop = 1 << static_cast (std::log2 (hop)); + hops.push_back (hop); + prevHop = hop; + } + + const size_t maxBlockSize = 1024; + + PartitionedConvolver convolver; + + try + { + convolver.setTypicalLayout (directTaps, hops); + convolver.prepare (maxBlockSize); + + // Simple impulse response + std::vector ir (512); + for (size_t i = 0; i < ir.size(); ++i) + ir[i] = std::exp (-static_cast (i) / 100.0f) * randomFloat (-0.1f, 0.1f); + + convolver.setImpulseResponse (ir); + + // Test with impulse + std::vector deltaInput (maxBlockSize, 0.0f); + deltaInput[0] = 1.0f; + std::vector deltaOutput (maxBlockSize); + clearBuffer (deltaOutput); + + convolver.process (deltaInput.data(), deltaOutput.data(), maxBlockSize); + + float deltaRMS = calculateRMS (deltaOutput); + EXPECT_GT (deltaRMS, 0.001f) << "No convolution output in trial " << trial; + + // Process several blocks + for (int block = 0; block < 5; ++block) + { + const size_t blockSize = 32 + (generator() % (maxBlockSize - 32)); + + 
std::vector input (blockSize); + std::vector output (blockSize); + fillWithRandomData (input); + clearBuffer (output); + + convolver.process (input.data(), output.data(), blockSize); + + // Audio quality checks + for (float sample : output) + { + EXPECT_TRUE (std::isfinite (sample)) << "Non-finite output in trial " << trial << " block " << block; + EXPECT_LT (std::abs (sample), 100.0f) << "Output too large in trial " << trial << " block " << block; + } + } + } + catch (const std::exception& e) + { + FAIL() << "Exception in fuzzing trial " << trial << ": " << e.what(); + } + } +} + +TEST_F (PartitionedConvolverTest, ShortImpulseResponseWithManyLayers) +{ + PartitionedConvolver convolver; + + // Configure many layers but use a short IR + convolver.setTypicalLayout (64, { 128, 512, 2048, 4096 }); + convolver.prepare (512); + + // Very short IR (only 32 samples) - much shorter than layer configurations + std::vector shortIR (32); + fillWithRandomData (shortIR); + + // This should not crash and should not create "zombie" layers + EXPECT_NO_THROW (convolver.setImpulseResponse (shortIR)); + + // Process some data - should work without endless loops + std::vector input (512); + std::vector output (512); + fillWithRandomData (input); + clearBuffer (output); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); + + // Should produce some output (from direct FIR at least) + float outputRMS = calculateRMS (output); + EXPECT_GT (outputRMS, 0.001f); +} + +TEST_F (PartitionedConvolverTest, IRShorterThanDirectTaps) +{ + PartitionedConvolver convolver; + + // Configure with 128 direct taps but use much shorter IR + convolver.setTypicalLayout (128, { 256, 1024 }); + convolver.prepare (512); + + // IR shorter than direct taps + std::vector shortIR (64); + fillWithRandomData (shortIR); + + EXPECT_NO_THROW (convolver.setImpulseResponse (shortIR)); + + // Should still work - only direct FIR should be active + std::vector input (512); + std::vector output 
(512); + fillWithRandomData (input); + clearBuffer (output); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); + + // Should produce output from direct FIR + float outputRMS = calculateRMS (output); + EXPECT_GT (outputRMS, 0.001f); +} + +TEST_F (PartitionedConvolverTest, IRExactlyMatchesFirstLayer) +{ + PartitionedConvolver convolver; + + // Configure layers + convolver.setTypicalLayout (64, { 128, 512, 2048 }); + convolver.prepare (512); + + // IR that exactly fills direct taps + first layer + const std::size_t irLength = 64 + 128; // direct + first layer + std::vector ir (irLength); + fillWithRandomData (ir); + + EXPECT_NO_THROW (convolver.setImpulseResponse (ir)); + + // Should work with first layer active, subsequent layers inactive + std::vector input (512); + std::vector output (512); + fillWithRandomData (input); + clearBuffer (output); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); + + float outputRMS = calculateRMS (output); + EXPECT_GT (outputRMS, 0.001f); +} + +TEST_F (PartitionedConvolverTest, ZeroLengthIR) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (64, { 128, 512 }); + convolver.prepare (512); + + // Zero length IR + std::vector emptyIR; + EXPECT_NO_THROW (convolver.setImpulseResponse (emptyIR)); + + // Should process without crashing but produce no output + std::vector input (512); + std::vector output (512); + fillWithRandomData (input); + clearBuffer (output); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); + + // Output should be zero (or very close to zero) + for (float sample : output) + EXPECT_NEAR (sample, 0.0f, 0.0001f); +} + +TEST_F (PartitionedConvolverTest, ProgressiveIRLengths) +{ + // Test with progressively longer IRs to ensure layer activation works correctly + std::vector irLengths = { 10, 50, 100, 200, 500, 1000, 2000 }; + + for (size_t irLength : irLengths) + { + SCOPED_TRACE (testing::Message() << "IR 
Length: " << irLength); + + PartitionedConvolver convolver; + convolver.setTypicalLayout (64, { 128, 512, 2048 }); + convolver.prepare (512); + + std::vector ir (irLength); + fillWithRandomData (ir); + + EXPECT_NO_THROW (convolver.setImpulseResponse (ir)); + + // Process and verify output + std::vector input (512); + std::vector output (512); + fillWithRandomData (input); + clearBuffer (output); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); + + // Should produce reasonable output + for (float sample : output) + { + EXPECT_TRUE (std::isfinite (sample)); + EXPECT_LT (std::abs (sample), 100.0f); // Sanity check + } + } +} + +TEST_F (PartitionedConvolverTest, ResetFunctionality) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (64, { 64, 256 }); + convolver.prepare (512); + + std::vector ir (500); + fillWithRandomData (ir); + convolver.setImpulseResponse (ir); + + // Process some data to build up internal state + std::vector input (512); + std::vector output1 (512); + fillWithRandomData (input); + clearBuffer (output1); + + convolver.process (input.data(), output1.data(), input.size()); + + // Reset and process same input again + convolver.reset(); + + std::vector output2 (512); + clearBuffer (output2); + + convolver.process (input.data(), output2.data(), input.size()); + + // Outputs should be identical after reset + for (size_t i = 0; i < output1.size(); ++i) + { + EXPECT_NEAR (output1[i], output2[i], 0.001f) << "Mismatch at sample " << i; + } +} + +} // namespace yup::test \ No newline at end of file From 8e8a4c8edf9caa21670b2778c023eb1185639bc8 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 00:31:03 +0200 Subject: [PATCH 13/37] Fix tests --- tests/yup_dsp/yup_PartitionedConvolver.cpp | 193 +++++++++++++++++++-- 1 file changed, 178 insertions(+), 15 deletions(-) diff --git a/tests/yup_dsp/yup_PartitionedConvolver.cpp b/tests/yup_dsp/yup_PartitionedConvolver.cpp index 393a01a2f..c1ea2985f 100644 
--- a/tests/yup_dsp/yup_PartitionedConvolver.cpp +++ b/tests/yup_dsp/yup_PartitionedConvolver.cpp @@ -95,7 +95,22 @@ class PartitionedConvolverTest : public ::testing::Test TEST_F (PartitionedConvolverTest, DefaultConstruction) { PartitionedConvolver convolver; - // Should not crash - basic construction test + + // Verify default state - should be safe to call these methods + EXPECT_NO_THROW (convolver.reset()); + + // Should be able to configure after construction + EXPECT_NO_THROW (convolver.setTypicalLayout (64, { 64, 256 })); + EXPECT_NO_THROW (convolver.prepare (512)); + + // Should handle empty processing gracefully + std::vector input (256, 0.0f); + std::vector output (256, 0.0f); + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); + + // Output should remain zero without impulse response + for (float sample : output) + EXPECT_EQ (sample, 0.0f); } TEST_F (PartitionedConvolverTest, MoveSemantics) @@ -104,14 +119,35 @@ TEST_F (PartitionedConvolverTest, MoveSemantics) convolver1.setTypicalLayout (64, { 64, 256 }); convolver1.prepare (512); + // Set up a known state + std::vector ir (128, 0.0f); + ir[0] = 1.0f; + convolver1.setImpulseResponse (ir); + // Move constructor PartitionedConvolver convolver2 = std::move (convolver1); + // Verify moved convolver works + std::vector input (256, 0.0f); + input[0] = 1.0f; + std::vector output (256, 0.0f); + + EXPECT_NO_THROW (convolver2.process (input.data(), output.data(), input.size())); + + // Should produce output from the moved convolver + float outputRMS = calculateRMS (output); + EXPECT_GT (outputRMS, 0.001f); + // Move assignment PartitionedConvolver convolver3; convolver3 = std::move (convolver2); - // Should not crash + // Verify move-assigned convolver works + clearBuffer (output); + EXPECT_NO_THROW (convolver3.process (input.data(), output.data(), input.size())); + + outputRMS = calculateRMS (output); + EXPECT_GT (outputRMS, 0.001f); } TEST_F (PartitionedConvolverTest, 
BasicConfiguration) @@ -119,13 +155,37 @@ TEST_F (PartitionedConvolverTest, BasicConfiguration) PartitionedConvolver convolver; // Test typical layout configuration - convolver.setTypicalLayout (128, { 128, 512, 2048 }); + EXPECT_NO_THROW (convolver.setTypicalLayout (128, { 128, 512, 2048 })); - // Should not crash - convolver.prepare (512); + // Should be able to prepare after configuration + EXPECT_NO_THROW (convolver.prepare (512)); + + // Verify configuration works by setting an impulse response + std::vector ir (256, 0.0f); + ir[0] = 1.0f; + EXPECT_NO_THROW (convolver.setImpulseResponse (ir)); - // Test reset + // Verify processing works after configuration + std::vector input (256, 0.0f); + input[0] = 1.0f; + std::vector output (256, 0.0f); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); + + // Should produce output + float outputRMS = calculateRMS (output); + EXPECT_GT (outputRMS, 0.001f); + + // Test reset clears state convolver.reset(); + + // After reset, same input should produce same output (deterministic) + std::vector output2 (256, 0.0f); + EXPECT_NO_THROW (convolver.process (input.data(), output2.data(), input.size())); + + // Outputs should be very similar after reset + for (size_t i = 0; i < output.size(); ++i) + EXPECT_NEAR (output[i], output2[i], 0.001f); } TEST_F (PartitionedConvolverTest, ConfigureLayers) @@ -136,10 +196,28 @@ TEST_F (PartitionedConvolverTest, ConfigureLayers) { 64 }, { 256 }, { 1024 } }; - convolver.configureLayers (32, layers); - convolver.prepare (256); + EXPECT_NO_THROW (convolver.configureLayers (32, layers)); + EXPECT_NO_THROW (convolver.prepare (256)); - // Should not crash + // Verify the configuration works with an impulse response + std::vector ir (500, 0.0f); + ir[0] = 1.0f; + ir[50] = 0.5f; + EXPECT_NO_THROW (convolver.setImpulseResponse (ir)); + + // Test processing with the configured layers + std::vector input (256, 0.0f); + input[0] = 1.0f; + std::vector output (256, 0.0f); + 
+ EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); + + // Should produce output from direct FIR immediately + float outputRMS = calculateRMS (output); + EXPECT_GT (outputRMS, 0.001f); + + // Verify immediate response from direct taps + EXPECT_GT (findPeak (output), 0.1f); } //============================================================================== @@ -156,9 +234,36 @@ TEST_F (PartitionedConvolverTest, SetImpulseResponseVector) std::vector ir (1000); fillWithRandomData (ir); - convolver.setImpulseResponse (ir); + // Normalize to reasonable levels + float peak = findPeak (ir); + if (peak > 0.0f) + { + for (auto& sample : ir) + sample /= peak; + } + + EXPECT_NO_THROW (convolver.setImpulseResponse (ir)); + + // Verify the impulse response was set by testing processing + std::vector input (512, 0.0f); + input[0] = 1.0f; + std::vector output (512, 0.0f); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); - // Should not crash + // Should produce significant output + float outputRMS = calculateRMS (output); + EXPECT_GT (outputRMS, 0.001f); + + // Test linearity - 2x input should give ~2x output + input[0] = 2.0f; + std::vector output2 (512, 0.0f); + + convolver.reset(); + EXPECT_NO_THROW (convolver.process (input.data(), output2.data(), input.size())); + + float output2RMS = calculateRMS (output2); + EXPECT_GT (output2RMS, outputRMS * 1.5f); } TEST_F (PartitionedConvolverTest, SetImpulseResponsePointer) @@ -171,9 +276,35 @@ TEST_F (PartitionedConvolverTest, SetImpulseResponsePointer) std::vector ir (1000); fillWithRandomData (ir); - convolver.setImpulseResponse (ir.data(), ir.size()); + // Normalize to reasonable levels + float peak = findPeak (ir); + if (peak > 0.0f) + { + for (auto& sample : ir) + sample /= peak; + } + + EXPECT_NO_THROW (convolver.setImpulseResponse (ir.data(), ir.size())); + + // Verify both pointer and vector methods produce same result + std::vector input (512, 0.0f); + input[0] 
= 1.0f; + std::vector output1 (512, 0.0f); + + EXPECT_NO_THROW (convolver.process (input.data(), output1.data(), input.size())); - // Should not crash + // Reset and test with vector method + PartitionedConvolver convolver2; + convolver2.setTypicalLayout (64, { 64, 256 }); + convolver2.prepare (512); + convolver2.setImpulseResponse (ir); + + std::vector output2 (512, 0.0f); + EXPECT_NO_THROW (convolver2.process (input.data(), output2.data(), input.size())); + + // Both methods should produce identical results + for (size_t i = 0; i < output1.size(); ++i) + EXPECT_NEAR (output1[i], output2[i], 0.0001f); } TEST_F (PartitionedConvolverTest, SetImpulseResponseWithOptions) @@ -185,13 +316,45 @@ TEST_F (PartitionedConvolverTest, SetImpulseResponseWithOptions) std::vector ir (1000); fillWithRandomData (ir); + // Make IR have a known peak + ir[0] = 2.0f; // Peak value + PartitionedConvolver::IRLoadOptions options; options.normalize = true; options.headroomDb = -6.0f; - convolver.setImpulseResponse (ir, options); + EXPECT_NO_THROW (convolver.setImpulseResponse (ir, options)); + + // Test that normalization and headroom are applied + std::vector input (512, 0.0f); + input[0] = 1.0f; + std::vector output (512, 0.0f); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); + + // Should produce output, but peak should be limited by headroom + float outputPeak = findPeak (output); + EXPECT_GT (outputPeak, 0.001f); + EXPECT_LT (outputPeak, 1.0f); // Should be less than input due to headroom + + // Compare with non-normalized version + PartitionedConvolver convolver2; + convolver2.setTypicalLayout (64, { 64, 256 }); + convolver2.prepare (512); + + PartitionedConvolver::IRLoadOptions options2; + options2.normalize = false; + options2.headroomDb = 0.0f; + + convolver2.setImpulseResponse (ir, options2); + + std::vector output2 (512, 0.0f); + EXPECT_NO_THROW (convolver2.process (input.data(), output2.data(), input.size())); + + float output2Peak = 
findPeak (output2); - // Should not crash + // Normalized version should have different peak + EXPECT_NE (outputPeak, output2Peak); } TEST_F (PartitionedConvolverTest, EmptyImpulseResponse) From 2f6139384287e77138c2ce560368dc2f2515ed71 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 09:07:15 +0200 Subject: [PATCH 14/37] Include fixes --- modules/yup_audio_basics/yup_audio_basics.cpp | 23 -------- modules/yup_audio_basics/yup_audio_basics.h | 30 +++++++++++ modules/yup_core/system/yup_PlatformDefs.h | 20 +++---- .../yup_dsp/frequency/yup_FFTProcessor.cpp | 35 ------------- modules/yup_dsp/yup_dsp.cpp | 52 +++++++++++++------ modules/yup_dsp/yup_dsp.h | 4 +- 6 files changed, 77 insertions(+), 87 deletions(-) diff --git a/modules/yup_audio_basics/yup_audio_basics.cpp b/modules/yup_audio_basics/yup_audio_basics.cpp index d3b7ad64d..5d128a9e3 100644 --- a/modules/yup_audio_basics/yup_audio_basics.cpp +++ b/modules/yup_audio_basics/yup_audio_basics.cpp @@ -48,29 +48,6 @@ #include "yup_audio_basics.h" -#if YUP_USE_SSE_INTRINSICS -#include -#endif - -#if YUP_MAC || YUP_IOS -#ifndef YUP_USE_VDSP_FRAMEWORK -#define YUP_USE_VDSP_FRAMEWORK 1 -#endif - -#if YUP_USE_VDSP_FRAMEWORK -#include -#endif - -#include "native/yup_AudioWorkgroup_apple.h" - -#elif YUP_USE_VDSP_FRAMEWORK -#undef YUP_USE_VDSP_FRAMEWORK -#endif - -#if YUP_USE_ARM_NEON -#include -#endif - #include "buffers/yup_FloatVectorOperations.cpp" #include "buffers/yup_AudioChannelSet.cpp" #include "buffers/yup_AudioProcessLoadMeasurer.cpp" diff --git a/modules/yup_audio_basics/yup_audio_basics.h b/modules/yup_audio_basics/yup_audio_basics.h index 79fee813d..22fc858d6 100644 --- a/modules/yup_audio_basics/yup_audio_basics.h +++ b/modules/yup_audio_basics/yup_audio_basics.h @@ -103,6 +103,36 @@ #define YUP_USE_ARM_NEON 0 #endif +//============================================================================== +#if YUP_USE_AVX_INTRINSICS || YUP_USE_FMA_INTRINSICS +#include +#endif + +#if 
YUP_USE_SSE_INTRINSICS +#include +#endif + +#if YUP_USE_ARM_NEON +#if JUCE_64BIT && JUCE_WINDOWS +#include +#else +#include +#endif +#endif + +#if (YUP_MAC || YUP_IOS) && __has_include() +#ifndef YUP_USE_VDSP_FRAMEWORK +#define YUP_USE_VDSP_FRAMEWORK 1 +#endif + +#if YUP_USE_VDSP_FRAMEWORK +#include +#endif + +#elif YUP_USE_VDSP_FRAMEWORK +#undef YUP_USE_VDSP_FRAMEWORK +#endif + //============================================================================== #include "buffers/yup_AudioDataConverters.h" YUP_BEGIN_IGNORE_WARNINGS_MSVC (4661) diff --git a/modules/yup_core/system/yup_PlatformDefs.h b/modules/yup_core/system/yup_PlatformDefs.h index b70e93747..b9c8ef125 100644 --- a/modules/yup_core/system/yup_PlatformDefs.h +++ b/modules/yup_core/system/yup_PlatformDefs.h @@ -97,24 +97,26 @@ namespace yup @see jassert() */ -#define YUP_BREAK_IN_DEBUGGER ::kill (0, SIGTRAP); +#define YUP_BREAK_IN_DEBUGGER { ::kill (0, SIGTRAP); } #elif YUP_WASM -#define YUP_BREAK_IN_DEBUGGER +#define YUP_BREAK_IN_DEBUGGER { } #elif YUP_MSVC #pragma intrinsic(__debugbreak) -#define YUP_BREAK_IN_DEBUGGER __debugbreak(); +#define YUP_BREAK_IN_DEBUGGER { __debugbreak(); } #elif YUP_INTEL && (YUP_GCC || YUP_CLANG || YUP_MAC) #if YUP_NO_INLINE_ASM -#define YUP_BREAK_IN_DEBUGGER +#define YUP_BREAK_IN_DEBUGGER { } #else -#define YUP_BREAK_IN_DEBUGGER asm ("int $3"); +#define YUP_BREAK_IN_DEBUGGER { asm ("int $3"); } #endif -#elif YUP_ARM && YUP_MAC -#define YUP_BREAK_IN_DEBUGGER __builtin_debugtrap(); #elif YUP_ANDROID -#define YUP_BREAK_IN_DEBUGGER __builtin_trap(); +#define YUP_BREAK_IN_DEBUGGER { __builtin_trap(); } +#elif YUP_ARM +#if YUP_MAC || (YUP_WINDOWS && YUP_CLANG) +#define YUP_BREAK_IN_DEBUGGER { __builtin_debugtrap(); } +#endif #else -#define YUP_BREAK_IN_DEBUGGER __asm int 3; +#define YUP_BREAK_IN_DEBUGGER { __asm int 3; } #endif // clang-format on diff --git a/modules/yup_dsp/frequency/yup_FFTProcessor.cpp b/modules/yup_dsp/frequency/yup_FFTProcessor.cpp index 
c85263290..94f0daceb 100644 --- a/modules/yup_dsp/frequency/yup_FFTProcessor.cpp +++ b/modules/yup_dsp/frequency/yup_FFTProcessor.cpp @@ -19,41 +19,6 @@ ============================================================================== */ -// Conditional includes based on available FFT backends -#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_VDSP && (YUP_MAC || YUP_IOS) && __has_include() -#include -#define YUP_FFT_USING_VDSP 1 -#define YUP_FFT_FOUND_BACKEND 1 -#endif - -#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_INTEL_IPP && __has_include() -#include -#define YUP_FFT_USING_IPP 1 -#define YUP_FFT_FOUND_BACKEND 1 -#endif - -#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_FFTW3 && __has_include() -#include -#define YUP_FFT_USING_FFTW3 1 -#define YUP_FFT_FOUND_BACKEND 1 -#endif - -#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_PFFFT && YUP_MODULE_AVAILABLE_pffft_library -#include -#define YUP_FFT_USING_PFFFT 1 -#define YUP_FFT_FOUND_BACKEND 1 -#endif - -#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_OOURA -#include "yup_OouraFFT8g.h" -#define YUP_FFT_USING_OOURA 1 -#define YUP_FFT_FOUND_BACKEND 1 -#endif - -#if ! defined(YUP_FFT_FOUND_BACKEND) -#error "Unable to find a proper FFT backend !" -#endif - namespace yup { diff --git a/modules/yup_dsp/yup_dsp.cpp b/modules/yup_dsp/yup_dsp.cpp index 4d7e9a73e..39d32fd85 100644 --- a/modules/yup_dsp/yup_dsp.cpp +++ b/modules/yup_dsp/yup_dsp.cpp @@ -31,36 +31,54 @@ #include "yup_dsp.h" //============================================================================== -#if YUP_USE_AVX_INTRINSICS || YUP_USE_FMA_INTRINSICS -#include + +#include +#include + +//============================================================================== + +#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_VDSP && (YUP_MAC || YUP_IOS) +#define YUP_FFT_USING_VDSP 1 +#define YUP_FFT_FOUND_BACKEND 1 #endif -#if YUP_USE_SSE_INTRINSICS -#include +#if ! 
YUP_FFT_FOUND_BACKEND && YUP_ENABLE_INTEL_IPP && __has_include() +#include +#define YUP_FFT_USING_IPP 1 +#define YUP_FFT_FOUND_BACKEND 1 #endif -#if YUP_USE_ARM_NEON -#include +#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_FFTW3 && __has_include() +#include +#define YUP_FFT_USING_FFTW3 1 +#define YUP_FFT_FOUND_BACKEND 1 #endif -#if (YUP_MAC || YUP_IOS) && YUP_ENABLE_VDSP -#include +#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_PFFFT && YUP_MODULE_AVAILABLE_pffft_library +#include +#define YUP_FFT_USING_PFFFT 1 +#define YUP_FFT_FOUND_BACKEND 1 #endif -//============================================================================== -#include -#include +#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_OOURA +#include "yup_OouraFFT8g.h" +#define YUP_FFT_USING_OOURA 1 +#define YUP_FFT_FOUND_BACKEND 1 +#endif + +#if ! defined(YUP_FFT_FOUND_BACKEND) +#error "Unable to find a proper FFT backend !" +#endif //============================================================================== + #include "frequency/yup_FFTProcessor.cpp" #include "frequency/yup_SpectrumAnalyzerState.cpp" +#include "designers/yup_FilterDesigner.cpp" +#include "convolution/yup_PartitionedConvolver.cpp" + +//============================================================================== #if YUP_ENABLE_OOURA && YUP_FFT_USING_OOURA #include "frequency/yup_OouraFFT8g.cpp" #endif - -//============================================================================== -#include "designers/yup_FilterDesigner.cpp" - -//============================================================================== -#include "convolution/yup_PartitionedConvolver.cpp" diff --git a/modules/yup_dsp/yup_dsp.h b/modules/yup_dsp/yup_dsp.h index e9ab7c795..c89002c13 100644 --- a/modules/yup_dsp/yup_dsp.h +++ b/modules/yup_dsp/yup_dsp.h @@ -68,7 +68,7 @@ Enable Apple's vDSP backend. 
*/ #ifndef YUP_ENABLE_VDSP -#if (YUP_MAC || YUP_IOS) +#if (YUP_MAC || YUP_IOS) && YUP_USE_VDSP_FRAMEWORK #define YUP_ENABLE_VDSP 1 #else #define YUP_ENABLE_VDSP 0 @@ -143,5 +143,3 @@ // Convolution processors #include "convolution/yup_PartitionedConvolver.h" - -//============================================================================== From 35318a9ef23e00c293a41c829f33e5e045883b90 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 09:17:42 +0200 Subject: [PATCH 15/37] Make things faster --- .github/workflows/build_android.yml | 6 ++++++ .github/workflows/build_ios.yml | 9 +++++++++ .github/workflows/build_linux.yml | 12 ++++++++++++ .github/workflows/build_macos.yml | 12 ++++++++++++ .github/workflows/build_wasm.yml | 8 ++++++++ .github/workflows/build_windows.yml | 10 ++++++++++ .github/workflows/coverage.yml | 2 ++ .github/workflows/python_linux.yml | 2 ++ .github/workflows/python_macos.yml | 4 +++- .github/workflows/python_windows.yml | 2 ++ 10 files changed, 66 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_android.yml b/.github/workflows/build_android.yml index ca95f690a..0d0612c36 100644 --- a/.github/workflows/build_android.yml +++ b/.github/workflows/build_android.yml @@ -37,6 +37,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - name: Setup Java uses: actions/setup-java@v3 @@ -65,6 +67,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - name: Setup Java uses: actions/setup-java@v3 @@ -95,6 +99,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - name: Setup Java uses: actions/setup-java@v3 diff --git a/.github/workflows/build_ios.yml b/.github/workflows/build_ios.yml index 51a60a5e8..49c990074 100644 --- a/.github/workflows/build_ios.yml +++ b/.github/workflows/build_ios.yml @@ 
-33,6 +33,9 @@ jobs: steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: seanmiddleditch/gha-setup-ninja@master - name: Configure @@ -57,6 +60,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - uses: actions/cache/restore@v4 id: cache-restore @@ -76,6 +81,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - uses: actions/cache/restore@v4 id: cache-restore @@ -95,6 +102,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - uses: actions/cache/restore@v4 id: cache-restore diff --git a/.github/workflows/build_linux.yml b/.github/workflows/build_linux.yml index 6f9b096ec..941b7e466 100644 --- a/.github/workflows/build_linux.yml +++ b/.github/workflows/build_linux.yml @@ -39,6 +39,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - name: Install Dependencies run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS} @@ -63,6 +65,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS} - uses: actions/cache/restore@v4 @@ -85,6 +89,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS} - uses: actions/cache/restore@v4 @@ -103,6 +109,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS} - uses: actions/cache/restore@v4 @@ -121,6 +129,8 @@ jobs: needs: [configure] steps: - uses: 
actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS} - uses: actions/cache/restore@v4 @@ -139,6 +149,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS} - uses: actions/cache/restore@v4 diff --git a/.github/workflows/build_macos.yml b/.github/workflows/build_macos.yml index f2e672586..dbfa6bd6f 100644 --- a/.github/workflows/build_macos.yml +++ b/.github/workflows/build_macos.yml @@ -32,6 +32,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - name: Configure @@ -54,6 +56,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - uses: actions/cache/restore@v4 id: cache-restore @@ -75,6 +79,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - uses: actions/cache/restore@v4 id: cache-restore @@ -92,6 +98,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - uses: actions/cache/restore@v4 id: cache-restore @@ -109,6 +117,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - uses: actions/cache/restore@v4 id: cache-restore @@ -126,6 +136,8 @@ jobs: needs: [configure] steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - uses: actions/cache/restore@v4 id: cache-restore diff --git a/.github/workflows/build_wasm.yml b/.github/workflows/build_wasm.yml index b88c425e7..1ac58ae20 100644 --- a/.github/workflows/build_wasm.yml +++ b/.github/workflows/build_wasm.yml 
@@ -38,6 +38,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS} - name: Setup emsdk @@ -58,6 +60,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS} - name: Setup emsdk @@ -74,6 +78,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS} - name: Setup emsdk @@ -90,6 +96,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: seanmiddleditch/gha-setup-ninja@master - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS} - name: Setup emsdk diff --git a/.github/workflows/build_windows.yml b/.github/workflows/build_windows.yml index 80dad6dd6..024ec032d 100644 --- a/.github/workflows/build_windows.yml +++ b/.github/workflows/build_windows.yml @@ -33,6 +33,8 @@ jobs: runs-on: windows-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - run: cmake ${{ github.workspace }} -B ${{ runner.workspace }}/build -DYUP_ENABLE_TESTS=ON - run: cmake --build ${{ runner.workspace }}/build --config Debug --parallel 4 --target yup_tests - working-directory: ${{ runner.workspace }}/build/tests/Debug @@ -47,6 +49,8 @@ jobs: runs-on: windows-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - run: cmake ${{ github.workspace }} -B ${{ runner.workspace }}/build -DYUP_ENABLE_EXAMPLES=ON - run: cmake --build ${{ runner.workspace }}/build --config Debug --parallel 4 --target example_console - run: cmake --build ${{ runner.workspace }}/build --config Release --parallel 4 --target example_console @@ -55,6 +59,8 @@ jobs: runs-on: 
windows-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - run: cmake ${{ github.workspace }} -B ${{ runner.workspace }}/build -DYUP_ENABLE_EXAMPLES=ON - run: cmake --build ${{ runner.workspace }}/build --config Debug --parallel 4 --target example_app - run: cmake --build ${{ runner.workspace }}/build --config Release --parallel 4 --target example_app @@ -63,6 +69,8 @@ jobs: runs-on: windows-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - run: cmake ${{ github.workspace }} -B ${{ runner.workspace }}/build -DYUP_ENABLE_EXAMPLES=ON - run: cmake --build ${{ runner.workspace }}/build --config Debug --parallel 4 --target example_graphics - run: cmake --build ${{ runner.workspace }}/build --config Release --parallel 4 --target example_graphics @@ -71,6 +79,8 @@ jobs: runs-on: windows-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - run: cmake ${{ github.workspace }} -B ${{ runner.workspace }}/build -DYUP_ENABLE_EXAMPLES=ON - run: cmake --build ${{ runner.workspace }}/build --config Debug --parallel 4 --target example_plugin_clap_plugin - run: cmake --build ${{ runner.workspace }}/build --config Release --parallel 4 --target example_plugin_clap_plugin diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 0c0652eb2..1ad4e6340 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -59,6 +59,8 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Setup Ninja uses: seanmiddleditch/gha-setup-ninja@master diff --git a/.github/workflows/python_linux.yml b/.github/workflows/python_linux.yml index 00ea8c306..0861a4c52 100644 --- a/.github/workflows/python_linux.yml +++ b/.github/workflows/python_linux.yml @@ -40,6 +40,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Set up QEMU if: matrix.cibw_archs == 'aarch64' diff --git a/.github/workflows/python_macos.yml 
b/.github/workflows/python_macos.yml index 9fb101e05..a3d1d2846 100644 --- a/.github/workflows/python_macos.yml +++ b/.github/workflows/python_macos.yml @@ -34,10 +34,12 @@ jobs: fail-fast: true matrix: include: - - { os: macos-15, python: 311, platform_id: macosx_universal2, cibw_archs: universal2 } + - { os: macos-latest, python: 311, platform_id: macosx_universal2, cibw_archs: universal2 } steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Setup and install python uses: actions/setup-python@v5 diff --git a/.github/workflows/python_windows.yml b/.github/workflows/python_windows.yml index ae90b3a7f..b4df3770f 100644 --- a/.github/workflows/python_windows.yml +++ b/.github/workflows/python_windows.yml @@ -41,6 +41,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Setup and install python uses: actions/setup-python@v5 From f6357559ef316790e73661d27703e08a2f01520d Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 09:43:18 +0200 Subject: [PATCH 16/37] Missing include --- modules/yup_audio_basics/yup_audio_basics.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/yup_audio_basics/yup_audio_basics.cpp b/modules/yup_audio_basics/yup_audio_basics.cpp index 5d128a9e3..60c63befa 100644 --- a/modules/yup_audio_basics/yup_audio_basics.cpp +++ b/modules/yup_audio_basics/yup_audio_basics.cpp @@ -48,6 +48,10 @@ #include "yup_audio_basics.h" +#if YUP_MAC || YUP_IOS +#include "native/yup_AudioWorkgroup_apple.h" +#endif + #include "buffers/yup_FloatVectorOperations.cpp" #include "buffers/yup_AudioChannelSet.cpp" #include "buffers/yup_AudioProcessLoadMeasurer.cpp" From 81c4821d76275865ef9421c70ef917e187dbf145 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 09:43:36 +0200 Subject: [PATCH 17/37] Missing divide methods in float vector operations --- .../buffers/yup_FloatVectorOperations.cpp | 155 ++++++++++++++++++ .../buffers/yup_FloatVectorOperations.h | 18 ++ 2 files changed, 173 insertions(+) 
diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp index de9ffbfc8..447b9fec3 100644 --- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp +++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp @@ -89,6 +89,8 @@ struct BasicOps32 static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); } + static forcedinline ParallelType div (ParallelType a, ParallelType b) noexcept { return _mm_div_ps (a, b); } + static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); } static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); } @@ -148,6 +150,8 @@ struct BasicOps64 static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); } + static forcedinline ParallelType div (ParallelType a, ParallelType b) noexcept { return _mm_div_pd (a, b); } + static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); } static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); } @@ -337,6 +341,8 @@ struct BasicOps32 static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); } + static forcedinline ParallelType div (ParallelType a, ParallelType b) noexcept { return vdivq_f32 (a, b); } + static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); } static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); } @@ -411,6 +417,8 @@ struct BasicOps64 static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; } + static forcedinline ParallelType div (ParallelType a, ParallelType b) noexcept { return a / b; } + static forcedinline 
ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); } static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); } @@ -755,6 +763,63 @@ void copyWithMultiply (double* dest, const double* src, double multiplier, Size #endif } + +template +void copyWithDividend (float* dest, const float* src, float dividend, Size num) noexcept +{ +#if YUP_USE_VDSP_FRAMEWORK + vDSP_svdiv (&dividend, src, 1, dest, 1, (vDSP_Length) num); +#else + YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = dividend / src[i], + Mode::div (divsd, s), + YUP_LOAD_SRC, + YUP_INCREMENT_SRC_DEST, + const Mode::ParallelType divsd = Mode::load1 (dividend);) +#endif +} + +template +void copyWithDividend (double* dest, const double* src, double dividend, Size num) noexcept +{ +#if YUP_USE_VDSP_FRAMEWORK + vDSP_svdivD (&dividend, src, 1, dest, 1, (vDSP_Length) num); +#else + YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = dividend / src[i], + Mode::div (divsd, s), + YUP_LOAD_SRC, + YUP_INCREMENT_SRC_DEST, + const Mode::ParallelType divsd = Mode::load1 (dividend);) +#endif +} + +template +void copyWithDivide (float* dest, const float* src, float divisor, Size num) noexcept +{ +#if YUP_USE_VDSP_FRAMEWORK + vDSP_vsdiv (src, 1, &divisor, dest, 1, (vDSP_Length) num); +#else + YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] / divisor, + Mode::div (s, divs), + YUP_LOAD_SRC, + YUP_INCREMENT_SRC_DEST, + const Mode::ParallelType divs = Mode::load1 (divisor);) +#endif +} + +template +void copyWithDivide (double* dest, const double* src, double divisor, Size num) noexcept +{ +#if YUP_USE_VDSP_FRAMEWORK + vDSP_vsdivD (src, 1, &divisor, dest, 1, (vDSP_Length) num); +#else + YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] / divisor, + Mode::div (s, divs), + YUP_LOAD_SRC, + YUP_INCREMENT_SRC_DEST, + const Mode::ParallelType divs = Mode::load1 (divisor);) +#endif +} + template void add (float* dest, float amount, Size num) noexcept { @@ -1099,6 +1164,78 @@ void multiply (double* dest, const 
double* src, double multiplier, Size num) noe const Mode::ParallelType mult = Mode::load1 (multiplier);) } +template +void divide (float* dest, float divisor, Size num) noexcept +{ +#if YUP_USE_VDSP_FRAMEWORK + vDSP_vsdiv (dest, 1, &divisor, dest, 1, (vDSP_Length) num); +#else + YUP_PERFORM_VEC_OP_DEST (dest[i] /= divisor, + Mode::div (d, divs), + YUP_LOAD_DEST, + const Mode::ParallelType divs = Mode::load1 (divisor);) +#endif +} + +template +void divide (double* dest, double divisor, Size num) noexcept +{ +#if YUP_USE_VDSP_FRAMEWORK + vDSP_vsdivD (dest, 1, &divisor, dest, 1, (vDSP_Length) num); +#else + YUP_PERFORM_VEC_OP_DEST (dest[i] /= divisor, + Mode::div (d, divs), + YUP_LOAD_DEST, + const Mode::ParallelType divs = Mode::load1 (divisor);) +#endif +} + +template +void divide (float* dest, const float* src1, const float* src2, Size num) noexcept +{ +#if YUP_USE_VDSP_FRAMEWORK + vDSP_vdiv (src2, 1, src1, 1, dest, 1, (vDSP_Length) num); +#else + YUP_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] / src2[i], + Mode::div (s1, s2), + YUP_LOAD_SRC1_SRC2, + YUP_INCREMENT_SRC1_SRC2_DEST,) +#endif +} + +template +void divide (double* dest, const double* src1, const double* src2, Size num) noexcept +{ +#if YUP_USE_VDSP_FRAMEWORK + vDSP_vdivD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num); +#else + YUP_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] / src2[i], + Mode::div (s1, s2), + YUP_LOAD_SRC1_SRC2, + YUP_INCREMENT_SRC1_SRC2_DEST,) +#endif +} + +template +void divide (float* dest, const float* src, float divisor, Size num) noexcept +{ + YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] / divisor, + Mode::div (s, divs), + YUP_LOAD_SRC, + YUP_INCREMENT_SRC_DEST, + const Mode::ParallelType divs = Mode::load1 (divisor);) +} + +template +void divide (double* dest, const double* src, double divisor, Size num) noexcept +{ + YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] / divisor, + Mode::div (s, divs), + YUP_LOAD_SRC, + YUP_INCREMENT_SRC_DEST, + const Mode::ParallelType divs = 
Mode::load1 (divisor);) +} + template void negate (float* dest, const float* src, Size num) noexcept { @@ -1374,6 +1511,24 @@ void YUP_CALLTYPE FloatVectorOperationsBase::copyWithMulti FloatVectorHelpers::copyWithMultiply (dest, src, multiplier, numValues); } +template +void YUP_CALLTYPE FloatVectorOperationsBase::copyWithDividend (FloatType* dest, + const FloatType* src, + FloatType dividend, + CountType numValues) noexcept +{ + FloatVectorHelpers::copyWithDividend (dest, src, dividend, numValues); +} + +template +void YUP_CALLTYPE FloatVectorOperationsBase::copyWithDivide (FloatType* dest, + const FloatType* src, + FloatType divisor, + CountType numValues) noexcept +{ + FloatVectorHelpers::copyWithDivide (dest, src, divisor, numValues); +} + template void YUP_CALLTYPE FloatVectorOperationsBase::add (FloatType* dest, FloatType amountToAdd, diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h index 3b7cd9e90..3b66168fc 100644 --- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h +++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h @@ -84,6 +84,12 @@ struct FloatVectorOperationsBase /** Copies a vector of floating point numbers, multiplying each value by a given multiplier */ static void YUP_CALLTYPE copyWithMultiply (FloatType* dest, const FloatType* src, FloatType multiplier, CountType numValues) noexcept; + /** Copies a vector of floating point numbers, dividing a dividend with each value (dest[i] = dividend / src[i]) */ + static void YUP_CALLTYPE copyWithDividend (FloatType* dest, const FloatType* src, FloatType dividend, CountType numValues) noexcept; + + /** Copies a vector of floating point numbers, dividing each value with a divisor (dest[i] = src[i] / divisor) */ + static void YUP_CALLTYPE copyWithDivide (FloatType* dest, const FloatType* src, FloatType divisor, CountType numValues) noexcept; + /** Adds a fixed value to the destination values. 
*/ static void YUP_CALLTYPE add (FloatType* dest, FloatType amountToAdd, CountType numValues) noexcept; @@ -126,6 +132,18 @@ struct FloatVectorOperationsBase /** Multiplies each of the source values by a fixed multiplier and stores the result in the destination array. */ static void YUP_CALLTYPE multiply (FloatType* dest, const FloatType* src, FloatType multiplier, CountType num) noexcept; + /** Divides the destination values by the source values. */ + static void YUP_CALLTYPE divide (FloatType* dest, const FloatType* src, CountType numValues) noexcept; + + /** Divides each source1 value by the corresponding source2 value, then stores it in the destination array. */ + static void YUP_CALLTYPE divide (FloatType* dest, const FloatType* src1, const FloatType* src2, CountType numValues) noexcept; + + /** Divides each of the destination values by a fixed divisor. */ + static void YUP_CALLTYPE divide (FloatType* dest, FloatType divisor, CountType numValues) noexcept; + + /** Divides each of the source values by a fixed divisor and stores the result in the destination array. */ + static void YUP_CALLTYPE divide (FloatType* dest, const FloatType* src, FloatType divisor, CountType num) noexcept; + /** Copies a source vector to a destination, negating each value. 
*/ static void YUP_CALLTYPE negate (FloatType* dest, const FloatType* src, CountType numValues) noexcept; From 55ca8f88e28bb3355a42a689b5781aac43030081 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 09:55:10 +0200 Subject: [PATCH 18/37] More tests --- .../buffers/yup_FloatVectorOperations.cpp | 84 ++++++++++++++++--- .../buffers/yup_FloatVectorOperations.h | 3 + .../yup_FloatVectorOperations.cpp | 27 ++++++ 3 files changed, 102 insertions(+), 12 deletions(-) diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp index 447b9fec3..7fbb129bf 100644 --- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp +++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp @@ -1165,28 +1165,28 @@ void multiply (double* dest, const double* src, double multiplier, Size num) noe } template -void divide (float* dest, float divisor, Size num) noexcept +void divide (float* dest, const float* src, Size num) noexcept { #if YUP_USE_VDSP_FRAMEWORK - vDSP_vsdiv (dest, 1, &divisor, dest, 1, (vDSP_Length) num); + vDSP_vdiv (src, 1, dest, 1, dest, 1, (vDSP_Length) num); #else - YUP_PERFORM_VEC_OP_DEST (dest[i] /= divisor, - Mode::div (d, divs), - YUP_LOAD_DEST, - const Mode::ParallelType divs = Mode::load1 (divisor);) + YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] /= src[i], + Mode::div (d, s), + YUP_LOAD_SRC_DEST, + YUP_INCREMENT_SRC_DEST, ) #endif } template -void divide (double* dest, double divisor, Size num) noexcept +void divide (double* dest, const double* src, Size num) noexcept { #if YUP_USE_VDSP_FRAMEWORK - vDSP_vsdivD (dest, 1, &divisor, dest, 1, (vDSP_Length) num); + vDSP_vdivD (src, 1, dest, 1, dest, 1, (vDSP_Length) num); #else - YUP_PERFORM_VEC_OP_DEST (dest[i] /= divisor, - Mode::div (d, divs), - YUP_LOAD_DEST, - const Mode::ParallelType divs = Mode::load1 (divisor);) + YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] /= src[i], + Mode::div (d, s), + YUP_LOAD_SRC_DEST, + 
YUP_INCREMENT_SRC_DEST, ) #endif } @@ -1216,6 +1216,32 @@ void divide (double* dest, const double* src1, const double* src2, Size num) noe #endif } +template +void divide (float* dest, float divisor, Size num) noexcept +{ +#if YUP_USE_VDSP_FRAMEWORK + vDSP_vsdiv (dest, 1, &divisor, dest, 1, (vDSP_Length) num); +#else + YUP_PERFORM_VEC_OP_DEST (dest[i] /= divisor, + Mode::div (d, divs), + YUP_LOAD_DEST, + const Mode::ParallelType divs = Mode::load1 (divisor);) +#endif +} + +template +void divide (double* dest, double divisor, Size num) noexcept +{ +#if YUP_USE_VDSP_FRAMEWORK + vDSP_vsdivD (dest, 1, &divisor, dest, 1, (vDSP_Length) num); +#else + YUP_PERFORM_VEC_OP_DEST (dest[i] /= divisor, + Mode::div (d, divs), + YUP_LOAD_DEST, + const Mode::ParallelType divs = Mode::load1 (divisor);) +#endif +} + template void divide (float* dest, const float* src, float divisor, Size num) noexcept { @@ -1650,6 +1676,40 @@ void YUP_CALLTYPE FloatVectorOperationsBase::multiply (Flo FloatVectorHelpers::multiply (dest, src, multiplier, num); } +template +void YUP_CALLTYPE FloatVectorOperationsBase::divide (FloatType* dest, + const FloatType* src, + CountType numValues) noexcept +{ + FloatVectorHelpers::divide (dest, src, numValues); +} + +template +void YUP_CALLTYPE FloatVectorOperationsBase::divide (FloatType* dest, + const FloatType* src1, + const FloatType* src2, + CountType numValues) noexcept +{ + FloatVectorHelpers::divide (dest, src1, src2, numValues); +} + +template +void YUP_CALLTYPE FloatVectorOperationsBase::divide (FloatType* dest, + FloatType divisor, + CountType numValues) noexcept +{ + FloatVectorHelpers::divide (dest, divisor, numValues); +} + +template +void YUP_CALLTYPE FloatVectorOperationsBase::divide (FloatType* dest, + const FloatType* src, + FloatType divisor, + CountType num) noexcept +{ + FloatVectorHelpers::divide (dest, src, divisor, num); +} + template void YUP_CALLTYPE FloatVectorOperationsBase::negate (FloatType* dest, const FloatType* src, diff --git 
a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h index 3b66168fc..b83831b82 100644 --- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h +++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h @@ -186,11 +186,14 @@ struct NameForwarder : public Bases... Bases::fill..., Bases::copy..., Bases::copyWithMultiply..., + Bases::copyWithDividend..., + Bases::copyWithDivide..., Bases::add..., Bases::subtract..., Bases::addWithMultiply..., Bases::subtractWithMultiply..., Bases::multiply..., + Bases::divide..., Bases::negate..., Bases::abs..., Bases::min..., diff --git a/tests/yup_audio_basics/yup_FloatVectorOperations.cpp b/tests/yup_audio_basics/yup_FloatVectorOperations.cpp index 223cbda7d..b4fc11b6c 100644 --- a/tests/yup_audio_basics/yup_FloatVectorOperations.cpp +++ b/tests/yup_audio_basics/yup_FloatVectorOperations.cpp @@ -125,6 +125,33 @@ class FloatVectorOperationsTests : public ::testing::Test FloatVectorOperations::fill (data2, (ValueType) 3, num); FloatVectorOperations::addWithMultiply (data1, data1, data2, num); EXPECT_TRUE (areAllValuesEqual (data1, num, (ValueType) 8)); + + FloatVectorOperations::fill (data1, (ValueType) 8, num); + FloatVectorOperations::copyWithDividend (data2, data1, (ValueType) 16, num); + EXPECT_TRUE (areAllValuesEqual (data2, num, (ValueType) 2)); + + FloatVectorOperations::fill (data1, (ValueType) 12, num); + FloatVectorOperations::copyWithDivide (data2, data1, (ValueType) 3, num); + EXPECT_TRUE (areAllValuesEqual (data2, num, (ValueType) 4)); + + FloatVectorOperations::fill (data1, (ValueType) 20, num); + FloatVectorOperations::divide (data1, (ValueType) 4, num); + EXPECT_TRUE (areAllValuesEqual (data1, num, (ValueType) 5)); + + FloatVectorOperations::fill (data1, (ValueType) 15, num); + FloatVectorOperations::fill (data2, (ValueType) 3, num); + HeapBlock result (num + 16); +#if YUP_ARM + ValueType* const resultData = result; +#else + 
ValueType* const resultData = addBytesToPointer (result.get(), random.nextInt (16)); +#endif + FloatVectorOperations::divide (resultData, data1, data2, num); + EXPECT_TRUE (areAllValuesEqual (resultData, num, (ValueType) 5)); + + FloatVectorOperations::fill (data1, (ValueType) 18, num); + FloatVectorOperations::divide (data2, data1, (ValueType) 6, num); + EXPECT_TRUE (areAllValuesEqual (data2, num, (ValueType) 3)); } static void fillRandomly (Random& random, ValueType* d, int num) From f5d66f61ea0c3214a4212d7523c40e8421671239 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 09:56:51 +0200 Subject: [PATCH 19/37] More comments --- tests/yup_audio_basics/yup_FloatVectorOperations.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/yup_audio_basics/yup_FloatVectorOperations.cpp b/tests/yup_audio_basics/yup_FloatVectorOperations.cpp index b4fc11b6c..3d7bdb91e 100644 --- a/tests/yup_audio_basics/yup_FloatVectorOperations.cpp +++ b/tests/yup_audio_basics/yup_FloatVectorOperations.cpp @@ -144,6 +144,8 @@ class FloatVectorOperationsTests : public ::testing::Test #if YUP_ARM ValueType* const resultData = result; #else + // These tests deliberately operate on misaligned memory and will be flagged up by + // checks for undefined behavior! 
ValueType* const resultData = addBytesToPointer (result.get(), random.nextInt (16)); #endif FloatVectorOperations::divide (resultData, data1, data2, num); From 61b5bee3d1a89858f96c8827cd682956adff4f39 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 10:00:40 +0200 Subject: [PATCH 20/37] Restored fixed to floatconversions --- .../buffers/yup_FloatVectorOperations.cpp | 29 +++++++++++++++++++ .../buffers/yup_FloatVectorOperations.h | 6 ++++ 2 files changed, 35 insertions(+) diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp index 7fbb129bf..accf84ce3 100644 --- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp +++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp @@ -1501,6 +1501,23 @@ double findMaximum (const double* src, Size num) noexcept #endif } +template +void convertFixedToFloat (float* dest, const int* src, float multiplier, Size num) noexcept +{ +#if YUP_USE_ARM_NEON + YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = (float) src[i] * multiplier, + vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier), + YUP_LOAD_NONE, + YUP_INCREMENT_SRC_DEST, ) +#else + YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = (float) src[i] * multiplier, + Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 (reinterpret_cast (src)))), + YUP_LOAD_NONE, + YUP_INCREMENT_SRC_DEST, + const Mode::ParallelType mult = Mode::load1 (multiplier);) +#endif +} + } // namespace } // namespace FloatVectorHelpers @@ -1802,6 +1819,18 @@ template struct FloatVectorOperationsBase; //============================================================================== +void YUP_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, size_t num) noexcept +{ + FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, num); +} + +void YUP_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) 
noexcept +{ + FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, num); +} + +//============================================================================== + intptr_t YUP_CALLTYPE FloatVectorOperations::getFpStatusRegister() noexcept { intptr_t fpsr = 0; diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h index b83831b82..033c33b49 100644 --- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h +++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h @@ -220,6 +220,12 @@ struct NameForwarder : public Bases... class YUP_API FloatVectorOperations : public detail::NameForwarder, FloatVectorOperationsBase, FloatVectorOperationsBase, FloatVectorOperationsBase> { public: + /** */ + static void JUCE_CALLTYPE convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept; + + /** */ + static void JUCE_CALLTYPE convertFixedToFloat (float* dest, const int* src, float multiplier, size_t num) noexcept; + /** This method enables or disables the SSE/NEON flush-to-zero mode. 
*/ static void YUP_CALLTYPE enableFlushToZeroMode (bool shouldEnable) noexcept; From 9b1e6ee458f5a98746769b50c37994f4e6ecd654 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 10:32:56 +0200 Subject: [PATCH 21/37] More work on FloatVectorOperations --- .../buffers/yup_FloatVectorOperations.cpp | 45 ++++++++++++++++++- .../buffers/yup_FloatVectorOperations.h | 10 +++-- .../yup_FloatVectorOperations.cpp | 22 +++++++++ 3 files changed, 72 insertions(+), 5 deletions(-) diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp index accf84ce3..bcb9d614b 100644 --- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp +++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp @@ -83,6 +83,10 @@ struct BasicOps32 static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); } + static forcedinline void storeU (int* dest, ParallelType a) noexcept { _mm_storeu_si128 (reinterpret_cast<__m128i*> (dest), _mm_castps_si128 (a)); } + + static forcedinline void storeA (int* dest, ParallelType a) noexcept { _mm_store_si128 (reinterpret_cast<__m128i*> (dest), _mm_castps_si128 (a)); } + static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); } static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); } @@ -335,6 +339,10 @@ struct BasicOps32 static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); } + static forcedinline void storeU (int* dest, ParallelType a) noexcept { vst1q_f32 (reinterpret_cast (dest), a); } + + static forcedinline void storeA (int* dest, ParallelType a) noexcept { vst1q_f32 (reinterpret_cast (dest), a); } + static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); } static forcedinline ParallelType sub (ParallelType a, ParallelType 
b) noexcept { return vsubq_f32 (a, b); } @@ -1509,12 +1517,37 @@ void convertFixedToFloat (float* dest, const int* src, float multiplier, Size nu vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier), YUP_LOAD_NONE, YUP_INCREMENT_SRC_DEST, ) -#else +#elif YUP_USE_SSE_INTRINSICS YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = (float) src[i] * multiplier, Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 (reinterpret_cast (src)))), YUP_LOAD_NONE, YUP_INCREMENT_SRC_DEST, const Mode::ParallelType mult = Mode::load1 (multiplier);) +#else + for (Size i = 0; i < num; ++i) + dest[i] = (float) src[i] * multiplier; +#endif +} + +template +void convertFloatToFixed (int* dest, const float* src, float multiplier, Size num) noexcept +{ +#if YUP_USE_ARM_NEON + YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = (int) (src[i] * multiplier), + vreinterpretq_f32_s32 (vcvtq_s32_f32 (vmulq_n_f32 (vld1q_f32 (src), multiplier))), + YUP_LOAD_NONE, + YUP_INCREMENT_SRC_DEST, ) + +#elif YUP_USE_SSE_INTRINSICS + YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = (int) (src[i] * multiplier), + _mm_castsi128_ps (_mm_cvttps_epi32 (_mm_mul_ps (_mm_loadu_ps (src), mult))), + YUP_LOAD_NONE, + YUP_INCREMENT_SRC_DEST, + const Mode::ParallelType mult = Mode::load1 (multiplier);) + +#else + for (Size i = 0; i < num; ++i) + dest[i] = (int) (src[i] * multiplier); #endif } @@ -1829,6 +1862,16 @@ void YUP_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, num); } +void YUP_CALLTYPE FloatVectorOperations::convertFloatToFixed (int* dest, const float* src, float multiplier, size_t num) noexcept +{ + FloatVectorHelpers::convertFloatToFixed (dest, src, multiplier, num); +} + +void YUP_CALLTYPE FloatVectorOperations::convertFloatToFixed (int* dest, const float* src, float multiplier, int num) noexcept +{ + FloatVectorHelpers::convertFloatToFixed (dest, src, multiplier, num); +} + 
//============================================================================== intptr_t YUP_CALLTYPE FloatVectorOperations::getFpStatusRegister() noexcept diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h index 033c33b49..18d373ad2 100644 --- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h +++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h @@ -220,11 +220,13 @@ struct NameForwarder : public Bases... class YUP_API FloatVectorOperations : public detail::NameForwarder, FloatVectorOperationsBase, FloatVectorOperationsBase, FloatVectorOperationsBase> { public: - /** */ - static void JUCE_CALLTYPE convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept; + /** Convert fixed integer signal to float applying a multiplier. */ + static void YUP_CALLTYPE convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept; + static void YUP_CALLTYPE convertFixedToFloat (float* dest, const int* src, float multiplier, size_t num) noexcept; - /** */ - static void JUCE_CALLTYPE convertFixedToFloat (float* dest, const int* src, float multiplier, size_t num) noexcept; + /** Convert float signal to int applying a multiplier. */ + static void YUP_CALLTYPE convertFloatToFixed (int* dest, const float* src, float multiplier, int num) noexcept; + static void YUP_CALLTYPE convertFloatToFixed (int* dest, const float* src, float multiplier, size_t num) noexcept; /** This method enables or disables the SSE/NEON flush-to-zero mode. 
*/ static void YUP_CALLTYPE enableFlushToZeroMode (bool shouldEnable) noexcept; diff --git a/tests/yup_audio_basics/yup_FloatVectorOperations.cpp b/tests/yup_audio_basics/yup_FloatVectorOperations.cpp index 3d7bdb91e..b7c0fa294 100644 --- a/tests/yup_audio_basics/yup_FloatVectorOperations.cpp +++ b/tests/yup_audio_basics/yup_FloatVectorOperations.cpp @@ -154,6 +154,28 @@ class FloatVectorOperationsTests : public ::testing::Test FloatVectorOperations::fill (data1, (ValueType) 18, num); FloatVectorOperations::divide (data2, data1, (ValueType) 6, num); EXPECT_TRUE (areAllValuesEqual (data2, num, (ValueType) 3)); + + fillRandomly (random, int1, num); + const ValueType multiplier = (ValueType) (1.0 / (1 << 16)); + + if constexpr (std::is_same_v) + { + convertFixed (data1, int1, multiplier, num); + FloatVectorOperations::convertFixedToFloat (data2, int1, multiplier, num); + EXPECT_TRUE (buffersMatch (data1, data2, num)); + + convertFloatToFixed (int1, data1, 1.0f / multiplier, num); + HeapBlock int2 (num + 16); +#if YUP_ARM + int* const intData = int2; +#else + int* const intData = addBytesToPointer (int2.get(), random.nextInt (16)); +#endif + FloatVectorOperations::convertFloatToFixed (intData, data1, 1.0f / multiplier, num); + + for (int i = 0; i < num; ++i) + EXPECT_EQ (int1[i], intData[i]); + } } static void fillRandomly (Random& random, ValueType* d, int num) From 3fea1ab88479383ff3c54c1c865be29034ad0c31 Mon Sep 17 00:00:00 2001 From: Yup Bot Date: Wed, 27 Aug 2025 08:33:40 +0000 Subject: [PATCH 22/37] Code formatting --- .../buffers/yup_FloatVectorOperations.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp index bcb9d614b..0108a98cb 100644 --- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp +++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp @@ -93,7 +93,7 @@ struct 
BasicOps32 static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); } - static forcedinline ParallelType div (ParallelType a, ParallelType b) noexcept { return _mm_div_ps (a, b); } + static forcedinline ParallelType div (ParallelType a, ParallelType b) noexcept { return _mm_div_ps (a, b); } static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); } @@ -154,7 +154,7 @@ struct BasicOps64 static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); } - static forcedinline ParallelType div (ParallelType a, ParallelType b) noexcept { return _mm_div_pd (a, b); } + static forcedinline ParallelType div (ParallelType a, ParallelType b) noexcept { return _mm_div_pd (a, b); } static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); } @@ -771,7 +771,6 @@ void copyWithMultiply (double* dest, const double* src, double multiplier, Size #endif } - template void copyWithDividend (float* dest, const float* src, float dividend, Size num) noexcept { @@ -1207,7 +1206,7 @@ void divide (float* dest, const float* src1, const float* src2, Size num) noexce YUP_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] / src2[i], Mode::div (s1, s2), YUP_LOAD_SRC1_SRC2, - YUP_INCREMENT_SRC1_SRC2_DEST,) + YUP_INCREMENT_SRC1_SRC2_DEST, ) #endif } @@ -1220,7 +1219,7 @@ void divide (double* dest, const double* src1, const double* src2, Size num) noe YUP_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] / src2[i], Mode::div (s1, s2), YUP_LOAD_SRC1_SRC2, - YUP_INCREMENT_SRC1_SRC2_DEST,) + YUP_INCREMENT_SRC1_SRC2_DEST, ) #endif } @@ -1854,7 +1853,7 @@ template struct FloatVectorOperationsBase; void YUP_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, size_t num) noexcept { - FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, num); + 
FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, num); } void YUP_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept @@ -1864,7 +1863,7 @@ void YUP_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const void YUP_CALLTYPE FloatVectorOperations::convertFloatToFixed (int* dest, const float* src, float multiplier, size_t num) noexcept { - FloatVectorHelpers::convertFloatToFixed (dest, src, multiplier, num); + FloatVectorHelpers::convertFloatToFixed (dest, src, multiplier, num); } void YUP_CALLTYPE FloatVectorOperations::convertFloatToFixed (int* dest, const float* src, float multiplier, int num) noexcept From 3a1632f8893afc6651951c0df1cbc2cc83c5b98e Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 11:00:38 +0200 Subject: [PATCH 23/37] Add conversions to and from float and double --- .../buffers/yup_FloatVectorOperations.cpp | 78 +++++++++++++++++++ .../buffers/yup_FloatVectorOperations.h | 8 ++ 2 files changed, 86 insertions(+) diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp index 0108a98cb..ef87efbe7 100644 --- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp +++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp @@ -1550,6 +1550,64 @@ void convertFloatToFixed (int* dest, const float* src, float multiplier, Size nu #endif } +template +void convertDoubleToFloat (float* dest, const double* src, Size num) noexcept +{ +#if YUP_USE_VDSP_FRAMEWORK + vDSP_vdpsp (src, 1, dest, 1, (vDSP_Length) num); +#else + Size i = 0; +#if YUP_USE_ARM_NEON + for (; i + 2 <= num; i += 2) + { + float64x2_t d = vld1q_f64 (src + i); + float32x2_t f = vcvt_f32_f64 (d); + vst1_f32 (dest + i, f); + } +#elif JUCE_USE_SSE_INTRINSICS + for (; i + 2 <= num; i += 2) + { + __m128d d = _mm_loadu_pd (src + i); + __m128 f = _mm_cvtpd_ps (d); + _mm_storel_pi ((__m64*) (dest + 
i), f); + } +#endif + + for (; i < num; ++i) + dest[i] = (float) src[i]; +#endif +} + +template +void convertFloatToDouble (double* dest, const float* src, Size num) noexcept +{ +#if YUP_USE_VDSP_FRAMEWORK + vDSP_vspdp (src, 1, dest, 1, (vDSP_Length) num); +#else + Size i = 0; +#if YUP_USE_ARM_NEON + for (; i + 2 <= num; i += 2) + { + float32x2_t f = vld1_f32 (src + i); + float64x2_t d = vcvt_f64_f32 (f); + vst1q_f64 (dest + i, d); + } +#elif JUCE_USE_SSE_INTRINSICS + for (; i + 4 <= num; i += 4) + { + __m128 f = _mm_loadu_ps (src + i); + __m128d d0 = _mm_cvtps_pd (f); + __m128d d1 = _mm_cvtps_pd (_mm_movehl_ps (f, f)); + _mm_storeu_pd (dest + i, d0); + _mm_storeu_pd (dest + i + 2, d1); + } +#endif + + for (; i < num; ++i) + dest[i] = (double) src[i]; +#endif +} + } // namespace } // namespace FloatVectorHelpers @@ -1871,6 +1929,26 @@ void YUP_CALLTYPE FloatVectorOperations::convertFloatToFixed (int* dest, const f FloatVectorHelpers::convertFloatToFixed (dest, src, multiplier, num); } +void YUP_CALLTYPE convertFloatToDouble (double* dest, const float* src, int num) noexcept +{ + FloatVectorHelpers::convertFloatToDouble (dest, src, num); +} + +void YUP_CALLTYPE convertFloatToDouble (double* dest, const float* src, size_t num) noexcept +{ + FloatVectorHelpers::convertFloatToDouble (dest, src, num); +} + +void YUP_CALLTYPE convertDoubleToFloat (float* dest, const double* src, int num) noexcept +{ + FloatVectorHelpers::convertDoubleToFloat (dest, src, num); +} + +void YUP_CALLTYPE convertDoubleToFloat (float* dest, const double* src, size_t num) noexcept +{ + FloatVectorHelpers::convertDoubleToFloat (dest, src, num); +} + //============================================================================== intptr_t YUP_CALLTYPE FloatVectorOperations::getFpStatusRegister() noexcept diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h index 18d373ad2..9efe0f1ea 100644 --- 
a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h +++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h @@ -228,6 +228,14 @@ class YUP_API FloatVectorOperations : public detail::NameForwarder Date: Wed, 27 Aug 2025 11:01:07 +0200 Subject: [PATCH 24/37] Unset device open --- modules/yup_audio_devices/native/yup_OpenSL_android.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/yup_audio_devices/native/yup_OpenSL_android.cpp b/modules/yup_audio_devices/native/yup_OpenSL_android.cpp index 942b59410..c20f039b9 100644 --- a/modules/yup_audio_devices/native/yup_OpenSL_android.cpp +++ b/modules/yup_audio_devices/native/yup_OpenSL_android.cpp @@ -1073,6 +1073,9 @@ class OpenSLAudioIODevice final : public AudioIODevice void close() override { stop(); + + deviceOpen = false; + session = nullptr; callback = nullptr; } From 942dccb1eac29b5d14b65559ae4efa9da88f1ac0 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 11:29:35 +0200 Subject: [PATCH 25/37] More tests --- .../buffers/yup_FloatVectorOperations.cpp | 8 +- .../yup_FloatVectorOperations.cpp | 163 +++++++++++++----- 2 files changed, 121 insertions(+), 50 deletions(-) diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp index ef87efbe7..246c17b92 100644 --- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp +++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp @@ -1929,22 +1929,22 @@ void YUP_CALLTYPE FloatVectorOperations::convertFloatToFixed (int* dest, const f FloatVectorHelpers::convertFloatToFixed (dest, src, multiplier, num); } -void YUP_CALLTYPE convertFloatToDouble (double* dest, const float* src, int num) noexcept +void YUP_CALLTYPE FloatVectorOperations::convertFloatToDouble (double* dest, const float* src, int num) noexcept { FloatVectorHelpers::convertFloatToDouble (dest, src, num); } -void YUP_CALLTYPE convertFloatToDouble (double* dest, const 
float* src, size_t num) noexcept +void YUP_CALLTYPE FloatVectorOperations::convertFloatToDouble (double* dest, const float* src, size_t num) noexcept { FloatVectorHelpers::convertFloatToDouble (dest, src, num); } -void YUP_CALLTYPE convertDoubleToFloat (float* dest, const double* src, int num) noexcept +void YUP_CALLTYPE FloatVectorOperations::convertDoubleToFloat (float* dest, const double* src, int num) noexcept { FloatVectorHelpers::convertDoubleToFloat (dest, src, num); } -void YUP_CALLTYPE convertDoubleToFloat (float* dest, const double* src, size_t num) noexcept +void YUP_CALLTYPE FloatVectorOperations::convertDoubleToFloat (float* dest, const double* src, size_t num) noexcept { FloatVectorHelpers::convertDoubleToFloat (dest, src, num); } diff --git a/tests/yup_audio_basics/yup_FloatVectorOperations.cpp b/tests/yup_audio_basics/yup_FloatVectorOperations.cpp index b7c0fa294..85a551710 100644 --- a/tests/yup_audio_basics/yup_FloatVectorOperations.cpp +++ b/tests/yup_audio_basics/yup_FloatVectorOperations.cpp @@ -154,28 +154,6 @@ class FloatVectorOperationsTests : public ::testing::Test FloatVectorOperations::fill (data1, (ValueType) 18, num); FloatVectorOperations::divide (data2, data1, (ValueType) 6, num); EXPECT_TRUE (areAllValuesEqual (data2, num, (ValueType) 3)); - - fillRandomly (random, int1, num); - const ValueType multiplier = (ValueType) (1.0 / (1 << 16)); - - if constexpr (std::is_same_v) - { - convertFixed (data1, int1, multiplier, num); - FloatVectorOperations::convertFixedToFloat (data2, int1, multiplier, num); - EXPECT_TRUE (buffersMatch (data1, data2, num)); - - convertFloatToFixed (int1, data1, 1.0f / multiplier, num); - HeapBlock int2 (num + 16); -#if YUP_ARM - int* const intData = int2; -#else - int* const intData = addBytesToPointer (int2.get(), random.nextInt (16)); -#endif - FloatVectorOperations::convertFloatToFixed (intData, data1, 1.0f / multiplier, num); - - for (int i = 0; i < num; ++i) - EXPECT_EQ (int1[i], intData[i]); - } } static 
void fillRandomly (Random& random, ValueType* d, int num) @@ -190,30 +168,6 @@ class FloatVectorOperationsTests : public ::testing::Test *d++ = random.nextInt(); } - static void convertFixed (float* d, const int* s, ValueType multiplier, int num) - { - while (--num >= 0) - *d++ = (float) *s++ * multiplier; - } - - static void convertFixedToDouble (double* d, const int* s, double multiplier, int num) - { - while (--num >= 0) - *d++ = (double) *s++ * multiplier; - } - - static void convertFloatToFixed (int* d, const float* s, float multiplier, int num) - { - while (--num >= 0) - *d++ = (int) (*s++ * multiplier); - } - - static void convertDoubleToFixed (int* d, const double* s, double multiplier, int num) - { - while (--num >= 0) - *d++ = (int) (*s++ * multiplier); - } - static bool areAllValuesEqual (const ValueType* d, int num, ValueType target) { while (--num >= 0) @@ -237,6 +191,43 @@ class FloatVectorOperationsTests : public ::testing::Test return std::abs (v1 - v2) < std::numeric_limits::epsilon(); } }; + + template + static bool valuesMatch (ValueType v1, ValueType v2) + { + return std::abs (v1 - v2) < std::numeric_limits::epsilon(); + } + + template + static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num) + { + while (--num >= 0) + { + if (! 
valuesMatch (*d1++, *d2++)) + return false; + } + + return true; + } + + static void convertFixedToFloat (float* d, const int* s, float multiplier, int num) + { + while (--num >= 0) + *d++ = (float) *s++ * multiplier; + } + + static void convertFloatToFixed (int* d, const float* s, float multiplier, int num) + { + while (--num >= 0) + *d++ = (int) (*s++ * multiplier); + } + + template + static void fillRandomly (Random& random, ValueType* d, int num) + { + while (--num >= 0) + *d++ = (ValueType) (random.nextDouble() * 1000.0); + } }; TEST_F (FloatVectorOperationsTests, BasicOperations) @@ -247,3 +238,83 @@ TEST_F (FloatVectorOperationsTests, BasicOperations) TestRunner::runTest (Random::getSystemRandom()); } } + +TEST_F (FloatVectorOperationsTests, FloatToFixedAndBack) +{ + Random& random = Random::getSystemRandom(); + + for (int i = 1000; --i >= 0;) + { + const int range = random.nextBool() ? 500 : 10; + const int num = random.nextInt (range) + 1; + + HeapBlock buffer1 (num + 16), buffer2 (num + 16); + HeapBlock buffer3 (num + 16, true); + +#if YUP_ARM + float* const data1 = buffer1; + float* const data2 = buffer2; + int* const int1 = buffer3; +#else + // These tests deliberately operate on misaligned memory and will be flagged up by + // checks for undefined behavior! 
+ float* const data1 = addBytesToPointer (buffer1.get(), random.nextInt (16)); + float* const data2 = addBytesToPointer (buffer2.get(), random.nextInt (16)); + int* const int1 = addBytesToPointer (buffer3.get(), random.nextInt (16)); +#endif + + fillRandomly (random, data1, num); + fillRandomly (random, data2, num); + + fillRandomly (random, int1, num); + const auto multiplier = (float) (1.0 / (1 << 16)); + + convertFixedToFloat (data1, int1, multiplier, num); + FloatVectorOperations::convertFixedToFloat (data2, int1, multiplier, num); + EXPECT_TRUE (buffersMatch (data1, data2, num)); + + convertFloatToFixed (int1, data1, 1.0f / multiplier, num); + HeapBlock int2 (num + 16); +#if YUP_ARM + int* const intData = int2; +#else + int* const intData = addBytesToPointer (int2.get(), random.nextInt (16)); +#endif + FloatVectorOperations::convertFloatToFixed (intData, data1, 1.0f / multiplier, num); + + for (int i = 0; i < num; ++i) + EXPECT_EQ (int1[i], intData[i]); + } +} + +TEST_F (FloatVectorOperationsTests, FloatToDoubleAndBack) +{ + Random& random = Random::getSystemRandom(); + + for (int i = 1000; --i >= 0;) + { + const int range = random.nextBool() ? 
500 : 10; + const int num = random.nextInt (range) + 1; + + HeapBlock floatBuffer (num + 16); + HeapBlock doubleBuffer (num + 16); + +#if YUP_ARM + float* const floatData = floatBuffer; + double* const doubleData = doubleBuffer; +#else + float* const floatData = addBytesToPointer (floatBuffer.get(), random.nextInt (16)); + double* const doubleData = addBytesToPointer (doubleBuffer.get(), random.nextInt (16)); +#endif + + fillRandomly (random, floatData, num); + FloatVectorOperations::convertFloatToDouble (doubleData, floatData, num); + for (int i = 0; i < num; ++i) + EXPECT_NEAR ((float) doubleData[i], (float) floatData[i], std::numeric_limits::epsilon()); + + fillRandomly (random, doubleData, num); + FloatVectorOperations::convertDoubleToFloat (floatData, doubleData, num); + for (int i = 0; i < num; ++i) + EXPECT_NEAR ((float) floatData[i], (float) doubleData[i], std::numeric_limits::epsilon()); + } +} From ef4f8527540e9586d611129adbf8b4649c466871 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 11:42:19 +0200 Subject: [PATCH 26/37] More fixes --- modules/yup_core/text/yup_String.h | 9 ++++----- tests/yup_core/yup_String.cpp | 12 ++++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/modules/yup_core/text/yup_String.h b/modules/yup_core/text/yup_String.h index 60d4110af..4cd5ef796 100644 --- a/modules/yup_core/text/yup_String.h +++ b/modules/yup_core/text/yup_String.h @@ -1193,11 +1193,10 @@ class YUP_API String final return "0"; } - auto numDigitsBeforePoint = (int) std::ceil (std::log10 (number < 0 ? 
-number : number)); - - auto shift = numberOfSignificantFigures - numDigitsBeforePoint; - auto factor = std::pow (10.0, shift); - auto rounded = std::round (number * factor) / factor; + const auto numDigitsBeforePoint = (int) std::floor (std::log10 (std::abs (number)) + DecimalType (1)); + const auto shift = numberOfSignificantFigures - numDigitsBeforePoint; + const auto factor = std::pow (10.0, shift); + const auto rounded = std::round (number * factor) / factor; std::stringstream ss; ss << std::fixed << std::setprecision (std::max (shift, 0)) << rounded; diff --git a/tests/yup_core/yup_String.cpp b/tests/yup_core/yup_String.cpp index 0aedea73b..bb9dcb64a 100644 --- a/tests/yup_core/yup_String.cpp +++ b/tests/yup_core/yup_String.cpp @@ -571,6 +571,18 @@ TEST_F (StringTests, SignificantFigures) EXPECT_EQ (String::toDecimalStringWithSignificantFigures (2.8647, 6), String ("2.86470")); EXPECT_EQ (String::toDecimalStringWithSignificantFigures (-0.0000000000019, 1), String ("-0.000000000002")); + + EXPECT_EQ (String::toDecimalStringWithSignificantFigures (0.001, 7), String ("0.001000000")); + EXPECT_EQ (String::toDecimalStringWithSignificantFigures (0.01, 7), String ("0.01000000")); + EXPECT_EQ (String::toDecimalStringWithSignificantFigures (0.1, 7), String ("0.1000000")); + EXPECT_EQ (String::toDecimalStringWithSignificantFigures (1, 7), String ("1.000000")); + EXPECT_EQ (String::toDecimalStringWithSignificantFigures (10, 7), String ("10.00000")); + EXPECT_EQ (String::toDecimalStringWithSignificantFigures (100, 7), String ("100.0000")); + EXPECT_EQ (String::toDecimalStringWithSignificantFigures (1000, 7), String ("1000.000")); + EXPECT_EQ (String::toDecimalStringWithSignificantFigures (10000, 7), String ("10000.00")); + EXPECT_EQ (String::toDecimalStringWithSignificantFigures (100000, 7), String ("100000.0")); + EXPECT_EQ (String::toDecimalStringWithSignificantFigures (1000000, 7), String ("1000000")); + EXPECT_EQ (String::toDecimalStringWithSignificantFigures 
(10000000, 7), String ("10000000")); } TEST_F (StringTests, FloatTrimming) From 5c58a34b93a0900594a04631f2ab7d6b816f878d Mon Sep 17 00:00:00 2001 From: Yup Bot Date: Wed, 27 Aug 2025 09:52:44 +0000 Subject: [PATCH 27/37] Code formatting --- .../buffers/yup_FloatVectorOperations.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp index 246c17b92..42c601386 100644 --- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp +++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp @@ -1568,13 +1568,13 @@ void convertDoubleToFloat (float* dest, const double* src, Size num) noexcept for (; i + 2 <= num; i += 2) { __m128d d = _mm_loadu_pd (src + i); - __m128 f = _mm_cvtpd_ps (d); + __m128 f = _mm_cvtpd_ps (d); _mm_storel_pi ((__m64*) (dest + i), f); } #endif - for (; i < num; ++i) - dest[i] = (float) src[i]; + for (; i < num; ++i) + dest[i] = (float) src[i]; #endif } @@ -1603,8 +1603,8 @@ void convertFloatToDouble (double* dest, const float* src, Size num) noexcept } #endif - for (; i < num; ++i) - dest[i] = (double) src[i]; + for (; i < num; ++i) + dest[i] = (double) src[i]; #endif } From dd6ecd79db424d9d5ae5bb51684d8fb3ec57460e Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 11:55:30 +0200 Subject: [PATCH 28/37] Fix warning --- modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index 9eab19c50..68dfe45d7 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -235,7 +235,6 @@ class PartitionedConvolver::DirectFIR #elif YUP_USE_SSE_INTRINSICS __m128 vacc = _mm_setzero_ps(); - std::size_t i = 0; #if YUP_USE_FMA_INTRINSICS for (; i + 4 
<= len; i += 4) { From ca39bd75bddce1406d755036c0824e3504845819 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Wed, 27 Aug 2025 11:58:52 +0200 Subject: [PATCH 29/37] Fix SSE3 > SSE2 --- modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index 68dfe45d7..e189a5dc0 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -217,7 +217,6 @@ class PartitionedConvolver::DirectFIR std::size_t i = 0; #if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS - // 8-wide AVX2 FMA path __m256 vacc = _mm256_setzero_ps(); for (; i + 8 <= len; i += 8) { @@ -225,7 +224,6 @@ class PartitionedConvolver::DirectFIR __m256 vb = _mm256_loadu_ps (b + i); vacc = _mm256_fmadd_ps (va, vb, vacc); } - // horizontal add __m128 low = _mm256_castps256_ps128 (vacc); __m128 high = _mm256_extractf128_ps (vacc, 1); __m128 vsum = _mm_add_ps (low, high); @@ -250,8 +248,7 @@ class PartitionedConvolver::DirectFIR vacc = _mm_add_ps (vacc, _mm_mul_ps (va, vb)); } #endif - // horizontal add - __m128 shuf = _mm_movehdup_ps (vacc); + __m128 shuf = _mm_shuffle_ps (vacc, vacc, _MM_SHUFFLE (2, 3, 0, 1)); __m128 sums = _mm_add_ps (vacc, shuf); shuf = _mm_movehl_ps (shuf, sums); sums = _mm_add_ss (sums, shuf); From 1ed1edb50778a40efacb0de8107babb591701c2d Mon Sep 17 00:00:00 2001 From: kunitoki Date: Thu, 11 Sep 2025 21:19:16 +0200 Subject: [PATCH 30/37] Still delay running --- tests/main.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/main.cpp b/tests/main.cpp index 37d5d7637..35d8eae6f 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -80,7 +80,10 @@ struct TestApplication : yup::YUPApplication else { // Run suites individually - runNextSuite (0); + yup::MessageManager::callAsync ([this] + { + runNextSuite (0); + }); 
} } From 47094537872f8c492d4f7551113639e7e5a8a326 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Thu, 11 Sep 2025 22:01:58 +0200 Subject: [PATCH 31/37] More work --- .../source/examples/ConvolutionDemo.h | 8 +- .../convolution/yup_PartitionedConvolver.cpp | 566 ++++++++++-------- .../convolution/yup_PartitionedConvolver.h | 1 + modules/yup_dsp/yup_dsp.h | 1 + tests/yup_dsp/yup_PartitionedConvolver.cpp | 281 +++++++++ 5 files changed, 592 insertions(+), 265 deletions(-) diff --git a/examples/graphics/source/examples/ConvolutionDemo.h b/examples/graphics/source/examples/ConvolutionDemo.h index a984b57eb..4253d8973 100644 --- a/examples/graphics/source/examples/ConvolutionDemo.h +++ b/examples/graphics/source/examples/ConvolutionDemo.h @@ -60,7 +60,7 @@ class ConvolutionDemo dryGain.setCurrentAndTargetValue (0.3f); // Configure convolver with typical layout - convolver.setTypicalLayout (256, {256, 1024, 4096}); + convolver.setTypicalLayout (256, { 256, 1024, 4096 }); // Create UI createUI(); @@ -293,7 +293,9 @@ class ConvolutionDemo impulseResponseData[static_cast (i)] = impulseResponseBuffer.getSample (0, i) * normalizationGain; // Set impulse response in convolver - convolver.setImpulseResponse (impulseResponseData); + yup::PartitionedConvolver::IRLoadOptions loadOptions; + loadOptions.trimEndSilenceBelowDb = -60.0f; + convolver.setImpulseResponse (impulseResponseData, loadOptions); hasImpulseResponse = true; std::cout << "Loaded impulse response: " << file.getFileName() << std::endl; @@ -388,7 +390,7 @@ class ConvolutionDemo irWaveformDisplay.setMargins (25, 25, 25, 25); // Add grid lines - irWaveformDisplay.setVerticalGridLines ({ 0.0, 1.0 }); + irWaveformDisplay.setVerticalGridLines ({ 0.0, 1.0 }); irWaveformDisplay.setHorizontalGridLines ({ -1.0, -0.5, 0.5, 1.0 }); irWaveformDisplay.addHorizontalGridLine (0.0, yup::Color (0xFF666666), 1.0f, true); diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp 
b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index e189a5dc0..133d27335 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -46,20 +46,12 @@ static void complexMultiplyAccumulate (const float* A, const float* B, float* Y, __m256 b = _mm256_loadu_ps (B + idx); __m256 y = _mm256_loadu_ps (Y + idx); - // a = [ar0 ai0 ar1 ai1 ar2 ai2 ar3 ai3] - // b = [br0 bi0 br1 bi1 br2 bi2 br3 bi3] - - // separate real and imag for a and b const __m256 a_shuffled = _mm256_permute_ps (a, _MM_SHUFFLE (2, 3, 0, 1)); const __m256 b_shuffled = _mm256_permute_ps (b, _MM_SHUFFLE (2, 3, 0, 1)); - // real = ar*br - ai*bi __m256 realPart = _mm256_fmsub_ps (a, b, _mm256_mul_ps (a_shuffled, b_shuffled)); - - // imag = ar*bi + ai*br __m256 imagPart = _mm256_fmadd_ps (a, b_shuffled, _mm256_mul_ps (a_shuffled, b)); - // interleave real/imag back const __m256 interleaved = _mm256_blend_ps (realPart, imagPart, 0b10101010); y = _mm256_add_ps (y, interleaved); @@ -76,17 +68,12 @@ static void complexMultiplyAccumulate (const float* A, const float* B, float* Y, __m128 b = _mm_loadu_ps (B + idx); __m128 y = _mm_loadu_ps (Y + idx); - // separate real and imag for a and b const __m128 a_shuffled = _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 3, 0, 1)); const __m128 b_shuffled = _mm_shuffle_ps (b, b, _MM_SHUFFLE (2, 3, 0, 1)); - // real = ar*br - ai*bi __m128 realPart = _mm_sub_ps (_mm_mul_ps (a, b), _mm_mul_ps (a_shuffled, b_shuffled)); - - // imag = ar*bi + ai*br __m128 imagPart = _mm_add_ps (_mm_mul_ps (a, b_shuffled), _mm_mul_ps (a_shuffled, b)); - // interleave real/imag back const __m128 interleaved = _mm_unpacklo_ps (realPart, imagPart); y = _mm_add_ps (y, interleaved); @@ -94,38 +81,29 @@ static void complexMultiplyAccumulate (const float* A, const float* B, float* Y, } #elif YUP_USE_ARM_NEON - constexpr int simdWidth = 2; // NEON path: process 2 complex pairs (4 floats) at a time + constexpr int simdWidth = 
4; for (; i <= complexPairs - simdWidth; i += simdWidth) { const int idx = i * 2; - float32x4_t a = vld1q_f32 (A + idx); // [ar0, ai0, ar1, ai1] - float32x4_t b = vld1q_f32 (B + idx); // [br0, bi0, br1, bi1] - float32x4_t y = vld1q_f32 (Y + idx); - - // Shuffle a and b to get swapped real/imag for cross-multiplication - float32x4_t a_shuf = vrev64q_f32 (a); // [ai0, ar0, ai1, ar1] - float32x4_t b_shuf = vrev64q_f32 (b); // [bi0, br0, bi1, br1] + float32x4x2_t a = vld2q_f32 (A + idx); + float32x4x2_t b = vld2q_f32 (B + idx); + float32x4x2_t y = vld2q_f32 (Y + idx); - // real = ar*br - ai*bi - float32x4_t realPart = vsubq_f32 (vmulq_f32 (a, b), vmulq_f32 (a_shuf, b_shuf)); + float32x4_t ar = a.val[0], ai = a.val[1]; + float32x4_t br = b.val[0], bi = b.val[1]; + float32x4_t yr = y.val[0], yi = y.val[1]; - // imag = ar*bi + ai*br - float32x4_t imagPart = vaddq_f32 (vmulq_f32 (a, b_shuf), vmulq_f32 (a_shuf, b)); + float32x4_t real = vmulq_f32 (ar, br); + real = vfmsq_f32 (real, ai, bi); + float32x4_t imag = vmulq_f32 (ar, bi); + imag = vfmaq_f32 (imag, ai, br); - // Interleave real and imag: [real0, imag0, real1, imag1] - float32x2_t realLow = vget_low_f32 (realPart); - float32x2_t imagLow = vget_low_f32 (imagPart); - float32x2x2_t zippedLow = vzip_f32 (realLow, imagLow); + yr = vaddq_f32 (yr, real); + yi = vaddq_f32 (yi, imag); - float32x2_t realHigh = vget_high_f32 (realPart); - float32x2_t imagHigh = vget_high_f32 (imagPart); - float32x2x2_t zippedHigh = vzip_f32 (realHigh, imagHigh); - - float32x4_t interleaved = vcombine_f32 (zippedLow.val[0], zippedHigh.val[0]); - - y = vaddq_f32 (y, interleaved); - vst1q_f32 (Y + idx, y); + float32x4x2_t out = { yr, yi }; + vst2q_f32 (Y + idx, out); // interleave back } #endif @@ -140,7 +118,6 @@ static void complexMultiplyAccumulate (const float* A, const float* B, float* Y, const float br = B[ri]; const float bi = B[ii]; - // (ar + j*ai) * (br + j*bi) = (ar*br - ai*bi) + j*(ar*bi + ai*br) Y[ri] += ar * br - ai * bi; Y[ii] += ar 
* bi + ai * br; } @@ -157,38 +134,38 @@ class PartitionedConvolver::DirectFIR { FloatVectorOperations::multiply (taps.data(), scaling, taps.size()); - tapsReversed_ = std::move (taps); - std::reverse (tapsReversed_.begin(), tapsReversed_.end()); + tapsReversed = std::move (taps); + std::reverse (tapsReversed.begin(), tapsReversed.end()); - numTaps_ = tapsReversed_.size(); - paddedLen_ = (numTaps_ + 3u) & ~3u; - tapsReversed_.resize (paddedLen_, 0.0f); + numTaps = tapsReversed.size(); + paddedLen = (numTaps + 3u) & ~3u; + tapsReversed.resize (paddedLen, 0.0f); - history_.assign (2 * numTaps_, 0.0f); - writeIndex_ = 0; + history.assign (2 * numTaps, 0.0f); + writeIndex = 0; } void reset() { - std::fill (history_.begin(), history_.end(), 0.0f); - writeIndex_ = 0; + std::fill (history.begin(), history.end(), 0.0f); + writeIndex = 0; } void process (const float* input, float* output, std::size_t numSamples) noexcept { - const std::size_t M = numTaps_; + const std::size_t M = numTaps; if (M == 0) return; - const float* h = tapsReversed_.data(); + const float* h = tapsReversed.data(); for (std::size_t i = 0; i < numSamples; ++i) { const float x = input[i]; - history_[writeIndex_] = x; - history_[writeIndex_ + M] = x; + history[writeIndex] = x; + history[writeIndex + M] = x; - const float* w = history_.data() + writeIndex_ + 1; + const float* w = history.data() + writeIndex + 1; float sum = 0.0f; @@ -200,14 +177,14 @@ class PartitionedConvolver::DirectFIR output[i] += sum; - if (++writeIndex_ == M) - writeIndex_ = 0; + if (++writeIndex == M) + writeIndex = 0; } } std::size_t getNumTaps() const { - return numTaps_; + return numTaps; } private: @@ -280,11 +257,11 @@ class PartitionedConvolver::DirectFIR return acc; } - std::vector tapsReversed_; - std::vector history_; - std::size_t numTaps_ = 0; - std::size_t paddedLen_ = 0; - std::size_t writeIndex_ = 0; + std::vector tapsReversed; + std::vector history; + std::size_t numTaps = 0; + std::size_t paddedLen = 0; + std::size_t 
writeIndex = 0; }; //============================================================================== @@ -298,41 +275,41 @@ class PartitionedConvolver::FFTLayer FFTLayer (FFTLayer&& other) = default; FFTLayer& operator= (FFTLayer&& other) = default; - void configure (int hopSize) + void configure (int newHopSize) { - hopSize_ = hopSize; - fftSize_ = hopSize * 2; + hopSize = newHopSize; + fftSize = hopSize * 2; - fftProcessor_.setSize (fftSize_); - fftProcessor_.setScaling (FFTProcessor::FFTScaling::asymmetric); + fftProcessor.setSize (fftSize); + fftProcessor.setScaling (FFTProcessor::FFTScaling::asymmetric); - overlapBuffer_.assign (static_cast (hopSize_), 0.0f); - timeBuffer_.assign (static_cast (fftSize_), 0.0f); - frequencyBuffer_.assign (static_cast (fftSize_) * 2, 0.0f); - tempBuffer_.assign (static_cast (fftSize_) * 2, 0.0f); // Must hold complex data for in-place FFT + overlapBuffer.assign (static_cast (hopSize), 0.0f); + timeBuffer.assign (static_cast (fftSize), 0.0f); + frequencyBuffer.assign (static_cast (fftSize) * 2, 0.0f); + tempBuffer.assign (static_cast (fftSize) * 2, 0.0f); // Must hold complex data for in-place FFT - fdlIndex_ = 0; - configured_ = true; + fdlIndex = 0; + configured = true; } - int getHopSize() const { return hopSize_; } + int getHopSize() const { return hopSize; } - int getFFTSize() const { return fftSize_; } + int getFFTSize() const { return fftSize; } - bool isConfigured() const { return configured_; } + bool isConfigured() const { return configured; } std::size_t setImpulseResponse (const float* impulseResponse, std::size_t length, float scaling) { - jassert (configured_); + jassert (configured); - if (fftSize_ <= 0 || hopSize_ <= 0) + if (fftSize <= 0 || hopSize <= 0) { resetState(); return 0; } - frequencyPartitions_.clear(); - frequencyDelayLine_.clear(); + frequencyPartitions.clear(); + frequencyDelayLine.clear(); if (length == 0 || impulseResponse == nullptr) { @@ -340,7 +317,7 @@ class PartitionedConvolver::FFTLayer return 
0; } - const auto numPartitions = (length + static_cast (hopSize_) - 1) / static_cast (hopSize_); + const auto numPartitions = (length + static_cast (hopSize) - 1) / static_cast (hopSize); if (numPartitions == 0) { resetState(); @@ -348,33 +325,33 @@ class PartitionedConvolver::FFTLayer } std::size_t processedSamples = 0; - frequencyPartitions_.reserve (numPartitions); + frequencyPartitions.reserve (numPartitions); for (std::size_t p = 0; p < numPartitions; ++p) { std::vector partition; - partition.resize (static_cast (fftSize_) * 2); + partition.resize (static_cast (fftSize) * 2); - std::fill (tempBuffer_.begin(), tempBuffer_.end(), 0.0f); + std::fill (tempBuffer.begin(), tempBuffer.end(), 0.0f); - const std::size_t offset = p * static_cast (hopSize_); - const std::size_t copyCount = std::min (static_cast (hopSize_), length - offset); + const std::size_t offset = p * static_cast (hopSize); + const std::size_t copyCount = std::min (static_cast (hopSize), length - offset); if (copyCount > 0 && offset < length) { for (std::size_t i = 0; i < copyCount && offset + i < length; ++i) - tempBuffer_[i] = impulseResponse[offset + i] * scaling; + tempBuffer[i] = impulseResponse[offset + i] * scaling; } - fftProcessor_.performRealFFTForward (tempBuffer_.data(), partition.data()); + fftProcessor.performRealFFTForward (tempBuffer.data(), partition.data()); - frequencyPartitions_.push_back (std::move (partition)); + frequencyPartitions.push_back (std::move (partition)); processedSamples += copyCount; } - frequencyDelayLine_.assign (numPartitions, std::vector (static_cast (fftSize_) * 2, 0.0f)); - fdlIndex_ = 0; + frequencyDelayLine.assign (numPartitions, std::vector (static_cast (fftSize) * 2, 0.0f)); + fdlIndex = 0; resetState(); @@ -383,82 +360,82 @@ class PartitionedConvolver::FFTLayer void resetState() { - fdlIndex_ = 0; + fdlIndex = 0; - for (auto& partition : frequencyDelayLine_) + for (auto& partition : frequencyDelayLine) std::fill (partition.begin(), partition.end(), 
0.0f); - std::fill (overlapBuffer_.begin(), overlapBuffer_.end(), 0.0f); - std::fill (timeBuffer_.begin(), timeBuffer_.end(), 0.0f); - std::fill (frequencyBuffer_.begin(), frequencyBuffer_.end(), 0.0f); + std::fill (overlapBuffer.begin(), overlapBuffer.end(), 0.0f); + std::fill (timeBuffer.begin(), timeBuffer.end(), 0.0f); + std::fill (frequencyBuffer.begin(), frequencyBuffer.end(), 0.0f); } void processHop (const float* inputHop, float* outputAccumulator) { - jassert (configured_); + jassert (configured); - if (frequencyPartitions_.empty()) + if (frequencyPartitions.empty()) return; // 1) Transform current input hop to frequency domain - FloatVectorOperations::copy (tempBuffer_.data(), inputHop, hopSize_); - fftProcessor_.performRealFFTForward (tempBuffer_.data(), tempBuffer_.data()); + FloatVectorOperations::copy (tempBuffer.data(), inputHop, hopSize); + fftProcessor.performRealFFTForward (tempBuffer.data(), tempBuffer.data()); // 2) Store in frequency delay line (circular buffer) - copy full complex buffer - fdlIndex_ = (fdlIndex_ == 0) ? static_cast (frequencyDelayLine_.size()) - 1 : fdlIndex_ - 1; - std::copy (tempBuffer_.begin(), tempBuffer_.begin() + (fftSize_ * 2), frequencyDelayLine_[static_cast (fdlIndex_)].begin()); + fdlIndex = (fdlIndex == 0) ? 
static_cast (frequencyDelayLine.size()) - 1 : fdlIndex - 1; + std::copy (tempBuffer.begin(), tempBuffer.begin() + (fftSize * 2), frequencyDelayLine[static_cast (fdlIndex)].begin()); // 3) Frequency domain convolution: Y = sum(X[k-p] * H[p]) - FloatVectorOperations::clear (frequencyBuffer_.data(), fftSize_ * 2); + FloatVectorOperations::clear (frequencyBuffer.data(), fftSize * 2); - int xIndex = fdlIndex_; - for (std::size_t p = 0; p < frequencyPartitions_.size(); ++p) + int xIndex = fdlIndex; + for (std::size_t p = 0; p < frequencyPartitions.size(); ++p) { - const float* X = frequencyDelayLine_[static_cast (xIndex)].data(); - const float* H = frequencyPartitions_[p].data(); + const float* X = frequencyDelayLine[static_cast (xIndex)].data(); + const float* H = frequencyPartitions[p].data(); // fftSize_/2 gives the number of complex pairs for real FFT - complexMultiplyAccumulate (X, H, frequencyBuffer_.data(), fftSize_ / 2); + complexMultiplyAccumulate (X, H, frequencyBuffer.data(), fftSize / 2); // Move to next older spectrum xIndex++; - if (xIndex >= static_cast (frequencyDelayLine_.size())) + if (xIndex >= static_cast (frequencyDelayLine.size())) xIndex = 0; } // 4) Inverse FFT back to time domain - fftProcessor_.performRealFFTInverse (frequencyBuffer_.data(), timeBuffer_.data()); + fftProcessor.performRealFFTInverse (frequencyBuffer.data(), timeBuffer.data()); - // 5) Overlap-Add: output first hopSize_ samples, store last hopSize_ as overlap - for (int i = 0; i < hopSize_; ++i) + // 5) Overlap-Add: output first hopSize samples, store last hopSize as overlap + for (int i = 0; i < hopSize; ++i) { - outputAccumulator[i] += timeBuffer_[i] + overlapBuffer_[i]; - overlapBuffer_[i] = timeBuffer_[i + hopSize_]; + outputAccumulator[i] += timeBuffer[i] + overlapBuffer[i]; + overlapBuffer[i] = timeBuffer[i + hopSize]; } } - bool hasImpulseResponse() const { return ! frequencyPartitions_.empty(); } + bool hasImpulseResponse() const { return ! 
frequencyPartitions.empty(); } private: - int hopSize_ = 0; - int fftSize_ = 0; + int hopSize = 0; + int fftSize = 0; - FFTProcessor fftProcessor_; + FFTProcessor fftProcessor; // IR partitions in frequency domain - std::vector> frequencyPartitions_; + std::vector> frequencyPartitions; - // Frequency Delay Line (most recent at fdlIndex_) - std::vector> frequencyDelayLine_; - int fdlIndex_ = 0; + // Frequency Delay Line (most recent at fdlIndex) + std::vector> frequencyDelayLine; + int fdlIndex = 0; // Processing buffers - std::vector overlapBuffer_; - std::vector timeBuffer_; - std::vector frequencyBuffer_; - std::vector tempBuffer_; + std::vector overlapBuffer; + std::vector timeBuffer; + std::vector frequencyBuffer; + std::vector tempBuffer; - bool configured_ = false; + bool configured = false; }; //============================================================================== @@ -470,23 +447,23 @@ class PartitionedConvolver::CircularBuffer void resize (std::size_t size) { - buffer_.resize (size); + buffer.resize (size); clear(); } void clear() { - std::fill (buffer_.begin(), buffer_.end(), 0.0f); - writeIndex_ = 0; - readIndex_ = 0; - availableForRead_ = 0; + std::fill (buffer.begin(), buffer.end(), 0.0f); + writeIndex = 0; + readIndex = 0; + availableForRead = 0; } - std::size_t getAvailableForRead() const { return availableForRead_; } + std::size_t getAvailableForRead() const { return availableForRead; } - std::size_t getAvailableForWrite() const { return buffer_.size() - availableForRead_; } + std::size_t getAvailableForWrite() const { return buffer.size() - availableForRead; } - std::size_t getSize() const { return buffer_.size(); } + std::size_t getSize() const { return buffer.size(); } void write (const float* data, std::size_t numSamples) { @@ -496,15 +473,15 @@ class PartitionedConvolver::CircularBuffer if (numSamples == 0) return; - const std::size_t beforeWrap = std::min (numSamples, buffer_.size() - writeIndex_); + const std::size_t beforeWrap = 
std::min (numSamples, buffer.size() - writeIndex); const std::size_t afterWrap = numSamples - beforeWrap; - std::copy (data, data + beforeWrap, buffer_.begin() + writeIndex_); + std::copy (data, data + beforeWrap, buffer.begin() + writeIndex); if (afterWrap > 0) - std::copy (data + beforeWrap, data + numSamples, buffer_.begin()); + std::copy (data + beforeWrap, data + numSamples, buffer.begin()); - writeIndex_ = (writeIndex_ + numSamples) % buffer_.size(); - availableForRead_ += numSamples; + writeIndex = (writeIndex + numSamples) % buffer.size(); + availableForRead += numSamples; } void read (float* data, std::size_t numSamples) @@ -515,15 +492,15 @@ class PartitionedConvolver::CircularBuffer if (numSamples == 0) return; - const std::size_t beforeWrap = std::min (numSamples, buffer_.size() - readIndex_); + const std::size_t beforeWrap = std::min (numSamples, buffer.size() - readIndex); const std::size_t afterWrap = numSamples - beforeWrap; - std::copy (buffer_.begin() + readIndex_, buffer_.begin() + readIndex_ + beforeWrap, data); + std::copy (buffer.begin() + readIndex, buffer.begin() + readIndex + beforeWrap, data); if (afterWrap > 0) - std::copy (buffer_.begin(), buffer_.begin() + afterWrap, data + beforeWrap); + std::copy (buffer.begin(), buffer.begin() + afterWrap, data + beforeWrap); - readIndex_ = (readIndex_ + numSamples) % buffer_.size(); - availableForRead_ -= numSamples; + readIndex = (readIndex + numSamples) % buffer.size(); + availableForRead -= numSamples; } void peek (float* data, std::size_t numSamples, std::size_t offset = 0) const @@ -534,13 +511,13 @@ class PartitionedConvolver::CircularBuffer if (numSamples == 0) return; - const std::size_t startIndex = (readIndex_ + offset) % buffer_.size(); - const std::size_t beforeWrap = std::min (numSamples, buffer_.size() - startIndex); + const std::size_t startIndex = (readIndex + offset) % buffer.size(); + const std::size_t beforeWrap = std::min (numSamples, buffer.size() - startIndex); const 
std::size_t afterWrap = numSamples - beforeWrap; - std::copy (buffer_.begin() + startIndex, buffer_.begin() + startIndex + beforeWrap, data); + std::copy (buffer.begin() + startIndex, buffer.begin() + startIndex + beforeWrap, data); if (afterWrap > 0) - std::copy (buffer_.begin(), buffer_.begin() + afterWrap, data + beforeWrap); + std::copy (buffer.begin(), buffer.begin() + afterWrap, data + beforeWrap); } void skip (std::size_t numSamples) @@ -548,15 +525,15 @@ class PartitionedConvolver::CircularBuffer jassert (numSamples <= getAvailableForRead()); numSamples = std::min (numSamples, getAvailableForRead()); - readIndex_ = (readIndex_ + numSamples) % buffer_.size(); - availableForRead_ -= numSamples; + readIndex = (readIndex + numSamples) % buffer.size(); + availableForRead -= numSamples; } private: - std::vector buffer_; - std::size_t writeIndex_ = 0; - std::size_t readIndex_ = 0; - std::size_t availableForRead_ = 0; + std::vector buffer; + std::size_t writeIndex = 0; + std::size_t readIndex = 0; + std::size_t availableForRead = 0; }; //============================================================================== @@ -567,96 +544,156 @@ class PartitionedConvolver::Impl Impl() = default; ~Impl() = default; - void configureLayers (std::size_t directFIRTaps, const std::vector& layers) + void configureLayers (std::size_t directFIRTaps, const std::vector& newLayers) { - directFIRTapCount_ = directFIRTaps; + directFIRTapCount = directFIRTaps; - layers_.clear(); - layers_.resize (layers.size()); + layers.clear(); + layers.resize (newLayers.size()); std::size_t maximumHopSize = 0; - baseHopSize_ = layers.empty() ? 0 : layers.front().hopSize; - for (std::size_t i = 0; i < layers.size(); ++i) + baseHopSize = newLayers.empty() ? 
0 : newLayers.front().hopSize; + for (std::size_t i = 0; i < newLayers.size(); ++i) { - layers_[i].configure (layers[i].hopSize); + layers[i].configure (newLayers[i].hopSize); if (i == 0) - baseHopSize_ = layers[i].hopSize; + baseHopSize = newLayers[i].hopSize; else - baseHopSize_ = std::min (baseHopSize_, layers[i].hopSize); + baseHopSize = std::min (baseHopSize, newLayers[i].hopSize); - maximumHopSize = std::max (maximumHopSize, static_cast (layers[i].hopSize)); + maximumHopSize = std::max (maximumHopSize, static_cast (newLayers[i].hopSize)); } - maxHopSize_ = maximumHopSize; + maxHopSize = maximumHopSize; // Clear staging buffers - will be allocated in prepare() - inputStaging_.clear(); - outputStaging_.clear(); + inputStaging.clear(); + outputStaging.clear(); // Resize per-layer circular buffers - will be allocated in prepare() - layerInputBuffers_.resize (layers.size()); - layerOutputBuffers_.resize (layers.size()); + layerInputBuffers.resize (layers.size()); + layerOutputBuffers.resize (layers.size()); - layerTempOutput_.clear(); - tempLayerHop_.clear(); + layerTempOutput.clear(); + tempLayerHop.clear(); // Clear working buffers - will be allocated in prepare() - workingOutput_.clear(); + workingOutput.clear(); - isPrepared_ = false; + isPrepared = false; } void prepare (std::size_t maxBlockSize) { - maxBlockSize_ = maxBlockSize; + this->maxBlockSize = maxBlockSize; // Prepare main input staging - needs to accumulate up to baseHopSize samples plus incoming block - const std::size_t inputStagingSize = static_cast (baseHopSize_) + maxBlockSize; - inputStaging_.resize (inputStagingSize); - outputStaging_.assign (static_cast (baseHopSize_), 0.0f); + const std::size_t inputStagingSize = static_cast (baseHopSize) + maxBlockSize; + inputStaging.resize (inputStagingSize); + outputStaging.assign (static_cast (baseHopSize), 0.0f); // Prepare per-layer circular buffers with layer-specific sizing - for (std::size_t i = 0; i < layerInputBuffers_.size(); ++i) + for 
(std::size_t i = 0; i < layerInputBuffers.size(); ++i) { - const std::size_t layerHopSize = static_cast (layers_[i].getHopSize()); + const std::size_t layerHopSize = static_cast (layers[i].getHopSize()); // Input buffer: needs to accumulate up to layerHopSize samples plus incoming block const std::size_t layerInputBufferSize = layerHopSize + maxBlockSize; - layerInputBuffers_[i].resize (layerInputBufferSize); + layerInputBuffers[i].resize (layerInputBufferSize); // Output buffer: needs to handle bursts of layerHopSize samples // Size it to handle multiple hops since read rate (baseHopSize) may be much smaller than write rate (layerHopSize) - const std::size_t layerOutputBufferSize = layerHopSize * ((layerHopSize / static_cast (baseHopSize_)) + 2); - layerOutputBuffers_[i].resize (layerOutputBufferSize); + const std::size_t layerOutputBufferSize = layerHopSize * ((layerHopSize / static_cast (baseHopSize)) + 2); + layerOutputBuffers[i].resize (layerOutputBufferSize); } // Allocate temp buffers - if (maxHopSize_ > 0) + if (maxHopSize > 0) { - layerTempOutput_.resize (maxHopSize_); - tempLayerHop_.resize (maxHopSize_); + layerTempOutput.resize (maxHopSize); + tempLayerHop.resize (maxHopSize); } // Allocate working buffers - workingOutput_.resize (maxBlockSize); + workingOutput.resize (maxBlockSize); + + isPrepared = true; + } + + std::size_t trimSilenceFromEnd (const float* impulseResponse, std::size_t length, float thresholdDb) + { + if (impulseResponse == nullptr || length == 0) + return 0; + + const float threshold = std::pow (10.0f, thresholdDb / 20.0f); + + // For short IRs, use smaller window size and be more conservative + const std::size_t minRetainLength = std::max (std::size_t (32), length / 4); + const std::size_t windowSize = std::min (std::size_t (1024), std::max (std::size_t (64), length / 20)); + + // First pass: scan from end to find significant content + std::size_t significantContentEnd = 0; + for (std::size_t i = length; i > windowSize; i -= 
windowSize) + { + const std::size_t startIdx = i - windowSize; + const std::size_t endIdx = std::min (i, length); + const std::size_t samples = endIdx - startIdx; + + if (samples == 0) + continue; + + float rmsSquared = 0.0f; + for (std::size_t j = startIdx; j < endIdx; ++j) + rmsSquared += impulseResponse[j] * impulseResponse[j]; + + const float rms = std::sqrt (rmsSquared / static_cast (samples)); + if (rms >= threshold) + { + significantContentEnd = endIdx; + break; + } + } - isPrepared_ = true; + // If no significant content found, check the beginning more carefully + if (significantContentEnd == 0) + { + const std::size_t checkLength = std::min (minRetainLength, length); + float rmsSquared = 0.0f; + for (std::size_t j = 0; j < checkLength; ++j) + rmsSquared += impulseResponse[j] * impulseResponse[j]; + + const float rms = std::sqrt (rmsSquared / static_cast (checkLength)); + if (rms < threshold) + return 1; + } + + // Return the found significant content end, but respect minimum for short IRs + if (length <= 200) // Short IR protection + return std::max (significantContentEnd, minRetainLength); + else + return std::max (significantContentEnd, windowSize); } void setImpulseResponse (const float* impulseResponse, std::size_t length, const PartitionedConvolver::IRLoadOptions& options) { DirectFIR newFIR; - std::vector newLayers (layers_.size()); + std::vector newLayers (layers.size()); + + std::size_t trimmedLength = length; // Safety check - if (impulseResponse != nullptr && length > 0) + if (impulseResponse != nullptr && trimmedLength > 0) { + // Trim end silence if requested + if (options.trimEndSilenceBelowDb) + trimmedLength = trimSilenceFromEnd (impulseResponse, length, *options.trimEndSilenceBelowDb); + // Always apply peak headroom float headroomScale = std::pow (10.0f, options.headroomDb / 20.0f); if (options.normalize) { - const auto minMax = FloatVectorOperations::findMinAndMax (impulseResponse, length); + const auto minMax = 
FloatVectorOperations::findMinAndMax (impulseResponse, trimmedLength); const float peak = std::max (std::abs (minMax.getStart()), std::abs (minMax.getEnd())); if (peak > 0.0f) @@ -666,7 +703,7 @@ class PartitionedConvolver::Impl // Update DirectFIR in-place std::vector directTaps; - const auto directTapsCount = std::min (directFIRTapCount_, length); + const auto directTapsCount = std::min (directFIRTapCount, trimmedLength); if (directTapsCount > 0) { directTaps.reserve (directTapsCount); @@ -680,9 +717,9 @@ class PartitionedConvolver::Impl for (std::size_t i = 0; i < newLayers.size(); ++i) { auto& layer = newLayers[i]; - layer.configure (layers_[i].getHopSize()); + layer.configure (layers[i].getHopSize()); - const std::size_t remaining = (consumed < length) ? (length - consumed) : 0; + const std::size_t remaining = (consumed < trimmedLength) ? (trimmedLength - consumed) : 0; if (remaining == 0) { layer.setImpulseResponse (nullptr, 0, headroomScale); @@ -694,10 +731,10 @@ class PartitionedConvolver::Impl } { - SpinLock::ScopedLockType lock (processingLock_); + SpinLock::ScopedLockType lock (processingLock); - directFIR_ = std::move (newFIR); - layers_ = std::move (newLayers); + directFIR = std::move (newFIR); + layers = std::move (newLayers); resetStateUnsafe(); } @@ -705,7 +742,7 @@ class PartitionedConvolver::Impl void reset() { - SpinLock::ScopedLockType lock (processingLock_); + SpinLock::ScopedLockType lock (processingLock); resetStateUnsafe(); } @@ -715,7 +752,7 @@ class PartitionedConvolver::Impl if (numSamples == 0) return; - SpinLock::ScopedLockType lock (processingLock_); + SpinLock::ScopedLockType lock (processingLock); processUnsafe (input, output, numSamples); } @@ -723,36 +760,36 @@ class PartitionedConvolver::Impl private: void resetStateUnsafe() { - directFIR_.reset(); - inputStagingReadIndex_ = 0; - inputStagingWriteIndex_ = 0; - inputStagingAvailable_ = 0; - std::fill (outputStaging_.begin(), outputStaging_.end(), 0.0f); + directFIR.reset(); + 
inputStagingReadIndex = 0; + inputStagingWriteIndex = 0; + inputStagingAvailable = 0; + std::fill (outputStaging.begin(), outputStaging.end(), 0.0f); - for (auto& buffer : layerInputBuffers_) + for (auto& buffer : layerInputBuffers) buffer.clear(); - for (auto& buffer : layerOutputBuffers_) + for (auto& buffer : layerOutputBuffers) buffer.clear(); - for (auto& layer : layers_) + for (auto& layer : layers) layer.resetState(); } void processUnsafe (const float* input, float* output, std::size_t numSamples) { - jassert (isPrepared_); - jassert (numSamples <= maxBlockSize_); - if (! isPrepared_ || numSamples > maxBlockSize_) + jassert (isPrepared); + jassert (numSamples <= maxBlockSize); + if (! isPrepared || numSamples > maxBlockSize) return; - FloatVectorOperations::clear (workingOutput_.data(), numSamples); + FloatVectorOperations::clear (workingOutput.data(), numSamples); // Process direct FIR (no block size constraints) - directFIR_.process (input, workingOutput_.data(), numSamples); - if (layers_.empty()) + directFIR.process (input, workingOutput.data(), numSamples); + if (layers.empty()) { - FloatVectorOperations::add (output, workingOutput_.data(), numSamples); + FloatVectorOperations::add (output, workingOutput.data(), numSamples); return; } @@ -760,125 +797,125 @@ class PartitionedConvolver::Impl writeToInputStaging (input, numSamples); std::size_t outputSamplesProduced = 0; - while (getInputStagingAvailable() >= static_cast (baseHopSize_)) + while (getInputStagingAvailable() >= static_cast (baseHopSize)) { - const std::size_t hopSize = static_cast (baseHopSize_); + const std::size_t hopSize = static_cast (baseHopSize); // Read hop from input staging - readFromInputStaging (tempLayerHop_.data(), hopSize); - FloatVectorOperations::clear (outputStaging_.data(), outputStaging_.size()); + readFromInputStaging (tempLayerHop.data(), hopSize); + FloatVectorOperations::clear (outputStaging.data(), outputStaging.size()); - for (std::size_t layerIndex = 0; layerIndex < 
layers_.size(); ++layerIndex) + for (std::size_t layerIndex = 0; layerIndex < layers.size(); ++layerIndex) { - auto& layer = layers_[layerIndex]; + auto& layer = layers[layerIndex]; if (! layer.hasImpulseResponse()) continue; const int layerHopSize = layer.getHopSize(); - auto& inputBuffer = layerInputBuffers_[layerIndex]; - auto& outputBuffer = layerOutputBuffers_[layerIndex]; + auto& inputBuffer = layerInputBuffers[layerIndex]; + auto& outputBuffer = layerOutputBuffers[layerIndex]; // Write input hop to layer's input buffer - inputBuffer.write (tempLayerHop_.data(), hopSize); + inputBuffer.write (tempLayerHop.data(), hopSize); // Process complete layer hops while (inputBuffer.getAvailableForRead() >= static_cast (layerHopSize)) { // Read a full hop for this layer - inputBuffer.read (tempLayerHop_.data(), static_cast (layerHopSize)); - FloatVectorOperations::clear (layerTempOutput_.data(), layerHopSize); + inputBuffer.read (tempLayerHop.data(), static_cast (layerHopSize)); + FloatVectorOperations::clear (layerTempOutput.data(), layerHopSize); // Process hop - layer.processHop (tempLayerHop_.data(), layerTempOutput_.data()); + layer.processHop (tempLayerHop.data(), layerTempOutput.data()); // Write output to layer's output buffer - outputBuffer.write (layerTempOutput_.data(), static_cast (layerHopSize)); + outputBuffer.write (layerTempOutput.data(), static_cast (layerHopSize)); } // Mix available output from this layer if (outputBuffer.getAvailableForRead() >= hopSize) { - outputBuffer.read (layerTempOutput_.data(), hopSize); - FloatVectorOperations::add (outputStaging_.data(), layerTempOutput_.data(), hopSize); + outputBuffer.read (layerTempOutput.data(), hopSize); + FloatVectorOperations::add (outputStaging.data(), layerTempOutput.data(), hopSize); } } // Add staging output to main output const std::size_t samplesToWrite = std::min (hopSize, numSamples - outputSamplesProduced); - FloatVectorOperations::add (workingOutput_.data() + outputSamplesProduced, 
outputStaging_.data(), samplesToWrite); + FloatVectorOperations::add (workingOutput.data() + outputSamplesProduced, outputStaging.data(), samplesToWrite); outputSamplesProduced += samplesToWrite; } // Copy final result to output (accumulate) - FloatVectorOperations::add (output, workingOutput_.data(), numSamples); + FloatVectorOperations::add (output, workingOutput.data(), numSamples); } private: void writeToInputStaging (const float* data, std::size_t numSamples) { - const std::size_t available = inputStaging_.size() - inputStagingAvailable_; + const std::size_t available = inputStaging.size() - inputStagingAvailable; jassert (numSamples <= available); numSamples = std::min (numSamples, available); if (numSamples == 0) return; - const std::size_t beforeWrap = std::min (numSamples, inputStaging_.size() - inputStagingWriteIndex_); + const std::size_t beforeWrap = std::min (numSamples, inputStaging.size() - inputStagingWriteIndex); const std::size_t afterWrap = numSamples - beforeWrap; - std::copy (data, data + beforeWrap, inputStaging_.begin() + inputStagingWriteIndex_); + std::copy (data, data + beforeWrap, inputStaging.begin() + inputStagingWriteIndex); if (afterWrap > 0) - std::copy (data + beforeWrap, data + numSamples, inputStaging_.begin()); + std::copy (data + beforeWrap, data + numSamples, inputStaging.begin()); - inputStagingWriteIndex_ = (inputStagingWriteIndex_ + numSamples) % inputStaging_.size(); - inputStagingAvailable_ += numSamples; + inputStagingWriteIndex = (inputStagingWriteIndex + numSamples) % inputStaging.size(); + inputStagingAvailable += numSamples; } void readFromInputStaging (float* data, std::size_t numSamples) { - jassert (numSamples <= inputStagingAvailable_); - numSamples = std::min (numSamples, inputStagingAvailable_); + jassert (numSamples <= inputStagingAvailable); + numSamples = std::min (numSamples, inputStagingAvailable); if (numSamples == 0) return; - const std::size_t beforeWrap = std::min (numSamples, inputStaging_.size() - 
inputStagingReadIndex_); + const std::size_t beforeWrap = std::min (numSamples, inputStaging.size() - inputStagingReadIndex); const std::size_t afterWrap = numSamples - beforeWrap; - std::copy (inputStaging_.begin() + inputStagingReadIndex_, inputStaging_.begin() + inputStagingReadIndex_ + beforeWrap, data); + std::copy (inputStaging.begin() + inputStagingReadIndex, inputStaging.begin() + inputStagingReadIndex + beforeWrap, data); if (afterWrap > 0) - std::copy (inputStaging_.begin(), inputStaging_.begin() + afterWrap, data + beforeWrap); + std::copy (inputStaging.begin(), inputStaging.begin() + afterWrap, data + beforeWrap); - inputStagingReadIndex_ = (inputStagingReadIndex_ + numSamples) % inputStaging_.size(); - inputStagingAvailable_ -= numSamples; + inputStagingReadIndex = (inputStagingReadIndex + numSamples) % inputStaging.size(); + inputStagingAvailable -= numSamples; } - std::size_t getInputStagingAvailable() const { return inputStagingAvailable_; } + std::size_t getInputStagingAvailable() const { return inputStagingAvailable; } - std::size_t directFIRTapCount_ = 0; - int baseHopSize_ = 0; - std::size_t maxHopSize_ = 0; - std::size_t maxBlockSize_ = 0; - bool isPrepared_ = false; + std::size_t directFIRTapCount = 0; + int baseHopSize = 0; + std::size_t maxHopSize = 0; + std::size_t maxBlockSize = 0; + bool isPrepared = false; - DirectFIR directFIR_; - std::vector layers_; + DirectFIR directFIR; + std::vector layers; // Working buffers - std::vector workingOutput_; + std::vector workingOutput; // Input staging with circular buffer management - std::vector inputStaging_; - std::size_t inputStagingReadIndex_ = 0; - std::size_t inputStagingWriteIndex_ = 0; - std::size_t inputStagingAvailable_ = 0; - std::vector outputStaging_; + std::vector inputStaging; + std::size_t inputStagingReadIndex = 0; + std::size_t inputStagingWriteIndex = 0; + std::size_t inputStagingAvailable = 0; + std::vector outputStaging; // Per-layer circular buffering - std::vector 
layerInputBuffers_; - std::vector layerOutputBuffers_; - std::vector tempLayerHop_; - std::vector layerTempOutput_; + std::vector layerInputBuffers; + std::vector layerOutputBuffers; + std::vector tempLayerHop; + std::vector layerTempOutput; - mutable SpinLock processingLock_; + mutable SpinLock processingLock; }; //============================================================================== @@ -915,7 +952,12 @@ void PartitionedConvolver::setTypicalLayout (std::size_t directTaps, const std:: layerSpecs.reserve (hops.size()); for (int hop : hops) - layerSpecs.push_back ({ hop }); + { + if (hop < 64) + directTaps += static_cast (hop); + else + layerSpecs.push_back ({ nextPowerOfTwo (hop) }); + } configureLayers (directTaps, layerSpecs); } diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.h b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h index 78f3cb176..b73f98fa8 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.h +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h @@ -109,6 +109,7 @@ class PartitionedConvolver bool normalize; float headroomDb; + std::optional trimEndSilenceBelowDb; }; /** diff --git a/modules/yup_dsp/yup_dsp.h b/modules/yup_dsp/yup_dsp.h index c89002c13..c42e03212 100644 --- a/modules/yup_dsp/yup_dsp.h +++ b/modules/yup_dsp/yup_dsp.h @@ -97,6 +97,7 @@ #include #include #include +#include #include //============================================================================== diff --git a/tests/yup_dsp/yup_PartitionedConvolver.cpp b/tests/yup_dsp/yup_PartitionedConvolver.cpp index c1ea2985f..b8a4f09b7 100644 --- a/tests/yup_dsp/yup_PartitionedConvolver.cpp +++ b/tests/yup_dsp/yup_PartitionedConvolver.cpp @@ -998,4 +998,285 @@ TEST_F (PartitionedConvolverTest, ResetFunctionality) } } +//============================================================================== +// IR Trimming Tests +//============================================================================== + +TEST_F 
(PartitionedConvolverTest, IRTrimmingBasicFunctionality) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (64, { 64, 256 }); + convolver.prepare (512); + + // Create IR with significant content at start and silence at end + const size_t originalLength = 2000; + const size_t significantLength = 800; + std::vector ir (originalLength, 0.0f); + + // Fill first part with meaningful signal + for (size_t i = 0; i < significantLength; ++i) + { + ir[i] = std::exp (-static_cast (i) / 100.0f) * std::sin (2.0f * MathConstants::pi * i / 32.0f); + } + + // Add very quiet noise at the end (below -60dB) + for (size_t i = significantLength; i < originalLength; ++i) + { + ir[i] = randomFloat (-0.001f, 0.001f); // ~ -60dB + } + + // Test without trimming + convolver.setImpulseResponse (ir); + std::vector input (512, 0.0f); + input[0] = 1.0f; + std::vector outputWithoutTrim (512, 0.0f); + convolver.process (input.data(), outputWithoutTrim.data(), input.size()); + convolver.reset(); + + // Test with trimming at -50dB threshold + PartitionedConvolver::IRLoadOptions options; + options.trimEndSilenceBelowDb = -50.0f; + convolver.setImpulseResponse (ir, options); + + std::vector outputWithTrim (512, 0.0f); + convolver.process (input.data(), outputWithTrim.data(), input.size()); + + // Both should produce similar output in the early samples + float correlationSum = 0.0f; + float norm1 = 0.0f, norm2 = 0.0f; + + for (size_t i = 0; i < 200; ++i) // Compare first 200 samples + { + correlationSum += outputWithoutTrim[i] * outputWithTrim[i]; + norm1 += outputWithoutTrim[i] * outputWithoutTrim[i]; + norm2 += outputWithTrim[i] * outputWithTrim[i]; + } + + if (norm1 > 0.0f && norm2 > 0.0f) + { + float correlation = correlationSum / std::sqrt (norm1 * norm2); + EXPECT_GT (correlation, 0.95f) << "Trimmed and untrimmed outputs should be highly correlated in early samples"; + } +} + +TEST_F (PartitionedConvolverTest, IRTrimmingWithDifferentThresholds) +{ + PartitionedConvolver 
convolver; + convolver.setTypicalLayout (64, { 64, 256 }); + convolver.prepare (512); + + // Create IR with exponentially decaying tail + const size_t originalLength = 2000; + std::vector ir (originalLength); + + for (size_t i = 0; i < originalLength; ++i) + { + float decay = std::exp (-static_cast (i) / 200.0f); + ir[i] = decay * std::sin (2.0f * MathConstants::pi * i / 16.0f); + } + + std::vector thresholds = { -20.0f, -40.0f, -60.0f, -80.0f }; + std::vector outputEnergies; + + for (float threshold : thresholds) + { + PartitionedConvolver::IRLoadOptions options; + options.trimEndSilenceBelowDb = threshold; + convolver.setImpulseResponse (ir, options); + + std::vector input (512, 0.0f); + input[0] = 1.0f; + std::vector output (512, 0.0f); + convolver.process (input.data(), output.data(), input.size()); + + float energy = 0.0f; + for (float sample : output) + energy += sample * sample; + + outputEnergies.push_back (energy); + convolver.reset(); + } + + // More aggressive trimming should result in less energy + for (size_t i = 1; i < outputEnergies.size(); ++i) + { + EXPECT_LE (outputEnergies[i], outputEnergies[i - 1] * 1.1f) + << "More aggressive trimming threshold should not significantly increase output energy"; + } +} + +TEST_F (PartitionedConvolverTest, IRTrimmingVeryShortIR) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (64, { 64, 256 }); + convolver.prepare (512); + + // Very short IR that shouldn't be trimmed much + std::vector shortIR (100); + for (size_t i = 0; i < shortIR.size(); ++i) + { + shortIR[i] = std::sin (2.0f * MathConstants::pi * i / 8.0f); + } + + PartitionedConvolver::IRLoadOptions options; + options.trimEndSilenceBelowDb = -40.0f; + + // Should not crash or produce errors with short IR + EXPECT_NO_THROW (convolver.setImpulseResponse (shortIR, options)); + + std::vector input (512, 0.0f); + input[0] = 1.0f; + std::vector output (512, 0.0f); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); 
+ + // Should still produce meaningful output + float outputRMS = calculateRMS (output); + EXPECT_GT (outputRMS, 0.01f); +} + +TEST_F (PartitionedConvolverTest, IRTrimmingAllSilence) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (64, { 64, 256 }); + convolver.prepare (512); + + // IR with only very quiet content + std::vector quietIR (1000); + for (size_t i = 0; i < quietIR.size(); ++i) + { + quietIR[i] = randomFloat (-0.0001f, 0.0001f); // Very quiet, ~ -80dB + } + + PartitionedConvolver::IRLoadOptions options; + options.normalize = false; // Don't normalize the quiet IR + options.trimEndSilenceBelowDb = -60.0f; // Should trim most/all of it + + EXPECT_NO_THROW (convolver.setImpulseResponse (quietIR, options)); + + std::vector input (512); + fillWithRandomData (input); + std::vector output (512, 0.0f); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); + + // Output should be very quiet or silent + float outputRMS = calculateRMS (output); + EXPECT_LT (outputRMS, 0.001f); // Should be very quiet with normalized disabled and aggressive trimming +} + +TEST_F (PartitionedConvolverTest, IRTrimmingWithNormalization) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (64, { 64, 256 }); + convolver.prepare (512); + + // Create IR with large peak but quiet tail + std::vector ir (1500); + for (size_t i = 0; i < ir.size(); ++i) + { + if (i < 100) + ir[i] = 2.0f * std::exp (-static_cast (i) / 50.0f); // Large peak + else + ir[i] = 0.01f * randomFloat (-0.1f, 0.1f); // Quiet tail + } + + PartitionedConvolver::IRLoadOptions options; + options.normalize = true; + options.headroomDb = -6.0f; + options.trimEndSilenceBelowDb = -50.0f; + + EXPECT_NO_THROW (convolver.setImpulseResponse (ir, options)); + + std::vector input (512, 0.0f); + input[0] = 1.0f; + std::vector output (512, 0.0f); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); + + // Should produce reasonable output 
levels due to normalization + float outputPeak = findPeak (output); + EXPECT_GT (outputPeak, 0.1f); + EXPECT_LT (outputPeak, 1.0f); // Should be limited by headroom +} + +TEST_F (PartitionedConvolverTest, IRTrimmingExactBoundary) +{ + PartitionedConvolver convolver; + convolver.setTypicalLayout (64, { 64, 256 }); + convolver.prepare (512); + + // Create IR that drops exactly to threshold + const size_t significantLength = 1000; + const size_t totalLength = 1500; + std::vector ir (totalLength, 0.0f); + + // Significant content + for (size_t i = 0; i < significantLength; ++i) + { + ir[i] = std::exp (-static_cast (i) / 200.0f); + } + + // Content right at threshold level (-50dB = 0.00316) + const float thresholdLevel = std::pow (10.0f, -50.0f / 20.0f); + for (size_t i = significantLength; i < totalLength; ++i) + { + ir[i] = thresholdLevel * 0.9f; // Slightly below threshold + } + + PartitionedConvolver::IRLoadOptions options; + options.trimEndSilenceBelowDb = -50.0f; + + EXPECT_NO_THROW (convolver.setImpulseResponse (ir, options)); + + std::vector input (512, 0.0f); + input[0] = 1.0f; + std::vector output (512, 0.0f); + + EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size())); + + // Should work correctly at boundary conditions + float outputRMS = calculateRMS (output); + EXPECT_GT (outputRMS, 0.001f); +} + +TEST_F (PartitionedConvolverTest, IRTrimmingConsistency) +{ + // Test that trimming produces consistent results across multiple calls + PartitionedConvolver convolver1, convolver2; + convolver1.setTypicalLayout (64, { 64, 256 }); + convolver1.prepare (512); + convolver2.setTypicalLayout (64, { 64, 256 }); + convolver2.prepare (512); + + std::vector ir (1000); + fillWithRandomData (ir); + // Add quiet tail + for (size_t i = 600; i < ir.size(); ++i) + { + ir[i] *= 0.001f; // Make very quiet + } + + PartitionedConvolver::IRLoadOptions options; + options.trimEndSilenceBelowDb = -50.0f; + + // Set same IR with trimming on both convolvers + 
convolver1.setImpulseResponse (ir, options); + convolver2.setImpulseResponse (ir, options); + + std::vector input (512); + fillWithRandomData (input); + std::vector output1 (512, 0.0f); + std::vector output2 (512, 0.0f); + + convolver1.process (input.data(), output1.data(), input.size()); + convolver2.process (input.data(), output2.data(), input.size()); + + // Both should produce identical results + for (size_t i = 0; i < output1.size(); ++i) + { + EXPECT_NEAR (output1[i], output2[i], 0.0001f) << "Inconsistent trimming results at sample " << i; + } +} + } // namespace yup::test \ No newline at end of file From 35c4e5c2c678f610696b1d27aa3f7158e7981211 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Thu, 11 Sep 2025 22:58:14 +0200 Subject: [PATCH 32/37] Moved the DirectFIR outside of the PartitionedConvolution --- .../source/examples/ConvolutionDemo.h | 17 +- examples/graphics/source/main.cpp | 17 +- .../convolution/yup_PartitionedConvolver.cpp | 196 ++---- .../convolution/yup_PartitionedConvolver.h | 16 +- modules/yup_dsp/filters/yup_DirectFIR.cpp | 227 +++++++ modules/yup_dsp/filters/yup_DirectFIR.h | 158 +++++ modules/yup_dsp/yup_dsp.cpp | 1 + modules/yup_dsp/yup_dsp.h | 1 + tests/yup_dsp/yup_DirectFIR.cpp | 585 ++++++++++++++++++ tests/yup_dsp/yup_PartitionedConvolver.cpp | 24 +- 10 files changed, 1050 insertions(+), 192 deletions(-) create mode 100644 modules/yup_dsp/filters/yup_DirectFIR.cpp create mode 100644 modules/yup_dsp/filters/yup_DirectFIR.h create mode 100644 tests/yup_dsp/yup_DirectFIR.cpp diff --git a/examples/graphics/source/examples/ConvolutionDemo.h b/examples/graphics/source/examples/ConvolutionDemo.h index 4253d8973..221cb37cb 100644 --- a/examples/graphics/source/examples/ConvolutionDemo.h +++ b/examples/graphics/source/examples/ConvolutionDemo.h @@ -294,13 +294,15 @@ class ConvolutionDemo // Set impulse response in convolver yup::PartitionedConvolver::IRLoadOptions loadOptions; - loadOptions.trimEndSilenceBelowDb = -60.0f; + 
loadOptions.trimEndSilenceBelowDb = -36.0f; convolver.setImpulseResponse (impulseResponseData, loadOptions); + impulseLength = static_cast (convolver.getImpulseLength()); hasImpulseResponse = true; std::cout << "Loaded impulse response: " << file.getFileName() << std::endl; std::cout << "Sample rate: " << reader->sampleRate << " Hz" << std::endl; std::cout << "Length: " << reader->lengthInSamples << " samples" << std::endl; + std::cout << "Effective Length: " << impulseLength << " samples" << std::endl; // Update UI updateIRInfo (file.getFileName()); @@ -409,6 +411,8 @@ class ConvolutionDemo if (impulseResponseData.empty()) return; + const size_t length = static_cast (impulseLength); + // Always apply peak headroom float headroomScale = std::pow (10.0f, -12.0f / 20.0f); const auto minMax = yup::FloatVectorOperations::findMinAndMax (impulseResponseData.data(), impulseResponseData.size()); @@ -417,8 +421,8 @@ class ConvolutionDemo headroomScale /= peak; // Create waveform data points - const size_t numPoints = std::min (static_cast (2048), impulseResponseData.size()); - const size_t stride = impulseResponseData.size() / numPoints; + const size_t numPoints = std::min (static_cast (getWidth()), length); + const size_t stride = length / numPoints; std::vector> waveformData; waveformData.reserve (numPoints); @@ -426,8 +430,8 @@ class ConvolutionDemo for (size_t i = 0; i < numPoints; ++i) { size_t sampleIndex = i * stride; - if (sampleIndex >= impulseResponseData.size()) - sampleIndex = impulseResponseData.size() - 1; + if (sampleIndex >= length) + sampleIndex = length - 1; double normalizedTime = static_cast (i) / static_cast (numPoints - 1); double amplitude = static_cast (impulseResponseData[sampleIndex] * headroomScale); @@ -439,7 +443,7 @@ class ConvolutionDemo irWaveformDisplay.updateSignalData (waveformSignalIndex, waveformData); // Update X axis range to show time - double lengthInSeconds = static_cast (impulseResponseData.size()) / 44100.0; // Assume 44.1kHz + 
double lengthInSeconds = static_cast (length) / 44100.0; // Assume 44.1kHz irWaveformDisplay.setXRange (0.0, lengthInSeconds); irWaveformDisplay.setVerticalGridLines ({ 0.0, lengthInSeconds }); @@ -463,6 +467,7 @@ class ConvolutionDemo yup::AudioBuffer impulseResponseBuffer; std::vector impulseResponseData; int readPosition = 0; + int impulseLength = 0; std::atomic hasImpulseResponse = false; // Processing diff --git a/examples/graphics/source/main.cpp b/examples/graphics/source/main.cpp index f6d4d6084..65b6b648b 100644 --- a/examples/graphics/source/main.cpp +++ b/examples/graphics/source/main.cpp @@ -309,18 +309,23 @@ struct Application : yup::YUPApplication yup::Logger::outputDebugString ("Starting app " + commandLineParameters); - window = std::make_unique(); + yup::MessageManager::callAsync ([this] + { + yup::Process::makeForegroundProcess(); + + window = std::make_unique(); #if YUP_IOS - window->centreWithSize ({ 320, 480 }); + window->centreWithSize ({ 320, 480 }); #elif YUP_ANDROID - window->centreWithSize ({ 1080, 2400 }); - // window->setFullScreen(true); + window->centreWithSize ({ 1080, 2400 }); + // window->setFullScreen(true); #else - window->centreWithSize ({ 600, 800 }); + window->centreWithSize ({ 600, 800 }); #endif - window->setVisible (true); + window->setVisible (true); + }); } void shutdown() override diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index 133d27335..566ba72cf 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -32,7 +32,7 @@ namespace yup @param Y pointer to output complex array (accumulated) @param complexPairs number of complex pairs (not number of floats!) 
*/ -static void complexMultiplyAccumulate (const float* A, const float* B, float* Y, int complexPairs) noexcept +static void complexMultiplyAccumulate (const float* __restrict A, const float* __restrict B, float* __restrict Y, int complexPairs) noexcept { int i = 0; @@ -125,147 +125,6 @@ static void complexMultiplyAccumulate (const float* A, const float* B, float* Y, //============================================================================== -class PartitionedConvolver::DirectFIR -{ -public: - DirectFIR() = default; - - void setTaps (std::vector taps, float scaling) - { - FloatVectorOperations::multiply (taps.data(), scaling, taps.size()); - - tapsReversed = std::move (taps); - std::reverse (tapsReversed.begin(), tapsReversed.end()); - - numTaps = tapsReversed.size(); - paddedLen = (numTaps + 3u) & ~3u; - tapsReversed.resize (paddedLen, 0.0f); - - history.assign (2 * numTaps, 0.0f); - writeIndex = 0; - } - - void reset() - { - std::fill (history.begin(), history.end(), 0.0f); - writeIndex = 0; - } - - void process (const float* input, float* output, std::size_t numSamples) noexcept - { - const std::size_t M = numTaps; - if (M == 0) - return; - - const float* h = tapsReversed.data(); - for (std::size_t i = 0; i < numSamples; ++i) - { - const float x = input[i]; - - history[writeIndex] = x; - history[writeIndex + M] = x; - - const float* w = history.data() + writeIndex + 1; - - float sum = 0.0f; - -#if YUP_ENABLE_VDSP - vDSP_dotpr (w, 1, h, 1, &sum, M); -#else - sum = dotProduct (w, h, M); -#endif - - output[i] += sum; - - if (++writeIndex == M) - writeIndex = 0; - } - } - - std::size_t getNumTaps() const - { - return numTaps; - } - -private: - static float dotProduct (const float* __restrict a, const float* __restrict b, std::size_t len) noexcept - { - float acc = 0.0f; - std::size_t i = 0; - -#if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS - __m256 vacc = _mm256_setzero_ps(); - for (; i + 8 <= len; i += 8) - { - __m256 va = _mm256_loadu_ps (a + i); - 
__m256 vb = _mm256_loadu_ps (b + i); - vacc = _mm256_fmadd_ps (va, vb, vacc); - } - __m128 low = _mm256_castps256_ps128 (vacc); - __m128 high = _mm256_extractf128_ps (vacc, 1); - __m128 vsum = _mm_add_ps (low, high); - vsum = _mm_hadd_ps (vsum, vsum); - vsum = _mm_hadd_ps (vsum, vsum); - acc += _mm_cvtss_f32 (vsum); - -#elif YUP_USE_SSE_INTRINSICS - __m128 vacc = _mm_setzero_ps(); -#if YUP_USE_FMA_INTRINSICS - for (; i + 4 <= len; i += 4) - { - __m128 va = _mm_loadu_ps (a + i); - __m128 vb = _mm_loadu_ps (b + i); - vacc = _mm_fmadd_ps (va, vb, vacc); - } -#else - for (; i + 4 <= len; i += 4) - { - __m128 va = _mm_loadu_ps (a + i); - __m128 vb = _mm_loadu_ps (b + i); - vacc = _mm_add_ps (vacc, _mm_mul_ps (va, vb)); - } -#endif - __m128 shuf = _mm_shuffle_ps (vacc, vacc, _MM_SHUFFLE (2, 3, 0, 1)); - __m128 sums = _mm_add_ps (vacc, shuf); - shuf = _mm_movehl_ps (shuf, sums); - sums = _mm_add_ss (sums, shuf); - acc += _mm_cvtss_f32 (sums); - -#elif YUP_USE_ARM_NEON - float32x4_t vacc = vdupq_n_f32 (0.0f); - for (; i + 4 <= len; i += 4) - { - float32x4_t va = vld1q_f32 (a + i); - float32x4_t vb = vld1q_f32 (b + i); - vacc = vmlaq_f32 (vacc, va, vb); - } -#if YUP_64BIT - acc += vaddvq_f32 (vacc); -#else - float32x2_t vlow = vget_low_f32 (vacc); - float32x2_t vhigh = vget_high_f32 (vacc); - float32x2_t vsum2 = vpadd_f32 (vlow, vhigh); - vsum2 = vpadd_f32 (vsum2, vsum2); - acc += vget_lane_f32 (vsum2, 0); -#endif - -#endif - - for (; i < len; ++i) - acc += a[i] * b[i]; - - return acc; - } - - std::vector tapsReversed; - std::vector history; - std::size_t numTaps = 0; - std::size_t paddedLen = 0; - std::size_t writeIndex = 0; -}; - -//============================================================================== - class PartitionedConvolver::FFTLayer { public: @@ -544,9 +403,9 @@ class PartitionedConvolver::Impl Impl() = default; ~Impl() = default; - void configureLayers (std::size_t directFIRTaps, const std::vector& newLayers) + void configureLayers (std::size_t 
directFIRCoefficients, const std::vector& newLayers) { - directFIRTapCount = directFIRTaps; + directFIRCoefficientCount = directFIRCoefficients; layers.clear(); layers.resize (newLayers.size()); @@ -659,6 +518,7 @@ class PartitionedConvolver::Impl if (significantContentEnd == 0) { const std::size_t checkLength = std::min (minRetainLength, length); + float rmsSquared = 0.0f; for (std::size_t j = 0; j < checkLength; ++j) rmsSquared += impulseResponse[j] * impulseResponse[j]; @@ -685,12 +545,10 @@ class PartitionedConvolver::Impl // Safety check if (impulseResponse != nullptr && trimmedLength > 0) { - // Trim end silence if requested - if (options.trimEndSilenceBelowDb) - trimmedLength = trimSilenceFromEnd (impulseResponse, length, *options.trimEndSilenceBelowDb); - // Always apply peak headroom float headroomScale = std::pow (10.0f, options.headroomDb / 20.0f); + + // Normalize peaks if (options.normalize) { const auto minMax = FloatVectorOperations::findMinAndMax (impulseResponse, trimmedLength); @@ -700,20 +558,24 @@ class PartitionedConvolver::Impl headroomScale /= peak; } + // Trim end silence if requested + if (options.trimEndSilenceBelowDb) + trimmedLength = trimSilenceFromEnd (impulseResponse, length, *options.trimEndSilenceBelowDb); + // Update DirectFIR in-place - std::vector directTaps; + std::vector directCoefficients; - const auto directTapsCount = std::min (directFIRTapCount, trimmedLength); - if (directTapsCount > 0) + const auto directCoefficientsCount = std::min (directFIRCoefficientCount, trimmedLength); + if (directCoefficientsCount > 0) { - directTaps.reserve (directTapsCount); - directTaps.assign (impulseResponse, impulseResponse + directTapsCount); + directCoefficients.reserve (directCoefficientsCount); + directCoefficients.assign (impulseResponse, impulseResponse + directCoefficientsCount); } - newFIR.setTaps (std::move (directTaps), headroomScale); + newFIR.setCoefficients (std::move (directCoefficients), headroomScale); // Update FFT layers - 
std::size_t consumed = directTapsCount; + std::size_t consumed = directCoefficientsCount; for (std::size_t i = 0; i < newLayers.size(); ++i) { auto& layer = newLayers[i]; @@ -735,11 +597,17 @@ class PartitionedConvolver::Impl directFIR = std::move (newFIR); layers = std::move (newLayers); + finalImpulseLength = trimmedLength; resetStateUnsafe(); } } + std::size_t getImpulseLength() const + { + return finalImpulseLength; + } + void reset() { SpinLock::ScopedLockType lock (processingLock); @@ -890,10 +758,11 @@ class PartitionedConvolver::Impl std::size_t getInputStagingAvailable() const { return inputStagingAvailable; } - std::size_t directFIRTapCount = 0; + std::size_t directFIRCoefficientCount = 0; int baseHopSize = 0; std::size_t maxHopSize = 0; std::size_t maxBlockSize = 0; + std::size_t finalImpulseLength = 0; bool isPrepared = false; DirectFIR directFIR; @@ -941,12 +810,12 @@ PartitionedConvolver& PartitionedConvolver::operator= (PartitionedConvolver&& ot return *this; } -void PartitionedConvolver::configureLayers (std::size_t directFIRTaps, const std::vector& layers) +void PartitionedConvolver::configureLayers (std::size_t directFIRCoefficients, const std::vector& layers) { - pImpl->configureLayers (directFIRTaps, layers); + pImpl->configureLayers (directFIRCoefficients, layers); } -void PartitionedConvolver::setTypicalLayout (std::size_t directTaps, const std::vector& hops) +void PartitionedConvolver::setTypicalLayout (std::size_t directCoefficients, const std::vector& hops) { std::vector layerSpecs; layerSpecs.reserve (hops.size()); @@ -954,12 +823,12 @@ void PartitionedConvolver::setTypicalLayout (std::size_t directTaps, const std:: for (int hop : hops) { if (hop < 64) - directTaps += static_cast (hop); + directCoefficients += static_cast (hop); else layerSpecs.push_back ({ nextPowerOfTwo (hop) }); } - configureLayers (directTaps, layerSpecs); + configureLayers (directCoefficients, layerSpecs); } void PartitionedConvolver::setImpulseResponse (const float* 
impulseResponse, std::size_t length, const IRLoadOptions& options) @@ -972,6 +841,11 @@ void PartitionedConvolver::setImpulseResponse (const std::vector& impulse setImpulseResponse (impulseResponse.data(), impulseResponse.size(), options); } +std::size_t PartitionedConvolver::getImpulseLength() const +{ + return pImpl->getImpulseLength(); +} + void PartitionedConvolver::prepare (std::size_t maxBlockSize) { pImpl->prepare (maxBlockSize); diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.h b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h index b73f98fa8..f319bc217 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.h +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h @@ -29,7 +29,7 @@ namespace yup Layered partitioned convolution engine optimized for real-time audio processing. Combines multiple processing strategies for efficient convolution: - - Direct FIR computation for early taps (low latency) + - Direct FIR computation for early coefficients (low latency) - One or more FFT-based Overlap-Add layers with uniform partitioning per layer The engine uses YUP's FFTProcessor for real FFT operations and supports: @@ -41,7 +41,7 @@ namespace yup @code PartitionedConvolver convolver; - // Configure layers: 256 direct taps + FFT layers with hops 256, 1024, 4096 + // Configure layers: 256 direct coefficients + FFT layers with hops 256, 1024, 4096 convolver.setTypicalLayout(256, {256, 1024, 4096}); // Prepare for processing with maximum block size (must be called before process) @@ -83,19 +83,19 @@ class PartitionedConvolver /** Configure the convolution layers before setting the impulse response. 
- @param directFIRTaps Number of early taps to process with direct FIR (for low latency) + @param directFIRCoefficients Number of early coefficients to process with direct FIR (for low latency) @param layers Vector of layer specifications with increasing hop sizes (e.g., {{256}, {1024}, {4096}} for 256→1024→4096 progression) */ - void configureLayers (std::size_t directFIRTaps, const std::vector& layers); + void configureLayers (std::size_t directFIRCoefficients, const std::vector& layers); /** Convenience method to set a typical late-reverb configuration. - @param directTaps Number of direct FIR taps for early reflections + @param directCoefficients Number of direct FIR coefficients for early reflections @param hops Vector of hop sizes for FFT layers (geometrically increasing recommended) */ - void setTypicalLayout (std::size_t directTaps, const std::vector& hops); + void setTypicalLayout (std::size_t directCoefficients, const std::vector& hops); //============================================================================== /** Impulse response loading options. */ @@ -130,6 +130,9 @@ class PartitionedConvolver */ void setImpulseResponse (const std::vector& impulseResponse, const IRLoadOptions& options = {}); + /** Returns the length of the impulse in samples, taking into account trimmed silence samples. */ + std::size_t getImpulseLength() const; + //============================================================================== /** Prepare the convolver for processing with a specific maximum block size. 
@@ -162,7 +165,6 @@ class PartitionedConvolver private: //============================================================================== - class DirectFIR; class FFTLayer; class CircularBuffer; class Impl; diff --git a/modules/yup_dsp/filters/yup_DirectFIR.cpp b/modules/yup_dsp/filters/yup_DirectFIR.cpp new file mode 100644 index 000000000..1c5d34da9 --- /dev/null +++ b/modules/yup_dsp/filters/yup_DirectFIR.cpp @@ -0,0 +1,227 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2025 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. 
+ + ============================================================================== +*/ + +namespace yup +{ + +namespace +{ + +//============================================================================== + +float dotProduct (const float* __restrict a, const float* __restrict b, std::size_t len) noexcept +{ + float acc = 0.0f; + std::size_t i = 0; + +#if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS + __m256 vacc = _mm256_setzero_ps(); + for (; i + 8 <= len; i += 8) + { + __m256 va = _mm256_loadu_ps (a + i); + __m256 vb = _mm256_loadu_ps (b + i); + vacc = _mm256_fmadd_ps (va, vb, vacc); + } + __m128 low = _mm256_castps256_ps128 (vacc); + __m128 high = _mm256_extractf128_ps (vacc, 1); + __m128 vsum = _mm_add_ps (low, high); + vsum = _mm_hadd_ps (vsum, vsum); + vsum = _mm_hadd_ps (vsum, vsum); + acc += _mm_cvtss_f32 (vsum); + +#elif YUP_USE_SSE_INTRINSICS + __m128 vacc = _mm_setzero_ps(); +#if YUP_USE_FMA_INTRINSICS + for (; i + 4 <= len; i += 4) + { + __m128 va = _mm_loadu_ps (a + i); + __m128 vb = _mm_loadu_ps (b + i); + vacc = _mm_fmadd_ps (va, vb, vacc); + } +#else + for (; i + 4 <= len; i += 4) + { + __m128 va = _mm_loadu_ps (a + i); + __m128 vb = _mm_loadu_ps (b + i); + vacc = _mm_add_ps (vacc, _mm_mul_ps (va, vb)); + } +#endif + __m128 shuf = _mm_shuffle_ps (vacc, vacc, _MM_SHUFFLE (2, 3, 0, 1)); + __m128 sums = _mm_add_ps (vacc, shuf); + shuf = _mm_movehl_ps (shuf, sums); + sums = _mm_add_ss (sums, shuf); + acc += _mm_cvtss_f32 (sums); + +#elif YUP_USE_ARM_NEON + float32x4_t vacc = vdupq_n_f32 (0.0f); + for (; i + 4 <= len; i += 4) + { + float32x4_t va = vld1q_f32 (a + i); + float32x4_t vb = vld1q_f32 (b + i); + vacc = vmlaq_f32 (vacc, va, vb); + } +#if YUP_64BIT + acc += vaddvq_f32 (vacc); +#else + float32x2_t vlow = vget_low_f32 (vacc); + float32x2_t vhigh = vget_high_f32 (vacc); + float32x2_t vsum2 = vpadd_f32 (vlow, vhigh); + vsum2 = vpadd_f32 (vsum2, vsum2); + acc += vget_lane_f32 (vsum2, 0); +#endif + +#endif + + // Handle remaining samples + for 
(; i < len; ++i) + acc += a[i] * b[i]; + + return acc; +} + +} // namespace + +//============================================================================== + +DirectFIR::DirectFIR() = default; + +DirectFIR::~DirectFIR() = default; + +DirectFIR::DirectFIR (DirectFIR&& other) noexcept + : coefficientsReversed (std::move (other.coefficientsReversed)) + , history (std::move (other.history)) + , numCoefficients (std::exchange (other.numCoefficients, 0)) + , paddedLen (std::exchange (other.paddedLen, 0)) + , writeIndex (std::exchange (other.writeIndex, 0)) + , currentScaling (std::exchange (other.currentScaling, 1.0f)) +{ +} + +DirectFIR& DirectFIR::operator= (DirectFIR&& other) noexcept +{ + if (this != &other) + { + coefficientsReversed = std::move (other.coefficientsReversed); + history = std::move (other.history); + numCoefficients = std::exchange (other.numCoefficients, 0); + paddedLen = std::exchange (other.paddedLen, 0); + writeIndex = std::exchange (other.writeIndex, 0); + currentScaling = std::exchange (other.currentScaling, 1.0f); + } + return *this; +} + +void DirectFIR::setCoefficients (std::vector coefficients, float scaling) +{ + currentScaling = scaling; + if (! 
approximatelyEqual (currentScaling, 1.0f)) + FloatVectorOperations::multiply (coefficients.data(), scaling, coefficients.size()); + + coefficientsReversed = std::move (coefficients); + std::reverse (coefficientsReversed.begin(), coefficientsReversed.end()); + + numCoefficients = coefficientsReversed.size(); + paddedLen = (numCoefficients + 3u) & ~3u; // Round up to multiple of 4 for SIMD + coefficientsReversed.resize (paddedLen, 0.0f); + + history.assign (2 * numCoefficients, 0.0f); + + reset(); +} + +void DirectFIR::setCoefficients (const float* coefficients, std::size_t numCoefficientsIn, float scaling) +{ + if (coefficients == nullptr || numCoefficientsIn == 0) + { + reset(); + numCoefficients = 0; + return; + } + + std::vector coefficientsVector (coefficients, coefficients + numCoefficientsIn); + setCoefficients (std::move (coefficientsVector), scaling); +} + +std::size_t DirectFIR::getNumCoefficients() const noexcept +{ + return numCoefficients; +} + +bool DirectFIR::hasCoefficients() const noexcept +{ + return numCoefficients > 0; +} + +const std::vector& DirectFIR::getCoefficients() const noexcept +{ + return coefficientsReversed; +} + +float DirectFIR::getScaling() const noexcept +{ + return currentScaling; +} + +void DirectFIR::reset() +{ + std::fill (history.begin(), history.end(), 0.0f); + writeIndex = 0; +} + +void DirectFIR::process (const float* input, float* output, std::size_t numSamples) noexcept +{ + const std::size_t M = numCoefficients; + if (M == 0 || input == nullptr || output == nullptr) + return; + + const float* h = coefficientsReversed.data(); + + for (std::size_t i = 0; i < numSamples; ++i) + { + const float x = input[i]; + + // Update circular buffer with current input sample + history[writeIndex] = x; + history[writeIndex + M] = x; // Duplicate for efficient circular access + + // Point to the start of the delay line for this sample + const float* w = history.data() + writeIndex + 1; + + float sum = 0.0f; + +#if YUP_ENABLE_VDSP + // Use 
Apple's optimized vDSP if available + vDSP_dotpr (w, 1, h, 1, &sum, M); +#else + // Use our own SIMD-optimized dot product + sum = dotProduct (w, h, M); +#endif + + // Accumulate result into output + output[i] += sum; + + // Advance circular buffer write pointer + if (++writeIndex == M) + writeIndex = 0; + } +} + +} // namespace yup diff --git a/modules/yup_dsp/filters/yup_DirectFIR.h b/modules/yup_dsp/filters/yup_DirectFIR.h new file mode 100644 index 000000000..23abdaeb6 --- /dev/null +++ b/modules/yup_dsp/filters/yup_DirectFIR.h @@ -0,0 +1,158 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2025 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#pragma once + +namespace yup +{ + +//============================================================================== +/** + Direct-form FIR (Finite Impulse Response) filter processor optimized for real-time audio. + + Implements a time-domain FIR filter using direct convolution with SIMD optimizations. + This class is ideal for low-latency applications where the number of coefficients is relatively + small (typically < 512 coefficients), as it provides zero algorithmic delay. 
+ + Features: + - Zero algorithmic latency (only processing delay) + - SIMD-optimized convolution (AVX2, SSE, ARM NEON, vDSP) + - Circular buffer implementation for efficient sample history management + - Real-time safe processing (no heap allocations during process()) + - Support for arbitrary block sizes + + Example usage: + @code + DirectFIR fir; + + // Set filter coefficients (e.g., lowpass filter) + std::vector coeffs = calculateLowpassCoeffs(44100.0f, 1000.0f, 64); + fir.setCoefficients(coeffs, 1.0f); // coeffs with 1.0x scaling + + // No separate prepare step is needed: setCoefficients() sizes and clears the sample history + fir.reset(); // Optional: clears the sample history again before processing + + // In audio callback: + fir.process(inputBuffer, outputBuffer, numSamples); // Accumulates into output + @endcode + + @note The process() method accumulates results into the output buffer. + Clear the output buffer first if overwrite behavior is desired. + + @see PartitionedConvolver for longer impulse responses using FFT-based convolution +*/ +class DirectFIR +{ +public: + //============================================================================== + /** Default constructor */ + DirectFIR(); + + /** Destructor */ + ~DirectFIR(); + + // Non-copyable but movable + DirectFIR (DirectFIR&& other) noexcept; + DirectFIR& operator= (DirectFIR&& other) noexcept; + + //============================================================================== + /** + Set the FIR filter coefficients. + + @param coefficients Vector containing the FIR coefficients in time order + @param scaling Scaling factor to apply to all coefficients + + @note This method is not real-time safe and should be called during initialization + or when audio processing is paused. + */ + void setCoefficients (std::vector coefficients, float scaling = 1.0f); + + /** + Set the FIR filter coefficients from a raw pointer. 
+ + @param coefficients Pointer to FIR coefficients array + @param numCoefficients Number of coefficients + @param scaling Scaling factor to apply to all coefficients + + @note This method is not real-time safe and should be called during initialization + or when audio processing is paused. + */ + void setCoefficients (const float* coefficients, std::size_t numCoefficients, float scaling = 1.0f); + + /** + Get the number of filter coefficients. + + @return Number of coefficients in the current filter + */ + std::size_t getNumCoefficients() const noexcept; + + /** + Check if the filter has been configured with coefficients. + + @return True if coefficients have been set, false otherwise + */ + bool hasCoefficients() const noexcept; + + /** + Get the current filter coefficients. + + @return Vector containing the current coefficients (time-reversed for processing) + */ + const std::vector& getCoefficients() const noexcept; + + /** + Get the current scaling factor applied to coefficients. + + @return Current scaling factor + */ + float getScaling() const noexcept; + + //============================================================================== + /** + Reset all internal processing state (clears sample history). + Filter coefficients are preserved. + */ + void reset(); + + /** + Process audio samples through the FIR filter. + + @param input Input audio buffer + @param output Output audio buffer (results are accumulated) + @param numSamples Number of samples to process + + @note Results are accumulated into the output buffer. Clear it first if needed. + @note This method is real-time safe with no heap allocations. 
+ */ + void process (const float* input, float* output, std::size_t numSamples) noexcept; + +private: + std::vector coefficientsReversed; + std::vector history; + std::size_t numCoefficients = 0; + std::size_t paddedLen = 0; + std::size_t writeIndex = 0; + float currentScaling = 1.0f; + + YUP_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (DirectFIR) +}; + +} // namespace yup diff --git a/modules/yup_dsp/yup_dsp.cpp b/modules/yup_dsp/yup_dsp.cpp index 39d32fd85..0c3488b70 100644 --- a/modules/yup_dsp/yup_dsp.cpp +++ b/modules/yup_dsp/yup_dsp.cpp @@ -75,6 +75,7 @@ #include "frequency/yup_FFTProcessor.cpp" #include "frequency/yup_SpectrumAnalyzerState.cpp" #include "designers/yup_FilterDesigner.cpp" +#include "filters/yup_DirectFIR.cpp" #include "convolution/yup_PartitionedConvolver.cpp" //============================================================================== diff --git a/modules/yup_dsp/yup_dsp.h b/modules/yup_dsp/yup_dsp.h index c42e03212..5181dfb6c 100644 --- a/modules/yup_dsp/yup_dsp.h +++ b/modules/yup_dsp/yup_dsp.h @@ -138,6 +138,7 @@ #include "filters/yup_StateVariableFilter.h" #include "filters/yup_ButterworthFilter.h" #include "filters/yup_LinkwitzRileyFilter.h" +#include "filters/yup_DirectFIR.h" // Dynamics processors #include "dynamics/yup_SoftClipper.h" diff --git a/tests/yup_dsp/yup_DirectFIR.cpp b/tests/yup_dsp/yup_DirectFIR.cpp new file mode 100644 index 000000000..e46071a19 --- /dev/null +++ b/tests/yup_dsp/yup_DirectFIR.cpp @@ -0,0 +1,585 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2025 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. 
Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#include + +#include + +#include +#include + +namespace yup::test +{ + +//============================================================================== +class DirectFIRTest : public ::testing::Test +{ +protected: + void SetUp() override + { + generator.seed (42); // Fixed seed for reproducible tests + } + + float randomFloat (float min = -1.0f, float max = 1.0f) + { + std::uniform_real_distribution dist (min, max); + return dist (generator); + } + + void fillWithRandomData (std::vector& buffer) + { + for (auto& sample : buffer) + sample = randomFloat(); + } + + void fillWithSine (std::vector& buffer, float frequency, float sampleRate) + { + for (size_t i = 0; i < buffer.size(); ++i) + buffer[i] = std::sin (2.0f * MathConstants::pi * frequency * static_cast (i) / sampleRate); + } + + void clearBuffer (std::vector& buffer) + { + std::fill (buffer.begin(), buffer.end(), 0.0f); + } + + float calculateRMS (const std::vector& buffer) + { + if (buffer.empty()) + return 0.0f; + + float sum = 0.0f; + for (float sample : buffer) + sum += sample * sample; + + return std::sqrt (sum / static_cast (buffer.size())); + } + + float findPeak (const std::vector& buffer) + { + if (buffer.empty()) + return 0.0f; + + float peak = 0.0f; + for (float sample : buffer) + peak = std::max (peak, std::abs (sample)); + + return peak; + } + + std::vector createLowpassCoefficients (int numCoefficients, float cutoffFreq, float sampleRate) + { + std::vector coefficients (numCoefficients); + float fc = cutoffFreq / sampleRate; + 
int center = numCoefficients / 2; + + for (int i = 0; i < numCoefficients; ++i) + { + if (i == center) + coefficients[i] = 2.0f * fc; + else + { + float x = 2.0f * MathConstants::pi * fc * (i - center); + coefficients[i] = std::sin (x) / x; + } + + // Apply Hanning window + float w = 0.5f - 0.5f * std::cos (2.0f * MathConstants::pi * i / (numCoefficients - 1)); + coefficients[i] *= w; + } + + return coefficients; + } + + std::mt19937 generator; +}; + +//============================================================================== +// Basic API Tests +//============================================================================== + +TEST_F (DirectFIRTest, DefaultConstruction) +{ + DirectFIR fir; + + // Default state should be safe + EXPECT_EQ (fir.getNumCoefficients(), 0); + EXPECT_FALSE (fir.hasCoefficients()); + EXPECT_EQ (fir.getScaling(), 1.0f); + + // Should handle empty processing gracefully + std::vector input (256, 0.0f); + std::vector output (256, 0.0f); + EXPECT_NO_THROW (fir.process (input.data(), output.data(), input.size())); + + // Output should remain zero without coefficients + for (float sample : output) + EXPECT_EQ (sample, 0.0f); +} + +TEST_F (DirectFIRTest, MoveSemantics) +{ + DirectFIR fir1; + std::vector coefficients = { 1.0f, 0.5f, 0.25f }; + fir1.setCoefficients (coefficients, 2.0f); + + // Move constructor + DirectFIR fir2 = std::move (fir1); + + // Verify moved filter works + EXPECT_EQ (fir2.getNumCoefficients(), 3); + EXPECT_TRUE (fir2.hasCoefficients()); + EXPECT_EQ (fir2.getScaling(), 2.0f); + + // Original should be in valid but unspecified state + EXPECT_EQ (fir1.getNumCoefficients(), 0); + + // Test processing with moved filter + std::vector input (10, 0.0f); + input[0] = 1.0f; + std::vector output (10, 0.0f); + + EXPECT_NO_THROW (fir2.process (input.data(), output.data(), input.size())); + + // Should produce scaled output + float outputSum = 0.0f; + for (float sample : output) + outputSum += std::abs (sample); + EXPECT_GT 
(outputSum, 1.0f); // Should be > 1 due to scaling + + // Move assignment + DirectFIR fir3; + fir3 = std::move (fir2); + + EXPECT_EQ (fir3.getNumCoefficients(), 3); + EXPECT_TRUE (fir3.hasCoefficients()); + EXPECT_EQ (fir3.getScaling(), 2.0f); +} + +//============================================================================== +// Coefficient Setting Tests +//============================================================================== + +TEST_F (DirectFIRTest, SetCoefficientsVector) +{ + DirectFIR fir; + std::vector coefficients = { 0.1f, 0.5f, 1.0f, 0.5f, 0.1f }; + + fir.setCoefficients (coefficients, 1.0f); + + EXPECT_EQ (fir.getNumCoefficients(), 5); + EXPECT_TRUE (fir.hasCoefficients()); + EXPECT_EQ (fir.getScaling(), 1.0f); + + // Coefficients should be available + const auto& coeffs = fir.getCoefficients(); + EXPECT_EQ (coeffs.size(), 8); // Padded to multiple of 4 +} + +TEST_F (DirectFIRTest, SetCoefficientsPointer) +{ + DirectFIR fir; + float coefficients[] = { 0.2f, 0.4f, 0.6f, 0.8f }; + + fir.setCoefficients (coefficients, 4, 2.0f); + + EXPECT_EQ (fir.getNumCoefficients(), 4); + EXPECT_TRUE (fir.hasCoefficients()); + EXPECT_EQ (fir.getScaling(), 2.0f); +} + +TEST_F (DirectFIRTest, SetCoefficientsNullptr) +{ + DirectFIR fir; + + // First set some valid coefficients + std::vector coefficients = { 1.0f, 0.5f }; + fir.setCoefficients (coefficients); + EXPECT_TRUE (fir.hasCoefficients()); + + // Setting nullptr should clear the filter + fir.setCoefficients (nullptr, 0, 1.0f); + EXPECT_FALSE (fir.hasCoefficients()); + EXPECT_EQ (fir.getNumCoefficients(), 0); +} + +TEST_F (DirectFIRTest, SetCoefficientsWithScaling) +{ + DirectFIR fir; + std::vector coefficients = { 1.0f, 1.0f, 1.0f }; + + fir.setCoefficients (coefficients, 0.5f); + + // Test impulse response + std::vector input (10, 0.0f); + input[0] = 2.0f; // Unit impulse scaled by 2 + std::vector output (10, 0.0f); + + fir.process (input.data(), output.data(), input.size()); + + // Output should reflect 
the coefficient scaling + // Each coefficient was originally 1.0, scaled by 0.5, so output per coefficient = 2.0 * 0.5 = 1.0 + float expectedSum = 3.0f; // 3 coefficients * 1.0 each + float actualSum = 0.0f; + for (size_t i = 0; i < 5; ++i) // Check first 5 samples + actualSum += output[i]; + + EXPECT_NEAR (actualSum, expectedSum, 0.001f); +} + +//============================================================================== +// Processing Tests +//============================================================================== + +TEST_F (DirectFIRTest, ImpulseResponse) +{ + DirectFIR fir; + std::vector coefficients = { 1.0f, 0.5f, 0.25f }; + fir.setCoefficients (coefficients); + + // Test with unit impulse + std::vector input (10, 0.0f); + input[0] = 1.0f; + std::vector output (10, 0.0f); + + fir.process (input.data(), output.data(), input.size()); + + // Should get the impulse response (coefficients in original order) + EXPECT_NEAR (output[0], 1.0f, 0.001f); // First coefficient h0 + EXPECT_NEAR (output[1], 0.5f, 0.001f); // Second coefficient h1 + EXPECT_NEAR (output[2], 0.25f, 0.001f); // Third coefficient h2 + + // Rest should be zero + for (size_t i = 3; i < output.size(); ++i) + EXPECT_NEAR (output[i], 0.0f, 0.001f); +} + +TEST_F (DirectFIRTest, AccumulativeOutput) +{ + DirectFIR fir; + std::vector coefficients = { 0.5f, 0.5f }; + fir.setCoefficients (coefficients); + + std::vector input (5, 1.0f); + std::vector output (5); + + // Pre-populate output buffer + std::fill (output.begin(), output.end(), 1.0f); + std::vector originalOutput = output; + + fir.process (input.data(), output.data(), input.size()); + + // Output should contain original data plus filter result + for (size_t i = 0; i < output.size(); ++i) + EXPECT_GT (output[i], originalOutput[i]); +} + +TEST_F (DirectFIRTest, Linearity) +{ + DirectFIR fir; + std::vector coefficients = createLowpassCoefficients (32, 1000.0f, 44100.0f); + fir.setCoefficients (coefficients); + + std::vector input (512); + 
fillWithRandomData (input); + + // Scale input by 2 and test linearity + std::vector input2 = input; + FloatVectorOperations::multiply (input2.data(), 2.0f, input2.size()); + + std::vector output1 (512, 0.0f); + std::vector output2 (512, 0.0f); + + fir.reset(); + fir.process (input.data(), output1.data(), input.size()); + + fir.reset(); + fir.process (input2.data(), output2.data(), input2.size()); + + // output2 should be approximately 2x output1 + for (size_t i = 0; i < output1.size(); ++i) + { + if (std::abs (output1[i]) > 0.001f) // Avoid division by near-zero + EXPECT_NEAR (output2[i] / output1[i], 2.0f, 0.01f); + } +} + +TEST_F (DirectFIRTest, Reset) +{ + DirectFIR fir; + std::vector coefficients = { 1.0f, 0.8f, 0.6f, 0.4f, 0.2f }; + fir.setCoefficients (coefficients); + + std::vector input (20); + fillWithRandomData (input); + std::vector output1 (20, 0.0f); + + // Process some data to build up internal state + fir.process (input.data(), output1.data(), input.size()); + + // Reset and process same input + fir.reset(); + std::vector output2 (20, 0.0f); + fir.process (input.data(), output2.data(), input.size()); + + // Outputs should be identical after reset + for (size_t i = 0; i < output1.size(); ++i) + EXPECT_NEAR (output1[i], output2[i], 0.0001f); +} + +//============================================================================== +// Signal Processing Tests +//============================================================================== + +TEST_F (DirectFIRTest, LowpassFiltering) +{ + DirectFIR fir; + + // Create lowpass filter coefficients + std::vector coefficients = createLowpassCoefficients (64, 1000.0f, 44100.0f); + fir.setCoefficients (coefficients); + + const float sampleRate = 44100.0f; + const size_t bufferSize = 2048; + + // Test with low frequency (should pass) + std::vector lowFreqInput (bufferSize); + fillWithSine (lowFreqInput, 500.0f, sampleRate); + std::vector lowFreqOutput (bufferSize, 0.0f); + + fir.process (lowFreqInput.data(), 
lowFreqOutput.data(), bufferSize); + + // Test with high frequency (should be attenuated) + fir.reset(); + std::vector highFreqInput (bufferSize); + fillWithSine (highFreqInput, 5000.0f, sampleRate); + std::vector highFreqOutput (bufferSize, 0.0f); + + fir.process (highFreqInput.data(), highFreqOutput.data(), bufferSize); + + // Compare RMS levels (skip first samples due to transient) + const size_t skipSamples = 100; + float lowFreqRMS = 0.0f, highFreqRMS = 0.0f; + + for (size_t i = skipSamples; i < bufferSize; ++i) + { + lowFreqRMS += lowFreqOutput[i] * lowFreqOutput[i]; + highFreqRMS += highFreqOutput[i] * highFreqOutput[i]; + } + + lowFreqRMS = std::sqrt (lowFreqRMS / (bufferSize - skipSamples)); + highFreqRMS = std::sqrt (highFreqRMS / (bufferSize - skipSamples)); + + // Low frequency should have higher RMS than high frequency + EXPECT_GT (lowFreqRMS, highFreqRMS * 2.0f); +} + +TEST_F (DirectFIRTest, BlockSizeIndependence) +{ + DirectFIR fir; + std::vector coefficients = createLowpassCoefficients (48, 2000.0f, 44100.0f); + fir.setCoefficients (coefficients); + + const size_t totalSamples = 1024; + std::vector input (totalSamples); + fillWithRandomData (input); + + // Process in one big block + fir.reset(); + std::vector output1 (totalSamples, 0.0f); + fir.process (input.data(), output1.data(), totalSamples); + + // Process in smaller blocks + fir.reset(); + std::vector output2 (totalSamples, 0.0f); + const std::vector blockSizes = { 32, 64, 128, 256, 32, 128, 64 }; + size_t processed = 0; + + for (size_t blockSize : blockSizes) + { + if (processed >= totalSamples) + break; + + if (processed + blockSize > totalSamples) + blockSize = totalSamples - processed; + + if (blockSize == 0) + break; + + fir.process (input.data() + processed, output2.data() + processed, blockSize); + processed += blockSize; + } + + // Process any remaining samples + while (processed < totalSamples) + { + size_t remaining = totalSamples - processed; + size_t blockSize = std::min 
(remaining, size_t (128)); // Process in chunks of 128 + fir.process (input.data() + processed, output2.data() + processed, blockSize); + processed += blockSize; + } + + // Outputs should be identical regardless of block size + for (size_t i = 0; i < totalSamples; ++i) + EXPECT_NEAR (output1[i], output2[i], 0.0001f); +} + +//============================================================================== +// Edge Cases and Error Handling +//============================================================================== + +TEST_F (DirectFIRTest, ZeroSamples) +{ + DirectFIR fir; + std::vector coefficients = { 1.0f, 0.5f }; + fir.setCoefficients (coefficients); + + std::vector input (10, 1.0f); + std::vector output (10, 0.0f); + + // Processing zero samples should be safe + EXPECT_NO_THROW (fir.process (input.data(), output.data(), 0)); + + // Output should remain unchanged + for (float sample : output) + EXPECT_EQ (sample, 0.0f); +} + +TEST_F (DirectFIRTest, NullPointers) +{ + DirectFIR fir; + std::vector coefficients = { 1.0f }; + fir.setCoefficients (coefficients); + + std::vector buffer (10, 0.0f); + + // Null input pointer should be handled gracefully + EXPECT_NO_THROW (fir.process (nullptr, buffer.data(), 10)); + + // Null output pointer should be handled gracefully + EXPECT_NO_THROW (fir.process (buffer.data(), nullptr, 10)); + + // Both null should be handled gracefully + EXPECT_NO_THROW (fir.process (nullptr, nullptr, 10)); +} + +TEST_F (DirectFIRTest, LargeTapCounts) +{ + DirectFIR fir; + + // Test with relatively large number of coefficients + std::vector coefficients (512); + for (size_t i = 0; i < coefficients.size(); ++i) + coefficients[i] = std::exp (-static_cast (i) / 100.0f) * std::sin (2.0f * MathConstants::pi * i / 16.0f); + + EXPECT_NO_THROW (fir.setCoefficients (coefficients)); + EXPECT_EQ (fir.getNumCoefficients(), 512); + + // Should process without issues + std::vector input (1024); + std::vector output (1024, 0.0f); + fillWithRandomData (input); 
+ + EXPECT_NO_THROW (fir.process (input.data(), output.data(), input.size())); + + // Should produce reasonable output + float rms = calculateRMS (output); + EXPECT_GT (rms, 0.001f); + EXPECT_LT (rms, 10.0f); +} + +TEST_F (DirectFIRTest, SingleTap) +{ + DirectFIR fir; + std::vector coefficients = { 0.75f }; + fir.setCoefficients (coefficients); + + EXPECT_EQ (fir.getNumCoefficients(), 1); + + // Single coefficient should act as a simple gain + std::vector input = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; + std::vector output (5, 0.0f); + + fir.process (input.data(), output.data(), input.size()); + + for (size_t i = 0; i < input.size(); ++i) + EXPECT_NEAR (output[i], input[i] * 0.75f, 0.001f); +} + +//============================================================================== +// Performance and Memory Tests +//============================================================================== + +TEST_F (DirectFIRTest, MemoryAlignment) +{ + DirectFIR fir; + + // Coefficient count that's not a multiple of 4 + std::vector coefficients (37); + std::fill (coefficients.begin(), coefficients.end(), 0.1f); + fir.setCoefficients (coefficients); + + // Coefficients should be padded for SIMD alignment + const auto& coeffs = fir.getCoefficients(); + EXPECT_EQ (coeffs.size() % 4, 0); // Should be multiple of 4 + EXPECT_GE (coeffs.size(), 37); // Should be at least original size + + // Padded elements should be zero + for (size_t i = 37; i < coeffs.size(); ++i) + EXPECT_EQ (coeffs[i], 0.0f); +} + +TEST_F (DirectFIRTest, StressTest) +{ + DirectFIR fir; + + // Create complex impulse response + std::vector coefficients (256); + for (size_t i = 0; i < coefficients.size(); ++i) + { + float t = static_cast (i) / 256.0f; + coefficients[i] = std::exp (-t * 5.0f) * std::cos (20.0f * MathConstants::pi * t); + } + fir.setCoefficients (coefficients); + + // Process multiple blocks of varying sizes + const std::vector blockSizes = { 1, 7, 32, 63, 128, 255, 512, 1023 }; + + for (size_t blockSize : 
blockSizes) + { + SCOPED_TRACE (testing::Message() << "Block size: " << blockSize); + + std::vector input (blockSize); + std::vector output (blockSize, 0.0f); + fillWithRandomData (input); + + EXPECT_NO_THROW (fir.process (input.data(), output.data(), blockSize)); + + // Verify output quality + for (float sample : output) + { + EXPECT_TRUE (std::isfinite (sample)); + EXPECT_LT (std::abs (sample), 100.0f); // Reasonable bounds + } + } +} + +} // namespace yup::test \ No newline at end of file diff --git a/tests/yup_dsp/yup_PartitionedConvolver.cpp b/tests/yup_dsp/yup_PartitionedConvolver.cpp index b8a4f09b7..5f83c46b1 100644 --- a/tests/yup_dsp/yup_PartitionedConvolver.cpp +++ b/tests/yup_dsp/yup_PartitionedConvolver.cpp @@ -490,10 +490,10 @@ TEST_F (PartitionedConvolverTest, LatencyMeasurement) { 256, { 256, 1024 } } }; - for (const auto& [directTaps, hops] : configs) + for (const auto& [directCoefficients, hops] : configs) { PartitionedConvolver convolver; - convolver.setTypicalLayout (directTaps, hops); + convolver.setTypicalLayout (directCoefficients, hops); convolver.prepare (1024); // Unit impulse response @@ -526,8 +526,8 @@ TEST_F (PartitionedConvolverTest, LatencyMeasurement) EXPECT_LE (latencySamples, static_cast (maxHop * 2)); // With direct FIR, latency should be minimal - if (directTaps > 0) - EXPECT_LE (latencySamples, directTaps); + if (directCoefficients > 0) + EXPECT_LE (latencySamples, directCoefficients); } } @@ -537,9 +537,9 @@ TEST_F (PartitionedConvolverTest, LatencyMeasurement) TEST_F (PartitionedConvolverTest, VariousPartitionSizes) { - // Test various partition configurations - all with direct taps for immediate response + // Test various partition configurations - all with direct coefficients for immediate response std::vector, size_t>> testConfigs = { - // (directTaps, hops, maxBlockSize) + // (directCoefficients, hops, maxBlockSize) { 64, { 64 }, 512 }, { 32, { 64 }, 512 }, { 64, { 64, 256 }, 512 }, @@ -553,11 +553,11 @@ TEST_F 
(PartitionedConvolverTest, VariousPartitionSizes) for (const auto& item : testConfigs) { - const auto& directTaps = std::get<0> (item); + const auto& directCoefficients = std::get<0> (item); const auto& hops = std::get<1> (item); const auto& maxBlockSize = std::get<2> (item); - SCOPED_TRACE (testing::Message() << "Config: directTaps=" << directTaps << " hops=[" << [&]() + SCOPED_TRACE (testing::Message() << "Config: directCoefficients=" << directCoefficients << " hops=[" << [&]() { std::string hopStr; for (size_t i = 0; i < hops.size(); ++i) @@ -572,7 +572,7 @@ TEST_F (PartitionedConvolverTest, VariousPartitionSizes) PartitionedConvolver convolver; // Configure and verify setup - EXPECT_NO_THROW (convolver.setTypicalLayout (directTaps, hops)); + EXPECT_NO_THROW (convolver.setTypicalLayout (directCoefficients, hops)); EXPECT_NO_THROW (convolver.prepare (maxBlockSize)); // Create a simple known impulse response @@ -749,7 +749,7 @@ TEST_F (PartitionedConvolverTest, RandomizedFuzzing) { // Generate random configurations and test them std::uniform_int_distribution hopDist (32, 2048); - std::uniform_int_distribution directTapsDist (32, 512); // Always have some direct taps + std::uniform_int_distribution directCoefficientsDist (32, 512); // Always have some direct coefficients std::uniform_int_distribution blockSizeDist (32, 1024); for (int trial = 0; trial < 10; ++trial) // Reduce trials for stability @@ -757,7 +757,7 @@ TEST_F (PartitionedConvolverTest, RandomizedFuzzing) SCOPED_TRACE (testing::Message() << "Fuzzing trial " << trial); // Generate random configuration - const size_t directTaps = directTapsDist (generator); + const size_t directCoefficients = directCoefficientsDist (generator); const size_t numLayers = 1 + (generator() % 3); // 1-3 layers std::vector hops; @@ -777,7 +777,7 @@ TEST_F (PartitionedConvolverTest, RandomizedFuzzing) try { - convolver.setTypicalLayout (directTaps, hops); + convolver.setTypicalLayout (directCoefficients, hops); 
convolver.prepare (maxBlockSize); // Simple impulse response From 9ffd8d2e2e72a29cd45779ecacf5d95855ab14e3 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Fri, 12 Sep 2025 08:23:12 +0200 Subject: [PATCH 33/37] FIR Implementation --- .../convolution/yup_PartitionedConvolver.cpp | 4 +- .../yup_dsp/designers/yup_FilterDesigner.cpp | 166 ++++++++++ .../yup_dsp/designers/yup_FilterDesigner.h | 72 ++++ modules/yup_dsp/filters/yup_DirectFIR.cpp | 227 ------------- modules/yup_dsp/filters/yup_DirectFIR.h | 207 ++++++++++-- modules/yup_dsp/utilities/yup_DspMath.cpp | 104 ++++++ modules/yup_dsp/utilities/yup_DspMath.h | 18 + modules/yup_dsp/yup_dsp.cpp | 2 +- tests/yup_dsp/yup_DirectFIR.cpp | 48 +-- tests/yup_dsp/yup_FilterDesigner.cpp | 307 ++++++++++++++++++ 10 files changed, 875 insertions(+), 280 deletions(-) delete mode 100644 modules/yup_dsp/filters/yup_DirectFIR.cpp create mode 100644 modules/yup_dsp/utilities/yup_DspMath.cpp diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index 566ba72cf..f5be2c0fb 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -537,7 +537,7 @@ class PartitionedConvolver::Impl void setImpulseResponse (const float* impulseResponse, std::size_t length, const PartitionedConvolver::IRLoadOptions& options) { - DirectFIR newFIR; + DirectFIRFloat newFIR; std::vector newLayers (layers.size()); std::size_t trimmedLength = length; @@ -765,7 +765,7 @@ class PartitionedConvolver::Impl std::size_t finalImpulseLength = 0; bool isPrepared = false; - DirectFIR directFIR; + DirectFIRFloat directFIR; std::vector layers; // Working buffers diff --git a/modules/yup_dsp/designers/yup_FilterDesigner.cpp b/modules/yup_dsp/designers/yup_FilterDesigner.cpp index fc32560d1..a92949a08 100644 --- a/modules/yup_dsp/designers/yup_FilterDesigner.cpp +++ b/modules/yup_dsp/designers/yup_FilterDesigner.cpp @@ -625,6 
+625,172 @@ int FilterDesigner::designLinkwitzRiley ( return static_cast (lowCoeffs.size()); } +//============================================================================== +// FIR Filter Design Implementations +//============================================================================== + +template +std::vector FilterDesigner::designFIRLowpass ( + int numCoefficients, + CoeffType cutoffFreq, + double sampleRate, + WindowType windowType) noexcept +{ + jassert (numCoefficients > 0); + jassert (cutoffFreq > static_cast (0.0)); + jassert (sampleRate > 0.0); + jassert (cutoffFreq < static_cast (sampleRate / 2.0)); + + numCoefficients = nextOdd (numCoefficients); + std::vector coefficients (numCoefficients); + + const auto normalizedCutoff = static_cast (2.0) * cutoffFreq / static_cast (sampleRate); + const int center = (numCoefficients - 1) / 2; + + // Generate ideal lowpass sinc function + for (int i = 0; i < numCoefficients; ++i) + { + if (i == center) + { + coefficients[i] = normalizedCutoff; + } + else + { + const auto x = MathConstants::pi * normalizedCutoff * static_cast (i - center); + coefficients[i] = std::sin (x) / (MathConstants::pi * static_cast (i - center)); + } + } + + // Apply window function + for (int i = 0; i < numCoefficients; ++i) + { + const auto windowValue = WindowFunctions::getValue (windowType, i, numCoefficients); + coefficients[i] *= windowValue; + } + + // Normalization + const auto sum = std::accumulate (coefficients.begin(), coefficients.end(), static_cast (0.0)); + if (sum != static_cast (0.0)) + for (auto& c : coefficients) + c /= sum; + + return coefficients; +} + +template +std::vector FilterDesigner::designFIRHighpass ( + int numCoefficients, + CoeffType cutoffFreq, + double sampleRate, + WindowType windowType) noexcept +{ + jassert (numCoefficients > 0); + jassert (cutoffFreq > static_cast (0.0)); + jassert (sampleRate > 0.0); + jassert (cutoffFreq < static_cast (sampleRate / 2.0)); + + // Generate lowpass first + 
numCoefficients = nextOdd (numCoefficients); + auto coefficients = designFIRLowpass (numCoefficients, cutoffFreq, sampleRate, windowType); + + // Convert to highpass using spectral inversion + const int center = (numCoefficients - 1) / 2; + for (int i = 0; i < numCoefficients; ++i) + coefficients[i] = -coefficients[i]; + + // Add unit impulse at center + coefficients[center] += static_cast (1.0); + + // Normalization + CoeffType hpi (0.0); + for (int n = 0; n < numCoefficients; ++n) + hpi += coefficients[n] * ((n & 1) ? static_cast (-1.0) : static_cast (1.0)); + + if (hpi != static_cast (0.0)) + for (auto& c : coefficients) + c /= hpi; + + return coefficients; +} + +template +std::vector FilterDesigner::designFIRBandpass ( + int numCoefficients, + CoeffType lowCutoffFreq, + CoeffType highCutoffFreq, + double sampleRate, + WindowType windowType) noexcept +{ + jassert (numCoefficients > 0); + jassert (lowCutoffFreq > static_cast (0.0)); + jassert (highCutoffFreq > lowCutoffFreq); + jassert (sampleRate > 0.0); + jassert (highCutoffFreq < static_cast (sampleRate / 2.0)); + + numCoefficients = nextOdd (numCoefficients); + std::vector coefficients (numCoefficients); + + const auto normalizedLow = static_cast (2.0) * lowCutoffFreq / static_cast (sampleRate); + const auto normalizedHigh = static_cast (2.0) * highCutoffFreq / static_cast (sampleRate); + const int center = (numCoefficients - 1) / 2; + + // Generate ideal bandpass as difference of two sinc functions + for (int i = 0; i < numCoefficients; ++i) + { + if (i == center) + { + coefficients[i] = normalizedHigh - normalizedLow; + } + else + { + const auto n = static_cast (i - center); + const auto xHigh = MathConstants::pi * normalizedHigh * n; + const auto xLow = MathConstants::pi * normalizedLow * n; + + coefficients[i] = (std::sin (xHigh) - std::sin (xLow)) / (MathConstants::pi * n); + } + } + + // Apply window function + for (int i = 0; i < numCoefficients; ++i) + { + const auto windowValue = 
WindowFunctions::getValue (windowType, i, numCoefficients); + coefficients[i] *= windowValue; + } + + return coefficients; +} + +template +std::vector FilterDesigner::designFIRBandstop ( + int numCoefficients, + CoeffType lowCutoffFreq, + CoeffType highCutoffFreq, + double sampleRate, + WindowType windowType) noexcept +{ + jassert (numCoefficients > 0); + jassert (lowCutoffFreq > static_cast (0.0)); + jassert (highCutoffFreq > lowCutoffFreq); + jassert (sampleRate > 0.0); + jassert (highCutoffFreq < static_cast (sampleRate / 2.0)); + + // Generate bandpass first + numCoefficients = nextOdd (numCoefficients); + auto coefficients = designFIRBandpass (numCoefficients, lowCutoffFreq, highCutoffFreq, sampleRate, windowType); + + // Convert to bandstop using spectral inversion + const int center = (numCoefficients - 1) / 2; + + for (int i = 0; i < numCoefficients; ++i) + coefficients[i] = -coefficients[i]; + + // Add unit impulse at center + coefficients[center] += static_cast (1.0); + + return coefficients; +} + //============================================================================== template class FilterDesigner; diff --git a/modules/yup_dsp/designers/yup_FilterDesigner.h b/modules/yup_dsp/designers/yup_FilterDesigner.h index 0dba2770e..baed267a2 100644 --- a/modules/yup_dsp/designers/yup_FilterDesigner.h +++ b/modules/yup_dsp/designers/yup_FilterDesigner.h @@ -625,6 +625,78 @@ class FilterDesigner { return designLinkwitzRiley (8, crossoverFreq, sampleRate, lowCoeffs, highCoeffs); } + + //============================================================================== + // FIR Filter Design + //============================================================================== + + /** + Designs FIR lowpass filter coefficients using windowed sinc method. 
+ + @param numCoefficients The number of filter coefficients (filter order + 1); passed through nextOdd() by the implementation, so the returned size may differ from the request + @param cutoffFreq The cutoff frequency in Hz + @param sampleRate The sample rate in Hz + @param windowType The window function to apply (default: Hann) + + @returns Vector of FIR coefficients suitable for DirectFIR + */ + static std::vector designFIRLowpass ( + int numCoefficients, + CoeffType cutoffFreq, + double sampleRate, + WindowType windowType = WindowType::hann) noexcept; + + /** + Designs FIR highpass filter coefficients using windowed sinc method. + + @param numCoefficients The number of filter coefficients (filter order + 1); passed through nextOdd() by the implementation, so the returned size may differ from the request + @param cutoffFreq The cutoff frequency in Hz + @param sampleRate The sample rate in Hz + @param windowType The window function to apply (default: Hann) + + @returns Vector of FIR coefficients suitable for DirectFIR + */ + static std::vector designFIRHighpass ( + int numCoefficients, + CoeffType cutoffFreq, + double sampleRate, + WindowType windowType = WindowType::hann) noexcept; + + /** + Designs FIR bandpass filter coefficients using windowed sinc method. + + @param numCoefficients The number of filter coefficients (filter order + 1); passed through nextOdd() by the implementation, so the returned size may differ from the request + @param lowCutoffFreq The lower cutoff frequency in Hz + @param highCutoffFreq The upper cutoff frequency in Hz + @param sampleRate The sample rate in Hz + @param windowType The window function to apply (default: Hann) + + @returns Vector of FIR coefficients suitable for DirectFIR + */ + static std::vector designFIRBandpass ( + int numCoefficients, + CoeffType lowCutoffFreq, + CoeffType highCutoffFreq, + double sampleRate, + WindowType windowType = WindowType::hann) noexcept; + + /** + Designs FIR bandstop filter coefficients using windowed sinc method. 
+ + @param numCoefficients The number of filter coefficients (filter order + 1) + @param lowCutoffFreq The lower cutoff frequency in Hz + @param highCutoffFreq The upper cutoff frequency in Hz + @param sampleRate The sample rate in Hz + @param windowType The window function to apply (default: Hanning) + + @returns Vector of FIR coefficients suitable for DirectFIR + */ + static std::vector designFIRBandstop ( + int numCoefficients, + CoeffType lowCutoffFreq, + CoeffType highCutoffFreq, + double sampleRate, + WindowType windowType = WindowType::hann) noexcept; }; } // namespace yup diff --git a/modules/yup_dsp/filters/yup_DirectFIR.cpp b/modules/yup_dsp/filters/yup_DirectFIR.cpp deleted file mode 100644 index 1c5d34da9..000000000 --- a/modules/yup_dsp/filters/yup_DirectFIR.cpp +++ /dev/null @@ -1,227 +0,0 @@ -/* - ============================================================================== - - This file is part of the YUP library. - Copyright (c) 2025 - kunitoki@gmail.com - - YUP is an open source library subject to open-source licensing. - - The code included in this file is provided under the terms of the ISC license - http://www.isc.org/downloads/software-support-policy/isc-license. Permission - to use, copy, modify, and/or distribute this software for any purpose with or - without fee is hereby granted provided that the above copyright notice and - this permission notice appear in all copies. - - YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER - EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE - DISCLAIMED. 
- - ============================================================================== -*/ - -namespace yup -{ - -namespace -{ - -//============================================================================== - -float dotProduct (const float* __restrict a, const float* __restrict b, std::size_t len) noexcept -{ - float acc = 0.0f; - std::size_t i = 0; - -#if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS - __m256 vacc = _mm256_setzero_ps(); - for (; i + 8 <= len; i += 8) - { - __m256 va = _mm256_loadu_ps (a + i); - __m256 vb = _mm256_loadu_ps (b + i); - vacc = _mm256_fmadd_ps (va, vb, vacc); - } - __m128 low = _mm256_castps256_ps128 (vacc); - __m128 high = _mm256_extractf128_ps (vacc, 1); - __m128 vsum = _mm_add_ps (low, high); - vsum = _mm_hadd_ps (vsum, vsum); - vsum = _mm_hadd_ps (vsum, vsum); - acc += _mm_cvtss_f32 (vsum); - -#elif YUP_USE_SSE_INTRINSICS - __m128 vacc = _mm_setzero_ps(); -#if YUP_USE_FMA_INTRINSICS - for (; i + 4 <= len; i += 4) - { - __m128 va = _mm_loadu_ps (a + i); - __m128 vb = _mm_loadu_ps (b + i); - vacc = _mm_fmadd_ps (va, vb, vacc); - } -#else - for (; i + 4 <= len; i += 4) - { - __m128 va = _mm_loadu_ps (a + i); - __m128 vb = _mm_loadu_ps (b + i); - vacc = _mm_add_ps (vacc, _mm_mul_ps (va, vb)); - } -#endif - __m128 shuf = _mm_shuffle_ps (vacc, vacc, _MM_SHUFFLE (2, 3, 0, 1)); - __m128 sums = _mm_add_ps (vacc, shuf); - shuf = _mm_movehl_ps (shuf, sums); - sums = _mm_add_ss (sums, shuf); - acc += _mm_cvtss_f32 (sums); - -#elif YUP_USE_ARM_NEON - float32x4_t vacc = vdupq_n_f32 (0.0f); - for (; i + 4 <= len; i += 4) - { - float32x4_t va = vld1q_f32 (a + i); - float32x4_t vb = vld1q_f32 (b + i); - vacc = vmlaq_f32 (vacc, va, vb); - } -#if YUP_64BIT - acc += vaddvq_f32 (vacc); -#else - float32x2_t vlow = vget_low_f32 (vacc); - float32x2_t vhigh = vget_high_f32 (vacc); - float32x2_t vsum2 = vpadd_f32 (vlow, vhigh); - vsum2 = vpadd_f32 (vsum2, vsum2); - acc += vget_lane_f32 (vsum2, 0); -#endif - -#endif - - // Handle remaining samples - for 
(; i < len; ++i) - acc += a[i] * b[i]; - - return acc; -} - -} // namespace - -//============================================================================== - -DirectFIR::DirectFIR() = default; - -DirectFIR::~DirectFIR() = default; - -DirectFIR::DirectFIR (DirectFIR&& other) noexcept - : coefficientsReversed (std::move (other.coefficientsReversed)) - , history (std::move (other.history)) - , numCoefficients (std::exchange (other.numCoefficients, 0)) - , paddedLen (std::exchange (other.paddedLen, 0)) - , writeIndex (std::exchange (other.writeIndex, 0)) - , currentScaling (std::exchange (other.currentScaling, 1.0f)) -{ -} - -DirectFIR& DirectFIR::operator= (DirectFIR&& other) noexcept -{ - if (this != &other) - { - coefficientsReversed = std::move (other.coefficientsReversed); - history = std::move (other.history); - numCoefficients = std::exchange (other.numCoefficients, 0); - paddedLen = std::exchange (other.paddedLen, 0); - writeIndex = std::exchange (other.writeIndex, 0); - currentScaling = std::exchange (other.currentScaling, 1.0f); - } - return *this; -} - -void DirectFIR::setCoefficients (std::vector coefficients, float scaling) -{ - currentScaling = scaling; - if (! 
approximatelyEqual (currentScaling, 1.0f)) - FloatVectorOperations::multiply (coefficients.data(), scaling, coefficients.size()); - - coefficientsReversed = std::move (coefficients); - std::reverse (coefficientsReversed.begin(), coefficientsReversed.end()); - - numCoefficients = coefficientsReversed.size(); - paddedLen = (numCoefficients + 3u) & ~3u; // Round up to multiple of 4 for SIMD - coefficientsReversed.resize (paddedLen, 0.0f); - - history.assign (2 * numCoefficients, 0.0f); - - reset(); -} - -void DirectFIR::setCoefficients (const float* coefficients, std::size_t numCoefficientsIn, float scaling) -{ - if (coefficients == nullptr || numCoefficientsIn == 0) - { - reset(); - numCoefficients = 0; - return; - } - - std::vector coefficientsVector (coefficients, coefficients + numCoefficientsIn); - setCoefficients (std::move (coefficientsVector), scaling); -} - -std::size_t DirectFIR::getNumCoefficients() const noexcept -{ - return numCoefficients; -} - -bool DirectFIR::hasCoefficients() const noexcept -{ - return numCoefficients > 0; -} - -const std::vector& DirectFIR::getCoefficients() const noexcept -{ - return coefficientsReversed; -} - -float DirectFIR::getScaling() const noexcept -{ - return currentScaling; -} - -void DirectFIR::reset() -{ - std::fill (history.begin(), history.end(), 0.0f); - writeIndex = 0; -} - -void DirectFIR::process (const float* input, float* output, std::size_t numSamples) noexcept -{ - const std::size_t M = numCoefficients; - if (M == 0 || input == nullptr || output == nullptr) - return; - - const float* h = coefficientsReversed.data(); - - for (std::size_t i = 0; i < numSamples; ++i) - { - const float x = input[i]; - - // Update circular buffer with current input sample - history[writeIndex] = x; - history[writeIndex + M] = x; // Duplicate for efficient circular access - - // Point to the start of the delay line for this sample - const float* w = history.data() + writeIndex + 1; - - float sum = 0.0f; - -#if YUP_ENABLE_VDSP - // Use 
Apple's optimized vDSP if available - vDSP_dotpr (w, 1, h, 1, &sum, M); -#else - // Use our own SIMD-optimized dot product - sum = dotProduct (w, h, M); -#endif - - // Accumulate result into output - output[i] += sum; - - // Advance circular buffer write pointer - if (++writeIndex == M) - writeIndex = 0; - } -} - -} // namespace yup diff --git a/modules/yup_dsp/filters/yup_DirectFIR.h b/modules/yup_dsp/filters/yup_DirectFIR.h index 23abdaeb6..cdedac031 100644 --- a/modules/yup_dsp/filters/yup_DirectFIR.h +++ b/modules/yup_dsp/filters/yup_DirectFIR.h @@ -38,40 +38,65 @@ namespace yup - Circular buffer implementation for efficient sample history management - Real-time safe processing (no heap allocations during process()) - Support for arbitrary block sizes + - Inherits FilterBase interface for frequency response analysis Example usage: @code - DirectFIR fir; + DirectFIR fir; // Set filter coefficients (e.g., lowpass filter) - std::vector coeffs = calculateLowpassCoeffs(44100.0f, 1000.0f, 64); - fir.setTaps(coeffs, 1.0f); // coeffs with 1.0x scaling + auto coeffs = FilterDesigner::designFIRLowpass(64, 1000.0f, 44100.0); + fir.setCoefficients(coeffs); // Prepare for processing - fir.prepare(512); // Maximum 512 samples per process() call + fir.prepare(44100.0, 512); // In audio callback: - fir.process(inputBuffer, outputBuffer, numSamples); // Accumulates into output + fir.processBlock(inputBuffer, outputBuffer, numSamples); @endcode - @note The process() method accumulates results into the output buffer. - Clear the output buffer first if overwrite behavior is desired. 
+ @tparam SampleType Type for audio samples (float or double) + @tparam CoeffType Type for internal coefficients (defaults to double) @see PartitionedConvolver for longer impulse responses using FFT-based convolution + @see FilterBase for frequency response methods */ -class DirectFIR +template +class DirectFIR : public FilterBase { public: //============================================================================== /** Default constructor */ - DirectFIR(); + DirectFIR() = default; /** Destructor */ - ~DirectFIR(); - - // Non-copyable but movable - DirectFIR (DirectFIR&& other) noexcept; - DirectFIR& operator= (DirectFIR&& other) noexcept; + ~DirectFIR() override = default; + + /** Move constructor */ + DirectFIR (DirectFIR&& other) noexcept + : coefficientsReversed (std::move (other.coefficientsReversed)) + , history (std::move (other.history)) + , numCoefficients (std::exchange (other.numCoefficients, 0)) + , paddedLen (std::exchange (other.paddedLen, 0)) + , writeIndex (std::exchange (other.writeIndex, 0)) + , currentScaling (std::exchange (other.currentScaling, CoeffType (1))) + { + } + + /** Move assignment */ + DirectFIR& operator= (DirectFIR&& other) noexcept + { + if (this != &other) + { + coefficientsReversed = std::move (other.coefficientsReversed); + history = std::move (other.history); + numCoefficients = std::exchange (other.numCoefficients, 0); + paddedLen = std::exchange (other.paddedLen, 0); + writeIndex = std::exchange (other.writeIndex, 0); + currentScaling = std::exchange (other.currentScaling, CoeffType (1)); + } + return *this; + } //============================================================================== /** @@ -83,7 +108,22 @@ class DirectFIR @note This method is not real-time safe and should be called during initialization or when audio processing is paused. 
*/ - void setCoefficients (std::vector coefficients, float scaling = 1.0f); + void setCoefficients (std::vector coefficients, CoeffType scaling = CoeffType (1)) + { + currentScaling = scaling; + if (! approximatelyEqual (currentScaling, 1.0f)) + FloatVectorOperations::multiply (coefficients.data(), scaling, coefficients.size()); + + coefficientsReversed = std::move (coefficients); + std::reverse (coefficientsReversed.begin(), coefficientsReversed.end()); + + numCoefficients = coefficientsReversed.size(); + paddedLen = (numCoefficients + 3u) & ~3u; // Round up to multiple of 4 for SIMD + coefficientsReversed.resize (paddedLen, 0.0f); + + history.assign (2 * numCoefficients, 0.0f); + reset(); + } /** Set the FIR filter coefficients from a raw pointer. @@ -95,45 +135,151 @@ class DirectFIR @note This method is not real-time safe and should be called during initialization or when audio processing is paused. */ - void setCoefficients (const float* coefficients, std::size_t numCoefficients, float scaling = 1.0f); + void setCoefficients (const CoeffType* coefficients, std::size_t numCoefficientsIn, CoeffType scaling = CoeffType (1)) + { + if (coefficients == nullptr || numCoefficientsIn == 0) + { + reset(); + numCoefficients = 0; + return; + } + + std::vector coefficientsVector (coefficients, coefficients + numCoefficientsIn); + setCoefficients (std::move (coefficientsVector), scaling); + } /** Get the number of filter coefficients. @return Number of coefficients in the current filter */ - std::size_t getNumCoefficients() const noexcept; + std::size_t getNumCoefficients() const noexcept + { + return numCoefficients; + } /** Check if the filter has been configured with coefficients. @return True if coefficients have been set, false otherwise */ - bool hasCoefficients() const noexcept; + bool hasCoefficients() const noexcept + { + return numCoefficients > 0; + } /** Get the current filter coefficients. 
@return Vector containing the current coefficients (time-reversed for processing) */ - const std::vector& getCoefficients() const noexcept; + const std::vector& getCoefficients() const noexcept + { + return coefficientsReversed; + } /** Get the current scaling factor applied to coefficients. @return Current scaling factor */ - float getScaling() const noexcept; + CoeffType getScaling() const noexcept + { + return currentScaling; + } //============================================================================== /** Reset all internal processing state (clears sample history). Filter coefficients are preserved. */ - void reset(); + void reset() noexcept override + { + std::fill (history.begin(), history.end(), 0.0f); + writeIndex = 0; + } + + /** + Prepares the filter for processing with the given sample rate and block size. + + @param sampleRate The sample rate in Hz + @param maximumBlockSize The maximum number of samples that will be processed at once + */ + void prepare (double sampleRate, int maximumBlockSize) override + { + this->sampleRate = sampleRate; + this->maximumBlockSize = maximumBlockSize; + } + + /** + Processes a single sample. + + @param inputSample The input sample to process + @returns The filtered output sample + */ + SampleType processSample (SampleType inputSample) noexcept override + { + const std::size_t M = numCoefficients; + const CoeffType* h = coefficientsReversed.data(); + + // Update circular buffer with current input sample + history[writeIndex] = inputSample; + history[writeIndex + M] = inputSample; // Duplicate for efficient circular access + + // Point to the start of the delay line for this sample + const SampleType* w = history.data() + writeIndex + 1; + + // Advance circular buffer write pointer + if (++writeIndex == M) + writeIndex = 0; + + return dotProduct (w, h, M); + } /** - Process audio samples through the FIR filter. + Processes a block of samples. 
+ + @param inputBuffer Pointer to the input samples + @param outputBuffer Pointer to the output buffer + @param numSamples Number of samples to process + */ + void processBlock (const SampleType* inputBuffer, SampleType* outputBuffer, int numSamples) noexcept override + { + if (numCoefficients == 0 || inputBuffer == nullptr || outputBuffer == nullptr) + return; + + for (int i = 0; i < numSamples; ++i) + outputBuffer[i] += processSample (inputBuffer[i]); + } + + /** + Returns the complex frequency response at the given frequency. + + @param frequency The frequency in Hz + @returns The complex frequency response + */ + Complex getComplexResponse (CoeffType frequency) const override + { + if (numCoefficients == 0) + return Complex (0, 0); + + const CoeffType omega = MathConstants::twoPi * frequency / static_cast (this->sampleRate); + + Complex response (0, 0); + + // H(e^jω) = Σ h[n] * e^(-jωn) for n = 0 to N-1 + for (std::size_t n = 0; n < numCoefficients; ++n) + { + const CoeffType angle = -omega * static_cast (n); + Complex exponential (std::cos (angle), std::sin (angle)); + response += static_cast (coefficientsReversed[numCoefficients - 1 - n]) * exponential; + } + + return response; + } + + /** + Process audio samples through the FIR filter (legacy method). @param input Input audio buffer @param output Output audio buffer (results are accumulated) @@ -141,18 +287,27 @@ class DirectFIR @note Results are accumulated into the output buffer. Clear it first if needed. @note This method is real-time safe with no heap allocations. 
+ @note Use processBlock() for new code */ - void process (const float* input, float* output, std::size_t numSamples) noexcept; + void process (const SampleType* input, SampleType* output, std::size_t numSamples) noexcept + { + processBlock (input, output, static_cast (numSamples)); + } private: - std::vector coefficientsReversed; - std::vector history; + std::vector coefficientsReversed; + std::vector history; std::size_t numCoefficients = 0; std::size_t paddedLen = 0; std::size_t writeIndex = 0; - float currentScaling = 1.0f; + CoeffType currentScaling = CoeffType (1); YUP_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (DirectFIR) }; +//============================================================================== +/** Type aliases for backward compatibility and convenience */ +using DirectFIRFloat = DirectFIR; +using DirectFIRDouble = DirectFIR; + } // namespace yup diff --git a/modules/yup_dsp/utilities/yup_DspMath.cpp b/modules/yup_dsp/utilities/yup_DspMath.cpp new file mode 100644 index 000000000..b14bade41 --- /dev/null +++ b/modules/yup_dsp/utilities/yup_DspMath.cpp @@ -0,0 +1,104 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2025 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. 
+ + ============================================================================== +*/ + +namespace yup +{ + +//============================================================================== + +template <> +float dotProduct (const float* __restrict a, const float* __restrict b, std::size_t length) noexcept +{ + float accumulation = 0.0f; + +#if YUP_ENABLE_VDSP + vDSP_dotpr (a, 1, b, 1, &accumulation, length); + +#else + std::size_t i = 0; + +#if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS + __m256 vacc = _mm256_setzero_ps(); + for (; i + 8 <= length; i += 8) + { + __m256 va = _mm256_loadu_ps (a + i); + __m256 vb = _mm256_loadu_ps (b + i); + vacc = _mm256_fmadd_ps (va, vb, vacc); + } + __m128 low = _mm256_castps256_ps128 (vacc); + __m128 high = _mm256_extractf128_ps (vacc, 1); + __m128 vsum = _mm_add_ps (low, high); + vsum = _mm_hadd_ps (vsum, vsum); + vsum = _mm_hadd_ps (vsum, vsum); + accumulation += _mm_cvtss_f32 (vsum); + +#elif YUP_USE_SSE_INTRINSICS + __m128 vacc = _mm_setzero_ps(); +#if YUP_USE_FMA_INTRINSICS + for (; i + 4 <= length; i += 4) + { + __m128 va = _mm_loadu_ps (a + i); + __m128 vb = _mm_loadu_ps (b + i); + vacc = _mm_fmadd_ps (va, vb, vacc); + } +#else + for (; i + 4 <= length; i += 4) + { + __m128 va = _mm_loadu_ps (a + i); + __m128 vb = _mm_loadu_ps (b + i); + vacc = _mm_add_ps (vacc, _mm_mul_ps (va, vb)); + } +#endif + __m128 shuf = _mm_shuffle_ps (vacc, vacc, _MM_SHUFFLE (2, 3, 0, 1)); + __m128 sums = _mm_add_ps (vacc, shuf); + shuf = _mm_movehl_ps (shuf, sums); + sums = _mm_add_ss (sums, shuf); + accumulation += _mm_cvtss_f32 (sums); + +#elif YUP_USE_ARM_NEON + float32x4_t vacc = vdupq_n_f32 (0.0f); + for (; i + 4 <= length; i += 4) + { + float32x4_t va = vld1q_f32 (a + i); + float32x4_t vb = vld1q_f32 (b + i); + vacc = vmlaq_f32 (vacc, va, vb); + } +#if YUP_64BIT + accumulation += vaddvq_f32 (vacc); +#else + float32x2_t vlow = vget_low_f32 (vacc); + float32x2_t vhigh = vget_high_f32 (vacc); + float32x2_t vsum2 = vpadd_f32 (vlow, 
vhigh); + vsum2 = vpadd_f32 (vsum2, vsum2); + accumulation += vget_lane_f32 (vsum2, 0); +#endif + +#endif + + // Handle remaining samples + for (; i < length; ++i) + accumulation += a[i] * b[i]; +#endif + + return accumulation; +} + +} // namespace yup diff --git a/modules/yup_dsp/utilities/yup_DspMath.h b/modules/yup_dsp/utilities/yup_DspMath.h index 920522777..c058bbc2c 100644 --- a/modules/yup_dsp/utilities/yup_DspMath.h +++ b/modules/yup_dsp/utilities/yup_DspMath.h @@ -110,6 +110,24 @@ FloatType fastCos (FloatType x) noexcept //============================================================================== +/** Dot product fallback implementation */ +template +SampleType dotProduct (const CoeffType* __restrict a, const SampleType* __restrict b, std::size_t length) noexcept +{ + CoeffType acc = CoeffType (0); + + for (std::size_t i = 0; i < length; ++i) + acc += a[i] * static_cast (b[i]); + + return static_cast (acc); +} + +/** Fast specialization for dotProduct using SIMD */ +template <> +float dotProduct (const float* __restrict a, const float* __restrict b, std::size_t length) noexcept; + +//============================================================================== + /** Bilinear transform from s-plane to z-plane with frequency warping */ template void bilinearTransform (FloatType& a0, FloatType& a1, FloatType& a2, FloatType& b0, FloatType& b1, FloatType& b2, FloatType frequency, FloatType sampleRate) noexcept diff --git a/modules/yup_dsp/yup_dsp.cpp b/modules/yup_dsp/yup_dsp.cpp index 0c3488b70..85c09b0b8 100644 --- a/modules/yup_dsp/yup_dsp.cpp +++ b/modules/yup_dsp/yup_dsp.cpp @@ -75,8 +75,8 @@ #include "frequency/yup_FFTProcessor.cpp" #include "frequency/yup_SpectrumAnalyzerState.cpp" #include "designers/yup_FilterDesigner.cpp" -#include "filters/yup_DirectFIR.cpp" #include "convolution/yup_PartitionedConvolver.cpp" +#include "utilities/yup_DspMath.cpp" //============================================================================== diff --git 
a/tests/yup_dsp/yup_DirectFIR.cpp b/tests/yup_dsp/yup_DirectFIR.cpp index e46071a19..9d79d8066 100644 --- a/tests/yup_dsp/yup_DirectFIR.cpp +++ b/tests/yup_dsp/yup_DirectFIR.cpp @@ -118,7 +118,7 @@ class DirectFIRTest : public ::testing::Test TEST_F (DirectFIRTest, DefaultConstruction) { - DirectFIR fir; + DirectFIRFloat fir; // Default state should be safe EXPECT_EQ (fir.getNumCoefficients(), 0); @@ -137,12 +137,12 @@ TEST_F (DirectFIRTest, DefaultConstruction) TEST_F (DirectFIRTest, MoveSemantics) { - DirectFIR fir1; + DirectFIRFloat fir1; std::vector coefficients = { 1.0f, 0.5f, 0.25f }; fir1.setCoefficients (coefficients, 2.0f); // Move constructor - DirectFIR fir2 = std::move (fir1); + DirectFIRFloat fir2 = std::move (fir1); // Verify moved filter works EXPECT_EQ (fir2.getNumCoefficients(), 3); @@ -166,7 +166,7 @@ TEST_F (DirectFIRTest, MoveSemantics) EXPECT_GT (outputSum, 1.0f); // Should be > 1 due to scaling // Move assignment - DirectFIR fir3; + DirectFIRFloat fir3; fir3 = std::move (fir2); EXPECT_EQ (fir3.getNumCoefficients(), 3); @@ -180,7 +180,7 @@ TEST_F (DirectFIRTest, MoveSemantics) TEST_F (DirectFIRTest, SetCoefficientsVector) { - DirectFIR fir; + DirectFIRFloat fir; std::vector coefficients = { 0.1f, 0.5f, 1.0f, 0.5f, 0.1f }; fir.setCoefficients (coefficients, 1.0f); @@ -196,7 +196,7 @@ TEST_F (DirectFIRTest, SetCoefficientsVector) TEST_F (DirectFIRTest, SetCoefficientsPointer) { - DirectFIR fir; + DirectFIRFloat fir; float coefficients[] = { 0.2f, 0.4f, 0.6f, 0.8f }; fir.setCoefficients (coefficients, 4, 2.0f); @@ -208,7 +208,7 @@ TEST_F (DirectFIRTest, SetCoefficientsPointer) TEST_F (DirectFIRTest, SetCoefficientsNullptr) { - DirectFIR fir; + DirectFIRFloat fir; // First set some valid coefficients std::vector coefficients = { 1.0f, 0.5f }; @@ -223,7 +223,7 @@ TEST_F (DirectFIRTest, SetCoefficientsNullptr) TEST_F (DirectFIRTest, SetCoefficientsWithScaling) { - DirectFIR fir; + DirectFIRFloat fir; std::vector coefficients = { 1.0f, 1.0f, 1.0f }; 
fir.setCoefficients (coefficients, 0.5f); @@ -251,7 +251,7 @@ TEST_F (DirectFIRTest, SetCoefficientsWithScaling) TEST_F (DirectFIRTest, ImpulseResponse) { - DirectFIR fir; + DirectFIRFloat fir; std::vector coefficients = { 1.0f, 0.5f, 0.25f }; fir.setCoefficients (coefficients); @@ -274,7 +274,7 @@ TEST_F (DirectFIRTest, ImpulseResponse) TEST_F (DirectFIRTest, AccumulativeOutput) { - DirectFIR fir; + DirectFIRFloat fir; std::vector coefficients = { 0.5f, 0.5f }; fir.setCoefficients (coefficients); @@ -294,8 +294,8 @@ TEST_F (DirectFIRTest, AccumulativeOutput) TEST_F (DirectFIRTest, Linearity) { - DirectFIR fir; - std::vector coefficients = createLowpassCoefficients (32, 1000.0f, 44100.0f); + DirectFIRFloat fir; + auto coefficients = FilterDesigner::designFIRLowpass (32, 1000.0f, 44100.0f); fir.setCoefficients (coefficients); std::vector input (512); @@ -324,7 +324,7 @@ TEST_F (DirectFIRTest, Linearity) TEST_F (DirectFIRTest, Reset) { - DirectFIR fir; + DirectFIRFloat fir; std::vector coefficients = { 1.0f, 0.8f, 0.6f, 0.4f, 0.2f }; fir.setCoefficients (coefficients); @@ -351,10 +351,10 @@ TEST_F (DirectFIRTest, Reset) TEST_F (DirectFIRTest, LowpassFiltering) { - DirectFIR fir; + DirectFIRFloat fir; // Create lowpass filter coefficients - std::vector coefficients = createLowpassCoefficients (64, 1000.0f, 44100.0f); + auto coefficients = FilterDesigner::designFIRLowpass (64, 1000.0f, 44100.0); fir.setCoefficients (coefficients); const float sampleRate = 44100.0f; @@ -394,8 +394,8 @@ TEST_F (DirectFIRTest, LowpassFiltering) TEST_F (DirectFIRTest, BlockSizeIndependence) { - DirectFIR fir; - std::vector coefficients = createLowpassCoefficients (48, 2000.0f, 44100.0f); + DirectFIRFloat fir; + auto coefficients = FilterDesigner::designFIRLowpass (48, 2000.0f, 44100.0); fir.setCoefficients (coefficients); const size_t totalSamples = 1024; @@ -448,7 +448,7 @@ TEST_F (DirectFIRTest, BlockSizeIndependence) TEST_F (DirectFIRTest, ZeroSamples) { - DirectFIR fir; + 
DirectFIRFloat fir; std::vector coefficients = { 1.0f, 0.5f }; fir.setCoefficients (coefficients); @@ -465,7 +465,7 @@ TEST_F (DirectFIRTest, ZeroSamples) TEST_F (DirectFIRTest, NullPointers) { - DirectFIR fir; + DirectFIRFloat fir; std::vector coefficients = { 1.0f }; fir.setCoefficients (coefficients); @@ -483,7 +483,7 @@ TEST_F (DirectFIRTest, NullPointers) TEST_F (DirectFIRTest, LargeTapCounts) { - DirectFIR fir; + DirectFIRFloat fir; // Test with relatively large number of coefficients std::vector coefficients (512); @@ -508,7 +508,7 @@ TEST_F (DirectFIRTest, LargeTapCounts) TEST_F (DirectFIRTest, SingleTap) { - DirectFIR fir; + DirectFIRFloat fir; std::vector coefficients = { 0.75f }; fir.setCoefficients (coefficients); @@ -530,7 +530,7 @@ TEST_F (DirectFIRTest, SingleTap) TEST_F (DirectFIRTest, MemoryAlignment) { - DirectFIR fir; + DirectFIRFloat fir; // Coefficient count that's not a multiple of 4 std::vector coefficients (37); @@ -549,7 +549,7 @@ TEST_F (DirectFIRTest, MemoryAlignment) TEST_F (DirectFIRTest, StressTest) { - DirectFIR fir; + DirectFIRFloat fir; // Create complex impulse response std::vector coefficients (256); @@ -582,4 +582,4 @@ TEST_F (DirectFIRTest, StressTest) } } -} // namespace yup::test \ No newline at end of file +} // namespace yup::test diff --git a/tests/yup_dsp/yup_FilterDesigner.cpp b/tests/yup_dsp/yup_FilterDesigner.cpp index ca43acc07..63b17071a 100644 --- a/tests/yup_dsp/yup_FilterDesigner.cpp +++ b/tests/yup_dsp/yup_FilterDesigner.cpp @@ -23,6 +23,8 @@ #include +#include + using namespace yup; //============================================================================== @@ -358,3 +360,308 @@ TEST_F (FilterDesignerTests, FloatPrecisionConsistency) EXPECT_NEAR (doubleCoeffs.a1, static_cast (floatCoeffs.a1), toleranceF); EXPECT_NEAR (doubleCoeffs.a2, static_cast (floatCoeffs.a2), toleranceF); } + +//============================================================================== +// FIR Filter Design Tests 
+//============================================================================== + +TEST_F (FilterDesignerTests, FirLowpassBasicProperties) +{ + const int numCoeffs = 65; // Odd number for symmetric filter + auto coeffs = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRate); + + // Should return the correct number of coefficients + EXPECT_EQ (coeffs.size(), numCoeffs); + + // All coefficients should be finite + for (const auto& coeff : coeffs) + EXPECT_TRUE (std::isfinite (coeff)); + + // FIR filter should be symmetric for linear phase + const int center = (numCoeffs - 1) / 2; + for (int i = 0; i < center; ++i) + EXPECT_NEAR (coeffs[i], coeffs[numCoeffs - 1 - i], toleranceF); + + // Center coefficient should be largest for lowpass + for (int i = 0; i < numCoeffs; ++i) + { + if (i != center) + EXPECT_GE (coeffs[center], coeffs[i]); + } +} + +TEST_F (FilterDesignerTests, FirHighpassBasicProperties) +{ + const int numCoeffs = 65; + auto coeffs = FilterDesigner::designFIRHighpass (numCoeffs, 1000.0f, sampleRate); + + // Should return the correct number of coefficients + EXPECT_EQ (coeffs.size(), numCoeffs); + + // All coefficients should be finite + for (const auto& coeff : coeffs) + EXPECT_TRUE (std::isfinite (coeff)); + + // FIR filter should be symmetric for linear phase + const int center = (numCoeffs - 1) / 2; + for (int i = 0; i < center; ++i) + EXPECT_NEAR (coeffs[i], coeffs[numCoeffs - 1 - i], toleranceF); + + // Sum of coefficients should be approximately zero for highpass (DC gain = 0) + // Note: windowing can cause small deviations from ideal DC gain + float sum = 0.0f; + for (const auto& coeff : coeffs) + sum += coeff; + + EXPECT_NEAR (sum, 0.0f, 0.05f); // Relaxed tolerance for windowed FIR +} + +TEST_F (FilterDesignerTests, FirBandpassBasicProperties) +{ + const int numCoeffs = 65; + auto coeffs = FilterDesigner::designFIRBandpass (numCoeffs, 800.0f, 1200.0f, sampleRate); + + // Should return the correct number of coefficients + EXPECT_EQ 
(coeffs.size(), numCoeffs); + + // All coefficients should be finite + for (const auto& coeff : coeffs) + EXPECT_TRUE (std::isfinite (coeff)); + + // FIR filter should be symmetric for linear phase + const int center = (numCoeffs - 1) / 2; + for (int i = 0; i < center; ++i) + EXPECT_NEAR (coeffs[i], coeffs[numCoeffs - 1 - i], toleranceF); + + // Sum of coefficients should be approximately zero for bandpass (DC gain = 0) + // Note: windowing can cause small deviations from ideal DC gain + float sum = 0.0f; + for (const auto& coeff : coeffs) + sum += coeff; + + EXPECT_NEAR (sum, 0.0f, 0.15f); // Relaxed tolerance for windowed FIR +} + +TEST_F (FilterDesignerTests, FirBandstopBasicProperties) +{ + const int numCoeffs = 65; + auto coeffs = FilterDesigner::designFIRBandstop (numCoeffs, 800.0f, 1200.0f, sampleRate); + + // Should return the correct number of coefficients + EXPECT_EQ (coeffs.size(), numCoeffs); + + // All coefficients should be finite + for (const auto& coeff : coeffs) + EXPECT_TRUE (std::isfinite (coeff)); + + // FIR filter should be symmetric for linear phase + const int center = (numCoeffs - 1) / 2; + for (int i = 0; i < center; ++i) + EXPECT_NEAR (coeffs[i], coeffs[numCoeffs - 1 - i], toleranceF); + + // Sum of coefficients should be approximately 1.0 for bandstop (DC gain = 1) + // Note: windowing can cause small deviations from ideal DC gain + float sum = 0.0f; + for (const auto& coeff : coeffs) + sum += coeff; + + EXPECT_NEAR (sum, 1.0f, 0.15f); // Relaxed tolerance for windowed FIR +} + +TEST_F (FilterDesignerTests, FirDifferentWindowTypes) +{ + const int numCoeffs = 33; + + // Test different window types + auto hannCoeffs = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRate, WindowType::hann); + auto hammingCoeffs = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRate, WindowType::hamming); + auto blackmanCoeffs = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRate, WindowType::blackman); + + // All should 
have same size + EXPECT_EQ (hannCoeffs.size(), numCoeffs); + EXPECT_EQ (hammingCoeffs.size(), numCoeffs); + EXPECT_EQ (blackmanCoeffs.size(), numCoeffs); + + // All coefficients should be finite + for (int i = 0; i < numCoeffs; ++i) + { + EXPECT_TRUE (std::isfinite (hannCoeffs[i])); + EXPECT_TRUE (std::isfinite (hammingCoeffs[i])); + EXPECT_TRUE (std::isfinite (blackmanCoeffs[i])); + } + + // Different windows should produce different coefficients + bool coeffsDifferent = false; + for (int i = 0; i < numCoeffs; ++i) + { + if (std::abs (hannCoeffs[i] - blackmanCoeffs[i]) > toleranceF) + { + coeffsDifferent = true; + break; + } + } + EXPECT_TRUE (coeffsDifferent); +} + +TEST_F (FilterDesignerTests, FirFloatDoubleConsistency) +{ + const int numCoeffs = 33; + + auto doubleCoeffs = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0, sampleRate); + auto floatCoeffs = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRate); + + EXPECT_EQ (doubleCoeffs.size(), floatCoeffs.size()); + + // Coefficients should be very similar between float and double precision + for (int i = 0; i < numCoeffs; ++i) + EXPECT_NEAR (doubleCoeffs[i], static_cast (floatCoeffs[i]), toleranceF); +} + +TEST_F (FilterDesignerTests, ExportFIRCoefficientsForAnalysis) +{ + const int numCoeffs = 65; + const float sampleRateF = 44100.0f; + + // Design different FIR filters + auto lowpass = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRateF); + auto highpass = FilterDesigner::designFIRHighpass (numCoeffs, 1000.0f, sampleRateF); + auto bandpass = FilterDesigner::designFIRBandpass (numCoeffs, 800.0f, 1200.0f, sampleRateF); + auto bandstop = FilterDesigner::designFIRBandstop (numCoeffs, 800.0f, 1200.0f, sampleRateF); + + // Different windows for lowpass + auto lowpassHann = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRateF, WindowType::hann); + auto lowpassHamming = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRateF, WindowType::hamming); + auto 
lowpassBlackman = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRateF, WindowType::blackman); + + // Helper lambda to write coefficients to file + auto writeCoeffs = [] (const std::vector& coeffs, const std::string& filename) + { + std::ofstream file (filename); + if (file.is_open()) + { + for (size_t i = 0; i < coeffs.size(); ++i) + { + file << coeffs[i]; + if (i < coeffs.size() - 1) + file << "\n"; + } + file.close(); + } + }; + + // Write all coefficient sets to files + writeCoeffs (lowpass, "fir_lowpass_1000hz.txt"); + writeCoeffs (highpass, "fir_highpass_1000hz.txt"); + writeCoeffs (bandpass, "fir_bandpass_800_1200hz.txt"); + writeCoeffs (bandstop, "fir_bandstop_800_1200hz.txt"); + writeCoeffs (lowpassHann, "fir_lowpass_hann_1000hz.txt"); + writeCoeffs (lowpassHamming, "fir_lowpass_hamming_1000hz.txt"); + writeCoeffs (lowpassBlackman, "fir_lowpass_blackman_1000hz.txt"); + + // Create a Python script to plot the frequency responses + std::ofstream pyScript ("plot_fir_responses.py"); + if (pyScript.is_open()) + { + pyScript << R"(#!/usr/bin/env python3 +import numpy as np +import matplotlib.pyplot as plt +from scipy import signal + +def load_coeffs(filename): + with open(filename, 'r') as f: + return [float(line.strip()) for line in f.readlines()] + +def plot_frequency_response(coeffs, title, sample_rate=44100): + w, h = signal.freqz(coeffs, worN=8000, fs=sample_rate) + + plt.figure(figsize=(12, 8)) + + # Magnitude response + plt.subplot(2, 1, 1) + plt.plot(w, 20 * np.log10(np.abs(h))) + plt.title(f'{title} - Magnitude Response') + plt.xlabel('Frequency (Hz)') + plt.ylabel('Magnitude (dB)') + plt.grid(True) + plt.xlim(0, sample_rate/2) + plt.ylim(-80, 5) + + # Phase response + plt.subplot(2, 1, 2) + plt.plot(w, np.unwrap(np.angle(h)) * 180 / np.pi) + plt.title(f'{title} - Phase Response') + plt.xlabel('Frequency (Hz)') + plt.ylabel('Phase (degrees)') + plt.grid(True) + plt.xlim(0, sample_rate/2) + + plt.tight_layout() + 
plt.savefig(f'{title.lower().replace(" ", "_").replace("-", "_")}_response.png', dpi=150, bbox_inches='tight') + plt.show() + +# Load and plot all FIR filter responses +filters = [ + ('fir_lowpass_1000hz.txt', 'FIR Lowpass 1000Hz'), + ('fir_highpass_1000hz.txt', 'FIR Highpass 1000Hz'), + ('fir_bandpass_800_1200hz.txt', 'FIR Bandpass 800-1200Hz'), + ('fir_bandstop_800_1200hz.txt', 'FIR Bandstop 800-1200Hz'), + ('fir_lowpass_hann_1000hz.txt', 'FIR Lowpass Hann Window'), + ('fir_lowpass_hamming_1000hz.txt', 'FIR Lowpass Hamming Window'), + ('fir_lowpass_blackman_1000hz.txt', 'FIR Lowpass Blackman Window') +] + +for filename, title in filters: + try: + coeffs = load_coeffs(filename) + plot_frequency_response(coeffs, title) + except FileNotFoundError: + print(f"File {filename} not found!") + +# Compare window types on same plot +plt.figure(figsize=(12, 6)) +window_files = [ + ('fir_lowpass_hann_1000hz.txt', 'Hann', 'blue'), + ('fir_lowpass_hamming_1000hz.txt', 'Hamming', 'red'), + ('fir_lowpass_blackman_1000hz.txt', 'Blackman', 'green') +] + +for filename, label, color in window_files: + try: + coeffs = load_coeffs(filename) + w, h = signal.freqz(coeffs, worN=8000, fs=44100) + plt.plot(w, 20 * np.log10(np.abs(h)), label=label, color=color) + except FileNotFoundError: + print(f"File {filename} not found!") + +plt.title('FIR Lowpass 1000Hz - Window Comparison') +plt.xlabel('Frequency (Hz)') +plt.ylabel('Magnitude (dB)') +plt.grid(True) +plt.legend() +plt.xlim(0, 22050) +plt.ylim(-80, 5) +plt.savefig('fir_window_comparison.png', dpi=150, bbox_inches='tight') +plt.show() + +print("All plots generated successfully!") +)"; + pyScript.close(); + } + + // Just verify the files were created - the actual validation will be done visually with Python + EXPECT_EQ (lowpass.size(), numCoeffs); + EXPECT_EQ (highpass.size(), numCoeffs); + EXPECT_EQ (bandpass.size(), numCoeffs); + EXPECT_EQ (bandstop.size(), numCoeffs); + + std::cout << "\nFIR coefficient files and Python plotting script 
created:\n"; + std::cout << "- fir_lowpass_1000hz.txt\n"; + std::cout << "- fir_highpass_1000hz.txt\n"; + std::cout << "- fir_bandpass_800_1200hz.txt\n"; + std::cout << "- fir_bandstop_800_1200hz.txt\n"; + std::cout << "- fir_lowpass_hann_1000hz.txt\n"; + std::cout << "- fir_lowpass_hamming_1000hz.txt\n"; + std::cout << "- fir_lowpass_blackman_1000hz.txt\n"; + std::cout << "- plot_fir_responses.py\n\n"; + std::cout << "Run: python3 plot_fir_responses.py (requires numpy, matplotlib, scipy)\n"; +} From 66b0e1b0a695a87e65e96a357789cfca92d54258 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Fri, 12 Sep 2025 08:24:05 +0200 Subject: [PATCH 34/37] Remove validations --- tests/yup_dsp/yup_FilterDesigner.cpp | 149 --------------------------- 1 file changed, 149 deletions(-) diff --git a/tests/yup_dsp/yup_FilterDesigner.cpp b/tests/yup_dsp/yup_FilterDesigner.cpp index 63b17071a..5f031090b 100644 --- a/tests/yup_dsp/yup_FilterDesigner.cpp +++ b/tests/yup_dsp/yup_FilterDesigner.cpp @@ -516,152 +516,3 @@ TEST_F (FilterDesignerTests, FirFloatDoubleConsistency) for (int i = 0; i < numCoeffs; ++i) EXPECT_NEAR (doubleCoeffs[i], static_cast (floatCoeffs[i]), toleranceF); } - -TEST_F (FilterDesignerTests, ExportFIRCoefficientsForAnalysis) -{ - const int numCoeffs = 65; - const float sampleRateF = 44100.0f; - - // Design different FIR filters - auto lowpass = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRateF); - auto highpass = FilterDesigner::designFIRHighpass (numCoeffs, 1000.0f, sampleRateF); - auto bandpass = FilterDesigner::designFIRBandpass (numCoeffs, 800.0f, 1200.0f, sampleRateF); - auto bandstop = FilterDesigner::designFIRBandstop (numCoeffs, 800.0f, 1200.0f, sampleRateF); - - // Different windows for lowpass - auto lowpassHann = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRateF, WindowType::hann); - auto lowpassHamming = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRateF, WindowType::hamming); - auto lowpassBlackman = 
FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRateF, WindowType::blackman); - - // Helper lambda to write coefficients to file - auto writeCoeffs = [] (const std::vector& coeffs, const std::string& filename) - { - std::ofstream file (filename); - if (file.is_open()) - { - for (size_t i = 0; i < coeffs.size(); ++i) - { - file << coeffs[i]; - if (i < coeffs.size() - 1) - file << "\n"; - } - file.close(); - } - }; - - // Write all coefficient sets to files - writeCoeffs (lowpass, "fir_lowpass_1000hz.txt"); - writeCoeffs (highpass, "fir_highpass_1000hz.txt"); - writeCoeffs (bandpass, "fir_bandpass_800_1200hz.txt"); - writeCoeffs (bandstop, "fir_bandstop_800_1200hz.txt"); - writeCoeffs (lowpassHann, "fir_lowpass_hann_1000hz.txt"); - writeCoeffs (lowpassHamming, "fir_lowpass_hamming_1000hz.txt"); - writeCoeffs (lowpassBlackman, "fir_lowpass_blackman_1000hz.txt"); - - // Create a Python script to plot the frequency responses - std::ofstream pyScript ("plot_fir_responses.py"); - if (pyScript.is_open()) - { - pyScript << R"(#!/usr/bin/env python3 -import numpy as np -import matplotlib.pyplot as plt -from scipy import signal - -def load_coeffs(filename): - with open(filename, 'r') as f: - return [float(line.strip()) for line in f.readlines()] - -def plot_frequency_response(coeffs, title, sample_rate=44100): - w, h = signal.freqz(coeffs, worN=8000, fs=sample_rate) - - plt.figure(figsize=(12, 8)) - - # Magnitude response - plt.subplot(2, 1, 1) - plt.plot(w, 20 * np.log10(np.abs(h))) - plt.title(f'{title} - Magnitude Response') - plt.xlabel('Frequency (Hz)') - plt.ylabel('Magnitude (dB)') - plt.grid(True) - plt.xlim(0, sample_rate/2) - plt.ylim(-80, 5) - - # Phase response - plt.subplot(2, 1, 2) - plt.plot(w, np.unwrap(np.angle(h)) * 180 / np.pi) - plt.title(f'{title} - Phase Response') - plt.xlabel('Frequency (Hz)') - plt.ylabel('Phase (degrees)') - plt.grid(True) - plt.xlim(0, sample_rate/2) - - plt.tight_layout() - plt.savefig(f'{title.lower().replace(" ", 
"_").replace("-", "_")}_response.png', dpi=150, bbox_inches='tight') - plt.show() - -# Load and plot all FIR filter responses -filters = [ - ('fir_lowpass_1000hz.txt', 'FIR Lowpass 1000Hz'), - ('fir_highpass_1000hz.txt', 'FIR Highpass 1000Hz'), - ('fir_bandpass_800_1200hz.txt', 'FIR Bandpass 800-1200Hz'), - ('fir_bandstop_800_1200hz.txt', 'FIR Bandstop 800-1200Hz'), - ('fir_lowpass_hann_1000hz.txt', 'FIR Lowpass Hann Window'), - ('fir_lowpass_hamming_1000hz.txt', 'FIR Lowpass Hamming Window'), - ('fir_lowpass_blackman_1000hz.txt', 'FIR Lowpass Blackman Window') -] - -for filename, title in filters: - try: - coeffs = load_coeffs(filename) - plot_frequency_response(coeffs, title) - except FileNotFoundError: - print(f"File {filename} not found!") - -# Compare window types on same plot -plt.figure(figsize=(12, 6)) -window_files = [ - ('fir_lowpass_hann_1000hz.txt', 'Hann', 'blue'), - ('fir_lowpass_hamming_1000hz.txt', 'Hamming', 'red'), - ('fir_lowpass_blackman_1000hz.txt', 'Blackman', 'green') -] - -for filename, label, color in window_files: - try: - coeffs = load_coeffs(filename) - w, h = signal.freqz(coeffs, worN=8000, fs=44100) - plt.plot(w, 20 * np.log10(np.abs(h)), label=label, color=color) - except FileNotFoundError: - print(f"File {filename} not found!") - -plt.title('FIR Lowpass 1000Hz - Window Comparison') -plt.xlabel('Frequency (Hz)') -plt.ylabel('Magnitude (dB)') -plt.grid(True) -plt.legend() -plt.xlim(0, 22050) -plt.ylim(-80, 5) -plt.savefig('fir_window_comparison.png', dpi=150, bbox_inches='tight') -plt.show() - -print("All plots generated successfully!") -)"; - pyScript.close(); - } - - // Just verify the files were created - the actual validation will be done visually with Python - EXPECT_EQ (lowpass.size(), numCoeffs); - EXPECT_EQ (highpass.size(), numCoeffs); - EXPECT_EQ (bandpass.size(), numCoeffs); - EXPECT_EQ (bandstop.size(), numCoeffs); - - std::cout << "\nFIR coefficient files and Python plotting script created:\n"; - std::cout << "- 
fir_lowpass_1000hz.txt\n"; - std::cout << "- fir_highpass_1000hz.txt\n"; - std::cout << "- fir_bandpass_800_1200hz.txt\n"; - std::cout << "- fir_bandstop_800_1200hz.txt\n"; - std::cout << "- fir_lowpass_hann_1000hz.txt\n"; - std::cout << "- fir_lowpass_hamming_1000hz.txt\n"; - std::cout << "- fir_lowpass_blackman_1000hz.txt\n"; - std::cout << "- plot_fir_responses.py\n\n"; - std::cout << "Run: python3 plot_fir_responses.py (requires numpy, matplotlib, scipy)\n"; -} From 0dfd4ea3f10ce7e1d24caae82b2ff37cb1114157 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Fri, 12 Sep 2025 08:28:15 +0200 Subject: [PATCH 35/37] Export FIR coefficients for analysis --- tests/yup_dsp/yup_FilterDesigner.cpp | 149 +++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) diff --git a/tests/yup_dsp/yup_FilterDesigner.cpp b/tests/yup_dsp/yup_FilterDesigner.cpp index 5f031090b..120c4ed08 100644 --- a/tests/yup_dsp/yup_FilterDesigner.cpp +++ b/tests/yup_dsp/yup_FilterDesigner.cpp @@ -516,3 +516,152 @@ TEST_F (FilterDesignerTests, FirFloatDoubleConsistency) for (int i = 0; i < numCoeffs; ++i) EXPECT_NEAR (doubleCoeffs[i], static_cast (floatCoeffs[i]), toleranceF); } + +TEST_F (FilterDesignerTests, DISABLED_ExportFIRCoefficientsForAnalysis) +{ + const int numCoeffs = 129; + const float sampleRateF = 44100.0f; + + // Design different FIR filters + auto lowpass = FilterDesigner::designFIRLowpass (numCoeffs, 10000.0f, sampleRateF); + auto highpass = FilterDesigner::designFIRHighpass (numCoeffs, 10000.0f, sampleRateF); + auto bandpass = FilterDesigner::designFIRBandpass (numCoeffs, 8000.0f, 12000.0f, sampleRateF); + auto bandstop = FilterDesigner::designFIRBandstop (numCoeffs, 8000.0f, 12000.0f, sampleRateF); + + // Different windows for lowpass + auto lowpassHann = FilterDesigner::designFIRLowpass (numCoeffs, 10000.0f, sampleRateF, WindowType::hann); + auto lowpassHamming = FilterDesigner::designFIRLowpass (numCoeffs, 10000.0f, sampleRateF, WindowType::hamming); + auto lowpassBlackman = 
FilterDesigner::designFIRLowpass (numCoeffs, 10000.0f, sampleRateF, WindowType::blackman); + + // Helper lambda to write coefficients to file + auto writeCoeffs = [] (const std::vector& coeffs, const std::string& filename) + { + std::ofstream file (filename); + if (file.is_open()) + { + for (size_t i = 0; i < coeffs.size(); ++i) + { + file << coeffs[i]; + if (i < coeffs.size() - 1) + file << "\n"; + } + file.close(); + } + }; + + // Write all coefficient sets to files + writeCoeffs (lowpass, "fir_lowpass_10000hz.txt"); + writeCoeffs (highpass, "fir_highpass_10000hz.txt"); + writeCoeffs (bandpass, "fir_bandpass_8000_12000hz.txt"); + writeCoeffs (bandstop, "fir_bandstop_8000_12000hz.txt"); + writeCoeffs (lowpassHann, "fir_lowpass_hann_10000hz.txt"); + writeCoeffs (lowpassHamming, "fir_lowpass_hamming_10000hz.txt"); + writeCoeffs (lowpassBlackman, "fir_lowpass_blackman_10000hz.txt"); + + // Create a Python script to plot the frequency responses + std::ofstream pyScript ("plot_fir_responses.py"); + if (pyScript.is_open()) + { + pyScript << R"(#!/usr/bin/env python3 +import numpy as np +import matplotlib.pyplot as plt +from scipy import signal + +def load_coeffs(filename): + with open(filename, 'r') as f: + return [float(line.strip()) for line in f.readlines()] + +def plot_frequency_response(coeffs, title, sample_rate=44100): + w, h = signal.freqz(coeffs, worN=8000, fs=sample_rate) + + plt.figure(figsize=(12, 8)) + + # Magnitude response + plt.subplot(2, 1, 1) + plt.plot(w, 20 * np.log10(np.abs(h))) + plt.title(f'{title} - Magnitude Response') + plt.xlabel('Frequency (Hz)') + plt.ylabel('Magnitude (dB)') + plt.grid(True) + plt.xlim(0, sample_rate/2) + plt.ylim(-80, 5) + + # Phase response + plt.subplot(2, 1, 2) + plt.plot(w, np.unwrap(np.angle(h)) * 180 / np.pi) + plt.title(f'{title} - Phase Response') + plt.xlabel('Frequency (Hz)') + plt.ylabel('Phase (degrees)') + plt.grid(True) + plt.xlim(0, sample_rate/2) + + plt.tight_layout() + 
plt.savefig(f'{title.lower().replace(" ", "_").replace("-", "_")}_response.png', dpi=150, bbox_inches='tight') + plt.show() + +# Load and plot all FIR filter responses +filters = [ + ('fir_lowpass_10000hz.txt', 'FIR Lowpass 10000Hz'), + ('fir_highpass_10000hz.txt', 'FIR Highpass 10000Hz'), + ('fir_bandpass_8000_12000hz.txt', 'FIR Bandpass 8000-12000Hz'), + ('fir_bandstop_8000_12000hz.txt', 'FIR Bandstop 8000-12000Hz'), + ('fir_lowpass_hann_10000hz.txt', 'FIR Lowpass Hann Window'), + ('fir_lowpass_hamming_10000hz.txt', 'FIR Lowpass Hamming Window'), + ('fir_lowpass_blackman_10000hz.txt', 'FIR Lowpass Blackman Window') +] + +for filename, title in filters: + try: + coeffs = load_coeffs(filename) + plot_frequency_response(coeffs, title) + except FileNotFoundError: + print(f"File {filename} not found!") + +# Compare window types on same plot +plt.figure(figsize=(12, 6)) +window_files = [ + ('fir_lowpass_hann_1000hz.txt', 'Hann', 'blue'), + ('fir_lowpass_hamming_1000hz.txt', 'Hamming', 'red'), + ('fir_lowpass_blackman_1000hz.txt', 'Blackman', 'green') +] + +for filename, label, color in window_files: + try: + coeffs = load_coeffs(filename) + w, h = signal.freqz(coeffs, worN=8000, fs=44100) + plt.plot(w, 20 * np.log10(np.abs(h)), label=label, color=color) + except FileNotFoundError: + print(f"File {filename} not found!") + +plt.title('FIR Lowpass 1000Hz - Window Comparison') +plt.xlabel('Frequency (Hz)') +plt.ylabel('Magnitude (dB)') +plt.grid(True) +plt.legend() +plt.xlim(0, 22050) +plt.ylim(-80, 5) +plt.savefig('fir_window_comparison.png', dpi=150, bbox_inches='tight') +plt.show() + +print("All plots generated successfully!") +)"; + pyScript.close(); + } + + // Just verify the files were created - the actual validation will be done visually with Python + EXPECT_EQ (lowpass.size(), numCoeffs); + EXPECT_EQ (highpass.size(), numCoeffs); + EXPECT_EQ (bandpass.size(), numCoeffs); + EXPECT_EQ (bandstop.size(), numCoeffs); + + std::cout << "\nFIR coefficient files and Python 
plotting script created:\n"; + std::cout << "- fir_lowpass_10000hz.txt\n"; + std::cout << "- fir_highpass_10000hz.txt\n"; + std::cout << "- fir_bandpass_8000_12000hz.txt\n"; + std::cout << "- fir_bandstop_8000_12000hz.txt\n"; + std::cout << "- fir_lowpass_hann_10000hz.txt\n"; + std::cout << "- fir_lowpass_hamming_10000hz.txt\n"; + std::cout << "- fir_lowpass_blackman_10000hz.txt\n"; + std::cout << "- plot_fir_responses.py\n\n"; + std::cout << "Run: python3 plot_fir_responses.py (requires numpy, matplotlib, scipy)\n"; +} From cf8851f813eb125bad4304ee0b8f029ae6482b76 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Fri, 12 Sep 2025 09:50:12 +0200 Subject: [PATCH 36/37] Fix issues in FIR filter --- modules/yup_dsp/filters/yup_DirectFIR.h | 2 +- tests/yup_dsp/yup_DirectFIR.cpp | 82 ++++++++++++------------- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/modules/yup_dsp/filters/yup_DirectFIR.h b/modules/yup_dsp/filters/yup_DirectFIR.h index cdedac031..5a2dea78b 100644 --- a/modules/yup_dsp/filters/yup_DirectFIR.h +++ b/modules/yup_dsp/filters/yup_DirectFIR.h @@ -111,7 +111,7 @@ class DirectFIR : public FilterBase void setCoefficients (std::vector coefficients, CoeffType scaling = CoeffType (1)) { currentScaling = scaling; - if (! approximatelyEqual (currentScaling, 1.0f)) + if (! 
approximatelyEqual (currentScaling, CoeffType (1))) FloatVectorOperations::multiply (coefficients.data(), scaling, coefficients.size()); coefficientsReversed = std::move (coefficients); diff --git a/tests/yup_dsp/yup_DirectFIR.cpp b/tests/yup_dsp/yup_DirectFIR.cpp index 9d79d8066..a58a5e3cd 100644 --- a/tests/yup_dsp/yup_DirectFIR.cpp +++ b/tests/yup_dsp/yup_DirectFIR.cpp @@ -118,7 +118,7 @@ class DirectFIRTest : public ::testing::Test TEST_F (DirectFIRTest, DefaultConstruction) { - DirectFIRFloat fir; + DirectFIR fir; // Default state should be safe EXPECT_EQ (fir.getNumCoefficients(), 0); @@ -128,7 +128,7 @@ TEST_F (DirectFIRTest, DefaultConstruction) // Should handle empty processing gracefully std::vector input (256, 0.0f); std::vector output (256, 0.0f); - EXPECT_NO_THROW (fir.process (input.data(), output.data(), input.size())); + EXPECT_NO_THROW (fir.processBlock (input.data(), output.data(), static_cast (input.size()))); // Output should remain zero without coefficients for (float sample : output) @@ -137,12 +137,12 @@ TEST_F (DirectFIRTest, DefaultConstruction) TEST_F (DirectFIRTest, MoveSemantics) { - DirectFIRFloat fir1; + DirectFIR fir1; std::vector coefficients = { 1.0f, 0.5f, 0.25f }; fir1.setCoefficients (coefficients, 2.0f); // Move constructor - DirectFIRFloat fir2 = std::move (fir1); + DirectFIR fir2 = std::move (fir1); // Verify moved filter works EXPECT_EQ (fir2.getNumCoefficients(), 3); @@ -157,7 +157,7 @@ TEST_F (DirectFIRTest, MoveSemantics) input[0] = 1.0f; std::vector output (10, 0.0f); - EXPECT_NO_THROW (fir2.process (input.data(), output.data(), input.size())); + EXPECT_NO_THROW (fir2.processBlock (input.data(), output.data(), static_cast (static_cast (input.size())))); // Should produce scaled output float outputSum = 0.0f; @@ -166,7 +166,7 @@ TEST_F (DirectFIRTest, MoveSemantics) EXPECT_GT (outputSum, 1.0f); // Should be > 1 due to scaling // Move assignment - DirectFIRFloat fir3; + DirectFIR fir3; fir3 = std::move (fir2); EXPECT_EQ 
(fir3.getNumCoefficients(), 3); @@ -180,7 +180,7 @@ TEST_F (DirectFIRTest, MoveSemantics) TEST_F (DirectFIRTest, SetCoefficientsVector) { - DirectFIRFloat fir; + DirectFIR fir; std::vector coefficients = { 0.1f, 0.5f, 1.0f, 0.5f, 0.1f }; fir.setCoefficients (coefficients, 1.0f); @@ -196,7 +196,7 @@ TEST_F (DirectFIRTest, SetCoefficientsVector) TEST_F (DirectFIRTest, SetCoefficientsPointer) { - DirectFIRFloat fir; + DirectFIR fir; float coefficients[] = { 0.2f, 0.4f, 0.6f, 0.8f }; fir.setCoefficients (coefficients, 4, 2.0f); @@ -208,7 +208,7 @@ TEST_F (DirectFIRTest, SetCoefficientsPointer) TEST_F (DirectFIRTest, SetCoefficientsNullptr) { - DirectFIRFloat fir; + DirectFIR fir; // First set some valid coefficients std::vector coefficients = { 1.0f, 0.5f }; @@ -223,7 +223,7 @@ TEST_F (DirectFIRTest, SetCoefficientsNullptr) TEST_F (DirectFIRTest, SetCoefficientsWithScaling) { - DirectFIRFloat fir; + DirectFIR fir; std::vector coefficients = { 1.0f, 1.0f, 1.0f }; fir.setCoefficients (coefficients, 0.5f); @@ -233,7 +233,7 @@ TEST_F (DirectFIRTest, SetCoefficientsWithScaling) input[0] = 2.0f; // Unit impulse scaled by 2 std::vector output (10, 0.0f); - fir.process (input.data(), output.data(), input.size()); + fir.processBlock (input.data(), output.data(), static_cast (input.size())); // Output should reflect the coefficient scaling // Each coefficient was originally 1.0, scaled by 0.5, so output per coefficient = 2.0 * 0.5 = 1.0 @@ -251,7 +251,7 @@ TEST_F (DirectFIRTest, SetCoefficientsWithScaling) TEST_F (DirectFIRTest, ImpulseResponse) { - DirectFIRFloat fir; + DirectFIR fir; std::vector coefficients = { 1.0f, 0.5f, 0.25f }; fir.setCoefficients (coefficients); @@ -260,7 +260,7 @@ TEST_F (DirectFIRTest, ImpulseResponse) input[0] = 1.0f; std::vector output (10, 0.0f); - fir.process (input.data(), output.data(), input.size()); + fir.processBlock (input.data(), output.data(), static_cast (input.size())); // Should get the impulse response (coefficients in original order) 
EXPECT_NEAR (output[0], 1.0f, 0.001f); // First coefficient h0 @@ -274,7 +274,7 @@ TEST_F (DirectFIRTest, ImpulseResponse) TEST_F (DirectFIRTest, AccumulativeOutput) { - DirectFIRFloat fir; + DirectFIR fir; std::vector coefficients = { 0.5f, 0.5f }; fir.setCoefficients (coefficients); @@ -285,7 +285,7 @@ TEST_F (DirectFIRTest, AccumulativeOutput) std::fill (output.begin(), output.end(), 1.0f); std::vector originalOutput = output; - fir.process (input.data(), output.data(), input.size()); + fir.processBlock (input.data(), output.data(), static_cast (input.size())); // Output should contain original data plus filter result for (size_t i = 0; i < output.size(); ++i) @@ -294,7 +294,7 @@ TEST_F (DirectFIRTest, AccumulativeOutput) TEST_F (DirectFIRTest, Linearity) { - DirectFIRFloat fir; + DirectFIR fir; auto coefficients = FilterDesigner::designFIRLowpass (32, 1000.0f, 44100.0f); fir.setCoefficients (coefficients); @@ -309,10 +309,10 @@ TEST_F (DirectFIRTest, Linearity) std::vector output2 (512, 0.0f); fir.reset(); - fir.process (input.data(), output1.data(), input.size()); + fir.processBlock (input.data(), output1.data(), static_cast (input.size())); fir.reset(); - fir.process (input2.data(), output2.data(), input2.size()); + fir.processBlock (input2.data(), output2.data(), input2.size()); // output2 should be approximately 2x output1 for (size_t i = 0; i < output1.size(); ++i) @@ -324,7 +324,7 @@ TEST_F (DirectFIRTest, Linearity) TEST_F (DirectFIRTest, Reset) { - DirectFIRFloat fir; + DirectFIR fir; std::vector coefficients = { 1.0f, 0.8f, 0.6f, 0.4f, 0.2f }; fir.setCoefficients (coefficients); @@ -333,12 +333,12 @@ TEST_F (DirectFIRTest, Reset) std::vector output1 (20, 0.0f); // Process some data to build up internal state - fir.process (input.data(), output1.data(), input.size()); + fir.processBlock (input.data(), output1.data(), static_cast (input.size())); // Reset and process same input fir.reset(); std::vector output2 (20, 0.0f); - fir.process (input.data(), 
output2.data(), input.size()); + fir.processBlock (input.data(), output2.data(), static_cast (input.size())); // Outputs should be identical after reset for (size_t i = 0; i < output1.size(); ++i) @@ -351,7 +351,7 @@ TEST_F (DirectFIRTest, Reset) TEST_F (DirectFIRTest, LowpassFiltering) { - DirectFIRFloat fir; + DirectFIR fir; // Create lowpass filter coefficients auto coefficients = FilterDesigner::designFIRLowpass (64, 1000.0f, 44100.0); @@ -365,7 +365,7 @@ TEST_F (DirectFIRTest, LowpassFiltering) fillWithSine (lowFreqInput, 500.0f, sampleRate); std::vector lowFreqOutput (bufferSize, 0.0f); - fir.process (lowFreqInput.data(), lowFreqOutput.data(), bufferSize); + fir.processBlock (lowFreqInput.data(), lowFreqOutput.data(), bufferSize); // Test with high frequency (should be attenuated) fir.reset(); @@ -373,7 +373,7 @@ TEST_F (DirectFIRTest, LowpassFiltering) fillWithSine (highFreqInput, 5000.0f, sampleRate); std::vector highFreqOutput (bufferSize, 0.0f); - fir.process (highFreqInput.data(), highFreqOutput.data(), bufferSize); + fir.processBlock (highFreqInput.data(), highFreqOutput.data(), bufferSize); // Compare RMS levels (skip first samples due to transient) const size_t skipSamples = 100; @@ -394,7 +394,7 @@ TEST_F (DirectFIRTest, LowpassFiltering) TEST_F (DirectFIRTest, BlockSizeIndependence) { - DirectFIRFloat fir; + DirectFIR fir; auto coefficients = FilterDesigner::designFIRLowpass (48, 2000.0f, 44100.0); fir.setCoefficients (coefficients); @@ -405,7 +405,7 @@ TEST_F (DirectFIRTest, BlockSizeIndependence) // Process in one big block fir.reset(); std::vector output1 (totalSamples, 0.0f); - fir.process (input.data(), output1.data(), totalSamples); + fir.processBlock (input.data(), output1.data(), totalSamples); // Process in smaller blocks fir.reset(); @@ -424,7 +424,7 @@ TEST_F (DirectFIRTest, BlockSizeIndependence) if (blockSize == 0) break; - fir.process (input.data() + processed, output2.data() + processed, blockSize); + fir.processBlock (input.data() + 
processed, output2.data() + processed, blockSize); processed += blockSize; } @@ -433,7 +433,7 @@ TEST_F (DirectFIRTest, BlockSizeIndependence) { size_t remaining = totalSamples - processed; size_t blockSize = std::min (remaining, size_t (128)); // Process in chunks of 128 - fir.process (input.data() + processed, output2.data() + processed, blockSize); + fir.processBlock (input.data() + processed, output2.data() + processed, blockSize); processed += blockSize; } @@ -448,7 +448,7 @@ TEST_F (DirectFIRTest, BlockSizeIndependence) TEST_F (DirectFIRTest, ZeroSamples) { - DirectFIRFloat fir; + DirectFIR fir; std::vector coefficients = { 1.0f, 0.5f }; fir.setCoefficients (coefficients); @@ -456,7 +456,7 @@ TEST_F (DirectFIRTest, ZeroSamples) std::vector output (10, 0.0f); // Processing zero samples should be safe - EXPECT_NO_THROW (fir.process (input.data(), output.data(), 0)); + EXPECT_NO_THROW (fir.processBlock (input.data(), output.data(), 0)); // Output should remain unchanged for (float sample : output) @@ -465,25 +465,25 @@ TEST_F (DirectFIRTest, ZeroSamples) TEST_F (DirectFIRTest, NullPointers) { - DirectFIRFloat fir; + DirectFIR fir; std::vector coefficients = { 1.0f }; fir.setCoefficients (coefficients); std::vector buffer (10, 0.0f); // Null input pointer should be handled gracefully - EXPECT_NO_THROW (fir.process (nullptr, buffer.data(), 10)); + EXPECT_NO_THROW (fir.processBlock (nullptr, buffer.data(), 10)); // Null output pointer should be handled gracefully - EXPECT_NO_THROW (fir.process (buffer.data(), nullptr, 10)); + EXPECT_NO_THROW (fir.processBlock (buffer.data(), nullptr, 10)); // Both null should be handled gracefully - EXPECT_NO_THROW (fir.process (nullptr, nullptr, 10)); + EXPECT_NO_THROW (fir.processBlock (nullptr, nullptr, 10)); } TEST_F (DirectFIRTest, LargeTapCounts) { - DirectFIRFloat fir; + DirectFIR fir; // Test with relatively large number of coefficients std::vector coefficients (512); @@ -498,7 +498,7 @@ TEST_F (DirectFIRTest, 
LargeTapCounts) std::vector output (1024, 0.0f); fillWithRandomData (input); - EXPECT_NO_THROW (fir.process (input.data(), output.data(), input.size())); + EXPECT_NO_THROW (fir.processBlock (input.data(), output.data(), static_cast (input.size()))); // Should produce reasonable output float rms = calculateRMS (output); @@ -508,7 +508,7 @@ TEST_F (DirectFIRTest, LargeTapCounts) TEST_F (DirectFIRTest, SingleTap) { - DirectFIRFloat fir; + DirectFIR fir; std::vector coefficients = { 0.75f }; fir.setCoefficients (coefficients); @@ -518,7 +518,7 @@ TEST_F (DirectFIRTest, SingleTap) std::vector input = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; std::vector output (5, 0.0f); - fir.process (input.data(), output.data(), input.size()); + fir.processBlock (input.data(), output.data(), static_cast (input.size())); for (size_t i = 0; i < input.size(); ++i) EXPECT_NEAR (output[i], input[i] * 0.75f, 0.001f); @@ -530,7 +530,7 @@ TEST_F (DirectFIRTest, SingleTap) TEST_F (DirectFIRTest, MemoryAlignment) { - DirectFIRFloat fir; + DirectFIR fir; // Coefficient count that's not a multiple of 4 std::vector coefficients (37); @@ -549,7 +549,7 @@ TEST_F (DirectFIRTest, MemoryAlignment) TEST_F (DirectFIRTest, StressTest) { - DirectFIRFloat fir; + DirectFIR fir; // Create complex impulse response std::vector coefficients (256); @@ -571,7 +571,7 @@ TEST_F (DirectFIRTest, StressTest) std::vector output (blockSize, 0.0f); fillWithRandomData (input); - EXPECT_NO_THROW (fir.process (input.data(), output.data(), blockSize)); + EXPECT_NO_THROW (fir.processBlock (input.data(), output.data(), blockSize)); // Verify output quality for (float sample : output) From 0a4b74ab87c9145195f6598d323b7c8311455fd0 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Sun, 12 Oct 2025 11:30:07 +0200 Subject: [PATCH 37/37] More FIR specializations --- .../graphics/source/examples/FilterDemo.h | 226 +++++++++++++++++- .../convolution/yup_PartitionedConvolver.cpp | 10 +- .../yup_dsp/designers/yup_FilterDesigner.cpp | 48 ++-- 
.../yup_dsp/designers/yup_FilterDesigner.h | 24 +- modules/yup_dsp/filters/yup_DirectFIR.h | 65 ++--- .../yup_dsp/windowing/yup_WindowFunctions.h | 82 +++++-- tests/yup_dsp/yup_DirectFIR.cpp | 17 +- tests/yup_dsp/yup_FilterDesigner.cpp | 45 ++-- 8 files changed, 402 insertions(+), 115 deletions(-) diff --git a/examples/graphics/source/examples/FilterDemo.h b/examples/graphics/source/examples/FilterDemo.h index 317fd98c2..8c16d7b56 100644 --- a/examples/graphics/source/examples/FilterDemo.h +++ b/examples/graphics/source/examples/FilterDemo.h @@ -951,6 +951,7 @@ class FilterDemo filterTypeCombo->addItem ("State Variable", 3); filterTypeCombo->addItem ("First Order", 4); filterTypeCombo->addItem ("Butterworth", 5); + filterTypeCombo->addItem ("FIR Filter", 6); filterTypeCombo->setSelectedId (1); filterTypeCombo->onSelectedItemChanged = [this] { @@ -976,6 +977,42 @@ class FilterDemo }; addAndMakeVisible (*responseTypeCombo); + // FIR-specific controls + firCoefficientsSlider = std::make_unique (yup::Slider::LinearBarHorizontal, "FIR Length"); + firCoefficientsSlider->setRange ({ 16.0, 256.0 }); + firCoefficientsSlider->setValue (64.0); + firCoefficientsSlider->onValueChanged = [this] (float value) + { + updateAnalysisDisplays(); + }; + addAndMakeVisible (*firCoefficientsSlider); + + firWindowCombo = std::make_unique ("FIR Window"); + firWindowCombo->addItem ("Hann", 1); + firWindowCombo->addItem ("Hamming", 2); + firWindowCombo->addItem ("Blackman", 3); + firWindowCombo->addItem ("Kaiser", 4); + firWindowCombo->addItem ("Rectangle", 5); + firWindowCombo->addItem ("Rakshit-Ullah", 6); + firWindowCombo->setSelectedId (1); + firWindowCombo->onSelectedItemChanged = [this] + { + updateWindowParameterRange(); + updateAnalysisDisplays(); + }; + addAndMakeVisible (*firWindowCombo); + + // FIR window parameter control (for adjustable windows like Kaiser and Rakshit-Ullah) + firWindowParameterSlider = std::make_unique (yup::Slider::LinearBarHorizontal, "Window Parameter"); + 
firWindowParameterSlider->setRange ({ 0.0005, 10.0 }); + firWindowParameterSlider->setSkewFactorFromMidpoint (1.0); + firWindowParameterSlider->setValue (1.0); + firWindowParameterSlider->onValueChanged = [this] (float value) + { + updateAnalysisDisplays(); + }; + addAndMakeVisible (*firWindowParameterSlider); + // Parameter controls with smoothed parameter updates frequencySlider = std::make_unique (yup::Slider::LinearBarHorizontal, "Frequency"); frequencySlider->setRange ({ 20.0, 20000.0 }); @@ -1066,7 +1103,7 @@ class FilterDemo // Labels for parameter controls auto font = yup::ApplicationTheme::getGlobalTheme()->getDefaultFont().withHeight (10.0f); - for (const auto& labelText : { "Filter Type:", "Response Type:", "Frequency:", "Frequency 2:", "Q/Resonance:", "Gain (dB):", "Order:", "Noise Level:", "Output Level:" }) + for (const auto& labelText : { "Filter Type:", "Response Type:", "Frequency:", "Frequency 2:", "Q/Resonance:", "Gain (dB):", "Order:", "FIR Length:", "FIR Window:", "Window Param:", "Noise Level:", "Output Level:" }) { auto label = parameterLabels.add (std::make_unique (labelText)); label->setText (labelText); @@ -1092,8 +1129,11 @@ class FilterDemo { parameterLabels[4], qSlider.get() }, { parameterLabels[5], gainSlider.get() }, { parameterLabels[6], orderSlider.get() }, - { parameterLabels[7], noiseGainSlider.get() }, - { parameterLabels[8], outputGainSlider.get() } + { parameterLabels[7], firCoefficientsSlider.get() }, + { parameterLabels[8], firWindowCombo.get() }, + { parameterLabels[9], firWindowParameterSlider.get() }, + { parameterLabels[10], noiseGainSlider.get() }, + { parameterLabels[11], outputGainSlider.get() } }; for (auto& [label, component] : layouts) @@ -1114,6 +1154,7 @@ class FilterDemo audioSvf = std::make_shared>(); audioFirstOrder = std::make_shared>(); audioButterworthFilter = std::make_shared>(); + audioDirectFIR = std::make_shared>(); // Create instances of all filter types for UI thread uiRbj = std::make_shared>(); @@ 
-1121,14 +1162,15 @@ class FilterDemo uiSvf = std::make_shared>(); uiFirstOrder = std::make_shared>(); uiButterworthFilter = std::make_shared>(); + uiDirectFIR = std::make_shared>(); // Store in arrays for easy management allAudioFilters = { - audioRbj, audioZoelzer, audioSvf, audioFirstOrder, audioButterworthFilter + audioRbj, audioZoelzer, audioSvf, audioFirstOrder, audioButterworthFilter, audioDirectFIR }; allUIFilters = { - uiRbj, uiZoelzer, uiSvf, uiFirstOrder, uiButterworthFilter + uiRbj, uiZoelzer, uiSvf, uiFirstOrder, uiButterworthFilter, uiDirectFIR }; // Set default filters @@ -1144,7 +1186,9 @@ class FilterDemo { noiseGeneratorAmplitude.setCurrentAndTargetValue (0.1f); outputGain.setCurrentAndTargetValue (0.5f); + updateWindowParameterRange(); // Set initial window parameter range updateCurrentFilter(); + updateControlVisibility(); // Set initial visibility } void updateCurrentFilter() @@ -1171,6 +1215,9 @@ class FilterDemo case 5: currentUIFilter = uiButterworthFilter; break; + case 6: + currentUIFilter = uiDirectFIR; + break; default: currentUIFilter = uiRbj; break; @@ -1189,6 +1236,9 @@ class FilterDemo // Update UI filter with current parameters updateUIFilterParameters(); + // Update control visibility based on filter type + updateControlVisibility(); + // Update displays using UI filter frequencyResponsePlot.setFilter (currentUIFilter); frequencyResponsePlot.updateResponseData(); @@ -1201,12 +1251,12 @@ class FilterDemo return; double freq = smoothedFrequency.getNextValue(); - double freq2 = smoothedFrequency2.getNextValue(); + double freq2 = yup::jmax (freq, (double) smoothedFrequency2.getNextValue()); double q = smoothedQ.getNextValue(); double gain = smoothedGain.getNextValue(); int order = yup::jlimit (2, 16, static_cast (smoothedOrder.getNextValue())); - updateFilterParameters (currentAudioFilter.get(), freq, freq2, q, gain, order); + updateFilterParameters (currentAudioFilter.get(), firCoefficients, freq, freq2, q, gain, order); } void 
updateUIFilterParameters() @@ -1215,15 +1265,15 @@ class FilterDemo return; double freq = frequencySlider->getValue(); - double freq2 = frequency2Slider->getValue(); + double freq2 = yup::jmax (freq, frequency2Slider->getValue()); double q = qSlider->getValue(); double gain = gainSlider->getValue(); int order = yup::jlimit (2, 16, static_cast (orderSlider->getValue())); - updateFilterParameters (currentUIFilter.get(), freq, freq2, q, gain, order); + updateFilterParameters (currentUIFilter.get(), firCoefficientsUI, freq, freq2, q, gain, order); } - void updateFilterParameters (yup::FilterBase* filter, double freq, double freq2, double q, double gain, int order) + void updateFilterParameters (yup::FilterBase* filter, std::vector& coefficients, double freq, double freq2, double q, double gain, int order) { // Update parameters based on filter type using direct UI values if (auto rf = dynamic_cast*> (filter)) @@ -1246,6 +1296,10 @@ class FilterDemo { bf->setParameters (getFilterMode (currentResponseTypeId), order, freq, yup::jmax (freq2, freq * 1.01), currentSampleRate); } + else if (auto fir = dynamic_cast*> (filter)) + { + updateFIRFilterParameters (fir, coefficients, freq, freq2); + } } void updateCurrentAudioFilter() @@ -1268,6 +1322,9 @@ class FilterDemo case 5: currentAudioFilter = audioButterworthFilter; break; + case 6: + currentAudioFilter = audioDirectFIR; + break; default: currentAudioFilter = audioRbj; break; @@ -1344,6 +1401,147 @@ class FilterDemo polesZerosDisplay.updatePolesZeros (poles, zeros); } + void updateControlVisibility() + { + bool isFIRFilter = (currentFilterTypeId == 6); + + // Show/hide FIR-specific controls + firCoefficientsSlider->setVisible (isFIRFilter); + firWindowCombo->setVisible (isFIRFilter); + parameterLabels[7]->setVisible (isFIRFilter); // FIR Length label + parameterLabels[8]->setVisible (isFIRFilter); // FIR Window label + + // Show/hide window parameter control for adjustable windows (Kaiser, Rakshit-Ullah) + bool 
needsWindowParameter = isFIRFilter && (firWindowCombo->getSelectedId() == 4 || firWindowCombo->getSelectedId() == 6); // Kaiser or Rakshit-Ullah + firWindowParameterSlider->setVisible (needsWindowParameter); + parameterLabels[9]->setVisible (needsWindowParameter); // Window Parameter label + + // Show/hide standard filter controls + qSlider->setVisible (! isFIRFilter); + gainSlider->setVisible (! isFIRFilter); + orderSlider->setVisible (! isFIRFilter || currentFilterTypeId == 5); // Show for Butterworth and FIR + parameterLabels[4]->setVisible (! isFIRFilter); // Q label + parameterLabels[5]->setVisible (! isFIRFilter); // Gain label + parameterLabels[6]->setVisible (! isFIRFilter || currentFilterTypeId == 5); // Order label + + // Frequency 2 is only visible for bandpass/bandstop filters + bool needsFreq2 = (currentResponseTypeId >= 3 && currentResponseTypeId <= 5); + frequency2Slider->setVisible (needsFreq2); + parameterLabels[3]->setVisible (needsFreq2); // Frequency 2 label + + // Update restricted response types for FIR + if (isFIRFilter) + { + // Save current selection + int currentResponse = responseTypeCombo->getSelectedId(); + + // Clear and repopulate with FIR-compatible responses + responseTypeCombo->clear(); + responseTypeCombo->addItem ("Lowpass", 1); + responseTypeCombo->addItem ("Highpass", 2); + responseTypeCombo->addItem ("Bandpass", 3); + responseTypeCombo->addItem ("Bandstop", 5); + + // Restore selection if compatible, otherwise default to lowpass + if (currentResponse == 1 || currentResponse == 2 || currentResponse == 3 || currentResponse == 5) + responseTypeCombo->setSelectedId (currentResponse, yup::dontSendNotification); + else + responseTypeCombo->setSelectedId (1, yup::dontSendNotification); + } + else + { + // Restore full response type list for IIR filters + int currentResponse = responseTypeCombo->getSelectedId(); + responseTypeCombo->clear(); + responseTypeCombo->addItem ("Lowpass", 1); + responseTypeCombo->addItem ("Highpass", 2); + 
responseTypeCombo->addItem ("Bandpass CSG", 3); + responseTypeCombo->addItem ("Bandpass CPG", 4); + responseTypeCombo->addItem ("Bandstop", 5); + responseTypeCombo->addItem ("Peak", 6); + responseTypeCombo->addItem ("Low Shelf", 7); + responseTypeCombo->addItem ("High Shelf", 8); + responseTypeCombo->addItem ("Allpass", 9); + + // Restore selection + responseTypeCombo->setSelectedId (currentResponse, yup::dontSendNotification); + } + + repaint(); + } + + void updateWindowParameterRange() + { + int windowId = firWindowCombo->getSelectedId(); + + // Update parameter range and default based on window type + switch (windowId) + { + case 4: // Kaiser + firWindowParameterSlider->setRange ({ 0.0, 20.0 }); + firWindowParameterSlider->setSkewFactorFromMidpoint (8.0); + firWindowParameterSlider->setValue (8.0); // Kaiser beta parameter + break; + + case 6: // Rakshit-Ullah + firWindowParameterSlider->setRange ({ 0.0001, 100.0 }); + firWindowParameterSlider->setSkewFactorFromMidpoint (1.0); + firWindowParameterSlider->setValue (1.0); // Rakshit-Ullah r parameter + break; + + default: // Other windows (parameter not used) + firWindowParameterSlider->setRange ({ 0.0, 10.0 }); + firWindowParameterSlider->setValue (1.0); + break; + } + + updateControlVisibility(); + } + + void updateFIRFilterParameters (yup::DirectFIR* fir, std::vector& coeffs, double freq, double freq2) + { + int numCoeffs = static_cast (firCoefficientsSlider->getValue()); + auto windowType = getFIRWindowType (firWindowCombo->getSelectedId()); + auto responseMode = getFilterMode (currentResponseTypeId); + + // Get window parameter (for Kaiser and Rakshit-Ullah windows) + double windowParam = firWindowParameterSlider->getValue(); + + if (responseMode.test (yup::FilterMode::lowpass)) + yup::FilterDesigner::designFIRLowpass (coeffs, numCoeffs, freq, currentSampleRate, windowType, windowParam); + else if (responseMode.test (yup::FilterMode::highpass)) + yup::FilterDesigner::designFIRHighpass (coeffs, numCoeffs, 
freq, currentSampleRate, windowType, windowParam); + else if (responseMode.test (yup::FilterMode::bandpassCsg | yup::FilterMode::bandpassCpg)) + yup::FilterDesigner::designFIRBandpass (coeffs, numCoeffs, freq, freq2, currentSampleRate, windowType, windowParam); + else if (responseMode.test (yup::FilterMode::bandstop)) + yup::FilterDesigner::designFIRBandstop (coeffs, numCoeffs, freq, freq2, currentSampleRate, windowType, windowParam); + else + yup::FilterDesigner::designFIRLowpass (coeffs, numCoeffs, freq, currentSampleRate, windowType, windowParam); + + fir->setCoefficients (coeffs.data(), coeffs.size()); + } + + yup::WindowType getFIRWindowType (int windowId) + { + switch (windowId) + { + case 1: + return yup::WindowType::hann; + case 2: + return yup::WindowType::hamming; + case 3: + return yup::WindowType::blackman; + case 4: + return yup::WindowType::kaiser; + case 5: + return yup::WindowType::rectangular; + case 6: + return yup::WindowType::rakshitUllah; + default: + return yup::WindowType::hann; + } + } + yup::FilterModeType getFilterMode (int responseTypeId) { switch (responseTypeId) @@ -1391,6 +1589,9 @@ class FilterDemo std::vector> poles; std::vector> zeros; + std::vector firCoefficients { 512, 0.0f }; + std::vector firCoefficientsUI { 512, 0.0f }; + // Filter type settings (thread-safe storage) std::atomic currentFilterTypeId { 1 }; std::atomic currentResponseTypeId { 1 }; @@ -1401,6 +1602,7 @@ class FilterDemo std::shared_ptr> audioSvf; std::shared_ptr> audioFirstOrder; std::shared_ptr> audioButterworthFilter; + std::shared_ptr> audioDirectFIR; // UI thread filter instances std::shared_ptr> uiRbj; @@ -1408,6 +1610,7 @@ class FilterDemo std::shared_ptr> uiSvf; std::shared_ptr> uiFirstOrder; std::shared_ptr> uiButterworthFilter; + std::shared_ptr> uiDirectFIR; std::vector>> allAudioFilters; std::vector>> allUIFilters; @@ -1423,6 +1626,9 @@ class FilterDemo std::unique_ptr qSlider; std::unique_ptr gainSlider; std::unique_ptr orderSlider; + std::unique_ptr 
firCoefficientsSlider; + std::unique_ptr firWindowCombo; + std::unique_ptr firWindowParameterSlider; std::unique_ptr noiseGainSlider; std::unique_ptr outputGainSlider; yup::OwnedArray parameterLabels; diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index f5be2c0fb..ac027993e 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -563,16 +563,8 @@ class PartitionedConvolver::Impl trimmedLength = trimSilenceFromEnd (impulseResponse, length, *options.trimEndSilenceBelowDb); // Update DirectFIR in-place - std::vector directCoefficients; - const auto directCoefficientsCount = std::min (directFIRCoefficientCount, trimmedLength); - if (directCoefficientsCount > 0) - { - directCoefficients.reserve (directCoefficientsCount); - directCoefficients.assign (impulseResponse, impulseResponse + directCoefficientsCount); - } - - newFIR.setCoefficients (std::move (directCoefficients), headroomScale); + newFIR.setCoefficients (impulseResponse, directCoefficientsCount, headroomScale); // Update FFT layers std::size_t consumed = directCoefficientsCount; diff --git a/modules/yup_dsp/designers/yup_FilterDesigner.cpp b/modules/yup_dsp/designers/yup_FilterDesigner.cpp index a92949a08..2972eb0c0 100644 --- a/modules/yup_dsp/designers/yup_FilterDesigner.cpp +++ b/modules/yup_dsp/designers/yup_FilterDesigner.cpp @@ -630,11 +630,13 @@ int FilterDesigner::designLinkwitzRiley ( //============================================================================== template -std::vector FilterDesigner::designFIRLowpass ( +void FilterDesigner::designFIRLowpass ( + std::vector& coefficients, int numCoefficients, CoeffType cutoffFreq, double sampleRate, - WindowType windowType) noexcept + WindowType windowType, + CoeffType windowParameter) noexcept { jassert (numCoefficients > 0); jassert (cutoffFreq > static_cast (0.0)); @@ -642,7 +644,7 @@ 
std::vector FilterDesigner::designFIRLowpass ( jassert (cutoffFreq < static_cast (sampleRate / 2.0)); numCoefficients = nextOdd (numCoefficients); - std::vector coefficients (numCoefficients); + coefficients.resize (numCoefficients); const auto normalizedCutoff = static_cast (2.0) * cutoffFreq / static_cast (sampleRate); const int center = (numCoefficients - 1) / 2; @@ -664,25 +666,27 @@ std::vector FilterDesigner::designFIRLowpass ( // Apply window function for (int i = 0; i < numCoefficients; ++i) { - const auto windowValue = WindowFunctions::getValue (windowType, i, numCoefficients); + const auto windowValue = WindowFunctions::getValue (windowType, i, numCoefficients, windowParameter); coefficients[i] *= windowValue; } // Normalization const auto sum = std::accumulate (coefficients.begin(), coefficients.end(), static_cast (0.0)); if (sum != static_cast (0.0)) + { for (auto& c : coefficients) c /= sum; - - return coefficients; + } } template -std::vector FilterDesigner::designFIRHighpass ( +void FilterDesigner::designFIRHighpass ( + std::vector& coefficients, int numCoefficients, CoeffType cutoffFreq, double sampleRate, - WindowType windowType) noexcept + WindowType windowType, + CoeffType windowParameter) noexcept { jassert (numCoefficients > 0); jassert (cutoffFreq > static_cast (0.0)); @@ -691,7 +695,7 @@ std::vector FilterDesigner::designFIRHighpass ( // Generate lowpass first numCoefficients = nextOdd (numCoefficients); - auto coefficients = designFIRLowpass (numCoefficients, cutoffFreq, sampleRate, windowType); + designFIRLowpass (coefficients, numCoefficients, cutoffFreq, sampleRate, windowType); // Convert to highpass using spectral inversion const int center = (numCoefficients - 1) / 2; @@ -707,19 +711,21 @@ std::vector FilterDesigner::designFIRHighpass ( hpi += coefficients[n] * ((n & 1) ? 
static_cast (-1.0) : static_cast (1.0)); if (hpi != static_cast (0.0)) + { for (auto& c : coefficients) c /= hpi; - - return coefficients; + } } template -std::vector FilterDesigner::designFIRBandpass ( +void FilterDesigner::designFIRBandpass ( + std::vector& coefficients, int numCoefficients, CoeffType lowCutoffFreq, CoeffType highCutoffFreq, double sampleRate, - WindowType windowType) noexcept + WindowType windowType, + CoeffType windowParameter) noexcept { jassert (numCoefficients > 0); jassert (lowCutoffFreq > static_cast (0.0)); @@ -728,7 +734,7 @@ std::vector FilterDesigner::designFIRBandpass ( jassert (highCutoffFreq < static_cast (sampleRate / 2.0)); numCoefficients = nextOdd (numCoefficients); - std::vector coefficients (numCoefficients); + coefficients.resize (numCoefficients); const auto normalizedLow = static_cast (2.0) * lowCutoffFreq / static_cast (sampleRate); const auto normalizedHigh = static_cast (2.0) * highCutoffFreq / static_cast (sampleRate); @@ -754,20 +760,20 @@ std::vector FilterDesigner::designFIRBandpass ( // Apply window function for (int i = 0; i < numCoefficients; ++i) { - const auto windowValue = WindowFunctions::getValue (windowType, i, numCoefficients); + const auto windowValue = WindowFunctions::getValue (windowType, i, numCoefficients, windowParameter); coefficients[i] *= windowValue; } - - return coefficients; } template -std::vector FilterDesigner::designFIRBandstop ( +void FilterDesigner::designFIRBandstop ( + std::vector& coefficients, int numCoefficients, CoeffType lowCutoffFreq, CoeffType highCutoffFreq, double sampleRate, - WindowType windowType) noexcept + WindowType windowType, + CoeffType windowParameter) noexcept { jassert (numCoefficients > 0); jassert (lowCutoffFreq > static_cast (0.0)); @@ -777,7 +783,7 @@ std::vector FilterDesigner::designFIRBandstop ( // Generate bandpass first numCoefficients = nextOdd (numCoefficients); - auto coefficients = designFIRBandpass (numCoefficients, lowCutoffFreq, highCutoffFreq, 
sampleRate, windowType); + designFIRBandpass (coefficients, numCoefficients, lowCutoffFreq, highCutoffFreq, sampleRate, windowType); // Convert to bandstop using spectral inversion const int center = (numCoefficients - 1) / 2; @@ -787,8 +793,6 @@ std::vector FilterDesigner::designFIRBandstop ( // Add unit impulse at center coefficients[center] += static_cast (1.0); - - return coefficients; } //============================================================================== diff --git a/modules/yup_dsp/designers/yup_FilterDesigner.h b/modules/yup_dsp/designers/yup_FilterDesigner.h index baed267a2..f74b7097d 100644 --- a/modules/yup_dsp/designers/yup_FilterDesigner.h +++ b/modules/yup_dsp/designers/yup_FilterDesigner.h @@ -640,11 +640,13 @@ class FilterDesigner @returns Vector of FIR coefficients suitable for DirectFIR */ - static std::vector designFIRLowpass ( + static void designFIRLowpass ( + std::vector& coefficients, int numCoefficients, CoeffType cutoffFreq, double sampleRate, - WindowType windowType = WindowType::hann) noexcept; + WindowType windowType = WindowType::hann, + CoeffType windowParameter = CoeffType (8)) noexcept; /** Designs FIR highpass filter coefficients using windowed sinc method. @@ -656,11 +658,13 @@ class FilterDesigner @returns Vector of FIR coefficients suitable for DirectFIR */ - static std::vector designFIRHighpass ( + static void designFIRHighpass ( + std::vector& coefficients, int numCoefficients, CoeffType cutoffFreq, double sampleRate, - WindowType windowType = WindowType::hann) noexcept; + WindowType windowType = WindowType::hann, + CoeffType windowParameter = CoeffType (8)) noexcept; /** Designs FIR bandpass filter coefficients using windowed sinc method. 
@@ -673,12 +677,14 @@ class FilterDesigner @returns Vector of FIR coefficients suitable for DirectFIR */ - static std::vector designFIRBandpass ( + static void designFIRBandpass ( + std::vector& coefficients, int numCoefficients, CoeffType lowCutoffFreq, CoeffType highCutoffFreq, double sampleRate, - WindowType windowType = WindowType::hann) noexcept; + WindowType windowType = WindowType::hann, + CoeffType windowParameter = CoeffType (8)) noexcept; /** Designs FIR bandstop filter coefficients using windowed sinc method. @@ -691,12 +697,14 @@ class FilterDesigner @returns Vector of FIR coefficients suitable for DirectFIR */ - static std::vector designFIRBandstop ( + static void designFIRBandstop ( + std::vector& coefficients, int numCoefficients, CoeffType lowCutoffFreq, CoeffType highCutoffFreq, double sampleRate, - WindowType windowType = WindowType::hann) noexcept; + WindowType windowType = WindowType::hann, + CoeffType windowParameter = CoeffType (8)) noexcept; }; } // namespace yup diff --git a/modules/yup_dsp/filters/yup_DirectFIR.h b/modules/yup_dsp/filters/yup_DirectFIR.h index 5a2dea78b..b637714c8 100644 --- a/modules/yup_dsp/filters/yup_DirectFIR.h +++ b/modules/yup_dsp/filters/yup_DirectFIR.h @@ -100,29 +100,17 @@ class DirectFIR : public FilterBase //============================================================================== /** - Set the FIR filter coefficients. + Set the FIR filter coefficients from a raw pointer. - @param coefficients Vector containing the FIR coefficients in time order - @param scaling Scaling factor to apply to all coefficients + @param coefficients Span of FIR coefficients array + @param scaling Scaling factor to apply to all coefficients @note This method is not real-time safe and should be called during initialization - or when audio processing is paused. + or when audio processing is paused, unless the coefficients vector has already been set with a greater or equal size. 
*/ - void setCoefficients (std::vector coefficients, CoeffType scaling = CoeffType (1)) + void setCoefficients (yup::Span coefficients, CoeffType scaling = CoeffType (1)) { - currentScaling = scaling; - if (! approximatelyEqual (currentScaling, CoeffType (1))) - FloatVectorOperations::multiply (coefficients.data(), scaling, coefficients.size()); - - coefficientsReversed = std::move (coefficients); - std::reverse (coefficientsReversed.begin(), coefficientsReversed.end()); - - numCoefficients = coefficientsReversed.size(); - paddedLen = (numCoefficients + 3u) & ~3u; // Round up to multiple of 4 for SIMD - coefficientsReversed.resize (paddedLen, 0.0f); - - history.assign (2 * numCoefficients, 0.0f); - reset(); + setCoefficients (coefficients.data(), coefficients.size(), scaling); } /** @@ -133,7 +121,7 @@ class DirectFIR : public FilterBase @param scaling Scaling factor to apply to all coefficients @note This method is not real-time safe and should be called during initialization - or when audio processing is paused. + or when audio processing is paused, unless the coefficients vector has already been set with a greater or equal size. */ void setCoefficients (const CoeffType* coefficients, std::size_t numCoefficientsIn, CoeffType scaling = CoeffType (1)) { @@ -144,8 +132,21 @@ class DirectFIR : public FilterBase return; } - std::vector coefficientsVector (coefficients, coefficients + numCoefficientsIn); - setCoefficients (std::move (coefficientsVector), scaling); + numCoefficients = numCoefficientsIn; + paddedLen = (numCoefficientsIn + 3u) & ~3u; // Round up to multiple of 4 for SIMD + + coefficientsReversed.resize (paddedLen, 0.0f); + + currentScaling = scaling; + if (! 
approximatelyEqual (currentScaling, CoeffType (1))) + FloatVectorOperations::copyWithMultiply (coefficientsReversed.data(), coefficients, scaling, static_cast (numCoefficientsIn)); + else + FloatVectorOperations::copy (coefficientsReversed.data(), coefficients, static_cast (numCoefficientsIn)); + + std::reverse (coefficientsReversed.begin(), coefficientsReversed.begin() + numCoefficients); + + history.resize (2 * numCoefficients, 0.0f); + writeIndex = 0; } /** @@ -263,19 +264,29 @@ class DirectFIR : public FilterBase if (numCoefficients == 0) return Complex (0, 0); + // ω = 2π f / Fs const CoeffType omega = MathConstants::twoPi * frequency / static_cast (this->sampleRate); - Complex response (0, 0); + // Standard FIR frequency response: H(e^{jω}) = Σ_{n=0}^{N-1} h[n] * e^{-jωn} + // coefficientsReversed stores: [h[M-1], h[M-2], ..., h[1], h[0]] + // So coefficientsReversed[k] = h[M-1-k], and we need: Σ h[n] * e^{-jωn} + + // e^{-jω} + const Complex ez_neg { std::cos (omega), -std::sin (omega) }; + + // Accumulate: Σ_{n=0}^{N-1} h[n] * e^{-jωn} + // Since coefficientsReversed[k] = h[M-1-k], we have h[n] = coefficientsReversed[M-1-n] + Complex sum { 0, 0 }; + Complex ez_neg_n { 1, 0 }; // e^{-jω*0} = 1 - // H(e^jω) = Σ h[n] * e^(-jωn) for n = 0 to N-1 for (std::size_t n = 0; n < numCoefficients; ++n) { - const CoeffType angle = -omega * static_cast (n); - Complex exponential (std::cos (angle), std::sin (angle)); - response += static_cast (coefficientsReversed[numCoefficients - 1 - n]) * exponential; + const CoeffType h_n = coefficientsReversed[numCoefficients - 1 - n]; + sum += h_n * ez_neg_n; + ez_neg_n *= ez_neg; } - return response; + return sum; } /** diff --git a/modules/yup_dsp/windowing/yup_WindowFunctions.h b/modules/yup_dsp/windowing/yup_WindowFunctions.h index e3968b8cb..2e9ddadfa 100644 --- a/modules/yup_dsp/windowing/yup_WindowFunctions.h +++ b/modules/yup_dsp/windowing/yup_WindowFunctions.h @@ -35,21 +35,22 @@ namespace yup */ enum class WindowType { - 
rectangular, /**< Rectangular (no windowing) */ - hann, /**< Hann window (raised cosine) */ - hamming, /**< Hamming window */ - blackman, /**< Blackman window */ - blackmanHarris, /**< Blackman-Harris window (4-term) */ - kaiser, /**< Kaiser window (parameterizable) */ - gaussian, /**< Gaussian window */ - tukey, /**< Tukey window (tapered cosine) */ - bartlett, /**< Bartlett window (triangular) */ - welch, /**< Welch window (parabolic) */ - flattop, /**< Flat-top window */ - cosine, /**< Cosine window */ - lanczos, /**< Lanczos window (sinc) */ - nuttall, /**< Nuttall window */ - blackmanNuttall /**< Blackman-Nuttall window */ + rectangular, /**< Rectangular (no windowing) */ + hann, /**< Hann window (raised cosine) */ + hamming, /**< Hamming window */ + blackman, /**< Blackman window */ + blackmanHarris, /**< Blackman-Harris window (4-term) */ + kaiser, /**< Kaiser window (parameterizable) */ + gaussian, /**< Gaussian window */ + tukey, /**< Tukey window (tapered cosine) */ + bartlett, /**< Bartlett window (triangular) */ + welch, /**< Welch window (parabolic) */ + flattop, /**< Flat-top window */ + cosine, /**< Cosine window */ + lanczos, /**< Lanczos window (sinc) */ + nuttall, /**< Nuttall window */ + blackmanNuttall, /**< Blackman-Nuttall window */ + rakshitUllah /**< Rakshit-Ullah adjustable window (novel) */ }; //============================================================================== @@ -131,6 +132,8 @@ class WindowFunctions return nuttall (n, N); case WindowType::blackmanNuttall: return blackmanNuttall (n, N); + case WindowType::rakshitUllah: + return rakshitUllah (n, N, parameter); default: return rectangular (n, N); } @@ -353,6 +356,55 @@ class WindowFunctions return a0 - a1 * std::cos (factor) + a2 * std::cos (FloatType (2) * factor) - a3 * std::cos (FloatType (3) * factor); } + /** + Rakshit-Ullah adjustable window function. + + A novel adjustable window combining hyperbolic tangent and weighted cosine functions. 
+ Proposed by Hrishi Rakshit and Muhammad Ahsan Ullah (2015). + + @param n Sample index (0 to N-1) + @param N Window length + @param r Controlling parameter (default 1.0). Higher values give better side-lobe roll-off. + Common values: 0.0005, 1.18, 1.618, 30, 75 + @return Window value at sample n + + @note Reference: "FIR Filter Design Using An Adjustable Novel Window and Its Applications" + International Journal of Engineering and Technology (IJET), 2015 + */ + static FloatType rakshitUllah (int n, int N, FloatType r = FloatType (1)) noexcept + { + if (N <= 1) + return FloatType (1); + + // Constants from the paper + constexpr auto alpha = FloatType (2); + constexpr auto B = FloatType (2); + + // Hyperbolic tangent component (y1) + const auto center = (N - 1) / FloatType (2); + const auto coshAlpha = std::cosh (alpha); + const auto coshAlphaSquared = coshAlpha * coshAlpha; + + const auto arg1 = (n - center + coshAlphaSquared) / B; + const auto arg2 = (n - center - coshAlphaSquared) / B; + + const auto y1 = std::tanh (arg1) - std::tanh (arg2); + + // Weighted cosine component (y2) + const auto factor = MathConstants::twoPi * n / (N - 1); + const auto y2 = FloatType (0.375) - FloatType (0.5) * std::cos (factor) + + FloatType (0.125) * std::cos (FloatType (2) * factor); + + // Combined window with power parameter + const auto window = y1 * y2; + + // Apply the controlling parameter r + if (approximatelyEqual (r, FloatType (1))) + return window; + else + return std::pow (std::abs (window), r) * (window >= FloatType (0) ? 
FloatType (1) : FloatType (-1)); + } + private: //============================================================================== /** Modified Bessel function of the first kind, order 0 */ diff --git a/tests/yup_dsp/yup_DirectFIR.cpp b/tests/yup_dsp/yup_DirectFIR.cpp index a58a5e3cd..0f1747c20 100644 --- a/tests/yup_dsp/yup_DirectFIR.cpp +++ b/tests/yup_dsp/yup_DirectFIR.cpp @@ -295,7 +295,8 @@ TEST_F (DirectFIRTest, AccumulativeOutput) TEST_F (DirectFIRTest, Linearity) { DirectFIR fir; - auto coefficients = FilterDesigner::designFIRLowpass (32, 1000.0f, 44100.0f); + std::vector coefficients; + FilterDesigner::designFIRLowpass (coefficients, 32, 1000.0f, 44100.0f); fir.setCoefficients (coefficients); std::vector input (512); @@ -312,7 +313,7 @@ TEST_F (DirectFIRTest, Linearity) fir.processBlock (input.data(), output1.data(), static_cast (input.size())); fir.reset(); - fir.processBlock (input2.data(), output2.data(), input2.size()); + fir.processBlock (input2.data(), output2.data(), static_cast (input2.size())); // output2 should be approximately 2x output1 for (size_t i = 0; i < output1.size(); ++i) @@ -354,7 +355,8 @@ TEST_F (DirectFIRTest, LowpassFiltering) DirectFIR fir; // Create lowpass filter coefficients - auto coefficients = FilterDesigner::designFIRLowpass (64, 1000.0f, 44100.0); + std::vector coefficients; + FilterDesigner::designFIRLowpass (coefficients, 64, 1000.0f, 44100.0); fir.setCoefficients (coefficients); const float sampleRate = 44100.0f; @@ -395,7 +397,8 @@ TEST_F (DirectFIRTest, LowpassFiltering) TEST_F (DirectFIRTest, BlockSizeIndependence) { DirectFIR fir; - auto coefficients = FilterDesigner::designFIRLowpass (48, 2000.0f, 44100.0); + std::vector coefficients; + FilterDesigner::designFIRLowpass (coefficients, 48, 2000.0f, 44100.0); fir.setCoefficients (coefficients); const size_t totalSamples = 1024; @@ -424,7 +427,7 @@ TEST_F (DirectFIRTest, BlockSizeIndependence) if (blockSize == 0) break; - fir.processBlock (input.data() + processed, 
output2.data() + processed, blockSize); + fir.processBlock (input.data() + processed, output2.data() + processed, static_cast (blockSize)); processed += blockSize; } @@ -433,7 +436,7 @@ TEST_F (DirectFIRTest, BlockSizeIndependence) { size_t remaining = totalSamples - processed; size_t blockSize = std::min (remaining, size_t (128)); // Process in chunks of 128 - fir.processBlock (input.data() + processed, output2.data() + processed, blockSize); + fir.processBlock (input.data() + processed, output2.data() + processed, static_cast (blockSize)); processed += blockSize; } @@ -571,7 +574,7 @@ TEST_F (DirectFIRTest, StressTest) std::vector output (blockSize, 0.0f); fillWithRandomData (input); - EXPECT_NO_THROW (fir.processBlock (input.data(), output.data(), blockSize)); + EXPECT_NO_THROW (fir.processBlock (input.data(), output.data(), static_cast (blockSize))); // Verify output quality for (float sample : output) diff --git a/tests/yup_dsp/yup_FilterDesigner.cpp b/tests/yup_dsp/yup_FilterDesigner.cpp index 120c4ed08..5ddc72b6d 100644 --- a/tests/yup_dsp/yup_FilterDesigner.cpp +++ b/tests/yup_dsp/yup_FilterDesigner.cpp @@ -368,7 +368,8 @@ TEST_F (FilterDesignerTests, FloatPrecisionConsistency) TEST_F (FilterDesignerTests, FirLowpassBasicProperties) { const int numCoeffs = 65; // Odd number for symmetric filter - auto coeffs = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRate); + std::vector coeffs; + FilterDesigner::designFIRLowpass (coeffs, numCoeffs, 1000.0f, sampleRate); // Should return the correct number of coefficients EXPECT_EQ (coeffs.size(), numCoeffs); @@ -393,7 +394,8 @@ TEST_F (FilterDesignerTests, FirLowpassBasicProperties) TEST_F (FilterDesignerTests, FirHighpassBasicProperties) { const int numCoeffs = 65; - auto coeffs = FilterDesigner::designFIRHighpass (numCoeffs, 1000.0f, sampleRate); + std::vector coeffs; + FilterDesigner::designFIRHighpass (coeffs, numCoeffs, 1000.0f, sampleRate); // Should return the correct number of coefficients 
EXPECT_EQ (coeffs.size(), numCoeffs); @@ -419,7 +421,8 @@ TEST_F (FilterDesignerTests, FirHighpassBasicProperties) TEST_F (FilterDesignerTests, FirBandpassBasicProperties) { const int numCoeffs = 65; - auto coeffs = FilterDesigner::designFIRBandpass (numCoeffs, 800.0f, 1200.0f, sampleRate); + std::vector coeffs; + FilterDesigner::designFIRBandpass (coeffs, numCoeffs, 800.0f, 1200.0f, sampleRate); // Should return the correct number of coefficients EXPECT_EQ (coeffs.size(), numCoeffs); @@ -445,7 +448,8 @@ TEST_F (FilterDesignerTests, FirBandpassBasicProperties) TEST_F (FilterDesignerTests, FirBandstopBasicProperties) { const int numCoeffs = 65; - auto coeffs = FilterDesigner::designFIRBandstop (numCoeffs, 800.0f, 1200.0f, sampleRate); + std::vector coeffs; + FilterDesigner::designFIRBandstop (coeffs, numCoeffs, 800.0f, 1200.0f, sampleRate); // Should return the correct number of coefficients EXPECT_EQ (coeffs.size(), numCoeffs); @@ -473,9 +477,11 @@ TEST_F (FilterDesignerTests, FirDifferentWindowTypes) const int numCoeffs = 33; // Test different window types - auto hannCoeffs = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRate, WindowType::hann); - auto hammingCoeffs = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRate, WindowType::hamming); - auto blackmanCoeffs = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRate, WindowType::blackman); + std::vector hannCoeffs, hammingCoeffs, blackmanCoeffs; + + FilterDesigner::designFIRLowpass (hannCoeffs, numCoeffs, 1000.0f, sampleRate, WindowType::hann); + FilterDesigner::designFIRLowpass (hammingCoeffs, numCoeffs, 1000.0f, sampleRate, WindowType::hamming); + FilterDesigner::designFIRLowpass (blackmanCoeffs, numCoeffs, 1000.0f, sampleRate, WindowType::blackman); // All should have same size EXPECT_EQ (hannCoeffs.size(), numCoeffs); @@ -507,8 +513,11 @@ TEST_F (FilterDesignerTests, FirFloatDoubleConsistency) { const int numCoeffs = 33; - auto doubleCoeffs = 
FilterDesigner::designFIRLowpass (numCoeffs, 1000.0, sampleRate); - auto floatCoeffs = FilterDesigner::designFIRLowpass (numCoeffs, 1000.0f, sampleRate); + std::vector doubleCoeffs; + FilterDesigner::designFIRLowpass (doubleCoeffs, numCoeffs, 1000.0, sampleRate); + + std::vector floatCoeffs; + FilterDesigner::designFIRLowpass (floatCoeffs, numCoeffs, 1000.0f, sampleRate); EXPECT_EQ (doubleCoeffs.size(), floatCoeffs.size()); @@ -519,19 +528,21 @@ TEST_F (FilterDesignerTests, FirFloatDoubleConsistency) TEST_F (FilterDesignerTests, DISABLED_ExportFIRCoefficientsForAnalysis) { - const int numCoeffs = 129; + const int numCoeffs = 97; const float sampleRateF = 44100.0f; // Design different FIR filters - auto lowpass = FilterDesigner::designFIRLowpass (numCoeffs, 10000.0f, sampleRateF); - auto highpass = FilterDesigner::designFIRHighpass (numCoeffs, 10000.0f, sampleRateF); - auto bandpass = FilterDesigner::designFIRBandpass (numCoeffs, 8000.0f, 12000.0f, sampleRateF); - auto bandstop = FilterDesigner::designFIRBandstop (numCoeffs, 8000.0f, 12000.0f, sampleRateF); + std::vector lowpass, highpass, bandpass, bandstop; + FilterDesigner::designFIRLowpass (lowpass, numCoeffs, 10000.0f, sampleRateF); + FilterDesigner::designFIRHighpass (highpass, numCoeffs, 10000.0f, sampleRateF); + FilterDesigner::designFIRBandpass (bandpass, numCoeffs, 8000.0f, 12000.0f, sampleRateF); + FilterDesigner::designFIRBandstop (bandstop, numCoeffs, 8000.0f, 12000.0f, sampleRateF); // Different windows for lowpass - auto lowpassHann = FilterDesigner::designFIRLowpass (numCoeffs, 10000.0f, sampleRateF, WindowType::hann); - auto lowpassHamming = FilterDesigner::designFIRLowpass (numCoeffs, 10000.0f, sampleRateF, WindowType::hamming); - auto lowpassBlackman = FilterDesigner::designFIRLowpass (numCoeffs, 10000.0f, sampleRateF, WindowType::blackman); + std::vector lowpassHann, lowpassHamming, lowpassBlackman; + FilterDesigner::designFIRLowpass (lowpassHann, numCoeffs, 10000.0f, sampleRateF, 
WindowType::hann); + FilterDesigner::designFIRLowpass (lowpassHamming, numCoeffs, 10000.0f, sampleRateF, WindowType::hamming); + FilterDesigner::designFIRLowpass (lowpassBlackman, numCoeffs, 10000.0f, sampleRateF, WindowType::blackman); // Helper lambda to write coefficients to file auto writeCoeffs = [] (const std::vector& coeffs, const std::string& filename)