diff --git a/.github/workflows/build_android.yml b/.github/workflows/build_android.yml
index ca95f690a..0d0612c36 100644
--- a/.github/workflows/build_android.yml
+++ b/.github/workflows/build_android.yml
@@ -37,6 +37,8 @@ jobs:
 
     steps:
         - uses: actions/checkout@v4
+          with:
+            fetch-depth: 0
         - uses: seanmiddleditch/gha-setup-ninja@master
         - name: Setup Java
           uses: actions/setup-java@v3
@@ -65,6 +67,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - name: Setup Java
       uses: actions/setup-java@v3
@@ -95,6 +99,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - name: Setup Java
       uses: actions/setup-java@v3
diff --git a/.github/workflows/build_ios.yml b/.github/workflows/build_ios.yml
index 51a60a5e8..49c990074 100644
--- a/.github/workflows/build_ios.yml
+++ b/.github/workflows/build_ios.yml
@@ -33,6 +33,9 @@ jobs:
 
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+
     - uses: seanmiddleditch/gha-setup-ninja@master
 
     - name: Configure
@@ -57,6 +60,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - uses: actions/cache/restore@v4
       id: cache-restore
@@ -76,6 +81,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - uses: actions/cache/restore@v4
       id: cache-restore
@@ -95,6 +102,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - uses: actions/cache/restore@v4
       id: cache-restore
diff --git a/.github/workflows/build_linux.yml b/.github/workflows/build_linux.yml
index 6f9b096ec..941b7e466 100644
--- a/.github/workflows/build_linux.yml
+++ b/.github/workflows/build_linux.yml
@@ -39,6 +39,8 @@ jobs:
 
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - name: Install Dependencies
       run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS}
@@ -63,6 +65,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS}
     - uses: actions/cache/restore@v4
@@ -85,6 +89,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS}
     - uses: actions/cache/restore@v4
@@ -103,6 +109,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS}
     - uses: actions/cache/restore@v4
@@ -121,6 +129,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS}
     - uses: actions/cache/restore@v4
@@ -139,6 +149,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS}
     - uses: actions/cache/restore@v4
diff --git a/.github/workflows/build_macos.yml b/.github/workflows/build_macos.yml
index f2e672586..dbfa6bd6f 100644
--- a/.github/workflows/build_macos.yml
+++ b/.github/workflows/build_macos.yml
@@ -32,6 +32,8 @@ jobs:
 
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
 
     - name: Configure
@@ -54,6 +56,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - uses: actions/cache/restore@v4
       id: cache-restore
@@ -75,6 +79,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - uses: actions/cache/restore@v4
       id: cache-restore
@@ -92,6 +98,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - uses: actions/cache/restore@v4
       id: cache-restore
@@ -109,6 +117,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - uses: actions/cache/restore@v4
       id: cache-restore
@@ -126,6 +136,8 @@ jobs:
     needs: [configure]
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - uses: actions/cache/restore@v4
       id: cache-restore
diff --git a/.github/workflows/build_wasm.yml b/.github/workflows/build_wasm.yml
index b88c425e7..1ac58ae20 100644
--- a/.github/workflows/build_wasm.yml
+++ b/.github/workflows/build_wasm.yml
@@ -38,6 +38,8 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS}
     - name: Setup emsdk
@@ -58,6 +60,8 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS}
     - name: Setup emsdk
@@ -74,6 +78,8 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS}
     - name: Setup emsdk
@@ -90,6 +96,8 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - uses: seanmiddleditch/gha-setup-ninja@master
     - run: sudo apt-get update && sudo apt-get install -y ${INSTALL_DEPS}
     - name: Setup emsdk
diff --git a/.github/workflows/build_windows.yml b/.github/workflows/build_windows.yml
index 80dad6dd6..024ec032d 100644
--- a/.github/workflows/build_windows.yml
+++ b/.github/workflows/build_windows.yml
@@ -33,6 +33,8 @@ jobs:
     runs-on: windows-latest
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - run: cmake ${{ github.workspace }} -B ${{ runner.workspace }}/build -DYUP_ENABLE_TESTS=ON
     - run: cmake --build ${{ runner.workspace }}/build --config Debug --parallel 4 --target yup_tests
     - working-directory: ${{ runner.workspace }}/build/tests/Debug
@@ -47,6 +49,8 @@ jobs:
     runs-on: windows-latest
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - run: cmake ${{ github.workspace }} -B ${{ runner.workspace }}/build -DYUP_ENABLE_EXAMPLES=ON
     - run: cmake --build ${{ runner.workspace }}/build --config Debug --parallel 4 --target example_console
     - run: cmake --build ${{ runner.workspace }}/build --config Release --parallel 4 --target example_console
@@ -55,6 +59,8 @@ jobs:
     runs-on: windows-latest
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - run: cmake ${{ github.workspace }} -B ${{ runner.workspace }}/build -DYUP_ENABLE_EXAMPLES=ON
     - run: cmake --build ${{ runner.workspace }}/build --config Debug --parallel 4 --target example_app
     - run: cmake --build ${{ runner.workspace }}/build --config Release --parallel 4 --target example_app
@@ -63,6 +69,8 @@ jobs:
     runs-on: windows-latest
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - run: cmake ${{ github.workspace }} -B ${{ runner.workspace }}/build -DYUP_ENABLE_EXAMPLES=ON
     - run: cmake --build ${{ runner.workspace }}/build --config Debug --parallel 4 --target example_graphics
     - run: cmake --build ${{ runner.workspace }}/build --config Release --parallel 4 --target example_graphics
@@ -71,6 +79,8 @@ jobs:
     runs-on: windows-latest
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
     - run: cmake ${{ github.workspace }} -B ${{ runner.workspace }}/build -DYUP_ENABLE_EXAMPLES=ON
     - run: cmake --build ${{ runner.workspace }}/build --config Debug --parallel 4 --target example_plugin_clap_plugin
     - run: cmake --build ${{ runner.workspace }}/build --config Release --parallel 4 --target example_plugin_clap_plugin
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index 0c0652eb2..1ad4e6340 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -59,6 +59,8 @@ jobs:
     steps:
     - name: Checkout repository
       uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
 
     - name: Setup Ninja
       uses: seanmiddleditch/gha-setup-ninja@master
diff --git a/.github/workflows/python_linux.yml b/.github/workflows/python_linux.yml
index 00ea8c306..0861a4c52 100644
--- a/.github/workflows/python_linux.yml
+++ b/.github/workflows/python_linux.yml
@@ -40,6 +40,8 @@ jobs:
 
     steps:
       - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
 
       - name: Set up QEMU
         if: matrix.cibw_archs == 'aarch64'
diff --git a/.github/workflows/python_macos.yml b/.github/workflows/python_macos.yml
index 9fb101e05..a3d1d2846 100644
--- a/.github/workflows/python_macos.yml
+++ b/.github/workflows/python_macos.yml
@@ -34,10 +34,12 @@ jobs:
       fail-fast: true
       matrix:
         include:
-        - { os: macos-15, python: 311, platform_id: macosx_universal2, cibw_archs: universal2 }
+        - { os: macos-latest, python: 311, platform_id: macosx_universal2, cibw_archs: universal2 }
 
     steps:
       - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
 
       - name: Setup and install python
         uses: actions/setup-python@v5
diff --git a/.github/workflows/python_windows.yml b/.github/workflows/python_windows.yml
index ae90b3a7f..b4df3770f 100644
--- a/.github/workflows/python_windows.yml
+++ b/.github/workflows/python_windows.yml
@@ -41,6 +41,8 @@ jobs:
 
     steps:
       - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
 
       - name: Setup and install python
         uses: actions/setup-python@v5
diff --git a/examples/graphics/data/ir_e112_g12_dyn_us_6v6.wav b/examples/graphics/data/ir_e112_g12_dyn_us_6v6.wav
new file mode 100644
index 000000000..be8463d3a
Binary files /dev/null and b/examples/graphics/data/ir_e112_g12_dyn_us_6v6.wav differ
diff --git a/examples/graphics/source/examples/ConvolutionDemo.h b/examples/graphics/source/examples/ConvolutionDemo.h
new file mode 100644
index 000000000..221cb37cb
--- /dev/null
+++ b/examples/graphics/source/examples/ConvolutionDemo.h
@@ -0,0 +1,492 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2025 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#pragma once
+
+#include <yup_dsp/yup_dsp.h>
+#include <yup_audio_formats/yup_audio_formats.h>
+#include <yup_audio_gui/yup_audio_gui.h>
+#include <yup_gui/yup_gui.h>
+
+#include <memory>
+#include <vector>
+#include <iostream>
+#include <atomic>
+
+//==============================================================================
+
+class ConvolutionDemo
+    : public yup::Component
+    , public yup::AudioIODeviceCallback
+    , public yup::Timer
+{
+public:
+    /** Builds the demo: registers the audio formats, prepares the gain smoothers
+        and the convolver layout, creates the UI, loads the bundled audio loop and
+        impulse response, opens the default output device and starts the repaint
+        timer.
+    */
+    ConvolutionDemo()
+        : loadIRButton ("Load IR...")
+        , wetGainSlider (yup::Slider::LinearHorizontal)
+        , dryGainSlider (yup::Slider::LinearHorizontal)
+    {
+        // Initialiser list above follows the member declaration order.
+        formatManager.registerDefaultFormats();
+
+        // Initialize smoothed values. 44100 is only a placeholder rate:
+        // audioDeviceAboutToStart() resets both smoothers with the device rate.
+        wetGain.reset (44100, 0.02);
+        dryGain.reset (44100, 0.02);
+        wetGain.setCurrentAndTargetValue (1.0f);
+        dryGain.setCurrentAndTargetValue (0.3f);
+
+        // Configure convolver with typical layout before an impulse response is set.
+        // NOTE(review): presumably the layout has to exist before the IR is
+        // partitioned - confirm against PartitionedConvolver.
+        convolver.setTypicalLayout (256, { 256, 1024, 4096 });
+
+        // Create the UI before loading the impulse response: the load updates the
+        // info label and pushes points into the waveform signal that
+        // setupWaveformDisplay() registers, and createUI() would otherwise overwrite
+        // the label with its "Loading default IR..." placeholder afterwards.
+        createUI();
+
+        // Load default audio files
+        loadAudioFile();
+        loadDefaultImpulseResponse();
+
+        // Audio device manager
+        audioDeviceManager.initialiseWithDefaultDevices (0, 2);
+
+        // Start timer for waveform updates
+        startTimerHz (30);
+    }
+
+    /** Unregisters this object as an audio callback and closes the audio device. */
+    ~ConvolutionDemo() override
+    {
+        // Unregister before closing the device.
+        audioDeviceManager.removeAudioCallback (this);
+        audioDeviceManager.closeAudioDevice();
+    }
+
+    /** Lays out the children: the IR button and info label in the top strip, the
+        wet and dry gain controls side by side below it, and the waveform plot in
+        whatever area is left.
+    */
+    void resized() override
+    {
+        auto content = getLocalBounds().reduced (10);
+
+        // 120 px header: 60 px for the IR row, the rest for the gain controls
+        auto header = content.removeFromTop (120);
+
+        // IR loading row
+        auto irArea = header.removeFromTop (60);
+        auto buttonArea = irArea.removeFromTop (30);
+        loadIRButton.setBounds (buttonArea.reduced (5, 0));
+        irInfoLabel.setBounds (irArea.removeFromTop (25));
+
+        // Gain controls: wet on the left half, dry on what remains of the header
+        auto wetArea = header.removeFromLeft (header.getWidth() / 2);
+        wetGainLabel.setBounds (wetArea.removeFromTop (25));
+        auto wetSliderArea = wetArea.removeFromTop (30);
+        wetGainSlider.setBounds (wetSliderArea.reduced (5, 0));
+
+        dryGainLabel.setBounds (header.removeFromTop (25));
+        auto drySliderArea = header.removeFromTop (30);
+        dryGainSlider.setBounds (drySliderArea.reduced (5, 0));
+
+        // The plot fills the remaining space
+        irWaveformDisplay.setBounds (content);
+    }
+
+    /** Registers this object as an audio callback while the component is visible
+        and unregisters it when the component is hidden.
+    */
+    void visibilityChanged() override
+    {
+        if (isVisible())
+        {
+            audioDeviceManager.addAudioCallback (this);
+            return;
+        }
+
+        audioDeviceManager.removeAudioCallback (this);
+    }
+
+    /** Prepares the processing state for the device that is about to start.
+
+        Resets both gain smoothers with the device sample rate, sizes the scratch
+        buffers for the device block size and resets/prepares the convolver.
+
+        @param device  The device that is about to start.
+    */
+    void audioDeviceAboutToStart (yup::AudioIODevice* device) override
+    {
+        const auto sampleRate = device->getCurrentSampleRate();
+
+        // Clamp so a negative size cannot turn into a huge unsigned value
+        const auto blockSize = static_cast<std::size_t> (yup::jmax (0, device->getCurrentBufferSizeSamples()));
+
+        // Update smoothed values
+        wetGain.reset (sampleRate, 0.02);
+        dryGain.reset (sampleRate, 0.02);
+
+        // Size the scratch buffers here so the resize() calls in the audio callback
+        // do not need to grow them for blocks of the announced size
+        tempDryBuffer.resize (blockSize);
+        tempWetBuffer.resize (blockSize);
+
+        // Reset convolver
+        convolver.reset();
+        convolver.prepare (blockSize);
+    }
+
+    /** Intentionally empty: nothing is done when the device stops. */
+    void audioDeviceStopped() override
+    {
+    }
+
+    /** Audio callback: loops the loaded audio file as a mono source, runs it
+        through the convolver when an impulse response is loaded, and writes the
+        same dry/wet mix to the first two output channels.
+
+        @param inputChannelData   Unused; the source signal comes from audioBuffer.
+        @param numInputChannels   Unused.
+        @param outputChannelData  Output channel pointers; entries are treated as possibly null.
+        @param numOutputChannels  Number of entries in outputChannelData.
+        @param numSamples         Number of samples to render for this block.
+        @param context            Unused.
+    */
+    void audioDeviceIOCallbackWithContext (const float* const* inputChannelData,
+                                           int numInputChannels,
+                                           float* const* outputChannelData,
+                                           int numOutputChannels,
+                                           int numSamples,
+                                           const yup::AudioIODeviceCallbackContext& context) override
+    {
+        // Clear outputs
+        for (int ch = 0; ch < numOutputChannels; ++ch)
+        {
+            if (outputChannelData[ch] != nullptr)
+                yup::FloatVectorOperations::clear (outputChannelData[ch], numSamples);
+        }
+
+        const int totalSamples = audioBuffer.getNumSamples();
+        const int numChannels = audioBuffer.getNumChannels();
+
+        // Nothing to render without a stereo output, a block to fill and source audio
+        if (numSamples <= 0 || numOutputChannels < 2 || totalSamples == 0 || numChannels < 1)
+            return;
+
+        // The clearing loop above tolerates null channel pointers, so the write
+        // loop below has to tolerate them as well
+        float* const leftOutput = outputChannelData[0];
+        float* const rightOutput = outputChannelData[1];
+
+        // Prepare buffers for processing
+        const auto blockSize = static_cast<size_t> (numSamples);
+        tempDryBuffer.resize (blockSize);
+        tempWetBuffer.resize (blockSize);
+
+        // At most the first two file channels take part in the mono mix
+        const int channelsToMix = yup::jmin (2, numChannels);
+
+        for (size_t i = 0; i < blockSize; ++i)
+        {
+            // Get the audio sample from the loaded file (mono to stereo if needed)
+            float audioSample = 0.0f;
+
+            if (numChannels == 1)
+            {
+                // Mono file
+                audioSample = audioBuffer.getSample (0, readPosition) * 0.5f;
+            }
+            else
+            {
+                // Stereo or multichannel - mix to mono
+                for (int ch = 0; ch < channelsToMix; ++ch)
+                    audioSample += audioBuffer.getSample (ch, readPosition) * 0.5f;
+                audioSample /= channelsToMix;
+            }
+
+            // Increment read position and wrap around for looping
+            readPosition++;
+            if (readPosition >= totalSamples)
+                readPosition = 0;
+
+            // Store dry signal
+            tempDryBuffer[i] = audioSample;
+        }
+
+        // Process through convolver if IR is loaded; otherwise the wet path stays silent
+        std::fill (tempWetBuffer.begin(), tempWetBuffer.end(), 0.0f);
+        if (hasImpulseResponse)
+            convolver.process (tempDryBuffer.data(), tempWetBuffer.data(), blockSize);
+
+        // Mix dry and wet signals with gains. The smoothers advance once per sample
+        // even when an output pointer is null.
+        for (size_t i = 0; i < blockSize; ++i)
+        {
+            const float wetGainValue = wetGain.getNextValue();
+            const float dryGainValue = dryGain.getNextValue();
+
+            const float mixedSignal = tempDryBuffer[i] * dryGainValue + tempWetBuffer[i] * wetGainValue;
+
+            // Output to both channels (mono to stereo)
+            if (leftOutput != nullptr)
+                leftOutput[i] = mixedSignal;
+
+            if (rightOutput != nullptr)
+                rightOutput[i] = mixedSignal;
+        }
+    }
+
+    /** Timer tick: requests a repaint of this component. */
+    void timerCallback() override
+    {
+        // Update waveform display if needed
+        repaint();
+    }
+
+private:
+    /** Loads the bundled drum loop (data/break_boomblastic_92bpm.wav, located
+        relative to this source file) into audioBuffer.
+
+        Logs to std::cerr and leaves audioBuffer untouched when the file is missing
+        or no reader can be created for it.
+    */
+    void loadAudioFile()
+    {
+        // Create the path to the audio file
+        auto dataDir = yup::File (__FILE__)
+                           .getParentDirectory()
+                           .getParentDirectory()
+                           .getParentDirectory()
+                           .getChildFile ("data");
+
+        yup::File audioFile = dataDir.getChildFile ("break_boomblastic_92bpm.wav");
+        if (! audioFile.existsAsFile())
+        {
+            std::cerr << "Could not find break_boomblastic_92bpm.wav" << std::endl;
+            return;
+        }
+
+        // Load the audio file through the member format manager, as
+        // loadImpulseResponseFromFile() does; the constructor registers its formats
+        // before calling this method, so no second manager is needed here
+        if (auto reader = formatManager.createReaderFor (audioFile))
+        {
+            const auto numChannels = static_cast<int> (reader->numChannels);
+            const auto numSamples = static_cast<int> (reader->lengthInSamples);
+
+            audioBuffer.setSize (numChannels, numSamples);
+            reader->read (&audioBuffer, 0, numSamples, 0, true, true);
+
+            std::cout << "Loaded audio file: " << audioFile.getFileName() << std::endl;
+            std::cout << "Sample rate: " << reader->sampleRate << " Hz" << std::endl;
+            std::cout << "Channels: " << reader->numChannels << std::endl;
+            std::cout << "Length: " << reader->lengthInSamples << " samples" << std::endl;
+        }
+        else
+        {
+            std::cerr << "Failed to create reader for audio file" << std::endl;
+        }
+    }
+
+    /** Loads the bundled impulse response (data/ir_e112_g12_dyn_us_6v6.wav,
+        located relative to this source file) via loadImpulseResponseFromFile().
+    */
+    void loadDefaultImpulseResponse()
+    {
+        // The data folder sits three directory levels above this header
+        const auto headerDir = yup::File (__FILE__).getParentDirectory();
+        const auto exampleRoot = headerDir.getParentDirectory().getParentDirectory();
+        const auto dataFolder = exampleRoot.getChildFile ("data");
+
+        loadImpulseResponseFromFile (dataFolder.getChildFile ("ir_e112_g12_dyn_us_6v6.wav"));
+    }
+
+    /** Reads an impulse response from a file, mixes it down to mono, hands it to
+        the convolver and refreshes the info label and waveform plot.
+
+        On failure (missing file, no reader, or a file without audio) an error is
+        logged, the info label is updated and the convolver is left as it was.
+
+        NOTE(review): this is also called from the file chooser callback set up in
+        createUI() while the audio callback may be using the convolver - confirm
+        that PartitionedConvolver::setImpulseResponse may run concurrently with
+        process().
+
+        @param file  The audio file to load as impulse response.
+    */
+    void loadImpulseResponseFromFile (const yup::File& file)
+    {
+        if (! file.existsAsFile())
+        {
+            std::cerr << "Could not find impulse response file: " << file.getFullPathName() << std::endl;
+            updateIRInfo ("No IR loaded");
+            return;
+        }
+
+        // Load the impulse response file
+        if (auto reader = formatManager.createReaderFor (file))
+        {
+            // An empty file would give the convolver an empty IR and still flag it as loaded
+            if (reader->numChannels == 0 || reader->lengthInSamples <= 0)
+            {
+                std::cerr << "Impulse response file contains no audio: " << file.getFullPathName() << std::endl;
+                updateIRInfo ("Failed to load IR");
+                return;
+            }
+
+            impulseResponseBuffer.setSize ((int) reader->numChannels, (int) reader->lengthInSamples);
+            reader->read (&impulseResponseBuffer, 0, (int) reader->lengthInSamples, 0, true, true);
+
+            // Convert to mono if stereo: average all channels into channel 0
+            if (impulseResponseBuffer.getNumChannels() > 1)
+            {
+                for (int i = 0; i < impulseResponseBuffer.getNumSamples(); ++i)
+                {
+                    float monoSample = 0.0f;
+                    for (int ch = 0; ch < impulseResponseBuffer.getNumChannels(); ++ch)
+                        monoSample += impulseResponseBuffer.getSample (ch, i);
+                    monoSample /= static_cast<float> (impulseResponseBuffer.getNumChannels());
+                    impulseResponseBuffer.setSample (0, i, monoSample);
+                }
+                impulseResponseBuffer.setSize (1, impulseResponseBuffer.getNumSamples(), true);
+            }
+
+            // Extract samples for the convolver. They are copied unscaled: no
+            // normalization is applied here.
+            const int numSamples = impulseResponseBuffer.getNumSamples();
+            impulseResponseData.resize (static_cast<size_t> (numSamples));
+
+            for (int i = 0; i < numSamples; ++i)
+                impulseResponseData[static_cast<size_t> (i)] = impulseResponseBuffer.getSample (0, i);
+
+            // Set impulse response in convolver
+            yup::PartitionedConvolver::IRLoadOptions loadOptions;
+            loadOptions.trimEndSilenceBelowDb = -36.0f;
+            convolver.setImpulseResponse (impulseResponseData, loadOptions);
+            impulseLength = static_cast<int> (convolver.getImpulseLength());
+            hasImpulseResponse = true;
+
+            std::cout << "Loaded impulse response: " << file.getFileName() << std::endl;
+            std::cout << "Sample rate: " << reader->sampleRate << " Hz" << std::endl;
+            std::cout << "Length: " << reader->lengthInSamples << " samples" << std::endl;
+            std::cout << "Effective Length: " << impulseLength << " samples" << std::endl;
+
+            // Update UI
+            updateIRInfo (file.getFileName());
+            updateWaveformDisplay();
+        }
+        else
+        {
+            std::cerr << "Failed to create reader for impulse response file" << std::endl;
+            updateIRInfo ("Failed to load IR");
+        }
+    }
+
+    /** Creates and configures all child components: the IR load button, the info
+        label, the wet/dry gain labels and sliders, and the waveform plot.
+    */
+    void createUI()
+    {
+        setOpaque (false);
+
+        // Font shared by all labels
+        auto labelFont = yup::ApplicationTheme::getGlobalTheme()->getDefaultFont().withHeight (12.0f);
+
+        // Load IR button: opens a file chooser and loads the first selected file
+        loadIRButton.onClick = [this]
+        {
+            // NOTE(review): chooser goes out of scope when this handler returns;
+            // presumably browseForFileToOpen keeps it alive until the callback
+            // fires - confirm against yup::FileChooser.
+            auto chooser = yup::FileChooser::create ("Load Impulse Response",
+                                                     yup::File(),
+                                                     "*.wav;*.aiff;*.aif");
+            chooser->browseForFileToOpen ([this] (bool success, const yup::Array<yup::File>& results)
+            {
+                if (success && results.size() > 0)
+                {
+                    loadImpulseResponseFromFile (results[0]);
+                }
+            });
+        };
+        addAndMakeVisible (loadIRButton);
+
+        // IR info label
+        irInfoLabel.setText ("Loading default IR...", yup::NotificationType::dontSendNotification);
+        irInfoLabel.setFont (labelFont);
+        irInfoLabel.setJustification (yup::Justification::center);
+        addAndMakeVisible (irInfoLabel);
+
+        // Wet gain slider
+        wetGainLabel.setText ("Wet Gain", yup::NotificationType::dontSendNotification);
+        wetGainLabel.setFont (labelFont);
+        addAndMakeVisible (wetGainLabel);
+
+        // The initial value is set before the handler is attached; it matches the
+        // smoother value set in the constructor
+        wetGainSlider.setRange (0.0, 2.0);
+        wetGainSlider.setValue (1.0);
+        wetGainSlider.onValueChanged = [this] (float value)
+        {
+            wetGain.setTargetValue (value);
+        };
+        addAndMakeVisible (wetGainSlider);
+
+        // Dry gain slider
+        dryGainLabel.setText ("Dry Gain", yup::NotificationType::dontSendNotification);
+        dryGainLabel.setFont (labelFont);
+        addAndMakeVisible (dryGainLabel);
+
+        dryGainSlider.setRange (0.0, 2.0);
+        dryGainSlider.setValue (0.3);
+        dryGainSlider.onValueChanged = [this] (float value)
+        {
+            dryGain.setTargetValue (value);
+        };
+        addAndMakeVisible (dryGainSlider);
+
+        // Configure IR waveform display
+        setupWaveformDisplay();
+        addAndMakeVisible (irWaveformDisplay);
+    }
+
+    /** Configures the waveform plot (title, linear axes, margins, grid lines,
+        labels, hidden legend) and registers the "IR" signal, storing its index in
+        waveformSignalIndex.
+    */
+    void setupWaveformDisplay()
+    {
+        // Configure the CartesianPlane for waveform display
+        irWaveformDisplay.setTitle ("Impulse Response Waveform");
+
+        // Set linear axes; updateWaveformDisplay() later replaces the X range
+        irWaveformDisplay.setXRange (0.0, 1.0);
+        irWaveformDisplay.setXScaleType (yup::CartesianPlane::AxisScaleType::linear);
+        irWaveformDisplay.setYRange (-1.0, 1.0);
+        irWaveformDisplay.setYScaleType (yup::CartesianPlane::AxisScaleType::linear);
+
+        // Set margins
+        irWaveformDisplay.setMargins (25, 25, 25, 25);
+
+        // Add grid lines; the zero line is added separately with its own colour and width
+        irWaveformDisplay.setVerticalGridLines ({ 0.0, 1.0 });
+        irWaveformDisplay.setHorizontalGridLines ({ -1.0, -0.5, 0.5, 1.0 });
+        irWaveformDisplay.addHorizontalGridLine (0.0, yup::Color (0xFF666666), 1.0f, true);
+
+        // No X labels until an impulse response has been loaded
+        irWaveformDisplay.clearXAxisLabels();
+        irWaveformDisplay.setYAxisLabels ({ -1.0, -0.5, 0.5, 1.0 });
+
+        // Add waveform signal
+        waveformSignalIndex = irWaveformDisplay.addSignal ("IR", yup::Color (0xFF44AA44), 1.5f);
+
+        // Configure legend
+        irWaveformDisplay.setLegendVisible (false);
+    }
+
+    /** Rebuilds the plotted impulse response from impulseResponseData.
+
+        The first impulseLength samples are decimated to at most one point per
+        pixel of this component's width, scaled so the peak sits 12 dB below full
+        scale, and plotted against time in seconds. The X range, vertical grid
+        lines and X labels are updated to match. Does nothing when there is no
+        impulse response data to show.
+    */
+    void updateWaveformDisplay()
+    {
+        // The IR file's own sample rate is not stored, so the time axis assumes 44.1kHz
+        constexpr double assumedSampleRate = 44100.0;
+
+        // Point budget while the component has no width yet (e.g. during construction)
+        constexpr size_t fallbackNumPoints = 512;
+
+        // Never index past the stored samples, whatever length the convolver reported
+        const size_t length = std::min (static_cast<size_t> (std::max (impulseLength, 0)),
+                                        impulseResponseData.size());
+        if (length == 0)
+            return;
+
+        // Always apply peak headroom
+        float headroomScale = std::pow (10.0f, -12.0f / 20.0f);
+        const auto minMax = yup::FloatVectorOperations::findMinAndMax (impulseResponseData.data(), impulseResponseData.size());
+        const float peak = std::max (std::abs (minMax.getStart()), std::abs (minMax.getEnd()));
+        if (peak > 0.0f)
+            headroomScale /= peak;
+
+        // Create waveform data points. numPoints is at least 1 here, so nothing
+        // below divides by zero.
+        const int width = getWidth();
+        const size_t maxPoints = width > 0 ? static_cast<size_t> (width) : fallbackNumPoints;
+        const size_t numPoints = std::min (maxPoints, length);
+
+        std::vector<yup::Point<double>> waveformData;
+        waveformData.reserve (numPoints);
+
+        for (size_t i = 0; i < numPoints; ++i)
+        {
+            // Spread the points over the whole IR so its tail is not dropped
+            const size_t sampleIndex = numPoints > 1 ? (i * (length - 1)) / (numPoints - 1) : 0;
+
+            // X is expressed in seconds to match the X range set below
+            // NOTE(review): assumes signal points are in the plane's axis units - confirm.
+            const double timeInSeconds = static_cast<double> (sampleIndex) / assumedSampleRate;
+            const double amplitude = static_cast<double> (impulseResponseData[sampleIndex] * headroomScale);
+
+            waveformData.emplace_back (timeInSeconds, amplitude);
+        }
+
+        // Update the display
+        irWaveformDisplay.updateSignalData (waveformSignalIndex, waveformData);
+
+        // Update X axis range to show time
+        const double lengthInSeconds = static_cast<double> (length) / assumedSampleRate;
+        irWaveformDisplay.setXRange (0.0, lengthInSeconds);
+        irWaveformDisplay.setVerticalGridLines ({ 0.0, lengthInSeconds });
+
+        // Update X axis labels to show time: five evenly spaced labels from 0 to the IR length
+        std::vector<double> timeLabels;
+        for (int i = 0; i <= 4; ++i)
+            timeLabels.push_back (lengthInSeconds * static_cast<double> (i) / 4.0);
+
+        irWaveformDisplay.setXAxisLabels (timeLabels);
+    }
+
+    /** Shows the given text in the IR info label without sending a notification.
+
+        @param info  Text to display (a file name or a status message).
+    */
+    void updateIRInfo (const yup::String& info)
+    {
+        irInfoLabel.setText (info, yup::NotificationType::dontSendNotification);
+    }
+
+    // Audio
+    yup::AudioFormatManager formatManager;         // Creates readers for the IR files; formats registered in the constructor
+    yup::AudioDeviceManager audioDeviceManager;    // Opened with the default devices (0 inputs, 2 outputs)
+    yup::AudioBuffer<float> audioBuffer;           // Source loop played by the audio callback
+    yup::AudioBuffer<float> impulseResponseBuffer; // IR as read from file, reduced to one channel after loading
+    std::vector<float> impulseResponseData;        // Mono IR samples handed to the convolver and plotted
+    int readPosition = 0;                          // Next sample of audioBuffer to play; wraps to 0 at the end
+    int impulseLength = 0;                         // Length reported by convolver.getImpulseLength() after the last load
+    std::atomic<bool> hasImpulseResponse = false;  // Set once an IR has been loaded; read by the audio callback
+
+    // Processing
+    yup::PartitionedConvolver convolver;
+    std::vector<float> tempDryBuffer; // Per-block mono source signal
+    std::vector<float> tempWetBuffer; // Per-block convolver output (zeros when no IR is loaded)
+
+    // Smoothed parameters (targets are set from the sliders, values are read per sample in the audio callback)
+    yup::SmoothedValue<float> wetGain, dryGain;
+
+    // UI
+    yup::TextButton loadIRButton;
+    yup::Label irInfoLabel;
+    yup::Label wetGainLabel;
+    yup::Slider wetGainSlider;
+    yup::Label dryGainLabel;
+    yup::Slider dryGainSlider;
+    yup::CartesianPlane irWaveformDisplay;
+
+    // Display
+    int waveformSignalIndex = -1; // Index returned by irWaveformDisplay.addSignal(); -1 until setupWaveformDisplay() runs
+};
+};
diff --git a/examples/graphics/source/examples/FilterDemo.h b/examples/graphics/source/examples/FilterDemo.h
index 317fd98c2..8c16d7b56 100644
--- a/examples/graphics/source/examples/FilterDemo.h
+++ b/examples/graphics/source/examples/FilterDemo.h
@@ -951,6 +951,7 @@ class FilterDemo
         filterTypeCombo->addItem ("State Variable", 3);
         filterTypeCombo->addItem ("First Order", 4);
         filterTypeCombo->addItem ("Butterworth", 5);
+        filterTypeCombo->addItem ("FIR Filter", 6);
         filterTypeCombo->setSelectedId (1);
         filterTypeCombo->onSelectedItemChanged = [this]
         {
@@ -976,6 +977,42 @@ class FilterDemo
         };
         addAndMakeVisible (*responseTypeCombo);
 
+        // FIR-specific controls
+        firCoefficientsSlider = std::make_unique<yup::Slider> (yup::Slider::LinearBarHorizontal, "FIR Length");
+        firCoefficientsSlider->setRange ({ 16.0, 256.0 });
+        firCoefficientsSlider->setValue (64.0);
+        firCoefficientsSlider->onValueChanged = [this] (float value)
+        {
+            updateAnalysisDisplays();
+        };
+        addAndMakeVisible (*firCoefficientsSlider);
+
+        firWindowCombo = std::make_unique<yup::ComboBox> ("FIR Window");
+        firWindowCombo->addItem ("Hann", 1);
+        firWindowCombo->addItem ("Hamming", 2);
+        firWindowCombo->addItem ("Blackman", 3);
+        firWindowCombo->addItem ("Kaiser", 4);
+        firWindowCombo->addItem ("Rectangle", 5);
+        firWindowCombo->addItem ("Rakshit-Ullah", 6);
+        firWindowCombo->setSelectedId (1);
+        firWindowCombo->onSelectedItemChanged = [this]
+        {
+            updateWindowParameterRange();
+            updateAnalysisDisplays();
+        };
+        addAndMakeVisible (*firWindowCombo);
+
+        // FIR window parameter control (for adjustable windows like Kaiser and Rakshit-Ullah)
+        firWindowParameterSlider = std::make_unique<yup::Slider> (yup::Slider::LinearBarHorizontal, "Window Parameter");
+        firWindowParameterSlider->setRange ({ 0.0005, 10.0 });
+        firWindowParameterSlider->setSkewFactorFromMidpoint (1.0);
+        firWindowParameterSlider->setValue (1.0);
+        firWindowParameterSlider->onValueChanged = [this] (float value)
+        {
+            updateAnalysisDisplays();
+        };
+        addAndMakeVisible (*firWindowParameterSlider);
+
         // Parameter controls with smoothed parameter updates
         frequencySlider = std::make_unique<yup::Slider> (yup::Slider::LinearBarHorizontal, "Frequency");
         frequencySlider->setRange ({ 20.0, 20000.0 });
@@ -1066,7 +1103,7 @@ class FilterDemo
         // Labels for parameter controls
         auto font = yup::ApplicationTheme::getGlobalTheme()->getDefaultFont().withHeight (10.0f);
 
-        for (const auto& labelText : { "Filter Type:", "Response Type:", "Frequency:", "Frequency 2:", "Q/Resonance:", "Gain (dB):", "Order:", "Noise Level:", "Output Level:" })
+        for (const auto& labelText : { "Filter Type:", "Response Type:", "Frequency:", "Frequency 2:", "Q/Resonance:", "Gain (dB):", "Order:", "FIR Length:", "FIR Window:", "Window Param:", "Noise Level:", "Output Level:" })
         {
             auto label = parameterLabels.add (std::make_unique<yup::Label> (labelText));
             label->setText (labelText);
@@ -1092,8 +1129,11 @@ class FilterDemo
             { parameterLabels[4], qSlider.get() },
             { parameterLabels[5], gainSlider.get() },
             { parameterLabels[6], orderSlider.get() },
-            { parameterLabels[7], noiseGainSlider.get() },
-            { parameterLabels[8], outputGainSlider.get() }
+            { parameterLabels[7], firCoefficientsSlider.get() },
+            { parameterLabels[8], firWindowCombo.get() },
+            { parameterLabels[9], firWindowParameterSlider.get() },
+            { parameterLabels[10], noiseGainSlider.get() },
+            { parameterLabels[11], outputGainSlider.get() }
         };
 
         for (auto& [label, component] : layouts)
@@ -1114,6 +1154,7 @@ class FilterDemo
         audioSvf = std::make_shared<yup::StateVariableFilter<float>>();
         audioFirstOrder = std::make_shared<yup::FirstOrderFilter<float>>();
         audioButterworthFilter = std::make_shared<yup::ButterworthFilter<float>>();
+        audioDirectFIR = std::make_shared<yup::DirectFIR<float>>();
 
         // Create instances of all filter types for UI thread
         uiRbj = std::make_shared<yup::RbjFilter<float>>();
@@ -1121,14 +1162,15 @@ class FilterDemo
         uiSvf = std::make_shared<yup::StateVariableFilter<float>>();
         uiFirstOrder = std::make_shared<yup::FirstOrderFilter<float>>();
         uiButterworthFilter = std::make_shared<yup::ButterworthFilter<float>>();
+        uiDirectFIR = std::make_shared<yup::DirectFIR<float>>();
 
         // Store in arrays for easy management
         allAudioFilters = {
-            audioRbj, audioZoelzer, audioSvf, audioFirstOrder, audioButterworthFilter
+            audioRbj, audioZoelzer, audioSvf, audioFirstOrder, audioButterworthFilter, audioDirectFIR
         };
 
         allUIFilters = {
-            uiRbj, uiZoelzer, uiSvf, uiFirstOrder, uiButterworthFilter
+            uiRbj, uiZoelzer, uiSvf, uiFirstOrder, uiButterworthFilter, uiDirectFIR
         };
 
         // Set default filters
@@ -1144,7 +1186,9 @@ class FilterDemo
     {
         noiseGeneratorAmplitude.setCurrentAndTargetValue (0.1f);
         outputGain.setCurrentAndTargetValue (0.5f);
+        updateWindowParameterRange(); // Set initial window parameter range
         updateCurrentFilter();
+        updateControlVisibility(); // Set initial visibility
     }
 
     void updateCurrentFilter()
@@ -1171,6 +1215,9 @@ class FilterDemo
             case 5:
                 currentUIFilter = uiButterworthFilter;
                 break;
+            case 6:
+                currentUIFilter = uiDirectFIR;
+                break;
             default:
                 currentUIFilter = uiRbj;
                 break;
@@ -1189,6 +1236,9 @@ class FilterDemo
         // Update UI filter with current parameters
         updateUIFilterParameters();
 
+        // Update control visibility based on filter type
+        updateControlVisibility();
+
         // Update displays using UI filter
         frequencyResponsePlot.setFilter (currentUIFilter);
         frequencyResponsePlot.updateResponseData();
@@ -1201,12 +1251,12 @@ class FilterDemo
             return;
 
         double freq = smoothedFrequency.getNextValue();
-        double freq2 = smoothedFrequency2.getNextValue();
+        double freq2 = yup::jmax (freq, (double) smoothedFrequency2.getNextValue());
         double q = smoothedQ.getNextValue();
         double gain = smoothedGain.getNextValue();
         int order = yup::jlimit (2, 16, static_cast<int> (smoothedOrder.getNextValue()));
 
-        updateFilterParameters (currentAudioFilter.get(), freq, freq2, q, gain, order);
+        updateFilterParameters (currentAudioFilter.get(), firCoefficients, freq, freq2, q, gain, order);
     }
 
     void updateUIFilterParameters()
@@ -1215,15 +1265,15 @@ class FilterDemo
             return;
 
         double freq = frequencySlider->getValue();
-        double freq2 = frequency2Slider->getValue();
+        double freq2 = yup::jmax (freq, frequency2Slider->getValue());
         double q = qSlider->getValue();
         double gain = gainSlider->getValue();
         int order = yup::jlimit (2, 16, static_cast<int> (orderSlider->getValue()));
 
-        updateFilterParameters (currentUIFilter.get(), freq, freq2, q, gain, order);
+        updateFilterParameters (currentUIFilter.get(), firCoefficientsUI, freq, freq2, q, gain, order);
     }
 
-    void updateFilterParameters (yup::FilterBase<float>* filter, double freq, double freq2, double q, double gain, int order)
+    void updateFilterParameters (yup::FilterBase<float>* filter, std::vector<double>& coefficients, double freq, double freq2, double q, double gain, int order)
     {
         // Update parameters based on filter type using direct UI values
         if (auto rf = dynamic_cast<yup::RbjFilter<float>*> (filter))
@@ -1246,6 +1296,10 @@ class FilterDemo
         {
             bf->setParameters (getFilterMode (currentResponseTypeId), order, freq, yup::jmax (freq2, freq * 1.01), currentSampleRate);
         }
+        else if (auto fir = dynamic_cast<yup::DirectFIR<float>*> (filter))
+        {
+            updateFIRFilterParameters (fir, coefficients, freq, freq2);
+        }
     }
 
     void updateCurrentAudioFilter()
@@ -1268,6 +1322,9 @@ class FilterDemo
             case 5:
                 currentAudioFilter = audioButterworthFilter;
                 break;
+            case 6:
+                currentAudioFilter = audioDirectFIR;
+                break;
             default:
                 currentAudioFilter = audioRbj;
                 break;
@@ -1344,6 +1401,147 @@ class FilterDemo
         polesZerosDisplay.updatePolesZeros (poles, zeros);
     }
 
+    void updateControlVisibility() // Shows only the controls relevant to the selected filter type and rebuilds the response-type list
+    {
+        const bool isFIRFilter = (currentFilterTypeId == 6);
+
+        // Show/hide FIR-specific controls
+        firCoefficientsSlider->setVisible (isFIRFilter);
+        firWindowCombo->setVisible (isFIRFilter);
+        parameterLabels[7]->setVisible (isFIRFilter); // FIR Length label
+        parameterLabels[8]->setVisible (isFIRFilter); // FIR Window label
+
+        // Show/hide window parameter control for adjustable windows (Kaiser, Rakshit-Ullah)
+        bool needsWindowParameter = isFIRFilter && (firWindowCombo->getSelectedId() == 4 || firWindowCombo->getSelectedId() == 6); // Kaiser or Rakshit-Ullah
+        firWindowParameterSlider->setVisible (needsWindowParameter);
+        parameterLabels[9]->setVisible (needsWindowParameter); // Window Parameter label
+
+        // Show/hide standard filter controls
+        qSlider->setVisible (! isFIRFilter);
+        gainSlider->setVisible (! isFIRFilter);
+        orderSlider->setVisible (! isFIRFilter);        // Order applies to the non-FIR designs only; FIR length has its own slider
+        parameterLabels[4]->setVisible (! isFIRFilter); // Q label
+        parameterLabels[5]->setVisible (! isFIRFilter); // Gain label
+        parameterLabels[6]->setVisible (! isFIRFilter); // Order label
+
+        // Frequency 2 is only visible for bandpass/bandstop filters
+        bool needsFreq2 = (currentResponseTypeId >= 3 && currentResponseTypeId <= 5);
+        frequency2Slider->setVisible (needsFreq2);
+        parameterLabels[3]->setVisible (needsFreq2); // Frequency 2 label
+
+        // Update restricted response types for FIR
+        if (isFIRFilter)
+        {
+            // Save current selection
+            int currentResponse = responseTypeCombo->getSelectedId();
+
+            // Clear and repopulate with FIR-compatible responses
+            responseTypeCombo->clear();
+            responseTypeCombo->addItem ("Lowpass", 1);
+            responseTypeCombo->addItem ("Highpass", 2);
+            responseTypeCombo->addItem ("Bandpass", 3);
+            responseTypeCombo->addItem ("Bandstop", 5);
+
+            // Restore selection if compatible, otherwise default to lowpass
+            if (currentResponse == 1 || currentResponse == 2 || currentResponse == 3 || currentResponse == 5)
+                responseTypeCombo->setSelectedId (currentResponse, yup::dontSendNotification);
+            else
+                responseTypeCombo->setSelectedId (1, yup::dontSendNotification); // NOTE(review): currentResponseTypeId is not updated here, so a stale id (e.g. 4) may still drive the FIR design - confirm
+        }
+        else
+        {
+            // Restore full response type list for IIR filters
+            int currentResponse = responseTypeCombo->getSelectedId();
+            responseTypeCombo->clear();
+            responseTypeCombo->addItem ("Lowpass", 1);
+            responseTypeCombo->addItem ("Highpass", 2);
+            responseTypeCombo->addItem ("Bandpass CSG", 3);
+            responseTypeCombo->addItem ("Bandpass CPG", 4);
+            responseTypeCombo->addItem ("Bandstop", 5);
+            responseTypeCombo->addItem ("Peak", 6);
+            responseTypeCombo->addItem ("Low Shelf", 7);
+            responseTypeCombo->addItem ("High Shelf", 8);
+            responseTypeCombo->addItem ("Allpass", 9);
+
+            // Restore selection
+            responseTypeCombo->setSelectedId (currentResponse, yup::dontSendNotification);
+        }
+
+        repaint();
+    }
+
+    void updateWindowParameterRange() // Resets the window-parameter slider's range and value for the selected FIR window, then refreshes control visibility
+    {
+        int windowId = firWindowCombo->getSelectedId();
+
+        // Update parameter range and default based on window type
+        switch (windowId)
+        {
+            case 4: // Kaiser
+                firWindowParameterSlider->setRange ({ 0.0, 20.0 });
+                firWindowParameterSlider->setSkewFactorFromMidpoint (8.0);
+                firWindowParameterSlider->setValue (8.0); // Kaiser beta parameter
+                break;
+
+            case 6: // Rakshit-Ullah
+                firWindowParameterSlider->setRange ({ 0.0001, 100.0 });
+                firWindowParameterSlider->setSkewFactorFromMidpoint (1.0);
+                firWindowParameterSlider->setValue (1.0); // Rakshit-Ullah r parameter
+                break;
+
+            default: // Other windows (parameter not used)
+                firWindowParameterSlider->setRange ({ 0.0, 10.0 }); // NOTE(review): the skew factor is not set in this branch - confirm a previous skew does not linger
+                firWindowParameterSlider->setValue (1.0);
+                break;
+        }
+
+        updateControlVisibility(); // the parameter slider is only shown for ids 4 and 6
+    }
+
+    void updateFIRFilterParameters (yup::DirectFIR<float>* fir, std::vector<double>& coeffs, double freq, double freq2) // Designs FIR coefficients into coeffs from the current control values and loads them into fir
+    {
+        int numCoeffs = static_cast<int> (firCoefficientsSlider->getValue());
+        auto windowType = getFIRWindowType (firWindowCombo->getSelectedId());
+        auto responseMode = getFilterMode (currentResponseTypeId);
+
+        // Get window parameter (for Kaiser and Rakshit-Ullah windows)
+        double windowParam = firWindowParameterSlider->getValue();
+
+        if (responseMode.test (yup::FilterMode::lowpass))
+            yup::FilterDesigner<double>::designFIRLowpass (coeffs, numCoeffs, freq, currentSampleRate, windowType, windowParam);
+        else if (responseMode.test (yup::FilterMode::highpass))
+            yup::FilterDesigner<double>::designFIRHighpass (coeffs, numCoeffs, freq, currentSampleRate, windowType, windowParam);
+        else if (responseMode.test (yup::FilterMode::bandpassCsg | yup::FilterMode::bandpassCpg)) // NOTE(review): presumably matches either bandpass flavour - confirm test() semantics for combined flags
+            yup::FilterDesigner<double>::designFIRBandpass (coeffs, numCoeffs, freq, freq2, currentSampleRate, windowType, windowParam);
+        else if (responseMode.test (yup::FilterMode::bandstop))
+            yup::FilterDesigner<double>::designFIRBandstop (coeffs, numCoeffs, freq, freq2, currentSampleRate, windowType, windowParam);
+        else // any other response mode falls back to a lowpass design
+            yup::FilterDesigner<double>::designFIRLowpass (coeffs, numCoeffs, freq, currentSampleRate, windowType, windowParam);
+
+        fir->setCoefficients (coeffs.data(), coeffs.size());
+    }
+
+    yup::WindowType getFIRWindowType (int windowId) // Maps an item id of firWindowCombo to the matching yup::WindowType
+    {
+        switch (windowId)
+        {
+            case 1:
+                return yup::WindowType::hann;
+            case 2:
+                return yup::WindowType::hamming;
+            case 3:
+                return yup::WindowType::blackman;
+            case 4:
+                return yup::WindowType::kaiser;
+            case 5:
+                return yup::WindowType::rectangular;
+            case 6:
+                return yup::WindowType::rakshitUllah;
+            default:
+                return yup::WindowType::hann; // unknown ids fall back to Hann
+        }
+    }
+
     yup::FilterModeType getFilterMode (int responseTypeId)
     {
         switch (responseTypeId)
@@ -1391,6 +1589,9 @@ class FilterDemo
     std::vector<std::complex<double>> poles;
     std::vector<std::complex<double>> zeros;
 
+    std::vector<double> firCoefficients = std::vector<double> (512, 0.0);   // 512 zeros; a braced { 512, 0.0f } picks the initializer_list constructor and yields just two elements
+    std::vector<double> firCoefficientsUI = std::vector<double> (512, 0.0); // same pre-sizing for the buffer passed from updateUIFilterParameters
+
     // Filter type settings (thread-safe storage)
     std::atomic<int> currentFilterTypeId { 1 };
     std::atomic<int> currentResponseTypeId { 1 };
@@ -1401,6 +1602,7 @@ class FilterDemo
     std::shared_ptr<yup::StateVariableFilter<float>> audioSvf;
     std::shared_ptr<yup::FirstOrderFilter<float>> audioFirstOrder;
     std::shared_ptr<yup::ButterworthFilter<float>> audioButterworthFilter;
+    std::shared_ptr<yup::DirectFIR<float>> audioDirectFIR;
 
     // UI thread filter instances
     std::shared_ptr<yup::RbjFilter<float>> uiRbj;
@@ -1408,6 +1610,7 @@ class FilterDemo
     std::shared_ptr<yup::StateVariableFilter<float>> uiSvf;
     std::shared_ptr<yup::FirstOrderFilter<float>> uiFirstOrder;
     std::shared_ptr<yup::ButterworthFilter<float>> uiButterworthFilter;
+    std::shared_ptr<yup::DirectFIR<float>> uiDirectFIR;
 
     std::vector<std::shared_ptr<yup::FilterBase<float>>> allAudioFilters;
     std::vector<std::shared_ptr<yup::FilterBase<float>>> allUIFilters;
@@ -1423,6 +1626,9 @@ class FilterDemo
     std::unique_ptr<yup::Slider> qSlider;
     std::unique_ptr<yup::Slider> gainSlider;
     std::unique_ptr<yup::Slider> orderSlider;
+    std::unique_ptr<yup::Slider> firCoefficientsSlider;
+    std::unique_ptr<yup::ComboBox> firWindowCombo;
+    std::unique_ptr<yup::Slider> firWindowParameterSlider;
     std::unique_ptr<yup::Slider> noiseGainSlider;
     std::unique_ptr<yup::Slider> outputGainSlider;
     yup::OwnedArray<yup::Label> parameterLabels;
diff --git a/examples/graphics/source/main.cpp b/examples/graphics/source/main.cpp
index 34e405208..65b6b648b 100644
--- a/examples/graphics/source/main.cpp
+++ b/examples/graphics/source/main.cpp
@@ -40,6 +40,7 @@
 #include "examples/Artboard.h"
 #include "examples/Audio.h"
 #include "examples/CrossoverDemo.h"
+#include "examples/ConvolutionDemo.h"
 #include "examples/FilterDemo.h"
 #include "examples/LayoutFonts.h"
 #include "examples/FileChooser.h"
@@ -107,6 +108,7 @@ class CustomWindow
         registerDemo<SpectrumAnalyzerDemo> ("FFT Analyzer", counter++);
         registerDemo<FilterDemo> ("Filter Demo", counter++);
         registerDemo<CrossoverDemo> ("Crossover Demo", counter++);
+        registerDemo<ConvolutionDemo> ("Convolution Demo", counter++);
         registerDemo<LayoutFontsExample> ("Layout Fonts", counter++);
         registerDemo<VariableFontsExample> ("Variable Fonts", counter++);
         registerDemo<PathsExample> ("Paths", counter++);
@@ -307,18 +309,23 @@ struct Application : yup::YUPApplication
 
         yup::Logger::outputDebugString ("Starting app " + commandLineParameters);
 
-        window = std::make_unique<CustomWindow>();
+        yup::MessageManager::callAsync ([this]
+        {
+            yup::Process::makeForegroundProcess();
+
+            window = std::make_unique<CustomWindow>();
 
 #if YUP_IOS
-        window->centreWithSize ({ 320, 480 });
+            window->centreWithSize ({ 320, 480 });
 #elif YUP_ANDROID
-        window->centreWithSize ({ 1080, 2400 });
-        // window->setFullScreen(true);
+            window->centreWithSize ({ 1080, 2400 });
+            // window->setFullScreen(true);
 #else
-        window->centreWithSize ({ 600, 800 });
+            window->centreWithSize ({ 600, 800 });
 #endif
 
-        window->setVisible (true);
+            window->setVisible (true);
+        });
     }
 
     void shutdown() override
diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp
index de9ffbfc8..42c601386 100644
--- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp
+++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.cpp
@@ -83,12 +83,18 @@ struct BasicOps32
 
     static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
 
+    static forcedinline void storeU (int* dest, ParallelType a) noexcept { _mm_storeu_si128 (reinterpret_cast<__m128i*> (dest), _mm_castps_si128 (a)); }
+
+    static forcedinline void storeA (int* dest, ParallelType a) noexcept { _mm_store_si128 (reinterpret_cast<__m128i*> (dest), _mm_castps_si128 (a)); }
+
     static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
 
     static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
 
     static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
 
+    static forcedinline ParallelType div (ParallelType a, ParallelType b) noexcept { return _mm_div_ps (a, b); }
+
     static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
 
     static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
@@ -148,6 +154,8 @@ struct BasicOps64
 
     static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
 
+    static forcedinline ParallelType div (ParallelType a, ParallelType b) noexcept { return _mm_div_pd (a, b); }
+
     static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
 
     static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
@@ -331,12 +339,18 @@ struct BasicOps32
 
     static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
 
+    static forcedinline void storeU (int* dest, ParallelType a) noexcept { vst1q_f32 (reinterpret_cast<float*> (dest), a); }
+
+    static forcedinline void storeA (int* dest, ParallelType a) noexcept { vst1q_f32 (reinterpret_cast<float*> (dest), a); }
+
     static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
 
     static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
 
     static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
 
+    static forcedinline ParallelType div (ParallelType a, ParallelType b) noexcept { return vdivq_f32 (a, b); }
+
     static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
 
     static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
@@ -411,6 +425,8 @@ struct BasicOps64
 
     static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
 
+    static forcedinline ParallelType div (ParallelType a, ParallelType b) noexcept { return a / b; }
+
     static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
 
     static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
@@ -755,6 +771,62 @@ void copyWithMultiply (double* dest, const double* src, double multiplier, Size
 #endif
 }
 
+template <typename Size>
+void copyWithDividend (float* dest, const float* src, float dividend, Size num) noexcept // dest[i] = dividend / src[i]
+{
+#if YUP_USE_VDSP_FRAMEWORK
+    vDSP_svdiv (&dividend, src, 1, dest, 1, (vDSP_Length) num); // scalar divided by vector, unit strides
+#else
+    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = dividend / src[i],
+                                 Mode::div (divsd, s),
+                                 YUP_LOAD_SRC,
+                                 YUP_INCREMENT_SRC_DEST,
+                                 const Mode::ParallelType divsd = Mode::load1 (dividend);)
+#endif
+}
+
+template <typename Size>
+void copyWithDividend (double* dest, const double* src, double dividend, Size num) noexcept // dest[i] = dividend / src[i]
+{
+#if YUP_USE_VDSP_FRAMEWORK
+    vDSP_svdivD (&dividend, src, 1, dest, 1, (vDSP_Length) num); // scalar divided by vector, unit strides
+#else
+    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = dividend / src[i],
+                                 Mode::div (divsd, s),
+                                 YUP_LOAD_SRC,
+                                 YUP_INCREMENT_SRC_DEST,
+                                 const Mode::ParallelType divsd = Mode::load1 (dividend);)
+#endif
+}
+
+template <typename Size>
+void copyWithDivide (float* dest, const float* src, float divisor, Size num) noexcept // dest[i] = src[i] / divisor
+{
+#if YUP_USE_VDSP_FRAMEWORK
+    vDSP_vsdiv (src, 1, &divisor, dest, 1, (vDSP_Length) num); // vector divided by scalar, unit strides
+#else
+    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] / divisor,
+                                 Mode::div (s, divs),
+                                 YUP_LOAD_SRC,
+                                 YUP_INCREMENT_SRC_DEST,
+                                 const Mode::ParallelType divs = Mode::load1 (divisor);)
+#endif
+}
+
+template <typename Size>
+void copyWithDivide (double* dest, const double* src, double divisor, Size num) noexcept // dest[i] = src[i] / divisor
+{
+#if YUP_USE_VDSP_FRAMEWORK
+    vDSP_vsdivD (src, 1, &divisor, dest, 1, (vDSP_Length) num); // vector divided by scalar, unit strides
+#else
+    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] / divisor,
+                                 Mode::div (s, divs),
+                                 YUP_LOAD_SRC,
+                                 YUP_INCREMENT_SRC_DEST,
+                                 const Mode::ParallelType divs = Mode::load1 (divisor);)
+#endif
+}
+
 template <typename Size>
 void add (float* dest, float amount, Size num) noexcept
 {
@@ -1099,6 +1171,104 @@ void multiply (double* dest, const double* src, double multiplier, Size num) noe
                                  const Mode::ParallelType mult = Mode::load1 (multiplier);)
 }
 
+template <typename Size>
+void divide (float* dest, const float* src, Size num) noexcept // in place: dest[i] /= src[i]
+{
+#if YUP_USE_VDSP_FRAMEWORK
+    vDSP_vdiv (src, 1, dest, 1, dest, 1, (vDSP_Length) num); // vDSP_vdiv takes the divisor first: dest = dest / src
+#else
+    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] /= src[i],
+                                 Mode::div (d, s),
+                                 YUP_LOAD_SRC_DEST,
+                                 YUP_INCREMENT_SRC_DEST, )
+#endif
+}
+
+template <typename Size>
+void divide (double* dest, const double* src, Size num) noexcept // in place: dest[i] /= src[i]
+{
+#if YUP_USE_VDSP_FRAMEWORK
+    vDSP_vdivD (src, 1, dest, 1, dest, 1, (vDSP_Length) num); // vDSP_vdivD takes the divisor first: dest = dest / src
+#else
+    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] /= src[i],
+                                 Mode::div (d, s),
+                                 YUP_LOAD_SRC_DEST,
+                                 YUP_INCREMENT_SRC_DEST, )
+#endif
+}
+
+template <typename Size>
+void divide (float* dest, const float* src1, const float* src2, Size num) noexcept // dest[i] = src1[i] / src2[i]
+{
+#if YUP_USE_VDSP_FRAMEWORK
+    vDSP_vdiv (src2, 1, src1, 1, dest, 1, (vDSP_Length) num); // divisor operand first: dest = src1 / src2
+#else
+    YUP_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] / src2[i],
+                                       Mode::div (s1, s2),
+                                       YUP_LOAD_SRC1_SRC2,
+                                       YUP_INCREMENT_SRC1_SRC2_DEST, )
+#endif
+}
+
+template <typename Size>
+void divide (double* dest, const double* src1, const double* src2, Size num) noexcept // dest[i] = src1[i] / src2[i]
+{
+#if YUP_USE_VDSP_FRAMEWORK
+    vDSP_vdivD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num); // divisor operand first: dest = src1 / src2
+#else
+    YUP_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] / src2[i],
+                                       Mode::div (s1, s2),
+                                       YUP_LOAD_SRC1_SRC2,
+                                       YUP_INCREMENT_SRC1_SRC2_DEST, )
+#endif
+}
+
+template <typename Size>
+void divide (float* dest, float divisor, Size num) noexcept // in place: dest[i] /= divisor
+{
+#if YUP_USE_VDSP_FRAMEWORK
+    vDSP_vsdiv (dest, 1, &divisor, dest, 1, (vDSP_Length) num); // dest is both input and output
+#else
+    YUP_PERFORM_VEC_OP_DEST (dest[i] /= divisor,
+                             Mode::div (d, divs),
+                             YUP_LOAD_DEST,
+                             const Mode::ParallelType divs = Mode::load1 (divisor);)
+#endif
+}
+
+template <typename Size>
+void divide (double* dest, double divisor, Size num) noexcept // in place: dest[i] /= divisor
+{
+#if YUP_USE_VDSP_FRAMEWORK
+    vDSP_vsdivD (dest, 1, &divisor, dest, 1, (vDSP_Length) num); // dest is both input and output
+#else
+    YUP_PERFORM_VEC_OP_DEST (dest[i] /= divisor,
+                             Mode::div (d, divs),
+                             YUP_LOAD_DEST,
+                             const Mode::ParallelType divs = Mode::load1 (divisor);)
+#endif
+}
+
+template <typename Size>
+void divide (float* dest, const float* src, float divisor, Size num) noexcept // dest[i] = src[i] / divisor
+{ // NOTE(review): same operation as copyWithDivide, but without that function's vDSP branch
+    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] / divisor,
+                                 Mode::div (s, divs),
+                                 YUP_LOAD_SRC,
+                                 YUP_INCREMENT_SRC_DEST,
+                                 const Mode::ParallelType divs = Mode::load1 (divisor);)
+}
+
+template <typename Size>
+void divide (double* dest, const double* src, double divisor, Size num) noexcept // dest[i] = src[i] / divisor
+{ // NOTE(review): same operation as copyWithDivide, but without that function's vDSP branch
+    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] / divisor,
+                                 Mode::div (s, divs),
+                                 YUP_LOAD_SRC,
+                                 YUP_INCREMENT_SRC_DEST,
+                                 const Mode::ParallelType divs = Mode::load1 (divisor);)
+}
+
 template <typename Size>
 void negate (float* dest, const float* src, Size num) noexcept
 {
@@ -1338,6 +1508,106 @@ double findMaximum (const double* src, Size num) noexcept
 #endif
 }
 
+template <typename Size>
+void convertFixedToFloat (float* dest, const int* src, float multiplier, Size num) noexcept // dest[i] = (float) src[i] * multiplier
+{
+#if YUP_USE_ARM_NEON
+    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = (float) src[i] * multiplier,
+                                 vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
+                                 YUP_LOAD_NONE,
+                                 YUP_INCREMENT_SRC_DEST, )
+#elif YUP_USE_SSE_INTRINSICS
+    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = (float) src[i] * multiplier,
+                                 Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 (reinterpret_cast<const __m128i*> (src)))),
+                                 YUP_LOAD_NONE,
+                                 YUP_INCREMENT_SRC_DEST,
+                                 const Mode::ParallelType mult = Mode::load1 (multiplier);)
+#else
+    for (Size i = 0; i < num; ++i) // plain scalar loop when neither NEON nor SSE is enabled
+        dest[i] = (float) src[i] * multiplier;
+#endif
+}
+
+template <typename Size>
+void convertFloatToFixed (int* dest, const float* src, float multiplier, Size num) noexcept // dest[i] = (int) (src[i] * multiplier), truncating toward zero
+{
+#if YUP_USE_ARM_NEON
+    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = (int) (src[i] * multiplier),
+                                 vreinterpretq_f32_s32 (vcvtq_s32_f32 (vmulq_n_f32 (vld1q_f32 (src), multiplier))),
+                                 YUP_LOAD_NONE,
+                                 YUP_INCREMENT_SRC_DEST, )
+
+#elif YUP_USE_SSE_INTRINSICS // uses cvttps (truncate) instead of cvtps (current rounding mode) so SIMD lanes match the scalar (int) cast
+    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = (int) (src[i] * multiplier),
+                                 _mm_castsi128_ps (_mm_cvttps_epi32 (_mm_mul_ps (_mm_loadu_ps (src), mult))),
+                                 YUP_LOAD_NONE,
+                                 YUP_INCREMENT_SRC_DEST,
+                                 const Mode::ParallelType mult = Mode::load1 (multiplier);)
+
+#else
+    for (Size i = 0; i < num; ++i) // plain scalar loop when neither NEON nor SSE is enabled
+        dest[i] = (int) (src[i] * multiplier);
+#endif
+}
+
+template <typename Size>
+void convertDoubleToFloat (float* dest, const double* src, Size num) noexcept // dest[i] = (float) src[i]
+{
+#if YUP_USE_VDSP_FRAMEWORK
+    vDSP_vdpsp (src, 1, dest, 1, (vDSP_Length) num);
+#else
+    Size i = 0;
+#if YUP_USE_ARM_NEON
+    for (; i + 2 <= num; i += 2) // two doubles per iteration
+    {
+        float64x2_t d = vld1q_f64 (src + i);
+        float32x2_t f = vcvt_f32_f64 (d);
+        vst1_f32 (dest + i, f);
+    }
+#elif YUP_USE_SSE_INTRINSICS
+    for (; i + 2 <= num; i += 2) // two doubles per iteration
+    {
+        __m128d d = _mm_loadu_pd (src + i);
+        __m128 f = _mm_cvtpd_ps (d);
+        _mm_storel_pi ((__m64*) (dest + i), f); // store only the low two lanes, which hold the converted values
+    }
+#endif
+
+    for (; i < num; ++i) // scalar tail, or the whole range when no SIMD path is compiled in
+        dest[i] = (float) src[i];
+#endif
+}
+
+template <typename Size>
+void convertFloatToDouble (double* dest, const float* src, Size num) noexcept // dest[i] = (double) src[i]
+{
+#if YUP_USE_VDSP_FRAMEWORK
+    vDSP_vspdp (src, 1, dest, 1, (vDSP_Length) num);
+#else
+    Size i = 0;
+#if YUP_USE_ARM_NEON
+    for (; i + 2 <= num; i += 2) // two floats per iteration
+    {
+        float32x2_t f = vld1_f32 (src + i);
+        float64x2_t d = vcvt_f64_f32 (f);
+        vst1q_f64 (dest + i, d);
+    }
+#elif YUP_USE_SSE_INTRINSICS
+    for (; i + 4 <= num; i += 4) // four floats per iteration, converted as two pairs
+    {
+        __m128 f = _mm_loadu_ps (src + i);
+        __m128d d0 = _mm_cvtps_pd (f);                    // lanes 0-1
+        __m128d d1 = _mm_cvtps_pd (_mm_movehl_ps (f, f)); // lanes 2-3 moved into the low half first
+        _mm_storeu_pd (dest + i, d0);
+        _mm_storeu_pd (dest + i + 2, d1);
+    }
+#endif
+
+    for (; i < num; ++i) // scalar tail, or the whole range when no SIMD path is compiled in
+        dest[i] = (double) src[i];
+#endif
+}
+
 } // namespace
 } // namespace FloatVectorHelpers
 
@@ -1374,6 +1644,24 @@ void YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::copyWithMulti
     FloatVectorHelpers::copyWithMultiply (dest, src, multiplier, numValues);
 }
 
+template <typename FloatType, typename CountType>
+void YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::copyWithDividend (FloatType* dest,
+                                                                                     const FloatType* src,
+                                                                                     FloatType dividend,
+                                                                                     CountType numValues) noexcept
+{
+    FloatVectorHelpers::copyWithDividend (dest, src, dividend, numValues); // dest[i] = dividend / src[i]
+}
+
+template <typename FloatType, typename CountType>
+void YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::copyWithDivide (FloatType* dest,
+                                                                                   const FloatType* src,
+                                                                                   FloatType divisor,
+                                                                                   CountType numValues) noexcept
+{
+    FloatVectorHelpers::copyWithDivide (dest, src, divisor, numValues); // dest[i] = src[i] / divisor
+}
+
 template <typename FloatType, typename CountType>
 void YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::add (FloatType* dest,
                                                                         FloatType amountToAdd,
@@ -1495,6 +1783,40 @@ void YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::multiply (Flo
     FloatVectorHelpers::multiply (dest, src, multiplier, num);
 }
 
+template <typename FloatType, typename CountType>
+void YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::divide (FloatType* dest,
+                                                                           const FloatType* src,
+                                                                           CountType numValues) noexcept
+{
+    FloatVectorHelpers::divide (dest, src, numValues); // in-place form: destination values divided by the source values (per header docs)
+}
+
+template <typename FloatType, typename CountType>
+void YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::divide (FloatType* dest,
+                                                                           const FloatType* src1,
+                                                                           const FloatType* src2,
+                                                                           CountType numValues) noexcept
+{
+    FloatVectorHelpers::divide (dest, src1, src2, numValues); // dest receives src1 divided by src2, value by value (per header docs)
+}
+
+template <typename FloatType, typename CountType>
+void YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::divide (FloatType* dest,
+                                                                           FloatType divisor,
+                                                                           CountType numValues) noexcept
+{
+    FloatVectorHelpers::divide (dest, divisor, numValues); // in-place form: every destination value divided by one fixed divisor (per header docs)
+}
+
+template <typename FloatType, typename CountType>
+void YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::divide (FloatType* dest,
+                                                                           const FloatType* src,
+                                                                           FloatType divisor,
+                                                                           CountType num) noexcept
+{
+    FloatVectorHelpers::divide (dest, src, divisor, num); // dest receives the source values divided by one fixed divisor (per header docs)
+}
+
 template <typename FloatType, typename CountType>
 void YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::negate (FloatType* dest,
                                                                            const FloatType* src,
@@ -1587,6 +1909,48 @@ template struct FloatVectorOperationsBase<double, size_t>;
 
 //==============================================================================
 
+void YUP_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, size_t num) noexcept
+{
+    FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, num); // size_t count: forwards to the FloatVectorHelpers routine of the same name
+}
+
+void YUP_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
+{
+    FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, num); // int count: forwards to the FloatVectorHelpers routine of the same name
+}
+
+void YUP_CALLTYPE FloatVectorOperations::convertFloatToFixed (int* dest, const float* src, float multiplier, size_t num) noexcept
+{
+    FloatVectorHelpers::convertFloatToFixed (dest, src, multiplier, num); // size_t count: forwards to the FloatVectorHelpers routine of the same name
+}
+
+void YUP_CALLTYPE FloatVectorOperations::convertFloatToFixed (int* dest, const float* src, float multiplier, int num) noexcept
+{
+    FloatVectorHelpers::convertFloatToFixed (dest, src, multiplier, num); // int count: forwards to the FloatVectorHelpers routine of the same name
+}
+
+void YUP_CALLTYPE FloatVectorOperations::convertFloatToDouble (double* dest, const float* src, int num) noexcept
+{
+    FloatVectorHelpers::convertFloatToDouble (dest, src, num); // int count: forwards to the FloatVectorHelpers routine of the same name
+}
+
+void YUP_CALLTYPE FloatVectorOperations::convertFloatToDouble (double* dest, const float* src, size_t num) noexcept
+{
+    FloatVectorHelpers::convertFloatToDouble (dest, src, num); // size_t count: forwards to the FloatVectorHelpers routine of the same name
+}
+
+void YUP_CALLTYPE FloatVectorOperations::convertDoubleToFloat (float* dest, const double* src, int num) noexcept
+{
+    FloatVectorHelpers::convertDoubleToFloat (dest, src, num); // int count: forwards to the FloatVectorHelpers routine of the same name
+}
+
+void YUP_CALLTYPE FloatVectorOperations::convertDoubleToFloat (float* dest, const double* src, size_t num) noexcept
+{
+    FloatVectorHelpers::convertDoubleToFloat (dest, src, num); // size_t count: forwards to the FloatVectorHelpers routine of the same name
+}
+
+//==============================================================================
+
 intptr_t YUP_CALLTYPE FloatVectorOperations::getFpStatusRegister() noexcept
 {
     intptr_t fpsr = 0;
diff --git a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h
index 3b7cd9e90..9efe0f1ea 100644
--- a/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h
+++ b/modules/yup_audio_basics/buffers/yup_FloatVectorOperations.h
@@ -84,6 +84,12 @@ struct FloatVectorOperationsBase
     /** Copies a vector of floating point numbers, multiplying each value by a given multiplier */
     static void YUP_CALLTYPE copyWithMultiply (FloatType* dest, const FloatType* src, FloatType multiplier, CountType numValues) noexcept;
 
+    /** Copies a vector of floating point numbers, dividing a fixed dividend by each value (dest[i] = dividend / src[i]). */
+    static void YUP_CALLTYPE copyWithDividend (FloatType* dest, const FloatType* src, FloatType dividend, CountType numValues) noexcept;
+
+    /** Copies a vector of floating point numbers, dividing each value by a fixed divisor (dest[i] = src[i] / divisor). */
+    static void YUP_CALLTYPE copyWithDivide (FloatType* dest, const FloatType* src, FloatType divisor, CountType numValues) noexcept;
+
     /** Adds a fixed value to the destination values. */
     static void YUP_CALLTYPE add (FloatType* dest, FloatType amountToAdd, CountType numValues) noexcept;
 
@@ -126,6 +132,18 @@ struct FloatVectorOperationsBase
     /** Multiplies each of the source values by a fixed multiplier and stores the result in the destination array. */
     static void YUP_CALLTYPE multiply (FloatType* dest, const FloatType* src, FloatType multiplier, CountType num) noexcept;
 
+    /** Divides the destination values by the source values. */
+    static void YUP_CALLTYPE divide (FloatType* dest, const FloatType* src, CountType numValues) noexcept;
+
+    /** Divides each source1 value by the corresponding source2 value, then stores it in the destination array. */
+    static void YUP_CALLTYPE divide (FloatType* dest, const FloatType* src1, const FloatType* src2, CountType numValues) noexcept;
+
+    /** Divides each of the destination values by a fixed divisor. */
+    static void YUP_CALLTYPE divide (FloatType* dest, FloatType divisor, CountType numValues) noexcept;
+
+    /** Divides each of the source values by a fixed divisor and stores the result in the destination array. */
+    static void YUP_CALLTYPE divide (FloatType* dest, const FloatType* src, FloatType divisor, CountType num) noexcept;
+
     /** Copies a source vector to a destination, negating each value. */
     static void YUP_CALLTYPE negate (FloatType* dest, const FloatType* src, CountType numValues) noexcept;
 
@@ -168,11 +186,14 @@ struct NameForwarder : public Bases...
         Bases::fill...,
         Bases::copy...,
         Bases::copyWithMultiply...,
+        Bases::copyWithDividend...,
+        Bases::copyWithDivide...,
         Bases::add...,
         Bases::subtract...,
         Bases::addWithMultiply...,
         Bases::subtractWithMultiply...,
         Bases::multiply...,
+        Bases::divide...,
         Bases::negate...,
         Bases::abs...,
         Bases::min...,
@@ -199,6 +220,22 @@ struct NameForwarder : public Bases...
 class YUP_API FloatVectorOperations : public detail::NameForwarder<FloatVectorOperationsBase<float, int>, FloatVectorOperationsBase<float, size_t>, FloatVectorOperationsBase<double, int>, FloatVectorOperationsBase<double, size_t>>
 {
 public:
+    /** Convert fixed integer signal to float applying a multiplier. */
+    static void YUP_CALLTYPE convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept;
+    static void YUP_CALLTYPE convertFixedToFloat (float* dest, const int* src, float multiplier, size_t num) noexcept;
+
+    /** Convert float signal to int applying a multiplier. */
+    static void YUP_CALLTYPE convertFloatToFixed (int* dest, const float* src, float multiplier, int num) noexcept;
+    static void YUP_CALLTYPE convertFloatToFixed (int* dest, const float* src, float multiplier, size_t num) noexcept;
+
+    /** Convert float signal to double. */
+    static void YUP_CALLTYPE convertFloatToDouble (double* dest, const float* src, int num) noexcept;
+    static void YUP_CALLTYPE convertFloatToDouble (double* dest, const float* src, size_t num) noexcept;
+
+    /** Convert double signal to float. */
+    static void YUP_CALLTYPE convertDoubleToFloat (float* dest, const double* src, int num) noexcept;
+    static void YUP_CALLTYPE convertDoubleToFloat (float* dest, const double* src, size_t num) noexcept;
+
     /** This method enables or disables the SSE/NEON flush-to-zero mode. */
     static void YUP_CALLTYPE enableFlushToZeroMode (bool shouldEnable) noexcept;
 
diff --git a/modules/yup_audio_basics/yup_audio_basics.cpp b/modules/yup_audio_basics/yup_audio_basics.cpp
index d3b7ad64d..60c63befa 100644
--- a/modules/yup_audio_basics/yup_audio_basics.cpp
+++ b/modules/yup_audio_basics/yup_audio_basics.cpp
@@ -48,27 +48,8 @@
 
 #include "yup_audio_basics.h"
 
-#if YUP_USE_SSE_INTRINSICS
-#include <emmintrin.h>
-#endif
-
 #if YUP_MAC || YUP_IOS
-#ifndef YUP_USE_VDSP_FRAMEWORK
-#define YUP_USE_VDSP_FRAMEWORK 1
-#endif
-
-#if YUP_USE_VDSP_FRAMEWORK
-#include <Accelerate/Accelerate.h>
-#endif
-
 #include "native/yup_AudioWorkgroup_apple.h"
-
-#elif YUP_USE_VDSP_FRAMEWORK
-#undef YUP_USE_VDSP_FRAMEWORK
-#endif
-
-#if YUP_USE_ARM_NEON
-#include <arm_neon.h>
 #endif
 
 #include "buffers/yup_FloatVectorOperations.cpp"
diff --git a/modules/yup_audio_basics/yup_audio_basics.h b/modules/yup_audio_basics/yup_audio_basics.h
index 944dd6ec6..22fc858d6 100644
--- a/modules/yup_audio_basics/yup_audio_basics.h
+++ b/modules/yup_audio_basics/yup_audio_basics.h
@@ -69,11 +69,27 @@
 
 //==============================================================================
 #ifndef YUP_USE_SSE_INTRINSICS
+#if defined (__SSE2__) || defined (_M_X64) || (defined (_M_IX86_FP) && _M_IX86_FP >= 2) // MSVC never defines __SSE2__; the gated code needs SSE2
 #define YUP_USE_SSE_INTRINSICS 1
 #endif
+#endif
+
+#ifndef YUP_USE_AVX_INTRINSICS
+#if defined (__AVX2__)
+#define YUP_USE_AVX_INTRINSICS 1
+#endif
+#endif
+
+#ifndef YUP_USE_FMA_INTRINSICS
+#if defined (__FMA__)
+#define YUP_USE_FMA_INTRINSICS 1
+#endif
+#endif
 
 #if ! YUP_INTEL
 #undef YUP_USE_SSE_INTRINSICS
+#undef YUP_USE_AVX_INTRINSICS
+#undef YUP_USE_FMA_INTRINSICS
 #endif
 
 #if __ARM_NEON__ && ! (YUP_USE_VDSP_FRAMEWORK || defined(YUP_USE_ARM_NEON))
@@ -87,6 +103,36 @@
 #define YUP_USE_ARM_NEON 0
 #endif
 
+//==============================================================================
+#if YUP_USE_AVX_INTRINSICS || YUP_USE_FMA_INTRINSICS
+#include <immintrin.h>
+#endif
+
+#if YUP_USE_SSE_INTRINSICS
+#include <emmintrin.h>
+#endif
+
+#if YUP_USE_ARM_NEON
+#if YUP_64BIT && YUP_WINDOWS // 64-bit Windows ARM builds take the NEON intrinsics from <arm64_neon.h>
+#include <arm64_neon.h>
+#else
+#include <arm_neon.h>
+#endif
+#endif
+
+#if (YUP_MAC || YUP_IOS) && __has_include(<Accelerate/Accelerate.h>)
+#ifndef YUP_USE_VDSP_FRAMEWORK
+#define YUP_USE_VDSP_FRAMEWORK 1
+#endif
+
+#if YUP_USE_VDSP_FRAMEWORK
+#include <Accelerate/Accelerate.h>
+#endif
+
+#elif YUP_USE_VDSP_FRAMEWORK
+#undef YUP_USE_VDSP_FRAMEWORK
+#endif
+
 //==============================================================================
 #include "buffers/yup_AudioDataConverters.h"
 YUP_BEGIN_IGNORE_WARNINGS_MSVC (4661)
diff --git a/modules/yup_audio_devices/native/yup_OpenSL_android.cpp b/modules/yup_audio_devices/native/yup_OpenSL_android.cpp
index 942b59410..c20f039b9 100644
--- a/modules/yup_audio_devices/native/yup_OpenSL_android.cpp
+++ b/modules/yup_audio_devices/native/yup_OpenSL_android.cpp
@@ -1073,6 +1073,9 @@ class OpenSLAudioIODevice final : public AudioIODevice
     void close() override
     {
         stop();
+        // Clear the open flag once playback has been stopped, before releasing the session and callback.
+        deviceOpen = false;
+
         session = nullptr;
         callback = nullptr;
     }
diff --git a/modules/yup_core/maths/yup_MathsFunctions.h b/modules/yup_core/maths/yup_MathsFunctions.h
index 9e3b8ebe1..a9015671c 100644
--- a/modules/yup_core/maths/yup_MathsFunctions.h
+++ b/modules/yup_core/maths/yup_MathsFunctions.h
@@ -751,6 +751,7 @@ constexpr int nextPowerOfTwo (int n) noexcept
     n |= (n >> 4);
     n |= (n >> 8);
     n |= (n >> 16);
+
     return n + 1;
 }
 
diff --git a/modules/yup_core/system/yup_PlatformDefs.h b/modules/yup_core/system/yup_PlatformDefs.h
index b70e93747..b9c8ef125 100644
--- a/modules/yup_core/system/yup_PlatformDefs.h
+++ b/modules/yup_core/system/yup_PlatformDefs.h
@@ -97,24 +97,26 @@ namespace yup
 
     @see jassert()
 */
-#define YUP_BREAK_IN_DEBUGGER ::kill (0, SIGTRAP);
+#define YUP_BREAK_IN_DEBUGGER { ::kill (0, SIGTRAP); }
 #elif YUP_WASM
-#define YUP_BREAK_IN_DEBUGGER
+#define YUP_BREAK_IN_DEBUGGER { }
 #elif YUP_MSVC
 #pragma intrinsic(__debugbreak)
-#define YUP_BREAK_IN_DEBUGGER __debugbreak();
+#define YUP_BREAK_IN_DEBUGGER { __debugbreak(); }
 #elif YUP_INTEL && (YUP_GCC || YUP_CLANG || YUP_MAC)
 #if YUP_NO_INLINE_ASM
-#define YUP_BREAK_IN_DEBUGGER
+#define YUP_BREAK_IN_DEBUGGER { }
 #else
-#define YUP_BREAK_IN_DEBUGGER asm ("int $3");
+#define YUP_BREAK_IN_DEBUGGER { asm ("int $3"); }
 #endif
-#elif YUP_ARM && YUP_MAC
-#define YUP_BREAK_IN_DEBUGGER __builtin_debugtrap();
 #elif YUP_ANDROID
-#define YUP_BREAK_IN_DEBUGGER __builtin_trap();
+#define YUP_BREAK_IN_DEBUGGER { __builtin_trap(); }
+#elif YUP_ARM
+#if YUP_MAC || (YUP_WINDOWS && YUP_CLANG)
+#define YUP_BREAK_IN_DEBUGGER { __builtin_debugtrap(); }
+#endif
 #else
-#define YUP_BREAK_IN_DEBUGGER __asm int 3;
+#define YUP_BREAK_IN_DEBUGGER { __asm int 3; }
 #endif
 // clang-format on
 
diff --git a/modules/yup_core/text/yup_String.h b/modules/yup_core/text/yup_String.h
index 60d4110af..4cd5ef796 100644
--- a/modules/yup_core/text/yup_String.h
+++ b/modules/yup_core/text/yup_String.h
@@ -1193,11 +1193,10 @@ class YUP_API String final
             return "0";
         }
 
-        auto numDigitsBeforePoint = (int) std::ceil (std::log10 (number < 0 ? -number : number));
-
-        auto shift = numberOfSignificantFigures - numDigitsBeforePoint;
-        auto factor = std::pow (10.0, shift);
-        auto rounded = std::round (number * factor) / factor;
+        const auto numDigitsBeforePoint = (int) std::floor (std::log10 (std::abs (number)) + DecimalType (1));
+        const auto shift = numberOfSignificantFigures - numDigitsBeforePoint;
+        const auto factor = std::pow (10.0, shift);
+        const auto rounded = std::round (number * factor) / factor;
 
         std::stringstream ss;
         ss << std::fixed << std::setprecision (std::max (shift, 0)) << rounded;
diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp
new file mode 100644
index 000000000..ac027993e
--- /dev/null
+++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp
@@ -0,0 +1,856 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2025 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace yup
+{
+
+//==============================================================================
+
+/** Performs Y += A * B (complex multiply accumulate) where A, B, and Y are arrays of interleaved
+    complex<float> values [real, imag, real, imag...]. A SIMD loop (AVX, SSE or NEON, when enabled)
+    handles whole groups of pairs and a scalar loop finishes whatever is left.
+    @param A pointer to input complex array
+    @param B pointer to input complex array
+    @param Y pointer to output complex array (accumulated in place, must not alias A or B)
+    @param complexPairs number of complex pairs (not number of floats!)
+*/
+static void complexMultiplyAccumulate (const float* __restrict A, const float* __restrict B, float* __restrict Y, int complexPairs) noexcept
+{
+    int i = 0;
+
+#if YUP_USE_AVX_INTRINSICS
+    constexpr int simdWidth = 4; // AVX path: process 4 complex pairs (8 floats) at a time
+    for (; i <= complexPairs - simdWidth; i += simdWidth)
+    {
+        const int idx = i * 2;
+
+        __m256 a = _mm256_loadu_ps (A + idx);
+        __m256 b = _mm256_loadu_ps (B + idx);
+        __m256 y = _mm256_loadu_ps (Y + idx);
+        // Swap real and imaginary inside every pair: [re, im] -> [im, re]
+        const __m256 a_shuffled = _mm256_permute_ps (a, _MM_SHUFFLE (2, 3, 0, 1));
+        const __m256 b_shuffled = _mm256_permute_ps (b, _MM_SHUFFLE (2, 3, 0, 1));
+        // Plain mul/sub/add: this path is gated on AVX only, so it must not depend on FMA instructions
+        __m256 realPart = _mm256_sub_ps (_mm256_mul_ps (a, b), _mm256_mul_ps (a_shuffled, b_shuffled));
+        __m256 imagPart = _mm256_add_ps (_mm256_mul_ps (a, b_shuffled), _mm256_mul_ps (a_shuffled, b));
+        // Even lanes of realPart hold the real results, odd lanes of imagPart the imaginary ones
+        const __m256 interleaved = _mm256_blend_ps (realPart, imagPart, 0b10101010);
+
+        y = _mm256_add_ps (y, interleaved);
+        _mm256_storeu_ps (Y + idx, y);
+    }
+
+#elif YUP_USE_SSE_INTRINSICS
+    constexpr int simdWidth = 2; // SSE path: process 2 complex pairs (4 floats) at a time
+    for (; i <= complexPairs - simdWidth; i += simdWidth)
+    {
+        const int idx = i * 2;
+
+        __m128 a = _mm_loadu_ps (A + idx);
+        __m128 b = _mm_loadu_ps (B + idx);
+        __m128 y = _mm_loadu_ps (Y + idx);
+
+        const __m128 a_shuffled = _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 3, 0, 1));
+        const __m128 b_shuffled = _mm_shuffle_ps (b, b, _MM_SHUFFLE (2, 3, 0, 1));
+
+        __m128 realPart = _mm_sub_ps (_mm_mul_ps (a, b), _mm_mul_ps (a_shuffled, b_shuffled));
+        __m128 imagPart = _mm_add_ps (_mm_mul_ps (a, b_shuffled), _mm_mul_ps (a_shuffled, b));
+        // Valid results sit in lanes 0 and 2: {re0, im0} comes from the low unpack, {re1, im1} from the high one
+        const __m128 interleaved = _mm_movelh_ps (_mm_unpacklo_ps (realPart, imagPart), _mm_unpackhi_ps (realPart, imagPart));
+
+        y = _mm_add_ps (y, interleaved);
+        _mm_storeu_ps (Y + idx, y);
+    }
+
+#elif YUP_USE_ARM_NEON
+    constexpr int simdWidth = 4;
+    for (; i <= complexPairs - simdWidth; i += simdWidth)
+    {
+        const int idx = i * 2;
+        // vld2q_f32 de-interleaves on load: val[0] gets the four real parts, val[1] the four imaginary parts
+        float32x4x2_t a = vld2q_f32 (A + idx);
+        float32x4x2_t b = vld2q_f32 (B + idx);
+        float32x4x2_t y = vld2q_f32 (Y + idx);
+
+        float32x4_t ar = a.val[0], ai = a.val[1];
+        float32x4_t br = b.val[0], bi = b.val[1];
+        float32x4_t yr = y.val[0], yi = y.val[1];
+
+        float32x4_t real = vmulq_f32 (ar, br);
+        real = vfmsq_f32 (real, ai, bi);
+        float32x4_t imag = vmulq_f32 (ar, bi);
+        imag = vfmaq_f32 (imag, ai, br);
+
+        yr = vaddq_f32 (yr, real);
+        yi = vaddq_f32 (yi, imag);
+
+        float32x4x2_t out = { yr, yi };
+        vst2q_f32 (Y + idx, out); // interleave back
+    }
+
+#endif
+
+    for (; i < complexPairs; ++i)
+    {
+        const int ri = i * 2;
+        const int ii = ri + 1;
+
+        const float ar = A[ri];
+        const float ai = A[ii];
+        const float br = B[ri];
+        const float bi = B[ii];
+
+        Y[ri] += ar * br - ai * bi;
+        Y[ii] += ar * bi + ai * br;
+    }
+}
+
+//==============================================================================
+
+/** One uniformly partitioned FFT convolution layer: the impulse response is split into
+    hopSize-sample partitions that are convolved in the frequency domain (FFT size 2 * hopSize). */
+class PartitionedConvolver::FFTLayer
+{
+public:
+    FFTLayer() = default;
+    ~FFTLayer() = default;
+
+    FFTLayer (FFTLayer&& other) = default;
+    FFTLayer& operator= (FFTLayer&& other) = default;
+
+    /** Sets the hop size (the FFT size becomes twice that) and (re)allocates zeroed work buffers. */
+    void configure (int newHopSize)
+    {
+        hopSize = newHopSize;
+        fftSize = hopSize * 2;
+
+        fftProcessor.setSize (fftSize);
+        fftProcessor.setScaling (FFTProcessor::FFTScaling::asymmetric);
+
+        overlapBuffer.assign (static_cast<std::size_t> (hopSize), 0.0f);
+        timeBuffer.assign (static_cast<std::size_t> (fftSize), 0.0f);
+        frequencyBuffer.assign (static_cast<std::size_t> (fftSize) * 2, 0.0f);
+        tempBuffer.assign (static_cast<std::size_t> (fftSize) * 2, 0.0f); // Must hold complex data for in-place FFT
+
+        fdlIndex = 0;
+        configured = true;
+    }
+
+    int getHopSize() const { return hopSize; }
+
+    int getFFTSize() const { return fftSize; }
+
+    bool isConfigured() const { return configured; }
+
+    /** Splits the scaled impulse response into zero-padded partitions, stores their spectra and
+        clears all processing state. Returns the number of impulse response samples consumed. */
+    std::size_t setImpulseResponse (const float* impulseResponse, std::size_t length, float scaling)
+    {
+        jassert (configured);
+
+        if (fftSize <= 0 || hopSize <= 0)
+        {
+            resetState();
+            return 0;
+        }
+
+        frequencyPartitions.clear();
+        frequencyDelayLine.clear();
+
+        if (length == 0 || impulseResponse == nullptr)
+        {
+            resetState();
+            return 0;
+        }
+
+        // length > 0 and hopSize > 0 at this point, so the rounded-up division yields at least one partition
+        const auto numPartitions = (length + static_cast<std::size_t> (hopSize) - 1) / static_cast<std::size_t> (hopSize);
+
+        std::size_t processedSamples = 0;
+        frequencyPartitions.reserve (numPartitions);
+
+        for (std::size_t p = 0; p < numPartitions; ++p)
+        {
+            std::vector<float> partition;
+            partition.resize (static_cast<std::size_t> (fftSize) * 2);
+
+            std::fill (tempBuffer.begin(), tempBuffer.end(), 0.0f);
+
+            const std::size_t offset = p * static_cast<std::size_t> (hopSize);
+            const std::size_t copyCount = std::min (static_cast<std::size_t> (hopSize), length - offset);
+
+            for (std::size_t i = 0; i < copyCount; ++i)
+                tempBuffer[i] = impulseResponse[offset + i] * scaling;
+
+            fftProcessor.performRealFFTForward (tempBuffer.data(), partition.data());
+
+            frequencyPartitions.push_back (std::move (partition));
+
+            processedSamples += copyCount;
+        }
+
+        frequencyDelayLine.assign (numPartitions, std::vector<float> (static_cast<std::size_t> (fftSize) * 2, 0.0f));
+
+        resetState();
+
+        return processedSamples;
+    }
+
+    /** Rewinds the delay line and zeroes the delay line, overlap, time and frequency buffers. */
+    void resetState()
+    {
+        fdlIndex = 0;
+
+        for (auto& partition : frequencyDelayLine)
+            std::fill (partition.begin(), partition.end(), 0.0f);
+
+        std::fill (overlapBuffer.begin(), overlapBuffer.end(), 0.0f);
+        std::fill (timeBuffer.begin(), timeBuffer.end(), 0.0f);
+        std::fill (frequencyBuffer.begin(), frequencyBuffer.end(), 0.0f);
+    }
+
+    /** Convolves one hop of hopSize input samples, adding hopSize output samples into outputAccumulator. */
+    void processHop (const float* inputHop, float* outputAccumulator)
+    {
+        jassert (configured);
+
+        if (frequencyPartitions.empty())
+            return;
+
+        // 1) Transform current input hop to frequency domain. tempBuffer still holds the previous
+        //    hop's spectrum, so everything past the new samples has to be zero-padded again.
+        FloatVectorOperations::copy (tempBuffer.data(), inputHop, hopSize);
+        std::fill (tempBuffer.begin() + hopSize, tempBuffer.end(), 0.0f);
+        fftProcessor.performRealFFTForward (tempBuffer.data(), tempBuffer.data());
+
+        // 2) Store in frequency delay line (circular buffer) - copy full complex buffer
+        fdlIndex = (fdlIndex == 0) ? static_cast<int> (frequencyDelayLine.size()) - 1 : fdlIndex - 1;
+        std::copy (tempBuffer.begin(), tempBuffer.begin() + (fftSize * 2), frequencyDelayLine[static_cast<std::size_t> (fdlIndex)].begin());
+
+        // 3) Frequency domain convolution: Y = sum(X[k-p] * H[p])
+        FloatVectorOperations::clear (frequencyBuffer.data(), fftSize * 2);
+
+        int xIndex = fdlIndex;
+        for (std::size_t p = 0; p < frequencyPartitions.size(); ++p)
+        {
+            const float* X = frequencyDelayLine[static_cast<std::size_t> (xIndex)].data();
+            const float* H = frequencyPartitions[p].data();
+
+            // fftSize / 2 gives the number of complex pairs for real FFT
+            complexMultiplyAccumulate (X, H, frequencyBuffer.data(), fftSize / 2);
+
+            // Move to next older spectrum, wrapping around the end of the delay line
+            if (++xIndex >= static_cast<int> (frequencyDelayLine.size()))
+                xIndex = 0;
+        }
+
+        // 4) Inverse FFT back to time domain
+        fftProcessor.performRealFFTInverse (frequencyBuffer.data(), timeBuffer.data());
+
+        // 5) Overlap-Add: output first hopSize samples, store last hopSize as overlap
+        for (int i = 0; i < hopSize; ++i)
+        {
+            outputAccumulator[i] += timeBuffer[i] + overlapBuffer[i];
+            overlapBuffer[i] = timeBuffer[i + hopSize];
+        }
+    }
+
+    bool hasImpulseResponse() const { return ! frequencyPartitions.empty(); }
+
+private:
+    int hopSize = 0;
+    int fftSize = 0;
+
+    FFTProcessor fftProcessor;
+
+    // IR partitions in frequency domain
+    std::vector<std::vector<float>> frequencyPartitions;
+
+    // Frequency Delay Line (most recent at fdlIndex)
+    std::vector<std::vector<float>> frequencyDelayLine;
+    int fdlIndex = 0;
+
+    // Processing buffers
+    std::vector<float> overlapBuffer;
+    std::vector<float> timeBuffer;
+    std::vector<float> frequencyBuffer;
+    std::vector<float> tempBuffer;
+
+    bool configured = false;
+};
+
+//==============================================================================
+
+/** Fixed-capacity FIFO of floats with wrap-around read/write indices and no internal locking.
+    Requests larger than what is currently available trip a jassert and are then clamped. */
+class PartitionedConvolver::CircularBuffer
+{
+public:
+    CircularBuffer() = default;
+
+    void resize (std::size_t size)
+    {
+        buffer.resize (size);
+        clear();
+    }
+
+    void clear()
+    {
+        std::fill (buffer.begin(), buffer.end(), 0.0f);
+        writeIndex = 0;
+        readIndex = 0;
+        availableForRead = 0;
+    }
+
+    std::size_t getAvailableForRead() const { return availableForRead; }
+
+    std::size_t getAvailableForWrite() const { return buffer.size() - availableForRead; }
+
+    std::size_t getSize() const { return buffer.size(); }
+
+    /** Appends up to numSamples values from data, limited by the free space. */
+    void write (const float* data, std::size_t numSamples)
+    {
+        jassert (numSamples <= getAvailableForWrite());
+        numSamples = std::min (numSamples, getAvailableForWrite());
+
+        if (numSamples == 0)
+            return;
+
+        const std::size_t beforeWrap = std::min (numSamples, buffer.size() - writeIndex);
+        const std::size_t afterWrap = numSamples - beforeWrap;
+
+        std::copy (data, data + beforeWrap, buffer.begin() + writeIndex);
+        if (afterWrap > 0)
+            std::copy (data + beforeWrap, data + numSamples, buffer.begin());
+
+        writeIndex = (writeIndex + numSamples) % buffer.size();
+        availableForRead += numSamples;
+    }
+
+    /** Removes up to numSamples of the oldest values, copying them into data. */
+    void read (float* data, std::size_t numSamples)
+    {
+        jassert (numSamples <= getAvailableForRead());
+        numSamples = std::min (numSamples, getAvailableForRead());
+
+        peek (data, numSamples);
+        skip (numSamples);
+    }
+
+    /** Copies values starting offset samples after the read position without consuming them.
+        Nothing is copied when offset lies at or beyond the readable data. */
+    void peek (float* data, std::size_t numSamples, std::size_t offset = 0) const
+    {
+        jassert (numSamples + offset <= getAvailableForRead());
+        numSamples = offset < getAvailableForRead() ? std::min (numSamples, getAvailableForRead() - offset) : 0;
+
+        if (numSamples == 0)
+            return;
+
+        const std::size_t startIndex = (readIndex + offset) % buffer.size();
+        const std::size_t beforeWrap = std::min (numSamples, buffer.size() - startIndex);
+        const std::size_t afterWrap = numSamples - beforeWrap;
+
+        std::copy (buffer.begin() + startIndex, buffer.begin() + startIndex + beforeWrap, data);
+        if (afterWrap > 0)
+            std::copy (buffer.begin(), buffer.begin() + afterWrap, data + beforeWrap);
+    }
+
+    /** Discards up to numSamples of the oldest values. */
+    void skip (std::size_t numSamples)
+    {
+        jassert (numSamples <= getAvailableForRead());
+        numSamples = std::min (numSamples, getAvailableForRead());
+
+        if (numSamples == 0) // also covers an unsized buffer, where the modulo below would divide by zero
+            return;
+
+        readIndex = (readIndex + numSamples) % buffer.size();
+        availableForRead -= numSamples;
+    }
+
+private:
+    std::vector<float> buffer;
+    std::size_t writeIndex = 0;
+    std::size_t readIndex = 0;
+    std::size_t availableForRead = 0;
+};
+
+//==============================================================================
+
+class PartitionedConvolver::Impl
+{
+public:
+    Impl() = default;
+    ~Impl() = default;
+
+    /** Rebuilds the FFT layers from the given specs, records the smallest and largest hop size
+        and marks the object as not prepared. */
+    void configureLayers (std::size_t directFIRCoefficients, const std::vector<LayerSpec>& newLayers)
+    {
+        directFIRCoefficientCount = directFIRCoefficients;
+
+        layers.clear();
+        layers.resize (newLayers.size());
+
+        // Seed the minimum with the first layer's hop (0 when there are no layers);
+        // the loop below then narrows it down while tracking the maximum as well.
+        baseHopSize = newLayers.empty() ? 0 : newLayers.front().hopSize;
+        std::size_t largestHop = 0;
+
+        for (std::size_t layerIndex = 0; layerIndex < newLayers.size(); ++layerIndex)
+        {
+            const auto hop = newLayers[layerIndex].hopSize;
+
+            layers[layerIndex].configure (hop);
+            baseHopSize = std::min (baseHopSize, hop);
+            largestHop = std::max (largestHop, static_cast<std::size_t> (hop));
+        }
+
+        maxHopSize = largestHop;
+
+        // Staging, scratch and working buffers are all sized again in prepare(),
+        // so only their stale contents are dropped here.
+        inputStaging.clear();
+        outputStaging.clear();
+        layerTempOutput.clear();
+        tempLayerHop.clear();
+        workingOutput.clear();
+
+        // One input and one output circular buffer per layer, sized later in prepare()
+        layerInputBuffers.resize (layers.size());
+        layerOutputBuffers.resize (layers.size());
+
+        isPrepared = false;
+    }
+
+    /** Sizes the staging, per-layer and scratch buffers for blocks of up to maxBlockSize samples. */
+    void prepare (std::size_t maxBlockSize)
+    {
+        this->maxBlockSize = maxBlockSize;
+
+        // Prepare main input staging - needs to accumulate up to baseHopSize samples plus incoming block
+        inputStaging.resize (static_cast<std::size_t> (baseHopSize) + maxBlockSize);
+        outputStaging.assign (static_cast<std::size_t> (baseHopSize), 0.0f);
+
+        // baseHopSize is zero when a layer was configured with hop size 0: never divide by that
+        const std::size_t outputReadHop = baseHopSize > 0 ? static_cast<std::size_t> (baseHopSize) : std::size_t (1);
+        // Prepare per-layer circular buffers with layer-specific sizing
+        for (std::size_t i = 0; i < layerInputBuffers.size(); ++i)
+        {
+            const std::size_t layerHopSize = static_cast<std::size_t> (layers[i].getHopSize());
+
+            // Input buffer: needs to accumulate up to layerHopSize samples plus incoming block
+            layerInputBuffers[i].resize (layerHopSize + maxBlockSize);
+
+            // Output buffer: written in bursts of layerHopSize but read at the smaller base hop, so keep room for several hops
+            const std::size_t layerOutputBufferSize = layerHopSize * ((layerHopSize / outputReadHop) + 2);
+            layerOutputBuffers[i].resize (layerOutputBufferSize);
+        }
+
+        // Allocate temp buffers
+        if (maxHopSize > 0)
+        {
+            layerTempOutput.resize (maxHopSize);
+            tempLayerHop.resize (maxHopSize);
+        }
+
+        // Allocate working buffers
+        workingOutput.resize (maxBlockSize);
+
+        isPrepared = true;
+    }
+
+    /**
+        Works out how many leading samples of an impulse response to keep once trailing
+        silence is dropped.
+
+        The response is scanned backwards in fixed-size windows; the first window whose RMS
+        reaches the threshold marks the end of the significant content. The leading samples
+        that do not fill a whole window are not scanned by that pass. When no window
+        qualifies, the RMS of the first min (minRetainLength, length) samples decides
+        whether the response counts as silent.
+
+        @param impulseResponse  Samples to inspect (may be nullptr)
+        @param length           Number of samples in impulseResponse
+        @param thresholdDb      Silence threshold in dB, converted to linear gain as 10^(dB/20)
+
+        @returns 0 for a null or empty response, 1 when the response is silent, otherwise
+                 the number of samples to keep. The result never exceeds length.
+    */
+    std::size_t trimSilenceFromEnd (const float* impulseResponse, std::size_t length, float thresholdDb)
+    {
+        if (impulseResponse == nullptr || length == 0)
+            return 0;
+
+        const float threshold = std::pow (10.0f, thresholdDb / 20.0f);
+
+        // Short responses keep at least a quarter of their length (and no less than 32 samples)
+        const std::size_t minRetainLength = std::max (std::size_t (32), length / 4);
+        const std::size_t windowSize = std::min (std::size_t (1024), std::max (std::size_t (64), length / 20));
+
+        // RMS over the half-open sample range [begin, end); callers guarantee end > begin
+        const auto windowRms = [impulseResponse] (std::size_t begin, std::size_t end)
+        {
+            float sumOfSquares = 0.0f;
+            for (std::size_t j = begin; j < end; ++j)
+                sumOfSquares += impulseResponse[j] * impulseResponse[j];
+
+            return std::sqrt (sumOfSquares / static_cast<float> (end - begin));
+        };
+
+        // Walk whole windows from the end; windowEnd > windowSize keeps the subtraction from wrapping
+        std::size_t significantContentEnd = 0;
+        for (std::size_t windowEnd = length; windowEnd > windowSize; windowEnd -= windowSize)
+        {
+            if (windowRms (windowEnd - windowSize, windowEnd) >= threshold)
+            {
+                significantContentEnd = windowEnd;
+                break;
+            }
+        }
+
+        // Nothing found in the scanned windows: a quiet start as well means the response is silent
+        if (significantContentEnd == 0 && windowRms (0, std::min (minRetainLength, length)) < threshold)
+            return 1;
+
+        // Never trim below the minimum for short responses, or below one window for long ones
+        const std::size_t floorLength = (length <= 200) ? minRetainLength : windowSize;
+
+        // The floor can be larger than the response itself (e.g. fewer than 32 samples);
+        // clamping keeps callers from reading past the end of impulseResponse.
+        return std::min (length, std::max (significantContentEnd, floorLength));
+    }
+
+    /**
+        Builds a new direct FIR and a new set of FFT layers from an impulse response, then
+        swaps them in under the processing lock and resets the processing state.
+
+        The first directFIRCoefficientCount samples go to the direct FIR; the rest is handed
+        to the layers in order, each layer reporting how many samples it consumed.
+
+        @param impulseResponse  Impulse response samples; nullptr unloads the current response
+        @param length           Number of samples in impulseResponse
+        @param options          Normalisation, headroom and end-silence trimming settings
+    */
+    void setImpulseResponse (const float* impulseResponse, std::size_t length, const PartitionedConvolver::IRLoadOptions& options)
+    {
+        DirectFIRFloat newFIR;
+        std::vector<FFTLayer> newLayers (layers.size());
+
+        const bool hasImpulse = impulseResponse != nullptr && length > 0;
+
+        // Reported through getImpulseLength(); stays zero when nothing is loaded
+        std::size_t trimmedLength = hasImpulse ? length : 0;
+
+        // Always apply peak headroom
+        float headroomScale = std::pow (10.0f, options.headroomDb / 20.0f);
+
+        // Number of impulse samples already assigned to the direct FIR / earlier layers
+        std::size_t consumed = 0;
+
+        if (hasImpulse)
+        {
+            // Normalize peaks (measured on the untrimmed response)
+            if (options.normalize)
+            {
+                const auto minMax = FloatVectorOperations::findMinAndMax (impulseResponse, length);
+
+                const float peak = std::max (std::abs (minMax.getStart()), std::abs (minMax.getEnd()));
+                if (peak > 0.0f)
+                    headroomScale /= peak;
+            }
+
+            // Trim end silence if requested; never accept a length beyond the caller's buffer
+            if (options.trimEndSilenceBelowDb)
+                trimmedLength = std::min (length, trimSilenceFromEnd (impulseResponse, length, *options.trimEndSilenceBelowDb));
+
+            consumed = std::min (directFIRCoefficientCount, trimmedLength);
+            newFIR.setCoefficients (impulseResponse, consumed, headroomScale);
+        }
+
+        // The layers are rebuilt from scratch, so every replacement is configured with the
+        // hop size of the layer it replaces, even when no impulse is loaded. Skipping this
+        // would swap in replacements that were never given the configured hop sizes, and
+        // a later load would have nothing to copy them from.
+        for (std::size_t i = 0; i < newLayers.size(); ++i)
+        {
+            auto& layer = newLayers[i];
+            layer.configure (layers[i].getHopSize());
+
+            const std::size_t remaining = (consumed < trimmedLength) ? (trimmedLength - consumed) : 0;
+            if (remaining == 0)
+            {
+                layer.setImpulseResponse (nullptr, 0, headroomScale);
+                continue;
+            }
+
+            consumed += layer.setImpulseResponse (impulseResponse + consumed, remaining, headroomScale);
+        }
+
+        {
+            // Only the swap and the state reset happen under the lock
+            SpinLock::ScopedLockType lock (processingLock);
+
+            directFIR = std::move (newFIR);
+            layers = std::move (newLayers);
+            finalImpulseLength = trimmedLength;
+
+            resetStateUnsafe();
+        }
+    }
+
+    /** Returns the impulse length stored by the last setImpulseResponse() call, i.e. the
+        length left after optional end-silence trimming. Read without taking processingLock. */
+    std::size_t getImpulseLength() const
+    {
+        return finalImpulseLength;
+    }
+
+    /** Clears all processing state while holding processingLock, so it cannot interleave
+        with a process() call. The loaded impulse response is left untouched. */
+    void reset()
+    {
+        SpinLock::ScopedLockType lock (processingLock);
+
+        resetStateUnsafe();
+    }
+
+    /**
+        Locked entry point for block processing.
+
+        @param input       Samples to convolve
+        @param output      Buffer the result is accumulated into
+        @param numSamples  Number of samples in input and output
+    */
+    void process (const float* input, float* output, std::size_t numSamples)
+    {
+        // An empty block is a no-op and never touches the lock
+        if (numSamples > 0)
+        {
+            SpinLock::ScopedLockType scopedLock (processingLock);
+
+            processUnsafe (input, output, numSamples);
+        }
+    }
+
+private:
+    /** Returns every piece of processing state to its initial value. The caller is
+        expected to hold processingLock; no locking happens here. */
+    void resetStateUnsafe()
+    {
+        // Filter memories
+        directFIR.reset();
+
+        for (auto& fftLayer : layers)
+            fftLayer.resetState();
+
+        // The input staging ring restarts empty
+        inputStagingAvailable = 0;
+        inputStagingWriteIndex = 0;
+        inputStagingReadIndex = 0;
+
+        std::fill (outputStaging.begin(), outputStaging.end(), 0.0f);
+
+        // Per-layer queues
+        for (std::size_t i = 0; i < layerInputBuffers.size(); ++i)
+            layerInputBuffers[i].clear();
+
+        for (std::size_t i = 0; i < layerOutputBuffers.size(); ++i)
+            layerOutputBuffers[i].clear();
+    }
+
+    /**
+        Convolves one block and accumulates the result into output. The caller is expected
+        to hold processingLock.
+
+        The direct FIR runs on the whole block. The FFT layers are fed from the input
+        staging ring one base hop at a time; each layer queues hops until it has a full
+        layer hop, processes it, and its queued output is drained one base hop at a time.
+
+        If prepare() has not run or numSamples exceeds the prepared maximum, this asserts
+        and returns without touching output.
+
+        @param input       Samples to convolve
+        @param output      Buffer the result is added to
+        @param numSamples  Number of samples in input and output
+    */
+    void processUnsafe (const float* input, float* output, std::size_t numSamples)
+    {
+        jassert (isPrepared);
+        jassert (numSamples <= maxBlockSize);
+        if (! isPrepared || numSamples > maxBlockSize)
+            return;
+
+        FloatVectorOperations::clear (workingOutput.data(), numSamples);
+
+        // Process direct FIR (no block size constraints)
+        directFIR.process (input, workingOutput.data(), numSamples);
+        if (layers.empty())
+        {
+            FloatVectorOperations::add (output, workingOutput.data(), numSamples);
+            return;
+        }
+
+        // Add input to main input staging buffer using circular buffer logic
+        writeToInputStaging (input, numSamples);
+
+        const std::size_t hopSize = static_cast<std::size_t> (baseHopSize);
+        std::size_t outputSamplesProduced = 0;
+
+        while (getInputStagingAvailable() >= hopSize)
+        {
+            // Read hop from input staging
+            readFromInputStaging (tempLayerHop.data(), hopSize);
+            FloatVectorOperations::clear (outputStaging.data(), outputStaging.size());
+
+            // Hand the fresh hop to every active layer first. tempLayerHop doubles as the
+            // scratch buffer for the per-layer reads below; distributing the hop inside
+            // that loop would feed later layers whatever an earlier, larger layer had just
+            // read back instead of the current input.
+            for (std::size_t layerIndex = 0; layerIndex < layers.size(); ++layerIndex)
+            {
+                if (layers[layerIndex].hasImpulseResponse())
+                    layerInputBuffers[layerIndex].write (tempLayerHop.data(), hopSize);
+            }
+
+            for (std::size_t layerIndex = 0; layerIndex < layers.size(); ++layerIndex)
+            {
+                auto& layer = layers[layerIndex];
+                if (! layer.hasImpulseResponse())
+                    continue;
+
+                const int layerHopSize = layer.getHopSize();
+                auto& inputBuffer = layerInputBuffers[layerIndex];
+                auto& outputBuffer = layerOutputBuffers[layerIndex];
+
+                // Process complete layer hops
+                while (inputBuffer.getAvailableForRead() >= static_cast<std::size_t> (layerHopSize))
+                {
+                    // Read a full hop for this layer (tempLayerHop is free to reuse by now)
+                    inputBuffer.read (tempLayerHop.data(), static_cast<std::size_t> (layerHopSize));
+                    FloatVectorOperations::clear (layerTempOutput.data(), layerHopSize);
+
+                    // Process hop
+                    layer.processHop (tempLayerHop.data(), layerTempOutput.data());
+
+                    // Write output to layer's output buffer
+                    outputBuffer.write (layerTempOutput.data(), static_cast<std::size_t> (layerHopSize));
+                }
+
+                // Mix available output from this layer
+                if (outputBuffer.getAvailableForRead() >= hopSize)
+                {
+                    outputBuffer.read (layerTempOutput.data(), hopSize);
+                    FloatVectorOperations::add (outputStaging.data(), layerTempOutput.data(), hopSize);
+                }
+            }
+
+            // Add staging output to main output.
+            // NOTE(review): when fewer than hopSize samples are left in this block, the rest of
+            // outputStaging is not carried over (it is cleared at the top of the next hop), so
+            // block sizes that are not a multiple of baseHopSize appear to drop layer output.
+            // Confirm whether an output FIFO is needed.
+            const std::size_t samplesToWrite = std::min (hopSize, numSamples - outputSamplesProduced);
+            FloatVectorOperations::add (workingOutput.data() + outputSamplesProduced, outputStaging.data(), samplesToWrite);
+            outputSamplesProduced += samplesToWrite;
+        }
+
+        // Copy final result to output (accumulate)
+        FloatVectorOperations::add (output, workingOutput.data(), numSamples);
+    }
+
+private:
+    /**
+        Appends samples to the input staging ring, wrapping at the end of the storage.
+        Asserts when the request exceeds the free space and then stores only what fits.
+
+        @param data        Samples to append
+        @param numSamples  Number of samples in data
+    */
+    void writeToInputStaging (const float* data, std::size_t numSamples)
+    {
+        const std::size_t capacity = inputStaging.size();
+        const std::size_t freeSpace = capacity - inputStagingAvailable;
+        jassert (numSamples <= freeSpace);
+
+        const std::size_t count = std::min (numSamples, freeSpace);
+        if (count == 0)
+            return;
+
+        // First segment runs up to the physical end, the second (possibly empty) restarts at the front
+        const std::size_t headCount = std::min (count, capacity - inputStagingWriteIndex);
+        const float* const headEnd = data + headCount;
+
+        std::copy (data, headEnd, inputStaging.begin() + inputStagingWriteIndex);
+        std::copy (headEnd, data + count, inputStaging.begin());
+
+        inputStagingAvailable += count;
+        inputStagingWriteIndex = (inputStagingWriteIndex + count) % capacity;
+    }
+
+    /**
+        Removes the oldest samples from the input staging ring, wrapping at the end of the
+        storage. Asserts when more samples are requested than are queued and then copies
+        only what is available.
+
+        @param data        Destination for the samples
+        @param numSamples  Number of samples requested
+    */
+    void readFromInputStaging (float* data, std::size_t numSamples)
+    {
+        jassert (numSamples <= inputStagingAvailable);
+
+        const std::size_t count = std::min (numSamples, inputStagingAvailable);
+        if (count == 0)
+            return;
+
+        const std::size_t capacity = inputStaging.size();
+
+        // First segment runs up to the physical end, the second (possibly empty) restarts at the front
+        const std::size_t headCount = std::min (count, capacity - inputStagingReadIndex);
+        const std::size_t tailCount = count - headCount;
+
+        const auto headBegin = inputStaging.begin() + inputStagingReadIndex;
+        std::copy (headBegin, headBegin + headCount, data);
+        std::copy (inputStaging.begin(), inputStaging.begin() + tailCount, data + headCount);
+
+        inputStagingAvailable -= count;
+        inputStagingReadIndex = (inputStagingReadIndex + count) % capacity;
+    }
+
+    /** Number of samples written to the input staging ring that have not been read yet. */
+    std::size_t getInputStagingAvailable() const { return inputStagingAvailable; }
+
+    // Upper bound on the impulse samples handed to directFIR in setImpulseResponse()
+    std::size_t directFIRCoefficientCount = 0;
+    // Samples pulled from input staging per iteration of the processing loop; also the size of outputStaging
+    int baseHopSize = 0;
+    // Size given to tempLayerHop / layerTempOutput in prepare()
+    std::size_t maxHopSize = 0;
+    // Largest block accepted by processUnsafe(), as passed to prepare()
+    std::size_t maxBlockSize = 0;
+    // Impulse length after optional trimming, as stored by setImpulseResponse()
+    std::size_t finalImpulseLength = 0;
+    // Set by prepare(); processUnsafe() produces no output while this is false
+    bool isPrepared = false;
+
+    DirectFIRFloat directFIR;
+    std::vector<FFTLayer> layers;
+
+    // Working buffers
+    // Per-call mix of direct FIR and layer output before it is added to the caller's buffer
+    std::vector<float> workingOutput;
+
+    // Input staging with circular buffer management
+    std::vector<float> inputStaging;
+    std::size_t inputStagingReadIndex = 0;  // next position to read
+    std::size_t inputStagingWriteIndex = 0; // next position to write
+    std::size_t inputStagingAvailable = 0;  // samples written but not yet read
+    // Sum of the layer outputs for the hop currently being processed
+    std::vector<float> outputStaging;
+
+    // Per-layer circular buffering (one entry per element of layers)
+    std::vector<CircularBuffer> layerInputBuffers;
+    std::vector<CircularBuffer> layerOutputBuffers;
+    // Scratch: the hop read from staging, then each layer's input hop
+    std::vector<float> tempLayerHop;
+    // Scratch: a layer's processed hop, then the slice mixed into outputStaging
+    std::vector<float> layerTempOutput;
+
+    // Held by process(), reset() and the swap at the end of setImpulseResponse()
+    mutable SpinLock processingLock;
+};
+
+//==============================================================================
+// PartitionedConvolver implementation
+//==============================================================================
+
+// Creates the convolver together with its private implementation object.
+PartitionedConvolver::PartitionedConvolver()
+    : pImpl (std::make_unique<Impl>())
+{
+}
+
+// Defined here, where Impl is a complete type, so std::unique_ptr<Impl> can destroy it.
+PartitionedConvolver::~PartitionedConvolver() = default;
+
+// Takes over the other convolver's implementation. The moved-from object is left with a
+// null pImpl, so none of the forwarding methods may be called on it until it is assigned again.
+PartitionedConvolver::PartitionedConvolver (PartitionedConvolver&& other) noexcept
+    : pImpl (std::move (other.pImpl))
+{
+}
+
+// Move assignment; self-assignment is a no-op. The moved-from object ends up with a null pImpl.
+PartitionedConvolver& PartitionedConvolver::operator= (PartitionedConvolver&& other) noexcept
+{
+    if (this != &other)
+        pImpl = std::move (other.pImpl);
+    return *this;
+}
+
+// Forwards the layer layout to the implementation unchanged.
+void PartitionedConvolver::configureLayers (std::size_t directFIRCoefficients, const std::vector<LayerSpec>& layers)
+{
+    pImpl->configureLayers (directFIRCoefficients, layers);
+}
+
+// Builds a layer layout from a plain list of hop sizes and applies it with configureLayers().
+// Hops below 64 do not get an FFT layer of their own; their size is added to the direct
+// FIR coefficient count instead. All other hops are passed through nextPowerOfTwo().
+// Non-positive hops are rejected (assert) and skipped.
+void PartitionedConvolver::setTypicalLayout (std::size_t directCoefficients, const std::vector<int>& hops)
+{
+    std::vector<LayerSpec> layerSpecs;
+    layerSpecs.reserve (hops.size());
+
+    for (int hop : hops)
+    {
+        // A negative hop converted to std::size_t would wrap around and inflate the
+        // direct coefficient count to a huge value, and a zero hop contributes nothing.
+        jassert (hop > 0);
+        if (hop <= 0)
+            continue;
+
+        if (hop < 64)
+            directCoefficients += static_cast<std::size_t> (hop);
+        else
+            layerSpecs.push_back ({ nextPowerOfTwo (hop) });
+    }
+
+    configureLayers (directCoefficients, layerSpecs);
+}
+
+// Forwards a raw-pointer impulse response to the implementation.
+void PartitionedConvolver::setImpulseResponse (const float* impulseResponse, std::size_t length, const IRLoadOptions& options)
+{
+    pImpl->setImpulseResponse (impulseResponse, length, options);
+}
+
+// Vector convenience overload; delegates to the pointer/length overload above.
+void PartitionedConvolver::setImpulseResponse (const std::vector<float>& impulseResponse, const IRLoadOptions& options)
+{
+    setImpulseResponse (impulseResponse.data(), impulseResponse.size(), options);
+}
+
+// The four methods below forward to the implementation object without adding any logic.
+
+// Impulse length stored by the last setImpulseResponse() call.
+std::size_t PartitionedConvolver::getImpulseLength() const
+{
+    return pImpl->getImpulseLength();
+}
+
+// Allocates the implementation's buffers for blocks of up to maxBlockSize samples.
+void PartitionedConvolver::prepare (std::size_t maxBlockSize)
+{
+    pImpl->prepare (maxBlockSize);
+}
+
+// Clears the processing state; the impulse response stays loaded.
+void PartitionedConvolver::reset()
+{
+    pImpl->reset();
+}
+
+// Convolves numSamples samples of input and accumulates the result into output.
+void PartitionedConvolver::process (const float* input, float* output, std::size_t numSamples)
+{
+    pImpl->process (input, output, numSamples);
+}
+
+} // namespace yup
diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.h b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h
new file mode 100644
index 000000000..f319bc217
--- /dev/null
+++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.h
@@ -0,0 +1,178 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2025 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#pragma once
+
+namespace yup
+{
+
+//==============================================================================
+/**
+    Layered partitioned convolution engine optimized for real-time audio processing.
+
+    Combines multiple processing strategies for efficient convolution:
+    - Direct FIR computation for early coefficients (low latency)
+    - One or more FFT-based Overlap-Add layers with uniform partitioning per layer
+
+    The engine uses YUP's FFTProcessor for real FFT operations and supports:
+    - Arbitrary input/output block sizes with internal buffering
+    - Real-time safe processing (no heap allocations during process())
+    - Configurable layer hierarchy for optimal CPU/latency trade-off
+
+    Example usage:
+    @code
+    PartitionedConvolver convolver;
+
+    // Configure layers: 256 direct coefficients + FFT layers with hops 256, 1024, 4096
+    convolver.setTypicalLayout(256, {256, 1024, 4096});
+
+    // Prepare for processing with maximum block size (must be called before process)
+    convolver.prepare(512); // Maximum 512 samples per process() call
+
+    // Set impulse response (e.g., reverb IR)
+    std::vector<float> impulseResponse = loadImpulseResponse();
+    convolver.setImpulseResponse(impulseResponse);
+
+    // In audio callback (accumulates into output):
+    convolver.process(inputBuffer, outputBuffer, numSamples); // numSamples <= 512
+    @endcode
+
+    @note The process() method accumulates results into the output buffer.
+          Clear the output buffer first if overwrite behavior is desired.
+*/
+class PartitionedConvolver
+{
+public:
+    //==============================================================================
+    /** Configuration for a single FFT-based convolution layer */
+    struct LayerSpec
+    {
+        int hopSize; /**< Partition size L (FFT size will be 2*L) */
+    };
+
+    //==============================================================================
+    /** Default constructor */
+    PartitionedConvolver();
+
+    /** Destructor */
+    ~PartitionedConvolver();
+
+    // Non-copyable but movable.
+    // A moved-from convolver no longer owns an implementation object: do not call any
+    // other method on it until another convolver has been move-assigned to it.
+    PartitionedConvolver (PartitionedConvolver&& other) noexcept;
+    PartitionedConvolver& operator= (PartitionedConvolver&& other) noexcept;
+
+    //==============================================================================
+    /**
+        Configure the convolution layers before setting the impulse response.
+
+        @param directFIRCoefficients  Number of early coefficients to process with direct FIR (for low latency)
+        @param layers         Vector of layer specifications with increasing hop sizes
+                             (e.g., {{256}, {1024}, {4096}} for 256→1024→4096 progression)
+    */
+    void configureLayers (std::size_t directFIRCoefficients, const std::vector<LayerSpec>& layers);
+
+    /**
+        Convenience method to set a typical late-reverb configuration.
+
+        Hops smaller than 64 do not create an FFT layer; their value is added to the direct
+        FIR coefficient count instead. All other hops are passed through nextPowerOfTwo()
+        before being used as a layer hop size.
+
+        @param directCoefficients  Number of direct FIR coefficients for early reflections
+        @param hops        Vector of hop sizes for FFT layers (geometrically increasing recommended)
+    */
+    void setTypicalLayout (std::size_t directCoefficients, const std::vector<int>& hops);
+
+    //==============================================================================
+    /** Impulse response loading options. */
+    struct IRLoadOptions
+    {
+        /** Defaults: normalisation enabled, -12 dB headroom, no end-silence trimming. */
+        IRLoadOptions()
+            : normalize (true)
+            , headroomDb (-12.0f)
+        {
+        }
+
+        /** When true, the impulse response is scaled by the inverse of its absolute peak. */
+        bool normalize;
+
+        /** Gain in dB that is always applied to the impulse response (as 10^(dB/20)). */
+        float headroomDb;
+
+        /** When set, trailing content below this level (in dB) is trimmed from the impulse response. */
+        std::optional<float> trimEndSilenceBelowDb;
+    };
+
+    /**
+        Set the impulse response for convolution.
+
+        The new response is swapped in under the processing lock and the processing
+        state is reset.
+
+        @param impulseResponse  Pointer to impulse response samples
+        @param length          Number of samples in the impulse response
+        @param options         Normalisation, headroom and end-silence trimming settings
+
+        @note This method is not real-time safe and should be called during initialization
+              or from a background thread when audio is paused.
+    */
+    void setImpulseResponse (const float* impulseResponse, std::size_t length, const IRLoadOptions& options = {});
+
+    /**
+        Set the impulse response from a vector.
+
+        @param impulseResponse  Vector containing impulse response samples
+        @param options         Normalisation, headroom and end-silence trimming settings
+    */
+    void setImpulseResponse (const std::vector<float>& impulseResponse, const IRLoadOptions& options = {});
+
+    /** Returns the length of the impulse in samples, taking into account trimmed silence samples. */
+    std::size_t getImpulseLength() const;
+
+    //==============================================================================
+    /**
+        Prepare the convolver for processing with a specific maximum block size.
+
+        Buffer sizes depend on the layer layout, so call this after configureLayers() or
+        setTypicalLayout().
+
+        @param maxBlockSize  Maximum number of samples that will be passed to process()
+
+        @note This method is not real-time safe and should be called during initialization
+              or when audio processing is paused. It pre-allocates all internal buffers
+              to handle the specified block size without further allocations.
+    */
+    void prepare (std::size_t maxBlockSize);
+
+    /**
+        Process audio samples through the convolver.
+
+        @param input       Input audio buffer
+        @param output      Output audio buffer (results are accumulated)
+        @param numSamples  Number of samples to process; must not exceed the maxBlockSize
+                           passed to prepare()
+
+        @note Results are accumulated into the output buffer. Clear it first if needed.
+        @note If prepare() has not been called, or numSamples exceeds the prepared maximum,
+              the call asserts and leaves the output buffer untouched.
+        @note This method is real-time safe with no heap allocations.
+    */
+    void process (const float* input, float* output, std::size_t numSamples);
+
+    /**
+        Reset all internal processing state (clears delay lines, overlap buffers).
+        Impulse response partitions are preserved.
+    */
+    void reset();
+
+private:
+    //==============================================================================
+    class FFTLayer;
+    class CircularBuffer;
+    class Impl;
+
+    // Owns all state; the public methods forward to it
+    std::unique_ptr<Impl> pImpl;
+
+    //==============================================================================
+    YUP_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (PartitionedConvolver)
+};
+
+} // namespace yup
diff --git a/modules/yup_dsp/designers/yup_FilterDesigner.cpp b/modules/yup_dsp/designers/yup_FilterDesigner.cpp
index fc32560d1..2972eb0c0 100644
--- a/modules/yup_dsp/designers/yup_FilterDesigner.cpp
+++ b/modules/yup_dsp/designers/yup_FilterDesigner.cpp
@@ -625,6 +625,176 @@ int FilterDesigner<CoeffType>::designLinkwitzRiley (
     return static_cast<int> (lowCoeffs.size());
 }
 
+//==============================================================================
+// FIR Filter Design Implementations
+//==============================================================================
+
+// Windowed-sinc lowpass: ideal sinc taps multiplied by the chosen window, then scaled
+// so the taps sum to one. The tap count is passed through nextOdd() first.
+template <typename CoeffType>
+void FilterDesigner<CoeffType>::designFIRLowpass (
+    std::vector<CoeffType>& coefficients,
+    int numCoefficients,
+    CoeffType cutoffFreq,
+    double sampleRate,
+    WindowType windowType,
+    CoeffType windowParameter) noexcept
+{
+    jassert (numCoefficients > 0);
+    jassert (cutoffFreq > static_cast<CoeffType> (0.0));
+    jassert (sampleRate > 0.0);
+    jassert (cutoffFreq < static_cast<CoeffType> (sampleRate / 2.0));
+
+    numCoefficients = nextOdd (numCoefficients);
+    coefficients.resize (numCoefficients);
+
+    const auto pi = MathConstants<CoeffType>::pi;
+    const auto normalizedCutoff = static_cast<CoeffType> (2.0) * cutoffFreq / static_cast<CoeffType> (sampleRate);
+    const int center = (numCoefficients - 1) / 2;
+
+    for (int i = 0; i < numCoefficients; ++i)
+    {
+        // The centre tap is the limit of sin (x) / (pi * n) for n -> 0
+        CoeffType ideal = normalizedCutoff;
+        if (i != center)
+        {
+            const auto tap = static_cast<CoeffType> (i - center);
+            ideal = std::sin (pi * normalizedCutoff * tap) / (pi * tap);
+        }
+
+        coefficients[i] = ideal * WindowFunctions<CoeffType>::getValue (windowType, i, numCoefficients, windowParameter);
+    }
+
+    // Scale so the taps sum to one; a zero sum is left as it is
+    const auto tapSum = std::accumulate (coefficients.begin(), coefficients.end(), static_cast<CoeffType> (0.0));
+    if (tapSum == static_cast<CoeffType> (0.0))
+        return;
+
+    for (auto& tapValue : coefficients)
+        tapValue /= tapSum;
+}
+
+// Windowed-sinc highpass obtained from the matching lowpass by spectral inversion
+// (negate every tap, add a unit impulse at the centre), then scaled by the
+// alternating-sign tap sum. The tap count is passed through nextOdd() first.
+template <typename CoeffType>
+void FilterDesigner<CoeffType>::designFIRHighpass (
+    std::vector<CoeffType>& coefficients,
+    int numCoefficients,
+    CoeffType cutoffFreq,
+    double sampleRate,
+    WindowType windowType,
+    CoeffType windowParameter) noexcept
+{
+    jassert (numCoefficients > 0);
+    jassert (cutoffFreq > static_cast<CoeffType> (0.0));
+    jassert (sampleRate > 0.0);
+    jassert (cutoffFreq < static_cast<CoeffType> (sampleRate / 2.0));
+
+    // Generate lowpass first. windowParameter has to be forwarded explicitly, otherwise
+    // the lowpass silently falls back to its own default and the caller's value is ignored.
+    numCoefficients = nextOdd (numCoefficients);
+    designFIRLowpass (coefficients, numCoefficients, cutoffFreq, sampleRate, windowType, windowParameter);
+
+    // Convert to highpass using spectral inversion
+    const int center = (numCoefficients - 1) / 2;
+    for (int i = 0; i < numCoefficients; ++i)
+        coefficients[i] = -coefficients[i];
+
+    // Add unit impulse at center
+    coefficients[center] += static_cast<CoeffType> (1.0);
+
+    // Normalization: hpi is the sum of h[n] * (-1)^n.
+    // NOTE(review): the alternating sign is anchored at tap 0 rather than at `center`, so
+    // for an odd `center` hpi presumably comes out negative and the division flips the
+    // polarity of every tap. Confirm this is intended.
+    CoeffType hpi (0.0);
+    for (int n = 0; n < numCoefficients; ++n)
+        hpi += coefficients[n] * ((n & 1) ? static_cast<CoeffType> (-1.0) : static_cast<CoeffType> (1.0));
+
+    if (hpi != static_cast<CoeffType> (0.0))
+    {
+        for (auto& c : coefficients)
+            c /= hpi;
+    }
+}
+
+// Windowed-sinc bandpass: the difference of two ideal lowpass responses (upper edge
+// minus lower edge) multiplied by the chosen window. No gain normalisation is applied.
+// The tap count is passed through nextOdd() first.
+template <typename CoeffType>
+void FilterDesigner<CoeffType>::designFIRBandpass (
+    std::vector<CoeffType>& coefficients,
+    int numCoefficients,
+    CoeffType lowCutoffFreq,
+    CoeffType highCutoffFreq,
+    double sampleRate,
+    WindowType windowType,
+    CoeffType windowParameter) noexcept
+{
+    jassert (numCoefficients > 0);
+    jassert (lowCutoffFreq > static_cast<CoeffType> (0.0));
+    jassert (highCutoffFreq > lowCutoffFreq);
+    jassert (sampleRate > 0.0);
+    jassert (highCutoffFreq < static_cast<CoeffType> (sampleRate / 2.0));
+
+    numCoefficients = nextOdd (numCoefficients);
+    coefficients.resize (numCoefficients);
+
+    const auto pi = MathConstants<CoeffType>::pi;
+    const auto lowEdge = static_cast<CoeffType> (2.0) * lowCutoffFreq / static_cast<CoeffType> (sampleRate);
+    const auto highEdge = static_cast<CoeffType> (2.0) * highCutoffFreq / static_cast<CoeffType> (sampleRate);
+    const int center = (numCoefficients - 1) / 2;
+
+    for (int i = 0; i < numCoefficients; ++i)
+    {
+        // The centre tap is the limit of the sinc difference for n -> 0
+        CoeffType ideal = highEdge - lowEdge;
+        if (i != center)
+        {
+            const auto tap = static_cast<CoeffType> (i - center);
+            ideal = (std::sin (pi * highEdge * tap) - std::sin (pi * lowEdge * tap)) / (pi * tap);
+        }
+
+        coefficients[i] = ideal * WindowFunctions<CoeffType>::getValue (windowType, i, numCoefficients, windowParameter);
+    }
+}
+
+// Windowed-sinc bandstop obtained from the matching bandpass by spectral inversion
+// (negate every tap, add a unit impulse at the centre). No gain normalisation is
+// applied. The tap count is passed through nextOdd() first.
+template <typename CoeffType>
+void FilterDesigner<CoeffType>::designFIRBandstop (
+    std::vector<CoeffType>& coefficients,
+    int numCoefficients,
+    CoeffType lowCutoffFreq,
+    CoeffType highCutoffFreq,
+    double sampleRate,
+    WindowType windowType,
+    CoeffType windowParameter) noexcept
+{
+    jassert (numCoefficients > 0);
+    jassert (lowCutoffFreq > static_cast<CoeffType> (0.0));
+    jassert (highCutoffFreq > lowCutoffFreq);
+    jassert (sampleRate > 0.0);
+    jassert (highCutoffFreq < static_cast<CoeffType> (sampleRate / 2.0));
+
+    // Generate bandpass first. windowParameter has to be forwarded explicitly, otherwise
+    // the bandpass silently falls back to its own default and the caller's value is ignored.
+    numCoefficients = nextOdd (numCoefficients);
+    designFIRBandpass (coefficients, numCoefficients, lowCutoffFreq, highCutoffFreq, sampleRate, windowType, windowParameter);
+
+    // Convert to bandstop using spectral inversion
+    const int center = (numCoefficients - 1) / 2;
+
+    for (int i = 0; i < numCoefficients; ++i)
+        coefficients[i] = -coefficients[i];
+
+    // Add unit impulse at center
+    coefficients[center] += static_cast<CoeffType> (1.0);
+}
+
 //==============================================================================
 
 template class FilterDesigner<float>;
diff --git a/modules/yup_dsp/designers/yup_FilterDesigner.h b/modules/yup_dsp/designers/yup_FilterDesigner.h
index 0dba2770e..f74b7097d 100644
--- a/modules/yup_dsp/designers/yup_FilterDesigner.h
+++ b/modules/yup_dsp/designers/yup_FilterDesigner.h
@@ -625,6 +625,86 @@ class FilterDesigner
     {
         return designLinkwitzRiley (8, crossoverFreq, sampleRate, lowCoeffs, highCoeffs);
     }
+
+    //==============================================================================
+    // FIR Filter Design
+    //==============================================================================
+
+    /**
+        Designs FIR lowpass filter coefficients using windowed sinc method.
+
+        The windowed taps are scaled so that they sum to one.
+
+        @param coefficients     Output vector; resized and overwritten with the designed taps
+        @param numCoefficients  The requested number of filter coefficients (filter order + 1).
+                                The implementation passes this through nextOdd(), so the
+                                resulting vector size may differ from the requested value
+        @param cutoffFreq       The cutoff frequency in Hz (above 0 and below sampleRate / 2)
+        @param sampleRate       The sample rate in Hz
+        @param windowType       The window function to apply (default: WindowType::hann)
+        @param windowParameter  Optional window shape parameter (default: 8)
+    */
+    static void designFIRLowpass (
+        std::vector<CoeffType>& coefficients,
+        int numCoefficients,
+        CoeffType cutoffFreq,
+        double sampleRate,
+        WindowType windowType = WindowType::hann,
+        CoeffType windowParameter = CoeffType (8)) noexcept;
+
+    /**
+        Designs FIR highpass filter coefficients using windowed sinc method.
+
+        The highpass is derived from the matching lowpass by spectral inversion and then
+        scaled by its alternating-sign tap sum.
+
+        @param coefficients     Output vector; resized and overwritten with the designed taps
+        @param numCoefficients  The requested number of filter coefficients (filter order + 1).
+                                The implementation passes this through nextOdd(), so the
+                                resulting vector size may differ from the requested value
+        @param cutoffFreq       The cutoff frequency in Hz (above 0 and below sampleRate / 2)
+        @param sampleRate       The sample rate in Hz
+        @param windowType       The window function to apply (default: WindowType::hann)
+        @param windowParameter  Optional window shape parameter (default: 8)
+    */
+    static void designFIRHighpass (
+        std::vector<CoeffType>& coefficients,
+        int numCoefficients,
+        CoeffType cutoffFreq,
+        double sampleRate,
+        WindowType windowType = WindowType::hann,
+        CoeffType windowParameter = CoeffType (8)) noexcept;
+
+    /**
+        Designs FIR bandpass filter coefficients using windowed sinc method.
+
+        The taps are the windowed difference of two ideal lowpass responses; no gain
+        normalisation is applied.
+
+        @param coefficients     Output vector; resized and overwritten with the designed taps
+        @param numCoefficients  The requested number of filter coefficients (filter order + 1).
+                                The implementation passes this through nextOdd(), so the
+                                resulting vector size may differ from the requested value
+        @param lowCutoffFreq    The lower cutoff frequency in Hz (above 0)
+        @param highCutoffFreq   The upper cutoff frequency in Hz (above lowCutoffFreq and below sampleRate / 2)
+        @param sampleRate       The sample rate in Hz
+        @param windowType       The window function to apply (default: WindowType::hann)
+        @param windowParameter  Optional window shape parameter (default: 8)
+    */
+    static void designFIRBandpass (
+        std::vector<CoeffType>& coefficients,
+        int numCoefficients,
+        CoeffType lowCutoffFreq,
+        CoeffType highCutoffFreq,
+        double sampleRate,
+        WindowType windowType = WindowType::hann,
+        CoeffType windowParameter = CoeffType (8)) noexcept;
+
+    /**
+        Designs FIR bandstop filter coefficients using windowed sinc method.
+
+        The bandstop is derived from the matching bandpass by spectral inversion; no gain
+        normalisation is applied.
+
+        @param coefficients     Output vector; resized and overwritten with the designed taps
+        @param numCoefficients  The requested number of filter coefficients (filter order + 1).
+                                The implementation passes this through nextOdd(), so the
+                                resulting vector size may differ from the requested value
+        @param lowCutoffFreq    The lower cutoff frequency in Hz (above 0)
+        @param highCutoffFreq   The upper cutoff frequency in Hz (above lowCutoffFreq and below sampleRate / 2)
+        @param sampleRate       The sample rate in Hz
+        @param windowType       The window function to apply (default: WindowType::hann)
+        @param windowParameter  Optional window shape parameter (default: 8)
+    */
+    static void designFIRBandstop (
+        std::vector<CoeffType>& coefficients,
+        int numCoefficients,
+        CoeffType lowCutoffFreq,
+        CoeffType highCutoffFreq,
+        double sampleRate,
+        WindowType windowType = WindowType::hann,
+        CoeffType windowParameter = CoeffType (8)) noexcept;
 };
 
 } // namespace yup
diff --git a/modules/yup_dsp/filters/yup_DirectFIR.h b/modules/yup_dsp/filters/yup_DirectFIR.h
new file mode 100644
index 000000000..b637714c8
--- /dev/null
+++ b/modules/yup_dsp/filters/yup_DirectFIR.h
@@ -0,0 +1,324 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2025 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#pragma once
+
+namespace yup
+{
+
+//==============================================================================
+/**
+    Direct-form FIR (Finite Impulse Response) filter processor optimized for real-time audio.
+
+    Implements a time-domain FIR filter using direct convolution with SIMD optimizations.
+    This class is ideal for low-latency applications where the number of coefficients is relatively
+    small (typically < 512 coefficients), as it provides zero algorithmic delay.
+
+    Features:
+    - Zero algorithmic latency (only processing delay)
+    - SIMD-optimized convolution (AVX2, SSE, ARM NEON, vDSP)
+    - Circular buffer implementation for efficient sample history management
+    - Real-time safe processing (no heap allocations during process())
+    - Support for arbitrary block sizes
+    - Inherits FilterBase interface for frequency response analysis
+
+    Example usage:
+    @code
+    DirectFIR<float> fir;
+
+    // Set filter coefficients (e.g., lowpass filter)
+    auto coeffs = FilterDesigner<float>::designFIRLowpass(64, 1000.0f, 44100.0);
+    fir.setCoefficients(coeffs);
+
+    // Prepare for processing
+    fir.prepare(44100.0, 512);
+
+    // In audio callback:
+    fir.processBlock(inputBuffer, outputBuffer, numSamples);
+    @endcode
+
+    @tparam SampleType  Type for audio samples (float or double)
+    @tparam CoeffType   Type for internal coefficients (defaults to double)
+
+    @see PartitionedConvolver for longer impulse responses using FFT-based convolution
+    @see FilterBase for frequency response methods
+*/
+template <typename SampleType, typename CoeffType = double>
+class DirectFIR : public FilterBase<SampleType, CoeffType>
+{
+public:
+    //==============================================================================
+    /** Default constructor */
+    DirectFIR() = default;
+
+    /** Destructor */
+    ~DirectFIR() override = default;
+
+    /** Move constructor: takes over the buffers of other and leaves it without coefficients */
+    DirectFIR (DirectFIR&& other) noexcept
+        : coefficientsReversed (std::move (other.coefficientsReversed))
+        , history (std::move (other.history))
+        , numCoefficients (std::exchange (other.numCoefficients, 0))
+        , paddedLen (std::exchange (other.paddedLen, 0))
+        , writeIndex (std::exchange (other.writeIndex, 0))
+        , currentScaling (std::exchange (other.currentScaling, CoeffType (1)))
+    {
+    }
+
+    /** Move assignment: same hand-over as the move constructor, ignoring self-assignment */
+    DirectFIR& operator= (DirectFIR&& other) noexcept
+    {
+        if (this != &other)
+        {
+            coefficientsReversed = std::move (other.coefficientsReversed);
+            history = std::move (other.history);
+            numCoefficients = std::exchange (other.numCoefficients, 0);
+            paddedLen = std::exchange (other.paddedLen, 0);
+            writeIndex = std::exchange (other.writeIndex, 0);
+            currentScaling = std::exchange (other.currentScaling, CoeffType (1));
+        }
+        return *this;
+    }
+
+    //==============================================================================
+    /**
+        Set the FIR filter coefficients from a span (forwards to the pointer overload).
+
+        @param coefficients     Span of FIR coefficients
+        @param scaling          Scaling factor to apply to all coefficients
+
+        @note This method is not real-time safe and should be called during initialization
+              or when audio processing is paused, unless the coefficients vector has already been set with a greater or equal size.
+    */
+    void setCoefficients (yup::Span<const CoeffType> coefficients, CoeffType scaling = CoeffType (1))
+    {
+        setCoefficients (coefficients.data(), coefficients.size(), scaling);
+    }
+
+    /**
+        Set the FIR filter coefficients from a raw pointer and clear the sample history.
+
+        @param coefficients       Pointer to FIR coefficients array (nullptr removes the current coefficients)
+        @param numCoefficientsIn  Number of coefficients (0 removes the current coefficients)
+        @param scaling            Scaling factor to apply to all coefficients
+
+        @note This method is not real-time safe and should be called during initialization
+              or when audio processing is paused, unless the coefficients vector has already been set with a greater or equal size.
+    */
+    void setCoefficients (const CoeffType* coefficients, std::size_t numCoefficientsIn, CoeffType scaling = CoeffType (1))
+    {
+        if (coefficients == nullptr || numCoefficientsIn == 0)
+        {
+            reset();
+            numCoefficients = 0;
+            return;
+        }
+
+        numCoefficients = numCoefficientsIn;
+        paddedLen = (numCoefficientsIn + 3u) & ~std::size_t (3); // Round up to multiple of 4 for SIMD (size_t-wide mask)
+        // assign() instead of resize(): the padding must not keep values from a previous, longer coefficient set
+        coefficientsReversed.assign (paddedLen, CoeffType (0));
+
+        currentScaling = scaling;
+        if (! approximatelyEqual (currentScaling, CoeffType (1)))
+            FloatVectorOperations::copyWithMultiply (coefficientsReversed.data(), coefficients, scaling, static_cast<int> (numCoefficientsIn));
+        else
+            FloatVectorOperations::copy (coefficientsReversed.data(), coefficients, static_cast<int> (numCoefficientsIn));
+
+        std::reverse (coefficientsReversed.begin(), coefficientsReversed.begin() + numCoefficients);
+        // writeIndex restarts at 0, so samples kept from an earlier run would be read out of order: clear them
+        history.assign (2 * numCoefficients, SampleType (0));
+        writeIndex = 0;
+    }
+
+    /**
+        Get the number of filter coefficients.
+
+        @return Number of coefficients in the current filter
+    */
+    std::size_t getNumCoefficients() const noexcept
+    {
+        return numCoefficients;
+    }
+
+    /**
+        Check if the filter has been configured with coefficients.
+
+        @return True if coefficients have been set, false otherwise
+    */
+    bool hasCoefficients() const noexcept
+    {
+        return numCoefficients > 0;
+    }
+
+    /**
+        Get the current filter coefficients.
+
+        @return Vector containing the current coefficients (scaled, time-reversed and zero-padded to a multiple of 4)
+    */
+    const std::vector<CoeffType>& getCoefficients() const noexcept
+    {
+        return coefficientsReversed;
+    }
+
+    /**
+        Get the current scaling factor applied to coefficients.
+
+        @return Current scaling factor
+    */
+    CoeffType getScaling() const noexcept
+    {
+        return currentScaling;
+    }
+
+    //==============================================================================
+    /**
+        Reset all internal processing state (clears sample history).
+        Filter coefficients are preserved.
+    */
+    void reset() noexcept override
+    {
+        std::fill (history.begin(), history.end(), 0.0f);
+        writeIndex = 0;
+    }
+
+    /**
+        Prepares the filter for processing with the given sample rate and block size.
+
+        @param sampleRate     The sample rate in Hz
+        @param maximumBlockSize  The maximum number of samples that will be processed at once
+    */
+    void prepare (double sampleRate, int maximumBlockSize) override
+    {
+        this->sampleRate = sampleRate;
+        this->maximumBlockSize = maximumBlockSize;
+    }
+
+    /**
+        Processes a single sample.
+
+        @param inputSample  The input sample to process
+        @returns           The filtered output sample (zero while no coefficients are set)
+    */
+    SampleType processSample (SampleType inputSample) noexcept override
+    {
+        const std::size_t M = numCoefficients;
+        if (M == 0)
+            return SampleType (0); // The history buffer may be empty here, so it must not be indexed
+
+        // Update circular buffer with current input sample
+        history[writeIndex] = inputSample;
+        history[writeIndex + M] = inputSample; // Duplicate for efficient circular access
+
+        // Point to the start of the delay line for this sample, then advance the circular write pointer
+        const SampleType* w = history.data() + writeIndex + 1;
+        if (++writeIndex == M)
+            writeIndex = 0;
+
+        // Coefficients go first: dotProduct accumulates in the type of its first argument
+        return dotProduct (coefficientsReversed.data(), w, M);
+    }
+
+    /**
+        Processes a block of samples.
+
+        @param inputBuffer   Pointer to the input samples
+        @param outputBuffer  Pointer to the output buffer (filtered samples are added to its current contents)
+        @param numSamples    Number of samples to process
+    */
+    void processBlock (const SampleType* inputBuffer, SampleType* outputBuffer, int numSamples) noexcept override
+    {
+        if (numCoefficients == 0 || inputBuffer == nullptr || outputBuffer == nullptr)
+            return;
+
+        for (int i = 0; i < numSamples; ++i)
+            outputBuffer[i] += processSample (inputBuffer[i]);
+    }
+
+    /**
+        Returns the complex frequency response at the given frequency.
+
+        @param frequency  The frequency in Hz
+        @returns         The complex frequency response
+    */
+    Complex<CoeffType> getComplexResponse (CoeffType frequency) const override
+    {
+        if (numCoefficients == 0)
+            return Complex<CoeffType> (0, 0);
+
+        // ω = 2π f / Fs
+        const CoeffType omega = MathConstants<CoeffType>::twoPi * frequency / static_cast<CoeffType> (this->sampleRate);
+
+        // Standard FIR frequency response: H(e^{jω}) = Σ_{n=0}^{N-1} h[n] * e^{-jωn}
+        // coefficientsReversed stores: [h[M-1], h[M-2], ..., h[1], h[0]]
+        // So coefficientsReversed[k] = h[M-1-k], and we need: Σ h[n] * e^{-jωn}
+
+        // e^{-jω}
+        const Complex<CoeffType> ez_neg { std::cos (omega), -std::sin (omega) };
+
+        // Accumulate: Σ_{n=0}^{N-1} h[n] * e^{-jωn}
+        // Since coefficientsReversed[k] = h[M-1-k], we have h[n] = coefficientsReversed[M-1-n]
+        Complex<CoeffType> sum { 0, 0 };
+        Complex<CoeffType> ez_neg_n { 1, 0 }; // e^{-jω*0} = 1
+
+        for (std::size_t n = 0; n < numCoefficients; ++n)
+        {
+            const CoeffType h_n = coefficientsReversed[numCoefficients - 1 - n];
+            sum += h_n * ez_neg_n;
+            ez_neg_n *= ez_neg;
+        }
+
+        return sum;
+    }
+
+    /**
+        Process audio samples through the FIR filter (legacy method).
+
+        @param input       Input audio buffer
+        @param output      Output audio buffer (results are accumulated)
+        @param numSamples  Number of samples to process
+
+        @note Results are accumulated into the output buffer. Clear it first if needed.
+        @note This method is real-time safe with no heap allocations.
+        @note Use processBlock() for new code
+    */
+    void process (const SampleType* input, SampleType* output, std::size_t numSamples) noexcept
+    {
+        processBlock (input, output, static_cast<int> (numSamples));
+    }
+
+private:
+    std::vector<CoeffType> coefficientsReversed;
+    std::vector<SampleType> history;
+    std::size_t numCoefficients = 0;
+    std::size_t paddedLen = 0;
+    std::size_t writeIndex = 0;
+    CoeffType currentScaling = CoeffType (1);
+
+    YUP_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (DirectFIR)
+};
+
+//==============================================================================
+/** Type aliases for backward compatibility and convenience */
+using DirectFIRFloat = DirectFIR<float, float>;
+using DirectFIRDouble = DirectFIR<double, double>;
+
+} // namespace yup
diff --git a/modules/yup_dsp/frequency/yup_FFTProcessor.cpp b/modules/yup_dsp/frequency/yup_FFTProcessor.cpp
index 141853da6..94f0daceb 100644
--- a/modules/yup_dsp/frequency/yup_FFTProcessor.cpp
+++ b/modules/yup_dsp/frequency/yup_FFTProcessor.cpp
@@ -19,41 +19,6 @@
   ==============================================================================
 */
 
-// Conditional includes based on available FFT backends
-#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_VDSP && (YUP_MAC || YUP_IOS) && __has_include(<Accelerate/Accelerate.h>)
-#include <Accelerate/Accelerate.h>
-#define YUP_FFT_USING_VDSP 1
-#define YUP_FFT_FOUND_BACKEND 1
-#endif
-
-#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_INTEL_IPP && __has_include(<ipp.h>)
-#include <ipp.h>
-#define YUP_FFT_USING_IPP 1
-#define YUP_FFT_FOUND_BACKEND 1
-#endif
-
-#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_FFTW3 && __has_include(<fftw3.h>)
-#include <fftw3.h>
-#define YUP_FFT_USING_FFTW3 1
-#define YUP_FFT_FOUND_BACKEND 1
-#endif
-
-#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_PFFFT && YUP_MODULE_AVAILABLE_pffft_library
-#include <pffft_library/pffft_library.h>
-#define YUP_FFT_USING_PFFFT 1
-#define YUP_FFT_FOUND_BACKEND 1
-#endif
-
-#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_OOURA
-#include "yup_OouraFFT8g.h"
-#define YUP_FFT_USING_OOURA 1
-#define YUP_FFT_FOUND_BACKEND 1
-#endif
-
-#if ! defined(YUP_FFT_FOUND_BACKEND)
-#error "Unable to find a proper FFT backend !"
-#endif
-
 namespace yup
 {
 
@@ -700,7 +665,17 @@ FFTProcessor& FFTProcessor::operator= (FFTProcessor&& other) noexcept
 }
 
 //==============================================================================
-// Public interface
+
+void FFTProcessor::setScaling (FFTScaling newScaling) noexcept
+{
+    // Nothing to recompute when the mode is unchanged
+    if (scaling == newScaling)
+        return;
+
+    scaling = newScaling;
+    updateScalingFactor();
+}
+
 void FFTProcessor::setSize (int newSize)
 {
     jassert (isPowerOfTwo (newSize) && newSize >= 64 && newSize <= 65536);
@@ -709,6 +684,8 @@ void FFTProcessor::setSize (int newSize)
     {
         fftSize = newSize;
 
+        updateScalingFactor();
+
         if (engine)
             engine->initialize (fftSize);
     }
@@ -720,6 +697,7 @@ void FFTProcessor::performRealFFTForward (const float* realInput, float* complex
     jassert (engine != nullptr);
 
     engine->performRealFFTForward (realInput, complexOutput);
+
     applyScaling (complexOutput, fftSize * 2, true);
 }
 
@@ -729,6 +707,7 @@ void FFTProcessor::performRealFFTInverse (const float* complexInput, float* real
     jassert (engine != nullptr);
 
     engine->performRealFFTInverse (complexInput, realOutput);
+
     applyScaling (realOutput, fftSize, false);
 }
 
@@ -738,6 +717,7 @@ void FFTProcessor::performComplexFFTForward (const float* complexInput, float* c
     jassert (engine != nullptr);
 
     engine->performComplexFFTForward (complexInput, complexOutput);
+
     applyScaling (complexOutput, fftSize * 2, true);
 }
 
@@ -747,6 +727,7 @@ void FFTProcessor::performComplexFFTInverse (const float* complexInput, float* c
     jassert (engine != nullptr);
 
     engine->performComplexFFTInverse (complexInput, complexOutput);
+
     applyScaling (complexOutput, fftSize * 2, false);
 }
 
@@ -756,25 +737,25 @@ String FFTProcessor::getBackendName() const
 }
 
 //==============================================================================
-// Private implementation
-void FFTProcessor::applyScaling (float* data, int numElements, bool isForward)
+
+void FFTProcessor::updateScalingFactor() // Recomputes the cached scalingFactor from scaling and fftSize
 {
-    if (scaling == FFTScaling::none)
-        return;
+    if (scaling == FFTScaling::unitary)
+        scalingFactor = 1.0f / std::sqrt (static_cast<float> (fftSize)); // 1 / sqrt (N)
 
-    float scale = 1.0f;
+    else if (scaling == FFTScaling::asymmetric)
+        scalingFactor = 1.0f / static_cast<float> (fftSize); // 1 / N
 
-    if (scaling == FFTScaling::unitary)
-    {
-        scale = 1.0f / std::sqrt (static_cast<float> (fftSize));
-    }
-    else if (scaling == FFTScaling::asymmetric && ! isForward)
-    {
-        scale = 1.0f / static_cast<float> (fftSize);
-    }
+    else
+        scalingFactor = 1.0f; // Remaining modes use a neutral factor
+}
+
+void FFTProcessor::applyScaling (float* data, int numElements, bool isForward) const
 {
+    if (scaling == FFTScaling::none || (scaling == FFTScaling::asymmetric && isForward))
+        return; // Asymmetric mode leaves the forward transform unscaled and applies 1/N on the inverse only
 
-    if (scale != 1.0f)
-        FloatVectorOperations::multiply (data, scale, numElements);
+    FloatVectorOperations::multiply (data, scalingFactor, numElements);
 }
 
 } // namespace yup
diff --git a/modules/yup_dsp/frequency/yup_FFTProcessor.h b/modules/yup_dsp/frequency/yup_FFTProcessor.h
index 5ecaa5d9f..976ec9351 100644
--- a/modules/yup_dsp/frequency/yup_FFTProcessor.h
+++ b/modules/yup_dsp/frequency/yup_FFTProcessor.h
@@ -89,7 +89,7 @@ class FFTProcessor
     int getSize() const noexcept { return fftSize; }
 
     /** Sets the FFT scaling mode */
-    void setScaling (FFTScaling newScaling) noexcept { scaling = newScaling; }
+    void setScaling (FFTScaling newScaling) noexcept;
 
     /** Gets the current scaling mode */
     FFTScaling getScaling() const noexcept { return scaling; }
@@ -139,11 +139,13 @@ class FFTProcessor
 
 private:
     //==============================================================================
-    void applyScaling (float* data, int numElements, bool isForward);
+    void updateScalingFactor ();
+    void applyScaling (float* data, int numElements, bool isForward) const;
 
     //==============================================================================
     int fftSize = -1;
     FFTScaling scaling = FFTScaling::none;
+    float scalingFactor = 1.0f;
 
     std::unique_ptr<Engine> engine;
 
diff --git a/modules/yup_dsp/utilities/yup_DspMath.cpp b/modules/yup_dsp/utilities/yup_DspMath.cpp
new file mode 100644
index 000000000..b14bade41
--- /dev/null
+++ b/modules/yup_dsp/utilities/yup_DspMath.cpp
@@ -0,0 +1,104 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2025 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace yup
+{
+
+//==============================================================================
+/** float/float specialization of dotProduct: uses vDSP when enabled, otherwise AVX+FMA, SSE or NEON with a scalar tail */
+template <>
+float dotProduct (const float* __restrict a, const float* __restrict b, std::size_t length) noexcept
+{
+    float accumulation = 0.0f;
+
+#if YUP_ENABLE_VDSP
+    vDSP_dotpr (a, 1, b, 1, &accumulation, length); // Stride 1 over both inputs; covers the whole length
+
+#else
+    std::size_t i = 0; // Shared by the SIMD loop and the scalar tail below
+
+#if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS
+    __m256 vacc = _mm256_setzero_ps();
+    for (; i + 8 <= length; i += 8) // 8 floats per iteration, unaligned loads
+    {
+        __m256 va = _mm256_loadu_ps (a + i);
+        __m256 vb = _mm256_loadu_ps (b + i);
+        vacc = _mm256_fmadd_ps (va, vb, vacc);
+    }
+    __m128 low = _mm256_castps256_ps128 (vacc); // Fold the 8 partial sums down to one float
+    __m128 high = _mm256_extractf128_ps (vacc, 1);
+    __m128 vsum = _mm_add_ps (low, high);
+    vsum = _mm_hadd_ps (vsum, vsum);
+    vsum = _mm_hadd_ps (vsum, vsum);
+    accumulation += _mm_cvtss_f32 (vsum);
+
+#elif YUP_USE_SSE_INTRINSICS
+    __m128 vacc = _mm_setzero_ps();
+#if YUP_USE_FMA_INTRINSICS
+    for (; i + 4 <= length; i += 4) // 4 floats per iteration, fused multiply-add
+    {
+        __m128 va = _mm_loadu_ps (a + i);
+        __m128 vb = _mm_loadu_ps (b + i);
+        vacc = _mm_fmadd_ps (va, vb, vacc);
+    }
+#else
+    for (; i + 4 <= length; i += 4) // 4 floats per iteration, separate multiply and add
+    {
+        __m128 va = _mm_loadu_ps (a + i);
+        __m128 vb = _mm_loadu_ps (b + i);
+        vacc = _mm_add_ps (vacc, _mm_mul_ps (va, vb));
+    }
+#endif
+    __m128 shuf = _mm_shuffle_ps (vacc, vacc, _MM_SHUFFLE (2, 3, 0, 1)); // Horizontal sum of the 4 lanes (total lands in lane 0)
+    __m128 sums = _mm_add_ps (vacc, shuf);
+    shuf = _mm_movehl_ps (shuf, sums);
+    sums = _mm_add_ss (sums, shuf);
+    accumulation += _mm_cvtss_f32 (sums);
+
+#elif YUP_USE_ARM_NEON
+    float32x4_t vacc = vdupq_n_f32 (0.0f);
+    for (; i + 4 <= length; i += 4) // 4 floats per iteration, multiply-accumulate
+    {
+        float32x4_t va = vld1q_f32 (a + i);
+        float32x4_t vb = vld1q_f32 (b + i);
+        vacc = vmlaq_f32 (vacc, va, vb);
+    }
+#if YUP_64BIT
+    accumulation += vaddvq_f32 (vacc); // Across-vector add
+#else
+    float32x2_t vlow = vget_low_f32 (vacc); // Pairwise adds reduce the 4 lanes to one
+    float32x2_t vhigh = vget_high_f32 (vacc);
+    float32x2_t vsum2 = vpadd_f32 (vlow, vhigh);
+    vsum2 = vpadd_f32 (vsum2, vsum2);
+    accumulation += vget_lane_f32 (vsum2, 0);
+#endif
+
+#endif
+
+    // Handle remaining samples (all of them when no SIMD path was compiled in)
+    for (; i < length; ++i)
+        accumulation += a[i] * b[i];
+#endif
+
+    return accumulation;
+}
+
+} // namespace yup
diff --git a/modules/yup_dsp/utilities/yup_DspMath.h b/modules/yup_dsp/utilities/yup_DspMath.h
index 920522777..c058bbc2c 100644
--- a/modules/yup_dsp/utilities/yup_DspMath.h
+++ b/modules/yup_dsp/utilities/yup_DspMath.h
@@ -110,6 +110,24 @@ FloatType fastCos (FloatType x) noexcept
 
 //==============================================================================
 
+/** Generic scalar dot product, used whenever no SIMD specialization matches the argument types */
+template <typename CoeffType, typename SampleType>
+SampleType dotProduct (const CoeffType* __restrict a, const SampleType* __restrict b, std::size_t length) noexcept
+{
+    // Products are summed in CoeffType; only the final total is converted to SampleType
+    auto sum = CoeffType (0);
+    for (std::size_t index = 0; index != length; ++index)
+        sum += a[index] * static_cast<CoeffType> (b[index]);
+
+    return static_cast<SampleType> (sum);
+}
+
+/** Explicit float/float specialization, implemented with SIMD in yup_DspMath.cpp */
+template <>
+float dotProduct (const float* __restrict a, const float* __restrict b, std::size_t length) noexcept;
+
+//==============================================================================
+
 /** Bilinear transform from s-plane to z-plane with frequency warping */
 template <typename FloatType>
 void bilinearTransform (FloatType& a0, FloatType& a1, FloatType& a2, FloatType& b0, FloatType& b1, FloatType& b2, FloatType frequency, FloatType sampleRate) noexcept
diff --git a/modules/yup_dsp/windowing/yup_WindowFunctions.h b/modules/yup_dsp/windowing/yup_WindowFunctions.h
index e3968b8cb..2e9ddadfa 100644
--- a/modules/yup_dsp/windowing/yup_WindowFunctions.h
+++ b/modules/yup_dsp/windowing/yup_WindowFunctions.h
@@ -35,21 +35,22 @@ namespace yup
 */
 enum class WindowType
 {
-    rectangular,    /**< Rectangular (no windowing) */
-    hann,           /**< Hann window (raised cosine) */
-    hamming,        /**< Hamming window */
-    blackman,       /**< Blackman window */
-    blackmanHarris, /**< Blackman-Harris window (4-term) */
-    kaiser,         /**< Kaiser window (parameterizable) */
-    gaussian,       /**< Gaussian window */
-    tukey,          /**< Tukey window (tapered cosine) */
-    bartlett,       /**< Bartlett window (triangular) */
-    welch,          /**< Welch window (parabolic) */
-    flattop,        /**< Flat-top window */
-    cosine,         /**< Cosine window */
-    lanczos,        /**< Lanczos window (sinc) */
-    nuttall,        /**< Nuttall window */
-    blackmanNuttall /**< Blackman-Nuttall window */
+    rectangular,     /**< Rectangular (no windowing) */
+    hann,            /**< Hann window (raised cosine) */
+    hamming,         /**< Hamming window */
+    blackman,        /**< Blackman window */
+    blackmanHarris,  /**< Blackman-Harris window (4-term) */
+    kaiser,          /**< Kaiser window (parameterizable) */
+    gaussian,        /**< Gaussian window */
+    tukey,           /**< Tukey window (tapered cosine) */
+    bartlett,        /**< Bartlett window (triangular) */
+    welch,           /**< Welch window (parabolic) */
+    flattop,         /**< Flat-top window */
+    cosine,          /**< Cosine window */
+    lanczos,         /**< Lanczos window (sinc) */
+    nuttall,         /**< Nuttall window */
+    blackmanNuttall, /**< Blackman-Nuttall window */
+    rakshitUllah     /**< Rakshit-Ullah adjustable window (parameterizable) */
 };
 
 //==============================================================================
@@ -131,6 +132,8 @@ class WindowFunctions
                 return nuttall (n, N);
             case WindowType::blackmanNuttall:
                 return blackmanNuttall (n, N);
+            case WindowType::rakshitUllah:
+                return rakshitUllah (n, N, parameter);
             default:
                 return rectangular (n, N);
         }
@@ -353,6 +356,55 @@ class WindowFunctions
         return a0 - a1 * std::cos (factor) + a2 * std::cos (FloatType (2) * factor) - a3 * std::cos (FloatType (3) * factor);
     }
 
+    /**
+        Rakshit-Ullah adjustable window.
+
+        Evaluates the product of a difference of two hyperbolic tangents and a three-term
+        weighted cosine, as proposed by Hrishi Rakshit and Muhammad Ahsan Ullah (2015).
+
+        @param n Sample index (0 to N-1)
+        @param N Window length (lengths of 1 or less always yield 1)
+        @param r Controlling parameter (default 1.0). Higher values give better side-lobe roll-off.
+               Common values: 0.0005, 1.18, 1.618, 30, 75
+        @return Window value at sample n
+
+        @note Reference: "FIR Filter Design Using An Adjustable Novel Window and Its Applications"
+              International Journal of Engineering and Technology (IJET), 2015
+    */
+    static FloatType rakshitUllah (int n, int N, FloatType r = FloatType (1)) noexcept
+    {
+        if (N <= 1)
+            return FloatType (1);
+
+        // Constants from the paper
+        constexpr auto alpha = FloatType (2);
+        constexpr auto B = FloatType (2);
+
+        // Distance of this sample from the window midpoint
+        const auto midpoint = (N - 1) / FloatType (2);
+        const auto offset = n - midpoint;
+
+        // Hyperbolic tangent term: two tanh edges placed cosh^2 (alpha) either side of the midpoint
+        const auto coshAlpha = std::cosh (alpha);
+        const auto edgeShift = coshAlpha * coshAlpha;
+        const auto tanhTerm = std::tanh ((offset + edgeShift) / B) - std::tanh ((offset - edgeShift) / B);
+
+        // Weighted cosine term
+        const auto phase = MathConstants<FloatType>::twoPi * n / (N - 1);
+        const auto cosineTerm = FloatType (0.375) - FloatType (0.5) * std::cos (phase)
+                              + FloatType (0.125) * std::cos (FloatType (2) * phase);
+
+        const auto product = tanhTerm * cosineTerm;
+
+        // An exponent of (approximately) one leaves the product untouched
+        if (approximatelyEqual (r, FloatType (1)))
+            return product;
+
+        // Otherwise raise the magnitude to the power r and restore the sign
+        const auto sign = product >= FloatType (0) ? FloatType (1) : FloatType (-1);
+        return std::pow (std::abs (product), r) * sign;
+    }
+
 private:
     //==============================================================================
     /** Modified Bessel function of the first kind, order 0 */
diff --git a/modules/yup_dsp/yup_dsp.cpp b/modules/yup_dsp/yup_dsp.cpp
index e7c1087c4..85c09b0b8 100644
--- a/modules/yup_dsp/yup_dsp.cpp
+++ b/modules/yup_dsp/yup_dsp.cpp
@@ -31,12 +31,55 @@
 #include "yup_dsp.h"
 
 //==============================================================================
+
+#include <atomic>
+#include <thread>
+
+//==============================================================================
+
+#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_VDSP && (YUP_MAC || YUP_IOS)
+#define YUP_FFT_USING_VDSP 1
+#define YUP_FFT_FOUND_BACKEND 1
+#endif
+
+#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_INTEL_IPP && __has_include(<ipp.h>)
+#include <ipp.h>
+#define YUP_FFT_USING_IPP 1
+#define YUP_FFT_FOUND_BACKEND 1
+#endif
+
+#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_FFTW3 && __has_include(<fftw3.h>)
+#include <fftw3.h>
+#define YUP_FFT_USING_FFTW3 1
+#define YUP_FFT_FOUND_BACKEND 1
+#endif
+
+#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_PFFFT && YUP_MODULE_AVAILABLE_pffft_library
+#include <pffft_library/pffft_library.h>
+#define YUP_FFT_USING_PFFFT 1
+#define YUP_FFT_FOUND_BACKEND 1
+#endif
+
+#if ! YUP_FFT_FOUND_BACKEND && YUP_ENABLE_OOURA
+#include "yup_OouraFFT8g.h"
+#define YUP_FFT_USING_OOURA 1
+#define YUP_FFT_FOUND_BACKEND 1
+#endif
+
+#if ! defined(YUP_FFT_FOUND_BACKEND)
+#error "Unable to find a proper FFT backend !"
+#endif
+
+//==============================================================================
+
 #include "frequency/yup_FFTProcessor.cpp"
 #include "frequency/yup_SpectrumAnalyzerState.cpp"
+#include "designers/yup_FilterDesigner.cpp"
+#include "convolution/yup_PartitionedConvolver.cpp"
+#include "utilities/yup_DspMath.cpp"
+
+//==============================================================================
 
 #if YUP_ENABLE_OOURA && YUP_FFT_USING_OOURA
 #include "frequency/yup_OouraFFT8g.cpp"
 #endif
-
-//==============================================================================
-#include "designers/yup_FilterDesigner.cpp"
diff --git a/modules/yup_dsp/yup_dsp.h b/modules/yup_dsp/yup_dsp.h
index b6eca3c79..5181dfb6c 100644
--- a/modules/yup_dsp/yup_dsp.h
+++ b/modules/yup_dsp/yup_dsp.h
@@ -68,7 +68,7 @@
     Enable Apple's vDSP backend.
 */
 #ifndef YUP_ENABLE_VDSP
-#if (YUP_MAC || YUP_IOS)
+#if (YUP_MAC || YUP_IOS) && YUP_USE_VDSP_FRAMEWORK
 #define YUP_ENABLE_VDSP 1
 #else
 #define YUP_ENABLE_VDSP 0
@@ -97,6 +97,7 @@
 #include <cmath>
 #include <complex>
 #include <memory>
+#include <optional>
 #include <vector>
 
 //==============================================================================
@@ -137,8 +138,10 @@
 #include "filters/yup_StateVariableFilter.h"
 #include "filters/yup_ButterworthFilter.h"
 #include "filters/yup_LinkwitzRileyFilter.h"
+#include "filters/yup_DirectFIR.h"
 
 // Dynamics processors
 #include "dynamics/yup_SoftClipper.h"
 
-//==============================================================================
+// Convolution processors
+#include "convolution/yup_PartitionedConvolver.h"
diff --git a/tests/main.cpp b/tests/main.cpp
index 37d5d7637..35d8eae6f 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -80,7 +80,10 @@ struct TestApplication : yup::YUPApplication
         else
         {
             // Run suites individually
-            runNextSuite (0);
+            yup::MessageManager::callAsync ([this]
+            {
+                runNextSuite (0);
+            });
         }
     }
 
diff --git a/tests/yup_audio_basics/yup_FloatVectorOperations.cpp b/tests/yup_audio_basics/yup_FloatVectorOperations.cpp
index 223cbda7d..85a551710 100644
--- a/tests/yup_audio_basics/yup_FloatVectorOperations.cpp
+++ b/tests/yup_audio_basics/yup_FloatVectorOperations.cpp
@@ -125,42 +125,47 @@ class FloatVectorOperationsTests : public ::testing::Test
             FloatVectorOperations::fill (data2, (ValueType) 3, num);
             FloatVectorOperations::addWithMultiply (data1, data1, data2, num);
             EXPECT_TRUE (areAllValuesEqual (data1, num, (ValueType) 8));
-        }
 
-        static void fillRandomly (Random& random, ValueType* d, int num)
-        {
-            while (--num >= 0)
-                *d++ = (ValueType) (random.nextDouble() * 1000.0);
-        }
+            FloatVectorOperations::fill (data1, (ValueType) 8, num);
+            FloatVectorOperations::copyWithDividend (data2, data1, (ValueType) 16, num);
+            EXPECT_TRUE (areAllValuesEqual (data2, num, (ValueType) 2));
 
-        static void fillRandomly (Random& random, int* d, int num)
-        {
-            while (--num >= 0)
-                *d++ = random.nextInt();
-        }
+            FloatVectorOperations::fill (data1, (ValueType) 12, num);
+            FloatVectorOperations::copyWithDivide (data2, data1, (ValueType) 3, num);
+            EXPECT_TRUE (areAllValuesEqual (data2, num, (ValueType) 4));
 
-        static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
-        {
-            while (--num >= 0)
-                *d++ = (float) *s++ * multiplier;
-        }
+            FloatVectorOperations::fill (data1, (ValueType) 20, num);
+            FloatVectorOperations::divide (data1, (ValueType) 4, num);
+            EXPECT_TRUE (areAllValuesEqual (data1, num, (ValueType) 5));
 
-        static void convertFixedToDouble (double* d, const int* s, double multiplier, int num)
-        {
-            while (--num >= 0)
-                *d++ = (double) *s++ * multiplier;
+            FloatVectorOperations::fill (data1, (ValueType) 15, num);
+            FloatVectorOperations::fill (data2, (ValueType) 3, num);
+            HeapBlock<ValueType> result (num + 16);
+#if YUP_ARM
+            ValueType* const resultData = result;
+#else
+            // These tests deliberately operate on misaligned memory and will be flagged up by
+            // checks for undefined behavior!
+            ValueType* const resultData = addBytesToPointer (result.get(), random.nextInt (16));
+#endif
+            FloatVectorOperations::divide (resultData, data1, data2, num);
+            EXPECT_TRUE (areAllValuesEqual (resultData, num, (ValueType) 5));
+
+            FloatVectorOperations::fill (data1, (ValueType) 18, num);
+            FloatVectorOperations::divide (data2, data1, (ValueType) 6, num);
+            EXPECT_TRUE (areAllValuesEqual (data2, num, (ValueType) 3));
         }
 
-        static void convertFloatToFixed (int* d, const float* s, float multiplier, int num)
+        static void fillRandomly (Random& random, ValueType* d, int num)
         {
             while (--num >= 0)
-                *d++ = (int) (*s++ * multiplier);
+                *d++ = (ValueType) (random.nextDouble() * 1000.0);
         }
 
-        static void convertDoubleToFixed (int* d, const double* s, double multiplier, int num)
+        static void fillRandomly (Random& random, int* d, int num)
         {
             while (--num >= 0)
-                *d++ = (int) (*s++ * multiplier);
+                *d++ = random.nextInt();
         }
 
         static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
@@ -186,6 +191,43 @@ class FloatVectorOperationsTests : public ::testing::Test
             return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
         }
     };
+
+    template <class ValueType>
+    static bool valuesMatch (ValueType v1, ValueType v2)
+    {
+        return std::numeric_limits<ValueType>::epsilon() > std::abs (v1 - v2); // strict: a gap of exactly epsilon does not match
+    }
+
+    template <class ValueType>
+    static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
+    {
+        // Element-wise comparison via valuesMatch(); stops at the first differing pair
+        for (int index = 0; index < num; ++index)
+        {
+            if (! valuesMatch (d1[index], d2[index]))
+                return false;
+        }
+        return true;
+    }
+
+    static void convertFixedToFloat (float* d, const int* s, float multiplier, int num)
+    {
+        for (int index = 0; index < num; ++index) // scalar reference: integer sample times multiplier
+            d[index] = static_cast<float> (s[index]) * multiplier;
+    }
+
+    static void convertFloatToFixed (int* d, const float* s, float multiplier, int num)
+    {
+        for (int index = 0; index < num; ++index) // scalar reference: the cast truncates toward zero
+            d[index] = static_cast<int> (s[index] * multiplier);
+    }
+
+    template <class ValueType>
+    static void fillRandomly (Random& random, ValueType* d, int num)
+    {
+        for (int index = 0; index < num; ++index) // each value is nextDouble() scaled by 1000
+            d[index] = static_cast<ValueType> (random.nextDouble() * 1000.0);
+    }
 };
 
 TEST_F (FloatVectorOperationsTests, BasicOperations)
@@ -196,3 +238,83 @@ TEST_F (FloatVectorOperationsTests, BasicOperations)
         TestRunner<double>::runTest (Random::getSystemRandom());
     }
 }
+
+TEST_F (FloatVectorOperationsTests, FloatToFixedAndBack)
+{
+    Random& random = Random::getSystemRandom();
+
+    for (int iteration = 1000; --iteration >= 0;)
+    {
+        const int range = random.nextBool() ? 500 : 10;
+        const int num = random.nextInt (range) + 1;
+
+        HeapBlock<float> buffer1 (num + 16), buffer2 (num + 16);
+        HeapBlock<int> buffer3 (num + 16, true);
+
+#if YUP_ARM
+        float* const data1 = buffer1;
+        float* const data2 = buffer2;
+        int* const int1 = buffer3;
+#else
+        // These tests deliberately operate on misaligned memory and will be flagged up by
+        // checks for undefined behavior!
+        float* const data1 = addBytesToPointer (buffer1.get(), random.nextInt (16));
+        float* const data2 = addBytesToPointer (buffer2.get(), random.nextInt (16));
+        int* const int1 = addBytesToPointer (buffer3.get(), random.nextInt (16));
+#endif
+
+        fillRandomly (random, data1, num);
+        fillRandomly (random, data2, num);
+        // Integer source data, scaled by 1 / 65536 when converted to float below
+        fillRandomly (random, int1, num);
+        const auto multiplier = (float) (1.0 / (1 << 16));
+        // Fixed -> float: the fixture's scalar reference must agree with the library routine
+        convertFixedToFloat (data1, int1, multiplier, num);
+        FloatVectorOperations::convertFixedToFloat (data2, int1, multiplier, num);
+        EXPECT_TRUE (buffersMatch (data1, data2, num));
+        // Float -> fixed: same comparison in the other direction, using the reciprocal multiplier
+        convertFloatToFixed (int1, data1, 1.0f / multiplier, num);
+        HeapBlock<int> int2 (num + 16);
+#if YUP_ARM
+        int* const intData = int2;
+#else
+        int* const intData = addBytesToPointer (int2.get(), random.nextInt (16));
+#endif
+        FloatVectorOperations::convertFloatToFixed (intData, data1, 1.0f / multiplier, num);
+
+        for (int index = 0; index < num; ++index) // distinct name: no longer shadows the outer loop counter
+            EXPECT_EQ (int1[index], intData[index]);
+    }
+}
+
+TEST_F (FloatVectorOperationsTests, FloatToDoubleAndBack)
+{
+    Random& random = Random::getSystemRandom();
+
+    for (int iteration = 1000; --iteration >= 0;)
+    {
+        const int range = random.nextBool() ? 500 : 10;
+        const int num = random.nextInt (range) + 1;
+
+        HeapBlock<float> floatBuffer (num + 16);
+        HeapBlock<double> doubleBuffer (num + 16);
+
+#if YUP_ARM
+        float* const floatData = floatBuffer;
+        double* const doubleData = doubleBuffer;
+#else
+        float* const floatData = addBytesToPointer (floatBuffer.get(), random.nextInt (16));
+        double* const doubleData = addBytesToPointer (doubleBuffer.get(), random.nextInt (16));
+#endif
+        // float -> double: each converted sample must match its source once narrowed back to float
+        fillRandomly (random, floatData, num);
+        FloatVectorOperations::convertFloatToDouble (doubleData, floatData, num);
+        for (int index = 0; index < num; ++index) // distinct name: no longer shadows the outer loop counter
+            EXPECT_NEAR ((float) doubleData[index], (float) floatData[index], std::numeric_limits<float>::epsilon());
+        // double -> float: each converted sample must match its source narrowed to float
+        fillRandomly (random, doubleData, num);
+        FloatVectorOperations::convertDoubleToFloat (floatData, doubleData, num);
+        for (int index = 0; index < num; ++index)
+            EXPECT_NEAR ((float) floatData[index], (float) doubleData[index], std::numeric_limits<float>::epsilon());
+    }
+}
diff --git a/tests/yup_core/yup_String.cpp b/tests/yup_core/yup_String.cpp
index 0aedea73b..bb9dcb64a 100644
--- a/tests/yup_core/yup_String.cpp
+++ b/tests/yup_core/yup_String.cpp
@@ -571,6 +571,18 @@ TEST_F (StringTests, SignificantFigures)
     EXPECT_EQ (String::toDecimalStringWithSignificantFigures (2.8647, 6), String ("2.86470"));
 
     EXPECT_EQ (String::toDecimalStringWithSignificantFigures (-0.0000000000019, 1), String ("-0.000000000002"));
+
+    EXPECT_EQ (String::toDecimalStringWithSignificantFigures (0.001, 7), String ("0.001000000"));
+    EXPECT_EQ (String::toDecimalStringWithSignificantFigures (0.01, 7), String ("0.01000000"));
+    EXPECT_EQ (String::toDecimalStringWithSignificantFigures (0.1, 7), String ("0.1000000"));
+    EXPECT_EQ (String::toDecimalStringWithSignificantFigures (1, 7), String ("1.000000"));
+    EXPECT_EQ (String::toDecimalStringWithSignificantFigures (10, 7), String ("10.00000"));
+    EXPECT_EQ (String::toDecimalStringWithSignificantFigures (100, 7), String ("100.0000"));
+    EXPECT_EQ (String::toDecimalStringWithSignificantFigures (1000, 7), String ("1000.000"));
+    EXPECT_EQ (String::toDecimalStringWithSignificantFigures (10000, 7), String ("10000.00"));
+    EXPECT_EQ (String::toDecimalStringWithSignificantFigures (100000, 7), String ("100000.0"));
+    EXPECT_EQ (String::toDecimalStringWithSignificantFigures (1000000, 7), String ("1000000"));
+    EXPECT_EQ (String::toDecimalStringWithSignificantFigures (10000000, 7), String ("10000000"));
 }
 
 TEST_F (StringTests, FloatTrimming)
diff --git a/tests/yup_dsp/yup_DirectFIR.cpp b/tests/yup_dsp/yup_DirectFIR.cpp
new file mode 100644
index 000000000..0f1747c20
--- /dev/null
+++ b/tests/yup_dsp/yup_DirectFIR.cpp
@@ -0,0 +1,588 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2025 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#include <yup_dsp/yup_dsp.h>
+
+#include <gtest/gtest.h>
+
+#include <random>
+#include <cmath>
+
+namespace yup::test
+{
+
+//==============================================================================
+// Test fixture for DirectFIR: owns a fixed-seed random generator plus small helpers
+// for building test signals, measuring buffers and designing reference coefficients.
+class DirectFIRTest : public ::testing::Test
+{
+protected:
+    void SetUp() override
+    {
+        generator.seed (42); // Fixed seed for reproducible tests
+    }
+    // Returns a uniformly distributed random value in [min, max) from the fixture's generator.
+    float randomFloat (float min = -1.0f, float max = 1.0f)
+    {
+        std::uniform_real_distribution<float> dist (min, max);
+        return dist (generator);
+    }
+    // Overwrites every sample with randomFloat() using its default range.
+    void fillWithRandomData (std::vector<float>& buffer)
+    {
+        for (auto& sample : buffer)
+            sample = randomFloat();
+    }
+    // Fills the buffer with a unit-amplitude sine; frequency and sampleRate share the same unit.
+    void fillWithSine (std::vector<float>& buffer, float frequency, float sampleRate)
+    {
+        for (size_t i = 0; i < buffer.size(); ++i)
+            buffer[i] = std::sin (2.0f * MathConstants<float>::pi * frequency * static_cast<float> (i) / sampleRate);
+    }
+    // Sets every sample to zero.
+    void clearBuffer (std::vector<float>& buffer)
+    {
+        std::fill (buffer.begin(), buffer.end(), 0.0f);
+    }
+    // Root-mean-square level of the buffer; an empty buffer yields 0.
+    float calculateRMS (const std::vector<float>& buffer)
+    {
+        if (buffer.empty())
+            return 0.0f;
+
+        float sum = 0.0f;
+        for (float sample : buffer)
+            sum += sample * sample;
+        return std::sqrt (sum / static_cast<float> (buffer.size()));
+    }
+    // Largest absolute sample value; an empty buffer yields 0.
+    float findPeak (const std::vector<float>& buffer)
+    {
+        if (buffer.empty())
+            return 0.0f;
+
+        float peak = 0.0f;
+        for (float sample : buffer)
+            peak = std::max (peak, std::abs (sample));
+        return peak;
+    }
+    // Windowed-sinc lowpass taps: h[n] = 2*fc*sin(x)/x with x = 2*pi*fc*(n - center), Hann-windowed.
+    std::vector<float> createLowpassCoefficients (int numCoefficients, float cutoffFreq, float sampleRate)
+    {
+        std::vector<float> coefficients (numCoefficients);
+        float fc = cutoffFreq / sampleRate;
+        int center = numCoefficients / 2;
+
+        for (int i = 0; i < numCoefficients; ++i)
+        {
+            if (i == center)
+                coefficients[i] = 2.0f * fc; // limit of 2*fc*sin(x)/x as x -> 0
+            else
+            {
+                float x = 2.0f * MathConstants<float>::pi * fc * (i - center);
+                coefficients[i] = 2.0f * fc * std::sin (x) / x; // same 2*fc gain as the centre tap
+            }
+            // Apply Hanning window; a one-tap filter keeps unity weight rather than dividing by zero
+            float w = 1.0f;
+            if (numCoefficients > 1)
+                w = 0.5f - 0.5f * std::cos (2.0f * MathConstants<float>::pi * i / (numCoefficients - 1));
+            coefficients[i] *= w;
+        }
+        return coefficients;
+    }
+    // Random source shared by the helpers above; re-seeded in SetUp() before every test.
+    std::mt19937 generator;
+};
+
+//==============================================================================
+// Basic API Tests
+//==============================================================================
+
+TEST_F (DirectFIRTest, DefaultConstruction)
+{
+    DirectFIR<float, float> filter;
+
+    // A freshly constructed filter reports no taps and unity scaling
+    EXPECT_EQ (filter.getNumCoefficients(), 0);
+    EXPECT_FALSE (filter.hasCoefficients());
+    EXPECT_EQ (filter.getScaling(), 1.0f);
+
+    // Processing without coefficients must not throw
+    constexpr int numSamples = 256;
+    std::vector<float> source (numSamples, 0.0f), destination (numSamples, 0.0f);
+    EXPECT_NO_THROW (filter.processBlock (source.data(), destination.data(), numSamples));
+
+    // The zero-initialised destination is expected to still hold zeros afterwards
+    for (float value : destination)
+        EXPECT_EQ (value, 0.0f);
+}
+
+TEST_F (DirectFIRTest, MoveSemantics)
+{
+    DirectFIR<float, float> fir1;
+    std::vector<float> coefficients = { 1.0f, 0.5f, 0.25f };
+    fir1.setCoefficients (coefficients, 2.0f);
+
+    // Move constructor
+    DirectFIR<float, float> fir2 = std::move (fir1);
+
+    // The move target must carry over tap count and scaling
+    EXPECT_EQ (fir2.getNumCoefficients(), 3);
+    EXPECT_TRUE (fir2.hasCoefficients());
+    EXPECT_EQ (fir2.getScaling(), 2.0f);
+
+    // This test deliberately pins the moved-from filter to the empty state (no coefficients left)
+    EXPECT_EQ (fir1.getNumCoefficients(), 0);
+
+    // Test processing with moved filter
+    std::vector<float> input (10, 0.0f);
+    input[0] = 1.0f;
+    std::vector<float> output (10, 0.0f);
+
+    EXPECT_NO_THROW (fir2.processBlock (input.data(), output.data(), static_cast<int> (input.size())));
+
+    // Should produce scaled output
+    float outputSum = 0.0f;
+    for (float sample : output)
+        outputSum += std::abs (sample);
+    EXPECT_GT (outputSum, 1.0f); // Should be > 1 due to scaling
+
+    // Move assignment
+    DirectFIR<float, float> fir3;
+    fir3 = std::move (fir2);
+
+    EXPECT_EQ (fir3.getNumCoefficients(), 3);
+    EXPECT_TRUE (fir3.hasCoefficients());
+    EXPECT_EQ (fir3.getScaling(), 2.0f);
+}
+
+//==============================================================================
+// Coefficient Setting Tests
+//==============================================================================
+
+TEST_F (DirectFIRTest, SetCoefficientsVector)
+{
+    std::vector<float> taps = { 0.1f, 0.5f, 1.0f, 0.5f, 0.1f };
+    DirectFIR<float, float> filter;
+
+    filter.setCoefficients (taps, 1.0f);
+
+    EXPECT_EQ (filter.getNumCoefficients(), 5);
+    EXPECT_TRUE (filter.hasCoefficients());
+    EXPECT_EQ (filter.getScaling(), 1.0f);
+
+    // The stored array is expected to be padded to a multiple of 4, so 5 taps occupy 8 slots
+    const auto& storedTaps = filter.getCoefficients();
+    EXPECT_EQ (storedTaps.size(), 8);
+}
+
+TEST_F (DirectFIRTest, SetCoefficientsPointer)
+{
+    float taps[] = { 0.2f, 0.4f, 0.6f, 0.8f };
+    DirectFIR<float, float> filter;
+
+    // Pointer + count overload, called with a non-unity scaling factor
+    filter.setCoefficients (taps, 4, 2.0f);
+    EXPECT_EQ (filter.getNumCoefficients(), 4);
+    EXPECT_TRUE (filter.hasCoefficients());
+    EXPECT_EQ (filter.getScaling(), 2.0f);
+}
+
+TEST_F (DirectFIRTest, SetCoefficientsNullptr)
+{
+    DirectFIR<float, float> filter;
+
+    // Start from a filter that holds coefficients
+    std::vector<float> taps = { 1.0f, 0.5f };
+    filter.setCoefficients (taps);
+    EXPECT_TRUE (filter.hasCoefficients());
+
+    // A null pointer with a zero count is expected to empty the filter again
+    filter.setCoefficients (nullptr, 0, 1.0f);
+    EXPECT_FALSE (filter.hasCoefficients());
+    EXPECT_EQ (filter.getNumCoefficients(), 0);
+}
+
+TEST_F (DirectFIRTest, SetCoefficientsWithScaling)
+{
+    std::vector<float> taps = { 1.0f, 1.0f, 1.0f };
+    DirectFIR<float, float> filter;
+
+    filter.setCoefficients (taps, 0.5f);
+
+    // Feed an impulse of height 2 through the half-scaled taps
+    std::vector<float> impulse (10, 0.0f);
+    impulse[0] = 2.0f;
+    std::vector<float> response (10, 0.0f);
+
+    filter.processBlock (impulse.data(), response.data(), static_cast<int> (impulse.size()));
+
+    // Each tap is 1.0 * 0.5 after scaling, so every tap contributes 2.0 * 0.5 = 1.0
+    // to the response and the three taps together are expected to sum to 3.0
+    float actualSum = 0.0f;
+    for (size_t n = 0; n < 5; ++n) // Check first 5 samples
+        actualSum += response[n];
+    const float expectedSum = 3.0f;
+
+    EXPECT_NEAR (actualSum, expectedSum, 0.001f);
+}
+
+//==============================================================================
+// Processing Tests
+//==============================================================================
+
+TEST_F (DirectFIRTest, ImpulseResponse)
+{
+    std::vector<float> taps = { 1.0f, 0.5f, 0.25f };
+    DirectFIR<float, float> filter;
+    filter.setCoefficients (taps);
+
+    // Drive the filter with a unit impulse at sample 0
+    std::vector<float> impulse (10, 0.0f);
+    impulse[0] = 1.0f;
+    std::vector<float> response (10, 0.0f);
+
+    filter.processBlock (impulse.data(), response.data(), static_cast<int> (impulse.size()));
+
+    // The response is expected to replay the taps in their original order: h0, h1, h2
+    EXPECT_NEAR (response[0], 1.0f, 0.001f);
+    EXPECT_NEAR (response[1], 0.5f, 0.001f);
+    EXPECT_NEAR (response[2], 0.25f, 0.001f);
+
+    // ...followed by zeros once the taps are exhausted
+    for (size_t n = 3; n < response.size(); ++n)
+        EXPECT_NEAR (response[n], 0.0f, 0.001f);
+}
+
+TEST_F (DirectFIRTest, AccumulativeOutput)
+{
+    std::vector<float> taps = { 0.5f, 0.5f };
+    DirectFIR<float, float> filter;
+    filter.setCoefficients (taps);
+
+    std::vector<float> source (5, 1.0f);
+
+    // The destination starts out holding ones; keep a copy so the
+    // values after processing can be compared against it
+    std::vector<float> destination (5, 1.0f);
+    std::vector<float> before = destination;
+
+    filter.processBlock (source.data(), destination.data(), static_cast<int> (source.size()));
+
+    // The filter result is expected to be added on top of the existing destination contents
+    for (size_t n = 0; n < destination.size(); ++n)
+        EXPECT_GT (destination[n], before[n]);
+}
+
+TEST_F (DirectFIRTest, Linearity)
+{
+    std::vector<float> taps;
+    FilterDesigner<float>::designFIRLowpass (taps, 32, 1000.0f, 44100.0f);
+    DirectFIR<float, float> filter;
+    filter.setCoefficients (taps);
+
+    std::vector<float> stimulus (512);
+    fillWithRandomData (stimulus);
+
+    // Second stimulus: the same data at twice the amplitude
+    std::vector<float> doubledStimulus = stimulus;
+    FloatVectorOperations::multiply (doubledStimulus.data(), 2.0f, doubledStimulus.size());
+
+    std::vector<float> response (512, 0.0f);
+    std::vector<float> doubledResponse (512, 0.0f);
+
+    filter.reset();
+    filter.processBlock (stimulus.data(), response.data(), static_cast<int> (stimulus.size()));
+
+    filter.reset();
+    filter.processBlock (doubledStimulus.data(), doubledResponse.data(), static_cast<int> (doubledStimulus.size()));
+
+    // Doubling the input is expected to double the output, sample by sample
+    for (size_t n = 0; n < response.size(); ++n)
+    {
+        if (std::abs (response[n]) > 0.001f) // Avoid division by near-zero
+            EXPECT_NEAR (doubledResponse[n] / response[n], 2.0f, 0.01f);
+    }
+}
+
+TEST_F (DirectFIRTest, Reset)
+{
+    std::vector<float> taps = { 1.0f, 0.8f, 0.6f, 0.4f, 0.2f };
+    DirectFIR<float, float> filter;
+    filter.setCoefficients (taps);
+
+    std::vector<float> stimulus (20);
+    fillWithRandomData (stimulus);
+    std::vector<float> firstPass (20, 0.0f);
+
+    // First pass: leaves the filter with whatever internal state the stimulus builds up
+    filter.processBlock (stimulus.data(), firstPass.data(), static_cast<int> (stimulus.size()));
+
+    // Second pass: identical stimulus, but only after a reset
+    filter.reset();
+    std::vector<float> secondPass (20, 0.0f);
+    filter.processBlock (stimulus.data(), secondPass.data(), static_cast<int> (stimulus.size()));
+
+    // Both passes are expected to produce the same samples
+    for (size_t n = 0; n < firstPass.size(); ++n)
+        EXPECT_NEAR (firstPass[n], secondPass[n], 0.0001f);
+}
+
+//==============================================================================
+// Signal Processing Tests
+//==============================================================================
+
+TEST_F (DirectFIRTest, LowpassFiltering)
+{
+    DirectFIR<float, float> fir;
+
+    const float sampleRate = 44100.0f;
+    const size_t bufferSize = 2048;
+
+    // Create lowpass filter coefficients (1 kHz cutoff) at the same rate used for the test tones
+    std::vector<float> coefficients;
+    FilterDesigner<float>::designFIRLowpass (coefficients, 64, 1000.0f, sampleRate);
+    fir.setCoefficients (coefficients);
+
+    // Test with low frequency (should pass)
+    std::vector<float> lowFreqInput (bufferSize);
+    fillWithSine (lowFreqInput, 500.0f, sampleRate);
+    std::vector<float> lowFreqOutput (bufferSize, 0.0f);
+
+    fir.processBlock (lowFreqInput.data(), lowFreqOutput.data(), static_cast<int> (bufferSize));
+
+    // Test with high frequency (should be attenuated)
+    fir.reset();
+    std::vector<float> highFreqInput (bufferSize);
+    fillWithSine (highFreqInput, 5000.0f, sampleRate);
+    std::vector<float> highFreqOutput (bufferSize, 0.0f);
+
+    fir.processBlock (highFreqInput.data(), highFreqOutput.data(), static_cast<int> (bufferSize));
+
+    // Compare RMS levels (skip first samples due to transient)
+    const size_t skipSamples = 100;
+    float lowFreqRMS = 0.0f, highFreqRMS = 0.0f;
+
+    for (size_t i = skipSamples; i < bufferSize; ++i)
+    {
+        lowFreqRMS += lowFreqOutput[i] * lowFreqOutput[i];
+        highFreqRMS += highFreqOutput[i] * highFreqOutput[i];
+    }
+
+    lowFreqRMS = std::sqrt (lowFreqRMS / (bufferSize - skipSamples));
+    highFreqRMS = std::sqrt (highFreqRMS / (bufferSize - skipSamples));
+
+    // The 500 Hz tone must come out at more than twice the RMS level of the 5 kHz tone
+    EXPECT_GT (lowFreqRMS, highFreqRMS * 2.0f);
+}
+
+TEST_F (DirectFIRTest, BlockSizeIndependence)
+{
+    DirectFIR<float, float> fir;
+    std::vector<float> coefficients;
+    FilterDesigner<float>::designFIRLowpass (coefficients, 48, 2000.0f, 44100.0);
+    fir.setCoefficients (coefficients);
+
+    const size_t totalSamples = 1024;
+    std::vector<float> input (totalSamples);
+    fillWithRandomData (input);
+
+    // Process in one big block (sample count cast to int, as in the chunked calls below)
+    fir.reset();
+    std::vector<float> output1 (totalSamples, 0.0f);
+    fir.processBlock (input.data(), output1.data(), static_cast<int> (totalSamples));
+
+    // Process in smaller blocks
+    fir.reset();
+    std::vector<float> output2 (totalSamples, 0.0f);
+    const std::vector<size_t> blockSizes = { 32, 64, 128, 256, 32, 128, 64 };
+    size_t processed = 0;
+
+    for (size_t blockSize : blockSizes)
+    {
+        if (processed >= totalSamples)
+            break;
+
+        if (processed + blockSize > totalSamples)
+            blockSize = totalSamples - processed;
+
+        if (blockSize == 0)
+            break;
+
+        fir.processBlock (input.data() + processed, output2.data() + processed, static_cast<int> (blockSize));
+        processed += blockSize;
+    }
+
+    // The listed block sizes add up to 704 samples, so the remainder is processed here
+    while (processed < totalSamples)
+    {
+        size_t remaining = totalSamples - processed;
+        size_t blockSize = std::min (remaining, size_t (128)); // Process in chunks of 128
+        fir.processBlock (input.data() + processed, output2.data() + processed, static_cast<int> (blockSize));
+        processed += blockSize;
+    }
+
+    // Outputs should be identical regardless of block size
+    for (size_t i = 0; i < totalSamples; ++i)
+        EXPECT_NEAR (output1[i], output2[i], 0.0001f);
+}
+
+//==============================================================================
+// Edge Cases and Error Handling
+//==============================================================================
+
+TEST_F (DirectFIRTest, ZeroSamples)
+{
+    std::vector<float> taps = { 1.0f, 0.5f };
+    DirectFIR<float, float> filter;
+    filter.setCoefficients (taps);
+
+    std::vector<float> source (10, 1.0f);
+    std::vector<float> destination (10, 0.0f);
+
+    // A zero-length block must not throw
+    EXPECT_NO_THROW (filter.processBlock (source.data(), destination.data(), 0));
+
+    // ...and is expected to leave the zero-initialised destination untouched
+    for (float value : destination)
+        EXPECT_EQ (value, 0.0f);
+}
+
+TEST_F (DirectFIRTest, NullPointers)
+{
+    std::vector<float> taps = { 1.0f };
+    DirectFIR<float, float> filter;
+    filter.setCoefficients (taps);
+
+    std::vector<float> scratch (10, 0.0f);
+
+    // Missing input: must not throw
+    EXPECT_NO_THROW (filter.processBlock (nullptr, scratch.data(), 10));
+
+    // Missing output: must not throw
+    EXPECT_NO_THROW (filter.processBlock (scratch.data(), nullptr, 10));
+
+    // Missing input and output: must not throw
+    EXPECT_NO_THROW (filter.processBlock (nullptr, nullptr, 10));
+}
+
+TEST_F (DirectFIRTest, LargeTapCounts)
+{
+    DirectFIR<float, float> filter;
+
+    // 512 taps: an exponentially decaying sinusoid
+    std::vector<float> taps (512);
+    for (size_t n = 0; n < taps.size(); ++n)
+        taps[n] = std::exp (-static_cast<float> (n) / 100.0f) * std::sin (2.0f * MathConstants<float>::pi * n / 16.0f);
+
+    EXPECT_NO_THROW (filter.setCoefficients (taps));
+    EXPECT_EQ (filter.getNumCoefficients(), 512);
+
+    // Run 1024 random samples through the filter; this must not throw
+    std::vector<float> stimulus (1024);
+    std::vector<float> response (1024, 0.0f);
+    fillWithRandomData (stimulus);
+
+    EXPECT_NO_THROW (filter.processBlock (stimulus.data(), response.data(), static_cast<int> (stimulus.size())));
+
+    // The output level has to be non-trivial but bounded
+    const float level = calculateRMS (response);
+    EXPECT_GT (level, 0.001f);
+    EXPECT_LT (level, 10.0f);
+}
+
+TEST_F (DirectFIRTest, SingleTap)
+{
+    std::vector<float> taps = { 0.75f };
+    DirectFIR<float, float> filter;
+    filter.setCoefficients (taps);
+
+    EXPECT_EQ (filter.getNumCoefficients(), 1);
+
+    // With a single tap the filter is expected to behave as a plain gain of 0.75
+    std::vector<float> ramp = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
+    std::vector<float> scaled (5, 0.0f);
+
+    filter.processBlock (ramp.data(), scaled.data(), static_cast<int> (ramp.size()));
+
+    for (size_t n = 0; n < ramp.size(); ++n)
+        EXPECT_NEAR (scaled[n], ramp[n] * 0.75f, 0.001f);
+}
+
+//==============================================================================
+// Performance and Memory Tests
+//==============================================================================
+
+TEST_F (DirectFIRTest, MemoryAlignment)
+{
+    DirectFIR<float, float> filter;
+
+    // 37 taps: deliberately not a multiple of 4. The taps are non-zero so that
+    // the zero padding checked below can be told apart from real coefficients
+    std::vector<float> taps (37, 0.1f);
+    filter.setCoefficients (taps);
+
+    // Stored coefficients are expected to be padded up to a multiple of 4
+    const auto& storedTaps = filter.getCoefficients();
+    EXPECT_EQ (storedTaps.size() % 4, 0); // Should be multiple of 4
+    EXPECT_GE (storedTaps.size(), 37);    // Should be at least original size
+
+    // Everything past the original 37 taps must be zero
+    for (size_t n = 37; n < storedTaps.size(); ++n)
+        EXPECT_EQ (storedTaps[n], 0.0f);
+}
+
+TEST_F (DirectFIRTest, StressTest)
+{
+    DirectFIR<float, float> filter;
+
+    // 256 taps: an exponentially decaying cosine
+    std::vector<float> taps (256);
+    for (size_t n = 0; n < taps.size(); ++n)
+    {
+        const float position = static_cast<float> (n) / 256.0f;
+        taps[n] = std::exp (-position * 5.0f) * std::cos (20.0f * MathConstants<float>::pi * position);
+    }
+    filter.setCoefficients (taps);
+
+    // Feed consecutive blocks of assorted sizes through the same filter instance (no reset in between)
+    const std::vector<size_t> blockSizes = { 1, 7, 32, 63, 128, 255, 512, 1023 };
+
+    for (size_t blockSize : blockSizes)
+    {
+        SCOPED_TRACE (testing::Message() << "Block size: " << blockSize);
+
+        std::vector<float> stimulus (blockSize);
+        std::vector<float> response (blockSize, 0.0f);
+        fillWithRandomData (stimulus);
+
+        EXPECT_NO_THROW (filter.processBlock (stimulus.data(), response.data(), static_cast<int> (blockSize)));
+
+        // Every output sample has to be finite and stay within a loose bound
+        for (float value : response)
+        {
+            EXPECT_TRUE (std::isfinite (value));
+            EXPECT_LT (std::abs (value), 100.0f); // Reasonable bounds
+        }
+    }
+}
+
+} // namespace yup::test
diff --git a/tests/yup_dsp/yup_FilterDesigner.cpp b/tests/yup_dsp/yup_FilterDesigner.cpp
index ca43acc07..5ddc72b6d 100644
--- a/tests/yup_dsp/yup_FilterDesigner.cpp
+++ b/tests/yup_dsp/yup_FilterDesigner.cpp
@@ -23,6 +23,8 @@
 
 #include <gtest/gtest.h>
 
+#include <fstream>
+
 using namespace yup;
 
 //==============================================================================
@@ -358,3 +360,319 @@ TEST_F (FilterDesignerTests, FloatPrecisionConsistency)
     EXPECT_NEAR (doubleCoeffs.a1, static_cast<double> (floatCoeffs.a1), toleranceF);
     EXPECT_NEAR (doubleCoeffs.a2, static_cast<double> (floatCoeffs.a2), toleranceF);
 }
+
+//==============================================================================
+// FIR Filter Design Tests
+//==============================================================================
+
+TEST_F (FilterDesignerTests, FirLowpassBasicProperties)
+{
+    const int numCoeffs = 65; // Odd number for symmetric filter
+    std::vector<float> coeffs;
+    FilterDesigner<float>::designFIRLowpass (coeffs, numCoeffs, 1000.0f, sampleRate);
+
+    // Must return the requested number of coefficients (fatal: loops below index up to numCoeffs - 1)
+    ASSERT_EQ (coeffs.size(), numCoeffs);
+
+    // All coefficients should be finite
+    for (const auto& coeff : coeffs)
+        EXPECT_TRUE (std::isfinite (coeff));
+
+    // FIR filter should be symmetric for linear phase
+    const int center = (numCoeffs - 1) / 2;
+    for (int i = 0; i < center; ++i)
+        EXPECT_NEAR (coeffs[i], coeffs[numCoeffs - 1 - i], toleranceF);
+
+    // Center coefficient should be largest for lowpass
+    for (int i = 0; i < numCoeffs; ++i)
+    {
+        if (i != center)
+            EXPECT_GE (coeffs[center], coeffs[i]);
+    }
+}
+
+TEST_F (FilterDesignerTests, FirHighpassBasicProperties)
+{
+    const int numCoeffs = 65;
+    std::vector<float> coeffs;
+    FilterDesigner<float>::designFIRHighpass (coeffs, numCoeffs, 1000.0f, sampleRate);
+
+    // Must return the requested number of coefficients (fatal: loops below index up to numCoeffs - 1)
+    ASSERT_EQ (coeffs.size(), numCoeffs);
+
+    // All coefficients should be finite
+    for (const auto& coeff : coeffs)
+        EXPECT_TRUE (std::isfinite (coeff));
+
+    // FIR filter should be symmetric for linear phase
+    const int center = (numCoeffs - 1) / 2;
+    for (int i = 0; i < center; ++i)
+        EXPECT_NEAR (coeffs[i], coeffs[numCoeffs - 1 - i], toleranceF);
+
+    // Sum of coefficients should be approximately zero for highpass (DC gain = 0)
+    // Note: windowing can cause small deviations from ideal DC gain
+    float sum = 0.0f;
+    for (const auto& coeff : coeffs)
+        sum += coeff;
+
+    EXPECT_NEAR (sum, 0.0f, 0.05f); // Relaxed tolerance for windowed FIR
+}
+
+TEST_F (FilterDesignerTests, FirBandpassBasicProperties)
+{
+    const int numCoeffs = 65;
+    std::vector<float> coeffs;
+    FilterDesigner<float>::designFIRBandpass (coeffs, numCoeffs, 800.0f, 1200.0f, sampleRate);
+
+    // Must return the requested number of coefficients (fatal: loops below index up to numCoeffs - 1)
+    ASSERT_EQ (coeffs.size(), numCoeffs);
+
+    // All coefficients should be finite
+    for (const auto& coeff : coeffs)
+        EXPECT_TRUE (std::isfinite (coeff));
+
+    // FIR filter should be symmetric for linear phase
+    const int center = (numCoeffs - 1) / 2;
+    for (int i = 0; i < center; ++i)
+        EXPECT_NEAR (coeffs[i], coeffs[numCoeffs - 1 - i], toleranceF);
+
+    // Sum of coefficients should be approximately zero for bandpass (DC gain = 0)
+    // Note: windowing can cause small deviations from ideal DC gain
+    float sum = 0.0f;
+    for (const auto& coeff : coeffs)
+        sum += coeff;
+
+    EXPECT_NEAR (sum, 0.0f, 0.15f); // Relaxed tolerance for windowed FIR
+}
+
+TEST_F (FilterDesignerTests, FirBandstopBasicProperties)
+{
+    const int numCoeffs = 65;
+    std::vector<float> coeffs;
+    FilterDesigner<float>::designFIRBandstop (coeffs, numCoeffs, 800.0f, 1200.0f, sampleRate);
+
+    // Must return the requested number of coefficients (fatal: loops below index up to numCoeffs - 1)
+    ASSERT_EQ (coeffs.size(), numCoeffs);
+
+    // All coefficients should be finite
+    for (const auto& coeff : coeffs)
+        EXPECT_TRUE (std::isfinite (coeff));
+
+    // FIR filter should be symmetric for linear phase
+    const int center = (numCoeffs - 1) / 2;
+    for (int i = 0; i < center; ++i)
+        EXPECT_NEAR (coeffs[i], coeffs[numCoeffs - 1 - i], toleranceF);
+
+    // Sum of coefficients should be approximately 1.0 for bandstop (DC gain = 1)
+    // Note: windowing can cause small deviations from ideal DC gain
+    float sum = 0.0f;
+    for (const auto& coeff : coeffs)
+        sum += coeff;
+
+    EXPECT_NEAR (sum, 1.0f, 0.15f); // Relaxed tolerance for windowed FIR
+}
+
+TEST_F (FilterDesignerTests, FirDifferentWindowTypes)
+{
+    const int numCoeffs = 33;
+
+    // Design the same lowpass with three different window types
+    std::vector<float> hannCoeffs, hammingCoeffs, blackmanCoeffs;
+
+    FilterDesigner<float>::designFIRLowpass (hannCoeffs, numCoeffs, 1000.0f, sampleRate, WindowType::hann);
+    FilterDesigner<float>::designFIRLowpass (hammingCoeffs, numCoeffs, 1000.0f, sampleRate, WindowType::hamming);
+    FilterDesigner<float>::designFIRLowpass (blackmanCoeffs, numCoeffs, 1000.0f, sampleRate, WindowType::blackman);
+
+    // All must have the requested size (fatal: each vector is indexed up to numCoeffs - 1 below)
+    ASSERT_EQ (hannCoeffs.size(), numCoeffs);
+    ASSERT_EQ (hammingCoeffs.size(), numCoeffs);
+    ASSERT_EQ (blackmanCoeffs.size(), numCoeffs);
+
+    // All coefficients should be finite
+    for (int i = 0; i < numCoeffs; ++i)
+    {
+        EXPECT_TRUE (std::isfinite (hannCoeffs[i]));
+        EXPECT_TRUE (std::isfinite (hammingCoeffs[i]));
+        EXPECT_TRUE (std::isfinite (blackmanCoeffs[i]));
+    }
+
+    // Different windows should produce different coefficients (Hann vs Blackman is compared)
+    bool coeffsDifferent = false;
+    for (int i = 0; i < numCoeffs; ++i)
+    {
+        if (std::abs (hannCoeffs[i] - blackmanCoeffs[i]) > toleranceF)
+        {
+            coeffsDifferent = true;
+            break;
+        }
+    }
+    EXPECT_TRUE (coeffsDifferent);
+}
+
+TEST_F (FilterDesignerTests, FirFloatDoubleConsistency)
+{
+    const int numCoeffs = 33;
+
+    std::vector<double> doubleCoeffs;
+    FilterDesigner<double>::designFIRLowpass (doubleCoeffs, numCoeffs, 1000.0, sampleRate);
+
+    std::vector<float> floatCoeffs;
+    FilterDesigner<float>::designFIRLowpass (floatCoeffs, numCoeffs, 1000.0f, sampleRate);
+
+    ASSERT_EQ (doubleCoeffs.size(), numCoeffs); // Fatal: both vectors are indexed up to numCoeffs - 1 below
+    ASSERT_EQ (floatCoeffs.size(), numCoeffs);
+    // Coefficients should be very similar between float and double precision
+    for (int i = 0; i < numCoeffs; ++i)
+        EXPECT_NEAR (doubleCoeffs[i], static_cast<double> (floatCoeffs[i]), toleranceF);
+}
+
+TEST_F (FilterDesignerTests, DISABLED_ExportFIRCoefficientsForAnalysis)
+{
+    const int numCoeffs = 97;
+    const float sampleRateF = 44100.0f;
+
+    // Design different FIR filters
+    std::vector<float> lowpass, highpass, bandpass, bandstop;
+    FilterDesigner<float>::designFIRLowpass (lowpass, numCoeffs, 10000.0f, sampleRateF);
+    FilterDesigner<float>::designFIRHighpass (highpass, numCoeffs, 10000.0f, sampleRateF);
+    FilterDesigner<float>::designFIRBandpass (bandpass, numCoeffs, 8000.0f, 12000.0f, sampleRateF);
+    FilterDesigner<float>::designFIRBandstop (bandstop, numCoeffs, 8000.0f, 12000.0f, sampleRateF);
+
+    // Different windows for lowpass
+    std::vector<float> lowpassHann, lowpassHamming, lowpassBlackman;
+    FilterDesigner<float>::designFIRLowpass (lowpassHann, numCoeffs, 10000.0f, sampleRateF, WindowType::hann);
+    FilterDesigner<float>::designFIRLowpass (lowpassHamming, numCoeffs, 10000.0f, sampleRateF, WindowType::hamming);
+    FilterDesigner<float>::designFIRLowpass (lowpassBlackman, numCoeffs, 10000.0f, sampleRateF, WindowType::blackman);
+
+    // Helper lambda to write coefficients to file, one value per line (no trailing newline)
+    auto writeCoeffs = [] (const std::vector<float>& coeffs, const std::string& filename)
+    {
+        std::ofstream file (filename);
+        if (file.is_open())
+        {
+            for (size_t i = 0; i < coeffs.size(); ++i)
+            {
+                file << coeffs[i];
+                if (i < coeffs.size() - 1)
+                    file << "\n";
+            }
+            file.close();
+        }
+    };
+
+    // Write all coefficient sets to files (relative to the current working directory)
+    writeCoeffs (lowpass, "fir_lowpass_10000hz.txt");
+    writeCoeffs (highpass, "fir_highpass_10000hz.txt");
+    writeCoeffs (bandpass, "fir_bandpass_8000_12000hz.txt");
+    writeCoeffs (bandstop, "fir_bandstop_8000_12000hz.txt");
+    writeCoeffs (lowpassHann, "fir_lowpass_hann_10000hz.txt");
+    writeCoeffs (lowpassHamming, "fir_lowpass_hamming_10000hz.txt");
+    writeCoeffs (lowpassBlackman, "fir_lowpass_blackman_10000hz.txt");
+
+    // Create a Python script to plot the frequency responses
+    std::ofstream pyScript ("plot_fir_responses.py");
+    if (pyScript.is_open())
+    {
+        pyScript << R"(#!/usr/bin/env python3
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy import signal
+
+def load_coeffs(filename):
+    with open(filename, 'r') as f:
+        return [float(line.strip()) for line in f.readlines()]
+
+def plot_frequency_response(coeffs, title, sample_rate=44100):
+    w, h = signal.freqz(coeffs, worN=8000, fs=sample_rate)
+    
+    plt.figure(figsize=(12, 8))
+    
+    # Magnitude response
+    plt.subplot(2, 1, 1)
+    plt.plot(w, 20 * np.log10(np.abs(h)))
+    plt.title(f'{title} - Magnitude Response')
+    plt.xlabel('Frequency (Hz)')
+    plt.ylabel('Magnitude (dB)')
+    plt.grid(True)
+    plt.xlim(0, sample_rate/2)
+    plt.ylim(-80, 5)
+    
+    # Phase response
+    plt.subplot(2, 1, 2)
+    plt.plot(w, np.unwrap(np.angle(h)) * 180 / np.pi)
+    plt.title(f'{title} - Phase Response')
+    plt.xlabel('Frequency (Hz)')
+    plt.ylabel('Phase (degrees)')
+    plt.grid(True)
+    plt.xlim(0, sample_rate/2)
+    
+    plt.tight_layout()
+    plt.savefig(f'{title.lower().replace(" ", "_").replace("-", "_")}_response.png', dpi=150, bbox_inches='tight')
+    plt.show()
+
+# Load and plot all FIR filter responses
+filters = [
+    ('fir_lowpass_10000hz.txt', 'FIR Lowpass 10000Hz'),
+    ('fir_highpass_10000hz.txt', 'FIR Highpass 10000Hz'), 
+    ('fir_bandpass_8000_12000hz.txt', 'FIR Bandpass 8000-12000Hz'),
+    ('fir_bandstop_8000_12000hz.txt', 'FIR Bandstop 8000-12000Hz'),
+    ('fir_lowpass_hann_10000hz.txt', 'FIR Lowpass Hann Window'),
+    ('fir_lowpass_hamming_10000hz.txt', 'FIR Lowpass Hamming Window'),
+    ('fir_lowpass_blackman_10000hz.txt', 'FIR Lowpass Blackman Window')
+]
+
+for filename, title in filters:
+    try:
+        coeffs = load_coeffs(filename)
+        plot_frequency_response(coeffs, title)
+    except FileNotFoundError:
+        print(f"File {filename} not found!")
+
+# Compare window types on same plot
+plt.figure(figsize=(12, 6))
+window_files = [
+    ('fir_lowpass_hann_10000hz.txt', 'Hann', 'blue'),
+    ('fir_lowpass_hamming_10000hz.txt', 'Hamming', 'red'),
+    ('fir_lowpass_blackman_10000hz.txt', 'Blackman', 'green')
+]
+
+for filename, label, color in window_files:
+    try:
+        coeffs = load_coeffs(filename)
+        w, h = signal.freqz(coeffs, worN=8000, fs=44100)
+        plt.plot(w, 20 * np.log10(np.abs(h)), label=label, color=color)
+    except FileNotFoundError:
+        print(f"File {filename} not found!")
+
+plt.title('FIR Lowpass 10000Hz - Window Comparison')
+plt.xlabel('Frequency (Hz)')
+plt.ylabel('Magnitude (dB)')
+plt.grid(True)
+plt.legend()
+plt.xlim(0, 22050)
+plt.ylim(-80, 5)
+plt.savefig('fir_window_comparison.png', dpi=150, bbox_inches='tight')
+plt.show()
+
+print("All plots generated successfully!")
+)";
+        pyScript.close();
+    }
+
+    // Only the coefficient counts are asserted here - the file contents are meant to be inspected visually with Python
+    EXPECT_EQ (lowpass.size(), numCoeffs);
+    EXPECT_EQ (highpass.size(), numCoeffs);
+    EXPECT_EQ (bandpass.size(), numCoeffs);
+    EXPECT_EQ (bandstop.size(), numCoeffs);
+
+    std::cout << "\nFIR coefficient files and Python plotting script created:\n";
+    std::cout << "- fir_lowpass_10000hz.txt\n";
+    std::cout << "- fir_highpass_10000hz.txt\n";
+    std::cout << "- fir_bandpass_8000_12000hz.txt\n";
+    std::cout << "- fir_bandstop_8000_12000hz.txt\n";
+    std::cout << "- fir_lowpass_hann_10000hz.txt\n";
+    std::cout << "- fir_lowpass_hamming_10000hz.txt\n";
+    std::cout << "- fir_lowpass_blackman_10000hz.txt\n";
+    std::cout << "- plot_fir_responses.py\n\n";
+    std::cout << "Run: python3 plot_fir_responses.py (requires numpy, matplotlib, scipy)\n";
+}
diff --git a/tests/yup_dsp/yup_PartitionedConvolver.cpp b/tests/yup_dsp/yup_PartitionedConvolver.cpp
new file mode 100644
index 000000000..5f83c46b1
--- /dev/null
+++ b/tests/yup_dsp/yup_PartitionedConvolver.cpp
@@ -0,0 +1,1282 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2025 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#include <yup_dsp/yup_dsp.h>
+
+#include <gtest/gtest.h>
+
+#include <random>
+#include <cmath>
+
+namespace yup::test
+{
+
+//==============================================================================
+class PartitionedConvolverTest : public ::testing::Test // Fixture with signal-generation and measurement helpers
+{
+protected:
+    void SetUp() override
+    {
+        generator.seed (42); // Fixed seed for reproducible tests
+    }
+
+    float randomFloat (float min = -1.0f, float max = 1.0f) // Uniform draw in [min, max) from the seeded generator
+    {
+        std::uniform_real_distribution<float> dist (min, max);
+        return dist (generator);
+    }
+
+    void fillWithRandomData (std::vector<float>& buffer) // Overwrites every sample with randomFloat()
+    {
+        for (auto& sample : buffer)
+            sample = randomFloat();
+    }
+
+    void fillWithSine (std::vector<float>& buffer, float frequency, float sampleRate) // Zero-phase sine starting at sample 0
+    {
+        for (size_t i = 0; i < buffer.size(); ++i)
+            buffer[i] = std::sin (2.0f * MathConstants<float>::pi * frequency * static_cast<float> (i) / sampleRate);
+    }
+
+    void clearBuffer (std::vector<float>& buffer) // Zeroes the buffer in place
+    {
+        std::fill (buffer.begin(), buffer.end(), 0.0f);
+    }
+
+    float calculateRMS (const std::vector<float>& buffer) // Root-mean-square level; 0 for an empty buffer
+    {
+        if (buffer.empty())
+            return 0.0f;
+
+        float sum = 0.0f;
+        for (float sample : buffer)
+            sum += sample * sample;
+
+        return std::sqrt (sum / static_cast<float> (buffer.size()));
+    }
+
+    float findPeak (const std::vector<float>& buffer) // Largest absolute sample; 0 for an empty buffer
+    {
+        if (buffer.empty())
+            return 0.0f;
+
+        float peak = 0.0f;
+        for (float sample : buffer)
+            peak = std::max (peak, std::abs (sample));
+
+        return peak;
+    }
+
+    std::mt19937 generator; // Re-seeded in SetUp() so every test sees the same sequence
+};
+
+//==============================================================================
+// Basic API Tests
+//==============================================================================
+
+TEST_F (PartitionedConvolverTest, DefaultConstruction)
+{
+    PartitionedConvolver convolver;
+
+    // Verify default state - should be safe to call these methods
+    EXPECT_NO_THROW (convolver.reset());
+
+    // Should be able to configure after construction
+    EXPECT_NO_THROW (convolver.setTypicalLayout (64, { 64, 256 }));
+    EXPECT_NO_THROW (convolver.prepare (512));
+
+    // Should handle processing gracefully when no impulse response has been loaded
+    std::vector<float> input (256, 0.0f);
+    std::vector<float> output (256, 0.0f);
+    EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size()));
+
+    // Output should remain zero without impulse response
+    for (float sample : output)
+        EXPECT_EQ (sample, 0.0f);
+}
+
+TEST_F (PartitionedConvolverTest, MoveSemantics)
+{
+    PartitionedConvolver convolver1;
+    convolver1.setTypicalLayout (64, { 64, 256 });
+    convolver1.prepare (512);
+
+    // Set up a known state: a 128-tap IR whose only non-zero tap is ir[0] = 1
+    std::vector<float> ir (128, 0.0f);
+    ir[0] = 1.0f;
+    convolver1.setImpulseResponse (ir);
+
+    // Move constructor
+    PartitionedConvolver convolver2 = std::move (convolver1);
+
+    // Verify moved convolver works by feeding it a unit impulse
+    std::vector<float> input (256, 0.0f);
+    input[0] = 1.0f;
+    std::vector<float> output (256, 0.0f);
+
+    EXPECT_NO_THROW (convolver2.process (input.data(), output.data(), input.size()));
+
+    // Should produce output from the moved convolver
+    float outputRMS = calculateRMS (output);
+    EXPECT_GT (outputRMS, 0.001f);
+
+    // Move assignment
+    PartitionedConvolver convolver3;
+    convolver3 = std::move (convolver2);
+
+    // Verify move-assigned convolver works (output is zeroed first, same input is reused)
+    clearBuffer (output);
+    EXPECT_NO_THROW (convolver3.process (input.data(), output.data(), input.size()));
+
+    outputRMS = calculateRMS (output);
+    EXPECT_GT (outputRMS, 0.001f);
+}
+
+TEST_F (PartitionedConvolverTest, BasicConfiguration)
+{
+    PartitionedConvolver convolver;
+
+    // Test typical layout configuration
+    EXPECT_NO_THROW (convolver.setTypicalLayout (128, { 128, 512, 2048 }));
+
+    // Should be able to prepare after configuration
+    EXPECT_NO_THROW (convolver.prepare (512));
+
+    // Verify configuration works by setting an impulse response (256 taps, only ir[0] = 1 is non-zero)
+    std::vector<float> ir (256, 0.0f);
+    ir[0] = 1.0f;
+    EXPECT_NO_THROW (convolver.setImpulseResponse (ir));
+
+    // Verify processing works after configuration
+    std::vector<float> input (256, 0.0f);
+    input[0] = 1.0f;
+    std::vector<float> output (256, 0.0f);
+
+    EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size()));
+
+    // Should produce output
+    float outputRMS = calculateRMS (output);
+    EXPECT_GT (outputRMS, 0.001f);
+
+    // Test reset clears state
+    convolver.reset();
+
+    // After reset, same input should produce same output (deterministic)
+    std::vector<float> output2 (256, 0.0f);
+    EXPECT_NO_THROW (convolver.process (input.data(), output2.data(), input.size()));
+
+    // Outputs before and after reset should match sample-by-sample within 1e-3
+    for (size_t i = 0; i < output.size(); ++i)
+        EXPECT_NEAR (output[i], output2[i], 0.001f);
+}
+
+TEST_F (PartitionedConvolverTest, ConfigureLayers)
+{
+    PartitionedConvolver convolver;
+
+    std::vector<PartitionedConvolver::LayerSpec> layers = {
+        { 64 }, { 256 }, { 1024 }
+    };
+
+    EXPECT_NO_THROW (convolver.configureLayers (32, layers));
+    EXPECT_NO_THROW (convolver.prepare (256));
+
+    // Verify the configuration works with a sparse 500-tap impulse response (taps at 0 and 50)
+    std::vector<float> ir (500, 0.0f);
+    ir[0] = 1.0f;
+    ir[50] = 0.5f;
+    EXPECT_NO_THROW (convolver.setImpulseResponse (ir));
+
+    // Test processing with the configured layers
+    std::vector<float> input (256, 0.0f);
+    input[0] = 1.0f;
+    std::vector<float> output (256, 0.0f);
+
+    EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size()));
+
+    // Should produce output from direct FIR immediately
+    float outputRMS = calculateRMS (output);
+    EXPECT_GT (outputRMS, 0.001f);
+
+    // Peak above 0.1 means a strong tap shows up within this first 256-sample block
+    EXPECT_GT (findPeak (output), 0.1f);
+}
+
+//==============================================================================
+// Impulse Response Tests
+//==============================================================================
+
+TEST_F (PartitionedConvolverTest, SetImpulseResponseVector)
+{
+    PartitionedConvolver convolver;
+    convolver.setTypicalLayout (64, { 64, 256 });
+    convolver.prepare (512);
+
+    // Create a 1000-tap impulse response filled with seeded random noise
+    std::vector<float> ir (1000);
+    fillWithRandomData (ir);
+
+    // Normalize so the largest absolute tap is 1.0
+    float peak = findPeak (ir);
+    if (peak > 0.0f)
+    {
+        for (auto& sample : ir)
+            sample /= peak;
+    }
+
+    EXPECT_NO_THROW (convolver.setImpulseResponse (ir));
+
+    // Verify the impulse response was set by testing processing
+    std::vector<float> input (512, 0.0f);
+    input[0] = 1.0f;
+    std::vector<float> output (512, 0.0f);
+
+    EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size()));
+
+    // Should produce significant output
+    float outputRMS = calculateRMS (output);
+    EXPECT_GT (outputRMS, 0.001f);
+
+    // Test linearity - 2x input should give ~2x output (only > 1.5x RMS is asserted)
+    input[0] = 2.0f;
+    std::vector<float> output2 (512, 0.0f);
+
+    convolver.reset();
+    EXPECT_NO_THROW (convolver.process (input.data(), output2.data(), input.size()));
+
+    float output2RMS = calculateRMS (output2);
+    EXPECT_GT (output2RMS, outputRMS * 1.5f);
+}
+
+TEST_F (PartitionedConvolverTest, SetImpulseResponsePointer)
+{
+    PartitionedConvolver convolver;
+    convolver.setTypicalLayout (64, { 64, 256 });
+    convolver.prepare (512);
+
+    // Create a 1000-tap impulse response filled with seeded random noise
+    std::vector<float> ir (1000);
+    fillWithRandomData (ir);
+
+    // Normalize so the largest absolute tap is 1.0
+    float peak = findPeak (ir);
+    if (peak > 0.0f)
+    {
+        for (auto& sample : ir)
+            sample /= peak;
+    }
+
+    EXPECT_NO_THROW (convolver.setImpulseResponse (ir.data(), ir.size()));
+
+    // Verify both pointer and vector methods produce same result
+    std::vector<float> input (512, 0.0f);
+    input[0] = 1.0f;
+    std::vector<float> output1 (512, 0.0f);
+
+    EXPECT_NO_THROW (convolver.process (input.data(), output1.data(), input.size()));
+
+    // Build a second, identically configured convolver and load the IR via the vector overload
+    PartitionedConvolver convolver2;
+    convolver2.setTypicalLayout (64, { 64, 256 });
+    convolver2.prepare (512);
+    convolver2.setImpulseResponse (ir);
+
+    std::vector<float> output2 (512, 0.0f);
+    EXPECT_NO_THROW (convolver2.process (input.data(), output2.data(), input.size()));
+
+    // Both methods should produce matching results (within 1e-4 per sample)
+    for (size_t i = 0; i < output1.size(); ++i)
+        EXPECT_NEAR (output1[i], output2[i], 0.0001f);
+}
+
+TEST_F (PartitionedConvolverTest, SetImpulseResponseWithOptions)
+{
+    PartitionedConvolver convolver;
+    convolver.setTypicalLayout (64, { 64, 256 });
+    convolver.prepare (512);
+
+    std::vector<float> ir (1000);
+    fillWithRandomData (ir);
+
+    // Make IR have a known peak
+    ir[0] = 2.0f; // Peak value (the random taps are drawn from [-1, 1))
+
+    PartitionedConvolver::IRLoadOptions options;
+    options.normalize = true;
+    options.headroomDb = -6.0f;
+
+    EXPECT_NO_THROW (convolver.setImpulseResponse (ir, options));
+
+    // Test that normalization and headroom are applied
+    std::vector<float> input (512, 0.0f);
+    input[0] = 1.0f;
+    std::vector<float> output (512, 0.0f);
+
+    EXPECT_NO_THROW (convolver.process (input.data(), output.data(), input.size()));
+
+    // Should produce output, but peak should be limited by headroom
+    float outputPeak = findPeak (output);
+    EXPECT_GT (outputPeak, 0.001f);
+    EXPECT_LT (outputPeak, 1.0f); // Should be less than input due to headroom
+
+    // Compare with a second convolver that loads the same IR without normalization or headroom
+    PartitionedConvolver convolver2;
+    convolver2.setTypicalLayout (64, { 64, 256 });
+    convolver2.prepare (512);
+
+    PartitionedConvolver::IRLoadOptions options2;
+    options2.normalize = false;
+    options2.headroomDb = 0.0f;
+
+    convolver2.setImpulseResponse (ir, options2);
+
+    std::vector<float> output2 (512, 0.0f);
+    EXPECT_NO_THROW (convolver2.process (input.data(), output2.data(), input.size()));
+
+    float output2Peak = findPeak (output2);
+
+    // Normalized version should have different peak
+    EXPECT_NE (outputPeak, output2Peak);
+}
+
+TEST_F (PartitionedConvolverTest, EmptyImpulseResponse)
+{
+    PartitionedConvolver convolver;
+    convolver.setTypicalLayout (64, { 64, 256 });
+    convolver.prepare (512);
+
+    std::vector<float> emptyIR;
+    convolver.setImpulseResponse (emptyIR);
+
+    // Processing random input with an empty IR should work
+    std::vector<float> input (256);
+    std::vector<float> output (256);
+    fillWithRandomData (input);
+    clearBuffer (output);
+
+    convolver.process (input.data(), output.data(), input.size());
+
+    // Output should remain zero
+    for (float sample : output)
+        EXPECT_FLOAT_EQ (sample, 0.0f);
+}
+
+//==============================================================================
+// Audio Processing Tests
+//==============================================================================
+
+TEST_F (PartitionedConvolverTest, ImpulseResponseTest)
+{
+    PartitionedConvolver convolver;
+    convolver.setTypicalLayout (64, { 64, 256 });
+    convolver.prepare (512);
+
+    // Create a sparse impulse response: a unit tap plus two attenuated, delayed taps
+    std::vector<float> ir (256, 0.0f);
+    ir[0] = 1.0f;   // Unit impulse at start
+    ir[10] = 0.5f;  // Delayed impulse
+    ir[50] = 0.25f; // Another delayed impulse
+
+    convolver.setImpulseResponse (ir);
+
+    // Test with unit impulse input
+    std::vector<float> input (512, 0.0f);
+    input[0] = 1.0f; // Unit impulse
+
+    std::vector<float> output (512);
+    clearBuffer (output);
+
+    convolver.process (input.data(), output.data(), input.size());
+
+    // Output should contain the impulse response (with some latency)
+    // Only overall output energy is checked here, not the individual tap positions
+    float outputRMS = calculateRMS (output);
+    EXPECT_GT (outputRMS, 0.01f);
+}
+
+TEST_F (PartitionedConvolverTest, SineWaveConvolution)
+{
+    PartitionedConvolver convolver;
+    convolver.setTypicalLayout (128, { 128, 512 });
+    convolver.prepare (2048);
+
+    // Create simple lowpass IR (32-tap moving average, every tap = 1/32)
+    const size_t irLength = 32;
+    std::vector<float> ir (irLength);
+    for (size_t i = 0; i < irLength; ++i)
+        ir[i] = 1.0f / static_cast<float> (irLength);
+
+    convolver.setImpulseResponse (ir);
+
+    // Test with a 1 kHz sine wave at a 44.1 kHz sample rate
+    const float sampleRate = 44100.0f;
+    const float frequency = 1000.0f;
+    std::vector<float> input (2048);
+    fillWithSine (input, frequency, sampleRate);
+
+    std::vector<float> output (2048);
+    clearBuffer (output);
+
+    convolver.process (input.data(), output.data(), input.size());
+
+    // Output should have significant energy (lowpass filtered sine)
+    float outputRMS = calculateRMS (output);
+    EXPECT_GT (outputRMS, 0.1f);
+}
+
+TEST_F (PartitionedConvolverTest, AccumulativeOutput)
+{
+    PartitionedConvolver convolver;
+    convolver.setTypicalLayout (64, { 64, 256 });
+    convolver.prepare (256);
+
+    // Simple IR: 100 taps, all equal to 0.1
+    std::vector<float> ir (100, 0.1f);
+    convolver.setImpulseResponse (ir);
+
+    std::vector<float> input (256);
+    fillWithRandomData (input);
+
+    // Pre-populate output buffer and keep a copy for comparison
+    std::vector<float> output (256);
+    fillWithRandomData (output);
+    std::vector<float> originalOutput = output;
+
+    convolver.process (input.data(), output.data(), input.size());
+
+    // Output is expected to hold original data plus convolution result; only a change is asserted
+    bool hasAccumulated = false;
+    for (size_t i = 0; i < output.size(); ++i)
+    {
+        if (std::abs (output[i] - originalOutput[i]) > 0.001f)
+        {
+            hasAccumulated = true;
+            break;
+        }
+    }
+    EXPECT_TRUE (hasAccumulated);
+}
+
+//==============================================================================
+// Latency Tests
+//==============================================================================
+
+TEST_F (PartitionedConvolverTest, LatencyMeasurement)
+{
+    // Test different configurations and measure latency
+    std::vector<std::pair<size_t, std::vector<int>>> configs = {
+        { 64, { 64 } },
+        { 128, { 128 } },
+        { 64, { 64, 256 } },
+        { 128, { 128, 512 } },
+        { 256, { 256, 1024 } }
+    };
+
+    for (const auto& [directCoefficients, hops] : configs)
+    {
+        PartitionedConvolver convolver;
+        convolver.setTypicalLayout (directCoefficients, hops);
+        convolver.prepare (1024);
+
+        // Unit impulse response
+        std::vector<float> ir (1000, 0.0f);
+        ir[0] = 1.0f;
+        convolver.setImpulseResponse (ir);
+
+        // Unit impulse input
+        std::vector<float> input (1024, 0.0f);
+        input[0] = 1.0f;
+
+        std::vector<float> output (1024);
+        clearBuffer (output);
+
+        convolver.process (input.data(), output.data(), input.size());
+
+        // Find first non-zero sample in output; stays at output.size() when the output is silent
+        size_t latencySamples = output.size();
+        for (size_t i = 0; i < output.size(); ++i)
+        {
+            if (std::abs (output[i]) > 0.001f)
+            {
+                latencySamples = i;
+                break;
+            }
+        }
+
+        // Latency should be reasonable (at most twice the largest hop size)
+        const int maxHop = *std::max_element (hops.begin(), hops.end());
+        EXPECT_LE (latencySamples, static_cast<size_t> (maxHop * 2));
+
+        // With direct FIR, latency should be minimal (a silent output now fails this check)
+        if (directCoefficients > 0)
+            EXPECT_LE (latencySamples, directCoefficients);
+    }
+}
+
+//==============================================================================
+// Partition Size Tests
+//==============================================================================
+
+TEST_F (PartitionedConvolverTest, VariousPartitionSizes)
+{
+    // Test various partition configurations - all with direct coefficients for immediate response
+    std::vector<std::tuple<size_t, std::vector<int>, size_t>> testConfigs = {
+        // (directCoefficients, hops, maxBlockSize)
+        { 64, { 64 }, 512 },
+        { 32, { 64 }, 512 },
+        { 64, { 64, 256 }, 512 },
+        { 128, { 128, 512 }, 1024 },
+        { 128, { 128, 512, 2048 }, 2048 },
+        { 256, { 256, 1024, 4096 }, 4096 },
+        { 64, { 128, 256, 512 }, 1024 },
+        { 48, { 32, 128, 512 }, 1024 },
+        { 24, { 32, 64, 128 }, 1024 },
+    };
+
+    for (const auto& item : testConfigs)
+    {
+        const auto& directCoefficients = std::get<0> (item);
+        const auto& hops = std::get<1> (item);
+        const auto& maxBlockSize = std::get<2> (item);
+
+        SCOPED_TRACE (testing::Message() << "Config: directCoefficients=" << directCoefficients << " hops=[" << [&]()
+        {
+            std::string hopStr;
+            for (size_t i = 0; i < hops.size(); ++i)
+            {
+                if (i > 0)
+                    hopStr += ",";
+                hopStr += std::to_string (hops[i]);
+            }
+            return hopStr;
+        }() << "] maxBlockSize=" << maxBlockSize);
+
+        PartitionedConvolver convolver;
+
+        // Configure and verify setup
+        EXPECT_NO_THROW (convolver.setTypicalLayout (directCoefficients, hops));
+        EXPECT_NO_THROW (convolver.prepare (maxBlockSize));
+
+        // Create a simple known impulse response (at most 500 taps)
+        std::vector<float> ir (std::min (static_cast<size_t> (500), maxBlockSize), 0.0f);
+        ir[0] = 1.0f; // Unit impulse at start
+        if (ir.size() > 100)
+            ir[100] = 0.5f; // Delayed impulse for verification
+        EXPECT_NO_THROW (convolver.setImpulseResponse (ir));
+
+        // Test with unit impulse to verify convolution correctness
+        std::vector<float> deltaInput (maxBlockSize, 0.0f);
+        deltaInput[0] = 1.0f; // Unit impulse
+        std::vector<float> deltaOutput (maxBlockSize);
+        clearBuffer (deltaOutput);
+
+        EXPECT_NO_THROW (convolver.process (deltaInput.data(), deltaOutput.data(), maxBlockSize));
+
+        // Should produce significant output
+        float outputRMS = calculateRMS (deltaOutput);
+        EXPECT_GT (outputRMS, 0.003f) << "No significant convolution output detected";
+
+        // Verify we get immediate response from direct FIR
+        EXPECT_GT (findPeak (deltaOutput), 0.1f) << "No immediate response detected";
+
+        // Process various realistic block sizes (convolver state carries over from the impulse run)
+        std::vector<size_t> blockSizes = { 64, 128, 256, maxBlockSize };
+
+        for (size_t blockSize : blockSizes)
+        {
+            if (blockSize > maxBlockSize)
+                continue;
+
+            SCOPED_TRACE (testing::Message() << "BlockSize=" << blockSize);
+
+            std::vector<float> input (blockSize);
+            std::vector<float> output (blockSize);
+            fillWithRandomData (input);
+            clearBuffer (output);
+
+            EXPECT_NO_THROW (convolver.process (input.data(), output.data(), blockSize));
+
+            // Verify audio processing quality
+            for (float sample : output)
+            {
+                EXPECT_TRUE (std::isfinite (sample)) << "Non-finite output detected";
+                EXPECT_LT (std::abs (sample), 100.0f) << "Output amplitude too large";
+            }
+
+            // With direct taps, should get output for reasonable input (names avoid shadowing outputRMS above)
+            float blockInputRMS = calculateRMS (input);
+            float blockOutputRMS = calculateRMS (output);
+
+            if (blockInputRMS > 0.01f)
+            {
+                EXPECT_GT (blockOutputRMS, 0.001f) << "Output unexpectedly quiet for significant input";
+            }
+        }
+    }
+}
+
+//==============================================================================
+// Stress Test
+//==============================================================================
+
+// Streams blocks of many different sizes (regular ones, then tiny/odd ones) through a single
+// convolver instance and checks that the output stays finite and within sane bounds.
+TEST_F (PartitionedConvolverTest, StressTestDifferentBlockSizes)
+{
+    PartitionedConvolver convolver;
+    convolver.setTypicalLayout (128, { 128, 512, 2048 });
+    convolver.prepare (2048);
+
+    // Impulse response: 200 samples of an exponentially decaying cosine, zero-padded to 1024
+    std::vector<float> ir (1024, 0.0f);
+    for (size_t i = 0; i < 200; ++i)
+        ir[i] = std::exp (-static_cast<float> (i) / 50.0f) * std::cos (2.0f * MathConstants<float>::pi * i / 16.0f);
+
+    // Normalize to prevent overflow. The comparator ranks by magnitude, so the element it selects
+    // may be negative: take its absolute value before testing it and dividing by it.
+    const float irPeak = std::abs (*std::max_element (ir.begin(), ir.end(), [] (float a, float b)
+    {
+        return std::abs (a) < std::abs (b);
+    }));
+    if (irPeak > 0.0f)
+    {
+        for (auto& sample : ir)
+            sample /= (irPeak * 2.0f); // Extra headroom
+    }
+
+    convolver.setImpulseResponse (ir);
+
+    // Regular block sizes first, none larger than the value passed to prepare()
+    std::vector<size_t> blockSizes = { 32, 64, 128, 256, 512, 1024 };
+
+    float totalInputEnergy = 0.0f;
+    float totalOutputEnergy = 0.0f;
+
+    for (size_t blockSize : blockSizes)
+    {
+        SCOPED_TRACE (testing::Message() << "Processing blockSize=" << blockSize);
+
+        std::vector<float> input (blockSize);
+        std::vector<float> output (blockSize);
+        fillWithRandomData (input);
+        clearBuffer (output);
+
+        EXPECT_NO_THROW (convolver.process (input.data(), output.data(), blockSize));
+
+        // Most critical: no non-finite values
+        for (float sample : output)
+        {
+            EXPECT_TRUE (std::isfinite (sample)) << "Non-finite output in blockSize=" << blockSize;
+        }
+
+        float inputRMS = calculateRMS (input);
+        float outputRMS = calculateRMS (output);
+
+        if (std::isfinite (outputRMS)) // Keep a non-finite RMS out of the running totals
+        {
+            totalInputEnergy += inputRMS * inputRMS * blockSize;
+            totalOutputEnergy += outputRMS * outputRMS * blockSize;
+        }
+
+        // Verify reasonable levels (named outputPeak so it does not shadow the IR peak above)
+        const float outputPeak = findPeak (output);
+        EXPECT_LT (outputPeak, 50.0f) << "Output peak too large for blockSize=" << blockSize;
+
+        // With direct taps, expect output for reasonable input
+        if (inputRMS > 0.01f)
+        {
+            EXPECT_GT (outputRMS, 0.0001f) << "No output for significant input, blockSize=" << blockSize;
+            EXPECT_LT (outputRMS, inputRMS * 5.0f) << "Output unreasonably high for blockSize=" << blockSize;
+        }
+    }
+
+    // Test challenging small block sizes (a single sample and odd lengths)
+    std::vector<size_t> smallBlockSizes = { 1, 7, 15 };
+
+    for (size_t blockSize : smallBlockSizes)
+    {
+        SCOPED_TRACE (testing::Message() << "Processing small blockSize=" << blockSize);
+
+        std::vector<float> input (blockSize);
+        std::vector<float> output (blockSize);
+        fillWithRandomData (input);
+        clearBuffer (output);
+
+        EXPECT_NO_THROW (convolver.process (input.data(), output.data(), blockSize));
+
+        // Critical: no non-finite values
+        for (float sample : output)
+        {
+            EXPECT_TRUE (std::isfinite (sample)) << "Non-finite output in small blockSize=" << blockSize;
+        }
+
+        // Reasonable bounds
+        const float outputPeak = findPeak (output);
+        EXPECT_LT (outputPeak, 50.0f) << "Output peak too large for small blockSize=" << blockSize;
+    }
+
+    // Energy sanity check over the regular blocks: output within [1%, 1000%] of the input energy
+    if (totalInputEnergy > 0.0f && totalOutputEnergy > 0.0f)
+    {
+        EXPECT_GT (totalOutputEnergy, totalInputEnergy * 0.01f) << "Total output energy too low";
+        EXPECT_LT (totalOutputEnergy, totalInputEnergy * 10.0f) << "Total output energy too high";
+    }
+}
+
+//==============================================================================
+// Fuzzing, IR Length Edge Cases and Reset Tests
+//==============================================================================
+
+TEST_F (PartitionedConvolverTest, RandomizedFuzzing)
+{
+    // Builds random (but valid) layouts from the fixture's generator and runs audio through each
+    std::uniform_int_distribution<int> hopDist (32, 2048);
+    std::uniform_int_distribution<size_t> directCoefficientsDist (32, 512); // Always have some direct coefficients
+    std::uniform_int_distribution<size_t> blockSizeDist (32, 1024);         // Upper bound equals maxBlockSize below
+
+    for (int trial = 0; trial < 10; ++trial) // Reduce trials for stability
+    {
+        SCOPED_TRACE (testing::Message() << "Fuzzing trial " << trial);
+
+        // Generate random configuration
+        const size_t directCoefficients = directCoefficientsDist (generator);
+        const size_t numLayers = 1 + (generator() % 3); // 1-3 layers
+
+        std::vector<int> hops;
+        int prevHop = 32;
+        for (size_t i = 0; i < numLayers; ++i)
+        {
+            int hop = std::max (prevHop, hopDist (generator)); // Hops never decrease from layer to layer
+            // Round down to a power of two so the hop is a valid FFT size
+            hop = 1 << static_cast<int> (std::log2 (hop));
+            hops.push_back (hop);
+            prevHop = hop;
+        }
+
+        const size_t maxBlockSize = 1024;
+
+        PartitionedConvolver convolver;
+
+        try
+        {
+            convolver.setTypicalLayout (directCoefficients, hops);
+            convolver.prepare (maxBlockSize);
+
+            // Simple impulse response: random values under an exponentially decaying envelope
+            std::vector<float> ir (512);
+            for (size_t i = 0; i < ir.size(); ++i)
+                ir[i] = std::exp (-static_cast<float> (i) / 100.0f) * randomFloat (-0.1f, 0.1f);
+
+            convolver.setImpulseResponse (ir);
+
+            // Test with impulse
+            std::vector<float> deltaInput (maxBlockSize, 0.0f);
+            deltaInput[0] = 1.0f;
+            std::vector<float> deltaOutput (maxBlockSize);
+            clearBuffer (deltaOutput);
+
+            convolver.process (deltaInput.data(), deltaOutput.data(), maxBlockSize);
+
+            float deltaRMS = calculateRMS (deltaOutput);
+            EXPECT_GT (deltaRMS, 0.001f) << "No convolution output in trial " << trial;
+
+            // Process several blocks of random length
+            for (int block = 0; block < 5; ++block)
+            {
+                const size_t blockSize = blockSizeDist (generator); // 32..1024, i.e. at most maxBlockSize
+
+                std::vector<float> input (blockSize);
+                std::vector<float> output (blockSize);
+                fillWithRandomData (input);
+                clearBuffer (output);
+
+                convolver.process (input.data(), output.data(), blockSize);
+
+                // Audio quality checks
+                for (float sample : output)
+                {
+                    EXPECT_TRUE (std::isfinite (sample)) << "Non-finite output in trial " << trial << " block " << block;
+                    EXPECT_LT (std::abs (sample), 100.0f) << "Output too large in trial " << trial << " block " << block;
+                }
+            }
+        }
+        catch (const std::exception& e)
+        {
+            FAIL() << "Exception in fuzzing trial " << trial << ": " << e.what();
+        }
+    }
+}
+
+TEST_F (PartitionedConvolverTest, ShortImpulseResponseWithManyLayers)
+{
+    // Four hop sizes are configured, yet the impulse response below has only 32 samples,
+    // fewer than the 64 direct coefficients requested here.
+    PartitionedConvolver engine;
+    engine.setTypicalLayout (64, { 128, 512, 2048, 4096 });
+    engine.prepare (512);
+
+    std::vector<float> tinyResponse (32);
+    fillWithRandomData (tinyResponse);
+
+    // Loading an impulse response this short must not throw
+    EXPECT_NO_THROW (engine.setImpulseResponse (tinyResponse));
+
+    // Run one 512-sample block of random audio through the convolver
+    std::vector<float> source (512);
+    fillWithRandomData (source);
+
+    std::vector<float> rendered (512);
+    clearBuffer (rendered);
+
+    EXPECT_NO_THROW (engine.process (source.data(), rendered.data(), source.size()));
+
+    // The rendered block is expected to carry signal (RMS above 0.001)
+    const float renderedRMS = calculateRMS (rendered);
+    EXPECT_GT (renderedRMS, 0.001f);
+}
+
+TEST_F (PartitionedConvolverTest, IRShorterThanDirectTaps)
+{
+    // 128 direct coefficients are requested while the impulse response holds only 64 samples
+    PartitionedConvolver engine;
+    engine.setTypicalLayout (128, { 256, 1024 });
+    engine.prepare (512);
+
+    std::vector<float> halfLengthIR (64);
+    fillWithRandomData (halfLengthIR);
+
+    // Loading the undersized response must not throw
+    EXPECT_NO_THROW (engine.setImpulseResponse (halfLengthIR));
+
+    // Push a single 512-sample block of random input through the convolver
+    std::vector<float> source (512);
+    fillWithRandomData (source);
+
+    std::vector<float> rendered (512);
+    clearBuffer (rendered);
+
+    EXPECT_NO_THROW (engine.process (source.data(), rendered.data(), source.size()));
+
+    // Non-silent output is required (RMS above 0.001)
+    const float renderedRMS = calculateRMS (rendered);
+    EXPECT_GT (renderedRMS, 0.001f);
+}
+
+TEST_F (PartitionedConvolverTest, IRExactlyMatchesFirstLayer)
+{
+    // Layout: 64 direct coefficients followed by hop sizes 128, 512 and 2048
+    PartitionedConvolver engine;
+    engine.setTypicalLayout (64, { 128, 512, 2048 });
+    engine.prepare (512);
+
+    // Response length is the direct coefficient count plus the first hop size
+    const std::size_t responseLength = 64 + 128;
+    std::vector<float> response (responseLength);
+    fillWithRandomData (response);
+
+    EXPECT_NO_THROW (engine.setImpulseResponse (response));
+
+    // Process one 512-sample block of random input
+    std::vector<float> source (512);
+    fillWithRandomData (source);
+    std::vector<float> rendered (512);
+    clearBuffer (rendered);
+
+    EXPECT_NO_THROW (engine.process (source.data(), rendered.data(), source.size()));
+
+    // The block must not be silent (RMS above 0.001)
+    const float renderedRMS = calculateRMS (rendered);
+    EXPECT_GT (renderedRMS, 0.001f);
+}
+
+TEST_F (PartitionedConvolverTest, ZeroLengthIR)
+{
+    PartitionedConvolver engine;
+    engine.setTypicalLayout (64, { 128, 512 });
+    engine.prepare (512);
+
+    // An impulse response without any samples must be accepted without throwing
+    std::vector<float> noSamples;
+    EXPECT_NO_THROW (engine.setImpulseResponse (noSamples));
+
+    // Random input goes in; the output buffer starts out cleared
+    std::vector<float> source (512);
+    std::vector<float> rendered (512);
+    fillWithRandomData (source);
+    clearBuffer (rendered);
+
+    EXPECT_NO_THROW (engine.process (source.data(), rendered.data(), source.size()));
+
+    // Every output sample has to stay within 0.0001 of zero
+    for (size_t n = 0; n < rendered.size(); ++n)
+        EXPECT_NEAR (rendered[n], 0.0f, 0.0001f);
+}
+
+TEST_F (PartitionedConvolverTest, ProgressiveIRLengths)
+{
+    // A fresh convolver with the same layout is loaded with IRs of increasing length (10 to 2000 samples)
+    const std::vector<size_t> responseLengths = { 10, 50, 100, 200, 500, 1000, 2000 };
+
+    for (const size_t responseLength : responseLengths)
+    {
+        SCOPED_TRACE (testing::Message() << "IR Length: " << responseLength);
+
+        PartitionedConvolver engine;
+        engine.setTypicalLayout (64, { 128, 512, 2048 });
+        engine.prepare (512);
+
+        std::vector<float> response (responseLength);
+        fillWithRandomData (response);
+
+        EXPECT_NO_THROW (engine.setImpulseResponse (response));
+
+        // One 512-sample block of random input per IR length
+        std::vector<float> source (512);
+        std::vector<float> rendered (512);
+        fillWithRandomData (source);
+        clearBuffer (rendered);
+
+        EXPECT_NO_THROW (engine.process (source.data(), rendered.data(), source.size()));
+
+        // Each output sample must be finite and smaller than 100 in magnitude
+        for (size_t n = 0; n < rendered.size(); ++n)
+        {
+            EXPECT_TRUE (std::isfinite (rendered[n]));
+            EXPECT_LT (std::abs (rendered[n]), 100.0f);
+        }
+    }
+}
+
+TEST_F (PartitionedConvolverTest, ResetFunctionality)
+{
+    // The same input block is processed before and after reset(); both results must agree
+    PartitionedConvolver engine;
+    engine.setTypicalLayout (64, { 64, 256 });
+    engine.prepare (512);
+
+    std::vector<float> response (500);
+    fillWithRandomData (response);
+    engine.setImpulseResponse (response);
+
+    std::vector<float> source (512);
+    fillWithRandomData (source);
+
+    // First pass: runs right after the impulse response was loaded
+    std::vector<float> beforeReset (512);
+    clearBuffer (beforeReset);
+
+    engine.process (source.data(), beforeReset.data(), source.size());
+
+    engine.reset();
+
+    // Second pass: identical input, processed right after the reset
+    std::vector<float> afterReset (512);
+    clearBuffer (afterReset);
+
+    engine.process (source.data(), afterReset.data(), source.size());
+
+    // Sample-by-sample comparison with a tolerance of 0.001
+    for (size_t n = 0; n < beforeReset.size(); ++n)
+        EXPECT_NEAR (beforeReset[n], afterReset[n], 0.001f) << "Mismatch at sample " << n;
+}
+
+//==============================================================================
+// IR Trimming Tests
+//==============================================================================
+
+// Loads the same impulse response once without and once with end-silence trimming (-50 dB)
+// and requires the first 200 output samples of both runs to be highly correlated.
+TEST_F (PartitionedConvolverTest, IRTrimmingBasicFunctionality)
+{
+    PartitionedConvolver convolver;
+    convolver.setTypicalLayout (64, { 64, 256 });
+    convolver.prepare (512);
+
+    // Create IR with significant content at start and near-silence at end
+    const size_t originalLength = 2000;
+    const size_t significantLength = 800;
+    std::vector<float> ir (originalLength, 0.0f);
+
+    // Fill first part with meaningful signal: a sine under a decaying exponential envelope
+    for (size_t i = 0; i < significantLength; ++i)
+        ir[i] = std::exp (-static_cast<float> (i) / 100.0f) * std::sin (2.0f * MathConstants<float>::pi * i / 32.0f);
+
+    // Add very quiet noise at the end (amplitude bounds of 0.001, i.e. about -60dB)
+    for (size_t i = significantLength; i < originalLength; ++i)
+    {
+        ir[i] = randomFloat (-0.001f, 0.001f); // ~ -60dB
+    }
+
+    // Test without trimming, feeding a unit impulse
+    convolver.setImpulseResponse (ir);
+    std::vector<float> input (512, 0.0f);
+    input[0] = 1.0f;
+    std::vector<float> outputWithoutTrim (512, 0.0f);
+    convolver.process (input.data(), outputWithoutTrim.data(), input.size());
+    convolver.reset();
+
+    // Test with trimming at -50dB threshold
+    PartitionedConvolver::IRLoadOptions options;
+    options.trimEndSilenceBelowDb = -50.0f;
+    convolver.setImpulseResponse (ir, options);
+
+    std::vector<float> outputWithTrim (512, 0.0f);
+    convolver.process (input.data(), outputWithTrim.data(), input.size());
+
+    // Both should produce similar output in the early samples (normalized cross-correlation)
+    float correlationSum = 0.0f;
+    float norm1 = 0.0f, norm2 = 0.0f;
+
+    for (size_t i = 0; i < 200; ++i) // Compare first 200 samples
+    {
+        correlationSum += outputWithoutTrim[i] * outputWithTrim[i];
+        norm1 += outputWithoutTrim[i] * outputWithoutTrim[i];
+        norm2 += outputWithTrim[i] * outputWithTrim[i];
+    }
+
+    // Silent output would leave the correlation undefined and the check vacuous: fail loudly instead
+    ASSERT_GT (norm1, 0.0f) << "Untrimmed impulse response produced no output in the early samples";
+    ASSERT_GT (norm2, 0.0f) << "Trimmed impulse response produced no output in the early samples";
+    const float correlation = correlationSum / std::sqrt (norm1 * norm2);
+    EXPECT_GT (correlation, 0.95f) << "Trimmed and untrimmed outputs should be highly correlated in early samples";
+}
+
+TEST_F (PartitionedConvolverTest, IRTrimmingWithDifferentThresholds)
+{
+    PartitionedConvolver engine;
+    engine.setTypicalLayout (64, { 64, 256 });
+    engine.prepare (512);
+
+    // 2000-sample sine (period 16) under an exponential envelope with a 200-sample time constant
+    const size_t responseLength = 2000;
+    std::vector<float> response (responseLength);
+
+    for (size_t n = 0; n < responseLength; ++n)
+    {
+        const float envelope = std::exp (-static_cast<float> (n) / 200.0f);
+        response[n] = envelope * std::sin (2.0f * MathConstants<float>::pi * n / 16.0f);
+    }
+
+    const std::vector<float> trimLevelsDb = { -20.0f, -40.0f, -60.0f, -80.0f };
+    std::vector<float> energyPerLevel;
+
+    for (const float trimLevelDb : trimLevelsDb)
+    {
+        PartitionedConvolver::IRLoadOptions loadOptions;
+        loadOptions.trimEndSilenceBelowDb = trimLevelDb;
+        engine.setImpulseResponse (response, loadOptions);
+
+        std::vector<float> impulse (512, 0.0f);
+        impulse[0] = 1.0f;
+        std::vector<float> rendered (512, 0.0f);
+        engine.process (impulse.data(), rendered.data(), impulse.size());
+
+        float sumOfSquares = 0.0f;
+        for (size_t n = 0; n < rendered.size(); ++n)
+            sumOfSquares += rendered[n] * rendered[n];
+
+        energyPerLevel.push_back (sumOfSquares);
+        engine.reset();
+    }
+
+    // Walking from -20 dB towards -80 dB, each energy may exceed its predecessor by at most 10%
+    for (size_t level = 1; level < energyPerLevel.size(); ++level)
+    {
+        EXPECT_LE (energyPerLevel[level], energyPerLevel[level - 1] * 1.1f)
+            << "More aggressive trimming threshold should not significantly increase output energy";
+    }
+}
+
+TEST_F (PartitionedConvolverTest, IRTrimmingVeryShortIR)
+{
+    PartitionedConvolver engine;
+    engine.setTypicalLayout (64, { 64, 256 });
+    engine.prepare (512);
+
+    // 100-sample, constant-amplitude sine with a period of 8 samples
+    std::vector<float> response (100);
+    for (size_t n = 0; n < response.size(); ++n)
+        response[n] = std::sin (2.0f * MathConstants<float>::pi * n / 8.0f);
+
+    PartitionedConvolver::IRLoadOptions loadOptions;
+    loadOptions.trimEndSilenceBelowDb = -40.0f;
+
+    // Loading with trimming enabled must not throw
+    EXPECT_NO_THROW (engine.setImpulseResponse (response, loadOptions));
+
+    // Feed a unit impulse
+    std::vector<float> impulse (512, 0.0f);
+    impulse[0] = 1.0f;
+
+    std::vector<float> rendered (512, 0.0f);
+
+    EXPECT_NO_THROW (engine.process (impulse.data(), rendered.data(), impulse.size()));
+
+    // The impulse response has to show up in the output (RMS above 0.01)
+    const float renderedRMS = calculateRMS (rendered);
+    EXPECT_GT (renderedRMS, 0.01f);
+}
+
+TEST_F (PartitionedConvolverTest, IRTrimmingAllSilence)
+{
+    PartitionedConvolver engine;
+    engine.setTypicalLayout (64, { 64, 256 });
+    engine.prepare (512);
+
+    // Every IR sample comes from randomFloat (-0.0001f, 0.0001f), i.e. around -80 dB
+    std::vector<float> faintResponse (1000);
+    for (auto& tap : faintResponse)
+        tap = randomFloat (-0.0001f, 0.0001f);
+
+    // Normalization is switched off so the IR stays quiet; the trimming threshold is -60 dB
+    PartitionedConvolver::IRLoadOptions loadOptions;
+    loadOptions.normalize = false;
+    loadOptions.trimEndSilenceBelowDb = -60.0f;
+
+    EXPECT_NO_THROW (engine.setImpulseResponse (faintResponse, loadOptions));
+
+    std::vector<float> source (512);
+    fillWithRandomData (source);
+
+    std::vector<float> rendered (512, 0.0f);
+
+    EXPECT_NO_THROW (engine.process (source.data(), rendered.data(), source.size()));
+
+    // With such a faint (or fully trimmed) IR the output RMS must remain below 0.001
+    const float renderedRMS = calculateRMS (rendered);
+    EXPECT_LT (renderedRMS, 0.001f);
+}
+
+TEST_F (PartitionedConvolverTest, IRTrimmingWithNormalization)
+{
+    PartitionedConvolver engine;
+    engine.setTypicalLayout (64, { 64, 256 });
+    engine.prepare (512);
+
+    // 1500-sample IR: a decaying burst starting at 2.0 over the first 100 samples, then faint noise
+    std::vector<float> response (1500);
+    for (size_t n = 0; n < 100; ++n)
+        response[n] = 2.0f * std::exp (-static_cast<float> (n) / 50.0f);
+
+    for (size_t n = 100; n < response.size(); ++n)
+        response[n] = 0.01f * randomFloat (-0.1f, 0.1f);
+
+    // Normalization, headroom and end trimming are all enabled together
+    PartitionedConvolver::IRLoadOptions loadOptions;
+    loadOptions.normalize = true;
+    loadOptions.headroomDb = -6.0f;
+    loadOptions.trimEndSilenceBelowDb = -50.0f;
+
+    EXPECT_NO_THROW (engine.setImpulseResponse (response, loadOptions));
+
+    std::vector<float> impulse (512, 0.0f);
+    impulse[0] = 1.0f;
+
+    std::vector<float> rendered (512, 0.0f);
+
+    EXPECT_NO_THROW (engine.process (impulse.data(), rendered.data(), impulse.size()));
+
+    // The output peak has to land strictly between 0.1 and 1.0
+    const float renderedPeak = findPeak (rendered);
+    EXPECT_GT (renderedPeak, 0.1f);
+    EXPECT_LT (renderedPeak, 1.0f);
+}
+
+TEST_F (PartitionedConvolverTest, IRTrimmingExactBoundary)
+{
+    PartitionedConvolver engine;
+    engine.setTypicalLayout (64, { 64, 256 });
+    engine.prepare (512);
+
+    // 1500-sample IR: exponential decay for 1000 samples, then a constant plateau
+    const size_t decayLength = 1000;
+    const size_t responseLength = 1500;
+    std::vector<float> response (responseLength, 0.0f);
+
+    for (size_t n = 0; n < decayLength; ++n)
+        response[n] = std::exp (-static_cast<float> (n) / 200.0f);
+
+    // The plateau sits at 90% of the linear amplitude corresponding to -50 dB (about 0.00316)
+    const float minus50DbLinear = std::pow (10.0f, -50.0f / 20.0f);
+    const float plateauLevel = minus50DbLinear * 0.9f;
+    for (size_t n = decayLength; n < responseLength; ++n)
+    {
+        response[n] = plateauLevel;
+    }
+
+    // Trim with the very same -50 dB threshold
+    PartitionedConvolver::IRLoadOptions loadOptions;
+    loadOptions.trimEndSilenceBelowDb = -50.0f;
+
+    EXPECT_NO_THROW (engine.setImpulseResponse (response, loadOptions));
+
+    std::vector<float> impulse (512, 0.0f);
+    impulse[0] = 1.0f;
+
+    std::vector<float> rendered (512, 0.0f);
+
+    EXPECT_NO_THROW (engine.process (impulse.data(), rendered.data(), impulse.size()));
+
+    // Output must not be silent (RMS above 0.001)
+    const float renderedRMS = calculateRMS (rendered);
+    EXPECT_GT (renderedRMS, 0.001f);
+}
+
+TEST_F (PartitionedConvolverTest, IRTrimmingConsistency)
+{
+    // Two identically configured convolvers receive the same IR, options and input
+    PartitionedConvolver first;
+    PartitionedConvolver second;
+
+    first.setTypicalLayout (64, { 64, 256 });
+    first.prepare (512);
+    second.setTypicalLayout (64, { 64, 256 });
+    second.prepare (512);
+
+    // Random IR whose samples from index 600 onwards are scaled by 0.001
+    std::vector<float> response (1000);
+    fillWithRandomData (response);
+    for (size_t n = 600; n < response.size(); ++n)
+        response[n] *= 0.001f;
+
+    PartitionedConvolver::IRLoadOptions loadOptions;
+    loadOptions.trimEndSilenceBelowDb = -50.0f;
+
+    first.setImpulseResponse (response, loadOptions);
+    second.setImpulseResponse (response, loadOptions);
+
+    std::vector<float> source (512);
+    fillWithRandomData (source);
+
+    std::vector<float> firstRendered (512, 0.0f);
+    std::vector<float> secondRendered (512, 0.0f);
+
+    first.process (source.data(), firstRendered.data(), source.size());
+    second.process (source.data(), secondRendered.data(), source.size());
+
+    // Outputs have to match sample by sample within 0.0001
+    for (size_t n = 0; n < firstRendered.size(); ++n)
+    {
+        EXPECT_NEAR (firstRendered[n], secondRendered[n], 0.0001f) << "Inconsistent trimming results at sample " << n;
+    }
+}
+
+} // namespace yup::test
\ No newline at end of file