From d35f4047e9c0428d2675dd5edde07d7486f01720 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Volker=20Str=C3=B6bel?= Date: Tue, 25 Nov 2025 08:39:23 +0100 Subject: [PATCH] Optimize ycbcr conversion --- src/image_buffer.rs | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/src/image_buffer.rs b/src/image_buffer.rs index 4716344..26e9be4 100644 --- a/src/image_buffer.rs +++ b/src/image_buffer.rs @@ -153,11 +153,40 @@ macro_rules! ycbcr_image { fn fill_buffers(&self, y: u16, buffers: &mut [Vec; 4]) { let line = get_line(self.0, y, self.width(), $num_colors); - for pixel in line.chunks_exact($num_colors) { + // Doing the conversion in chunks allows the compiler to vectorize the code. + // A size of 16 seems optimal for SSE and AVX capable hardware. + const CHUNK_SIZE:usize = 16; + + let mut y_buffer = [0;CHUNK_SIZE]; + let mut cb_buffer = [0;CHUNK_SIZE]; + let mut cr_buffer = [0;CHUNK_SIZE]; + + for chuck in line.chunks_exact($num_colors*CHUNK_SIZE) { + for i in (0..CHUNK_SIZE) { + let (y, cb, cr) = rgb_to_ycbcr( + chuck[i * $num_colors + $o1], + chuck[i * $num_colors + $o2], + chuck[i * $num_colors + $o3], + ); + + y_buffer[i] = y; + cb_buffer[i] = cb; + cr_buffer[i] = cr; + } + + buffers[0].extend_from_slice(&y_buffer); + buffers[1].extend_from_slice(&cb_buffer); + buffers[2].extend_from_slice(&cr_buffer); + } + + // Add the remaining pixels in case the number of + // pixels is not a multiple of CHUNK_SIZE + let pixel = line.len() / $num_colors; + for i in pixel/CHUNK_SIZE*CHUNK_SIZE..pixel { let (y, cb, cr) = rgb_to_ycbcr( - pixel[$o1], - pixel[$o2], - pixel[$o3], + line[i*$num_colors + $o1], + line[i*$num_colors + $o2], + line[i*$num_colors + $o3], ); buffers[0].push(y);