Skip to content

Commit 5cf7c83

Browse files
Shnatsel authored and vstroebel committed
Fix subtle bug due to intermediate arrays being created by the try_into() call
1 parent 33de59f commit 5cf7c83

File tree

2 files changed

+13
-14
lines changed

2 files changed

+13
-14
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ repository = "https://github.com/vstroebel/jpeg-encoder"
1212
rust-version = "1.87"
1313

1414
[features]
15-
default = ["std"]
15+
default = ["std", "simd"]
1616
simd = ["std"]
1717
std = []
1818

src/avx2/fdct.rs

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -420,10 +420,10 @@ fn fdct_avx2_internal(data: &mut [i16; 64]) {
420420
(t1, t2, t3, t4)
421421
}
422422

423-
let ymm4 = avx_load(data[0..16].try_into().unwrap());
424-
let ymm5 = avx_load(data[16..32].try_into().unwrap());
425-
let ymm6 = avx_load(data[32..48].try_into().unwrap());
426-
let ymm7 = avx_load(data[48..64].try_into().unwrap());
423+
let ymm4 = avx_load(&data[0..16]);
424+
let ymm5 = avx_load(&data[16..32]);
425+
let ymm6 = avx_load(&data[32..48]);
426+
let ymm7 = avx_load(&data[48..64]);
427427

428428
// ---- Pass 1: process rows.
429429
// ymm4=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
@@ -457,19 +457,17 @@ fn fdct_avx2_internal(data: &mut [i16; 64]) {
457457
let ymm6 = _mm256_permute2x128_si256(ymm0, ymm4, 0x31); // ymm6=data4_5
458458
let ymm7 = _mm256_permute2x128_si256(ymm2, ymm4, 0x21); // ymm7=data6_7
459459

460-
unsafe {
461-
let out_data = core::mem::transmute::<*mut i16, *mut __m256i>(data.as_mut_ptr());
462-
_mm256_storeu_si256(out_data, ymm3);
463-
_mm256_storeu_si256(out_data.add(1), ymm5);
464-
_mm256_storeu_si256(out_data.add(2), ymm6);
465-
_mm256_storeu_si256(out_data.add(3), ymm7);
466-
}
460+
avx_store(ymm3, &mut data[0..16]);
461+
avx_store(ymm5, &mut data[16..32]);
462+
avx_store(ymm6, &mut data[32..48]);
463+
avx_store(ymm7, &mut data[48..64]);
467464
}
468465

469466
/// Safe wrapper for an unaligned AVX load
470467
#[target_feature(enable = "avx2")]
471468
#[inline]
472-
fn avx_load(input: &[i16; 16]) -> __m256i {
469+
fn avx_load(input: &[i16]) -> __m256i {
470+
assert!(input.len() == 16);
473471
assert!(core::mem::size_of::<[i16; 16]>() == core::mem::size_of::<__m256i>());
474472
// SAFETY: we've checked sizes above. The load is unaligned, so no alignment requirements.
475473
unsafe { _mm256_loadu_si256(input.as_ptr() as *const __m256i) }
@@ -478,7 +476,8 @@ fn avx_load(input: &[i16; 16]) -> __m256i {
478476
/// Safe wrapper for an unaligned AVX store
479477
#[target_feature(enable = "avx2")]
480478
#[inline]
481-
fn avx_store(input: __m256i, output: &mut [i16; 16]) {
479+
fn avx_store(input: __m256i, output: &mut [i16]) {
480+
assert!(output.len() == 16);
482481
assert!(core::mem::size_of::<[i16; 16]>() == core::mem::size_of::<__m256i>());
483482
// SAFETY: we've checked sizes above. The store is unaligned, so no alignment requirements.
484483
unsafe { _mm256_storeu_si256(output.as_mut_ptr() as *mut __m256i, input) }

0 commit comments

Comments
 (0)