Skip to content

Commit 7e055be

Browse files
Shnatsel authored and vstroebel committed
Use the safe load/store wrappers inside the AVX FDCT, drop the 'unsafe fn' from it now that it contains no unsafe ops
1 parent 922a26c commit 7e055be

File tree

1 file changed

+9
-13
lines changed

1 file changed

+9
-13
lines changed

src/avx2/fdct.rs

Lines changed: 9 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ pub fn fdct_avx2(data: &mut [i16; 64]) {
6464
}
6565

6666
#[target_feature(enable = "avx2")]
67-
unsafe fn fdct_avx2_internal(data: &mut [i16; 64]) {
67+
fn fdct_avx2_internal(data: &mut [i16; 64]) {
6868
#[target_feature(enable = "avx2")]
6969
#[allow(non_snake_case)]
7070
#[inline]
@@ -420,12 +420,10 @@ unsafe fn fdct_avx2_internal(data: &mut [i16; 64]) {
420420
(t1, t2, t3, t4)
421421
}
422422

423-
let in_data = core::mem::transmute::<*mut i16, *mut __m256i>(data.as_mut_ptr());
424-
425-
let ymm4 = _mm256_loadu_si256(in_data);
426-
let ymm5 = _mm256_loadu_si256(in_data.add(1));
427-
let ymm6 = _mm256_loadu_si256(in_data.add(2));
428-
let ymm7 = _mm256_loadu_si256(in_data.add(3));
423+
let ymm4 = avx_load(data[0..16].try_into().unwrap());
424+
let ymm5 = avx_load(data[16..32].try_into().unwrap());
425+
let ymm6 = avx_load(data[32..48].try_into().unwrap());
426+
let ymm7 = avx_load(data[48..64].try_into().unwrap());
429427

430428
// ---- Pass 1: process rows.
431429
// ymm4=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
@@ -459,12 +457,10 @@ unsafe fn fdct_avx2_internal(data: &mut [i16; 64]) {
459457
let ymm6 = _mm256_permute2x128_si256(ymm0, ymm4, 0x31); // ymm6=data4_5
460458
let ymm7 = _mm256_permute2x128_si256(ymm2, ymm4, 0x21); // ymm7=data6_7
461459

462-
let out_data = core::mem::transmute::<*mut i16, *mut __m256i>(data.as_mut_ptr());
463-
464-
_mm256_storeu_si256(out_data, ymm3);
465-
_mm256_storeu_si256(out_data.add(1), ymm5);
466-
_mm256_storeu_si256(out_data.add(2), ymm6);
467-
_mm256_storeu_si256(out_data.add(3), ymm7);
460+
avx_store(ymm3, &mut data[0..16].try_into().unwrap());
461+
avx_store(ymm5, &mut data[16..32].try_into().unwrap());
462+
avx_store(ymm6, &mut data[32..48].try_into().unwrap());
463+
avx_store(ymm7, &mut data[48..64].try_into().unwrap());
468464
}
469465

470466
/// Safe wrapper for an unaligned AVX load

0 commit comments

Comments
 (0)