@@ -64,7 +64,7 @@ pub fn fdct_avx2(data: &mut [i16; 64]) {
6464}
6565
6666#[ target_feature( enable = "avx2" ) ]
67- unsafe fn fdct_avx2_internal ( data : & mut [ i16 ; 64 ] ) {
67+ fn fdct_avx2_internal ( data : & mut [ i16 ; 64 ] ) {
6868 #[ target_feature( enable = "avx2" ) ]
6969 #[ allow( non_snake_case) ]
7070 #[ inline]
@@ -420,12 +420,10 @@ unsafe fn fdct_avx2_internal(data: &mut [i16; 64]) {
420420 ( t1, t2, t3, t4)
421421 }
422422
423- let in_data = core:: mem:: transmute :: < * mut i16 , * mut __m256i > ( data. as_mut_ptr ( ) ) ;
424-
425- let ymm4 = _mm256_loadu_si256 ( in_data) ;
426- let ymm5 = _mm256_loadu_si256 ( in_data. add ( 1 ) ) ;
427- let ymm6 = _mm256_loadu_si256 ( in_data. add ( 2 ) ) ;
428- let ymm7 = _mm256_loadu_si256 ( in_data. add ( 3 ) ) ;
423+ let ymm4 = avx_load ( data[ 0 ..16 ] . try_into ( ) . unwrap ( ) ) ;
424+ let ymm5 = avx_load ( data[ 16 ..32 ] . try_into ( ) . unwrap ( ) ) ;
425+ let ymm6 = avx_load ( data[ 32 ..48 ] . try_into ( ) . unwrap ( ) ) ;
426+ let ymm7 = avx_load ( data[ 48 ..64 ] . try_into ( ) . unwrap ( ) ) ;
429427
430428 // ---- Pass 1: process rows.
431429 // ymm4=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
@@ -459,12 +457,10 @@ unsafe fn fdct_avx2_internal(data: &mut [i16; 64]) {
459457 let ymm6 = _mm256_permute2x128_si256 ( ymm0, ymm4, 0x31 ) ; // ymm6=data4_5
460458 let ymm7 = _mm256_permute2x128_si256 ( ymm2, ymm4, 0x21 ) ; // ymm7=data6_7
461459
462- let out_data = core:: mem:: transmute :: < * mut i16 , * mut __m256i > ( data. as_mut_ptr ( ) ) ;
463-
464- _mm256_storeu_si256 ( out_data, ymm3) ;
465- _mm256_storeu_si256 ( out_data. add ( 1 ) , ymm5) ;
466- _mm256_storeu_si256 ( out_data. add ( 2 ) , ymm6) ;
467- _mm256_storeu_si256 ( out_data. add ( 3 ) , ymm7) ;
460+ avx_store ( ymm3, & mut data[ 0 ..16 ] . try_into ( ) . unwrap ( ) ) ;
461+ avx_store ( ymm5, & mut data[ 16 ..32 ] . try_into ( ) . unwrap ( ) ) ;
462+ avx_store ( ymm6, & mut data[ 32 ..48 ] . try_into ( ) . unwrap ( ) ) ;
463+ avx_store ( ymm7, & mut data[ 48 ..64 ] . try_into ( ) . unwrap ( ) ) ;
468464}
469465
470466/// Safe wrapper for an unaligned AVX load
0 commit comments