@@ -56,6 +56,8 @@ int sgemm_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
 
 int shgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
     hfloat16 *, BLASLONG, hfloat16 *, BLASLONG, float *, BLASLONG);
+int bgemm_beta(BLASLONG, BLASLONG, BLASLONG, bfloat16,
+    bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
 int sbgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
     bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
 int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
@@ -83,6 +85,10 @@ int shgemm_incopy(BLASLONG m, BLASLONG n, hfloat16 *a, BLASLONG lda, hfloat16 *b
 int shgemm_itcopy(BLASLONG m, BLASLONG n, hfloat16 *a, BLASLONG lda, hfloat16 *b);
 int shgemm_oncopy(BLASLONG m, BLASLONG n, hfloat16 *a, BLASLONG lda, hfloat16 *b);
 int shgemm_otcopy(BLASLONG m, BLASLONG n, hfloat16 *a, BLASLONG lda, hfloat16 *b);
+int bgemm_incopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
+int bgemm_itcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
+int bgemm_oncopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
+int bgemm_otcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
 int sbgemm_incopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
 int sbgemm_itcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
 int sbgemm_oncopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
@@ -511,6 +517,7 @@ int xher2k_kernel_LN(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdoubl
 int xher2k_kernel_LC(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
 
 int shgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, hfloat16 *, hfloat16 *, float *, BLASLONG);
+int bgemm_kernel(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, bfloat16 *, bfloat16 *, BLASLONG);
 int sbgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
 int sgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG);
 int dgemm_kernel(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
@@ -668,6 +675,11 @@ int shgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, hfloat16 *, hfloat16 *, BLAS
 int shgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, hfloat16 *, hfloat16 *, BLASLONG);
 int shgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, hfloat16 *, hfloat16 *, BLASLONG);
 
+int bgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
+int bgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
+int bgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
+int bgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
+
 int sbgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
 int sbgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
 int sbgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
@@ -770,6 +782,11 @@ int shgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, hfloat16 *, hfloat16
 int shgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, hfloat16 *, hfloat16 *, BLASLONG);
 int shgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, hfloat16 *, hfloat16 *, BLASLONG);
 
+int bgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
+int bgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
+int bgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
+int bgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
+
 int sbgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
 int sbgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
 int sbgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
0 commit comments