11/*********************************************************************/
22/* Copyright 2009, 2010 The University of Texas at Austin. */
3+ /* Copyright 2025 The OpenBLAS Project. */
34/* All rights reserved. */
45/* */
56/* Redistribution and use in source and binary forms, with or */
@@ -284,6 +285,8 @@ int strmm_ilnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX
/* strmm_* copy prototypes on float buffers: each takes (m, n, a, lda, posX, posY, b). */
284285int strmm_ilnncopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , float * b );
285286int strmm_iltucopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , float * b );
286287int strmm_iltncopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , float * b );
/* scomm_ncopy / scomm_tcopy take the shorter list (m, n, a, lda, b), with no
   posX/posY arguments.
   NOTE(review): the layout each variant writes into b is not visible from
   these declarations -- confirm against the implementations. */
288+ int scomm_ncopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , float * b );
289+ int scomm_tcopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , float * b );
287290int strmm_olnucopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , float * b );
288291int strmm_olnncopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , float * b );
289292int strmm_oltucopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , float * b );
@@ -301,6 +304,8 @@ int dtrmm_ilnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG pos
/* dtrmm_* copy prototypes on double buffers: each takes (m, n, a, lda, posX, posY, b). */
301304int dtrmm_ilnncopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , double * b );
302305int dtrmm_iltucopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , double * b );
303306int dtrmm_iltncopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , double * b );
/* dcomm_tcopy / dcomm_ncopy take the shorter list (m, n, a, lda, b), with no
   posX/posY arguments.
   NOTE(review): the layout each variant writes into b is not visible from
   these declarations -- confirm against the implementations. */
307+ int dcomm_tcopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , double * b );
308+ int dcomm_ncopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , double * b );
304309int dtrmm_olnucopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , double * b );
305310int dtrmm_olnncopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , double * b );
306311int dtrmm_oltucopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , double * b );
@@ -335,6 +340,8 @@ int ctrmm_ilnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX
/* ctrmm_* copy prototypes on float buffers: each takes (m, n, a, lda, posX, posY, b).
   NOTE(review): presumably a and b hold interleaved complex values -- the
   declarations only show float pointers; confirm against the implementations. */
335340int ctrmm_ilnncopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , float * b );
336341int ctrmm_iltucopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , float * b );
337342int ctrmm_iltncopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , float * b );
/* ccomm_tcopy / ccomm_ncopy take the shorter list (m, n, a, lda, b), with no
   posX/posY arguments.
   NOTE(review): the layout each variant writes into b is not visible from
   these declarations -- confirm against the implementations. */
343+ int ccomm_tcopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , float * b );
344+ int ccomm_ncopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , float * b );
338345int ctrmm_olnucopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , float * b );
339346int ctrmm_olnncopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , float * b );
340347int ctrmm_oltucopy (BLASLONG m , BLASLONG n , float * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , float * b );
@@ -352,6 +359,8 @@ int ztrmm_ilnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG pos
/* ztrmm_* copy prototypes on double buffers: each takes (m, n, a, lda, posX, posY, b).
   NOTE(review): presumably a and b hold interleaved complex values -- the
   declarations only show double pointers; confirm against the implementations. */
352359int ztrmm_ilnncopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , double * b );
353360int ztrmm_iltucopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , double * b );
354361int ztrmm_iltncopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , double * b );
/* zcomm_tcopy / zcomm_ncopy take the shorter list (m, n, a, lda, b), with no
   posX/posY arguments.
   NOTE(review): the layout each variant writes into b is not visible from
   these declarations -- confirm against the implementations. */
362+ int zcomm_tcopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , double * b );
363+ int zcomm_ncopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , double * b );
355364int ztrmm_olnucopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , double * b );
356365int ztrmm_olnncopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , double * b );
357366int ztrmm_oltucopy (BLASLONG m , BLASLONG n , double * a , BLASLONG lda , BLASLONG posX , BLASLONG posY , double * b );
@@ -579,6 +588,8 @@ int bgemm_kernel(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, bfloat16 *
/* Kernel prototypes with unnamed parameters: three BLASLONG values, one scalar,
   three pointers, and a trailing BLASLONG.
   sbgemm_kernel takes two bfloat16 pointers followed by a float pointer; the
   sgemm/dgemm kernels use float/double for the scalar and all three pointers.
   NOTE(review): the parameters are presumably (m, n, k, alpha, a, b, c, ldc) --
   the declarations do not name them; confirm against the kernel sources. */
579588int sbgemm_kernel (BLASLONG , BLASLONG , BLASLONG , float , bfloat16 * , bfloat16 * , float * , BLASLONG );
580589int sgemm_kernel (BLASLONG , BLASLONG , BLASLONG , float , float * , float * , float * , BLASLONG );
581590int dgemm_kernel (BLASLONG , BLASLONG , BLASLONG , double , double * , double * , double * , BLASLONG );
/* scomm_kernel / dcomm_kernel have parameter type lists identical to
   sgemm_kernel / dgemm_kernel respectively. */
591+ int scomm_kernel (BLASLONG , BLASLONG , BLASLONG , float , float * , float * , float * , BLASLONG );
592+ int dcomm_kernel (BLASLONG , BLASLONG , BLASLONG , double , double * , double * , double * , BLASLONG );
582593
583594#ifdef QUAD_PRECISION
584595int qgemm_kernel (BLASLONG , BLASLONG , BLASLONG , xidouble * , xidouble * , xidouble * , xdouble * , BLASLONG );
@@ -728,6 +739,16 @@ int cgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float
/* gemm3m kernel prototypes with unnamed parameters: three BLASLONG values, two
   scalars, three pointers, and a trailing BLASLONG (double for z, xdouble for x).
   NOTE(review): the two scalars are presumably the real and imaginary parts of
   alpha -- not stated here; confirm against the kernel sources. */
728739int zgemm3m_kernel (BLASLONG , BLASLONG , BLASLONG , double , double , double * , double * , double * , BLASLONG );
729740int xgemm3m_kernel (BLASLONG , BLASLONG , BLASLONG , xdouble , xdouble , xdouble * , xdouble * , xdouble * , BLASLONG );
730741
/* ccomm kernel prototypes, four variants (_n, _l, _r, _b) sharing one parameter
   type list: three BLASLONG values, two float scalars, three float pointers,
   and a trailing BLASLONG.
   NOTE(review): what the _n/_l/_r/_b suffixes select (e.g. which operand is
   conjugated) is not visible from these declarations -- confirm before relying
   on it. */
742+ int ccomm_kernel_n (BLASLONG , BLASLONG , BLASLONG , float , float , float * , float * , float * , BLASLONG );
743+ int ccomm_kernel_l (BLASLONG , BLASLONG , BLASLONG , float , float , float * , float * , float * , BLASLONG );
744+ int ccomm_kernel_r (BLASLONG , BLASLONG , BLASLONG , float , float , float * , float * , float * , BLASLONG );
745+ int ccomm_kernel_b (BLASLONG , BLASLONG , BLASLONG , float , float , float * , float * , float * , BLASLONG );
746+
/* zcomm kernel prototypes, four variants (_n, _l, _r, _b) sharing one parameter
   type list: three BLASLONG values, two double scalars, three double pointers,
   and a trailing BLASLONG.
   NOTE(review): what the _n/_l/_r/_b suffixes select (e.g. which operand is
   conjugated) is not visible from these declarations -- confirm before relying
   on it. */
747+ int zcomm_kernel_n (BLASLONG , BLASLONG , BLASLONG , double , double , double * , double * , double * , BLASLONG );
748+ int zcomm_kernel_l (BLASLONG , BLASLONG , BLASLONG , double , double , double * , double * , double * , BLASLONG );
749+ int zcomm_kernel_r (BLASLONG , BLASLONG , BLASLONG , double , double , double * , double * , double * , BLASLONG );
750+ int zcomm_kernel_b (BLASLONG , BLASLONG , BLASLONG , double , double , double * , double * , double * , BLASLONG );
751+
/* shgemm prototypes, three variants (_nn, _nt, _tn) sharing one parameter type
   list: a blas_arg_t pointer, two BLASLONG pointers, two hfloat16 pointers, and
   a trailing BLASLONG.
   NOTE(review): the suffixes presumably encode the transpose state of the two
   input operands, and the pointer arguments presumably carry index ranges and
   work buffers -- none of this is stated here; confirm against the drivers. */
731752int shgemm_nn (blas_arg_t * , BLASLONG * , BLASLONG * , hfloat16 * , hfloat16 * , BLASLONG );
732753int shgemm_nt (blas_arg_t * , BLASLONG * , BLASLONG * , hfloat16 * , hfloat16 * , BLASLONG );
733754int shgemm_tn (blas_arg_t * , BLASLONG * , BLASLONG * , hfloat16 * , hfloat16 * , BLASLONG );
0 commit comments