@ -728,14 +728,22 @@ void kernel_dsyrk_nt_l_12x4_lib44cc(int kmax, double *alpha, double *A, int sda,
// 12x4 kernel prototypes (continued). Each `_vs` variant takes the same
// arguments as its base kernel plus two trailing ints (m1, n1; named km, kn
// for dtrsm_nt_ll_inv).
// NOTE(review): m1/n1 presumably limit the rows/columns actually stored —
// confirm against the kernel implementations.

// dsyrk
void kernel_dsyrk_nt_l_12x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dsyrk_nt_u_12x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dsyrk_nt_u_12x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dger2k / dsyr2k: two (A, B) operand pairs, each A with its own sda
void kernel_dger2k_nt_12x4_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dger2k_nt_12x4_vs_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dsyr2k_nt_l_12x4_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dsyr2k_nt_l_12x4_vs_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dtrmm, rl / rl_one
void kernel_dtrmm_nt_rl_12x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nt_rl_12x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_rl_one_12x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nt_rl_one_12x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dtrmm, ru / ru_one (the lib444c variants take `sdc` for C where lib44cc takes `ldc`)
void kernel_dtrmm_nt_ru_12x4_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd);
void kernel_dtrmm_nt_ru_12x4_vs_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_ru_12x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nt_ru_12x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_ru_one_12x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nt_ru_one_12x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dtrsm
void kernel_dtrsm_nt_ll_inv_12x4_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E);
void kernel_dtrsm_nt_ll_inv_12x4_vs_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E, int km, int kn);
void kernel_dtrsm_nt_rl_inv_12x4_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *dE);
void kernel_dtrsm_nt_rl_inv_12x4_vs_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *dE, int m1, int n1);
// NOTE(review): unlike the lib44cc4 prototypes above, this one has no `beta`
// parameter and takes `lde` for E — confirm this matches the definition.
void kernel_dtrsm_nt_rl_inv_12x4_lib44ccc(int kmax, double *A, int sda, double *B, double *C, int ldc, double *D, int ldd, double *E, int lde, double *dE);
@ -751,6 +759,7 @@ void kernel_dpotrf_nt_l_12x4_vs_lib44cc(int kmax, double *A, int sda, double *B,
// 4x12
// dgemm: here B (not A) carries the extra stride argument `sdb`.
void kernel_dgemm_nt_4x12_lib44cc(int kmax, double *alpha, double *A, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dgemm_nt_4x12_vs_lib44cc(int kmax, double *alpha, double *A, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// NOTE(review): A_p / B_p look like extra operand pointers (prefetch targets?) —
// their role is not visible from the prototype; confirm against the kernel source.
void kernel_dgemm_nt_4x12_p0_vs_lib44cc(int kmax, double *alpha, double *A, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1, double *A_p, double *B_p);
// dtrmm "tran" variants: C is passed with `sdc`, D with `ldd`.
void kernel_dtrmm_nt_rl_4x12_tran_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd);
void kernel_dtrmm_nt_rl_4x12_tran_vs_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_rl_one_4x12_tran_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd);
@ -764,6 +773,10 @@ void kernel_dsyrk_nt_l_8x8_lib44cc(int kmax, double *alpha, double *A, int sda,
// 8x8 kernel prototypes (continued): both A and B carry a stride argument
// (sda / sdb). `_vs` variants append two ints (m1, n1).

// dsyrk
void kernel_dsyrk_nt_l_8x8_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dsyrk_nt_u_8x8_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dsyrk_nt_u_8x8_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dger2k / dsyr2k: two (A, B) operand pairs, every operand with its own stride
void kernel_dger2k_nt_8x8_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, int sdb0, double *A1, int sda1, double *B1, int sdb1, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dger2k_nt_8x8_vs_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, int sdb0, double *A1, int sda1, double *B1, int sdb1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dsyr2k_nt_l_8x8_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, int sdb0, double *A1, int sda1, double *B1, int sdb1, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dsyr2k_nt_l_8x8_vs_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, int sdb0, double *A1, int sda1, double *B1, int sdb1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dpotrf: no alpha/beta scalars; takes an extra `dD` pointer
void kernel_dpotrf_nt_l_8x8_lib44cc(int kmax, double *A, int sda, double *B, int sdb, double *C, int ldc, double *D, int ldd, double *dD);
void kernel_dpotrf_nt_l_8x8_vs_lib44cc(int kmax, double *A, int sda, double *B, int sdb, double *C, int ldc, double *D, int ldd, double *dD, int m1, int n1);
// 8x4
@ -775,14 +788,22 @@ void kernel_dsyrk_nt_l_8x4_lib44cc(int kmax, double *alpha, double *A, int sda,
// 8x4 kernel prototypes (continued). Each `_vs` variant takes the same
// arguments as its base kernel plus two trailing ints (m1, n1; named km, kn
// for dtrsm_nt_ll_inv).
// NOTE(review): m1/n1 presumably limit the rows/columns actually stored —
// confirm against the kernel implementations.

// dsyrk
void kernel_dsyrk_nt_l_8x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dsyrk_nt_u_8x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dsyrk_nt_u_8x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dger2k / dsyr2k: two (A, B) operand pairs, each A with its own sda
void kernel_dger2k_nt_8x4_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dger2k_nt_8x4_vs_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dsyr2k_nt_l_8x4_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dsyr2k_nt_l_8x4_vs_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dtrmm, rl / rl_one
void kernel_dtrmm_nt_rl_8x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nt_rl_8x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_rl_one_8x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nt_rl_one_8x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dtrmm, ru / ru_one (the lib444c variants take `sdc` for C where lib44cc takes `ldc`)
void kernel_dtrmm_nt_ru_8x4_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd);
void kernel_dtrmm_nt_ru_8x4_vs_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_ru_8x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nt_ru_8x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_ru_one_8x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nt_ru_one_8x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dtrsm
void kernel_dtrsm_nt_ll_inv_8x4_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E);
void kernel_dtrsm_nt_ll_inv_8x4_vs_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E, int km, int kn);
void kernel_dtrsm_nt_rl_inv_8x4_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *dE);
void kernel_dtrsm_nt_rl_inv_8x4_vs_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *dE, int m1, int n1);
// NOTE(review): unlike the lib44cc4 prototypes above, this one has no `beta`
// parameter and takes `lde` for E — confirm this matches the definition.
void kernel_dtrsm_nt_rl_inv_8x4_lib44ccc(int kmax, double *A, int sda, double *B, double *C, int ldc, double *D, int ldd, double *E, int lde, double *dE);
@ -813,6 +834,10 @@ void kernel_dsyrk_nt_l_4x4_lib44cc(int kmax, double *alpha, double *A, double *B
// 4x4 kernel prototypes (continued): no stride arguments for A or B.
// `_vs` variants append two ints (m1, n1).

// dsyrk
void kernel_dsyrk_nt_l_4x4_vs_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dsyrk_nt_u_4x4_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dsyrk_nt_u_4x4_vs_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dger2k / dsyr2k: two (A, B) operand pairs
void kernel_dger2k_nt_4x4_lib44cc(int kmax, double *alpha, double *A0, double *B0, double *A1, double *B1, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dger2k_nt_4x4_vs_lib44cc(int kmax, double *alpha, double *A0, double *B0, double *A1, double *B1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dsyr2k_nt_l_4x4_lib44cc(int kmax, double *alpha, double *A0, double *B0, double *A1, double *B1, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dsyr2k_nt_l_4x4_vs_lib44cc(int kmax, double *alpha, double *A0, double *B0, double *A1, double *B1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dtrmm, rl (the `tran` lib444c variant takes C without a leading-dimension argument)
void kernel_dtrmm_nt_rl_4x4_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nt_rl_4x4_vs_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_rl_4x4_tran_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd);
@ -821,6 +846,8 @@ void kernel_dtrmm_nt_rl_one_4x4_lib44cc(int kmax, double *alpha, double *A, doub
// 4x4 dtrmm prototypes (continued). The lib444c variants take C without a
// leading-dimension argument; the lib44cc variants take `ldc`.
// `_vs` variants append two ints (m1, n1).

// rl_one
void kernel_dtrmm_nt_rl_one_4x4_vs_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_rl_one_4x4_tran_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd);
void kernel_dtrmm_nt_rl_one_4x4_tran_vs_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd, int m1, int n1);
// ru
void kernel_dtrmm_nt_ru_4x4_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd);
void kernel_dtrmm_nt_ru_4x4_vs_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_ru_4x4_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nt_ru_4x4_vs_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_ru_4x4_tran_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd);
@ -829,6 +856,8 @@ void kernel_dtrmm_nt_ru_one_4x4_lib44cc(int kmax, double *alpha, double *A, doub
// 4x4 dtrmm ru_one and dtrsm prototypes (continued).
// `_vs` variants append two ints (m1, n1; named km, kn for dtrsm_nt_ll_inv).

// dtrmm, ru_one
void kernel_dtrmm_nt_ru_one_4x4_vs_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_ru_one_4x4_tran_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd);
void kernel_dtrmm_nt_ru_one_4x4_tran_vs_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd, int m1, int n1);
// dtrsm: no alpha; E is the extra matrix operand
void kernel_dtrsm_nt_ll_inv_4x4_lib44cc4(int kmax, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *inv_diag_E);
void kernel_dtrsm_nt_ll_inv_4x4_vs_lib44cc4(int kmax, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *inv_diag_E, int km, int kn);
void kernel_dtrsm_nt_rl_inv_4x4_lib44cc4(int kmax, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *dE);
void kernel_dtrsm_nt_rl_inv_4x4_vs_lib44cc4(int kmax, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *dE, int m1, int n1);
// lib44ccc: same as lib44cc4 above but E additionally takes `lde`
void kernel_dtrsm_nt_rl_inv_4x4_lib44ccc(int kmax, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, double *dE);
@ -852,6 +881,8 @@ void kernel_dgemm_nt_12x4_lib4ccc(int kmax, double *alpha, double *A, int sda, d
// 12x4 lib4ccc kernel prototypes (continued): A is passed with `sda`, while
// B, C and D are each passed with a leading dimension (ldb, ldc, ldd).
// `_vs` variants append two ints (m1, n1).

// dgemm / dsyrk
void kernel_dgemm_nt_12x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dsyrk_nt_l_12x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dsyrk_nt_l_12x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dger2k: two (A, B) operand pairs
void kernel_dger2k_nt_12x4_lib4ccc(int kmax, double *alpha, double *A0, int sda0, double *B0, int ldb0, double *A1, int sda1, double *B1, int ldb1, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dger2k_nt_12x4_vs_lib4ccc(int kmax, double *alpha, double *A0, int sda0, double *B0, int ldb0, double *A1, int sda1, double *B1, int ldb1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dtrmm, nn
void kernel_dtrmm_nn_rl_12x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nn_rl_12x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nn_rl_one_12x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd);
@ -868,6 +899,8 @@ void kernel_dtrmm_nt_ru_12x4_lib4ccc(int kmax, double *alpha, double *A, int sda
// 12x4 lib4ccc dtrmm / dtrsm prototypes (continued).
// `_vs` variants append two ints (m1, n1; named km, kn for dtrsm_nn_ll_inv).

// dtrmm, nt ru / ru_one
void kernel_dtrmm_nt_ru_12x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_ru_one_12x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nt_ru_one_12x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// dtrsm, nn: the lib4ccc4 variants pass E with `sde`, the lib4cccc variants with `lde`
void kernel_dtrsm_nn_ll_inv_12x4_lib4ccc4(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E);
void kernel_dtrsm_nn_ll_inv_12x4_vs_lib4ccc4(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E, int km, int kn);
void kernel_dtrsm_nn_ll_one_12x4_lib4cccc(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde);
void kernel_dtrsm_nn_ll_one_12x4_vs_lib4cccc(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, int m1, int n1);
void kernel_dtrsm_nn_rl_inv_12x4_lib4cccc(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, double *dE);
@ -920,6 +953,8 @@ void kernel_dgemm_nt_8x4_lib4ccc(int kmax, double *alpha, double *A, int sda, do
// 8x4 lib4ccc kernel prototypes: dgemm, dsyrk, dger2k and dtrmm (nn_rl).
// Each prototype is declared exactly once; the verbatim repeats were redundant.
// The single-product kernels take one A/B pair (A with sda, B with ldb);
// the dger2k kernels take two pairs (A0/B0 and A1/B1), each with its own
// sda*/ldb* argument. "_vs" prototypes append two ints, m1 and n1.
// NOTE(review): m1/n1 presumably bound the rows/columns actually stored - confirm in the kernel sources.
void kernel_dgemm_nt_8x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dsyrk_nt_l_8x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dsyrk_nt_l_8x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dger2k_nt_8x4_lib4ccc(int kmax, double *alpha, double *A0, int sda0, double *B0, int ldb0, double *A1, int sda1, double *B1, int ldb1, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dger2k_nt_8x4_vs_lib4ccc(int kmax, double *alpha, double *A0, int sda0, double *B0, int ldb0, double *A1, int sda1, double *B1, int ldb1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nn_rl_8x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nn_rl_8x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nn_rl_one_8x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd);
@ -936,6 +971,8 @@ void kernel_dtrmm_nt_ru_8x4_lib4ccc(int kmax, double *alpha, double *A, int sda,
// dtrmm (nt_ru) / dtrsm (nn_ll, nn_rl) 8x4 kernel prototypes
// (suffixes lib4ccc, lib4ccc4, lib4cccc).
// Each prototype is declared exactly once; the verbatim repeats were redundant.
// The lib4ccc4 dtrsm kernels pass E with `sde` plus an `inv_diag_E` array;
// the lib4cccc ones pass E with `lde` (and `dE` for the rl_inv kernel).
// "_vs" prototypes append two ints (m1/n1, or km/kn for the ll_inv kernel).
// NOTE(review): presumably those bound the rows/columns actually stored - confirm in the kernel sources.
void kernel_dtrmm_nt_ru_8x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_ru_one_8x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nt_ru_one_8x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrsm_nn_ll_inv_8x4_lib4ccc4(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E);
void kernel_dtrsm_nn_ll_inv_8x4_vs_lib4ccc4(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E, int km, int kn);
void kernel_dtrsm_nn_ll_one_8x4_lib4cccc(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde);
void kernel_dtrsm_nn_ll_one_8x4_vs_lib4cccc(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, int m1, int n1);
void kernel_dtrsm_nn_rl_inv_8x4_lib4cccc(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, double *dE);
@ -986,6 +1023,8 @@ void kernel_dgemm_nt_4x4_lib4ccc(int kmax, double *alpha, double *A, double *B,
// 4x4 kernel prototypes: dgemm, dsyrk, dger2k and dtrmm (nn_rl),
// suffixes lib4ccc and lib4c4c.
// Each prototype is declared exactly once; the verbatim repeats were redundant.
// Unlike the 8x4 / 12x4 prototypes, A is passed without an `sda` argument.
// The lib4c4c "_tran" kernel also passes C without an `ldc` argument.
// "_vs" prototypes append two ints, m1 and n1.
// NOTE(review): m1/n1 presumably bound the rows/columns actually stored - confirm in the kernel sources.
void kernel_dgemm_nt_4x4_vs_lib4ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dsyrk_nt_l_4x4_lib4ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dsyrk_nt_l_4x4_vs_lib4ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dger2k_nt_4x4_lib4ccc(int kmax, double *alpha, double *A0, double *B0, int ldb0, double *A1, double *B1, int ldb1, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dger2k_nt_4x4_vs_lib4ccc(int kmax, double *alpha, double *A0, double *B0, int ldb0, double *A1, double *B1, int ldb1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nn_rl_4x4_lib4ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd);
void kernel_dtrmm_nn_rl_4x4_vs_lib4ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nn_rl_4x4_tran_lib4c4c(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, double *D, int ldd);
@ -1018,6 +1057,8 @@ void kernel_dtrmm_nt_ru_one_4x4_lib4ccc(int kmax, double *alpha, double *A, doub
// dtrmm (nt_ru_one) / dtrsm (nn_ll, nn_rl) 4x4 kernel prototypes
// (suffixes lib4ccc, lib4c4c, lib4ccc4, lib4cccc).
// Each prototype is declared exactly once; the verbatim repeats were redundant.
// A is passed without an `sda` argument; in the lib4ccc4 dtrsm kernels E is
// likewise passed without a stride, followed by an `inv_diag_E` array.
// "_vs" prototypes append two ints (m1/n1, or km/kn for the ll_inv kernel).
// NOTE(review): presumably those bound the rows/columns actually stored - confirm in the kernel sources.
void kernel_dtrmm_nt_ru_one_4x4_vs_lib4ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
void kernel_dtrmm_nt_ru_one_4x4_tran_lib4c4c(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, double *D, int ldd);
void kernel_dtrmm_nt_ru_one_4x4_tran_vs_lib4c4c(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, double *D, int ldd, int m1, int n1);
void kernel_dtrsm_nn_ll_inv_4x4_lib4ccc4(int kmax, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *inv_diag_E);
void kernel_dtrsm_nn_ll_inv_4x4_vs_lib4ccc4(int kmax, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *inv_diag_E, int km, int kn);
void kernel_dtrsm_nn_ll_one_4x4_lib4cccc(int kmax, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde);
void kernel_dtrsm_nn_ll_one_4x4_vs_lib4cccc(int kmax, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, int m1, int n1);
void kernel_dtrsm_nn_rl_inv_4x4_lib4cccc(int kmax, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, double *dE);
@ -1181,6 +1222,7 @@ void kernel_drowsw_lib(int kmax, double *pA, int lda, double *pC, int ldc);
// dgemm_nt lib44cc kernels that take the extent of one dimension as a run-time
// argument (`n` for the 12xn / 8xn kernels, `m` for the mx12 kernel) followed
// by `k`. Both A and B are passed with a stride argument (sda, sdb); C and D
// with ldc / ldd.
// NOTE(review): the trailing A_p / B_p pointers look like prefetch addresses - confirm in the kernel sources.
// Each prototype and section marker appears exactly once; the verbatim repeats were redundant.
// 12
void kernel_dgemm_nt_12xn_p0_lib44cc(int n, int k, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, double *A_p, double *B_p);
void kernel_dgemm_nt_12xn_pl_lib44cc(int n, int k, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, double *A_p, double *B_p);
void kernel_dgemm_nt_mx12_p0_lib44cc(int m, int k, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, double *A_p, double *B_p);
// 8
void kernel_dgemm_nt_8xn_p0_lib44cc(int n, int k, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, double *A_p, double *B_p);
@ -1232,11 +1274,43 @@ void kernel_dgemm_tt_8x8_libc8cc(int kmax, double *alpha, double *A, int lda, do
// dgemm_tt 8x8 "_vs" kernel, libc8cc flavour: A is passed with `lda`, B without
// any stride argument, C and D with ldc / ldd, followed by two ints m1 and n1.
// Declared once; the verbatim repeat was redundant.
void kernel_dgemm_tt_8x8_vs_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1);
// level 2 BLAS
// "libc" kernels: the matrix is passed as a plain pointer plus `lda` (or `ldc` /
// `ldd` for dger). Every "_vs" prototype takes the base arguments plus one
// trailing int `km`.
// dgemv_t takes both `beta` and an input vector `y`; dgemv_n and dsymv take
// only `alpha`, `x` and the output `z`.
void kernel_dgemv_n_4_libc(int kmax, double *alpha, double *A, int lda, double *x, double *z);
void kernel_dgemv_n_4_vs_libc(int kmax, double *alpha, double *A, int lda, double *x, double *z, int km);
void kernel_dgemv_t_4_libc(int kmax, double *alpha, double *A, int lda, double *x, double *beta, double *y, double *z);
void kernel_dgemv_t_4_vs_libc(int kmax, double *alpha, double *A, int lda, double *x, double *beta, double *y, double *z, int km);
void kernel_dsymv_l_4_libc(int kmax, double *alpha, double *A, int lda, double *x, double *z);
void kernel_dsymv_l_4_vs_libc(int kmax, double *alpha, double *A, int lda, double *x, double *z, int km);
void kernel_dsymv_u_4_libc(int kmax, double *alpha, double *A, int lda, double *x, double *z);
void kernel_dsymv_u_4_vs_libc(int kmax, double *alpha, double *A, int lda, double *x, double *z, int km);
void kernel_dger_4_libc(int kmax, double *alpha, double *x, double *y, double *C, int ldc, double *D, int ldd);
void kernel_dger_4_vs_libc(int kmax, double *alpha, double *x, double *y, double *C, int ldc, double *D, int ldd, int km);
// aux
// Vector helpers operating on `kmax` and one (dvecld) or two (dveccp) double
// arrays. Declared once each; the verbatim repeats were redundant.
// NOTE(review): "inc1" presumably means unit-stride vectors - confirm in the kernel sources.
void kernel_dvecld_inc1(int kmax, double *x);
void kernel_dveccp_inc1(int kmax, double *x, double *y);
// dgetr kernels: each takes a source A with `lda` and a destination C with `ldc`.
// The "nt" prototypes below are commented out, so only the "tn" variants are
// declared. The "_p0" variant takes two extra pointers (Ap, Bp) and the "_vs"
// variant one extra int (m1).
//void kernel_dgetr_nt_8_p0_lib(int kmax, double *A, int lda, double *C, int ldc, double *Ap, double *Bp);
//void kernel_dgetr_nt_8_lib(int kmax, double *A, int lda, double *C, int ldc);
//void kernel_dgetr_nt_4_lib(int kmax, double *A, int lda, double *C, int ldc);
void kernel_dgetr_tn_8_p0_lib(int kmax, double *A, int lda, double *C, int ldc, double *Ap, double *Bp);
void kernel_dgetr_tn_8_lib(int kmax, double *A, int lda, double *C, int ldc);
void kernel_dgetr_tn_4_lib(int kmax, double *A, int lda, double *C, int ldc);
void kernel_dgetr_tn_4_vs_lib(int kmax, double *A, int lda, double *C, int ldc, int m1);
// building blocks for blocked algorithms
//
// Full m x n x k dgemm_nt entry points. Unlike the kernel_* prototypes in this
// header, `alpha` and `beta` are passed by value; pA / pB come with sda / sdb,
// C / D with ldc / ldd.
void blasfeo_hp_dgemm_nt_m2(int m, int n, int k, double alpha, double *pA, int sda, double *pB, int sdb, double beta, double *C, int ldc, double *D, int ldd);
void blasfeo_hp_dgemm_nt_n2(int m, int n, int k, double alpha, double *pA, int sda, double *pB, int sdb, double beta, double *C, int ldc, double *D, int ldd);
//
// Pack-buffer helpers: each takes a matrix A with `lda` and a buffer pA with
// `sda`. The fn / ft variants take both m and n; the ln / lt / ut variants
// take m only.
// NOTE(review): the suffix letters presumably encode full/lower/upper and normal/transposed - confirm in the kernel sources.
void kernel_dpack_buffer_fn(int m, int n, double *A, int lda, double *pA, int sda);
void kernel_dpack_buffer_ft(int m, int n, double *A, int lda, double *pA, int sda);
void kernel_dpack_buffer_ln(int m, double *A, int lda, double *pA, int sda);
void kernel_dpack_buffer_lt(int m, double *A, int lda, double *pA, int sda);
void kernel_dpack_buffer_ut(int m, double *A, int lda, double *pA, int sda);