diff options
-rw-r--r-- | redist/dclapack.h | 8 | ||||
-rw-r--r-- | redist/dclhelpers.c | 25 | ||||
-rw-r--r-- | redist/dclhelpers.h | 24 |
3 files changed, 54 insertions, 3 deletions
diff --git a/redist/dclapack.h b/redist/dclapack.h index bebda2d..76ce545 100644 --- a/redist/dclapack.h +++ b/redist/dclapack.h @@ -11,8 +11,10 @@ #ifdef DYNAMIC_INDEX #define _(A,O,P) A[O*n+P] + #define _I(A,O,P,n) A[O*n+P] #else #define _(A,O,P) A[O][P] + #define _I(A,O,P,n) A[O][P] #endif @@ -192,7 +194,7 @@ PRINT(Ainv,n,n); \ for (j=0; j<p; j++) { \ _(R,i,j) = 0.0f; \ for (k=0; k<m; k++) { \ - _(R,i,j) += _(A,i,k) * _(B,k,j); \ + _(R,i,j) += _(A,i,k) * _I(B,k,j,m); \ } \ } \ } \ @@ -211,7 +213,7 @@ PRINT(Ainv,n,n); \ for (j=0; j<p; j++) { \ _(R,i,j) = _(C,i,j); \ for (k=0; k<m; k++) { \ - _(R,i,j) += _(A,i,k) * _(B,k,j); \ + _(R,i,j) += _(A,i,k) * _I(B,k,j,m); \ } \ } \ } \ @@ -231,7 +233,7 @@ PRINT(Ainv,n,n); \ for (j=0; j<p; j++) { \ sum = 0.0f; \ for (k=0; k<m; k++) { \ - sum += _(A,i,k) * _(B,k,j); \ + sum += _(A,i,k) * _I(B,k,j,m); \ } \ _(R,i,j) = alpha * sum + beta * _(C,i,j); \ } \ diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c index 2ee6c4a..23c3ca5 100644 --- a/redist/dclhelpers.c +++ b/redist/dclhelpers.c @@ -60,4 +60,29 @@ void dclGMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const GMULADD(R,A,B,C,alpha,beta,n,m,p); } +/* dclGMulAdd( R, ((transA)?TRANS(A):A, (transB)?TRANS(B):B), C, alpha, beta, n, m, p ); */ +void dcldgemm( + char transA, + char transB, + int m, + int n, + int k, + DCL_FLOAT alpha, + const DCL_FLOAT* A, + int lda, //must be n + const DCL_FLOAT* B, + int ldb, //must be m + DCL_FLOAT beta, + const DCL_FLOAT * C, + int ldc //must be n + ) +{ + DCL_FLOAT * ta; + DCL_FLOAT * tb; + if( transA ) + { + ta = alloca( sizeof( DCL_FLOAT ) * n * m ); + + } +} diff --git a/redist/dclhelpers.h b/redist/dclhelpers.h index fd86c78..fd4e02d 100644 --- a/redist/dclhelpers.h +++ b/redist/dclhelpers.h @@ -57,5 +57,29 @@ void dclMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const D R (n by p) */ void dclGMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p ); + +/******************************** + * Auxiliary functionality in C * + ********************************/ + +//Matches dgemm from lapack. +void dcldgemm( + char transA, + char transB, + int m, + int n, + int k, + DCL_FLOAT alpha, + const DCL_FLOAT* A, + int lda, //must be n + const DCL_FLOAT* B, + int ldb, //must be m + DCL_FLOAT beta, + const DCL_FLOAT * C, + int ldc //must be n + ); + + + #endif |