From 9115ffd3138b460707dd1ba45dd7f6fccde87a46 Mon Sep 17 00:00:00 2001 From: cnlohr Date: Sat, 17 Mar 2018 03:28:23 -0400 Subject: Test DCL. --- redist/test_dcl.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 redist/test_dcl.c (limited to 'redist/test_dcl.c') diff --git a/redist/test_dcl.c b/redist/test_dcl.c new file mode 100644 index 0000000..adea7b5 --- /dev/null +++ b/redist/test_dcl.c @@ -0,0 +1,39 @@ +#include "dclhelpers.h" +#include +#include + + +int main() +{ + FLT A[2][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7} }; + FLT B[4][2]; + dclPrint( A[0], 4, 2, 4 ); + dclTransp( B[0], 2, A[0], 4, 2, 4 ); + dclPrint( B[0], 2, 4, 2 ); + + int i; + for( i = 0; i < 8; i++ ) + { + printf( "%f\n", ((float*)(B[0]))[i] ); + } + + FLT M[3][3] = { + { .32, 1, 0 }, + { 0, 1, 2 }, + { 1, 0, 1 } }; + FLT Mo[3][3]; + dclInv( Mo[0], 3, M[0], 3, 3 ); + dclPrint( Mo[0], 3, 3, 3 ); + + FLT MM[3][3]; + dclMul( MM[0], 3, M[0], 3, Mo[0], 3, 3, 3, 3 ); + + printf( "The following should be an identity matrix\n" ); + dclPrint( MM[0], 3, 3, 3 ); + +//void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m ); + +// dclIdentity( A[0], MATx, 5 ); +// dclPrint( A[0], MATx, MATx, MATy ); +} + -- cgit v1.2.3 From 00fbdfcbe9b1c3cc9f9f0814fe5f60bbf066cbbf Mon Sep 17 00:00:00 2001 From: Justin Berger Date: Sat, 17 Mar 2018 08:15:09 -0600 Subject: Added test target, mul test --- redist/test_dcl.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'redist/test_dcl.c') diff --git a/redist/test_dcl.c b/redist/test_dcl.c index adea7b5..a9512f8 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -1,8 +1,8 @@ #include "dclhelpers.h" +#include #include #include - int main() { FLT A[2][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7} }; @@ -31,9 +31,25 @@ int main() printf( "The following should be an identity matrix\n" ); dclPrint( MM[0], 3, 3, 3 ); -//void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m ); + { + FLT A[3][4]; + dclIdentity(A[0], 4, 3); + dclPrint(A[0], 4, 3, 4); + + FLT x[4] = {7, 8, 9, 10}; + FLT R[4]; + + dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3); + dclPrint(x, 1, 4, 1); + dclPrint(R, 1, 4, 1); + + for (int i = 0; i < 3; i++) + assert(R[i] == x[i]); + assert(R[3] == 0.); + } + // void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m ); -// dclIdentity( A[0], MATx, 5 ); -// dclPrint( A[0], MATx, MATx, MATy ); + // dclIdentity( A[0], MATx, 5 ); + // dclPrint( A[0], MATx, MATx, MATy ); } -- cgit v1.2.3 From 7c97cfe7f63650fc79ce4fa7f081b556ce275475 Mon Sep 17 00:00:00 2001 From: Justin Berger Date: Sat, 17 Mar 2018 08:18:45 -0600 Subject: Made it test dcldgemm instead --- redist/test_dcl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'redist/test_dcl.c') diff --git a/redist/test_dcl.c b/redist/test_dcl.c index a9512f8..6d49548 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -39,7 +39,9 @@ int main() FLT x[4] = {7, 8, 9, 10}; FLT R[4]; - dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3); + // dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3); + dcldgemm(0, 0, 4, 1, 3, 1, A[0], 4, x, 1, 0, R, 1); + dclPrint(x, 1, 4, 1); dclPrint(R, 1, 4, 1); -- cgit v1.2.3 From 0b9e66ad2ff686a4dcf8a6838f33edb203a1bff5 Mon Sep 17 00:00:00 2001 From: Justin Berger Date: Sat, 17 Mar 2018 09:32:22 -0600 Subject: Fixed gemm --- redist/test_dcl.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'redist/test_dcl.c') diff --git a/redist/test_dcl.c b/redist/test_dcl.c index 6d49548..42f4fd6 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -1,5 +1,6 @@ #include "dclhelpers.h" #include +#include #include #include @@ -36,18 +37,24 @@ int main() dclIdentity(A[0], 4, 3); dclPrint(A[0], 4, 3, 4); - FLT x[4] = {7, 8, 9, 10}; - FLT R[4]; + FLT x[4][2] = { + {7, -7}, {8, -8}, {9, -9}, {10, -10}, + }; + FLT R[4][2]; + printf("%p %p %p\n", A, x, R); // dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3); - dcldgemm(0, 0, 4, 1, 3, 1, A[0], 4, x, 1, 0, R, 1); + dcldgemm(0, 0, 3, 4, 2, 1, A[0], 4, x[0], 2, 0, R[0], 2); - dclPrint(x, 1, 4, 1); - dclPrint(R, 1, 4, 1); + dclPrint(x[0], 2, 4, 2); + dclPrint(R[0], 2, 4, 2); - for (int i = 0; i < 3; i++) - assert(R[i] == x[i]); - assert(R[3] == 0.); + for (int j = 0; j < 2; j++) { + for (int i = 0; i < 3; i++) + assert(R[i][j] == x[i][j]); + + assert(fabs(R[3][j]) < .0000001); + } } // void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m ); -- cgit v1.2.3 From 497e65e339edcc77bd272b97b9c1b1b5217a24b6 Mon Sep 17 00:00:00 2001 From: cnlohr Date: Sat, 17 Mar 2018 12:50:32 -0400 Subject: Check in new failing test. --- redist/test_dcl.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'redist/test_dcl.c') diff --git a/redist/test_dcl.c b/redist/test_dcl.c index 42f4fd6..056acdf 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -56,9 +56,32 @@ int main() assert(fabs(R[3][j]) < .0000001); } } - // void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m ); - // dclIdentity( A[0], MATx, 5 ); - // dclPrint( A[0], MATx, MATx, MATy ); + + printf( "The following should be an identity matrix\n" ); + dclPrint( MM[0], 3, 3, 3 ); + + + //Currently failing test... + { + FLT em1[12][20]; + FLT em2[20][12]; + FLT emo[12][12]; + int x, y; + + for( y = 0; y < 12; y++ ) + for( x = 0; x < 20; x++ ) + { + em1[y][x] = (rand()%1000)/1000.0; + em2[x][y] = (rand()%1000)/1000.0; + } + + int m = 12; + int n = 20; + int k = 12; + dcldgemm( 0, 0, m, n, k, 0.1, em1[0], 20, em2[0], 12, .1, emo[0], 12 ); + dclPrint( emo[0], 12, 12, 12 ); + } + } -- cgit v1.2.3 From 04bd16aeb391e67716344268cf0f43d1f31f180a Mon Sep 17 00:00:00 2001 From: cnlohr Date: Sat, 17 Mar 2018 14:21:20 -0400 Subject: Update dcl and test. --- redist/test_dcl.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) (limited to 'redist/test_dcl.c') diff --git a/redist/test_dcl.c b/redist/test_dcl.c index 056acdf..6b0d870 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -12,7 +12,7 @@ int main() dclTransp( B[0], 2, A[0], 4, 2, 4 ); dclPrint( B[0], 2, 4, 2 ); - int i; + int i, j; for( i = 0; i < 8; i++ ) { printf( "%f\n", ((float*)(B[0]))[i] ); @@ -34,54 +34,65 @@ int main() { FLT A[3][4]; - dclIdentity(A[0], 4, 3); - dclPrint(A[0], 4, 3, 4); + dclIdentity( DMS(A), 3, 4); + dclPrint( DMS(A), 3, 4); FLT x[4][2] = { {7, -7}, {8, -8}, {9, -9}, {10, -10}, }; FLT R[4][2]; + dclZero( DMS(R), 4, 2 ); - printf("%p %p %p\n", A, x, R); // dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3); dcldgemm(0, 0, 3, 4, 2, 1, A[0], 4, x[0], 2, 0, R[0], 2); - dclPrint(x[0], 2, 4, 2); - dclPrint(R[0], 2, 4, 2); + dclPrint(DMS(x), 4, 2); + dclPrint(DMS(R), 3, 2); for (int j = 0; j < 2; j++) { for (int i = 0; i < 3; i++) + { + printf( "[%d][%d]\n", i, j ); assert(R[i][j] == x[i][j]); + } assert(fabs(R[3][j]) < .0000001); } } - printf( "The following should be an identity matrix\n" ); - dclPrint( MM[0], 3, 3, 3 ); - +#if 0 //Currently failing test... { +// FLT em1[3][4]; +// FLT em2[4][2]; +// FLT emo[4][2]; + FLT em1[12][20]; FLT em2[20][12]; - FLT emo[12][12]; + FLT emo[20][12]; int x, y; for( y = 0; y < 12; y++ ) for( x = 0; x < 20; x++ ) - { em1[y][x] = (rand()%1000)/1000.0; - em2[x][y] = (rand()%1000)/1000.0; - } + + for( y = 0; y < 20; y++ ) + for( x = 0; x < 12; x++ ) + em2[y][x] = (rand()%1000)/1000.0; int m = 12; int n = 20; int k = 12; - dcldgemm( 0, 0, m, n, k, 0.1, em1[0], 20, em2[0], 12, .1, emo[0], 12 ); - dclPrint( emo[0], 12, 12, 12 ); + + dclPrint( em1[0], 20, 12, 20 ); + dclPrint( em2[0], 12, 20, 12 ); + + dcldgemm( 0, 0, m, n, k, 1.0, em1[0], 20, em2[0], 12, .1, emo[0], 12 ); + dclPrint( emo[0], 12, 20, 12 ); } +#endif } -- cgit v1.2.3 From f6586a91478b1ca5920c58f90925deaafab81465 Mon Sep 17 00:00:00 2001 From: cnlohr Date: Sat, 17 Mar 2018 14:44:10 -0400 Subject: Update tests... seems to work with both cblas and dclapack... Still haven't checked outputs. --- redist/test_dcl.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'redist/test_dcl.c') diff --git a/redist/test_dcl.c b/redist/test_dcl.c index 6b0d870..29c3c72 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -1,8 +1,13 @@ +//gcc -msse2 -O3 -ftree-vectorize test_dcl.c dclhelpers.c os_generic.c -DFLT=double -lpthread -lcblas && valgrind ./a.out + + #include "dclhelpers.h" #include #include #include #include +#include "os_generic.h" +#include int main() { @@ -61,7 +66,7 @@ int main() } -#if 0 +#if 1 //Currently failing test... { @@ -70,8 +75,8 @@ int main() // FLT emo[4][2]; FLT em1[12][20]; - FLT em2[20][12]; - FLT emo[20][12]; + FLT em2[20][20]; + FLT emo[20][20]; int x, y; for( y = 0; y < 12; y++ ) @@ -79,9 +84,13 @@ int main() em1[y][x] = (rand()%1000)/1000.0; for( y = 0; y < 20; y++ ) - for( x = 0; x < 12; x++ ) + for( x = 0; x < 20; x++ ) em2[y][x] = (rand()%1000)/1000.0; + for( y = 0; y < 20; y++ ) + for( x = 0; x < 20; x++ ) + emo[y][x] = 0; + int m = 12; int n = 20; int k = 12; @@ -89,7 +98,23 @@ int main() dclPrint( em1[0], 20, 12, 20 ); dclPrint( em2[0], 12, 20, 12 ); - dcldgemm( 0, 0, m, n, k, 1.0, em1[0], 20, em2[0], 12, .1, emo[0], 12 ); + int i; + + double start = OGGetAbsoluteTime(); + for( i = 0; i < 10000; i++ ) + { + dcldgemm( 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) ); + //cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) ); + +/*void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const double alpha, const double *A, + const int lda, const double *B, const int ldb, + const double beta, double *C, const int ldc);*/ + + } + printf( "Elapsed: %f\n", OGGetAbsoluteTime()-start ); + dclPrint( emo[0], 12, 20, 12 ); } #endif -- cgit v1.2.3 From a3d7611ef9cbdd4171d64942cf3b3d6a0eb5caed Mon Sep 17 00:00:00 2001 From: cnlohr Date: Sat, 17 Mar 2018 14:57:20 -0400 Subject: The results still disagree. --- redist/test_dcl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'redist/test_dcl.c') diff --git a/redist/test_dcl.c b/redist/test_dcl.c index 29c3c72..5435a3c 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -95,16 +95,18 @@ int main() int n = 20; int k = 12; - dclPrint( em1[0], 20, 12, 20 ); - dclPrint( em2[0], 12, 20, 12 ); + dclPrint( DMS(em1), 12, 20 ); + dclPrint( DMS(em2), 20, 12 ); int i; double start = OGGetAbsoluteTime(); for( i = 0; i < 10000; i++ ) { + dclZero( DMS(emo), 20, 20 ); + dcldgemm( 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) ); - //cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) ); + //cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) ); /*void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB, const int M, const int N, -- cgit v1.2.3 From 4cb248d46808c07f029c0c595af8f14517925816 Mon Sep 17 00:00:00 2001 From: Justin Berger Date: Sat, 17 Mar 2018 14:00:32 -0600 Subject: Fixed test --- redist/test_dcl.c | 110 +++++++++++++++++++++++++----------------------------- 1 file changed, 51 insertions(+), 59 deletions(-) (limited to 'redist/test_dcl.c') diff --git a/redist/test_dcl.c b/redist/test_dcl.c index 5435a3c..68a6129 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -1,13 +1,60 @@ //gcc -msse2 -O3 -ftree-vectorize test_dcl.c dclhelpers.c os_generic.c -DFLT=double -lpthread -lcblas && valgrind ./a.out - #include "dclhelpers.h" +#include "os_generic.h" #include +#include #include #include #include -#include "os_generic.h" -#include +#include + +void compareToCblas() { + FLT em1[12][20]; + FLT em2[20][20]; + FLT emo[2][20][20] = {}; + int x, y; + + for (y = 0; y < 12; y++) + for (x = 0; x < 20; x++) + em1[y][x] = (rand() % 1000) / 1000.0; + + for (y = 0; y < 20; y++) + for (x = 0; x < 20; x++) + em2[y][x] = (rand() % 1000) / 1000.0; + + int m = 12; + int n = 20; + int k = 20; + + dclPrint(DMS(em1), 12, 20); + dclPrint(DMS(em2), 20, 12); + + double times[2]; + for (int z = 0; z < 2; z++) { + double start = OGGetAbsoluteTime(); + for (int i = 0; i < 100000; i++) { + dclZero(DMS(emo[z]), 20, 20); + + if (z == 0) { + dcldgemm(0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo[z])); + } else { + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, + DMS(emo[z])); + } + /*void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const double alpha, const double *A, + const int lda, const double *B, const int ldb, + const double beta, double *C, const int ldc);*/ + } + + printf("%s Elapsed: %f\n", z ? "CBlas" : "dcl", times[z] = OGGetAbsoluteTime() - start); + } + printf("%fx difference\n", times[0] / times[1]); + dclPrint(emo[0][0], 12, 20, 12); + dclPrint(emo[1][0], 12, 20, 12); +} int main() { @@ -65,61 +112,6 @@ int main() } } - -#if 1 - - //Currently failing test... - { -// FLT em1[3][4]; -// FLT em2[4][2]; -// FLT emo[4][2]; - - FLT em1[12][20]; - FLT em2[20][20]; - FLT emo[20][20]; - int x, y; - - for( y = 0; y < 12; y++ ) - for( x = 0; x < 20; x++ ) - em1[y][x] = (rand()%1000)/1000.0; - - for( y = 0; y < 20; y++ ) - for( x = 0; x < 20; x++ ) - em2[y][x] = (rand()%1000)/1000.0; - - for( y = 0; y < 20; y++ ) - for( x = 0; x < 20; x++ ) - emo[y][x] = 0; - - int m = 12; - int n = 20; - int k = 12; - - dclPrint( DMS(em1), 12, 20 ); - dclPrint( DMS(em2), 20, 12 ); - - int i; - - double start = OGGetAbsoluteTime(); - for( i = 0; i < 10000; i++ ) - { - dclZero( DMS(emo), 20, 20 ); - - dcldgemm( 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) ); - //cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) ); - -/*void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, - CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const double alpha, const double *A, - const int lda, const double *B, const int ldb, - const double beta, double *C, const int ldc);*/ - - } - printf( "Elapsed: %f\n", OGGetAbsoluteTime()-start ); - - dclPrint( emo[0], 12, 20, 12 ); - } -#endif - + compareToCblas(); } -- cgit v1.2.3 From e2db23ea8401ea8a3e608c3ec4d84423e09047f9 Mon Sep 17 00:00:00 2001 From: Justin Berger Date: Sat, 17 Mar 2018 14:12:34 -0600 Subject: Added assert clause to test --- redist/test_dcl.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'redist/test_dcl.c') diff --git a/redist/test_dcl.c b/redist/test_dcl.c index 68a6129..ce7f3de 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -42,11 +42,6 @@ void compareToCblas() { cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo[z])); } - /*void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, - CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const double alpha, const double *A, - const int lda, const double *B, const int ldb, - const double beta, double *C, const int ldc);*/ } printf("%s Elapsed: %f\n", z ? "CBlas" : "dcl", times[z] = OGGetAbsoluteTime() - start); @@ -54,6 +49,12 @@ void compareToCblas() { printf("%fx difference\n", times[0] / times[1]); dclPrint(emo[0][0], 12, 20, 12); dclPrint(emo[1][0], 12, 20, 12); + + for (int i = 0; i < m; i++) { + for (int j = 0; j < k; j++) { + assert(fabs(emo[0][i][j] - emo[1][i][j]) < .00001); + } + } } int main() -- cgit v1.2.3 From 728936f7dce1bc545b5136590a6eb660771266d4 Mon Sep 17 00:00:00 2001 From: Justin Berger Date: Sat, 17 Mar 2018 16:08:49 -0600 Subject: Added transpose timing tests --- redist/test_dcl.c | 97 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 27 deletions(-) (limited to 'redist/test_dcl.c') diff --git a/redist/test_dcl.c b/redist/test_dcl.c index ce7f3de..dc6a93e 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -9,46 +9,43 @@ #include #include -void compareToCblas() { - FLT em1[12][20]; - FLT em2[20][20]; - FLT emo[2][20][20] = {}; - int x, y; - - for (y = 0; y < 12; y++) - for (x = 0; x < 20; x++) - em1[y][x] = (rand() % 1000) / 1000.0; - - for (y = 0; y < 20; y++) - for (x = 0; x < 20; x++) - em2[y][x] = (rand() % 1000) / 1000.0; - - int m = 12; - int n = 20; - int k = 20; +#include "minimal_opencv.h" - dclPrint(DMS(em1), 12, 20); - dclPrint(DMS(em2), 20, 12); +void fill_random(FLT *A, int ld, int m, int n) { + assert(ld == n); + for (int y = 0; y < m; y++) + for (int x = 0; x < n; x++) + A[y * ld + x] = (rand()) / (double)RAND_MAX; +} +void test_dcldgemm_speed(const char *name, char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, + const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, DCL_FLOAT beta) { + printf("%s speed test:\n", name); double times[2]; + FLT emo[2][m][n]; + for (int z = 0; z < 2; z++) { double start = OGGetAbsoluteTime(); for (int i = 0; i < 100000; i++) { - dclZero(DMS(emo[z]), 20, 20); + dclZero(DMS(emo[z]), m, n); if (z == 0) { - dcldgemm(0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo[z])); + dcldgemm(transA, transB, m, n, k, alpha, A, Ac, B, Bc, beta, DMS(emo[z])); } else { - cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, - DMS(emo[z])); + + cblas_dgemm(CblasRowMajor, transA == 1 ? CblasTrans : CblasNoTrans, + transB == 1 ? CblasTrans : CblasNoTrans, m, n, k, alpha, A, Ac, B, Bc, beta, emo[z][0], n); } } - printf("%s Elapsed: %f\n", z ? "CBlas" : "dcl", times[z] = OGGetAbsoluteTime() - start); + times[z] = OGGetAbsoluteTime() - start; } + + dclPrint(DMS(emo[0]), m, n); + dclPrint(DMS(emo[1]), m, n); + printf("dcl Elapsed: %f\n", times[0]); + printf("cblas Elapsed: %f\n", times[1]); printf("%fx difference\n", times[0] / times[1]); - dclPrint(emo[0][0], 12, 20, 12); - dclPrint(emo[1][0], 12, 20, 12); for (int i = 0; i < m; i++) { for (int j = 0; j < k; j++) { @@ -57,12 +54,57 @@ void compareToCblas() { } } +void compareToCblas() { + srand(0); + int m = 12; + int n = 20; + int k = 20; + + FLT em1[m][n]; + FLT em2[n][k]; + + fill_random(DMS(em1), m, n); + fill_random(DMS(em2), n, k); + + dclPrint(DMS(em1), m, n); + dclPrint(DMS(em2), n, k); + + test_dcldgemm_speed("Simple", 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1); +} + +void compareToCblasTrans() { + srand(0); + int m = 12; + int n = 20; + int k = n; + + FLT em1[m][n]; + + fill_random(DMS(em1), m, n); + + dclPrint(DMS(em1), m, n); + + CvMat Em1 = cvMat(m, n, CV_64F, em1); + FLT em1tem1[n][n]; + CvMat Em1tEm1 = cvMat(n, n, CV_64F, em1tem1); + cvMulTransposed(&Em1, &Em1tEm1, 1, 0, 1); + print_mat(&Em1tEm1); + + test_dcldgemm_speed("Trans", 1, 0, + n, // # of rows in OP(A) == em1' -- 20 + n, // # of cols in OP(B) == em1 -- 20 + m, // # of cols in OP(A) == em1' -- 12 + 1.0, + DMS(em1), // Note that LD stays the same + DMS(em1), 0); +} + int main() { FLT A[2][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7} }; FLT B[4][2]; dclPrint( A[0], 4, 2, 4 ); - dclTransp( B[0], 2, A[0], 4, 2, 4 ); + dclTransp(B[0], 2, A[0], 4, 2, 4); dclPrint( B[0], 2, 4, 2 ); int i, j; @@ -114,5 +156,6 @@ int main() } compareToCblas(); + compareToCblasTrans(); } -- cgit v1.2.3