From 728936f7dce1bc545b5136590a6eb660771266d4 Mon Sep 17 00:00:00 2001 From: Justin Berger Date: Sat, 17 Mar 2018 16:08:49 -0600 Subject: Added transpose timing tests --- redist/dclapack.h | 2 +- redist/minimal_opencv.c | 13 ++----- redist/test_dcl.c | 97 +++++++++++++++++++++++++++++++++++-------------- 3 files changed, 74 insertions(+), 38 deletions(-) (limited to 'redist') diff --git a/redist/dclapack.h b/redist/dclapack.h index e43a4f9..7f30187 100644 --- a/redist/dclapack.h +++ b/redist/dclapack.h @@ -24,7 +24,7 @@ */ #define PRINT(A, m, n) \ { \ - printf(#A "\n"); \ + printf(#A " %dx%d\n", m, n); \ for (int _i = 0; _i < (m); _i++) { \ for (int _j = 0; _j < (n); _j++) { \ printf("%4.3f ", _(A, _i, _j)); \ diff --git a/redist/minimal_opencv.c b/redist/minimal_opencv.c index 3f7bed7..8e71034 100644 --- a/redist/minimal_opencv.c +++ b/redist/minimal_opencv.c @@ -44,11 +44,8 @@ void cvGEMM(const CvMat *src1, const CvMat *src2, double alpha, const CvMat *src beta = 0; cblas_dgemm(CblasRowMajor, (tABC & GEMM_1_T) ? CblasTrans : CblasNoTrans, - (tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha, - - src1->data.db, lda, src2->data.db, ldb, beta, - - dst->data.db, dst->cols); + (tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha, src1->data.db, + lda, src2->data.db, ldb, beta, dst->data.db, dst->cols); } void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta, double scale) { @@ -67,11 +64,7 @@ void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta lapack_int dstCols = dst->cols; cblas_dgemm(CblasRowMajor, isAT ? CblasTrans : CblasNoTrans, isBT ? CblasTrans : CblasNoTrans, cols, dstCols, rows, - scale, - - src->data.db, cols, src->data.db, cols, beta, - - dst->data.db, dstCols); + scale, src->data.db, cols, src->data.db, cols, beta, dst->data.db, dstCols); } void *cvAlloc(size_t size) { return malloc(size); } diff --git a/redist/test_dcl.c b/redist/test_dcl.c index ce7f3de..dc6a93e 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -9,46 +9,43 @@ #include #include -void compareToCblas() { - FLT em1[12][20]; - FLT em2[20][20]; - FLT emo[2][20][20] = {}; - int x, y; - - for (y = 0; y < 12; y++) - for (x = 0; x < 20; x++) - em1[y][x] = (rand() % 1000) / 1000.0; - - for (y = 0; y < 20; y++) - for (x = 0; x < 20; x++) - em2[y][x] = (rand() % 1000) / 1000.0; - - int m = 12; - int n = 20; - int k = 20; +#include "minimal_opencv.h" - dclPrint(DMS(em1), 12, 20); - dclPrint(DMS(em2), 20, 12); +void fill_random(FLT *A, int ld, int m, int n) { + assert(ld == n); + for (int y = 0; y < m; y++) + for (int x = 0; x < n; x++) + A[y * ld + x] = (rand()) / (double)RAND_MAX; +} +void test_dcldgemm_speed(const char *name, char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, + const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, DCL_FLOAT beta) { + printf("%s speed test:\n", name); double times[2]; + FLT emo[2][m][n]; + for (int z = 0; z < 2; z++) { double start = OGGetAbsoluteTime(); for (int i = 0; i < 100000; i++) { - dclZero(DMS(emo[z]), 20, 20); + dclZero(DMS(emo[z]), m, n); if (z == 0) { - dcldgemm(0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo[z])); + dcldgemm(transA, transB, m, n, k, alpha, A, Ac, B, Bc, beta, DMS(emo[z])); } else { - cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, - DMS(emo[z])); + + cblas_dgemm(CblasRowMajor, transA == 1 ? CblasTrans : CblasNoTrans, + transB == 1 ? CblasTrans : CblasNoTrans, m, n, k, alpha, A, Ac, B, Bc, beta, emo[z][0], n); } } - printf("%s Elapsed: %f\n", z ? "CBlas" : "dcl", times[z] = OGGetAbsoluteTime() - start); + times[z] = OGGetAbsoluteTime() - start; } + + dclPrint(DMS(emo[0]), m, n); + dclPrint(DMS(emo[1]), m, n); + printf("dcl Elapsed: %f\n", times[0]); + printf("cblas Elapsed: %f\n", times[1]); printf("%fx difference\n", times[0] / times[1]); - dclPrint(emo[0][0], 12, 20, 12); - dclPrint(emo[1][0], 12, 20, 12); for (int i = 0; i < m; i++) { for (int j = 0; j < k; j++) { @@ -57,12 +54,57 @@ void compareToCblas() { } } +void compareToCblas() { + srand(0); + int m = 12; + int n = 20; + int k = 20; + + FLT em1[m][n]; + FLT em2[n][k]; + + fill_random(DMS(em1), m, n); + fill_random(DMS(em2), n, k); + + dclPrint(DMS(em1), m, n); + dclPrint(DMS(em2), n, k); + + test_dcldgemm_speed("Simple", 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1); +} + +void compareToCblasTrans() { + srand(0); + int m = 12; + int n = 20; + int k = n; + + FLT em1[m][n]; + + fill_random(DMS(em1), m, n); + + dclPrint(DMS(em1), m, n); + + CvMat Em1 = cvMat(m, n, CV_64F, em1); + FLT em1tem1[n][n]; + CvMat Em1tEm1 = cvMat(n, n, CV_64F, em1tem1); + cvMulTransposed(&Em1, &Em1tEm1, 1, 0, 1); + print_mat(&Em1tEm1); + + test_dcldgemm_speed("Trans", 1, 0, + n, // # of rows in OP(A) == em1' -- 20 + n, // # of cols in OP(B) == em1 -- 20 + m, // # of cols in OP(A) == em1' -- 12 + 1.0, + DMS(em1), // Note that LD stays the same + DMS(em1), 0); +} + int main() { FLT A[2][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7} }; FLT B[4][2]; dclPrint( A[0], 4, 2, 4 ); - dclTransp( B[0], 2, A[0], 4, 2, 4 ); + dclTransp(B[0], 2, A[0], 4, 2, 4); dclPrint( B[0], 2, 4, 2 ); int i, j; @@ -114,5 +156,6 @@ int main() } compareToCblas(); + compareToCblasTrans(); } -- cgit v1.2.3