about summary refs log tree commit diff
path: root/redist
diff options
context:
space:
mode:
author Justin Berger <j.david.berger@gmail.com> 2018-03-17 16:08:49 -0600
committer Justin Berger <j.david.berger@gmail.com> 2018-03-17 16:08:49 -0600
commit 728936f7dce1bc545b5136590a6eb660771266d4 (patch)
tree 8324ca298a49fc965247c18c3c8214df97d08a83 /redist
parent e2db23ea8401ea8a3e608c3ec4d84423e09047f9 (diff)
download libsurvive-728936f7dce1bc545b5136590a6eb660771266d4.tar.gz
libsurvive-728936f7dce1bc545b5136590a6eb660771266d4.tar.bz2
Added transpose timing tests
Diffstat (limited to 'redist')
-rw-r--r-- redist/dclapack.h 2
-rw-r--r-- redist/minimal_opencv.c 13
-rw-r--r-- redist/test_dcl.c 97
3 files changed, 74 insertions, 38 deletions
diff --git a/redist/dclapack.h b/redist/dclapack.h
index e43a4f9..7f30187 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -24,7 +24,7 @@
*/
#define PRINT(A, m, n) \
{ \
- printf(#A "\n"); \
+ printf(#A " %dx%d\n", m, n); \
for (int _i = 0; _i < (m); _i++) { \
for (int _j = 0; _j < (n); _j++) { \
printf("%4.3f ", _(A, _i, _j)); \
diff --git a/redist/minimal_opencv.c b/redist/minimal_opencv.c
index 3f7bed7..8e71034 100644
--- a/redist/minimal_opencv.c
+++ b/redist/minimal_opencv.c
@@ -44,11 +44,8 @@ void cvGEMM(const CvMat *src1, const CvMat *src2, double alpha, const CvMat *src
beta = 0;
cblas_dgemm(CblasRowMajor, (tABC & GEMM_1_T) ? CblasTrans : CblasNoTrans,
- (tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha,
-
- src1->data.db, lda, src2->data.db, ldb, beta,
-
- dst->data.db, dst->cols);
+ (tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha, src1->data.db,
+ lda, src2->data.db, ldb, beta, dst->data.db, dst->cols);
}
void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta, double scale) {
@@ -67,11 +64,7 @@ void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta
lapack_int dstCols = dst->cols;
cblas_dgemm(CblasRowMajor, isAT ? CblasTrans : CblasNoTrans, isBT ? CblasTrans : CblasNoTrans, cols, dstCols, rows,
- scale,
-
- src->data.db, cols, src->data.db, cols, beta,
-
- dst->data.db, dstCols);
+ scale, src->data.db, cols, src->data.db, cols, beta, dst->data.db, dstCols);
}
void *cvAlloc(size_t size) { return malloc(size); }
diff --git a/redist/test_dcl.c b/redist/test_dcl.c
index ce7f3de..dc6a93e 100644
--- a/redist/test_dcl.c
+++ b/redist/test_dcl.c
@@ -9,46 +9,43 @@
#include <stdio.h>
#include <stdlib.h>
-void compareToCblas() {
- FLT em1[12][20];
- FLT em2[20][20];
- FLT emo[2][20][20] = {};
- int x, y;
-
- for (y = 0; y < 12; y++)
- for (x = 0; x < 20; x++)
- em1[y][x] = (rand() % 1000) / 1000.0;
-
- for (y = 0; y < 20; y++)
- for (x = 0; x < 20; x++)
- em2[y][x] = (rand() % 1000) / 1000.0;
-
- int m = 12;
- int n = 20;
- int k = 20;
+#include "minimal_opencv.h"
- dclPrint(DMS(em1), 12, 20);
- dclPrint(DMS(em2), 20, 12);
+void fill_random(FLT *A, int ld, int m, int n) {
+ assert(ld == n);
+ for (int y = 0; y < m; y++)
+ for (int x = 0; x < n; x++)
+ A[y * ld + x] = (rand()) / (double)RAND_MAX;
+}
+void test_dcldgemm_speed(const char *name, char transA, char transB, int m, int n, int k, DCL_FLOAT alpha,
+ const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, DCL_FLOAT beta) {
+ printf("%s speed test:\n", name);
double times[2];
+ FLT emo[2][m][n];
+
for (int z = 0; z < 2; z++) {
double start = OGGetAbsoluteTime();
for (int i = 0; i < 100000; i++) {
- dclZero(DMS(emo[z]), 20, 20);
+ dclZero(DMS(emo[z]), m, n);
if (z == 0) {
- dcldgemm(0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo[z]));
+ dcldgemm(transA, transB, m, n, k, alpha, A, Ac, B, Bc, beta, DMS(emo[z]));
} else {
- cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1,
- DMS(emo[z]));
+
+ cblas_dgemm(CblasRowMajor, transA == 1 ? CblasTrans : CblasNoTrans,
+ transB == 1 ? CblasTrans : CblasNoTrans, m, n, k, alpha, A, Ac, B, Bc, beta, emo[z][0], n);
}
}
- printf("%s Elapsed: %f\n", z ? "CBlas" : "dcl", times[z] = OGGetAbsoluteTime() - start);
+ times[z] = OGGetAbsoluteTime() - start;
}
+
+ dclPrint(DMS(emo[0]), m, n);
+ dclPrint(DMS(emo[1]), m, n);
+ printf("dcl Elapsed: %f\n", times[0]);
+ printf("cblas Elapsed: %f\n", times[1]);
printf("%fx difference\n", times[0] / times[1]);
- dclPrint(emo[0][0], 12, 20, 12);
- dclPrint(emo[1][0], 12, 20, 12);
for (int i = 0; i < m; i++) {
for (int j = 0; j < k; j++) {
@@ -57,12 +54,57 @@ void compareToCblas() {
}
}
+void compareToCblas() {
+ srand(0);
+ int m = 12;
+ int n = 20;
+ int k = 20;
+
+ FLT em1[m][n];
+ FLT em2[n][k];
+
+ fill_random(DMS(em1), m, n);
+ fill_random(DMS(em2), n, k);
+
+ dclPrint(DMS(em1), m, n);
+ dclPrint(DMS(em2), n, k);
+
+ test_dcldgemm_speed("Simple", 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1);
+}
+
+void compareToCblasTrans() {
+ srand(0);
+ int m = 12;
+ int n = 20;
+ int k = n;
+
+ FLT em1[m][n];
+
+ fill_random(DMS(em1), m, n);
+
+ dclPrint(DMS(em1), m, n);
+
+ CvMat Em1 = cvMat(m, n, CV_64F, em1);
+ FLT em1tem1[n][n];
+ CvMat Em1tEm1 = cvMat(n, n, CV_64F, em1tem1);
+ cvMulTransposed(&Em1, &Em1tEm1, 1, 0, 1);
+ print_mat(&Em1tEm1);
+
+ test_dcldgemm_speed("Trans", 1, 0,
+ n, // # of rows in OP(A) == em1' -- 20
+ n, // # of cols in OP(B) == em1 -- 20
+ m, // # of cols in OP(A) == em1' -- 12
+ 1.0,
+ DMS(em1), // Note that LD stays the same
+ DMS(em1), 0);
+}
+
int main()
{
FLT A[2][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7} };
FLT B[4][2];
dclPrint( A[0], 4, 2, 4 );
- dclTransp( B[0], 2, A[0], 4, 2, 4 );
+ dclTransp(B[0], 2, A[0], 4, 2, 4);
dclPrint( B[0], 2, 4, 2 );
int i, j;
@@ -114,5 +156,6 @@ int main()
}
compareToCblas();
+ compareToCblasTrans();
}