From 0529854c1f0fc3756cb93db839b83055b681ebc2 Mon Sep 17 00:00:00 2001 From: Justin Berger Date: Fri, 16 Mar 2018 23:27:36 -0600 Subject: Removed GFX guts out of data_recorder --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index 034fbd0..e130357 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,7 @@ test : test.c ./lib/libsurvive.so redist/os_generic.o simple_pose_test : simple_pose_test.c ./lib/libsurvive.so redist/os_generic.o $(DRAWFUNCTIONS) $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) -data_recorder : data_recorder.c ./lib/libsurvive.so redist/os_generic.c $(DRAWFUNCTIONS) +data_recorder : data_recorder.c ./lib/libsurvive.so redist/os_generic.c $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) calibrate : calibrate.c ./lib/libsurvive.so redist/os_generic.c $(DRAWFUNCTIONS) -- cgit v1.3.1 From 00fbdfcbe9b1c3cc9f9f0814fe5f60bbf066cbbf Mon Sep 17 00:00:00 2001 From: Justin Berger Date: Sat, 17 Mar 2018 08:15:09 -0600 Subject: Added test target, mul test --- Makefile | 3 +++ redist/test_dcl.c | 24 ++++++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index e130357..9239c39 100644 --- a/Makefile +++ b/Makefile @@ -82,6 +82,9 @@ calibrate_client : calibrate_client.c ./lib/libsurvive.so redist/os_generic.c $ static_calibrate : calibrate.c redist/os_generic.c $(DRAWFUNCTIONS) $(LIBSURVIVE_C) tcc -o $@ $^ $(CFLAGS) $(LDFLAGS) -DTCC +test_dcl: ./redist/test_dcl.c ./redist/dclhelpers.c + $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) -DFLT=double -fsanitize=address -fsanitize=undefined + test_minimal_cv: ./src/epnp/test_minimal_cv.c ./lib/libsurvive.so $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) diff --git a/redist/test_dcl.c b/redist/test_dcl.c index adea7b5..a9512f8 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -1,8 +1,8 @@ #include "dclhelpers.h" +#include #include #include - int main() { FLT A[2][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7} }; @@ -31,9 +31,25 @@ int main() printf( "The following should be an identity matrix\n" ); dclPrint( MM[0], 3, 3, 3 ); -//void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m ); + { + FLT A[3][4]; + dclIdentity(A[0], 4, 3); + dclPrint(A[0], 4, 3, 4); + + FLT x[4] = {7, 8, 9, 10}; + FLT R[4]; + + dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3); + dclPrint(x, 1, 4, 1); + dclPrint(R, 1, 4, 1); + + for (int i = 0; i < 3; i++) + assert(R[i] == x[i]); + assert(R[3] == 0.); + } + // void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m ); -// dclIdentity( A[0], MATx, 5 ); -// dclPrint( A[0], MATx, MATx, MATy ); + // dclIdentity( A[0], MATx, 5 ); + // dclPrint( A[0], MATx, MATx, MATy ); } -- cgit v1.3.1 From 0b9e66ad2ff686a4dcf8a6838f33edb203a1bff5 Mon Sep 17 00:00:00 2001 From: Justin Berger Date: Sat, 17 Mar 2018 09:32:22 -0600 Subject: Fixed gemm --- Makefile | 8 +++++++- redist/dclapack.h | 33 +++++++++++++++++---------------- redist/dclhelpers.c | 9 +++++---- redist/test_dcl.c | 23 +++++++++++++++-------- 4 files changed, 44 insertions(+), 29 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index 9239c39..dfcb9f4 100644 --- a/Makefile +++ b/Makefile @@ -82,7 +82,13 @@ calibrate_client : calibrate_client.c ./lib/libsurvive.so redist/os_generic.c $ static_calibrate : calibrate.c redist/os_generic.c $(DRAWFUNCTIONS) $(LIBSURVIVE_C) tcc -o $@ $^ $(CFLAGS) $(LDFLAGS) -DTCC -test_dcl: ./redist/test_dcl.c ./redist/dclhelpers.c +./redist/dclhelpers_debuggable.c : ./redist/dclhelpers.c ./redist/dclhelpers.h ./redist/dclapack.h + gcc -E ./redist/dclhelpers.c > ./redist/dclhelpers_debuggable.c + clang-format -i ./redist/dclhelpers_debuggable.c + sed -i 's/#/\/\/#/g' ./redist/dclhelpers_debuggable.c + + +test_dcl: ./redist/test_dcl.c ./redist/dclhelpers_debuggable.c ./redist/dclhelpers.h ./redist/dclapack.h $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) -DFLT=double -fsanitize=address -fsanitize=undefined test_minimal_cv: ./src/epnp/test_minimal_cv.c ./lib/libsurvive.so diff --git a/redist/dclapack.h b/redist/dclapack.h index af8869c..af5035b 100644 --- a/redist/dclapack.h +++ b/redist/dclapack.h @@ -224,22 +224,23 @@ PRINT(Ainv,n,n); \ /* * Matrix Multiply R = alpha * A * B + beta * C * R (n by p) - * A (n by m) - * B (m by p) - * C (n by p) + * A (m by n) + * B (n by p) + * C (m by p) */ -#define GMULADD(R,A,B,C,alpha,beta,n,m,p) { \ - int i,j,k; \ - float sum; \ - for (i=0; i #include "dclapack.h" #include +#include +#include void dclPrint( const DCL_FLOAT * PMATRIX, int PMATRIXc, int n, int m ) { @@ -77,7 +78,7 @@ void dcldgemm( int Cc //must be n ) { - const DCL_FLOAT * ta; + const DCL_FLOAT *ta; const DCL_FLOAT * tb; int tac = Ac; int tbc = Bc; @@ -102,7 +103,7 @@ void dcldgemm( } else tb = B; - - GMULADD(C,ta,tb,C,alpha,beta,n,m,k); + printf("%d %d %d\n", tac, tbc, Cc); + GMULADD(C, ta, tb, C, alpha, beta, m, n, k); } diff --git a/redist/test_dcl.c b/redist/test_dcl.c index 6d49548..42f4fd6 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -1,5 +1,6 @@ #include "dclhelpers.h" #include +#include #include #include @@ -36,18 +37,24 @@ int main() dclIdentity(A[0], 4, 3); dclPrint(A[0], 4, 3, 4); - FLT x[4] = {7, 8, 9, 10}; - FLT R[4]; + FLT x[4][2] = { + {7, -7}, {8, -8}, {9, -9}, {10, -10}, + }; + FLT R[4][2]; + printf("%p %p %p\n", A, x, R); // dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3); - dcldgemm(0, 0, 4, 1, 3, 1, A[0], 4, x, 1, 0, R, 1); + dcldgemm(0, 0, 3, 4, 2, 1, A[0], 4, x[0], 2, 0, R[0], 2); - dclPrint(x, 1, 4, 1); - dclPrint(R, 1, 4, 1); + dclPrint(x[0], 2, 4, 2); + dclPrint(R[0], 2, 4, 2); - for (int i = 0; i < 3; i++) - assert(R[i] == x[i]); - assert(R[3] == 0.); + for (int j = 0; j < 2; j++) { + for (int i = 0; i < 3; i++) + assert(R[i][j] == x[i][j]); + + assert(fabs(R[3][j]) < .0000001); + } } // void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m ); -- cgit v1.3.1 From 4cb248d46808c07f029c0c595af8f14517925816 Mon Sep 17 00:00:00 2001 From: Justin Berger Date: Sat, 17 Mar 2018 14:00:32 -0600 Subject: Fixed test --- Makefile | 6 ++- redist/dclapack.h | 2 +- redist/test_dcl.c | 110 +++++++++++++++++++++++++----------------------------- 3 files changed, 57 insertions(+), 61 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index dfcb9f4..ceb452a 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ all : lib data_recorder test calibrate calibrate_client simple_pose_test CC?=gcc CFLAGS:=-Iinclude/libsurvive -fPIC -g -O0 -Iredist -flto -DUSE_DOUBLE -std=gnu99 -rdynamic -llapacke -lcblas -lm +CFLAGS_RELEASE:=-Iinclude/libsurvive -fPIC -msse2 -ftree-vectorize -O3 -Iredist -flto -DUSE_DOUBLE -std=gnu99 -rdynamic -llapacke -lcblas -lm #LDFLAGS:=-L/usr/local/lib -lpthread -lusb-1.0 -lz -lm -flto -g LDFLAGS:=-L/usr/local/lib -lpthread -lz -lm -flto -g @@ -88,7 +89,10 @@ static_calibrate : calibrate.c redist/os_generic.c $(DRAWFUNCTIONS) $(LIBSURVIVE sed -i 's/#/\/\/#/g' ./redist/dclhelpers_debuggable.c -test_dcl: ./redist/test_dcl.c ./redist/dclhelpers_debuggable.c ./redist/dclhelpers.h ./redist/dclapack.h +test_dcl: ./redist/test_dcl.c ./redist/dclhelpers.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c + $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS_RELEASE) -DFLT=double + +test_dcl_debug: ./redist/test_dcl.c ./redist/dclhelpers_debuggable.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) -DFLT=double -fsanitize=address -fsanitize=undefined test_minimal_cv: ./src/epnp/test_minimal_cv.c ./lib/libsurvive.so diff --git a/redist/dclapack.h b/redist/dclapack.h index d4634ac..e43a4f9 100644 --- a/redist/dclapack.h +++ b/redist/dclapack.h @@ -27,7 +27,7 @@ printf(#A "\n"); \ for (int _i = 0; _i < (m); _i++) { \ for (int _j = 0; _j < (n); _j++) { \ - printf("%4.3f\t", _(A, _i, _j)); \ + printf("%4.3f ", _(A, _i, _j)); \ } \ printf("\n"); \ } \ diff --git a/redist/test_dcl.c b/redist/test_dcl.c index 5435a3c..68a6129 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -1,13 +1,60 @@ //gcc -msse2 -O3 -ftree-vectorize test_dcl.c dclhelpers.c os_generic.c -DFLT=double -lpthread -lcblas && valgrind ./a.out - #include "dclhelpers.h" +#include "os_generic.h" #include +#include #include #include #include -#include "os_generic.h" -#include +#include + +void compareToCblas() { + FLT em1[12][20]; + FLT em2[20][20]; + FLT emo[2][20][20] = {}; + int x, y; + + for (y = 0; y < 12; y++) + for (x = 0; x < 20; x++) + em1[y][x] = (rand() % 1000) / 1000.0; + + for (y = 0; y < 20; y++) + for (x = 0; x < 20; x++) + em2[y][x] = (rand() % 1000) / 1000.0; + + int m = 12; + int n = 20; + int k = 20; + + dclPrint(DMS(em1), 12, 20); + dclPrint(DMS(em2), 20, 12); + + double times[2]; + for (int z = 0; z < 2; z++) { + double start = OGGetAbsoluteTime(); + for (int i = 0; i < 100000; i++) { + dclZero(DMS(emo[z]), 20, 20); + + if (z == 0) { + dcldgemm(0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo[z])); + } else { + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, + DMS(emo[z])); + } + /*void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const double alpha, const double *A, + const int lda, const double *B, const int ldb, + const double beta, double *C, const int ldc);*/ + } + + printf("%s Elapsed: %f\n", z ? "CBlas" : "dcl", times[z] = OGGetAbsoluteTime() - start); + } + printf("%fx difference\n", times[0] / times[1]); + dclPrint(emo[0][0], 12, 20, 12); + dclPrint(emo[1][0], 12, 20, 12); +} int main() { @@ -65,61 +112,6 @@ int main() } } - -#if 1 - - //Currently failing test... - { -// FLT em1[3][4]; -// FLT em2[4][2]; -// FLT emo[4][2]; - - FLT em1[12][20]; - FLT em2[20][20]; - FLT emo[20][20]; - int x, y; - - for( y = 0; y < 12; y++ ) - for( x = 0; x < 20; x++ ) - em1[y][x] = (rand()%1000)/1000.0; - - for( y = 0; y < 20; y++ ) - for( x = 0; x < 20; x++ ) - em2[y][x] = (rand()%1000)/1000.0; - - for( y = 0; y < 20; y++ ) - for( x = 0; x < 20; x++ ) - emo[y][x] = 0; - - int m = 12; - int n = 20; - int k = 12; - - dclPrint( DMS(em1), 12, 20 ); - dclPrint( DMS(em2), 20, 12 ); - - int i; - - double start = OGGetAbsoluteTime(); - for( i = 0; i < 10000; i++ ) - { - dclZero( DMS(emo), 20, 20 ); - - dcldgemm( 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) ); - //cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) ); - -/*void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, - CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const double alpha, const double *A, - const int lda, const double *B, const int ldb, - const double beta, double *C, const int ldc);*/ - - } - printf( "Elapsed: %f\n", OGGetAbsoluteTime()-start ); - - dclPrint( emo[0], 12, 20, 12 ); - } -#endif - + compareToCblas(); } -- cgit v1.3.1 From 728936f7dce1bc545b5136590a6eb660771266d4 Mon Sep 17 00:00:00 2001 From: Justin Berger Date: Sat, 17 Mar 2018 16:08:49 -0600 Subject: Added transpose timing tests --- Makefile | 2 +- redist/dclapack.h | 2 +- redist/minimal_opencv.c | 13 ++----- redist/test_dcl.c | 97 +++++++++++++++++++++++++++++++++++-------------- 4 files changed, 75 insertions(+), 39 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index ceb452a..1b5d5e6 100644 --- a/Makefile +++ b/Makefile @@ -89,7 +89,7 @@ static_calibrate : calibrate.c redist/os_generic.c $(DRAWFUNCTIONS) $(LIBSURVIVE sed -i 's/#/\/\/#/g' ./redist/dclhelpers_debuggable.c -test_dcl: ./redist/test_dcl.c ./redist/dclhelpers.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c +test_dcl: ./redist/test_dcl.c ./redist/dclhelpers.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c ./redist/minimal_opencv.c ./src/epnp/epnp.c $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS_RELEASE) -DFLT=double test_dcl_debug: ./redist/test_dcl.c ./redist/dclhelpers_debuggable.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c diff --git a/redist/dclapack.h b/redist/dclapack.h index e43a4f9..7f30187 100644 --- a/redist/dclapack.h +++ b/redist/dclapack.h @@ -24,7 +24,7 @@ */ #define PRINT(A, m, n) \ { \ - printf(#A "\n"); \ + printf(#A " %dx%d\n", m, n); \ for (int _i = 0; _i < (m); _i++) { \ for (int _j = 0; _j < (n); _j++) { \ printf("%4.3f ", _(A, _i, _j)); \ diff --git a/redist/minimal_opencv.c b/redist/minimal_opencv.c index 3f7bed7..8e71034 100644 --- a/redist/minimal_opencv.c +++ b/redist/minimal_opencv.c @@ -44,11 +44,8 @@ void cvGEMM(const CvMat *src1, const CvMat *src2, double alpha, const CvMat *src beta = 0; cblas_dgemm(CblasRowMajor, (tABC & GEMM_1_T) ? CblasTrans : CblasNoTrans, - (tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha, - - src1->data.db, lda, src2->data.db, ldb, beta, - - dst->data.db, dst->cols); + (tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha, src1->data.db, + lda, src2->data.db, ldb, beta, dst->data.db, dst->cols); } void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta, double scale) { @@ -67,11 +64,7 @@ void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta lapack_int dstCols = dst->cols; cblas_dgemm(CblasRowMajor, isAT ? CblasTrans : CblasNoTrans, isBT ? CblasTrans : CblasNoTrans, cols, dstCols, rows, - scale, - - src->data.db, cols, src->data.db, cols, beta, - - dst->data.db, dstCols); + scale, src->data.db, cols, src->data.db, cols, beta, dst->data.db, dstCols); } void *cvAlloc(size_t size) { return malloc(size); } diff --git a/redist/test_dcl.c b/redist/test_dcl.c index ce7f3de..dc6a93e 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -9,46 +9,43 @@ #include #include -void compareToCblas() { - FLT em1[12][20]; - FLT em2[20][20]; - FLT emo[2][20][20] = {}; - int x, y; - - for (y = 0; y < 12; y++) - for (x = 0; x < 20; x++) - em1[y][x] = (rand() % 1000) / 1000.0; - - for (y = 0; y < 20; y++) - for (x = 0; x < 20; x++) - em2[y][x] = (rand() % 1000) / 1000.0; - - int m = 12; - int n = 20; - int k = 20; +#include "minimal_opencv.h" - dclPrint(DMS(em1), 12, 20); - dclPrint(DMS(em2), 20, 12); +void fill_random(FLT *A, int ld, int m, int n) { + assert(ld == n); + for (int y = 0; y < m; y++) + for (int x = 0; x < n; x++) + A[y * ld + x] = (rand()) / (double)RAND_MAX; +} +void test_dcldgemm_speed(const char *name, char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, + const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, DCL_FLOAT beta) { + printf("%s speed test:\n", name); double times[2]; + FLT emo[2][m][n]; + for (int z = 0; z < 2; z++) { double start = OGGetAbsoluteTime(); for (int i = 0; i < 100000; i++) { - dclZero(DMS(emo[z]), 20, 20); + dclZero(DMS(emo[z]), m, n); if (z == 0) { - dcldgemm(0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo[z])); + dcldgemm(transA, transB, m, n, k, alpha, A, Ac, B, Bc, beta, DMS(emo[z])); } else { - cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, - DMS(emo[z])); + + cblas_dgemm(CblasRowMajor, transA == 1 ? CblasTrans : CblasNoTrans, + transB == 1 ? CblasTrans : CblasNoTrans, m, n, k, alpha, A, Ac, B, Bc, beta, emo[z][0], n); } } - printf("%s Elapsed: %f\n", z ? "CBlas" : "dcl", times[z] = OGGetAbsoluteTime() - start); + times[z] = OGGetAbsoluteTime() - start; } + + dclPrint(DMS(emo[0]), m, n); + dclPrint(DMS(emo[1]), m, n); + printf("dcl Elapsed: %f\n", times[0]); + printf("cblas Elapsed: %f\n", times[1]); printf("%fx difference\n", times[0] / times[1]); - dclPrint(emo[0][0], 12, 20, 12); - dclPrint(emo[1][0], 12, 20, 12); for (int i = 0; i < m; i++) { for (int j = 0; j < k; j++) { @@ -57,12 +54,57 @@ void compareToCblas() { } } +void compareToCblas() { + srand(0); + int m = 12; + int n = 20; + int k = 20; + + FLT em1[m][n]; + FLT em2[n][k]; + + fill_random(DMS(em1), m, n); + fill_random(DMS(em2), n, k); + + dclPrint(DMS(em1), m, n); + dclPrint(DMS(em2), n, k); + + test_dcldgemm_speed("Simple", 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1); +} + +void compareToCblasTrans() { + srand(0); + int m = 12; + int n = 20; + int k = n; + + FLT em1[m][n]; + + fill_random(DMS(em1), m, n); + + dclPrint(DMS(em1), m, n); + + CvMat Em1 = cvMat(m, n, CV_64F, em1); + FLT em1tem1[n][n]; + CvMat Em1tEm1 = cvMat(n, n, CV_64F, em1tem1); + cvMulTransposed(&Em1, &Em1tEm1, 1, 0, 1); + print_mat(&Em1tEm1); + + test_dcldgemm_speed("Trans", 1, 0, + n, // # of rows in OP(A) == em1' -- 20 + n, // # of cols in OP(B) == em1 -- 20 + m, // # of cols in OP(A) == em1' -- 12 + 1.0, + DMS(em1), // Note that LD stays the same + DMS(em1), 0); +} + int main() { FLT A[2][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7} }; FLT B[4][2]; dclPrint( A[0], 4, 2, 4 ); - dclTransp( B[0], 2, A[0], 4, 2, 4 ); + dclTransp(B[0], 2, A[0], 4, 2, 4); dclPrint( B[0], 2, 4, 2 ); int i, j; @@ -114,5 +156,6 @@ int main() } compareToCblas(); + compareToCblasTrans(); } -- cgit v1.3.1