diff options
-rw-r--r-- | Makefile | 18 | ||||
-rw-r--r-- | README.md | 3 | ||||
-rw-r--r-- | data_recorder.c | 113 | ||||
-rw-r--r-- | redist/dclapack.h | 385 | ||||
-rw-r--r-- | redist/dclhelpers.c | 80 | ||||
-rw-r--r-- | redist/dclhelpers.h | 74 | ||||
-rw-r--r-- | redist/minimal_opencv.c | 13 | ||||
-rw-r--r-- | redist/test_dcl.c | 155 | ||||
-rw-r--r-- | src/poser_daveortho.c | 40 |
9 files changed, 584 insertions, 297 deletions
@@ -2,7 +2,9 @@ all : lib data_recorder test calibrate calibrate_client simple_pose_test CC?=gcc -CFLAGS:=-Iinclude/libsurvive -fPIC -g -O0 -Iredist -flto -DUSE_DOUBLE -std=gnu99 -rdynamic -llapacke -lcblas -lm -fsanitize=address -fsanitize=undefined +CFLAGS:=-Iinclude/libsurvive -fPIC -g -O0 -Iredist -flto -DUSE_DOUBLE -std=gnu99 -rdynamic -llapacke -lcblas -lm +CFLAGS_RELEASE:=-Iinclude/libsurvive -fPIC -msse2 -ftree-vectorize -O3 -Iredist -flto -DUSE_DOUBLE -std=gnu99 -rdynamic -llapacke -lcblas -lm + #LDFLAGS:=-L/usr/local/lib -lpthread -lusb-1.0 -lz -lm -flto -g LDFLAGS:=-L/usr/local/lib -lpthread -lz -lm -flto -g @@ -70,7 +72,7 @@ test : test.c ./lib/libsurvive.so redist/os_generic.o simple_pose_test : simple_pose_test.c ./lib/libsurvive.so redist/os_generic.o $(DRAWFUNCTIONS) $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) -data_recorder : data_recorder.c ./lib/libsurvive.so redist/os_generic.c $(DRAWFUNCTIONS) +data_recorder : data_recorder.c ./lib/libsurvive.so redist/os_generic.c $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) calibrate : calibrate.c ./lib/libsurvive.so redist/os_generic.c $(DRAWFUNCTIONS) @@ -83,6 +85,18 @@ calibrate_client : calibrate_client.c ./lib/libsurvive.so redist/os_generic.c $ static_calibrate : calibrate.c redist/os_generic.c $(DRAWFUNCTIONS) $(LIBSURVIVE_C) tcc -o $@ $^ $(CFLAGS) $(LDFLAGS) -DTCC +./redist/dclhelpers_debuggable.c : ./redist/dclhelpers.c ./redist/dclhelpers.h ./redist/dclapack.h + gcc -E ./redist/dclhelpers.c > ./redist/dclhelpers_debuggable.c + clang-format -i ./redist/dclhelpers_debuggable.c + sed -i 's/#/\/\/#/g' ./redist/dclhelpers_debuggable.c + + +test_dcl: ./redist/test_dcl.c ./redist/dclhelpers.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c ./redist/minimal_opencv.c ./src/epnp/epnp.c + $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS_RELEASE) -DFLT=double + +test_dcl_debug: ./redist/test_dcl.c ./redist/dclhelpers_debuggable.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c + $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) -DFLT=double -fsanitize=address -fsanitize=undefined + test_minimal_cv: ./src/epnp/test_minimal_cv.c ./lib/libsurvive.so $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) @@ -82,6 +82,9 @@ Will ~~I~~ we succeed? Probably not. ~~Definitely going to try!~~ Though it's * Optionally OpenGL. On Debian, ```sudo apt-get install build-essential zlib1g-dev libx11-dev libusb-1.0-0-dev freeglut3-dev``` should be sufficient. +Temporarily needed packages: liblapack and libopenblas +On Debian, ```sudo apt-get install liblapacke-dev libopenblas-dev``` + ## Architecture <TABLE><TR><TH>Description</TH><TH>Diagram</TH> diff --git a/data_recorder.c b/data_recorder.c index 7f53d18..271b996 100644 --- a/data_recorder.c +++ b/data_recorder.c @@ -4,7 +4,6 @@ #include <unistd.h> #endif -#include <CNFGFunctions.h> #include <os_generic.h> #include <stdarg.h> #include <stdint.h> @@ -25,28 +24,6 @@ struct SurviveContext *ctx; FILE *output_file = 0; -void HandleKey(int keycode, int bDown) { - if (!bDown) - return; - - if (keycode == 'O' || keycode == 'o') { - survive_send_magic(ctx, 1, 0, 0); - } - if (keycode == 'F' || keycode == 'f') { - survive_send_magic(ctx, 0, 0, 0); - } -} - -void HandleButton(int x, int y, int button, int bDown) {} - -void HandleMotion(int x, int y, int mask) {} - -void HandleDestroy() {} - -int bufferpts[32 * 2 * 3]; -char buffermts[32 * 128 * 3]; -int buffertimeto[32 * 3]; - double timestamp_in_us() { static double start_time_us = 0; if (start_time_us == 0) @@ -74,6 +51,7 @@ int my_config_process(SurviveObject *so, char *ct0conf, int len) { fwrite("\n", 1, 1, output_file); return survive_default_htc_config_process(so, ct0conf, len); } + void my_lighthouse_process(SurviveContext *ctx, uint8_t lighthouse, SurvivePose *pose) { survive_default_lighthouse_pose_process(ctx, lighthouse, pose); write_to_output("%d LH_POSE %0.6f %0.6f %0.6f %0.6f %0.6f %0.6f %0.6f\n", lighthouse, pose->Pos[0], pose->Pos[1], @@ -98,51 +76,38 @@ void my_light_process(struct SurviveObject *so, int sensor_id, int acode, survive_default_light_process(so, sensor_id, acode, timeinsweep, timecode, length, lh); - if (acode == -1 || sensor_id < 0) { + if (acode == -1) { write_to_output("%s S %d %d %d %u %u %u\n", so->codename, sensor_id, acode, timeinsweep, timecode, length, lh); return; } - int jumpoffset = sensor_id; - - if (strcmp(so->codename, "WM0") == 0) - jumpoffset += 32; - else if (strcmp(so->codename, "WM1") == 0) - jumpoffset += 64; - const char *LH_ID = 0; const char *LH_Axis = 0; switch (acode) { case 0: case 2: - bufferpts[jumpoffset * 2 + 0] = (timeinsweep - 100000) / 500; LH_ID = "L"; LH_Axis = "X"; break; case 1: case 3: - bufferpts[jumpoffset * 2 + 1] = (timeinsweep - 100000) / 500; LH_ID = "L"; LH_Axis = "Y"; break; case 4: case 6: - bufferpts[jumpoffset * 2 + 0] = (timeinsweep - 100000) / 500; LH_ID = "R"; LH_Axis = "X"; break; case 5: case 7: - bufferpts[jumpoffset * 2 + 1] = (timeinsweep - 100000) / 500; LH_ID = "R"; LH_Axis = "Y"; break; } - write_to_output("%s %s %s %d %d %d %u %u %u\n", so->codename, LH_ID, LH_Axis, sensor_id, acode, timeinsweep, timecode, length, lh); - buffertimeto[jumpoffset] = 0; } void my_imu_process(struct SurviveObject *so, int mask, FLT *accelgyro, @@ -152,47 +117,17 @@ void my_imu_process(struct SurviveObject *so, int mask, FLT *accelgyro, accelgyro[1], accelgyro[2], accelgyro[3], accelgyro[4], accelgyro[5], id); } -void *GuiThread(void *v) { - CNFGBGColor = 0x000000; - CNFGDialogColor = 0x444444; - CNFGSetup("Survive GUI Debug", 640, 480); - - short screenx, screeny; - while (1) { - CNFGHandleInput(); - CNFGClearFrame(); - CNFGColor(0xFFFFFF); - CNFGGetDimensions(&screenx, &screeny); - - int i; - for (i = 0; i < 32 * 3; i++) { - if (buffertimeto[i] < 50) { - uint32_t color = i * 3231349; - uint8_t r = color & 0xff; - uint8_t g = (color >> 8) & 0xff; - uint8_t b = (color >> 16) & 0xff; - r = (r * (5 - buffertimeto[i])) / 5; - g = (g * (5 - buffertimeto[i])) / 5; - b = (b * (5 - buffertimeto[i])) / 5; - CNFGColor((b << 16) | (g << 8) | r); - CNFGTackRectangle(bufferpts[i * 2 + 0], bufferpts[i * 2 + 1], - bufferpts[i * 2 + 0] + 5, - bufferpts[i * 2 + 1] + 5); - CNFGPenX = bufferpts[i * 2 + 0]; - CNFGPenY = bufferpts[i * 2 + 1]; - CNFGDrawText(buffermts, 2); - buffertimeto[i]++; - } +int main(int argc, char **argv) { + if (argc > 1) { + output_file = fopen(argv[1], "w"); + if (output_file == 0) { + fprintf(stderr, "Could not open %s for writing", argv[1]); + return -1; } - - CNFGSwapBuffers(); - OGUSleep(10000); + } else { + output_file = stdout; } -} -int SurviveThreadLoaded = 0; - -void *SurviveThread(void *junk) { ctx = survive_init_with_config_cb(0, my_config_process); survive_install_light_fn(ctx, my_light_process); @@ -201,6 +136,7 @@ void *SurviveThread(void *junk) { survive_install_raw_pose_fn(ctx, my_raw_pose_process); survive_install_angle_fn(ctx, my_angle_process); survive_install_info_fn(ctx, my_info_process); + survive_cal_install(ctx); if (!ctx) { @@ -208,35 +144,8 @@ void *SurviveThread(void *junk) { exit(1); } - SurviveThreadLoaded = 1; - while (survive_poll(ctx) == 0) { } return 0; } - -int main(int argc, char **argv) { - if (argc > 1) { - output_file = fopen(argv[1], "w"); - if (output_file == 0) { - fprintf(stderr, "Could not open %s for writing", argv[1]); - return -1; - } - } else { - output_file = stdout; - } - SurviveThread(0); - /* - // Create the libsurvive thread - OGCreateThread(SurviveThread, 0); - - // Wait for the survive thread to load - while (!SurviveThreadLoaded) { - OGUSleep(100); - } - - // Run the Gui in the main thread - GuiThread(0); - */ -} diff --git a/redist/dclapack.h b/redist/dclapack.h index 4e209d3..0950815 100644 --- a/redist/dclapack.h +++ b/redist/dclapack.h @@ -1,10 +1,6 @@ #ifndef __DCLAPACK_H__ #define __DCLAPACK_H__ -#ifndef ORDER -#define ORDER 50 -#endif - #ifndef FLOAT #define FLOAT float #endif @@ -13,146 +9,207 @@ #define _ABS(a) ( (a)<=0 ? 0-(a) : (a) ) +// Tricky: If you want to use this with pointers, instead of 2D arrays, you will +// need to #define DYNAMIC_INDEX, as well as, for all arrays, suffix their name +// with 'c' + +#ifdef DYNAMIC_INDEX +#define _(AM, O, P) AM[O * AM##c + P] +#else +#define _(AM, O, P) AM[O][P] +#endif + /* - * Prints a matrix A (n by m) + * Prints a matrix A (m by n) (with witdth Ac) */ -#define PRINT(A,n,m) { \ - int i,j; \ - printf(#A "\n"); \ - for (i=0; i<n; i++) { \ - for (j=0; j<m; j++) { \ - printf("%4.3f\t", A[i][j]); \ - } \ - printf("\n"); \ - } \ - printf("\n"); \ -} +#define PRINT(A, m, n) \ + { \ + printf(#A " %dx%d\n", m, n); \ + for (int _i = 0; _i < (m); _i++) { \ + for (int _j = 0; _j < (n); _j++) { \ + printf("%4.3f ", _(A, _i, _j)); \ + } \ + printf("\n"); \ + } \ + printf("\n"); \ + } /* - * Returns the identity matrix + * Returns the identity matrix (with size n x n, but width Ic) */ -#define IDENTITY(I,n) { \ - int i,j; \ - for (i=0; i<n; i++) { \ - for (j=0; j<i; j++) { I[i][j]=0.0f; } \ - I[i][i] = 1.0f; \ - for (j=i+1; j<n; j++) { I[i][j]=0.0f; } \ - } \ -} +#define IDENTITY(I, m, n) \ + { \ + for (int _i = 0; _i < (m); _i++) { \ + for (int _j = 0; _j < _i; _j++) { \ + _(I, _i, _j) = 0.0f; \ + } \ + _(I, _i, _i) = 1.0f; \ + for (int _j = _i + 1; _j < (n); _j++) { \ + _(I, _i, _j) = 0.0f; \ + } \ + } \ + } /* - * B = Transpose(A) - * A is (n by m) - * B is (m by n) + * Returns the identity matrix (with size n x n, but width Ic) */ -#define TRANSP(A,B,n,m) { \ - int i,j; \ - for (i=0; i<n; i++) { \ - for (j=0; j<m; j++) { \ - B[j][i] = A[i][j]; \ - } \ - } \ -} +#define ZERO(Z, m, n) \ + { \ + for (int _i = 0; _i < (m); _i++) \ + for (int _j = 0; _j < (n); _j++) \ + _(Z, _i, _j) = 0.0f; \ + } + +/* + * R = Transpose(A) + * A is (m by n) + * R is (n by m) + */ +#define TRANSP(R, A, m, n) \ + { \ + for (int _i = 0; _i < (m); _i++) { \ + for (int _j = 0; _j < (n); _j++) { \ + _(R, _j, _i) = _(A, _i, _j); \ + } \ + } \ + } /* * Calculate L,U of a matrix A with pivot table */ -#define LU(A,L,U,Piv,n) { \ - int i,j,k,_tempi; float _tempf; \ - for (i=0; i<n; i++) { Piv[i]=i; } \ - for (i=0; i<n; i++) { \ - for (j=0; j<n; j++) { \ - U[i][j] = A[i][j]; \ - } \ - } \ - IDENTITY(L,n); \ - \ - for (i=0; i<n-1; i++) { \ - \ - int max=i; \ - for (j=i+1; j<n; j++) { \ - if (_ABS(U[j][i]) > _ABS(U[max][i])) { max = j; } \ - } \ - _tempi=Piv[i]; Piv[i]=Piv[max]; Piv[max]=_tempi; \ - for (k=i; k<n; k++) { \ - _tempf=U[i][k]; U[i][k]=U[max][k]; U[max][k]=_tempf; \ - } \ - for (k=0; k<i; k++) { \ - _tempf=L[i][k]; L[i][k]=L[max][k]; L[max][k]=_tempf; \ - } \ - \ - FLOAT invDiag = 1.0 / U[i][i]; \ - for (j=i+1; j<n; j++) { \ - FLOAT scale = U[j][i] * invDiag; \ - U[j][i] = 0.0; \ - for (k=i+1; k<n; k++) { U[j][k] -= U[i][k]*scale; } \ - L[j][i] = scale; \ - } \ - } \ -} +#define LU(L, U, A, Piv, n) \ + { \ + int _i, _j, _k, _tempi; \ + float _tempf; \ + for (_i = 0; _i < (n); _i++) { \ + Piv[_i] = _i; \ + } \ + for (_i = 0; _i < (n); _i++) { \ + for (_j = 0; _j < (n); _j++) { \ + _(U, _i, _j) = _(A, _i, _j); \ + } \ + } \ + IDENTITY(L, n, n); \ + \ + for (_i = 0; _i < (n)-1; _i++) { \ + \ + int max = _i; \ + for (_j = _i + 1; _j < (n); _j++) { \ + if (_ABS(_(U, _j, _i)) > _ABS(_(U, max, _i))) { \ + max = _j; \ + } \ + } \ + _tempi = Piv[_i]; \ + Piv[_i] = Piv[max]; \ + Piv[max] = _tempi; \ + for (_k = _i; _k < (n); _k++) { \ + _tempf = _(U, _i, _k); \ + _(U, _i, _k) = _(U, max, _k); \ + _(U, max, _k) = _tempf; \ + } \ + for (_k = 0; _k < _i; _k++) { \ + _tempf = _(L, _i, _k); \ + _(L, _i, _k) = _(L, max, _k); \ + _(L, max, _k) = _tempf; \ + } \ + \ + FLOAT invDiag = 1.0 / _(U, _i, _i); \ + for (_j = _i + 1; _j < (n); _j++) { \ + FLOAT scale = _(U, _j, _i) * invDiag; \ + _(U, _j, _i) = 0.0; \ + for (_k = _i + 1; _k < (n); _k++) { \ + _(U, _j, _k) -= _(U, _i, _k) * scale; \ + } \ + _(L, _j, _i) = scale; \ + } \ + } \ + } /* * Pivots a matrix to a different matrix - * B = Pivot(A) given table 'Piv' - * A and B are (n by m) + * R = Pivot(A) given table 'Piv' + * A and R are (m by n) */ -#define PIVOT(A,B,Piv,n,m) { \ - int i,j; \ - for (j=0; j<n; j++) { \ - for (i=0; i<m; i++) { \ - B[j][i] = A[Piv[j]][i]; \ - } \ - } \ -} +#define PIVOT(R, A, Piv, m, n) \ + { \ + for (int _i = 0; _i < (m); _i++) { \ + for (int _j = 0; _j < (n); _j++) { \ + _(R, _i, _j) = _(A, Piv[_i], _j); \ + } \ + } \ + } /* * Solve LX=B for matrix X and B - * L is n by n (lower triangular) - * B is n by m + * L is m by m (lower triangular) + * B is m by n */ -#define L_SUB(L,X,B,n,m) { \ - int i,j,k; \ - for (i=0; i<m; i++) { \ - for (j=0; j<n; j++) { \ - float sum=0.0; \ - for (k=0; k<j; k++) { sum += L[j][k]*X[k][i]; } \ - X[j][i] = (B[j][i] - sum) / L[j][j]; \ - } \ - } \ -} +#define L_SUB(X, L, B, m, n) \ + { \ + for (int _i = 0; _i < (n); _i++) { \ + for (int _j = 0; _j < (m); _j++) { \ + float sum = 0.0; \ + for (int _k = 0; _k < _j; _k++) { \ + sum += _(L, _j, _k) * _(X, _k, _i); \ + } \ + _(X, _j, _i) = (_(B, _j, _i) - sum) / _(L, _j, _j); \ + } \ + } \ + } /* * Solve UX=B for matrix X and B - * U is n by n (upper triangular) - * B is n by m + * U is m by m (upper triangular) + * B is m by n */ -#define U_SUB(U,X,B,n,m) { \ - int i,j,k; \ - for (i=0; i<m; i++) { \ - for (j=n-1; j>=0; j--) { \ - float sum=0.0; \ - for (k=n-1; k>j; k--) { sum += U[j][k]*X[k][i]; } \ - X[j][i] = (B[j][i] - sum) / U[j][j]; \ - } \ - } \ -} +#define U_SUB(X, U, B, m, n) \ + { \ + for (int _i = 0; _i < (n); _i++) { \ + for (int _j = m - 1; _j >= 0; _j--) { \ + float sum = 0.0; \ + for (int _k = n - 1; _k > _j; _k--) { \ + sum += _(U, _j, _k) * _(X, _k, _i); \ + } \ + _(X, _j, _i) = (_(B, _j, _i) - sum) / _(U, _j, _j); \ + } \ + } \ + } /* * Inverts a matrix X (n by n) using the method of LU decomposition */ -#define INV(A,Ainv,n) { \ - FLOAT Ipiv[ORDER][ORDER]; \ - FLOAT L[ORDER][ORDER]; \ - FLOAT U[ORDER][ORDER]; \ - FLOAT I[ORDER][ORDER]; \ - FLOAT C[ORDER][ORDER]; \ - int Piv[ORDER]; \ - IDENTITY(I,n); \ - LU(A,L,U,Piv,n); \ - PIVOT(I,Ipiv,Piv,n,n); \ - L_SUB(L,C,Ipiv,n,n); \ - U_SUB(U,Ainv,C,n,n); \ -} + +#ifdef DYNAMIC_INDEX +#define INV_SETUP(ORDER) \ + FLOAT Ipiv[ORDER * ORDER]; \ + const int Ipivc = ORDER; \ + FLOAT L[ORDER * ORDER]; \ + const int Lc = ORDER; \ + FLOAT U[ORDER * ORDER]; \ + const int Uc = ORDER; \ + FLOAT I[ORDER * ORDER]; \ + const int Ic = ORDER; \ + FLOAT C[ORDER * ORDER]; \ + const int Cc = ORDER; +#else +#define INV_SETUP(ORDER) \ + FLOAT Ipiv[ORDER][ORDER]; \ + FLOAT L[ORDER][ORDER]; \ + FLOAT U[ORDER][ORDER]; \ + FLOAT I[ORDER][ORDER]; \ + FLOAT C[ORDER][ORDER]; +#endif + +#define INV(Ainv, A, n, ORDER) \ + { \ + INV_SETUP(ORDER) \ + int Piv[ORDER]; \ + IDENTITY(I, n, n); \ + LU(L, U, A, Piv, n); \ + PIVOT(Ipiv, I, Piv, n, n); \ + L_SUB(C, L, Ipiv, n, n); \ + U_SUB(Ainv, U, C, n, n); \ + } /* PRINT(A,n,n); \ @@ -165,61 +222,61 @@ PRINT(Ainv,n,n); \ */ /* - * Matrix Multiply C = A * B - * A (n by m) + * Matrix Multiply R = A * B + * R (n by p) + * A (m by n) * B (m by p) - * C (n by p) */ -#define MUL(A,B,C,n,m,p) { \ - int i,j,k; \ - for (i=0; i<n; i++) { \ - for (j=0; j<p; j++) { \ - C[i][j] = 0.0f; \ - for (k=0; k<m; k++) { \ - C[i][j] += A[i][k] * B[k][j]; \ - } \ - } \ - } \ -} +#define MUL(R, A, B, m, n, p) \ + { \ + for (int _i = 0; _i < (m); _i++) { \ + for (int _j = 0; _j < p; _j++) { \ + _(R, _i, _j) = 0.0f; \ + for (int _k = 0; _k < (n); _k++) { \ + _(R, _i, _j) += _(A, _i, _k) * _(B, _k, _j); \ + } \ + } \ + } \ + } /* - * Matrix Multiply D = A * B + C - * A (n by m) - * B (m by p) - * C (n by p) - * D (n by p) + * Matrix Multiply R = A * B + C + * R (m by p) + * A (m by n) + * B (n by p) + * C (m by p) */ -#define MULADD(A,B,C,D,n,m,p) { \ - int i,j,k; \ - for (i=0; i<n; i++) { \ - for (j=0; j<p; j++) { \ - D[i][j] = C[i][j]; \ - for (k=0; k<m; k++) { \ - D[i][j] += A[i][k] * B[k][j]; \ - } \ - } \ - } \ -} +#define MULADD(R, A, B, C, m, n, p) \ + { \ + for (int _i = 0; _i < (m); _i++) { \ + for (int _j = 0; _j < p; _j++) { \ + _(R, _i, _j) = _(C, _i, _j); \ + for (int _k = 0; _k < (n); _k++) { \ + _(R, _i, _j) += _(A, _i, _k) * _(B, _k, _j); \ + } \ + } \ + } \ + } /* - * Matrix Multiply D = alpha * A * B + beta * C - * A (n by m) - * B (m by p) - * C (n by p) - * D (n by p) + * Matrix Multiply R = alpha * A * B + beta * C + * R (m by p) + * A (m by n) + * B (n by p) + * C (m by p) */ -#define GMULADD(A,B,C,D,alpha,beta,n,m,p) { \ - int i,j,k; \ - float sum; \ - for (i=0; i<n; i++) { \ - for (j=0; j<p; j++) { \ - sum = 0.0f; \ - for (k=0; k<m; k++) { \ - sum += A[i][k] * B[k][j]; \ - } \ - D[i][j] = alpha * sum + beta * C[i][j]; \ - } \ - } \ -} +#define GMULADD(R, A, B, C, alpha, beta, m, n, p) \ + { \ + float sum; \ + for (int _i = 0; _i < m; _i++) { \ + for (int _j = 0; _j < p; _j++) { \ + sum = 0.0f; \ + for (int _k = 0; _k < n; _k++) { \ + sum += _(A, _i, _k) * _(B, _k, _j); \ + } \ + _(R, _i, _j) = alpha * sum + beta * _(C, _i, _j); \ + } \ + } \ + } #endif diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c new file mode 100644 index 0000000..9ed7c5f --- /dev/null +++ b/redist/dclhelpers.c @@ -0,0 +1,80 @@ +#include "dclhelpers.h" +#define FLOAT DCL_FLOAT +#define DYNAMIC_INDEX +#include "dclapack.h" +#include <alloca.h> +#include <assert.h> +#include <stdio.h> +#include <string.h> + +void dclPrint(const DCL_FLOAT *PMATRIX, int PMATRIXc, int n, int m) { PRINT(PMATRIX, n, m); } + +void dclIdentity(DCL_FLOAT *I, int Ic, int m, int n) { IDENTITY(I, m, n); } + +/* Returns the zero matrix */ +void dclZero(DCL_FLOAT *Z, int Zc, int m, int n) { memset(Z, 0, m * n * sizeof(DCL_FLOAT)); } + +void dclTransp(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int n, int m) { TRANSP(R, A, n, m); } + +void dclLU(DCL_FLOAT *L, int Lc, DCL_FLOAT *U, int Uc, const DCL_FLOAT *A, int Ac, int *Piv, int n) { + LU(L, U, A, Piv, n); +} + +void dclPivot(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int *Piv, int n, int m) { PIVOT(R, A, Piv, n, m); } + +void dclLSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *L, int Lc, const DCL_FLOAT *B, int Bc, int n, int m) { + L_SUB(X, L, B, n, m); +} + +void dclUSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *U, int Uc, const DCL_FLOAT *B, int Bc, int n, int m) { + U_SUB(X, U, B, n, m); +} + +void dclInv(DCL_FLOAT *Ainv, int Ainvc, const DCL_FLOAT *A, int Ac, int n) { INV(Ainv, A, n, n); } + +void dclMul(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, int n, int m, int p) { + MUL(R, A, B, n, m, p); +} + +void dclMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, int Cc, + int n, int m, int p) { + MULADD(R, A, B, C, n, m, p); +} + +void dclGMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, + int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p) { + GMULADD(R, A, B, C, alpha, beta, n, m, p); +} + +/* dclGMulAdd( R, ((transA)?TRANS(A):A, (transB)?TRANS(B):B), C, alpha, beta, n, m, p ); */ +void dcldgemm(char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, const DCL_FLOAT *A, + int Ac, // must be n + const DCL_FLOAT *B, + int Bc, // must be m + DCL_FLOAT beta, DCL_FLOAT *C, + int Cc // must be n + ) { + const DCL_FLOAT *ta; + const DCL_FLOAT *tb; + int tac = Ac; + int tbc = Bc; + if (transA) { + DCL_FLOAT *la = alloca(sizeof(DCL_FLOAT) * n * m); + const int lac = m; + TRANSP(la, A, n, m); + ta = la; + tac = lac; + } else + ta = A; + + if (transB) { + DCL_FLOAT *lb = alloca(sizeof(DCL_FLOAT) * n * m); + const int lbc = m; + TRANSP(lb, B, n, m); + tb = lb; + tbc = lbc; + } else + tb = B; + + GMULADD(C, ta, tb, C, alpha, beta, m, n, k); +} diff --git a/redist/dclhelpers.h b/redist/dclhelpers.h new file mode 100644 index 0000000..00acdac --- /dev/null +++ b/redist/dclhelpers.h @@ -0,0 +1,74 @@ +#ifndef _DCL_HELPERS_H +#define _DCL_HELPERS_H + +#define DCL_FLOAT FLT + +// Use this macro to safely +#define DMS(m) ((m)[0]), (sizeof((m)[0]) / sizeof((m)[0][0])) + +/* Prints matrix A of size[n][m] */ +void dclPrint(const DCL_FLOAT *A, int Ac, int n, int m); + +/* Returns the identity matrix */ +void dclIdentity(DCL_FLOAT *I, int Ic, int m, int n); + +/* Returns the zero matrix */ +void dclZero(DCL_FLOAT *I, int Ic, int m, int n); + +/* R = Transpose(A) + A is (n by m) + R is (m by n) */ +void dclTransp(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int n, int m); + +/* Calculate L,U of a matrix A with pivot table; the pivot table is output. */ +void dclLU(DCL_FLOAT *L, int Lc, DCL_FLOAT *U, int Uc, const DCL_FLOAT *A, int Ac, int *Piv, int n); + +/* Pivots a matrix to a different matrix + R = Pivot(A) given table 'Piv' + A and R are (n by m) */ +void dclPivot(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int *Piv, int n, int m); + +/* Solve LX=B for matrix X and B + L is n by n (lower triangular) + B is n by m */ +void dclLSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *L, int Lc, const DCL_FLOAT *B, int Bc, int n, int m); + +/* Solve UX=B for matrix X and B + U is n by n (upper triangular) + B is n by m */ +void dclUSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *U, int Uc, const DCL_FLOAT *B, int Bc, int n, int m); + +/* Inverts a matrix X (n by n) using the method of LU decomposition */ +void dclInv(DCL_FLOAT *Ainv, int Ainvc, const DCL_FLOAT *A, int Ac, int n); + +/* Matrix Multiply R = A * B + A (n by m) + B (m by p) + R (n by p) */ +void dclMul(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, int n, int m, int p); + +/* Matrix Multiply R = A * B + C + A (n by m) + B (m by p) + C (n by p) + R (n by p) */ +void dclMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, int Cc, + int n, int m, int p); + +/* Matrix Multiply R = alpha * A * B + beta * C + A (n by m) + B (m by p) + C (n by p) + R (n by p) */ +void dclGMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, + int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p); + +/******************************** + * Auxiliary functionality in C * + ********************************/ + +// Matches dgemm from lapack. +void dcldgemm(char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, const DCL_FLOAT *A, int Ac, + const DCL_FLOAT *B, int Bc, DCL_FLOAT beta, DCL_FLOAT *C, int Cc); + +#endif diff --git a/redist/minimal_opencv.c b/redist/minimal_opencv.c index e8a6214..50eb7df 100644 --- a/redist/minimal_opencv.c +++ b/redist/minimal_opencv.c @@ -44,11 +44,8 @@ void cvGEMM(const CvMat *src1, const CvMat *src2, double alpha, const CvMat *src beta = 0; cblas_dgemm(CblasRowMajor, (tABC & GEMM_1_T) ? CblasTrans : CblasNoTrans, - (tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha, - - src1->data.db, lda, src2->data.db, ldb, beta, - - dst->data.db, dst->cols); + (tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha, src1->data.db, + lda, src2->data.db, ldb, beta, dst->data.db, dst->cols); } void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta, double scale) { @@ -67,11 +64,7 @@ void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta lapack_int dstCols = dst->cols; cblas_dgemm(CblasRowMajor, isAT ? CblasTrans : CblasNoTrans, isBT ? CblasTrans : CblasNoTrans, cols, dstCols, rows, - scale, - - src->data.db, cols, src->data.db, cols, beta, - - dst->data.db, dstCols); + scale, src->data.db, cols, src->data.db, cols, beta, dst->data.db, dstCols); } void *cvAlloc(size_t size) { return malloc(size); } diff --git a/redist/test_dcl.c b/redist/test_dcl.c new file mode 100644 index 0000000..ded2863 --- /dev/null +++ b/redist/test_dcl.c @@ -0,0 +1,155 @@ +// gcc -msse2 -O3 -ftree-vectorize test_dcl.c dclhelpers.c os_generic.c -DFLT=double -lpthread -lcblas && valgrind +// ./a.out + +#include "dclhelpers.h" +#include "os_generic.h" +#include <assert.h> +#include <cblas.h> +#include <math.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#include "minimal_opencv.h" + +void fill_random(FLT *A, int ld, int m, int n) { + assert(ld == n); + for (int y = 0; y < m; y++) + for (int x = 0; x < n; x++) + A[y * ld + x] = (rand()) / (double)RAND_MAX; +} + +void test_dcldgemm_speed(const char *name, char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, + const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, DCL_FLOAT beta) { + printf("%s speed test:\n", name); + double times[2]; + FLT emo[2][m][n]; + + for (int z = 0; z < 2; z++) { + double start = OGGetAbsoluteTime(); + for (int i = 0; i < 100000; i++) { + dclZero(DMS(emo[z]), m, n); + + if (z == 0) { + dcldgemm(transA, transB, m, n, k, alpha, A, Ac, B, Bc, beta, DMS(emo[z])); + } else { + + cblas_dgemm(CblasRowMajor, transA == 1 ? CblasTrans : CblasNoTrans, + transB == 1 ? CblasTrans : CblasNoTrans, m, n, k, alpha, A, Ac, B, Bc, beta, emo[z][0], n); + } + } + + times[z] = OGGetAbsoluteTime() - start; + } + + dclPrint(DMS(emo[0]), m, n); + dclPrint(DMS(emo[1]), m, n); + printf("dcl Elapsed: %f\n", times[0]); + printf("cblas Elapsed: %f\n", times[1]); + printf("%fx difference\n", times[0] / times[1]); + + for (int i = 0; i < m; i++) { + for (int j = 0; j < k; j++) { + assert(fabs(emo[0][i][j] - emo[1][i][j]) < .00001); + } + } +} + +void compareToCblas() { + srand(0); + int m = 12; + int n = 20; + int k = 20; + + FLT em1[m][n]; + FLT em2[n][k]; + + fill_random(DMS(em1), m, n); + fill_random(DMS(em2), n, k); + + dclPrint(DMS(em1), m, n); + dclPrint(DMS(em2), n, k); + + test_dcldgemm_speed("Simple", 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1); +} + +void compareToCblasTrans() { + srand(0); + int m = 12; + int n = 20; + int k = n; + + FLT em1[m][n]; + + fill_random(DMS(em1), m, n); + + dclPrint(DMS(em1), m, n); + + CvMat Em1 = cvMat(m, n, CV_64F, em1); + FLT em1tem1[n][n]; + CvMat Em1tEm1 = cvMat(n, n, CV_64F, em1tem1); + cvMulTransposed(&Em1, &Em1tEm1, 1, 0, 1); + print_mat(&Em1tEm1); + + test_dcldgemm_speed("Trans", 1, 0, + n, // # of rows in OP(A) == em1' -- 20 + n, // # of cols in OP(B) == em1 -- 20 + m, // # of cols in OP(A) == em1' -- 12 + 1.0, + DMS(em1), // Note that LD stays the same + DMS(em1), 0); +} + +int main() { + FLT A[2][4] = {{0, 1, 2, 3}, {4, 5, 6, 7}}; + FLT B[4][2]; + dclPrint(A[0], 4, 2, 4); + dclTransp(B[0], 2, A[0], 4, 2, 4); + dclPrint(B[0], 2, 4, 2); + + int i, j; + for (i = 0; i < 8; i++) { + printf("%f\n", ((float *)(B[0]))[i]); + } + + FLT M[3][3] = {{.32, 1, 0}, {0, 1, 2}, {1, 0, 1}}; + FLT Mo[3][3]; + dclInv(Mo[0], 3, M[0], 3, 3); + dclPrint(Mo[0], 3, 3, 3); + + FLT MM[3][3]; + dclMul(MM[0], 3, M[0], 3, Mo[0], 3, 3, 3, 3); + + printf("The following should be an identity matrix\n"); + dclPrint(MM[0], 3, 3, 3); + + { + FLT A[3][4]; + dclIdentity(DMS(A), 3, 4); + dclPrint(DMS(A), 3, 4); + + FLT x[4][2] = { + {7, -7}, {8, -8}, {9, -9}, {10, -10}, + }; + FLT R[4][2]; + dclZero(DMS(R), 4, 2); + + // dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3); + dcldgemm(0, 0, 3, 4, 2, 1, A[0], 4, x[0], 2, 0, R[0], 2); + + dclPrint(DMS(x), 4, 2); + dclPrint(DMS(R), 3, 2); + + for (int j = 0; j < 2; j++) { + for (int i = 0; i < 3; i++) { + printf("[%d][%d]\n", i, j); + assert(R[i][j] == x[i][j]); + } + + assert(fabs(R[3][j]) < .0000001); + } + } + + compareToCblas(); + compareToCblasTrans(); +} diff --git a/src/poser_daveortho.c b/src/poser_daveortho.c index 9d02bb3..2bf57c6 100644 --- a/src/poser_daveortho.c +++ b/src/poser_daveortho.c @@ -193,17 +193,19 @@ void OrthoSolve( FLT invXXt[3][3]; FLT SXt[2][3]; FLT M[2][3]; // Morph matrix! (2 by 3) - TRANSP(X,Xt,3,nPoints); - MUL(X,Xt,XXt,3,nPoints,3); - MUL(S,Xt,SXt,2,nPoints,3); - INV(XXt,invXXt,3); - MUL(SXt,invXXt,M,2,3,3); -//PRINT(M,2,3); - -// Double checking work -FLT S_morph[2][SENSORS_PER_OBJECT]; -MUL(M,X,S_morph,2,3,nPoints); -for (i=0; i<nPoints; i++) { S_morph[0][i]+=sbar[0]; S_morph[1][i]+=sbar[1]; } + TRANSP(Xt, X, 3, nPoints); + MUL(XXt, X, Xt, 3, nPoints, 3); + MUL(SXt, S, Xt, 2, nPoints, 3); + INV(invXXt, XXt, 3, 3); + MUL(M, SXt, invXXt, 2, 3, 3); + // PRINT(M,2,3); + + // Double checking work + FLT S_morph[2][SENSORS_PER_OBJECT]; + MUL(S_morph, M, X, 2, 3, nPoints); + for (i = 0; i < nPoints; i++) { + S_morph[0][i] += sbar[0]; + S_morph[1][i] += sbar[1]; } //-------------------- // Solve for the non-trivial vector @@ -218,10 +220,10 @@ for (i=0; i<nPoints; i++) { S_morph[0][i]+=sbar[0]; S_morph[1][i]+=sbar[1]; } FLT B[3][1] = { {0.0}, {0.0}, {1.0} }; FLT inv_uM[3][3]; FLT uf[3][1]; - INV(uM,inv_uM,3); - MUL(inv_uM,B,uf,3,3,1); - - //-------------------- + INV(inv_uM, uM, 3, 3); + MUL(uf, inv_uM, B, 3, 3, 1); + + //-------------------- // Solve for unit length vector // f that goes into the camera //-------------------- @@ -250,10 +252,10 @@ for (i=0; i<nPoints; i++) { S_morph[0][i]+=sbar[0]; S_morph[1][i]+=sbar[1]; } // uhat,rhat //-------------------- FLT uhat[2][1], rhat[2][1], fhat[2][1]; - MUL(M,f,fhat,2,3,1); - MUL(M,u,uhat,2,3,1); - MUL(M,r,rhat,2,3,1); - FLT fhat_len = sqrt( fhat[0][0]*fhat[0][0] + fhat[1][0]*fhat[1][0] ); + MUL(fhat, M, f, 2, 3, 1); + MUL(uhat, M, u, 2, 3, 1); + MUL(rhat, M, r, 2, 3, 1); + FLT fhat_len = sqrt( fhat[0][0]*fhat[0][0] + fhat[1][0]*fhat[1][0] ); FLT uhat_len = sqrt( uhat[0][0]*uhat[0][0] + uhat[1][0]*uhat[1][0] ); FLT rhat_len = sqrt( rhat[0][0]*rhat[0][0] + rhat[1][0]*rhat[1][0] ); FLT urhat_len = 0.5 * (uhat_len + rhat_len); |