aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile18
-rw-r--r--README.md3
-rw-r--r--data_recorder.c113
-rw-r--r--redist/dclapack.h385
-rw-r--r--redist/dclhelpers.c80
-rw-r--r--redist/dclhelpers.h74
-rw-r--r--redist/minimal_opencv.c13
-rw-r--r--redist/test_dcl.c155
-rw-r--r--src/poser_daveortho.c40
9 files changed, 584 insertions, 297 deletions
diff --git a/Makefile b/Makefile
index 3e5059a..7c0b946 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,9 @@ all : lib data_recorder test calibrate calibrate_client simple_pose_test
CC?=gcc
-CFLAGS:=-Iinclude/libsurvive -fPIC -g -O0 -Iredist -flto -DUSE_DOUBLE -std=gnu99 -rdynamic -llapacke -lcblas -lm -fsanitize=address -fsanitize=undefined
+CFLAGS:=-Iinclude/libsurvive -fPIC -g -O0 -Iredist -flto -DUSE_DOUBLE -std=gnu99 -rdynamic -llapacke -lcblas -lm
+CFLAGS_RELEASE:=-Iinclude/libsurvive -fPIC -msse2 -ftree-vectorize -O3 -Iredist -flto -DUSE_DOUBLE -std=gnu99 -rdynamic -llapacke -lcblas -lm
+
#LDFLAGS:=-L/usr/local/lib -lpthread -lusb-1.0 -lz -lm -flto -g
LDFLAGS:=-L/usr/local/lib -lpthread -lz -lm -flto -g
@@ -70,7 +72,7 @@ test : test.c ./lib/libsurvive.so redist/os_generic.o
simple_pose_test : simple_pose_test.c ./lib/libsurvive.so redist/os_generic.o $(DRAWFUNCTIONS)
$(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS)
-data_recorder : data_recorder.c ./lib/libsurvive.so redist/os_generic.c $(DRAWFUNCTIONS)
+data_recorder : data_recorder.c ./lib/libsurvive.so redist/os_generic.c
$(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS)
calibrate : calibrate.c ./lib/libsurvive.so redist/os_generic.c $(DRAWFUNCTIONS)
@@ -83,6 +85,18 @@ calibrate_client : calibrate_client.c ./lib/libsurvive.so redist/os_generic.c $
static_calibrate : calibrate.c redist/os_generic.c $(DRAWFUNCTIONS) $(LIBSURVIVE_C)
tcc -o $@ $^ $(CFLAGS) $(LDFLAGS) -DTCC
+./redist/dclhelpers_debuggable.c : ./redist/dclhelpers.c ./redist/dclhelpers.h ./redist/dclapack.h
+ gcc -E ./redist/dclhelpers.c > ./redist/dclhelpers_debuggable.c
+ clang-format -i ./redist/dclhelpers_debuggable.c
+ sed -i 's/#/\/\/#/g' ./redist/dclhelpers_debuggable.c
+
+
+test_dcl: ./redist/test_dcl.c ./redist/dclhelpers.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c ./redist/minimal_opencv.c ./src/epnp/epnp.c
+ $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS_RELEASE) -DFLT=double
+
+test_dcl_debug: ./redist/test_dcl.c ./redist/dclhelpers_debuggable.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c
+ $(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) -DFLT=double -fsanitize=address -fsanitize=undefined
+
test_minimal_cv: ./src/epnp/test_minimal_cv.c ./lib/libsurvive.so
$(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS)
diff --git a/README.md b/README.md
index 4dce4f9..f404f39 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,9 @@ Will ~~I~~ we succeed? Probably not. ~~Definitely going to try!~~ Though it's
* Optionally OpenGL.
On Debian, ```sudo apt-get install build-essential zlib1g-dev libx11-dev libusb-1.0-0-dev freeglut3-dev``` should be sufficient.
+Temporarily needed packages: liblapack and libopenblas
+On Debian, ```sudo apt-get install liblapacke-dev libopenblas-dev```
+
## Architecture
<TABLE><TR><TH>Description</TH><TH>Diagram</TH>
diff --git a/data_recorder.c b/data_recorder.c
index 7f53d18..271b996 100644
--- a/data_recorder.c
+++ b/data_recorder.c
@@ -4,7 +4,6 @@
#include <unistd.h>
#endif
-#include <CNFGFunctions.h>
#include <os_generic.h>
#include <stdarg.h>
#include <stdint.h>
@@ -25,28 +24,6 @@ struct SurviveContext *ctx;
FILE *output_file = 0;
-void HandleKey(int keycode, int bDown) {
- if (!bDown)
- return;
-
- if (keycode == 'O' || keycode == 'o') {
- survive_send_magic(ctx, 1, 0, 0);
- }
- if (keycode == 'F' || keycode == 'f') {
- survive_send_magic(ctx, 0, 0, 0);
- }
-}
-
-void HandleButton(int x, int y, int button, int bDown) {}
-
-void HandleMotion(int x, int y, int mask) {}
-
-void HandleDestroy() {}
-
-int bufferpts[32 * 2 * 3];
-char buffermts[32 * 128 * 3];
-int buffertimeto[32 * 3];
-
double timestamp_in_us() {
static double start_time_us = 0;
if (start_time_us == 0)
@@ -74,6 +51,7 @@ int my_config_process(SurviveObject *so, char *ct0conf, int len) {
fwrite("\n", 1, 1, output_file);
return survive_default_htc_config_process(so, ct0conf, len);
}
+
void my_lighthouse_process(SurviveContext *ctx, uint8_t lighthouse, SurvivePose *pose) {
survive_default_lighthouse_pose_process(ctx, lighthouse, pose);
write_to_output("%d LH_POSE %0.6f %0.6f %0.6f %0.6f %0.6f %0.6f %0.6f\n", lighthouse, pose->Pos[0], pose->Pos[1],
@@ -98,51 +76,38 @@ void my_light_process(struct SurviveObject *so, int sensor_id, int acode,
survive_default_light_process(so, sensor_id, acode, timeinsweep, timecode,
length, lh);
- if (acode == -1 || sensor_id < 0) {
+ if (acode == -1) {
write_to_output("%s S %d %d %d %u %u %u\n", so->codename, sensor_id, acode, timeinsweep, timecode, length, lh);
return;
}
- int jumpoffset = sensor_id;
-
- if (strcmp(so->codename, "WM0") == 0)
- jumpoffset += 32;
- else if (strcmp(so->codename, "WM1") == 0)
- jumpoffset += 64;
-
const char *LH_ID = 0;
const char *LH_Axis = 0;
switch (acode) {
case 0:
case 2:
- bufferpts[jumpoffset * 2 + 0] = (timeinsweep - 100000) / 500;
LH_ID = "L";
LH_Axis = "X";
break;
case 1:
case 3:
- bufferpts[jumpoffset * 2 + 1] = (timeinsweep - 100000) / 500;
LH_ID = "L";
LH_Axis = "Y";
break;
case 4:
case 6:
- bufferpts[jumpoffset * 2 + 0] = (timeinsweep - 100000) / 500;
LH_ID = "R";
LH_Axis = "X";
break;
case 5:
case 7:
- bufferpts[jumpoffset * 2 + 1] = (timeinsweep - 100000) / 500;
LH_ID = "R";
LH_Axis = "Y";
break;
}
-
write_to_output("%s %s %s %d %d %d %u %u %u\n", so->codename, LH_ID, LH_Axis, sensor_id, acode, timeinsweep,
timecode, length, lh);
- buffertimeto[jumpoffset] = 0;
}
void my_imu_process(struct SurviveObject *so, int mask, FLT *accelgyro,
@@ -152,47 +117,17 @@ void my_imu_process(struct SurviveObject *so, int mask, FLT *accelgyro,
accelgyro[1], accelgyro[2], accelgyro[3], accelgyro[4], accelgyro[5], id);
}
-void *GuiThread(void *v) {
- CNFGBGColor = 0x000000;
- CNFGDialogColor = 0x444444;
- CNFGSetup("Survive GUI Debug", 640, 480);
-
- short screenx, screeny;
- while (1) {
- CNFGHandleInput();
- CNFGClearFrame();
- CNFGColor(0xFFFFFF);
- CNFGGetDimensions(&screenx, &screeny);
-
- int i;
- for (i = 0; i < 32 * 3; i++) {
- if (buffertimeto[i] < 50) {
- uint32_t color = i * 3231349;
- uint8_t r = color & 0xff;
- uint8_t g = (color >> 8) & 0xff;
- uint8_t b = (color >> 16) & 0xff;
- r = (r * (5 - buffertimeto[i])) / 5;
- g = (g * (5 - buffertimeto[i])) / 5;
- b = (b * (5 - buffertimeto[i])) / 5;
- CNFGColor((b << 16) | (g << 8) | r);
- CNFGTackRectangle(bufferpts[i * 2 + 0], bufferpts[i * 2 + 1],
- bufferpts[i * 2 + 0] + 5,
- bufferpts[i * 2 + 1] + 5);
- CNFGPenX = bufferpts[i * 2 + 0];
- CNFGPenY = bufferpts[i * 2 + 1];
- CNFGDrawText(buffermts, 2);
- buffertimeto[i]++;
- }
+int main(int argc, char **argv) {
+ if (argc > 1) {
+ output_file = fopen(argv[1], "w");
+ if (output_file == 0) {
+ fprintf(stderr, "Could not open %s for writing", argv[1]);
+ return -1;
}
-
- CNFGSwapBuffers();
- OGUSleep(10000);
+ } else {
+ output_file = stdout;
}
-}
-int SurviveThreadLoaded = 0;
-
-void *SurviveThread(void *junk) {
ctx = survive_init_with_config_cb(0, my_config_process);
survive_install_light_fn(ctx, my_light_process);
@@ -201,6 +136,7 @@ void *SurviveThread(void *junk) {
survive_install_raw_pose_fn(ctx, my_raw_pose_process);
survive_install_angle_fn(ctx, my_angle_process);
survive_install_info_fn(ctx, my_info_process);
+
survive_cal_install(ctx);
if (!ctx) {
@@ -208,35 +144,8 @@ void *SurviveThread(void *junk) {
exit(1);
}
- SurviveThreadLoaded = 1;
-
while (survive_poll(ctx) == 0) {
}
return 0;
}
-
-int main(int argc, char **argv) {
- if (argc > 1) {
- output_file = fopen(argv[1], "w");
- if (output_file == 0) {
- fprintf(stderr, "Could not open %s for writing", argv[1]);
- return -1;
- }
- } else {
- output_file = stdout;
- }
- SurviveThread(0);
- /*
- // Create the libsurvive thread
- OGCreateThread(SurviveThread, 0);
-
- // Wait for the survive thread to load
- while (!SurviveThreadLoaded) {
- OGUSleep(100);
- }
-
- // Run the Gui in the main thread
- GuiThread(0);
- */
-}
diff --git a/redist/dclapack.h b/redist/dclapack.h
index 4e209d3..0950815 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -1,10 +1,6 @@
#ifndef __DCLAPACK_H__
#define __DCLAPACK_H__
-#ifndef ORDER
-#define ORDER 50
-#endif
-
#ifndef FLOAT
#define FLOAT float
#endif
@@ -13,146 +9,207 @@
#define _ABS(a) ( (a)<=0 ? 0-(a) : (a) )
+// Tricky: If you want to use this with pointers, instead of 2D arrays, you will
+// need to #define DYNAMIC_INDEX, as well as, for all arrays, suffix their name
+// with 'c'
+
+#ifdef DYNAMIC_INDEX
+#define _(AM, O, P) AM[O * AM##c + P]
+#else
+#define _(AM, O, P) AM[O][P]
+#endif
+
/*
- * Prints a matrix A (n by m)
+ * Prints a matrix A (m by n) (with witdth Ac)
*/
-#define PRINT(A,n,m) { \
- int i,j; \
- printf(#A "\n"); \
- for (i=0; i<n; i++) { \
- for (j=0; j<m; j++) { \
- printf("%4.3f\t", A[i][j]); \
- } \
- printf("\n"); \
- } \
- printf("\n"); \
-}
+#define PRINT(A, m, n) \
+ { \
+ printf(#A " %dx%d\n", m, n); \
+ for (int _i = 0; _i < (m); _i++) { \
+ for (int _j = 0; _j < (n); _j++) { \
+ printf("%4.3f ", _(A, _i, _j)); \
+ } \
+ printf("\n"); \
+ } \
+ printf("\n"); \
+ }
/*
- * Returns the identity matrix
+ * Returns the identity matrix (with size n x n, but width Ic)
*/
-#define IDENTITY(I,n) { \
- int i,j; \
- for (i=0; i<n; i++) { \
- for (j=0; j<i; j++) { I[i][j]=0.0f; } \
- I[i][i] = 1.0f; \
- for (j=i+1; j<n; j++) { I[i][j]=0.0f; } \
- } \
-}
+#define IDENTITY(I, m, n) \
+ { \
+ for (int _i = 0; _i < (m); _i++) { \
+ for (int _j = 0; _j < _i; _j++) { \
+ _(I, _i, _j) = 0.0f; \
+ } \
+ _(I, _i, _i) = 1.0f; \
+ for (int _j = _i + 1; _j < (n); _j++) { \
+ _(I, _i, _j) = 0.0f; \
+ } \
+ } \
+ }
/*
- * B = Transpose(A)
- * A is (n by m)
- * B is (m by n)
+ * Returns the identity matrix (with size n x n, but width Ic)
*/
-#define TRANSP(A,B,n,m) { \
- int i,j; \
- for (i=0; i<n; i++) { \
- for (j=0; j<m; j++) { \
- B[j][i] = A[i][j]; \
- } \
- } \
-}
+#define ZERO(Z, m, n) \
+ { \
+ for (int _i = 0; _i < (m); _i++) \
+ for (int _j = 0; _j < (n); _j++) \
+ _(Z, _i, _j) = 0.0f; \
+ }
+
+/*
+ * R = Transpose(A)
+ * A is (m by n)
+ * R is (n by m)
+ */
+#define TRANSP(R, A, m, n) \
+ { \
+ for (int _i = 0; _i < (m); _i++) { \
+ for (int _j = 0; _j < (n); _j++) { \
+ _(R, _j, _i) = _(A, _i, _j); \
+ } \
+ } \
+ }
/*
* Calculate L,U of a matrix A with pivot table
*/
-#define LU(A,L,U,Piv,n) { \
- int i,j,k,_tempi; float _tempf; \
- for (i=0; i<n; i++) { Piv[i]=i; } \
- for (i=0; i<n; i++) { \
- for (j=0; j<n; j++) { \
- U[i][j] = A[i][j]; \
- } \
- } \
- IDENTITY(L,n); \
- \
- for (i=0; i<n-1; i++) { \
- \
- int max=i; \
- for (j=i+1; j<n; j++) { \
- if (_ABS(U[j][i]) > _ABS(U[max][i])) { max = j; } \
- } \
- _tempi=Piv[i]; Piv[i]=Piv[max]; Piv[max]=_tempi; \
- for (k=i; k<n; k++) { \
- _tempf=U[i][k]; U[i][k]=U[max][k]; U[max][k]=_tempf; \
- } \
- for (k=0; k<i; k++) { \
- _tempf=L[i][k]; L[i][k]=L[max][k]; L[max][k]=_tempf; \
- } \
- \
- FLOAT invDiag = 1.0 / U[i][i]; \
- for (j=i+1; j<n; j++) { \
- FLOAT scale = U[j][i] * invDiag; \
- U[j][i] = 0.0; \
- for (k=i+1; k<n; k++) { U[j][k] -= U[i][k]*scale; } \
- L[j][i] = scale; \
- } \
- } \
-}
+#define LU(L, U, A, Piv, n) \
+ { \
+ int _i, _j, _k, _tempi; \
+ float _tempf; \
+ for (_i = 0; _i < (n); _i++) { \
+ Piv[_i] = _i; \
+ } \
+ for (_i = 0; _i < (n); _i++) { \
+ for (_j = 0; _j < (n); _j++) { \
+ _(U, _i, _j) = _(A, _i, _j); \
+ } \
+ } \
+ IDENTITY(L, n, n); \
+ \
+ for (_i = 0; _i < (n)-1; _i++) { \
+ \
+ int max = _i; \
+ for (_j = _i + 1; _j < (n); _j++) { \
+ if (_ABS(_(U, _j, _i)) > _ABS(_(U, max, _i))) { \
+ max = _j; \
+ } \
+ } \
+ _tempi = Piv[_i]; \
+ Piv[_i] = Piv[max]; \
+ Piv[max] = _tempi; \
+ for (_k = _i; _k < (n); _k++) { \
+ _tempf = _(U, _i, _k); \
+ _(U, _i, _k) = _(U, max, _k); \
+ _(U, max, _k) = _tempf; \
+ } \
+ for (_k = 0; _k < _i; _k++) { \
+ _tempf = _(L, _i, _k); \
+ _(L, _i, _k) = _(L, max, _k); \
+ _(L, max, _k) = _tempf; \
+ } \
+ \
+ FLOAT invDiag = 1.0 / _(U, _i, _i); \
+ for (_j = _i + 1; _j < (n); _j++) { \
+ FLOAT scale = _(U, _j, _i) * invDiag; \
+ _(U, _j, _i) = 0.0; \
+ for (_k = _i + 1; _k < (n); _k++) { \
+ _(U, _j, _k) -= _(U, _i, _k) * scale; \
+ } \
+ _(L, _j, _i) = scale; \
+ } \
+ } \
+ }
/*
* Pivots a matrix to a different matrix
- * B = Pivot(A) given table 'Piv'
- * A and B are (n by m)
+ * R = Pivot(A) given table 'Piv'
+ * A and R are (m by n)
*/
-#define PIVOT(A,B,Piv,n,m) { \
- int i,j; \
- for (j=0; j<n; j++) { \
- for (i=0; i<m; i++) { \
- B[j][i] = A[Piv[j]][i]; \
- } \
- } \
-}
+#define PIVOT(R, A, Piv, m, n) \
+ { \
+ for (int _i = 0; _i < (m); _i++) { \
+ for (int _j = 0; _j < (n); _j++) { \
+ _(R, _i, _j) = _(A, Piv[_i], _j); \
+ } \
+ } \
+ }
/*
* Solve LX=B for matrix X and B
- * L is n by n (lower triangular)
- * B is n by m
+ * L is m by m (lower triangular)
+ * B is m by n
*/
-#define L_SUB(L,X,B,n,m) { \
- int i,j,k; \
- for (i=0; i<m; i++) { \
- for (j=0; j<n; j++) { \
- float sum=0.0; \
- for (k=0; k<j; k++) { sum += L[j][k]*X[k][i]; } \
- X[j][i] = (B[j][i] - sum) / L[j][j]; \
- } \
- } \
-}
+#define L_SUB(X, L, B, m, n) \
+ { \
+ for (int _i = 0; _i < (n); _i++) { \
+ for (int _j = 0; _j < (m); _j++) { \
+ float sum = 0.0; \
+ for (int _k = 0; _k < _j; _k++) { \
+ sum += _(L, _j, _k) * _(X, _k, _i); \
+ } \
+ _(X, _j, _i) = (_(B, _j, _i) - sum) / _(L, _j, _j); \
+ } \
+ } \
+ }
/*
* Solve UX=B for matrix X and B
- * U is n by n (upper triangular)
- * B is n by m
+ * U is m by m (upper triangular)
+ * B is m by n
*/
-#define U_SUB(U,X,B,n,m) { \
- int i,j,k; \
- for (i=0; i<m; i++) { \
- for (j=n-1; j>=0; j--) { \
- float sum=0.0; \
- for (k=n-1; k>j; k--) { sum += U[j][k]*X[k][i]; } \
- X[j][i] = (B[j][i] - sum) / U[j][j]; \
- } \
- } \
-}
+#define U_SUB(X, U, B, m, n) \
+ { \
+ for (int _i = 0; _i < (n); _i++) { \
+ for (int _j = m - 1; _j >= 0; _j--) { \
+ float sum = 0.0; \
+ for (int _k = n - 1; _k > _j; _k--) { \
+ sum += _(U, _j, _k) * _(X, _k, _i); \
+ } \
+ _(X, _j, _i) = (_(B, _j, _i) - sum) / _(U, _j, _j); \
+ } \
+ } \
+ }
/*
* Inverts a matrix X (n by n) using the method of LU decomposition
*/
-#define INV(A,Ainv,n) { \
- FLOAT Ipiv[ORDER][ORDER]; \
- FLOAT L[ORDER][ORDER]; \
- FLOAT U[ORDER][ORDER]; \
- FLOAT I[ORDER][ORDER]; \
- FLOAT C[ORDER][ORDER]; \
- int Piv[ORDER]; \
- IDENTITY(I,n); \
- LU(A,L,U,Piv,n); \
- PIVOT(I,Ipiv,Piv,n,n); \
- L_SUB(L,C,Ipiv,n,n); \
- U_SUB(U,Ainv,C,n,n); \
-}
+
+#ifdef DYNAMIC_INDEX
+#define INV_SETUP(ORDER) \
+ FLOAT Ipiv[ORDER * ORDER]; \
+ const int Ipivc = ORDER; \
+ FLOAT L[ORDER * ORDER]; \
+ const int Lc = ORDER; \
+ FLOAT U[ORDER * ORDER]; \
+ const int Uc = ORDER; \
+ FLOAT I[ORDER * ORDER]; \
+ const int Ic = ORDER; \
+ FLOAT C[ORDER * ORDER]; \
+ const int Cc = ORDER;
+#else
+#define INV_SETUP(ORDER) \
+ FLOAT Ipiv[ORDER][ORDER]; \
+ FLOAT L[ORDER][ORDER]; \
+ FLOAT U[ORDER][ORDER]; \
+ FLOAT I[ORDER][ORDER]; \
+ FLOAT C[ORDER][ORDER];
+#endif
+
+#define INV(Ainv, A, n, ORDER) \
+ { \
+ INV_SETUP(ORDER) \
+ int Piv[ORDER]; \
+ IDENTITY(I, n, n); \
+ LU(L, U, A, Piv, n); \
+ PIVOT(Ipiv, I, Piv, n, n); \
+ L_SUB(C, L, Ipiv, n, n); \
+ U_SUB(Ainv, U, C, n, n); \
+ }
/*
PRINT(A,n,n); \
@@ -165,61 +222,61 @@ PRINT(Ainv,n,n); \
*/
/*
- * Matrix Multiply C = A * B
- * A (n by m)
+ * Matrix Multiply R = A * B
+ * R (n by p)
+ * A (m by n)
* B (m by p)
- * C (n by p)
*/
-#define MUL(A,B,C,n,m,p) { \
- int i,j,k; \
- for (i=0; i<n; i++) { \
- for (j=0; j<p; j++) { \
- C[i][j] = 0.0f; \
- for (k=0; k<m; k++) { \
- C[i][j] += A[i][k] * B[k][j]; \
- } \
- } \
- } \
-}
+#define MUL(R, A, B, m, n, p) \
+ { \
+ for (int _i = 0; _i < (m); _i++) { \
+ for (int _j = 0; _j < p; _j++) { \
+ _(R, _i, _j) = 0.0f; \
+ for (int _k = 0; _k < (n); _k++) { \
+ _(R, _i, _j) += _(A, _i, _k) * _(B, _k, _j); \
+ } \
+ } \
+ } \
+ }
/*
- * Matrix Multiply D = A * B + C
- * A (n by m)
- * B (m by p)
- * C (n by p)
- * D (n by p)
+ * Matrix Multiply R = A * B + C
+ * R (m by p)
+ * A (m by n)
+ * B (n by p)
+ * C (m by p)
*/
-#define MULADD(A,B,C,D,n,m,p) { \
- int i,j,k; \
- for (i=0; i<n; i++) { \
- for (j=0; j<p; j++) { \
- D[i][j] = C[i][j]; \
- for (k=0; k<m; k++) { \
- D[i][j] += A[i][k] * B[k][j]; \
- } \
- } \
- } \
-}
+#define MULADD(R, A, B, C, m, n, p) \
+ { \
+ for (int _i = 0; _i < (m); _i++) { \
+ for (int _j = 0; _j < p; _j++) { \
+ _(R, _i, _j) = _(C, _i, _j); \
+ for (int _k = 0; _k < (n); _k++) { \
+ _(R, _i, _j) += _(A, _i, _k) * _(B, _k, _j); \
+ } \
+ } \
+ } \
+ }
/*
- * Matrix Multiply D = alpha * A * B + beta * C
- * A (n by m)
- * B (m by p)
- * C (n by p)
- * D (n by p)
+ * Matrix Multiply R = alpha * A * B + beta * C
+ * R (m by p)
+ * A (m by n)
+ * B (n by p)
+ * C (m by p)
*/
-#define GMULADD(A,B,C,D,alpha,beta,n,m,p) { \
- int i,j,k; \
- float sum; \
- for (i=0; i<n; i++) { \
- for (j=0; j<p; j++) { \
- sum = 0.0f; \
- for (k=0; k<m; k++) { \
- sum += A[i][k] * B[k][j]; \
- } \
- D[i][j] = alpha * sum + beta * C[i][j]; \
- } \
- } \
-}
+#define GMULADD(R, A, B, C, alpha, beta, m, n, p) \
+ { \
+ float sum; \
+ for (int _i = 0; _i < m; _i++) { \
+ for (int _j = 0; _j < p; _j++) { \
+ sum = 0.0f; \
+ for (int _k = 0; _k < n; _k++) { \
+ sum += _(A, _i, _k) * _(B, _k, _j); \
+ } \
+ _(R, _i, _j) = alpha * sum + beta * _(C, _i, _j); \
+ } \
+ } \
+ }
#endif
diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c
new file mode 100644
index 0000000..9ed7c5f
--- /dev/null
+++ b/redist/dclhelpers.c
@@ -0,0 +1,80 @@
+#include "dclhelpers.h"
+#define FLOAT DCL_FLOAT
+#define DYNAMIC_INDEX
+#include "dclapack.h"
+#include <alloca.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+void dclPrint(const DCL_FLOAT *PMATRIX, int PMATRIXc, int n, int m) { PRINT(PMATRIX, n, m); }
+
+void dclIdentity(DCL_FLOAT *I, int Ic, int m, int n) { IDENTITY(I, m, n); }
+
+/* Returns the zero matrix */
+void dclZero(DCL_FLOAT *Z, int Zc, int m, int n) { memset(Z, 0, m * n * sizeof(DCL_FLOAT)); }
+
+void dclTransp(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int n, int m) { TRANSP(R, A, n, m); }
+
+void dclLU(DCL_FLOAT *L, int Lc, DCL_FLOAT *U, int Uc, const DCL_FLOAT *A, int Ac, int *Piv, int n) {
+ LU(L, U, A, Piv, n);
+}
+
+void dclPivot(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int *Piv, int n, int m) { PIVOT(R, A, Piv, n, m); }
+
+void dclLSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *L, int Lc, const DCL_FLOAT *B, int Bc, int n, int m) {
+ L_SUB(X, L, B, n, m);
+}
+
+void dclUSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *U, int Uc, const DCL_FLOAT *B, int Bc, int n, int m) {
+ U_SUB(X, U, B, n, m);
+}
+
+void dclInv(DCL_FLOAT *Ainv, int Ainvc, const DCL_FLOAT *A, int Ac, int n) { INV(Ainv, A, n, n); }
+
+void dclMul(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, int n, int m, int p) {
+ MUL(R, A, B, n, m, p);
+}
+
+void dclMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, int Cc,
+ int n, int m, int p) {
+ MULADD(R, A, B, C, n, m, p);
+}
+
+void dclGMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C,
+ int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p) {
+ GMULADD(R, A, B, C, alpha, beta, n, m, p);
+}
+
+/* dclGMulAdd( R, ((transA)?TRANS(A):A, (transB)?TRANS(B):B), C, alpha, beta, n, m, p ); */
+void dcldgemm(char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, const DCL_FLOAT *A,
+ int Ac, // must be n
+ const DCL_FLOAT *B,
+ int Bc, // must be m
+ DCL_FLOAT beta, DCL_FLOAT *C,
+ int Cc // must be n
+ ) {
+ const DCL_FLOAT *ta;
+ const DCL_FLOAT *tb;
+ int tac = Ac;
+ int tbc = Bc;
+ if (transA) {
+ DCL_FLOAT *la = alloca(sizeof(DCL_FLOAT) * n * m);
+ const int lac = m;
+ TRANSP(la, A, n, m);
+ ta = la;
+ tac = lac;
+ } else
+ ta = A;
+
+ if (transB) {
+ DCL_FLOAT *lb = alloca(sizeof(DCL_FLOAT) * n * m);
+ const int lbc = m;
+ TRANSP(lb, B, n, m);
+ tb = lb;
+ tbc = lbc;
+ } else
+ tb = B;
+
+ GMULADD(C, ta, tb, C, alpha, beta, m, n, k);
+}
diff --git a/redist/dclhelpers.h b/redist/dclhelpers.h
new file mode 100644
index 0000000..00acdac
--- /dev/null
+++ b/redist/dclhelpers.h
@@ -0,0 +1,74 @@
+#ifndef _DCL_HELPERS_H
+#define _DCL_HELPERS_H
+
+#define DCL_FLOAT FLT
+
+// Use this macro to safely
+#define DMS(m) ((m)[0]), (sizeof((m)[0]) / sizeof((m)[0][0]))
+
+/* Prints matrix A of size[n][m] */
+void dclPrint(const DCL_FLOAT *A, int Ac, int n, int m);
+
+/* Returns the identity matrix */
+void dclIdentity(DCL_FLOAT *I, int Ic, int m, int n);
+
+/* Returns the zero matrix */
+void dclZero(DCL_FLOAT *I, int Ic, int m, int n);
+
+/* R = Transpose(A)
+ A is (n by m)
+ R is (m by n) */
+void dclTransp(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int n, int m);
+
+/* Calculate L,U of a matrix A with pivot table; the pivot table is output. */
+void dclLU(DCL_FLOAT *L, int Lc, DCL_FLOAT *U, int Uc, const DCL_FLOAT *A, int Ac, int *Piv, int n);
+
+/* Pivots a matrix to a different matrix
+ R = Pivot(A) given table 'Piv'
+ A and R are (n by m) */
+void dclPivot(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int *Piv, int n, int m);
+
+/* Solve LX=B for matrix X and B
+ L is n by n (lower triangular)
+ B is n by m */
+void dclLSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *L, int Lc, const DCL_FLOAT *B, int Bc, int n, int m);
+
+/* Solve UX=B for matrix X and B
+ U is n by n (upper triangular)
+ B is n by m */
+void dclUSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *U, int Uc, const DCL_FLOAT *B, int Bc, int n, int m);
+
+/* Inverts a matrix X (n by n) using the method of LU decomposition */
+void dclInv(DCL_FLOAT *Ainv, int Ainvc, const DCL_FLOAT *A, int Ac, int n);
+
+/* Matrix Multiply R = A * B
+ A (n by m)
+ B (m by p)
+ R (n by p) */
+void dclMul(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, int n, int m, int p);
+
+/* Matrix Multiply R = A * B + C
+ A (n by m)
+ B (m by p)
+ C (n by p)
+ R (n by p) */
+void dclMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, int Cc,
+ int n, int m, int p);
+
+/* Matrix Multiply R = alpha * A * B + beta * C
+ A (n by m)
+ B (m by p)
+ C (n by p)
+ R (n by p) */
+void dclGMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C,
+ int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p);
+
+/********************************
+ * Auxiliary functionality in C *
+ ********************************/
+
+// Matches dgemm from lapack.
+void dcldgemm(char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, const DCL_FLOAT *A, int Ac,
+ const DCL_FLOAT *B, int Bc, DCL_FLOAT beta, DCL_FLOAT *C, int Cc);
+
+#endif
diff --git a/redist/minimal_opencv.c b/redist/minimal_opencv.c
index e8a6214..50eb7df 100644
--- a/redist/minimal_opencv.c
+++ b/redist/minimal_opencv.c
@@ -44,11 +44,8 @@ void cvGEMM(const CvMat *src1, const CvMat *src2, double alpha, const CvMat *src
beta = 0;
cblas_dgemm(CblasRowMajor, (tABC & GEMM_1_T) ? CblasTrans : CblasNoTrans,
- (tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha,
-
- src1->data.db, lda, src2->data.db, ldb, beta,
-
- dst->data.db, dst->cols);
+ (tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha, src1->data.db,
+ lda, src2->data.db, ldb, beta, dst->data.db, dst->cols);
}
void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta, double scale) {
@@ -67,11 +64,7 @@ void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta
lapack_int dstCols = dst->cols;
cblas_dgemm(CblasRowMajor, isAT ? CblasTrans : CblasNoTrans, isBT ? CblasTrans : CblasNoTrans, cols, dstCols, rows,
- scale,
-
- src->data.db, cols, src->data.db, cols, beta,
-
- dst->data.db, dstCols);
+ scale, src->data.db, cols, src->data.db, cols, beta, dst->data.db, dstCols);
}
void *cvAlloc(size_t size) { return malloc(size); }
diff --git a/redist/test_dcl.c b/redist/test_dcl.c
new file mode 100644
index 0000000..ded2863
--- /dev/null
+++ b/redist/test_dcl.c
@@ -0,0 +1,155 @@
+// gcc -msse2 -O3 -ftree-vectorize test_dcl.c dclhelpers.c os_generic.c -DFLT=double -lpthread -lcblas && valgrind
+// ./a.out
+
+#include "dclhelpers.h"
+#include "os_generic.h"
+#include <assert.h>
+#include <cblas.h>
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "minimal_opencv.h"
+
+void fill_random(FLT *A, int ld, int m, int n) {
+ assert(ld == n);
+ for (int y = 0; y < m; y++)
+ for (int x = 0; x < n; x++)
+ A[y * ld + x] = (rand()) / (double)RAND_MAX;
+}
+
+void test_dcldgemm_speed(const char *name, char transA, char transB, int m, int n, int k, DCL_FLOAT alpha,
+ const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, DCL_FLOAT beta) {
+ printf("%s speed test:\n", name);
+ double times[2];
+ FLT emo[2][m][n];
+
+ for (int z = 0; z < 2; z++) {
+ double start = OGGetAbsoluteTime();
+ for (int i = 0; i < 100000; i++) {
+ dclZero(DMS(emo[z]), m, n);
+
+ if (z == 0) {
+ dcldgemm(transA, transB, m, n, k, alpha, A, Ac, B, Bc, beta, DMS(emo[z]));
+ } else {
+
+ cblas_dgemm(CblasRowMajor, transA == 1 ? CblasTrans : CblasNoTrans,
+ transB == 1 ? CblasTrans : CblasNoTrans, m, n, k, alpha, A, Ac, B, Bc, beta, emo[z][0], n);
+ }
+ }
+
+ times[z] = OGGetAbsoluteTime() - start;
+ }
+
+ dclPrint(DMS(emo[0]), m, n);
+ dclPrint(DMS(emo[1]), m, n);
+ printf("dcl Elapsed: %f\n", times[0]);
+ printf("cblas Elapsed: %f\n", times[1]);
+ printf("%fx difference\n", times[0] / times[1]);
+
+ for (int i = 0; i < m; i++) {
+ for (int j = 0; j < k; j++) {
+ assert(fabs(emo[0][i][j] - emo[1][i][j]) < .00001);
+ }
+ }
+}
+
+void compareToCblas() {
+ srand(0);
+ int m = 12;
+ int n = 20;
+ int k = 20;
+
+ FLT em1[m][n];
+ FLT em2[n][k];
+
+ fill_random(DMS(em1), m, n);
+ fill_random(DMS(em2), n, k);
+
+ dclPrint(DMS(em1), m, n);
+ dclPrint(DMS(em2), n, k);
+
+ test_dcldgemm_speed("Simple", 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1);
+}
+
+void compareToCblasTrans() {
+ srand(0);
+ int m = 12;
+ int n = 20;
+ int k = n;
+
+ FLT em1[m][n];
+
+ fill_random(DMS(em1), m, n);
+
+ dclPrint(DMS(em1), m, n);
+
+ CvMat Em1 = cvMat(m, n, CV_64F, em1);
+ FLT em1tem1[n][n];
+ CvMat Em1tEm1 = cvMat(n, n, CV_64F, em1tem1);
+ cvMulTransposed(&Em1, &Em1tEm1, 1, 0, 1);
+ print_mat(&Em1tEm1);
+
+ test_dcldgemm_speed("Trans", 1, 0,
+ n, // # of rows in OP(A) == em1' -- 20
+ n, // # of cols in OP(B) == em1 -- 20
+ m, // # of cols in OP(A) == em1' -- 12
+ 1.0,
+ DMS(em1), // Note that LD stays the same
+ DMS(em1), 0);
+}
+
+int main() {
+ FLT A[2][4] = {{0, 1, 2, 3}, {4, 5, 6, 7}};
+ FLT B[4][2];
+ dclPrint(A[0], 4, 2, 4);
+ dclTransp(B[0], 2, A[0], 4, 2, 4);
+ dclPrint(B[0], 2, 4, 2);
+
+ int i, j;
+ for (i = 0; i < 8; i++) {
+ printf("%f\n", ((float *)(B[0]))[i]);
+ }
+
+ FLT M[3][3] = {{.32, 1, 0}, {0, 1, 2}, {1, 0, 1}};
+ FLT Mo[3][3];
+ dclInv(Mo[0], 3, M[0], 3, 3);
+ dclPrint(Mo[0], 3, 3, 3);
+
+ FLT MM[3][3];
+ dclMul(MM[0], 3, M[0], 3, Mo[0], 3, 3, 3, 3);
+
+ printf("The following should be an identity matrix\n");
+ dclPrint(MM[0], 3, 3, 3);
+
+ {
+ FLT A[3][4];
+ dclIdentity(DMS(A), 3, 4);
+ dclPrint(DMS(A), 3, 4);
+
+ FLT x[4][2] = {
+ {7, -7}, {8, -8}, {9, -9}, {10, -10},
+ };
+ FLT R[4][2];
+ dclZero(DMS(R), 4, 2);
+
+ // dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3);
+ dcldgemm(0, 0, 3, 4, 2, 1, A[0], 4, x[0], 2, 0, R[0], 2);
+
+ dclPrint(DMS(x), 4, 2);
+ dclPrint(DMS(R), 3, 2);
+
+ for (int j = 0; j < 2; j++) {
+ for (int i = 0; i < 3; i++) {
+ printf("[%d][%d]\n", i, j);
+ assert(R[i][j] == x[i][j]);
+ }
+
+ assert(fabs(R[3][j]) < .0000001);
+ }
+ }
+
+ compareToCblas();
+ compareToCblasTrans();
+}
diff --git a/src/poser_daveortho.c b/src/poser_daveortho.c
index 9d02bb3..2bf57c6 100644
--- a/src/poser_daveortho.c
+++ b/src/poser_daveortho.c
@@ -193,17 +193,19 @@ void OrthoSolve(
FLT invXXt[3][3];
FLT SXt[2][3];
FLT M[2][3]; // Morph matrix! (2 by 3)
- TRANSP(X,Xt,3,nPoints);
- MUL(X,Xt,XXt,3,nPoints,3);
- MUL(S,Xt,SXt,2,nPoints,3);
- INV(XXt,invXXt,3);
- MUL(SXt,invXXt,M,2,3,3);
-//PRINT(M,2,3);
-
-// Double checking work
-FLT S_morph[2][SENSORS_PER_OBJECT];
-MUL(M,X,S_morph,2,3,nPoints);
-for (i=0; i<nPoints; i++) { S_morph[0][i]+=sbar[0]; S_morph[1][i]+=sbar[1]; }
+ TRANSP(Xt, X, 3, nPoints);
+ MUL(XXt, X, Xt, 3, nPoints, 3);
+ MUL(SXt, S, Xt, 2, nPoints, 3);
+ INV(invXXt, XXt, 3, 3);
+ MUL(M, SXt, invXXt, 2, 3, 3);
+ // PRINT(M,2,3);
+
+ // Double checking work
+ FLT S_morph[2][SENSORS_PER_OBJECT];
+ MUL(S_morph, M, X, 2, 3, nPoints);
+ for (i = 0; i < nPoints; i++) {
+ S_morph[0][i] += sbar[0];
+ S_morph[1][i] += sbar[1]; }
//--------------------
// Solve for the non-trivial vector
@@ -218,10 +220,10 @@ for (i=0; i<nPoints; i++) { S_morph[0][i]+=sbar[0]; S_morph[1][i]+=sbar[1]; }
FLT B[3][1] = { {0.0}, {0.0}, {1.0} };
FLT inv_uM[3][3];
FLT uf[3][1];
- INV(uM,inv_uM,3);
- MUL(inv_uM,B,uf,3,3,1);
-
- //--------------------
+ INV(inv_uM, uM, 3, 3);
+ MUL(uf, inv_uM, B, 3, 3, 1);
+
+ //--------------------
// Solve for unit length vector
// f that goes into the camera
//--------------------
@@ -250,10 +252,10 @@ for (i=0; i<nPoints; i++) { S_morph[0][i]+=sbar[0]; S_morph[1][i]+=sbar[1]; }
// uhat,rhat
//--------------------
FLT uhat[2][1], rhat[2][1], fhat[2][1];
- MUL(M,f,fhat,2,3,1);
- MUL(M,u,uhat,2,3,1);
- MUL(M,r,rhat,2,3,1);
- FLT fhat_len = sqrt( fhat[0][0]*fhat[0][0] + fhat[1][0]*fhat[1][0] );
+ MUL(fhat, M, f, 2, 3, 1);
+ MUL(uhat, M, u, 2, 3, 1);
+ MUL(rhat, M, r, 2, 3, 1);
+ FLT fhat_len = sqrt( fhat[0][0]*fhat[0][0] + fhat[1][0]*fhat[1][0] );
FLT uhat_len = sqrt( uhat[0][0]*uhat[0][0] + uhat[1][0]*uhat[1][0] );
FLT rhat_len = sqrt( rhat[0][0]*rhat[0][0] + rhat[1][0]*rhat[1][0] );
FLT urhat_len = 0.5 * (uhat_len + rhat_len);