From d0ccf2041dc48ca4e5b1ee6b91e70da05f771c72 Mon Sep 17 00:00:00 2001
From: CNLohr <charles@cnlohr.com>
Date: Fri, 16 Mar 2018 22:30:51 -0400
Subject: Update README.md

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/README.md b/README.md
index 4dce4f9..3a4f22a 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,9 @@ Will ~~I~~ we succeed?  Probably not.  ~~Definitely going to try!~~ Though it's
 * Optionally OpenGL.
 On Debian, ```sudo apt-get install build-essential zlib1g-dev libx11-dev libusb-1.0-0-dev freeglut3-dev``` should be sufficient.
 
+Temporarily needed packages: liblapack
+On Debian, ```sudo apt-get install liblapacke-dev```
+
 ## Architecture
 
 <TABLE><TR><TH>Description</TH><TH>Diagram</TH>
-- 
cgit v1.3.1


From 4aff72023129e95d5fe84975ed04a41053bc5739 Mon Sep 17 00:00:00 2001
From: CNLohr <charles@cnlohr.com>
Date: Fri, 16 Mar 2018 22:41:51 -0400
Subject: Add note about additional dependencies.

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 3a4f22a..f404f39 100644
--- a/README.md
+++ b/README.md
@@ -82,8 +82,8 @@ Will ~~I~~ we succeed?  Probably not.  ~~Definitely going to try!~~ Though it's
 * Optionally OpenGL.
 On Debian, ```sudo apt-get install build-essential zlib1g-dev libx11-dev libusb-1.0-0-dev freeglut3-dev``` should be sufficient.
 
-Temporarily needed packages: liblapack
-On Debian, ```sudo apt-get install liblapacke-dev```
+Temporarily needed packages: liblapack and libopenblas
+On Debian, ```sudo apt-get install liblapacke-dev libopenblas-dev```
 
 ## Architecture
 
-- 
cgit v1.3.1


From 783a88f8904b5758195f96c65c7e0b6e87f8a51e Mon Sep 17 00:00:00 2001
From: cnlohr <lohr85@gmail.com>
Date: Sat, 17 Mar 2018 01:00:07 -0400
Subject: Update DCLapack with the C helpers to cize stuff.

---
 redist/dclapack.h   | 78 ++++++++++++++++++++++++++++++++---------------------
 redist/dclhelpers.c | 63 +++++++++++++++++++++++++++++++++++++++++++
 redist/dclhelpers.h | 61 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 171 insertions(+), 31 deletions(-)
 create mode 100644 redist/dclhelpers.c
 create mode 100644 redist/dclhelpers.h

diff --git a/redist/dclapack.h b/redist/dclapack.h
index 4e209d3..62f3e9d 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -1,10 +1,6 @@
 #ifndef __DCLAPACK_H__
 #define __DCLAPACK_H__
 
-#ifndef ORDER
-#define ORDER 50
-#endif
-
 #ifndef FLOAT
 #define FLOAT float
 #endif
@@ -13,6 +9,13 @@
 
 #define _ABS(a)  ( (a)<=0 ? 0-(a) : (a) )
 
+#ifdef DYNAMIC_INDEX
+	#define _(A,O,P) A[O*n+P]
+#else
+	#define _(A,O,P) A[O][P]
+#endif
+
+
 /*
  * Prints a matrix A (n by m)
  */
@@ -21,7 +24,7 @@
     printf(#A "\n"); \
     for (i=0; i<n; i++) { \
         for (j=0; j<m; j++) { \
-            printf("%4.3f\t", A[i][j]); \
+            printf("%4.3f\t", _(A,i,j)); \
         } \
         printf("\n"); \
     } \
@@ -34,9 +37,9 @@
 #define IDENTITY(I,n) { \
     int i,j; \
     for (i=0; i<n; i++) { \
-        for (j=0; j<i; j++) { I[i][j]=0.0f; } \
-        I[i][i] = 1.0f; \
-        for (j=i+1; j<n; j++) { I[i][j]=0.0f; } \
+        for (j=0; j<i; j++) { _(I,i,j)=0.0f; } \
+        _(I,i,i) = 1.0f; \
+        for (j=i+1; j<n; j++) { _(I,i,j)=0.0f; } \
     } \
 }
 
@@ -49,7 +52,7 @@
    int i,j; \
    for (i=0; i<n; i++) { \
       for (j=0; j<m; j++) { \
-         B[j][i] = A[i][j]; \
+         _(B,j,i) = _(A,i,j); \
       } \
    } \
 }
@@ -62,7 +65,7 @@
     for (i=0; i<n; i++) { Piv[i]=i; } \
     for (i=0; i<n; i++) { \
         for (j=0; j<n; j++) { \
-            U[i][j] = A[i][j]; \
+            _(U,i,j) = _(A,i,j); \
         } \
     } \
     IDENTITY(L,n); \
@@ -71,22 +74,22 @@
         \
         int max=i; \
 	for (j=i+1; j<n; j++) { \
-		if (_ABS(U[j][i]) > _ABS(U[max][i])) { max = j; } \
+		if (_ABS( _(U,j,i)) > _ABS(_(U,max,i))) { max = j; } \
 	} \
 	_tempi=Piv[i]; Piv[i]=Piv[max]; Piv[max]=_tempi; \
 	for (k=i; k<n; k++) { \
-		_tempf=U[i][k]; U[i][k]=U[max][k]; U[max][k]=_tempf; \
+		_tempf=_(U,i,k); _(U,i,k)=_(U,max,k); _(U,max,k)=_tempf; \
 	} \
 	for (k=0; k<i; k++) { \
-		_tempf=L[i][k]; L[i][k]=L[max][k]; L[max][k]=_tempf; \
+		_tempf=_(L,i,k); _(L,i,k)=_(L,max,k); _(L,max,k)=_tempf; \
 	} \
         \
-        FLOAT invDiag = 1.0 / U[i][i]; \
+        FLOAT invDiag = 1.0 / _(U,i,i); \
         for (j=i+1; j<n; j++) { \
-            FLOAT scale = U[j][i] * invDiag; \
-            U[j][i] = 0.0; \
-            for (k=i+1; k<n; k++) { U[j][k] -= U[i][k]*scale; } \
-            L[j][i] = scale; \
+            FLOAT scale = _(U,j,i) * invDiag; \
+            _(U,j,i) = 0.0; \
+            for (k=i+1; k<n; k++) { _(U,j,k) -= _(U,i,k)*scale; } \
+            _(L,j,i) = scale; \
         } \
     } \
 }
@@ -100,7 +103,7 @@
     int i,j; \
     for (j=0; j<n; j++) { \
         for (i=0; i<m; i++) { \
-            B[j][i] = A[Piv[j]][i]; \
+            _(B,j,i) = _(A,Piv[j],i); \
         } \
     } \
 }
@@ -115,8 +118,8 @@
     for (i=0; i<m; i++) { \
         for (j=0; j<n; j++) { \
             float sum=0.0; \
-            for (k=0; k<j; k++) { sum += L[j][k]*X[k][i]; } \
-            X[j][i] = (B[j][i] - sum) / L[j][j]; \
+            for (k=0; k<j; k++) { sum += _(L,j,k)*_(X,k,i); } \
+            _(X,j,i) = (_(B,j,i) - sum) / _(L,j,j); \
         } \
     } \
 }
@@ -131,8 +134,8 @@
     for (i=0; i<m; i++) { \
         for (j=n-1; j>=0; j--) { \
             float sum=0.0; \
-            for (k=n-1; k>j; k--) { sum += U[j][k]*X[k][i]; } \
-            X[j][i] = (B[j][i] - sum) / U[j][j]; \
+            for (k=n-1; k>j; k--) { sum += _(U,j,k)*_(X,k,i); } \
+            _(X,j,i) = (_(B,j,i) - sum) / _(U,j,j); \
         } \
     } \
 }
@@ -140,12 +143,25 @@
 /*
  * Inverts a matrix X (n by n) using the method of LU decomposition
  */
-#define INV(A,Ainv,n) { \
+
+#ifdef DYNAMIC_INDEX
+	#define INV_SETUP(ORDER) \
+    FLOAT Ipiv[ORDER*ORDER]; \
+    FLOAT L[ORDER*ORDER]; \
+    FLOAT U[ORDER*ORDER]; \
+    FLOAT I[ORDER*ORDER]; \
+    FLOAT C[ORDER*ORDER];
+#else
+	#define INV_SETUP(ORDER) \
     FLOAT Ipiv[ORDER][ORDER]; \
     FLOAT L[ORDER][ORDER]; \
     FLOAT U[ORDER][ORDER]; \
     FLOAT I[ORDER][ORDER]; \
-    FLOAT C[ORDER][ORDER]; \
+    FLOAT C[ORDER][ORDER];
+#endif
+
+#define INV(A,Ainv,n,ORDER) { \
+	INV_SETUP(ORDER) \
     int Piv[ORDER]; \
     IDENTITY(I,n); \
     LU(A,L,U,Piv,n); \
@@ -174,9 +190,9 @@ PRINT(Ainv,n,n); \
     int i,j,k; \
     for (i=0; i<n; i++) { \
         for (j=0; j<p; j++) { \
-            C[i][j] = 0.0f; \
+            _(C,i,j) = 0.0f; \
             for (k=0; k<m; k++) { \
-                C[i][j] += A[i][k] * B[k][j]; \
+                _(C,i,j) += _(A,i,k) * _(B,k,j); \
             } \
         } \
     } \
@@ -193,9 +209,9 @@ PRINT(Ainv,n,n); \
     int i,j,k; \
     for (i=0; i<n; i++) { \
         for (j=0; j<p; j++) { \
-            D[i][j] = C[i][j]; \
+            _(D,i,j) = _(C,i,j); \
             for (k=0; k<m; k++) { \
-                D[i][j] += A[i][k] * B[k][j]; \
+                _(D,i,j) += _(A,i,k) * _(B,k,j); \
             } \
         } \
     } \
@@ -215,9 +231,9 @@ PRINT(Ainv,n,n); \
         for (j=0; j<p; j++) { \
             sum = 0.0f; \
             for (k=0; k<m; k++) { \
-                sum += A[i][k] * B[k][j]; \
+                sum += _(A,i,k) * _(B,k,j); \
             } \
-            D[i][j] = alpha * sum + beta * C[i][j]; \
+            _(D,i,j) = alpha * sum + beta * _(C,i,j); \
         } \
     } \
 }
diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c
new file mode 100644
index 0000000..9d3d0eb
--- /dev/null
+++ b/redist/dclhelpers.c
@@ -0,0 +1,63 @@
+#include "dclhelpers.h"
+#define FLOAT DCL_FLOAT
+#define DYNAMIC_INDEX
+#include <stdio.h>
+#include "dclapack.h"
+
+
+void dclPrint( const DCL_FLOAT * A, int n, int m )
+{
+	PRINT( A, n, m );
+}
+
+void dclIdentity( DCL_FLOAT * A, int n )
+{
+	IDENTITY( A, n );
+}
+
+void dclTransp( const DCL_FLOAT * A, DCL_FLOAT * B, int n, int m)
+{
+	TRANSP(A,B,n,m);
+}
+
+void dclLU( const DCL_FLOAT * A, DCL_FLOAT * L, DCL_FLOAT * U, int * Piv, int n )
+{
+	LU(A,L,U,Piv,n);
+}
+
+void dclPivot( const DCL_FLOAT * A, DCL_FLOAT * B, int * Piv, int n, int m )
+{
+	PIVOT(A,B,Piv,n,m);
+}
+
+void dclLSub( const DCL_FLOAT * L, DCL_FLOAT * X, const DCL_FLOAT * B, int n, int m )
+{
+	L_SUB(L,X,B,n,m);
+}
+
+void dclUSub( const DCL_FLOAT * U, DCL_FLOAT * X, const DCL_FLOAT * B, int n, int m )
+{
+	U_SUB(U,X,B,n,m);
+}
+
+void dclInv( const DCL_FLOAT * A, DCL_FLOAT * Ainv, int n )
+{
+	INV(A,Ainv,n,n);
+}
+
+void dclMul( const DCL_FLOAT * A, const DCL_FLOAT * B, DCL_FLOAT * C, int n, int m, int p )
+{
+	MUL(A,B,C,n,m,p);
+}
+
+void dclMulAdd( const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT * D, int n, int m, int p )
+{
+	MULADD(A,B,C,D,n,m,p);
+}
+
+void dclGMulAdd( const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT * D, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p )
+{
+	GMULADD(A,B,C,D,alpha,beta,n,m,p);
+}
+
+
diff --git a/redist/dclhelpers.h b/redist/dclhelpers.h
new file mode 100644
index 0000000..fbaa7ec
--- /dev/null
+++ b/redist/dclhelpers.h
@@ -0,0 +1,61 @@
+#ifndef _DCL_HELPERS_H
+#define _DCL_HELPERS_H
+
+#define DCL_FLOAT FLT
+
+//XXX XXX XXX WARNING XXX XXX XXX The argument order may be changing!!!
+
+/* Prints matrix A of size[n][m] */
+void dclPrint( const DCL_FLOAT * A, int n, int m );
+
+/* Returns the identity matrix */
+void dclIdentity( DCL_FLOAT * A, int n );
+
+/* B = Transpose(A)
+     A is (n by m)
+     B is (m by n) */
+void dclTransp( const DCL_FLOAT * A, DCL_FLOAT * B, int n, int m );
+
+/* Calculate L,U of a matrix A with pivot table; the pivot table is output. */
+void dclLU( const DCL_FLOAT * A, DCL_FLOAT * L, DCL_FLOAT * U, int * Piv, int n );
+
+/* Pivots a matrix to a different matrix
+    B = Pivot(A) given table 'Piv'
+    A and B are (n by m) */
+void dclPivot( const DCL_FLOAT * A, DCL_FLOAT * B, int * Piv, int n, int m );
+
+/* Solve LX=B for matrix X and B
+    L is n by n (lower triangular)
+    B is n by m */
+void dclLSub( const DCL_FLOAT * L, DCL_FLOAT * X, const DCL_FLOAT * B, int n, int m );
+
+/* Solve UX=B for matrix X and B
+    U is n by n (upper triangular)
+    B is n by m */
+void dclUSub( const DCL_FLOAT * U, DCL_FLOAT * X, const DCL_FLOAT * B, int n, int m );
+
+/* Inverts a matrix X (n by n) using the method of LU decomposition */
+void dclInv( const DCL_FLOAT * A, DCL_FLOAT * Ainv, int n );
+
+/* Matrix Multiply C = A * B
+    A (n by m)
+    B (m by p)
+    C (n by p) */
+void dclMul( const DCL_FLOAT * A, const DCL_FLOAT * B, DCL_FLOAT * C, int n, int m, int p );
+
+/* Matrix Multiply D = A * B + C
+    A (n by m)
+    B (m by p)
+    C (n by p)
+    D (n by p) */
+void dclMulAdd( const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT * D, int n, int m, int p );
+
+/* Matrix Multiply D = alpha * A * B + beta * C
+    A (n by m)
+    B (m by p)
+    C (n by p)
+    D (n by p) */
+void dclGMulAdd( const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT * D, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p );
+
+#endif
+
-- 
cgit v1.3.1


From 8b9a196232661bd6506a41b7ceca015d684086a8 Mon Sep 17 00:00:00 2001
From: cnlohr <lohr85@gmail.com>
Date: Sat, 17 Mar 2018 01:13:28 -0400
Subject: Update variable ordering...

---
 redist/dclapack.h   | 48 ++++++++++++++++++++++++------------------------
 redist/dclhelpers.c | 36 ++++++++++++++++++------------------
 redist/dclhelpers.h | 26 +++++++++++++-------------
 3 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/redist/dclapack.h b/redist/dclapack.h
index 62f3e9d..5aba214 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -44,15 +44,15 @@
 }
 
 /*
- * B = Transpose(A)
+ * R = Transpose(A)
  *  A is (n by m)
- *  B is (m by n)
+ *  R is (m by n)
  */
-#define TRANSP(A,B,n,m) { \
+#define TRANSP(R,A,n,m) { \
    int i,j; \
    for (i=0; i<n; i++) { \
       for (j=0; j<m; j++) { \
-         _(B,j,i) = _(A,i,j); \
+         _(R,j,i) = _(A,i,j); \
       } \
    } \
 }
@@ -60,7 +60,7 @@
 /*
  * Calculate L,U of a matrix A with pivot table
  */
-#define LU(A,L,U,Piv,n) { \
+#define LU(L,U,A,Piv,n) { \
     int i,j,k,_tempi; float _tempf; \
     for (i=0; i<n; i++) { Piv[i]=i; } \
     for (i=0; i<n; i++) { \
@@ -96,14 +96,14 @@
 
 /*
  * Pivots a matrix to a different matrix
- *  B = Pivot(A) given table 'Piv'
- *  A and B are (n by m)
+ *  R = Pivot(A) given table 'Piv'
+ *  A and R are (n by m)
  */
-#define PIVOT(A,B,Piv,n,m) { \
+#define PIVOT(R,A,Piv,n,m) { \
     int i,j; \
     for (j=0; j<n; j++) { \
         for (i=0; i<m; i++) { \
-            _(B,j,i) = _(A,Piv[j],i); \
+            _(R,j,i) = _(A,Piv[j],i); \
         } \
     } \
 }
@@ -113,7 +113,7 @@
  *  L is n by n (lower triangular)
  *  B is n by m
  */
-#define L_SUB(L,X,B,n,m) { \
+#define L_SUB(X,L,B,n,m) { \
     int i,j,k; \
     for (i=0; i<m; i++) { \
         for (j=0; j<n; j++) { \
@@ -129,7 +129,7 @@
  *  U is n by n (upper triangular)
  *  B is n by m
  */
-#define U_SUB(U,X,B,n,m) { \
+#define U_SUB(X,U,B,n,m) { \
     int i,j,k; \
     for (i=0; i<m; i++) { \
         for (j=n-1; j>=0; j--) { \
@@ -160,14 +160,14 @@
     FLOAT C[ORDER][ORDER];
 #endif
 
-#define INV(A,Ainv,n,ORDER) { \
+#define INV(Ainv,A,n,ORDER) { \
 	INV_SETUP(ORDER) \
     int Piv[ORDER]; \
     IDENTITY(I,n); \
-    LU(A,L,U,Piv,n); \
-    PIVOT(I,Ipiv,Piv,n,n); \
-    L_SUB(L,C,Ipiv,n,n); \
-    U_SUB(U,Ainv,C,n,n); \
+    LU(L,U,A,Piv,n); \
+    PIVOT(Ipiv,I,Piv,n,n); \
+    L_SUB(C,L,Ipiv,n,n); \
+    U_SUB(Ainv,U,C,n,n); \
 }
 
 /*
@@ -186,13 +186,13 @@ PRINT(Ainv,n,n); \
  *  B (m by p)
  *  C (n by p)
  */
-#define MUL(A,B,C,n,m,p) { \
+#define MUL(R,A,B,n,m,p) { \
     int i,j,k; \
     for (i=0; i<n; i++) { \
         for (j=0; j<p; j++) { \
-            _(C,i,j) = 0.0f; \
+            _(R,i,j) = 0.0f; \
             for (k=0; k<m; k++) { \
-                _(C,i,j) += _(A,i,k) * _(B,k,j); \
+                _(R,i,j) += _(A,i,k) * _(B,k,j); \
             } \
         } \
     } \
@@ -205,13 +205,13 @@ PRINT(Ainv,n,n); \
  *  C (n by p)
  *  D (n by p)
  */
-#define MULADD(A,B,C,D,n,m,p) { \
+#define MULADD(R,A,B,C,n,m,p) { \
     int i,j,k; \
     for (i=0; i<n; i++) { \
         for (j=0; j<p; j++) { \
-            _(D,i,j) = _(C,i,j); \
+            _(R,i,j) = _(C,i,j); \
             for (k=0; k<m; k++) { \
-                _(D,i,j) += _(A,i,k) * _(B,k,j); \
+                _(R,i,j) += _(A,i,k) * _(B,k,j); \
             } \
         } \
     } \
@@ -224,7 +224,7 @@ PRINT(Ainv,n,n); \
  *  C (n by p)
  *  D (n by p)
  */
-#define GMULADD(A,B,C,D,alpha,beta,n,m,p) { \
+#define GMULADD(R,A,B,C,alpha,beta,n,m,p) { \
     int i,j,k; \
     float sum; \
     for (i=0; i<n; i++) { \
@@ -233,7 +233,7 @@ PRINT(Ainv,n,n); \
             for (k=0; k<m; k++) { \
                 sum += _(A,i,k) * _(B,k,j); \
             } \
-            _(D,i,j) = alpha * sum + beta * _(C,i,j); \
+            _(R,i,j) = alpha * sum + beta * _(C,i,j); \
         } \
     } \
 }
diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c
index 9d3d0eb..2ee6c4a 100644
--- a/redist/dclhelpers.c
+++ b/redist/dclhelpers.c
@@ -15,49 +15,49 @@ void dclIdentity( DCL_FLOAT * A, int n )
 	IDENTITY( A, n );
 }
 
-void dclTransp( const DCL_FLOAT * A, DCL_FLOAT * B, int n, int m)
+void dclTransp( DCL_FLOAT * R, const DCL_FLOAT * A, int n, int m )
 {
-	TRANSP(A,B,n,m);
+	TRANSP(R,A,n,m);
 }
 
-void dclLU( const DCL_FLOAT * A, DCL_FLOAT * L, DCL_FLOAT * U, int * Piv, int n )
+void dclLU( DCL_FLOAT * L, DCL_FLOAT * U, const DCL_FLOAT * A, int * Piv, int n )
 {
-	LU(A,L,U,Piv,n);
+	LU(L,U,A,Piv,n);
 }
 
-void dclPivot( const DCL_FLOAT * A, DCL_FLOAT * B, int * Piv, int n, int m )
+void dclPivot( DCL_FLOAT * R, const DCL_FLOAT * A, int * Piv, int n, int m )
 {
-	PIVOT(A,B,Piv,n,m);
+	PIVOT(R,A,Piv,n,m);
 }
 
-void dclLSub( const DCL_FLOAT * L, DCL_FLOAT * X, const DCL_FLOAT * B, int n, int m )
+void dclLSub( DCL_FLOAT * X, const DCL_FLOAT * L, const DCL_FLOAT * B, int n, int m )
 {
-	L_SUB(L,X,B,n,m);
+	L_SUB(X,L,B,n,m);
 }
 
-void dclUSub( const DCL_FLOAT * U, DCL_FLOAT * X, const DCL_FLOAT * B, int n, int m )
+void dclUSub( DCL_FLOAT * X, const DCL_FLOAT * U, const DCL_FLOAT * B, int n, int m )
 {
-	U_SUB(U,X,B,n,m);
+	U_SUB(X,U,B,n,m);
 }
 
-void dclInv( const DCL_FLOAT * A, DCL_FLOAT * Ainv, int n )
+void dclInv( DCL_FLOAT * Ainv, const DCL_FLOAT * A, int n )
 {
-	INV(A,Ainv,n,n);
+	INV(Ainv,A,n,n);
 }
 
-void dclMul( const DCL_FLOAT * A, const DCL_FLOAT * B, DCL_FLOAT * C, int n, int m, int p )
+void dclMul( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, int n, int m, int p )
 {
-	MUL(A,B,C,n,m,p);
+	MUL(R,A,B,n,m,p);
 }
 
-void dclMulAdd( const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT * D, int n, int m, int p )
+void dclMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, int n, int m, int p )
 {
-	MULADD(A,B,C,D,n,m,p);
+	MULADD(R,A,B,C,n,m,p);
 }
 
-void dclGMulAdd( const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT * D, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p )
+void dclGMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p )
 {
-	GMULADD(A,B,C,D,alpha,beta,n,m,p);
+	GMULADD(R,A,B,C,alpha,beta,n,m,p);
 }
 
 
diff --git a/redist/dclhelpers.h b/redist/dclhelpers.h
index fbaa7ec..4ada24a 100644
--- a/redist/dclhelpers.h
+++ b/redist/dclhelpers.h
@@ -11,51 +11,51 @@ void dclPrint( const DCL_FLOAT * A, int n, int m );
 /* Returns the identity matrix */
 void dclIdentity( DCL_FLOAT * A, int n );
 
-/* B = Transpose(A)
+/* R = Transpose(A)
      A is (n by m)
-     B is (m by n) */
-void dclTransp( const DCL_FLOAT * A, DCL_FLOAT * B, int n, int m );
+     R is (m by n) */
+void dclTransp( DCL_FLOAT * R, const DCL_FLOAT * A, int n, int m );
 
 /* Calculate L,U of a matrix A with pivot table; the pivot table is output. */
-void dclLU( const DCL_FLOAT * A, DCL_FLOAT * L, DCL_FLOAT * U, int * Piv, int n );
+void dclLU( DCL_FLOAT * L, DCL_FLOAT * U, const DCL_FLOAT * A, int * Piv, int n );
 
 /* Pivots a matrix to a different matrix
-    B = Pivot(A) given table 'Piv'
-    A and B are (n by m) */
-void dclPivot( const DCL_FLOAT * A, DCL_FLOAT * B, int * Piv, int n, int m );
+    R = Pivot(A) given table 'Piv'
+    A and R are (n by m) */
+void dclPivot( DCL_FLOAT * R, const DCL_FLOAT * A, int * Piv, int n, int m );
 
 /* Solve LX=B for matrix X and B
     L is n by n (lower triangular)
     B is n by m */
-void dclLSub( const DCL_FLOAT * L, DCL_FLOAT * X, const DCL_FLOAT * B, int n, int m );
+void dclLSub( DCL_FLOAT * X, const DCL_FLOAT * L, const DCL_FLOAT * B, int n, int m );
 
 /* Solve UX=B for matrix X and B
     U is n by n (upper triangular)
     B is n by m */
-void dclUSub( const DCL_FLOAT * U, DCL_FLOAT * X, const DCL_FLOAT * B, int n, int m );
+void dclUSub( DCL_FLOAT * X, const DCL_FLOAT * U, const DCL_FLOAT * B, int n, int m );
 
 /* Inverts a matrix X (n by n) using the method of LU decomposition */
-void dclInv( const DCL_FLOAT * A, DCL_FLOAT * Ainv, int n );
+void dclInv( DCL_FLOAT * Ainv, const DCL_FLOAT * A, int n );
 
 /* Matrix Multiply C = A * B
     A (n by m)
     B (m by p)
     C (n by p) */
-void dclMul( const DCL_FLOAT * A, const DCL_FLOAT * B, DCL_FLOAT * C, int n, int m, int p );
+void dclMul( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, int n, int m, int p );
 
 /* Matrix Multiply D = A * B + C
     A (n by m)
     B (m by p)
     C (n by p)
     D (n by p) */
-void dclMulAdd( const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT * D, int n, int m, int p );
+void dclMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, int n, int m, int p );
 
 /* Matrix Multiply D = alpha * A * B + beta * C
     A (n by m)
     B (m by p)
     C (n by p)
     D (n by p) */
-void dclGMulAdd( const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT * D, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p );
+void dclGMulAdd( DCL_FLOAT * D, const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p );
 
 #endif
 
-- 
cgit v1.3.1


From 910a0acb448feaeb18d1d9929f610d9a550d0272 Mon Sep 17 00:00:00 2001
From: cnlohr <lohr85@gmail.com>
Date: Sat, 17 Mar 2018 01:16:20 -0400
Subject: update daveortho to use new nomenclature

---
 src/poser_daveortho.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/poser_daveortho.c b/src/poser_daveortho.c
index 9d02bb3..4cf6dd8 100644
--- a/src/poser_daveortho.c
+++ b/src/poser_daveortho.c
@@ -193,16 +193,16 @@ void OrthoSolve(
     FLT invXXt[3][3];
     FLT SXt[2][3];
     FLT M[2][3];           // Morph matrix! (2 by 3)
-    TRANSP(X,Xt,3,nPoints);
-    MUL(X,Xt,XXt,3,nPoints,3);
-    MUL(S,Xt,SXt,2,nPoints,3);
-    INV(XXt,invXXt,3);
-    MUL(SXt,invXXt,M,2,3,3);
+    TRANSP(Xt,X,3,nPoints);
+    MUL(XXt,X,Xt,3,nPoints,3);
+    MUL(SXt,S,Xt,2,nPoints,3);
+    INV(invXXt,XXt,3,3);
+    MUL(M,SXt,invXXt,2,3,3);
 //PRINT(M,2,3);
 
 // Double checking work
 FLT S_morph[2][SENSORS_PER_OBJECT];
-MUL(M,X,S_morph,2,3,nPoints);
+MUL(S_morph,M,X,2,3,nPoints);
 for (i=0; i<nPoints; i++) { S_morph[0][i]+=sbar[0]; S_morph[1][i]+=sbar[1]; }
 
     //--------------------
@@ -218,8 +218,8 @@ for (i=0; i<nPoints; i++) { S_morph[0][i]+=sbar[0]; S_morph[1][i]+=sbar[1]; }
     FLT B[3][1] = { {0.0}, {0.0}, {1.0} };
     FLT inv_uM[3][3];
     FLT uf[3][1];
-    INV(uM,inv_uM,3);
-    MUL(inv_uM,B,uf,3,3,1);
+    INV(inv_uM,uM,3,3);
+    MUL(uf,inv_uM,B,3,3,1);
     
     //--------------------
     // Solve for unit length vector
@@ -250,9 +250,9 @@ for (i=0; i<nPoints; i++) { S_morph[0][i]+=sbar[0]; S_morph[1][i]+=sbar[1]; }
     //  uhat,rhat
     //--------------------
     FLT uhat[2][1], rhat[2][1], fhat[2][1];
-    MUL(M,f,fhat,2,3,1);
-    MUL(M,u,uhat,2,3,1);
-    MUL(M,r,rhat,2,3,1);
+    MUL(fhat,M,f,2,3,1);
+    MUL(uhat,M,u,2,3,1);
+    MUL(rhat,M,r,2,3,1);
     FLT fhat_len = sqrt( fhat[0][0]*fhat[0][0] + fhat[1][0]*fhat[1][0] );
     FLT uhat_len = sqrt( uhat[0][0]*uhat[0][0] + uhat[1][0]*uhat[1][0] );
     FLT rhat_len = sqrt( rhat[0][0]*rhat[0][0] + rhat[1][0]*rhat[1][0] );
-- 
cgit v1.3.1


From 0529854c1f0fc3756cb93db839b83055b681ebc2 Mon Sep 17 00:00:00 2001
From: Justin Berger <j.david.berger@gmail.com>
Date: Fri, 16 Mar 2018 23:27:36 -0600
Subject: Removed GFX guts out of data_recorder

---
 Makefile        |   2 +-
 data_recorder.c | 114 ++++++--------------------------------------------------
 2 files changed, 13 insertions(+), 103 deletions(-)

diff --git a/Makefile b/Makefile
index 034fbd0..e130357 100644
--- a/Makefile
+++ b/Makefile
@@ -69,7 +69,7 @@ test : test.c ./lib/libsurvive.so redist/os_generic.o
 simple_pose_test : simple_pose_test.c ./lib/libsurvive.so redist/os_generic.o $(DRAWFUNCTIONS)
 	$(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS)
 
-data_recorder : data_recorder.c ./lib/libsurvive.so redist/os_generic.c $(DRAWFUNCTIONS)
+data_recorder : data_recorder.c ./lib/libsurvive.so redist/os_generic.c
 	$(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS)
 
 calibrate :  calibrate.c ./lib/libsurvive.so redist/os_generic.c $(DRAWFUNCTIONS)
diff --git a/data_recorder.c b/data_recorder.c
index 1392bed..271b996 100644
--- a/data_recorder.c
+++ b/data_recorder.c
@@ -4,7 +4,6 @@
 #include <unistd.h>
 #endif
 
-#include <CNFGFunctions.h>
 #include <os_generic.h>
 #include <stdarg.h>
 #include <stdint.h>
@@ -25,28 +24,6 @@ struct SurviveContext *ctx;
 
 FILE *output_file = 0;
 
-void HandleKey(int keycode, int bDown) {
-	if (!bDown)
-		return;
-
-	if (keycode == 'O' || keycode == 'o') {
-		survive_send_magic(ctx, 1, 0, 0);
-	}
-	if (keycode == 'F' || keycode == 'f') {
-		survive_send_magic(ctx, 0, 0, 0);
-	}
-}
-
-void HandleButton(int x, int y, int button, int bDown) {}
-
-void HandleMotion(int x, int y, int mask) {}
-
-void HandleDestroy() {}
-
-int bufferpts[32 * 2 * 3];
-char buffermts[32 * 128 * 3];
-int buffertimeto[32 * 3];
-
 double timestamp_in_us() {
 	static double start_time_us = 0;
 	if (start_time_us == 0)
@@ -74,6 +51,7 @@ int my_config_process(SurviveObject *so, char *ct0conf, int len) {
 	fwrite("\n", 1, 1, output_file);
 	return survive_default_htc_config_process(so, ct0conf, len);
 }
+
 void my_lighthouse_process(SurviveContext *ctx, uint8_t lighthouse, SurvivePose *pose) {
 	survive_default_lighthouse_pose_process(ctx, lighthouse, pose);
 	write_to_output("%d LH_POSE %0.6f %0.6f %0.6f %0.6f %0.6f %0.6f %0.6f\n", lighthouse, pose->Pos[0], pose->Pos[1],
@@ -98,51 +76,38 @@ void my_light_process(struct SurviveObject *so, int sensor_id, int acode,
 	survive_default_light_process(so, sensor_id, acode, timeinsweep, timecode,
 								  length, lh);
 
-	if (acode == -1 || sensor_id < 0) {
+	if (acode == -1) {
 		write_to_output("%s S %d %d %d %u %u %u\n", so->codename, sensor_id, acode, timeinsweep, timecode, length, lh);
 		return;
 	}
 
-	int jumpoffset = sensor_id;
-
-	if (strcmp(so->codename, "WM0") == 0)
-		jumpoffset += 32;
-	else if (strcmp(so->codename, "WM1") == 0)
-		jumpoffset += 64;
-
 	const char *LH_ID = 0;
 	const char *LH_Axis = 0;
 
 	switch (acode) {
 	case 0:
 	case 2:
-		bufferpts[jumpoffset * 2 + 0] = (timeinsweep - 100000) / 500;
 		LH_ID = "L";
 		LH_Axis = "X";
 		break;
 	case 1:
 	case 3:
-		bufferpts[jumpoffset * 2 + 1] = (timeinsweep - 100000) / 500;
 		LH_ID = "L";
 		LH_Axis = "Y";
 		break;
 	case 4:
 	case 6:
-		bufferpts[jumpoffset * 2 + 0] = (timeinsweep - 100000) / 500;
 		LH_ID = "R";
 		LH_Axis = "X";
 		break;
 	case 5:
 	case 7:
-		bufferpts[jumpoffset * 2 + 1] = (timeinsweep - 100000) / 500;
 		LH_ID = "R";
 		LH_Axis = "Y";
 		break;
 	}
-
 	write_to_output("%s %s %s %d %d %d %u %u %u\n", so->codename, LH_ID, LH_Axis, sensor_id, acode, timeinsweep,
 					timecode, length, lh);
-	buffertimeto[jumpoffset] = 0;
 }
 
 void my_imu_process(struct SurviveObject *so, int mask, FLT *accelgyro,
@@ -152,47 +117,17 @@ void my_imu_process(struct SurviveObject *so, int mask, FLT *accelgyro,
 					accelgyro[1], accelgyro[2], accelgyro[3], accelgyro[4], accelgyro[5], id);
 }
 
-void *GuiThread(void *v) {
-	CNFGBGColor = 0x000000;
-	CNFGDialogColor = 0x444444;
-	CNFGSetup("Survive GUI Debug", 640, 480);
-
-	short screenx, screeny;
-	while (1) {
-		CNFGHandleInput();
-		CNFGClearFrame();
-		CNFGColor(0xFFFFFF);
-		CNFGGetDimensions(&screenx, &screeny);
-
-		int i;
-		for (i = 0; i < 32 * 3; i++) {
-			if (buffertimeto[i] < 50) {
-				uint32_t color = i * 3231349;
-				uint8_t r = color & 0xff;
-				uint8_t g = (color >> 8) & 0xff;
-				uint8_t b = (color >> 16) & 0xff;
-				r = (r * (5 - buffertimeto[i])) / 5;
-				g = (g * (5 - buffertimeto[i])) / 5;
-				b = (b * (5 - buffertimeto[i])) / 5;
-				CNFGColor((b << 16) | (g << 8) | r);
-				CNFGTackRectangle(bufferpts[i * 2 + 0], bufferpts[i * 2 + 1],
-								  bufferpts[i * 2 + 0] + 5,
-								  bufferpts[i * 2 + 1] + 5);
-				CNFGPenX = bufferpts[i * 2 + 0];
-				CNFGPenY = bufferpts[i * 2 + 1];
-				CNFGDrawText(buffermts, 2);
-				buffertimeto[i]++;
-			}
+int main(int argc, char **argv) {
+	if (argc > 1) {
+		output_file = fopen(argv[1], "w");
+		if (output_file == 0) {
+			fprintf(stderr, "Could not open %s for writing", argv[1]);
+			return -1;
 		}
-
-		CNFGSwapBuffers();
-		OGUSleep(10000);
+	} else {
+		output_file = stdout;
 	}
-}
 
-int SurviveThreadLoaded = 0;
-
-void *SurviveThread(void *junk) {
 	ctx = survive_init_with_config_cb(0, my_config_process);
 
 	survive_install_light_fn(ctx, my_light_process);
@@ -201,41 +136,16 @@ void *SurviveThread(void *junk) {
 	survive_install_raw_pose_fn(ctx, my_raw_pose_process);
 	survive_install_angle_fn(ctx, my_angle_process);
 	survive_install_info_fn(ctx, my_info_process);
+
 	survive_cal_install(ctx);
+
 	if (!ctx) {
 		fprintf(stderr, "Fatal. Could not start\n");
 		exit(1);
 	}
 
-	SurviveThreadLoaded = 1;
-
 	while (survive_poll(ctx) == 0) {
 	}
 
 	return 0;
 }
-
-int main(int argc, char **argv) {
-	if (argc > 1) {
-		output_file = fopen(argv[1], "w");
-		if (output_file == 0) {
-			fprintf(stderr, "Could not open %s for writing", argv[1]);
-			return -1;
-		}
-	} else {
-		output_file = stdout;
-	}
-	SurviveThread(0);
-	/*
-	// Create the libsurvive thread
-	OGCreateThread(SurviveThread, 0);
-
-	// Wait for the survive thread to load
-	while (!SurviveThreadLoaded) {
-		OGUSleep(100);
-	}
-
-	// Run the Gui in the main thread
-	GuiThread(0);
-	*/
-}
-- 
cgit v1.3.1


From dbe5808f70e1e088e311697e102ff60a6b07a7c6 Mon Sep 17 00:00:00 2001
From: cnlohr <lohr85@gmail.com>
Date: Sat, 17 Mar 2018 01:34:36 -0400
Subject: Fix comments in dclapack code to reflect current behavior

---
 redist/dclapack.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/redist/dclapack.h b/redist/dclapack.h
index 5aba214..bebda2d 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -181,10 +181,10 @@ PRINT(Ainv,n,n); \
 */
 
 /*
- * Matrix Multiply C = A * B
+ * Matrix Multiply R = A * B
+ *  R (n by p)
  *  A (n by m)
  *  B (m by p)
- *  C (n by p)
  */
 #define MUL(R,A,B,n,m,p) { \
     int i,j,k; \
@@ -199,11 +199,11 @@ PRINT(Ainv,n,n); \
 }
 
 /*
- * Matrix Multiply D = A * B + C
+ * Matrix Multiply R = A * B + C
+ *  R (n by p)
  *  A (n by m)
  *  B (m by p)
  *  C (n by p)
- *  D (n by p)
  */
 #define MULADD(R,A,B,C,n,m,p) { \
     int i,j,k; \
@@ -218,11 +218,11 @@ PRINT(Ainv,n,n); \
 }
 
 /*
- * Matrix Multiply D = alpha * A * B + beta * C
+ * Matrix Multiply R = alpha * A * B + beta * C
+ *  R (n by p)
  *  A (n by m)
  *  B (m by p)
  *  C (n by p)
- *  D (n by p)
  */
 #define GMULADD(R,A,B,C,alpha,beta,n,m,p) { \
     int i,j,k; \
-- 
cgit v1.3.1


From 9bd8c4c5ed38d186c04ea318722ca54b2f9c8ea1 Mon Sep 17 00:00:00 2001
From: cnlohr <lohr85@gmail.com>
Date: Sat, 17 Mar 2018 01:39:04 -0400
Subject: Fix other comments in .h file.

---
 redist/dclhelpers.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/redist/dclhelpers.h b/redist/dclhelpers.h
index 4ada24a..fd86c78 100644
--- a/redist/dclhelpers.h
+++ b/redist/dclhelpers.h
@@ -37,25 +37,25 @@ void dclUSub( DCL_FLOAT * X, const DCL_FLOAT * U, const DCL_FLOAT * B, int n, in
 /* Inverts a matrix X (n by n) using the method of LU decomposition */
 void dclInv( DCL_FLOAT * Ainv, const DCL_FLOAT * A, int n );
 
-/* Matrix Multiply C = A * B
+/* Matrix Multiply R = A * B
     A (n by m)
     B (m by p)
-    C (n by p) */
+    R (n by p) */
 void dclMul( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, int n, int m, int p );
 
-/* Matrix Multiply D = A * B + C
+/* Matrix Multiply R = A * B + C
     A (n by m)
     B (m by p)
     C (n by p)
-    D (n by p) */
+    R (n by p) */
 void dclMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, int n, int m, int p );
 
-/* Matrix Multiply D = alpha * A * B + beta * C
+/* Matrix Multiply R = alpha * A * B + beta * C
     A (n by m)
     B (m by p)
     C (n by p)
-    D (n by p) */
-void dclGMulAdd( DCL_FLOAT * D, const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p );
+    R (n by p) */
+void dclGMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p );
 
 #endif
 
-- 
cgit v1.3.1


From bcf08b95ab6daa7ac7bffe1449fa8a11cad2a02a Mon Sep 17 00:00:00 2001
From: cnlohr <lohr85@gmail.com>
Date: Sat, 17 Mar 2018 02:28:11 -0400
Subject: Get closer to functional.

---
 redist/dclapack.h   |  8 +++++---
 redist/dclhelpers.c | 25 +++++++++++++++++++++++++
 redist/dclhelpers.h | 24 ++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/redist/dclapack.h b/redist/dclapack.h
index bebda2d..76ce545 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -11,8 +11,10 @@
 
 #ifdef DYNAMIC_INDEX
 	#define _(A,O,P) A[O*n+P]
+	#define _I(A,O,P,n) A[O*n+P]
 #else
 	#define _(A,O,P) A[O][P]
+	#define _I(A,O,P,n) A[O][P]
 #endif
 
 
@@ -192,7 +194,7 @@ PRINT(Ainv,n,n); \
         for (j=0; j<p; j++) { \
             _(R,i,j) = 0.0f; \
             for (k=0; k<m; k++) { \
-                _(R,i,j) += _(A,i,k) * _(B,k,j); \
+                _(R,i,j) += _(A,i,k) * _I(B,k,j,m); \
             } \
         } \
     } \
@@ -211,7 +213,7 @@ PRINT(Ainv,n,n); \
         for (j=0; j<p; j++) { \
             _(R,i,j) = _(C,i,j); \
             for (k=0; k<m; k++) { \
-                _(R,i,j) += _(A,i,k) * _(B,k,j); \
+                _(R,i,j) += _(A,i,k) * _I(B,k,j,m); \
             } \
         } \
     } \
@@ -231,7 +233,7 @@ PRINT(Ainv,n,n); \
         for (j=0; j<p; j++) { \
             sum = 0.0f; \
             for (k=0; k<m; k++) { \
-                sum += _(A,i,k) * _(B,k,j); \
+                sum += _(A,i,k) * _I(B,k,j,m); \
             } \
             _(R,i,j) = alpha * sum + beta * _(C,i,j); \
         } \
diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c
index 2ee6c4a..23c3ca5 100644
--- a/redist/dclhelpers.c
+++ b/redist/dclhelpers.c
@@ -60,4 +60,29 @@ void dclGMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const
 	GMULADD(R,A,B,C,alpha,beta,n,m,p);
 }
 
+/* dclGMulAdd( R, ((transA)?TRANS(A):A, (transB)?TRANS(B):B), C, alpha, beta, n, m, p ); */
+void dcldgemm(
+	char transA,
+	char transB,
+	int m,
+	int n,
+	int k,
+	DCL_FLOAT alpha,
+	const DCL_FLOAT* A,
+	int lda, //must be n
+	const DCL_FLOAT* B,
+	int ldb, //must be m
+	DCL_FLOAT beta,
+	const DCL_FLOAT * C,
+	int ldc //must be n
+	 )
+{
+	DCL_FLOAT * ta;
+	DCL_FLOAT * tb;
+	if( transA )
+	{
+		ta = alloca( sizeof( DCL_FLOAT ) * n * m );
+		
+	}
+}
 
diff --git a/redist/dclhelpers.h b/redist/dclhelpers.h
index fd86c78..fd4e02d 100644
--- a/redist/dclhelpers.h
+++ b/redist/dclhelpers.h
@@ -57,5 +57,29 @@ void dclMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const D
     R (n by p) */
 void dclGMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p );
 
+
+/********************************
+ * Auxiliary functionality in C *
+ ********************************/
+
+//Matches dgemm from lapack.
+void dcldgemm(
+	char transA,
+	char transB,
+	int m,
+	int n,
+	int k,
+	DCL_FLOAT alpha,
+	const DCL_FLOAT* A,
+	int lda, //must be n
+	const DCL_FLOAT* B,
+	int ldb, //must be m
+	DCL_FLOAT beta,
+	const DCL_FLOAT * C,
+	int ldc //must be n
+	 );
+
+
+
 #endif
 
-- 
cgit v1.3.1


From 08eec7a6bd5edc0fd1cae7f98d7788ea3905a467 Mon Sep 17 00:00:00 2001
From: cnlohr <lohr85@gmail.com>
Date: Sat, 17 Mar 2018 03:05:21 -0400
Subject: Now wrangled with ability to use submatricies.

---
 redist/dclapack.h   | 24 +++++++++++-------------
 redist/dclhelpers.c | 46 ++++++++++++++++++++++++++++++----------------
 redist/dclhelpers.h | 28 ++++++++++++++--------------
 3 files changed, 55 insertions(+), 43 deletions(-)

diff --git a/redist/dclapack.h b/redist/dclapack.h
index 76ce545..1dc7b77 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -10,16 +10,14 @@
 #define _ABS(a)  ( (a)<=0 ? 0-(a) : (a) )
 
 #ifdef DYNAMIC_INDEX
-	#define _(A,O,P) A[O*n+P]
-	#define _I(A,O,P,n) A[O*n+P]
+	#define _(A,O,P) A[O*A##c+P]
 #else
 	#define _(A,O,P) A[O][P]
-	#define _I(A,O,P,n) A[O][P]
 #endif
 
 
 /*
- * Prints a matrix A (n by m)
+ * Prints a matrix A (n by m)  (with witdth Ac)
  */
 #define PRINT(A,n,m) { \
     int i,j; \
@@ -34,7 +32,7 @@
 } 
 
 /*
- * Returns the identity matrix
+ * Returns the identity matrix (with size n x n, but width Ic)
  */
 #define IDENTITY(I,n) { \
     int i,j; \
@@ -148,11 +146,11 @@
 
 #ifdef DYNAMIC_INDEX
 	#define INV_SETUP(ORDER) \
-    FLOAT Ipiv[ORDER*ORDER]; \
-    FLOAT L[ORDER*ORDER]; \
-    FLOAT U[ORDER*ORDER]; \
-    FLOAT I[ORDER*ORDER]; \
-    FLOAT C[ORDER*ORDER];
+    FLOAT Ipiv[ORDER*ORDER]; const int Ipivc = ORDER; \
+    FLOAT L[ORDER*ORDER]; const int Lc = ORDER; \
+    FLOAT U[ORDER*ORDER]; const int Uc = ORDER; \
+    FLOAT I[ORDER*ORDER]; const int Ic = ORDER; \
+    FLOAT C[ORDER*ORDER]; const int Cc = ORDER;
 #else
 	#define INV_SETUP(ORDER) \
     FLOAT Ipiv[ORDER][ORDER]; \
@@ -194,7 +192,7 @@ PRINT(Ainv,n,n); \
         for (j=0; j<p; j++) { \
             _(R,i,j) = 0.0f; \
             for (k=0; k<m; k++) { \
-                _(R,i,j) += _(A,i,k) * _I(B,k,j,m); \
+                _(R,i,j) += _(A,i,k) * _(B,k,j); \
             } \
         } \
     } \
@@ -213,7 +211,7 @@ PRINT(Ainv,n,n); \
         for (j=0; j<p; j++) { \
             _(R,i,j) = _(C,i,j); \
             for (k=0; k<m; k++) { \
-                _(R,i,j) += _(A,i,k) * _I(B,k,j,m); \
+                _(R,i,j) += _(A,i,k) * _(B,k,j); \
             } \
         } \
     } \
@@ -233,7 +231,7 @@ PRINT(Ainv,n,n); \
         for (j=0; j<p; j++) { \
             sum = 0.0f; \
             for (k=0; k<m; k++) { \
-                sum += _(A,i,k) * _I(B,k,j,m); \
+                sum += _(A,i,k) * _(B,k,j); \
             } \
             _(R,i,j) = alpha * sum + beta * _(C,i,j); \
         } \
diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c
index 23c3ca5..9fa9266 100644
--- a/redist/dclhelpers.c
+++ b/redist/dclhelpers.c
@@ -3,59 +3,59 @@
 #define DYNAMIC_INDEX
 #include <stdio.h>
 #include "dclapack.h"
+#include <alloca.h>
 
-
-void dclPrint( const DCL_FLOAT * A, int n, int m )
+void dclPrint( const DCL_FLOAT * A, int Ac, int n, int m )
 {
 	PRINT( A, n, m );
 }
 
-void dclIdentity( DCL_FLOAT * A, int n )
+void dclIdentity( DCL_FLOAT * I, int Ic, int n )
 {
-	IDENTITY( A, n );
+	IDENTITY( I, n );
 }
 
-void dclTransp( DCL_FLOAT * R, const DCL_FLOAT * A, int n, int m )
+void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m )
 {
 	TRANSP(R,A,n,m);
 }
 
-void dclLU( DCL_FLOAT * L, DCL_FLOAT * U, const DCL_FLOAT * A, int * Piv, int n )
+void dclLU( DCL_FLOAT * L, int Lc, DCL_FLOAT * U, int Uc, const DCL_FLOAT * A, int Ac, int * Piv, int n )
 {
 	LU(L,U,A,Piv,n);
 }
 
-void dclPivot( DCL_FLOAT * R, const DCL_FLOAT * A, int * Piv, int n, int m )
+void dclPivot( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int * Piv, int n, int m )
 {
 	PIVOT(R,A,Piv,n,m);
 }
 
-void dclLSub( DCL_FLOAT * X, const DCL_FLOAT * L, const DCL_FLOAT * B, int n, int m )
+void dclLSub( DCL_FLOAT * X, int Xc, const DCL_FLOAT * L, int Lc, const DCL_FLOAT * B, int Bc, int n, int m )
 {
 	L_SUB(X,L,B,n,m);
 }
 
-void dclUSub( DCL_FLOAT * X, const DCL_FLOAT * U, const DCL_FLOAT * B, int n, int m )
+void dclUSub( DCL_FLOAT * X, int Xc, const DCL_FLOAT * U, int Uc, const DCL_FLOAT * B, int Bc, int n, int m )
 {
 	U_SUB(X,U,B,n,m);
 }
 
-void dclInv( DCL_FLOAT * Ainv, const DCL_FLOAT * A, int n )
+void dclInv( DCL_FLOAT * Ainv, int Ainvc, const DCL_FLOAT * A, int Ac, int n )
 {
 	INV(Ainv,A,n,n);
 }
 
-void dclMul( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, int n, int m, int p )
+void dclMul( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, int n, int m, int p )
 {
 	MUL(R,A,B,n,m,p);
 }
 
-void dclMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, int n, int m, int p )
+void dclMulAdd( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, const DCL_FLOAT * C, int Cc, int n, int m, int p )
 {
 	MULADD(R,A,B,C,n,m,p);
 }
 
-void dclGMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p )
+void dclGMulAdd( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, const DCL_FLOAT * C, int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p )
 {
 	GMULADD(R,A,B,C,alpha,beta,n,m,p);
 }
@@ -77,12 +77,26 @@ void dcldgemm(
 	int ldc //must be n
 	 )
 {
-	DCL_FLOAT * ta;
-	DCL_FLOAT * tb;
+	const DCL_FLOAT * ta;
+	const DCL_FLOAT * tb;
 	if( transA )
 	{
 		ta = alloca( sizeof( DCL_FLOAT ) * n * m );
-		
+		//TRANSP(	ta, A, n, m );
+	}
+	else
+		ta = A;
+
+	if( transB )
+	{
+		tb = alloca( sizeof( DCL_FLOAT ) * n * m );
+		//TRANSP(	tb, B, n, m );
 	}
+	else
+		tb = B;
+
+
+	//XXX Tricky: In here, A is mxn
+
 }
 
diff --git a/redist/dclhelpers.h b/redist/dclhelpers.h
index fd4e02d..5010e02 100644
--- a/redist/dclhelpers.h
+++ b/redist/dclhelpers.h
@@ -6,56 +6,56 @@
 //XXX XXX XXX WARNING XXX XXX XXX The argument order may be changing!!!
 
 /* Prints matrix A of size[n][m] */
-void dclPrint( const DCL_FLOAT * A, int n, int m );
+void dclPrint( const DCL_FLOAT * A, int Ac, int n, int m );
 
 /* Returns the identity matrix */
-void dclIdentity( DCL_FLOAT * A, int n );
+void dclIdentity( DCL_FLOAT * I, int Ic, int n );
 
 /* R = Transpose(A)
      A is (n by m)
      R is (m by n) */
-void dclTransp( DCL_FLOAT * R, const DCL_FLOAT * A, int n, int m );
+void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m );
 
 /* Calculate L,U of a matrix A with pivot table; the pivot table is output. */
-void dclLU( DCL_FLOAT * L, DCL_FLOAT * U, const DCL_FLOAT * A, int * Piv, int n );
+void dclLU( DCL_FLOAT * L, int Lc, DCL_FLOAT * U, int Uc, const DCL_FLOAT * A, int Ac, int * Piv, int n );
 
 /* Pivots a matrix to a different matrix
     R = Pivot(A) given table 'Piv'
     A and R are (n by m) */
-void dclPivot( DCL_FLOAT * R, const DCL_FLOAT * A, int * Piv, int n, int m );
+void dclPivot( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int * Piv, int n, int m );
 
 /* Solve LX=B for matrix X and B
     L is n by n (lower triangular)
     B is n by m */
-void dclLSub( DCL_FLOAT * X, const DCL_FLOAT * L, const DCL_FLOAT * B, int n, int m );
+void dclLSub( DCL_FLOAT * X, int Xc, const DCL_FLOAT * L, int Lc, const DCL_FLOAT * B, int Bc, int n, int m );
 
 /* Solve UX=B for matrix X and B
     U is n by n (upper triangular)
     B is n by m */
-void dclUSub( DCL_FLOAT * X, const DCL_FLOAT * U, const DCL_FLOAT * B, int n, int m );
+void dclUSub( DCL_FLOAT * X, int Xc, const DCL_FLOAT * U, int Uc, const DCL_FLOAT * B, int Bc, int n, int m );
 
 /* Inverts a matrix X (n by n) using the method of LU decomposition */
-void dclInv( DCL_FLOAT * Ainv, const DCL_FLOAT * A, int n );
+void dclInv( DCL_FLOAT * Ainv, int Ainvc, const DCL_FLOAT * A, int Ac, int n );
 
 /* Matrix Multiply R = A * B
     A (n by m)
     B (m by p)
     R (n by p) */
-void dclMul( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, int n, int m, int p );
+void dclMul( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, int n, int m, int p );
 
 /* Matrix Multiply R = A * B + C
     A (n by m)
     B (m by p)
     C (n by p)
     R (n by p) */
-void dclMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, int n, int m, int p );
+void dclMulAdd( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, const DCL_FLOAT * C, int Cc, int n, int m, int p );
 
 /* Matrix Multiply R = alpha * A * B + beta * C
     A (n by m)
     B (m by p)
     C (n by p)
     R (n by p) */
-void dclGMulAdd( DCL_FLOAT * R, const DCL_FLOAT * A, const DCL_FLOAT * B, const DCL_FLOAT * C, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p );
+void dclGMulAdd( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, const DCL_FLOAT * C, int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p );
 
 
 /********************************
@@ -71,12 +71,12 @@ void dcldgemm(
 	int k,
 	DCL_FLOAT alpha,
 	const DCL_FLOAT* A,
-	int lda, //must be n
+	int Ac,
 	const DCL_FLOAT* B,
-	int ldb, //must be m
+	int Bc,
 	DCL_FLOAT beta,
 	const DCL_FLOAT * C,
-	int ldc //must be n
+	int Cc
 	 );
 
 
-- 
cgit v1.3.1


From f0c26bd1b0b8ffe05c0c5f04567a9b7aa47c3e6b Mon Sep 17 00:00:00 2001
From: cnlohr <lohr85@gmail.com>
Date: Sat, 17 Mar 2018 03:15:46 -0400
Subject: Update dclapack to do automatic size-of-array'ing

---
 redist/dclapack.h   |  8 ++++++--
 redist/dclhelpers.c | 28 +++++++++++++++++-----------
 redist/dclhelpers.h |  4 +---
 3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/redist/dclapack.h b/redist/dclapack.h
index 1dc7b77..af8869c 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -9,10 +9,14 @@
 
 #define _ABS(a)  ( (a)<=0 ? 0-(a) : (a) )
 
+//Tricky: If you want to use this with pointers, instead of 2D arrays, you will
+//need to #define DYNAMIC_INDEX, as well as, for all arrays, suffix their name
+//with 'c'
+
 #ifdef DYNAMIC_INDEX
-	#define _(A,O,P) A[O*A##c+P]
+	#define _(AM,O,P) AM[O*AM##c+P]
 #else
-	#define _(A,O,P) A[O][P]
+	#define _(AM,O,P) AM[O][P]
 #endif
 
 
diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c
index 9fa9266..c3725f8 100644
--- a/redist/dclhelpers.c
+++ b/redist/dclhelpers.c
@@ -69,34 +69,40 @@ void dcldgemm(
 	int k,
 	DCL_FLOAT alpha,
 	const DCL_FLOAT* A,
-	int lda, //must be n
+	int Ac, //must be n
 	const DCL_FLOAT* B,
-	int ldb, //must be m
+	int Bc, //must be m
 	DCL_FLOAT beta,
-	const DCL_FLOAT * C,
-	int ldc //must be n
+	DCL_FLOAT * C,
+	int Cc //must be n
 	 )
 {
 	const DCL_FLOAT * ta;
 	const DCL_FLOAT * tb;
+	int tac = Ac;
+	int tbc = Bc;
 	if( transA )
 	{
-		ta = alloca( sizeof( DCL_FLOAT ) * n * m );
-		//TRANSP(	ta, A, n, m );
+		DCL_FLOAT * la = alloca( sizeof( DCL_FLOAT ) * n * m );
+		const int lac = m;
+		TRANSP(	la, A, n, m );
+		ta = la;
+		tac = lac;
 	}
 	else
 		ta = A;
 
 	if( transB )
 	{
-		tb = alloca( sizeof( DCL_FLOAT ) * n * m );
-		//TRANSP(	tb, B, n, m );
+		DCL_FLOAT * lb = alloca( sizeof( DCL_FLOAT ) * n * m );
+		const int lbc = m;
+		TRANSP( lb, B, n, m );
+		tb = lb;
+		tbc = lbc;
 	}
 	else
 		tb = B;
 
-
-	//XXX Tricky: In here, A is mxn
-
+	GMULADD(C,ta,tb,C,alpha,beta,n,m,k);
 }
 
diff --git a/redist/dclhelpers.h b/redist/dclhelpers.h
index 5010e02..8779e1a 100644
--- a/redist/dclhelpers.h
+++ b/redist/dclhelpers.h
@@ -3,8 +3,6 @@
 
 #define DCL_FLOAT FLT
 
-//XXX XXX XXX WARNING XXX XXX XXX The argument order may be changing!!!
-
 /* Prints matrix A of size[n][m] */
 void dclPrint( const DCL_FLOAT * A, int Ac, int n, int m );
 
@@ -75,7 +73,7 @@ void dcldgemm(
 	const DCL_FLOAT* B,
 	int Bc,
 	DCL_FLOAT beta,
-	const DCL_FLOAT * C,
+	DCL_FLOAT * C,
 	int Cc
 	 );
 
-- 
cgit v1.3.1


From 9115ffd3138b460707dd1ba45dd7f6fccde87a46 Mon Sep 17 00:00:00 2001
From: cnlohr <lohr85@gmail.com>
Date: Sat, 17 Mar 2018 03:28:23 -0400
Subject: Test DCL.

---
 redist/dclhelpers.c |  4 ++--
 redist/test_dcl.c   | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100644 redist/test_dcl.c

diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c
index c3725f8..fb6aba6 100644
--- a/redist/dclhelpers.c
+++ b/redist/dclhelpers.c
@@ -5,9 +5,9 @@
 #include "dclapack.h"
 #include <alloca.h>
 
-void dclPrint( const DCL_FLOAT * A, int Ac, int n, int m )
+void dclPrint( const DCL_FLOAT * PMATRIX, int PMATRIXc, int n, int m )
 {
-	PRINT( A, n, m );
+	PRINT( PMATRIX, n, m );
 }
 
 void dclIdentity( DCL_FLOAT * I, int Ic, int n )
diff --git a/redist/test_dcl.c b/redist/test_dcl.c
new file mode 100644
index 0000000..adea7b5
--- /dev/null
+++ b/redist/test_dcl.c
@@ -0,0 +1,39 @@
+#include "dclhelpers.h"
+#include <stdint.h>
+#include <stdio.h>
+
+
+int main()
+{
+	FLT A[2][4]	= { { 0, 1, 2, 3 }, { 4, 5, 6, 7} };
+	FLT B[4][2];
+	dclPrint( A[0], 4, 2, 4 );
+	dclTransp( B[0], 2, A[0], 4, 2, 4 );
+	dclPrint( B[0], 2, 4, 2 );
+
+	int i;
+	for( i = 0; i < 8; i++ )
+	{
+		printf( "%f\n", ((float*)(B[0]))[i] );
+	}
+
+	FLT M[3][3] = {
+		{ .32, 1, 0 },
+		{ 0, 1, 2 },
+		{ 1, 0, 1 } };
+	FLT Mo[3][3];
+	dclInv( Mo[0], 3, M[0], 3, 3 );
+	dclPrint( Mo[0], 3, 3, 3 );
+
+	FLT MM[3][3];
+	dclMul( MM[0], 3, M[0], 3, Mo[0], 3, 3, 3, 3 );
+
+	printf( "The following should be an identity matrix\n" );
+	dclPrint( MM[0], 3, 3, 3 );
+
+//void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m );
+
+//	dclIdentity( A[0], MATx, 5 );
+//	dclPrint( A[0], MATx, MATx, MATy );
+}
+
-- 
cgit v1.3.1


From 00fbdfcbe9b1c3cc9f9f0814fe5f60bbf066cbbf Mon Sep 17 00:00:00 2001
From: Justin Berger <j.david.berger@gmail.com>
Date: Sat, 17 Mar 2018 08:15:09 -0600
Subject: Added test target, mul test

---
 Makefile          |  3 +++
 redist/test_dcl.c | 24 ++++++++++++++++++++----
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index e130357..9239c39 100644
--- a/Makefile
+++ b/Makefile
@@ -82,6 +82,9 @@ calibrate_client :  calibrate_client.c ./lib/libsurvive.so redist/os_generic.c $
 static_calibrate : calibrate.c redist/os_generic.c $(DRAWFUNCTIONS) $(LIBSURVIVE_C)
 	tcc -o $@ $^ $(CFLAGS) $(LDFLAGS) -DTCC
 
+test_dcl: ./redist/test_dcl.c ./redist/dclhelpers.c
+	$(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) -DFLT=double -fsanitize=address -fsanitize=undefined
+
 test_minimal_cv: ./src/epnp/test_minimal_cv.c ./lib/libsurvive.so 
 	$(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS)
 
diff --git a/redist/test_dcl.c b/redist/test_dcl.c
index adea7b5..a9512f8 100644
--- a/redist/test_dcl.c
+++ b/redist/test_dcl.c
@@ -1,8 +1,8 @@
 #include "dclhelpers.h"
+#include <assert.h>
 #include <stdint.h>
 #include <stdio.h>
 
-
 int main()
 {
 	FLT A[2][4]	= { { 0, 1, 2, 3 }, { 4, 5, 6, 7} };
@@ -31,9 +31,25 @@ int main()
 	printf( "The following should be an identity matrix\n" );
 	dclPrint( MM[0], 3, 3, 3 );
 
-//void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m );
+	{
+		FLT A[3][4];
+		dclIdentity(A[0], 4, 3);
+		dclPrint(A[0], 4, 3, 4);
+
+		FLT x[4] = {7, 8, 9, 10};
+		FLT R[4];
+
+		dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3);
+		dclPrint(x, 1, 4, 1);
+		dclPrint(R, 1, 4, 1);
+
+		for (int i = 0; i < 3; i++)
+			assert(R[i] == x[i]);
+		assert(R[3] == 0.);
+	}
+	// void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m );
 
-//	dclIdentity( A[0], MATx, 5 );
-//	dclPrint( A[0], MATx, MATx, MATy );
+	//	dclIdentity( A[0], MATx, 5 );
+	//	dclPrint( A[0], MATx, MATx, MATy );
 }
 
-- 
cgit v1.3.1


From 7c97cfe7f63650fc79ce4fa7f081b556ce275475 Mon Sep 17 00:00:00 2001
From: Justin Berger <j.david.berger@gmail.com>
Date: Sat, 17 Mar 2018 08:18:45 -0600
Subject: Made it test dcldgemm instead

---
 redist/test_dcl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/redist/test_dcl.c b/redist/test_dcl.c
index a9512f8..6d49548 100644
--- a/redist/test_dcl.c
+++ b/redist/test_dcl.c
@@ -39,7 +39,9 @@ int main()
 		FLT x[4] = {7, 8, 9, 10};
 		FLT R[4];
 
-		dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3);
+		// dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3);
+		dcldgemm(0, 0, 4, 1, 3, 1, A[0], 4, x, 1, 0, R, 1);
+
 		dclPrint(x, 1, 4, 1);
 		dclPrint(R, 1, 4, 1);
 
-- 
cgit v1.3.1


From 0b9e66ad2ff686a4dcf8a6838f33edb203a1bff5 Mon Sep 17 00:00:00 2001
From: Justin Berger <j.david.berger@gmail.com>
Date: Sat, 17 Mar 2018 09:32:22 -0600
Subject: Fixed gemm

---
 Makefile            |  8 +++++++-
 redist/dclapack.h   | 33 +++++++++++++++++----------------
 redist/dclhelpers.c |  9 +++++----
 redist/test_dcl.c   | 23 +++++++++++++++--------
 4 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/Makefile b/Makefile
index 9239c39..dfcb9f4 100644
--- a/Makefile
+++ b/Makefile
@@ -82,7 +82,13 @@ calibrate_client :  calibrate_client.c ./lib/libsurvive.so redist/os_generic.c $
 static_calibrate : calibrate.c redist/os_generic.c $(DRAWFUNCTIONS) $(LIBSURVIVE_C)
 	tcc -o $@ $^ $(CFLAGS) $(LDFLAGS) -DTCC
 
-test_dcl: ./redist/test_dcl.c ./redist/dclhelpers.c
+./redist/dclhelpers_debuggable.c : ./redist/dclhelpers.c ./redist/dclhelpers.h ./redist/dclapack.h
+	gcc -E ./redist/dclhelpers.c  > ./redist/dclhelpers_debuggable.c
+	clang-format -i ./redist/dclhelpers_debuggable.c
+	sed -i 's/#/\/\/#/g' ./redist/dclhelpers_debuggable.c
+
+
+test_dcl: ./redist/test_dcl.c ./redist/dclhelpers_debuggable.c ./redist/dclhelpers.h ./redist/dclapack.h
 	$(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) -DFLT=double -fsanitize=address -fsanitize=undefined
 
 test_minimal_cv: ./src/epnp/test_minimal_cv.c ./lib/libsurvive.so 
diff --git a/redist/dclapack.h b/redist/dclapack.h
index af8869c..af5035b 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -224,22 +224,23 @@ PRINT(Ainv,n,n); \
 /*
  * Matrix Multiply R = alpha * A * B + beta * C
  *  R (n by p)
- *  A (n by m)
- *  B (m by p)
- *  C (n by p)
+ *  A (m by n)
+ *  B (n by p)
+ *  C (m by p)
  */
-#define GMULADD(R,A,B,C,alpha,beta,n,m,p) { \
-    int i,j,k; \
-    float sum; \
-    for (i=0; i<n; i++) { \
-        for (j=0; j<p; j++) { \
-            sum = 0.0f; \
-            for (k=0; k<m; k++) { \
-                sum += _(A,i,k) * _(B,k,j); \
-            } \
-            _(R,i,j) = alpha * sum + beta * _(C,i,j); \
-        } \
-    } \
-}
+#define GMULADD(R, A, B, C, alpha, beta, m, n, p)                                                                      \
+	{                                                                                                                  \
+		int _i, _j, _k;                                                                                                \
+		float sum;                                                                                                     \
+		for (_i = 0; _i < m; _i++) {                                                                                   \
+			for (_j = 0; _j < p; _j++) {                                                                               \
+				sum = 0.0f;                                                                                            \
+				for (_k = 0; _k < n; _k++) {                                                                           \
+					sum += _(A, _i, _k) * _(B, _k, _j);                                                                \
+				}                                                                                                      \
+				_(R, _i, _j) = alpha * sum + beta * _(C, _i, _j);                                                      \
+			}                                                                                                          \
+		}                                                                                                              \
+	}
 
 #endif
diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c
index fb6aba6..3e51fd2 100644
--- a/redist/dclhelpers.c
+++ b/redist/dclhelpers.c
@@ -1,9 +1,10 @@
 #include "dclhelpers.h"
 #define FLOAT DCL_FLOAT
 #define DYNAMIC_INDEX
-#include <stdio.h>
 #include "dclapack.h"
 #include <alloca.h>
+#include <assert.h>
+#include <stdio.h>
 
 void dclPrint( const DCL_FLOAT * PMATRIX, int PMATRIXc, int n, int m )
 {
@@ -77,7 +78,7 @@ void dcldgemm(
 	int Cc //must be n
 	 )
 {
-	const DCL_FLOAT * ta;
+	const DCL_FLOAT *ta;
 	const DCL_FLOAT * tb;
 	int tac = Ac;
 	int tbc = Bc;
@@ -102,7 +103,7 @@ void dcldgemm(
 	}
 	else
 		tb = B;
-
-	GMULADD(C,ta,tb,C,alpha,beta,n,m,k);
+	printf("%d %d %d\n", tac, tbc, Cc);
+	GMULADD(C, ta, tb, C, alpha, beta, m, n, k);
 }
 
diff --git a/redist/test_dcl.c b/redist/test_dcl.c
index 6d49548..42f4fd6 100644
--- a/redist/test_dcl.c
+++ b/redist/test_dcl.c
@@ -1,5 +1,6 @@
 #include "dclhelpers.h"
 #include <assert.h>
+#include <math.h>
 #include <stdint.h>
 #include <stdio.h>
 
@@ -36,18 +37,24 @@ int main()
 		dclIdentity(A[0], 4, 3);
 		dclPrint(A[0], 4, 3, 4);
 
-		FLT x[4] = {7, 8, 9, 10};
-		FLT R[4];
+		FLT x[4][2] = {
+			{7, -7}, {8, -8}, {9, -9}, {10, -10},
+		};
+		FLT R[4][2];
 
+		printf("%p %p %p\n", A, x, R);
 		// dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3);
-		dcldgemm(0, 0, 4, 1, 3, 1, A[0], 4, x, 1, 0, R, 1);
+		dcldgemm(0, 0, 3, 4, 2, 1, A[0], 4, x[0], 2, 0, R[0], 2);
 
-		dclPrint(x, 1, 4, 1);
-		dclPrint(R, 1, 4, 1);
+		dclPrint(x[0], 2, 4, 2);
+		dclPrint(R[0], 2, 4, 2);
 
-		for (int i = 0; i < 3; i++)
-			assert(R[i] == x[i]);
-		assert(R[3] == 0.);
+		for (int j = 0; j < 2; j++) {
+			for (int i = 0; i < 3; i++)
+				assert(R[i][j] == x[i][j]);
+
+			assert(fabs(R[3][j]) < .0000001);
+		}
 	}
 	// void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m );
 
-- 
cgit v1.3.1


From 5d4f4068ae265271d0840ca1bddd7dfcddd948e9 Mon Sep 17 00:00:00 2001
From: Justin Berger <j.david.berger@gmail.com>
Date: Sat, 17 Mar 2018 10:33:14 -0600
Subject: Made naming follow conventions

---
 redist/dclapack.h | 278 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 149 insertions(+), 129 deletions(-)

diff --git a/redist/dclapack.h b/redist/dclapack.h
index af5035b..2823c5d 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -19,130 +19,151 @@
 	#define _(AM,O,P) AM[O][P]
 #endif
 
-
 /*
- * Prints a matrix A (n by m)  (with witdth Ac)
+ * Prints a matrix A (m by n)  (with witdth Ac)
  */
-#define PRINT(A,n,m) { \
-    int i,j; \
-    printf(#A "\n"); \
-    for (i=0; i<n; i++) { \
-        for (j=0; j<m; j++) { \
-            printf("%4.3f\t", _(A,i,j)); \
-        } \
-        printf("\n"); \
-    } \
-    printf("\n"); \
-} 
+#define PRINT(A, m, n)                                                                                                 \
+	{                                                                                                                  \
+		printf(#A "\n");                                                                                               \
+		for (int _i = 0; _i < (m); _i++) {                                                                             \
+			for (int _j = 0; _j < (n); _j++) {                                                                         \
+				printf("%4.3f\t", _(A, _i, _j));                                                                       \
+			}                                                                                                          \
+			printf("\n");                                                                                              \
+		}                                                                                                              \
+		printf("\n");                                                                                                  \
+	}
 
 /*
  * Returns the identity matrix (with size n x n, but width Ic)
  */
-#define IDENTITY(I,n) { \
-    int i,j; \
-    for (i=0; i<n; i++) { \
-        for (j=0; j<i; j++) { _(I,i,j)=0.0f; } \
-        _(I,i,i) = 1.0f; \
-        for (j=i+1; j<n; j++) { _(I,i,j)=0.0f; } \
-    } \
-}
+#define IDENTITY(I, n)                                                                                                 \
+	{                                                                                                                  \
+		for (int _i = 0; _i < (n); _i++) {                                                                             \
+			for (int _j = 0; _j < _i; _j++) {                                                                          \
+				_(I, _i, _j) = 0.0f;                                                                                   \
+			}                                                                                                          \
+			_(I, _i, _i) = 1.0f;                                                                                       \
+			for (int _j = _i + 1; _j < (n); _j++) {                                                                    \
+				_(I, _i, _j) = 0.0f;                                                                                   \
+			}                                                                                                          \
+		}                                                                                                              \
+	}
 
 /*
  * R = Transpose(A)
- *  A is (n by m)
- *  R is (m by n)
+ *  A is (m by n)
+ *  R is (n by m)
  */
-#define TRANSP(R,A,n,m) { \
-   int i,j; \
-   for (i=0; i<n; i++) { \
-      for (j=0; j<m; j++) { \
-         _(R,j,i) = _(A,i,j); \
-      } \
-   } \
-}
+#define TRANSP(R, A, m, n)                                                                                             \
+	{                                                                                                                  \
+		for (int _i = 0; _i < (m); _i++) {                                                                             \
+			for (int _j = 0; _j < (n); _j++) {                                                                         \
+				_(R, _j, _i) = _(A, _i, _j);                                                                           \
+			}                                                                                                          \
+		}                                                                                                              \
+	}
 
 /*
  * Calculate L,U of a matrix A with pivot table
  */
-#define LU(L,U,A,Piv,n) { \
-    int i,j,k,_tempi; float _tempf; \
-    for (i=0; i<n; i++) { Piv[i]=i; } \
-    for (i=0; i<n; i++) { \
-        for (j=0; j<n; j++) { \
-            _(U,i,j) = _(A,i,j); \
-        } \
-    } \
-    IDENTITY(L,n); \
-    \
-    for (i=0; i<n-1; i++) { \
-        \
-        int max=i; \
-	for (j=i+1; j<n; j++) { \
-		if (_ABS( _(U,j,i)) > _ABS(_(U,max,i))) { max = j; } \
-	} \
-	_tempi=Piv[i]; Piv[i]=Piv[max]; Piv[max]=_tempi; \
-	for (k=i; k<n; k++) { \
-		_tempf=_(U,i,k); _(U,i,k)=_(U,max,k); _(U,max,k)=_tempf; \
-	} \
-	for (k=0; k<i; k++) { \
-		_tempf=_(L,i,k); _(L,i,k)=_(L,max,k); _(L,max,k)=_tempf; \
-	} \
-        \
-        FLOAT invDiag = 1.0 / _(U,i,i); \
-        for (j=i+1; j<n; j++) { \
-            FLOAT scale = _(U,j,i) * invDiag; \
-            _(U,j,i) = 0.0; \
-            for (k=i+1; k<n; k++) { _(U,j,k) -= _(U,i,k)*scale; } \
-            _(L,j,i) = scale; \
-        } \
-    } \
-}
+#define LU(L, U, A, Piv, n)                                                                                            \
+	{                                                                                                                  \
+		int _i, _j, _k, _tempi;                                                                                        \
+		float _tempf;                                                                                                  \
+		for (_i = 0; _i < (n); _i++) {                                                                                 \
+			Piv[_i] = _i;                                                                                              \
+		}                                                                                                              \
+		for (_i = 0; _i < (n); _i++) {                                                                                 \
+			for (_j = 0; _j < (n); _j++) {                                                                             \
+				_(U, _i, _j) = _(A, _i, _j);                                                                           \
+			}                                                                                                          \
+		}                                                                                                              \
+		IDENTITY(L, n);                                                                                                \
+                                                                                                                       \
+		for (_i = 0; _i < (n)-1; _i++) {                                                                               \
+                                                                                                                       \
+			int max = _i;                                                                                              \
+			for (_j = _i + 1; _j < (n); _j++) {                                                                        \
+				if (_ABS(_(U, _j, _i)) > _ABS(_(U, max, _i))) {                                                        \
+					max = _j;                                                                                          \
+				}                                                                                                      \
+			}                                                                                                          \
+			_tempi = Piv[_i];                                                                                          \
+			Piv[_i] = Piv[max];                                                                                        \
+			Piv[max] = _tempi;                                                                                         \
+			for (_k = _i; _k < (n); _k++) {                                                                            \
+				_tempf = _(U, _i, _k);                                                                                 \
+				_(U, _i, _k) = _(U, max, _k);                                                                          \
+				_(U, max, _k) = _tempf;                                                                                \
+			}                                                                                                          \
+			for (_k = 0; _k < _i; _k++) {                                                                              \
+				_tempf = _(L, _i, _k);                                                                                 \
+				_(L, _i, _k) = _(L, max, _k);                                                                          \
+				_(L, max, _k) = _tempf;                                                                                \
+			}                                                                                                          \
+                                                                                                                       \
+			FLOAT invDiag = 1.0 / _(U, _i, _i);                                                                        \
+			for (_j = _i + 1; _j < (n); _j++) {                                                                        \
+				FLOAT scale = _(U, _j, _i) * invDiag;                                                                  \
+				_(U, _j, _i) = 0.0;                                                                                    \
+				for (_k = _i + 1; _k < (n); _k++) {                                                                    \
+					_(U, _j, _k) -= _(U, _i, _k) * scale;                                                              \
+				}                                                                                                      \
+				_(L, _j, _i) = scale;                                                                                  \
+			}                                                                                                          \
+		}                                                                                                              \
+	}
 
 /*
  * Pivots a matrix to a different matrix
  *  R = Pivot(A) given table 'Piv'
- *  A and R are (n by m)
+ *  A and R are (m by n)
  */
-#define PIVOT(R,A,Piv,n,m) { \
-    int i,j; \
-    for (j=0; j<n; j++) { \
-        for (i=0; i<m; i++) { \
-            _(R,j,i) = _(A,Piv[j],i); \
-        } \
-    } \
-}
+#define PIVOT(R, A, Piv, m, n)                                                                                         \
+	{                                                                                                                  \
+		for (int _i = 0; _i < (m); _i++) {                                                                             \
+			for (int _j = 0; _j < (n); _j++) {                                                                         \
+				_(R, _i, _j) = _(A, Piv[_i], _j);                                                                      \
+			}                                                                                                          \
+		}                                                                                                              \
+	}
 
 /*
  * Solve LX=B for matrix X and B
- *  L is n by n (lower triangular)
- *  B is n by m
+ *  L is m by m (lower triangular)
+ *  B is m by n
  */
-#define L_SUB(X,L,B,n,m) { \
-    int i,j,k; \
-    for (i=0; i<m; i++) { \
-        for (j=0; j<n; j++) { \
-            float sum=0.0; \
-            for (k=0; k<j; k++) { sum += _(L,j,k)*_(X,k,i); } \
-            _(X,j,i) = (_(B,j,i) - sum) / _(L,j,j); \
-        } \
-    } \
-}
+#define L_SUB(X, L, B, m, n)                                                                                           \
+	{                                                                                                                  \
+		for (int _i = 0; _i < (n); _i++) {                                                                             \
+			for (int _j = 0; _j < (m); _j++) {                                                                         \
+				float sum = 0.0;                                                                                       \
+				for (int _k = 0; _k < _j; _k++) {                                                                      \
+					sum += _(L, _j, _k) * _(X, _k, _i);                                                                \
+				}                                                                                                      \
+				_(X, _j, _i) = (_(B, _j, _i) - sum) / _(L, _j, _j);                                                    \
+			}                                                                                                          \
+		}                                                                                                              \
+	}
 
 /*
  * Solve UX=B for matrix X and B
- *  U is n by n (upper triangular)
- *  B is n by m
+ *  U is m by m (upper triangular)
+ *  B is m by n
  */
-#define U_SUB(X,U,B,n,m) { \
-    int i,j,k; \
-    for (i=0; i<m; i++) { \
-        for (j=n-1; j>=0; j--) { \
-            float sum=0.0; \
-            for (k=n-1; k>j; k--) { sum += _(U,j,k)*_(X,k,i); } \
-            _(X,j,i) = (_(B,j,i) - sum) / _(U,j,j); \
-        } \
-    } \
-}
+#define U_SUB(X, U, B, m, n)                                                                                           \
+	{                                                                                                                  \
+		for (int _i = 0; _i < (n); _i++) {                                                                             \
+			for (int _j = m - 1; _j >= 0; _j--) {                                                                      \
+				float sum = 0.0;                                                                                       \
+				for (int _k = n - 1; _k > _j; _k--) {                                                                  \
+					sum += _(U, _j, _k) * _(X, _k, _i);                                                                \
+				}                                                                                                      \
+				_(X, _j, _i) = (_(B, _j, _i) - sum) / _(U, _j, _j);                                                    \
+			}                                                                                                          \
+		}                                                                                                              \
+	}
 
 /*
  * Inverts a matrix X (n by n) using the method of LU decomposition
@@ -187,55 +208,54 @@ PRINT(Ainv,n,n); \
 /*
  * Matrix Multiply R = A * B
  *  R (n by p)
- *  A (n by m)
+ *  A (m by n)
  *  B (m by p)
  */
-#define MUL(R,A,B,n,m,p) { \
-    int i,j,k; \
-    for (i=0; i<n; i++) { \
-        for (j=0; j<p; j++) { \
-            _(R,i,j) = 0.0f; \
-            for (k=0; k<m; k++) { \
-                _(R,i,j) += _(A,i,k) * _(B,k,j); \
-            } \
-        } \
-    } \
-}
+#define MUL(R, A, B, m, n, p)                                                                                          \
+	{                                                                                                                  \
+		for (int _i = 0; _i < (m); _i++) {                                                                             \
+			for (int _j = 0; _j < p; _j++) {                                                                           \
+				_(R, _i, _j) = 0.0f;                                                                                   \
+				for (int _k = 0; _k < (n); _k++) {                                                                     \
+					_(R, _i, _j) += _(A, _i, _k) * _(B, _k, _j);                                                       \
+				}                                                                                                      \
+			}                                                                                                          \
+		}                                                                                                              \
+	}
 
 /*
  * Matrix Multiply R = A * B + C
- *  R (n by p)
- *  A (n by m)
- *  B (m by p)
- *  C (n by p)
+ *  R (m by p)
+ *  A (m by n)
+ *  B (n by p)
+ *  C (m by p)
  */
-#define MULADD(R,A,B,C,n,m,p) { \
-    int i,j,k; \
-    for (i=0; i<n; i++) { \
-        for (j=0; j<p; j++) { \
-            _(R,i,j) = _(C,i,j); \
-            for (k=0; k<m; k++) { \
-                _(R,i,j) += _(A,i,k) * _(B,k,j); \
-            } \
-        } \
-    } \
-}
+#define MULADD(R, A, B, C, m, n, p)                                                                                    \
+	{                                                                                                                  \
+		for (int _i = 0; _i < (m); _i++) {                                                                             \
+			for (int _j = 0; _j < p; _j++) {                                                                           \
+				_(R, _i, _j) = _(C, _i, _j);                                                                           \
+				for (int _k = 0; _k < (n); _k++) {                                                                     \
+					_(R, _i, _j) += _(A, _i, _k) * _(B, _k, _j);                                                       \
+				}                                                                                                      \
+			}                                                                                                          \
+		}                                                                                                              \
+	}
 
 /*
  * Matrix Multiply R = alpha * A * B + beta * C
- *  R (n by p)
+ *  R (m by p)
  *  A (m by n)
  *  B (n by p)
  *  C (m by p)
  */
 #define GMULADD(R, A, B, C, alpha, beta, m, n, p)                                                                      \
 	{                                                                                                                  \
-		int _i, _j, _k;                                                                                                \
 		float sum;                                                                                                     \
-		for (_i = 0; _i < m; _i++) {                                                                                   \
-			for (_j = 0; _j < p; _j++) {                                                                               \
+		for (int _i = 0; _i < m; _i++) {                                                                               \
+			for (int _j = 0; _j < p; _j++) {                                                                           \
 				sum = 0.0f;                                                                                            \
-				for (_k = 0; _k < n; _k++) {                                                                           \
+				for (int _k = 0; _k < n; _k++) {                                                                       \
 					sum += _(A, _i, _k) * _(B, _k, _j);                                                                \
 				}                                                                                                      \
 				_(R, _i, _j) = alpha * sum + beta * _(C, _i, _j);                                                      \
-- 
cgit v1.3.1


From 497e65e339edcc77bd272b97b9c1b1b5217a24b6 Mon Sep 17 00:00:00 2001
From: cnlohr <lohr85@gmail.com>
Date: Sat, 17 Mar 2018 12:50:32 -0400
Subject: Check in new failing test.

---
 redist/test_dcl.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/redist/test_dcl.c b/redist/test_dcl.c
index 42f4fd6..056acdf 100644
--- a/redist/test_dcl.c
+++ b/redist/test_dcl.c
@@ -56,9 +56,32 @@ int main()
 			assert(fabs(R[3][j]) < .0000001);
 		}
 	}
-	// void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m );
 
-	//	dclIdentity( A[0], MATx, 5 );
-	//	dclPrint( A[0], MATx, MATx, MATy );
+
+	printf( "The following should be an identity matrix\n" );
+	dclPrint( MM[0], 3, 3, 3 );
+
+
+	//Currently failing test...
+	{
+		FLT em1[12][20];
+		FLT em2[20][12];
+		FLT emo[12][12];
+		int x, y;
+
+		for( y = 0; y < 12; y++ )
+		for( x = 0; x < 20; x++ )
+		{
+			em1[y][x] = (rand()%1000)/1000.0;
+			em2[x][y] = (rand()%1000)/1000.0;
+		}
+
+		int m = 12;
+		int n = 20;
+		int k = 12;
+		dcldgemm( 0, 0, m, n, k, 0.1, em1[0], 20, em2[0], 12, .1, emo[0], 12 );
+		dclPrint( emo[0], 12, 12, 12 );
+	}
+
 }
 
-- 
cgit v1.3.1


From 04bd16aeb391e67716344268cf0f43d1f31f180a Mon Sep 17 00:00:00 2001
From: cnlohr <lohr85@gmail.com>
Date: Sat, 17 Mar 2018 14:21:20 -0400
Subject: Update dcl and test.

---
 redist/dclapack.h   | 20 ++++++++++++++++----
 redist/dclhelpers.c | 14 +++++++++++---
 redist/dclhelpers.h |  8 +++++++-
 redist/test_dcl.c   | 41 ++++++++++++++++++++++++++---------------
 4 files changed, 60 insertions(+), 23 deletions(-)

diff --git a/redist/dclapack.h b/redist/dclapack.h
index 2823c5d..cae377b 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -37,9 +37,9 @@
 /*
  * Returns the identity matrix (with size n x n, but width Ic)
  */
-#define IDENTITY(I, n)                                                                                                 \
+#define IDENTITY(I, m, n)                                                                                              \
 	{                                                                                                                  \
-		for (int _i = 0; _i < (n); _i++) {                                                                             \
+		for (int _i = 0; _i < (m); _i++) {                                                                             \
 			for (int _j = 0; _j < _i; _j++) {                                                                          \
 				_(I, _i, _j) = 0.0f;                                                                                   \
 			}                                                                                                          \
@@ -50,6 +50,17 @@
 		}                                                                                                              \
 	}
 
+
+/*
+ * Returns the identity matrix (with size n x n, but width Ic)
+ */
+#define ZERO(Z, m, n)                                                                                                  \
+	{                                                                                                                  \
+		for (int _i = 0; _i < (m); _i++)                                                                               \
+			for (int _j = 0; _j < (n); _j++)                                                                           \
+				_(Z, _i, _j) = 0.0f;                                                                                   \
+	}
+
 /*
  * R = Transpose(A)
  *  A is (m by n)
@@ -79,7 +90,7 @@
 				_(U, _i, _j) = _(A, _i, _j);                                                                           \
 			}                                                                                                          \
 		}                                                                                                              \
-		IDENTITY(L, n);                                                                                                \
+		IDENTITY(L, n, n);                                                                                                \
                                                                                                                        \
 		for (_i = 0; _i < (n)-1; _i++) {                                                                               \
                                                                                                                        \
@@ -188,7 +199,7 @@
 #define INV(Ainv,A,n,ORDER) { \
 	INV_SETUP(ORDER) \
     int Piv[ORDER]; \
-    IDENTITY(I,n); \
+    IDENTITY(I,n,n); \
     LU(L,U,A,Piv,n); \
     PIVOT(Ipiv,I,Piv,n,n); \
     L_SUB(C,L,Ipiv,n,n); \
@@ -258,6 +269,7 @@ PRINT(Ainv,n,n); \
 				for (int _k = 0; _k < n; _k++) {                                                                       \
 					sum += _(A, _i, _k) * _(B, _k, _j);                                                                \
 				}                                                                                                      \
+				printf( "-> %d %d =   %f %f %f %f[%d %d]\n", _i, _j, alpha,sum,beta, _(C,_i,_j), _i, _j ); \
 				_(R, _i, _j) = alpha * sum + beta * _(C, _i, _j);                                                      \
 			}                                                                                                          \
 		}                                                                                                              \
diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c
index 3e51fd2..5b956b0 100644
--- a/redist/dclhelpers.c
+++ b/redist/dclhelpers.c
@@ -5,15 +5,23 @@
 #include <alloca.h>
 #include <assert.h>
 #include <stdio.h>
+#include <string.h>
 
 void dclPrint( const DCL_FLOAT * PMATRIX, int PMATRIXc, int n, int m )
 {
 	PRINT( PMATRIX, n, m );
 }
 
-void dclIdentity( DCL_FLOAT * I, int Ic, int n )
+void dclIdentity( DCL_FLOAT * I, int Ic, int m, int n )
 {
-	IDENTITY( I, n );
+	IDENTITY( I, m, n );
+}
+
+
+/* Returns the zero matrix */
+void dclZero( DCL_FLOAT * Z, int Zc, int m, int n )
+{
+	memset( Z, 0, m*n*sizeof(DCL_FLOAT) );
 }
 
 void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m )
@@ -103,7 +111,7 @@ void dcldgemm(
 	}
 	else
 		tb = B;
-	printf("%d %d %d\n", tac, tbc, Cc);
+
 	GMULADD(C, ta, tb, C, alpha, beta, m, n, k);
 }
 
diff --git a/redist/dclhelpers.h b/redist/dclhelpers.h
index 8779e1a..b4acec8 100644
--- a/redist/dclhelpers.h
+++ b/redist/dclhelpers.h
@@ -3,11 +3,17 @@
 
 #define DCL_FLOAT FLT
 
+//Use this macro to safely 
+#define DMS( m ) ((m)[0]), (sizeof((m)[0])/sizeof((m)[0][0]))
+
 /* Prints matrix A of size[n][m] */
 void dclPrint( const DCL_FLOAT * A, int Ac, int n, int m );
 
 /* Returns the identity matrix */
-void dclIdentity( DCL_FLOAT * I, int Ic, int n );
+void dclIdentity( DCL_FLOAT * I, int Ic, int m, int n );
+
+/* Returns the zero matrix */
+void dclZero( DCL_FLOAT * I, int Ic, int m, int n );
 
 /* R = Transpose(A)
      A is (n by m)
diff --git a/redist/test_dcl.c b/redist/test_dcl.c
index 056acdf..6b0d870 100644
--- a/redist/test_dcl.c
+++ b/redist/test_dcl.c
@@ -12,7 +12,7 @@ int main()
 	dclTransp( B[0], 2, A[0], 4, 2, 4 );
 	dclPrint( B[0], 2, 4, 2 );
 
-	int i;
+	int i, j;
 	for( i = 0; i < 8; i++ )
 	{
 		printf( "%f\n", ((float*)(B[0]))[i] );
@@ -34,54 +34,65 @@ int main()
 
 	{
 		FLT A[3][4];
-		dclIdentity(A[0], 4, 3);
-		dclPrint(A[0], 4, 3, 4);
+		dclIdentity( DMS(A), 3, 4);
+		dclPrint( DMS(A), 3, 4);
 
 		FLT x[4][2] = {
 			{7, -7}, {8, -8}, {9, -9}, {10, -10},
 		};
 		FLT R[4][2];
+		dclZero( DMS(R), 4, 2 );
 
-		printf("%p %p %p\n", A, x, R);
 		// dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3);
 		dcldgemm(0, 0, 3, 4, 2, 1, A[0], 4, x[0], 2, 0, R[0], 2);
 
-		dclPrint(x[0], 2, 4, 2);
-		dclPrint(R[0], 2, 4, 2);
+		dclPrint(DMS(x), 4, 2);
+		dclPrint(DMS(R), 3, 2);
 
 		for (int j = 0; j < 2; j++) {
 			for (int i = 0; i < 3; i++)
+			{
+				printf( "[%d][%d]\n", i, j );
 				assert(R[i][j] == x[i][j]);
+			}
 
 			assert(fabs(R[3][j]) < .0000001);
 		}
 	}
 
 
-	printf( "The following should be an identity matrix\n" );
-	dclPrint( MM[0], 3, 3, 3 );
-
+#if 0
 
 	//Currently failing test...
 	{
+//		FLT em1[3][4];
+//		FLT em2[4][2];
+//		FLT emo[4][2];
+
 		FLT em1[12][20];
 		FLT em2[20][12];
-		FLT emo[12][12];
+		FLT emo[20][12];
 		int x, y;
 
 		for( y = 0; y < 12; y++ )
 		for( x = 0; x < 20; x++ )
-		{
 			em1[y][x] = (rand()%1000)/1000.0;
-			em2[x][y] = (rand()%1000)/1000.0;
-		}
+
+		for( y = 0; y < 20; y++ )
+		for( x = 0; x < 12; x++ )
+			em2[y][x] = (rand()%1000)/1000.0;
 
 		int m = 12;
 		int n = 20;
 		int k = 12;
-		dcldgemm( 0, 0, m, n, k, 0.1, em1[0], 20, em2[0], 12, .1, emo[0], 12 );
-		dclPrint( emo[0], 12, 12, 12 );
+
+		dclPrint( em1[0], 20, 12, 20 );
+		dclPrint( em2[0], 12, 20, 12 );
+
+		dcldgemm( 0, 0, m, n, k, 1.0, em1[0], 20, em2[0], 12, .1, emo[0], 12 );
+		dclPrint( emo[0], 12, 20, 12 );
 	}
+#endif
 
 }
 
-- 
cgit v1.3.1


From f6586a91478b1ca5920c58f90925deaafab81465 Mon Sep 17 00:00:00 2001
From: cnlohr <lohr85@gmail.com>
Date: Sat, 17 Mar 2018 14:44:10 -0400
Subject: Update tests... seems to work with both cblas and dclapack... Still
 haven't checked outputs.

---
 redist/dclapack.h |  1 -
 redist/test_dcl.c | 35 ++++++++++++++++++++++++++++++-----
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/redist/dclapack.h b/redist/dclapack.h
index cae377b..d4634ac 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -269,7 +269,6 @@ PRINT(Ainv,n,n); \
 				for (int _k = 0; _k < n; _k++) {                                                                       \
 					sum += _(A, _i, _k) * _(B, _k, _j);                                                                \
 				}                                                                                                      \
-				printf( "-> %d %d =   %f %f %f %f[%d %d]\n", _i, _j, alpha,sum,beta, _(C,_i,_j), _i, _j ); \
 				_(R, _i, _j) = alpha * sum + beta * _(C, _i, _j);                                                      \
 			}                                                                                                          \
 		}                                                                                                              \
diff --git a/redist/test_dcl.c b/redist/test_dcl.c
index 6b0d870..29c3c72 100644
--- a/redist/test_dcl.c
+++ b/redist/test_dcl.c
@@ -1,8 +1,13 @@
+//gcc -msse2 -O3 -ftree-vectorize test_dcl.c dclhelpers.c os_generic.c -DFLT=double -lpthread -lcblas && valgrind ./a.out
+
+
 #include "dclhelpers.h"
 #include <assert.h>
 #include <math.h>
 #include <stdint.h>
 #include <stdio.h>
+#include "os_generic.h"
+#include <cblas.h>
 
 int main()
 {
@@ -61,7 +66,7 @@ int main()
 	}
 
 
-#if 0
+#if 1
 
 	//Currently failing test...
 	{
@@ -70,8 +75,8 @@ int main()
 //		FLT emo[4][2];
 
 		FLT em1[12][20];
-		FLT em2[20][12];
-		FLT emo[20][12];
+		FLT em2[20][20];
+		FLT emo[20][20];
 		int x, y;
 
 		for( y = 0; y < 12; y++ )
@@ -79,9 +84,13 @@ int main()
 			em1[y][x] = (rand()%1000)/1000.0;
 
 		for( y = 0; y < 20; y++ )
-		for( x = 0; x < 12; x++ )
+		for( x = 0; x < 20; x++ )
 			em2[y][x] = (rand()%1000)/1000.0;
 
+		for( y = 0; y < 20; y++ )
+		for( x = 0; x < 20; x++ )
+			emo[y][x] = 0;
+
 		int m = 12;
 		int n = 20;
 		int k = 12;
@@ -89,7 +98,23 @@ int main()
 		dclPrint( em1[0], 20, 12, 20 );
 		dclPrint( em2[0], 12, 20, 12 );
 
-		dcldgemm( 0, 0, m, n, k, 1.0, em1[0], 20, em2[0], 12, .1, emo[0], 12 );
+		int i;
+		
+		double start = OGGetAbsoluteTime();
+		for( i = 0; i < 10000; i++ )
+		{
+			dcldgemm( 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) );
+			//cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) );
+
+/*void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
+                 CBLAS_TRANSPOSE TransB, const int M, const int N,
+                 const int K, const double alpha, const double *A,
+                 const int lda, const double *B, const int ldb,
+                 const double beta, double *C, const int ldc);*/
+
+		}
+		printf( "Elapsed: %f\n", OGGetAbsoluteTime()-start );
+
 		dclPrint( emo[0], 12, 20, 12 );
 	}
 #endif
-- 
cgit v1.3.1


From a3d7611ef9cbdd4171d64942cf3b3d6a0eb5caed Mon Sep 17 00:00:00 2001
From: cnlohr <lohr85@gmail.com>
Date: Sat, 17 Mar 2018 14:57:20 -0400
Subject: The results still disagree.

---
 redist/test_dcl.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/redist/test_dcl.c b/redist/test_dcl.c
index 29c3c72..5435a3c 100644
--- a/redist/test_dcl.c
+++ b/redist/test_dcl.c
@@ -95,16 +95,18 @@ int main()
 		int n = 20;
 		int k = 12;
 
-		dclPrint( em1[0], 20, 12, 20 );
-		dclPrint( em2[0], 12, 20, 12 );
+		dclPrint( DMS(em1), 12, 20 );
+		dclPrint( DMS(em2), 20, 12 );
 
 		int i;
 		
 		double start = OGGetAbsoluteTime();
 		for( i = 0; i < 10000; i++ )
 		{
+			dclZero( DMS(emo), 20, 20 );
+
 			dcldgemm( 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) );
-			//cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) );
+			//cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) );
 
 /*void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
                  CBLAS_TRANSPOSE TransB, const int M, const int N,
-- 
cgit v1.3.1


From 4cb248d46808c07f029c0c595af8f14517925816 Mon Sep 17 00:00:00 2001
From: Justin Berger <j.david.berger@gmail.com>
Date: Sat, 17 Mar 2018 14:00:32 -0600
Subject: Fixed test

---
 Makefile          |   6 ++-
 redist/dclapack.h |   2 +-
 redist/test_dcl.c | 110 +++++++++++++++++++++++++-----------------------------
 3 files changed, 57 insertions(+), 61 deletions(-)

diff --git a/Makefile b/Makefile
index dfcb9f4..ceb452a 100644
--- a/Makefile
+++ b/Makefile
@@ -3,6 +3,7 @@ all : lib data_recorder test calibrate calibrate_client simple_pose_test
 CC?=gcc
 
 CFLAGS:=-Iinclude/libsurvive -fPIC -g -O0 -Iredist -flto -DUSE_DOUBLE -std=gnu99 -rdynamic -llapacke  -lcblas -lm
+CFLAGS_RELEASE:=-Iinclude/libsurvive -fPIC -msse2 -ftree-vectorize -O3 -Iredist -flto -DUSE_DOUBLE -std=gnu99 -rdynamic -llapacke  -lcblas -lm 
 #LDFLAGS:=-L/usr/local/lib -lpthread -lusb-1.0 -lz -lm -flto -g
 LDFLAGS:=-L/usr/local/lib -lpthread -lz -lm -flto -g
 
@@ -88,7 +89,10 @@ static_calibrate : calibrate.c redist/os_generic.c $(DRAWFUNCTIONS) $(LIBSURVIVE
 	sed -i 's/#/\/\/#/g' ./redist/dclhelpers_debuggable.c
 
 
-test_dcl: ./redist/test_dcl.c ./redist/dclhelpers_debuggable.c ./redist/dclhelpers.h ./redist/dclapack.h
+test_dcl: ./redist/test_dcl.c ./redist/dclhelpers.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c
+	$(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS_RELEASE) -DFLT=double
+
+test_dcl_debug: ./redist/test_dcl.c ./redist/dclhelpers_debuggable.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c
 	$(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS) -DFLT=double -fsanitize=address -fsanitize=undefined
 
 test_minimal_cv: ./src/epnp/test_minimal_cv.c ./lib/libsurvive.so 
diff --git a/redist/dclapack.h b/redist/dclapack.h
index d4634ac..e43a4f9 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -27,7 +27,7 @@
 		printf(#A "\n");                                                                                               \
 		for (int _i = 0; _i < (m); _i++) {                                                                             \
 			for (int _j = 0; _j < (n); _j++) {                                                                         \
-				printf("%4.3f\t", _(A, _i, _j));                                                                       \
+				printf("%4.3f ", _(A, _i, _j));                                                                        \
 			}                                                                                                          \
 			printf("\n");                                                                                              \
 		}                                                                                                              \
diff --git a/redist/test_dcl.c b/redist/test_dcl.c
index 5435a3c..68a6129 100644
--- a/redist/test_dcl.c
+++ b/redist/test_dcl.c
@@ -1,13 +1,60 @@
 //gcc -msse2 -O3 -ftree-vectorize test_dcl.c dclhelpers.c os_generic.c -DFLT=double -lpthread -lcblas && valgrind ./a.out
 
-
 #include "dclhelpers.h"
+#include "os_generic.h"
 #include <assert.h>
+#include <cblas.h>
 #include <math.h>
 #include <stdint.h>
 #include <stdio.h>
-#include "os_generic.h"
-#include <cblas.h>
+#include <stdlib.h>
+
+void compareToCblas() {
+	FLT em1[12][20];
+	FLT em2[20][20];
+	FLT emo[2][20][20] = {};
+	int x, y;
+
+	for (y = 0; y < 12; y++)
+		for (x = 0; x < 20; x++)
+			em1[y][x] = (rand() % 1000) / 1000.0;
+
+	for (y = 0; y < 20; y++)
+		for (x = 0; x < 20; x++)
+			em2[y][x] = (rand() % 1000) / 1000.0;
+
+	int m = 12;
+	int n = 20;
+	int k = 20;
+
+	dclPrint(DMS(em1), 12, 20);
+	dclPrint(DMS(em2), 20, 12);
+
+	double times[2];
+	for (int z = 0; z < 2; z++) {
+		double start = OGGetAbsoluteTime();
+		for (int i = 0; i < 100000; i++) {
+			dclZero(DMS(emo[z]), 20, 20);
+
+			if (z == 0) {
+				dcldgemm(0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo[z]));
+			} else {
+				cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1,
+							DMS(emo[z]));
+			}
+			/*void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
+		  CBLAS_TRANSPOSE TransB, const int M, const int N,
+		  const int K, const double alpha, const double *A,
+		  const int lda, const double *B, const int ldb,
+		  const double beta, double *C, const int ldc);*/
+		}
+
+		printf("%s Elapsed: %f\n", z ? "CBlas" : "dcl", times[z] = OGGetAbsoluteTime() - start);
+	}
+	printf("%fx difference\n", times[0] / times[1]);
+	dclPrint(emo[0][0], 12, 20, 12);
+	dclPrint(emo[1][0], 12, 20, 12);
+}
 
 int main()
 {
@@ -65,61 +112,6 @@ int main()
 		}
 	}
 
-
-#if 1
-
-	//Currently failing test...
-	{
-//		FLT em1[3][4];
-//		FLT em2[4][2];
-//		FLT emo[4][2];
-
-		FLT em1[12][20];
-		FLT em2[20][20];
-		FLT emo[20][20];
-		int x, y;
-
-		for( y = 0; y < 12; y++ )
-		for( x = 0; x < 20; x++ )
-			em1[y][x] = (rand()%1000)/1000.0;
-
-		for( y = 0; y < 20; y++ )
-		for( x = 0; x < 20; x++ )
-			em2[y][x] = (rand()%1000)/1000.0;
-
-		for( y = 0; y < 20; y++ )
-		for( x = 0; x < 20; x++ )
-			emo[y][x] = 0;
-
-		int m = 12;
-		int n = 20;
-		int k = 12;
-
-		dclPrint( DMS(em1), 12, 20 );
-		dclPrint( DMS(em2), 20, 12 );
-
-		int i;
-		
-		double start = OGGetAbsoluteTime();
-		for( i = 0; i < 10000; i++ )
-		{
-			dclZero( DMS(emo), 20, 20 );
-
-			dcldgemm( 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) );
-			//cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo) );
-
-/*void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
-                 CBLAS_TRANSPOSE TransB, const int M, const int N,
-                 const int K, const double alpha, const double *A,
-                 const int lda, const double *B, const int ldb,
-                 const double beta, double *C, const int ldc);*/
-
-		}
-		printf( "Elapsed: %f\n", OGGetAbsoluteTime()-start );
-
-		dclPrint( emo[0], 12, 20, 12 );
-	}
-#endif
-
+	compareToCblas();
 }
 
-- 
cgit v1.3.1


From e2db23ea8401ea8a3e608c3ec4d84423e09047f9 Mon Sep 17 00:00:00 2001
From: Justin Berger <j.david.berger@gmail.com>
Date: Sat, 17 Mar 2018 14:12:34 -0600
Subject: Added assert clause to test

---
 redist/test_dcl.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/redist/test_dcl.c b/redist/test_dcl.c
index 68a6129..ce7f3de 100644
--- a/redist/test_dcl.c
+++ b/redist/test_dcl.c
@@ -42,11 +42,6 @@ void compareToCblas() {
 				cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1,
 							DMS(emo[z]));
 			}
-			/*void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
-		  CBLAS_TRANSPOSE TransB, const int M, const int N,
-		  const int K, const double alpha, const double *A,
-		  const int lda, const double *B, const int ldb,
-		  const double beta, double *C, const int ldc);*/
 		}
 
 		printf("%s Elapsed: %f\n", z ? "CBlas" : "dcl", times[z] = OGGetAbsoluteTime() - start);
@@ -54,6 +49,12 @@ void compareToCblas() {
 	printf("%fx difference\n", times[0] / times[1]);
 	dclPrint(emo[0][0], 12, 20, 12);
 	dclPrint(emo[1][0], 12, 20, 12);
+
+	for (int i = 0; i < m; i++) {
+		for (int j = 0; j < k; j++) {
+			assert(fabs(emo[0][i][j] - emo[1][i][j]) < .00001);
+		}
+	}
 }
 
 int main()
-- 
cgit v1.3.1


From 728936f7dce1bc545b5136590a6eb660771266d4 Mon Sep 17 00:00:00 2001
From: Justin Berger <j.david.berger@gmail.com>
Date: Sat, 17 Mar 2018 16:08:49 -0600
Subject: Added transpose timing tests

---
 Makefile                |  2 +-
 redist/dclapack.h       |  2 +-
 redist/minimal_opencv.c | 13 ++-----
 redist/test_dcl.c       | 97 +++++++++++++++++++++++++++++++++++--------------
 4 files changed, 75 insertions(+), 39 deletions(-)

diff --git a/Makefile b/Makefile
index ceb452a..1b5d5e6 100644
--- a/Makefile
+++ b/Makefile
@@ -89,7 +89,7 @@ static_calibrate : calibrate.c redist/os_generic.c $(DRAWFUNCTIONS) $(LIBSURVIVE
 	sed -i 's/#/\/\/#/g' ./redist/dclhelpers_debuggable.c
 
 
-test_dcl: ./redist/test_dcl.c ./redist/dclhelpers.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c
+test_dcl: ./redist/test_dcl.c ./redist/dclhelpers.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c ./redist/minimal_opencv.c ./src/epnp/epnp.c
 	$(CC) -o $@ $^ $(LDFLAGS) $(CFLAGS_RELEASE) -DFLT=double
 
 test_dcl_debug: ./redist/test_dcl.c ./redist/dclhelpers_debuggable.c ./redist/dclhelpers.h ./redist/dclapack.h redist/os_generic.c
diff --git a/redist/dclapack.h b/redist/dclapack.h
index e43a4f9..7f30187 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -24,7 +24,7 @@
  */
 #define PRINT(A, m, n)                                                                                                 \
 	{                                                                                                                  \
-		printf(#A "\n");                                                                                               \
+		printf(#A " %dx%d\n", m, n);                                                                                   \
 		for (int _i = 0; _i < (m); _i++) {                                                                             \
 			for (int _j = 0; _j < (n); _j++) {                                                                         \
 				printf("%4.3f ", _(A, _i, _j));                                                                        \
diff --git a/redist/minimal_opencv.c b/redist/minimal_opencv.c
index 3f7bed7..8e71034 100644
--- a/redist/minimal_opencv.c
+++ b/redist/minimal_opencv.c
@@ -44,11 +44,8 @@ void cvGEMM(const CvMat *src1, const CvMat *src2, double alpha, const CvMat *src
 		beta = 0;
 
 	cblas_dgemm(CblasRowMajor, (tABC & GEMM_1_T) ? CblasTrans : CblasNoTrans,
-				(tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha,
-
-				src1->data.db, lda, src2->data.db, ldb, beta,
-
-				dst->data.db, dst->cols);
+				(tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha, src1->data.db,
+				lda, src2->data.db, ldb, beta, dst->data.db, dst->cols);
 }
 
 void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta, double scale) {
@@ -67,11 +64,7 @@ void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta
 	lapack_int dstCols = dst->cols;
 
 	cblas_dgemm(CblasRowMajor, isAT ? CblasTrans : CblasNoTrans, isBT ? CblasTrans : CblasNoTrans, cols, dstCols, rows,
-				scale,
-
-				src->data.db, cols, src->data.db, cols, beta,
-
-				dst->data.db, dstCols);
+				scale, src->data.db, cols, src->data.db, cols, beta, dst->data.db, dstCols);
 }
 
 void *cvAlloc(size_t size) { return malloc(size); }
diff --git a/redist/test_dcl.c b/redist/test_dcl.c
index ce7f3de..dc6a93e 100644
--- a/redist/test_dcl.c
+++ b/redist/test_dcl.c
@@ -9,46 +9,43 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-void compareToCblas() {
-	FLT em1[12][20];
-	FLT em2[20][20];
-	FLT emo[2][20][20] = {};
-	int x, y;
-
-	for (y = 0; y < 12; y++)
-		for (x = 0; x < 20; x++)
-			em1[y][x] = (rand() % 1000) / 1000.0;
-
-	for (y = 0; y < 20; y++)
-		for (x = 0; x < 20; x++)
-			em2[y][x] = (rand() % 1000) / 1000.0;
-
-	int m = 12;
-	int n = 20;
-	int k = 20;
+#include "minimal_opencv.h"
 
-	dclPrint(DMS(em1), 12, 20);
-	dclPrint(DMS(em2), 20, 12);
+void fill_random(FLT *A, int ld, int m, int n) {
+	assert(ld == n);
+	for (int y = 0; y < m; y++)
+		for (int x = 0; x < n; x++)
+			A[y * ld + x] = (rand()) / (double)RAND_MAX;
+}
 
+void test_dcldgemm_speed(const char *name, char transA, char transB, int m, int n, int k, DCL_FLOAT alpha,
+						 const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, DCL_FLOAT beta) {
+	printf("%s speed test:\n", name);
 	double times[2];
+	FLT emo[2][m][n];
+
 	for (int z = 0; z < 2; z++) {
 		double start = OGGetAbsoluteTime();
 		for (int i = 0; i < 100000; i++) {
-			dclZero(DMS(emo[z]), 20, 20);
+			dclZero(DMS(emo[z]), m, n);
 
 			if (z == 0) {
-				dcldgemm(0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo[z]));
+				dcldgemm(transA, transB, m, n, k, alpha, A, Ac, B, Bc, beta, DMS(emo[z]));
 			} else {
-				cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1,
-							DMS(emo[z]));
+
+				cblas_dgemm(CblasRowMajor, transA == 1 ? CblasTrans : CblasNoTrans,
+							transB == 1 ? CblasTrans : CblasNoTrans, m, n, k, alpha, A, Ac, B, Bc, beta, emo[z][0], n);
 			}
 		}
 
-		printf("%s Elapsed: %f\n", z ? "CBlas" : "dcl", times[z] = OGGetAbsoluteTime() - start);
+		times[z] = OGGetAbsoluteTime() - start;
 	}
+
+	dclPrint(DMS(emo[0]), m, n);
+	dclPrint(DMS(emo[1]), m, n);
+	printf("dcl  Elapsed: %f\n", times[0]);
+	printf("cblas Elapsed: %f\n", times[1]);
 	printf("%fx difference\n", times[0] / times[1]);
-	dclPrint(emo[0][0], 12, 20, 12);
-	dclPrint(emo[1][0], 12, 20, 12);
 
 	for (int i = 0; i < m; i++) {
 		for (int j = 0; j < k; j++) {
@@ -57,12 +54,57 @@ void compareToCblas() {
 	}
 }
 
+void compareToCblas() {
+	srand(0);
+	int m = 12;
+	int n = 20;
+	int k = 20;
+
+	FLT em1[m][n];
+	FLT em2[n][k];
+
+	fill_random(DMS(em1), m, n);
+	fill_random(DMS(em2), n, k);
+
+	dclPrint(DMS(em1), m, n);
+	dclPrint(DMS(em2), n, k);
+
+	test_dcldgemm_speed("Simple", 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1);
+}
+
+void compareToCblasTrans() {
+	srand(0);
+	int m = 12;
+	int n = 20;
+	int k = n;
+
+	FLT em1[m][n];
+
+	fill_random(DMS(em1), m, n);
+
+	dclPrint(DMS(em1), m, n);
+
+	CvMat Em1 = cvMat(m, n, CV_64F, em1);
+	FLT em1tem1[n][n];
+	CvMat Em1tEm1 = cvMat(n, n, CV_64F, em1tem1);
+	cvMulTransposed(&Em1, &Em1tEm1, 1, 0, 1);
+	print_mat(&Em1tEm1);
+
+	test_dcldgemm_speed("Trans", 1, 0,
+						n, // # of rows in OP(A) == em1' -- 20
+						n, // # of cols in OP(B) == em1 -- 20
+						m, // # of cols in OP(A) == em1' -- 12
+						1.0,
+						DMS(em1), // Note that LD stays the same
+						DMS(em1), 0);
+}
+
 int main()
 {
 	FLT A[2][4]	= { { 0, 1, 2, 3 }, { 4, 5, 6, 7} };
 	FLT B[4][2];
 	dclPrint( A[0], 4, 2, 4 );
-	dclTransp( B[0], 2, A[0], 4, 2, 4 );
+	dclTransp(B[0], 2, A[0], 4, 2, 4);
 	dclPrint( B[0], 2, 4, 2 );
 
 	int i, j;
@@ -114,5 +156,6 @@ int main()
 	}
 
 	compareToCblas();
+	compareToCblasTrans();
 }
 
-- 
cgit v1.3.1