aboutsummaryrefslogtreecommitdiff
path: root/redist
diff options
context:
space:
mode:
authorJustin Berger <j.david.berger@gmail.com>2018-03-17 16:12:40 -0600
committerJustin Berger <j.david.berger@gmail.com>2018-03-17 16:12:40 -0600
commit954816d74e5460bc6d57ec682a5960a1a85298b1 (patch)
tree87b3d6a0a3fb4dbb2d00b2aac8154e1c47e9ca14 /redist
parent93627f6b89d22e6e2f87df0bd4d809e168958b1d (diff)
parent728936f7dce1bc545b5136590a6eb660771266d4 (diff)
downloadlibsurvive-954816d74e5460bc6d57ec682a5960a1a85298b1.tar.gz
libsurvive-954816d74e5460bc6d57ec682a5960a1a85298b1.tar.bz2
Merge branch 'master' into sba
Diffstat (limited to 'redist')
-rw-r--r--redist/dclapack.h385
-rw-r--r--redist/dclhelpers.c80
-rw-r--r--redist/dclhelpers.h74
-rw-r--r--redist/minimal_opencv.c13
-rw-r--r--redist/test_dcl.c155
5 files changed, 533 insertions, 174 deletions
diff --git a/redist/dclapack.h b/redist/dclapack.h
index 4e209d3..0950815 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -1,10 +1,6 @@
#ifndef __DCLAPACK_H__
#define __DCLAPACK_H__
-#ifndef ORDER
-#define ORDER 50
-#endif
-
#ifndef FLOAT
#define FLOAT float
#endif
@@ -13,146 +9,207 @@
#define _ABS(a) ( (a)<=0 ? 0-(a) : (a) )
+// Tricky: If you want to use this with pointers, instead of 2D arrays, you will
+// need to #define DYNAMIC_INDEX, as well as, for all arrays, suffix their name
+// with 'c'
+
+#ifdef DYNAMIC_INDEX
+#define _(AM, O, P) AM[O * AM##c + P]
+#else
+#define _(AM, O, P) AM[O][P]
+#endif
+
/*
- * Prints a matrix A (n by m)
+ * Prints a matrix A (m by n) (with witdth Ac)
*/
-#define PRINT(A,n,m) { \
- int i,j; \
- printf(#A "\n"); \
- for (i=0; i<n; i++) { \
- for (j=0; j<m; j++) { \
- printf("%4.3f\t", A[i][j]); \
- } \
- printf("\n"); \
- } \
- printf("\n"); \
-}
+#define PRINT(A, m, n) \
+ { \
+ printf(#A " %dx%d\n", m, n); \
+ for (int _i = 0; _i < (m); _i++) { \
+ for (int _j = 0; _j < (n); _j++) { \
+ printf("%4.3f ", _(A, _i, _j)); \
+ } \
+ printf("\n"); \
+ } \
+ printf("\n"); \
+ }
/*
- * Returns the identity matrix
+ * Returns the identity matrix (with size n x n, but width Ic)
*/
-#define IDENTITY(I,n) { \
- int i,j; \
- for (i=0; i<n; i++) { \
- for (j=0; j<i; j++) { I[i][j]=0.0f; } \
- I[i][i] = 1.0f; \
- for (j=i+1; j<n; j++) { I[i][j]=0.0f; } \
- } \
-}
+#define IDENTITY(I, m, n) \
+ { \
+ for (int _i = 0; _i < (m); _i++) { \
+ for (int _j = 0; _j < _i; _j++) { \
+ _(I, _i, _j) = 0.0f; \
+ } \
+ _(I, _i, _i) = 1.0f; \
+ for (int _j = _i + 1; _j < (n); _j++) { \
+ _(I, _i, _j) = 0.0f; \
+ } \
+ } \
+ }
/*
- * B = Transpose(A)
- * A is (n by m)
- * B is (m by n)
+ * Returns the identity matrix (with size n x n, but width Ic)
*/
-#define TRANSP(A,B,n,m) { \
- int i,j; \
- for (i=0; i<n; i++) { \
- for (j=0; j<m; j++) { \
- B[j][i] = A[i][j]; \
- } \
- } \
-}
+#define ZERO(Z, m, n) \
+ { \
+ for (int _i = 0; _i < (m); _i++) \
+ for (int _j = 0; _j < (n); _j++) \
+ _(Z, _i, _j) = 0.0f; \
+ }
+
+/*
+ * R = Transpose(A)
+ * A is (m by n)
+ * R is (n by m)
+ */
+#define TRANSP(R, A, m, n) \
+ { \
+ for (int _i = 0; _i < (m); _i++) { \
+ for (int _j = 0; _j < (n); _j++) { \
+ _(R, _j, _i) = _(A, _i, _j); \
+ } \
+ } \
+ }
/*
* Calculate L,U of a matrix A with pivot table
*/
-#define LU(A,L,U,Piv,n) { \
- int i,j,k,_tempi; float _tempf; \
- for (i=0; i<n; i++) { Piv[i]=i; } \
- for (i=0; i<n; i++) { \
- for (j=0; j<n; j++) { \
- U[i][j] = A[i][j]; \
- } \
- } \
- IDENTITY(L,n); \
- \
- for (i=0; i<n-1; i++) { \
- \
- int max=i; \
- for (j=i+1; j<n; j++) { \
- if (_ABS(U[j][i]) > _ABS(U[max][i])) { max = j; } \
- } \
- _tempi=Piv[i]; Piv[i]=Piv[max]; Piv[max]=_tempi; \
- for (k=i; k<n; k++) { \
- _tempf=U[i][k]; U[i][k]=U[max][k]; U[max][k]=_tempf; \
- } \
- for (k=0; k<i; k++) { \
- _tempf=L[i][k]; L[i][k]=L[max][k]; L[max][k]=_tempf; \
- } \
- \
- FLOAT invDiag = 1.0 / U[i][i]; \
- for (j=i+1; j<n; j++) { \
- FLOAT scale = U[j][i] * invDiag; \
- U[j][i] = 0.0; \
- for (k=i+1; k<n; k++) { U[j][k] -= U[i][k]*scale; } \
- L[j][i] = scale; \
- } \
- } \
-}
+#define LU(L, U, A, Piv, n) \
+ { \
+ int _i, _j, _k, _tempi; \
+ float _tempf; \
+ for (_i = 0; _i < (n); _i++) { \
+ Piv[_i] = _i; \
+ } \
+ for (_i = 0; _i < (n); _i++) { \
+ for (_j = 0; _j < (n); _j++) { \
+ _(U, _i, _j) = _(A, _i, _j); \
+ } \
+ } \
+ IDENTITY(L, n, n); \
+ \
+ for (_i = 0; _i < (n)-1; _i++) { \
+ \
+ int max = _i; \
+ for (_j = _i + 1; _j < (n); _j++) { \
+ if (_ABS(_(U, _j, _i)) > _ABS(_(U, max, _i))) { \
+ max = _j; \
+ } \
+ } \
+ _tempi = Piv[_i]; \
+ Piv[_i] = Piv[max]; \
+ Piv[max] = _tempi; \
+ for (_k = _i; _k < (n); _k++) { \
+ _tempf = _(U, _i, _k); \
+ _(U, _i, _k) = _(U, max, _k); \
+ _(U, max, _k) = _tempf; \
+ } \
+ for (_k = 0; _k < _i; _k++) { \
+ _tempf = _(L, _i, _k); \
+ _(L, _i, _k) = _(L, max, _k); \
+ _(L, max, _k) = _tempf; \
+ } \
+ \
+ FLOAT invDiag = 1.0 / _(U, _i, _i); \
+ for (_j = _i + 1; _j < (n); _j++) { \
+ FLOAT scale = _(U, _j, _i) * invDiag; \
+ _(U, _j, _i) = 0.0; \
+ for (_k = _i + 1; _k < (n); _k++) { \
+ _(U, _j, _k) -= _(U, _i, _k) * scale; \
+ } \
+ _(L, _j, _i) = scale; \
+ } \
+ } \
+ }
/*
* Pivots a matrix to a different matrix
- * B = Pivot(A) given table 'Piv'
- * A and B are (n by m)
+ * R = Pivot(A) given table 'Piv'
+ * A and R are (m by n)
*/
-#define PIVOT(A,B,Piv,n,m) { \
- int i,j; \
- for (j=0; j<n; j++) { \
- for (i=0; i<m; i++) { \
- B[j][i] = A[Piv[j]][i]; \
- } \
- } \
-}
+#define PIVOT(R, A, Piv, m, n) \
+ { \
+ for (int _i = 0; _i < (m); _i++) { \
+ for (int _j = 0; _j < (n); _j++) { \
+ _(R, _i, _j) = _(A, Piv[_i], _j); \
+ } \
+ } \
+ }
/*
* Solve LX=B for matrix X and B
- * L is n by n (lower triangular)
- * B is n by m
+ * L is m by m (lower triangular)
+ * B is m by n
*/
-#define L_SUB(L,X,B,n,m) { \
- int i,j,k; \
- for (i=0; i<m; i++) { \
- for (j=0; j<n; j++) { \
- float sum=0.0; \
- for (k=0; k<j; k++) { sum += L[j][k]*X[k][i]; } \
- X[j][i] = (B[j][i] - sum) / L[j][j]; \
- } \
- } \
-}
+#define L_SUB(X, L, B, m, n) \
+ { \
+ for (int _i = 0; _i < (n); _i++) { \
+ for (int _j = 0; _j < (m); _j++) { \
+ float sum = 0.0; \
+ for (int _k = 0; _k < _j; _k++) { \
+ sum += _(L, _j, _k) * _(X, _k, _i); \
+ } \
+ _(X, _j, _i) = (_(B, _j, _i) - sum) / _(L, _j, _j); \
+ } \
+ } \
+ }
/*
* Solve UX=B for matrix X and B
- * U is n by n (upper triangular)
- * B is n by m
+ * U is m by m (upper triangular)
+ * B is m by n
*/
-#define U_SUB(U,X,B,n,m) { \
- int i,j,k; \
- for (i=0; i<m; i++) { \
- for (j=n-1; j>=0; j--) { \
- float sum=0.0; \
- for (k=n-1; k>j; k--) { sum += U[j][k]*X[k][i]; } \
- X[j][i] = (B[j][i] - sum) / U[j][j]; \
- } \
- } \
-}
+#define U_SUB(X, U, B, m, n) \
+ { \
+ for (int _i = 0; _i < (n); _i++) { \
+ for (int _j = m - 1; _j >= 0; _j--) { \
+ float sum = 0.0; \
+ for (int _k = n - 1; _k > _j; _k--) { \
+ sum += _(U, _j, _k) * _(X, _k, _i); \
+ } \
+ _(X, _j, _i) = (_(B, _j, _i) - sum) / _(U, _j, _j); \
+ } \
+ } \
+ }
/*
* Inverts a matrix X (n by n) using the method of LU decomposition
*/
-#define INV(A,Ainv,n) { \
- FLOAT Ipiv[ORDER][ORDER]; \
- FLOAT L[ORDER][ORDER]; \
- FLOAT U[ORDER][ORDER]; \
- FLOAT I[ORDER][ORDER]; \
- FLOAT C[ORDER][ORDER]; \
- int Piv[ORDER]; \
- IDENTITY(I,n); \
- LU(A,L,U,Piv,n); \
- PIVOT(I,Ipiv,Piv,n,n); \
- L_SUB(L,C,Ipiv,n,n); \
- U_SUB(U,Ainv,C,n,n); \
-}
+
+#ifdef DYNAMIC_INDEX
+#define INV_SETUP(ORDER) \
+ FLOAT Ipiv[ORDER * ORDER]; \
+ const int Ipivc = ORDER; \
+ FLOAT L[ORDER * ORDER]; \
+ const int Lc = ORDER; \
+ FLOAT U[ORDER * ORDER]; \
+ const int Uc = ORDER; \
+ FLOAT I[ORDER * ORDER]; \
+ const int Ic = ORDER; \
+ FLOAT C[ORDER * ORDER]; \
+ const int Cc = ORDER;
+#else
+#define INV_SETUP(ORDER) \
+ FLOAT Ipiv[ORDER][ORDER]; \
+ FLOAT L[ORDER][ORDER]; \
+ FLOAT U[ORDER][ORDER]; \
+ FLOAT I[ORDER][ORDER]; \
+ FLOAT C[ORDER][ORDER];
+#endif
+
+#define INV(Ainv, A, n, ORDER) \
+ { \
+ INV_SETUP(ORDER) \
+ int Piv[ORDER]; \
+ IDENTITY(I, n, n); \
+ LU(L, U, A, Piv, n); \
+ PIVOT(Ipiv, I, Piv, n, n); \
+ L_SUB(C, L, Ipiv, n, n); \
+ U_SUB(Ainv, U, C, n, n); \
+ }
/*
PRINT(A,n,n); \
@@ -165,61 +222,61 @@ PRINT(Ainv,n,n); \
*/
/*
- * Matrix Multiply C = A * B
- * A (n by m)
+ * Matrix Multiply R = A * B
+ * R (n by p)
+ * A (m by n)
* B (m by p)
- * C (n by p)
*/
-#define MUL(A,B,C,n,m,p) { \
- int i,j,k; \
- for (i=0; i<n; i++) { \
- for (j=0; j<p; j++) { \
- C[i][j] = 0.0f; \
- for (k=0; k<m; k++) { \
- C[i][j] += A[i][k] * B[k][j]; \
- } \
- } \
- } \
-}
+#define MUL(R, A, B, m, n, p) \
+ { \
+ for (int _i = 0; _i < (m); _i++) { \
+ for (int _j = 0; _j < p; _j++) { \
+ _(R, _i, _j) = 0.0f; \
+ for (int _k = 0; _k < (n); _k++) { \
+ _(R, _i, _j) += _(A, _i, _k) * _(B, _k, _j); \
+ } \
+ } \
+ } \
+ }
/*
- * Matrix Multiply D = A * B + C
- * A (n by m)
- * B (m by p)
- * C (n by p)
- * D (n by p)
+ * Matrix Multiply R = A * B + C
+ * R (m by p)
+ * A (m by n)
+ * B (n by p)
+ * C (m by p)
*/
-#define MULADD(A,B,C,D,n,m,p) { \
- int i,j,k; \
- for (i=0; i<n; i++) { \
- for (j=0; j<p; j++) { \
- D[i][j] = C[i][j]; \
- for (k=0; k<m; k++) { \
- D[i][j] += A[i][k] * B[k][j]; \
- } \
- } \
- } \
-}
+#define MULADD(R, A, B, C, m, n, p) \
+ { \
+ for (int _i = 0; _i < (m); _i++) { \
+ for (int _j = 0; _j < p; _j++) { \
+ _(R, _i, _j) = _(C, _i, _j); \
+ for (int _k = 0; _k < (n); _k++) { \
+ _(R, _i, _j) += _(A, _i, _k) * _(B, _k, _j); \
+ } \
+ } \
+ } \
+ }
/*
- * Matrix Multiply D = alpha * A * B + beta * C
- * A (n by m)
- * B (m by p)
- * C (n by p)
- * D (n by p)
+ * Matrix Multiply R = alpha * A * B + beta * C
+ * R (m by p)
+ * A (m by n)
+ * B (n by p)
+ * C (m by p)
*/
-#define GMULADD(A,B,C,D,alpha,beta,n,m,p) { \
- int i,j,k; \
- float sum; \
- for (i=0; i<n; i++) { \
- for (j=0; j<p; j++) { \
- sum = 0.0f; \
- for (k=0; k<m; k++) { \
- sum += A[i][k] * B[k][j]; \
- } \
- D[i][j] = alpha * sum + beta * C[i][j]; \
- } \
- } \
-}
+#define GMULADD(R, A, B, C, alpha, beta, m, n, p) \
+ { \
+ float sum; \
+ for (int _i = 0; _i < m; _i++) { \
+ for (int _j = 0; _j < p; _j++) { \
+ sum = 0.0f; \
+ for (int _k = 0; _k < n; _k++) { \
+ sum += _(A, _i, _k) * _(B, _k, _j); \
+ } \
+ _(R, _i, _j) = alpha * sum + beta * _(C, _i, _j); \
+ } \
+ } \
+ }
#endif
diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c
new file mode 100644
index 0000000..9ed7c5f
--- /dev/null
+++ b/redist/dclhelpers.c
@@ -0,0 +1,80 @@
+#include "dclhelpers.h"
+#define FLOAT DCL_FLOAT
+#define DYNAMIC_INDEX
+#include "dclapack.h"
+#include <alloca.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+void dclPrint(const DCL_FLOAT *PMATRIX, int PMATRIXc, int n, int m) { PRINT(PMATRIX, n, m); }
+
+void dclIdentity(DCL_FLOAT *I, int Ic, int m, int n) { IDENTITY(I, m, n); }
+
+/* Returns the zero matrix */
+void dclZero(DCL_FLOAT *Z, int Zc, int m, int n) { memset(Z, 0, m * n * sizeof(DCL_FLOAT)); }
+
+void dclTransp(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int n, int m) { TRANSP(R, A, n, m); }
+
+void dclLU(DCL_FLOAT *L, int Lc, DCL_FLOAT *U, int Uc, const DCL_FLOAT *A, int Ac, int *Piv, int n) {
+ LU(L, U, A, Piv, n);
+}
+
+void dclPivot(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int *Piv, int n, int m) { PIVOT(R, A, Piv, n, m); }
+
+void dclLSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *L, int Lc, const DCL_FLOAT *B, int Bc, int n, int m) {
+ L_SUB(X, L, B, n, m);
+}
+
+void dclUSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *U, int Uc, const DCL_FLOAT *B, int Bc, int n, int m) {
+ U_SUB(X, U, B, n, m);
+}
+
+void dclInv(DCL_FLOAT *Ainv, int Ainvc, const DCL_FLOAT *A, int Ac, int n) { INV(Ainv, A, n, n); }
+
+void dclMul(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, int n, int m, int p) {
+ MUL(R, A, B, n, m, p);
+}
+
+void dclMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, int Cc,
+ int n, int m, int p) {
+ MULADD(R, A, B, C, n, m, p);
+}
+
+void dclGMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C,
+ int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p) {
+ GMULADD(R, A, B, C, alpha, beta, n, m, p);
+}
+
+/* dclGMulAdd( R, ((transA)?TRANS(A):A, (transB)?TRANS(B):B), C, alpha, beta, n, m, p ); */
+void dcldgemm(char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, const DCL_FLOAT *A,
+ int Ac, // must be n
+ const DCL_FLOAT *B,
+ int Bc, // must be m
+ DCL_FLOAT beta, DCL_FLOAT *C,
+ int Cc // must be n
+ ) {
+ const DCL_FLOAT *ta;
+ const DCL_FLOAT *tb;
+ int tac = Ac;
+ int tbc = Bc;
+ if (transA) {
+ DCL_FLOAT *la = alloca(sizeof(DCL_FLOAT) * n * m);
+ const int lac = m;
+ TRANSP(la, A, n, m);
+ ta = la;
+ tac = lac;
+ } else
+ ta = A;
+
+ if (transB) {
+ DCL_FLOAT *lb = alloca(sizeof(DCL_FLOAT) * n * m);
+ const int lbc = m;
+ TRANSP(lb, B, n, m);
+ tb = lb;
+ tbc = lbc;
+ } else
+ tb = B;
+
+ GMULADD(C, ta, tb, C, alpha, beta, m, n, k);
+}
diff --git a/redist/dclhelpers.h b/redist/dclhelpers.h
new file mode 100644
index 0000000..00acdac
--- /dev/null
+++ b/redist/dclhelpers.h
@@ -0,0 +1,74 @@
+#ifndef _DCL_HELPERS_H
+#define _DCL_HELPERS_H
+
+#define DCL_FLOAT FLT
+
+// Use this macro to safely
+#define DMS(m) ((m)[0]), (sizeof((m)[0]) / sizeof((m)[0][0]))
+
+/* Prints matrix A of size[n][m] */
+void dclPrint(const DCL_FLOAT *A, int Ac, int n, int m);
+
+/* Returns the identity matrix */
+void dclIdentity(DCL_FLOAT *I, int Ic, int m, int n);
+
+/* Returns the zero matrix */
+void dclZero(DCL_FLOAT *I, int Ic, int m, int n);
+
+/* R = Transpose(A)
+ A is (n by m)
+ R is (m by n) */
+void dclTransp(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int n, int m);
+
+/* Calculate L,U of a matrix A with pivot table; the pivot table is output. */
+void dclLU(DCL_FLOAT *L, int Lc, DCL_FLOAT *U, int Uc, const DCL_FLOAT *A, int Ac, int *Piv, int n);
+
+/* Pivots a matrix to a different matrix
+ R = Pivot(A) given table 'Piv'
+ A and R are (n by m) */
+void dclPivot(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int *Piv, int n, int m);
+
+/* Solve LX=B for matrix X and B
+ L is n by n (lower triangular)
+ B is n by m */
+void dclLSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *L, int Lc, const DCL_FLOAT *B, int Bc, int n, int m);
+
+/* Solve UX=B for matrix X and B
+ U is n by n (upper triangular)
+ B is n by m */
+void dclUSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *U, int Uc, const DCL_FLOAT *B, int Bc, int n, int m);
+
+/* Inverts a matrix X (n by n) using the method of LU decomposition */
+void dclInv(DCL_FLOAT *Ainv, int Ainvc, const DCL_FLOAT *A, int Ac, int n);
+
+/* Matrix Multiply R = A * B
+ A (n by m)
+ B (m by p)
+ R (n by p) */
+void dclMul(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, int n, int m, int p);
+
+/* Matrix Multiply R = A * B + C
+ A (n by m)
+ B (m by p)
+ C (n by p)
+ R (n by p) */
+void dclMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, int Cc,
+ int n, int m, int p);
+
+/* Matrix Multiply R = alpha * A * B + beta * C
+ A (n by m)
+ B (m by p)
+ C (n by p)
+ R (n by p) */
+void dclGMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C,
+ int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p);
+
+/********************************
+ * Auxiliary functionality in C *
+ ********************************/
+
+// Matches dgemm from lapack.
+void dcldgemm(char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, const DCL_FLOAT *A, int Ac,
+ const DCL_FLOAT *B, int Bc, DCL_FLOAT beta, DCL_FLOAT *C, int Cc);
+
+#endif
diff --git a/redist/minimal_opencv.c b/redist/minimal_opencv.c
index e8a6214..50eb7df 100644
--- a/redist/minimal_opencv.c
+++ b/redist/minimal_opencv.c
@@ -44,11 +44,8 @@ void cvGEMM(const CvMat *src1, const CvMat *src2, double alpha, const CvMat *src
beta = 0;
cblas_dgemm(CblasRowMajor, (tABC & GEMM_1_T) ? CblasTrans : CblasNoTrans,
- (tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha,
-
- src1->data.db, lda, src2->data.db, ldb, beta,
-
- dst->data.db, dst->cols);
+ (tABC & GEMM_2_T) ? CblasTrans : CblasNoTrans, src1->rows, dst->cols, src1->cols, alpha, src1->data.db,
+ lda, src2->data.db, ldb, beta, dst->data.db, dst->cols);
}
void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta, double scale) {
@@ -67,11 +64,7 @@ void cvMulTransposed(const CvMat *src, CvMat *dst, int order, const CvMat *delta
lapack_int dstCols = dst->cols;
cblas_dgemm(CblasRowMajor, isAT ? CblasTrans : CblasNoTrans, isBT ? CblasTrans : CblasNoTrans, cols, dstCols, rows,
- scale,
-
- src->data.db, cols, src->data.db, cols, beta,
-
- dst->data.db, dstCols);
+ scale, src->data.db, cols, src->data.db, cols, beta, dst->data.db, dstCols);
}
void *cvAlloc(size_t size) { return malloc(size); }
diff --git a/redist/test_dcl.c b/redist/test_dcl.c
new file mode 100644
index 0000000..ded2863
--- /dev/null
+++ b/redist/test_dcl.c
@@ -0,0 +1,155 @@
+// gcc -msse2 -O3 -ftree-vectorize test_dcl.c dclhelpers.c os_generic.c -DFLT=double -lpthread -lcblas && valgrind
+// ./a.out
+
+#include "dclhelpers.h"
+#include "os_generic.h"
+#include <assert.h>
+#include <cblas.h>
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "minimal_opencv.h"
+
+void fill_random(FLT *A, int ld, int m, int n) {
+ assert(ld == n);
+ for (int y = 0; y < m; y++)
+ for (int x = 0; x < n; x++)
+ A[y * ld + x] = (rand()) / (double)RAND_MAX;
+}
+
+void test_dcldgemm_speed(const char *name, char transA, char transB, int m, int n, int k, DCL_FLOAT alpha,
+ const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, DCL_FLOAT beta) {
+ printf("%s speed test:\n", name);
+ double times[2];
+ FLT emo[2][m][n];
+
+ for (int z = 0; z < 2; z++) {
+ double start = OGGetAbsoluteTime();
+ for (int i = 0; i < 100000; i++) {
+ dclZero(DMS(emo[z]), m, n);
+
+ if (z == 0) {
+ dcldgemm(transA, transB, m, n, k, alpha, A, Ac, B, Bc, beta, DMS(emo[z]));
+ } else {
+
+ cblas_dgemm(CblasRowMajor, transA == 1 ? CblasTrans : CblasNoTrans,
+ transB == 1 ? CblasTrans : CblasNoTrans, m, n, k, alpha, A, Ac, B, Bc, beta, emo[z][0], n);
+ }
+ }
+
+ times[z] = OGGetAbsoluteTime() - start;
+ }
+
+ dclPrint(DMS(emo[0]), m, n);
+ dclPrint(DMS(emo[1]), m, n);
+ printf("dcl Elapsed: %f\n", times[0]);
+ printf("cblas Elapsed: %f\n", times[1]);
+ printf("%fx difference\n", times[0] / times[1]);
+
+ for (int i = 0; i < m; i++) {
+ for (int j = 0; j < k; j++) {
+ assert(fabs(emo[0][i][j] - emo[1][i][j]) < .00001);
+ }
+ }
+}
+
+void compareToCblas() {
+ srand(0);
+ int m = 12;
+ int n = 20;
+ int k = 20;
+
+ FLT em1[m][n];
+ FLT em2[n][k];
+
+ fill_random(DMS(em1), m, n);
+ fill_random(DMS(em2), n, k);
+
+ dclPrint(DMS(em1), m, n);
+ dclPrint(DMS(em2), n, k);
+
+ test_dcldgemm_speed("Simple", 0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1);
+}
+
+void compareToCblasTrans() {
+ srand(0);
+ int m = 12;
+ int n = 20;
+ int k = n;
+
+ FLT em1[m][n];
+
+ fill_random(DMS(em1), m, n);
+
+ dclPrint(DMS(em1), m, n);
+
+ CvMat Em1 = cvMat(m, n, CV_64F, em1);
+ FLT em1tem1[n][n];
+ CvMat Em1tEm1 = cvMat(n, n, CV_64F, em1tem1);
+ cvMulTransposed(&Em1, &Em1tEm1, 1, 0, 1);
+ print_mat(&Em1tEm1);
+
+ test_dcldgemm_speed("Trans", 1, 0,
+ n, // # of rows in OP(A) == em1' -- 20
+ n, // # of cols in OP(B) == em1 -- 20
+ m, // # of cols in OP(A) == em1' -- 12
+ 1.0,
+ DMS(em1), // Note that LD stays the same
+ DMS(em1), 0);
+}
+
+int main() {
+ FLT A[2][4] = {{0, 1, 2, 3}, {4, 5, 6, 7}};
+ FLT B[4][2];
+ dclPrint(A[0], 4, 2, 4);
+ dclTransp(B[0], 2, A[0], 4, 2, 4);
+ dclPrint(B[0], 2, 4, 2);
+
+ int i, j;
+ for (i = 0; i < 8; i++) {
+ printf("%f\n", ((float *)(B[0]))[i]);
+ }
+
+ FLT M[3][3] = {{.32, 1, 0}, {0, 1, 2}, {1, 0, 1}};
+ FLT Mo[3][3];
+ dclInv(Mo[0], 3, M[0], 3, 3);
+ dclPrint(Mo[0], 3, 3, 3);
+
+ FLT MM[3][3];
+ dclMul(MM[0], 3, M[0], 3, Mo[0], 3, 3, 3, 3);
+
+ printf("The following should be an identity matrix\n");
+ dclPrint(MM[0], 3, 3, 3);
+
+ {
+ FLT A[3][4];
+ dclIdentity(DMS(A), 3, 4);
+ dclPrint(DMS(A), 3, 4);
+
+ FLT x[4][2] = {
+ {7, -7}, {8, -8}, {9, -9}, {10, -10},
+ };
+ FLT R[4][2];
+ dclZero(DMS(R), 4, 2);
+
+ // dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3);
+ dcldgemm(0, 0, 3, 4, 2, 1, A[0], 4, x[0], 2, 0, R[0], 2);
+
+ dclPrint(DMS(x), 4, 2);
+ dclPrint(DMS(R), 3, 2);
+
+ for (int j = 0; j < 2; j++) {
+ for (int i = 0; i < 3; i++) {
+ printf("[%d][%d]\n", i, j);
+ assert(R[i][j] == x[i][j]);
+ }
+
+ assert(fabs(R[3][j]) < .0000001);
+ }
+ }
+
+ compareToCblas();
+ compareToCblasTrans();
+}