diff options
Diffstat (limited to 'redist')
-rw-r--r-- | redist/dclapack.h | 61 | ||||
-rw-r--r-- | redist/dclhelpers.c | 109 | ||||
-rw-r--r-- | redist/dclhelpers.h | 91 | ||||
-rw-r--r-- | redist/json_helpers.c | 2 | ||||
-rw-r--r-- | redist/minimal_opencv.c | 2 | ||||
-rw-r--r-- | redist/sba/compiler.h | 42 | ||||
-rw-r--r-- | redist/sba/sba.h | 145 | ||||
-rw-r--r-- | redist/sba/sba_chkjac.c | 468 | ||||
-rw-r--r-- | redist/sba/sba_chkjac.h | 69 | ||||
-rw-r--r-- | redist/sba/sba_crsm.c | 338 | ||||
-rw-r--r-- | redist/sba/sba_lapack.c | 1730 | ||||
-rw-r--r-- | redist/sba/sba_levmar.c | 2715 | ||||
-rw-r--r-- | redist/sba/sba_levmar_wrap.c | 917 | ||||
-rw-r--r-- | redist/test_dcl.c | 44 |
14 files changed, 6550 insertions, 183 deletions
diff --git a/redist/dclapack.h b/redist/dclapack.h index 7f30187..0950815 100644 --- a/redist/dclapack.h +++ b/redist/dclapack.h @@ -9,14 +9,14 @@ #define _ABS(a) ( (a)<=0 ? 0-(a) : (a) ) -//Tricky: If you want to use this with pointers, instead of 2D arrays, you will -//need to #define DYNAMIC_INDEX, as well as, for all arrays, suffix their name -//with 'c' +// Tricky: If you want to use this with pointers, instead of 2D arrays, you will +// need to #define DYNAMIC_INDEX, as well as, for all arrays, suffix their name +// with 'c' #ifdef DYNAMIC_INDEX - #define _(AM,O,P) AM[O*AM##c+P] +#define _(AM, O, P) AM[O * AM##c + P] #else - #define _(AM,O,P) AM[O][P] +#define _(AM, O, P) AM[O][P] #endif /* @@ -50,7 +50,6 @@ } \ } - /* * Returns the identity matrix (with size n x n, but width Ic) */ @@ -90,7 +89,7 @@ _(U, _i, _j) = _(A, _i, _j); \ } \ } \ - IDENTITY(L, n, n); \ + IDENTITY(L, n, n); \ \ for (_i = 0; _i < (n)-1; _i++) { \ \ @@ -181,30 +180,36 @@ */ #ifdef DYNAMIC_INDEX - #define INV_SETUP(ORDER) \ - FLOAT Ipiv[ORDER*ORDER]; const int Ipivc = ORDER; \ - FLOAT L[ORDER*ORDER]; const int Lc = ORDER; \ - FLOAT U[ORDER*ORDER]; const int Uc = ORDER; \ - FLOAT I[ORDER*ORDER]; const int Ic = ORDER; \ - FLOAT C[ORDER*ORDER]; const int Cc = ORDER; +#define INV_SETUP(ORDER) \ + FLOAT Ipiv[ORDER * ORDER]; \ + const int Ipivc = ORDER; \ + FLOAT L[ORDER * ORDER]; \ + const int Lc = ORDER; \ + FLOAT U[ORDER * ORDER]; \ + const int Uc = ORDER; \ + FLOAT I[ORDER * ORDER]; \ + const int Ic = ORDER; \ + FLOAT C[ORDER * ORDER]; \ + const int Cc = ORDER; #else - #define INV_SETUP(ORDER) \ - FLOAT Ipiv[ORDER][ORDER]; \ - FLOAT L[ORDER][ORDER]; \ - FLOAT U[ORDER][ORDER]; \ - FLOAT I[ORDER][ORDER]; \ - FLOAT C[ORDER][ORDER]; +#define INV_SETUP(ORDER) \ + FLOAT Ipiv[ORDER][ORDER]; \ + FLOAT L[ORDER][ORDER]; \ + FLOAT U[ORDER][ORDER]; \ + FLOAT I[ORDER][ORDER]; \ + FLOAT C[ORDER][ORDER]; #endif -#define INV(Ainv,A,n,ORDER) { \ - INV_SETUP(ORDER) \ - int Piv[ORDER]; \ - IDENTITY(I,n,n); \ - LU(L,U,A,Piv,n); \ - PIVOT(Ipiv,I,Piv,n,n); \ - L_SUB(C,L,Ipiv,n,n); \ - U_SUB(Ainv,U,C,n,n); \ -} +#define INV(Ainv, A, n, ORDER) \ + { \ + INV_SETUP(ORDER) \ + int Piv[ORDER]; \ + IDENTITY(I, n, n); \ + LU(L, U, A, Piv, n); \ + PIVOT(Ipiv, I, Piv, n, n); \ + L_SUB(C, L, Ipiv, n, n); \ + U_SUB(Ainv, U, C, n, n); \ + } /* PRINT(A,n,n); \ diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c index 5b956b0..9ed7c5f 100644 --- a/redist/dclhelpers.c +++ b/redist/dclhelpers.c @@ -7,111 +7,74 @@ #include <stdio.h> #include <string.h> -void dclPrint( const DCL_FLOAT * PMATRIX, int PMATRIXc, int n, int m ) -{ - PRINT( PMATRIX, n, m ); -} - -void dclIdentity( DCL_FLOAT * I, int Ic, int m, int n ) -{ - IDENTITY( I, m, n ); -} +void dclPrint(const DCL_FLOAT *PMATRIX, int PMATRIXc, int n, int m) { PRINT(PMATRIX, n, m); } +void dclIdentity(DCL_FLOAT *I, int Ic, int m, int n) { IDENTITY(I, m, n); } /* Returns the zero matrix */ -void dclZero( DCL_FLOAT * Z, int Zc, int m, int n ) -{ - memset( Z, 0, m*n*sizeof(DCL_FLOAT) ); -} +void dclZero(DCL_FLOAT *Z, int Zc, int m, int n) { memset(Z, 0, m * n * sizeof(DCL_FLOAT)); } -void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m ) -{ - TRANSP(R,A,n,m); -} +void dclTransp(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int n, int m) { TRANSP(R, A, n, m); } -void dclLU( DCL_FLOAT * L, int Lc, DCL_FLOAT * U, int Uc, const DCL_FLOAT * A, int Ac, int * Piv, int n ) -{ - LU(L,U,A,Piv,n); +void dclLU(DCL_FLOAT *L, int Lc, DCL_FLOAT *U, int Uc, const DCL_FLOAT *A, int Ac, int *Piv, int n) { + LU(L, U, A, Piv, n); } -void dclPivot( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int * Piv, int n, int m ) -{ - PIVOT(R,A,Piv,n,m); -} +void dclPivot(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int *Piv, int n, int m) { PIVOT(R, A, Piv, n, m); } -void dclLSub( DCL_FLOAT * X, int Xc, const DCL_FLOAT * L, int Lc, const DCL_FLOAT * B, int Bc, int n, int m ) -{ - L_SUB(X,L,B,n,m); +void dclLSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *L, int Lc, const DCL_FLOAT *B, int Bc, int n, int m) { + L_SUB(X, L, B, n, m); } -void dclUSub( DCL_FLOAT * X, int Xc, const DCL_FLOAT * U, int Uc, const DCL_FLOAT * B, int Bc, int n, int m ) -{ - U_SUB(X,U,B,n,m); +void dclUSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *U, int Uc, const DCL_FLOAT *B, int Bc, int n, int m) { + U_SUB(X, U, B, n, m); } -void dclInv( DCL_FLOAT * Ainv, int Ainvc, const DCL_FLOAT * A, int Ac, int n ) -{ - INV(Ainv,A,n,n); -} +void dclInv(DCL_FLOAT *Ainv, int Ainvc, const DCL_FLOAT *A, int Ac, int n) { INV(Ainv, A, n, n); } -void dclMul( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, int n, int m, int p ) -{ - MUL(R,A,B,n,m,p); +void dclMul(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, int n, int m, int p) { + MUL(R, A, B, n, m, p); } -void dclMulAdd( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, const DCL_FLOAT * C, int Cc, int n, int m, int p ) -{ - MULADD(R,A,B,C,n,m,p); +void dclMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, int Cc, + int n, int m, int p) { + MULADD(R, A, B, C, n, m, p); } -void dclGMulAdd( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, const DCL_FLOAT * C, int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p ) -{ - GMULADD(R,A,B,C,alpha,beta,n,m,p); +void dclGMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, + int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p) { + GMULADD(R, A, B, C, alpha, beta, n, m, p); } /* dclGMulAdd( R, ((transA)?TRANS(A):A, (transB)?TRANS(B):B), C, alpha, beta, n, m, p ); */ -void dcldgemm( - char transA, - char transB, - int m, - int n, - int k, - DCL_FLOAT alpha, - const DCL_FLOAT* A, - int Ac, //must be n - const DCL_FLOAT* B, - int Bc, //must be m - DCL_FLOAT beta, - DCL_FLOAT * C, - int Cc //must be n - ) -{ +void dcldgemm(char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, const DCL_FLOAT *A, + int Ac, // must be n + const DCL_FLOAT *B, + int Bc, // must be m + DCL_FLOAT beta, DCL_FLOAT *C, + int Cc // must be n + ) { const DCL_FLOAT *ta; - const DCL_FLOAT * tb; + const DCL_FLOAT *tb; int tac = Ac; int tbc = Bc; - if( transA ) - { - DCL_FLOAT * la = alloca( sizeof( DCL_FLOAT ) * n * m ); + if (transA) { + DCL_FLOAT *la = alloca(sizeof(DCL_FLOAT) * n * m); const int lac = m; - TRANSP( la, A, n, m ); + TRANSP(la, A, n, m); ta = la; tac = lac; - } - else + } else ta = A; - if( transB ) - { - DCL_FLOAT * lb = alloca( sizeof( DCL_FLOAT ) * n * m ); + if (transB) { + DCL_FLOAT *lb = alloca(sizeof(DCL_FLOAT) * n * m); const int lbc = m; - TRANSP( lb, B, n, m ); + TRANSP(lb, B, n, m); tb = lb; tbc = lbc; - } - else + } else tb = B; GMULADD(C, ta, tb, C, alpha, beta, m, n, k); } - diff --git a/redist/dclhelpers.h b/redist/dclhelpers.h index b4acec8..00acdac 100644 --- a/redist/dclhelpers.h +++ b/redist/dclhelpers.h @@ -3,87 +3,72 @@ #define DCL_FLOAT FLT -//Use this macro to safely -#define DMS( m ) ((m)[0]), (sizeof((m)[0])/sizeof((m)[0][0])) +// Use this macro to safely +#define DMS(m) ((m)[0]), (sizeof((m)[0]) / sizeof((m)[0][0])) /* Prints matrix A of size[n][m] */ -void dclPrint( const DCL_FLOAT * A, int Ac, int n, int m ); +void dclPrint(const DCL_FLOAT *A, int Ac, int n, int m); /* Returns the identity matrix */ -void dclIdentity( DCL_FLOAT * I, int Ic, int m, int n ); +void dclIdentity(DCL_FLOAT *I, int Ic, int m, int n); /* Returns the zero matrix */ -void dclZero( DCL_FLOAT * I, int Ic, int m, int n ); +void dclZero(DCL_FLOAT *I, int Ic, int m, int n); /* R = Transpose(A) - A is (n by m) - R is (m by n) */ -void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m ); + A is (n by m) + R is (m by n) */ +void dclTransp(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int n, int m); /* Calculate L,U of a matrix A with pivot table; the pivot table is output. */ -void dclLU( DCL_FLOAT * L, int Lc, DCL_FLOAT * U, int Uc, const DCL_FLOAT * A, int Ac, int * Piv, int n ); +void dclLU(DCL_FLOAT *L, int Lc, DCL_FLOAT *U, int Uc, const DCL_FLOAT *A, int Ac, int *Piv, int n); /* Pivots a matrix to a different matrix - R = Pivot(A) given table 'Piv' - A and R are (n by m) */ -void dclPivot( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int * Piv, int n, int m ); + R = Pivot(A) given table 'Piv' + A and R are (n by m) */ +void dclPivot(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int *Piv, int n, int m); /* Solve LX=B for matrix X and B - L is n by n (lower triangular) - B is n by m */ -void dclLSub( DCL_FLOAT * X, int Xc, const DCL_FLOAT * L, int Lc, const DCL_FLOAT * B, int Bc, int n, int m ); + L is n by n (lower triangular) + B is n by m */ +void dclLSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *L, int Lc, const DCL_FLOAT *B, int Bc, int n, int m); /* Solve UX=B for matrix X and B - U is n by n (upper triangular) - B is n by m */ -void dclUSub( DCL_FLOAT * X, int Xc, const DCL_FLOAT * U, int Uc, const DCL_FLOAT * B, int Bc, int n, int m ); + U is n by n (upper triangular) + B is n by m */ +void dclUSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *U, int Uc, const DCL_FLOAT *B, int Bc, int n, int m); /* Inverts a matrix X (n by n) using the method of LU decomposition */ -void dclInv( DCL_FLOAT * Ainv, int Ainvc, const DCL_FLOAT * A, int Ac, int n ); +void dclInv(DCL_FLOAT *Ainv, int Ainvc, const DCL_FLOAT *A, int Ac, int n); /* Matrix Multiply R = A * B - A (n by m) - B (m by p) - R (n by p) */ -void dclMul( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, int n, int m, int p ); + A (n by m) + B (m by p) + R (n by p) */ +void dclMul(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, int n, int m, int p); /* Matrix Multiply R = A * B + C - A (n by m) - B (m by p) - C (n by p) - R (n by p) */ -void dclMulAdd( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, const DCL_FLOAT * C, int Cc, int n, int m, int p ); + A (n by m) + B (m by p) + C (n by p) + R (n by p) */ +void dclMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, int Cc, + int n, int m, int p); /* Matrix Multiply R = alpha * A * B + beta * C - A (n by m) - B (m by p) - C (n by p) - R (n by p) */ -void dclGMulAdd( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, const DCL_FLOAT * C, int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p ); - + A (n by m) + B (m by p) + C (n by p) + R (n by p) */ +void dclGMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, + int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p); /******************************** * Auxiliary functionality in C * ********************************/ -//Matches dgemm from lapack. -void dcldgemm( - char transA, - char transB, - int m, - int n, - int k, - DCL_FLOAT alpha, - const DCL_FLOAT* A, - int Ac, - const DCL_FLOAT* B, - int Bc, - DCL_FLOAT beta, - DCL_FLOAT * C, - int Cc - ); - - +// Matches dgemm from lapack. +void dcldgemm(char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, const DCL_FLOAT *A, int Ac, + const DCL_FLOAT *B, int Bc, DCL_FLOAT beta, DCL_FLOAT *C, int Cc); #endif - diff --git a/redist/json_helpers.c b/redist/json_helpers.c index 8704a93..ebacd2c 100644 --- a/redist/json_helpers.c +++ b/redist/json_helpers.c @@ -188,7 +188,6 @@ void json_load_file(const char* path) { if (value_t->type == JSMN_ARRAY) { i += json_load_array(JSON_STRING, tokens+i+2,value_t->size, tag); //look at array children } else if (value_t->type == JSMN_OBJECT) { - printf("Begin Object\n"); if (json_begin_object != NULL) json_begin_object(tag); children = (int16_t)(value_t->size +1); //+1 to account for this loop where we are not yed parsing children // i += decode_jsmn_object(JSON_STRING, tokens+i+2,value_t->size); @@ -200,7 +199,6 @@ void json_load_file(const char* path) { if (children>=0) children--; if (children == 0) { children = -1; - printf("End Object\n"); if (json_end_object!=NULL) json_end_object(); } diff --git a/redist/minimal_opencv.c b/redist/minimal_opencv.c index 8e71034..50eb7df 100644 --- a/redist/minimal_opencv.c +++ b/redist/minimal_opencv.c @@ -168,10 +168,8 @@ double cvInvert(const CvMat *srcarr, CvMat *dstarr, int method) { lapack_int *ipiv = malloc(sizeof(lapack_int) * MIN(srcarr->rows, srcarr->cols)); inf = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, rows, cols, a, lda, ipiv); assert(inf == 0); - print_mat(dstarr); inf = LAPACKE_dgetri(LAPACK_ROW_MAJOR, rows, a, lda, ipiv); - print_mat(dstarr); assert(inf >= 0); if (inf > 0) { printf("Warning: Singular matrix: \n"); diff --git a/redist/sba/compiler.h b/redist/sba/compiler.h new file mode 100644 index 0000000..09c01f1 --- /dev/null +++ b/redist/sba/compiler.h @@ -0,0 +1,42 @@ +///////////////////////////////////////////////////////////////////////////////// +//// +//// Compiler-specific definitions for sparse bundle adjustment based on the +//// Levenberg - Marquardt minimization algorithm +//// Copyright (C) 2004-2008 Manolis Lourakis (lourakis at ics forth gr) +//// Institute of Computer Science, Foundation for Research & Technology - Hellas +//// Heraklion, Crete, Greece. +//// +//// This program is free software; you can redistribute it and/or modify +//// it under the terms of the GNU General Public License as published by +//// the Free Software Foundation; either version 2 of the License, or +//// (at your option) any later version. +//// +//// This program is distributed in the hope that it will be useful, +//// but WITHOUT ANY WARRANTY; without even the implied warranty of +//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//// GNU General Public License for more details. +//// +/////////////////////////////////////////////////////////////////////////////////// + +#ifndef _COMPILER_H_ +#define _COMPILER_H_ + +/* note: intel's icc defines both __ICC & __INTEL_COMPILER. + * Also, some compilers other than gcc define __GNUC__, + * therefore gcc should be checked last + */ +#ifdef _MSC_VER +#define inline __inline // MSVC +#elif !defined(__ICC) && !defined(__INTEL_COMPILER) && !defined(__GNUC__) +#define inline // other than MSVC, ICC, GCC: define empty +#endif + +#ifdef _MSC_VER +#define SBA_FINITE _finite // MSVC +#elif defined(__ICC) || defined(__INTEL_COMPILER) || defined(__GNUC__) +#define SBA_FINITE isfinite // ICC, GCC +#else +#define SBA_FINITE isfinite // other than MSVC, ICC, GCC, let's hope this will work +#endif + +#endif /* _COMPILER_H_ */ diff --git a/redist/sba/sba.h b/redist/sba/sba.h new file mode 100644 index 0000000..dd7b2b8 --- /dev/null +++ b/redist/sba/sba.h @@ -0,0 +1,145 @@ +/* +///////////////////////////////////////////////////////////////////////////////// +//// +//// Prototypes and definitions for sparse bundle adjustment +//// Copyright (C) 2004-2009 Manolis Lourakis (lourakis at ics forth gr) +//// Institute of Computer Science, Foundation for Research & Technology - Hellas +//// Heraklion, Crete, Greece. +//// +//// This program is free software; you can redistribute it and/or modify +//// it under the terms of the GNU General Public License as published by +//// the Free Software Foundation; either version 2 of the License, or +//// (at your option) any later version. +//// +//// This program is distributed in the hope that it will be useful, +//// but WITHOUT ANY WARRANTY; without even the implied warranty of +//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//// GNU General Public License for more details. +//// +/////////////////////////////////////////////////////////////////////////////////// +*/ + +#ifndef _SBA_H_ +#define _SBA_H_ + +/**************************** Start of configuration options ****************************/ + +/* define the following if a trailing underscore should be appended to lapack functions names */ +#define SBA_APPEND_UNDERSCORE_SUFFIX // undef this for AIX + +/* define this to make linear solver routines use the minimum amount of memory + * possible. This lowers the memory requirements of large BA problems but will + * most likely induce a penalty on performance + */ +/*#define SBA_LS_SCARCE_MEMORY */ + +/* define this to save some memory by storing the weight matrices for the image + * projections (i.e. wght in the expert drivers) into the memory containing their + * covariances (i.e. covx arg). Note that this overwrites covx, making changes + * noticeable by the caller + */ +/*#define SBA_DESTROY_COVS */ + +/********* End of configuration options, no changes necessary beyond this point *********/ + +#ifdef __cplusplus +extern "C" { +#endif + +#define SBA_MIN_DELTA 1E-06 // finite differentiation minimum delta +#define SBA_DELTA_SCALE 1E-04 // finite differentiation delta scale + +#define SBA_OPTSSZ 5 +#define SBA_INFOSZ 10 +#define SBA_ERROR -1 +#define SBA_INIT_MU 1E-03 +#define SBA_STOP_THRESH 1E-12 +#define SBA_CG_NOPREC 0 +#define SBA_CG_JACOBI 1 +#define SBA_CG_SSOR 2 +#define SBA_VERSION "1.6 (Aug. 2009)" + +/* Sparse matrix representation using Compressed Row Storage (CRS) format. + * See http://www.netlib.org/linalg/html_templates/node91.html#SECTION00931100000000000000 + */ + +struct sba_crsm { + int nr, nc; /* #rows, #cols for the sparse matrix */ + int nnz; /* number of nonzero array elements */ + int *val; /* storage for nonzero array elements. size: nnz */ + int *colidx; /* column indexes of nonzero elements. size: nnz */ + int *rowptr; /* locations in val that start a row. size: nr+1. + * By convention, rowptr[nr]=nnz + */ +}; + +/* sparse LM */ + +/* simple drivers */ +extern int sba_motstr_levmar(const int n, const int ncon, const int m, const int mcon, char *vmask, double *p, + const int cnp, const int pnp, double *x, double *covx, const int mnp, + void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata), + void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij, + void *adata), + void *adata, const int itmax, const int verbose, const double opts[SBA_OPTSSZ], + double info[SBA_INFOSZ]); + +extern int sba_mot_levmar(const int n, const int m, const int mcon, char *vmask, double *p, const int cnp, double *x, + double *covx, const int mnp, void (*proj)(int j, int i, double *aj, double *xij, void *adata), + void (*projac)(int j, int i, double *aj, double *Aij, void *adata), void *adata, + const int itmax, const int verbose, const double opts[SBA_OPTSSZ], double info[SBA_INFOSZ]); + +extern int sba_str_levmar(const int n, const int ncon, const int m, char *vmask, double *p, const int pnp, double *x, + double *covx, const int mnp, void (*proj)(int j, int i, double *bi, double *xij, void *adata), + void (*projac)(int j, int i, double *bi, double *Bij, void *adata), void *adata, + const int itmax, const int verbose, const double opts[SBA_OPTSSZ], double info[SBA_INFOSZ]); + +/* expert drivers */ +extern int +sba_motstr_levmar_x(const int n, const int ncon, const int m, const int mcon, char *vmask, double *p, const int cnp, + const int pnp, double *x, double *covx, const int mnp, + void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), + void *adata, const int itmax, const int verbose, const double opts[SBA_OPTSSZ], + double info[SBA_INFOSZ]); + +extern int sba_mot_levmar_x( + const int n, const int m, const int mcon, char *vmask, double *p, const int cnp, double *x, double *covx, + const int mnp, void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), void *adata, + const int itmax, const int verbose, const double opts[SBA_OPTSSZ], double info[SBA_INFOSZ]); + +extern int sba_str_levmar_x( + const int n, const int ncon, const int m, char *vmask, double *p, const int pnp, double *x, double *covx, + const int mnp, void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), void *adata, + const int itmax, const int verbose, const double opts[SBA_OPTSSZ], double info[SBA_INFOSZ]); + +/* interfaces to LAPACK routines: solution of linear systems, matrix inversion, cholesky of inverse */ +extern int sba_Axb_QR(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_QRnoQ(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_Chol(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_LDLt(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_LU(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_SVD(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_BK(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_CG(double *A, double *B, double *x, int m, int niter, double eps, int prec, int iscolmaj); +extern int sba_symat_invert_LU(double *A, int m); +extern int sba_symat_invert_Chol(double *A, int m); +extern int sba_symat_invert_BK(double *A, int m); +extern int sba_mat_cholinv(double *A, double *B, int m); + +/* CRS sparse matrices manipulation routines */ +extern void sba_crsm_alloc(struct sba_crsm *sm, int nr, int nc, int nnz); +extern void sba_crsm_free(struct sba_crsm *sm); +extern int sba_crsm_elmidx(struct sba_crsm *sm, int i, int j); +extern int sba_crsm_elmidxp(struct sba_crsm *sm, int i, int j, int jp, int jpidx); +extern int sba_crsm_row_elmidxs(struct sba_crsm *sm, int i, int *vidxs, int *jidxs); +extern int sba_crsm_col_elmidxs(struct sba_crsm *sm, int j, int *vidxs, int *iidxs); +/* extern int sba_crsm_common_row(struct sba_crsm *sm, int j, int k); */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SBA_H_ */ diff --git a/redist/sba/sba_chkjac.c b/redist/sba/sba_chkjac.c new file mode 100644 index 0000000..7cb53ec --- /dev/null +++ b/redist/sba/sba_chkjac.c @@ -0,0 +1,468 @@ +///////////////////////////////////////////////////////////////////////////////// +//// +//// Verification routines for the jacobians employed in the expert & simple drivers +//// for sparse bundle adjustment based on the Levenberg - Marquardt minimization algorithm +//// Copyright (C) 2005-2008 Manolis Lourakis (lourakis at ics forth gr) +//// Institute of Computer Science, Foundation for Research & Technology - Hellas +//// Heraklion, Crete, Greece. +//// +//// This program is free software; you can redistribute it and/or modify +//// it under the terms of the GNU General Public License as published by +//// the Free Software Foundation; either version 2 of the License, or +//// (at your option) any later version. +//// +//// This program is distributed in the hope that it will be useful, +//// but WITHOUT ANY WARRANTY; without even the implied warranty of +//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//// GNU General Public License for more details. +//// +/////////////////////////////////////////////////////////////////////////////////// + +#include <float.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> + +#include "compiler.h" +#include "sba.h" + +#define emalloc(sz) emalloc_(__FILE__, __LINE__, sz) + +#define FABS(x) (((x) >= 0) ? (x) : -(x)) + +/* auxiliary memory allocation routine with error checking */ +inline static void *emalloc_(char *file, int line, size_t sz) { + void *ptr; + + ptr = (void *)malloc(sz); + if (ptr == NULL) { + fprintf(stderr, "SBA: memory allocation request for %zu bytes failed in file %s, line %d, exiting", sz, file, + line); + exit(1); + } + + return ptr; +} + +/* + * Check the jacobian of a projection function in nvars variables + * evaluated at a point p, for consistency with the function itself. + * Expert version + * + * Based on fortran77 subroutine CHKDER by + * Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. More + * Argonne National Laboratory. MINPACK project. March 1980. + * + * + * func points to a function from R^{nvars} --> R^{nobs}: Given a p in R^{nvars} + * it yields hx in R^{nobs} + * jacf points to a function implementing the jacobian of func, whose consistency with + * func is to be tested. Given a p in R^{nvars}, jacf computes into the nvis*(Asz+Bsz) + * matrix jac the jacobian of func at p. Note the jacobian is sparse, consisting of + * all A_ij, B_ij and that row i of jac corresponds to the gradient of the i-th + * component of func, evaluated at p. + * p is an input array of length nvars containing the point of evaluation. + * idxij, rcidxs, rcsubs, ncon, mcon, cnp, pnp, mnp are as usual. Note that if cnp=0 or + * pnp=0 a jacobian corresponding resp. to motion or camera parameters + * only is assumed. + * func_adata, jac_adata point to possible additional data and are passed + * uninterpreted to func, jacf respectively. + * err is an array of length nobs. On output, err contains measures + * of correctness of the respective gradients. if there is + * no severe loss of significance, then if err[i] is 1.0 the + * i-th gradient is correct, while if err[i] is 0.0 the i-th + * gradient is incorrect. For values of err between 0.0 and 1.0, + * the categorization is less certain. In general, a value of + * err[i] greater than 0.5 indicates that the i-th gradient is + * probably correct, while a value of err[i] less than 0.5 + * indicates that the i-th gradient is probably incorrect. + * + * CAUTION: THIS FUNCTION IS NOT 100% FOOLPROOF. The + * following excerpt comes from CHKDER's documentation: + * + * "The function does not perform reliably if cancellation or + * rounding errors cause a severe loss of significance in the + * evaluation of a function. therefore, none of the components + * of p should be unusually small (in particular, zero) or any + * other value which may cause loss of significance." + */ + +void sba_motstr_chkjac_x(void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, + void *adata), + void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, + void *adata), + double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int ncon, int mcon, int cnp, + int pnp, int mnp, void *func_adata, void *jac_adata) { + const double factor = 100.0, one = 1.0, zero = 0.0; + double *fvec, *fjac, *pp, *fvecp, *buf, *err; + + int nvars, nobs, m, n, Asz, Bsz, ABsz, nnz; + register int i, j, ii, jj; + double eps, epsf, temp, epsmch, epslog; + register double *ptr1, *ptr2, *pab; + double *pa, *pb; + int fvec_sz, pp_sz, fvecp_sz, numerr = 0; + + nobs = idxij->nnz * mnp; + n = idxij->nr; + m = idxij->nc; + nvars = m * cnp + n * pnp; + epsmch = DBL_EPSILON; + eps = sqrt(epsmch); + + Asz = mnp * cnp; + Bsz = mnp * pnp; + ABsz = Asz + Bsz; + fjac = (double *)emalloc(idxij->nnz * ABsz * sizeof(double)); + + fvec_sz = fvecp_sz = nobs; + pp_sz = nvars; + buf = (double *)emalloc((fvec_sz + pp_sz + fvecp_sz) * sizeof(double)); + fvec = buf; + pp = fvec + fvec_sz; + fvecp = pp + pp_sz; + + err = (double *)emalloc(nobs * sizeof(double)); + + /* compute fvec=func(p) */ + (*func)(p, idxij, rcidxs, rcsubs, fvec, func_adata); + + /* compute the jacobian at p */ + (*jacf)(p, idxij, rcidxs, rcsubs, fjac, jac_adata); + + /* compute pp */ + for (j = 0; j < nvars; ++j) { + temp = eps * FABS(p[j]); + if (temp == zero) + temp = eps; + pp[j] = p[j] + temp; + } + + /* compute fvecp=func(pp) */ + (*func)(pp, idxij, rcidxs, rcsubs, fvecp, func_adata); + + epsf = factor * epsmch; + epslog = log10(eps); + + for (i = 0; i < nobs; ++i) + err[i] = zero; + + pa = p; + pb = p + m * cnp; + for (i = 0; i < n; ++i) { + nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, + rcsubs); /* find nonzero A_ij, B_ij, j=0...m-1, actual column numbers in rcsubs */ + for (j = 0; j < nnz; ++j) { + ptr2 = err + idxij->val[rcidxs[j]] * mnp; // set ptr2 to point into err + + if (cnp && rcsubs[j] >= mcon) { // A_ij is nonzero + ptr1 = fjac + idxij->val[rcidxs[j]] * ABsz; // set ptr1 to point to A_ij + pab = pa + rcsubs[j] * cnp; + for (jj = 0; jj < cnp; ++jj) { + temp = FABS(pab[jj]); + if (temp == zero) + temp = one; + + for (ii = 0; ii < mnp; ++ii) + ptr2[ii] += temp * ptr1[ii * cnp + jj]; + } + } + + if (pnp && i >= ncon) { // B_ij is nonzero + ptr1 = fjac + idxij->val[rcidxs[j]] * ABsz + Asz; // set ptr1 to point to B_ij + pab = pb + i * pnp; + for (jj = 0; jj < pnp; ++jj) { + temp = FABS(pab[jj]); + if (temp == zero) + temp = one; + + for (ii = 0; ii < mnp; ++ii) + ptr2[ii] += temp * ptr1[ii * pnp + jj]; + } + } + } + } + + for (i = 0; i < nobs; ++i) { + temp = one; + if (fvec[i] != zero && fvecp[i] != zero && FABS(fvecp[i] - fvec[i]) >= epsf * FABS(fvec[i])) + temp = eps * FABS((fvecp[i] - fvec[i]) / eps - err[i]) / (FABS(fvec[i]) + FABS(fvecp[i])); + err[i] = one; + if (temp > epsmch && temp < eps) + err[i] = (log10(temp) - epslog) / epslog; + if (temp >= eps) + err[i] = zero; + } + + free(fjac); + free(buf); + + for (i = 0; i < n; ++i) { + nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero err_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + if (i < ncon && rcsubs[j] < mcon) + continue; // corresponding gradients are taken to be zero + + ptr1 = err + idxij->val[rcidxs[j]] * mnp; // set ptr1 to point into err + for (ii = 0; ii < mnp; ++ii) + if (ptr1[ii] <= 0.5) { + fprintf(stderr, "SBA: gradient %d (corresponding to element %d of the projection of point %d on " + "camera %d) is %s (err=%g)\n", + idxij->val[rcidxs[j]] * mnp + ii, ii, i, rcsubs[j], + (ptr1[ii] == 0.0) ? "wrong" : "probably wrong", ptr1[ii]); + ++numerr; + } + } + } + if (numerr) + fprintf(stderr, "SBA: found %d suspicious gradients out of %d\n\n", numerr, nobs); + + free(err); + + return; +} + +void sba_mot_chkjac_x( + void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), double *p, + struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int mcon, int cnp, int mnp, void *func_adata, void *jac_adata) { + sba_motstr_chkjac_x(func, jacf, p, idxij, rcidxs, rcsubs, 0, mcon, cnp, 0, mnp, func_adata, jac_adata); +} + +void sba_str_chkjac_x( + void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), double *p, + struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int ncon, int pnp, int mnp, void *func_adata, void *jac_adata) { + sba_motstr_chkjac_x(func, jacf, p, idxij, rcidxs, rcsubs, ncon, 0, 0, pnp, mnp, func_adata, jac_adata); +} + +#if 0 +/* Routines for directly checking the jacobians supplied to the simple drivers. + * They shouldn't be necessary since these jacobians can be verified indirectly + * through the expert sba_XXX_chkjac_x() routines. + */ + +/*****************************************************************************************/ +// Sample code for using sba_motstr_chkjac(): + + for(i=ncon; i<n; ++i) + for(j=mcon; j<m; ++j){ + if(!vmask[i*m+j]) continue; // point i does not appear in image j + + sba_motstr_chkjac(proj, projac, p+j*cnp, p+m*cnp+i*pnp, j, i, cnp, pnp, mnp, adata, adata); + } + + +/*****************************************************************************************/ + + +/* union used for passing pointers to the user-supplied functions for the motstr/mot/str simple drivers */ +union proj_projac{ + struct{ + void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata); + void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij, void *adata); + } motstr; + + struct{ + void (*proj)(int j, int i, double *aj, double *xij, void *adata); + void (*projac)(int j, int i, double *aj, double *Aij, void *adata); + } mot; + + struct{ + void (*proj)(int j, int i, double *bi, double *xij, void *adata); + void (*projac)(int j, int i, double *bi, double *Bij, void *adata); + } str; +}; + + +/* + * Check the jacobian of a projection function in cnp+pnp variables + * evaluated at a point p, for consistency with the function itself. + * Simple version of the above, NOT to be called directly + * + * Based on fortran77 subroutine CHKDER by + * Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. More + * Argonne National Laboratory. MINPACK project. March 1980. + * + * + * proj points to a function from R^{cnp+pnp} --> R^{mnp}: Given a p=(aj, bi) in R^{cnp+pnp} + * it yields hx in R^{mnp} + * projac points to a function implementing the jacobian of func, whose consistency with proj + * is to be tested. Given a p in R^{cnp+pnp}, jacf computes into the matrix jac=[Aij | Bij] + * jacobian of proj at p. Note that row i of jac corresponds to the gradient of the i-th + * component of proj, evaluated at p. + * aj, bi are input arrays of lengths cnp, pnp containing the parameters for the point of + * evaluation, i.e. j-th camera and i-th point + * jj, ii specify the point (ii) whose projection jacobian in image (jj) is being checked + * cnp, pnp, mnp are as usual. Note that if cnp=0 or + * pnp=0 a jacobian corresponding resp. to motion or camera parameters + * only is assumed. + * func_adata, jac_adata point to possible additional data and are passed + * uninterpreted to func, jacf respectively. + * err is an array of length mnp. On output, err contains measures + * of correctness of the respective gradients. if there is + * no severe loss of significance, then if err[i] is 1.0 the + * i-th gradient is correct, while if err[i] is 0.0 the i-th + * gradient is incorrect. For values of err between 0.0 and 1.0, + * the categorization is less certain. In general, a value of + * err[i] greater than 0.5 indicates that the i-th gradient is + * probably correct, while a value of err[i] less than 0.5 + * indicates that the i-th gradient is probably incorrect. + * + * CAUTION: THIS FUNCTION IS NOT 100% FOOLPROOF. The + * following excerpt comes from CHKDER's documentation: + * + * "The function does not perform reliably if cancellation or + * rounding errors cause a severe loss of significance in the + * evaluation of a function. therefore, none of the components + * of p should be unusually small (in particular, zero) or any + * other value which may cause loss of significance." + */ + +static void sba_chkjac( + union proj_projac *funcs, double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata) +{ +const double factor=100.0, one=1.0, zero=0.0; +double *fvec, *fjac, *Aij, *Bij, *ajp, *bip, *fvecp, *buf, *err; + +int Asz, Bsz; +register int i, j; +double eps, epsf, temp, epsmch, epslog; +int fvec_sz, ajp_sz, bip_sz, fvecp_sz, err_sz, numerr=0; + + epsmch=DBL_EPSILON; + eps=sqrt(epsmch); + + Asz=mnp*cnp; Bsz=mnp*pnp; + fjac=(double *)emalloc((Asz+Bsz)*sizeof(double)); + Aij=fjac; + Bij=Aij+Asz; + + fvec_sz=fvecp_sz=mnp; + ajp_sz=cnp; bip_sz=pnp; + err_sz=mnp; + buf=(double *)emalloc((fvec_sz + ajp_sz + bip_sz + fvecp_sz + err_sz)*sizeof(double)); + fvec=buf; + ajp=fvec+fvec_sz; + bip=ajp+ajp_sz; + fvecp=bip+bip_sz; + err=fvecp+fvecp_sz; + + /* compute fvec=proj(p), p=(aj, bi) & the jacobian at p */ + if(cnp && pnp){ + (*(funcs->motstr.proj))(jj, ii, aj, bi, fvec, func_adata); + (*(funcs->motstr.projac))(jj, ii, aj, bi, Aij, Bij, jac_adata); + } + else if(cnp){ + (*(funcs->mot.proj))(jj, ii, aj, fvec, func_adata); + (*(funcs->mot.projac))(jj, ii, aj, Aij, jac_adata); + } + else{ + (*(funcs->str.proj))(jj, ii, bi, fvec, func_adata); + (*(funcs->str.projac))(jj, ii, bi, Bij, jac_adata); + } + + /* compute pp, pp=(ajp, bip) */ + for(j=0; j<cnp; ++j){ + temp=eps*FABS(aj[j]); + if(temp==zero) temp=eps; + ajp[j]=aj[j]+temp; + } + for(j=0; j<pnp; ++j){ + temp=eps*FABS(bi[j]); + if(temp==zero) temp=eps; + bip[j]=bi[j]+temp; + } + + /* compute fvecp=proj(pp) */ + if(cnp && pnp) + (*(funcs->motstr.proj))(jj, ii, ajp, bip, fvecp, func_adata); + else if(cnp) + (*(funcs->mot.proj))(jj, ii, ajp, fvecp, func_adata); + else + (*(funcs->str.proj))(jj, ii, bip, fvecp, func_adata); + + epsf=factor*epsmch; + epslog=log10(eps); + + for(i=0; i<mnp; ++i) + err[i]=zero; + + for(j=0; j<cnp; ++j){ + temp=FABS(aj[j]); + if(temp==zero) temp=one; + + for(i=0; i<mnp; ++i) + err[i]+=temp*Aij[i*cnp+j]; + } + for(j=0; j<pnp; ++j){ + temp=FABS(bi[j]); + if(temp==zero) temp=one; + + for(i=0; i<mnp; ++i) + err[i]+=temp*Bij[i*pnp+j]; + } + + for(i=0; i<mnp; ++i){ + temp=one; + if(fvec[i]!=zero && fvecp[i]!=zero && FABS(fvecp[i]-fvec[i])>=epsf*FABS(fvec[i])) + temp=eps*FABS((fvecp[i]-fvec[i])/eps - err[i])/(FABS(fvec[i])+FABS(fvecp[i])); + err[i]=one; + if(temp>epsmch && temp<eps) + err[i]=(log10(temp) - epslog)/epslog; + if(temp>=eps) err[i]=zero; + } + + for(i=0; i<mnp; ++i) + if(err[i]<=0.5){ + fprintf(stderr, "SBA: gradient %d (corresponding to element %d of the projection of point %d on camera %d) is %s (err=%g)\n", + i, i, ii, jj, (err[i]==0.0)? "wrong" : "probably wrong", err[i]); + ++numerr; + } + if(numerr) fprintf(stderr, "SBA: found %d suspicious gradients out of %d\n\n", numerr, mnp); + + free(fjac); + free(buf); + + return; +} + +void sba_motstr_chkjac( + void (*proj)(int jj, int ii, double *aj, double *bi, double *xij, void *adata), + void (*projac)(int jj, int ii, double *aj, double *bi, double *Aij, double *Bij, void *adata), + double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata) +{ +union proj_projac funcs; + + funcs.motstr.proj=proj; + funcs.motstr.projac=projac; + + sba_chkjac(&funcs, aj, bi, jj, ii, cnp, pnp, mnp, func_adata, jac_adata); +} + +void sba_mot_chkjac( + void (*proj)(int jj, int ii, double *aj, double *xij, void *adata), + void (*projac)(int jj, int ii, double *aj, double *Aij, void *adata), + double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata) +{ +union proj_projac funcs; + + funcs.mot.proj=proj; + funcs.mot.projac=projac; + + sba_chkjac(&funcs, aj, NULL, jj, ii, cnp, 0, mnp, func_adata, jac_adata); +} + +void sba_str_chkjac( + void (*proj)(int jj, int ii, double *bi, double *xij, void *adata), + void (*projac)(int jj, int ii, double *bi, double *Bij, void *adata), + double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata) +{ +union proj_projac funcs; + + funcs.str.proj=proj; + funcs.str.projac=projac; + + sba_chkjac(&funcs, NULL, bi, jj, ii, 0, pnp, mnp, func_adata, jac_adata); +} +#endif /* 0 */ diff --git a/redist/sba/sba_chkjac.h b/redist/sba/sba_chkjac.h new file mode 100644 index 0000000..899c35e --- /dev/null +++ b/redist/sba/sba_chkjac.h @@ -0,0 +1,69 @@ +///////////////////////////////////////////////////////////////////////////////// +//// +//// Prototypes and definitions for verification routines for the jacobians +//// employed in the expert & simple drivers for sparse bundle adjustment +//// Copyright (C) 2005-2008 Manolis Lourakis (lourakis at ics forth gr) +//// Institute of Computer Science, Foundation for Research & Technology - Hellas +//// Heraklion, Crete, Greece. +//// +//// This program is free software; you can redistribute it and/or modify +//// it under the terms of the GNU General Public License as published by +//// the Free Software Foundation; either version 2 of the License, or +//// (at your option) any later version. +//// +//// This program is distributed in the hope that it will be useful, +//// but WITHOUT ANY WARRANTY; without even the implied warranty of +//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//// GNU General Public License for more details. +//// +/////////////////////////////////////////////////////////////////////////////////// + +#ifndef _SBA_CHKJAC_H_ +#define _SBA_CHKJAC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#if 0 +/* simple driver jacobians */ +extern void sba_motstr_chkjac( + void (*proj)(int jj, int ii, double *aj, double *bi, double *xij, void *adata), + void (*projac)(int jj, int ii, double *aj, double *bi, double *Aij, double *Bij, void *adata), + double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata); + +extern void sba_mot_chkjac( + void (*proj)(int jj, int ii, double *aj, double *xij, void *adata), + void (*projac)(int jj, int ii, double *aj, double *Aij, void *adata), + double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata); + +extern void sba_str_chkjac( + void (*proj)(int jj, int ii, double *bi, double *xij, void *adata), + void (*projac)(int jj, int ii, double *bi, double *Bij, void *adata), + double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata); +#endif /* 0 */ + +/* expert driver jacobians */ +extern void +sba_motstr_chkjac_x(void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), + double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int ncon, int mcon, int cnp, int pnp, + int mnp, void *func_adata, void *jac_adata); + +extern void +sba_mot_chkjac_x(void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), + double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int mcon, int cnp, int mnp, + void *func_adata, void *jac_adata); + +extern void +sba_str_chkjac_x(void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), + double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int ncon, int pnp, int mnp, + void *func_adata, void *jac_adata); + +#ifdef __cplusplus +} +#endif + +#endif /* _SBA_CHKJAC_H_ */ diff --git a/redist/sba/sba_crsm.c b/redist/sba/sba_crsm.c new file mode 100644 index 0000000..9ba11f1 --- /dev/null +++ b/redist/sba/sba_crsm.c @@ -0,0 +1,338 @@ +///////////////////////////////////////////////////////////////////////////////// +//// +//// CRS sparse matrices manipulation routines +//// Copyright (C) 2004-2008 Manolis Lourakis (lourakis at ics forth gr) +//// Institute of Computer Science, Foundation for Research & Technology - Hellas +//// Heraklion, Crete, Greece. +//// +//// This program is free software; you can redistribute it and/or modify +//// it under the terms of the GNU General Public License as published by +//// the Free Software Foundation; either version 2 of the License, or +//// (at your option) any later version. +//// +//// This program is distributed in the hope that it will be useful, +//// but WITHOUT ANY WARRANTY; without even the implied warranty of +//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//// GNU General Public License for more details. +//// +/////////////////////////////////////////////////////////////////////////////////// + +#include <stdio.h> +#include <stdlib.h> + +#include "sba.h" + +static void sba_crsm_print(struct sba_crsm *sm, FILE *fp); +static void sba_crsm_build(struct sba_crsm *sm, int *m, int nr, int nc); + +/* allocate a sparse CRS matrix */ +void sba_crsm_alloc(struct sba_crsm *sm, int nr, int nc, int nnz) { + int msz; + + sm->nr = nr; + sm->nc = nc; + sm->nnz = nnz; + msz = 2 * nnz + nr + 1; + sm->val = (int *)malloc(msz * sizeof(int)); /* required memory is allocated in a single step */ + if (!sm->val) { + fprintf(stderr, "SBA: memory allocation request failed in sba_crsm_alloc() [nr=%d, nc=%d, nnz=%d]\n", nr, nc, + nnz); + exit(1); + } + sm->colidx = sm->val + nnz; + sm->rowptr = sm->colidx + nnz; +} + +/* free a sparse CRS matrix */ +void sba_crsm_free(struct sba_crsm *sm) { + sm->nr = sm->nc = sm->nnz = -1; + free(sm->val); + sm->val = sm->colidx = sm->rowptr = NULL; +} + +static void sba_crsm_print(struct sba_crsm *sm, FILE *fp) { + register int i; + + fprintf(fp, "matrix is %dx%d, %d non-zeros\nval: ", sm->nr, sm->nc, sm->nnz); + for (i = 0; i < sm->nnz; ++i) + fprintf(fp, "%d ", sm->val[i]); + fprintf(fp, "\ncolidx: "); + for (i = 0; i < sm->nnz; ++i) + fprintf(fp, "%d ", sm->colidx[i]); + fprintf(fp, "\nrowptr: "); + for (i = 0; i <= sm->nr; ++i) + fprintf(fp, "%d ", sm->rowptr[i]); + fprintf(fp, "\n"); +} + +/* build a sparse CRS matrix from a dense one. intended to serve as an example for sm creation */ +static void sba_crsm_build(struct sba_crsm *sm, int *m, int nr, int nc) { + int nnz; + register int i, j, k; + + /* count nonzeros */ + for (i = nnz = 0; i < nr; ++i) + for (j = 0; j < nc; ++j) + if (m[i * nc + j] != 0) + ++nnz; + + sba_crsm_alloc(sm, nr, nc, nnz); + + /* fill up the sm structure */ + for (i = k = 0; i < nr; ++i) { + sm->rowptr[i] = k; + for (j = 0; j < nc; ++j) + if (m[i * nc + j] != 0) { + sm->val[k] = m[i * nc + j]; + sm->colidx[k++] = j; + } + } + sm->rowptr[nr] = nnz; +} + +/* returns the index of the (i, j) element. No bounds checking! */ +int sba_crsm_elmidx(struct sba_crsm *sm, int i, int j) { + register int low, high, mid, diff; + + low = sm->rowptr[i]; + high = sm->rowptr[i + 1] - 1; + + /* binary search for finding the element at column j */ + while (low <= high) { + /* following early termination test seems to actually slow down the search */ + // if(j<sm->colidx[low] || j>sm->colidx[high]) return -1; /* not found */ + + /* mid=low+((high-low)>>1) ensures no index overflows */ + mid = (low + high) >> 1; //(low+high)/2; + diff = j - sm->colidx[mid]; + if (diff < 0) + high = mid - 1; + else if (diff > 0) + low = mid + 1; + else + return mid; + } + + return -1; /* not found */ +} + +/* similarly to sba_crsm_elmidx() above, returns the index of the (i, j) element using the + * fact that the index of element (i, jp) was previously found to be jpidx. This can be + * slightly faster than sba_crsm_elmidx(). No bounds checking! + */ +int sba_crsm_elmidxp(struct sba_crsm *sm, int i, int j, int jp, int jpidx) { + register int low, high, mid, diff; + + diff = j - jp; + if (diff > 0) { + low = jpidx + 1; + high = sm->rowptr[i + 1] - 1; + } else if (diff == 0) + return jpidx; + else { /* diff<0 */ + low = sm->rowptr[i]; + high = jpidx - 1; + } + + /* binary search for finding the element at column j */ + while (low <= high) { + /* following early termination test seems to actually slow down the search */ + // if(j<sm->colidx[low] || j>sm->colidx[high]) return -1; /* not found */ + + /* mid=low+((high-low)>>1) ensures no index overflows */ + mid = (low + high) >> 1; //(low+high)/2; + diff = j - sm->colidx[mid]; + if (diff < 0) + high = mid - 1; + else if (diff > 0) + low = mid + 1; + else + return mid; + } + + return -1; /* not found */ +} + +/* returns the number of nonzero elements in row i and + * fills up the vidxs and jidxs arrays with the val and column + * indexes of the elements found, respectively. + * vidxs and jidxs are assumed preallocated and of max. size sm->nc + */ +int sba_crsm_row_elmidxs(struct sba_crsm *sm, int i, int *vidxs, int *jidxs) { + register int j, k; + + for (j = sm->rowptr[i], k = 0; j < sm->rowptr[i + 1]; ++j, ++k) { + vidxs[k] = j; + jidxs[k] = sm->colidx[j]; + } + + return k; +} + +/* returns the number of nonzero elements in col j and + * fills up the vidxs and iidxs arrays with the val and row + * indexes of the elements found, respectively. + * vidxs and iidxs are assumed preallocated and of max. size sm->nr + */ +int sba_crsm_col_elmidxs(struct sba_crsm *sm, int j, int *vidxs, int *iidxs) { + register int *nextrowptr = sm->rowptr + 1; + register int i, l; + register int low, high, mid, diff; + + for (i = l = 0; i < sm->nr; ++i) { + low = sm->rowptr[i]; + high = nextrowptr[i] - 1; + + /* binary search attempting to find an element at column j */ + while (low <= high) { + // if(j<sm->colidx[low] || j>sm->colidx[high]) break; /* not found */ + + mid = (low + high) >> 1; //(low+high)/2; + diff = j - sm->colidx[mid]; + if (diff < 0) + high = mid - 1; + else if (diff > 0) + low = mid + 1; + else { /* found */ + vidxs[l] = mid; + iidxs[l++] = i; + break; + } + } + } + + return l; +} + +/* a more straighforward (but slower) implementation of the above function */ +/*** +int sba_crsm_col_elmidxs(struct sba_crsm *sm, int j, int *vidxs, int *iidxs) +{ +register int i, k, l; + + for(i=l=0; i<sm->nr; ++i) + for(k=sm->rowptr[i]; k<sm->rowptr[i+1]; ++k) + if(sm->colidx[k]==j){ + vidxs[l]=k; + iidxs[l++]=i; + } + + return l; +} +***/ + +#if 0 +/* returns 1 if there exists a row i having columns j and k, + * i.e. a row i s.t. elements (i, j) and (i, k) are nonzero; + * 0 otherwise + */ +int sba_crsm_common_row(struct sba_crsm *sm, int j, int k) +{ +register int i, low, high, mid, diff; + + if(j==k) return 1; + + for(i=0; i<sm->nr; ++i){ + low=sm->rowptr[i]; + high=sm->rowptr[i+1]-1; + if(j<sm->colidx[low] || j>sm->colidx[high] || /* j not found */ + k<sm->colidx[low] || k>sm->colidx[high]) /* k not found */ + continue; + + /* binary search for finding the element at column j */ + while(low<=high){ + mid=(low+high)>>1; //(low+high)/2; + diff=j-sm->colidx[mid]; + if(diff<0) + high=mid-1; + else if(diff>0) + low=mid+1; + else + goto jfound; + } + + continue; /* j not found */ + +jfound: + if(j>k){ + low=sm->rowptr[i]; + high=mid-1; + } + else{ + low=mid+1; + high=sm->rowptr[i+1]-1; + } + + if(k<sm->colidx[low] || k>sm->colidx[high]) continue; /* k not found */ + + /* binary search for finding the element at column k */ + while(low<=high){ + mid=(low+high)>>1; //(low+high)/2; + diff=k-sm->colidx[mid]; + if(diff<0) + high=mid-1; + else if(diff>0) + low=mid+1; + else /* found */ + return 1; + } + } + + return 0; +} +#endif + +#if 0 + +/* sample code using the above routines */ + +main() +{ +int mat[7][6]={ + {10, 0, 0, 0, -2, 0}, + {3, 9, 0, 0, 0, 3}, + {0, 7, 8, 7, 0, 0}, + {3, 0, 8, 7, 5, 0}, + {0, 8, 0, 9, 9, 13}, + {0, 4, 0, 0, 2, -1}, + {3, 7, 0, 9, 2, 0} +}; + +struct sba_crsm sm; +int i, j, k, l; +int vidxs[7], /* max(6, 7) */ + jidxs[6], iidxs[7]; + + + sba_crsm_build(&sm, mat[0], 7, 6); + sba_crsm_print(&sm, stdout); + + for(i=0; i<7; ++i){ + for(j=0; j<6; ++j) + printf("%3d ", ((k=sba_crsm_elmidx(&sm, i, j))!=-1)? sm.val[k] : 0); + printf("\n"); + } + + for(i=0; i<7; ++i){ + k=sba_crsm_row_elmidxs(&sm, i, vidxs, jidxs); + printf("row %d\n", i); + for(l=0; l<k; ++l){ + j=jidxs[l]; + printf("%d %d ", j, sm.val[vidxs[l]]); + } + printf("\n"); + } + + for(j=0; j<6; ++j){ + k=sba_crsm_col_elmidxs(&sm, j, vidxs, iidxs); + printf("col %d\n", j); + for(l=0; l<k; ++l){ + i=iidxs[l]; + printf("%d %d ", i, sm.val[vidxs[l]]); + } + printf("\n"); + } + + sba_crsm_free(&sm); +} +#endif diff --git a/redist/sba/sba_lapack.c b/redist/sba/sba_lapack.c new file mode 100644 index 0000000..14fc44f --- /dev/null +++ b/redist/sba/sba_lapack.c @@ -0,0 +1,1730 @@ +///////////////////////////////////////////////////////////////////////////////// +//// +//// Linear algebra operations for the sba package +//// Copyright (C) 2004-2009 Manolis Lourakis (lourakis at ics forth gr) +//// Institute of Computer Science, Foundation for Research & Technology - Hellas +//// Heraklion, Crete, Greece. +//// +//// This program is free software; you can redistribute it and/or modify +//// it under the terms of the GNU General Public License as published by +//// the Free Software Foundation; either version 2 of the License, or +//// (at your option) any later version. +//// +//// This program is distributed in the hope that it will be useful, +//// but WITHOUT ANY WARRANTY; without even the implied warranty of +//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//// GNU General Public License for more details. +//// +/////////////////////////////////////////////////////////////////////////////////// + +/* A note on memory alignment: + * + * Several of the functions below use a piece of dynamically allocated memory + * to store variables of different size (i.e., ints and doubles). To avoid + * alignment problems, care must be taken so that elements that are larger + * (doubles) are stored before smaller ones (ints). This ensures proper + * alignment under different alignment choices made by different CPUs: + * For instance, a double variable is aligned on x86 to 4 bytes but + * aligned to 8 bytes on AMD64 despite having the same size of 8 bytes. + */ + +#include <float.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "compiler.h" +#include "sba.h" + +#ifdef SBA_APPEND_UNDERSCORE_SUFFIX +#define F77_FUNC(func) func##_ +#else +#define F77_FUNC(func) func +#endif /* SBA_APPEND_UNDERSCORE_SUFFIX */ + +/* declarations of LAPACK routines employed */ + +/* QR decomposition */ +extern int F77_FUNC(dgeqrf)(int *m, int *n, double *a, int *lda, double *tau, double *work, int *lwork, int *info); +extern int F77_FUNC(dorgqr)(int *m, int *n, int *k, double *a, int *lda, double *tau, double *work, int *lwork, + int *info); + +/* solution of triangular system */ +extern int F77_FUNC(dtrtrs)(char *uplo, char *trans, char *diag, int *n, int *nrhs, double *a, int *lda, double *b, + int *ldb, int *info); + +/* Cholesky decomposition, linear system solution and matrix inversion */ +extern int F77_FUNC(dpotf2)(char *uplo, int *n, double *a, int *lda, int *info); /* unblocked Cholesky */ +extern int F77_FUNC(dpotrf)(char *uplo, int *n, double *a, int *lda, int *info); /* block version of dpotf2 */ +extern int F77_FUNC(dpotrs)(char *uplo, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info); +extern int F77_FUNC(dpotri)(char *uplo, int *n, double *a, int *lda, int *info); + +/* LU decomposition, linear system solution and matrix inversion */ +extern int F77_FUNC(dgetrf)(int *m, int *n, double *a, int *lda, int *ipiv, int *info); /* blocked LU */ +extern int F77_FUNC(dgetf2)(int *m, int *n, double *a, int *lda, int *ipiv, int *info); /* unblocked LU */ +extern int F77_FUNC(dgetrs)(char *trans, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, + int *info); +extern int F77_FUNC(dgetri)(int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info); + +/* SVD */ +extern int F77_FUNC(dgesvd)(char *jobu, char *jobvt, int *m, int *n, double *a, int *lda, double *s, double *u, + int *ldu, double *vt, int *ldvt, double *work, int *lwork, int *info); + +/* lapack 3.0 routine, faster than dgesvd() */ +extern int F77_FUNC(dgesdd)(char *jobz, int *m, int *n, double *a, int *lda, double *s, double *u, int *ldu, double *vt, + int *ldvt, double *work, int *lwork, int *iwork, int *info); + +/* Bunch-Kaufman factorization of a real symmetric matrix A, solution of linear systems and matrix inverse */ +extern int F77_FUNC(dsytrf)(char *uplo, int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, + int *info); /* blocked ver. */ +extern int F77_FUNC(dsytrs)(char *uplo, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, + int *info); +extern int F77_FUNC(dsytri)(char *uplo, int *n, double *a, int *lda, int *ipiv, double *work, int *info); + +/* + * This function returns the solution of Ax = b + * + * The function is based on QR decomposition with explicit computation of Q: + * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes + * Q R x = b or R x = Q^T b. + * + * A is mxm, b is mx1. Argument iscolmaj specifies whether A is + * stored in column or row major order. Note that if iscolmaj==1 + * this function modifies A! + * + * The function returns 0 in case of error, 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_Axb_QR(double *A, double *B, double *x, int m, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0, nb = 0; + + double *a, *r, *tau, *work; + int a_sz, r_sz, tau_sz, tot_sz; + register int i, j; + int info, worksz, nrhs = 1; + register double sum; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + a_sz = (iscolmaj) ? 0 : m * m; + r_sz = m * m; /* only the upper triangular part really needed */ + tau_sz = m; + if (!nb) { +#ifndef SBA_LS_SCARCE_MEMORY + double tmp; + + worksz = -1; // workspace query; optimal size is returned in tmp + F77_FUNC(dgeqrf)((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&worksz, (int *)&info); + nb = ((int)tmp) / m; // optimal worksize is m*nb +#else + nb = 1; // min worksize is m +#endif /* SBA_LS_SCARCE_MEMORY */ + } + worksz = nb * m; + tot_sz = a_sz + r_sz + tau_sz + worksz; + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz * sizeof(double)); + if (!buf) { + fprintf(stderr, "memory allocation in sba_Axb_QR() failed!\n"); + exit(1); + } + } + + if (!iscolmaj) { + a = buf; + /* store A (column major!) into a */ + for (i = 0; i < m; ++i) + for (j = 0; j < m; ++j) + a[i + j * m] = A[i * m + j]; + } else + a = A; /* no copying required */ + + r = buf + a_sz; + tau = r + r_sz; + work = tau + tau_sz; + + /* QR decomposition of A */ + F77_FUNC(dgeqrf)((int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dgeqrf in sba_Axb_QR()\n", -info); + exit(1); + } else { + fprintf(stderr, "Unknown LAPACK error %d for dgeqrf in sba_Axb_QR()\n", info); + return 0; + } + } + + /* R is now stored in the upper triangular part of a; copy it in r so that dorgqr() below won't destroy it */ + for (i = 0; i < r_sz; ++i) + r[i] = a[i]; + + /* compute Q using the elementary reflectors computed by the above decomposition */ + F77_FUNC(dorgqr)((int *)&m, (int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dorgqr in sba_Axb_QR()\n", -info); + exit(1); + } else { + fprintf(stderr, "Unknown LAPACK error (%d) in sba_Axb_QR()\n", info); + return 0; + } + } + + /* Q is now in a; compute Q^T b in x */ + for (i = 0; i < m; ++i) { + for (j = 0, sum = 0.0; j < m; ++j) + sum += a[i * m + j] * B[j]; + x[i] = sum; + } + + /* solve the linear system R x = Q^t b */ + F77_FUNC(dtrtrs)("U", "N", "N", (int *)&m, (int *)&nrhs, r, (int *)&m, x, (int *)&m, &info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QR()\n", -info); + exit(1); + } else { + fprintf(stderr, "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QR()\n", + info); + return 0; + } + } + + return 1; +} + +/* + * This function returns the solution of Ax = b + * + * The function is based on QR decomposition without computation of Q: + * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes + * (A^T A) x = A^T b or (R^T Q^T Q R) x = A^T b or (R^T R) x = A^T b. + * This amounts to solving R^T y = A^T b for y and then R x = y for x + * Note that Q does not need to be explicitly computed + * + * A is mxm, b is mx1. Argument iscolmaj specifies whether A is + * stored in column or row major order. Note that if iscolmaj==1 + * this function modifies A! + * + * The function returns 0 in case of error, 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_Axb_QRnoQ(double *A, double *B, double *x, int m, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0, nb = 0; + + double *a, *tau, *work; + int a_sz, tau_sz, tot_sz; + register int i, j; + int info, worksz, nrhs = 1; + register double sum; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + a_sz = (iscolmaj) ? 0 : m * m; + tau_sz = m; + if (!nb) { +#ifndef SBA_LS_SCARCE_MEMORY + double tmp; + + worksz = -1; // workspace query; optimal size is returned in tmp + F77_FUNC(dgeqrf)((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&worksz, (int *)&info); + nb = ((int)tmp) / m; // optimal worksize is m*nb +#else + nb = 1; // min worksize is m +#endif /* SBA_LS_SCARCE_MEMORY */ + } + worksz = nb * m; + tot_sz = a_sz + tau_sz + worksz; + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz * sizeof(double)); + if (!buf) { + fprintf(stderr, "memory allocation in sba_Axb_QRnoQ() failed!\n"); + exit(1); + } + } + + if (!iscolmaj) { + a = buf; + /* store A (column major!) into a */ + for (i = 0; i < m; ++i) + for (j = 0; j < m; ++j) + a[i + j * m] = A[i * m + j]; + } else + a = A; /* no copying required */ + + tau = buf + a_sz; + work = tau + tau_sz; + + /* compute A^T b in x */ + for (i = 0; i < m; ++i) { + for (j = 0, sum = 0.0; j < m; ++j) + sum += a[i * m + j] * B[j]; + x[i] = sum; + } + + /* QR decomposition of A */ + F77_FUNC(dgeqrf)((int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dgeqrf in sba_Axb_QRnoQ()\n", -info); + exit(1); + } else { + fprintf(stderr, "Unknown LAPACK error %d for dgeqrf in sba_Axb_QRnoQ()\n", info); + return 0; + } + } + + /* R is stored in the upper triangular part of a */ + + /* solve the linear system R^T y = A^t b */ + F77_FUNC(dtrtrs)("U", "T", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QRnoQ()\n", -info); + exit(1); + } else { + fprintf(stderr, + "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QRnoQ()\n", + info); + return 0; + } + } + + /* solve the linear system R x = y */ + F77_FUNC(dtrtrs)("U", "N", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QRnoQ()\n", -info); + exit(1); + } else { + fprintf(stderr, + "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QRnoQ()\n", + info); + return 0; + } + } + + return 1; +} + +/* + * This function returns the solution of Ax=b + * + * The function assumes that A is symmetric & positive definite and employs + * the Cholesky decomposition: + * If A=U^T U with U upper triangular, the system to be solved becomes + * (U^T U) x = b + * This amounts to solving U^T y = b for y and then U x = y for x + * + * A is mxm, b is mx1. Argument iscolmaj specifies whether A is + * stored in column or row major order. Note that if iscolmaj==1 + * this function modifies A! + * + * The function returns 0 in case of error, 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_Axb_Chol(double *A, double *B, double *x, int m, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0; + + double *a; + int a_sz, tot_sz; + register int i, j; + int info, nrhs = 1; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + a_sz = (iscolmaj) ? 0 : m * m; + tot_sz = a_sz; + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz * sizeof(double)); + if (!buf) { + fprintf(stderr, "memory allocation in sba_Axb_Chol() failed!\n"); + exit(1); + } + } + + if (!iscolmaj) { + a = buf; + + /* store A into a and B into x; A is assumed to be symmetric, hence + * the column and row major order representations are the same + */ + for (i = 0; i < m; ++i) { + a[i] = A[i]; + x[i] = B[i]; + } + for (j = m * m; i < j; ++i) // copy remaining rows; note that i is not re-initialized + a[i] = A[i]; + } else { /* no copying is necessary for A */ + a = A; + for (i = 0; i < m; ++i) + x[i] = B[i]; + } + + /* Cholesky decomposition of A */ + // F77_FUNC(dpotf2)("U", (int *)&m, a, (int *)&m, (int *)&info); + F77_FUNC(dpotrf)("U", (int *)&m, a, (int *)&m, (int *)&info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotf2/dpotrf in sba_Axb_Chol()\n", -info); + exit(1); + } else { + fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization " + "could not be completed for dpotf2/dpotrf in sba_Axb_Chol()\n", + info); + return 0; + } + } + +/* below are two alternative ways for solving the linear system: */ +#if 1 + /* use the computed Cholesky in one lapack call */ + F77_FUNC(dpotrs)("U", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotrs in sba_Axb_Chol()\n", -info); + exit(1); + } +#else + /* solve the linear systems U^T y = b, U x = y */ + F77_FUNC(dtrtrs)("U", "T", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_Chol()\n", -info); + exit(1); + } else { + fprintf(stderr, + "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_Chol()\n", + info); + return 0; + } + } + + /* solve U x = y */ + F77_FUNC(dtrtrs)("U", "N", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_Chol()\n", -info); + exit(1); + } else { + fprintf(stderr, + "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_Chol()\n", + info); + return 0; + } + } +#endif /* 1 */ + + return 1; +} + +/* + * This function returns the solution of Ax = b + * + * The function employs LU decomposition: + * If A=L U with L lower and U upper triangular, then the original system + * amounts to solving + * L y = b, U x = y + * + * A is mxm, b is mx1. Argument iscolmaj specifies whether A is + * stored in column or row major order. Note that if iscolmaj==1 + * this function modifies A! + * + * The function returns 0 in case of error, + * 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_Axb_LU(double *A, double *B, double *x, int m, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0; + + int a_sz, ipiv_sz, tot_sz; + register int i, j; + int info, *ipiv, nrhs = 1; + double *a; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + ipiv_sz = m; + a_sz = (iscolmaj) ? 0 : m * m; + tot_sz = a_sz * sizeof(double) + + ipiv_sz * sizeof(int); /* should be arranged in that order for proper doubles alignment */ + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz); + if (!buf) { + fprintf(stderr, "memory allocation in sba_Axb_LU() failed!\n"); + exit(1); + } + } + + if (!iscolmaj) { + a = buf; + ipiv = (int *)(a + a_sz); + + /* store A (column major!) into a and B into x */ + for (i = 0; i < m; ++i) { + for (j = 0; j < m; ++j) + a[i + j * m] = A[i * m + j]; + + x[i] = B[i]; + } + } else { /* no copying is necessary for A */ + a = A; + for (i = 0; i < m; ++i) + x[i] = B[i]; + ipiv = (int *)buf; + } + + /* LU decomposition for A */ + F77_FUNC(dgetrf)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dgetrf illegal in sba_Axb_LU()\n", -info); + exit(1); + } else { + fprintf(stderr, "singular matrix A for dgetrf in sba_Axb_LU()\n"); + return 0; + } + } + + /* solve the system with the computed LU */ + F77_FUNC(dgetrs)("N", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dgetrs illegal in sba_Axb_LU()\n", -info); + exit(1); + } else { + fprintf(stderr, "unknown error for dgetrs in sba_Axb_LU()\n"); + return 0; + } + } + + return 1; +} + +/* + * This function returns the solution of Ax = b + * + * The function is based on SVD decomposition: + * If A=U D V^T with U, V orthogonal and D diagonal, the linear system becomes + * (U D V^T) x = b or x=V D^{-1} U^T b + * Note that V D^{-1} U^T is the pseudoinverse A^+ + * + * A is mxm, b is mx1. Argument iscolmaj specifies whether A is + * stored in column or row major order. Note that if iscolmaj==1 + * this function modifies A! + * + * The function returns 0 in case of error, 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_Axb_SVD(double *A, double *B, double *x, int m, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0; + static double eps = -1.0; + + register int i, j; + double *a, *u, *s, *vt, *work; + int a_sz, u_sz, s_sz, vt_sz, tot_sz; + double thresh, one_over_denom; + register double sum; + int info, rank, worksz, *iwork, iworksz; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + +/* calculate required memory size */ +#ifndef SBA_LS_SCARCE_MEMORY + worksz = -1; // workspace query. Keep in mind that dgesdd requires more memory than dgesvd + /* note that optimal work size is returned in thresh */ + F77_FUNC(dgesdd) + ("A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, (double *)&thresh, + (int *)&worksz, NULL, &info); + /* F77_FUNC(dgesvd)("A", "A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, + (double *)&thresh, (int *)&worksz, &info); */ + worksz = (int)thresh; +#else + worksz = m * (7 * m + 4); // min worksize for dgesdd + // worksz=5*m; // min worksize for dgesvd +#endif /* SBA_LS_SCARCE_MEMORY */ + iworksz = 8 * m; + a_sz = (!iscolmaj) ? m * m : 0; + u_sz = m * m; + s_sz = m; + vt_sz = m * m; + + tot_sz = (a_sz + u_sz + s_sz + vt_sz + worksz) * sizeof(double) + + iworksz * sizeof(int); /* should be arranged in that order for proper doubles alignment */ + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz); + if (!buf) { + fprintf(stderr, "memory allocation in sba_Axb_SVD() failed!\n"); + exit(1); + } + } + + if (!iscolmaj) { + a = buf; + u = a + a_sz; + /* store A (column major!) into a */ + for (i = 0; i < m; ++i) + for (j = 0; j < m; ++j) + a[i + j * m] = A[i * m + j]; + } else { + a = A; /* no copying required */ + u = buf; + } + + s = u + u_sz; + vt = s + s_sz; + work = vt + vt_sz; + iwork = (int *)(work + worksz); + + /* SVD decomposition of A */ + F77_FUNC(dgesdd) + ("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info); + // F77_FUNC(dgesvd)("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int + // *)&worksz, &info); + + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dgesdd/dgesvd in sba_Axb_SVD()\n", -info); + exit(1); + } else { + fprintf(stderr, + "LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in sba_Axb_SVD() [info=%d]\n", + info); + + return 0; + } + } + + if (eps < 0.0) { + double aux; + + /* compute machine epsilon. DBL_EPSILON should do also */ + for (eps = 1.0; aux = eps + 1.0, aux - 1.0 > 0.0; eps *= 0.5) + ; + eps *= 2.0; + } + + /* compute the pseudoinverse in a */ + memset(a, 0, m * m * sizeof(double)); /* initialize to zero */ + for (rank = 0, thresh = eps * s[0]; rank < m && s[rank] > thresh; ++rank) { + one_over_denom = 1.0 / s[rank]; + + for (j = 0; j < m; ++j) + for (i = 0; i < m; ++i) + a[i * m + j] += vt[rank + i * m] * u[j + rank * m] * one_over_denom; + } + + /* compute A^+ b in x */ + for (i = 0; i < m; ++i) { + for (j = 0, sum = 0.0; j < m; ++j) + sum += a[i * m + j] * B[j]; + x[i] = sum; + } + + return 1; +} + +/* + * This function returns the solution of Ax = b for a real symmetric matrix A + * + * The function is based on UDUT factorization with the pivoting + * strategy of Bunch and Kaufman: + * A is factored as U*D*U^T where U is upper triangular and + * D symmetric and block diagonal (aka spectral decomposition, + * Banachiewicz factorization, modified Cholesky factorization) + * + * A is mxm, b is mx1. Argument iscolmaj specifies whether A is + * stored in column or row major order. Note that if iscolmaj==1 + * this function modifies A! + * + * The function returns 0 in case of error, + * 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_Axb_BK(double *A, double *B, double *x, int m, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0, nb = 0; + + int a_sz, ipiv_sz, work_sz, tot_sz; + register int i, j; + int info, *ipiv, nrhs = 1; + double *a, *work; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + ipiv_sz = m; + a_sz = (iscolmaj) ? 0 : m * m; + if (!nb) { +#ifndef SBA_LS_SCARCE_MEMORY + double tmp; + + work_sz = -1; // workspace query; optimal size is returned in tmp + F77_FUNC(dsytrf)("U", (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info); + nb = ((int)tmp) / m; // optimal worksize is m*nb +#else + nb = -1; // min worksize is 1 +#endif /* SBA_LS_SCARCE_MEMORY */ + } + work_sz = (nb != -1) ? nb * m : 1; + tot_sz = (a_sz + work_sz) * sizeof(double) + + ipiv_sz * sizeof(int); /* should be arranged in that order for proper doubles alignment */ + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz); + if (!buf) { + fprintf(stderr, "memory allocation in sba_Axb_BK() failed!\n"); + exit(1); + } + } + + if (!iscolmaj) { + a = buf; + work = a + a_sz; + + /* store A into a and B into x; A is assumed to be symmetric, hence + * the column and row major order representations are the same + */ + for (i = 0; i < m; ++i) { + a[i] = A[i]; + x[i] = B[i]; + } + for (j = m * m; i < j; ++i) // copy remaining rows; note that i is not re-initialized + a[i] = A[i]; + } else { /* no copying is necessary for A */ + a = A; + for (i = 0; i < m; ++i) + x[i] = B[i]; + work = buf; + } + ipiv = (int *)(work + work_sz); + + /* factorization for A */ + F77_FUNC(dsytrf)("U", (int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dsytrf illegal in sba_Axb_BK()\n", -info); + exit(1); + } else { + fprintf(stderr, "singular block diagonal matrix D for dsytrf in sba_Axb_BK() [D(%d, %d) is zero]\n", info, + info); + return 0; + } + } + + /* solve the system with the computed factorization */ + F77_FUNC(dsytrs)("U", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dsytrs illegal in sba_Axb_BK()\n", -info); + exit(1); + } else { + fprintf(stderr, "unknown error for dsytrs in sba_Axb_BK()\n"); + return 0; + } + } + + return 1; +} + +/* + * This function computes the inverse of a square matrix whose upper triangle + * is stored in A into its lower triangle using LU decomposition + * + * The function returns 0 in case of error (e.g. A is singular), + * 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_symat_invert_LU(double *A, int m) { + static double *buf = NULL; + static int buf_sz = 0, nb = 0; + + int a_sz, ipiv_sz, work_sz, tot_sz; + register int i, j; + int info, *ipiv; + double *a, *work; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + ipiv_sz = m; + a_sz = m * m; + if (!nb) { +#ifndef SBA_LS_SCARCE_MEMORY + double tmp; + + work_sz = -1; // workspace query; optimal size is returned in tmp + F77_FUNC(dgetri)((int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info); + nb = ((int)tmp) / m; // optimal worksize is m*nb +#else + nb = 1; // min worksize is m +#endif /* SBA_LS_SCARCE_MEMORY */ + } + work_sz = nb * m; + tot_sz = (a_sz + work_sz) * sizeof(double) + + ipiv_sz * sizeof(int); /* should be arranged in that order for proper doubles alignment */ + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz); + if (!buf) { + fprintf(stderr, "memory allocation in sba_symat_invert_LU() failed!\n"); + exit(1); + } + } + + a = buf; + work = a + a_sz; + ipiv = (int *)(work + work_sz); + + /* store A (column major!) into a */ + for (i = 0; i < m; ++i) + for (j = i; j < m; ++j) + a[i + j * m] = a[j + i * m] = A[i * m + j]; // copy A's upper part to a's upper & lower + + /* LU decomposition for A */ + F77_FUNC(dgetrf)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dgetrf illegal in sba_symat_invert_LU()\n", -info); + exit(1); + } else { + fprintf(stderr, "singular matrix A for dgetrf in sba_symat_invert_LU()\n"); + return 0; + } + } + + /* (A)^{-1} from LU */ + F77_FUNC(dgetri)((int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dgetri illegal in sba_symat_invert_LU()\n", -info); + exit(1); + } else { + fprintf(stderr, "singular matrix A for dgetri in sba_symat_invert_LU()\n"); + return 0; + } + } + + /* store (A)^{-1} in A's lower triangle */ + for (i = 0; i < m; ++i) + for (j = 0; j <= i; ++j) + A[i * m + j] = a[i + j * m]; + + return 1; +} + +/* + * This function computes the inverse of a square symmetric positive definite + * matrix whose upper triangle is stored in A into its lower triangle using + * Cholesky factorization + * + * The function returns 0 in case of error (e.g. A is not positive definite or singular), + * 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_symat_invert_Chol(double *A, int m) { + static double *buf = NULL; + static int buf_sz = 0; + + int a_sz, tot_sz; + register int i, j; + int info; + double *a; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + a_sz = m * m; + tot_sz = a_sz; + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz * sizeof(double)); + if (!buf) { + fprintf(stderr, "memory allocation in sba_symat_invert_Chol() failed!\n"); + exit(1); + } + } + + a = (double *)buf; + + /* store A into a; A is assumed symmetric, hence no transposition is needed */ + for (i = 0, j = a_sz; i < j; ++i) + a[i] = A[i]; + + /* Cholesky factorization for A; a's lower part corresponds to A's upper */ + // F77_FUNC(dpotrf)("L", (int *)&m, a, (int *)&m, (int *)&info); + F77_FUNC(dpotf2)("L", (int *)&m, a, (int *)&m, (int *)&info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotrf in sba_symat_invert_Chol()\n", + -info); + exit(1); + } else { + fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization " + "could not be completed for dpotrf in sba_symat_invert_Chol()\n", + info); + return 0; + } + } + + /* (A)^{-1} from Cholesky */ + F77_FUNC(dpotri)("L", (int *)&m, a, (int *)&m, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dpotri illegal in sba_symat_invert_Chol()\n", -info); + exit(1); + } else { + fprintf(stderr, "the (%d, %d) element of the factor U or L is zero, singular matrix A for dpotri in " + "sba_symat_invert_Chol()\n", + info, info); + return 0; + } + } + + /* store (A)^{-1} in A's lower triangle. The lower triangle of the symmetric A^{-1} is in the lower triangle of a */ + for (i = 0; i < m; ++i) + for (j = 0; j <= i; ++j) + A[i * m + j] = a[i + j * m]; + + return 1; +} + +/* + * This function computes the inverse of a symmetric indefinite + * matrix whose upper triangle is stored in A into its lower triangle + * using LDLT factorization with the pivoting strategy of Bunch and Kaufman + * + * The function returns 0 in case of error (e.g. A is singular), + * 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_symat_invert_BK(double *A, int m) { + static double *buf = NULL; + static int buf_sz = 0, nb = 0; + + int a_sz, ipiv_sz, work_sz, tot_sz; + register int i, j; + int info, *ipiv; + double *a, *work; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + ipiv_sz = m; + a_sz = m * m; + if (!nb) { +#ifndef SBA_LS_SCARCE_MEMORY + double tmp; + + work_sz = -1; // workspace query; optimal size is returned in tmp + F77_FUNC(dsytrf)("L", (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info); + nb = ((int)tmp) / m; // optimal worksize is m*nb +#else + nb = -1; // min worksize is 1 +#endif /* SBA_LS_SCARCE_MEMORY */ + } + work_sz = (nb != -1) ? nb * m : 1; + work_sz = (work_sz >= m) ? work_sz : m; /* ensure that work is at least m elements long, as required by dsytri */ + tot_sz = (a_sz + work_sz) * sizeof(double) + + ipiv_sz * sizeof(int); /* should be arranged in that order for proper doubles alignment */ + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz); + if (!buf) { + fprintf(stderr, "memory allocation in sba_symat_invert_BK() failed!\n"); + exit(1); + } + } + + a = buf; + work = a + a_sz; + ipiv = (int *)(work + work_sz); + + /* store A into a; A is assumed symmetric, hence no transposition is needed */ + for (i = 0, j = a_sz; i < j; ++i) + a[i] = A[i]; + + /* LDLT factorization for A; a's lower part corresponds to A's upper */ + F77_FUNC(dsytrf)("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dsytrf illegal in sba_symat_invert_BK()\n", -info); + exit(1); + } else { + fprintf(stderr, + "singular block diagonal matrix D for dsytrf in sba_symat_invert_BK() [D(%d, %d) is zero]\n", info, + info); + return 0; + } + } + + /* (A)^{-1} from LDLT */ + F77_FUNC(dsytri)("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dsytri illegal in sba_symat_invert_BK()\n", -info); + exit(1); + } else { + fprintf(stderr, + "D(%d, %d)=0, matrix is singular and its inverse could not be computed in sba_symat_invert_BK()\n", + info, info); + return 0; + } + } + + /* store (A)^{-1} in A's lower triangle. The lower triangle of the symmetric A^{-1} is in the lower triangle of a */ + for (i = 0; i < m; ++i) + for (j = 0; j <= i; ++j) + A[i * m + j] = a[i + j * m]; + + return 1; +} + +#define __CG_LINALG_BLOCKSIZE 8 + +/* Dot product of two vectors x and y using loop unrolling and blocking. + * see http://www.abarnett.demon.co.uk/tutorial.html + */ + +inline static double dprod(const int n, const double *const x, const double *const y) { + register int i, j1, j2, j3, j4, j5, j6, j7; + int blockn; + register double sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0, sum5 = 0.0, sum6 = 0.0, sum7 = 0.0; + + /* n may not be divisible by __CG_LINALG_BLOCKSIZE, + * go as near as we can first, then tidy up. + */ + blockn = (n / __CG_LINALG_BLOCKSIZE) * __CG_LINALG_BLOCKSIZE; + + /* unroll the loop in blocks of `__CG_LINALG_BLOCKSIZE' */ + for (i = 0; i < blockn; i += __CG_LINALG_BLOCKSIZE) { + sum0 += x[i] * y[i]; + j1 = i + 1; + sum1 += x[j1] * y[j1]; + j2 = i + 2; + sum2 += x[j2] * y[j2]; + j3 = i + 3; + sum3 += x[j3] * y[j3]; + j4 = i + 4; + sum4 += x[j4] * y[j4]; + j5 = i + 5; + sum5 += x[j5] * y[j5]; + j6 = i + 6; + sum6 += x[j6] * y[j6]; + j7 = i + 7; + sum7 += x[j7] * y[j7]; + } + + /* + * There may be some left to do. + * This could be done as a simple for() loop, + * but a switch is faster (and more interesting) + */ + + if (i < n) { + /* Jump into the case at the place that will allow + * us to finish off the appropriate number of items. + */ + + switch (n - i) { + case 7: + sum0 += x[i] * y[i]; + ++i; + case 6: + sum1 += x[i] * y[i]; + ++i; + case 5: + sum2 += x[i] * y[i]; + ++i; + case 4: + sum3 += x[i] * y[i]; + ++i; + case 3: + sum4 += x[i] * y[i]; + ++i; + case 2: + sum5 += x[i] * y[i]; + ++i; + case 1: + sum6 += x[i] * y[i]; + ++i; + } + } + + return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7; +} + +/* Compute z=x+a*y for two vectors x and y and a scalar a; z can be one of x, y. + * Similarly to the dot product routine, this one uses loop unrolling and blocking + */ + +inline static void daxpy(const int n, double *const z, const double *const x, const double a, const double *const y) { + register int i, j1, j2, j3, j4, j5, j6, j7; + int blockn; + + /* n may not be divisible by __CG_LINALG_BLOCKSIZE, + * go as near as we can first, then tidy up. + */ + blockn = (n / __CG_LINALG_BLOCKSIZE) * __CG_LINALG_BLOCKSIZE; + + /* unroll the loop in blocks of `__CG_LINALG_BLOCKSIZE' */ + for (i = 0; i < blockn; i += __CG_LINALG_BLOCKSIZE) { + z[i] = x[i] + a * y[i]; + j1 = i + 1; + z[j1] = x[j1] + a * y[j1]; + j2 = i + 2; + z[j2] = x[j2] + a * y[j2]; + j3 = i + 3; + z[j3] = x[j3] + a * y[j3]; + j4 = i + 4; + z[j4] = x[j4] + a * y[j4]; + j5 = i + 5; + z[j5] = x[j5] + a * y[j5]; + j6 = i + 6; + z[j6] = x[j6] + a * y[j6]; + j7 = i + 7; + z[j7] = x[j7] + a * y[j7]; + } + + /* + * There may be some left to do. + * This could be done as a simple for() loop, + * but a switch is faster (and more interesting) + */ + + if (i < n) { + /* Jump into the case at the place that will allow + * us to finish off the appropriate number of items. + */ + + switch (n - i) { + case 7: + z[i] = x[i] + a * y[i]; + ++i; + case 6: + z[i] = x[i] + a * y[i]; + ++i; + case 5: + z[i] = x[i] + a * y[i]; + ++i; + case 4: + z[i] = x[i] + a * y[i]; + ++i; + case 3: + z[i] = x[i] + a * y[i]; + ++i; + case 2: + z[i] = x[i] + a * y[i]; + ++i; + case 1: + z[i] = x[i] + a * y[i]; + ++i; + } + } +} + +/* + * This function returns the solution of Ax = b where A is posititive definite, + * based on the conjugate gradients method; see "An intro to the CG method" by J.R. Shewchuk, p. 50-51 + * + * A is mxm, b, x are is mx1. Argument niter specifies the maximum number of + * iterations and eps is the desired solution accuracy. niter<0 signals that + * x contains a valid initial approximation to the solution; if niter>0 then + * the starting point is taken to be zero. Argument prec selects the desired + * preconditioning method as follows: + * 0: no preconditioning + * 1: jacobi (diagonal) preconditioning + * 2: SSOR preconditioning + * Argument iscolmaj specifies whether A is stored in column or row major order. + * + * The function returns 0 in case of error, + * the number of iterations performed if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_Axb_CG(double *A, double *B, double *x, int m, int niter, double eps, int prec, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0; + + register int i, j; + register double *aim; + int iter, a_sz, res_sz, d_sz, q_sz, s_sz, wk_sz, z_sz, tot_sz; + double *a, *res, *d, *q, *s, *wk, *z; + double delta0, deltaold, deltanew, alpha, beta, eps_sq = eps * eps; + register double sum; + int rec_res; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + a_sz = (iscolmaj) ? m * m : 0; + res_sz = m; + d_sz = m; + q_sz = m; + if (prec != SBA_CG_NOPREC) { + s_sz = m; + wk_sz = m; + z_sz = (prec == SBA_CG_SSOR) ? m : 0; + } else + s_sz = wk_sz = z_sz = 0; + + tot_sz = a_sz + res_sz + d_sz + q_sz + s_sz + wk_sz + z_sz; + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz * sizeof(double)); + if (!buf) { + fprintf(stderr, "memory allocation request failed in sba_Axb_CG()\n"); + exit(1); + } + } + + if (iscolmaj) { + a = buf; + /* store A (row major!) into a */ + for (i = 0; i < m; ++i) + for (j = 0, aim = a + i * m; j < m; ++j) + aim[j] = A[i + j * m]; + } else + a = A; /* no copying required */ + + res = buf + a_sz; + d = res + res_sz; + q = d + d_sz; + if (prec != SBA_CG_NOPREC) { + s = q + q_sz; + wk = s + s_sz; + z = (prec == SBA_CG_SSOR) ? wk + wk_sz : NULL; + + for (i = 0; i < m; ++i) { // compute jacobi (i.e. diagonal) preconditioners and save them in wk + sum = a[i * m + i]; + if (sum > DBL_EPSILON || -sum < -DBL_EPSILON) // != 0.0 + wk[i] = 1.0 / sum; + else + wk[i] = 1.0 / DBL_EPSILON; + } + } else { + s = res; + wk = z = NULL; + } + + if (niter > 0) { + for (i = 0; i < m; ++i) { // clear solution and initialize residual vector: res <-- B + x[i] = 0.0; + res[i] = B[i]; + } + } else { + niter = -niter; + + for (i = 0; i < m; ++i) { // initialize residual vector: res <-- B - A*x + for (j = 0, aim = a + i * m, sum = 0.0; j < m; ++j) + sum += aim[j] * x[j]; + res[i] = B[i] - sum; + } + } + + switch (prec) { + case SBA_CG_NOPREC: + for (i = 0, deltanew = 0.0; i < m; ++i) { + d[i] = res[i]; + deltanew += res[i] * res[i]; + } + break; + case SBA_CG_JACOBI: // jacobi preconditioning + for (i = 0, deltanew = 0.0; i < m; ++i) { + d[i] = res[i] * wk[i]; + deltanew += res[i] * d[i]; + } + break; + case SBA_CG_SSOR: // SSOR preconditioning; see the "templates" book, fig. 3.2, p. 44 + for (i = 0; i < m; ++i) { + for (j = 0, sum = 0.0, aim = a + i * m; j < i; ++j) + sum += aim[j] * z[j]; + z[i] = wk[i] * (res[i] - sum); + } + + for (i = m - 1; i >= 0; --i) { + for (j = i + 1, sum = 0.0, aim = a + i * m; j < m; ++j) + sum += aim[j] * d[j]; + d[i] = z[i] - wk[i] * sum; + } + deltanew = dprod(m, res, d); + break; + default: + fprintf(stderr, "unknown preconditioning option %d in sba_Axb_CG\n", prec); + exit(1); + } + + delta0 = deltanew; + + for (iter = 1; deltanew > eps_sq * delta0 && iter <= niter; ++iter) { + for (i = 0; i < m; ++i) { // q <-- A d + aim = a + i * m; + /*** + for(j=0, sum=0.0; j<m; ++j) + sum+=aim[j]*d[j]; + ***/ + q[i] = dprod(m, aim, d); // sum; + } + + /*** + for(i=0, sum=0.0; i<m; ++i) + sum+=d[i]*q[i]; + ***/ + alpha = deltanew / dprod(m, d, q); // deltanew/sum; + + /*** + for(i=0; i<m; ++i) + x[i]+=alpha*d[i]; + ***/ + daxpy(m, x, x, alpha, d); + + if (!(iter % 50)) { + for (i = 0; i < m; ++i) { // accurate computation of the residual vector + aim = a + i * m; + /*** + for(j=0, sum=0.0; j<m; ++j) + sum+=aim[j]*x[j]; + ***/ + res[i] = B[i] - dprod(m, aim, x); // B[i]-sum; + } + rec_res = 0; + } else { + /*** + for(i=0; i<m; ++i) // approximate computation of the residual vector + res[i]-=alpha*q[i]; + ***/ + daxpy(m, res, res, -alpha, q); + rec_res = 1; + } + + if (prec) { + switch (prec) { + case SBA_CG_JACOBI: // jacobi + for (i = 0; i < m; ++i) + s[i] = res[i] * wk[i]; + break; + case SBA_CG_SSOR: // SSOR + for (i = 0; i < m; ++i) { + for (j = 0, sum = 0.0, aim = a + i * m; j < i; ++j) + sum += aim[j] * z[j]; + z[i] = wk[i] * (res[i] - sum); + } + + for (i = m - 1; i >= 0; --i) { + for (j = i + 1, sum = 0.0, aim = a + i * m; j < m; ++j) + sum += aim[j] * s[j]; + s[i] = z[i] - wk[i] * sum; + } + break; + } + } + + deltaold = deltanew; + /*** + for(i=0, sum=0.0; i<m; ++i) + sum+=res[i]*s[i]; + ***/ + deltanew = dprod(m, res, s); // sum; + + /* make sure that we get around small delta that are due to + * accumulated floating point roundoff errors + */ + if (rec_res && deltanew <= eps_sq * delta0) { + /* analytically recompute delta */ + for (i = 0; i < m; ++i) { + for (j = 0, aim = a + i * m, sum = 0.0; j < m; ++j) + sum += aim[j] * x[j]; + res[i] = B[i] - sum; + } + deltanew = dprod(m, res, s); + } + + beta = deltanew / deltaold; + + /*** + for(i=0; i<m; ++i) + d[i]=s[i]+beta*d[i]; + ***/ + daxpy(m, d, s, beta, d); + } + + return iter; +} + +/* + * This function computes the Cholesky decomposition of the inverse of a symmetric + * (covariance) matrix A into B, i.e. B is s.t. A^-1=B^t*B and B upper triangular. + * A and B can coincide + * + * The function returns 0 in case of error (e.g. A is singular), + * 1 if successfull + * + * This function is often called repetitively to operate on matrices of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + * + */ +#if 0 +int sba_mat_cholinv(double *A, double *B, int m) +{ +static double *buf=NULL; +static int buf_sz=0, nb=0; + +int a_sz, ipiv_sz, work_sz, tot_sz; +register int i, j; +int info, *ipiv; +double *a, *work; + + if(A==NULL){ + if(buf) free(buf); + buf=NULL; + buf_sz=0; + + return 1; + } + + /* calculate the required memory size */ + ipiv_sz=m; + a_sz=m*m; + if(!nb){ +#ifndef SBA_LS_SCARCE_MEMORY + double tmp; + + work_sz=-1; // workspace query; optimal size is returned in tmp + F77_FUNC(dgetri)((int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info); + nb=((int)tmp)/m; // optimal worksize is m*nb +#else + nb=1; // min worksize is m +#endif /* SBA_LS_SCARCE_MEMORY */ + } + work_sz=nb*m; + tot_sz=(a_sz + work_sz)*sizeof(double) + ipiv_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */ + + if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ + if(buf) free(buf); /* free previously allocated memory */ + + buf_sz=tot_sz; + buf=(double *)malloc(buf_sz); + if(!buf){ + fprintf(stderr, "memory allocation in sba_mat_cholinv() failed!\n"); + exit(1); + } + } + + a=buf; + work=a+a_sz; + ipiv=(int *)(work+work_sz); + + /* step 1: invert A */ + /* store A into a; A is assumed symmetric, hence no transposition is needed */ + for(i=0; i<m*m; ++i) + a[i]=A[i]; + + /* LU decomposition for A (Cholesky should also do) */ + F77_FUNC(dgetf2)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info); + //F77_FUNC(dgetrf)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info); + if(info!=0){ + if(info<0){ + fprintf(stderr, "argument %d of dgetf2/dgetrf illegal in sba_mat_cholinv()\n", -info); + exit(1); + } + else{ + fprintf(stderr, "singular matrix A for dgetf2/dgetrf in sba_mat_cholinv()\n"); + return 0; + } + } + + /* (A)^{-1} from LU */ + F77_FUNC(dgetri)((int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info); + if(info!=0){ + if(info<0){ + fprintf(stderr, "argument %d of dgetri illegal in sba_mat_cholinv()\n", -info); + exit(1); + } + else{ + fprintf(stderr, "singular matrix A for dgetri in sba_mat_cholinv()\n"); + return 0; + } + } + + /* (A)^{-1} is now in a (in column major!) */ + + /* step 2: Cholesky decomposition of a: A^-1=B^t B, B upper triangular */ + F77_FUNC(dpotf2)("U", (int *)&m, a, (int *)&m, (int *)&info); + /* error treatment */ + if(info!=0){ + if(info<0){ + fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotf2 in sba_mat_cholinv()\n", -info); + exit(1); + } + else{ + fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\n%s\n", info, + "and the Cholesky factorization could not be completed in sba_mat_cholinv()"); + return 0; + } + } + + /* the decomposition is in the upper part of a (in column-major order!). + * copying it to the lower part and zeroing the upper transposes + * a in row-major order + */ + for(i=0; i<m; ++i) + for(j=0; j<i; ++j){ + a[i+j*m]=a[j+i*m]; + a[j+i*m]=0.0; + } + for(i=0; i<m*m; ++i) + B[i]=a[i]; + + return 1; +} +#endif + +int sba_mat_cholinv(double *A, double *B, int m) { + int a_sz; + register int i, j; + int info; + double *a; + + if (A == NULL) { + return 1; + } + + a_sz = m * m; + a = B; /* use B as working memory, result is produced in it */ + + /* step 1: invert A */ + /* store A into a; A is assumed symmetric, hence no transposition is needed */ + for (i = 0; i < a_sz; ++i) + a[i] = A[i]; + + /* Cholesky decomposition for A */ + F77_FUNC(dpotf2)("U", (int *)&m, a, (int *)&m, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dpotf2 illegal in sba_mat_cholinv()\n", -info); + exit(1); + } else { + fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\n%s\n", info, + "and the Cholesky factorization could not be completed in sba_mat_cholinv()"); + return 0; + } + } + + /* (A)^{-1} from Cholesky */ + F77_FUNC(dpotri)("U", (int *)&m, a, (int *)&m, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dpotri illegal in sba_mat_cholinv()\n", -info); + exit(1); + } else { + fprintf(stderr, "the (%d, %d) element of the factor U or L is zero, singular matrix A for dpotri in " + "sba_mat_cholinv()\n", + info, info); + return 0; + } + } + + /* (A)^{-1} is now in a (in column major!) */ + + /* step 2: Cholesky decomposition of a: A^-1=B^t B, B upper triangular */ + F77_FUNC(dpotf2)("U", (int *)&m, a, (int *)&m, (int *)&info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotf2 in sba_mat_cholinv()\n", -info); + exit(1); + } else { + fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\n%s\n", info, + "and the Cholesky factorization could not be completed in sba_mat_cholinv()"); + return 0; + } + } + + /* the decomposition is in the upper part of a (in column-major order!). + * copying it to the lower part and zeroing the upper transposes + * a in row-major order + */ + for (i = 0; i < m; ++i) + for (j = 0; j < i; ++j) { + a[i + j * m] = a[j + i * m]; + a[j + i * m] = 0.0; + } + + return 1; +} diff --git a/redist/sba/sba_levmar.c b/redist/sba/sba_levmar.c new file mode 100644 index 0000000..2f85f2a --- /dev/null +++ b/redist/sba/sba_levmar.c @@ -0,0 +1,2715 @@ +///////////////////////////////////////////////////////////////////////////////// +//// +//// Expert drivers for sparse bundle adjustment based on the +//// Levenberg - Marquardt minimization algorithm +//// Copyright (C) 2004-2009 Manolis Lourakis (lourakis at ics forth gr) +//// Institute of Computer Science, Foundation for Research & Technology - Hellas +//// Heraklion, Crete, Greece. +//// +//// This program is free software; you can redistribute it and/or modify +//// it under the terms of the GNU General Public License as published by +//// the Free Software Foundation; either version 2 of the License, or +//// (at your option) any later version. +//// +//// This program is distributed in the hope that it will be useful, +//// but WITHOUT ANY WARRANTY; without even the implied warranty of +//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//// GNU General Public License for more details. +//// +/////////////////////////////////////////////////////////////////////////////////// + +#include <float.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "compiler.h" +#include "sba.h" +#include "sba_chkjac.h" + +#define SBA_EPSILON 1E-12 +#define SBA_EPSILON_SQ ((SBA_EPSILON) * (SBA_EPSILON)) + +#define SBA_ONE_THIRD 0.3333333334 /* 1.0/3.0 */ + +#define emalloc(sz) emalloc_(__FILE__, __LINE__, sz) + +#define FABS(x) (((x) >= 0) ? (x) : -(x)) + +#define ROW_MAJOR 0 +#define COLUMN_MAJOR 1 +#define MAT_STORAGE COLUMN_MAJOR + +/* contains information necessary for computing a finite difference approximation to a jacobian, + * e.g. function to differentiate, problem dimensions and pointers to working memory buffers + */ +struct fdj_data_x_ { + void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, + void *adata); /* function to differentiate */ + int cnp, pnp, mnp; /* parameter numbers */ + int *func_rcidxs, *func_rcsubs; /* working memory for func invocations. + * Notice that this has to be different + * than the working memory used for + * evaluating the jacobian! + */ + double *hx, *hxx; /* memory to save results in */ + void *adata; +}; + +/* auxiliary memory allocation routine with error checking */ +inline static void *emalloc_(char *file, int line, size_t sz) { + void *ptr; + + ptr = (void *)malloc(sz); + if (ptr == NULL) { + fprintf(stderr, "SBA: memory allocation request for %zu bytes failed in file %s, line %d, exiting", sz, file, + line); + exit(1); + } + + return ptr; +} + +/* auxiliary routine for clearing an array of doubles */ +inline static void _dblzero(register double *arr, register int count) { + while (--count >= 0) + *arr++ = 0.0; +} + +/* auxiliary routine for computing the mean reprojection error; used for debugging */ +static double sba_mean_repr_error(int n, int mnp, double *x, double *hx, struct sba_crsm *idxij, int *rcidxs, + int *rcsubs) { + register int i, j; + int nnz, nprojs; + double *ptr1, *ptr2; + double err; + + for (i = nprojs = 0, err = 0.0; i < n; ++i) { + nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ + nprojs += nnz; + for (j = 0; j < nnz; ++j) { /* point i projecting on camera rcsubs[j] */ + ptr1 = x + idxij->val[rcidxs[j]] * mnp; + ptr2 = hx + idxij->val[rcidxs[j]] * mnp; + + err += sqrt((ptr1[0] - ptr2[0]) * (ptr1[0] - ptr2[0]) + (ptr1[1] - ptr2[1]) * (ptr1[1] - ptr2[1])); + } + } + + return err / ((double)(nprojs)); +} + +/* print the solution in p using sba's text format. If cnp/pnp==0 only points/cameras are printed */ +static void sba_print_sol(int n, int m, double *p, int cnp, int pnp, double *x, int mnp, struct sba_crsm *idxij, + int *rcidxs, int *rcsubs) { + register int i, j, ii; + int nnz; + double *ptr; + + if (cnp) { + /* print camera parameters */ + for (j = 0; j < m; ++j) { + ptr = p + cnp * j; + for (ii = 0; ii < cnp; ++ii) + printf("%g ", ptr[ii]); + printf("\n"); + } + } + + if (pnp) { + /* 3D & 2D point parameters */ + printf("\n\n\n# X Y Z nframes frame0 x0 y0 frame1 x1 y1 ...\n"); + for (i = 0; i < n; ++i) { + ptr = p + cnp * m + i * pnp; + for (ii = 0; ii < pnp; ++ii) // print 3D coordinates + printf("%g ", ptr[ii]); + + nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ + printf("%d ", nnz); + + for (j = 0; j < nnz; ++j) { /* point i projecting on camera rcsubs[j] */ + ptr = x + idxij->val[rcidxs[j]] * mnp; + + printf("%d ", rcsubs[j]); + for (ii = 0; ii < mnp; ++ii) // print 2D coordinates + printf("%g ", ptr[ii]); + } + printf("\n"); + } + } +} + +/* Compute e=x-y for two n-vectors x and y and return the squared L2 norm of e. + * e can coincide with either x or y. + * Uses loop unrolling and blocking to reduce bookkeeping overhead & pipeline + * stalls and increase instruction-level parallelism; see http://www.abarnett.demon.co.uk/tutorial.html + */ +static double nrmL2xmy(double *const e, const double *const x, const double *const y, const int n) { + const int blocksize = 8, bpwr = 3; /* 8=2^3 */ + register int i; + int j1, j2, j3, j4, j5, j6, j7; + int blockn; + register double sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3 = 0.0; + + /* n may not be divisible by blocksize, + * go as near as we can first, then tidy up. + */ + blockn = (n >> bpwr) << bpwr; /* (n / blocksize) * blocksize; */ + + /* unroll the loop in blocks of `blocksize'; looping downwards gains some more speed */ + for (i = blockn - 1; i > 0; i -= blocksize) { + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + j1 = i - 1; + e[j1] = x[j1] - y[j1]; + sum1 += e[j1] * e[j1]; + j2 = i - 2; + e[j2] = x[j2] - y[j2]; + sum2 += e[j2] * e[j2]; + j3 = i - 3; + e[j3] = x[j3] - y[j3]; + sum3 += e[j3] * e[j3]; + j4 = i - 4; + e[j4] = x[j4] - y[j4]; + sum0 += e[j4] * e[j4]; + j5 = i - 5; + e[j5] = x[j5] - y[j5]; + sum1 += e[j5] * e[j5]; + j6 = i - 6; + e[j6] = x[j6] - y[j6]; + sum2 += e[j6] * e[j6]; + j7 = i - 7; + e[j7] = x[j7] - y[j7]; + sum3 += e[j7] * e[j7]; + } + + /* + * There may be some left to do. + * This could be done as a simple for() loop, + * but a switch is faster (and more interesting) + */ + + i = blockn; + if (i < n) { + /* Jump into the case at the place that will allow + * us to finish off the appropriate number of items. + */ + switch (n - i) { + case 7: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + case 6: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + case 5: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + case 4: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + case 3: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + case 2: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + case 1: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + } + } + + return sum0 + sum1 + sum2 + sum3; +} + +/* Compute e=W*(x-y) for two n-vectors x and y and return the squared L2 norm of e. + * This norm equals the squared C norm of x-y with C=W^T*W, W being block diagonal + * matrix with nvis mnp*mnp blocks: e^T*e=(x-y)^T*W^T*W*(x-y)=||x-y||_C. + * Note that n=nvis*mnp; e can coincide with either x or y. + * + * Similarly to nrmL2xmy() above, uses loop unrolling and blocking + */ +static double nrmCxmy(double *const e, const double *const x, const double *const y, const double *const W, + const int mnp, const int nvis) { + const int n = nvis * mnp; + const int blocksize = 8, bpwr = 3; /* 8=2^3 */ + register int i, ii, k; + int j1, j2, j3, j4, j5, j6, j7; + int blockn, mnpsq; + register double norm, sum; + register const double *Wptr, *auxptr; + register double *eptr; + + /* n may not be divisible by blocksize, + * go as near as we can first, then tidy up. + */ + blockn = (n >> bpwr) << bpwr; /* (n / blocksize) * blocksize; */ + + /* unroll the loop in blocks of `blocksize'; looping downwards gains some more speed */ + for (i = blockn - 1; i > 0; i -= blocksize) { + e[i] = x[i] - y[i]; + j1 = i - 1; + e[j1] = x[j1] - y[j1]; + j2 = i - 2; + e[j2] = x[j2] - y[j2]; + j3 = i - 3; + e[j3] = x[j3] - y[j3]; + j4 = i - 4; + e[j4] = x[j4] - y[j4]; + j5 = i - 5; + e[j5] = x[j5] - y[j5]; + j6 = i - 6; + e[j6] = x[j6] - y[j6]; + j7 = i - 7; + e[j7] = x[j7] - y[j7]; + } + + /* + * There may be some left to do. + * This could be done as a simple for() loop, + * but a switch is faster (and more interesting) + */ + + i = blockn; + if (i < n) { + /* Jump into the case at the place that will allow + * us to finish off the appropriate number of items. + */ + switch (n - i) { + case 7: + e[i] = x[i] - y[i]; + ++i; + case 6: + e[i] = x[i] - y[i]; + ++i; + case 5: + e[i] = x[i] - y[i]; + ++i; + case 4: + e[i] = x[i] - y[i]; + ++i; + case 3: + e[i] = x[i] - y[i]; + ++i; + case 2: + e[i] = x[i] - y[i]; + ++i; + case 1: + e[i] = x[i] - y[i]; + ++i; + } + } + + /* compute w_x_ij e_ij in e and its L2 norm. + * Since w_x_ij is upper triangular, the products can be safely saved + * directly in e_ij, without the need for intermediate storage + */ + mnpsq = mnp * mnp; + /* Wptr, eptr point to w_x_ij, e_ij below */ + for (i = 0, Wptr = W, eptr = e, norm = 0.0; i < nvis; ++i, Wptr += mnpsq, eptr += mnp) { + for (ii = 0, auxptr = Wptr; ii < mnp; ++ii, auxptr += mnp) { /* auxptr=Wptr+ii*mnp */ + for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular + sum += auxptr[k] * eptr[k]; // Wptr[ii*mnp+k]*eptr[k]; + eptr[ii] = sum; + norm += sum * sum; + } + } + + return norm; +} + +/* search for & print image projection components that are infinite; useful for identifying errors */ +static void sba_print_inf(double *hx, int nimgs, int mnp, struct sba_crsm *idxij, int *rcidxs, int *rcsubs) { + register int i, j, k; + int nnz; + double *phxij; + + for (j = 0; j < nimgs; ++j) { + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */ + for (i = 0; i < nnz; ++i) { + phxij = hx + idxij->val[rcidxs[i]] * mnp; + for (k = 0; k < mnp; ++k) + if (!SBA_FINITE(phxij[k])) + printf("SBA: component %d of the estimated projection of point %d on camera %d is invalid!\n", k, + rcsubs[i], j); + } + } +} + +/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in + * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. + * The jacobian is approximated with the aid of finite differences and is returned in the order + * (A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm), + * where A_ij=dx_ij/da_j and B_ij=dx_ij/db_i (see HZ). + * Notice that depending on idxij, some of the A_ij, B_ij might be missing + * + * Problem-specific information is assumed to be stored in a structure pointed to by "dat". + * + * NOTE: The jacobian (for n=4, m=3) in matrix form has the following structure: + * A_11 0 0 B_11 0 0 0 + * 0 A_12 0 B_12 0 0 0 + * 0 0 A_13 B_13 0 0 0 + * A_21 0 0 0 B_21 0 0 + * 0 A_22 0 0 B_22 0 0 + * 0 0 A_23 0 B_23 0 0 + * A_31 0 0 0 0 B_31 0 + * 0 A_32 0 0 0 B_32 0 + * 0 0 A_33 0 0 B_33 0 + * A_41 0 0 0 0 0 B_41 + * 0 A_42 0 0 0 0 B_42 + * 0 0 A_43 0 0 0 B_43 + * To reduce the total number of objective function evaluations, this structure can be + * exploited as follows: A certain d is added to the j-th parameters of all cameras and + * the objective function is evaluated at the resulting point. This evaluation suffices + * to compute the corresponding columns of *all* A_ij through finite differences. A similar + * strategy allows the computation of the B_ij. Overall, only cnp+pnp+1 objective function + * evaluations are needed to compute the jacobian, much fewer compared to the m*cnp+n*pnp+1 + * that would be required by the naive strategy of computing one column of the jacobian + * per function evaluation. See Nocedal-Wright, ch. 7, pp. 169. Although this approach is + * much faster compared to the naive strategy, it is not preferable to analytic jacobians, + * since the latter are considerably faster to compute and result in fewer LM iterations. + */ +static void +sba_fdjac_x(double *p, /* I: current parameter estimate, (m*cnp+n*pnp)x1 */ + struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */ + int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */ + int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */ + double *jac, /* O: array for storing the approximated jacobian */ + void *dat) /* I: points to a "fdj_data_x_" structure */ +{ + register int i, j, ii, jj; + double *pa, *pb, *pqr, *ppt; + register double *pAB, *phx, *phxx; + int n, m, nm, nnz, Asz, Bsz, ABsz, idx; + + double *tmpd; + register double d; + + struct fdj_data_x_ *fdjd; + void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata); + double *hx, *hxx; + int cnp, pnp, mnp; + void *adata; + + /* retrieve problem-specific information passed in *dat */ + fdjd = (struct fdj_data_x_ *)dat; + func = fdjd->func; + cnp = fdjd->cnp; + pnp = fdjd->pnp; + mnp = fdjd->mnp; + hx = fdjd->hx; + hxx = fdjd->hxx; + adata = fdjd->adata; + + n = idxij->nr; + m = idxij->nc; + pa = p; + pb = p + m * cnp; + Asz = mnp * cnp; + Bsz = mnp * pnp; + ABsz = Asz + Bsz; + + nm = (n >= m) ? n : m; // max(n, m); + tmpd = (double *)emalloc(nm * sizeof(double)); + + (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hx, + adata); // evaluate supplied function on current solution + + if (cnp) { // is motion varying? + /* compute A_ij */ + for (jj = 0; jj < cnp; ++jj) { + for (j = 0; j < m; ++j) { + pqr = pa + j * cnp; // j-th camera parameters + /* determine d=max(SBA_DELTA_SCALE*|pqr[jj]|, SBA_MIN_DELTA), see HZ */ + d = (double)(SBA_DELTA_SCALE)*pqr[jj]; // force evaluation + d = FABS(d); + if (d < SBA_MIN_DELTA) + d = SBA_MIN_DELTA; + + tmpd[j] = d; + pqr[jj] += d; + } + + (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata); + + for (j = 0; j < m; ++j) { + pqr = pa + j * cnp; // j-th camera parameters + pqr[jj] -= tmpd[j]; /* restore */ + d = 1.0 / tmpd[j]; /* invert so that divisions can be carried out faster as multiplications */ + + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ + for (i = 0; i < nnz; ++i) { + idx = idxij->val[rcidxs[i]]; + phx = hx + idx * mnp; // set phx to point to hx_ij + phxx = hxx + idx * mnp; // set phxx to point to hxx_ij + pAB = jac + idx * ABsz; // set pAB to point to A_ij + + for (ii = 0; ii < mnp; ++ii) + pAB[ii * cnp + jj] = (phxx[ii] - phx[ii]) * d; + } + } + } + } + + if (pnp) { // is structure varying? + /* compute B_ij */ + for (jj = 0; jj < pnp; ++jj) { + for (i = 0; i < n; ++i) { + ppt = pb + i * pnp; // i-th point parameters + /* determine d=max(SBA_DELTA_SCALE*|ppt[jj]|, SBA_MIN_DELTA), see HZ */ + d = (double)(SBA_DELTA_SCALE)*ppt[jj]; // force evaluation + d = FABS(d); + if (d < SBA_MIN_DELTA) + d = SBA_MIN_DELTA; + + tmpd[i] = d; + ppt[jj] += d; + } + + (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata); + + for (i = 0; i < n; ++i) { + ppt = pb + i * pnp; // i-th point parameters + ppt[jj] -= tmpd[i]; /* restore */ + d = 1.0 / tmpd[i]; /* invert so that divisions can be carried out faster as multiplications */ + + nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + idx = idxij->val[rcidxs[j]]; + phx = hx + idx * mnp; // set phx to point to hx_ij + phxx = hxx + idx * mnp; // set phxx to point to hxx_ij + pAB = jac + idx * ABsz + Asz; // set pAB to point to B_ij + + for (ii = 0; ii < mnp; ++ii) + pAB[ii * pnp + jj] = (phxx[ii] - phx[ii]) * d; + } + } + } + } + + free(tmpd); +} + +typedef int (*PLS)(double *A, double *B, double *x, int m, int iscolmaj); + +/* Bundle adjustment on camera and structure parameters + * using the sparse Levenberg-Marquardt as described in HZ p. 568 + * + * Returns the number of iterations (>=0) if successfull, SBA_ERROR if failed + */ + +int sba_motstr_levmar_x( + const int n, /* number of points */ + const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified. + * All B_ij (see below) with i<ncon are assumed to be zero + */ + const int m, /* number of images */ + const int mcon, /* number of images (starting from the 1st) whose parameters should not be modified. + * All A_ij (see below) with j<mcon are assumed to be zero + */ + char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */ + double *p, /* initial parameter vector p0: (a1, ..., am, b1, ..., bn). + * aj are the image j parameters, bi are the i-th point parameters, + * size m*cnp + n*pnp + */ + const int cnp, /* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */ + const int pnp, /* number of parameters for ONE point; e.g. 3 for Euclidean points */ + double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where + * x_ij is the projection of the i-th point on the j-th image. + * NOTE: some of the x_ij might be missing, if point i is not visible in image j; + * see vmask[i, j], max. size n*m*mnp + */ + double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm), + * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no + * covariance estimates are available (identity matrices are implicitly used in this case). + * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing; + * see vmask[i, j], max. size n*m*mnp*mnp + */ + const int mnp, /* number of parameters for EACH measurement; usually 2 */ + void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + /* functional relation describing measurements. Given a parameter vector p, + * computes a prediction of the measurements \hat{x}. p is (m*cnp + n*pnp)x1, + * \hat{x} is (n*m*mnp)x1, maximum + * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used + * as working memory + */ + void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), + /* function to evaluate the sparse jacobian dX/dp. + * The Jacobian is returned in jac as + * (dx_11/da_1, ..., dx_1m/da_m, ..., dx_n1/da_1, ..., dx_nm/da_m, + * dx_11/db_1, ..., dx_1m/db_1, ..., dx_n1/db_n, ..., dx_nm/db_n), or (using HZ's notation), + * jac=(A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm) + * Notice that depending on idxij, some of the A_ij and B_ij might be missing. + * Note also that A_ij and B_ij are mnp x cnp and mnp x pnp matrices resp. and they + * should be stored in jac in row-major order. + * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used + * as working memory + * + * If NULL, the jacobian is approximated by repetitive func calls and finite + * differences. This is computationally inefficient and thus NOT recommended. + */ + void *adata, /* pointer to possibly additional data, passed uninterpreted to func, fjac */ + + const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate + return */ + const int verbose, /* I: verbosity */ + const double opts[SBA_OPTSSZ], + /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial + * \mu, + * stopping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2 + */ + double info[SBA_INFOSZ] + /* O: information regarding the minimization. Set to NULL if don't care + * info[0]=||e||_2 at initial p. + * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. + * info[5]= # iterations, + * info[6]=reason for terminating: 1 - stopped by small gradient J^T e + * 2 - stopped by small dp + * 3 - stopped by itmax + * 4 - stopped by small relative reduction in ||e||_2 + * 5 - stopped by small ||e||_2 + * 6 - too many attempts to increase damping. Restart with increased mu + * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error + * info[7]= # function evaluations + * info[8]= # jacobian evaluations + * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error + */ + ) { + register int i, j, ii, jj, k, l; + int nvis, nnz, retval; + + /* The following are work arrays that are dynamically allocated by sba_motstr_levmar_x() */ + double *jac; /* work array for storing the jacobian, max. size n*m*(mnp*cnp + mnp*pnp) */ + double *U; /* work array for storing the U_j in the order U_1, ..., U_m, size m*cnp*cnp */ + double + *V; /* work array for storing the *strictly upper triangles* of V_i in the order V_1, ..., V_n, size n*pnp*pnp. + * V also stores the lower triangles of (V*_i)^-1 in the order (V*_1)^-1, ..., (V*_n)^-1. + * Note that diagonal elements of V_1 are saved in diagUV + */ + + double *e; /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm, + max. size n*m*mnp */ + double + *eab; /* work array for storing the ea_j & eb_i in the order ea_1, .. ea_m eb_1, .. eb_n size m*cnp + n*pnp */ + + double *E; /* work array for storing the e_j in the order e_1, .. e_m, size m*cnp */ + + /* Notice that the blocks W_ij, Y_ij are zero iff A_ij (equivalently B_ij) is zero. This means + * that the matrix consisting of blocks W_ij is itself sparse, similarly to the + * block matrix made up of the A_ij and B_ij (i.e. jac) + */ + double *W; /* work array for storing the W_ij in the order W_11, ..., W_1m, ..., W_n1, ..., W_nm, + max. size n*m*cnp*pnp */ + double *Yj; /* work array for storing the Y_ij for a *fixed* j in the order Y_1j, Y_nj, + max. size n*cnp*pnp */ + double *YWt; /* work array for storing \sum_i Y_ij W_ik^T, size cnp*cnp */ + double *S; /* work array for storing the block array S_jk, size m*m*cnp*cnp */ + double *dp; /* work array for storing the parameter vector updates da_1, ..., da_m, db_1, ..., db_n, size m*cnp + + n*pnp */ + double *Wtda; /* work array for storing \sum_j W_ij^T da_j, size pnp */ + double *wght = /* work array for storing the weights computed from the covariance inverses, max. size n*m*mnp*mnp */ + NULL; + + /* Of the above arrays, jac, e, W, Yj, wght are sparse and + * U, V, eab, E, S, dp are dense. Sparse arrays (except Yj) are indexed + * through idxij (see below), that is with the same mechanism as the input + * measurements vector x + */ + + double *pa, *pb, *ea, *eb, *dpa, *dpb; /* pointers into p, jac, eab and dp respectively */ + + /* submatrices sizes */ + int Asz, Bsz, ABsz, Usz, Vsz, Wsz, Ysz, esz, easz, ebsz, YWtsz, Wtdasz, Sblsz, covsz; + + int Sdim; /* S matrix actual dimension */ + + register double *ptr1, *ptr2, *ptr3, *ptr4, sum; + struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also + * the location of A_ij, B_ij in jac, etc. + * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous + * index k and it is used to efficiently lookup the memory locations where the non-zero + * blocks of a sparse matrix/vector are stored + */ + int maxCvis, /* max. of projections of a single point across cameras, <=m */ + maxPvis, /* max. of projections in a single camera across points, <=n */ + maxCPvis, /* max. of the above */ + *rcidxs, /* work array for the indexes corresponding to the nonzero elements of a single row or + column in a sparse matrix, size max(n, m) */ + *rcsubs; /* work array for the subscripts of nonzero elements in a single row or column of a + sparse matrix, size max(n, m) */ + + /* The following variables are needed by the LM algorithm */ + register int itno; /* iteration counter */ + int issolved; + /* temporary work arrays that are dynamically allocated */ + double *hx, /* \hat{x}_i, max. size m*n*mnp */ + *diagUV, /* diagonals of U_j, V_i, size m*cnp + n*pnp */ + *pdp; /* p + dp, size m*cnp + n*pnp */ + + double *diagU, *diagV; /* pointers into diagUV */ + + register double mu, /* damping constant */ + tmp; /* mainly used in matrix & vector multiplications */ + double p_eL2, eab_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */ + double p_L2, dp_L2 = DBL_MAX, dF, dL; + double tau = FABS(opts[0]), eps1 = FABS(opts[1]), eps2 = FABS(opts[2]), eps2_sq = opts[2] * opts[2], + eps3_sq = opts[3] * opts[3], eps4_sq = opts[4] * opts[4]; + double init_p_eL2; + int nu = 2, nu2, stop = 0, nfev, njev = 0, nlss = 0; + int nobs, nvars; + const int mmcon = m - mcon; + PLS linsolver = NULL; + int (*matinv)(double *A, int m) = NULL; + + struct fdj_data_x_ fdj_data; + void *jac_adata; + + /* Initialization */ + mu = eab_inf = 0.0; /* -Wall */ + + /* block sizes */ + Asz = mnp * cnp; + Bsz = mnp * pnp; + ABsz = Asz + Bsz; + Usz = cnp * cnp; + Vsz = pnp * pnp; + Wsz = cnp * pnp; + Ysz = cnp * pnp; + esz = mnp; + easz = cnp; + ebsz = pnp; + YWtsz = cnp * cnp; + Wtdasz = pnp; + Sblsz = cnp * cnp; + Sdim = mmcon * cnp; + covsz = mnp * mnp; + + /* count total number of visible image points */ + for (i = nvis = 0, jj = n * m; i < jj; ++i) + nvis += (vmask[i] != 0); + + nobs = nvis * mnp; + nvars = m * cnp + n * pnp; + if (nobs < nvars) { + fprintf(stderr, + "SBA: sba_motstr_levmar_x() cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", + nobs, nvars); + return SBA_ERROR; + } + + /* allocate & fill up the idxij structure */ + sba_crsm_alloc(&idxij, n, m, nvis); + for (i = k = 0; i < n; ++i) { + idxij.rowptr[i] = k; + ii = i * m; + for (j = 0; j < m; ++j) + if (vmask[ii + j]) { + idxij.val[k] = k; + idxij.colidx[k++] = j; + } + } + idxij.rowptr[n] = nvis; + + /* find the maximum number (for all cameras) of visible image projections coming from a single 3D point */ + for (i = maxCvis = 0; i < n; ++i) + if ((k = idxij.rowptr[i + 1] - idxij.rowptr[i]) > maxCvis) + maxCvis = k; + + /* find the maximum number (for all points) of visible image projections in any single camera */ + for (j = maxPvis = 0; j < m; ++j) { + for (i = ii = 0; i < n; ++i) + if (vmask[i * m + j]) + ++ii; + if (ii > maxPvis) + maxPvis = ii; + } + maxCPvis = (maxCvis >= maxPvis) ? maxCvis : maxPvis; + +#if 0 + /* determine the density of blocks in matrix S */ + for(j=mcon, ii=0; j<m; ++j){ + ++ii; /* block Sjj is surely nonzero */ + for(k=j+1; k<m; ++k) + if(sba_crsm_common_row(&idxij, j, k)) ii+=2; /* blocks Sjk & Skj are nonzero */ + } + printf("\nS block density: %.5g\n", ((double)ii)/(mmcon*mmcon)); fflush(stdout); +#endif + + /* allocate work arrays */ + /* W is big enough to hold both jac & W. Note also the extra Wsz, see the initialization of jac below for + * explanation */ + W = (double *)emalloc((nvis * ((Wsz >= ABsz) ? Wsz : ABsz) + Wsz) * sizeof(double)); + U = (double *)emalloc(m * Usz * sizeof(double)); + V = (double *)emalloc(n * Vsz * sizeof(double)); + e = (double *)emalloc(nobs * sizeof(double)); + eab = (double *)emalloc(nvars * sizeof(double)); + E = (double *)emalloc(m * cnp * sizeof(double)); + Yj = (double *)emalloc(maxPvis * Ysz * sizeof(double)); + YWt = (double *)emalloc(YWtsz * sizeof(double)); + S = (double *)emalloc(m * m * Sblsz * sizeof(double)); + dp = (double *)emalloc(nvars * sizeof(double)); + Wtda = (double *)emalloc(pnp * sizeof(double)); + rcidxs = (int *)emalloc(maxCPvis * sizeof(int)); + rcsubs = (int *)emalloc(maxCPvis * sizeof(int)); +#ifndef SBA_DESTROY_COVS + if (covx != NULL) + wght = (double *)emalloc(nvis * covsz * sizeof(double)); +#else + if (covx != NULL) + wght = covx; +#endif /* SBA_DESTROY_COVS */ + + hx = (double *)emalloc(nobs * sizeof(double)); + diagUV = (double *)emalloc(nvars * sizeof(double)); + pdp = (double *)emalloc(nvars * sizeof(double)); + + /* to save resources, W and jac share the same memory: First, the jacobian + * is computed in some working memory that is then overwritten during the + * computation of W. To account for the case of W being larger than jac, + * extra memory is reserved "before" jac. + * Care must be taken, however, to ensure that storing a certain W_ij + * does not overwrite the A_ij, B_ij used to compute it. To achieve + * this is, note that if p1 and p2 respectively point to the first elements + * of a certain W_ij and A_ij, B_ij pair, we should have p2-p1>=Wsz. + * There are two cases: + * a) Wsz>=ABsz: Then p1=W+k*Wsz and p2=jac+k*ABsz=W+Wsz+nvis*(Wsz-ABsz)+k*ABsz + * for some k (0<=k<nvis), thus p2-p1=(nvis-k)*(Wsz-ABsz)+Wsz. + * The right side of the last equation is obviously > Wsz for all 0<=k<nvis + * + * b) Wsz<ABsz: Then p1=W+k*Wsz and p2=jac+k*ABsz=W+Wsz+k*ABsz and + * p2-p1=Wsz+k*(ABsz-Wsz), which is again > Wsz for all 0<=k<nvis + * + * In conclusion, if jac is initialized as below, the memory allocated to all + * W_ij is guaranteed not to overlap with that allocated to their corresponding + * A_ij, B_ij pairs + */ + jac = W + Wsz + ((Wsz > ABsz) ? nvis * (Wsz - ABsz) : 0); + + /* set up auxiliary pointers */ + pa = p; + pb = p + m * cnp; + ea = eab; + eb = eab + m * cnp; + dpa = dp; + dpb = dp + m * cnp; + + diagU = diagUV; + diagV = diagUV + m * cnp; + + /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */ + if (!fjac) { + fdj_data.func = func; + fdj_data.cnp = cnp; + fdj_data.pnp = pnp; + fdj_data.mnp = mnp; + fdj_data.hx = hx; + fdj_data.hxx = (double *)emalloc(nobs * sizeof(double)); + fdj_data.func_rcidxs = (int *)emalloc(2 * maxCPvis * sizeof(int)); + fdj_data.func_rcsubs = fdj_data.func_rcidxs + maxCPvis; + fdj_data.adata = adata; + + fjac = sba_fdjac_x; + jac_adata = (void *)&fdj_data; + } else { + fdj_data.hxx = NULL; + jac_adata = adata; + } + + if (itmax == 0) { /* verify jacobian */ + sba_motstr_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, ncon, mcon, cnp, pnp, mnp, adata, jac_adata); + retval = 0; + goto freemem_and_return; + } + + /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their + * inverses and using the resulting matrices w_x_ij to weigh A_ij, B_ij, and e_ij as w_x_ij A_ij, + * w_x_ij*B_ij and w_x_ij*e_ij. In this way, auxiliary variables as U_j=\sum_i A_ij^T A_ij + * actually become \sum_i (w_x_ij A_ij)^T w_x_ij A_ij= \sum_i A_ij^T w_x_ij^T w_x_ij A_ij = + * A_ij^T Sigma_x_ij^-1 A_ij + * + * ea_j, V_i, eb_i, W_ij are weighted in a similar manner + */ + if (covx != NULL) { + for (i = 0; i < n; ++i) { + nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */ + ptr1 = covx + (k = idxij.val[rcidxs[j]] * covsz); + ptr2 = wght + k; + if (!sba_mat_cholinv(ptr1, ptr2, mnp)) { /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */ + fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_motstr_levmar_x()\n", + i, rcsubs[j]); + retval = SBA_ERROR; + goto freemem_and_return; + } + } + } + sba_mat_cholinv(NULL, NULL, 0); /* cleanup */ + } + + /* compute the error vectors e_ij in hx */ + (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); + nfev = 1; + /* ### compute e=x - f(p) [e=w*(x - f(p)] and its L2 norm */ + if (covx == NULL) + p_eL2 = nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */ + else + p_eL2 = nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */ + if (verbose) + printf("initial motstr-SBA error %g [%g]\n", p_eL2, p_eL2 / nvis); + init_p_eL2 = p_eL2; + if (!SBA_FINITE(p_eL2)) + stop = 7; + + for (itno = 0; itno < itmax && !stop; ++itno) { + /* Note that p, e and ||e||_2 have been updated at the previous iteration */ + + /* compute derivative submatrices A_ij, B_ij */ + (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); + ++njev; + + if (covx != NULL) { + /* compute w_x_ij A_ij and w_x_ij B_ij. + * Since w_x_ij is upper triangular, the products can be safely saved + * directly in A_ij, B_ij, without the need for intermediate storage + */ + for (i = 0; i < nvis; ++i) { + /* set ptr1, ptr2, ptr3 to point to w_x_ij, A_ij, B_ij, resp. */ + ptr1 = wght + i * covsz; + ptr2 = jac + i * ABsz; + ptr3 = ptr2 + Asz; // ptr3=jac + i*ABsz + Asz; + + /* w_x_ij is mnp x mnp, A_ij is mnp x cnp, B_ij is mnp x pnp + * NOTE: Jamming the outter (i.e., ii) loops did not run faster! + */ + /* A_ij */ + for (ii = 0; ii < mnp; ++ii) + for (jj = 0; jj < cnp; ++jj) { + for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular + sum += ptr1[ii * mnp + k] * ptr2[k * cnp + jj]; + ptr2[ii * cnp + jj] = sum; + } + + /* B_ij */ + for (ii = 0; ii < mnp; ++ii) + for (jj = 0; jj < pnp; ++jj) { + for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular + sum += ptr1[ii * mnp + k] * ptr3[k * pnp + jj]; + ptr3[ii * pnp + jj] = sum; + } + } + } + + /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here! + /* U_j is symmetric, therefore its computation can be sped up by + * computing only the upper part and then reusing it for the lower one. + * Recall that A_ij is mnp x cnp + */ + /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here! + /* Recall that e_ij is mnp x 1 + */ + _dblzero(U, m * Usz); /* clear all U_j */ + _dblzero(ea, m * easz); /* clear all ea_j */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + ptr2 = ea + j * easz; // set ptr2 to point to ea_j + + nnz = sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ + for (i = 0; i < nnz; ++i) { + /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */ + ptr3 = jac + idxij.val[rcidxs[i]] * ABsz; + + /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */ + for (ii = 0; ii < cnp; ++ii) { + for (jj = ii; jj < cnp; ++jj) { + for (k = 0, sum = 0.0; k < mnp; ++k) + sum += ptr3[k * cnp + ii] * ptr3[k * cnp + jj]; + ptr1[ii * cnp + jj] += sum; + } + + /* copy the LOWER TRIANGULAR PART of U_j from the upper one */ + for (jj = 0; jj < ii; ++jj) + ptr1[ii * cnp + jj] = ptr1[jj * cnp + ii]; + } + + ptr4 = e + idxij.val[rcidxs[i]] * esz; /* set ptr4 to point to e_ij */ + /* compute A_ij^T e_ij and add it to ea_j */ + for (ii = 0; ii < cnp; ++ii) { + for (jj = 0, sum = 0.0; jj < mnp; ++jj) + sum += ptr3[jj * cnp + ii] * ptr4[jj]; + ptr2[ii] += sum; + } + } + } + + /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here! + /* V_i is symmetric, therefore its computation can be sped up by + * computing only the upper part and then reusing it for the lower one. + * Recall that B_ij is mnp x pnp + */ + /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here! + /* Recall that e_ij is mnp x 1 + */ + _dblzero(V, n * Vsz); /* clear all V_i */ + _dblzero(eb, n * ebsz); /* clear all eb_i */ + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i + + nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */ + ptr3 = jac + idxij.val[rcidxs[j]] * ABsz + Asz; + + /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */ + for (ii = 0; ii < pnp; ++ii) { + for (jj = ii; jj < pnp; ++jj) { + for (k = 0, sum = 0.0; k < mnp; ++k) + sum += ptr3[k * pnp + ii] * ptr3[k * pnp + jj]; + ptr1[ii * pnp + jj] += sum; + } + } + + ptr4 = e + idxij.val[rcidxs[j]] * esz; /* set ptr4 to point to e_ij */ + /* compute B_ij^T e_ij and add it to eb_i */ + for (ii = 0; ii < pnp; ++ii) { + for (jj = 0, sum = 0.0; jj < mnp; ++jj) + sum += ptr3[jj * pnp + ii] * ptr4[jj]; + ptr2[ii] += sum; + } + } + } + + /* compute W_ij = A_ij^T B_ij */ // \Sigma here! + /* Recall that A_ij is mnp x cnp and B_ij is mnp x pnp + */ + for (i = ncon; i < n; ++i) { + nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + /* set ptr1 to point to W_ij, actual column number in rcsubs[j] */ + ptr1 = W + idxij.val[rcidxs[j]] * Wsz; + + if (rcsubs[j] < mcon) { /* A_ij is zero */ + //_dblzero(ptr1, Wsz); /* clear W_ij */ + continue; + } + + /* set ptr2 & ptr3 to point to A_ij & B_ij resp. */ + ptr2 = jac + idxij.val[rcidxs[j]] * ABsz; + ptr3 = ptr2 + Asz; + /* compute A_ij^T B_ij and store it in W_ij + * Recall that storage for A_ij, B_ij does not overlap with that for W_ij, + * see the comments related to the initialization of jac above + */ + /* assert(ptr2-ptr1>=Wsz); */ + for (ii = 0; ii < cnp; ++ii) + for (jj = 0; jj < pnp; ++jj) { + for (k = 0, sum = 0.0; k < mnp; ++k) + sum += ptr2[k * cnp + ii] * ptr3[k * pnp + jj]; + ptr1[ii * pnp + jj] = sum; + } + } + } + + /* Compute ||J^T e||_inf and ||p||^2 */ + for (i = 0, p_L2 = eab_inf = 0.0; i < nvars; ++i) { + if (eab_inf < (tmp = FABS(eab[i]))) + eab_inf = tmp; + p_L2 += p[i] * p[i]; + } + // p_L2=sqrt(p_L2); + + /* save diagonal entries so that augmentation can be later canceled. + * Diagonal entries are in U_j and V_i + */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j + for (i = 0; i < cnp; ++i) + ptr2[i] = ptr1[i * cnp + i]; + } + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i + for (j = 0; j < pnp; ++j) + ptr2[j] = ptr1[j * pnp + j]; + } + + /* + if(!(itno%100)){ + printf("Current estimate: "); + for(i=0; i<nvars; ++i) + printf("%.9g ", p[i]); + printf("-- errors %.9g %0.9g\n", eab_inf, p_eL2); + } + */ + + /* check for convergence */ + if ((eab_inf <= eps1)) { + dp_L2 = 0.0; /* no increment for p in this case */ + stop = 1; + break; + } + + /* compute initial damping factor */ + if (itno == 0) { + /* find max diagonal element */ + for (i = mcon * cnp, tmp = DBL_MIN; i < m * cnp; ++i) + if (diagUV[i] > tmp) + tmp = diagUV[i]; + for (i = m * cnp + ncon * pnp; i < nvars; ++i) /* tmp is not re-initialized! */ + if (diagUV[i] > tmp) + tmp = diagUV[i]; + mu = tau * tmp; + } + + /* determine increment using adaptive damping */ + while (1) { + /* augment U, V */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + for (i = 0; i < cnp; ++i) + ptr1[i * cnp + i] += mu; + } + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + for (j = 0; j < pnp; ++j) + ptr1[j * pnp + j] += mu; + + /* compute V*_i^-1. + * Recall that only the upper triangle of the symmetric pnp x pnp matrix V*_i + * is stored in ptr1; its (also symmetric) inverse is saved in the lower triangle of ptr1 + */ + /* inverting V*_i with LDLT seems to result in faster overall execution compared to when using LU or + * Cholesky */ + // j=sba_symat_invert_LU(ptr1, pnp); matinv=sba_symat_invert_LU; + // j=sba_symat_invert_Chol(ptr1, pnp); matinv=sba_symat_invert_Chol; + j = sba_symat_invert_BK(ptr1, pnp); + matinv = sba_symat_invert_BK; + if (!j) { + fprintf(stderr, "SBA: singular matrix V*_i (i=%d) in sba_motstr_levmar_x(), increasing damping\n", + i); + goto moredamping; // increasing damping will eventually make V*_i diagonally dominant, thus + // nonsingular + // retval=SBA_ERROR; + // goto freemem_and_return; + } + } + + _dblzero(E, m * easz); /* clear all e_j */ + /* compute the mmcon x mmcon block matrix S and e_j */ + + /* Note that S is symmetric, therefore its computation can be + * sped up by computing only the upper part and then reusing + * it for the lower one. + */ + for (j = mcon; j < m; ++j) { + int mmconxUsz = mmcon * Usz; + + nnz = sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero Y_ij, i=0...n-1 */ + + /* get rid of all Y_ij with i<ncon that are treated as zeros. + * In this way, all rcsubs[i] below are guaranteed to be >= ncon + */ + if (ncon) { + for (i = ii = 0; i < nnz; ++i) { + if (rcsubs[i] >= ncon) { + rcidxs[ii] = rcidxs[i]; + rcsubs[ii++] = rcsubs[i]; + } + } + nnz = ii; + } + + /* compute all Y_ij = W_ij (V*_i)^-1 for a *fixed* j. + * To save memory, the block matrix consisting of the Y_ij + * is not stored. Instead, only a block column of this matrix + * is computed & used at each time: For each j, all nonzero + * Y_ij are computed in Yj and then used in the calculations + * involving S_jk and e_j. + * Recall that W_ij is cnp x pnp and (V*_i) is pnp x pnp + */ + for (i = 0; i < nnz; ++i) { + /* set ptr3 to point to (V*_i)^-1, actual row number in rcsubs[i] */ + ptr3 = V + rcsubs[i] * Vsz; + + /* set ptr1 to point to Y_ij, actual row number in rcsubs[i] */ + ptr1 = Yj + i * Ysz; + /* set ptr2 to point to W_ij resp. */ + ptr2 = W + idxij.val[rcidxs[i]] * Wsz; + /* compute W_ij (V*_i)^-1 and store it in Y_ij. + * Recall that only the lower triangle of (V*_i)^-1 is stored + */ + for (ii = 0; ii < cnp; ++ii) { + ptr4 = ptr2 + ii * pnp; + for (jj = 0; jj < pnp; ++jj) { + for (k = 0, sum = 0.0; k <= jj; ++k) + sum += ptr4[k] * ptr3[jj * pnp + k]; // ptr2[ii*pnp+k]*ptr3[jj*pnp+k]; + for (; k < pnp; ++k) + sum += ptr4[k] * ptr3[k * pnp + jj]; // ptr2[ii*pnp+k]*ptr3[k*pnp+jj]; + ptr1[ii * pnp + jj] = sum; + } + } + } + + /* compute the UPPER TRIANGULAR PART of S */ + for (k = j; k < m; ++k) { // j>=mcon + /* compute \sum_i Y_ij W_ik^T in YWt. Note that for an off-diagonal block defined by j, k + * YWt (and thus S_jk) is nonzero only if there exists a point that is visible in both the + * j-th and k-th images + */ + + /* Recall that Y_ij is cnp x pnp and W_ik is cnp x pnp */ + _dblzero(YWt, YWtsz); /* clear YWt */ + + for (i = 0; i < nnz; ++i) { + register double *pYWt; + + /* find the min and max column indices of the elements in row i (actually rcsubs[i]) + * and make sure that k falls within them. This test handles W_ik's which are + * certain to be zero without bothering to call sba_crsm_elmidx() + */ + ii = idxij.colidx[idxij.rowptr[rcsubs[i]]]; + jj = idxij.colidx[idxij.rowptr[rcsubs[i] + 1] - 1]; + if (k < ii || k > jj) + continue; /* W_ik == 0 */ + + /* set ptr2 to point to W_ik */ + l = sba_crsm_elmidxp(&idxij, rcsubs[i], k, j, rcidxs[i]); + // l=sba_crsm_elmidx(&idxij, rcsubs[i], k); + if (l == -1) + continue; /* W_ik == 0 */ + + ptr2 = W + idxij.val[l] * Wsz; + /* set ptr1 to point to Y_ij, actual row number in rcsubs[i] */ + ptr1 = Yj + i * Ysz; + for (ii = 0; ii < cnp; ++ii) { + ptr3 = ptr1 + ii * pnp; + pYWt = YWt + ii * cnp; + + for (jj = 0; jj < cnp; ++jj) { + ptr4 = ptr2 + jj * pnp; + for (l = 0, sum = 0.0; l < pnp; ++l) + sum += ptr3[l] * ptr4[l]; // ptr1[ii*pnp+l]*ptr2[jj*pnp+l]; + pYWt[jj] += sum; // YWt[ii*cnp+jj]+=sum; + } + } + } + +/* since the linear system involving S is solved with lapack, + * it is preferable to store S in column major (i.e. fortran) + * order, so as to avoid unecessary transposing/copying. +*/ +#if MAT_STORAGE == COLUMN_MAJOR + ptr2 = S + (k - mcon) * mmconxUsz + + (j - mcon) * cnp; // set ptr2 to point to the beginning of block j,k in S +#else + ptr2 = S + (j - mcon) * mmconxUsz + + (k - mcon) * cnp; // set ptr2 to point to the beginning of block j,k in S +#endif + + if (j != k) { /* Kronecker */ + for (ii = 0; ii < cnp; ++ii, ptr2 += Sdim) + for (jj = 0; jj < cnp; ++jj) + ptr2[jj] = +#if MAT_STORAGE == COLUMN_MAJOR + -YWt[jj * cnp + ii]; +#else + -YWt[ii * cnp + jj]; +#endif + } else { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + + for (ii = 0; ii < cnp; ++ii, ptr2 += Sdim) + for (jj = 0; jj < cnp; ++jj) + ptr2[jj] = +#if MAT_STORAGE == COLUMN_MAJOR + ptr1[jj * cnp + ii] - YWt[jj * cnp + ii]; +#else + ptr1[ii * cnp + jj] - YWt[ii * cnp + jj]; +#endif + } + } + + /* copy the LOWER TRIANGULAR PART of S from the upper one */ + for (k = mcon; k < j; ++k) { +#if MAT_STORAGE == COLUMN_MAJOR + ptr1 = S + (k - mcon) * mmconxUsz + + (j - mcon) * cnp; // set ptr1 to point to the beginning of block j,k in S + ptr2 = S + (j - mcon) * mmconxUsz + + (k - mcon) * cnp; // set ptr2 to point to the beginning of block k,j in S +#else + ptr1 = S + (j - mcon) * mmconxUsz + + (k - mcon) * cnp; // set ptr1 to point to the beginning of block j,k in S + ptr2 = S + (k - mcon) * mmconxUsz + + (j - mcon) * cnp; // set ptr2 to point to the beginning of block k,j in S +#endif + for (ii = 0; ii < cnp; ++ii, ptr1 += Sdim) + for (jj = 0, ptr3 = ptr2 + ii; jj < cnp; ++jj, ptr3 += Sdim) + ptr1[jj] = *ptr3; + } + + /* compute e_j=ea_j - \sum_i Y_ij eb_i */ + /* Recall that Y_ij is cnp x pnp and eb_i is pnp x 1 */ + ptr1 = E + j * easz; // set ptr1 to point to e_j + + for (i = 0; i < nnz; ++i) { + /* set ptr2 to point to Y_ij, actual row number in rcsubs[i] */ + ptr2 = Yj + i * Ysz; + + /* set ptr3 to point to eb_i */ + ptr3 = eb + rcsubs[i] * ebsz; + for (ii = 0; ii < cnp; ++ii) { + ptr4 = ptr2 + ii * pnp; + for (jj = 0, sum = 0.0; jj < pnp; ++jj) + sum += ptr4[jj] * ptr3[jj]; // ptr2[ii*pnp+jj]*ptr3[jj]; + ptr1[ii] += sum; + } + } + + ptr2 = ea + j * easz; // set ptr2 to point to ea_j + for (i = 0; i < easz; ++i) + ptr1[i] = ptr2[i] - ptr1[i]; + } + +#if 0 + if(verbose>1){ /* count the nonzeros in S */ + for(i=ii=0; i<Sdim*Sdim; ++i) + if(S[i]!=0.0) ++ii; + printf("\nS density: %.5g\n", ((double)ii)/(Sdim*Sdim)); fflush(stdout); + } +#endif + + /* solve the linear system S dpa = E to compute the da_j. + * + * Note that if MAT_STORAGE==1 S is modified in the following call; + * this is OK since S is recomputed for each iteration + */ + // issolved=sba_Axb_LU(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_LU; + issolved = sba_Axb_Chol(S, E + mcon * cnp, dpa + mcon * cnp, Sdim, MAT_STORAGE); + linsolver = sba_Axb_Chol; + // issolved=sba_Axb_BK(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_BK; + // issolved=sba_Axb_QRnoQ(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_QRnoQ; + // issolved=sba_Axb_QR(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_QR; + // issolved=sba_Axb_SVD(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_SVD; + // issolved=sba_Axb_CG(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, (3*Sdim)/2, 1E-10, SBA_CG_JACOBI, MAT_STORAGE); + // linsolver=(PLS)sba_Axb_CG; + + ++nlss; + + _dblzero(dpa, mcon * cnp); /* no change for the first mcon camera params */ + + if (issolved) { + + /* compute the db_i */ + for (i = ncon; i < n; ++i) { + ptr1 = dpb + i * ebsz; // set ptr1 to point to db_i + + /* compute \sum_j W_ij^T da_j */ + /* Recall that W_ij is cnp x pnp and da_j is cnp x 1 */ + _dblzero(Wtda, Wtdasz); /* clear Wtda */ + nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + /* set ptr2 to point to W_ij, actual column number in rcsubs[j] */ + if (rcsubs[j] < mcon) + continue; /* W_ij is zero */ + + ptr2 = W + idxij.val[rcidxs[j]] * Wsz; + + /* set ptr3 to point to da_j */ + ptr3 = dpa + rcsubs[j] * cnp; + + for (ii = 0; ii < pnp; ++ii) { + ptr4 = ptr2 + ii; + for (jj = 0, sum = 0.0; jj < cnp; ++jj) + sum += ptr4[jj * pnp] * ptr3[jj]; // ptr2[jj*pnp+ii]*ptr3[jj]; + Wtda[ii] += sum; + } + } + + /* compute eb_i - \sum_j W_ij^T da_j = eb_i - Wtda in Wtda */ + ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i + for (ii = 0; ii < pnp; ++ii) + Wtda[ii] = ptr2[ii] - Wtda[ii]; + + /* compute the product (V*_i)^-1 Wtda = (V*_i)^-1 (eb_i - \sum_j W_ij^T da_j). + * Recall that only the lower triangle of (V*_i)^-1 is stored + */ + ptr2 = V + i * Vsz; // set ptr2 to point to (V*_i)^-1 + for (ii = 0; ii < pnp; ++ii) { + for (jj = 0, sum = 0.0; jj <= ii; ++jj) + sum += ptr2[ii * pnp + jj] * Wtda[jj]; + for (; jj < pnp; ++jj) + sum += ptr2[jj * pnp + ii] * Wtda[jj]; + ptr1[ii] = sum; + } + } + _dblzero(dpb, ncon * pnp); /* no change for the first ncon point params */ + + /* parameter vector updates are now in dpa, dpb */ + + /* compute p's new estimate and ||dp||^2 */ + for (i = 0, dp_L2 = 0.0; i < nvars; ++i) { + pdp[i] = p[i] + (tmp = dp[i]); + dp_L2 += tmp * tmp; + } + // dp_L2=sqrt(dp_L2); + + if (dp_L2 <= eps2_sq * p_L2) { /* relative change in p is small, stop */ + // if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */ + stop = 2; + break; + } + + if (dp_L2 >= (p_L2 + eps2) / SBA_EPSILON_SQ) { /* almost singular */ + // if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */ + fprintf(stderr, "SBA: the matrix of the augmented normal equations is almost singular in " + "sba_motstr_levmar_x(),\n" + " minimization should be restarted from the current solution with an increased " + "damping term\n"); + retval = SBA_ERROR; + goto freemem_and_return; + } + + (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); + ++nfev; /* evaluate function at p + dp */ + if (verbose > 1) + printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs)); + /* ### compute ||e(pdp)||_2 */ + if (covx == NULL) + pdp_eL2 = nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */ + else + pdp_eL2 = nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */ + if (!SBA_FINITE(pdp_eL2)) { + if (verbose) /* identify the offending point projection */ + sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs); + + stop = 7; + break; + } + + for (i = 0, dL = 0.0; i < nvars; ++i) + dL += dp[i] * (mu * dp[i] + eab[i]); + + dF = p_eL2 - pdp_eL2; + + if (verbose > 1) + printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, + dL != 0.0 ? dF / dL : dF / DBL_EPSILON, p_eL2 / nvis, pdp_eL2 / nvis, p_eL2 / pdp_eL2); + + if (dL > 0.0 && dF > 0.0) { /* reduction in error, increment is accepted */ + tmp = (2.0 * dF / dL - 1.0); + tmp = 1.0 - tmp * tmp * tmp; + mu = mu * ((tmp >= SBA_ONE_THIRD) ? tmp : SBA_ONE_THIRD); + nu = 2; + + /* the test below is equivalent to the relative reduction of the RMS reprojection error: + * sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2) */ + if (pdp_eL2 - 2.0 * sqrt(p_eL2 * pdp_eL2) < (eps4_sq - 1.0) * p_eL2) + stop = 4; + + for (i = 0; i < nvars; ++i) /* update p's estimate */ + p[i] = pdp[i]; + + for (i = 0; i < nobs; ++i) /* update e and ||e||_2 */ + e[i] = hx[i]; + p_eL2 = pdp_eL2; + break; + } + } /* issolved */ + + moredamping: + /* if this point is reached (also via an explicit goto!), either the linear system could + * not be solved or the error did not reduce; in any case, the increment must be rejected + */ + + mu *= nu; + nu2 = nu << 1; // 2*nu; + if (nu2 <= nu) { /* nu has wrapped around (overflown) */ + fprintf(stderr, "SBA: too many failed attempts to increase the damping factor in " + "sba_motstr_levmar_x()! Singular Hessian matrix?\n"); + // exit(1); + stop = 6; + break; + } + nu = nu2; + +#if 0 + /* restore U, V diagonal entries */ + for(j=mcon; j<m; ++j){ + ptr1=U + j*Usz; // set ptr1 to point to U_j + ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j + for(i=0; i<cnp; ++i) + ptr1[i*cnp+i]=ptr2[i]; + } + for(i=ncon; i<n; ++i){ + ptr1=V + i*Vsz; // set ptr1 to point to V_i + ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i + for(j=0; j<pnp; ++j) + ptr1[j*pnp+j]=ptr2[j]; + } +#endif + } /* inner while loop */ + + if (p_eL2 <= eps3_sq) + stop = 5; // error is small, force termination of outer loop + } + + if (itno >= itmax) + stop = 3; + + /* restore U, V diagonal entries */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j + for (i = 0; i < cnp; ++i) + ptr1[i * cnp + i] = ptr2[i]; + } + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i + for (j = 0; j < pnp; ++j) + ptr1[j * pnp + j] = ptr2[j]; + } + + if (info) { + info[0] = init_p_eL2; + info[1] = p_eL2; + info[2] = eab_inf; + info[3] = dp_L2; + for (j = mcon, tmp = DBL_MIN; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + for (i = 0; i < cnp; ++i) + if (tmp < ptr1[i * cnp + i]) + tmp = ptr1[i * cnp + i]; + } + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + for (j = 0; j < pnp; ++j) + if (tmp < ptr1[j * pnp + j]) + tmp = ptr1[j * pnp + j]; + } + info[4] = mu / tmp; + info[5] = itno; + info[6] = stop; + info[7] = nfev; + info[8] = njev; + info[9] = nlss; + } + + // sba_print_sol(n, m, p, cnp, pnp, x, mnp, &idxij, rcidxs, rcsubs); + retval = (stop != 7) ? itno : SBA_ERROR; + +freemem_and_return: /* NOTE: this point is also reached via a goto! */ + + /* free whatever was allocated */ + free(W); + free(U); + free(V); + free(e); + free(eab); + free(E); + free(Yj); + free(YWt); + free(S); + free(dp); + free(Wtda); + free(rcidxs); + free(rcsubs); +#ifndef SBA_DESTROY_COVS + if (wght) + free(wght); +#else +/* nothing to do */ +#endif /* SBA_DESTROY_COVS */ + + free(hx); + free(diagUV); + free(pdp); + if (fdj_data.hxx) { // cleanup + free(fdj_data.hxx); + free(fdj_data.func_rcidxs); + } + + sba_crsm_free(&idxij); + + /* free the memory allocated by the matrix inversion & linear solver routines */ + if (matinv) + (*matinv)(NULL, 0); + if (linsolver) + (*linsolver)(NULL, NULL, NULL, 0, 0); + + return retval; +} + +/* Bundle adjustment on camera parameters only + * using the sparse Levenberg-Marquardt as described in HZ p. 568 + * + * Returns the number of iterations (>=0) if successfull, SBA_ERROR if failed + */ + +int sba_mot_levmar_x( + const int n, /* number of points */ + const int m, /* number of images */ + const int mcon, /* number of images (starting from the 1st) whose parameters should not be modified. + * All A_ij (see below) with j<mcon are assumed to be zero + */ + char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */ + double *p, /* initial parameter vector p0: (a1, ..., am). + * aj are the image j parameters, size m*cnp */ + const int cnp, /* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */ + double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where + * x_ij is the projection of the i-th point on the j-th image. + * NOTE: some of the x_ij might be missing, if point i is not visible in image j; + * see vmask[i, j], max. size n*m*mnp + */ + double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm), + * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no + * covariance estimates are available (identity matrices are implicitly used in this case). + * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing; + * see vmask[i, j], max. size n*m*mnp*mnp + */ + const int mnp, /* number of parameters for EACH measurement; usually 2 */ + void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + /* functional relation describing measurements. Given a parameter vector p, + * computes a prediction of the measurements \hat{x}. p is (m*cnp)x1, + * \hat{x} is (n*m*mnp)x1, maximum + * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used + * as working memory + */ + void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), + /* function to evaluate the sparse jacobian dX/dp. + * The Jacobian is returned in jac as + * (dx_11/da_1, ..., dx_1m/da_m, ..., dx_n1/da_1, ..., dx_nm/da_m), or (using HZ's notation), + * jac=(A_11, ..., A_1m, ..., A_n1, ..., A_nm) + * Notice that depending on idxij, some of the A_ij might be missing. + * Note also that the A_ij are mnp x cnp matrices and they + * should be stored in jac in row-major order. + * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used + * as working memory + * + * If NULL, the jacobian is approximated by repetitive func calls and finite + * differences. This is computationally inefficient and thus NOT recommended. + */ + void *adata, /* pointer to possibly additional data, passed uninterpreted to func, fjac */ + + const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate + return */ + const int verbose, /* I: verbosity */ + const double opts[SBA_OPTSSZ], + /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial + * \mu, + * stopping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2 + */ + double info[SBA_INFOSZ] + /* O: information regarding the minimization. Set to NULL if don't care + * info[0]=||e||_2 at initial p. + * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. + * info[5]= # iterations, + * info[6]=reason for terminating: 1 - stopped by small gradient J^T e + * 2 - stopped by small dp + * 3 - stopped by itmax + * 4 - stopped by small relative reduction in ||e||_2 + * 5 - stopped by small ||e||_2 + * 6 - too many attempts to increase damping. Restart with increased mu + * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error + * info[7]= # function evaluations + * info[8]= # jacobian evaluations + * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error + */ + ) { + register int i, j, ii, jj, k; + int nvis, nnz, retval; + + /* The following are work arrays that are dynamically allocated by sba_mot_levmar_x() */ + double *jac; /* work array for storing the jacobian, max. size n*m*mnp*cnp */ + double *U; /* work array for storing the U_j in the order U_1, ..., U_m, size m*cnp*cnp */ + + double *e; /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm, + max. size n*m*mnp */ + double *ea; /* work array for storing the ea_j in the order ea_1, .. ea_m, size m*cnp */ + + double *dp; /* work array for storing the parameter vector updates da_1, ..., da_m, size m*cnp */ + + double *wght = /* work array for storing the weights computed from the covariance inverses, max. size n*m*mnp*mnp */ + NULL; + + /* Of the above arrays, jac, e, wght are sparse and + * U, ea, dp are dense. Sparse arrays are indexed through + * idxij (see below), that is with the same mechanism as the input + * measurements vector x + */ + + /* submatrices sizes */ + int Asz, Usz, esz, easz, covsz; + + register double *ptr1, *ptr2, *ptr3, *ptr4, sum; + struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location of A_ij + * in jac, e_ij in e, etc. + * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous + * index k and it is used to efficiently lookup the memory locations where the non-zero + * blocks of a sparse matrix/vector are stored + */ + int maxCPvis, /* max. of projections across cameras & projections across points */ + *rcidxs, /* work array for the indexes corresponding to the nonzero elements of a single row or + column in a sparse matrix, size max(n, m) */ + *rcsubs; /* work array for the subscripts of nonzero elements in a single row or column of a + sparse matrix, size max(n, m) */ + + /* The following variables are needed by the LM algorithm */ + register int itno; /* iteration counter */ + int nsolved; + /* temporary work arrays that are dynamically allocated */ + double *hx, /* \hat{x}_i, max. size m*n*mnp */ + *diagU, /* diagonals of U_j, size m*cnp */ + *pdp; /* p + dp, size m*cnp */ + + register double mu, /* damping constant */ + tmp; /* mainly used in matrix & vector multiplications */ + double p_eL2, ea_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */ + double p_L2, dp_L2 = DBL_MAX, dF, dL; + double tau = FABS(opts[0]), eps1 = FABS(opts[1]), eps2 = FABS(opts[2]), eps2_sq = opts[2] * opts[2], + eps3_sq = opts[3] * opts[3], eps4_sq = opts[4] * opts[4]; + double init_p_eL2; + int nu = 2, nu2, stop = 0, nfev, njev = 0, nlss = 0; + int nobs, nvars; + PLS linsolver = NULL; + + struct fdj_data_x_ fdj_data; + void *jac_adata; + + /* Initialization */ + mu = ea_inf = 0.0; /* -Wall */ + + /* block sizes */ + Asz = mnp * cnp; + Usz = cnp * cnp; + esz = mnp; + easz = cnp; + covsz = mnp * mnp; + + /* count total number of visible image points */ + for (i = nvis = 0, jj = n * m; i < jj; ++i) + nvis += (vmask[i] != 0); + + nobs = nvis * mnp; + nvars = m * cnp; + if (nobs < nvars) { + fprintf(stderr, + "SBA: sba_mot_levmar_x() cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", + nobs, nvars); + return SBA_ERROR; + } + + /* allocate & fill up the idxij structure */ + sba_crsm_alloc(&idxij, n, m, nvis); + for (i = k = 0; i < n; ++i) { + idxij.rowptr[i] = k; + ii = i * m; + for (j = 0; j < m; ++j) + if (vmask[ii + j]) { + idxij.val[k] = k; + idxij.colidx[k++] = j; + } + } + idxij.rowptr[n] = nvis; + + /* find the maximum number of visible image points in any single camera or coming from a single 3D point */ + /* cameras */ + for (i = maxCPvis = 0; i < n; ++i) + if ((k = idxij.rowptr[i + 1] - idxij.rowptr[i]) > maxCPvis) + maxCPvis = k; + + /* points, note that maxCPvis is not reinitialized! */ + for (j = 0; j < m; ++j) { + for (i = ii = 0; i < n; ++i) + if (vmask[i * m + j]) + ++ii; + if (ii > maxCPvis) + maxCPvis = ii; + } + + /* allocate work arrays */ + jac = (double *)emalloc(nvis * Asz * sizeof(double)); + U = (double *)emalloc(m * Usz * sizeof(double)); + e = (double *)emalloc(nobs * sizeof(double)); + ea = (double *)emalloc(nvars * sizeof(double)); + dp = (double *)emalloc(nvars * sizeof(double)); + rcidxs = (int *)emalloc(maxCPvis * sizeof(int)); + rcsubs = (int *)emalloc(maxCPvis * sizeof(int)); +#ifndef SBA_DESTROY_COVS + if (covx != NULL) + wght = (double *)emalloc(nvis * covsz * sizeof(double)); +#else + if (covx != NULL) + wght = covx; +#endif /* SBA_DESTROY_COVS */ + + hx = (double *)emalloc(nobs * sizeof(double)); + diagU = (double *)emalloc(nvars * sizeof(double)); + pdp = (double *)emalloc(nvars * sizeof(double)); + + /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */ + if (!fjac) { + fdj_data.func = func; + fdj_data.cnp = cnp; + fdj_data.pnp = 0; + fdj_data.mnp = mnp; + fdj_data.hx = hx; + fdj_data.hxx = (double *)emalloc(nobs * sizeof(double)); + fdj_data.func_rcidxs = (int *)emalloc(2 * maxCPvis * sizeof(int)); + fdj_data.func_rcsubs = fdj_data.func_rcidxs + maxCPvis; + fdj_data.adata = adata; + + fjac = sba_fdjac_x; + jac_adata = (void *)&fdj_data; + } else { + fdj_data.hxx = NULL; + jac_adata = adata; + } + + if (itmax == 0) { /* verify jacobian */ + sba_mot_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, mcon, cnp, mnp, adata, jac_adata); + retval = 0; + goto freemem_and_return; + } + + /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their + * inverses and using the resulting matrices w_x_ij to weigh A_ij and e_ij as w_x_ij A_ij + * and w_x_ij*e_ij. In this way, auxiliary variables as U_j=\sum_i A_ij^T A_ij + * actually become \sum_i (w_x_ij A_ij)^T w_x_ij A_ij= \sum_i A_ij^T w_x_ij^T w_x_ij A_ij = + * A_ij^T Sigma_x_ij^-1 A_ij + * + * ea_j are weighted in a similar manner + */ + if (covx != NULL) { + for (i = 0; i < n; ++i) { + nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */ + ptr1 = covx + (k = idxij.val[rcidxs[j]] * covsz); + ptr2 = wght + k; + if (!sba_mat_cholinv(ptr1, ptr2, mnp)) { /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */ + fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_motstr_levmar_x()\n", + i, rcsubs[j]); + retval = SBA_ERROR; + goto freemem_and_return; + } + } + } + sba_mat_cholinv(NULL, NULL, 0); /* cleanup */ + } + + /* compute the error vectors e_ij in hx */ + (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); + nfev = 1; + /* ### compute e=x - f(p) [e=w*(x - f(p)] and its L2 norm */ + if (covx == NULL) + p_eL2 = nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */ + else + p_eL2 = nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */ + if (verbose) + printf("initial mot-SBA error %g [%g]\n", p_eL2, p_eL2 / nvis); + init_p_eL2 = p_eL2; + if (!SBA_FINITE(p_eL2)) + stop = 7; + + for (itno = 0; itno < itmax && !stop; ++itno) { + /* Note that p, e and ||e||_2 have been updated at the previous iteration */ + + /* compute derivative submatrices A_ij */ + (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); + ++njev; + + if (covx != NULL) { + /* compute w_x_ij A_ij + * Since w_x_ij is upper triangular, the products can be safely saved + * directly in A_ij, without the need for intermediate storage + */ + for (i = 0; i < nvis; ++i) { + /* set ptr1, ptr2 to point to w_x_ij, A_ij, resp. */ + ptr1 = wght + i * covsz; + ptr2 = jac + i * Asz; + + /* w_x_ij is mnp x mnp, A_ij is mnp x cnp */ + for (ii = 0; ii < mnp; ++ii) + for (jj = 0; jj < cnp; ++jj) { + for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular + sum += ptr1[ii * mnp + k] * ptr2[k * cnp + jj]; + ptr2[ii * cnp + jj] = sum; + } + } + } + + /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here! + /* U_j is symmetric, therefore its computation can be sped up by + * computing only the upper part and then reusing it for the lower one. + * Recall that A_ij is mnp x cnp + */ + /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here! + /* Recall that e_ij is mnp x 1 + */ + _dblzero(U, m * Usz); /* clear all U_j */ + _dblzero(ea, m * easz); /* clear all ea_j */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + ptr2 = ea + j * easz; // set ptr2 to point to ea_j + + nnz = sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ + for (i = 0; i < nnz; ++i) { + /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */ + ptr3 = jac + idxij.val[rcidxs[i]] * Asz; + + /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */ + for (ii = 0; ii < cnp; ++ii) { + for (jj = ii; jj < cnp; ++jj) { + for (k = 0, sum = 0.0; k < mnp; ++k) + sum += ptr3[k * cnp + ii] * ptr3[k * cnp + jj]; + ptr1[ii * cnp + jj] += sum; + } + + /* copy the LOWER TRIANGULAR PART of U_j from the upper one */ + for (jj = 0; jj < ii; ++jj) + ptr1[ii * cnp + jj] = ptr1[jj * cnp + ii]; + } + + ptr4 = e + idxij.val[rcidxs[i]] * esz; /* set ptr4 to point to e_ij */ + /* compute A_ij^T e_ij and add it to ea_j */ + for (ii = 0; ii < cnp; ++ii) { + for (jj = 0, sum = 0.0; jj < mnp; ++jj) + sum += ptr3[jj * cnp + ii] * ptr4[jj]; + ptr2[ii] += sum; + } + } + } + + /* Compute ||J^T e||_inf and ||p||^2 */ + for (i = 0, p_L2 = ea_inf = 0.0; i < nvars; ++i) { + if (ea_inf < (tmp = FABS(ea[i]))) + ea_inf = tmp; + p_L2 += p[i] * p[i]; + } + // p_L2=sqrt(p_L2); + + /* save diagonal entries so that augmentation can be later canceled. + * Diagonal entries are in U_j + */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j + for (i = 0; i < cnp; ++i) + ptr2[i] = ptr1[i * cnp + i]; + } + + /* + if(!(itno%100)){ + printf("Current estimate: "); + for(i=0; i<nvars; ++i) + printf("%.9g ", p[i]); + printf("-- errors %.9g %0.9g\n", ea_inf, p_eL2); + } + */ + + /* check for convergence */ + if ((ea_inf <= eps1)) { + dp_L2 = 0.0; /* no increment for p in this case */ + stop = 1; + break; + } + + /* compute initial damping factor */ + if (itno == 0) { + for (i = mcon * cnp, tmp = DBL_MIN; i < nvars; ++i) + if (diagU[i] > tmp) + tmp = diagU[i]; /* find max diagonal element */ + mu = tau * tmp; + } + + /* determine increment using adaptive damping */ + while (1) { + /* augment U */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + for (i = 0; i < cnp; ++i) + ptr1[i * cnp + i] += mu; + } + + /* solve the linear systems U_j da_j = ea_j to compute the da_j */ + _dblzero(dp, mcon * cnp); /* no change for the first mcon camera params */ + for (j = nsolved = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + ptr2 = ea + j * easz; // set ptr2 to point to ea_j + ptr3 = dp + j * cnp; // set ptr3 to point to da_j + + // nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_LU; + nsolved += sba_Axb_Chol(ptr1, ptr2, ptr3, cnp, 0); + linsolver = sba_Axb_Chol; + // nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_BK; + // nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_QRnoQ; + // nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_QR; + // nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_SVD; + // nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, cnp, (3*cnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0); + // linsolver=(PLS)sba_Axb_CG; + + ++nlss; + } + + if (nsolved == m) { + + /* parameter vector updates are now in dp */ + + /* compute p's new estimate and ||dp||^2 */ + for (i = 0, dp_L2 = 0.0; i < nvars; ++i) { + pdp[i] = p[i] + (tmp = dp[i]); + dp_L2 += tmp * tmp; + } + // dp_L2=sqrt(dp_L2); + + if (dp_L2 <= eps2_sq * p_L2) { /* relative change in p is small, stop */ + // if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */ + stop = 2; + break; + } + + if (dp_L2 >= (p_L2 + eps2) / SBA_EPSILON_SQ) { /* almost singular */ + // if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */ + fprintf( + stderr, + "SBA: the matrix of the augmented normal equations is almost singular in sba_mot_levmar_x(),\n" + " minimization should be restarted from the current solution with an increased damping " + "term\n"); + retval = SBA_ERROR; + goto freemem_and_return; + } + + (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); + ++nfev; /* evaluate function at p + dp */ + if (verbose > 1) + printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs)); + /* ### compute ||e(pdp)||_2 */ + if (covx == NULL) + pdp_eL2 = nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */ + else + pdp_eL2 = nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */ + if (!SBA_FINITE(pdp_eL2)) { + if (verbose) /* identify the offending point projection */ + sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs); + + stop = 7; + break; + } + + for (i = 0, dL = 0.0; i < nvars; ++i) + dL += dp[i] * (mu * dp[i] + ea[i]); + + dF = p_eL2 - pdp_eL2; + + if (verbose > 1) + printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, + dL != 0.0 ? dF / dL : dF / DBL_EPSILON, p_eL2 / nvis, pdp_eL2 / nvis, p_eL2 / pdp_eL2); + + if (dL > 0.0 && dF > 0.0) { /* reduction in error, increment is accepted */ + tmp = (2.0 * dF / dL - 1.0); + tmp = 1.0 - tmp * tmp * tmp; + mu = mu * ((tmp >= SBA_ONE_THIRD) ? tmp : SBA_ONE_THIRD); + nu = 2; + + /* the test below is equivalent to the relative reduction of the RMS reprojection error: + * sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2) */ + if (pdp_eL2 - 2.0 * sqrt(p_eL2 * pdp_eL2) < (eps4_sq - 1.0) * p_eL2) + stop = 4; + + for (i = 0; i < nvars; ++i) /* update p's estimate */ + p[i] = pdp[i]; + + for (i = 0; i < nobs; ++i) /* update e and ||e||_2 */ + e[i] = hx[i]; + p_eL2 = pdp_eL2; + break; + } + } /* nsolved==m */ + + /* if this point is reached, either at least one linear system could not be solved or + * the error did not reduce; in any case, the increment must be rejected + */ + + mu *= nu; + nu2 = nu << 1; // 2*nu; + if (nu2 <= nu) { /* nu has wrapped around (overflown) */ + fprintf(stderr, "SBA: too many failed attempts to increase the damping factor in sba_mot_levmar_x()! " + "Singular Hessian matrix?\n"); + // exit(1); + stop = 6; + break; + } + nu = nu2; + +#if 0 + /* restore U diagonal entries */ + for(j=mcon; j<m; ++j){ + ptr1=U + j*Usz; // set ptr1 to point to U_j + ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j + for(i=0; i<cnp; ++i) + ptr1[i*cnp+i]=ptr2[i]; + } +#endif + } /* inner while loop */ + + if (p_eL2 <= eps3_sq) + stop = 5; // error is small, force termination of outer loop + } + + if (itno >= itmax) + stop = 3; + + /* restore U diagonal entries */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j + for (i = 0; i < cnp; ++i) + ptr1[i * cnp + i] = ptr2[i]; + } + + if (info) { + info[0] = init_p_eL2; + info[1] = p_eL2; + info[2] = ea_inf; + info[3] = dp_L2; + for (j = mcon, tmp = DBL_MIN; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + for (i = 0; i < cnp; ++i) + if (tmp < ptr1[i * cnp + i]) + tmp = ptr1[i * cnp + i]; + } + info[4] = mu / tmp; + info[5] = itno; + info[6] = stop; + info[7] = nfev; + info[8] = njev; + info[9] = nlss; + } + // sba_print_sol(n, m, p, cnp, 0, x, mnp, &idxij, rcidxs, rcsubs); + retval = (stop != 7) ? itno : SBA_ERROR; + +freemem_and_return: /* NOTE: this point is also reached via a goto! */ + + /* free whatever was allocated */ + free(jac); + free(U); + free(e); + free(ea); + free(dp); + free(rcidxs); + free(rcsubs); +#ifndef SBA_DESTROY_COVS + if (wght) + free(wght); +#else +/* nothing to do */ +#endif /* SBA_DESTROY_COVS */ + + free(hx); + free(diagU); + free(pdp); + if (fdj_data.hxx) { // cleanup + free(fdj_data.hxx); + free(fdj_data.func_rcidxs); + } + + sba_crsm_free(&idxij); + + /* free the memory allocated by the linear solver routine */ + if (linsolver) + (*linsolver)(NULL, NULL, NULL, 0, 0); + + return retval; +} + +/* Bundle adjustment on structure parameters only + * using the sparse Levenberg-Marquardt as described in HZ p. 568 + * + * Returns the number of iterations (>=0) if successfull, SBA_ERROR if failed + */ + +int sba_str_levmar_x( + const int n, /* number of points */ + const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified. + * All B_ij (see below) with i<ncon are assumed to be zero + */ + const int m, /* number of images */ + char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */ + double *p, /* initial parameter vector p0: (b1, ..., bn). + * bi are the i-th point parameters, * size n*pnp */ + const int pnp, /* number of parameters for ONE point; e.g. 3 for Euclidean points */ + double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where + * x_ij is the projection of the i-th point on the j-th image. + * NOTE: some of the x_ij might be missing, if point i is not visible in image j; + * see vmask[i, j], max. size n*m*mnp + */ + double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm), + * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no + * covariance estimates are available (identity matrices are implicitly used in this case). + * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing; + * see vmask[i, j], max. size n*m*mnp*mnp + */ + const int mnp, /* number of parameters for EACH measurement; usually 2 */ + void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + /* functional relation describing measurements. Given a parameter vector p, + * computes a prediction of the measurements \hat{x}. p is (n*pnp)x1, + * \hat{x} is (n*m*mnp)x1, maximum + * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used + * as working memory + */ + void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), + /* function to evaluate the sparse jacobian dX/dp. + * The Jacobian is returned in jac as + * (dx_11/db_1, ..., dx_1m/db_1, ..., dx_n1/db_n, ..., dx_nm/db_n), or (using HZ's notation), + * jac=(B_11, ..., B_1m, ..., B_n1, ..., B_nm) + * Notice that depending on idxij, some of the B_ij might be missing. + * Note also that B_ij are mnp x pnp matrices and they + * should be stored in jac in row-major order. + * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used + * as working memory + * + * If NULL, the jacobian is approximated by repetitive func calls and finite + * differences. This is computationally inefficient and thus NOT recommended. + */ + void *adata, /* pointer to possibly additional data, passed uninterpreted to func, fjac */ + + const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate + return */ + const int verbose, /* I: verbosity */ + const double opts[SBA_OPTSSZ], + /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial + * \mu, + * stopping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2 + */ + double info[SBA_INFOSZ] + /* O: information regarding the minimization. Set to NULL if don't care + * info[0]=||e||_2 at initial p. + * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. + * info[5]= # iterations, + * info[6]=reason for terminating: 1 - stopped by small gradient J^T e + * 2 - stopped by small dp + * 3 - stopped by itmax + * 4 - stopped by small relative reduction in ||e||_2 + * 5 - stopped by small ||e||_2 + * 6 - too many attempts to increase damping. Restart with increased mu + * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error + * info[7]= # function evaluations + * info[8]= # jacobian evaluations + * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error + */ + ) { + register int i, j, ii, jj, k; + int nvis, nnz, retval; + + /* The following are work arrays that are dynamically allocated by sba_str_levmar_x() */ + double *jac; /* work array for storing the jacobian, max. size n*m*mnp*pnp */ + double *V; /* work array for storing the V_i in the order V_1, ..., V_n, size n*pnp*pnp */ + + double *e; /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm, + max. size n*m*mnp */ + double *eb; /* work array for storing the eb_i in the order eb_1, .. eb_n size n*pnp */ + + double *dp; /* work array for storing the parameter vector updates db_1, ..., db_n, size n*pnp */ + + double *wght = /* work array for storing the weights computed from the covariance inverses, max. size n*m*mnp*mnp */ + NULL; + + /* Of the above arrays, jac, e, wght are sparse and + * V, eb, dp are dense. Sparse arrays are indexed through + * idxij (see below), that is with the same mechanism as the input + * measurements vector x + */ + + /* submatrices sizes */ + int Bsz, Vsz, esz, ebsz, covsz; + + register double *ptr1, *ptr2, *ptr3, *ptr4, sum; + struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location + * of B_ij in jac, etc. + * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous + * index k and it is used to efficiently lookup the memory locations where the non-zero + * blocks of a sparse matrix/vector are stored + */ + int maxCPvis, /* max. of projections across cameras & projections across points */ + *rcidxs, /* work array for the indexes corresponding to the nonzero elements of a single row or + column in a sparse matrix, size max(n, m) */ + *rcsubs; /* work array for the subscripts of nonzero elements in a single row or column of a + sparse matrix, size max(n, m) */ + + /* The following variables are needed by the LM algorithm */ + register int itno; /* iteration counter */ + int nsolved; + /* temporary work arrays that are dynamically allocated */ + double *hx, /* \hat{x}_i, max. size m*n*mnp */ + *diagV, /* diagonals of V_i, size n*pnp */ + *pdp; /* p + dp, size n*pnp */ + + register double mu, /* damping constant */ + tmp; /* mainly used in matrix & vector multiplications */ + double p_eL2, eb_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */ + double p_L2, dp_L2 = DBL_MAX, dF, dL; + double tau = FABS(opts[0]), eps1 = FABS(opts[1]), eps2 = FABS(opts[2]), eps2_sq = opts[2] * opts[2], + eps3_sq = opts[3] * opts[3], eps4_sq = opts[4] * opts[4]; + double init_p_eL2; + int nu = 2, nu2, stop = 0, nfev, njev = 0, nlss = 0; + int nobs, nvars; + PLS linsolver = NULL; + + struct fdj_data_x_ fdj_data; + void *jac_adata; + + /* Initialization */ + mu = eb_inf = tmp = 0.0; /* -Wall */ + + /* block sizes */ + Bsz = mnp * pnp; + Vsz = pnp * pnp; + esz = mnp; + ebsz = pnp; + covsz = mnp * mnp; + + /* count total number of visible image points */ + for (i = nvis = 0, jj = n * m; i < jj; ++i) + nvis += (vmask[i] != 0); + + nobs = nvis * mnp; + nvars = n * pnp; + if (nobs < nvars) { + fprintf(stderr, + "SBA: sba_str_levmar_x() cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", + nobs, nvars); + return SBA_ERROR; + } + + /* allocate & fill up the idxij structure */ + sba_crsm_alloc(&idxij, n, m, nvis); + for (i = k = 0; i < n; ++i) { + idxij.rowptr[i] = k; + ii = i * m; + for (j = 0; j < m; ++j) + if (vmask[ii + j]) { + idxij.val[k] = k; + idxij.colidx[k++] = j; + } + } + idxij.rowptr[n] = nvis; + + /* find the maximum number of visible image points in any single camera or coming from a single 3D point */ + /* cameras */ + for (i = maxCPvis = 0; i < n; ++i) + if ((k = idxij.rowptr[i + 1] - idxij.rowptr[i]) > maxCPvis) + maxCPvis = k; + + /* points, note that maxCPvis is not reinitialized! */ + for (j = 0; j < m; ++j) { + for (i = ii = 0; i < n; ++i) + if (vmask[i * m + j]) + ++ii; + if (ii > maxCPvis) + maxCPvis = ii; + } + + /* allocate work arrays */ + jac = (double *)emalloc(nvis * Bsz * sizeof(double)); + V = (double *)emalloc(n * Vsz * sizeof(double)); + e = (double *)emalloc(nobs * sizeof(double)); + eb = (double *)emalloc(nvars * sizeof(double)); + dp = (double *)emalloc(nvars * sizeof(double)); + rcidxs = (int *)emalloc(maxCPvis * sizeof(int)); + rcsubs = (int *)emalloc(maxCPvis * sizeof(int)); +#ifndef SBA_DESTROY_COVS + if (covx != NULL) + wght = (double *)emalloc(nvis * covsz * sizeof(double)); +#else + if (covx != NULL) + wght = covx; +#endif /* SBA_DESTROY_COVS */ + + hx = (double *)emalloc(nobs * sizeof(double)); + diagV = (double *)emalloc(nvars * sizeof(double)); + pdp = (double *)emalloc(nvars * sizeof(double)); + + /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */ + if (!fjac) { + fdj_data.func = func; + fdj_data.cnp = 0; + fdj_data.pnp = pnp; + fdj_data.mnp = mnp; + fdj_data.hx = hx; + fdj_data.hxx = (double *)emalloc(nobs * sizeof(double)); + fdj_data.func_rcidxs = (int *)emalloc(2 * maxCPvis * sizeof(int)); + fdj_data.func_rcsubs = fdj_data.func_rcidxs + maxCPvis; + fdj_data.adata = adata; + + fjac = sba_fdjac_x; + jac_adata = (void *)&fdj_data; + } else { + fdj_data.hxx = NULL; + jac_adata = adata; + } + + if (itmax == 0) { /* verify jacobian */ + sba_str_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, ncon, pnp, mnp, adata, jac_adata); + retval = 0; + goto freemem_and_return; + } + + /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their + * inverses and using the resulting matrices w_x_ij to weigh B_ij and e_ij as + * w_x_ij*B_ij and w_x_ij*e_ij. In this way, auxiliary variables as V_i=\sum_j B_ij^T B_ij + * actually become \sum_j (w_x_ij B_ij)^T w_x_ij B_ij= \sum_j B_ij^T w_x_ij^T w_x_ij B_ij = + * B_ij^T Sigma_x_ij^-1 B_ij + * + * eb_i are weighted in a similar manner + */ + if (covx != NULL) { + for (i = 0; i < n; ++i) { + nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */ + ptr1 = covx + (k = idxij.val[rcidxs[j]] * covsz); + ptr2 = wght + k; + if (!sba_mat_cholinv(ptr1, ptr2, mnp)) { /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */ + fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_motstr_levmar_x()\n", + i, rcsubs[j]); + retval = SBA_ERROR; + goto freemem_and_return; + } + } + } + sba_mat_cholinv(NULL, NULL, 0); /* cleanup */ + } + + /* compute the error vectors e_ij in hx */ + (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); + nfev = 1; + /* ### compute e=x - f(p) [e=w*(x - f(p)] and its L2 norm */ + if (covx == NULL) + p_eL2 = nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */ + else + p_eL2 = nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */ + if (verbose) + printf("initial str-SBA error %g [%g]\n", p_eL2, p_eL2 / nvis); + init_p_eL2 = p_eL2; + if (!SBA_FINITE(p_eL2)) + stop = 7; + + for (itno = 0; itno < itmax && !stop; ++itno) { + /* Note that p, e and ||e||_2 have been updated at the previous iteration */ + + /* compute derivative submatrices B_ij */ + (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); + ++njev; + + if (covx != NULL) { + /* compute w_x_ij B_ij. + * Since w_x_ij is upper triangular, the products can be safely saved + * directly in B_ij, without the need for intermediate storage + */ + for (i = 0; i < nvis; ++i) { + /* set ptr1, ptr2 to point to w_x_ij, B_ij, resp. */ + ptr1 = wght + i * covsz; + ptr2 = jac + i * Bsz; + + /* w_x_ij is mnp x mnp, B_ij is mnp x pnp */ + for (ii = 0; ii < mnp; ++ii) + for (jj = 0; jj < pnp; ++jj) { + for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular + sum += ptr1[ii * mnp + k] * ptr2[k * pnp + jj]; + ptr2[ii * pnp + jj] = sum; + } + } + } + + /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here! + /* V_i is symmetric, therefore its computation can be sped up by + * computing only the upper part and then reusing it for the lower one. + * Recall that B_ij is mnp x pnp + */ + /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here! + /* Recall that e_ij is mnp x 1 + */ + _dblzero(V, n * Vsz); /* clear all V_i */ + _dblzero(eb, n * ebsz); /* clear all eb_i */ + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i + + nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */ + ptr3 = jac + idxij.val[rcidxs[j]] * Bsz; + + /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */ + for (ii = 0; ii < pnp; ++ii) { + for (jj = ii; jj < pnp; ++jj) { + for (k = 0, sum = 0.0; k < mnp; ++k) + sum += ptr3[k * pnp + ii] * ptr3[k * pnp + jj]; + ptr1[ii * pnp + jj] += sum; + } + + /* copy the LOWER TRIANGULAR PART of V_i from the upper one */ + for (jj = 0; jj < ii; ++jj) + ptr1[ii * pnp + jj] = ptr1[jj * pnp + ii]; + } + + ptr4 = e + idxij.val[rcidxs[j]] * esz; /* set ptr4 to point to e_ij */ + /* compute B_ij^T e_ij and add it to eb_i */ + for (ii = 0; ii < pnp; ++ii) { + for (jj = 0, sum = 0.0; jj < mnp; ++jj) + sum += ptr3[jj * pnp + ii] * ptr4[jj]; + ptr2[ii] += sum; + } + } + } + + /* Compute ||J^T e||_inf and ||p||^2 */ + for (i = 0, p_L2 = eb_inf = 0.0; i < nvars; ++i) { + if (eb_inf < (tmp = FABS(eb[i]))) + eb_inf = tmp; + p_L2 += p[i] * p[i]; + } + // p_L2=sqrt(p_L2); + + /* save diagonal entries so that augmentation can be later canceled. + * Diagonal entries are in V_i + */ + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i + for (j = 0; j < pnp; ++j) + ptr2[j] = ptr1[j * pnp + j]; + } + + /* + if(!(itno%100)){ + printf("Current estimate: "); + for(i=0; i<nvars; ++i) + printf("%.9g ", p[i]); + printf("-- errors %.9g %0.9g\n", eb_inf, p_eL2); + } + */ + + /* check for convergence */ + if ((eb_inf <= eps1)) { + dp_L2 = 0.0; /* no increment for p in this case */ + stop = 1; + break; + } + + /* compute initial damping factor */ + if (itno == 0) { + for (i = ncon * pnp, tmp = DBL_MIN; i < nvars; ++i) + if (diagV[i] > tmp) + tmp = diagV[i]; /* find max diagonal element */ + mu = tau * tmp; + } + + /* determine increment using adaptive damping */ + while (1) { + /* augment V */ + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + for (j = 0; j < pnp; ++j) + ptr1[j * pnp + j] += mu; + } + + /* solve the linear systems V*_i db_i = eb_i to compute the db_i */ + _dblzero(dp, ncon * pnp); /* no change for the first ncon point params */ + for (i = nsolved = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i + ptr3 = dp + i * pnp; // set ptr3 to point to db_i + + // nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_LU; + nsolved += sba_Axb_Chol(ptr1, ptr2, ptr3, pnp, 0); + linsolver = sba_Axb_Chol; + // nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_BK; + // nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_QRnoQ; + // nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_QR; + // nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_SVD; + // nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, pnp, (3*pnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0); + // linsolver=(PLS)sba_Axb_CG; + + ++nlss; + } + + if (nsolved == n) { + + /* parameter vector updates are now in dp */ + + /* compute p's new estimate and ||dp||^2 */ + for (i = 0, dp_L2 = 0.0; i < nvars; ++i) { + pdp[i] = p[i] + (tmp = dp[i]); + dp_L2 += tmp * tmp; + } + // dp_L2=sqrt(dp_L2); + + if (dp_L2 <= eps2_sq * p_L2) { /* relative change in p is small, stop */ + // if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */ + stop = 2; + break; + } + + if (dp_L2 >= (p_L2 + eps2) / SBA_EPSILON_SQ) { /* almost singular */ + // if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */ + fprintf(stderr, "SBA: the matrix of the augmented normal equations is almost singular in " + "sba_motstr_levmar_x(),\n" + " minimization should be restarted from the current solution with an increased " + "damping term\n"); + retval = SBA_ERROR; + goto freemem_and_return; + } + + (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); + ++nfev; /* evaluate function at p + dp */ + if (verbose > 1) + printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs)); + /* ### compute ||e(pdp)||_2 */ + if (covx == NULL) + pdp_eL2 = nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */ + else + pdp_eL2 = nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */ + if (!SBA_FINITE(pdp_eL2)) { + if (verbose) /* identify the offending point projection */ + sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs); + + stop = 7; + break; + } + + for (i = 0, dL = 0.0; i < nvars; ++i) + dL += dp[i] * (mu * dp[i] + eb[i]); + + dF = p_eL2 - pdp_eL2; + + if (verbose > 1) + printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, + dL != 0.0 ? dF / dL : dF / DBL_EPSILON, p_eL2 / nvis, pdp_eL2 / nvis, p_eL2 / pdp_eL2); + + if (dL > 0.0 && dF > 0.0) { /* reduction in error, increment is accepted */ + tmp = (2.0 * dF / dL - 1.0); + tmp = 1.0 - tmp * tmp * tmp; + mu = mu * ((tmp >= SBA_ONE_THIRD) ? tmp : SBA_ONE_THIRD); + nu = 2; + + /* the test below is equivalent to the relative reduction of the RMS reprojection error: + * sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2) */ + if (pdp_eL2 - 2.0 * sqrt(p_eL2 * pdp_eL2) < (eps4_sq - 1.0) * p_eL2) + stop = 4; + + for (i = 0; i < nvars; ++i) /* update p's estimate */ + p[i] = pdp[i]; + + for (i = 0; i < nobs; ++i) /* update e and ||e||_2 */ + e[i] = hx[i]; + p_eL2 = pdp_eL2; + break; + } + } /* nsolved==n */ + + /* if this point is reached, either at least one linear system could not be solved or + * the error did not reduce; in any case, the increment must be rejected + */ + + mu *= nu; + nu2 = nu << 1; // 2*nu; + if (nu2 <= nu) { /* nu has wrapped around (overflown) */ + fprintf(stderr, "SBA: too many failed attempts to increase the damping factor in sba_str_levmar_x()! " + "Singular Hessian matrix?\n"); + // exit(1); + stop = 6; + break; + } + nu = nu2; + +#if 0 + /* restore V diagonal entries */ + for(i=ncon; i<n; ++i){ + ptr1=V + i*Vsz; // set ptr1 to point to V_i + ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i + for(j=0; j<pnp; ++j) + ptr1[j*pnp+j]=ptr2[j]; + } +#endif + } /* inner while loop */ + + if (p_eL2 <= eps3_sq) + stop = 5; // error is small, force termination of outer loop + } + + if (itno >= itmax) + stop = 3; + + /* restore V diagonal entries */ + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i + for (j = 0; j < pnp; ++j) + ptr1[j * pnp + j] = ptr2[j]; + } + + if (info) { + info[0] = init_p_eL2; + info[1] = p_eL2; + info[2] = eb_inf; + info[3] = dp_L2; + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + for (j = 0; j < pnp; ++j) + if (tmp < ptr1[j * pnp + j]) + tmp = ptr1[j * pnp + j]; + } + info[4] = mu / tmp; + info[5] = itno; + info[6] = stop; + info[7] = nfev; + info[8] = njev; + info[9] = nlss; + } + // sba_print_sol(n, m, p, 0, pnp, x, mnp, &idxij, rcidxs, rcsubs); + retval = (stop != 7) ? itno : SBA_ERROR; + +freemem_and_return: /* NOTE: this point is also reached via a goto! */ + + /* free whatever was allocated */ + free(jac); + free(V); + free(e); + free(eb); + free(dp); + free(rcidxs); + free(rcsubs); +#ifndef SBA_DESTROY_COVS + if (wght) + free(wght); +#else +/* nothing to do */ +#endif /* SBA_DESTROY_COVS */ + + free(hx); + free(diagV); + free(pdp); + if (fdj_data.hxx) { // cleanup + free(fdj_data.hxx); + free(fdj_data.func_rcidxs); + } + + sba_crsm_free(&idxij); + + /* free the memory allocated by the linear solver routine */ + if (linsolver) + (*linsolver)(NULL, NULL, NULL, 0, 0); + + return retval; +} diff --git a/redist/sba/sba_levmar_wrap.c b/redist/sba/sba_levmar_wrap.c new file mode 100644 index 0000000..159a040 --- /dev/null +++ b/redist/sba/sba_levmar_wrap.c @@ -0,0 +1,917 @@ +///////////////////////////////////////////////////////////////////////////////// +//// +//// Simple drivers for sparse bundle adjustment based on the +//// Levenberg - Marquardt minimization algorithm +//// This file provides simple wrappers to the functions defined in sba_levmar.c +//// Copyright (C) 2004-2009 Manolis Lourakis (lourakis at ics forth gr) +//// Institute of Computer Science, Foundation for Research & Technology - Hellas +//// Heraklion, Crete, Greece. +//// +//// This program is free software; you can redistribute it and/or modify +//// it under the terms of the GNU General Public License as published by +//// the Free Software Foundation; either version 2 of the License, or +//// (at your option) any later version. +//// +//// This program is distributed in the hope that it will be useful, +//// but WITHOUT ANY WARRANTY; without even the implied warranty of +//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//// GNU General Public License for more details. +//// +/////////////////////////////////////////////////////////////////////////////////// + +#include <float.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "sba.h" + +#define FABS(x) (((x) >= 0) ? (x) : -(x)) + +struct wrap_motstr_data_ { + void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata); // Q + void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij, void *adata); // dQ/da, dQ/db + int cnp, pnp, mnp; /* parameter numbers */ + void *adata; +}; + +struct wrap_mot_data_ { + void (*proj)(int j, int i, double *aj, double *xij, void *adata); // Q + void (*projac)(int j, int i, double *aj, double *Aij, void *adata); // dQ/da + int cnp, mnp; /* parameter numbers */ + void *adata; +}; + +struct wrap_str_data_ { + void (*proj)(int j, int i, double *bi, double *xij, void *adata); // Q + void (*projac)(int j, int i, double *bi, double *Bij, void *adata); // dQ/db + int pnp, mnp; /* parameter numbers */ + void *adata; +}; + +/* Routines to estimate the estimated measurement vector (i.e. "func") and + * its sparse jacobian (i.e. "fjac") needed by BA expert drivers. Code below + * makes use of user-supplied functions computing "Q", "dQ/da", d"Q/db", + * i.e. predicted projection and associated jacobians for a SINGLE image measurement. + * Notice also that what follows is two pairs of "func" and corresponding "fjac" routines. + * The first is to be used in full (i.e. motion + structure) BA, the second in + * motion only BA. + */ + +/* FULL BUNDLE ADJUSTMENT */ + +/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in + * hx the prediction of the measurements, i.e. the projections of 3D points in the m images. The measurements + * are returned in the order (hx_11^T, .. hx_1m^T, ..., hx_n1^T, .. hx_nm^T)^T, where hx_ij is the predicted + * projection of the i-th point on the j-th camera. + * Caller supplies rcidxs and rcsubs which can be used as working memory. + * Notice that depending on idxij, some of the hx_ij might be missing + * + */ +static void sba_motstr_Qs(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata) { + register int i, j; + int cnp, pnp, mnp; + double *pa, *pb, *paj, *pbi, *pxij; + int n, m, nnz; + struct wrap_motstr_data_ *wdata; + void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *proj_adata); + void *proj_adata; + + wdata = (struct wrap_motstr_data_ *)adata; + cnp = wdata->cnp; + pnp = wdata->pnp; + mnp = wdata->mnp; + proj = wdata->proj; + proj_adata = wdata->adata; + + n = idxij->nr; + m = idxij->nc; + pa = p; + pb = p + m * cnp; + + for (j = 0; j < m; ++j) { + /* j-th camera parameters */ + paj = pa + j * cnp; + + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */ + + for (i = 0; i < nnz; ++i) { + pbi = pb + rcsubs[i] * pnp; + pxij = hx + idxij->val[rcidxs[i]] * mnp; // set pxij to point to hx_ij + + (*proj)(j, rcsubs[i], paj, pbi, pxij, proj_adata); // evaluate Q in pxij + } + } +} + +/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in + * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. + * The jacobian is returned in the order (A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm), + * where A_ij=dx_ij/db_j and B_ij=dx_ij/db_i (see HZ). + * Caller supplies rcidxs and rcsubs which can be used as working memory. + * Notice that depending on idxij, some of the A_ij, B_ij might be missing + * + */ +static void sba_motstr_Qs_jac(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata) { + register int i, j; + int cnp, pnp, mnp; + double *pa, *pb, *paj, *pbi, *pAij, *pBij; + int n, m, nnz, Asz, Bsz, ABsz, idx; + struct wrap_motstr_data_ *wdata; + void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij, void *projac_adata); + void *projac_adata; + + wdata = (struct wrap_motstr_data_ *)adata; + cnp = wdata->cnp; + pnp = wdata->pnp; + mnp = wdata->mnp; + projac = wdata->projac; + projac_adata = wdata->adata; + + n = idxij->nr; + m = idxij->nc; + pa = p; + pb = p + m * cnp; + Asz = mnp * cnp; + Bsz = mnp * pnp; + ABsz = Asz + Bsz; + + for (j = 0; j < m; ++j) { + /* j-th camera parameters */ + paj = pa + j * cnp; + + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */ + + for (i = 0; i < nnz; ++i) { + pbi = pb + rcsubs[i] * pnp; + idx = idxij->val[rcidxs[i]]; + pAij = jac + idx * ABsz; // set pAij to point to A_ij + pBij = pAij + Asz; // set pBij to point to B_ij + + (*projac)(j, rcsubs[i], paj, pbi, pAij, pBij, projac_adata); // evaluate dQ/da, dQ/db in pAij, pBij + } + } +} + +/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in + * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. + * The jacobian is approximated with the aid of finite differences and is returned in the order + * (A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm), + * where A_ij=dx_ij/da_j and B_ij=dx_ij/db_i (see HZ). + * Notice that depending on idxij, some of the A_ij, B_ij might be missing + * + * Problem-specific information is assumed to be stored in a structure pointed to by "dat". + * + * NOTE: This function is provided mainly for illustration purposes; in case that execution time is a concern, + * the jacobian should be computed analytically + */ +static void sba_motstr_Qs_fdjac( + double *p, /* I: current parameter estimate, (m*cnp+n*pnp)x1 */ + struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */ + int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */ + int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */ + double *jac, /* O: array for storing the approximated jacobian */ + void *dat) /* I: points to a "wrap_motstr_data_" structure */ +{ + register int i, j, ii, jj; + double *pa, *pb, *paj, *pbi; + register double *pAB; + int n, m, nnz, Asz, Bsz, ABsz; + + double tmp; + register double d, d1; + + struct wrap_motstr_data_ *fdjd; + void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata); + double *hxij, *hxxij; + int cnp, pnp, mnp; + void *adata; + + /* retrieve problem-specific information passed in *dat */ + fdjd = (struct wrap_motstr_data_ *)dat; + proj = fdjd->proj; + cnp = fdjd->cnp; + pnp = fdjd->pnp; + mnp = fdjd->mnp; + adata = fdjd->adata; + + n = idxij->nr; + m = idxij->nc; + pa = p; + pb = p + m * cnp; + Asz = mnp * cnp; + Bsz = mnp * pnp; + ABsz = Asz + Bsz; + + /* allocate memory for hxij, hxxij */ + if ((hxij = malloc(2 * mnp * sizeof(double))) == NULL) { + fprintf(stderr, "memory allocation request failed in sba_motstr_Qs_fdjac()!\n"); + exit(1); + } + hxxij = hxij + mnp; + + /* compute A_ij */ + for (j = 0; j < m; ++j) { + paj = pa + j * cnp; // j-th camera parameters + + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ + for (jj = 0; jj < cnp; ++jj) { + /* determine d=max(SBA_DELTA_SCALE*|paj[jj]|, SBA_MIN_DELTA), see HZ */ + d = (double)(SBA_DELTA_SCALE)*paj[jj]; // force evaluation + d = FABS(d); + if (d < SBA_MIN_DELTA) + d = SBA_MIN_DELTA; + d1 = 1.0 / d; /* invert so that divisions can be carried out faster as multiplications */ + + for (i = 0; i < nnz; ++i) { + pbi = pb + rcsubs[i] * pnp; // i-th point parameters + (*proj)(j, rcsubs[i], paj, pbi, hxij, adata); // evaluate supplied function on current solution + + tmp = paj[jj]; + paj[jj] += d; + (*proj)(j, rcsubs[i], paj, pbi, hxxij, adata); + paj[jj] = tmp; /* restore */ + + pAB = jac + idxij->val[rcidxs[i]] * ABsz; // set pAB to point to A_ij + for (ii = 0; ii < mnp; ++ii) + pAB[ii * cnp + jj] = (hxxij[ii] - hxij[ii]) * d1; + } + } + } + + /* compute B_ij */ + for (i = 0; i < n; ++i) { + pbi = pb + i * pnp; // i-th point parameters + + nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ + for (jj = 0; jj < pnp; ++jj) { + /* determine d=max(SBA_DELTA_SCALE*|pbi[jj]|, SBA_MIN_DELTA), see HZ */ + d = (double)(SBA_DELTA_SCALE)*pbi[jj]; // force evaluation + d = FABS(d); + if (d < SBA_MIN_DELTA) + d = SBA_MIN_DELTA; + d1 = 1.0 / d; /* invert so that divisions can be carried out faster as multiplications */ + + for (j = 0; j < nnz; ++j) { + paj = pa + rcsubs[j] * cnp; // j-th camera parameters + (*proj)(rcsubs[j], i, paj, pbi, hxij, adata); // evaluate supplied function on current solution + + tmp = pbi[jj]; + pbi[jj] += d; + (*proj)(rcsubs[j], i, paj, pbi, hxxij, adata); + pbi[jj] = tmp; /* restore */ + + pAB = jac + idxij->val[rcidxs[j]] * ABsz + Asz; // set pAB to point to B_ij + for (ii = 0; ii < mnp; ++ii) + pAB[ii * pnp + jj] = (hxxij[ii] - hxij[ii]) * d1; + } + } + } + + free(hxij); +} + +/* BUNDLE ADJUSTMENT FOR CAMERA PARAMETERS ONLY */ + +/* Given a parameter vector p made up of the parameters of m cameras, compute in + * hx the prediction of the measurements, i.e. the projections of 3D points in the m images. + * The measurements are returned in the order (hx_11^T, .. hx_1m^T, ..., hx_n1^T, .. hx_nm^T)^T, + * where hx_ij is the predicted projection of the i-th point on the j-th camera. + * Caller supplies rcidxs and rcsubs which can be used as working memory. + * Notice that depending on idxij, some of the hx_ij might be missing + * + */ +static void sba_mot_Qs(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata) { + register int i, j; + int cnp, mnp; + double *paj, *pxij; + // int n; + int m, nnz; + struct wrap_mot_data_ *wdata; + void (*proj)(int j, int i, double *aj, double *xij, void *proj_adata); + void *proj_adata; + + wdata = (struct wrap_mot_data_ *)adata; + cnp = wdata->cnp; + mnp = wdata->mnp; + proj = wdata->proj; + proj_adata = wdata->adata; + + // n=idxij->nr; + m = idxij->nc; + + for (j = 0; j < m; ++j) { + /* j-th camera parameters */ + paj = p + j * cnp; + + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */ + + for (i = 0; i < nnz; ++i) { + pxij = hx + idxij->val[rcidxs[i]] * mnp; // set pxij to point to hx_ij + + (*proj)(j, rcsubs[i], paj, pxij, proj_adata); // evaluate Q in pxij + } + } +} + +/* Given a parameter vector p made up of the parameters of m cameras, compute in jac + * the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. + * The jacobian is returned in the order (A_11, ..., A_1m, ..., A_n1, ..., A_nm), + * where A_ij=dx_ij/db_j (see HZ). + * Caller supplies rcidxs and rcsubs which can be used as working memory. + * Notice that depending on idxij, some of the A_ij might be missing + * + */ +static void sba_mot_Qs_jac(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata) { + register int i, j; + int cnp, mnp; + double *paj, *pAij; + // int n; + int m, nnz, Asz, idx; + struct wrap_mot_data_ *wdata; + void (*projac)(int j, int i, double *aj, double *Aij, void *projac_adata); + void *projac_adata; + + wdata = (struct wrap_mot_data_ *)adata; + cnp = wdata->cnp; + mnp = wdata->mnp; + projac = wdata->projac; + projac_adata = wdata->adata; + + // n=idxij->nr; + m = idxij->nc; + Asz = mnp * cnp; + + for (j = 0; j < m; ++j) { + /* j-th camera parameters */ + paj = p + j * cnp; + + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */ + + for (i = 0; i < nnz; ++i) { + idx = idxij->val[rcidxs[i]]; + pAij = jac + idx * Asz; // set pAij to point to A_ij + + (*projac)(j, rcsubs[i], paj, pAij, projac_adata); // evaluate dQ/da in pAij + } + } +} + +/* Given a parameter vector p made up of the parameters of m cameras, compute in jac the jacobian + * of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. + * The jacobian is approximated with the aid of finite differences and is returned in the order + * (A_11, ..., A_1m, ..., A_n1, ..., A_nm), where A_ij=dx_ij/da_j (see HZ). + * Notice that depending on idxij, some of the A_ij might be missing + * + * Problem-specific information is assumed to be stored in a structure pointed to by "dat". + * + * NOTE: This function is provided mainly for illustration purposes; in case that execution time is a concern, + * the jacobian should be computed analytically + */ +static void sba_mot_Qs_fdjac( + double *p, /* I: current parameter estimate, (m*cnp)x1 */ + struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */ + int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */ + int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */ + double *jac, /* O: array for storing the approximated jacobian */ + void *dat) /* I: points to a "wrap_mot_data_" structure */ +{ + register int i, j, ii, jj; + double *paj; + register double *pA; + // int n; + int m, nnz, Asz; + + double tmp; + register double d, d1; + + struct wrap_mot_data_ *fdjd; + void (*proj)(int j, int i, double *aj, double *xij, void *adata); + double *hxij, *hxxij; + int cnp, mnp; + void *adata; + + /* retrieve problem-specific information passed in *dat */ + fdjd = (struct wrap_mot_data_ *)dat; + proj = fdjd->proj; + cnp = fdjd->cnp; + mnp = fdjd->mnp; + adata = fdjd->adata; + + // n=idxij->nr; + m = idxij->nc; + Asz = mnp * cnp; + + /* allocate memory for hxij, hxxij */ + if ((hxij = malloc(2 * mnp * sizeof(double))) == NULL) { + fprintf(stderr, "memory allocation request failed in sba_mot_Qs_fdjac()!\n"); + exit(1); + } + hxxij = hxij + mnp; + + /* compute A_ij */ + for (j = 0; j < m; ++j) { + paj = p + j * cnp; // j-th camera parameters + + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ + for (jj = 0; jj < cnp; ++jj) { + /* determine d=max(SBA_DELTA_SCALE*|paj[jj]|, SBA_MIN_DELTA), see HZ */ + d = (double)(SBA_DELTA_SCALE)*paj[jj]; // force evaluation + d = FABS(d); + if (d < SBA_MIN_DELTA) + d = SBA_MIN_DELTA; + d1 = 1.0 / d; /* invert so that divisions can be carried out faster as multiplications */ + + for (i = 0; i < nnz; ++i) { + (*proj)(j, rcsubs[i], paj, hxij, adata); // evaluate supplied function on current solution + + tmp = paj[jj]; + paj[jj] += d; + (*proj)(j, rcsubs[i], paj, hxxij, adata); + paj[jj] = tmp; /* restore */ + + pA = jac + idxij->val[rcidxs[i]] * Asz; // set pA to point to A_ij + for (ii = 0; ii < mnp; ++ii) + pA[ii * cnp + jj] = (hxxij[ii] - hxij[ii]) * d1; + } + } + } + + free(hxij); +} + +/* BUNDLE ADJUSTMENT FOR STRUCTURE PARAMETERS ONLY */ + +/* Given a parameter vector p made up of the 3D coordinates of n points, compute in + * hx the prediction of the measurements, i.e. the projections of 3D points in the m images. The measurements + * are returned in the order (hx_11^T, .. hx_1m^T, ..., hx_n1^T, .. hx_nm^T)^T, where hx_ij is the predicted + * projection of the i-th point on the j-th camera. + * Caller supplies rcidxs and rcsubs which can be used as working memory. + * Notice that depending on idxij, some of the hx_ij might be missing + * + */ +static void sba_str_Qs(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata) { + register int i, j; + int pnp, mnp; + double *pbi, *pxij; + // int n; + int m, nnz; + struct wrap_str_data_ *wdata; + void (*proj)(int j, int i, double *bi, double *xij, void *proj_adata); + void *proj_adata; + + wdata = (struct wrap_str_data_ *)adata; + pnp = wdata->pnp; + mnp = wdata->mnp; + proj = wdata->proj; + proj_adata = wdata->adata; + + // n=idxij->nr; + m = idxij->nc; + + for (j = 0; j < m; ++j) { + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */ + + for (i = 0; i < nnz; ++i) { + pbi = p + rcsubs[i] * pnp; + pxij = hx + idxij->val[rcidxs[i]] * mnp; // set pxij to point to hx_ij + + (*proj)(j, rcsubs[i], pbi, pxij, proj_adata); // evaluate Q in pxij + } + } +} + +/* Given a parameter vector p made up of the 3D coordinates of n points, compute in + * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. + * The jacobian is returned in the order (B_11, ..., B_1m, ..., B_n1, ..., B_nm), where B_ij=dx_ij/db_i (see HZ). + * Caller supplies rcidxs and rcsubs which can be used as working memory. + * Notice that depending on idxij, some of the B_ij might be missing + * + */ +static void sba_str_Qs_jac(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata) { + register int i, j; + int pnp, mnp; + double *pbi, *pBij; + // int n; + int m, nnz, Bsz, idx; + struct wrap_str_data_ *wdata; + void (*projac)(int j, int i, double *bi, double *Bij, void *projac_adata); + void *projac_adata; + + wdata = (struct wrap_str_data_ *)adata; + pnp = wdata->pnp; + mnp = wdata->mnp; + projac = wdata->projac; + projac_adata = wdata->adata; + + // n=idxij->nr; + m = idxij->nc; + Bsz = mnp * pnp; + + for (j = 0; j < m; ++j) { + + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */ + + for (i = 0; i < nnz; ++i) { + pbi = p + rcsubs[i] * pnp; + idx = idxij->val[rcidxs[i]]; + pBij = jac + idx * Bsz; // set pBij to point to B_ij + + (*projac)(j, rcsubs[i], pbi, pBij, projac_adata); // evaluate dQ/db in pBij + } + } +} + +/* Given a parameter vector p made up of the 3D coordinates of n points, compute in + * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. + * The jacobian is approximated with the aid of finite differences and is returned in the order + * (B_11, ..., B_1m, ..., B_n1, ..., B_nm), where B_ij=dx_ij/db_i (see HZ). + * Notice that depending on idxij, some of the B_ij might be missing + * + * Problem-specific information is assumed to be stored in a structure pointed to by "dat". + * + * NOTE: This function is provided mainly for illustration purposes; in case that execution time is a concern, + * the jacobian should be computed analytically + */ +static void sba_str_Qs_fdjac( + double *p, /* I: current parameter estimate, (n*pnp)x1 */ + struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */ + int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */ + int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */ + double *jac, /* O: array for storing the approximated jacobian */ + void *dat) /* I: points to a "wrap_str_data_" structure */ +{ + register int i, j, ii, jj; + double *pbi; + register double *pB; + // int m; + int n, nnz, Bsz; + + double tmp; + register double d, d1; + + struct wrap_str_data_ *fdjd; + void (*proj)(int j, int i, double *bi, double *xij, void *adata); + double *hxij, *hxxij; + int pnp, mnp; + void *adata; + + /* retrieve problem-specific information passed in *dat */ + fdjd = (struct wrap_str_data_ *)dat; + proj = fdjd->proj; + pnp = fdjd->pnp; + mnp = fdjd->mnp; + adata = fdjd->adata; + + n = idxij->nr; + // m=idxij->nc; + Bsz = mnp * pnp; + + /* allocate memory for hxij, hxxij */ + if ((hxij = malloc(2 * mnp * sizeof(double))) == NULL) { + fprintf(stderr, "memory allocation request failed in sba_str_Qs_fdjac()!\n"); + exit(1); + } + hxxij = hxij + mnp; + + /* compute B_ij */ + for (i = 0; i < n; ++i) { + pbi = p + i * pnp; // i-th point parameters + + nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ + for (jj = 0; jj < pnp; ++jj) { + /* determine d=max(SBA_DELTA_SCALE*|pbi[jj]|, SBA_MIN_DELTA), see HZ */ + d = (double)(SBA_DELTA_SCALE)*pbi[jj]; // force evaluation + d = FABS(d); + if (d < SBA_MIN_DELTA) + d = SBA_MIN_DELTA; + d1 = 1.0 / d; /* invert so that divisions can be carried out faster as multiplications */ + + for (j = 0; j < nnz; ++j) { + (*proj)(rcsubs[j], i, pbi, hxij, adata); // evaluate supplied function on current solution + + tmp = pbi[jj]; + pbi[jj] += d; + (*proj)(rcsubs[j], i, pbi, hxxij, adata); + pbi[jj] = tmp; /* restore */ + + pB = jac + idxij->val[rcidxs[j]] * Bsz; // set pB to point to B_ij + for (ii = 0; ii < mnp; ++ii) + pB[ii * pnp + jj] = (hxxij[ii] - hxij[ii]) * d1; + } + } + } + + free(hxij); +} + +/* + * Simple driver to sba_motstr_levmar_x for bundle adjustment on camera and structure parameters. + * + * Returns the number of iterations (>=0) if successfull, SBA_ERROR if failed + */ + +int sba_motstr_levmar( + const int n, /* number of points */ + const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified. + * All B_ij (see below) with i<ncon are assumed to be zero + */ + const int m, /* number of images */ + const int mcon, /* number of images (starting from the 1st) whose parameters should not be modified. + * All A_ij (see below) with j<mcon are assumed to be zero + */ + char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */ + double *p, /* initial parameter vector p0: (a1, ..., am, b1, ..., bn). + * aj are the image j parameters, bi are the i-th point parameters, + * size m*cnp + n*pnp + */ + const int cnp, /* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */ + const int pnp, /* number of parameters for ONE point; e.g. 3 for Euclidean points */ + double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where + * x_ij is the projection of the i-th point on the j-th image. + * NOTE: some of the x_ij might be missing, if point i is not visible in image j; + * see vmask[i, j], max. size n*m*mnp + */ + double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm), + * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no + * covariance estimates are available (identity matrices are implicitly used in this case). + * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing; + * see vmask[i, j], max. size n*m*mnp*mnp + */ + const int mnp, /* number of parameters for EACH measurement; usually 2 */ + void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata), + /* functional relation computing a SINGLE image measurement. Assuming that + * the parameters of point i are bi and the parameters of camera j aj, + * computes a prediction of \hat{x}_{ij}. aj is cnp x 1, bi is pnp x 1 and + * xij is mnp x 1. This function is called only if point i is visible in + * image j (i.e. vmask[i, j]==1) + */ + void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij, void *adata), + /* functional relation to evaluate d x_ij / d a_j and + * d x_ij / d b_i in Aij and Bij resp. + * This function is called only if point i is visible in * image j + * (i.e. vmask[i, j]==1). Also, A_ij and B_ij are mnp x cnp and mnp x pnp + * matrices resp. and they should be stored in row-major order. + * + * If NULL, the jacobians are approximated by repetitive proj calls + * and finite differences. + */ + void *adata, /* pointer to possibly additional data, passed uninterpreted to proj, projac */ + + const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate + return */ + const int verbose, /* I: verbosity */ + const double opts[SBA_OPTSSZ], + /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial + * \mu, + * stoping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2 + */ + double info[SBA_INFOSZ] + /* O: information regarding the minimization. Set to NULL if don't care + * info[0]=||e||_2 at initial p. + * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. + * info[5]= # iterations, + * info[6]=reason for terminating: 1 - stopped by small gradient J^T e + * 2 - stopped by small dp + * 3 - stopped by itmax + * 4 - stopped by small relative reduction in ||e||_2 + * 5 - too many attempts to increase damping. Restart with increased mu + * 6 - stopped by small ||e||_2 + * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error + * info[7]= # function evaluations + * info[8]= # jacobian evaluations + * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error + */ + ) { + int retval; + struct wrap_motstr_data_ wdata; + static void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata); + + wdata.proj = proj; + wdata.projac = projac; + wdata.cnp = cnp; + wdata.pnp = pnp; + wdata.mnp = mnp; + wdata.adata = adata; + + fjac = (projac) ? sba_motstr_Qs_jac : sba_motstr_Qs_fdjac; + retval = sba_motstr_levmar_x(n, ncon, m, mcon, vmask, p, cnp, pnp, x, covx, mnp, sba_motstr_Qs, fjac, &wdata, itmax, + verbose, opts, info); + + if (info) { + register int i; + int nvis; + + /* count visible image points */ + for (i = nvis = 0; i < n * m; ++i) + nvis += (vmask[i] != 0); + + /* each "func" & "fjac" evaluation requires nvis "proj" & "projac" evaluations */ + info[7] *= nvis; + info[8] *= nvis; + } + + return retval; +} + +/* + * Simple driver to sba_mot_levmar_x for bundle adjustment on camera parameters. + * + * Returns the number of iterations (>=0) if successfull, SBA_ERROR if failed + */ + +int sba_mot_levmar( + const int n, /* number of points */ + const int m, /* number of images */ + const int mcon, /* number of images (starting from the 1st) whose parameters should not be modified. + * All A_ij (see below) with j<mcon are assumed to be zero + */ + char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */ + double *p, /* initial parameter vector p0: (a1, ..., am). + * aj are the image j parameters, size m*cnp */ + const int cnp, /* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */ + double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where + * x_ij is the projection of the i-th point on the j-th image. + * NOTE: some of the x_ij might be missing, if point i is not visible in image j; + * see vmask[i, j], max. size n*m*mnp + */ + double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm), + * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no + * covariance estimates are available (identity matrices are implicitly used in this case). + * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing; + * see vmask[i, j], max. size n*m*mnp*mnp + */ + const int mnp, /* number of parameters for EACH measurement; usually 2 */ + void (*proj)(int j, int i, double *aj, double *xij, void *adata), + /* functional relation computing a SINGLE image measurement. Assuming that + * the parameters of camera j are aj, computes a prediction of \hat{x}_{ij} + * for point i. aj is cnp x 1 and xij is mnp x 1. + * This function is called only if point i is visible in image j (i.e. vmask[i, j]==1) + */ + void (*projac)(int j, int i, double *aj, double *Aij, void *adata), + /* functional relation to evaluate d x_ij / d a_j in Aij + * This function is called only if point i is visible in image j + * (i.e. vmask[i, j]==1). Also, A_ij are a mnp x cnp matrices + * and should be stored in row-major order. + * + * If NULL, the jacobian is approximated by repetitive proj calls + * and finite differences. + */ + void *adata, /* pointer to possibly additional data, passed uninterpreted to proj, projac */ + + const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate + return */ + const int verbose, /* I: verbosity */ + const double opts[SBA_OPTSSZ], + /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon]. Respectively the scale factor for initial + * \mu, + * stoping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2 + */ + double info[SBA_INFOSZ] + /* O: information regarding the minimization. Set to NULL if don't care + * info[0]=||e||_2 at initial p. + * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. + * info[5]= # iterations, + * info[6]=reason for terminating: 1 - stopped by small gradient J^T e + * 2 - stopped by small dp + * 3 - stopped by itmax + * 4 - stopped by small relative reduction in ||e||_2 + * 5 - too many attempts to increase damping. Restart with increased mu + * 6 - stopped by small ||e||_2 + * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error + * info[7]= # function evaluations + * info[8]= # jacobian evaluations + * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error + */ + ) { + int retval; + struct wrap_mot_data_ wdata; + void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata); + + wdata.proj = proj; + wdata.projac = projac; + wdata.cnp = cnp; + wdata.mnp = mnp; + wdata.adata = adata; + + fjac = (projac) ? sba_mot_Qs_jac : sba_mot_Qs_fdjac; + retval = + sba_mot_levmar_x(n, m, mcon, vmask, p, cnp, x, covx, mnp, sba_mot_Qs, fjac, &wdata, itmax, verbose, opts, info); + + if (info) { + register int i; + int nvis; + + /* count visible image points */ + for (i = nvis = 0; i < n * m; ++i) + nvis += (vmask[i] != 0); + + /* each "func" & "fjac" evaluation requires nvis "proj" & "projac" evaluations */ + info[7] *= nvis; + info[8] *= nvis; + } + + return retval; +} + +/* + * Simple driver to sba_str_levmar_x for bundle adjustment on structure parameters. + * + * Returns the number of iterations (>=0) if successfull, SBA_ERROR if failed + */ + +int sba_str_levmar( + const int n, /* number of points */ + const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified. + * All B_ij (see below) with i<ncon are assumed to be zero + */ + const int m, /* number of images */ + char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */ + double *p, /* initial parameter vector p0: (b1, ..., bn). + * bi are the i-th point parameters, size n*pnp + */ + const int pnp, /* number of parameters for ONE point; e.g. 3 for Euclidean points */ + double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where + * x_ij is the projection of the i-th point on the j-th image. + * NOTE: some of the x_ij might be missing, if point i is not visible in image j; + * see vmask[i, j], max. size n*m*mnp + */ + double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm), + * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no + * covariance estimates are available (identity matrices are implicitly used in this case). + * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing; + * see vmask[i, j], max. size n*m*mnp*mnp + */ + const int mnp, /* number of parameters for EACH measurement; usually 2 */ + void (*proj)(int j, int i, double *bi, double *xij, void *adata), + /* functional relation computing a SINGLE image measurement. Assuming that + * the parameters of point i are bi, computes a prediction of \hat{x}_{ij}. + * bi is pnp x 1 and xij is mnp x 1. This function is called only if point + * i is visible in image j (i.e. vmask[i, j]==1) + */ + void (*projac)(int j, int i, double *bi, double *Bij, void *adata), + /* functional relation to evaluate d x_ij / d b_i in Bij. + * This function is called only if point i is visible in image j + * (i.e. vmask[i, j]==1). Also, B_ij are mnp x pnp matrices + * and they should be stored in row-major order. + * + * If NULL, the jacobians are approximated by repetitive proj calls + * and finite differences. + */ + void *adata, /* pointer to possibly additional data, passed uninterpreted to proj, projac */ + + const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate + return */ + const int verbose, /* I: verbosity */ + const double opts[SBA_OPTSSZ], + /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial + * \mu, + * stoping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2 + */ + double info[SBA_INFOSZ] + /* O: information regarding the minimization. Set to NULL if don't care + * info[0]=||e||_2 at initial p. + * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. + * info[5]= # iterations, + * info[6]=reason for terminating: 1 - stopped by small gradient J^T e + * 2 - stopped by small dp + * 3 - stopped by itmax + * 4 - stopped by small relative reduction in ||e||_2 + * 5 - too many attempts to increase damping. Restart with increased mu + * 6 - stopped by small ||e||_2 + * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error + * info[7]= # function evaluations + * info[8]= # jacobian evaluations + * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error + */ + ) { + int retval; + struct wrap_str_data_ wdata; + static void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata); + + wdata.proj = proj; + wdata.projac = projac; + wdata.pnp = pnp; + wdata.mnp = mnp; + wdata.adata = adata; + + fjac = (projac) ? sba_str_Qs_jac : sba_str_Qs_fdjac; + retval = + sba_str_levmar_x(n, ncon, m, vmask, p, pnp, x, covx, mnp, sba_str_Qs, fjac, &wdata, itmax, verbose, opts, info); + + if (info) { + register int i; + int nvis; + + /* count visible image points */ + for (i = nvis = 0; i < n * m; ++i) + nvis += (vmask[i] != 0); + + /* each "func" & "fjac" evaluation requires nvis "proj" & "projac" evaluations */ + info[7] *= nvis; + info[8] *= nvis; + } + + return retval; +} diff --git a/redist/test_dcl.c b/redist/test_dcl.c index dc6a93e..ded2863 100644 --- a/redist/test_dcl.c +++ b/redist/test_dcl.c @@ -1,4 +1,5 @@ -//gcc -msse2 -O3 -ftree-vectorize test_dcl.c dclhelpers.c os_generic.c -DFLT=double -lpthread -lcblas && valgrind ./a.out +// gcc -msse2 -O3 -ftree-vectorize test_dcl.c dclhelpers.c os_generic.c -DFLT=double -lpthread -lcblas && valgrind +// ./a.out #include "dclhelpers.h" #include "os_generic.h" @@ -99,44 +100,39 @@ void compareToCblasTrans() { DMS(em1), 0); } -int main() -{ - FLT A[2][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7} }; +int main() { + FLT A[2][4] = {{0, 1, 2, 3}, {4, 5, 6, 7}}; FLT B[4][2]; - dclPrint( A[0], 4, 2, 4 ); + dclPrint(A[0], 4, 2, 4); dclTransp(B[0], 2, A[0], 4, 2, 4); - dclPrint( B[0], 2, 4, 2 ); + dclPrint(B[0], 2, 4, 2); int i, j; - for( i = 0; i < 8; i++ ) - { - printf( "%f\n", ((float*)(B[0]))[i] ); + for (i = 0; i < 8; i++) { + printf("%f\n", ((float *)(B[0]))[i]); } - FLT M[3][3] = { - { .32, 1, 0 }, - { 0, 1, 2 }, - { 1, 0, 1 } }; + FLT M[3][3] = {{.32, 1, 0}, {0, 1, 2}, {1, 0, 1}}; FLT Mo[3][3]; - dclInv( Mo[0], 3, M[0], 3, 3 ); - dclPrint( Mo[0], 3, 3, 3 ); + dclInv(Mo[0], 3, M[0], 3, 3); + dclPrint(Mo[0], 3, 3, 3); FLT MM[3][3]; - dclMul( MM[0], 3, M[0], 3, Mo[0], 3, 3, 3, 3 ); + dclMul(MM[0], 3, M[0], 3, Mo[0], 3, 3, 3, 3); - printf( "The following should be an identity matrix\n" ); - dclPrint( MM[0], 3, 3, 3 ); + printf("The following should be an identity matrix\n"); + dclPrint(MM[0], 3, 3, 3); { FLT A[3][4]; - dclIdentity( DMS(A), 3, 4); - dclPrint( DMS(A), 3, 4); + dclIdentity(DMS(A), 3, 4); + dclPrint(DMS(A), 3, 4); FLT x[4][2] = { {7, -7}, {8, -8}, {9, -9}, {10, -10}, }; FLT R[4][2]; - dclZero( DMS(R), 4, 2 ); + dclZero(DMS(R), 4, 2); // dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3); dcldgemm(0, 0, 3, 4, 2, 1, A[0], 4, x[0], 2, 0, R[0], 2); @@ -145,9 +141,8 @@ int main() dclPrint(DMS(R), 3, 2); for (int j = 0; j < 2; j++) { - for (int i = 0; i < 3; i++) - { - printf( "[%d][%d]\n", i, j ); + for (int i = 0; i < 3; i++) { + printf("[%d][%d]\n", i, j); assert(R[i][j] == x[i][j]); } @@ -158,4 +153,3 @@ int main() compareToCblas(); compareToCblasTrans(); } - |