-rw-r--r--  Makefile                      |    6
-rw-r--r--  redist/dclapack.h             |   61
-rw-r--r--  redist/dclhelpers.c           |  109
-rw-r--r--  redist/dclhelpers.h           |   91
-rw-r--r--  redist/json_helpers.c         |    2
-rw-r--r--  redist/minimal_opencv.c       |    2
-rw-r--r--  redist/sba/compiler.h         |   42
-rw-r--r--  redist/sba/sba.h              |  145
-rw-r--r--  redist/sba/sba_chkjac.c       |  468
-rw-r--r--  redist/sba/sba_chkjac.h       |   69
-rw-r--r--  redist/sba/sba_crsm.c         |  338
-rw-r--r--  redist/sba/sba_lapack.c       | 1730
-rw-r--r--  redist/sba/sba_levmar.c       | 2715
-rw-r--r--  redist/sba/sba_levmar_wrap.c  |  917
-rw-r--r--  redist/test_dcl.c             |   44
-rw-r--r--  src/poser_daveortho.c         |   40
-rw-r--r--  src/poser_epnp.c              |    2
-rw-r--r--  src/poser_sba.c               |  329
-rw-r--r--  src/poser_sba.h               |   16
19 files changed, 6920 insertions(+), 206 deletions(-)
diff --git a/Makefile b/Makefile
index 1b5d5e6..7c0b946 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,7 @@ CC?=gcc
CFLAGS:=-Iinclude/libsurvive -fPIC -g -O0 -Iredist -flto -DUSE_DOUBLE -std=gnu99 -rdynamic -llapacke -lcblas -lm
CFLAGS_RELEASE:=-Iinclude/libsurvive -fPIC -msse2 -ftree-vectorize -O3 -Iredist -flto -DUSE_DOUBLE -std=gnu99 -rdynamic -llapacke -lcblas -lm
+
#LDFLAGS:=-L/usr/local/lib -lpthread -lusb-1.0 -lz -lm -flto -g
LDFLAGS:=-L/usr/local/lib -lpthread -lz -lm -flto -g
@@ -31,7 +32,8 @@ GRAPHICS_LOFI:=redist/CNFGFunctions.o redist/CNFGXDriver.o
endif
-POSERS:=src/poser_dummy.o src/poser_daveortho.o src/poser_charlesslow.o src/poser_octavioradii.o src/poser_turveytori.o src/poser_epnp.o
+SBA:=redist/sba/sba_chkjac.o redist/sba/sba_crsm.o redist/sba/sba_lapack.o redist/sba/sba_levmar.o redist/sba/sba_levmar_wrap.o
+POSERS:=src/poser_dummy.o src/poser_daveortho.o src/poser_charlesslow.o src/poser_octavioradii.o src/poser_turveytori.o src/poser_epnp.o src/poser_sba.o
REDISTS:=redist/json_helpers.o redist/linmath.o redist/jsmn.o redist/os_generic.o redist/minimal_opencv.o
ifeq ($(UNAME), Darwin)
REDISTS:=$(REDISTS) redist/hid-osx.c
@@ -56,7 +58,7 @@ LIBSURVIVE_CORE:=src/survive.o src/survive_usb.o src/survive_data.o src/survive_
LIBSURVIVE_CORE:=$(LIBSURVIVE_CORE)
-LIBSURVIVE_O:=$(POSERS) $(REDISTS) $(LIBSURVIVE_CORE)
+LIBSURVIVE_O:=$(POSERS) $(REDISTS) $(LIBSURVIVE_CORE) $(SBA)
LIBSURVIVE_C:=$(LIBSURVIVE_O:.o=.c)
# unused: redist/crc32.c
diff --git a/redist/dclapack.h b/redist/dclapack.h
index 7f30187..0950815 100644
--- a/redist/dclapack.h
+++ b/redist/dclapack.h
@@ -9,14 +9,14 @@
#define _ABS(a) ( (a)<=0 ? 0-(a) : (a) )
-//Tricky: If you want to use this with pointers, instead of 2D arrays, you will
-//need to #define DYNAMIC_INDEX, as well as, for all arrays, suffix their name
-//with 'c'
+// Tricky: If you want to use this with pointers, instead of 2D arrays, you will
+// need to #define DYNAMIC_INDEX, as well as, for all arrays, suffix their name
+// with 'c'
#ifdef DYNAMIC_INDEX
- #define _(AM,O,P) AM[O*AM##c+P]
+#define _(AM, O, P) AM[O * AM##c + P]
#else
- #define _(AM,O,P) AM[O][P]
+#define _(AM, O, P) AM[O][P]
#endif
/*
@@ -50,7 +50,6 @@
} \
}
-
/*
* Returns the identity matrix (with size n x n, but width Ic)
*/
@@ -90,7 +89,7 @@
_(U, _i, _j) = _(A, _i, _j); \
} \
} \
- IDENTITY(L, n, n); \
+ IDENTITY(L, n, n); \
\
for (_i = 0; _i < (n)-1; _i++) { \
\
@@ -181,30 +180,36 @@
*/
#ifdef DYNAMIC_INDEX
- #define INV_SETUP(ORDER) \
- FLOAT Ipiv[ORDER*ORDER]; const int Ipivc = ORDER; \
- FLOAT L[ORDER*ORDER]; const int Lc = ORDER; \
- FLOAT U[ORDER*ORDER]; const int Uc = ORDER; \
- FLOAT I[ORDER*ORDER]; const int Ic = ORDER; \
- FLOAT C[ORDER*ORDER]; const int Cc = ORDER;
+#define INV_SETUP(ORDER) \
+ FLOAT Ipiv[ORDER * ORDER]; \
+ const int Ipivc = ORDER; \
+ FLOAT L[ORDER * ORDER]; \
+ const int Lc = ORDER; \
+ FLOAT U[ORDER * ORDER]; \
+ const int Uc = ORDER; \
+ FLOAT I[ORDER * ORDER]; \
+ const int Ic = ORDER; \
+ FLOAT C[ORDER * ORDER]; \
+ const int Cc = ORDER;
#else
- #define INV_SETUP(ORDER) \
- FLOAT Ipiv[ORDER][ORDER]; \
- FLOAT L[ORDER][ORDER]; \
- FLOAT U[ORDER][ORDER]; \
- FLOAT I[ORDER][ORDER]; \
- FLOAT C[ORDER][ORDER];
+#define INV_SETUP(ORDER) \
+ FLOAT Ipiv[ORDER][ORDER]; \
+ FLOAT L[ORDER][ORDER]; \
+ FLOAT U[ORDER][ORDER]; \
+ FLOAT I[ORDER][ORDER]; \
+ FLOAT C[ORDER][ORDER];
#endif
-#define INV(Ainv,A,n,ORDER) { \
- INV_SETUP(ORDER) \
- int Piv[ORDER]; \
- IDENTITY(I,n,n); \
- LU(L,U,A,Piv,n); \
- PIVOT(Ipiv,I,Piv,n,n); \
- L_SUB(C,L,Ipiv,n,n); \
- U_SUB(Ainv,U,C,n,n); \
-}
+#define INV(Ainv, A, n, ORDER) \
+ { \
+ INV_SETUP(ORDER) \
+ int Piv[ORDER]; \
+ IDENTITY(I, n, n); \
+ LU(L, U, A, Piv, n); \
+ PIVOT(Ipiv, I, Piv, n, n); \
+ L_SUB(C, L, Ipiv, n, n); \
+ U_SUB(Ainv, U, C, n, n); \
+ }
/*
PRINT(A,n,n); \
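
For context, a minimal sketch of the indexing convention that the reformatted _() macro and the DYNAMIC_INDEX comment describe (illustrative only, not part of the patch; the names A, Ac, and diag() are made up):

    /* With DYNAMIC_INDEX defined, matrices are flat buffers and each array name
     * AM must be paired with a stride variable named AMc, since _(AM, O, P)
     * expands to AM[O * AMc + P]; without it, ordinary 2D arrays are indexed
     * directly as AM[O][P]. */
    #define DYNAMIC_INDEX
    #define _(AM, O, P) AM[O * AM##c + P]

    static double A[3 * 3];  /* 3x3 matrix in a flat, row-major buffer */
    static const int Ac = 3; /* column stride, named by suffixing 'c' to the array name */

    static double diag(int i) { return _(A, i, i); } /* expands to A[i * Ac + i] */
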
diff --git a/redist/dclhelpers.c b/redist/dclhelpers.c
index 5b956b0..9ed7c5f 100644
--- a/redist/dclhelpers.c
+++ b/redist/dclhelpers.c
@@ -7,111 +7,74 @@
#include <stdio.h>
#include <string.h>
-void dclPrint( const DCL_FLOAT * PMATRIX, int PMATRIXc, int n, int m )
-{
- PRINT( PMATRIX, n, m );
-}
-
-void dclIdentity( DCL_FLOAT * I, int Ic, int m, int n )
-{
- IDENTITY( I, m, n );
-}
+void dclPrint(const DCL_FLOAT *PMATRIX, int PMATRIXc, int n, int m) { PRINT(PMATRIX, n, m); }
+void dclIdentity(DCL_FLOAT *I, int Ic, int m, int n) { IDENTITY(I, m, n); }
/* Returns the zero matrix */
-void dclZero( DCL_FLOAT * Z, int Zc, int m, int n )
-{
- memset( Z, 0, m*n*sizeof(DCL_FLOAT) );
-}
+void dclZero(DCL_FLOAT *Z, int Zc, int m, int n) { memset(Z, 0, m * n * sizeof(DCL_FLOAT)); }
-void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m )
-{
- TRANSP(R,A,n,m);
-}
+void dclTransp(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int n, int m) { TRANSP(R, A, n, m); }
-void dclLU( DCL_FLOAT * L, int Lc, DCL_FLOAT * U, int Uc, const DCL_FLOAT * A, int Ac, int * Piv, int n )
-{
- LU(L,U,A,Piv,n);
+void dclLU(DCL_FLOAT *L, int Lc, DCL_FLOAT *U, int Uc, const DCL_FLOAT *A, int Ac, int *Piv, int n) {
+ LU(L, U, A, Piv, n);
}
-void dclPivot( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int * Piv, int n, int m )
-{
- PIVOT(R,A,Piv,n,m);
-}
+void dclPivot(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int *Piv, int n, int m) { PIVOT(R, A, Piv, n, m); }
-void dclLSub( DCL_FLOAT * X, int Xc, const DCL_FLOAT * L, int Lc, const DCL_FLOAT * B, int Bc, int n, int m )
-{
- L_SUB(X,L,B,n,m);
+void dclLSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *L, int Lc, const DCL_FLOAT *B, int Bc, int n, int m) {
+ L_SUB(X, L, B, n, m);
}
-void dclUSub( DCL_FLOAT * X, int Xc, const DCL_FLOAT * U, int Uc, const DCL_FLOAT * B, int Bc, int n, int m )
-{
- U_SUB(X,U,B,n,m);
+void dclUSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *U, int Uc, const DCL_FLOAT *B, int Bc, int n, int m) {
+ U_SUB(X, U, B, n, m);
}
-void dclInv( DCL_FLOAT * Ainv, int Ainvc, const DCL_FLOAT * A, int Ac, int n )
-{
- INV(Ainv,A,n,n);
-}
+void dclInv(DCL_FLOAT *Ainv, int Ainvc, const DCL_FLOAT *A, int Ac, int n) { INV(Ainv, A, n, n); }
-void dclMul( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, int n, int m, int p )
-{
- MUL(R,A,B,n,m,p);
+void dclMul(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, int n, int m, int p) {
+ MUL(R, A, B, n, m, p);
}
-void dclMulAdd( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, const DCL_FLOAT * C, int Cc, int n, int m, int p )
-{
- MULADD(R,A,B,C,n,m,p);
+void dclMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, int Cc,
+ int n, int m, int p) {
+ MULADD(R, A, B, C, n, m, p);
}
-void dclGMulAdd( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, const DCL_FLOAT * C, int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p )
-{
- GMULADD(R,A,B,C,alpha,beta,n,m,p);
+void dclGMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C,
+ int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p) {
+ GMULADD(R, A, B, C, alpha, beta, n, m, p);
}
/* dclGMulAdd( R, ((transA)?TRANS(A):A, (transB)?TRANS(B):B), C, alpha, beta, n, m, p ); */
-void dcldgemm(
- char transA,
- char transB,
- int m,
- int n,
- int k,
- DCL_FLOAT alpha,
- const DCL_FLOAT* A,
- int Ac, //must be n
- const DCL_FLOAT* B,
- int Bc, //must be m
- DCL_FLOAT beta,
- DCL_FLOAT * C,
- int Cc //must be n
- )
-{
+void dcldgemm(char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, const DCL_FLOAT *A,
+ int Ac, // must be n
+ const DCL_FLOAT *B,
+ int Bc, // must be m
+ DCL_FLOAT beta, DCL_FLOAT *C,
+ int Cc // must be n
+ ) {
const DCL_FLOAT *ta;
- const DCL_FLOAT * tb;
+ const DCL_FLOAT *tb;
int tac = Ac;
int tbc = Bc;
- if( transA )
- {
- DCL_FLOAT * la = alloca( sizeof( DCL_FLOAT ) * n * m );
+ if (transA) {
+ DCL_FLOAT *la = alloca(sizeof(DCL_FLOAT) * n * m);
const int lac = m;
- TRANSP( la, A, n, m );
+ TRANSP(la, A, n, m);
ta = la;
tac = lac;
- }
- else
+ } else
ta = A;
- if( transB )
- {
- DCL_FLOAT * lb = alloca( sizeof( DCL_FLOAT ) * n * m );
+ if (transB) {
+ DCL_FLOAT *lb = alloca(sizeof(DCL_FLOAT) * n * m);
const int lbc = m;
- TRANSP( lb, B, n, m );
+ TRANSP(lb, B, n, m);
tb = lb;
tbc = lbc;
- }
- else
+ } else
tb = B;
GMULADD(C, ta, tb, C, alpha, beta, m, n, k);
}
-
diff --git a/redist/dclhelpers.h b/redist/dclhelpers.h
index b4acec8..00acdac 100644
--- a/redist/dclhelpers.h
+++ b/redist/dclhelpers.h
@@ -3,87 +3,72 @@
#define DCL_FLOAT FLT
-//Use this macro to safely
-#define DMS( m ) ((m)[0]), (sizeof((m)[0])/sizeof((m)[0][0]))
+// Use this macro to safely pass a statically-sized 2D array as an (A, Ac) argument pair: it expands to the array's first-row pointer and its column stride.
+#define DMS(m) ((m)[0]), (sizeof((m)[0]) / sizeof((m)[0][0]))
/* Prints matrix A of size[n][m] */
-void dclPrint( const DCL_FLOAT * A, int Ac, int n, int m );
+void dclPrint(const DCL_FLOAT *A, int Ac, int n, int m);
/* Returns the identity matrix */
-void dclIdentity( DCL_FLOAT * I, int Ic, int m, int n );
+void dclIdentity(DCL_FLOAT *I, int Ic, int m, int n);
/* Returns the zero matrix */
-void dclZero( DCL_FLOAT * I, int Ic, int m, int n );
+void dclZero(DCL_FLOAT *I, int Ic, int m, int n);
/* R = Transpose(A)
- A is (n by m)
- R is (m by n) */
-void dclTransp( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int n, int m );
+ A is (n by m)
+ R is (m by n) */
+void dclTransp(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int n, int m);
/* Calculate L,U of a matrix A with pivot table; the pivot table is output. */
-void dclLU( DCL_FLOAT * L, int Lc, DCL_FLOAT * U, int Uc, const DCL_FLOAT * A, int Ac, int * Piv, int n );
+void dclLU(DCL_FLOAT *L, int Lc, DCL_FLOAT *U, int Uc, const DCL_FLOAT *A, int Ac, int *Piv, int n);
/* Pivots a matrix to a different matrix
- R = Pivot(A) given table 'Piv'
- A and R are (n by m) */
-void dclPivot( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, int * Piv, int n, int m );
+ R = Pivot(A) given table 'Piv'
+ A and R are (n by m) */
+void dclPivot(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, int *Piv, int n, int m);
/* Solve LX=B for matrix X and B
- L is n by n (lower triangular)
- B is n by m */
-void dclLSub( DCL_FLOAT * X, int Xc, const DCL_FLOAT * L, int Lc, const DCL_FLOAT * B, int Bc, int n, int m );
+ L is n by n (lower triangular)
+ B is n by m */
+void dclLSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *L, int Lc, const DCL_FLOAT *B, int Bc, int n, int m);
/* Solve UX=B for matrix X and B
- U is n by n (upper triangular)
- B is n by m */
-void dclUSub( DCL_FLOAT * X, int Xc, const DCL_FLOAT * U, int Uc, const DCL_FLOAT * B, int Bc, int n, int m );
+ U is n by n (upper triangular)
+ B is n by m */
+void dclUSub(DCL_FLOAT *X, int Xc, const DCL_FLOAT *U, int Uc, const DCL_FLOAT *B, int Bc, int n, int m);
/* Inverts a matrix X (n by n) using the method of LU decomposition */
-void dclInv( DCL_FLOAT * Ainv, int Ainvc, const DCL_FLOAT * A, int Ac, int n );
+void dclInv(DCL_FLOAT *Ainv, int Ainvc, const DCL_FLOAT *A, int Ac, int n);
/* Matrix Multiply R = A * B
- A (n by m)
- B (m by p)
- R (n by p) */
-void dclMul( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, int n, int m, int p );
+ A (n by m)
+ B (m by p)
+ R (n by p) */
+void dclMul(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, int n, int m, int p);
/* Matrix Multiply R = A * B + C
- A (n by m)
- B (m by p)
- C (n by p)
- R (n by p) */
-void dclMulAdd( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, const DCL_FLOAT * C, int Cc, int n, int m, int p );
+ A (n by m)
+ B (m by p)
+ C (n by p)
+ R (n by p) */
+void dclMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C, int Cc,
+ int n, int m, int p);
/* Matrix Multiply R = alpha * A * B + beta * C
- A (n by m)
- B (m by p)
- C (n by p)
- R (n by p) */
-void dclGMulAdd( DCL_FLOAT * R, int Rc, const DCL_FLOAT * A, int Ac, const DCL_FLOAT * B, int Bc, const DCL_FLOAT * C, int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p );
-
+ A (n by m)
+ B (m by p)
+ C (n by p)
+ R (n by p) */
+void dclGMulAdd(DCL_FLOAT *R, int Rc, const DCL_FLOAT *A, int Ac, const DCL_FLOAT *B, int Bc, const DCL_FLOAT *C,
+ int Cc, DCL_FLOAT alpha, DCL_FLOAT beta, int n, int m, int p);
/********************************
* Auxiliary functionality in C *
********************************/
-//Matches dgemm from lapack.
-void dcldgemm(
- char transA,
- char transB,
- int m,
- int n,
- int k,
- DCL_FLOAT alpha,
- const DCL_FLOAT* A,
- int Ac,
- const DCL_FLOAT* B,
- int Bc,
- DCL_FLOAT beta,
- DCL_FLOAT * C,
- int Cc
- );
-
-
+// Matches dgemm from lapack.
+void dcldgemm(char transA, char transB, int m, int n, int k, DCL_FLOAT alpha, const DCL_FLOAT *A, int Ac,
+ const DCL_FLOAT *B, int Bc, DCL_FLOAT beta, DCL_FLOAT *C, int Cc);
#endif
-
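
To make the (A, Ac) calling convention above concrete, here is a hypothetical usage sketch (not part of the patch; dcl_example() is made up and FLT is assumed to be defined as it is elsewhere in libsurvive):

    #include "dclhelpers.h"

    void dcl_example(void) {
        DCL_FLOAT A[2][3] = {{1, 2, 3}, {4, 5, 6}};      /* 2x3 */
        DCL_FLOAT B[3][2] = {{7, 8}, {9, 10}, {11, 12}}; /* 3x2 */
        DCL_FLOAT R[2][2];                               /* will hold A*B */

        /* DMS(m) expands to m's first-row pointer plus its column stride,
         * filling an (A, Ac) argument pair in one go. */
        dclMul(DMS(R), DMS(A), DMS(B), 2, 3, 2); /* R = A*B with A (2 by 3), B (3 by 2) */
        dclPrint(DMS(R), 2, 2);                  /* print the 2x2 product */
    }
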
diff --git a/redist/json_helpers.c b/redist/json_helpers.c
index 8704a93..ebacd2c 100644
--- a/redist/json_helpers.c
+++ b/redist/json_helpers.c
@@ -188,7 +188,6 @@ void json_load_file(const char* path) {
if (value_t->type == JSMN_ARRAY) {
i += json_load_array(JSON_STRING, tokens+i+2,value_t->size, tag); //look at array children
} else if (value_t->type == JSMN_OBJECT) {
- printf("Begin Object\n");
if (json_begin_object != NULL) json_begin_object(tag);
children = (int16_t)(value_t->size +1); //+1 to account for this loop where we are not yed parsing children
// i += decode_jsmn_object(JSON_STRING, tokens+i+2,value_t->size);
@@ -200,7 +199,6 @@ void json_load_file(const char* path) {
if (children>=0) children--;
if (children == 0) {
children = -1;
- printf("End Object\n");
if (json_end_object!=NULL) json_end_object();
}
diff --git a/redist/minimal_opencv.c b/redist/minimal_opencv.c
index 8e71034..50eb7df 100644
--- a/redist/minimal_opencv.c
+++ b/redist/minimal_opencv.c
@@ -168,10 +168,8 @@ double cvInvert(const CvMat *srcarr, CvMat *dstarr, int method) {
lapack_int *ipiv = malloc(sizeof(lapack_int) * MIN(srcarr->rows, srcarr->cols));
inf = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, rows, cols, a, lda, ipiv);
assert(inf == 0);
- print_mat(dstarr);
inf = LAPACKE_dgetri(LAPACK_ROW_MAJOR, rows, a, lda, ipiv);
- print_mat(dstarr);
assert(inf >= 0);
if (inf > 0) {
printf("Warning: Singular matrix: \n");
diff --git a/redist/sba/compiler.h b/redist/sba/compiler.h
new file mode 100644
index 0000000..09c01f1
--- /dev/null
+++ b/redist/sba/compiler.h
@@ -0,0 +1,42 @@
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// Compiler-specific definitions for sparse bundle adjustment based on the
+//// Levenberg - Marquardt minimization algorithm
+//// Copyright (C) 2004-2008 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+////
+//// This program is distributed in the hope that it will be useful,
+//// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//// GNU General Public License for more details.
+////
+///////////////////////////////////////////////////////////////////////////////////
+
+#ifndef _COMPILER_H_
+#define _COMPILER_H_
+
+/* note: intel's icc defines both __ICC & __INTEL_COMPILER.
+ * Also, some compilers other than gcc define __GNUC__,
+ * therefore gcc should be checked last
+ */
+#ifdef _MSC_VER
+#define inline __inline // MSVC
+#elif !defined(__ICC) && !defined(__INTEL_COMPILER) && !defined(__GNUC__)
+#define inline // other than MSVC, ICC, GCC: define empty
+#endif
+
+#ifdef _MSC_VER
+#define SBA_FINITE _finite // MSVC
+#elif defined(__ICC) || defined(__INTEL_COMPILER) || defined(__GNUC__)
+#define SBA_FINITE isfinite // ICC, GCC
+#else
+#define SBA_FINITE isfinite // other than MSVC, ICC, GCC, let's hope this will work
+#endif
+
+#endif /* _COMPILER_H_ */
diff --git a/redist/sba/sba.h b/redist/sba/sba.h
new file mode 100644
index 0000000..dd7b2b8
--- /dev/null
+++ b/redist/sba/sba.h
@@ -0,0 +1,145 @@
+/*
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// Prototypes and definitions for sparse bundle adjustment
+//// Copyright (C) 2004-2009 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+////
+//// This program is distributed in the hope that it will be useful,
+//// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//// GNU General Public License for more details.
+////
+///////////////////////////////////////////////////////////////////////////////////
+*/
+
+#ifndef _SBA_H_
+#define _SBA_H_
+
+/**************************** Start of configuration options ****************************/
+
+/* define the following if a trailing underscore should be appended to lapack functions names */
+#define SBA_APPEND_UNDERSCORE_SUFFIX // undef this for AIX
+
+/* define this to make linear solver routines use the minimum amount of memory
+ * possible. This lowers the memory requirements of large BA problems but will
+ * most likely induce a penalty on performance
+ */
+/*#define SBA_LS_SCARCE_MEMORY */
+
+/* define this to save some memory by storing the weight matrices for the image
+ * projections (i.e. wght in the expert drivers) into the memory containing their
+ * covariances (i.e. covx arg). Note that this overwrites covx, making changes
+ * noticeable by the caller
+ */
+/*#define SBA_DESTROY_COVS */
+
+/********* End of configuration options, no changes necessary beyond this point *********/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SBA_MIN_DELTA 1E-06 // finite differentiation minimum delta
+#define SBA_DELTA_SCALE 1E-04 // finite differentiation delta scale
+
+#define SBA_OPTSSZ 5
+#define SBA_INFOSZ 10
+#define SBA_ERROR -1
+#define SBA_INIT_MU 1E-03
+#define SBA_STOP_THRESH 1E-12
+#define SBA_CG_NOPREC 0
+#define SBA_CG_JACOBI 1
+#define SBA_CG_SSOR 2
+#define SBA_VERSION "1.6 (Aug. 2009)"
+
+/* Sparse matrix representation using Compressed Row Storage (CRS) format.
+ * See http://www.netlib.org/linalg/html_templates/node91.html#SECTION00931100000000000000
+ */
+
+struct sba_crsm {
+ int nr, nc; /* #rows, #cols for the sparse matrix */
+ int nnz; /* number of nonzero array elements */
+ int *val; /* storage for nonzero array elements. size: nnz */
+ int *colidx; /* column indexes of nonzero elements. size: nnz */
+ int *rowptr; /* locations in val that start a row. size: nr+1.
+ * By convention, rowptr[nr]=nnz
+ */
+};
+
+/* sparse LM */
+
+/* simple drivers */
+extern int sba_motstr_levmar(const int n, const int ncon, const int m, const int mcon, char *vmask, double *p,
+ const int cnp, const int pnp, double *x, double *covx, const int mnp,
+ void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata),
+ void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij,
+ void *adata),
+ void *adata, const int itmax, const int verbose, const double opts[SBA_OPTSSZ],
+ double info[SBA_INFOSZ]);
+
+extern int sba_mot_levmar(const int n, const int m, const int mcon, char *vmask, double *p, const int cnp, double *x,
+ double *covx, const int mnp, void (*proj)(int j, int i, double *aj, double *xij, void *adata),
+ void (*projac)(int j, int i, double *aj, double *Aij, void *adata), void *adata,
+ const int itmax, const int verbose, const double opts[SBA_OPTSSZ], double info[SBA_INFOSZ]);
+
+extern int sba_str_levmar(const int n, const int ncon, const int m, char *vmask, double *p, const int pnp, double *x,
+ double *covx, const int mnp, void (*proj)(int j, int i, double *bi, double *xij, void *adata),
+ void (*projac)(int j, int i, double *bi, double *Bij, void *adata), void *adata,
+ const int itmax, const int verbose, const double opts[SBA_OPTSSZ], double info[SBA_INFOSZ]);
+
+/* expert drivers */
+extern int
+sba_motstr_levmar_x(const int n, const int ncon, const int m, const int mcon, char *vmask, double *p, const int cnp,
+ const int pnp, double *x, double *covx, const int mnp,
+ void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
+ void *adata, const int itmax, const int verbose, const double opts[SBA_OPTSSZ],
+ double info[SBA_INFOSZ]);
+
+extern int sba_mot_levmar_x(
+ const int n, const int m, const int mcon, char *vmask, double *p, const int cnp, double *x, double *covx,
+ const int mnp, void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), void *adata,
+ const int itmax, const int verbose, const double opts[SBA_OPTSSZ], double info[SBA_INFOSZ]);
+
+extern int sba_str_levmar_x(
+ const int n, const int ncon, const int m, char *vmask, double *p, const int pnp, double *x, double *covx,
+ const int mnp, void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), void *adata,
+ const int itmax, const int verbose, const double opts[SBA_OPTSSZ], double info[SBA_INFOSZ]);
+
+/* interfaces to LAPACK routines: solution of linear systems, matrix inversion, cholesky of inverse */
+extern int sba_Axb_QR(double *A, double *B, double *x, int m, int iscolmaj);
+extern int sba_Axb_QRnoQ(double *A, double *B, double *x, int m, int iscolmaj);
+extern int sba_Axb_Chol(double *A, double *B, double *x, int m, int iscolmaj);
+extern int sba_Axb_LDLt(double *A, double *B, double *x, int m, int iscolmaj);
+extern int sba_Axb_LU(double *A, double *B, double *x, int m, int iscolmaj);
+extern int sba_Axb_SVD(double *A, double *B, double *x, int m, int iscolmaj);
+extern int sba_Axb_BK(double *A, double *B, double *x, int m, int iscolmaj);
+extern int sba_Axb_CG(double *A, double *B, double *x, int m, int niter, double eps, int prec, int iscolmaj);
+extern int sba_symat_invert_LU(double *A, int m);
+extern int sba_symat_invert_Chol(double *A, int m);
+extern int sba_symat_invert_BK(double *A, int m);
+extern int sba_mat_cholinv(double *A, double *B, int m);
+
+/* CRS sparse matrices manipulation routines */
+extern void sba_crsm_alloc(struct sba_crsm *sm, int nr, int nc, int nnz);
+extern void sba_crsm_free(struct sba_crsm *sm);
+extern int sba_crsm_elmidx(struct sba_crsm *sm, int i, int j);
+extern int sba_crsm_elmidxp(struct sba_crsm *sm, int i, int j, int jp, int jpidx);
+extern int sba_crsm_row_elmidxs(struct sba_crsm *sm, int i, int *vidxs, int *jidxs);
+extern int sba_crsm_col_elmidxs(struct sba_crsm *sm, int j, int *vidxs, int *iidxs);
+/* extern int sba_crsm_common_row(struct sba_crsm *sm, int j, int k); */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SBA_H_ */
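
A worked example of the CRS layout described above may help (hypothetical values; crsm_example() is not part of the library). For the 3x4 integer matrix

    1 0 2 0
    0 0 3 0
    4 5 0 6

there are nnz = 6 nonzeros, and by the convention above rowptr[nr] = nnz:

    #include <string.h>
    #include "sba.h"

    void crsm_example(void) {
        struct sba_crsm sm;
        int val[]    = {1, 2, 3, 4, 5, 6}; /* nonzero entries, row by row */
        int colidx[] = {0, 2, 2, 0, 1, 3}; /* their column indexes */
        int rowptr[] = {0, 2, 3, 6};       /* start of each row in val; rowptr[3] == nnz */

        sba_crsm_alloc(&sm, 3, 4, 6);
        memcpy(sm.val, val, sizeof(val));
        memcpy(sm.colidx, colidx, sizeof(colidx));
        memcpy(sm.rowptr, rowptr, sizeof(rowptr));

        int k = sba_crsm_elmidx(&sm, 2, 1); /* element (2,1): k == 4, so sm.val[k] == 5 */
        (void)k;

        sba_crsm_free(&sm);
    }
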
diff --git a/redist/sba/sba_chkjac.c b/redist/sba/sba_chkjac.c
new file mode 100644
index 0000000..7cb53ec
--- /dev/null
+++ b/redist/sba/sba_chkjac.c
@@ -0,0 +1,468 @@
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// Verification routines for the jacobians employed in the expert & simple drivers
+//// for sparse bundle adjustment based on the Levenberg - Marquardt minimization algorithm
+//// Copyright (C) 2005-2008 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+////
+//// This program is distributed in the hope that it will be useful,
+//// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//// GNU General Public License for more details.
+////
+///////////////////////////////////////////////////////////////////////////////////
+
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "compiler.h"
+#include "sba.h"
+
+#define emalloc(sz) emalloc_(__FILE__, __LINE__, sz)
+
+#define FABS(x) (((x) >= 0) ? (x) : -(x))
+
+/* auxiliary memory allocation routine with error checking */
+inline static void *emalloc_(char *file, int line, size_t sz) {
+ void *ptr;
+
+ ptr = (void *)malloc(sz);
+ if (ptr == NULL) {
+ fprintf(stderr, "SBA: memory allocation request for %zu bytes failed in file %s, line %d, exiting", sz, file,
+ line);
+ exit(1);
+ }
+
+ return ptr;
+}
+
+/*
+ * Check the jacobian of a projection function in nvars variables
+ * evaluated at a point p, for consistency with the function itself.
+ * Expert version
+ *
+ * Based on fortran77 subroutine CHKDER by
+ * Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. More
+ * Argonne National Laboratory. MINPACK project. March 1980.
+ *
+ *
+ * func points to a function from R^{nvars} --> R^{nobs}: Given a p in R^{nvars}
+ * it yields hx in R^{nobs}
+ * jacf points to a function implementing the jacobian of func, whose consistency with
+ * func is to be tested. Given a p in R^{nvars}, jacf computes into the nvis*(Asz+Bsz)
+ * matrix jac the jacobian of func at p. Note the jacobian is sparse, consisting of
+ * all A_ij, B_ij and that row i of jac corresponds to the gradient of the i-th
+ * component of func, evaluated at p.
+ * p is an input array of length nvars containing the point of evaluation.
+ * idxij, rcidxs, rcsubs, ncon, mcon, cnp, pnp, mnp are as usual. Note that if cnp=0 or
+ * pnp=0 a jacobian corresponding resp. to motion or camera parameters
+ * only is assumed.
+ * func_adata, jac_adata point to possible additional data and are passed
+ * uninterpreted to func, jacf respectively.
+ * err is an array of length nobs. On output, err contains measures
+ * of correctness of the respective gradients. if there is
+ * no severe loss of significance, then if err[i] is 1.0 the
+ * i-th gradient is correct, while if err[i] is 0.0 the i-th
+ * gradient is incorrect. For values of err between 0.0 and 1.0,
+ * the categorization is less certain. In general, a value of
+ * err[i] greater than 0.5 indicates that the i-th gradient is
+ * probably correct, while a value of err[i] less than 0.5
+ * indicates that the i-th gradient is probably incorrect.
+ *
+ * CAUTION: THIS FUNCTION IS NOT 100% FOOLPROOF. The
+ * following excerpt comes from CHKDER's documentation:
+ *
+ * "The function does not perform reliably if cancellation or
+ * rounding errors cause a severe loss of significance in the
+ * evaluation of a function. therefore, none of the components
+ * of p should be unusually small (in particular, zero) or any
+ * other value which may cause loss of significance."
+ */
+
+void sba_motstr_chkjac_x(void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx,
+ void *adata),
+ void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac,
+ void *adata),
+ double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int ncon, int mcon, int cnp,
+ int pnp, int mnp, void *func_adata, void *jac_adata) {
+ const double factor = 100.0, one = 1.0, zero = 0.0;
+ double *fvec, *fjac, *pp, *fvecp, *buf, *err;
+
+ int nvars, nobs, m, n, Asz, Bsz, ABsz, nnz;
+ register int i, j, ii, jj;
+ double eps, epsf, temp, epsmch, epslog;
+ register double *ptr1, *ptr2, *pab;
+ double *pa, *pb;
+ int fvec_sz, pp_sz, fvecp_sz, numerr = 0;
+
+ nobs = idxij->nnz * mnp;
+ n = idxij->nr;
+ m = idxij->nc;
+ nvars = m * cnp + n * pnp;
+ epsmch = DBL_EPSILON;
+ eps = sqrt(epsmch);
+
+ Asz = mnp * cnp;
+ Bsz = mnp * pnp;
+ ABsz = Asz + Bsz;
+ fjac = (double *)emalloc(idxij->nnz * ABsz * sizeof(double));
+
+ fvec_sz = fvecp_sz = nobs;
+ pp_sz = nvars;
+ buf = (double *)emalloc((fvec_sz + pp_sz + fvecp_sz) * sizeof(double));
+ fvec = buf;
+ pp = fvec + fvec_sz;
+ fvecp = pp + pp_sz;
+
+ err = (double *)emalloc(nobs * sizeof(double));
+
+ /* compute fvec=func(p) */
+ (*func)(p, idxij, rcidxs, rcsubs, fvec, func_adata);
+
+ /* compute the jacobian at p */
+ (*jacf)(p, idxij, rcidxs, rcsubs, fjac, jac_adata);
+
+ /* compute pp */
+ for (j = 0; j < nvars; ++j) {
+ temp = eps * FABS(p[j]);
+ if (temp == zero)
+ temp = eps;
+ pp[j] = p[j] + temp;
+ }
+
+ /* compute fvecp=func(pp) */
+ (*func)(pp, idxij, rcidxs, rcsubs, fvecp, func_adata);
+
+ epsf = factor * epsmch;
+ epslog = log10(eps);
+
+ for (i = 0; i < nobs; ++i)
+ err[i] = zero;
+
+ pa = p;
+ pb = p + m * cnp;
+ for (i = 0; i < n; ++i) {
+ nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs,
+ rcsubs); /* find nonzero A_ij, B_ij, j=0...m-1, actual column numbers in rcsubs */
+ for (j = 0; j < nnz; ++j) {
+ ptr2 = err + idxij->val[rcidxs[j]] * mnp; // set ptr2 to point into err
+
+ if (cnp && rcsubs[j] >= mcon) { // A_ij is nonzero
+ ptr1 = fjac + idxij->val[rcidxs[j]] * ABsz; // set ptr1 to point to A_ij
+ pab = pa + rcsubs[j] * cnp;
+ for (jj = 0; jj < cnp; ++jj) {
+ temp = FABS(pab[jj]);
+ if (temp == zero)
+ temp = one;
+
+ for (ii = 0; ii < mnp; ++ii)
+ ptr2[ii] += temp * ptr1[ii * cnp + jj];
+ }
+ }
+
+ if (pnp && i >= ncon) { // B_ij is nonzero
+ ptr1 = fjac + idxij->val[rcidxs[j]] * ABsz + Asz; // set ptr1 to point to B_ij
+ pab = pb + i * pnp;
+ for (jj = 0; jj < pnp; ++jj) {
+ temp = FABS(pab[jj]);
+ if (temp == zero)
+ temp = one;
+
+ for (ii = 0; ii < mnp; ++ii)
+ ptr2[ii] += temp * ptr1[ii * pnp + jj];
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < nobs; ++i) {
+ temp = one;
+ if (fvec[i] != zero && fvecp[i] != zero && FABS(fvecp[i] - fvec[i]) >= epsf * FABS(fvec[i]))
+ temp = eps * FABS((fvecp[i] - fvec[i]) / eps - err[i]) / (FABS(fvec[i]) + FABS(fvecp[i]));
+ err[i] = one;
+ if (temp > epsmch && temp < eps)
+ err[i] = (log10(temp) - epslog) / epslog;
+ if (temp >= eps)
+ err[i] = zero;
+ }
+
+ free(fjac);
+ free(buf);
+
+ for (i = 0; i < n; ++i) {
+ nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero err_ij, j=0...m-1 */
+ for (j = 0; j < nnz; ++j) {
+ if (i < ncon && rcsubs[j] < mcon)
+ continue; // corresponding gradients are taken to be zero
+
+ ptr1 = err + idxij->val[rcidxs[j]] * mnp; // set ptr1 to point into err
+ for (ii = 0; ii < mnp; ++ii)
+ if (ptr1[ii] <= 0.5) {
+ fprintf(stderr, "SBA: gradient %d (corresponding to element %d of the projection of point %d on "
+ "camera %d) is %s (err=%g)\n",
+ idxij->val[rcidxs[j]] * mnp + ii, ii, i, rcsubs[j],
+ (ptr1[ii] == 0.0) ? "wrong" : "probably wrong", ptr1[ii]);
+ ++numerr;
+ }
+ }
+ }
+ if (numerr)
+ fprintf(stderr, "SBA: found %d suspicious gradients out of %d\n\n", numerr, nobs);
+
+ free(err);
+
+ return;
+}
+
+void sba_mot_chkjac_x(
+ void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), double *p,
+ struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int mcon, int cnp, int mnp, void *func_adata, void *jac_adata) {
+ sba_motstr_chkjac_x(func, jacf, p, idxij, rcidxs, rcsubs, 0, mcon, cnp, 0, mnp, func_adata, jac_adata);
+}
+
+void sba_str_chkjac_x(
+ void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), double *p,
+ struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int ncon, int pnp, int mnp, void *func_adata, void *jac_adata) {
+ sba_motstr_chkjac_x(func, jacf, p, idxij, rcidxs, rcsubs, ncon, 0, 0, pnp, mnp, func_adata, jac_adata);
+}
+
+#if 0
+/* Routines for directly checking the jacobians supplied to the simple drivers.
+ * They shouldn't be necessary since these jacobians can be verified indirectly
+ * through the expert sba_XXX_chkjac_x() routines.
+ */
+
+/*****************************************************************************************/
+// Sample code for using sba_motstr_chkjac():
+
+ for(i=ncon; i<n; ++i)
+ for(j=mcon; j<m; ++j){
+ if(!vmask[i*m+j]) continue; // point i does not appear in image j
+
+ sba_motstr_chkjac(proj, projac, p+j*cnp, p+m*cnp+i*pnp, j, i, cnp, pnp, mnp, adata, adata);
+ }
+
+
+/*****************************************************************************************/
+
+
+/* union used for passing pointers to the user-supplied functions for the motstr/mot/str simple drivers */
+union proj_projac{
+ struct{
+ void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata);
+ void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij, void *adata);
+ } motstr;
+
+ struct{
+ void (*proj)(int j, int i, double *aj, double *xij, void *adata);
+ void (*projac)(int j, int i, double *aj, double *Aij, void *adata);
+ } mot;
+
+ struct{
+ void (*proj)(int j, int i, double *bi, double *xij, void *adata);
+ void (*projac)(int j, int i, double *bi, double *Bij, void *adata);
+ } str;
+};
+
+
+/*
+ * Check the jacobian of a projection function in cnp+pnp variables
+ * evaluated at a point p, for consistency with the function itself.
+ * Simple version of the above, NOT to be called directly
+ *
+ * Based on fortran77 subroutine CHKDER by
+ * Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. More
+ * Argonne National Laboratory. MINPACK project. March 1980.
+ *
+ *
+ * proj points to a function from R^{cnp+pnp} --> R^{mnp}: Given a p=(aj, bi) in R^{cnp+pnp}
+ * it yields hx in R^{mnp}
+ * projac points to a function implementing the jacobian of func, whose consistency with proj
+ * is to be tested. Given a p in R^{cnp+pnp}, jacf computes into the matrix jac=[Aij | Bij]
+ * jacobian of proj at p. Note that row i of jac corresponds to the gradient of the i-th
+ * component of proj, evaluated at p.
+ * aj, bi are input arrays of lengths cnp, pnp containing the parameters for the point of
+ * evaluation, i.e. j-th camera and i-th point
+ * jj, ii specify the point (ii) whose projection jacobian in image (jj) is being checked
+ * cnp, pnp, mnp are as usual. Note that if cnp=0 or
+ * pnp=0 a jacobian corresponding resp. to motion or camera parameters
+ * only is assumed.
+ * func_adata, jac_adata point to possible additional data and are passed
+ * uninterpreted to func, jacf respectively.
+ * err is an array of length mnp. On output, err contains measures
+ * of correctness of the respective gradients. if there is
+ * no severe loss of significance, then if err[i] is 1.0 the
+ * i-th gradient is correct, while if err[i] is 0.0 the i-th
+ * gradient is incorrect. For values of err between 0.0 and 1.0,
+ * the categorization is less certain. In general, a value of
+ * err[i] greater than 0.5 indicates that the i-th gradient is
+ * probably correct, while a value of err[i] less than 0.5
+ * indicates that the i-th gradient is probably incorrect.
+ *
+ * CAUTION: THIS FUNCTION IS NOT 100% FOOLPROOF. The
+ * following excerpt comes from CHKDER's documentation:
+ *
+ * "The function does not perform reliably if cancellation or
+ * rounding errors cause a severe loss of significance in the
+ * evaluation of a function. therefore, none of the components
+ * of p should be unusually small (in particular, zero) or any
+ * other value which may cause loss of significance."
+ */
+
+static void sba_chkjac(
+ union proj_projac *funcs, double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata)
+{
+const double factor=100.0, one=1.0, zero=0.0;
+double *fvec, *fjac, *Aij, *Bij, *ajp, *bip, *fvecp, *buf, *err;
+
+int Asz, Bsz;
+register int i, j;
+double eps, epsf, temp, epsmch, epslog;
+int fvec_sz, ajp_sz, bip_sz, fvecp_sz, err_sz, numerr=0;
+
+ epsmch=DBL_EPSILON;
+ eps=sqrt(epsmch);
+
+ Asz=mnp*cnp; Bsz=mnp*pnp;
+ fjac=(double *)emalloc((Asz+Bsz)*sizeof(double));
+ Aij=fjac;
+ Bij=Aij+Asz;
+
+ fvec_sz=fvecp_sz=mnp;
+ ajp_sz=cnp; bip_sz=pnp;
+ err_sz=mnp;
+ buf=(double *)emalloc((fvec_sz + ajp_sz + bip_sz + fvecp_sz + err_sz)*sizeof(double));
+ fvec=buf;
+ ajp=fvec+fvec_sz;
+ bip=ajp+ajp_sz;
+ fvecp=bip+bip_sz;
+ err=fvecp+fvecp_sz;
+
+ /* compute fvec=proj(p), p=(aj, bi) & the jacobian at p */
+ if(cnp && pnp){
+ (*(funcs->motstr.proj))(jj, ii, aj, bi, fvec, func_adata);
+ (*(funcs->motstr.projac))(jj, ii, aj, bi, Aij, Bij, jac_adata);
+ }
+ else if(cnp){
+ (*(funcs->mot.proj))(jj, ii, aj, fvec, func_adata);
+ (*(funcs->mot.projac))(jj, ii, aj, Aij, jac_adata);
+ }
+ else{
+ (*(funcs->str.proj))(jj, ii, bi, fvec, func_adata);
+ (*(funcs->str.projac))(jj, ii, bi, Bij, jac_adata);
+ }
+
+ /* compute pp, pp=(ajp, bip) */
+ for(j=0; j<cnp; ++j){
+ temp=eps*FABS(aj[j]);
+ if(temp==zero) temp=eps;
+ ajp[j]=aj[j]+temp;
+ }
+ for(j=0; j<pnp; ++j){
+ temp=eps*FABS(bi[j]);
+ if(temp==zero) temp=eps;
+ bip[j]=bi[j]+temp;
+ }
+
+ /* compute fvecp=proj(pp) */
+ if(cnp && pnp)
+ (*(funcs->motstr.proj))(jj, ii, ajp, bip, fvecp, func_adata);
+ else if(cnp)
+ (*(funcs->mot.proj))(jj, ii, ajp, fvecp, func_adata);
+ else
+ (*(funcs->str.proj))(jj, ii, bip, fvecp, func_adata);
+
+ epsf=factor*epsmch;
+ epslog=log10(eps);
+
+ for(i=0; i<mnp; ++i)
+ err[i]=zero;
+
+ for(j=0; j<cnp; ++j){
+ temp=FABS(aj[j]);
+ if(temp==zero) temp=one;
+
+ for(i=0; i<mnp; ++i)
+ err[i]+=temp*Aij[i*cnp+j];
+ }
+ for(j=0; j<pnp; ++j){
+ temp=FABS(bi[j]);
+ if(temp==zero) temp=one;
+
+ for(i=0; i<mnp; ++i)
+ err[i]+=temp*Bij[i*pnp+j];
+ }
+
+ for(i=0; i<mnp; ++i){
+ temp=one;
+ if(fvec[i]!=zero && fvecp[i]!=zero && FABS(fvecp[i]-fvec[i])>=epsf*FABS(fvec[i]))
+ temp=eps*FABS((fvecp[i]-fvec[i])/eps - err[i])/(FABS(fvec[i])+FABS(fvecp[i]));
+ err[i]=one;
+ if(temp>epsmch && temp<eps)
+ err[i]=(log10(temp) - epslog)/epslog;
+ if(temp>=eps) err[i]=zero;
+ }
+
+ for(i=0; i<mnp; ++i)
+ if(err[i]<=0.5){
+ fprintf(stderr, "SBA: gradient %d (corresponding to element %d of the projection of point %d on camera %d) is %s (err=%g)\n",
+ i, i, ii, jj, (err[i]==0.0)? "wrong" : "probably wrong", err[i]);
+ ++numerr;
+ }
+ if(numerr) fprintf(stderr, "SBA: found %d suspicious gradients out of %d\n\n", numerr, mnp);
+
+ free(fjac);
+ free(buf);
+
+ return;
+}
+
+void sba_motstr_chkjac(
+ void (*proj)(int jj, int ii, double *aj, double *bi, double *xij, void *adata),
+ void (*projac)(int jj, int ii, double *aj, double *bi, double *Aij, double *Bij, void *adata),
+ double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata)
+{
+union proj_projac funcs;
+
+ funcs.motstr.proj=proj;
+ funcs.motstr.projac=projac;
+
+ sba_chkjac(&funcs, aj, bi, jj, ii, cnp, pnp, mnp, func_adata, jac_adata);
+}
+
+void sba_mot_chkjac(
+ void (*proj)(int jj, int ii, double *aj, double *xij, void *adata),
+ void (*projac)(int jj, int ii, double *aj, double *Aij, void *adata),
+ double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata)
+{
+union proj_projac funcs;
+
+ funcs.mot.proj=proj;
+ funcs.mot.projac=projac;
+
+ sba_chkjac(&funcs, aj, NULL, jj, ii, cnp, 0, mnp, func_adata, jac_adata);
+}
+
+void sba_str_chkjac(
+ void (*proj)(int jj, int ii, double *bi, double *xij, void *adata),
+ void (*projac)(int jj, int ii, double *bi, double *Bij, void *adata),
+ double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata)
+{
+union proj_projac funcs;
+
+ funcs.str.proj=proj;
+ funcs.str.projac=projac;
+
+ sba_chkjac(&funcs, NULL, bi, jj, ii, 0, pnp, mnp, func_adata, jac_adata);
+}
+#endif /* 0 */
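
In case the scoring in sba_motstr_chkjac_x() above is hard to follow, the first-order argument behind it (a summary of the code, not an addition to it) is: the perturbed parameter vector is built as pp[j] = p[j] + eps*|p[j]| (or p[j] + eps when p[j] is zero), so a correct Jacobian J should satisfy, to first order,

    (func(pp)[i] - func(p)[i]) / eps  ~  sum_j |p[j]| * J[i][j]

and the right-hand side is exactly what gets accumulated into err[i] from the supplied A_ij and B_ij blocks. The final loops then score the relative discrepancy between the finite-difference estimate and that prediction and map it onto [0, 1]: values near 1 mean the analytic Jacobian is consistent with func, values near 0 mean it almost certainly is not, and anything at or below 0.5 is reported as suspicious.
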
diff --git a/redist/sba/sba_chkjac.h b/redist/sba/sba_chkjac.h
new file mode 100644
index 0000000..899c35e
--- /dev/null
+++ b/redist/sba/sba_chkjac.h
@@ -0,0 +1,69 @@
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// Prototypes and definitions for verification routines for the jacobians
+//// employed in the expert & simple drivers for sparse bundle adjustment
+//// Copyright (C) 2005-2008 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+////
+//// This program is distributed in the hope that it will be useful,
+//// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//// GNU General Public License for more details.
+////
+///////////////////////////////////////////////////////////////////////////////////
+
+#ifndef _SBA_CHKJAC_H_
+#define _SBA_CHKJAC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if 0
+/* simple driver jacobians */
+extern void sba_motstr_chkjac(
+ void (*proj)(int jj, int ii, double *aj, double *bi, double *xij, void *adata),
+ void (*projac)(int jj, int ii, double *aj, double *bi, double *Aij, double *Bij, void *adata),
+ double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata);
+
+extern void sba_mot_chkjac(
+ void (*proj)(int jj, int ii, double *aj, double *xij, void *adata),
+ void (*projac)(int jj, int ii, double *aj, double *Aij, void *adata),
+ double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata);
+
+extern void sba_str_chkjac(
+ void (*proj)(int jj, int ii, double *bi, double *xij, void *adata),
+ void (*projac)(int jj, int ii, double *bi, double *Bij, void *adata),
+ double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata);
+#endif /* 0 */
+
+/* expert driver jacobians */
+extern void
+sba_motstr_chkjac_x(void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
+ double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int ncon, int mcon, int cnp, int pnp,
+ int mnp, void *func_adata, void *jac_adata);
+
+extern void
+sba_mot_chkjac_x(void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
+ double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int mcon, int cnp, int mnp,
+ void *func_adata, void *jac_adata);
+
+extern void
+sba_str_chkjac_x(void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
+ double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int ncon, int pnp, int mnp,
+ void *func_adata, void *jac_adata);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SBA_CHKJAC_H_ */
diff --git a/redist/sba/sba_crsm.c b/redist/sba/sba_crsm.c
new file mode 100644
index 0000000..9ba11f1
--- /dev/null
+++ b/redist/sba/sba_crsm.c
@@ -0,0 +1,338 @@
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// CRS sparse matrices manipulation routines
+//// Copyright (C) 2004-2008 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+////
+//// This program is distributed in the hope that it will be useful,
+//// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//// GNU General Public License for more details.
+////
+///////////////////////////////////////////////////////////////////////////////////
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sba.h"
+
+static void sba_crsm_print(struct sba_crsm *sm, FILE *fp);
+static void sba_crsm_build(struct sba_crsm *sm, int *m, int nr, int nc);
+
+/* allocate a sparse CRS matrix */
+void sba_crsm_alloc(struct sba_crsm *sm, int nr, int nc, int nnz) {
+ int msz;
+
+ sm->nr = nr;
+ sm->nc = nc;
+ sm->nnz = nnz;
+ msz = 2 * nnz + nr + 1;
+ sm->val = (int *)malloc(msz * sizeof(int)); /* required memory is allocated in a single step */
+ if (!sm->val) {
+ fprintf(stderr, "SBA: memory allocation request failed in sba_crsm_alloc() [nr=%d, nc=%d, nnz=%d]\n", nr, nc,
+ nnz);
+ exit(1);
+ }
+ sm->colidx = sm->val + nnz;
+ sm->rowptr = sm->colidx + nnz;
+}
+
+/* free a sparse CRS matrix */
+void sba_crsm_free(struct sba_crsm *sm) {
+ sm->nr = sm->nc = sm->nnz = -1;
+ free(sm->val);
+ sm->val = sm->colidx = sm->rowptr = NULL;
+}
+
+static void sba_crsm_print(struct sba_crsm *sm, FILE *fp) {
+ register int i;
+
+ fprintf(fp, "matrix is %dx%d, %d non-zeros\nval: ", sm->nr, sm->nc, sm->nnz);
+ for (i = 0; i < sm->nnz; ++i)
+ fprintf(fp, "%d ", sm->val[i]);
+ fprintf(fp, "\ncolidx: ");
+ for (i = 0; i < sm->nnz; ++i)
+ fprintf(fp, "%d ", sm->colidx[i]);
+ fprintf(fp, "\nrowptr: ");
+ for (i = 0; i <= sm->nr; ++i)
+ fprintf(fp, "%d ", sm->rowptr[i]);
+ fprintf(fp, "\n");
+}
+
+/* build a sparse CRS matrix from a dense one. intended to serve as an example for sm creation */
+static void sba_crsm_build(struct sba_crsm *sm, int *m, int nr, int nc) {
+ int nnz;
+ register int i, j, k;
+
+ /* count nonzeros */
+ for (i = nnz = 0; i < nr; ++i)
+ for (j = 0; j < nc; ++j)
+ if (m[i * nc + j] != 0)
+ ++nnz;
+
+ sba_crsm_alloc(sm, nr, nc, nnz);
+
+ /* fill up the sm structure */
+ for (i = k = 0; i < nr; ++i) {
+ sm->rowptr[i] = k;
+ for (j = 0; j < nc; ++j)
+ if (m[i * nc + j] != 0) {
+ sm->val[k] = m[i * nc + j];
+ sm->colidx[k++] = j;
+ }
+ }
+ sm->rowptr[nr] = nnz;
+}
+
+/* returns the index of the (i, j) element. No bounds checking! */
+int sba_crsm_elmidx(struct sba_crsm *sm, int i, int j) {
+ register int low, high, mid, diff;
+
+ low = sm->rowptr[i];
+ high = sm->rowptr[i + 1] - 1;
+
+ /* binary search for finding the element at column j */
+ while (low <= high) {
+ /* following early termination test seems to actually slow down the search */
+ // if(j<sm->colidx[low] || j>sm->colidx[high]) return -1; /* not found */
+
+ /* mid=low+((high-low)>>1) ensures no index overflows */
+ mid = (low + high) >> 1; //(low+high)/2;
+ diff = j - sm->colidx[mid];
+ if (diff < 0)
+ high = mid - 1;
+ else if (diff > 0)
+ low = mid + 1;
+ else
+ return mid;
+ }
+
+ return -1; /* not found */
+}
+
+/* similarly to sba_crsm_elmidx() above, returns the index of the (i, j) element using the
+ * fact that the index of element (i, jp) was previously found to be jpidx. This can be
+ * slightly faster than sba_crsm_elmidx(). No bounds checking!
+ */
+int sba_crsm_elmidxp(struct sba_crsm *sm, int i, int j, int jp, int jpidx) {
+ register int low, high, mid, diff;
+
+ diff = j - jp;
+ if (diff > 0) {
+ low = jpidx + 1;
+ high = sm->rowptr[i + 1] - 1;
+ } else if (diff == 0)
+ return jpidx;
+ else { /* diff<0 */
+ low = sm->rowptr[i];
+ high = jpidx - 1;
+ }
+
+ /* binary search for finding the element at column j */
+ while (low <= high) {
+ /* following early termination test seems to actually slow down the search */
+ // if(j<sm->colidx[low] || j>sm->colidx[high]) return -1; /* not found */
+
+ /* mid=low+((high-low)>>1) ensures no index overflows */
+ mid = (low + high) >> 1; //(low+high)/2;
+ diff = j - sm->colidx[mid];
+ if (diff < 0)
+ high = mid - 1;
+ else if (diff > 0)
+ low = mid + 1;
+ else
+ return mid;
+ }
+
+ return -1; /* not found */
+}
+
+/* returns the number of nonzero elements in row i and
+ * fills up the vidxs and jidxs arrays with the val and column
+ * indexes of the elements found, respectively.
+ * vidxs and jidxs are assumed preallocated and of max. size sm->nc
+ */
+int sba_crsm_row_elmidxs(struct sba_crsm *sm, int i, int *vidxs, int *jidxs) {
+ register int j, k;
+
+ for (j = sm->rowptr[i], k = 0; j < sm->rowptr[i + 1]; ++j, ++k) {
+ vidxs[k] = j;
+ jidxs[k] = sm->colidx[j];
+ }
+
+ return k;
+}
+
+/* returns the number of nonzero elements in col j and
+ * fills up the vidxs and iidxs arrays with the val and row
+ * indexes of the elements found, respectively.
+ * vidxs and iidxs are assumed preallocated and of max. size sm->nr
+ */
+int sba_crsm_col_elmidxs(struct sba_crsm *sm, int j, int *vidxs, int *iidxs) {
+ register int *nextrowptr = sm->rowptr + 1;
+ register int i, l;
+ register int low, high, mid, diff;
+
+ for (i = l = 0; i < sm->nr; ++i) {
+ low = sm->rowptr[i];
+ high = nextrowptr[i] - 1;
+
+ /* binary search attempting to find an element at column j */
+ while (low <= high) {
+ // if(j<sm->colidx[low] || j>sm->colidx[high]) break; /* not found */
+
+ mid = (low + high) >> 1; //(low+high)/2;
+ diff = j - sm->colidx[mid];
+ if (diff < 0)
+ high = mid - 1;
+ else if (diff > 0)
+ low = mid + 1;
+ else { /* found */
+ vidxs[l] = mid;
+ iidxs[l++] = i;
+ break;
+ }
+ }
+ }
+
+ return l;
+}
+
+/* a more straightforward (but slower) implementation of the above function */
+/***
+int sba_crsm_col_elmidxs(struct sba_crsm *sm, int j, int *vidxs, int *iidxs)
+{
+register int i, k, l;
+
+ for(i=l=0; i<sm->nr; ++i)
+ for(k=sm->rowptr[i]; k<sm->rowptr[i+1]; ++k)
+ if(sm->colidx[k]==j){
+ vidxs[l]=k;
+ iidxs[l++]=i;
+ }
+
+ return l;
+}
+***/
+
+#if 0
+/* returns 1 if there exists a row i having columns j and k,
+ * i.e. a row i s.t. elements (i, j) and (i, k) are nonzero;
+ * 0 otherwise
+ */
+int sba_crsm_common_row(struct sba_crsm *sm, int j, int k)
+{
+register int i, low, high, mid, diff;
+
+ if(j==k) return 1;
+
+ for(i=0; i<sm->nr; ++i){
+ low=sm->rowptr[i];
+ high=sm->rowptr[i+1]-1;
+ if(j<sm->colidx[low] || j>sm->colidx[high] || /* j not found */
+ k<sm->colidx[low] || k>sm->colidx[high]) /* k not found */
+ continue;
+
+ /* binary search for finding the element at column j */
+ while(low<=high){
+ mid=(low+high)>>1; //(low+high)/2;
+ diff=j-sm->colidx[mid];
+ if(diff<0)
+ high=mid-1;
+ else if(diff>0)
+ low=mid+1;
+ else
+ goto jfound;
+ }
+
+ continue; /* j not found */
+
+jfound:
+ if(j>k){
+ low=sm->rowptr[i];
+ high=mid-1;
+ }
+ else{
+ low=mid+1;
+ high=sm->rowptr[i+1]-1;
+ }
+
+ if(k<sm->colidx[low] || k>sm->colidx[high]) continue; /* k not found */
+
+ /* binary search for finding the element at column k */
+ while(low<=high){
+ mid=(low+high)>>1; //(low+high)/2;
+ diff=k-sm->colidx[mid];
+ if(diff<0)
+ high=mid-1;
+ else if(diff>0)
+ low=mid+1;
+ else /* found */
+ return 1;
+ }
+ }
+
+ return 0;
+}
+#endif
+
+#if 0
+
+/* sample code using the above routines */
+
+main()
+{
+int mat[7][6]={
+ {10, 0, 0, 0, -2, 0},
+ {3, 9, 0, 0, 0, 3},
+ {0, 7, 8, 7, 0, 0},
+ {3, 0, 8, 7, 5, 0},
+ {0, 8, 0, 9, 9, 13},
+ {0, 4, 0, 0, 2, -1},
+ {3, 7, 0, 9, 2, 0}
+};
+
+struct sba_crsm sm;
+int i, j, k, l;
+int vidxs[7], /* max(6, 7) */
+ jidxs[6], iidxs[7];
+
+
+ sba_crsm_build(&sm, mat[0], 7, 6);
+ sba_crsm_print(&sm, stdout);
+
+ for(i=0; i<7; ++i){
+ for(j=0; j<6; ++j)
+ printf("%3d ", ((k=sba_crsm_elmidx(&sm, i, j))!=-1)? sm.val[k] : 0);
+ printf("\n");
+ }
+
+ for(i=0; i<7; ++i){
+ k=sba_crsm_row_elmidxs(&sm, i, vidxs, jidxs);
+ printf("row %d\n", i);
+ for(l=0; l<k; ++l){
+ j=jidxs[l];
+ printf("%d %d ", j, sm.val[vidxs[l]]);
+ }
+ printf("\n");
+ }
+
+ for(j=0; j<6; ++j){
+ k=sba_crsm_col_elmidxs(&sm, j, vidxs, iidxs);
+ printf("col %d\n", j);
+ for(l=0; l<k; ++l){
+ i=iidxs[l];
+ printf("%d %d ", i, sm.val[vidxs[l]]);
+ }
+ printf("\n");
+ }
+
+ sba_crsm_free(&sm);
+}
+#endif
diff --git a/redist/sba/sba_lapack.c b/redist/sba/sba_lapack.c
new file mode 100644
index 0000000..14fc44f
--- /dev/null
+++ b/redist/sba/sba_lapack.c
@@ -0,0 +1,1730 @@
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// Linear algebra operations for the sba package
+//// Copyright (C) 2004-2009 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+////
+//// This program is distributed in the hope that it will be useful,
+//// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//// GNU General Public License for more details.
+////
+///////////////////////////////////////////////////////////////////////////////////
+
+/* A note on memory alignment:
+ *
+ * Several of the functions below use a piece of dynamically allocated memory
+ * to store variables of different size (i.e., ints and doubles). To avoid
+ * alignment problems, care must be taken so that elements that are larger
+ * (doubles) are stored before smaller ones (ints). This ensures proper
+ * alignment under different alignment choices made by different CPUs:
+ * For instance, a double variable is aligned on x86 to 4 bytes but
+ * aligned to 8 bytes on AMD64 despite having the same size of 8 bytes.
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "compiler.h"
+#include "sba.h"
+
+#ifdef SBA_APPEND_UNDERSCORE_SUFFIX
+#define F77_FUNC(func) func##_
+#else
+#define F77_FUNC(func) func
+#endif /* SBA_APPEND_UNDERSCORE_SUFFIX */
+
+/* declarations of LAPACK routines employed */
+
+/* QR decomposition */
+extern int F77_FUNC(dgeqrf)(int *m, int *n, double *a, int *lda, double *tau, double *work, int *lwork, int *info);
+extern int F77_FUNC(dorgqr)(int *m, int *n, int *k, double *a, int *lda, double *tau, double *work, int *lwork,
+ int *info);
+
+/* solution of triangular system */
+extern int F77_FUNC(dtrtrs)(char *uplo, char *trans, char *diag, int *n, int *nrhs, double *a, int *lda, double *b,
+ int *ldb, int *info);
+
+/* Cholesky decomposition, linear system solution and matrix inversion */
+extern int F77_FUNC(dpotf2)(char *uplo, int *n, double *a, int *lda, int *info); /* unblocked Cholesky */
+extern int F77_FUNC(dpotrf)(char *uplo, int *n, double *a, int *lda, int *info); /* block version of dpotf2 */
+extern int F77_FUNC(dpotrs)(char *uplo, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info);
+extern int F77_FUNC(dpotri)(char *uplo, int *n, double *a, int *lda, int *info);
+
+/* LU decomposition, linear system solution and matrix inversion */
+extern int F77_FUNC(dgetrf)(int *m, int *n, double *a, int *lda, int *ipiv, int *info); /* blocked LU */
+extern int F77_FUNC(dgetf2)(int *m, int *n, double *a, int *lda, int *ipiv, int *info); /* unblocked LU */
+extern int F77_FUNC(dgetrs)(char *trans, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb,
+ int *info);
+extern int F77_FUNC(dgetri)(int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info);
+
+/* SVD */
+extern int F77_FUNC(dgesvd)(char *jobu, char *jobvt, int *m, int *n, double *a, int *lda, double *s, double *u,
+ int *ldu, double *vt, int *ldvt, double *work, int *lwork, int *info);
+
+/* lapack 3.0 routine, faster than dgesvd() */
+extern int F77_FUNC(dgesdd)(char *jobz, int *m, int *n, double *a, int *lda, double *s, double *u, int *ldu, double *vt,
+ int *ldvt, double *work, int *lwork, int *iwork, int *info);
+
+/* Bunch-Kaufman factorization of a real symmetric matrix A, solution of linear systems and matrix inverse */
+extern int F77_FUNC(dsytrf)(char *uplo, int *n, double *a, int *lda, int *ipiv, double *work, int *lwork,
+ int *info); /* blocked ver. */
+extern int F77_FUNC(dsytrs)(char *uplo, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb,
+ int *info);
+extern int F77_FUNC(dsytri)(char *uplo, int *n, double *a, int *lda, int *ipiv, double *work, int *info);
+
+/*
+ * This function returns the solution of Ax = b
+ *
+ * The function is based on QR decomposition with explicit computation of Q:
+ * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes
+ * Q R x = b or R x = Q^T b.
+ *
+ * A is mxm, b is mx1. Argument iscolmaj specifies whether A is
+ * stored in column or row major order. Note that if iscolmaj==1
+ * this function modifies A!
+ *
+ * The function returns 0 in case of error, 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int sba_Axb_QR(double *A, double *B, double *x, int m, int iscolmaj) {
+ static double *buf = NULL;
+ static int buf_sz = 0, nb = 0;
+
+ double *a, *r, *tau, *work;
+ int a_sz, r_sz, tau_sz, tot_sz;
+ register int i, j;
+ int info, worksz, nrhs = 1;
+ register double sum;
+
+ if (A == NULL) {
+ if (buf)
+ free(buf);
+ buf = NULL;
+ buf_sz = 0;
+
+ return 1;
+ }
+
+ /* calculate required memory size */
+ a_sz = (iscolmaj) ? 0 : m * m;
+ r_sz = m * m; /* only the upper triangular part really needed */
+ tau_sz = m;
+ if (!nb) {
+#ifndef SBA_LS_SCARCE_MEMORY
+ double tmp;
+
+ worksz = -1; // workspace query; optimal size is returned in tmp
+ F77_FUNC(dgeqrf)((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&worksz, (int *)&info);
+ nb = ((int)tmp) / m; // optimal worksize is m*nb
+#else
+ nb = 1; // min worksize is m
+#endif /* SBA_LS_SCARCE_MEMORY */
+ }
+ worksz = nb * m;
+ tot_sz = a_sz + r_sz + tau_sz + worksz;
+
+ if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */
+ if (buf)
+ free(buf); /* free previously allocated memory */
+
+ buf_sz = tot_sz;
+ buf = (double *)malloc(buf_sz * sizeof(double));
+ if (!buf) {
+ fprintf(stderr, "memory allocation in sba_Axb_QR() failed!\n");
+ exit(1);
+ }
+ }
+
+ if (!iscolmaj) {
+ a = buf;
+ /* store A (column major!) into a */
+ for (i = 0; i < m; ++i)
+ for (j = 0; j < m; ++j)
+ a[i + j * m] = A[i * m + j];
+ } else
+ a = A; /* no copying required */
+
+ r = buf + a_sz;
+ tau = r + r_sz;
+ work = tau + tau_sz;
+
+ /* QR decomposition of A */
+ F77_FUNC(dgeqrf)((int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info);
+ /* error treatment */
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dgeqrf in sba_Axb_QR()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "Unknown LAPACK error %d for dgeqrf in sba_Axb_QR()\n", info);
+ return 0;
+ }
+ }
+
+ /* R is now stored in the upper triangular part of a; copy it in r so that dorgqr() below won't destroy it */
+ for (i = 0; i < r_sz; ++i)
+ r[i] = a[i];
+
+ /* compute Q using the elementary reflectors computed by the above decomposition */
+ F77_FUNC(dorgqr)((int *)&m, (int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info);
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dorgqr in sba_Axb_QR()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "Unknown LAPACK error (%d) in sba_Axb_QR()\n", info);
+ return 0;
+ }
+ }
+
+ /* Q is now in a; compute Q^T b in x */
+ for (i = 0; i < m; ++i) {
+ for (j = 0, sum = 0.0; j < m; ++j)
+ sum += a[i * m + j] * B[j];
+ x[i] = sum;
+ }
+
+ /* solve the linear system R x = Q^t b */
+ F77_FUNC(dtrtrs)("U", "N", "N", (int *)&m, (int *)&nrhs, r, (int *)&m, x, (int *)&m, &info);
+ /* error treatment */
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QR()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QR()\n",
+ info);
+ return 0;
+ }
+ }
+
+ return 1;
+}
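+
+/* Illustrative usage sketch for the routine above (the values are arbitrary;
+ * only the calling convention and the NULL-release convention documented above
+ * are demonstrated). Disabled via #if 0 so it has no effect on the build.
+ */
+#if 0
+static void sba_Axb_QR_example(void)
+{
+	double A[4] = {4.0, 1.0, /* 2x2 system in row major order (iscolmaj=0) */
+		       1.0, 3.0};
+	double b[2] = {1.0, 2.0};
+	double x[2];
+
+	if (sba_Axb_QR(A, b, x, 2, 0))
+		printf("x = [%g %g]\n", x[0], x[1]);
+
+	sba_Axb_QR(NULL, NULL, NULL, 0, 0); /* release the internally retained buffer */
+}
+#endif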
+
+/*
+ * This function returns the solution of Ax = b
+ *
+ * The function is based on QR decomposition without computation of Q:
+ * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes
+ * (A^T A) x = A^T b or (R^T Q^T Q R) x = A^T b or (R^T R) x = A^T b.
+ * This amounts to solving R^T y = A^T b for y and then R x = y for x
+ * Note that Q does not need to be explicitly computed
+ *
+ * A is mxm, b is mx1. Argument iscolmaj specifies whether A is
+ * stored in column or row major order. Note that if iscolmaj==1
+ * this function modifies A!
+ *
+ * The function returns 0 in case of error, 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int sba_Axb_QRnoQ(double *A, double *B, double *x, int m, int iscolmaj) {
+ static double *buf = NULL;
+ static int buf_sz = 0, nb = 0;
+
+ double *a, *tau, *work;
+ int a_sz, tau_sz, tot_sz;
+ register int i, j;
+ int info, worksz, nrhs = 1;
+ register double sum;
+
+ if (A == NULL) {
+ if (buf)
+ free(buf);
+ buf = NULL;
+ buf_sz = 0;
+
+ return 1;
+ }
+
+ /* calculate required memory size */
+ a_sz = (iscolmaj) ? 0 : m * m;
+ tau_sz = m;
+ if (!nb) {
+#ifndef SBA_LS_SCARCE_MEMORY
+ double tmp;
+
+ worksz = -1; // workspace query; optimal size is returned in tmp
+ F77_FUNC(dgeqrf)((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&worksz, (int *)&info);
+ nb = ((int)tmp) / m; // optimal worksize is m*nb
+#else
+ nb = 1; // min worksize is m
+#endif /* SBA_LS_SCARCE_MEMORY */
+ }
+ worksz = nb * m;
+ tot_sz = a_sz + tau_sz + worksz;
+
+ if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */
+ if (buf)
+ free(buf); /* free previously allocated memory */
+
+ buf_sz = tot_sz;
+ buf = (double *)malloc(buf_sz * sizeof(double));
+ if (!buf) {
+ fprintf(stderr, "memory allocation in sba_Axb_QRnoQ() failed!\n");
+ exit(1);
+ }
+ }
+
+ if (!iscolmaj) {
+ a = buf;
+ /* store A (column major!) into a */
+ for (i = 0; i < m; ++i)
+ for (j = 0; j < m; ++j)
+ a[i + j * m] = A[i * m + j];
+ } else
+ a = A; /* no copying required */
+
+ tau = buf + a_sz;
+ work = tau + tau_sz;
+
+ /* compute A^T b in x */
+ for (i = 0; i < m; ++i) {
+ for (j = 0, sum = 0.0; j < m; ++j)
+ sum += a[i * m + j] * B[j];
+ x[i] = sum;
+ }
+
+ /* QR decomposition of A */
+ F77_FUNC(dgeqrf)((int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info);
+ /* error treatment */
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dgeqrf in sba_Axb_QRnoQ()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "Unknown LAPACK error %d for dgeqrf in sba_Axb_QRnoQ()\n", info);
+ return 0;
+ }
+ }
+
+ /* R is stored in the upper triangular part of a */
+
+ /* solve the linear system R^T y = A^t b */
+ F77_FUNC(dtrtrs)("U", "T", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
+ /* error treatment */
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QRnoQ()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr,
+ "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QRnoQ()\n",
+ info);
+ return 0;
+ }
+ }
+
+ /* solve the linear system R x = y */
+ F77_FUNC(dtrtrs)("U", "N", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
+ /* error treatment */
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QRnoQ()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr,
+ "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QRnoQ()\n",
+ info);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * This function returns the solution of Ax=b
+ *
+ * The function assumes that A is symmetric & positive definite and employs
+ * the Cholesky decomposition:
+ * If A=U^T U with U upper triangular, the system to be solved becomes
+ * (U^T U) x = b
+ * This amounts to solving U^T y = b for y and then U x = y for x
+ *
+ * A is mxm, b is mx1. Argument iscolmaj specifies whether A is
+ * stored in column or row major order. Note that if iscolmaj==1
+ * this function modifies A!
+ *
+ * The function returns 0 in case of error, 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int sba_Axb_Chol(double *A, double *B, double *x, int m, int iscolmaj) {
+ static double *buf = NULL;
+ static int buf_sz = 0;
+
+ double *a;
+ int a_sz, tot_sz;
+ register int i, j;
+ int info, nrhs = 1;
+
+ if (A == NULL) {
+ if (buf)
+ free(buf);
+ buf = NULL;
+ buf_sz = 0;
+
+ return 1;
+ }
+
+ /* calculate required memory size */
+ a_sz = (iscolmaj) ? 0 : m * m;
+ tot_sz = a_sz;
+
+ if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */
+ if (buf)
+ free(buf); /* free previously allocated memory */
+
+ buf_sz = tot_sz;
+ buf = (double *)malloc(buf_sz * sizeof(double));
+ if (!buf) {
+ fprintf(stderr, "memory allocation in sba_Axb_Chol() failed!\n");
+ exit(1);
+ }
+ }
+
+ if (!iscolmaj) {
+ a = buf;
+
+ /* store A into a and B into x; A is assumed to be symmetric, hence
+ * the column and row major order representations are the same
+ */
+ for (i = 0; i < m; ++i) {
+ a[i] = A[i];
+ x[i] = B[i];
+ }
+ for (j = m * m; i < j; ++i) // copy remaining rows; note that i is not re-initialized
+ a[i] = A[i];
+ } else { /* no copying is necessary for A */
+ a = A;
+ for (i = 0; i < m; ++i)
+ x[i] = B[i];
+ }
+
+ /* Cholesky decomposition of A */
+ // F77_FUNC(dpotf2)("U", (int *)&m, a, (int *)&m, (int *)&info);
+ F77_FUNC(dpotrf)("U", (int *)&m, a, (int *)&m, (int *)&info);
+ /* error treatment */
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotf2/dpotrf in sba_Axb_Chol()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization "
+ "could not be completed for dpotf2/dpotrf in sba_Axb_Chol()\n",
+ info);
+ return 0;
+ }
+ }
+
+/* below are two alternative ways for solving the linear system: */
+#if 1
+ /* use the computed Cholesky in one lapack call */
+ F77_FUNC(dpotrs)("U", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
+ if (info < 0) {
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotrs in sba_Axb_Chol()\n", -info);
+ exit(1);
+ }
+#else
+ /* solve the linear systems U^T y = b, U x = y */
+ F77_FUNC(dtrtrs)("U", "T", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
+ /* error treatment */
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_Chol()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr,
+ "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_Chol()\n",
+ info);
+ return 0;
+ }
+ }
+
+ /* solve U x = y */
+ F77_FUNC(dtrtrs)("U", "N", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
+ /* error treatment */
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_Chol()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr,
+ "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_Chol()\n",
+ info);
+ return 0;
+ }
+ }
+#endif /* 1 */
+
+ return 1;
+}
+
+/*
+ * This function returns the solution of Ax = b
+ *
+ * The function employs LU decomposition:
+ * If A=L U with L lower and U upper triangular, then the original system
+ * amounts to solving
+ * L y = b, U x = y
+ *
+ * A is mxm, b is mx1. Argument iscolmaj specifies whether A is
+ * stored in column or row major order. Note that if iscolmaj==1
+ * this function modifies A!
+ *
+ * The function returns 0 in case of error,
+ * 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int sba_Axb_LU(double *A, double *B, double *x, int m, int iscolmaj) {
+ static double *buf = NULL;
+ static int buf_sz = 0;
+
+ int a_sz, ipiv_sz, tot_sz;
+ register int i, j;
+ int info, *ipiv, nrhs = 1;
+ double *a;
+
+ if (A == NULL) {
+ if (buf)
+ free(buf);
+ buf = NULL;
+ buf_sz = 0;
+
+ return 1;
+ }
+
+ /* calculate required memory size */
+ ipiv_sz = m;
+ a_sz = (iscolmaj) ? 0 : m * m;
+ tot_sz = a_sz * sizeof(double) +
+ ipiv_sz * sizeof(int); /* should be arranged in that order for proper doubles alignment */
+
+ if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */
+ if (buf)
+ free(buf); /* free previously allocated memory */
+
+ buf_sz = tot_sz;
+ buf = (double *)malloc(buf_sz);
+ if (!buf) {
+ fprintf(stderr, "memory allocation in sba_Axb_LU() failed!\n");
+ exit(1);
+ }
+ }
+
+ if (!iscolmaj) {
+ a = buf;
+ ipiv = (int *)(a + a_sz);
+
+ /* store A (column major!) into a and B into x */
+ for (i = 0; i < m; ++i) {
+ for (j = 0; j < m; ++j)
+ a[i + j * m] = A[i * m + j];
+
+ x[i] = B[i];
+ }
+ } else { /* no copying is necessary for A */
+ a = A;
+ for (i = 0; i < m; ++i)
+ x[i] = B[i];
+ ipiv = (int *)buf;
+ }
+
+ /* LU decomposition for A */
+ F77_FUNC(dgetrf)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info);
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "argument %d of dgetrf illegal in sba_Axb_LU()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "singular matrix A for dgetrf in sba_Axb_LU()\n");
+ return 0;
+ }
+ }
+
+ /* solve the system with the computed LU */
+ F77_FUNC(dgetrs)("N", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info);
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "argument %d of dgetrs illegal in sba_Axb_LU()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "unknown error for dgetrs in sba_Axb_LU()\n");
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * This function returns the solution of Ax = b
+ *
+ * The function is based on SVD decomposition:
+ * If A=U D V^T with U, V orthogonal and D diagonal, the linear system becomes
+ * (U D V^T) x = b or x=V D^{-1} U^T b
+ * Note that V D^{-1} U^T is the pseudoinverse A^+
+ *
+ * A is mxm, b is mx1. Argument iscolmaj specifies whether A is
+ * stored in column or row major order. Note that if iscolmaj==1
+ * this function modifies A!
+ *
+ * The function returns 0 in case of error, 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int sba_Axb_SVD(double *A, double *B, double *x, int m, int iscolmaj) {
+ static double *buf = NULL;
+ static int buf_sz = 0;
+ static double eps = -1.0;
+
+ register int i, j;
+ double *a, *u, *s, *vt, *work;
+ int a_sz, u_sz, s_sz, vt_sz, tot_sz;
+ double thresh, one_over_denom;
+ register double sum;
+ int info, rank, worksz, *iwork, iworksz;
+
+ if (A == NULL) {
+ if (buf)
+ free(buf);
+ buf = NULL;
+ buf_sz = 0;
+
+ return 1;
+ }
+
+/* calculate required memory size */
+#ifndef SBA_LS_SCARCE_MEMORY
+ worksz = -1; // workspace query. Keep in mind that dgesdd requires more memory than dgesvd
+ /* note that optimal work size is returned in thresh */
+ F77_FUNC(dgesdd)
+ ("A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, (double *)&thresh,
+ (int *)&worksz, NULL, &info);
+ /* F77_FUNC(dgesvd)("A", "A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m,
+ (double *)&thresh, (int *)&worksz, &info); */
+ worksz = (int)thresh;
+#else
+ worksz = m * (7 * m + 4); // min worksize for dgesdd
+ // worksz=5*m; // min worksize for dgesvd
+#endif /* SBA_LS_SCARCE_MEMORY */
+ iworksz = 8 * m;
+ a_sz = (!iscolmaj) ? m * m : 0;
+ u_sz = m * m;
+ s_sz = m;
+ vt_sz = m * m;
+
+ tot_sz = (a_sz + u_sz + s_sz + vt_sz + worksz) * sizeof(double) +
+ iworksz * sizeof(int); /* should be arranged in that order for proper doubles alignment */
+
+ if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */
+ if (buf)
+ free(buf); /* free previously allocated memory */
+
+ buf_sz = tot_sz;
+ buf = (double *)malloc(buf_sz);
+ if (!buf) {
+ fprintf(stderr, "memory allocation in sba_Axb_SVD() failed!\n");
+ exit(1);
+ }
+ }
+
+ if (!iscolmaj) {
+ a = buf;
+ u = a + a_sz;
+ /* store A (column major!) into a */
+ for (i = 0; i < m; ++i)
+ for (j = 0; j < m; ++j)
+ a[i + j * m] = A[i * m + j];
+ } else {
+ a = A; /* no copying required */
+ u = buf;
+ }
+
+ s = u + u_sz;
+ vt = s + s_sz;
+ work = vt + vt_sz;
+ iwork = (int *)(work + worksz);
+
+ /* SVD decomposition of A */
+ F77_FUNC(dgesdd)
+ ("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info);
+ // F77_FUNC(dgesvd)("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int
+ // *)&worksz, &info);
+
+ /* error treatment */
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dgesdd/dgesvd in sba_Axb_SVD()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr,
+ "LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in sba_Axb_SVD() [info=%d]\n",
+ info);
+
+ return 0;
+ }
+ }
+
+ if (eps < 0.0) {
+ double aux;
+
+ /* compute machine epsilon. DBL_EPSILON should do also */
+ for (eps = 1.0; aux = eps + 1.0, aux - 1.0 > 0.0; eps *= 0.5)
+ ;
+ eps *= 2.0;
+ }
+
+ /* compute the pseudoinverse in a */
+ memset(a, 0, m * m * sizeof(double)); /* initialize to zero */
+ for (rank = 0, thresh = eps * s[0]; rank < m && s[rank] > thresh; ++rank) {
+ one_over_denom = 1.0 / s[rank];
+
+ for (j = 0; j < m; ++j)
+ for (i = 0; i < m; ++i)
+ a[i * m + j] += vt[rank + i * m] * u[j + rank * m] * one_over_denom;
+ }
+
+ /* compute A^+ b in x */
+ for (i = 0; i < m; ++i) {
+ for (j = 0, sum = 0.0; j < m; ++j)
+ sum += a[i * m + j] * B[j];
+ x[i] = sum;
+ }
+
+ return 1;
+}
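+
+/* Illustrative sketch (arbitrary values): because the routine above builds the
+ * pseudoinverse, it still returns a solution when A is rank deficient, unlike
+ * the other solvers in this file. Disabled via #if 0.
+ */
+#if 0
+static void sba_Axb_SVD_example(void)
+{
+	double A[4] = {1.0, 2.0, /* rank-1 matrix: the second row is twice the first */
+		       2.0, 4.0};
+	double b[2] = {1.0, 2.0};
+	double x[2];
+
+	if (sba_Axb_SVD(A, b, x, 2, 0))
+		printf("pseudoinverse solution: x = [%g %g]\n", x[0], x[1]);
+}
+#endif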
+
+/*
+ * This function returns the solution of Ax = b for a real symmetric matrix A
+ *
+ * The function is based on UDUT factorization with the pivoting
+ * strategy of Bunch and Kaufman:
+ * A is factored as U*D*U^T where U is upper triangular and
+ * D symmetric and block diagonal (aka spectral decomposition,
+ * Banachiewicz factorization, modified Cholesky factorization)
+ *
+ * A is mxm, b is mx1. Argument iscolmaj specifies whether A is
+ * stored in column or row major order. Note that if iscolmaj==1
+ * this function modifies A!
+ *
+ * The function returns 0 in case of error,
+ * 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int sba_Axb_BK(double *A, double *B, double *x, int m, int iscolmaj) {
+ static double *buf = NULL;
+ static int buf_sz = 0, nb = 0;
+
+ int a_sz, ipiv_sz, work_sz, tot_sz;
+ register int i, j;
+ int info, *ipiv, nrhs = 1;
+ double *a, *work;
+
+ if (A == NULL) {
+ if (buf)
+ free(buf);
+ buf = NULL;
+ buf_sz = 0;
+
+ return 1;
+ }
+
+ /* calculate required memory size */
+ ipiv_sz = m;
+ a_sz = (iscolmaj) ? 0 : m * m;
+ if (!nb) {
+#ifndef SBA_LS_SCARCE_MEMORY
+ double tmp;
+
+ work_sz = -1; // workspace query; optimal size is returned in tmp
+ F77_FUNC(dsytrf)("U", (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info);
+ nb = ((int)tmp) / m; // optimal worksize is m*nb
+#else
+ nb = -1; // min worksize is 1
+#endif /* SBA_LS_SCARCE_MEMORY */
+ }
+ work_sz = (nb != -1) ? nb * m : 1;
+ tot_sz = (a_sz + work_sz) * sizeof(double) +
+ ipiv_sz * sizeof(int); /* should be arranged in that order for proper doubles alignment */
+
+ if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */
+ if (buf)
+ free(buf); /* free previously allocated memory */
+
+ buf_sz = tot_sz;
+ buf = (double *)malloc(buf_sz);
+ if (!buf) {
+ fprintf(stderr, "memory allocation in sba_Axb_BK() failed!\n");
+ exit(1);
+ }
+ }
+
+ if (!iscolmaj) {
+ a = buf;
+ work = a + a_sz;
+
+ /* store A into a and B into x; A is assumed to be symmetric, hence
+ * the column and row major order representations are the same
+ */
+ for (i = 0; i < m; ++i) {
+ a[i] = A[i];
+ x[i] = B[i];
+ }
+ for (j = m * m; i < j; ++i) // copy remaining rows; note that i is not re-initialized
+ a[i] = A[i];
+ } else { /* no copying is necessary for A */
+ a = A;
+ for (i = 0; i < m; ++i)
+ x[i] = B[i];
+ work = buf;
+ }
+ ipiv = (int *)(work + work_sz);
+
+ /* factorization for A */
+ F77_FUNC(dsytrf)("U", (int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info);
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "argument %d of dsytrf illegal in sba_Axb_BK()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "singular block diagonal matrix D for dsytrf in sba_Axb_BK() [D(%d, %d) is zero]\n", info,
+ info);
+ return 0;
+ }
+ }
+
+ /* solve the system with the computed factorization */
+ F77_FUNC(dsytrs)("U", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info);
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "argument %d of dsytrs illegal in sba_Axb_BK()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "unknown error for dsytrs in sba_Axb_BK()\n");
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * This function computes the inverse of a square matrix whose upper triangle
+ * is stored in A into its lower triangle using LU decomposition
+ *
+ * The function returns 0 in case of error (e.g. A is singular),
+ * 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int sba_symat_invert_LU(double *A, int m) {
+ static double *buf = NULL;
+ static int buf_sz = 0, nb = 0;
+
+ int a_sz, ipiv_sz, work_sz, tot_sz;
+ register int i, j;
+ int info, *ipiv;
+ double *a, *work;
+
+ if (A == NULL) {
+ if (buf)
+ free(buf);
+ buf = NULL;
+ buf_sz = 0;
+
+ return 1;
+ }
+
+ /* calculate required memory size */
+ ipiv_sz = m;
+ a_sz = m * m;
+ if (!nb) {
+#ifndef SBA_LS_SCARCE_MEMORY
+ double tmp;
+
+ work_sz = -1; // workspace query; optimal size is returned in tmp
+ F77_FUNC(dgetri)((int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info);
+ nb = ((int)tmp) / m; // optimal worksize is m*nb
+#else
+ nb = 1; // min worksize is m
+#endif /* SBA_LS_SCARCE_MEMORY */
+ }
+ work_sz = nb * m;
+ tot_sz = (a_sz + work_sz) * sizeof(double) +
+ ipiv_sz * sizeof(int); /* should be arranged in that order for proper doubles alignment */
+
+ if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */
+ if (buf)
+ free(buf); /* free previously allocated memory */
+
+ buf_sz = tot_sz;
+ buf = (double *)malloc(buf_sz);
+ if (!buf) {
+ fprintf(stderr, "memory allocation in sba_symat_invert_LU() failed!\n");
+ exit(1);
+ }
+ }
+
+ a = buf;
+ work = a + a_sz;
+ ipiv = (int *)(work + work_sz);
+
+ /* store A (column major!) into a */
+ for (i = 0; i < m; ++i)
+ for (j = i; j < m; ++j)
+ a[i + j * m] = a[j + i * m] = A[i * m + j]; // copy A's upper part to a's upper & lower
+
+ /* LU decomposition for A */
+ F77_FUNC(dgetrf)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info);
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "argument %d of dgetrf illegal in sba_symat_invert_LU()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "singular matrix A for dgetrf in sba_symat_invert_LU()\n");
+ return 0;
+ }
+ }
+
+ /* (A)^{-1} from LU */
+ F77_FUNC(dgetri)((int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info);
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "argument %d of dgetri illegal in sba_symat_invert_LU()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "singular matrix A for dgetri in sba_symat_invert_LU()\n");
+ return 0;
+ }
+ }
+
+ /* store (A)^{-1} in A's lower triangle */
+ for (i = 0; i < m; ++i)
+ for (j = 0; j <= i; ++j)
+ A[i * m + j] = a[i + j * m];
+
+ return 1;
+}
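+
+/* Illustrative sketch (arbitrary values) of the in-place convention used by the
+ * symmetric-inverse routines: only the upper triangle of A must be valid on
+ * entry, and on return the lower triangle (diagonal included) holds A^{-1}.
+ * Disabled via #if 0.
+ */
+#if 0
+static void sba_symat_invert_example(void)
+{
+	double A[9] = {4.0, 1.0, 0.0, /* row major 3x3; entries below the diagonal are ignored */
+		       0.0, 3.0, 1.0,
+		       0.0, 0.0, 2.0};
+
+	if (sba_symat_invert_LU(A, 3)) {
+		/* A[i*3+j] with j<=i now holds element (i, j) of the inverse */
+	}
+
+	sba_symat_invert_LU(NULL, 0); /* release the internally retained buffer */
+}
+#endif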
+
+/*
+ * This function computes the inverse of a square symmetric positive definite
+ * matrix whose upper triangle is stored in A into its lower triangle using
+ * Cholesky factorization
+ *
+ * The function returns 0 in case of error (e.g. A is not positive definite or singular),
+ * 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int sba_symat_invert_Chol(double *A, int m) {
+ static double *buf = NULL;
+ static int buf_sz = 0;
+
+ int a_sz, tot_sz;
+ register int i, j;
+ int info;
+ double *a;
+
+ if (A == NULL) {
+ if (buf)
+ free(buf);
+ buf = NULL;
+ buf_sz = 0;
+
+ return 1;
+ }
+
+ /* calculate required memory size */
+ a_sz = m * m;
+ tot_sz = a_sz;
+
+ if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */
+ if (buf)
+ free(buf); /* free previously allocated memory */
+
+ buf_sz = tot_sz;
+ buf = (double *)malloc(buf_sz * sizeof(double));
+ if (!buf) {
+ fprintf(stderr, "memory allocation in sba_symat_invert_Chol() failed!\n");
+ exit(1);
+ }
+ }
+
+ a = (double *)buf;
+
+ /* store A into a; A is assumed symmetric, hence no transposition is needed */
+ for (i = 0, j = a_sz; i < j; ++i)
+ a[i] = A[i];
+
+ /* Cholesky factorization for A; a's lower part corresponds to A's upper */
+ // F77_FUNC(dpotrf)("L", (int *)&m, a, (int *)&m, (int *)&info);
+ F77_FUNC(dpotf2)("L", (int *)&m, a, (int *)&m, (int *)&info);
+ /* error treatment */
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotrf in sba_symat_invert_Chol()\n",
+ -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization "
+ "could not be completed for dpotrf in sba_symat_invert_Chol()\n",
+ info);
+ return 0;
+ }
+ }
+
+ /* (A)^{-1} from Cholesky */
+ F77_FUNC(dpotri)("L", (int *)&m, a, (int *)&m, (int *)&info);
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "argument %d of dpotri illegal in sba_symat_invert_Chol()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "the (%d, %d) element of the factor U or L is zero, singular matrix A for dpotri in "
+ "sba_symat_invert_Chol()\n",
+ info, info);
+ return 0;
+ }
+ }
+
+ /* store (A)^{-1} in A's lower triangle. The lower triangle of the symmetric A^{-1} is in the lower triangle of a */
+ for (i = 0; i < m; ++i)
+ for (j = 0; j <= i; ++j)
+ A[i * m + j] = a[i + j * m];
+
+ return 1;
+}
+
+/*
+ * This function computes the inverse of a symmetric indefinite
+ * matrix whose upper triangle is stored in A into its lower triangle
+ * using LDLT factorization with the pivoting strategy of Bunch and Kaufman
+ *
+ * The function returns 0 in case of error (e.g. A is singular),
+ * 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int sba_symat_invert_BK(double *A, int m) {
+ static double *buf = NULL;
+ static int buf_sz = 0, nb = 0;
+
+ int a_sz, ipiv_sz, work_sz, tot_sz;
+ register int i, j;
+ int info, *ipiv;
+ double *a, *work;
+
+ if (A == NULL) {
+ if (buf)
+ free(buf);
+ buf = NULL;
+ buf_sz = 0;
+
+ return 1;
+ }
+
+ /* calculate required memory size */
+ ipiv_sz = m;
+ a_sz = m * m;
+ if (!nb) {
+#ifndef SBA_LS_SCARCE_MEMORY
+ double tmp;
+
+ work_sz = -1; // workspace query; optimal size is returned in tmp
+ F77_FUNC(dsytrf)("L", (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info);
+ nb = ((int)tmp) / m; // optimal worksize is m*nb
+#else
+ nb = -1; // min worksize is 1
+#endif /* SBA_LS_SCARCE_MEMORY */
+ }
+ work_sz = (nb != -1) ? nb * m : 1;
+ work_sz = (work_sz >= m) ? work_sz : m; /* ensure that work is at least m elements long, as required by dsytri */
+ tot_sz = (a_sz + work_sz) * sizeof(double) +
+ ipiv_sz * sizeof(int); /* should be arranged in that order for proper doubles alignment */
+
+ if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */
+ if (buf)
+ free(buf); /* free previously allocated memory */
+
+ buf_sz = tot_sz;
+ buf = (double *)malloc(buf_sz);
+ if (!buf) {
+ fprintf(stderr, "memory allocation in sba_symat_invert_BK() failed!\n");
+ exit(1);
+ }
+ }
+
+ a = buf;
+ work = a + a_sz;
+ ipiv = (int *)(work + work_sz);
+
+ /* store A into a; A is assumed symmetric, hence no transposition is needed */
+ for (i = 0, j = a_sz; i < j; ++i)
+ a[i] = A[i];
+
+ /* LDLT factorization for A; a's lower part corresponds to A's upper */
+ F77_FUNC(dsytrf)("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info);
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "argument %d of dsytrf illegal in sba_symat_invert_BK()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr,
+ "singular block diagonal matrix D for dsytrf in sba_symat_invert_BK() [D(%d, %d) is zero]\n", info,
+ info);
+ return 0;
+ }
+ }
+
+ /* (A)^{-1} from LDLT */
+ F77_FUNC(dsytri)("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&info);
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "argument %d of dsytri illegal in sba_symat_invert_BK()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr,
+ "D(%d, %d)=0, matrix is singular and its inverse could not be computed in sba_symat_invert_BK()\n",
+ info, info);
+ return 0;
+ }
+ }
+
+ /* store (A)^{-1} in A's lower triangle. The lower triangle of the symmetric A^{-1} is in the lower triangle of a */
+ for (i = 0; i < m; ++i)
+ for (j = 0; j <= i; ++j)
+ A[i * m + j] = a[i + j * m];
+
+ return 1;
+}
+
+#define __CG_LINALG_BLOCKSIZE 8
+
+/* Dot product of two vectors x and y using loop unrolling and blocking.
+ * see http://www.abarnett.demon.co.uk/tutorial.html
+ */
+
+inline static double dprod(const int n, const double *const x, const double *const y) {
+ register int i, j1, j2, j3, j4, j5, j6, j7;
+ int blockn;
+ register double sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0, sum5 = 0.0, sum6 = 0.0, sum7 = 0.0;
+
+ /* n may not be divisible by __CG_LINALG_BLOCKSIZE,
+ * go as near as we can first, then tidy up.
+ */
+ blockn = (n / __CG_LINALG_BLOCKSIZE) * __CG_LINALG_BLOCKSIZE;
+
+ /* unroll the loop in blocks of `__CG_LINALG_BLOCKSIZE' */
+ for (i = 0; i < blockn; i += __CG_LINALG_BLOCKSIZE) {
+ sum0 += x[i] * y[i];
+ j1 = i + 1;
+ sum1 += x[j1] * y[j1];
+ j2 = i + 2;
+ sum2 += x[j2] * y[j2];
+ j3 = i + 3;
+ sum3 += x[j3] * y[j3];
+ j4 = i + 4;
+ sum4 += x[j4] * y[j4];
+ j5 = i + 5;
+ sum5 += x[j5] * y[j5];
+ j6 = i + 6;
+ sum6 += x[j6] * y[j6];
+ j7 = i + 7;
+ sum7 += x[j7] * y[j7];
+ }
+
+ /*
+ * There may be some left to do.
+ * This could be done as a simple for() loop,
+ * but a switch is faster (and more interesting)
+ */
+
+ if (i < n) {
+ /* Jump into the case at the place that will allow
+ * us to finish off the appropriate number of items.
+ */
+
+ switch (n - i) {
+ case 7:
+ sum0 += x[i] * y[i];
+ ++i;
+ case 6:
+ sum1 += x[i] * y[i];
+ ++i;
+ case 5:
+ sum2 += x[i] * y[i];
+ ++i;
+ case 4:
+ sum3 += x[i] * y[i];
+ ++i;
+ case 3:
+ sum4 += x[i] * y[i];
+ ++i;
+ case 2:
+ sum5 += x[i] * y[i];
+ ++i;
+ case 1:
+ sum6 += x[i] * y[i];
+ ++i;
+ }
+ }
+
+ return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
+}
+
+/* Compute z=x+a*y for two vectors x and y and a scalar a; z can be one of x, y.
+ * Similarly to the dot product routine, this one uses loop unrolling and blocking
+ */
+
+inline static void daxpy(const int n, double *const z, const double *const x, const double a, const double *const y) {
+ register int i, j1, j2, j3, j4, j5, j6, j7;
+ int blockn;
+
+ /* n may not be divisible by __CG_LINALG_BLOCKSIZE,
+ * go as near as we can first, then tidy up.
+ */
+ blockn = (n / __CG_LINALG_BLOCKSIZE) * __CG_LINALG_BLOCKSIZE;
+
+ /* unroll the loop in blocks of `__CG_LINALG_BLOCKSIZE' */
+ for (i = 0; i < blockn; i += __CG_LINALG_BLOCKSIZE) {
+ z[i] = x[i] + a * y[i];
+ j1 = i + 1;
+ z[j1] = x[j1] + a * y[j1];
+ j2 = i + 2;
+ z[j2] = x[j2] + a * y[j2];
+ j3 = i + 3;
+ z[j3] = x[j3] + a * y[j3];
+ j4 = i + 4;
+ z[j4] = x[j4] + a * y[j4];
+ j5 = i + 5;
+ z[j5] = x[j5] + a * y[j5];
+ j6 = i + 6;
+ z[j6] = x[j6] + a * y[j6];
+ j7 = i + 7;
+ z[j7] = x[j7] + a * y[j7];
+ }
+
+ /*
+ * There may be some left to do.
+ * This could be done as a simple for() loop,
+ * but a switch is faster (and more interesting)
+ */
+
+ if (i < n) {
+ /* Jump into the case at the place that will allow
+ * us to finish off the appropriate number of items.
+ */
+
+ switch (n - i) {
+ case 7:
+ z[i] = x[i] + a * y[i];
+ ++i;
+ case 6:
+ z[i] = x[i] + a * y[i];
+ ++i;
+ case 5:
+ z[i] = x[i] + a * y[i];
+ ++i;
+ case 4:
+ z[i] = x[i] + a * y[i];
+ ++i;
+ case 3:
+ z[i] = x[i] + a * y[i];
+ ++i;
+ case 2:
+ z[i] = x[i] + a * y[i];
+ ++i;
+ case 1:
+ z[i] = x[i] + a * y[i];
+ ++i;
+ }
+ }
+}
+
+/*
+ * This function returns the solution of Ax = b where A is positive definite,
+ * based on the conjugate gradients method; see "An intro to the CG method" by J.R. Shewchuk, p. 50-51
+ *
+ * A is mxm; b and x are mx1. Argument niter specifies the maximum number of
+ * iterations and eps is the desired solution accuracy. niter<0 signals that
+ * x contains a valid initial approximation to the solution; if niter>0 then
+ * the starting point is taken to be zero. Argument prec selects the desired
+ * preconditioning method as follows:
+ * 0: no preconditioning
+ * 1: jacobi (diagonal) preconditioning
+ * 2: SSOR preconditioning
+ * Argument iscolmaj specifies whether A is stored in column or row major order.
+ *
+ * The function returns 0 in case of error,
+ * the number of iterations performed if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int sba_Axb_CG(double *A, double *B, double *x, int m, int niter, double eps, int prec, int iscolmaj) {
+ static double *buf = NULL;
+ static int buf_sz = 0;
+
+ register int i, j;
+ register double *aim;
+ int iter, a_sz, res_sz, d_sz, q_sz, s_sz, wk_sz, z_sz, tot_sz;
+ double *a, *res, *d, *q, *s, *wk, *z;
+ double delta0, deltaold, deltanew, alpha, beta, eps_sq = eps * eps;
+ register double sum;
+ int rec_res;
+
+ if (A == NULL) {
+ if (buf)
+ free(buf);
+ buf = NULL;
+ buf_sz = 0;
+
+ return 1;
+ }
+
+ /* calculate required memory size */
+ a_sz = (iscolmaj) ? m * m : 0;
+ res_sz = m;
+ d_sz = m;
+ q_sz = m;
+ if (prec != SBA_CG_NOPREC) {
+ s_sz = m;
+ wk_sz = m;
+ z_sz = (prec == SBA_CG_SSOR) ? m : 0;
+ } else
+ s_sz = wk_sz = z_sz = 0;
+
+ tot_sz = a_sz + res_sz + d_sz + q_sz + s_sz + wk_sz + z_sz;
+
+ if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */
+ if (buf)
+ free(buf); /* free previously allocated memory */
+
+ buf_sz = tot_sz;
+ buf = (double *)malloc(buf_sz * sizeof(double));
+ if (!buf) {
+ fprintf(stderr, "memory allocation request failed in sba_Axb_CG()\n");
+ exit(1);
+ }
+ }
+
+ if (iscolmaj) {
+ a = buf;
+ /* store A (row major!) into a */
+ for (i = 0; i < m; ++i)
+ for (j = 0, aim = a + i * m; j < m; ++j)
+ aim[j] = A[i + j * m];
+ } else
+ a = A; /* no copying required */
+
+ res = buf + a_sz;
+ d = res + res_sz;
+ q = d + d_sz;
+ if (prec != SBA_CG_NOPREC) {
+ s = q + q_sz;
+ wk = s + s_sz;
+ z = (prec == SBA_CG_SSOR) ? wk + wk_sz : NULL;
+
+ for (i = 0; i < m; ++i) { // compute jacobi (i.e. diagonal) preconditioners and save them in wk
+ sum = a[i * m + i];
+			if (sum > DBL_EPSILON || sum < -DBL_EPSILON) // i.e. sum != 0.0
+ wk[i] = 1.0 / sum;
+ else
+ wk[i] = 1.0 / DBL_EPSILON;
+ }
+ } else {
+ s = res;
+ wk = z = NULL;
+ }
+
+ if (niter > 0) {
+ for (i = 0; i < m; ++i) { // clear solution and initialize residual vector: res <-- B
+ x[i] = 0.0;
+ res[i] = B[i];
+ }
+ } else {
+ niter = -niter;
+
+ for (i = 0; i < m; ++i) { // initialize residual vector: res <-- B - A*x
+ for (j = 0, aim = a + i * m, sum = 0.0; j < m; ++j)
+ sum += aim[j] * x[j];
+ res[i] = B[i] - sum;
+ }
+ }
+
+ switch (prec) {
+ case SBA_CG_NOPREC:
+ for (i = 0, deltanew = 0.0; i < m; ++i) {
+ d[i] = res[i];
+ deltanew += res[i] * res[i];
+ }
+ break;
+ case SBA_CG_JACOBI: // jacobi preconditioning
+ for (i = 0, deltanew = 0.0; i < m; ++i) {
+ d[i] = res[i] * wk[i];
+ deltanew += res[i] * d[i];
+ }
+ break;
+ case SBA_CG_SSOR: // SSOR preconditioning; see the "templates" book, fig. 3.2, p. 44
+ for (i = 0; i < m; ++i) {
+ for (j = 0, sum = 0.0, aim = a + i * m; j < i; ++j)
+ sum += aim[j] * z[j];
+ z[i] = wk[i] * (res[i] - sum);
+ }
+
+ for (i = m - 1; i >= 0; --i) {
+ for (j = i + 1, sum = 0.0, aim = a + i * m; j < m; ++j)
+ sum += aim[j] * d[j];
+ d[i] = z[i] - wk[i] * sum;
+ }
+ deltanew = dprod(m, res, d);
+ break;
+ default:
+ fprintf(stderr, "unknown preconditioning option %d in sba_Axb_CG\n", prec);
+ exit(1);
+ }
+
+ delta0 = deltanew;
+
+ for (iter = 1; deltanew > eps_sq * delta0 && iter <= niter; ++iter) {
+ for (i = 0; i < m; ++i) { // q <-- A d
+ aim = a + i * m;
+ /***
+ for(j=0, sum=0.0; j<m; ++j)
+ sum+=aim[j]*d[j];
+ ***/
+ q[i] = dprod(m, aim, d); // sum;
+ }
+
+ /***
+ for(i=0, sum=0.0; i<m; ++i)
+ sum+=d[i]*q[i];
+ ***/
+ alpha = deltanew / dprod(m, d, q); // deltanew/sum;
+
+ /***
+ for(i=0; i<m; ++i)
+ x[i]+=alpha*d[i];
+ ***/
+ daxpy(m, x, x, alpha, d);
+
+ if (!(iter % 50)) {
+ for (i = 0; i < m; ++i) { // accurate computation of the residual vector
+ aim = a + i * m;
+ /***
+ for(j=0, sum=0.0; j<m; ++j)
+ sum+=aim[j]*x[j];
+ ***/
+ res[i] = B[i] - dprod(m, aim, x); // B[i]-sum;
+ }
+ rec_res = 0;
+ } else {
+ /***
+ for(i=0; i<m; ++i) // approximate computation of the residual vector
+ res[i]-=alpha*q[i];
+ ***/
+ daxpy(m, res, res, -alpha, q);
+ rec_res = 1;
+ }
+
+ if (prec) {
+ switch (prec) {
+ case SBA_CG_JACOBI: // jacobi
+ for (i = 0; i < m; ++i)
+ s[i] = res[i] * wk[i];
+ break;
+ case SBA_CG_SSOR: // SSOR
+ for (i = 0; i < m; ++i) {
+ for (j = 0, sum = 0.0, aim = a + i * m; j < i; ++j)
+ sum += aim[j] * z[j];
+ z[i] = wk[i] * (res[i] - sum);
+ }
+
+ for (i = m - 1; i >= 0; --i) {
+ for (j = i + 1, sum = 0.0, aim = a + i * m; j < m; ++j)
+ sum += aim[j] * s[j];
+ s[i] = z[i] - wk[i] * sum;
+ }
+ break;
+ }
+ }
+
+ deltaold = deltanew;
+ /***
+ for(i=0, sum=0.0; i<m; ++i)
+ sum+=res[i]*s[i];
+ ***/
+ deltanew = dprod(m, res, s); // sum;
+
+ /* make sure that we get around small delta that are due to
+ * accumulated floating point roundoff errors
+ */
+ if (rec_res && deltanew <= eps_sq * delta0) {
+ /* analytically recompute delta */
+ for (i = 0; i < m; ++i) {
+ for (j = 0, aim = a + i * m, sum = 0.0; j < m; ++j)
+ sum += aim[j] * x[j];
+ res[i] = B[i] - sum;
+ }
+ deltanew = dprod(m, res, s);
+ }
+
+ beta = deltanew / deltaold;
+
+ /***
+ for(i=0; i<m; ++i)
+ d[i]=s[i]+beta*d[i];
+ ***/
+ daxpy(m, d, s, beta, d);
+ }
+
+ return iter;
+}
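+
+/* Illustrative sketch for the CG solver above: a positive niter starts from the
+ * zero vector, prec selects the preconditioner and the return value is the
+ * number of iterations performed (0 indicating failure, as documented above).
+ * The specific niter/eps values are arbitrary. Disabled via #if 0.
+ */
+#if 0
+static void sba_Axb_CG_example(double *A, double *b, double *x, int m)
+{
+	int iters;
+
+	iters = sba_Axb_CG(A, b, x, m, 2 * m, 1E-10, SBA_CG_JACOBI, 0);
+	if (iters > 0)
+		printf("CG finished after %d iterations\n", iters);
+
+	sba_Axb_CG(NULL, NULL, NULL, 0, 0, 0.0, 0, 0); /* release the internally retained buffer */
+}
+#endif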
+
+/*
+ * This function computes the Cholesky decomposition of the inverse of a symmetric
+ * (covariance) matrix A into B, i.e. B is s.t. A^-1=B^t*B and B upper triangular.
+ * A and B can coincide
+ *
+ * The function returns 0 in case of error (e.g. A is singular),
+ * 1 if successful
+ *
+ * This function is often called repetitively to operate on matrices of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ *
+ */
+#if 0
+int sba_mat_cholinv(double *A, double *B, int m)
+{
+static double *buf=NULL;
+static int buf_sz=0, nb=0;
+
+int a_sz, ipiv_sz, work_sz, tot_sz;
+register int i, j;
+int info, *ipiv;
+double *a, *work;
+
+ if(A==NULL){
+ if(buf) free(buf);
+ buf=NULL;
+ buf_sz=0;
+
+ return 1;
+ }
+
+ /* calculate the required memory size */
+ ipiv_sz=m;
+ a_sz=m*m;
+ if(!nb){
+#ifndef SBA_LS_SCARCE_MEMORY
+ double tmp;
+
+ work_sz=-1; // workspace query; optimal size is returned in tmp
+ F77_FUNC(dgetri)((int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info);
+ nb=((int)tmp)/m; // optimal worksize is m*nb
+#else
+ nb=1; // min worksize is m
+#endif /* SBA_LS_SCARCE_MEMORY */
+ }
+ work_sz=nb*m;
+ tot_sz=(a_sz + work_sz)*sizeof(double) + ipiv_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
+
+ if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
+ if(buf) free(buf); /* free previously allocated memory */
+
+ buf_sz=tot_sz;
+ buf=(double *)malloc(buf_sz);
+ if(!buf){
+ fprintf(stderr, "memory allocation in sba_mat_cholinv() failed!\n");
+ exit(1);
+ }
+ }
+
+ a=buf;
+ work=a+a_sz;
+ ipiv=(int *)(work+work_sz);
+
+ /* step 1: invert A */
+ /* store A into a; A is assumed symmetric, hence no transposition is needed */
+ for(i=0; i<m*m; ++i)
+ a[i]=A[i];
+
+ /* LU decomposition for A (Cholesky should also do) */
+ F77_FUNC(dgetf2)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info);
+ //F77_FUNC(dgetrf)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info);
+ if(info!=0){
+ if(info<0){
+ fprintf(stderr, "argument %d of dgetf2/dgetrf illegal in sba_mat_cholinv()\n", -info);
+ exit(1);
+ }
+ else{
+ fprintf(stderr, "singular matrix A for dgetf2/dgetrf in sba_mat_cholinv()\n");
+ return 0;
+ }
+ }
+
+ /* (A)^{-1} from LU */
+ F77_FUNC(dgetri)((int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info);
+ if(info!=0){
+ if(info<0){
+ fprintf(stderr, "argument %d of dgetri illegal in sba_mat_cholinv()\n", -info);
+ exit(1);
+ }
+ else{
+ fprintf(stderr, "singular matrix A for dgetri in sba_mat_cholinv()\n");
+ return 0;
+ }
+ }
+
+ /* (A)^{-1} is now in a (in column major!) */
+
+ /* step 2: Cholesky decomposition of a: A^-1=B^t B, B upper triangular */
+ F77_FUNC(dpotf2)("U", (int *)&m, a, (int *)&m, (int *)&info);
+ /* error treatment */
+ if(info!=0){
+ if(info<0){
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotf2 in sba_mat_cholinv()\n", -info);
+ exit(1);
+ }
+ else{
+ fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\n%s\n", info,
+ "and the Cholesky factorization could not be completed in sba_mat_cholinv()");
+ return 0;
+ }
+ }
+
+ /* the decomposition is in the upper part of a (in column-major order!).
+ * copying it to the lower part and zeroing the upper transposes
+ * a in row-major order
+ */
+ for(i=0; i<m; ++i)
+ for(j=0; j<i; ++j){
+ a[i+j*m]=a[j+i*m];
+ a[j+i*m]=0.0;
+ }
+ for(i=0; i<m*m; ++i)
+ B[i]=a[i];
+
+ return 1;
+}
+#endif
+
+int sba_mat_cholinv(double *A, double *B, int m) {
+ int a_sz;
+ register int i, j;
+ int info;
+ double *a;
+
+ if (A == NULL) {
+ return 1;
+ }
+
+ a_sz = m * m;
+ a = B; /* use B as working memory, result is produced in it */
+
+ /* step 1: invert A */
+ /* store A into a; A is assumed symmetric, hence no transposition is needed */
+ for (i = 0; i < a_sz; ++i)
+ a[i] = A[i];
+
+ /* Cholesky decomposition for A */
+ F77_FUNC(dpotf2)("U", (int *)&m, a, (int *)&m, (int *)&info);
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "argument %d of dpotf2 illegal in sba_mat_cholinv()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\n%s\n", info,
+ "and the Cholesky factorization could not be completed in sba_mat_cholinv()");
+ return 0;
+ }
+ }
+
+ /* (A)^{-1} from Cholesky */
+ F77_FUNC(dpotri)("U", (int *)&m, a, (int *)&m, (int *)&info);
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "argument %d of dpotri illegal in sba_mat_cholinv()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "the (%d, %d) element of the factor U or L is zero, singular matrix A for dpotri in "
+ "sba_mat_cholinv()\n",
+ info, info);
+ return 0;
+ }
+ }
+
+ /* (A)^{-1} is now in a (in column major!) */
+
+ /* step 2: Cholesky decomposition of a: A^-1=B^t B, B upper triangular */
+ F77_FUNC(dpotf2)("U", (int *)&m, a, (int *)&m, (int *)&info);
+ /* error treatment */
+ if (info != 0) {
+ if (info < 0) {
+ fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotf2 in sba_mat_cholinv()\n", -info);
+ exit(1);
+ } else {
+ fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\n%s\n", info,
+ "and the Cholesky factorization could not be completed in sba_mat_cholinv()");
+ return 0;
+ }
+ }
+
+ /* the decomposition is in the upper part of a (in column-major order!).
+ * copying it to the lower part and zeroing the upper transposes
+ * a in row-major order
+ */
+ for (i = 0; i < m; ++i)
+ for (j = 0; j < i; ++j) {
+ a[i + j * m] = a[j + i * m];
+ a[j + i * m] = 0.0;
+ }
+
+ return 1;
+}
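+
+/* Illustrative sketch for sba_mat_cholinv() above (arbitrary values): on success
+ * B holds an upper triangular factor with A^{-1} = B^T B, as described above;
+ * passing the same array for A and B performs the computation in place.
+ * Disabled via #if 0.
+ */
+#if 0
+static void sba_mat_cholinv_example(void)
+{
+	double A[4] = {2.0, 1.0,
+		       1.0, 2.0};
+	double B[4];
+
+	if (sba_mat_cholinv(A, B, 2)) {
+		/* B is upper triangular and B^T * B approximates A^{-1} */
+	}
+}
+#endif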
diff --git a/redist/sba/sba_levmar.c b/redist/sba/sba_levmar.c
new file mode 100644
index 0000000..2f85f2a
--- /dev/null
+++ b/redist/sba/sba_levmar.c
@@ -0,0 +1,2715 @@
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// Expert drivers for sparse bundle adjustment based on the
+//// Levenberg - Marquardt minimization algorithm
+//// Copyright (C) 2004-2009 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+////
+//// This program is distributed in the hope that it will be useful,
+//// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//// GNU General Public License for more details.
+////
+///////////////////////////////////////////////////////////////////////////////////
+
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "compiler.h"
+#include "sba.h"
+#include "sba_chkjac.h"
+
+#define SBA_EPSILON 1E-12
+#define SBA_EPSILON_SQ ((SBA_EPSILON) * (SBA_EPSILON))
+
+#define SBA_ONE_THIRD 0.3333333334 /* 1.0/3.0 */
+
+#define emalloc(sz) emalloc_(__FILE__, __LINE__, sz)
+
+#define FABS(x) (((x) >= 0) ? (x) : -(x))
+
+#define ROW_MAJOR 0
+#define COLUMN_MAJOR 1
+#define MAT_STORAGE COLUMN_MAJOR
+
+/* contains information necessary for computing a finite difference approximation to a jacobian,
+ * e.g. function to differentiate, problem dimensions and pointers to working memory buffers
+ */
+struct fdj_data_x_ {
+ void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx,
+ void *adata); /* function to differentiate */
+ int cnp, pnp, mnp; /* parameter numbers */
+ int *func_rcidxs, *func_rcsubs; /* working memory for func invocations.
+ * Notice that this has to be different
+ * than the working memory used for
+ * evaluating the jacobian!
+ */
+ double *hx, *hxx; /* memory to save results in */
+ void *adata;
+};
+
+/* auxiliary memory allocation routine with error checking */
+inline static void *emalloc_(char *file, int line, size_t sz) {
+ void *ptr;
+
+ ptr = (void *)malloc(sz);
+ if (ptr == NULL) {
+ fprintf(stderr, "SBA: memory allocation request for %zu bytes failed in file %s, line %d, exiting", sz, file,
+ line);
+ exit(1);
+ }
+
+ return ptr;
+}
+
+/* auxiliary routine for clearing an array of doubles */
+inline static void _dblzero(register double *arr, register int count) {
+ while (--count >= 0)
+ *arr++ = 0.0;
+}
+
+/* auxiliary routine for computing the mean reprojection error; used for debugging */
+static double sba_mean_repr_error(int n, int mnp, double *x, double *hx, struct sba_crsm *idxij, int *rcidxs,
+ int *rcsubs) {
+ register int i, j;
+ int nnz, nprojs;
+ double *ptr1, *ptr2;
+ double err;
+
+ for (i = nprojs = 0, err = 0.0; i < n; ++i) {
+ nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
+ nprojs += nnz;
+ for (j = 0; j < nnz; ++j) { /* point i projecting on camera rcsubs[j] */
+ ptr1 = x + idxij->val[rcidxs[j]] * mnp;
+ ptr2 = hx + idxij->val[rcidxs[j]] * mnp;
+
+ err += sqrt((ptr1[0] - ptr2[0]) * (ptr1[0] - ptr2[0]) + (ptr1[1] - ptr2[1]) * (ptr1[1] - ptr2[1]));
+ }
+ }
+
+ return err / ((double)(nprojs));
+}
+
+/* print the solution in p using sba's text format. If cnp/pnp==0 only points/cameras are printed */
+static void sba_print_sol(int n, int m, double *p, int cnp, int pnp, double *x, int mnp, struct sba_crsm *idxij,
+ int *rcidxs, int *rcsubs) {
+ register int i, j, ii;
+ int nnz;
+ double *ptr;
+
+ if (cnp) {
+ /* print camera parameters */
+ for (j = 0; j < m; ++j) {
+ ptr = p + cnp * j;
+ for (ii = 0; ii < cnp; ++ii)
+ printf("%g ", ptr[ii]);
+ printf("\n");
+ }
+ }
+
+ if (pnp) {
+ /* 3D & 2D point parameters */
+ printf("\n\n\n# X Y Z nframes frame0 x0 y0 frame1 x1 y1 ...\n");
+ for (i = 0; i < n; ++i) {
+ ptr = p + cnp * m + i * pnp;
+ for (ii = 0; ii < pnp; ++ii) // print 3D coordinates
+ printf("%g ", ptr[ii]);
+
+ nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
+ printf("%d ", nnz);
+
+ for (j = 0; j < nnz; ++j) { /* point i projecting on camera rcsubs[j] */
+ ptr = x + idxij->val[rcidxs[j]] * mnp;
+
+ printf("%d ", rcsubs[j]);
+ for (ii = 0; ii < mnp; ++ii) // print 2D coordinates
+ printf("%g ", ptr[ii]);
+ }
+ printf("\n");
+ }
+ }
+}
+
+/* Compute e=x-y for two n-vectors x and y and return the squared L2 norm of e.
+ * e can coincide with either x or y.
+ * Uses loop unrolling and blocking to reduce bookkeeping overhead & pipeline
+ * stalls and increase instruction-level parallelism; see http://www.abarnett.demon.co.uk/tutorial.html
+ */
+static double nrmL2xmy(double *const e, const double *const x, const double *const y, const int n) {
+ const int blocksize = 8, bpwr = 3; /* 8=2^3 */
+ register int i;
+ int j1, j2, j3, j4, j5, j6, j7;
+ int blockn;
+ register double sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3 = 0.0;
+
+ /* n may not be divisible by blocksize,
+ * go as near as we can first, then tidy up.
+ */
+ blockn = (n >> bpwr) << bpwr; /* (n / blocksize) * blocksize; */
+
+ /* unroll the loop in blocks of `blocksize'; looping downwards gains some more speed */
+ for (i = blockn - 1; i > 0; i -= blocksize) {
+ e[i] = x[i] - y[i];
+ sum0 += e[i] * e[i];
+ j1 = i - 1;
+ e[j1] = x[j1] - y[j1];
+ sum1 += e[j1] * e[j1];
+ j2 = i - 2;
+ e[j2] = x[j2] - y[j2];
+ sum2 += e[j2] * e[j2];
+ j3 = i - 3;
+ e[j3] = x[j3] - y[j3];
+ sum3 += e[j3] * e[j3];
+ j4 = i - 4;
+ e[j4] = x[j4] - y[j4];
+ sum0 += e[j4] * e[j4];
+ j5 = i - 5;
+ e[j5] = x[j5] - y[j5];
+ sum1 += e[j5] * e[j5];
+ j6 = i - 6;
+ e[j6] = x[j6] - y[j6];
+ sum2 += e[j6] * e[j6];
+ j7 = i - 7;
+ e[j7] = x[j7] - y[j7];
+ sum3 += e[j7] * e[j7];
+ }
+
+ /*
+ * There may be some left to do.
+ * This could be done as a simple for() loop,
+ * but a switch is faster (and more interesting)
+ */
+
+ i = blockn;
+ if (i < n) {
+ /* Jump into the case at the place that will allow
+ * us to finish off the appropriate number of items.
+ */
+ switch (n - i) {
+ case 7:
+ e[i] = x[i] - y[i];
+ sum0 += e[i] * e[i];
+ ++i;
+ case 6:
+ e[i] = x[i] - y[i];
+ sum0 += e[i] * e[i];
+ ++i;
+ case 5:
+ e[i] = x[i] - y[i];
+ sum0 += e[i] * e[i];
+ ++i;
+ case 4:
+ e[i] = x[i] - y[i];
+ sum0 += e[i] * e[i];
+ ++i;
+ case 3:
+ e[i] = x[i] - y[i];
+ sum0 += e[i] * e[i];
+ ++i;
+ case 2:
+ e[i] = x[i] - y[i];
+ sum0 += e[i] * e[i];
+ ++i;
+ case 1:
+ e[i] = x[i] - y[i];
+ sum0 += e[i] * e[i];
+ ++i;
+ }
+ }
+
+ return sum0 + sum1 + sum2 + sum3;
+}
+
+/* Compute e=W*(x-y) for two n-vectors x and y and return the squared L2 norm of e.
+ * This norm equals the squared C norm of x-y with C=W^T*W, W being block diagonal
+ * matrix with nvis mnp*mnp blocks: e^T*e=(x-y)^T*W^T*W*(x-y)=||x-y||_C.
+ * Note that n=nvis*mnp; e can coincide with either x or y.
+ *
+ * Similarly to nrmL2xmy() above, uses loop unrolling and blocking
+ */
+static double nrmCxmy(double *const e, const double *const x, const double *const y, const double *const W,
+ const int mnp, const int nvis) {
+ const int n = nvis * mnp;
+ const int blocksize = 8, bpwr = 3; /* 8=2^3 */
+ register int i, ii, k;
+ int j1, j2, j3, j4, j5, j6, j7;
+ int blockn, mnpsq;
+ register double norm, sum;
+ register const double *Wptr, *auxptr;
+ register double *eptr;
+
+ /* n may not be divisible by blocksize,
+ * go as near as we can first, then tidy up.
+ */
+ blockn = (n >> bpwr) << bpwr; /* (n / blocksize) * blocksize; */
+
+ /* unroll the loop in blocks of `blocksize'; looping downwards gains some more speed */
+ for (i = blockn - 1; i > 0; i -= blocksize) {
+ e[i] = x[i] - y[i];
+ j1 = i - 1;
+ e[j1] = x[j1] - y[j1];
+ j2 = i - 2;
+ e[j2] = x[j2] - y[j2];
+ j3 = i - 3;
+ e[j3] = x[j3] - y[j3];
+ j4 = i - 4;
+ e[j4] = x[j4] - y[j4];
+ j5 = i - 5;
+ e[j5] = x[j5] - y[j5];
+ j6 = i - 6;
+ e[j6] = x[j6] - y[j6];
+ j7 = i - 7;
+ e[j7] = x[j7] - y[j7];
+ }
+
+ /*
+ * There may be some left to do.
+ * This could be done as a simple for() loop,
+ * but a switch is faster (and more interesting)
+ */
+
+ i = blockn;
+ if (i < n) {
+ /* Jump into the case at the place that will allow
+ * us to finish off the appropriate number of items.
+ */
+ switch (n - i) {
+ case 7:
+ e[i] = x[i] - y[i];
+ ++i;
+ case 6:
+ e[i] = x[i] - y[i];
+ ++i;
+ case 5:
+ e[i] = x[i] - y[i];
+ ++i;
+ case 4:
+ e[i] = x[i] - y[i];
+ ++i;
+ case 3:
+ e[i] = x[i] - y[i];
+ ++i;
+ case 2:
+ e[i] = x[i] - y[i];
+ ++i;
+ case 1:
+ e[i] = x[i] - y[i];
+ ++i;
+ }
+ }
+
+ /* compute w_x_ij e_ij in e and its L2 norm.
+ * Since w_x_ij is upper triangular, the products can be safely saved
+ * directly in e_ij, without the need for intermediate storage
+ */
+ mnpsq = mnp * mnp;
+ /* Wptr, eptr point to w_x_ij, e_ij below */
+ for (i = 0, Wptr = W, eptr = e, norm = 0.0; i < nvis; ++i, Wptr += mnpsq, eptr += mnp) {
+ for (ii = 0, auxptr = Wptr; ii < mnp; ++ii, auxptr += mnp) { /* auxptr=Wptr+ii*mnp */
+ for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular
+ sum += auxptr[k] * eptr[k]; // Wptr[ii*mnp+k]*eptr[k];
+ eptr[ii] = sum;
+ norm += sum * sum;
+ }
+ }
+
+ return norm;
+}
+
+/* search for & print image projection components that are infinite; useful for identifying errors */
+static void sba_print_inf(double *hx, int nimgs, int mnp, struct sba_crsm *idxij, int *rcidxs, int *rcsubs) {
+ register int i, j, k;
+ int nnz;
+ double *phxij;
+
+ for (j = 0; j < nimgs; ++j) {
+ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */
+ for (i = 0; i < nnz; ++i) {
+ phxij = hx + idxij->val[rcidxs[i]] * mnp;
+ for (k = 0; k < mnp; ++k)
+ if (!SBA_FINITE(phxij[k]))
+ printf("SBA: component %d of the estimated projection of point %d on camera %d is invalid!\n", k,
+ rcsubs[i], j);
+ }
+ }
+}
+
+/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in
+ * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images.
+ * The jacobian is approximated with the aid of finite differences and is returned in the order
+ * (A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm),
+ * where A_ij=dx_ij/da_j and B_ij=dx_ij/db_i (see HZ).
+ * Notice that depending on idxij, some of the A_ij, B_ij might be missing
+ *
+ * Problem-specific information is assumed to be stored in a structure pointed to by "dat".
+ *
+ * NOTE: The jacobian (for n=4, m=3) in matrix form has the following structure:
+ * A_11 0 0 B_11 0 0 0
+ * 0 A_12 0 B_12 0 0 0
+ * 0 0 A_13 B_13 0 0 0
+ * A_21 0 0 0 B_21 0 0
+ * 0 A_22 0 0 B_22 0 0
+ * 0 0 A_23 0 B_23 0 0
+ * A_31 0 0 0 0 B_31 0
+ * 0 A_32 0 0 0 B_32 0
+ * 0 0 A_33 0 0 B_33 0
+ * A_41 0 0 0 0 0 B_41
+ * 0 A_42 0 0 0 0 B_42
+ * 0 0 A_43 0 0 0 B_43
+ * To reduce the total number of objective function evaluations, this structure can be
+ * exploited as follows: A certain d is added to the j-th parameters of all cameras and
+ * the objective function is evaluated at the resulting point. This evaluation suffices
+ * to compute the corresponding columns of *all* A_ij through finite differences. A similar
+ * strategy allows the computation of the B_ij. Overall, only cnp+pnp+1 objective function
+ * evaluations are needed to compute the jacobian, much fewer compared to the m*cnp+n*pnp+1
+ * that would be required by the naive strategy of computing one column of the jacobian
+ * per function evaluation. See Nocedal-Wright, ch. 7, p. 169. Although this approach is
+ * much faster compared to the naive strategy, it is not preferable to analytic jacobians,
+ * since the latter are considerably faster to compute and result in fewer LM iterations.
+ */
+static void
+sba_fdjac_x(double *p, /* I: current parameter estimate, (m*cnp+n*pnp)x1 */
+ struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */
+ int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */
+ int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */
+ double *jac, /* O: array for storing the approximated jacobian */
+ void *dat) /* I: points to a "fdj_data_x_" structure */
+{
+ register int i, j, ii, jj;
+ double *pa, *pb, *pqr, *ppt;
+ register double *pAB, *phx, *phxx;
+ int n, m, nm, nnz, Asz, Bsz, ABsz, idx;
+
+ double *tmpd;
+ register double d;
+
+ struct fdj_data_x_ *fdjd;
+ void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata);
+ double *hx, *hxx;
+ int cnp, pnp, mnp;
+ void *adata;
+
+ /* retrieve problem-specific information passed in *dat */
+ fdjd = (struct fdj_data_x_ *)dat;
+ func = fdjd->func;
+ cnp = fdjd->cnp;
+ pnp = fdjd->pnp;
+ mnp = fdjd->mnp;
+ hx = fdjd->hx;
+ hxx = fdjd->hxx;
+ adata = fdjd->adata;
+
+ n = idxij->nr;
+ m = idxij->nc;
+ pa = p;
+ pb = p + m * cnp;
+ Asz = mnp * cnp;
+ Bsz = mnp * pnp;
+ ABsz = Asz + Bsz;
+
+ nm = (n >= m) ? n : m; // max(n, m);
+ tmpd = (double *)emalloc(nm * sizeof(double));
+
+ (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hx,
+ adata); // evaluate supplied function on current solution
+
+ if (cnp) { // is motion varying?
+ /* compute A_ij */
+ for (jj = 0; jj < cnp; ++jj) {
+ for (j = 0; j < m; ++j) {
+ pqr = pa + j * cnp; // j-th camera parameters
+ /* determine d=max(SBA_DELTA_SCALE*|pqr[jj]|, SBA_MIN_DELTA), see HZ */
+ d = (double)(SBA_DELTA_SCALE)*pqr[jj]; // force evaluation
+ d = FABS(d);
+ if (d < SBA_MIN_DELTA)
+ d = SBA_MIN_DELTA;
+
+ tmpd[j] = d;
+ pqr[jj] += d;
+ }
+
+ (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata);
+
+ for (j = 0; j < m; ++j) {
+ pqr = pa + j * cnp; // j-th camera parameters
+ pqr[jj] -= tmpd[j]; /* restore */
+ d = 1.0 / tmpd[j]; /* invert so that divisions can be carried out faster as multiplications */
+
+ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
+ for (i = 0; i < nnz; ++i) {
+ idx = idxij->val[rcidxs[i]];
+ phx = hx + idx * mnp; // set phx to point to hx_ij
+ phxx = hxx + idx * mnp; // set phxx to point to hxx_ij
+ pAB = jac + idx * ABsz; // set pAB to point to A_ij
+
+ for (ii = 0; ii < mnp; ++ii)
+ pAB[ii * cnp + jj] = (phxx[ii] - phx[ii]) * d;
+ }
+ }
+ }
+ }
+
+ if (pnp) { // is structure varying?
+ /* compute B_ij */
+ for (jj = 0; jj < pnp; ++jj) {
+ for (i = 0; i < n; ++i) {
+ ppt = pb + i * pnp; // i-th point parameters
+ /* determine d=max(SBA_DELTA_SCALE*|ppt[jj]|, SBA_MIN_DELTA), see HZ */
+ d = (double)(SBA_DELTA_SCALE)*ppt[jj]; // force evaluation
+ d = FABS(d);
+ if (d < SBA_MIN_DELTA)
+ d = SBA_MIN_DELTA;
+
+ tmpd[i] = d;
+ ppt[jj] += d;
+ }
+
+ (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata);
+
+ for (i = 0; i < n; ++i) {
+ ppt = pb + i * pnp; // i-th point parameters
+ ppt[jj] -= tmpd[i]; /* restore */
+ d = 1.0 / tmpd[i]; /* invert so that divisions can be carried out faster as multiplications */
+
+ nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
+ for (j = 0; j < nnz; ++j) {
+ idx = idxij->val[rcidxs[j]];
+ phx = hx + idx * mnp; // set phx to point to hx_ij
+ phxx = hxx + idx * mnp; // set phxx to point to hxx_ij
+ pAB = jac + idx * ABsz + Asz; // set pAB to point to B_ij
+
+ for (ii = 0; ii < mnp; ++ii)
+ pAB[ii * pnp + jj] = (phxx[ii] - phx[ii]) * d;
+ }
+ }
+ }
+ }
+
+ free(tmpd);
+}
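+
+/* Editorial sketch, not part of the original sba sources: the point of the shared
+ * perturbation scheme in sba_fdjac_x() above, stated as objective function evaluation
+ * counts. The problem size below is hypothetical; the formulas restate the comment
+ * preceding sba_fdjac_x().
+ */
+#if 0
+static void fdjac_eval_count_demo(void) {
+	const int n = 1000, m = 20, cnp = 6, pnp = 3; /* hypothetical reconstruction size */
+
+	/* naive scheme: one evaluation per jacobian column plus the baseline evaluation */
+	printf("naive forward differences: %d evaluations\n", m * cnp + n * pnp + 1);
+	/* sba_fdjac_x(): one evaluation per shared camera/point parameter index plus the baseline */
+	printf("sba_fdjac_x() perturbation sharing: %d evaluations\n", cnp + pnp + 1);
+}
+#endif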
+
+typedef int (*PLS)(double *A, double *B, double *x, int m, int iscolmaj);
+
+/* Bundle adjustment on camera and structure parameters
+ * using the sparse Levenberg-Marquardt as described in HZ p. 568
+ *
+ * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
+ */
+
+int sba_motstr_levmar_x(
+ const int n, /* number of points */
+ const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified.
+ * All B_ij (see below) with i<ncon are assumed to be zero
+ */
+ const int m, /* number of images */
+ const int mcon, /* number of images (starting from the 1st) whose parameters should not be modified.
+ * All A_ij (see below) with j<mcon are assumed to be zero
+ */
+ char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
+ double *p, /* initial parameter vector p0: (a1, ..., am, b1, ..., bn).
+ * aj are the image j parameters, bi are the i-th point parameters,
+ * size m*cnp + n*pnp
+ */
+ const int cnp, /* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
+ const int pnp, /* number of parameters for ONE point; e.g. 3 for Euclidean points */
+ double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
+ * x_ij is the projection of the i-th point on the j-th image.
+ * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
+ * see vmask[i, j], max. size n*m*mnp
+ */
+ double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm),
+ * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no
+ * covariance estimates are available (identity matrices are implicitly used in this case).
+ * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing;
+ * see vmask[i, j], max. size n*m*mnp*mnp
+ */
+ const int mnp, /* number of parameters for EACH measurement; usually 2 */
+ void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ /* functional relation describing measurements. Given a parameter vector p,
+ * computes a prediction of the measurements \hat{x}. p is (m*cnp + n*pnp)x1,
+ * \hat{x} is (n*m*mnp)x1, maximum
+ * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
+ * as working memory
+ */
+ void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
+ /* function to evaluate the sparse jacobian dX/dp.
+ * The Jacobian is returned in jac as
+ * (dx_11/da_1, ..., dx_1m/da_m, ..., dx_n1/da_1, ..., dx_nm/da_m,
+ * dx_11/db_1, ..., dx_1m/db_1, ..., dx_n1/db_n, ..., dx_nm/db_n), or (using HZ's notation),
+ * jac=(A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm)
+ * Notice that depending on idxij, some of the A_ij and B_ij might be missing.
+ * Note also that A_ij and B_ij are mnp x cnp and mnp x pnp matrices resp. and they
+ * should be stored in jac in row-major order.
+ * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
+ * as working memory
+ *
+ * If NULL, the jacobian is approximated by repetitive func calls and finite
+ * differences. This is computationally inefficient and thus NOT recommended.
+ */
+ void *adata, /* pointer to possibly additional data, passed uninterpreted to func, fjac */
+
+ const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate
+ return */
+ const int verbose, /* I: verbosity */
+ const double opts[SBA_OPTSSZ],
+ /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial
+ * \mu,
+ * stopping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2
+ */
+ double info[SBA_INFOSZ]
+ /* O: information regarding the minimization. Set to NULL if don't care
+ * info[0]=||e||_2 at initial p.
+ * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
+ * info[5]= # iterations,
+ * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
+ * 2 - stopped by small dp
+ * 3 - stopped by itmax
+ * 4 - stopped by small relative reduction in ||e||_2
+ * 5 - stopped by small ||e||_2
+ * 6 - too many attempts to increase damping. Restart with increased mu
+ * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
+ * info[7]= # function evaluations
+ * info[8]= # jacobian evaluations
+ * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
+ */
+ ) {
+ register int i, j, ii, jj, k, l;
+ int nvis, nnz, retval;
+
+ /* The following are work arrays that are dynamically allocated by sba_motstr_levmar_x() */
+ double *jac; /* work array for storing the jacobian, max. size n*m*(mnp*cnp + mnp*pnp) */
+ double *U; /* work array for storing the U_j in the order U_1, ..., U_m, size m*cnp*cnp */
+ double
+ *V; /* work array for storing the *strictly upper triangles* of V_i in the order V_1, ..., V_n, size n*pnp*pnp.
+ * V also stores the lower triangles of (V*_i)^-1 in the order (V*_1)^-1, ..., (V*_n)^-1.
+	 * Note that diagonal elements of V_i are saved in diagUV
+ */
+
+ double *e; /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm,
+ max. size n*m*mnp */
+ double
+ *eab; /* work array for storing the ea_j & eb_i in the order ea_1, .. ea_m eb_1, .. eb_n size m*cnp + n*pnp */
+
+ double *E; /* work array for storing the e_j in the order e_1, .. e_m, size m*cnp */
+
+ /* Notice that the blocks W_ij, Y_ij are zero iff A_ij (equivalently B_ij) is zero. This means
+ * that the matrix consisting of blocks W_ij is itself sparse, similarly to the
+ * block matrix made up of the A_ij and B_ij (i.e. jac)
+ */
+ double *W; /* work array for storing the W_ij in the order W_11, ..., W_1m, ..., W_n1, ..., W_nm,
+ max. size n*m*cnp*pnp */
+	double *Yj; /* work array for storing the Y_ij for a *fixed* j in the order Y_1j, ..., Y_nj,
+ max. size n*cnp*pnp */
+ double *YWt; /* work array for storing \sum_i Y_ij W_ik^T, size cnp*cnp */
+ double *S; /* work array for storing the block array S_jk, size m*m*cnp*cnp */
+ double *dp; /* work array for storing the parameter vector updates da_1, ..., da_m, db_1, ..., db_n, size m*cnp +
+ n*pnp */
+ double *Wtda; /* work array for storing \sum_j W_ij^T da_j, size pnp */
+ double *wght = /* work array for storing the weights computed from the covariance inverses, max. size n*m*mnp*mnp */
+ NULL;
+
+ /* Of the above arrays, jac, e, W, Yj, wght are sparse and
+ * U, V, eab, E, S, dp are dense. Sparse arrays (except Yj) are indexed
+ * through idxij (see below), that is with the same mechanism as the input
+ * measurements vector x
+ */
+
+ double *pa, *pb, *ea, *eb, *dpa, *dpb; /* pointers into p, jac, eab and dp respectively */
+
+ /* submatrices sizes */
+ int Asz, Bsz, ABsz, Usz, Vsz, Wsz, Ysz, esz, easz, ebsz, YWtsz, Wtdasz, Sblsz, covsz;
+
+ int Sdim; /* S matrix actual dimension */
+
+ register double *ptr1, *ptr2, *ptr3, *ptr4, sum;
+ struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also
+ * the location of A_ij, B_ij in jac, etc.
+ * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous
+ * index k and it is used to efficiently lookup the memory locations where the non-zero
+ * blocks of a sparse matrix/vector are stored
+ */
+ int maxCvis, /* max. of projections of a single point across cameras, <=m */
+ maxPvis, /* max. of projections in a single camera across points, <=n */
+ maxCPvis, /* max. of the above */
+ *rcidxs, /* work array for the indexes corresponding to the nonzero elements of a single row or
+ column in a sparse matrix, size max(n, m) */
+ *rcsubs; /* work array for the subscripts of nonzero elements in a single row or column of a
+ sparse matrix, size max(n, m) */
+
+ /* The following variables are needed by the LM algorithm */
+ register int itno; /* iteration counter */
+ int issolved;
+ /* temporary work arrays that are dynamically allocated */
+ double *hx, /* \hat{x}_i, max. size m*n*mnp */
+ *diagUV, /* diagonals of U_j, V_i, size m*cnp + n*pnp */
+ *pdp; /* p + dp, size m*cnp + n*pnp */
+
+ double *diagU, *diagV; /* pointers into diagUV */
+
+ register double mu, /* damping constant */
+ tmp; /* mainly used in matrix & vector multiplications */
+ double p_eL2, eab_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */
+ double p_L2, dp_L2 = DBL_MAX, dF, dL;
+ double tau = FABS(opts[0]), eps1 = FABS(opts[1]), eps2 = FABS(opts[2]), eps2_sq = opts[2] * opts[2],
+ eps3_sq = opts[3] * opts[3], eps4_sq = opts[4] * opts[4];
+ double init_p_eL2;
+ int nu = 2, nu2, stop = 0, nfev, njev = 0, nlss = 0;
+ int nobs, nvars;
+ const int mmcon = m - mcon;
+ PLS linsolver = NULL;
+ int (*matinv)(double *A, int m) = NULL;
+
+ struct fdj_data_x_ fdj_data;
+ void *jac_adata;
+
+ /* Initialization */
+ mu = eab_inf = 0.0; /* -Wall */
+
+ /* block sizes */
+ Asz = mnp * cnp;
+ Bsz = mnp * pnp;
+ ABsz = Asz + Bsz;
+ Usz = cnp * cnp;
+ Vsz = pnp * pnp;
+ Wsz = cnp * pnp;
+ Ysz = cnp * pnp;
+ esz = mnp;
+ easz = cnp;
+ ebsz = pnp;
+ YWtsz = cnp * cnp;
+ Wtdasz = pnp;
+ Sblsz = cnp * cnp;
+ Sdim = mmcon * cnp;
+ covsz = mnp * mnp;
+
+ /* count total number of visible image points */
+ for (i = nvis = 0, jj = n * m; i < jj; ++i)
+ nvis += (vmask[i] != 0);
+
+ nobs = nvis * mnp;
+ nvars = m * cnp + n * pnp;
+ if (nobs < nvars) {
+ fprintf(stderr,
+ "SBA: sba_motstr_levmar_x() cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n",
+ nobs, nvars);
+ return SBA_ERROR;
+ }
+
+ /* allocate & fill up the idxij structure */
+ sba_crsm_alloc(&idxij, n, m, nvis);
+ for (i = k = 0; i < n; ++i) {
+ idxij.rowptr[i] = k;
+ ii = i * m;
+ for (j = 0; j < m; ++j)
+ if (vmask[ii + j]) {
+ idxij.val[k] = k;
+ idxij.colidx[k++] = j;
+ }
+ }
+ idxij.rowptr[n] = nvis;
+
+ /* find the maximum number (for all cameras) of visible image projections coming from a single 3D point */
+ for (i = maxCvis = 0; i < n; ++i)
+ if ((k = idxij.rowptr[i + 1] - idxij.rowptr[i]) > maxCvis)
+ maxCvis = k;
+
+ /* find the maximum number (for all points) of visible image projections in any single camera */
+ for (j = maxPvis = 0; j < m; ++j) {
+ for (i = ii = 0; i < n; ++i)
+ if (vmask[i * m + j])
+ ++ii;
+ if (ii > maxPvis)
+ maxPvis = ii;
+ }
+ maxCPvis = (maxCvis >= maxPvis) ? maxCvis : maxPvis;
+
+#if 0
+ /* determine the density of blocks in matrix S */
+ for(j=mcon, ii=0; j<m; ++j){
+ ++ii; /* block Sjj is surely nonzero */
+ for(k=j+1; k<m; ++k)
+ if(sba_crsm_common_row(&idxij, j, k)) ii+=2; /* blocks Sjk & Skj are nonzero */
+ }
+ printf("\nS block density: %.5g\n", ((double)ii)/(mmcon*mmcon)); fflush(stdout);
+#endif
+
+ /* allocate work arrays */
+ /* W is big enough to hold both jac & W. Note also the extra Wsz, see the initialization of jac below for
+ * explanation */
+ W = (double *)emalloc((nvis * ((Wsz >= ABsz) ? Wsz : ABsz) + Wsz) * sizeof(double));
+ U = (double *)emalloc(m * Usz * sizeof(double));
+ V = (double *)emalloc(n * Vsz * sizeof(double));
+ e = (double *)emalloc(nobs * sizeof(double));
+ eab = (double *)emalloc(nvars * sizeof(double));
+ E = (double *)emalloc(m * cnp * sizeof(double));
+ Yj = (double *)emalloc(maxPvis * Ysz * sizeof(double));
+ YWt = (double *)emalloc(YWtsz * sizeof(double));
+ S = (double *)emalloc(m * m * Sblsz * sizeof(double));
+ dp = (double *)emalloc(nvars * sizeof(double));
+ Wtda = (double *)emalloc(pnp * sizeof(double));
+ rcidxs = (int *)emalloc(maxCPvis * sizeof(int));
+ rcsubs = (int *)emalloc(maxCPvis * sizeof(int));
+#ifndef SBA_DESTROY_COVS
+ if (covx != NULL)
+ wght = (double *)emalloc(nvis * covsz * sizeof(double));
+#else
+ if (covx != NULL)
+ wght = covx;
+#endif /* SBA_DESTROY_COVS */
+
+ hx = (double *)emalloc(nobs * sizeof(double));
+ diagUV = (double *)emalloc(nvars * sizeof(double));
+ pdp = (double *)emalloc(nvars * sizeof(double));
+
+ /* to save resources, W and jac share the same memory: First, the jacobian
+ * is computed in some working memory that is then overwritten during the
+ * computation of W. To account for the case of W being larger than jac,
+ * extra memory is reserved "before" jac.
+ * Care must be taken, however, to ensure that storing a certain W_ij
+ * does not overwrite the A_ij, B_ij used to compute it. To achieve
+	 * this, note that if p1 and p2 respectively point to the first elements
+ * of a certain W_ij and A_ij, B_ij pair, we should have p2-p1>=Wsz.
+ * There are two cases:
+ * a) Wsz>=ABsz: Then p1=W+k*Wsz and p2=jac+k*ABsz=W+Wsz+nvis*(Wsz-ABsz)+k*ABsz
+ * for some k (0<=k<nvis), thus p2-p1=(nvis-k)*(Wsz-ABsz)+Wsz.
+ * The right side of the last equation is obviously > Wsz for all 0<=k<nvis
+ *
+ * b) Wsz<ABsz: Then p1=W+k*Wsz and p2=jac+k*ABsz=W+Wsz+k*ABsz and
+ * p2-p1=Wsz+k*(ABsz-Wsz), which is again > Wsz for all 0<=k<nvis
+ *
+ * In conclusion, if jac is initialized as below, the memory allocated to all
+ * W_ij is guaranteed not to overlap with that allocated to their corresponding
+ * A_ij, B_ij pairs
+ */
+ jac = W + Wsz + ((Wsz > ABsz) ? nvis * (Wsz - ABsz) : 0);
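+
+	/* Editorial sketch, not part of the original sba sources: the aliasing argument
+	 * above can be checked at run time. For every visible point i, the A_ij/B_ij pair
+	 * written at jac + i*ABsz must start at least Wsz doubles beyond the W_ij written
+	 * at W + i*Wsz (assumes <assert.h> is available).
+	 */
+#if 0
+	for (i = 0; i < nvis; ++i)
+		assert((jac + i * ABsz) - (W + i * Wsz) >= Wsz);
+#endif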
+
+ /* set up auxiliary pointers */
+ pa = p;
+ pb = p + m * cnp;
+ ea = eab;
+ eb = eab + m * cnp;
+ dpa = dp;
+ dpb = dp + m * cnp;
+
+ diagU = diagUV;
+ diagV = diagUV + m * cnp;
+
+ /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */
+ if (!fjac) {
+ fdj_data.func = func;
+ fdj_data.cnp = cnp;
+ fdj_data.pnp = pnp;
+ fdj_data.mnp = mnp;
+ fdj_data.hx = hx;
+ fdj_data.hxx = (double *)emalloc(nobs * sizeof(double));
+ fdj_data.func_rcidxs = (int *)emalloc(2 * maxCPvis * sizeof(int));
+ fdj_data.func_rcsubs = fdj_data.func_rcidxs + maxCPvis;
+ fdj_data.adata = adata;
+
+ fjac = sba_fdjac_x;
+ jac_adata = (void *)&fdj_data;
+ } else {
+ fdj_data.hxx = NULL;
+ jac_adata = adata;
+ }
+
+ if (itmax == 0) { /* verify jacobian */
+ sba_motstr_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, ncon, mcon, cnp, pnp, mnp, adata, jac_adata);
+ retval = 0;
+ goto freemem_and_return;
+ }
+
+ /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their
+ * inverses and using the resulting matrices w_x_ij to weigh A_ij, B_ij, and e_ij as w_x_ij A_ij,
+ * w_x_ij*B_ij and w_x_ij*e_ij. In this way, auxiliary variables as U_j=\sum_i A_ij^T A_ij
+	 * w_x_ij*B_ij and w_x_ij*e_ij. In this way, auxiliary variables such as U_j=\sum_i A_ij^T A_ij
+ * A_ij^T Sigma_x_ij^-1 A_ij
+ *
+ * ea_j, V_i, eb_i, W_ij are weighted in a similar manner
+ */
+ if (covx != NULL) {
+ for (i = 0; i < n; ++i) {
+ nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
+ for (j = 0; j < nnz; ++j) {
+ /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */
+ ptr1 = covx + (k = idxij.val[rcidxs[j]] * covsz);
+ ptr2 = wght + k;
+ if (!sba_mat_cholinv(ptr1, ptr2, mnp)) { /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */
+ fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_motstr_levmar_x()\n",
+ i, rcsubs[j]);
+ retval = SBA_ERROR;
+ goto freemem_and_return;
+ }
+ }
+ }
+ sba_mat_cholinv(NULL, NULL, 0); /* cleanup */
+ }
+
+ /* compute the error vectors e_ij in hx */
+ (*func)(p, &idxij, rcidxs, rcsubs, hx, adata);
+ nfev = 1;
+	/* ### compute e=x - f(p) [e=w*(x - f(p))] and its L2 norm */
+ if (covx == NULL)
+ p_eL2 = nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */
+ else
+ p_eL2 = nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */
+ if (verbose)
+ printf("initial motstr-SBA error %g [%g]\n", p_eL2, p_eL2 / nvis);
+ init_p_eL2 = p_eL2;
+ if (!SBA_FINITE(p_eL2))
+ stop = 7;
+
+ for (itno = 0; itno < itmax && !stop; ++itno) {
+ /* Note that p, e and ||e||_2 have been updated at the previous iteration */
+
+ /* compute derivative submatrices A_ij, B_ij */
+ (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata);
+ ++njev;
+
+ if (covx != NULL) {
+ /* compute w_x_ij A_ij and w_x_ij B_ij.
+ * Since w_x_ij is upper triangular, the products can be safely saved
+ * directly in A_ij, B_ij, without the need for intermediate storage
+ */
+ for (i = 0; i < nvis; ++i) {
+ /* set ptr1, ptr2, ptr3 to point to w_x_ij, A_ij, B_ij, resp. */
+ ptr1 = wght + i * covsz;
+ ptr2 = jac + i * ABsz;
+ ptr3 = ptr2 + Asz; // ptr3=jac + i*ABsz + Asz;
+
+ /* w_x_ij is mnp x mnp, A_ij is mnp x cnp, B_ij is mnp x pnp
+				 * NOTE: Jamming the outer (i.e., ii) loops did not run faster!
+ */
+ /* A_ij */
+ for (ii = 0; ii < mnp; ++ii)
+ for (jj = 0; jj < cnp; ++jj) {
+ for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular
+ sum += ptr1[ii * mnp + k] * ptr2[k * cnp + jj];
+ ptr2[ii * cnp + jj] = sum;
+ }
+
+ /* B_ij */
+ for (ii = 0; ii < mnp; ++ii)
+ for (jj = 0; jj < pnp; ++jj) {
+ for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular
+ sum += ptr1[ii * mnp + k] * ptr3[k * pnp + jj];
+ ptr3[ii * pnp + jj] = sum;
+ }
+ }
+ }
+
+ /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here!
+ /* U_j is symmetric, therefore its computation can be sped up by
+ * computing only the upper part and then reusing it for the lower one.
+ * Recall that A_ij is mnp x cnp
+ */
+ /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here!
+ /* Recall that e_ij is mnp x 1
+ */
+ _dblzero(U, m * Usz); /* clear all U_j */
+ _dblzero(ea, m * easz); /* clear all ea_j */
+ for (j = mcon; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ ptr2 = ea + j * easz; // set ptr2 to point to ea_j
+
+ nnz = sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
+ for (i = 0; i < nnz; ++i) {
+ /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */
+ ptr3 = jac + idxij.val[rcidxs[i]] * ABsz;
+
+ /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */
+ for (ii = 0; ii < cnp; ++ii) {
+ for (jj = ii; jj < cnp; ++jj) {
+ for (k = 0, sum = 0.0; k < mnp; ++k)
+ sum += ptr3[k * cnp + ii] * ptr3[k * cnp + jj];
+ ptr1[ii * cnp + jj] += sum;
+ }
+
+ /* copy the LOWER TRIANGULAR PART of U_j from the upper one */
+ for (jj = 0; jj < ii; ++jj)
+ ptr1[ii * cnp + jj] = ptr1[jj * cnp + ii];
+ }
+
+ ptr4 = e + idxij.val[rcidxs[i]] * esz; /* set ptr4 to point to e_ij */
+ /* compute A_ij^T e_ij and add it to ea_j */
+ for (ii = 0; ii < cnp; ++ii) {
+ for (jj = 0, sum = 0.0; jj < mnp; ++jj)
+ sum += ptr3[jj * cnp + ii] * ptr4[jj];
+ ptr2[ii] += sum;
+ }
+ }
+ }
+
+ /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here!
+ /* V_i is symmetric, therefore its computation can be sped up by
+ * computing only the upper part and then reusing it for the lower one.
+ * Recall that B_ij is mnp x pnp
+ */
+ /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here!
+ /* Recall that e_ij is mnp x 1
+ */
+ _dblzero(V, n * Vsz); /* clear all V_i */
+ _dblzero(eb, n * ebsz); /* clear all eb_i */
+ for (i = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i
+
+ nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
+ for (j = 0; j < nnz; ++j) {
+ /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */
+ ptr3 = jac + idxij.val[rcidxs[j]] * ABsz + Asz;
+
+ /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */
+ for (ii = 0; ii < pnp; ++ii) {
+ for (jj = ii; jj < pnp; ++jj) {
+ for (k = 0, sum = 0.0; k < mnp; ++k)
+ sum += ptr3[k * pnp + ii] * ptr3[k * pnp + jj];
+ ptr1[ii * pnp + jj] += sum;
+ }
+ }
+
+ ptr4 = e + idxij.val[rcidxs[j]] * esz; /* set ptr4 to point to e_ij */
+ /* compute B_ij^T e_ij and add it to eb_i */
+ for (ii = 0; ii < pnp; ++ii) {
+ for (jj = 0, sum = 0.0; jj < mnp; ++jj)
+ sum += ptr3[jj * pnp + ii] * ptr4[jj];
+ ptr2[ii] += sum;
+ }
+ }
+ }
+
+ /* compute W_ij = A_ij^T B_ij */ // \Sigma here!
+ /* Recall that A_ij is mnp x cnp and B_ij is mnp x pnp
+ */
+ for (i = ncon; i < n; ++i) {
+ nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */
+ for (j = 0; j < nnz; ++j) {
+ /* set ptr1 to point to W_ij, actual column number in rcsubs[j] */
+ ptr1 = W + idxij.val[rcidxs[j]] * Wsz;
+
+ if (rcsubs[j] < mcon) { /* A_ij is zero */
+ //_dblzero(ptr1, Wsz); /* clear W_ij */
+ continue;
+ }
+
+ /* set ptr2 & ptr3 to point to A_ij & B_ij resp. */
+ ptr2 = jac + idxij.val[rcidxs[j]] * ABsz;
+ ptr3 = ptr2 + Asz;
+ /* compute A_ij^T B_ij and store it in W_ij
+ * Recall that storage for A_ij, B_ij does not overlap with that for W_ij,
+ * see the comments related to the initialization of jac above
+ */
+ /* assert(ptr2-ptr1>=Wsz); */
+ for (ii = 0; ii < cnp; ++ii)
+ for (jj = 0; jj < pnp; ++jj) {
+ for (k = 0, sum = 0.0; k < mnp; ++k)
+ sum += ptr2[k * cnp + ii] * ptr3[k * pnp + jj];
+ ptr1[ii * pnp + jj] = sum;
+ }
+ }
+ }
+
+ /* Compute ||J^T e||_inf and ||p||^2 */
+ for (i = 0, p_L2 = eab_inf = 0.0; i < nvars; ++i) {
+ if (eab_inf < (tmp = FABS(eab[i])))
+ eab_inf = tmp;
+ p_L2 += p[i] * p[i];
+ }
+ // p_L2=sqrt(p_L2);
+
+ /* save diagonal entries so that augmentation can be later canceled.
+ * Diagonal entries are in U_j and V_i
+ */
+ for (j = mcon; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j
+ for (i = 0; i < cnp; ++i)
+ ptr2[i] = ptr1[i * cnp + i];
+ }
+ for (i = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i
+ for (j = 0; j < pnp; ++j)
+ ptr2[j] = ptr1[j * pnp + j];
+ }
+
+ /*
+ if(!(itno%100)){
+ printf("Current estimate: ");
+ for(i=0; i<nvars; ++i)
+ printf("%.9g ", p[i]);
+ printf("-- errors %.9g %0.9g\n", eab_inf, p_eL2);
+ }
+ */
+
+ /* check for convergence */
+ if ((eab_inf <= eps1)) {
+ dp_L2 = 0.0; /* no increment for p in this case */
+ stop = 1;
+ break;
+ }
+
+ /* compute initial damping factor */
+ if (itno == 0) {
+ /* find max diagonal element */
+ for (i = mcon * cnp, tmp = DBL_MIN; i < m * cnp; ++i)
+ if (diagUV[i] > tmp)
+ tmp = diagUV[i];
+ for (i = m * cnp + ncon * pnp; i < nvars; ++i) /* tmp is not re-initialized! */
+ if (diagUV[i] > tmp)
+ tmp = diagUV[i];
+ mu = tau * tmp;
+ }
+
+ /* determine increment using adaptive damping */
+ while (1) {
+ /* augment U, V */
+ for (j = mcon; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ for (i = 0; i < cnp; ++i)
+ ptr1[i * cnp + i] += mu;
+ }
+ for (i = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ for (j = 0; j < pnp; ++j)
+ ptr1[j * pnp + j] += mu;
+
+ /* compute V*_i^-1.
+ * Recall that only the upper triangle of the symmetric pnp x pnp matrix V*_i
+ * is stored in ptr1; its (also symmetric) inverse is saved in the lower triangle of ptr1
+ */
+ /* inverting V*_i with LDLT seems to result in faster overall execution compared to when using LU or
+ * Cholesky */
+ // j=sba_symat_invert_LU(ptr1, pnp); matinv=sba_symat_invert_LU;
+ // j=sba_symat_invert_Chol(ptr1, pnp); matinv=sba_symat_invert_Chol;
+ j = sba_symat_invert_BK(ptr1, pnp);
+ matinv = sba_symat_invert_BK;
+ if (!j) {
+ fprintf(stderr, "SBA: singular matrix V*_i (i=%d) in sba_motstr_levmar_x(), increasing damping\n",
+ i);
+ goto moredamping; // increasing damping will eventually make V*_i diagonally dominant, thus
+ // nonsingular
+ // retval=SBA_ERROR;
+ // goto freemem_and_return;
+ }
+ }
+
+ _dblzero(E, m * easz); /* clear all e_j */
+ /* compute the mmcon x mmcon block matrix S and e_j */
+
+ /* Note that S is symmetric, therefore its computation can be
+ * sped up by computing only the upper part and then reusing
+ * it for the lower one.
+ */
+ for (j = mcon; j < m; ++j) {
+ int mmconxUsz = mmcon * Usz;
+
+ nnz = sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero Y_ij, i=0...n-1 */
+
+ /* get rid of all Y_ij with i<ncon that are treated as zeros.
+ * In this way, all rcsubs[i] below are guaranteed to be >= ncon
+ */
+ if (ncon) {
+ for (i = ii = 0; i < nnz; ++i) {
+ if (rcsubs[i] >= ncon) {
+ rcidxs[ii] = rcidxs[i];
+ rcsubs[ii++] = rcsubs[i];
+ }
+ }
+ nnz = ii;
+ }
+
+ /* compute all Y_ij = W_ij (V*_i)^-1 for a *fixed* j.
+ * To save memory, the block matrix consisting of the Y_ij
+ * is not stored. Instead, only a block column of this matrix
+ * is computed & used at each time: For each j, all nonzero
+ * Y_ij are computed in Yj and then used in the calculations
+ * involving S_jk and e_j.
+ * Recall that W_ij is cnp x pnp and (V*_i) is pnp x pnp
+ */
+ for (i = 0; i < nnz; ++i) {
+ /* set ptr3 to point to (V*_i)^-1, actual row number in rcsubs[i] */
+ ptr3 = V + rcsubs[i] * Vsz;
+
+ /* set ptr1 to point to Y_ij, actual row number in rcsubs[i] */
+ ptr1 = Yj + i * Ysz;
+ /* set ptr2 to point to W_ij resp. */
+ ptr2 = W + idxij.val[rcidxs[i]] * Wsz;
+ /* compute W_ij (V*_i)^-1 and store it in Y_ij.
+ * Recall that only the lower triangle of (V*_i)^-1 is stored
+ */
+ for (ii = 0; ii < cnp; ++ii) {
+ ptr4 = ptr2 + ii * pnp;
+ for (jj = 0; jj < pnp; ++jj) {
+ for (k = 0, sum = 0.0; k <= jj; ++k)
+ sum += ptr4[k] * ptr3[jj * pnp + k]; // ptr2[ii*pnp+k]*ptr3[jj*pnp+k];
+ for (; k < pnp; ++k)
+ sum += ptr4[k] * ptr3[k * pnp + jj]; // ptr2[ii*pnp+k]*ptr3[k*pnp+jj];
+ ptr1[ii * pnp + jj] = sum;
+ }
+ }
+ }
+
+ /* compute the UPPER TRIANGULAR PART of S */
+ for (k = j; k < m; ++k) { // j>=mcon
+ /* compute \sum_i Y_ij W_ik^T in YWt. Note that for an off-diagonal block defined by j, k
+ * YWt (and thus S_jk) is nonzero only if there exists a point that is visible in both the
+ * j-th and k-th images
+ */
+
+ /* Recall that Y_ij is cnp x pnp and W_ik is cnp x pnp */
+ _dblzero(YWt, YWtsz); /* clear YWt */
+
+ for (i = 0; i < nnz; ++i) {
+ register double *pYWt;
+
+ /* find the min and max column indices of the elements in row i (actually rcsubs[i])
+ * and make sure that k falls within them. This test handles W_ik's which are
+ * certain to be zero without bothering to call sba_crsm_elmidx()
+ */
+ ii = idxij.colidx[idxij.rowptr[rcsubs[i]]];
+ jj = idxij.colidx[idxij.rowptr[rcsubs[i] + 1] - 1];
+ if (k < ii || k > jj)
+ continue; /* W_ik == 0 */
+
+ /* set ptr2 to point to W_ik */
+ l = sba_crsm_elmidxp(&idxij, rcsubs[i], k, j, rcidxs[i]);
+ // l=sba_crsm_elmidx(&idxij, rcsubs[i], k);
+ if (l == -1)
+ continue; /* W_ik == 0 */
+
+ ptr2 = W + idxij.val[l] * Wsz;
+ /* set ptr1 to point to Y_ij, actual row number in rcsubs[i] */
+ ptr1 = Yj + i * Ysz;
+ for (ii = 0; ii < cnp; ++ii) {
+ ptr3 = ptr1 + ii * pnp;
+ pYWt = YWt + ii * cnp;
+
+ for (jj = 0; jj < cnp; ++jj) {
+ ptr4 = ptr2 + jj * pnp;
+ for (l = 0, sum = 0.0; l < pnp; ++l)
+ sum += ptr3[l] * ptr4[l]; // ptr1[ii*pnp+l]*ptr2[jj*pnp+l];
+ pYWt[jj] += sum; // YWt[ii*cnp+jj]+=sum;
+ }
+ }
+ }
+
+/* since the linear system involving S is solved with lapack,
+ * it is preferable to store S in column major (i.e. fortran)
+ * order, so as to avoid unnecessary transposing/copying.
+*/
+#if MAT_STORAGE == COLUMN_MAJOR
+ ptr2 = S + (k - mcon) * mmconxUsz +
+ (j - mcon) * cnp; // set ptr2 to point to the beginning of block j,k in S
+#else
+ ptr2 = S + (j - mcon) * mmconxUsz +
+ (k - mcon) * cnp; // set ptr2 to point to the beginning of block j,k in S
+#endif
+
+ if (j != k) { /* Kronecker */
+ for (ii = 0; ii < cnp; ++ii, ptr2 += Sdim)
+ for (jj = 0; jj < cnp; ++jj)
+ ptr2[jj] =
+#if MAT_STORAGE == COLUMN_MAJOR
+ -YWt[jj * cnp + ii];
+#else
+ -YWt[ii * cnp + jj];
+#endif
+ } else {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+
+ for (ii = 0; ii < cnp; ++ii, ptr2 += Sdim)
+ for (jj = 0; jj < cnp; ++jj)
+ ptr2[jj] =
+#if MAT_STORAGE == COLUMN_MAJOR
+ ptr1[jj * cnp + ii] - YWt[jj * cnp + ii];
+#else
+ ptr1[ii * cnp + jj] - YWt[ii * cnp + jj];
+#endif
+ }
+ }
+
+ /* copy the LOWER TRIANGULAR PART of S from the upper one */
+ for (k = mcon; k < j; ++k) {
+#if MAT_STORAGE == COLUMN_MAJOR
+ ptr1 = S + (k - mcon) * mmconxUsz +
+ (j - mcon) * cnp; // set ptr1 to point to the beginning of block j,k in S
+ ptr2 = S + (j - mcon) * mmconxUsz +
+ (k - mcon) * cnp; // set ptr2 to point to the beginning of block k,j in S
+#else
+ ptr1 = S + (j - mcon) * mmconxUsz +
+ (k - mcon) * cnp; // set ptr1 to point to the beginning of block j,k in S
+ ptr2 = S + (k - mcon) * mmconxUsz +
+ (j - mcon) * cnp; // set ptr2 to point to the beginning of block k,j in S
+#endif
+ for (ii = 0; ii < cnp; ++ii, ptr1 += Sdim)
+ for (jj = 0, ptr3 = ptr2 + ii; jj < cnp; ++jj, ptr3 += Sdim)
+ ptr1[jj] = *ptr3;
+ }
+
+ /* compute e_j=ea_j - \sum_i Y_ij eb_i */
+ /* Recall that Y_ij is cnp x pnp and eb_i is pnp x 1 */
+ ptr1 = E + j * easz; // set ptr1 to point to e_j
+
+ for (i = 0; i < nnz; ++i) {
+ /* set ptr2 to point to Y_ij, actual row number in rcsubs[i] */
+ ptr2 = Yj + i * Ysz;
+
+ /* set ptr3 to point to eb_i */
+ ptr3 = eb + rcsubs[i] * ebsz;
+ for (ii = 0; ii < cnp; ++ii) {
+ ptr4 = ptr2 + ii * pnp;
+ for (jj = 0, sum = 0.0; jj < pnp; ++jj)
+ sum += ptr4[jj] * ptr3[jj]; // ptr2[ii*pnp+jj]*ptr3[jj];
+ ptr1[ii] += sum;
+ }
+ }
+
+ ptr2 = ea + j * easz; // set ptr2 to point to ea_j
+ for (i = 0; i < easz; ++i)
+ ptr1[i] = ptr2[i] - ptr1[i];
+ }
+
+#if 0
+ if(verbose>1){ /* count the nonzeros in S */
+ for(i=ii=0; i<Sdim*Sdim; ++i)
+ if(S[i]!=0.0) ++ii;
+ printf("\nS density: %.5g\n", ((double)ii)/(Sdim*Sdim)); fflush(stdout);
+ }
+#endif
+
+ /* solve the linear system S dpa = E to compute the da_j.
+ *
+ * Note that if MAT_STORAGE==1 S is modified in the following call;
+ * this is OK since S is recomputed for each iteration
+ */
+ // issolved=sba_Axb_LU(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_LU;
+ issolved = sba_Axb_Chol(S, E + mcon * cnp, dpa + mcon * cnp, Sdim, MAT_STORAGE);
+ linsolver = sba_Axb_Chol;
+ // issolved=sba_Axb_BK(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_BK;
+ // issolved=sba_Axb_QRnoQ(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_QRnoQ;
+ // issolved=sba_Axb_QR(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_QR;
+ // issolved=sba_Axb_SVD(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_SVD;
+ // issolved=sba_Axb_CG(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, (3*Sdim)/2, 1E-10, SBA_CG_JACOBI, MAT_STORAGE);
+ // linsolver=(PLS)sba_Axb_CG;
+
+ ++nlss;
+
+ _dblzero(dpa, mcon * cnp); /* no change for the first mcon camera params */
+
+ if (issolved) {
+
+ /* compute the db_i */
+ for (i = ncon; i < n; ++i) {
+ ptr1 = dpb + i * ebsz; // set ptr1 to point to db_i
+
+ /* compute \sum_j W_ij^T da_j */
+ /* Recall that W_ij is cnp x pnp and da_j is cnp x 1 */
+ _dblzero(Wtda, Wtdasz); /* clear Wtda */
+ nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */
+ for (j = 0; j < nnz; ++j) {
+ /* set ptr2 to point to W_ij, actual column number in rcsubs[j] */
+ if (rcsubs[j] < mcon)
+ continue; /* W_ij is zero */
+
+ ptr2 = W + idxij.val[rcidxs[j]] * Wsz;
+
+ /* set ptr3 to point to da_j */
+ ptr3 = dpa + rcsubs[j] * cnp;
+
+ for (ii = 0; ii < pnp; ++ii) {
+ ptr4 = ptr2 + ii;
+ for (jj = 0, sum = 0.0; jj < cnp; ++jj)
+ sum += ptr4[jj * pnp] * ptr3[jj]; // ptr2[jj*pnp+ii]*ptr3[jj];
+ Wtda[ii] += sum;
+ }
+ }
+
+ /* compute eb_i - \sum_j W_ij^T da_j = eb_i - Wtda in Wtda */
+ ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i
+ for (ii = 0; ii < pnp; ++ii)
+ Wtda[ii] = ptr2[ii] - Wtda[ii];
+
+ /* compute the product (V*_i)^-1 Wtda = (V*_i)^-1 (eb_i - \sum_j W_ij^T da_j).
+ * Recall that only the lower triangle of (V*_i)^-1 is stored
+ */
+ ptr2 = V + i * Vsz; // set ptr2 to point to (V*_i)^-1
+ for (ii = 0; ii < pnp; ++ii) {
+ for (jj = 0, sum = 0.0; jj <= ii; ++jj)
+ sum += ptr2[ii * pnp + jj] * Wtda[jj];
+ for (; jj < pnp; ++jj)
+ sum += ptr2[jj * pnp + ii] * Wtda[jj];
+ ptr1[ii] = sum;
+ }
+ }
+ _dblzero(dpb, ncon * pnp); /* no change for the first ncon point params */
+
+ /* parameter vector updates are now in dpa, dpb */
+
+ /* compute p's new estimate and ||dp||^2 */
+ for (i = 0, dp_L2 = 0.0; i < nvars; ++i) {
+ pdp[i] = p[i] + (tmp = dp[i]);
+ dp_L2 += tmp * tmp;
+ }
+ // dp_L2=sqrt(dp_L2);
+
+ if (dp_L2 <= eps2_sq * p_L2) { /* relative change in p is small, stop */
+ // if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
+ stop = 2;
+ break;
+ }
+
+ if (dp_L2 >= (p_L2 + eps2) / SBA_EPSILON_SQ) { /* almost singular */
+ // if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
+ fprintf(stderr, "SBA: the matrix of the augmented normal equations is almost singular in "
+ "sba_motstr_levmar_x(),\n"
+ " minimization should be restarted from the current solution with an increased "
+ "damping term\n");
+ retval = SBA_ERROR;
+ goto freemem_and_return;
+ }
+
+ (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata);
+ ++nfev; /* evaluate function at p + dp */
+ if (verbose > 1)
+ printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
+ /* ### compute ||e(pdp)||_2 */
+ if (covx == NULL)
+ pdp_eL2 = nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */
+ else
+ pdp_eL2 = nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */
+ if (!SBA_FINITE(pdp_eL2)) {
+ if (verbose) /* identify the offending point projection */
+ sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs);
+
+ stop = 7;
+ break;
+ }
+
+ for (i = 0, dL = 0.0; i < nvars; ++i)
+ dL += dp[i] * (mu * dp[i] + eab[i]);
+
+ dF = p_eL2 - pdp_eL2;
+
+ if (verbose > 1)
+ printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu,
+ dL != 0.0 ? dF / dL : dF / DBL_EPSILON, p_eL2 / nvis, pdp_eL2 / nvis, p_eL2 / pdp_eL2);
+
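+				/* gain ratio test (Nielsen-style damping update): dF is the actual error
+				 * reduction, dL the reduction predicted by the linear model; on success mu
+				 * is scaled by max(1/3, 1-(2*dF/dL-1)^3) and nu reset to 2, otherwise mu is
+				 * multiplied by nu further below */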
+ if (dL > 0.0 && dF > 0.0) { /* reduction in error, increment is accepted */
+ tmp = (2.0 * dF / dL - 1.0);
+ tmp = 1.0 - tmp * tmp * tmp;
+ mu = mu * ((tmp >= SBA_ONE_THIRD) ? tmp : SBA_ONE_THIRD);
+ nu = 2;
+
+ /* the test below is equivalent to the relative reduction of the RMS reprojection error:
+ * sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2) */
+ if (pdp_eL2 - 2.0 * sqrt(p_eL2 * pdp_eL2) < (eps4_sq - 1.0) * p_eL2)
+ stop = 4;
+
+ for (i = 0; i < nvars; ++i) /* update p's estimate */
+ p[i] = pdp[i];
+
+ for (i = 0; i < nobs; ++i) /* update e and ||e||_2 */
+ e[i] = hx[i];
+ p_eL2 = pdp_eL2;
+ break;
+ }
+ } /* issolved */
+
+ moredamping:
+ /* if this point is reached (also via an explicit goto!), either the linear system could
+ * not be solved or the error did not reduce; in any case, the increment must be rejected
+ */
+
+ mu *= nu;
+ nu2 = nu << 1; // 2*nu;
+			if (nu2 <= nu) { /* nu has wrapped around (overflowed) */
+ fprintf(stderr, "SBA: too many failed attempts to increase the damping factor in "
+ "sba_motstr_levmar_x()! Singular Hessian matrix?\n");
+ // exit(1);
+ stop = 6;
+ break;
+ }
+ nu = nu2;
+
+#if 0
+ /* restore U, V diagonal entries */
+ for(j=mcon; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
+ for(i=0; i<cnp; ++i)
+ ptr1[i*cnp+i]=ptr2[i];
+ }
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
+ for(j=0; j<pnp; ++j)
+ ptr1[j*pnp+j]=ptr2[j];
+ }
+#endif
+ } /* inner while loop */
+
+ if (p_eL2 <= eps3_sq)
+ stop = 5; // error is small, force termination of outer loop
+ }
+
+ if (itno >= itmax)
+ stop = 3;
+
+ /* restore U, V diagonal entries */
+ for (j = mcon; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j
+ for (i = 0; i < cnp; ++i)
+ ptr1[i * cnp + i] = ptr2[i];
+ }
+ for (i = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i
+ for (j = 0; j < pnp; ++j)
+ ptr1[j * pnp + j] = ptr2[j];
+ }
+
+ if (info) {
+ info[0] = init_p_eL2;
+ info[1] = p_eL2;
+ info[2] = eab_inf;
+ info[3] = dp_L2;
+ for (j = mcon, tmp = DBL_MIN; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ for (i = 0; i < cnp; ++i)
+ if (tmp < ptr1[i * cnp + i])
+ tmp = ptr1[i * cnp + i];
+ }
+ for (i = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ for (j = 0; j < pnp; ++j)
+ if (tmp < ptr1[j * pnp + j])
+ tmp = ptr1[j * pnp + j];
+ }
+ info[4] = mu / tmp;
+ info[5] = itno;
+ info[6] = stop;
+ info[7] = nfev;
+ info[8] = njev;
+ info[9] = nlss;
+ }
+
+ // sba_print_sol(n, m, p, cnp, pnp, x, mnp, &idxij, rcidxs, rcsubs);
+ retval = (stop != 7) ? itno : SBA_ERROR;
+
+freemem_and_return: /* NOTE: this point is also reached via a goto! */
+
+ /* free whatever was allocated */
+ free(W);
+ free(U);
+ free(V);
+ free(e);
+ free(eab);
+ free(E);
+ free(Yj);
+ free(YWt);
+ free(S);
+ free(dp);
+ free(Wtda);
+ free(rcidxs);
+ free(rcsubs);
+#ifndef SBA_DESTROY_COVS
+ if (wght)
+ free(wght);
+#else
+/* nothing to do */
+#endif /* SBA_DESTROY_COVS */
+
+ free(hx);
+ free(diagUV);
+ free(pdp);
+ if (fdj_data.hxx) { // cleanup
+ free(fdj_data.hxx);
+ free(fdj_data.func_rcidxs);
+ }
+
+ sba_crsm_free(&idxij);
+
+ /* free the memory allocated by the matrix inversion & linear solver routines */
+ if (matinv)
+ (*matinv)(NULL, 0);
+ if (linsolver)
+ (*linsolver)(NULL, NULL, NULL, 0, 0);
+
+ return retval;
+}
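+
+/* Editorial sketch, not part of the original sba sources: a minimal, hypothetical
+ * example of driving sba_motstr_levmar_x() with a finite-difference jacobian
+ * (fjac=NULL) and identity covariances (covx=NULL). toy_proj() is a placeholder
+ * projection model and the opts values are illustrative only; a real caller must
+ * implement its own camera model and tune opts/itmax for its problem.
+ */
+#if 0
+static void toy_proj(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata) {
+	const int cnp = 6, pnp = 3, mnp = 2;
+	int i, j, nnz;
+	double *pa = p, *pb = p + idxij->nc * cnp;
+
+	for (j = 0; j < idxij->nc; ++j) {
+		nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* points visible in camera j */
+		for (i = 0; i < nnz; ++i) {
+			double *xij = hx + idxij->val[rcidxs[i]] * mnp; /* predicted projection x_ij */
+
+			/* placeholder "projection": a real implementation projects point rcsubs[i]
+			 * through camera j here */
+			xij[0] = pb[rcsubs[i] * pnp + 0] + pa[j * cnp + 0];
+			xij[1] = pb[rcsubs[i] * pnp + 1] + pa[j * cnp + 1];
+		}
+	}
+}
+
+static void toy_call(int n, int m, char *vmask, double *p, double *x) {
+	double opts[SBA_OPTSSZ], info[SBA_INFOSZ];
+	int niter;
+
+	opts[0] = 1E-3;  /* tau: scale factor for the initial damping term mu */
+	opts[1] = 1E-12; /* eps1: ||J^T e||_inf stopping threshold */
+	opts[2] = 1E-12; /* eps2: ||dp||_2 stopping threshold */
+	opts[3] = 1E-12; /* eps3: ||e||_2 stopping threshold */
+	opts[4] = 0.0;   /* eps4: relative reduction in ||e||_2 threshold */
+
+	niter = sba_motstr_levmar_x(n, 0, m, 0, vmask, p, 6, 3, x, NULL, 2, toy_proj, NULL, NULL, 100, 1, opts, info);
+	if (niter == SBA_ERROR)
+		fprintf(stderr, "SBA failed, reason %g\n", info[6]);
+}
+#endif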
+
+/* Bundle adjustment on camera parameters only
+ * using the sparse Levenberg-Marquardt as described in HZ p. 568
+ *
+ * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
+ */
+
+int sba_mot_levmar_x(
+ const int n, /* number of points */
+ const int m, /* number of images */
+ const int mcon, /* number of images (starting from the 1st) whose parameters should not be modified.
+ * All A_ij (see below) with j<mcon are assumed to be zero
+ */
+ char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
+ double *p, /* initial parameter vector p0: (a1, ..., am).
+ * aj are the image j parameters, size m*cnp */
+ const int cnp, /* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
+ double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
+ * x_ij is the projection of the i-th point on the j-th image.
+ * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
+ * see vmask[i, j], max. size n*m*mnp
+ */
+ double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm),
+ * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no
+ * covariance estimates are available (identity matrices are implicitly used in this case).
+ * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing;
+ * see vmask[i, j], max. size n*m*mnp*mnp
+ */
+ const int mnp, /* number of parameters for EACH measurement; usually 2 */
+ void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ /* functional relation describing measurements. Given a parameter vector p,
+ * computes a prediction of the measurements \hat{x}. p is (m*cnp)x1,
+ * \hat{x} is (n*m*mnp)x1, maximum
+ * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
+ * as working memory
+ */
+ void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
+ /* function to evaluate the sparse jacobian dX/dp.
+ * The Jacobian is returned in jac as
+ * (dx_11/da_1, ..., dx_1m/da_m, ..., dx_n1/da_1, ..., dx_nm/da_m), or (using HZ's notation),
+ * jac=(A_11, ..., A_1m, ..., A_n1, ..., A_nm)
+ * Notice that depending on idxij, some of the A_ij might be missing.
+ * Note also that the A_ij are mnp x cnp matrices and they
+ * should be stored in jac in row-major order.
+ * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
+ * as working memory
+ *
+ * If NULL, the jacobian is approximated by repetitive func calls and finite
+ * differences. This is computationally inefficient and thus NOT recommended.
+ */
+ void *adata, /* pointer to possibly additional data, passed uninterpreted to func, fjac */
+
+ const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate
+ return */
+ const int verbose, /* I: verbosity */
+ const double opts[SBA_OPTSSZ],
+ /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial
+ * \mu,
+ * stopping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2
+ */
+ double info[SBA_INFOSZ]
+ /* O: information regarding the minimization. Set to NULL if don't care
+ * info[0]=||e||_2 at initial p.
+ * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
+ * info[5]= # iterations,
+ * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
+ * 2 - stopped by small dp
+ * 3 - stopped by itmax
+ * 4 - stopped by small relative reduction in ||e||_2
+ * 5 - stopped by small ||e||_2
+ * 6 - too many attempts to increase damping. Restart with increased mu
+ * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
+ * info[7]= # function evaluations
+ * info[8]= # jacobian evaluations
+ * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
+ */
+ ) {
+ register int i, j, ii, jj, k;
+ int nvis, nnz, retval;
+
+ /* The following are work arrays that are dynamically allocated by sba_mot_levmar_x() */
+ double *jac; /* work array for storing the jacobian, max. size n*m*mnp*cnp */
+ double *U; /* work array for storing the U_j in the order U_1, ..., U_m, size m*cnp*cnp */
+
+ double *e; /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm,
+ max. size n*m*mnp */
+ double *ea; /* work array for storing the ea_j in the order ea_1, .. ea_m, size m*cnp */
+
+ double *dp; /* work array for storing the parameter vector updates da_1, ..., da_m, size m*cnp */
+
+ double *wght = /* work array for storing the weights computed from the covariance inverses, max. size n*m*mnp*mnp */
+ NULL;
+
+ /* Of the above arrays, jac, e, wght are sparse and
+ * U, ea, dp are dense. Sparse arrays are indexed through
+ * idxij (see below), that is with the same mechanism as the input
+ * measurements vector x
+ */
+
+ /* submatrices sizes */
+ int Asz, Usz, esz, easz, covsz;
+
+ register double *ptr1, *ptr2, *ptr3, *ptr4, sum;
+ struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location of A_ij
+ * in jac, e_ij in e, etc.
+ * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous
+ * index k and it is used to efficiently lookup the memory locations where the non-zero
+ * blocks of a sparse matrix/vector are stored
+ */
+	int maxCPvis, /* max. number of projections in any single camera or from any single point */
+ *rcidxs, /* work array for the indexes corresponding to the nonzero elements of a single row or
+ column in a sparse matrix, size max(n, m) */
+ *rcsubs; /* work array for the subscripts of nonzero elements in a single row or column of a
+ sparse matrix, size max(n, m) */
+
+ /* The following variables are needed by the LM algorithm */
+ register int itno; /* iteration counter */
+ int nsolved;
+ /* temporary work arrays that are dynamically allocated */
+ double *hx, /* \hat{x}_i, max. size m*n*mnp */
+ *diagU, /* diagonals of U_j, size m*cnp */
+ *pdp; /* p + dp, size m*cnp */
+
+ register double mu, /* damping constant */
+ tmp; /* mainly used in matrix & vector multiplications */
+ double p_eL2, ea_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */
+ double p_L2, dp_L2 = DBL_MAX, dF, dL;
+ double tau = FABS(opts[0]), eps1 = FABS(opts[1]), eps2 = FABS(opts[2]), eps2_sq = opts[2] * opts[2],
+ eps3_sq = opts[3] * opts[3], eps4_sq = opts[4] * opts[4];
+ double init_p_eL2;
+ int nu = 2, nu2, stop = 0, nfev, njev = 0, nlss = 0;
+ int nobs, nvars;
+ PLS linsolver = NULL;
+
+ struct fdj_data_x_ fdj_data;
+ void *jac_adata;
+
+ /* Initialization */
+ mu = ea_inf = 0.0; /* -Wall */
+
+ /* block sizes */
+ Asz = mnp * cnp;
+ Usz = cnp * cnp;
+ esz = mnp;
+ easz = cnp;
+ covsz = mnp * mnp;
+
+ /* count total number of visible image points */
+ for (i = nvis = 0, jj = n * m; i < jj; ++i)
+ nvis += (vmask[i] != 0);
+
+ nobs = nvis * mnp;
+ nvars = m * cnp;
+ if (nobs < nvars) {
+ fprintf(stderr,
+ "SBA: sba_mot_levmar_x() cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n",
+ nobs, nvars);
+ return SBA_ERROR;
+ }
+
+ /* allocate & fill up the idxij structure */
+ sba_crsm_alloc(&idxij, n, m, nvis);
+ for (i = k = 0; i < n; ++i) {
+ idxij.rowptr[i] = k;
+ ii = i * m;
+ for (j = 0; j < m; ++j)
+ if (vmask[ii + j]) {
+ idxij.val[k] = k;
+ idxij.colidx[k++] = j;
+ }
+ }
+ idxij.rowptr[n] = nvis;
+
+ /* find the maximum number of visible image points in any single camera or coming from a single 3D point */
+ /* cameras */
+ for (i = maxCPvis = 0; i < n; ++i)
+ if ((k = idxij.rowptr[i + 1] - idxij.rowptr[i]) > maxCPvis)
+ maxCPvis = k;
+
+ /* points, note that maxCPvis is not reinitialized! */
+ for (j = 0; j < m; ++j) {
+ for (i = ii = 0; i < n; ++i)
+ if (vmask[i * m + j])
+ ++ii;
+ if (ii > maxCPvis)
+ maxCPvis = ii;
+ }
+
+ /* allocate work arrays */
+ jac = (double *)emalloc(nvis * Asz * sizeof(double));
+ U = (double *)emalloc(m * Usz * sizeof(double));
+ e = (double *)emalloc(nobs * sizeof(double));
+ ea = (double *)emalloc(nvars * sizeof(double));
+ dp = (double *)emalloc(nvars * sizeof(double));
+ rcidxs = (int *)emalloc(maxCPvis * sizeof(int));
+ rcsubs = (int *)emalloc(maxCPvis * sizeof(int));
+#ifndef SBA_DESTROY_COVS
+ if (covx != NULL)
+ wght = (double *)emalloc(nvis * covsz * sizeof(double));
+#else
+ if (covx != NULL)
+ wght = covx;
+#endif /* SBA_DESTROY_COVS */
+
+ hx = (double *)emalloc(nobs * sizeof(double));
+ diagU = (double *)emalloc(nvars * sizeof(double));
+ pdp = (double *)emalloc(nvars * sizeof(double));
+
+ /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */
+ if (!fjac) {
+ fdj_data.func = func;
+ fdj_data.cnp = cnp;
+ fdj_data.pnp = 0;
+ fdj_data.mnp = mnp;
+ fdj_data.hx = hx;
+ fdj_data.hxx = (double *)emalloc(nobs * sizeof(double));
+ fdj_data.func_rcidxs = (int *)emalloc(2 * maxCPvis * sizeof(int));
+ fdj_data.func_rcsubs = fdj_data.func_rcidxs + maxCPvis;
+ fdj_data.adata = adata;
+
+ fjac = sba_fdjac_x;
+ jac_adata = (void *)&fdj_data;
+ } else {
+ fdj_data.hxx = NULL;
+ jac_adata = adata;
+ }
+
+ if (itmax == 0) { /* verify jacobian */
+ sba_mot_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, mcon, cnp, mnp, adata, jac_adata);
+ retval = 0;
+ goto freemem_and_return;
+ }
+
+ /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their
+ * inverses and using the resulting matrices w_x_ij to weigh A_ij and e_ij as w_x_ij A_ij
+	 * and w_x_ij*e_ij. In this way, auxiliary variables such as U_j=\sum_i A_ij^T A_ij
+ * actually become \sum_i (w_x_ij A_ij)^T w_x_ij A_ij= \sum_i A_ij^T w_x_ij^T w_x_ij A_ij =
+ * A_ij^T Sigma_x_ij^-1 A_ij
+ *
+ * ea_j are weighted in a similar manner
+ */
+ if (covx != NULL) {
+ for (i = 0; i < n; ++i) {
+ nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
+ for (j = 0; j < nnz; ++j) {
+ /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */
+ ptr1 = covx + (k = idxij.val[rcidxs[j]] * covsz);
+ ptr2 = wght + k;
+ if (!sba_mat_cholinv(ptr1, ptr2, mnp)) { /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */
+				fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_mot_levmar_x()\n",
+ i, rcsubs[j]);
+ retval = SBA_ERROR;
+ goto freemem_and_return;
+ }
+ }
+ }
+ sba_mat_cholinv(NULL, NULL, 0); /* cleanup */
+ }
+
+ /* compute the error vectors e_ij in hx */
+ (*func)(p, &idxij, rcidxs, rcsubs, hx, adata);
+ nfev = 1;
+	/* ### compute e=x - f(p) [e=w*(x - f(p))] and its L2 norm */
+ if (covx == NULL)
+ p_eL2 = nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */
+ else
+ p_eL2 = nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */
+ if (verbose)
+ printf("initial mot-SBA error %g [%g]\n", p_eL2, p_eL2 / nvis);
+ init_p_eL2 = p_eL2;
+ if (!SBA_FINITE(p_eL2))
+ stop = 7;
+
+ for (itno = 0; itno < itmax && !stop; ++itno) {
+ /* Note that p, e and ||e||_2 have been updated at the previous iteration */
+
+ /* compute derivative submatrices A_ij */
+ (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata);
+ ++njev;
+
+ if (covx != NULL) {
+ /* compute w_x_ij A_ij
+ * Since w_x_ij is upper triangular, the products can be safely saved
+ * directly in A_ij, without the need for intermediate storage
+ */
+ for (i = 0; i < nvis; ++i) {
+ /* set ptr1, ptr2 to point to w_x_ij, A_ij, resp. */
+ ptr1 = wght + i * covsz;
+ ptr2 = jac + i * Asz;
+
+ /* w_x_ij is mnp x mnp, A_ij is mnp x cnp */
+ for (ii = 0; ii < mnp; ++ii)
+ for (jj = 0; jj < cnp; ++jj) {
+ for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular
+ sum += ptr1[ii * mnp + k] * ptr2[k * cnp + jj];
+ ptr2[ii * cnp + jj] = sum;
+ }
+ }
+ }
+
+ /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here!
+ /* U_j is symmetric, therefore its computation can be sped up by
+ * computing only the upper part and then reusing it for the lower one.
+ * Recall that A_ij is mnp x cnp
+ */
+ /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here!
+ /* Recall that e_ij is mnp x 1
+ */
+ _dblzero(U, m * Usz); /* clear all U_j */
+ _dblzero(ea, m * easz); /* clear all ea_j */
+ for (j = mcon; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ ptr2 = ea + j * easz; // set ptr2 to point to ea_j
+
+ nnz = sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
+ for (i = 0; i < nnz; ++i) {
+ /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */
+ ptr3 = jac + idxij.val[rcidxs[i]] * Asz;
+
+ /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */
+ for (ii = 0; ii < cnp; ++ii) {
+ for (jj = ii; jj < cnp; ++jj) {
+ for (k = 0, sum = 0.0; k < mnp; ++k)
+ sum += ptr3[k * cnp + ii] * ptr3[k * cnp + jj];
+ ptr1[ii * cnp + jj] += sum;
+ }
+
+ /* copy the LOWER TRIANGULAR PART of U_j from the upper one */
+ for (jj = 0; jj < ii; ++jj)
+ ptr1[ii * cnp + jj] = ptr1[jj * cnp + ii];
+ }
+
+ ptr4 = e + idxij.val[rcidxs[i]] * esz; /* set ptr4 to point to e_ij */
+ /* compute A_ij^T e_ij and add it to ea_j */
+ for (ii = 0; ii < cnp; ++ii) {
+ for (jj = 0, sum = 0.0; jj < mnp; ++jj)
+ sum += ptr3[jj * cnp + ii] * ptr4[jj];
+ ptr2[ii] += sum;
+ }
+ }
+ }
+
+ /* Compute ||J^T e||_inf and ||p||^2 */
+ for (i = 0, p_L2 = ea_inf = 0.0; i < nvars; ++i) {
+ if (ea_inf < (tmp = FABS(ea[i])))
+ ea_inf = tmp;
+ p_L2 += p[i] * p[i];
+ }
+ // p_L2=sqrt(p_L2);
+
+ /* save diagonal entries so that augmentation can be later canceled.
+ * Diagonal entries are in U_j
+ */
+ for (j = mcon; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j
+ for (i = 0; i < cnp; ++i)
+ ptr2[i] = ptr1[i * cnp + i];
+ }
+
+ /*
+ if(!(itno%100)){
+ printf("Current estimate: ");
+ for(i=0; i<nvars; ++i)
+ printf("%.9g ", p[i]);
+ printf("-- errors %.9g %0.9g\n", ea_inf, p_eL2);
+ }
+ */
+
+ /* check for convergence */
+ if ((ea_inf <= eps1)) {
+ dp_L2 = 0.0; /* no increment for p in this case */
+ stop = 1;
+ break;
+ }
+
+ /* compute initial damping factor */
+ if (itno == 0) {
+ for (i = mcon * cnp, tmp = DBL_MIN; i < nvars; ++i)
+ if (diagU[i] > tmp)
+ tmp = diagU[i]; /* find max diagonal element */
+ mu = tau * tmp;
+ }
+
+ /* determine increment using adaptive damping */
+ while (1) {
+ /* augment U */
+ for (j = mcon; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ for (i = 0; i < cnp; ++i)
+ ptr1[i * cnp + i] += mu;
+ }
+
+ /* solve the linear systems U_j da_j = ea_j to compute the da_j */
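+			/* explanatory note: each augmented U_j is symmetric and, for mu>0, positive definite
+			 * (U_j = \sum_i A_ij^T A_ij is positive semidefinite), hence the Cholesky-based solver below */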
+ _dblzero(dp, mcon * cnp); /* no change for the first mcon camera params */
+ for (j = nsolved = mcon; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ ptr2 = ea + j * easz; // set ptr2 to point to ea_j
+ ptr3 = dp + j * cnp; // set ptr3 to point to da_j
+
+ // nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_LU;
+ nsolved += sba_Axb_Chol(ptr1, ptr2, ptr3, cnp, 0);
+ linsolver = sba_Axb_Chol;
+ // nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_BK;
+ // nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_QRnoQ;
+ // nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_QR;
+ // nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_SVD;
+ // nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, cnp, (3*cnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0);
+ // linsolver=(PLS)sba_Axb_CG;
+
+ ++nlss;
+ }
+
+ if (nsolved == m) {
+
+ /* parameter vector updates are now in dp */
+
+ /* compute p's new estimate and ||dp||^2 */
+ for (i = 0, dp_L2 = 0.0; i < nvars; ++i) {
+ pdp[i] = p[i] + (tmp = dp[i]);
+ dp_L2 += tmp * tmp;
+ }
+ // dp_L2=sqrt(dp_L2);
+
+ if (dp_L2 <= eps2_sq * p_L2) { /* relative change in p is small, stop */
+ // if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
+ stop = 2;
+ break;
+ }
+
+ if (dp_L2 >= (p_L2 + eps2) / SBA_EPSILON_SQ) { /* almost singular */
+ // if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
+ fprintf(
+ stderr,
+ "SBA: the matrix of the augmented normal equations is almost singular in sba_mot_levmar_x(),\n"
+ " minimization should be restarted from the current solution with an increased damping "
+ "term\n");
+ retval = SBA_ERROR;
+ goto freemem_and_return;
+ }
+
+ (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata);
+ ++nfev; /* evaluate function at p + dp */
+ if (verbose > 1)
+ printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
+ /* ### compute ||e(pdp)||_2 */
+ if (covx == NULL)
+ pdp_eL2 = nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */
+ else
+ pdp_eL2 = nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */
+ if (!SBA_FINITE(pdp_eL2)) {
+ if (verbose) /* identify the offending point projection */
+ sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs);
+
+ stop = 7;
+ break;
+ }
+
+ for (i = 0, dL = 0.0; i < nvars; ++i)
+ dL += dp[i] * (mu * dp[i] + ea[i]);
+
+ dF = p_eL2 - pdp_eL2;
+
+ if (verbose > 1)
+ printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu,
+ dL != 0.0 ? dF / dL : dF / DBL_EPSILON, p_eL2 / nvis, pdp_eL2 / nvis, p_eL2 / pdp_eL2);
+
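+			/* explanatory note: the step is accepted or rejected based on the gain ratio dF/dL (actual vs.
+			 * predicted error reduction); on acceptance mu is scaled by max(SBA_ONE_THIRD, 1-(2*dF/dL-1)^3)
+			 * and nu is reset to 2 */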
+ if (dL > 0.0 && dF > 0.0) { /* reduction in error, increment is accepted */
+ tmp = (2.0 * dF / dL - 1.0);
+ tmp = 1.0 - tmp * tmp * tmp;
+ mu = mu * ((tmp >= SBA_ONE_THIRD) ? tmp : SBA_ONE_THIRD);
+ nu = 2;
+
+ /* the test below is equivalent to the relative reduction of the RMS reprojection error:
+ * sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2) */
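+					/* (explanatory note: completing the square, the test is (sqrt(p_eL2)-sqrt(pdp_eL2))^2 < eps4_sq*p_eL2,
+					 * and dF>0 guarantees sqrt(p_eL2) > sqrt(pdp_eL2) at this point) */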
+ if (pdp_eL2 - 2.0 * sqrt(p_eL2 * pdp_eL2) < (eps4_sq - 1.0) * p_eL2)
+ stop = 4;
+
+ for (i = 0; i < nvars; ++i) /* update p's estimate */
+ p[i] = pdp[i];
+
+ for (i = 0; i < nobs; ++i) /* update e and ||e||_2 */
+ e[i] = hx[i];
+ p_eL2 = pdp_eL2;
+ break;
+ }
+ } /* nsolved==m */
+
+ /* if this point is reached, either at least one linear system could not be solved or
+ * the error did not reduce; in any case, the increment must be rejected
+ */
+
+ mu *= nu;
+ nu2 = nu << 1; // 2*nu;
+			if (nu2 <= nu) { /* nu has wrapped around (overflowed) */
+ fprintf(stderr, "SBA: too many failed attempts to increase the damping factor in sba_mot_levmar_x()! "
+ "Singular Hessian matrix?\n");
+ // exit(1);
+ stop = 6;
+ break;
+ }
+ nu = nu2;
+
+#if 0
+ /* restore U diagonal entries */
+ for(j=mcon; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
+ for(i=0; i<cnp; ++i)
+ ptr1[i*cnp+i]=ptr2[i];
+ }
+#endif
+ } /* inner while loop */
+
+ if (p_eL2 <= eps3_sq)
+ stop = 5; // error is small, force termination of outer loop
+ }
+
+ if (itno >= itmax)
+ stop = 3;
+
+ /* restore U diagonal entries */
+ for (j = mcon; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j
+ for (i = 0; i < cnp; ++i)
+ ptr1[i * cnp + i] = ptr2[i];
+ }
+
+ if (info) {
+ info[0] = init_p_eL2;
+ info[1] = p_eL2;
+ info[2] = ea_inf;
+ info[3] = dp_L2;
+ for (j = mcon, tmp = DBL_MIN; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ for (i = 0; i < cnp; ++i)
+ if (tmp < ptr1[i * cnp + i])
+ tmp = ptr1[i * cnp + i];
+ }
+ info[4] = mu / tmp;
+ info[5] = itno;
+ info[6] = stop;
+ info[7] = nfev;
+ info[8] = njev;
+ info[9] = nlss;
+ }
+ // sba_print_sol(n, m, p, cnp, 0, x, mnp, &idxij, rcidxs, rcsubs);
+ retval = (stop != 7) ? itno : SBA_ERROR;
+
+freemem_and_return: /* NOTE: this point is also reached via a goto! */
+
+ /* free whatever was allocated */
+ free(jac);
+ free(U);
+ free(e);
+ free(ea);
+ free(dp);
+ free(rcidxs);
+ free(rcsubs);
+#ifndef SBA_DESTROY_COVS
+ if (wght)
+ free(wght);
+#else
+/* nothing to do */
+#endif /* SBA_DESTROY_COVS */
+
+ free(hx);
+ free(diagU);
+ free(pdp);
+ if (fdj_data.hxx) { // cleanup
+ free(fdj_data.hxx);
+ free(fdj_data.func_rcidxs);
+ }
+
+ sba_crsm_free(&idxij);
+
+ /* free the memory allocated by the linear solver routine */
+ if (linsolver)
+ (*linsolver)(NULL, NULL, NULL, 0, 0);
+
+ return retval;
+}
+
+/* Bundle adjustment on structure parameters only
+ * using the sparse Levenberg-Marquardt as described in HZ p. 568
+ *
+ * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
+ */
+
+int sba_str_levmar_x(
+ const int n, /* number of points */
+ const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified.
+ * All B_ij (see below) with i<ncon are assumed to be zero
+ */
+ const int m, /* number of images */
+ char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
+ double *p, /* initial parameter vector p0: (b1, ..., bn).
+			   * bi are the i-th point parameters, size n*pnp */
+ const int pnp, /* number of parameters for ONE point; e.g. 3 for Euclidean points */
+ double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
+ * x_ij is the projection of the i-th point on the j-th image.
+ * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
+ * see vmask[i, j], max. size n*m*mnp
+ */
+ double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm),
+ * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no
+ * covariance estimates are available (identity matrices are implicitly used in this case).
+ * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing;
+ * see vmask[i, j], max. size n*m*mnp*mnp
+ */
+ const int mnp, /* number of parameters for EACH measurement; usually 2 */
+ void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ /* functional relation describing measurements. Given a parameter vector p,
+ * computes a prediction of the measurements \hat{x}. p is (n*pnp)x1,
+ * \hat{x} is (n*m*mnp)x1, maximum
+ * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
+ * as working memory
+ */
+ void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
+ /* function to evaluate the sparse jacobian dX/dp.
+ * The Jacobian is returned in jac as
+ * (dx_11/db_1, ..., dx_1m/db_1, ..., dx_n1/db_n, ..., dx_nm/db_n), or (using HZ's notation),
+ * jac=(B_11, ..., B_1m, ..., B_n1, ..., B_nm)
+ * Notice that depending on idxij, some of the B_ij might be missing.
+ * Note also that B_ij are mnp x pnp matrices and they
+ * should be stored in jac in row-major order.
+ * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
+ * as working memory
+ *
+ * If NULL, the jacobian is approximated by repetitive func calls and finite
+ * differences. This is computationally inefficient and thus NOT recommended.
+ */
+ void *adata, /* pointer to possibly additional data, passed uninterpreted to func, fjac */
+
+ const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate
+ return */
+ const int verbose, /* I: verbosity */
+ const double opts[SBA_OPTSSZ],
+ /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial
+ * \mu,
+ * stopping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2
+ */
+ double info[SBA_INFOSZ]
+ /* O: information regarding the minimization. Set to NULL if don't care
+ * info[0]=||e||_2 at initial p.
+ * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
+ * info[5]= # iterations,
+ * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
+ * 2 - stopped by small dp
+ * 3 - stopped by itmax
+ * 4 - stopped by small relative reduction in ||e||_2
+ * 5 - stopped by small ||e||_2
+ * 6 - too many attempts to increase damping. Restart with increased mu
+ * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
+ * info[7]= # function evaluations
+ * info[8]= # jacobian evaluations
+ * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
+ */
+ ) {
+ register int i, j, ii, jj, k;
+ int nvis, nnz, retval;
+
+ /* The following are work arrays that are dynamically allocated by sba_str_levmar_x() */
+ double *jac; /* work array for storing the jacobian, max. size n*m*mnp*pnp */
+ double *V; /* work array for storing the V_i in the order V_1, ..., V_n, size n*pnp*pnp */
+
+ double *e; /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm,
+ max. size n*m*mnp */
+ double *eb; /* work array for storing the eb_i in the order eb_1, .. eb_n size n*pnp */
+
+ double *dp; /* work array for storing the parameter vector updates db_1, ..., db_n, size n*pnp */
+
+ double *wght = /* work array for storing the weights computed from the covariance inverses, max. size n*m*mnp*mnp */
+ NULL;
+
+ /* Of the above arrays, jac, e, wght are sparse and
+ * V, eb, dp are dense. Sparse arrays are indexed through
+ * idxij (see below), that is with the same mechanism as the input
+ * measurements vector x
+ */
+
+ /* submatrices sizes */
+ int Bsz, Vsz, esz, ebsz, covsz;
+
+ register double *ptr1, *ptr2, *ptr3, *ptr4, sum;
+ struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location
+ * of B_ij in jac, etc.
+ * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous
+ * index k and it is used to efficiently lookup the memory locations where the non-zero
+ * blocks of a sparse matrix/vector are stored
+ */
+ int maxCPvis, /* max. of projections across cameras & projections across points */
+ *rcidxs, /* work array for the indexes corresponding to the nonzero elements of a single row or
+ column in a sparse matrix, size max(n, m) */
+ *rcsubs; /* work array for the subscripts of nonzero elements in a single row or column of a
+ sparse matrix, size max(n, m) */
+
+ /* The following variables are needed by the LM algorithm */
+ register int itno; /* iteration counter */
+ int nsolved;
+ /* temporary work arrays that are dynamically allocated */
+ double *hx, /* \hat{x}_i, max. size m*n*mnp */
+ *diagV, /* diagonals of V_i, size n*pnp */
+ *pdp; /* p + dp, size n*pnp */
+
+ register double mu, /* damping constant */
+ tmp; /* mainly used in matrix & vector multiplications */
+ double p_eL2, eb_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */
+ double p_L2, dp_L2 = DBL_MAX, dF, dL;
+ double tau = FABS(opts[0]), eps1 = FABS(opts[1]), eps2 = FABS(opts[2]), eps2_sq = opts[2] * opts[2],
+ eps3_sq = opts[3] * opts[3], eps4_sq = opts[4] * opts[4];
+ double init_p_eL2;
+ int nu = 2, nu2, stop = 0, nfev, njev = 0, nlss = 0;
+ int nobs, nvars;
+ PLS linsolver = NULL;
+
+ struct fdj_data_x_ fdj_data;
+ void *jac_adata;
+
+ /* Initialization */
+ mu = eb_inf = tmp = 0.0; /* -Wall */
+
+ /* block sizes */
+ Bsz = mnp * pnp;
+ Vsz = pnp * pnp;
+ esz = mnp;
+ ebsz = pnp;
+ covsz = mnp * mnp;
+
+ /* count total number of visible image points */
+ for (i = nvis = 0, jj = n * m; i < jj; ++i)
+ nvis += (vmask[i] != 0);
+
+ nobs = nvis * mnp;
+ nvars = n * pnp;
+ if (nobs < nvars) {
+ fprintf(stderr,
+ "SBA: sba_str_levmar_x() cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n",
+ nobs, nvars);
+ return SBA_ERROR;
+ }
+
+ /* allocate & fill up the idxij structure */
+ sba_crsm_alloc(&idxij, n, m, nvis);
+ for (i = k = 0; i < n; ++i) {
+ idxij.rowptr[i] = k;
+ ii = i * m;
+ for (j = 0; j < m; ++j)
+ if (vmask[ii + j]) {
+ idxij.val[k] = k;
+ idxij.colidx[k++] = j;
+ }
+ }
+ idxij.rowptr[n] = nvis;
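+	/* illustrative note: for n=2, m=2 and vmask={1,0, 1,1} the fill above yields
+	 * rowptr={0,1,3}, colidx={0, 0,1}, val={0,1,2}, i.e. a CRS map from the visible (i,j)
+	 * pairs to consecutive block indices into x, jac, e etc. */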
+
+ /* find the maximum number of visible image points in any single camera or coming from a single 3D point */
+ /* cameras */
+ for (i = maxCPvis = 0; i < n; ++i)
+ if ((k = idxij.rowptr[i + 1] - idxij.rowptr[i]) > maxCPvis)
+ maxCPvis = k;
+
+ /* points, note that maxCPvis is not reinitialized! */
+ for (j = 0; j < m; ++j) {
+ for (i = ii = 0; i < n; ++i)
+ if (vmask[i * m + j])
+ ++ii;
+ if (ii > maxCPvis)
+ maxCPvis = ii;
+ }
+
+ /* allocate work arrays */
+ jac = (double *)emalloc(nvis * Bsz * sizeof(double));
+ V = (double *)emalloc(n * Vsz * sizeof(double));
+ e = (double *)emalloc(nobs * sizeof(double));
+ eb = (double *)emalloc(nvars * sizeof(double));
+ dp = (double *)emalloc(nvars * sizeof(double));
+ rcidxs = (int *)emalloc(maxCPvis * sizeof(int));
+ rcsubs = (int *)emalloc(maxCPvis * sizeof(int));
+#ifndef SBA_DESTROY_COVS
+ if (covx != NULL)
+ wght = (double *)emalloc(nvis * covsz * sizeof(double));
+#else
+ if (covx != NULL)
+ wght = covx;
+#endif /* SBA_DESTROY_COVS */
+
+ hx = (double *)emalloc(nobs * sizeof(double));
+ diagV = (double *)emalloc(nvars * sizeof(double));
+ pdp = (double *)emalloc(nvars * sizeof(double));
+
+ /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */
+ if (!fjac) {
+ fdj_data.func = func;
+ fdj_data.cnp = 0;
+ fdj_data.pnp = pnp;
+ fdj_data.mnp = mnp;
+ fdj_data.hx = hx;
+ fdj_data.hxx = (double *)emalloc(nobs * sizeof(double));
+ fdj_data.func_rcidxs = (int *)emalloc(2 * maxCPvis * sizeof(int));
+ fdj_data.func_rcsubs = fdj_data.func_rcidxs + maxCPvis;
+ fdj_data.adata = adata;
+
+ fjac = sba_fdjac_x;
+ jac_adata = (void *)&fdj_data;
+ } else {
+ fdj_data.hxx = NULL;
+ jac_adata = adata;
+ }
+
+ if (itmax == 0) { /* verify jacobian */
+ sba_str_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, ncon, pnp, mnp, adata, jac_adata);
+ retval = 0;
+ goto freemem_and_return;
+ }
+
+ /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their
+ * inverses and using the resulting matrices w_x_ij to weigh B_ij and e_ij as
+ * w_x_ij*B_ij and w_x_ij*e_ij. In this way, auxiliary variables as V_i=\sum_j B_ij^T B_ij
+ * actually become \sum_j (w_x_ij B_ij)^T w_x_ij B_ij= \sum_j B_ij^T w_x_ij^T w_x_ij B_ij =
+ * B_ij^T Sigma_x_ij^-1 B_ij
+ *
+ * eb_i are weighted in a similar manner
+ */
+ if (covx != NULL) {
+ for (i = 0; i < n; ++i) {
+ nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
+ for (j = 0; j < nnz; ++j) {
+ /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */
+ ptr1 = covx + (k = idxij.val[rcidxs[j]] * covsz);
+ ptr2 = wght + k;
+ if (!sba_mat_cholinv(ptr1, ptr2, mnp)) { /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */
+				fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_str_levmar_x()\n",
+ i, rcsubs[j]);
+ retval = SBA_ERROR;
+ goto freemem_and_return;
+ }
+ }
+ }
+ sba_mat_cholinv(NULL, NULL, 0); /* cleanup */
+ }
+
+ /* compute the error vectors e_ij in hx */
+ (*func)(p, &idxij, rcidxs, rcsubs, hx, adata);
+ nfev = 1;
+	/* ### compute e=x - f(p) [e=w*(x - f(p))] and its L2 norm */
+ if (covx == NULL)
+ p_eL2 = nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */
+ else
+ p_eL2 = nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */
+ if (verbose)
+ printf("initial str-SBA error %g [%g]\n", p_eL2, p_eL2 / nvis);
+ init_p_eL2 = p_eL2;
+ if (!SBA_FINITE(p_eL2))
+ stop = 7;
+
+ for (itno = 0; itno < itmax && !stop; ++itno) {
+ /* Note that p, e and ||e||_2 have been updated at the previous iteration */
+
+ /* compute derivative submatrices B_ij */
+ (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata);
+ ++njev;
+
+ if (covx != NULL) {
+ /* compute w_x_ij B_ij.
+ * Since w_x_ij is upper triangular, the products can be safely saved
+ * directly in B_ij, without the need for intermediate storage
+ */
+ for (i = 0; i < nvis; ++i) {
+ /* set ptr1, ptr2 to point to w_x_ij, B_ij, resp. */
+ ptr1 = wght + i * covsz;
+ ptr2 = jac + i * Bsz;
+
+ /* w_x_ij is mnp x mnp, B_ij is mnp x pnp */
+ for (ii = 0; ii < mnp; ++ii)
+ for (jj = 0; jj < pnp; ++jj) {
+ for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular
+ sum += ptr1[ii * mnp + k] * ptr2[k * pnp + jj];
+ ptr2[ii * pnp + jj] = sum;
+ }
+ }
+ }
+
+ /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here!
+ /* V_i is symmetric, therefore its computation can be sped up by
+ * computing only the upper part and then reusing it for the lower one.
+ * Recall that B_ij is mnp x pnp
+ */
+ /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here!
+ /* Recall that e_ij is mnp x 1
+ */
+ _dblzero(V, n * Vsz); /* clear all V_i */
+ _dblzero(eb, n * ebsz); /* clear all eb_i */
+ for (i = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i
+
+ nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
+ for (j = 0; j < nnz; ++j) {
+ /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */
+ ptr3 = jac + idxij.val[rcidxs[j]] * Bsz;
+
+ /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */
+ for (ii = 0; ii < pnp; ++ii) {
+ for (jj = ii; jj < pnp; ++jj) {
+ for (k = 0, sum = 0.0; k < mnp; ++k)
+ sum += ptr3[k * pnp + ii] * ptr3[k * pnp + jj];
+ ptr1[ii * pnp + jj] += sum;
+ }
+
+ /* copy the LOWER TRIANGULAR PART of V_i from the upper one */
+ for (jj = 0; jj < ii; ++jj)
+ ptr1[ii * pnp + jj] = ptr1[jj * pnp + ii];
+ }
+
+ ptr4 = e + idxij.val[rcidxs[j]] * esz; /* set ptr4 to point to e_ij */
+ /* compute B_ij^T e_ij and add it to eb_i */
+ for (ii = 0; ii < pnp; ++ii) {
+ for (jj = 0, sum = 0.0; jj < mnp; ++jj)
+ sum += ptr3[jj * pnp + ii] * ptr4[jj];
+ ptr2[ii] += sum;
+ }
+ }
+ }
+
+ /* Compute ||J^T e||_inf and ||p||^2 */
+ for (i = 0, p_L2 = eb_inf = 0.0; i < nvars; ++i) {
+ if (eb_inf < (tmp = FABS(eb[i])))
+ eb_inf = tmp;
+ p_L2 += p[i] * p[i];
+ }
+ // p_L2=sqrt(p_L2);
+
+ /* save diagonal entries so that augmentation can be later canceled.
+ * Diagonal entries are in V_i
+ */
+ for (i = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i
+ for (j = 0; j < pnp; ++j)
+ ptr2[j] = ptr1[j * pnp + j];
+ }
+
+ /*
+ if(!(itno%100)){
+ printf("Current estimate: ");
+ for(i=0; i<nvars; ++i)
+ printf("%.9g ", p[i]);
+ printf("-- errors %.9g %0.9g\n", eb_inf, p_eL2);
+ }
+ */
+
+ /* check for convergence */
+ if ((eb_inf <= eps1)) {
+ dp_L2 = 0.0; /* no increment for p in this case */
+ stop = 1;
+ break;
+ }
+
+ /* compute initial damping factor */
+ if (itno == 0) {
+ for (i = ncon * pnp, tmp = DBL_MIN; i < nvars; ++i)
+ if (diagV[i] > tmp)
+ tmp = diagV[i]; /* find max diagonal element */
+ mu = tau * tmp;
+ }
+
+ /* determine increment using adaptive damping */
+ while (1) {
+ /* augment V */
+ for (i = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ for (j = 0; j < pnp; ++j)
+ ptr1[j * pnp + j] += mu;
+ }
+
+ /* solve the linear systems V*_i db_i = eb_i to compute the db_i */
+ _dblzero(dp, ncon * pnp); /* no change for the first ncon point params */
+ for (i = nsolved = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i
+ ptr3 = dp + i * pnp; // set ptr3 to point to db_i
+
+ // nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_LU;
+ nsolved += sba_Axb_Chol(ptr1, ptr2, ptr3, pnp, 0);
+ linsolver = sba_Axb_Chol;
+ // nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_BK;
+ // nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_QRnoQ;
+ // nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_QR;
+ // nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_SVD;
+ // nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, pnp, (3*pnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0);
+ // linsolver=(PLS)sba_Axb_CG;
+
+ ++nlss;
+ }
+
+ if (nsolved == n) {
+
+ /* parameter vector updates are now in dp */
+
+ /* compute p's new estimate and ||dp||^2 */
+ for (i = 0, dp_L2 = 0.0; i < nvars; ++i) {
+ pdp[i] = p[i] + (tmp = dp[i]);
+ dp_L2 += tmp * tmp;
+ }
+ // dp_L2=sqrt(dp_L2);
+
+ if (dp_L2 <= eps2_sq * p_L2) { /* relative change in p is small, stop */
+ // if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
+ stop = 2;
+ break;
+ }
+
+ if (dp_L2 >= (p_L2 + eps2) / SBA_EPSILON_SQ) { /* almost singular */
+ // if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
+ fprintf(stderr, "SBA: the matrix of the augmented normal equations is almost singular in "
+								"sba_str_levmar_x(),\n"
+ " minimization should be restarted from the current solution with an increased "
+ "damping term\n");
+ retval = SBA_ERROR;
+ goto freemem_and_return;
+ }
+
+ (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata);
+ ++nfev; /* evaluate function at p + dp */
+ if (verbose > 1)
+ printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
+ /* ### compute ||e(pdp)||_2 */
+ if (covx == NULL)
+ pdp_eL2 = nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */
+ else
+ pdp_eL2 = nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */
+ if (!SBA_FINITE(pdp_eL2)) {
+ if (verbose) /* identify the offending point projection */
+ sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs);
+
+ stop = 7;
+ break;
+ }
+
+ for (i = 0, dL = 0.0; i < nvars; ++i)
+ dL += dp[i] * (mu * dp[i] + eb[i]);
+
+ dF = p_eL2 - pdp_eL2;
+
+ if (verbose > 1)
+ printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu,
+ dL != 0.0 ? dF / dL : dF / DBL_EPSILON, p_eL2 / nvis, pdp_eL2 / nvis, p_eL2 / pdp_eL2);
+
+ if (dL > 0.0 && dF > 0.0) { /* reduction in error, increment is accepted */
+ tmp = (2.0 * dF / dL - 1.0);
+ tmp = 1.0 - tmp * tmp * tmp;
+ mu = mu * ((tmp >= SBA_ONE_THIRD) ? tmp : SBA_ONE_THIRD);
+ nu = 2;
+
+ /* the test below is equivalent to the relative reduction of the RMS reprojection error:
+ * sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2) */
+ if (pdp_eL2 - 2.0 * sqrt(p_eL2 * pdp_eL2) < (eps4_sq - 1.0) * p_eL2)
+ stop = 4;
+
+ for (i = 0; i < nvars; ++i) /* update p's estimate */
+ p[i] = pdp[i];
+
+ for (i = 0; i < nobs; ++i) /* update e and ||e||_2 */
+ e[i] = hx[i];
+ p_eL2 = pdp_eL2;
+ break;
+ }
+ } /* nsolved==n */
+
+ /* if this point is reached, either at least one linear system could not be solved or
+ * the error did not reduce; in any case, the increment must be rejected
+ */
+
+ mu *= nu;
+ nu2 = nu << 1; // 2*nu;
+			if (nu2 <= nu) { /* nu has wrapped around (overflowed) */
+ fprintf(stderr, "SBA: too many failed attempts to increase the damping factor in sba_str_levmar_x()! "
+ "Singular Hessian matrix?\n");
+ // exit(1);
+ stop = 6;
+ break;
+ }
+ nu = nu2;
+
+#if 0
+ /* restore V diagonal entries */
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
+ for(j=0; j<pnp; ++j)
+ ptr1[j*pnp+j]=ptr2[j];
+ }
+#endif
+ } /* inner while loop */
+
+ if (p_eL2 <= eps3_sq)
+ stop = 5; // error is small, force termination of outer loop
+ }
+
+ if (itno >= itmax)
+ stop = 3;
+
+ /* restore V diagonal entries */
+ for (i = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i
+ for (j = 0; j < pnp; ++j)
+ ptr1[j * pnp + j] = ptr2[j];
+ }
+
+ if (info) {
+ info[0] = init_p_eL2;
+ info[1] = p_eL2;
+ info[2] = eb_inf;
+ info[3] = dp_L2;
+		for (i = ncon, tmp = DBL_MIN; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ for (j = 0; j < pnp; ++j)
+ if (tmp < ptr1[j * pnp + j])
+ tmp = ptr1[j * pnp + j];
+ }
+ info[4] = mu / tmp;
+ info[5] = itno;
+ info[6] = stop;
+ info[7] = nfev;
+ info[8] = njev;
+ info[9] = nlss;
+ }
+ // sba_print_sol(n, m, p, 0, pnp, x, mnp, &idxij, rcidxs, rcsubs);
+ retval = (stop != 7) ? itno : SBA_ERROR;
+
+freemem_and_return: /* NOTE: this point is also reached via a goto! */
+
+ /* free whatever was allocated */
+ free(jac);
+ free(V);
+ free(e);
+ free(eb);
+ free(dp);
+ free(rcidxs);
+ free(rcsubs);
+#ifndef SBA_DESTROY_COVS
+ if (wght)
+ free(wght);
+#else
+/* nothing to do */
+#endif /* SBA_DESTROY_COVS */
+
+ free(hx);
+ free(diagV);
+ free(pdp);
+ if (fdj_data.hxx) { // cleanup
+ free(fdj_data.hxx);
+ free(fdj_data.func_rcidxs);
+ }
+
+ sba_crsm_free(&idxij);
+
+ /* free the memory allocated by the linear solver routine */
+ if (linsolver)
+ (*linsolver)(NULL, NULL, NULL, 0, 0);
+
+ return retval;
+}
diff --git a/redist/sba/sba_levmar_wrap.c b/redist/sba/sba_levmar_wrap.c
new file mode 100644
index 0000000..159a040
--- /dev/null
+++ b/redist/sba/sba_levmar_wrap.c
@@ -0,0 +1,917 @@
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// Simple drivers for sparse bundle adjustment based on the
+//// Levenberg - Marquardt minimization algorithm
+//// This file provides simple wrappers to the functions defined in sba_levmar.c
+//// Copyright (C) 2004-2009 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+////
+//// This program is distributed in the hope that it will be useful,
+//// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//// GNU General Public License for more details.
+////
+///////////////////////////////////////////////////////////////////////////////////
+
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sba.h"
+
+#define FABS(x) (((x) >= 0) ? (x) : -(x))
+
+struct wrap_motstr_data_ {
+ void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata); // Q
+ void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij, void *adata); // dQ/da, dQ/db
+ int cnp, pnp, mnp; /* parameter numbers */
+ void *adata;
+};
+
+struct wrap_mot_data_ {
+ void (*proj)(int j, int i, double *aj, double *xij, void *adata); // Q
+ void (*projac)(int j, int i, double *aj, double *Aij, void *adata); // dQ/da
+ int cnp, mnp; /* parameter numbers */
+ void *adata;
+};
+
+struct wrap_str_data_ {
+ void (*proj)(int j, int i, double *bi, double *xij, void *adata); // Q
+ void (*projac)(int j, int i, double *bi, double *Bij, void *adata); // dQ/db
+ int pnp, mnp; /* parameter numbers */
+ void *adata;
+};
+
+/* Routines to compute the estimated measurement vector (i.e. "func") and
+ * its sparse jacobian (i.e. "fjac") needed by the BA expert drivers. The code below
+ * makes use of user-supplied functions computing "Q", "dQ/da", "dQ/db",
+ * i.e. the predicted projection and its associated jacobians for a SINGLE image measurement.
+ * Notice also that what follows is three pairs of "func" and corresponding "fjac" routines.
+ * The first is to be used in full (i.e. motion + structure) BA, the second in
+ * motion-only BA and the third in structure-only BA.
+ */
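+
+/* Illustrative, non-normative sketch: a user-supplied "Q" routine for full BA might look like the
+ * hypothetical pinhole projection below. The callback name, the parameterization of aj (focal length,
+ * principal point, then 6 pose parameters) and the helper my_rigid_transform() are examples only.
+ *
+ *   void my_proj(int j, int i, double *aj, double *bi, double *xij, void *adata)
+ *   {
+ *     double Xc[3];
+ *     my_rigid_transform(aj + 3, bi, Xc);       // rotate & translate bi into the camera frame
+ *     xij[0] = aj[0] * Xc[0] / Xc[2] + aj[1];   // u = f*X/Z + cx
+ *     xij[1] = aj[0] * Xc[1] / Xc[2] + aj[2];   // v = f*Y/Z + cy
+ *   }
+ */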
+
+/* FULL BUNDLE ADJUSTMENT */
+
+/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in
+ * hx the prediction of the measurements, i.e. the projections of 3D points in the m images. The measurements
+ * are returned in the order (hx_11^T, .. hx_1m^T, ..., hx_n1^T, .. hx_nm^T)^T, where hx_ij is the predicted
+ * projection of the i-th point on the j-th camera.
+ * Caller supplies rcidxs and rcsubs which can be used as working memory.
+ * Notice that depending on idxij, some of the hx_ij might be missing
+ *
+ */
+static void sba_motstr_Qs(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata) {
+ register int i, j;
+ int cnp, pnp, mnp;
+ double *pa, *pb, *paj, *pbi, *pxij;
+ int n, m, nnz;
+ struct wrap_motstr_data_ *wdata;
+ void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *proj_adata);
+ void *proj_adata;
+
+ wdata = (struct wrap_motstr_data_ *)adata;
+ cnp = wdata->cnp;
+ pnp = wdata->pnp;
+ mnp = wdata->mnp;
+ proj = wdata->proj;
+ proj_adata = wdata->adata;
+
+ n = idxij->nr;
+ m = idxij->nc;
+ pa = p;
+ pb = p + m * cnp;
+
+ for (j = 0; j < m; ++j) {
+ /* j-th camera parameters */
+ paj = pa + j * cnp;
+
+ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */
+
+ for (i = 0; i < nnz; ++i) {
+ pbi = pb + rcsubs[i] * pnp;
+ pxij = hx + idxij->val[rcidxs[i]] * mnp; // set pxij to point to hx_ij
+
+ (*proj)(j, rcsubs[i], paj, pbi, pxij, proj_adata); // evaluate Q in pxij
+ }
+ }
+}
+
+/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in
+ * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images.
+ * The jacobian is returned in the order (A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm),
+ * where A_ij=dx_ij/da_j and B_ij=dx_ij/db_i (see HZ).
+ * Caller supplies rcidxs and rcsubs which can be used as working memory.
+ * Notice that depending on idxij, some of the A_ij, B_ij might be missing
+ *
+ */
+static void sba_motstr_Qs_jac(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata) {
+ register int i, j;
+ int cnp, pnp, mnp;
+ double *pa, *pb, *paj, *pbi, *pAij, *pBij;
+ int n, m, nnz, Asz, Bsz, ABsz, idx;
+ struct wrap_motstr_data_ *wdata;
+ void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij, void *projac_adata);
+ void *projac_adata;
+
+ wdata = (struct wrap_motstr_data_ *)adata;
+ cnp = wdata->cnp;
+ pnp = wdata->pnp;
+ mnp = wdata->mnp;
+ projac = wdata->projac;
+ projac_adata = wdata->adata;
+
+ n = idxij->nr;
+ m = idxij->nc;
+ pa = p;
+ pb = p + m * cnp;
+ Asz = mnp * cnp;
+ Bsz = mnp * pnp;
+ ABsz = Asz + Bsz;
+
+ for (j = 0; j < m; ++j) {
+ /* j-th camera parameters */
+ paj = pa + j * cnp;
+
+ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */
+
+ for (i = 0; i < nnz; ++i) {
+ pbi = pb + rcsubs[i] * pnp;
+ idx = idxij->val[rcidxs[i]];
+ pAij = jac + idx * ABsz; // set pAij to point to A_ij
+ pBij = pAij + Asz; // set pBij to point to B_ij
+
+ (*projac)(j, rcsubs[i], paj, pbi, pAij, pBij, projac_adata); // evaluate dQ/da, dQ/db in pAij, pBij
+ }
+ }
+}
+
+/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in
+ * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images.
+ * The jacobian is approximated with the aid of finite differences and is returned in the order
+ * (A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm),
+ * where A_ij=dx_ij/da_j and B_ij=dx_ij/db_i (see HZ).
+ * Notice that depending on idxij, some of the A_ij, B_ij might be missing
+ *
+ * Problem-specific information is assumed to be stored in a structure pointed to by "dat".
+ *
+ * NOTE: This function is provided mainly for illustration purposes; in case that execution time is a concern,
+ * the jacobian should be computed analytically
+ */
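+/* Explanatory note: each jacobian column below is approximated with a forward difference,
+ *   A_ij(:,jj) ~ (Q(aj + d*e_jj, bi) - Q(aj, bi)) / d,   B_ij(:,jj) ~ (Q(aj, bi + d*e_jj) - Q(aj, bi)) / d,
+ * where e_jj is the jj-th unit vector and d = max(SBA_DELTA_SCALE*|param_jj|, SBA_MIN_DELTA).
+ */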
+static void sba_motstr_Qs_fdjac(
+ double *p, /* I: current parameter estimate, (m*cnp+n*pnp)x1 */
+ struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */
+ int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */
+ int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */
+ double *jac, /* O: array for storing the approximated jacobian */
+ void *dat) /* I: points to a "wrap_motstr_data_" structure */
+{
+ register int i, j, ii, jj;
+ double *pa, *pb, *paj, *pbi;
+ register double *pAB;
+ int n, m, nnz, Asz, Bsz, ABsz;
+
+ double tmp;
+ register double d, d1;
+
+ struct wrap_motstr_data_ *fdjd;
+ void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata);
+ double *hxij, *hxxij;
+ int cnp, pnp, mnp;
+ void *adata;
+
+ /* retrieve problem-specific information passed in *dat */
+ fdjd = (struct wrap_motstr_data_ *)dat;
+ proj = fdjd->proj;
+ cnp = fdjd->cnp;
+ pnp = fdjd->pnp;
+ mnp = fdjd->mnp;
+ adata = fdjd->adata;
+
+ n = idxij->nr;
+ m = idxij->nc;
+ pa = p;
+ pb = p + m * cnp;
+ Asz = mnp * cnp;
+ Bsz = mnp * pnp;
+ ABsz = Asz + Bsz;
+
+ /* allocate memory for hxij, hxxij */
+ if ((hxij = malloc(2 * mnp * sizeof(double))) == NULL) {
+ fprintf(stderr, "memory allocation request failed in sba_motstr_Qs_fdjac()!\n");
+ exit(1);
+ }
+ hxxij = hxij + mnp;
+
+ /* compute A_ij */
+ for (j = 0; j < m; ++j) {
+ paj = pa + j * cnp; // j-th camera parameters
+
+ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
+ for (jj = 0; jj < cnp; ++jj) {
+ /* determine d=max(SBA_DELTA_SCALE*|paj[jj]|, SBA_MIN_DELTA), see HZ */
+ d = (double)(SBA_DELTA_SCALE)*paj[jj]; // force evaluation
+ d = FABS(d);
+ if (d < SBA_MIN_DELTA)
+ d = SBA_MIN_DELTA;
+ d1 = 1.0 / d; /* invert so that divisions can be carried out faster as multiplications */
+
+ for (i = 0; i < nnz; ++i) {
+ pbi = pb + rcsubs[i] * pnp; // i-th point parameters
+ (*proj)(j, rcsubs[i], paj, pbi, hxij, adata); // evaluate supplied function on current solution
+
+ tmp = paj[jj];
+ paj[jj] += d;
+ (*proj)(j, rcsubs[i], paj, pbi, hxxij, adata);
+ paj[jj] = tmp; /* restore */
+
+ pAB = jac + idxij->val[rcidxs[i]] * ABsz; // set pAB to point to A_ij
+ for (ii = 0; ii < mnp; ++ii)
+ pAB[ii * cnp + jj] = (hxxij[ii] - hxij[ii]) * d1;
+ }
+ }
+ }
+
+ /* compute B_ij */
+ for (i = 0; i < n; ++i) {
+ pbi = pb + i * pnp; // i-th point parameters
+
+ nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
+ for (jj = 0; jj < pnp; ++jj) {
+ /* determine d=max(SBA_DELTA_SCALE*|pbi[jj]|, SBA_MIN_DELTA), see HZ */
+ d = (double)(SBA_DELTA_SCALE)*pbi[jj]; // force evaluation
+ d = FABS(d);
+ if (d < SBA_MIN_DELTA)
+ d = SBA_MIN_DELTA;
+ d1 = 1.0 / d; /* invert so that divisions can be carried out faster as multiplications */
+
+ for (j = 0; j < nnz; ++j) {
+ paj = pa + rcsubs[j] * cnp; // j-th camera parameters
+ (*proj)(rcsubs[j], i, paj, pbi, hxij, adata); // evaluate supplied function on current solution
+
+ tmp = pbi[jj];
+ pbi[jj] += d;
+ (*proj)(rcsubs[j], i, paj, pbi, hxxij, adata);
+ pbi[jj] = tmp; /* restore */
+
+ pAB = jac + idxij->val[rcidxs[j]] * ABsz + Asz; // set pAB to point to B_ij
+ for (ii = 0; ii < mnp; ++ii)
+ pAB[ii * pnp + jj] = (hxxij[ii] - hxij[ii]) * d1;
+ }
+ }
+ }
+
+ free(hxij);
+}
+
+/* BUNDLE ADJUSTMENT FOR CAMERA PARAMETERS ONLY */
+
+/* Given a parameter vector p made up of the parameters of m cameras, compute in
+ * hx the prediction of the measurements, i.e. the projections of 3D points in the m images.
+ * The measurements are returned in the order (hx_11^T, .. hx_1m^T, ..., hx_n1^T, .. hx_nm^T)^T,
+ * where hx_ij is the predicted projection of the i-th point on the j-th camera.
+ * Caller supplies rcidxs and rcsubs which can be used as working memory.
+ * Notice that depending on idxij, some of the hx_ij might be missing
+ *
+ */
+static void sba_mot_Qs(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata) {
+ register int i, j;
+ int cnp, mnp;
+ double *paj, *pxij;
+ // int n;
+ int m, nnz;
+ struct wrap_mot_data_ *wdata;
+ void (*proj)(int j, int i, double *aj, double *xij, void *proj_adata);
+ void *proj_adata;
+
+ wdata = (struct wrap_mot_data_ *)adata;
+ cnp = wdata->cnp;
+ mnp = wdata->mnp;
+ proj = wdata->proj;
+ proj_adata = wdata->adata;
+
+ // n=idxij->nr;
+ m = idxij->nc;
+
+ for (j = 0; j < m; ++j) {
+ /* j-th camera parameters */
+ paj = p + j * cnp;
+
+ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */
+
+ for (i = 0; i < nnz; ++i) {
+ pxij = hx + idxij->val[rcidxs[i]] * mnp; // set pxij to point to hx_ij
+
+ (*proj)(j, rcsubs[i], paj, pxij, proj_adata); // evaluate Q in pxij
+ }
+ }
+}
+
+/* Given a parameter vector p made up of the parameters of m cameras, compute in jac
+ * the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images.
+ * The jacobian is returned in the order (A_11, ..., A_1m, ..., A_n1, ..., A_nm),
+ * where A_ij=dx_ij/da_j (see HZ).
+ * Caller supplies rcidxs and rcsubs which can be used as working memory.
+ * Notice that depending on idxij, some of the A_ij might be missing
+ *
+ */
+static void sba_mot_Qs_jac(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata) {
+ register int i, j;
+ int cnp, mnp;
+ double *paj, *pAij;
+ // int n;
+ int m, nnz, Asz, idx;
+ struct wrap_mot_data_ *wdata;
+ void (*projac)(int j, int i, double *aj, double *Aij, void *projac_adata);
+ void *projac_adata;
+
+ wdata = (struct wrap_mot_data_ *)adata;
+ cnp = wdata->cnp;
+ mnp = wdata->mnp;
+ projac = wdata->projac;
+ projac_adata = wdata->adata;
+
+ // n=idxij->nr;
+ m = idxij->nc;
+ Asz = mnp * cnp;
+
+ for (j = 0; j < m; ++j) {
+ /* j-th camera parameters */
+ paj = p + j * cnp;
+
+ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */
+
+ for (i = 0; i < nnz; ++i) {
+ idx = idxij->val[rcidxs[i]];
+ pAij = jac + idx * Asz; // set pAij to point to A_ij
+
+ (*projac)(j, rcsubs[i], paj, pAij, projac_adata); // evaluate dQ/da in pAij
+ }
+ }
+}
+
+/* Given a parameter vector p made up of the parameters of m cameras, compute in jac the jacobian
+ * of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images.
+ * The jacobian is approximated with the aid of finite differences and is returned in the order
+ * (A_11, ..., A_1m, ..., A_n1, ..., A_nm), where A_ij=dx_ij/da_j (see HZ).
+ * Notice that depending on idxij, some of the A_ij might be missing
+ *
+ * Problem-specific information is assumed to be stored in a structure pointed to by "dat".
+ *
+ * NOTE: This function is provided mainly for illustration purposes; in case that execution time is a concern,
+ * the jacobian should be computed analytically
+ */
+static void sba_mot_Qs_fdjac(
+ double *p, /* I: current parameter estimate, (m*cnp)x1 */
+ struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */
+ int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */
+ int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */
+ double *jac, /* O: array for storing the approximated jacobian */
+ void *dat) /* I: points to a "wrap_mot_data_" structure */
+{
+ register int i, j, ii, jj;
+ double *paj;
+ register double *pA;
+ // int n;
+ int m, nnz, Asz;
+
+ double tmp;
+ register double d, d1;
+
+ struct wrap_mot_data_ *fdjd;
+ void (*proj)(int j, int i, double *aj, double *xij, void *adata);
+ double *hxij, *hxxij;
+ int cnp, mnp;
+ void *adata;
+
+ /* retrieve problem-specific information passed in *dat */
+ fdjd = (struct wrap_mot_data_ *)dat;
+ proj = fdjd->proj;
+ cnp = fdjd->cnp;
+ mnp = fdjd->mnp;
+ adata = fdjd->adata;
+
+ // n=idxij->nr;
+ m = idxij->nc;
+ Asz = mnp * cnp;
+
+ /* allocate memory for hxij, hxxij */
+ if ((hxij = malloc(2 * mnp * sizeof(double))) == NULL) {
+ fprintf(stderr, "memory allocation request failed in sba_mot_Qs_fdjac()!\n");
+ exit(1);
+ }
+ hxxij = hxij + mnp;
+
+ /* compute A_ij */
+ for (j = 0; j < m; ++j) {
+ paj = p + j * cnp; // j-th camera parameters
+
+ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
+ for (jj = 0; jj < cnp; ++jj) {
+ /* determine d=max(SBA_DELTA_SCALE*|paj[jj]|, SBA_MIN_DELTA), see HZ */
+ d = (double)(SBA_DELTA_SCALE)*paj[jj]; // force evaluation
+ d = FABS(d);
+ if (d < SBA_MIN_DELTA)
+ d = SBA_MIN_DELTA;
+ d1 = 1.0 / d; /* invert so that divisions can be carried out faster as multiplications */
+
+ for (i = 0; i < nnz; ++i) {
+ (*proj)(j, rcsubs[i], paj, hxij, adata); // evaluate supplied function on current solution
+
+ tmp = paj[jj];
+ paj[jj] += d;
+ (*proj)(j, rcsubs[i], paj, hxxij, adata);
+ paj[jj] = tmp; /* restore */
+
+ pA = jac + idxij->val[rcidxs[i]] * Asz; // set pA to point to A_ij
+ for (ii = 0; ii < mnp; ++ii)
+ pA[ii * cnp + jj] = (hxxij[ii] - hxij[ii]) * d1;
+ }
+ }
+ }
+
+ free(hxij);
+}
+
+/* BUNDLE ADJUSTMENT FOR STRUCTURE PARAMETERS ONLY */
+
+/* Given a parameter vector p made up of the 3D coordinates of n points, compute in
+ * hx the prediction of the measurements, i.e. the projections of 3D points in the m images. The measurements
+ * are returned in the order (hx_11^T, .. hx_1m^T, ..., hx_n1^T, .. hx_nm^T)^T, where hx_ij is the predicted
+ * projection of the i-th point on the j-th camera.
+ * Caller supplies rcidxs and rcsubs which can be used as working memory.
+ * Notice that depending on idxij, some of the hx_ij might be missing
+ *
+ */
+static void sba_str_Qs(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata) {
+ register int i, j;
+ int pnp, mnp;
+ double *pbi, *pxij;
+ // int n;
+ int m, nnz;
+ struct wrap_str_data_ *wdata;
+ void (*proj)(int j, int i, double *bi, double *xij, void *proj_adata);
+ void *proj_adata;
+
+ wdata = (struct wrap_str_data_ *)adata;
+ pnp = wdata->pnp;
+ mnp = wdata->mnp;
+ proj = wdata->proj;
+ proj_adata = wdata->adata;
+
+ // n=idxij->nr;
+ m = idxij->nc;
+
+ for (j = 0; j < m; ++j) {
+ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */
+
+ for (i = 0; i < nnz; ++i) {
+ pbi = p + rcsubs[i] * pnp;
+ pxij = hx + idxij->val[rcidxs[i]] * mnp; // set pxij to point to hx_ij
+
+ (*proj)(j, rcsubs[i], pbi, pxij, proj_adata); // evaluate Q in pxij
+ }
+ }
+}
+
+/* Given a parameter vector p made up of the 3D coordinates of n points, compute in
+ * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images.
+ * The jacobian is returned in the order (B_11, ..., B_1m, ..., B_n1, ..., B_nm), where B_ij=dx_ij/db_i (see HZ).
+ * Caller supplies rcidxs and rcsubs which can be used as working memory.
+ * Notice that depending on idxij, some of the B_ij might be missing
+ *
+ */
+static void sba_str_Qs_jac(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata) {
+ register int i, j;
+ int pnp, mnp;
+ double *pbi, *pBij;
+ // int n;
+ int m, nnz, Bsz, idx;
+ struct wrap_str_data_ *wdata;
+ void (*projac)(int j, int i, double *bi, double *Bij, void *projac_adata);
+ void *projac_adata;
+
+ wdata = (struct wrap_str_data_ *)adata;
+ pnp = wdata->pnp;
+ mnp = wdata->mnp;
+ projac = wdata->projac;
+ projac_adata = wdata->adata;
+
+ // n=idxij->nr;
+ m = idxij->nc;
+ Bsz = mnp * pnp;
+
+ for (j = 0; j < m; ++j) {
+
+ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */
+
+ for (i = 0; i < nnz; ++i) {
+ pbi = p + rcsubs[i] * pnp;
+ idx = idxij->val[rcidxs[i]];
+ pBij = jac + idx * Bsz; // set pBij to point to B_ij
+
+ (*projac)(j, rcsubs[i], pbi, pBij, projac_adata); // evaluate dQ/db in pBij
+ }
+ }
+}
+
+/* Given a parameter vector p made up of the 3D coordinates of n points, compute in
+ * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images.
+ * The jacobian is approximated with the aid of finite differences and is returned in the order
+ * (B_11, ..., B_1m, ..., B_n1, ..., B_nm), where B_ij=dx_ij/db_i (see HZ).
+ * Notice that depending on idxij, some of the B_ij might be missing
+ *
+ * Problem-specific information is assumed to be stored in a structure pointed to by "dat".
+ *
+ * NOTE: This function is provided mainly for illustration purposes; in case that execution time is a concern,
+ * the jacobian should be computed analytically
+ */
+static void sba_str_Qs_fdjac(
+ double *p, /* I: current parameter estimate, (n*pnp)x1 */
+ struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */
+ int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */
+ int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */
+ double *jac, /* O: array for storing the approximated jacobian */
+ void *dat) /* I: points to a "wrap_str_data_" structure */
+{
+ register int i, j, ii, jj;
+ double *pbi;
+ register double *pB;
+ // int m;
+ int n, nnz, Bsz;
+
+ double tmp;
+ register double d, d1;
+
+ struct wrap_str_data_ *fdjd;
+ void (*proj)(int j, int i, double *bi, double *xij, void *adata);
+ double *hxij, *hxxij;
+ int pnp, mnp;
+ void *adata;
+
+ /* retrieve problem-specific information passed in *dat */
+ fdjd = (struct wrap_str_data_ *)dat;
+ proj = fdjd->proj;
+ pnp = fdjd->pnp;
+ mnp = fdjd->mnp;
+ adata = fdjd->adata;
+
+ n = idxij->nr;
+ // m=idxij->nc;
+ Bsz = mnp * pnp;
+
+ /* allocate memory for hxij, hxxij */
+ if ((hxij = malloc(2 * mnp * sizeof(double))) == NULL) {
+ fprintf(stderr, "memory allocation request failed in sba_str_Qs_fdjac()!\n");
+ exit(1);
+ }
+ hxxij = hxij + mnp;
+
+ /* compute B_ij */
+ for (i = 0; i < n; ++i) {
+ pbi = p + i * pnp; // i-th point parameters
+
+ nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
+ for (jj = 0; jj < pnp; ++jj) {
+ /* determine d=max(SBA_DELTA_SCALE*|pbi[jj]|, SBA_MIN_DELTA), see HZ */
+ d = (double)(SBA_DELTA_SCALE)*pbi[jj]; // force evaluation
+ d = FABS(d);
+ if (d < SBA_MIN_DELTA)
+ d = SBA_MIN_DELTA;
+ d1 = 1.0 / d; /* invert so that divisions can be carried out faster as multiplications */
+
+ for (j = 0; j < nnz; ++j) {
+ (*proj)(rcsubs[j], i, pbi, hxij, adata); // evaluate supplied function on current solution
+
+ tmp = pbi[jj];
+ pbi[jj] += d;
+ (*proj)(rcsubs[j], i, pbi, hxxij, adata);
+ pbi[jj] = tmp; /* restore */
+
+ pB = jac + idxij->val[rcidxs[j]] * Bsz; // set pB to point to B_ij
+ for (ii = 0; ii < mnp; ++ii)
+ pB[ii * pnp + jj] = (hxxij[ii] - hxij[ii]) * d1;
+ }
+ }
+ }
+
+ free(hxij);
+}
+
+/*
+ * Simple driver to sba_motstr_levmar_x for bundle adjustment on camera and structure parameters.
+ *
+ * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
+ */
+
+int sba_motstr_levmar(
+ const int n, /* number of points */
+ const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified.
+ * All B_ij (see below) with i<ncon are assumed to be zero
+ */
+ const int m, /* number of images */
+ const int mcon, /* number of images (starting from the 1st) whose parameters should not be modified.
+ * All A_ij (see below) with j<mcon are assumed to be zero
+ */
+ char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
+ double *p, /* initial parameter vector p0: (a1, ..., am, b1, ..., bn).
+ * aj are the image j parameters, bi are the i-th point parameters,
+ * size m*cnp + n*pnp
+ */
+ const int cnp, /* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
+ const int pnp, /* number of parameters for ONE point; e.g. 3 for Euclidean points */
+ double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
+ * x_ij is the projection of the i-th point on the j-th image.
+ * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
+ * see vmask[i, j], max. size n*m*mnp
+ */
+ double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm),
+ * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no
+ * covariance estimates are available (identity matrices are implicitly used in this case).
+ * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing;
+ * see vmask[i, j], max. size n*m*mnp*mnp
+ */
+ const int mnp, /* number of parameters for EACH measurement; usually 2 */
+ void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata),
+ /* functional relation computing a SINGLE image measurement. Assuming that
+ * the parameters of point i are bi and the parameters of camera j aj,
+ * computes a prediction of \hat{x}_{ij}. aj is cnp x 1, bi is pnp x 1 and
+ * xij is mnp x 1. This function is called only if point i is visible in
+ * image j (i.e. vmask[i, j]==1)
+ */
+ void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij, void *adata),
+ /* functional relation to evaluate d x_ij / d a_j and
+ * d x_ij / d b_i in Aij and Bij resp.
+	 * This function is called only if point i is visible in image j
+ * (i.e. vmask[i, j]==1). Also, A_ij and B_ij are mnp x cnp and mnp x pnp
+ * matrices resp. and they should be stored in row-major order.
+ *
+ * If NULL, the jacobians are approximated by repetitive proj calls
+ * and finite differences.
+ */
+ void *adata, /* pointer to possibly additional data, passed uninterpreted to proj, projac */
+
+ const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate
+ return */
+ const int verbose, /* I: verbosity */
+ const double opts[SBA_OPTSSZ],
+ /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial
+ * \mu,
+	 * stopping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2
+ */
+ double info[SBA_INFOSZ]
+ /* O: information regarding the minimization. Set to NULL if don't care
+ * info[0]=||e||_2 at initial p.
+ * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
+ * info[5]= # iterations,
+ * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
+ * 2 - stopped by small dp
+ * 3 - stopped by itmax
+ * 4 - stopped by small relative reduction in ||e||_2
+	 *                                     5 - stopped by small ||e||_2
+	 *                                     6 - too many attempts to increase damping. Restart with increased mu
+ * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
+ * info[7]= # function evaluations
+ * info[8]= # jacobian evaluations
+ * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
+ */
+ ) {
+ int retval;
+ struct wrap_motstr_data_ wdata;
+ static void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata);
+
+ wdata.proj = proj;
+ wdata.projac = projac;
+ wdata.cnp = cnp;
+ wdata.pnp = pnp;
+ wdata.mnp = mnp;
+ wdata.adata = adata;
+
+ fjac = (projac) ? sba_motstr_Qs_jac : sba_motstr_Qs_fdjac;
+ retval = sba_motstr_levmar_x(n, ncon, m, mcon, vmask, p, cnp, pnp, x, covx, mnp, sba_motstr_Qs, fjac, &wdata, itmax,
+ verbose, opts, info);
+
+ if (info) {
+ register int i;
+ int nvis;
+
+ /* count visible image points */
+ for (i = nvis = 0; i < n * m; ++i)
+ nvis += (vmask[i] != 0);
+
+ /* each "func" & "fjac" evaluation requires nvis "proj" & "projac" evaluations */
+ info[7] *= nvis;
+ info[8] *= nvis;
+ }
+
+ return retval;
+}
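+
+/* Illustrative, non-normative usage sketch for the driver above; my_proj/my_projac are hypothetical
+ * per-measurement callbacks with the signatures documented in the parameter list, npts/ncams/vmask/p/x
+ * are the caller's data and the option values are only examples:
+ *
+ *   double opts[SBA_OPTSSZ] = {1E-3, 1E-12, 1E-12, 1E-12, 0.0}, info[SBA_INFOSZ];
+ *   int iters = sba_motstr_levmar(npts, 0, ncams, 0, vmask, p, 6, 3, x, NULL, 2,
+ *                                 my_proj, my_projac, NULL, 100, 0, opts, info);
+ *   if (iters == SBA_ERROR) ...   // info[6] holds the reason for termination
+ */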
+
+/*
+ * Simple driver to sba_mot_levmar_x for bundle adjustment on camera parameters.
+ *
+ * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
+ */
+
+int sba_mot_levmar(
+ const int n, /* number of points */
+ const int m, /* number of images */
+ const int mcon, /* number of images (starting from the 1st) whose parameters should not be modified.
+ * All A_ij (see below) with j<mcon are assumed to be zero
+ */
+ char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
+ double *p, /* initial parameter vector p0: (a1, ..., am).
+ * aj are the image j parameters, size m*cnp */
+ const int cnp, /* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
+ double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
+ * x_ij is the projection of the i-th point on the j-th image.
+ * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
+ * see vmask[i, j], max. size n*m*mnp
+ */
+ double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm),
+ * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no
+ * covariance estimates are available (identity matrices are implicitly used in this case).
+ * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing;
+ * see vmask[i, j], max. size n*m*mnp*mnp
+ */
+ const int mnp, /* number of parameters for EACH measurement; usually 2 */
+ void (*proj)(int j, int i, double *aj, double *xij, void *adata),
+ /* functional relation computing a SINGLE image measurement. Assuming that
+ * the parameters of camera j are aj, computes a prediction of \hat{x}_{ij}
+ * for point i. aj is cnp x 1 and xij is mnp x 1.
+ * This function is called only if point i is visible in image j (i.e. vmask[i, j]==1)
+ */
+ void (*projac)(int j, int i, double *aj, double *Aij, void *adata),
+ /* functional relation to evaluate d x_ij / d a_j in Aij
+ * This function is called only if point i is visible in image j
+	 * (i.e. vmask[i, j]==1). Also, A_ij are mnp x cnp matrices
+ * and should be stored in row-major order.
+ *
+ * If NULL, the jacobian is approximated by repetitive proj calls
+ * and finite differences.
+ */
+ void *adata, /* pointer to possibly additional data, passed uninterpreted to proj, projac */
+
+ const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate
+ return */
+ const int verbose, /* I: verbosity */
+ const double opts[SBA_OPTSSZ],
+	/* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial
+ * \mu,
+	 * stopping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2
+ */
+ double info[SBA_INFOSZ]
+ /* O: information regarding the minimization. Set to NULL if don't care
+ * info[0]=||e||_2 at initial p.
+ * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
+ * info[5]= # iterations,
+ * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
+ * 2 - stopped by small dp
+ * 3 - stopped by itmax
+ * 4 - stopped by small relative reduction in ||e||_2
+	 *                                     5 - stopped by small ||e||_2
+	 *                                     6 - too many attempts to increase damping. Restart with increased mu
+ * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
+ * info[7]= # function evaluations
+ * info[8]= # jacobian evaluations
+ * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
+ */
+ ) {
+ int retval;
+ struct wrap_mot_data_ wdata;
+ void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata);
+
+ wdata.proj = proj;
+ wdata.projac = projac;
+ wdata.cnp = cnp;
+ wdata.mnp = mnp;
+ wdata.adata = adata;
+
+ fjac = (projac) ? sba_mot_Qs_jac : sba_mot_Qs_fdjac;
+ retval =
+ sba_mot_levmar_x(n, m, mcon, vmask, p, cnp, x, covx, mnp, sba_mot_Qs, fjac, &wdata, itmax, verbose, opts, info);
+
+ if (info) {
+ register int i;
+ int nvis;
+
+ /* count visible image points */
+ for (i = nvis = 0; i < n * m; ++i)
+ nvis += (vmask[i] != 0);
+
+ /* each "func" & "fjac" evaluation requires nvis "proj" & "projac" evaluations */
+ info[7] *= nvis;
+ info[8] *= nvis;
+ }
+
+ return retval;
+}
+
+/*
+ * Simple driver to sba_str_levmar_x for bundle adjustment on structure parameters.
+ *
+ * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
+ */
+
+int sba_str_levmar(
+ const int n, /* number of points */
+ const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified.
+ * All B_ij (see below) with i<ncon are assumed to be zero
+ */
+ const int m, /* number of images */
+ char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
+ double *p, /* initial parameter vector p0: (b1, ..., bn).
+ * bi are the i-th point parameters, size n*pnp
+ */
+ const int pnp, /* number of parameters for ONE point; e.g. 3 for Euclidean points */
+ double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
+ * x_ij is the projection of the i-th point on the j-th image.
+ * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
+ * see vmask[i, j], max. size n*m*mnp
+ */
+ double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm),
+ * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no
+ * covariance estimates are available (identity matrices are implicitly used in this case).
+ * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing;
+ * see vmask[i, j], max. size n*m*mnp*mnp
+ */
+ const int mnp, /* number of parameters for EACH measurement; usually 2 */
+ void (*proj)(int j, int i, double *bi, double *xij, void *adata),
+ /* functional relation computing a SINGLE image measurement. Assuming that
+ * the parameters of point i are bi, computes a prediction of \hat{x}_{ij}.
+ * bi is pnp x 1 and xij is mnp x 1. This function is called only if point
+ * i is visible in image j (i.e. vmask[i, j]==1)
+ */
+ void (*projac)(int j, int i, double *bi, double *Bij, void *adata),
+ /* functional relation to evaluate d x_ij / d b_i in Bij.
+ * This function is called only if point i is visible in image j
+ * (i.e. vmask[i, j]==1). Also, B_ij are mnp x pnp matrices
+ * and they should be stored in row-major order.
+ *
+ * If NULL, the jacobians are approximated by repetitive proj calls
+ * and finite differences.
+ */
+ void *adata, /* pointer to possibly additional data, passed uninterpreted to proj, projac */
+
+ const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate
+ return */
+ const int verbose, /* I: verbosity */
+ const double opts[SBA_OPTSSZ],
+ /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial
+ * \mu,
+	 * stopping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2
+ */
+ double info[SBA_INFOSZ]
+ /* O: information regarding the minimization. Set to NULL if don't care
+ * info[0]=||e||_2 at initial p.
+ * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
+ * info[5]= # iterations,
+ * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
+ * 2 - stopped by small dp
+ * 3 - stopped by itmax
+ * 4 - stopped by small relative reduction in ||e||_2
+ * 5 - too many attempts to increase damping. Restart with increased mu
+ * 6 - stopped by small ||e||_2
+ * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
+ * info[7]= # function evaluations
+ * info[8]= # jacobian evaluations
+ * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
+ */
+ ) {
+ int retval;
+ struct wrap_str_data_ wdata;
+	void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata);
+
+ wdata.proj = proj;
+ wdata.projac = projac;
+ wdata.pnp = pnp;
+ wdata.mnp = mnp;
+ wdata.adata = adata;
+
+ fjac = (projac) ? sba_str_Qs_jac : sba_str_Qs_fdjac;
+ retval =
+ sba_str_levmar_x(n, ncon, m, vmask, p, pnp, x, covx, mnp, sba_str_Qs, fjac, &wdata, itmax, verbose, opts, info);
+
+ if (info) {
+ register int i;
+ int nvis;
+
+ /* count visible image points */
+ for (i = nvis = 0; i < n * m; ++i)
+ nvis += (vmask[i] != 0);
+
+ /* each "func" & "fjac" evaluation requires nvis "proj" & "projac" evaluations */
+ info[7] *= nvis;
+ info[8] *= nvis;
+ }
+
+ return retval;
+}
diff --git a/redist/test_dcl.c b/redist/test_dcl.c
index dc6a93e..ded2863 100644
--- a/redist/test_dcl.c
+++ b/redist/test_dcl.c
@@ -1,4 +1,5 @@
-//gcc -msse2 -O3 -ftree-vectorize test_dcl.c dclhelpers.c os_generic.c -DFLT=double -lpthread -lcblas && valgrind ./a.out
+// gcc -msse2 -O3 -ftree-vectorize test_dcl.c dclhelpers.c os_generic.c -DFLT=double -lpthread -lcblas && valgrind
+// ./a.out
#include "dclhelpers.h"
#include "os_generic.h"
@@ -99,44 +100,39 @@ void compareToCblasTrans() {
DMS(em1), 0);
}
-int main()
-{
- FLT A[2][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7} };
+int main() {
+ FLT A[2][4] = {{0, 1, 2, 3}, {4, 5, 6, 7}};
FLT B[4][2];
- dclPrint( A[0], 4, 2, 4 );
+ dclPrint(A[0], 4, 2, 4);
dclTransp(B[0], 2, A[0], 4, 2, 4);
- dclPrint( B[0], 2, 4, 2 );
+ dclPrint(B[0], 2, 4, 2);
int i, j;
- for( i = 0; i < 8; i++ )
- {
- printf( "%f\n", ((float*)(B[0]))[i] );
+ for (i = 0; i < 8; i++) {
+ printf("%f\n", ((float *)(B[0]))[i]);
}
- FLT M[3][3] = {
- { .32, 1, 0 },
- { 0, 1, 2 },
- { 1, 0, 1 } };
+ FLT M[3][3] = {{.32, 1, 0}, {0, 1, 2}, {1, 0, 1}};
FLT Mo[3][3];
- dclInv( Mo[0], 3, M[0], 3, 3 );
- dclPrint( Mo[0], 3, 3, 3 );
+ dclInv(Mo[0], 3, M[0], 3, 3);
+ dclPrint(Mo[0], 3, 3, 3);
FLT MM[3][3];
- dclMul( MM[0], 3, M[0], 3, Mo[0], 3, 3, 3, 3 );
+ dclMul(MM[0], 3, M[0], 3, Mo[0], 3, 3, 3, 3);
- printf( "The following should be an identity matrix\n" );
- dclPrint( MM[0], 3, 3, 3 );
+ printf("The following should be an identity matrix\n");
+ dclPrint(MM[0], 3, 3, 3);
{
FLT A[3][4];
- dclIdentity( DMS(A), 3, 4);
- dclPrint( DMS(A), 3, 4);
+ dclIdentity(DMS(A), 3, 4);
+ dclPrint(DMS(A), 3, 4);
FLT x[4][2] = {
{7, -7}, {8, -8}, {9, -9}, {10, -10},
};
FLT R[4][2];
- dclZero( DMS(R), 4, 2 );
+ dclZero(DMS(R), 4, 2);
// dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3);
dcldgemm(0, 0, 3, 4, 2, 1, A[0], 4, x[0], 2, 0, R[0], 2);
@@ -145,9 +141,8 @@ int main()
dclPrint(DMS(R), 3, 2);
for (int j = 0; j < 2; j++) {
- for (int i = 0; i < 3; i++)
- {
- printf( "[%d][%d]\n", i, j );
+ for (int i = 0; i < 3; i++) {
+ printf("[%d][%d]\n", i, j);
assert(R[i][j] == x[i][j]);
}
@@ -158,4 +153,3 @@ int main()
compareToCblas();
compareToCblasTrans();
}
-
diff --git a/src/poser_daveortho.c b/src/poser_daveortho.c
index 4cf6dd8..2bf57c6 100644
--- a/src/poser_daveortho.c
+++ b/src/poser_daveortho.c
@@ -193,17 +193,19 @@ void OrthoSolve(
FLT invXXt[3][3];
FLT SXt[2][3];
FLT M[2][3]; // Morph matrix! (2 by 3)
- TRANSP(Xt,X,3,nPoints);
- MUL(XXt,X,Xt,3,nPoints,3);
- MUL(SXt,S,Xt,2,nPoints,3);
- INV(invXXt,XXt,3,3);
- MUL(M,SXt,invXXt,2,3,3);
-//PRINT(M,2,3);
-
-// Double checking work
-FLT S_morph[2][SENSORS_PER_OBJECT];
-MUL(S_morph,M,X,2,3,nPoints);
-for (i=0; i<nPoints; i++) { S_morph[0][i]+=sbar[0]; S_morph[1][i]+=sbar[1]; }
+ TRANSP(Xt, X, 3, nPoints);
+ MUL(XXt, X, Xt, 3, nPoints, 3);
+ MUL(SXt, S, Xt, 2, nPoints, 3);
+ INV(invXXt, XXt, 3, 3);
+ MUL(M, SXt, invXXt, 2, 3, 3);
+ // PRINT(M,2,3);
+
+ // Double checking work
+ FLT S_morph[2][SENSORS_PER_OBJECT];
+ MUL(S_morph, M, X, 2, 3, nPoints);
+	for (i = 0; i < nPoints; i++) {
+		S_morph[0][i] += sbar[0];
+		S_morph[1][i] += sbar[1];
+	}
//--------------------
// Solve for the non-trivial vector
@@ -218,10 +220,10 @@ for (i=0; i<nPoints; i++) { S_morph[0][i]+=sbar[0]; S_morph[1][i]+=sbar[1]; }
FLT B[3][1] = { {0.0}, {0.0}, {1.0} };
FLT inv_uM[3][3];
FLT uf[3][1];
- INV(inv_uM,uM,3,3);
- MUL(uf,inv_uM,B,3,3,1);
-
- //--------------------
+ INV(inv_uM, uM, 3, 3);
+ MUL(uf, inv_uM, B, 3, 3, 1);
+
+ //--------------------
// Solve for unit length vector
// f that goes into the camera
//--------------------
@@ -250,10 +252,10 @@ for (i=0; i<nPoints; i++) { S_morph[0][i]+=sbar[0]; S_morph[1][i]+=sbar[1]; }
// uhat,rhat
//--------------------
FLT uhat[2][1], rhat[2][1], fhat[2][1];
- MUL(fhat,M,f,2,3,1);
- MUL(uhat,M,u,2,3,1);
- MUL(rhat,M,r,2,3,1);
- FLT fhat_len = sqrt( fhat[0][0]*fhat[0][0] + fhat[1][0]*fhat[1][0] );
+ MUL(fhat, M, f, 2, 3, 1);
+ MUL(uhat, M, u, 2, 3, 1);
+ MUL(rhat, M, r, 2, 3, 1);
+ FLT fhat_len = sqrt( fhat[0][0]*fhat[0][0] + fhat[1][0]*fhat[1][0] );
FLT uhat_len = sqrt( uhat[0][0]*uhat[0][0] + uhat[1][0]*uhat[1][0] );
FLT rhat_len = sqrt( rhat[0][0]*rhat[0][0] + rhat[1][0]*rhat[1][0] );
FLT urhat_len = 0.5 * (uhat_len + rhat_len);
diff --git a/src/poser_epnp.c b/src/poser_epnp.c
index f2d0062..dfe86ff 100644
--- a/src/poser_epnp.c
+++ b/src/poser_epnp.c
@@ -37,8 +37,6 @@ static SurvivePose solve_correspondence(SurviveObject *so, epnp *pnp, bool camer
cvTranspose(&R, &R);
// Then 'tvec = -R * tvec'
cvGEMM(&R, &Tmp, -1, 0, 0, &T, 0);
- print_mat(&R);
- print_mat(&T);
}
FLT tmp[4];
diff --git a/src/poser_sba.c b/src/poser_sba.c
new file mode 100644
index 0000000..125c52f
--- /dev/null
+++ b/src/poser_sba.c
@@ -0,0 +1,329 @@
+#ifndef USE_DOUBLE
+#define FLT double
+#define USE_DOUBLE
+#endif
+
+#include <sba/sba.h>
+
+#include "poser.h"
+#include <survive.h>
+
+#include "assert.h"
+#include "linmath.h"
+#include "string.h"
+#include "survive_config.h"
+#include "survive_reproject.h"
+
+#include "math.h"
+
+typedef struct {
+ survive_calibration_config calibration_config;
+ PoserData *pdfs;
+ SurviveObject *so;
+} sba_context;
+
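+// "proj" callback for sba_mot_levmar: given lighthouse (camera) j's pose in aj, predict the
+// two sweep angles xij that sensor i of the tracked object should produce.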
+void metric_function(int j, int i, double *aj, double *xij, void *adata) {
+ sba_context *ctx = (sba_context *)(adata);
+ SurviveObject *so = ctx->so;
+
+ survive_reproject_from_pose_with_config(so->ctx, &ctx->calibration_config, j, (SurvivePose *)aj,
+ &so->sensor_locations[i * 3], xij);
+}
+
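+// Builds the SBA visibility mask and measurement vector from a full-scene capture: one
+// slot per (sensor, lighthouse) pair, skipping pairs without valid sweep lengths.
+// Returns the number of angle values written into meas.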
+size_t construct_input(const SurviveObject *so, PoserDataFullScene *pdfs, char *vmask, double *meas) {
+ size_t measCount = 0;
+ size_t size = so->sensor_ct * NUM_LIGHTHOUSES; // One set per lighthouse
+ for (size_t sensor = 0; sensor < so->sensor_ct; sensor++) {
+ for (size_t lh = 0; lh < 2; lh++) {
+ FLT *l = pdfs->lengths[sensor][lh];
+ if (l[0] < 0 || l[1] < 0) {
+ vmask[sensor * NUM_LIGHTHOUSES + lh] = 0;
+ continue;
+ }
+
+ double *angles = pdfs->angles[sensor][lh];
+ vmask[sensor * NUM_LIGHTHOUSES + lh] = 1;
+
+ meas[measCount++] = angles[0];
+ meas[measCount++] = angles[1];
+ }
+ }
+ return measCount;
+}
+
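+// Like construct_input, but pulls the angles from the live sensor-activation history,
+// keeping only (sensor, lighthouse) pairs that are still valid at pdl->timecode.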
+size_t construct_input_from_scene(const SurviveObject *so, PoserDataLight *pdl, SurviveSensorActivations *scene,
+ char *vmask, double *meas) {
+ size_t rtn = 0;
+
+ for (size_t sensor = 0; sensor < so->sensor_ct; sensor++) {
+ for (size_t lh = 0; lh < 2; lh++) {
+ if (SurviveSensorActivations_isPairValid(scene, SurviveSensorActivations_default_tolerance, pdl->timecode,
+ sensor, lh)) {
+ double *a = scene->angles[sensor][lh];
+ vmask[sensor * NUM_LIGHTHOUSES + lh] = 1;
+ meas[rtn++] = a[0];
+ meas[rtn++] = a[1];
+ } else {
+ vmask[sensor * NUM_LIGHTHOUSES + lh] = 0;
+ }
+ }
+ }
+ return rtn;
+}
+
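+// Callbacks installed on the seed poser's PoserData header so the lighthouse poses /
+// object pose it produces can be captured and used as SBA starting values.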
+void sba_set_cameras(SurviveObject *so, uint8_t lighthouse, SurvivePose *pose, void *user) {
+ SurvivePose *poses = (SurvivePose *)(user);
+ poses[lighthouse] = *pose;
+}
+
+typedef struct {
+ bool hasInfo;
+ SurvivePose poses;
+} sba_set_position_t;
+
+void sba_set_position(SurviveObject *so, uint8_t lighthouse, SurvivePose *new_pose, void *_user) {
+ sba_set_position_t *user = _user;
+ assert(user->hasInfo == false);
+ user->hasInfo = 1;
+ user->poses = *new_pose;
+}
+void *GetDriver(const char *name);
+
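+// "proj" callback for sba_str_levmar: the object pose is the single "point" being
+// optimized and every (sensor, lighthouse) pair acts as its own "camera", so the image
+// index j packs the sensor index (j >> 1) and the lighthouse (j & 1).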
+void str_metric_function(int j, int i, double *bi, double *xij, void *adata) {
+ SurvivePose obj = *(SurvivePose *)bi;
+ int sensor_idx = j >> 1;
+ int lh = j & 1;
+
+ sba_context *ctx = (sba_context *)(adata);
+ SurviveObject *so = ctx->so;
+
+ assert(lh < 2);
+ assert(sensor_idx < so->sensor_ct);
+
+ quatnormalize(obj.Rot, obj.Rot);
+ FLT xyz[3];
+ ApplyPoseToPoint(xyz, obj.Pos, &so->sensor_locations[sensor_idx * 3]);
+
+ // std::cerr << "Processing " << sensor_idx << ", " << lh << std::endl;
+ SurvivePose *camera = &so->ctx->bsd[lh].Pose;
+ survive_reproject_from_pose_with_config(so->ctx, &ctx->calibration_config, lh, camera, xyz, xij);
+}
+
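+// Refines the object pose only (lighthouse poses held fixed), seeded by the configured
+// SBASeedPoser; returns a reprojection-error figure derived from info[1], or -1 if the
+// solve could not be attempted.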
+static double run_sba_find_3d_structure(survive_calibration_config options, PoserDataLight *pdl, SurviveObject *so,
+ SurviveSensorActivations *scene, int max_iterations /* = 50*/,
+ double max_reproj_error /* = 0.005*/) {
+ double *covx = 0;
+
+ char *vmask = malloc(sizeof(char) * so->sensor_ct * NUM_LIGHTHOUSES);
+ double *meas = malloc(sizeof(double) * 2 * so->sensor_ct * NUM_LIGHTHOUSES);
+ size_t meas_size = construct_input_from_scene(so, pdl, scene, vmask, meas);
+
+ static int failure_count = 500;
+ if (so->ctx->bsd[0].PositionSet == 0 || so->ctx->bsd[1].PositionSet == 0 || meas_size < 7) {
+ if (meas_size < 7 && failure_count++ == 500) {
+ SurviveContext *ctx = so->ctx;
+ SV_INFO("Can't solve for position with just %lu measurements", meas_size);
+ failure_count = 0;
+ }
+ free(vmask);
+ free(meas);
+ return -1;
+ }
+ failure_count = 0;
+
+ SurvivePose soLocation = so->OutPose;
+ bool currentPositionValid = quatmagnitude(&soLocation.Rot[0]);
+
+ {
+ const char *subposer = config_read_str(so->ctx->global_config_values, "SBASeedPoser", "PoserEPNP");
+ PoserCB driver = (PoserCB)GetDriver(subposer);
+ SurviveContext *ctx = so->ctx;
+ if (driver) {
+ PoserData hdr = pdl->hdr;
+ memset(&pdl->hdr, 0, sizeof(pdl->hdr)); // Clear callback functions
+ pdl->hdr.pt = hdr.pt;
+ pdl->hdr.rawposeproc = sba_set_position;
+
+ sba_set_position_t locations = {};
+ pdl->hdr.userdata = &locations;
+ driver(so, &pdl->hdr);
+ pdl->hdr = hdr;
+
+ if (locations.hasInfo == false) {
+ free(vmask);
+ free(meas);
+
+ return -1;
+ } else if (locations.hasInfo) {
+ soLocation = locations.poses;
+ }
+ } else {
+ SV_INFO("Not using a seed poser for SBA; results will likely be way off");
+ }
+ }
+
+ double opts[SBA_OPTSSZ] = {};
+ double info[SBA_INFOSZ] = {};
+
+ sba_context ctx = {options, &pdl->hdr, so};
+
+ opts[0] = SBA_INIT_MU;
+ opts[1] = SBA_STOP_THRESH;
+ opts[2] = SBA_STOP_THRESH;
+	opts[3] = SBA_STOP_THRESH; // max_reproj_error * meas.size();
+ opts[4] = 0.0;
+
+ int status = sba_str_levmar(1, // Number of 3d points
+ 0, // Number of 3d points to fix in spot
+ NUM_LIGHTHOUSES * so->sensor_ct, vmask,
+ soLocation.Pos, // Reads as the full pose though
+ 7, // pnp -- SurvivePose
+ meas, // x* -- measurement data
+ 0, // cov data
+ 2, // mnp -- 2 points per image
+ str_metric_function,
+								0, // jacobian of metric_func
+ &ctx, // user data
+ max_iterations, // Max iterations
+ 0, // verbosity
+ opts, // options
+ info); // info
+
+ if (status > 0) {
+ quatnormalize(soLocation.Rot, soLocation.Rot);
+ PoserData_poser_raw_pose_func(&pdl->hdr, so, 1, &soLocation);
+ }
+
+ {
+ SurviveContext *ctx = so->ctx;
+ // Docs say info[0] should be divided by meas; I don't buy it really...
+ static int cnt = 0;
+ if (cnt++ > 1000 || meas_size < 8) {
+ SV_INFO("%f original reproj error for %lu meas", (info[0] / meas_size * 2), meas_size);
+ SV_INFO("%f cur reproj error", (info[1] / meas_size * 2));
+ cnt = 0;
+ }
+ }
+
+ free(vmask);
+ free(meas);
+
+ return info[1] / meas_size * 2;
+}
+
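+// Refines both lighthouse poses from a full calibration scene via sba_mot_levmar, seeding
+// them from the configured sub-poser, and reports the result through the lighthouse pose
+// callback.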
+static double run_sba(survive_calibration_config options, PoserDataFullScene *pdfs, SurviveObject *so,
+ int max_iterations /* = 50*/, double max_reproj_error /* = 0.005*/) {
+ double *covx = 0;
+
+ char *vmask = malloc(sizeof(char) * so->sensor_ct * NUM_LIGHTHOUSES);
+ double *meas = malloc(sizeof(double) * 2 * so->sensor_ct * NUM_LIGHTHOUSES);
+ size_t meas_size = construct_input(so, pdfs, vmask, meas);
+
+ SurvivePose camera_params[2] = {so->ctx->bsd[0].Pose, so->ctx->bsd[1].Pose};
+
+ if (true || so->ctx->bsd[0].PositionSet == 0 || so->ctx->bsd[1].PositionSet == 0) {
+ const char *subposer = config_read_str(so->ctx->global_config_values, "SBASeedPoser", "PoserEPNP");
+ PoserCB driver = (PoserCB)GetDriver(subposer);
+ SurviveContext *ctx = so->ctx;
+ if (driver) {
+ SV_INFO("Using %s seed poser for SBA", subposer);
+ PoserData hdr = pdfs->hdr;
+ memset(&pdfs->hdr, 0, sizeof(pdfs->hdr)); // Clear callback functions
+ pdfs->hdr.pt = hdr.pt;
+ pdfs->hdr.lighthouseposeproc = sba_set_cameras;
+ pdfs->hdr.userdata = camera_params;
+ driver(so, &pdfs->hdr);
+ pdfs->hdr = hdr;
+ } else {
+ SV_INFO("Not using a seed poser for SBA; results will likely be way off");
+ for (int i = 0; i < 2; i++) {
+ so->ctx->bsd[i].Pose = (SurvivePose){};
+ so->ctx->bsd[i].Pose.Rot[0] = 1.;
+ }
+ }
+ // opencv_solver_poser_cb(so, (PoserData *)pdfs);
+ // PoserCharlesSlow(so, (PoserData *)pdfs);
+ }
+
+ double opts[SBA_OPTSSZ] = {};
+ double info[SBA_INFOSZ] = {};
+
+ sba_context ctx = {options, &pdfs->hdr, so};
+
+ opts[0] = SBA_INIT_MU;
+ opts[1] = SBA_STOP_THRESH;
+ opts[2] = SBA_STOP_THRESH;
+	opts[3] = SBA_STOP_THRESH; // max_reproj_error * meas.size();
+ opts[4] = 0.0;
+
+ int status = sba_mot_levmar(so->sensor_ct, // number of 3d points
+ NUM_LIGHTHOUSES, // Number of cameras -- 2 lighthouses
+ 0, // Number of cameras to not modify
+ vmask, // boolean vis mask
+ (double *)&camera_params[0], // camera parameters
+								sizeof(SurvivePose) / sizeof(double), // The number of doubles in a camera param
+ meas, // 2d points for 3d objs
+ covx, // covariance of measurement. Null sets to identity
+ 2, // 2 points per image
+ metric_function,
+								0, // jacobian of metric_func
+ &ctx, // user data
+ max_iterations, // Max iterations
+ 0, // verbosity
+ opts, // options
+ info); // info
+
+ if (status >= 0) {
+ PoserData_lighthouse_pose_func(&pdfs->hdr, so, 0, &camera_params[0]);
+ PoserData_lighthouse_pose_func(&pdfs->hdr, so, 1, &camera_params[1]);
+ }
+ // Docs say info[0] should be divided by meas; I don't buy it really...
+ // std::cerr << info[0] / meas.size() * 2 << " original reproj error" << std::endl;
+
+ free(vmask);
+ free(meas);
+
+ {
+ SurviveContext *ctx = so->ctx;
+ // Docs say info[0] should be divided by meas; I don't buy it really...
+ SV_INFO("%f original reproj error for %lu meas", (info[0] / meas_size * 2), meas_size);
+ SV_INFO("%f cur reproj error", (info[1] / meas_size * 2));
+ }
+
+ return info[1] / meas_size * 2;
+}
+
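+// Poser entry point: light events refine the object pose, full-scene events refine the
+// lighthouse poses, and anything else is forwarded to the configured seed poser.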
+int PoserSBA(SurviveObject *so, PoserData *pd) {
+ switch (pd->pt) {
+ case POSERDATA_LIGHT: {
+ SurviveSensorActivations *scene = &so->activations;
+
+ PoserDataLight *lightData = (PoserDataLight *)pd;
+
+ survive_calibration_config config = *survive_calibration_default_config();
+ FLT error = run_sba_find_3d_structure(config, lightData, so, scene, 50, .5);
+ return 0;
+ }
+ case POSERDATA_FULL_SCENE: {
+ PoserDataFullScene *pdfs = (PoserDataFullScene *)(pd);
+ survive_calibration_config config = *survive_calibration_default_config();
+ // std::cerr << "Running sba with " << config << std::endl;
+ double error = run_sba(config, pdfs, so, 50, .005);
+ // std::cerr << "Average reproj error: " << error << std::endl;
+ return 0;
+ }
+ default: {
+ const char *subposer = config_read_str(so->ctx->global_config_values, "SBASeedPoser", "PoserEPNP");
+ PoserCB driver = (PoserCB)GetDriver(subposer);
+ if (driver) {
+ return driver(so, pd);
+ }
+ break;
+ }
+ }
+ return -1;
+}
+
+REGISTER_LINKTIME(PoserSBA);
diff --git a/src/poser_sba.h b/src/poser_sba.h
new file mode 100644
index 0000000..8a49122
--- /dev/null
+++ b/src/poser_sba.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <stdint.h>
+#define FLT double
+#include "survive_reproject.h"
+#include <libsurvive/poser.h>
+#include <ostream>
+
+struct SurviveObject;
+
+int sba_bruteforce_config_solver_cb(SurviveObject *so, PoserData *pd);
+int sba_solver_poser_cb(SurviveObject *so, PoserData *pd);
+
+std::ostream &operator<<(std::ostream &o, const survive_calibration_options_config &self);
+std::ostream &operator<<(std::ostream &o, const survive_calibration_config &self); \ No newline at end of file