From c9cf2446c5eb18a29119c7dfe90c5b62f6b374c5 Mon Sep 17 00:00:00 2001
From: Justin Berger
Date: Thu, 15 Mar 2018 23:14:29 -0600
Subject: Add sba poser

---
 redist/sba/compiler.h        |   42 +
 redist/sba/sba.h             |  145 +++
 redist/sba/sba_chkjac.c      |  468 ++++++++
 redist/sba/sba_chkjac.h      |   69 ++
 redist/sba/sba_crsm.c        |  338 ++++++
 redist/sba/sba_lapack.c      | 1730 +++++++++++++++++++++++++++
 redist/sba/sba_levmar.c      | 2715 ++++++++++++++++++++++++++++++
 redist/sba/sba_levmar_wrap.c |  917 ++++++++++++++
 8 files changed, 6424 insertions(+)
 create mode 100644 redist/sba/compiler.h
 create mode 100644 redist/sba/sba.h
 create mode 100644 redist/sba/sba_chkjac.c
 create mode 100644 redist/sba/sba_chkjac.h
 create mode 100644 redist/sba/sba_crsm.c
 create mode 100644 redist/sba/sba_lapack.c
 create mode 100644 redist/sba/sba_levmar.c
 create mode 100644 redist/sba/sba_levmar_wrap.c

diff --git a/redist/sba/compiler.h b/redist/sba/compiler.h
new file mode 100644
index 0000000..09c01f1
--- /dev/null
+++ b/redist/sba/compiler.h
@@ -0,0 +1,42 @@
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// Compiler-specific definitions for sparse bundle adjustment based on the
+//// Levenberg - Marquardt minimization algorithm
+//// Copyright (C) 2004-2008 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+////
+//// This program is distributed in the hope that it will be useful,
+//// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//// GNU General Public License for more details.
+////
+///////////////////////////////////////////////////////////////////////////////////
+
+#ifndef _COMPILER_H_
+#define _COMPILER_H_
+
+/* note: intel's icc defines both __ICC & __INTEL_COMPILER.
+ * Also, some compilers other than gcc define __GNUC__,
+ * therefore gcc should be checked last
+ */
+#ifdef _MSC_VER
+#define inline __inline // MSVC
+#elif !defined(__ICC) && !defined(__INTEL_COMPILER) && !defined(__GNUC__)
+#define inline // other than MSVC, ICC, GCC: define empty
+#endif
+
+#ifdef _MSC_VER
+#define SBA_FINITE _finite // MSVC
+#elif defined(__ICC) || defined(__INTEL_COMPILER) || defined(__GNUC__)
+#define SBA_FINITE isfinite // ICC, GCC
+#else
+#define SBA_FINITE isfinite // other than MSVC, ICC, GCC, let's hope this will work
+#endif
+
+#endif /* _COMPILER_H_ */

diff --git a/redist/sba/sba.h b/redist/sba/sba.h
new file mode 100644
index 0000000..dd7b2b8
--- /dev/null
+++ b/redist/sba/sba.h
@@ -0,0 +1,145 @@
+/*
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// Prototypes and definitions for sparse bundle adjustment
+//// Copyright (C) 2004-2009 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+//// +//// This program is distributed in the hope that it will be useful, +//// but WITHOUT ANY WARRANTY; without even the implied warranty of +//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//// GNU General Public License for more details. +//// +/////////////////////////////////////////////////////////////////////////////////// +*/ + +#ifndef _SBA_H_ +#define _SBA_H_ + +/**************************** Start of configuration options ****************************/ + +/* define the following if a trailing underscore should be appended to lapack functions names */ +#define SBA_APPEND_UNDERSCORE_SUFFIX // undef this for AIX + +/* define this to make linear solver routines use the minimum amount of memory + * possible. This lowers the memory requirements of large BA problems but will + * most likely induce a penalty on performance + */ +/*#define SBA_LS_SCARCE_MEMORY */ + +/* define this to save some memory by storing the weight matrices for the image + * projections (i.e. wght in the expert drivers) into the memory containing their + * covariances (i.e. covx arg). Note that this overwrites covx, making changes + * noticeable by the caller + */ +/*#define SBA_DESTROY_COVS */ + +/********* End of configuration options, no changes necessary beyond this point *********/ + +#ifdef __cplusplus +extern "C" { +#endif + +#define SBA_MIN_DELTA 1E-06 // finite differentiation minimum delta +#define SBA_DELTA_SCALE 1E-04 // finite differentiation delta scale + +#define SBA_OPTSSZ 5 +#define SBA_INFOSZ 10 +#define SBA_ERROR -1 +#define SBA_INIT_MU 1E-03 +#define SBA_STOP_THRESH 1E-12 +#define SBA_CG_NOPREC 0 +#define SBA_CG_JACOBI 1 +#define SBA_CG_SSOR 2 +#define SBA_VERSION "1.6 (Aug. 2009)" + +/* Sparse matrix representation using Compressed Row Storage (CRS) format. + * See http://www.netlib.org/linalg/html_templates/node91.html#SECTION00931100000000000000 + */ + +struct sba_crsm { + int nr, nc; /* #rows, #cols for the sparse matrix */ + int nnz; /* number of nonzero array elements */ + int *val; /* storage for nonzero array elements. size: nnz */ + int *colidx; /* column indexes of nonzero elements. size: nnz */ + int *rowptr; /* locations in val that start a row. size: nr+1. 
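+              * (e.g., the 3x4 matrix [5 0 0 8; 0 0 3 0; 0 6 0 0] has nnz=4 and
+              *  is stored as val={5, 8, 3, 6}, colidx={0, 3, 2, 1},
+              *  rowptr={0, 2, 3, 4})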
+ * By convention, rowptr[nr]=nnz + */ +}; + +/* sparse LM */ + +/* simple drivers */ +extern int sba_motstr_levmar(const int n, const int ncon, const int m, const int mcon, char *vmask, double *p, + const int cnp, const int pnp, double *x, double *covx, const int mnp, + void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata), + void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij, + void *adata), + void *adata, const int itmax, const int verbose, const double opts[SBA_OPTSSZ], + double info[SBA_INFOSZ]); + +extern int sba_mot_levmar(const int n, const int m, const int mcon, char *vmask, double *p, const int cnp, double *x, + double *covx, const int mnp, void (*proj)(int j, int i, double *aj, double *xij, void *adata), + void (*projac)(int j, int i, double *aj, double *Aij, void *adata), void *adata, + const int itmax, const int verbose, const double opts[SBA_OPTSSZ], double info[SBA_INFOSZ]); + +extern int sba_str_levmar(const int n, const int ncon, const int m, char *vmask, double *p, const int pnp, double *x, + double *covx, const int mnp, void (*proj)(int j, int i, double *bi, double *xij, void *adata), + void (*projac)(int j, int i, double *bi, double *Bij, void *adata), void *adata, + const int itmax, const int verbose, const double opts[SBA_OPTSSZ], double info[SBA_INFOSZ]); + +/* expert drivers */ +extern int +sba_motstr_levmar_x(const int n, const int ncon, const int m, const int mcon, char *vmask, double *p, const int cnp, + const int pnp, double *x, double *covx, const int mnp, + void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), + void *adata, const int itmax, const int verbose, const double opts[SBA_OPTSSZ], + double info[SBA_INFOSZ]); + +extern int sba_mot_levmar_x( + const int n, const int m, const int mcon, char *vmask, double *p, const int cnp, double *x, double *covx, + const int mnp, void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), void *adata, + const int itmax, const int verbose, const double opts[SBA_OPTSSZ], double info[SBA_INFOSZ]); + +extern int sba_str_levmar_x( + const int n, const int ncon, const int m, char *vmask, double *p, const int pnp, double *x, double *covx, + const int mnp, void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), void *adata, + const int itmax, const int verbose, const double opts[SBA_OPTSSZ], double info[SBA_INFOSZ]); + +/* interfaces to LAPACK routines: solution of linear systems, matrix inversion, cholesky of inverse */ +extern int sba_Axb_QR(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_QRnoQ(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_Chol(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_LDLt(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_LU(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_SVD(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_BK(double *A, double *B, double *x, int m, int iscolmaj); +extern int sba_Axb_CG(double *A, double *B, double *x, int m, int niter, double 
eps, int prec, int iscolmaj);
+extern int sba_symat_invert_LU(double *A, int m);
+extern int sba_symat_invert_Chol(double *A, int m);
+extern int sba_symat_invert_BK(double *A, int m);
+extern int sba_mat_cholinv(double *A, double *B, int m);
+
+/* CRS sparse matrices manipulation routines */
+extern void sba_crsm_alloc(struct sba_crsm *sm, int nr, int nc, int nnz);
+extern void sba_crsm_free(struct sba_crsm *sm);
+extern int sba_crsm_elmidx(struct sba_crsm *sm, int i, int j);
+extern int sba_crsm_elmidxp(struct sba_crsm *sm, int i, int j, int jp, int jpidx);
+extern int sba_crsm_row_elmidxs(struct sba_crsm *sm, int i, int *vidxs, int *jidxs);
+extern int sba_crsm_col_elmidxs(struct sba_crsm *sm, int j, int *vidxs, int *iidxs);
+/* extern int sba_crsm_common_row(struct sba_crsm *sm, int j, int k); */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SBA_H_ */

diff --git a/redist/sba/sba_chkjac.c b/redist/sba/sba_chkjac.c
new file mode 100644
index 0000000..7cb53ec
--- /dev/null
+++ b/redist/sba/sba_chkjac.c
@@ -0,0 +1,468 @@
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// Verification routines for the jacobians employed in the expert & simple drivers
+//// for sparse bundle adjustment based on the Levenberg - Marquardt minimization algorithm
+//// Copyright (C) 2005-2008 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+////
+//// This program is distributed in the hope that it will be useful,
+//// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//// GNU General Public License for more details.
+////
+///////////////////////////////////////////////////////////////////////////////////
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <float.h>
+
+#include "compiler.h"
+#include "sba.h"
+
+#define emalloc(sz) emalloc_(__FILE__, __LINE__, sz)
+
+#define FABS(x) (((x) >= 0) ? (x) : -(x))
+
+/* auxiliary memory allocation routine with error checking */
+inline static void *emalloc_(char *file, int line, size_t sz) {
+    void *ptr;
+
+    ptr = (void *)malloc(sz);
+    if (ptr == NULL) {
+        fprintf(stderr, "SBA: memory allocation request for %zu bytes failed in file %s, line %d, exiting", sz, file,
+                line);
+        exit(1);
+    }
+
+    return ptr;
+}
+
+/*
+ * Check the jacobian of a projection function in nvars variables
+ * evaluated at a point p, for consistency with the function itself.
+ * Expert version
+ *
+ * Based on fortran77 subroutine CHKDER by
+ * Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. More
+ * Argonne National Laboratory. MINPACK project. March 1980.
+ *
+ *
+ * func points to a function from R^{nvars} --> R^{nobs}: Given a p in R^{nvars}
+ *      it yields hx in R^{nobs}
+ * jacf points to a function implementing the jacobian of func, whose consistency with
+ *      func is to be tested. Given a p in R^{nvars}, jacf computes into the nvis*(Asz+Bsz)
+ *      matrix jac the jacobian of func at p. Note the jacobian is sparse, consisting of
+ *      all A_ij, B_ij and that row i of jac corresponds to the gradient of the i-th
+ *      component of func, evaluated at p.
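+ *      (For instance, with mnp=2, cnp=6 and pnp=3, each A_ij is a 2x6 block and
+ *      each B_ij a 2x3 block, so every visible projection contributes
+ *      Asz+Bsz = 12+6 = 18 consecutive doubles to jac.)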
+ * p is an input array of length nvars containing the point of evaluation. + * idxij, rcidxs, rcsubs, ncon, mcon, cnp, pnp, mnp are as usual. Note that if cnp=0 or + * pnp=0 a jacobian corresponding resp. to motion or camera parameters + * only is assumed. + * func_adata, jac_adata point to possible additional data and are passed + * uninterpreted to func, jacf respectively. + * err is an array of length nobs. On output, err contains measures + * of correctness of the respective gradients. if there is + * no severe loss of significance, then if err[i] is 1.0 the + * i-th gradient is correct, while if err[i] is 0.0 the i-th + * gradient is incorrect. For values of err between 0.0 and 1.0, + * the categorization is less certain. In general, a value of + * err[i] greater than 0.5 indicates that the i-th gradient is + * probably correct, while a value of err[i] less than 0.5 + * indicates that the i-th gradient is probably incorrect. + * + * CAUTION: THIS FUNCTION IS NOT 100% FOOLPROOF. The + * following excerpt comes from CHKDER's documentation: + * + * "The function does not perform reliably if cancellation or + * rounding errors cause a severe loss of significance in the + * evaluation of a function. therefore, none of the components + * of p should be unusually small (in particular, zero) or any + * other value which may cause loss of significance." + */ + +void sba_motstr_chkjac_x(void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, + void *adata), + void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, + void *adata), + double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int ncon, int mcon, int cnp, + int pnp, int mnp, void *func_adata, void *jac_adata) { + const double factor = 100.0, one = 1.0, zero = 0.0; + double *fvec, *fjac, *pp, *fvecp, *buf, *err; + + int nvars, nobs, m, n, Asz, Bsz, ABsz, nnz; + register int i, j, ii, jj; + double eps, epsf, temp, epsmch, epslog; + register double *ptr1, *ptr2, *pab; + double *pa, *pb; + int fvec_sz, pp_sz, fvecp_sz, numerr = 0; + + nobs = idxij->nnz * mnp; + n = idxij->nr; + m = idxij->nc; + nvars = m * cnp + n * pnp; + epsmch = DBL_EPSILON; + eps = sqrt(epsmch); + + Asz = mnp * cnp; + Bsz = mnp * pnp; + ABsz = Asz + Bsz; + fjac = (double *)emalloc(idxij->nnz * ABsz * sizeof(double)); + + fvec_sz = fvecp_sz = nobs; + pp_sz = nvars; + buf = (double *)emalloc((fvec_sz + pp_sz + fvecp_sz) * sizeof(double)); + fvec = buf; + pp = fvec + fvec_sz; + fvecp = pp + pp_sz; + + err = (double *)emalloc(nobs * sizeof(double)); + + /* compute fvec=func(p) */ + (*func)(p, idxij, rcidxs, rcsubs, fvec, func_adata); + + /* compute the jacobian at p */ + (*jacf)(p, idxij, rcidxs, rcsubs, fjac, jac_adata); + + /* compute pp */ + for (j = 0; j < nvars; ++j) { + temp = eps * FABS(p[j]); + if (temp == zero) + temp = eps; + pp[j] = p[j] + temp; + } + + /* compute fvecp=func(pp) */ + (*func)(pp, idxij, rcidxs, rcsubs, fvecp, func_adata); + + epsf = factor * epsmch; + epslog = log10(eps); + + for (i = 0; i < nobs; ++i) + err[i] = zero; + + pa = p; + pb = p + m * cnp; + for (i = 0; i < n; ++i) { + nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, + rcsubs); /* find nonzero A_ij, B_ij, j=0...m-1, actual column numbers in rcsubs */ + for (j = 0; j < nnz; ++j) { + ptr2 = err + idxij->val[rcidxs[j]] * mnp; // set ptr2 to point into err + + if (cnp && rcsubs[j] >= mcon) { // A_ij is nonzero + ptr1 = fjac + idxij->val[rcidxs[j]] * ABsz; // set ptr1 to point to A_ij + pab = pa + rcsubs[j] * cnp; + for (jj = 
0; jj < cnp; ++jj) { + temp = FABS(pab[jj]); + if (temp == zero) + temp = one; + + for (ii = 0; ii < mnp; ++ii) + ptr2[ii] += temp * ptr1[ii * cnp + jj]; + } + } + + if (pnp && i >= ncon) { // B_ij is nonzero + ptr1 = fjac + idxij->val[rcidxs[j]] * ABsz + Asz; // set ptr1 to point to B_ij + pab = pb + i * pnp; + for (jj = 0; jj < pnp; ++jj) { + temp = FABS(pab[jj]); + if (temp == zero) + temp = one; + + for (ii = 0; ii < mnp; ++ii) + ptr2[ii] += temp * ptr1[ii * pnp + jj]; + } + } + } + } + + for (i = 0; i < nobs; ++i) { + temp = one; + if (fvec[i] != zero && fvecp[i] != zero && FABS(fvecp[i] - fvec[i]) >= epsf * FABS(fvec[i])) + temp = eps * FABS((fvecp[i] - fvec[i]) / eps - err[i]) / (FABS(fvec[i]) + FABS(fvecp[i])); + err[i] = one; + if (temp > epsmch && temp < eps) + err[i] = (log10(temp) - epslog) / epslog; + if (temp >= eps) + err[i] = zero; + } + + free(fjac); + free(buf); + + for (i = 0; i < n; ++i) { + nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero err_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + if (i < ncon && rcsubs[j] < mcon) + continue; // corresponding gradients are taken to be zero + + ptr1 = err + idxij->val[rcidxs[j]] * mnp; // set ptr1 to point into err + for (ii = 0; ii < mnp; ++ii) + if (ptr1[ii] <= 0.5) { + fprintf(stderr, "SBA: gradient %d (corresponding to element %d of the projection of point %d on " + "camera %d) is %s (err=%g)\n", + idxij->val[rcidxs[j]] * mnp + ii, ii, i, rcsubs[j], + (ptr1[ii] == 0.0) ? "wrong" : "probably wrong", ptr1[ii]); + ++numerr; + } + } + } + if (numerr) + fprintf(stderr, "SBA: found %d suspicious gradients out of %d\n\n", numerr, nobs); + + free(err); + + return; +} + +void sba_mot_chkjac_x( + void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), double *p, + struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int mcon, int cnp, int mnp, void *func_adata, void *jac_adata) { + sba_motstr_chkjac_x(func, jacf, p, idxij, rcidxs, rcsubs, 0, mcon, cnp, 0, mnp, func_adata, jac_adata); +} + +void sba_str_chkjac_x( + void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), + void (*jacf)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), double *p, + struct sba_crsm *idxij, int *rcidxs, int *rcsubs, int ncon, int pnp, int mnp, void *func_adata, void *jac_adata) { + sba_motstr_chkjac_x(func, jacf, p, idxij, rcidxs, rcsubs, ncon, 0, 0, pnp, mnp, func_adata, jac_adata); +} + +#if 0 +/* Routines for directly checking the jacobians supplied to the simple drivers. + * They shouldn't be necessary since these jacobians can be verified indirectly + * through the expert sba_XXX_chkjac_x() routines. + */ + +/*****************************************************************************************/ +// Sample code for using sba_motstr_chkjac(): + + for(i=ncon; i R^{mnp}: Given a p=(aj, bi) in R^{cnp+pnp} + * it yields hx in R^{mnp} + * projac points to a function implementing the jacobian of func, whose consistency with proj + * is to be tested. Given a p in R^{cnp+pnp}, jacf computes into the matrix jac=[Aij | Bij] + * jacobian of proj at p. Note that row i of jac corresponds to the gradient of the i-th + * component of proj, evaluated at p. + * aj, bi are input arrays of lengths cnp, pnp containing the parameters for the point of + * evaluation, i.e. 
j-th camera and i-th point + * jj, ii specify the point (ii) whose projection jacobian in image (jj) is being checked + * cnp, pnp, mnp are as usual. Note that if cnp=0 or + * pnp=0 a jacobian corresponding resp. to motion or camera parameters + * only is assumed. + * func_adata, jac_adata point to possible additional data and are passed + * uninterpreted to func, jacf respectively. + * err is an array of length mnp. On output, err contains measures + * of correctness of the respective gradients. if there is + * no severe loss of significance, then if err[i] is 1.0 the + * i-th gradient is correct, while if err[i] is 0.0 the i-th + * gradient is incorrect. For values of err between 0.0 and 1.0, + * the categorization is less certain. In general, a value of + * err[i] greater than 0.5 indicates that the i-th gradient is + * probably correct, while a value of err[i] less than 0.5 + * indicates that the i-th gradient is probably incorrect. + * + * CAUTION: THIS FUNCTION IS NOT 100% FOOLPROOF. The + * following excerpt comes from CHKDER's documentation: + * + * "The function does not perform reliably if cancellation or + * rounding errors cause a severe loss of significance in the + * evaluation of a function. therefore, none of the components + * of p should be unusually small (in particular, zero) or any + * other value which may cause loss of significance." + */ + +static void sba_chkjac( + union proj_projac *funcs, double *aj, double *bi, int jj, int ii, int cnp, int pnp, int mnp, void *func_adata, void *jac_adata) +{ +const double factor=100.0, one=1.0, zero=0.0; +double *fvec, *fjac, *Aij, *Bij, *ajp, *bip, *fvecp, *buf, *err; + +int Asz, Bsz; +register int i, j; +double eps, epsf, temp, epsmch, epslog; +int fvec_sz, ajp_sz, bip_sz, fvecp_sz, err_sz, numerr=0; + + epsmch=DBL_EPSILON; + eps=sqrt(epsmch); + + Asz=mnp*cnp; Bsz=mnp*pnp; + fjac=(double *)emalloc((Asz+Bsz)*sizeof(double)); + Aij=fjac; + Bij=Aij+Asz; + + fvec_sz=fvecp_sz=mnp; + ajp_sz=cnp; bip_sz=pnp; + err_sz=mnp; + buf=(double *)emalloc((fvec_sz + ajp_sz + bip_sz + fvecp_sz + err_sz)*sizeof(double)); + fvec=buf; + ajp=fvec+fvec_sz; + bip=ajp+ajp_sz; + fvecp=bip+bip_sz; + err=fvecp+fvecp_sz; + + /* compute fvec=proj(p), p=(aj, bi) & the jacobian at p */ + if(cnp && pnp){ + (*(funcs->motstr.proj))(jj, ii, aj, bi, fvec, func_adata); + (*(funcs->motstr.projac))(jj, ii, aj, bi, Aij, Bij, jac_adata); + } + else if(cnp){ + (*(funcs->mot.proj))(jj, ii, aj, fvec, func_adata); + (*(funcs->mot.projac))(jj, ii, aj, Aij, jac_adata); + } + else{ + (*(funcs->str.proj))(jj, ii, bi, fvec, func_adata); + (*(funcs->str.projac))(jj, ii, bi, Bij, jac_adata); + } + + /* compute pp, pp=(ajp, bip) */ + for(j=0; jmotstr.proj))(jj, ii, ajp, bip, fvecp, func_adata); + else if(cnp) + (*(funcs->mot.proj))(jj, ii, ajp, fvecp, func_adata); + else + (*(funcs->str.proj))(jj, ii, bip, fvecp, func_adata); + + epsf=factor*epsmch; + epslog=log10(eps); + + for(i=0; i=epsf*FABS(fvec[i])) + temp=eps*FABS((fvecp[i]-fvec[i])/eps - err[i])/(FABS(fvec[i])+FABS(fvecp[i])); + err[i]=one; + if(temp>epsmch && temp=eps) err[i]=zero; + } + + for(i=0; i +#include + +#include "sba.h" + +static void sba_crsm_print(struct sba_crsm *sm, FILE *fp); +static void sba_crsm_build(struct sba_crsm *sm, int *m, int nr, int nc); + +/* allocate a sparse CRS matrix */ +void sba_crsm_alloc(struct sba_crsm *sm, int nr, int nc, int nnz) { + int msz; + + sm->nr = nr; + sm->nc = nc; + sm->nnz = nnz; + msz = 2 * nnz + nr + 1; + sm->val = (int *)malloc(msz * sizeof(int)); /* required memory is 
allocated in a single step */ + if (!sm->val) { + fprintf(stderr, "SBA: memory allocation request failed in sba_crsm_alloc() [nr=%d, nc=%d, nnz=%d]\n", nr, nc, + nnz); + exit(1); + } + sm->colidx = sm->val + nnz; + sm->rowptr = sm->colidx + nnz; +} + +/* free a sparse CRS matrix */ +void sba_crsm_free(struct sba_crsm *sm) { + sm->nr = sm->nc = sm->nnz = -1; + free(sm->val); + sm->val = sm->colidx = sm->rowptr = NULL; +} + +static void sba_crsm_print(struct sba_crsm *sm, FILE *fp) { + register int i; + + fprintf(fp, "matrix is %dx%d, %d non-zeros\nval: ", sm->nr, sm->nc, sm->nnz); + for (i = 0; i < sm->nnz; ++i) + fprintf(fp, "%d ", sm->val[i]); + fprintf(fp, "\ncolidx: "); + for (i = 0; i < sm->nnz; ++i) + fprintf(fp, "%d ", sm->colidx[i]); + fprintf(fp, "\nrowptr: "); + for (i = 0; i <= sm->nr; ++i) + fprintf(fp, "%d ", sm->rowptr[i]); + fprintf(fp, "\n"); +} + +/* build a sparse CRS matrix from a dense one. intended to serve as an example for sm creation */ +static void sba_crsm_build(struct sba_crsm *sm, int *m, int nr, int nc) { + int nnz; + register int i, j, k; + + /* count nonzeros */ + for (i = nnz = 0; i < nr; ++i) + for (j = 0; j < nc; ++j) + if (m[i * nc + j] != 0) + ++nnz; + + sba_crsm_alloc(sm, nr, nc, nnz); + + /* fill up the sm structure */ + for (i = k = 0; i < nr; ++i) { + sm->rowptr[i] = k; + for (j = 0; j < nc; ++j) + if (m[i * nc + j] != 0) { + sm->val[k] = m[i * nc + j]; + sm->colidx[k++] = j; + } + } + sm->rowptr[nr] = nnz; +} + +/* returns the index of the (i, j) element. No bounds checking! */ +int sba_crsm_elmidx(struct sba_crsm *sm, int i, int j) { + register int low, high, mid, diff; + + low = sm->rowptr[i]; + high = sm->rowptr[i + 1] - 1; + + /* binary search for finding the element at column j */ + while (low <= high) { + /* following early termination test seems to actually slow down the search */ + // if(jcolidx[low] || j>sm->colidx[high]) return -1; /* not found */ + + /* mid=low+((high-low)>>1) ensures no index overflows */ + mid = (low + high) >> 1; //(low+high)/2; + diff = j - sm->colidx[mid]; + if (diff < 0) + high = mid - 1; + else if (diff > 0) + low = mid + 1; + else + return mid; + } + + return -1; /* not found */ +} + +/* similarly to sba_crsm_elmidx() above, returns the index of the (i, j) element using the + * fact that the index of element (i, jp) was previously found to be jpidx. This can be + * slightly faster than sba_crsm_elmidx(). No bounds checking! + */ +int sba_crsm_elmidxp(struct sba_crsm *sm, int i, int j, int jp, int jpidx) { + register int low, high, mid, diff; + + diff = j - jp; + if (diff > 0) { + low = jpidx + 1; + high = sm->rowptr[i + 1] - 1; + } else if (diff == 0) + return jpidx; + else { /* diff<0 */ + low = sm->rowptr[i]; + high = jpidx - 1; + } + + /* binary search for finding the element at column j */ + while (low <= high) { + /* following early termination test seems to actually slow down the search */ + // if(jcolidx[low] || j>sm->colidx[high]) return -1; /* not found */ + + /* mid=low+((high-low)>>1) ensures no index overflows */ + mid = (low + high) >> 1; //(low+high)/2; + diff = j - sm->colidx[mid]; + if (diff < 0) + high = mid - 1; + else if (diff > 0) + low = mid + 1; + else + return mid; + } + + return -1; /* not found */ +} + +/* returns the number of nonzero elements in row i and + * fills up the vidxs and jidxs arrays with the val and column + * indexes of the elements found, respectively. + * vidxs and jidxs are assumed preallocated and of max. 
size sm->nc + */ +int sba_crsm_row_elmidxs(struct sba_crsm *sm, int i, int *vidxs, int *jidxs) { + register int j, k; + + for (j = sm->rowptr[i], k = 0; j < sm->rowptr[i + 1]; ++j, ++k) { + vidxs[k] = j; + jidxs[k] = sm->colidx[j]; + } + + return k; +} + +/* returns the number of nonzero elements in col j and + * fills up the vidxs and iidxs arrays with the val and row + * indexes of the elements found, respectively. + * vidxs and iidxs are assumed preallocated and of max. size sm->nr + */ +int sba_crsm_col_elmidxs(struct sba_crsm *sm, int j, int *vidxs, int *iidxs) { + register int *nextrowptr = sm->rowptr + 1; + register int i, l; + register int low, high, mid, diff; + + for (i = l = 0; i < sm->nr; ++i) { + low = sm->rowptr[i]; + high = nextrowptr[i] - 1; + + /* binary search attempting to find an element at column j */ + while (low <= high) { + // if(jcolidx[low] || j>sm->colidx[high]) break; /* not found */ + + mid = (low + high) >> 1; //(low+high)/2; + diff = j - sm->colidx[mid]; + if (diff < 0) + high = mid - 1; + else if (diff > 0) + low = mid + 1; + else { /* found */ + vidxs[l] = mid; + iidxs[l++] = i; + break; + } + } + } + + return l; +} + +/* a more straighforward (but slower) implementation of the above function */ +/*** +int sba_crsm_col_elmidxs(struct sba_crsm *sm, int j, int *vidxs, int *iidxs) +{ +register int i, k, l; + + for(i=l=0; inr; ++i) + for(k=sm->rowptr[i]; krowptr[i+1]; ++k) + if(sm->colidx[k]==j){ + vidxs[l]=k; + iidxs[l++]=i; + } + + return l; +} +***/ + +#if 0 +/* returns 1 if there exists a row i having columns j and k, + * i.e. a row i s.t. elements (i, j) and (i, k) are nonzero; + * 0 otherwise + */ +int sba_crsm_common_row(struct sba_crsm *sm, int j, int k) +{ +register int i, low, high, mid, diff; + + if(j==k) return 1; + + for(i=0; inr; ++i){ + low=sm->rowptr[i]; + high=sm->rowptr[i+1]-1; + if(jcolidx[low] || j>sm->colidx[high] || /* j not found */ + kcolidx[low] || k>sm->colidx[high]) /* k not found */ + continue; + + /* binary search for finding the element at column j */ + while(low<=high){ + mid=(low+high)>>1; //(low+high)/2; + diff=j-sm->colidx[mid]; + if(diff<0) + high=mid-1; + else if(diff>0) + low=mid+1; + else + goto jfound; + } + + continue; /* j not found */ + +jfound: + if(j>k){ + low=sm->rowptr[i]; + high=mid-1; + } + else{ + low=mid+1; + high=sm->rowptr[i+1]-1; + } + + if(kcolidx[low] || k>sm->colidx[high]) continue; /* k not found */ + + /* binary search for finding the element at column k */ + while(low<=high){ + mid=(low+high)>>1; //(low+high)/2; + diff=k-sm->colidx[mid]; + if(diff<0) + high=mid-1; + else if(diff>0) + low=mid+1; + else /* found */ + return 1; + } + } + + return 0; +} +#endif + +#if 0 + +/* sample code using the above routines */ + +main() +{ +int mat[7][6]={ + {10, 0, 0, 0, -2, 0}, + {3, 9, 0, 0, 0, 3}, + {0, 7, 8, 7, 0, 0}, + {3, 0, 8, 7, 5, 0}, + {0, 8, 0, 9, 9, 13}, + {0, 4, 0, 0, 2, -1}, + {3, 7, 0, 9, 2, 0} +}; + +struct sba_crsm sm; +int i, j, k, l; +int vidxs[7], /* max(6, 7) */ + jidxs[6], iidxs[7]; + + + sba_crsm_build(&sm, mat[0], 7, 6); + sba_crsm_print(&sm, stdout); + + for(i=0; i<7; ++i){ + for(j=0; j<6; ++j) + printf("%3d ", ((k=sba_crsm_elmidx(&sm, i, j))!=-1)? 
sm.val[k] : 0); + printf("\n"); + } + + for(i=0; i<7; ++i){ + k=sba_crsm_row_elmidxs(&sm, i, vidxs, jidxs); + printf("row %d\n", i); + for(l=0; l +#include +#include +#include +#include + +#include "compiler.h" +#include "sba.h" + +#ifdef SBA_APPEND_UNDERSCORE_SUFFIX +#define F77_FUNC(func) func##_ +#else +#define F77_FUNC(func) func +#endif /* SBA_APPEND_UNDERSCORE_SUFFIX */ + +/* declarations of LAPACK routines employed */ + +/* QR decomposition */ +extern int F77_FUNC(dgeqrf)(int *m, int *n, double *a, int *lda, double *tau, double *work, int *lwork, int *info); +extern int F77_FUNC(dorgqr)(int *m, int *n, int *k, double *a, int *lda, double *tau, double *work, int *lwork, + int *info); + +/* solution of triangular system */ +extern int F77_FUNC(dtrtrs)(char *uplo, char *trans, char *diag, int *n, int *nrhs, double *a, int *lda, double *b, + int *ldb, int *info); + +/* Cholesky decomposition, linear system solution and matrix inversion */ +extern int F77_FUNC(dpotf2)(char *uplo, int *n, double *a, int *lda, int *info); /* unblocked Cholesky */ +extern int F77_FUNC(dpotrf)(char *uplo, int *n, double *a, int *lda, int *info); /* block version of dpotf2 */ +extern int F77_FUNC(dpotrs)(char *uplo, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info); +extern int F77_FUNC(dpotri)(char *uplo, int *n, double *a, int *lda, int *info); + +/* LU decomposition, linear system solution and matrix inversion */ +extern int F77_FUNC(dgetrf)(int *m, int *n, double *a, int *lda, int *ipiv, int *info); /* blocked LU */ +extern int F77_FUNC(dgetf2)(int *m, int *n, double *a, int *lda, int *ipiv, int *info); /* unblocked LU */ +extern int F77_FUNC(dgetrs)(char *trans, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, + int *info); +extern int F77_FUNC(dgetri)(int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info); + +/* SVD */ +extern int F77_FUNC(dgesvd)(char *jobu, char *jobvt, int *m, int *n, double *a, int *lda, double *s, double *u, + int *ldu, double *vt, int *ldvt, double *work, int *lwork, int *info); + +/* lapack 3.0 routine, faster than dgesvd() */ +extern int F77_FUNC(dgesdd)(char *jobz, int *m, int *n, double *a, int *lda, double *s, double *u, int *ldu, double *vt, + int *ldvt, double *work, int *lwork, int *iwork, int *info); + +/* Bunch-Kaufman factorization of a real symmetric matrix A, solution of linear systems and matrix inverse */ +extern int F77_FUNC(dsytrf)(char *uplo, int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, + int *info); /* blocked ver. */ +extern int F77_FUNC(dsytrs)(char *uplo, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, + int *info); +extern int F77_FUNC(dsytri)(char *uplo, int *n, double *a, int *lda, int *ipiv, double *work, int *info); + +/* + * This function returns the solution of Ax = b + * + * The function is based on QR decomposition with explicit computation of Q: + * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes + * Q R x = b or R x = Q^T b. + * + * A is mxm, b is mx1. Argument iscolmaj specifies whether A is + * stored in column or row major order. Note that if iscolmaj==1 + * this function modifies A! + * + * The function returns 0 in case of error, 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. 
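+ * A typical calling sequence is thus (sketch only; A, B, x, m as above):
+ *
+ *     while(have_systems_to_solve)
+ *         sba_Axb_QR(A, B, x, m, 0); // internal buffer allocated once, then reused
+ *     sba_Axb_QR(NULL, NULL, NULL, 0, 0); // done; free the internal buffer
+ *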
+ * A call with NULL as the first argument forces this memory to be released. + */ +int sba_Axb_QR(double *A, double *B, double *x, int m, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0, nb = 0; + + double *a, *r, *tau, *work; + int a_sz, r_sz, tau_sz, tot_sz; + register int i, j; + int info, worksz, nrhs = 1; + register double sum; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + a_sz = (iscolmaj) ? 0 : m * m; + r_sz = m * m; /* only the upper triangular part really needed */ + tau_sz = m; + if (!nb) { +#ifndef SBA_LS_SCARCE_MEMORY + double tmp; + + worksz = -1; // workspace query; optimal size is returned in tmp + F77_FUNC(dgeqrf)((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&worksz, (int *)&info); + nb = ((int)tmp) / m; // optimal worksize is m*nb +#else + nb = 1; // min worksize is m +#endif /* SBA_LS_SCARCE_MEMORY */ + } + worksz = nb * m; + tot_sz = a_sz + r_sz + tau_sz + worksz; + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz * sizeof(double)); + if (!buf) { + fprintf(stderr, "memory allocation in sba_Axb_QR() failed!\n"); + exit(1); + } + } + + if (!iscolmaj) { + a = buf; + /* store A (column major!) into a */ + for (i = 0; i < m; ++i) + for (j = 0; j < m; ++j) + a[i + j * m] = A[i * m + j]; + } else + a = A; /* no copying required */ + + r = buf + a_sz; + tau = r + r_sz; + work = tau + tau_sz; + + /* QR decomposition of A */ + F77_FUNC(dgeqrf)((int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dgeqrf in sba_Axb_QR()\n", -info); + exit(1); + } else { + fprintf(stderr, "Unknown LAPACK error %d for dgeqrf in sba_Axb_QR()\n", info); + return 0; + } + } + + /* R is now stored in the upper triangular part of a; copy it in r so that dorgqr() below won't destroy it */ + for (i = 0; i < r_sz; ++i) + r[i] = a[i]; + + /* compute Q using the elementary reflectors computed by the above decomposition */ + F77_FUNC(dorgqr)((int *)&m, (int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dorgqr in sba_Axb_QR()\n", -info); + exit(1); + } else { + fprintf(stderr, "Unknown LAPACK error (%d) in sba_Axb_QR()\n", info); + return 0; + } + } + + /* Q is now in a; compute Q^T b in x */ + for (i = 0; i < m; ++i) { + for (j = 0, sum = 0.0; j < m; ++j) + sum += a[i * m + j] * B[j]; + x[i] = sum; + } + + /* solve the linear system R x = Q^t b */ + F77_FUNC(dtrtrs)("U", "N", "N", (int *)&m, (int *)&nrhs, r, (int *)&m, x, (int *)&m, &info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QR()\n", -info); + exit(1); + } else { + fprintf(stderr, "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QR()\n", + info); + return 0; + } + } + + return 1; +} + +/* + * This function returns the solution of Ax = b + * + * The function is based on QR decomposition without computation of Q: + * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes + * (A^T A) x = A^T b or (R^T Q^T Q R) x = A^T b or (R^T R) x = A^T 
b. + * This amounts to solving R^T y = A^T b for y and then R x = y for x + * Note that Q does not need to be explicitly computed + * + * A is mxm, b is mx1. Argument iscolmaj specifies whether A is + * stored in column or row major order. Note that if iscolmaj==1 + * this function modifies A! + * + * The function returns 0 in case of error, 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_Axb_QRnoQ(double *A, double *B, double *x, int m, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0, nb = 0; + + double *a, *tau, *work; + int a_sz, tau_sz, tot_sz; + register int i, j; + int info, worksz, nrhs = 1; + register double sum; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + a_sz = (iscolmaj) ? 0 : m * m; + tau_sz = m; + if (!nb) { +#ifndef SBA_LS_SCARCE_MEMORY + double tmp; + + worksz = -1; // workspace query; optimal size is returned in tmp + F77_FUNC(dgeqrf)((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&worksz, (int *)&info); + nb = ((int)tmp) / m; // optimal worksize is m*nb +#else + nb = 1; // min worksize is m +#endif /* SBA_LS_SCARCE_MEMORY */ + } + worksz = nb * m; + tot_sz = a_sz + tau_sz + worksz; + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz * sizeof(double)); + if (!buf) { + fprintf(stderr, "memory allocation in sba_Axb_QRnoQ() failed!\n"); + exit(1); + } + } + + if (!iscolmaj) { + a = buf; + /* store A (column major!) 
into a */ + for (i = 0; i < m; ++i) + for (j = 0; j < m; ++j) + a[i + j * m] = A[i * m + j]; + } else + a = A; /* no copying required */ + + tau = buf + a_sz; + work = tau + tau_sz; + + /* compute A^T b in x */ + for (i = 0; i < m; ++i) { + for (j = 0, sum = 0.0; j < m; ++j) + sum += a[i * m + j] * B[j]; + x[i] = sum; + } + + /* QR decomposition of A */ + F77_FUNC(dgeqrf)((int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dgeqrf in sba_Axb_QRnoQ()\n", -info); + exit(1); + } else { + fprintf(stderr, "Unknown LAPACK error %d for dgeqrf in sba_Axb_QRnoQ()\n", info); + return 0; + } + } + + /* R is stored in the upper triangular part of a */ + + /* solve the linear system R^T y = A^t b */ + F77_FUNC(dtrtrs)("U", "T", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QRnoQ()\n", -info); + exit(1); + } else { + fprintf(stderr, + "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QRnoQ()\n", + info); + return 0; + } + } + + /* solve the linear system R x = y */ + F77_FUNC(dtrtrs)("U", "N", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QRnoQ()\n", -info); + exit(1); + } else { + fprintf(stderr, + "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QRnoQ()\n", + info); + return 0; + } + } + + return 1; +} + +/* + * This function returns the solution of Ax=b + * + * The function assumes that A is symmetric & positive definite and employs + * the Cholesky decomposition: + * If A=U^T U with U upper triangular, the system to be solved becomes + * (U^T U) x = b + * This amounts to solving U^T y = b for y and then U x = y for x + * + * A is mxm, b is mx1. Argument iscolmaj specifies whether A is + * stored in column or row major order. Note that if iscolmaj==1 + * this function modifies A! + * + * The function returns 0 in case of error, 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_Axb_Chol(double *A, double *B, double *x, int m, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0; + + double *a; + int a_sz, tot_sz; + register int i, j; + int info, nrhs = 1; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + a_sz = (iscolmaj) ? 
0 : m * m; + tot_sz = a_sz; + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz * sizeof(double)); + if (!buf) { + fprintf(stderr, "memory allocation in sba_Axb_Chol() failed!\n"); + exit(1); + } + } + + if (!iscolmaj) { + a = buf; + + /* store A into a and B into x; A is assumed to be symmetric, hence + * the column and row major order representations are the same + */ + for (i = 0; i < m; ++i) { + a[i] = A[i]; + x[i] = B[i]; + } + for (j = m * m; i < j; ++i) // copy remaining rows; note that i is not re-initialized + a[i] = A[i]; + } else { /* no copying is necessary for A */ + a = A; + for (i = 0; i < m; ++i) + x[i] = B[i]; + } + + /* Cholesky decomposition of A */ + // F77_FUNC(dpotf2)("U", (int *)&m, a, (int *)&m, (int *)&info); + F77_FUNC(dpotrf)("U", (int *)&m, a, (int *)&m, (int *)&info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotf2/dpotrf in sba_Axb_Chol()\n", -info); + exit(1); + } else { + fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization " + "could not be completed for dpotf2/dpotrf in sba_Axb_Chol()\n", + info); + return 0; + } + } + +/* below are two alternative ways for solving the linear system: */ +#if 1 + /* use the computed Cholesky in one lapack call */ + F77_FUNC(dpotrs)("U", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotrs in sba_Axb_Chol()\n", -info); + exit(1); + } +#else + /* solve the linear systems U^T y = b, U x = y */ + F77_FUNC(dtrtrs)("U", "T", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_Chol()\n", -info); + exit(1); + } else { + fprintf(stderr, + "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_Chol()\n", + info); + return 0; + } + } + + /* solve U x = y */ + F77_FUNC(dtrtrs)("U", "N", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_Chol()\n", -info); + exit(1); + } else { + fprintf(stderr, + "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_Chol()\n", + info); + return 0; + } + } +#endif /* 1 */ + + return 1; +} + +/* + * This function returns the solution of Ax = b + * + * The function employs LU decomposition: + * If A=L U with L lower and U upper triangular, then the original system + * amounts to solving + * L y = b, U x = y + * + * A is mxm, b is mx1. Argument iscolmaj specifies whether A is + * stored in column or row major order. Note that if iscolmaj==1 + * this function modifies A! + * + * The function returns 0 in case of error, + * 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. 
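+ *
+ * Minimal usage sketch (values arbitrary), solving a 2x2 row-major system:
+ *
+ *     double A[4] = {4.0, 1.0, 1.0, 3.0}, B[2] = {1.0, 2.0}, x[2];
+ *
+ *     if(sba_Axb_LU(A, B, x, 2, 0))
+ *         printf("x = [%g %g]\n", x[0], x[1]);
+ *     sba_Axb_LU(NULL, NULL, NULL, 0, 0); // release the retained buffer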
+ */ +int sba_Axb_LU(double *A, double *B, double *x, int m, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0; + + int a_sz, ipiv_sz, tot_sz; + register int i, j; + int info, *ipiv, nrhs = 1; + double *a; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + ipiv_sz = m; + a_sz = (iscolmaj) ? 0 : m * m; + tot_sz = a_sz * sizeof(double) + + ipiv_sz * sizeof(int); /* should be arranged in that order for proper doubles alignment */ + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz); + if (!buf) { + fprintf(stderr, "memory allocation in sba_Axb_LU() failed!\n"); + exit(1); + } + } + + if (!iscolmaj) { + a = buf; + ipiv = (int *)(a + a_sz); + + /* store A (column major!) into a and B into x */ + for (i = 0; i < m; ++i) { + for (j = 0; j < m; ++j) + a[i + j * m] = A[i * m + j]; + + x[i] = B[i]; + } + } else { /* no copying is necessary for A */ + a = A; + for (i = 0; i < m; ++i) + x[i] = B[i]; + ipiv = (int *)buf; + } + + /* LU decomposition for A */ + F77_FUNC(dgetrf)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dgetrf illegal in sba_Axb_LU()\n", -info); + exit(1); + } else { + fprintf(stderr, "singular matrix A for dgetrf in sba_Axb_LU()\n"); + return 0; + } + } + + /* solve the system with the computed LU */ + F77_FUNC(dgetrs)("N", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dgetrs illegal in sba_Axb_LU()\n", -info); + exit(1); + } else { + fprintf(stderr, "unknown error for dgetrs in sba_Axb_LU()\n"); + return 0; + } + } + + return 1; +} + +/* + * This function returns the solution of Ax = b + * + * The function is based on SVD decomposition: + * If A=U D V^T with U, V orthogonal and D diagonal, the linear system becomes + * (U D V^T) x = b or x=V D^{-1} U^T b + * Note that V D^{-1} U^T is the pseudoinverse A^+ + * + * A is mxm, b is mx1. Argument iscolmaj specifies whether A is + * stored in column or row major order. Note that if iscolmaj==1 + * this function modifies A! + * + * The function returns 0 in case of error, 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_Axb_SVD(double *A, double *B, double *x, int m, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0; + static double eps = -1.0; + + register int i, j; + double *a, *u, *s, *vt, *work; + int a_sz, u_sz, s_sz, vt_sz, tot_sz; + double thresh, one_over_denom; + register double sum; + int info, rank, worksz, *iwork, iworksz; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + +/* calculate required memory size */ +#ifndef SBA_LS_SCARCE_MEMORY + worksz = -1; // workspace query. 
Keep in mind that dgesdd requires more memory than dgesvd + /* note that optimal work size is returned in thresh */ + F77_FUNC(dgesdd) + ("A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, (double *)&thresh, + (int *)&worksz, NULL, &info); + /* F77_FUNC(dgesvd)("A", "A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, + (double *)&thresh, (int *)&worksz, &info); */ + worksz = (int)thresh; +#else + worksz = m * (7 * m + 4); // min worksize for dgesdd + // worksz=5*m; // min worksize for dgesvd +#endif /* SBA_LS_SCARCE_MEMORY */ + iworksz = 8 * m; + a_sz = (!iscolmaj) ? m * m : 0; + u_sz = m * m; + s_sz = m; + vt_sz = m * m; + + tot_sz = (a_sz + u_sz + s_sz + vt_sz + worksz) * sizeof(double) + + iworksz * sizeof(int); /* should be arranged in that order for proper doubles alignment */ + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz); + if (!buf) { + fprintf(stderr, "memory allocation in sba_Axb_SVD() failed!\n"); + exit(1); + } + } + + if (!iscolmaj) { + a = buf; + u = a + a_sz; + /* store A (column major!) into a */ + for (i = 0; i < m; ++i) + for (j = 0; j < m; ++j) + a[i + j * m] = A[i * m + j]; + } else { + a = A; /* no copying required */ + u = buf; + } + + s = u + u_sz; + vt = s + s_sz; + work = vt + vt_sz; + iwork = (int *)(work + worksz); + + /* SVD decomposition of A */ + F77_FUNC(dgesdd) + ("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info); + // F77_FUNC(dgesvd)("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int + // *)&worksz, &info); + + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dgesdd/dgesvd in sba_Axb_SVD()\n", -info); + exit(1); + } else { + fprintf(stderr, + "LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in sba_Axb_SVD() [info=%d]\n", + info); + + return 0; + } + } + + if (eps < 0.0) { + double aux; + + /* compute machine epsilon. DBL_EPSILON should do also */ + for (eps = 1.0; aux = eps + 1.0, aux - 1.0 > 0.0; eps *= 0.5) + ; + eps *= 2.0; + } + + /* compute the pseudoinverse in a */ + memset(a, 0, m * m * sizeof(double)); /* initialize to zero */ + for (rank = 0, thresh = eps * s[0]; rank < m && s[rank] > thresh; ++rank) { + one_over_denom = 1.0 / s[rank]; + + for (j = 0; j < m; ++j) + for (i = 0; i < m; ++i) + a[i * m + j] += vt[rank + i * m] * u[j + rank * m] * one_over_denom; + } + + /* compute A^+ b in x */ + for (i = 0; i < m; ++i) { + for (j = 0, sum = 0.0; j < m; ++j) + sum += a[i * m + j] * B[j]; + x[i] = sum; + } + + return 1; +} + +/* + * This function returns the solution of Ax = b for a real symmetric matrix A + * + * The function is based on UDUT factorization with the pivoting + * strategy of Bunch and Kaufman: + * A is factored as U*D*U^T where U is upper triangular and + * D symmetric and block diagonal (aka spectral decomposition, + * Banachiewicz factorization, modified Cholesky factorization) + * + * A is mxm, b is mx1. Argument iscolmaj specifies whether A is + * stored in column or row major order. Note that if iscolmaj==1 + * this function modifies A! + * + * The function returns 0 in case of error, + * 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. 
To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_Axb_BK(double *A, double *B, double *x, int m, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0, nb = 0; + + int a_sz, ipiv_sz, work_sz, tot_sz; + register int i, j; + int info, *ipiv, nrhs = 1; + double *a, *work; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + ipiv_sz = m; + a_sz = (iscolmaj) ? 0 : m * m; + if (!nb) { +#ifndef SBA_LS_SCARCE_MEMORY + double tmp; + + work_sz = -1; // workspace query; optimal size is returned in tmp + F77_FUNC(dsytrf)("U", (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info); + nb = ((int)tmp) / m; // optimal worksize is m*nb +#else + nb = -1; // min worksize is 1 +#endif /* SBA_LS_SCARCE_MEMORY */ + } + work_sz = (nb != -1) ? nb * m : 1; + tot_sz = (a_sz + work_sz) * sizeof(double) + + ipiv_sz * sizeof(int); /* should be arranged in that order for proper doubles alignment */ + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz); + if (!buf) { + fprintf(stderr, "memory allocation in sba_Axb_BK() failed!\n"); + exit(1); + } + } + + if (!iscolmaj) { + a = buf; + work = a + a_sz; + + /* store A into a and B into x; A is assumed to be symmetric, hence + * the column and row major order representations are the same + */ + for (i = 0; i < m; ++i) { + a[i] = A[i]; + x[i] = B[i]; + } + for (j = m * m; i < j; ++i) // copy remaining rows; note that i is not re-initialized + a[i] = A[i]; + } else { /* no copying is necessary for A */ + a = A; + for (i = 0; i < m; ++i) + x[i] = B[i]; + work = buf; + } + ipiv = (int *)(work + work_sz); + + /* factorization for A */ + F77_FUNC(dsytrf)("U", (int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dsytrf illegal in sba_Axb_BK()\n", -info); + exit(1); + } else { + fprintf(stderr, "singular block diagonal matrix D for dsytrf in sba_Axb_BK() [D(%d, %d) is zero]\n", info, + info); + return 0; + } + } + + /* solve the system with the computed factorization */ + F77_FUNC(dsytrs)("U", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dsytrs illegal in sba_Axb_BK()\n", -info); + exit(1); + } else { + fprintf(stderr, "unknown error for dsytrs in sba_Axb_BK()\n"); + return 0; + } + } + + return 1; +} + +/* + * This function computes the inverse of a square matrix whose upper triangle + * is stored in A into its lower triangle using LU decomposition + * + * The function returns 0 in case of error (e.g. A is singular), + * 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. 
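+ *
+ * Usage sketch (values arbitrary): only the upper triangle of A must be valid
+ * on entry and, on return, A[i*m+j] with j<=i holds element (i, j) of A^{-1}:
+ *
+ *     double A[4] = {2.0, 1.0, 0.0, 3.0}; // 2x2; element (1,0) ignored on input
+ *
+ *     if(sba_symat_invert_LU(A, 2))
+ *         printf("%g %g %g\n", A[0], A[2], A[3]); // inv(0,0), inv(1,0), inv(1,1)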
+ */ +int sba_symat_invert_LU(double *A, int m) { + static double *buf = NULL; + static int buf_sz = 0, nb = 0; + + int a_sz, ipiv_sz, work_sz, tot_sz; + register int i, j; + int info, *ipiv; + double *a, *work; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + ipiv_sz = m; + a_sz = m * m; + if (!nb) { +#ifndef SBA_LS_SCARCE_MEMORY + double tmp; + + work_sz = -1; // workspace query; optimal size is returned in tmp + F77_FUNC(dgetri)((int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info); + nb = ((int)tmp) / m; // optimal worksize is m*nb +#else + nb = 1; // min worksize is m +#endif /* SBA_LS_SCARCE_MEMORY */ + } + work_sz = nb * m; + tot_sz = (a_sz + work_sz) * sizeof(double) + + ipiv_sz * sizeof(int); /* should be arranged in that order for proper doubles alignment */ + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz); + if (!buf) { + fprintf(stderr, "memory allocation in sba_symat_invert_LU() failed!\n"); + exit(1); + } + } + + a = buf; + work = a + a_sz; + ipiv = (int *)(work + work_sz); + + /* store A (column major!) into a */ + for (i = 0; i < m; ++i) + for (j = i; j < m; ++j) + a[i + j * m] = a[j + i * m] = A[i * m + j]; // copy A's upper part to a's upper & lower + + /* LU decomposition for A */ + F77_FUNC(dgetrf)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dgetrf illegal in sba_symat_invert_LU()\n", -info); + exit(1); + } else { + fprintf(stderr, "singular matrix A for dgetrf in sba_symat_invert_LU()\n"); + return 0; + } + } + + /* (A)^{-1} from LU */ + F77_FUNC(dgetri)((int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dgetri illegal in sba_symat_invert_LU()\n", -info); + exit(1); + } else { + fprintf(stderr, "singular matrix A for dgetri in sba_symat_invert_LU()\n"); + return 0; + } + } + + /* store (A)^{-1} in A's lower triangle */ + for (i = 0; i < m; ++i) + for (j = 0; j <= i; ++j) + A[i * m + j] = a[i + j * m]; + + return 1; +} + +/* + * This function computes the inverse of a square symmetric positive definite + * matrix whose upper triangle is stored in A into its lower triangle using + * Cholesky factorization + * + * The function returns 0 in case of error (e.g. A is not positive definite or singular), + * 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. 
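+ *
+ * Since only A's lower triangle is written, a caller needing the full symmetric
+ * inverse can mirror it afterwards (sketch; i, j, m declared as usual):
+ *
+ *     if(sba_symat_invert_Chol(A, m))
+ *         for(i = 0; i < m; ++i)
+ *             for(j = i + 1; j < m; ++j)
+ *                 A[i * m + j] = A[j * m + i]; // copy inverse to upper triangle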
+ */ +int sba_symat_invert_Chol(double *A, int m) { + static double *buf = NULL; + static int buf_sz = 0; + + int a_sz, tot_sz; + register int i, j; + int info; + double *a; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + a_sz = m * m; + tot_sz = a_sz; + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz * sizeof(double)); + if (!buf) { + fprintf(stderr, "memory allocation in sba_symat_invert_Chol() failed!\n"); + exit(1); + } + } + + a = (double *)buf; + + /* store A into a; A is assumed symmetric, hence no transposition is needed */ + for (i = 0, j = a_sz; i < j; ++i) + a[i] = A[i]; + + /* Cholesky factorization for A; a's lower part corresponds to A's upper */ + // F77_FUNC(dpotrf)("L", (int *)&m, a, (int *)&m, (int *)&info); + F77_FUNC(dpotf2)("L", (int *)&m, a, (int *)&m, (int *)&info); + /* error treatment */ + if (info != 0) { + if (info < 0) { + fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotrf in sba_symat_invert_Chol()\n", + -info); + exit(1); + } else { + fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization " + "could not be completed for dpotrf in sba_symat_invert_Chol()\n", + info); + return 0; + } + } + + /* (A)^{-1} from Cholesky */ + F77_FUNC(dpotri)("L", (int *)&m, a, (int *)&m, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dpotri illegal in sba_symat_invert_Chol()\n", -info); + exit(1); + } else { + fprintf(stderr, "the (%d, %d) element of the factor U or L is zero, singular matrix A for dpotri in " + "sba_symat_invert_Chol()\n", + info, info); + return 0; + } + } + + /* store (A)^{-1} in A's lower triangle. The lower triangle of the symmetric A^{-1} is in the lower triangle of a */ + for (i = 0; i < m; ++i) + for (j = 0; j <= i; ++j) + A[i * m + j] = a[i + j * m]; + + return 1; +} + +/* + * This function computes the inverse of a symmetric indefinite + * matrix whose upper triangle is stored in A into its lower triangle + * using LDLT factorization with the pivoting strategy of Bunch and Kaufman + * + * The function returns 0 in case of error (e.g. A is singular), + * 1 if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. + */ +int sba_symat_invert_BK(double *A, int m) { + static double *buf = NULL; + static int buf_sz = 0, nb = 0; + + int a_sz, ipiv_sz, work_sz, tot_sz; + register int i, j; + int info, *ipiv; + double *a, *work; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + ipiv_sz = m; + a_sz = m * m; + if (!nb) { +#ifndef SBA_LS_SCARCE_MEMORY + double tmp; + + work_sz = -1; // workspace query; optimal size is returned in tmp + F77_FUNC(dsytrf)("L", (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info); + nb = ((int)tmp) / m; // optimal worksize is m*nb +#else + nb = -1; // min worksize is 1 +#endif /* SBA_LS_SCARCE_MEMORY */ + } + work_sz = (nb != -1) ? nb * m : 1; + work_sz = (work_sz >= m) ? 
work_sz : m; /* ensure that work is at least m elements long, as required by dsytri */ + tot_sz = (a_sz + work_sz) * sizeof(double) + + ipiv_sz * sizeof(int); /* should be arranged in that order for proper doubles alignment */ + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz); + if (!buf) { + fprintf(stderr, "memory allocation in sba_symat_invert_BK() failed!\n"); + exit(1); + } + } + + a = buf; + work = a + a_sz; + ipiv = (int *)(work + work_sz); + + /* store A into a; A is assumed symmetric, hence no transposition is needed */ + for (i = 0, j = a_sz; i < j; ++i) + a[i] = A[i]; + + /* LDLT factorization for A; a's lower part corresponds to A's upper */ + F77_FUNC(dsytrf)("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dsytrf illegal in sba_symat_invert_BK()\n", -info); + exit(1); + } else { + fprintf(stderr, + "singular block diagonal matrix D for dsytrf in sba_symat_invert_BK() [D(%d, %d) is zero]\n", info, + info); + return 0; + } + } + + /* (A)^{-1} from LDLT */ + F77_FUNC(dsytri)("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&info); + if (info != 0) { + if (info < 0) { + fprintf(stderr, "argument %d of dsytri illegal in sba_symat_invert_BK()\n", -info); + exit(1); + } else { + fprintf(stderr, + "D(%d, %d)=0, matrix is singular and its inverse could not be computed in sba_symat_invert_BK()\n", + info, info); + return 0; + } + } + + /* store (A)^{-1} in A's lower triangle. The lower triangle of the symmetric A^{-1} is in the lower triangle of a */ + for (i = 0; i < m; ++i) + for (j = 0; j <= i; ++j) + A[i * m + j] = a[i + j * m]; + + return 1; +} + +#define __CG_LINALG_BLOCKSIZE 8 + +/* Dot product of two vectors x and y using loop unrolling and blocking. + * see http://www.abarnett.demon.co.uk/tutorial.html + */ + +inline static double dprod(const int n, const double *const x, const double *const y) { + register int i, j1, j2, j3, j4, j5, j6, j7; + int blockn; + register double sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0, sum5 = 0.0, sum6 = 0.0, sum7 = 0.0; + + /* n may not be divisible by __CG_LINALG_BLOCKSIZE, + * go as near as we can first, then tidy up. + */ + blockn = (n / __CG_LINALG_BLOCKSIZE) * __CG_LINALG_BLOCKSIZE; + + /* unroll the loop in blocks of `__CG_LINALG_BLOCKSIZE' */ + for (i = 0; i < blockn; i += __CG_LINALG_BLOCKSIZE) { + sum0 += x[i] * y[i]; + j1 = i + 1; + sum1 += x[j1] * y[j1]; + j2 = i + 2; + sum2 += x[j2] * y[j2]; + j3 = i + 3; + sum3 += x[j3] * y[j3]; + j4 = i + 4; + sum4 += x[j4] * y[j4]; + j5 = i + 5; + sum5 += x[j5] * y[j5]; + j6 = i + 6; + sum6 += x[j6] * y[j6]; + j7 = i + 7; + sum7 += x[j7] * y[j7]; + } + + /* + * There may be some left to do. + * This could be done as a simple for() loop, + * but a switch is faster (and more interesting) + */ + + if (i < n) { + /* Jump into the case at the place that will allow + * us to finish off the appropriate number of items. 
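+ * Because none of the cases below end in a break, control falls through:
+ * entering the switch at case (n - i) performs exactly the n - i leftover
+ * products, with the same net result as
+ *
+ *   for(; i < n; ++i) sum0 += x[i] * y[i];
+ *
+ * (the leftovers are merely spread over sum0...sum6, all of which are added
+ * together on return).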
+ */ + + switch (n - i) { + case 7: + sum0 += x[i] * y[i]; + ++i; + case 6: + sum1 += x[i] * y[i]; + ++i; + case 5: + sum2 += x[i] * y[i]; + ++i; + case 4: + sum3 += x[i] * y[i]; + ++i; + case 3: + sum4 += x[i] * y[i]; + ++i; + case 2: + sum5 += x[i] * y[i]; + ++i; + case 1: + sum6 += x[i] * y[i]; + ++i; + } + } + + return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7; +} + +/* Compute z=x+a*y for two vectors x and y and a scalar a; z can be one of x, y. + * Similarly to the dot product routine, this one uses loop unrolling and blocking + */ + +inline static void daxpy(const int n, double *const z, const double *const x, const double a, const double *const y) { + register int i, j1, j2, j3, j4, j5, j6, j7; + int blockn; + + /* n may not be divisible by __CG_LINALG_BLOCKSIZE, + * go as near as we can first, then tidy up. + */ + blockn = (n / __CG_LINALG_BLOCKSIZE) * __CG_LINALG_BLOCKSIZE; + + /* unroll the loop in blocks of `__CG_LINALG_BLOCKSIZE' */ + for (i = 0; i < blockn; i += __CG_LINALG_BLOCKSIZE) { + z[i] = x[i] + a * y[i]; + j1 = i + 1; + z[j1] = x[j1] + a * y[j1]; + j2 = i + 2; + z[j2] = x[j2] + a * y[j2]; + j3 = i + 3; + z[j3] = x[j3] + a * y[j3]; + j4 = i + 4; + z[j4] = x[j4] + a * y[j4]; + j5 = i + 5; + z[j5] = x[j5] + a * y[j5]; + j6 = i + 6; + z[j6] = x[j6] + a * y[j6]; + j7 = i + 7; + z[j7] = x[j7] + a * y[j7]; + } + + /* + * There may be some left to do. + * This could be done as a simple for() loop, + * but a switch is faster (and more interesting) + */ + + if (i < n) { + /* Jump into the case at the place that will allow + * us to finish off the appropriate number of items. + */ + + switch (n - i) { + case 7: + z[i] = x[i] + a * y[i]; + ++i; + case 6: + z[i] = x[i] + a * y[i]; + ++i; + case 5: + z[i] = x[i] + a * y[i]; + ++i; + case 4: + z[i] = x[i] + a * y[i]; + ++i; + case 3: + z[i] = x[i] + a * y[i]; + ++i; + case 2: + z[i] = x[i] + a * y[i]; + ++i; + case 1: + z[i] = x[i] + a * y[i]; + ++i; + } + } +} + +/* + * This function returns the solution of Ax = b where A is posititive definite, + * based on the conjugate gradients method; see "An intro to the CG method" by J.R. Shewchuk, p. 50-51 + * + * A is mxm, b, x are is mx1. Argument niter specifies the maximum number of + * iterations and eps is the desired solution accuracy. niter<0 signals that + * x contains a valid initial approximation to the solution; if niter>0 then + * the starting point is taken to be zero. Argument prec selects the desired + * preconditioning method as follows: + * 0: no preconditioning + * 1: jacobi (diagonal) preconditioning + * 2: SSOR preconditioning + * Argument iscolmaj specifies whether A is stored in column or row major order. + * + * The function returns 0 in case of error, + * the number of iterations performed if successfull + * + * This function is often called repetitively to solve problems of identical + * dimensions. To avoid repetitive malloc's and free's, allocated memory is + * retained between calls and free'd-malloc'ed when not of the appropriate size. + * A call with NULL as the first argument forces this memory to be released. 
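+ *
+ * A representative invocation, mirroring the (commented-out) call to this
+ * routine in the LM drivers of sba_levmar.c:
+ *
+ *   niter = sba_Axb_CG(A, B, x, m, (3*m)/2, 1E-10, SBA_CG_JACOBI, 0);
+ *   if(niter > 0) ... // system solved in niter iterations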
+ */ +int sba_Axb_CG(double *A, double *B, double *x, int m, int niter, double eps, int prec, int iscolmaj) { + static double *buf = NULL; + static int buf_sz = 0; + + register int i, j; + register double *aim; + int iter, a_sz, res_sz, d_sz, q_sz, s_sz, wk_sz, z_sz, tot_sz; + double *a, *res, *d, *q, *s, *wk, *z; + double delta0, deltaold, deltanew, alpha, beta, eps_sq = eps * eps; + register double sum; + int rec_res; + + if (A == NULL) { + if (buf) + free(buf); + buf = NULL; + buf_sz = 0; + + return 1; + } + + /* calculate required memory size */ + a_sz = (iscolmaj) ? m * m : 0; + res_sz = m; + d_sz = m; + q_sz = m; + if (prec != SBA_CG_NOPREC) { + s_sz = m; + wk_sz = m; + z_sz = (prec == SBA_CG_SSOR) ? m : 0; + } else + s_sz = wk_sz = z_sz = 0; + + tot_sz = a_sz + res_sz + d_sz + q_sz + s_sz + wk_sz + z_sz; + + if (tot_sz > buf_sz) { /* insufficient memory, allocate a "big" memory chunk at once */ + if (buf) + free(buf); /* free previously allocated memory */ + + buf_sz = tot_sz; + buf = (double *)malloc(buf_sz * sizeof(double)); + if (!buf) { + fprintf(stderr, "memory allocation request failed in sba_Axb_CG()\n"); + exit(1); + } + } + + if (iscolmaj) { + a = buf; + /* store A (row major!) into a */ + for (i = 0; i < m; ++i) + for (j = 0, aim = a + i * m; j < m; ++j) + aim[j] = A[i + j * m]; + } else + a = A; /* no copying required */ + + res = buf + a_sz; + d = res + res_sz; + q = d + d_sz; + if (prec != SBA_CG_NOPREC) { + s = q + q_sz; + wk = s + s_sz; + z = (prec == SBA_CG_SSOR) ? wk + wk_sz : NULL; + + for (i = 0; i < m; ++i) { // compute jacobi (i.e. diagonal) preconditioners and save them in wk + sum = a[i * m + i]; + if (sum > DBL_EPSILON || -sum < -DBL_EPSILON) // != 0.0 + wk[i] = 1.0 / sum; + else + wk[i] = 1.0 / DBL_EPSILON; + } + } else { + s = res; + wk = z = NULL; + } + + if (niter > 0) { + for (i = 0; i < m; ++i) { // clear solution and initialize residual vector: res <-- B + x[i] = 0.0; + res[i] = B[i]; + } + } else { + niter = -niter; + + for (i = 0; i < m; ++i) { // initialize residual vector: res <-- B - A*x + for (j = 0, aim = a + i * m, sum = 0.0; j < m; ++j) + sum += aim[j] * x[j]; + res[i] = B[i] - sum; + } + } + + switch (prec) { + case SBA_CG_NOPREC: + for (i = 0, deltanew = 0.0; i < m; ++i) { + d[i] = res[i]; + deltanew += res[i] * res[i]; + } + break; + case SBA_CG_JACOBI: // jacobi preconditioning + for (i = 0, deltanew = 0.0; i < m; ++i) { + d[i] = res[i] * wk[i]; + deltanew += res[i] * d[i]; + } + break; + case SBA_CG_SSOR: // SSOR preconditioning; see the "templates" book, fig. 3.2, p. 
44
+ for (i = 0; i < m; ++i) {
+ for (j = 0, sum = 0.0, aim = a + i * m; j < i; ++j)
+ sum += aim[j] * z[j];
+ z[i] = wk[i] * (res[i] - sum);
+ }
+
+ for (i = m - 1; i >= 0; --i) {
+ for (j = i + 1, sum = 0.0, aim = a + i * m; j < m; ++j)
+ sum += aim[j] * d[j];
+ d[i] = z[i] - wk[i] * sum;
+ }
+ deltanew = dprod(m, res, d);
+ break;
+ default:
+ fprintf(stderr, "unknown preconditioning option %d in sba_Axb_CG\n", prec);
+ exit(1);
+ }
+
+ delta0 = deltanew;
+
+ for (iter = 1; deltanew > eps_sq * delta0 && iter <= niter; ++iter) {
+ for (i = 0; i < m; ++i) { // q <-- A d
+ aim = a + i * m;
+ /***
+ for(j=0, sum=0.0; j<m; ++j)
+ sum+=aim[j]*d[j];
+ ***/
+ q[i] = dprod(m, aim, d);
+ }
+
+ /***
+ for(i=0, sum=0.0; i<m; ++i)
+ sum+=d[i]*q[i];
+ ***/
+ alpha = deltanew / dprod(m, d, q);
+
+ /***
+ for(i=0; i<m; ++i)
+ x[i]+=alpha*d[i];
+ ***/
+ daxpy(m, x, x, alpha, d);
+
+ if (!(iter % 50)) { /* periodically recompute the residual from its definition */
+ for (i = 0; i < m; ++i) { // res <-- B - A*x
+ aim = a + i * m;
+ res[i] = B[i] - dprod(m, aim, x);
+ }
+ rec_res = 1;
+ } else {
+ /***
+ for(i=0; i<m; ++i)
+ res[i]-=alpha*q[i];
+ ***/
+ daxpy(m, res, res, -alpha, q);
+ rec_res = 0;
+ }
+
+ if (prec) {
+ switch (prec) {
+ case SBA_CG_JACOBI:
+ for (i = 0; i < m; ++i)
+ s[i] = res[i] * wk[i];
+ break;
+ case SBA_CG_SSOR:
+ for (i = 0; i < m; ++i) {
+ for (j = 0, sum = 0.0, aim = a + i * m; j < i; ++j)
+ sum += aim[j] * z[j];
+ z[i] = wk[i] * (res[i] - sum);
+ }
+
+ for (i = m - 1; i >= 0; --i) {
+ for (j = i + 1, sum = 0.0, aim = a + i * m; j < m; ++j)
+ sum += aim[j] * s[j];
+ s[i] = z[i] - wk[i] * sum;
+ }
+ break;
+ }
+ }
+
+ deltaold = deltanew;
+ /***
+ for(i=0, sum=0.0; i<m; ++i)
+ sum+=res[i]*s[i];
+ ***/
+ deltanew = dprod(m, res, s);
+ beta = deltanew / deltaold;
+
+ /***
+ for(i=0; i<m; ++i)
+ d[i]=s[i]+beta*d[i];
+ ***/
+ daxpy(m, d, s, beta, d);
+ }
+
+ if (deltanew > eps_sq * delta0) /* convergence was not attained within niter iterations */
+ return 0;
+
+ return iter;
+}
+
+/*
+ * This function computes in B the Cholesky factorization of the inverse of a
+ * symmetric matrix A, i.e. an upper triangular B s.t. B^T B = A^-1. Only A's
+ * upper triangle is accessed; on exit, B's upper triangle holds the factor and
+ * its strictly lower triangle is cleared.
+ *
+ * The function returns 0 in case of error (e.g. A is singular),
+ * 1 if successfull
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int sba_mat_cholinv(double *A, double *B, int m)
+{
+static double *buf=NULL;
+static int buf_sz=0, nb=0;
+
+int a_sz, ipiv_sz, work_sz, tot_sz;
+register int i, j;
+int info, *ipiv;
+double *a, *work;
+
+ if(A==NULL){
+ if(buf) free(buf);
+ buf=NULL;
+ buf_sz=0;
+
+ return 1;
+ }
+
+ /* calculate required memory size */
+ ipiv_sz=m;
+ a_sz=m*m;
+ if(!nb){
+#ifndef SBA_LS_SCARCE_MEMORY
+ double tmp;
+
+ work_sz=-1; // workspace query; optimal size is returned in tmp
+ F77_FUNC(dsytrf)("L", (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info);
+ nb=((int)tmp)/m; // optimal worksize is m*nb
+#else
+ nb=-1; // min worksize is 1
+#endif /* SBA_LS_SCARCE_MEMORY */
+ }
+ work_sz=(nb!=-1)? nb*m : 1;
+ work_sz=(work_sz>=m)? work_sz : m; /* ensure that work is at least m elements long, as required by dsytri */
+ tot_sz=(a_sz+work_sz)*sizeof(double) + ipiv_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
+
+ if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
+ if(buf) free(buf); /* free previously allocated memory */
+
+ buf_sz=tot_sz;
+ buf=(double *)malloc(buf_sz);
+ if(!buf){
+ fprintf(stderr, "memory allocation in sba_mat_cholinv() failed!\n");
+ exit(1);
+ }
+ }
+
+ a=buf;
+ work=a+a_sz;
+ ipiv=(int *)(work+work_sz);
+
+ /* step 1: invert A */
+ /* store A into a; A is assumed symmetric, hence no transposition is needed */
+ for(i=0, j=a_sz; i<j; ++i)
+ a[i]=A[i];
+
+ /* LDLT factorization for A; a's lower part (column major) corresponds to A's upper */
+ F77_FUNC(dsytrf)("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info);
+ if(info!=0){
+ if(info<0){
+ fprintf(stderr, "argument %d of dsytrf illegal in sba_mat_cholinv()\n", -info);
+ exit(1);
+ }
+ else{
+ fprintf(stderr, "singular block diagonal matrix D for dsytrf in sba_mat_cholinv() [D(%d, %d) is zero]\n", info, info);
+ return 0;
+ }
+ }
+
+ /* (A)^{-1} from LDLT */
+ F77_FUNC(dsytri)("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&info);
+ if(info!=0){
+ if(info<0){
+ fprintf(stderr, "argument %d of dsytri illegal in sba_mat_cholinv()\n", -info);
+ exit(1);
+ }
+ else{
+ fprintf(stderr, "D(%d, %d)=0, matrix is singular and its inverse could not be computed in sba_mat_cholinv()\n", info, info);
+ return 0;
+ }
+ }
+
+ /* step 2: Cholesky factorization of the inverse, A^-1 = B^T B.
+ * a's lower triangle (column major) holds A^-1 and corresponds to B's upper (row major)
+ */
+ F77_FUNC(dpotf2)("L", (int *)&m, a, (int *)&m, (int *)&info);
+ if(info!=0){
+ if(info<0){
+ fprintf(stderr, "argument %d of dpotf2 illegal in sba_mat_cholinv()\n", -info);
+ exit(1);
+ }
+ else{
+ fprintf(stderr, "the leading minor of order %d is not positive definite for dpotf2 in sba_mat_cholinv()\n", info);
+ return 0;
+ }
+ }
+
+ /* store the factor in B's upper triangle, clearing the rest */
+ for(i=0; i<m; ++i)
+ for(j=0; j<m; ++j)
+ B[i*m+j]=(j>=i)? a[i*m+j] : 0.0;
+
+ return 1;
+}
diff --git a/redist/sba/sba_levmar.c b/redist/sba/sba_levmar.c
new file mode 100644
--- /dev/null
+++ b/redist/sba/sba_levmar.c
@@ -0,0 +1,2715 @@
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// Expert drivers for sparse bundle adjustment based on the
+//// Levenberg - Marquardt minimization algorithm
+//// Copyright (C) 2004-2009 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+////
+//// This program is distributed in the hope that it will be useful,
+//// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//// GNU General Public License for more details.
+////
+///////////////////////////////////////////////////////////////////////////////////
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+
+#include "compiler.h"
+#include "sba.h"
+#include "sba_chkjac.h"
+
+#define SBA_EPSILON 1E-12
+#define SBA_EPSILON_SQ ((SBA_EPSILON) * (SBA_EPSILON))
+
+#define SBA_ONE_THIRD 0.3333333334 /* 1.0/3.0 */
+
+#define emalloc(sz) emalloc_(__FILE__, __LINE__, sz)
+
+#define FABS(x) (((x) >= 0) ? (x) : -(x))
+
+#define ROW_MAJOR 0
+#define COLUMN_MAJOR 1
+#define MAT_STORAGE COLUMN_MAJOR
+
+/* contains information necessary for computing a finite difference approximation to a jacobian,
+ * e.g. function to differentiate, problem dimensions and pointers to working memory buffers
+ */
+struct fdj_data_x_ {
+ void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx,
+ void *adata); /* function to differentiate */
+ int cnp, pnp, mnp; /* parameter numbers */
+ int *func_rcidxs, *func_rcsubs; /* working memory for func invocations.
+ * Notice that this has to be different
+ * than the working memory used for
+ * evaluating the jacobian!
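+ * The drivers in this file carve both arrays out of one
+ * allocation, disjoint from the rcidxs/rcsubs that
+ * sba_fdjac_x() itself traverses:
+ *
+ *   fdj_data.func_rcidxs = (int *)emalloc(2 * maxCPvis * sizeof(int));
+ *   fdj_data.func_rcsubs = fdj_data.func_rcidxs + maxCPvis;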
+ */ + double *hx, *hxx; /* memory to save results in */ + void *adata; +}; + +/* auxiliary memory allocation routine with error checking */ +inline static void *emalloc_(char *file, int line, size_t sz) { + void *ptr; + + ptr = (void *)malloc(sz); + if (ptr == NULL) { + fprintf(stderr, "SBA: memory allocation request for %zu bytes failed in file %s, line %d, exiting", sz, file, + line); + exit(1); + } + + return ptr; +} + +/* auxiliary routine for clearing an array of doubles */ +inline static void _dblzero(register double *arr, register int count) { + while (--count >= 0) + *arr++ = 0.0; +} + +/* auxiliary routine for computing the mean reprojection error; used for debugging */ +static double sba_mean_repr_error(int n, int mnp, double *x, double *hx, struct sba_crsm *idxij, int *rcidxs, + int *rcsubs) { + register int i, j; + int nnz, nprojs; + double *ptr1, *ptr2; + double err; + + for (i = nprojs = 0, err = 0.0; i < n; ++i) { + nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ + nprojs += nnz; + for (j = 0; j < nnz; ++j) { /* point i projecting on camera rcsubs[j] */ + ptr1 = x + idxij->val[rcidxs[j]] * mnp; + ptr2 = hx + idxij->val[rcidxs[j]] * mnp; + + err += sqrt((ptr1[0] - ptr2[0]) * (ptr1[0] - ptr2[0]) + (ptr1[1] - ptr2[1]) * (ptr1[1] - ptr2[1])); + } + } + + return err / ((double)(nprojs)); +} + +/* print the solution in p using sba's text format. If cnp/pnp==0 only points/cameras are printed */ +static void sba_print_sol(int n, int m, double *p, int cnp, int pnp, double *x, int mnp, struct sba_crsm *idxij, + int *rcidxs, int *rcsubs) { + register int i, j, ii; + int nnz; + double *ptr; + + if (cnp) { + /* print camera parameters */ + for (j = 0; j < m; ++j) { + ptr = p + cnp * j; + for (ii = 0; ii < cnp; ++ii) + printf("%g ", ptr[ii]); + printf("\n"); + } + } + + if (pnp) { + /* 3D & 2D point parameters */ + printf("\n\n\n# X Y Z nframes frame0 x0 y0 frame1 x1 y1 ...\n"); + for (i = 0; i < n; ++i) { + ptr = p + cnp * m + i * pnp; + for (ii = 0; ii < pnp; ++ii) // print 3D coordinates + printf("%g ", ptr[ii]); + + nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ + printf("%d ", nnz); + + for (j = 0; j < nnz; ++j) { /* point i projecting on camera rcsubs[j] */ + ptr = x + idxij->val[rcidxs[j]] * mnp; + + printf("%d ", rcsubs[j]); + for (ii = 0; ii < mnp; ++ii) // print 2D coordinates + printf("%g ", ptr[ii]); + } + printf("\n"); + } + } +} + +/* Compute e=x-y for two n-vectors x and y and return the squared L2 norm of e. + * e can coincide with either x or y. + * Uses loop unrolling and blocking to reduce bookkeeping overhead & pipeline + * stalls and increase instruction-level parallelism; see http://www.abarnett.demon.co.uk/tutorial.html + */ +static double nrmL2xmy(double *const e, const double *const x, const double *const y, const int n) { + const int blocksize = 8, bpwr = 3; /* 8=2^3 */ + register int i; + int j1, j2, j3, j4, j5, j6, j7; + int blockn; + register double sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3 = 0.0; + + /* n may not be divisible by blocksize, + * go as near as we can first, then tidy up. 
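+ * ((n >> bpwr) << bpwr simply rounds n down to a multiple of the block size
+ * 8; e.g. n = 21 yields blockn = 16, leaving 5 elements for the switch-based
+ * cleanup below.)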
+ */ + blockn = (n >> bpwr) << bpwr; /* (n / blocksize) * blocksize; */ + + /* unroll the loop in blocks of `blocksize'; looping downwards gains some more speed */ + for (i = blockn - 1; i > 0; i -= blocksize) { + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + j1 = i - 1; + e[j1] = x[j1] - y[j1]; + sum1 += e[j1] * e[j1]; + j2 = i - 2; + e[j2] = x[j2] - y[j2]; + sum2 += e[j2] * e[j2]; + j3 = i - 3; + e[j3] = x[j3] - y[j3]; + sum3 += e[j3] * e[j3]; + j4 = i - 4; + e[j4] = x[j4] - y[j4]; + sum0 += e[j4] * e[j4]; + j5 = i - 5; + e[j5] = x[j5] - y[j5]; + sum1 += e[j5] * e[j5]; + j6 = i - 6; + e[j6] = x[j6] - y[j6]; + sum2 += e[j6] * e[j6]; + j7 = i - 7; + e[j7] = x[j7] - y[j7]; + sum3 += e[j7] * e[j7]; + } + + /* + * There may be some left to do. + * This could be done as a simple for() loop, + * but a switch is faster (and more interesting) + */ + + i = blockn; + if (i < n) { + /* Jump into the case at the place that will allow + * us to finish off the appropriate number of items. + */ + switch (n - i) { + case 7: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + case 6: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + case 5: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + case 4: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + case 3: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + case 2: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + case 1: + e[i] = x[i] - y[i]; + sum0 += e[i] * e[i]; + ++i; + } + } + + return sum0 + sum1 + sum2 + sum3; +} + +/* Compute e=W*(x-y) for two n-vectors x and y and return the squared L2 norm of e. + * This norm equals the squared C norm of x-y with C=W^T*W, W being block diagonal + * matrix with nvis mnp*mnp blocks: e^T*e=(x-y)^T*W^T*W*(x-y)=||x-y||_C. + * Note that n=nvis*mnp; e can coincide with either x or y. + * + * Similarly to nrmL2xmy() above, uses loop unrolling and blocking + */ +static double nrmCxmy(double *const e, const double *const x, const double *const y, const double *const W, + const int mnp, const int nvis) { + const int n = nvis * mnp; + const int blocksize = 8, bpwr = 3; /* 8=2^3 */ + register int i, ii, k; + int j1, j2, j3, j4, j5, j6, j7; + int blockn, mnpsq; + register double norm, sum; + register const double *Wptr, *auxptr; + register double *eptr; + + /* n may not be divisible by blocksize, + * go as near as we can first, then tidy up. + */ + blockn = (n >> bpwr) << bpwr; /* (n / blocksize) * blocksize; */ + + /* unroll the loop in blocks of `blocksize'; looping downwards gains some more speed */ + for (i = blockn - 1; i > 0; i -= blocksize) { + e[i] = x[i] - y[i]; + j1 = i - 1; + e[j1] = x[j1] - y[j1]; + j2 = i - 2; + e[j2] = x[j2] - y[j2]; + j3 = i - 3; + e[j3] = x[j3] - y[j3]; + j4 = i - 4; + e[j4] = x[j4] - y[j4]; + j5 = i - 5; + e[j5] = x[j5] - y[j5]; + j6 = i - 6; + e[j6] = x[j6] - y[j6]; + j7 = i - 7; + e[j7] = x[j7] - y[j7]; + } + + /* + * There may be some left to do. + * This could be done as a simple for() loop, + * but a switch is faster (and more interesting) + */ + + i = blockn; + if (i < n) { + /* Jump into the case at the place that will allow + * us to finish off the appropriate number of items. + */ + switch (n - i) { + case 7: + e[i] = x[i] - y[i]; + ++i; + case 6: + e[i] = x[i] - y[i]; + ++i; + case 5: + e[i] = x[i] - y[i]; + ++i; + case 4: + e[i] = x[i] - y[i]; + ++i; + case 3: + e[i] = x[i] - y[i]; + ++i; + case 2: + e[i] = x[i] - y[i]; + ++i; + case 1: + e[i] = x[i] - y[i]; + ++i; + } + } + + /* compute w_x_ij e_ij in e and its L2 norm. 
+ * Since w_x_ij is upper triangular, the products can be safely saved + * directly in e_ij, without the need for intermediate storage + */ + mnpsq = mnp * mnp; + /* Wptr, eptr point to w_x_ij, e_ij below */ + for (i = 0, Wptr = W, eptr = e, norm = 0.0; i < nvis; ++i, Wptr += mnpsq, eptr += mnp) { + for (ii = 0, auxptr = Wptr; ii < mnp; ++ii, auxptr += mnp) { /* auxptr=Wptr+ii*mnp */ + for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular + sum += auxptr[k] * eptr[k]; // Wptr[ii*mnp+k]*eptr[k]; + eptr[ii] = sum; + norm += sum * sum; + } + } + + return norm; +} + +/* search for & print image projection components that are infinite; useful for identifying errors */ +static void sba_print_inf(double *hx, int nimgs, int mnp, struct sba_crsm *idxij, int *rcidxs, int *rcsubs) { + register int i, j, k; + int nnz; + double *phxij; + + for (j = 0; j < nimgs; ++j) { + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */ + for (i = 0; i < nnz; ++i) { + phxij = hx + idxij->val[rcidxs[i]] * mnp; + for (k = 0; k < mnp; ++k) + if (!SBA_FINITE(phxij[k])) + printf("SBA: component %d of the estimated projection of point %d on camera %d is invalid!\n", k, + rcsubs[i], j); + } + } +} + +/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in + * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. + * The jacobian is approximated with the aid of finite differences and is returned in the order + * (A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm), + * where A_ij=dx_ij/da_j and B_ij=dx_ij/db_i (see HZ). + * Notice that depending on idxij, some of the A_ij, B_ij might be missing + * + * Problem-specific information is assumed to be stored in a structure pointed to by "dat". + * + * NOTE: The jacobian (for n=4, m=3) in matrix form has the following structure: + * A_11 0 0 B_11 0 0 0 + * 0 A_12 0 B_12 0 0 0 + * 0 0 A_13 B_13 0 0 0 + * A_21 0 0 0 B_21 0 0 + * 0 A_22 0 0 B_22 0 0 + * 0 0 A_23 0 B_23 0 0 + * A_31 0 0 0 0 B_31 0 + * 0 A_32 0 0 0 B_32 0 + * 0 0 A_33 0 0 B_33 0 + * A_41 0 0 0 0 0 B_41 + * 0 A_42 0 0 0 0 B_42 + * 0 0 A_43 0 0 0 B_43 + * To reduce the total number of objective function evaluations, this structure can be + * exploited as follows: A certain d is added to the j-th parameters of all cameras and + * the objective function is evaluated at the resulting point. This evaluation suffices + * to compute the corresponding columns of *all* A_ij through finite differences. A similar + * strategy allows the computation of the B_ij. Overall, only cnp+pnp+1 objective function + * evaluations are needed to compute the jacobian, much fewer compared to the m*cnp+n*pnp+1 + * that would be required by the naive strategy of computing one column of the jacobian + * per function evaluation. See Nocedal-Wright, ch. 7, pp. 169. Although this approach is + * much faster compared to the naive strategy, it is not preferable to analytic jacobians, + * since the latter are considerably faster to compute and result in fewer LM iterations. 
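+ *
+ * As a concrete instance of these counts (with the illustrative sizes cnp=6,
+ * pnp=3 plugged into the n=4, m=3 pattern above): the naive scheme costs
+ * m*cnp+n*pnp+1 = 18+12+1 = 31 function evaluations, whereas the shared
+ * perturbation scheme costs only cnp+pnp+1 = 10.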
+ */ +static void +sba_fdjac_x(double *p, /* I: current parameter estimate, (m*cnp+n*pnp)x1 */ + struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */ + int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */ + int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */ + double *jac, /* O: array for storing the approximated jacobian */ + void *dat) /* I: points to a "fdj_data_x_" structure */ +{ + register int i, j, ii, jj; + double *pa, *pb, *pqr, *ppt; + register double *pAB, *phx, *phxx; + int n, m, nm, nnz, Asz, Bsz, ABsz, idx; + + double *tmpd; + register double d; + + struct fdj_data_x_ *fdjd; + void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata); + double *hx, *hxx; + int cnp, pnp, mnp; + void *adata; + + /* retrieve problem-specific information passed in *dat */ + fdjd = (struct fdj_data_x_ *)dat; + func = fdjd->func; + cnp = fdjd->cnp; + pnp = fdjd->pnp; + mnp = fdjd->mnp; + hx = fdjd->hx; + hxx = fdjd->hxx; + adata = fdjd->adata; + + n = idxij->nr; + m = idxij->nc; + pa = p; + pb = p + m * cnp; + Asz = mnp * cnp; + Bsz = mnp * pnp; + ABsz = Asz + Bsz; + + nm = (n >= m) ? n : m; // max(n, m); + tmpd = (double *)emalloc(nm * sizeof(double)); + + (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hx, + adata); // evaluate supplied function on current solution + + if (cnp) { // is motion varying? + /* compute A_ij */ + for (jj = 0; jj < cnp; ++jj) { + for (j = 0; j < m; ++j) { + pqr = pa + j * cnp; // j-th camera parameters + /* determine d=max(SBA_DELTA_SCALE*|pqr[jj]|, SBA_MIN_DELTA), see HZ */ + d = (double)(SBA_DELTA_SCALE)*pqr[jj]; // force evaluation + d = FABS(d); + if (d < SBA_MIN_DELTA) + d = SBA_MIN_DELTA; + + tmpd[j] = d; + pqr[jj] += d; + } + + (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata); + + for (j = 0; j < m; ++j) { + pqr = pa + j * cnp; // j-th camera parameters + pqr[jj] -= tmpd[j]; /* restore */ + d = 1.0 / tmpd[j]; /* invert so that divisions can be carried out faster as multiplications */ + + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ + for (i = 0; i < nnz; ++i) { + idx = idxij->val[rcidxs[i]]; + phx = hx + idx * mnp; // set phx to point to hx_ij + phxx = hxx + idx * mnp; // set phxx to point to hxx_ij + pAB = jac + idx * ABsz; // set pAB to point to A_ij + + for (ii = 0; ii < mnp; ++ii) + pAB[ii * cnp + jj] = (phxx[ii] - phx[ii]) * d; + } + } + } + } + + if (pnp) { // is structure varying? 
+ /* compute B_ij */ + for (jj = 0; jj < pnp; ++jj) { + for (i = 0; i < n; ++i) { + ppt = pb + i * pnp; // i-th point parameters + /* determine d=max(SBA_DELTA_SCALE*|ppt[jj]|, SBA_MIN_DELTA), see HZ */ + d = (double)(SBA_DELTA_SCALE)*ppt[jj]; // force evaluation + d = FABS(d); + if (d < SBA_MIN_DELTA) + d = SBA_MIN_DELTA; + + tmpd[i] = d; + ppt[jj] += d; + } + + (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata); + + for (i = 0; i < n; ++i) { + ppt = pb + i * pnp; // i-th point parameters + ppt[jj] -= tmpd[i]; /* restore */ + d = 1.0 / tmpd[i]; /* invert so that divisions can be carried out faster as multiplications */ + + nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + idx = idxij->val[rcidxs[j]]; + phx = hx + idx * mnp; // set phx to point to hx_ij + phxx = hxx + idx * mnp; // set phxx to point to hxx_ij + pAB = jac + idx * ABsz + Asz; // set pAB to point to B_ij + + for (ii = 0; ii < mnp; ++ii) + pAB[ii * pnp + jj] = (phxx[ii] - phx[ii]) * d; + } + } + } + } + + free(tmpd); +} + +typedef int (*PLS)(double *A, double *B, double *x, int m, int iscolmaj); + +/* Bundle adjustment on camera and structure parameters + * using the sparse Levenberg-Marquardt as described in HZ p. 568 + * + * Returns the number of iterations (>=0) if successfull, SBA_ERROR if failed + */ + +int sba_motstr_levmar_x( + const int n, /* number of points */ + const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified. + * All B_ij (see below) with i maxCvis) + maxCvis = k; + + /* find the maximum number (for all points) of visible image projections in any single camera */ + for (j = maxPvis = 0; j < m; ++j) { + for (i = ii = 0; i < n; ++i) + if (vmask[i * m + j]) + ++ii; + if (ii > maxPvis) + maxPvis = ii; + } + maxCPvis = (maxCvis >= maxPvis) ? maxCvis : maxPvis; + +#if 0 + /* determine the density of blocks in matrix S */ + for(j=mcon, ii=0; j= ABsz) ? Wsz : ABsz) + Wsz) * sizeof(double)); + U = (double *)emalloc(m * Usz * sizeof(double)); + V = (double *)emalloc(n * Vsz * sizeof(double)); + e = (double *)emalloc(nobs * sizeof(double)); + eab = (double *)emalloc(nvars * sizeof(double)); + E = (double *)emalloc(m * cnp * sizeof(double)); + Yj = (double *)emalloc(maxPvis * Ysz * sizeof(double)); + YWt = (double *)emalloc(YWtsz * sizeof(double)); + S = (double *)emalloc(m * m * Sblsz * sizeof(double)); + dp = (double *)emalloc(nvars * sizeof(double)); + Wtda = (double *)emalloc(pnp * sizeof(double)); + rcidxs = (int *)emalloc(maxCPvis * sizeof(int)); + rcsubs = (int *)emalloc(maxCPvis * sizeof(int)); +#ifndef SBA_DESTROY_COVS + if (covx != NULL) + wght = (double *)emalloc(nvis * covsz * sizeof(double)); +#else + if (covx != NULL) + wght = covx; +#endif /* SBA_DESTROY_COVS */ + + hx = (double *)emalloc(nobs * sizeof(double)); + diagUV = (double *)emalloc(nvars * sizeof(double)); + pdp = (double *)emalloc(nvars * sizeof(double)); + + /* to save resources, W and jac share the same memory: First, the jacobian + * is computed in some working memory that is then overwritten during the + * computation of W. To account for the case of W being larger than jac, + * extra memory is reserved "before" jac. + * Care must be taken, however, to ensure that storing a certain W_ij + * does not overwrite the A_ij, B_ij used to compute it. To achieve + * this is, note that if p1 and p2 respectively point to the first elements + * of a certain W_ij and A_ij, B_ij pair, we should have p2-p1>=Wsz. 
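+ * (Writing W_ij at p1 touches only the Wsz doubles in [p1, p1+Wsz), so
+ * requiring p2-p1>=Wsz guarantees that the A_ij, B_ij operands starting at
+ * p2 are still intact when they are read.)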
+ * There are two cases: + * a) Wsz>=ABsz: Then p1=W+k*Wsz and p2=jac+k*ABsz=W+Wsz+nvis*(Wsz-ABsz)+k*ABsz + * for some k (0<=k Wsz for all 0<=k Wsz for all 0<=k ABsz) ? nvis * (Wsz - ABsz) : 0); + + /* set up auxiliary pointers */ + pa = p; + pb = p + m * cnp; + ea = eab; + eb = eab + m * cnp; + dpa = dp; + dpb = dp + m * cnp; + + diagU = diagUV; + diagV = diagUV + m * cnp; + + /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */ + if (!fjac) { + fdj_data.func = func; + fdj_data.cnp = cnp; + fdj_data.pnp = pnp; + fdj_data.mnp = mnp; + fdj_data.hx = hx; + fdj_data.hxx = (double *)emalloc(nobs * sizeof(double)); + fdj_data.func_rcidxs = (int *)emalloc(2 * maxCPvis * sizeof(int)); + fdj_data.func_rcsubs = fdj_data.func_rcidxs + maxCPvis; + fdj_data.adata = adata; + + fjac = sba_fdjac_x; + jac_adata = (void *)&fdj_data; + } else { + fdj_data.hxx = NULL; + jac_adata = adata; + } + + if (itmax == 0) { /* verify jacobian */ + sba_motstr_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, ncon, mcon, cnp, pnp, mnp, adata, jac_adata); + retval = 0; + goto freemem_and_return; + } + + /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their + * inverses and using the resulting matrices w_x_ij to weigh A_ij, B_ij, and e_ij as w_x_ij A_ij, + * w_x_ij*B_ij and w_x_ij*e_ij. In this way, auxiliary variables as U_j=\sum_i A_ij^T A_ij + * actually become \sum_i (w_x_ij A_ij)^T w_x_ij A_ij= \sum_i A_ij^T w_x_ij^T w_x_ij A_ij = + * A_ij^T Sigma_x_ij^-1 A_ij + * + * ea_j, V_i, eb_i, W_ij are weighted in a similar manner + */ + if (covx != NULL) { + for (i = 0; i < n; ++i) { + nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */ + ptr1 = covx + (k = idxij.val[rcidxs[j]] * covsz); + ptr2 = wght + k; + if (!sba_mat_cholinv(ptr1, ptr2, mnp)) { /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */ + fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_motstr_levmar_x()\n", + i, rcsubs[j]); + retval = SBA_ERROR; + goto freemem_and_return; + } + } + } + sba_mat_cholinv(NULL, NULL, 0); /* cleanup */ + } + + /* compute the error vectors e_ij in hx */ + (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); + nfev = 1; + /* ### compute e=x - f(p) [e=w*(x - f(p)] and its L2 norm */ + if (covx == NULL) + p_eL2 = nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */ + else + p_eL2 = nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */ + if (verbose) + printf("initial motstr-SBA error %g [%g]\n", p_eL2, p_eL2 / nvis); + init_p_eL2 = p_eL2; + if (!SBA_FINITE(p_eL2)) + stop = 7; + + for (itno = 0; itno < itmax && !stop; ++itno) { + /* Note that p, e and ||e||_2 have been updated at the previous iteration */ + + /* compute derivative submatrices A_ij, B_ij */ + (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); + ++njev; + + if (covx != NULL) { + /* compute w_x_ij A_ij and w_x_ij B_ij. + * Since w_x_ij is upper triangular, the products can be safely saved + * directly in A_ij, B_ij, without the need for intermediate storage + */ + for (i = 0; i < nvis; ++i) { + /* set ptr1, ptr2, ptr3 to point to w_x_ij, A_ij, B_ij, resp. */ + ptr1 = wght + i * covsz; + ptr2 = jac + i * ABsz; + ptr3 = ptr2 + Asz; // ptr3=jac + i*ABsz + Asz; + + /* w_x_ij is mnp x mnp, A_ij is mnp x cnp, B_ij is mnp x pnp + * NOTE: Jamming the outter (i.e., ii) loops did not run faster! 
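+ * Exploiting w_x_ij's upper triangularity, the products computed below
+ * reduce to
+ *
+ *   (w_x_ij A_ij)[ii][jj] = \sum_{k>=ii} w_x_ij[ii][k]*A_ij[k][jj]
+ *
+ * i.e. roughly half the multiply-adds of a full mnp x mnp product; the
+ * B_ij products are handled identically.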
+ */ + /* A_ij */ + for (ii = 0; ii < mnp; ++ii) + for (jj = 0; jj < cnp; ++jj) { + for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular + sum += ptr1[ii * mnp + k] * ptr2[k * cnp + jj]; + ptr2[ii * cnp + jj] = sum; + } + + /* B_ij */ + for (ii = 0; ii < mnp; ++ii) + for (jj = 0; jj < pnp; ++jj) { + for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular + sum += ptr1[ii * mnp + k] * ptr3[k * pnp + jj]; + ptr3[ii * pnp + jj] = sum; + } + } + } + + /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here! + /* U_j is symmetric, therefore its computation can be sped up by + * computing only the upper part and then reusing it for the lower one. + * Recall that A_ij is mnp x cnp + */ + /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here! + /* Recall that e_ij is mnp x 1 + */ + _dblzero(U, m * Usz); /* clear all U_j */ + _dblzero(ea, m * easz); /* clear all ea_j */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + ptr2 = ea + j * easz; // set ptr2 to point to ea_j + + nnz = sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ + for (i = 0; i < nnz; ++i) { + /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */ + ptr3 = jac + idxij.val[rcidxs[i]] * ABsz; + + /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */ + for (ii = 0; ii < cnp; ++ii) { + for (jj = ii; jj < cnp; ++jj) { + for (k = 0, sum = 0.0; k < mnp; ++k) + sum += ptr3[k * cnp + ii] * ptr3[k * cnp + jj]; + ptr1[ii * cnp + jj] += sum; + } + + /* copy the LOWER TRIANGULAR PART of U_j from the upper one */ + for (jj = 0; jj < ii; ++jj) + ptr1[ii * cnp + jj] = ptr1[jj * cnp + ii]; + } + + ptr4 = e + idxij.val[rcidxs[i]] * esz; /* set ptr4 to point to e_ij */ + /* compute A_ij^T e_ij and add it to ea_j */ + for (ii = 0; ii < cnp; ++ii) { + for (jj = 0, sum = 0.0; jj < mnp; ++jj) + sum += ptr3[jj * cnp + ii] * ptr4[jj]; + ptr2[ii] += sum; + } + } + } + + /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here! + /* V_i is symmetric, therefore its computation can be sped up by + * computing only the upper part and then reusing it for the lower one. + * Recall that B_ij is mnp x pnp + */ + /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here! + /* Recall that e_ij is mnp x 1 + */ + _dblzero(V, n * Vsz); /* clear all V_i */ + _dblzero(eb, n * ebsz); /* clear all eb_i */ + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i + + nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */ + ptr3 = jac + idxij.val[rcidxs[j]] * ABsz + Asz; + + /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */ + for (ii = 0; ii < pnp; ++ii) { + for (jj = ii; jj < pnp; ++jj) { + for (k = 0, sum = 0.0; k < mnp; ++k) + sum += ptr3[k * pnp + ii] * ptr3[k * pnp + jj]; + ptr1[ii * pnp + jj] += sum; + } + } + + ptr4 = e + idxij.val[rcidxs[j]] * esz; /* set ptr4 to point to e_ij */ + /* compute B_ij^T e_ij and add it to eb_i */ + for (ii = 0; ii < pnp; ++ii) { + for (jj = 0, sum = 0.0; jj < mnp; ++jj) + sum += ptr3[jj * pnp + ii] * ptr4[jj]; + ptr2[ii] += sum; + } + } + } + + /* compute W_ij = A_ij^T B_ij */ // \Sigma here! 
+ /* Recall that A_ij is mnp x cnp and B_ij is mnp x pnp + */ + for (i = ncon; i < n; ++i) { + nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + /* set ptr1 to point to W_ij, actual column number in rcsubs[j] */ + ptr1 = W + idxij.val[rcidxs[j]] * Wsz; + + if (rcsubs[j] < mcon) { /* A_ij is zero */ + //_dblzero(ptr1, Wsz); /* clear W_ij */ + continue; + } + + /* set ptr2 & ptr3 to point to A_ij & B_ij resp. */ + ptr2 = jac + idxij.val[rcidxs[j]] * ABsz; + ptr3 = ptr2 + Asz; + /* compute A_ij^T B_ij and store it in W_ij + * Recall that storage for A_ij, B_ij does not overlap with that for W_ij, + * see the comments related to the initialization of jac above + */ + /* assert(ptr2-ptr1>=Wsz); */ + for (ii = 0; ii < cnp; ++ii) + for (jj = 0; jj < pnp; ++jj) { + for (k = 0, sum = 0.0; k < mnp; ++k) + sum += ptr2[k * cnp + ii] * ptr3[k * pnp + jj]; + ptr1[ii * pnp + jj] = sum; + } + } + } + + /* Compute ||J^T e||_inf and ||p||^2 */ + for (i = 0, p_L2 = eab_inf = 0.0; i < nvars; ++i) { + if (eab_inf < (tmp = FABS(eab[i]))) + eab_inf = tmp; + p_L2 += p[i] * p[i]; + } + // p_L2=sqrt(p_L2); + + /* save diagonal entries so that augmentation can be later canceled. + * Diagonal entries are in U_j and V_i + */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j + for (i = 0; i < cnp; ++i) + ptr2[i] = ptr1[i * cnp + i]; + } + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i + for (j = 0; j < pnp; ++j) + ptr2[j] = ptr1[j * pnp + j]; + } + + /* + if(!(itno%100)){ + printf("Current estimate: "); + for(i=0; i tmp) + tmp = diagUV[i]; + for (i = m * cnp + ncon * pnp; i < nvars; ++i) /* tmp is not re-initialized! */ + if (diagUV[i] > tmp) + tmp = diagUV[i]; + mu = tau * tmp; + } + + /* determine increment using adaptive damping */ + while (1) { + /* augment U, V */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + for (i = 0; i < cnp; ++i) + ptr1[i * cnp + i] += mu; + } + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + for (j = 0; j < pnp; ++j) + ptr1[j * pnp + j] += mu; + + /* compute V*_i^-1. + * Recall that only the upper triangle of the symmetric pnp x pnp matrix V*_i + * is stored in ptr1; its (also symmetric) inverse is saved in the lower triangle of ptr1 + */ + /* inverting V*_i with LDLT seems to result in faster overall execution compared to when using LU or + * Cholesky */ + // j=sba_symat_invert_LU(ptr1, pnp); matinv=sba_symat_invert_LU; + // j=sba_symat_invert_Chol(ptr1, pnp); matinv=sba_symat_invert_Chol; + j = sba_symat_invert_BK(ptr1, pnp); + matinv = sba_symat_invert_BK; + if (!j) { + fprintf(stderr, "SBA: singular matrix V*_i (i=%d) in sba_motstr_levmar_x(), increasing damping\n", + i); + goto moredamping; // increasing damping will eventually make V*_i diagonally dominant, thus + // nonsingular + // retval=SBA_ERROR; + // goto freemem_and_return; + } + } + + _dblzero(E, m * easz); /* clear all e_j */ + /* compute the mmcon x mmcon block matrix S and e_j */ + + /* Note that S is symmetric, therefore its computation can be + * sped up by computing only the upper part and then reusing + * it for the lower one. 
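+ * In block form, the quantity assembled below is
+ *
+ *   S_jk = delta_jk*U*_j - \sum_i Y_ij W_ik^T,  with  Y_ij = W_ij (V*_i)^-1,
+ *
+ * i.e. the reduced camera system obtained by eliminating the structure
+ * blocks V*_i from the augmented normal equations; its right hand sides are
+ * the e_j = ea_j - \sum_i Y_ij eb_i computed further down.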
+ */ + for (j = mcon; j < m; ++j) { + int mmconxUsz = mmcon * Usz; + + nnz = sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero Y_ij, i=0...n-1 */ + + /* get rid of all Y_ij with i= ncon + */ + if (ncon) { + for (i = ii = 0; i < nnz; ++i) { + if (rcsubs[i] >= ncon) { + rcidxs[ii] = rcidxs[i]; + rcsubs[ii++] = rcsubs[i]; + } + } + nnz = ii; + } + + /* compute all Y_ij = W_ij (V*_i)^-1 for a *fixed* j. + * To save memory, the block matrix consisting of the Y_ij + * is not stored. Instead, only a block column of this matrix + * is computed & used at each time: For each j, all nonzero + * Y_ij are computed in Yj and then used in the calculations + * involving S_jk and e_j. + * Recall that W_ij is cnp x pnp and (V*_i) is pnp x pnp + */ + for (i = 0; i < nnz; ++i) { + /* set ptr3 to point to (V*_i)^-1, actual row number in rcsubs[i] */ + ptr3 = V + rcsubs[i] * Vsz; + + /* set ptr1 to point to Y_ij, actual row number in rcsubs[i] */ + ptr1 = Yj + i * Ysz; + /* set ptr2 to point to W_ij resp. */ + ptr2 = W + idxij.val[rcidxs[i]] * Wsz; + /* compute W_ij (V*_i)^-1 and store it in Y_ij. + * Recall that only the lower triangle of (V*_i)^-1 is stored + */ + for (ii = 0; ii < cnp; ++ii) { + ptr4 = ptr2 + ii * pnp; + for (jj = 0; jj < pnp; ++jj) { + for (k = 0, sum = 0.0; k <= jj; ++k) + sum += ptr4[k] * ptr3[jj * pnp + k]; // ptr2[ii*pnp+k]*ptr3[jj*pnp+k]; + for (; k < pnp; ++k) + sum += ptr4[k] * ptr3[k * pnp + jj]; // ptr2[ii*pnp+k]*ptr3[k*pnp+jj]; + ptr1[ii * pnp + jj] = sum; + } + } + } + + /* compute the UPPER TRIANGULAR PART of S */ + for (k = j; k < m; ++k) { // j>=mcon + /* compute \sum_i Y_ij W_ik^T in YWt. Note that for an off-diagonal block defined by j, k + * YWt (and thus S_jk) is nonzero only if there exists a point that is visible in both the + * j-th and k-th images + */ + + /* Recall that Y_ij is cnp x pnp and W_ik is cnp x pnp */ + _dblzero(YWt, YWtsz); /* clear YWt */ + + for (i = 0; i < nnz; ++i) { + register double *pYWt; + + /* find the min and max column indices of the elements in row i (actually rcsubs[i]) + * and make sure that k falls within them. This test handles W_ik's which are + * certain to be zero without bothering to call sba_crsm_elmidx() + */ + ii = idxij.colidx[idxij.rowptr[rcsubs[i]]]; + jj = idxij.colidx[idxij.rowptr[rcsubs[i] + 1] - 1]; + if (k < ii || k > jj) + continue; /* W_ik == 0 */ + + /* set ptr2 to point to W_ik */ + l = sba_crsm_elmidxp(&idxij, rcsubs[i], k, j, rcidxs[i]); + // l=sba_crsm_elmidx(&idxij, rcsubs[i], k); + if (l == -1) + continue; /* W_ik == 0 */ + + ptr2 = W + idxij.val[l] * Wsz; + /* set ptr1 to point to Y_ij, actual row number in rcsubs[i] */ + ptr1 = Yj + i * Ysz; + for (ii = 0; ii < cnp; ++ii) { + ptr3 = ptr1 + ii * pnp; + pYWt = YWt + ii * cnp; + + for (jj = 0; jj < cnp; ++jj) { + ptr4 = ptr2 + jj * pnp; + for (l = 0, sum = 0.0; l < pnp; ++l) + sum += ptr3[l] * ptr4[l]; // ptr1[ii*pnp+l]*ptr2[jj*pnp+l]; + pYWt[jj] += sum; // YWt[ii*cnp+jj]+=sum; + } + } + } + +/* since the linear system involving S is solved with lapack, + * it is preferable to store S in column major (i.e. fortran) + * order, so as to avoid unecessary transposing/copying. 
+*/ +#if MAT_STORAGE == COLUMN_MAJOR + ptr2 = S + (k - mcon) * mmconxUsz + + (j - mcon) * cnp; // set ptr2 to point to the beginning of block j,k in S +#else + ptr2 = S + (j - mcon) * mmconxUsz + + (k - mcon) * cnp; // set ptr2 to point to the beginning of block j,k in S +#endif + + if (j != k) { /* Kronecker */ + for (ii = 0; ii < cnp; ++ii, ptr2 += Sdim) + for (jj = 0; jj < cnp; ++jj) + ptr2[jj] = +#if MAT_STORAGE == COLUMN_MAJOR + -YWt[jj * cnp + ii]; +#else + -YWt[ii * cnp + jj]; +#endif + } else { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + + for (ii = 0; ii < cnp; ++ii, ptr2 += Sdim) + for (jj = 0; jj < cnp; ++jj) + ptr2[jj] = +#if MAT_STORAGE == COLUMN_MAJOR + ptr1[jj * cnp + ii] - YWt[jj * cnp + ii]; +#else + ptr1[ii * cnp + jj] - YWt[ii * cnp + jj]; +#endif + } + } + + /* copy the LOWER TRIANGULAR PART of S from the upper one */ + for (k = mcon; k < j; ++k) { +#if MAT_STORAGE == COLUMN_MAJOR + ptr1 = S + (k - mcon) * mmconxUsz + + (j - mcon) * cnp; // set ptr1 to point to the beginning of block j,k in S + ptr2 = S + (j - mcon) * mmconxUsz + + (k - mcon) * cnp; // set ptr2 to point to the beginning of block k,j in S +#else + ptr1 = S + (j - mcon) * mmconxUsz + + (k - mcon) * cnp; // set ptr1 to point to the beginning of block j,k in S + ptr2 = S + (k - mcon) * mmconxUsz + + (j - mcon) * cnp; // set ptr2 to point to the beginning of block k,j in S +#endif + for (ii = 0; ii < cnp; ++ii, ptr1 += Sdim) + for (jj = 0, ptr3 = ptr2 + ii; jj < cnp; ++jj, ptr3 += Sdim) + ptr1[jj] = *ptr3; + } + + /* compute e_j=ea_j - \sum_i Y_ij eb_i */ + /* Recall that Y_ij is cnp x pnp and eb_i is pnp x 1 */ + ptr1 = E + j * easz; // set ptr1 to point to e_j + + for (i = 0; i < nnz; ++i) { + /* set ptr2 to point to Y_ij, actual row number in rcsubs[i] */ + ptr2 = Yj + i * Ysz; + + /* set ptr3 to point to eb_i */ + ptr3 = eb + rcsubs[i] * ebsz; + for (ii = 0; ii < cnp; ++ii) { + ptr4 = ptr2 + ii * pnp; + for (jj = 0, sum = 0.0; jj < pnp; ++jj) + sum += ptr4[jj] * ptr3[jj]; // ptr2[ii*pnp+jj]*ptr3[jj]; + ptr1[ii] += sum; + } + } + + ptr2 = ea + j * easz; // set ptr2 to point to ea_j + for (i = 0; i < easz; ++i) + ptr1[i] = ptr2[i] - ptr1[i]; + } + +#if 0 + if(verbose>1){ /* count the nonzeros in S */ + for(i=ii=0; i= (p_L2 + eps2) / SBA_EPSILON_SQ) { /* almost singular */ + // if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */ + fprintf(stderr, "SBA: the matrix of the augmented normal equations is almost singular in " + "sba_motstr_levmar_x(),\n" + " minimization should be restarted from the current solution with an increased " + "damping term\n"); + retval = SBA_ERROR; + goto freemem_and_return; + } + + (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); + ++nfev; /* evaluate function at p + dp */ + if (verbose > 1) + printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs)); + /* ### compute ||e(pdp)||_2 */ + if (covx == NULL) + pdp_eL2 = nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */ + else + pdp_eL2 = nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */ + if (!SBA_FINITE(pdp_eL2)) { + if (verbose) /* identify the offending point projection */ + sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs); + + stop = 7; + break; + } + + for (i = 0, dL = 0.0; i < nvars; ++i) + dL += dp[i] * (mu * dp[i] + eab[i]); + + dF = p_eL2 - pdp_eL2; + + if (verbose > 1) + printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, + dL != 0.0 ? 
dF / dL : dF / DBL_EPSILON, p_eL2 / nvis, pdp_eL2 / nvis, p_eL2 / pdp_eL2); + + if (dL > 0.0 && dF > 0.0) { /* reduction in error, increment is accepted */ + tmp = (2.0 * dF / dL - 1.0); + tmp = 1.0 - tmp * tmp * tmp; + mu = mu * ((tmp >= SBA_ONE_THIRD) ? tmp : SBA_ONE_THIRD); + nu = 2; + + /* the test below is equivalent to the relative reduction of the RMS reprojection error: + * sqrt(p_eL2)-sqrt(pdp_eL2)= itmax) + stop = 3; + + /* restore U, V diagonal entries */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j + for (i = 0; i < cnp; ++i) + ptr1[i * cnp + i] = ptr2[i]; + } + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i + for (j = 0; j < pnp; ++j) + ptr1[j * pnp + j] = ptr2[j]; + } + + if (info) { + info[0] = init_p_eL2; + info[1] = p_eL2; + info[2] = eab_inf; + info[3] = dp_L2; + for (j = mcon, tmp = DBL_MIN; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + for (i = 0; i < cnp; ++i) + if (tmp < ptr1[i * cnp + i]) + tmp = ptr1[i * cnp + i]; + } + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + for (j = 0; j < pnp; ++j) + if (tmp < ptr1[j * pnp + j]) + tmp = ptr1[j * pnp + j]; + } + info[4] = mu / tmp; + info[5] = itno; + info[6] = stop; + info[7] = nfev; + info[8] = njev; + info[9] = nlss; + } + + // sba_print_sol(n, m, p, cnp, pnp, x, mnp, &idxij, rcidxs, rcsubs); + retval = (stop != 7) ? itno : SBA_ERROR; + +freemem_and_return: /* NOTE: this point is also reached via a goto! */ + + /* free whatever was allocated */ + free(W); + free(U); + free(V); + free(e); + free(eab); + free(E); + free(Yj); + free(YWt); + free(S); + free(dp); + free(Wtda); + free(rcidxs); + free(rcsubs); +#ifndef SBA_DESTROY_COVS + if (wght) + free(wght); +#else +/* nothing to do */ +#endif /* SBA_DESTROY_COVS */ + + free(hx); + free(diagUV); + free(pdp); + if (fdj_data.hxx) { // cleanup + free(fdj_data.hxx); + free(fdj_data.func_rcidxs); + } + + sba_crsm_free(&idxij); + + /* free the memory allocated by the matrix inversion & linear solver routines */ + if (matinv) + (*matinv)(NULL, 0); + if (linsolver) + (*linsolver)(NULL, NULL, NULL, 0, 0); + + return retval; +} + +/* Bundle adjustment on camera parameters only + * using the sparse Levenberg-Marquardt as described in HZ p. 568 + * + * Returns the number of iterations (>=0) if successfull, SBA_ERROR if failed + */ + +int sba_mot_levmar_x( + const int n, /* number of points */ + const int m, /* number of images */ + const int mcon, /* number of images (starting from the 1st) whose parameters should not be modified. + * All A_ij (see below) with j maxCPvis) + maxCPvis = k; + + /* points, note that maxCPvis is not reinitialized! 
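+ * it is deliberately carried over from the preceding per-point count, so
+ * that after this loop maxCPvis bounds the number of nonzeros in any row or
+ * column of the visibility mask; the rcidxs/rcsubs work arrays below are
+ * sized from it.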
*/ + for (j = 0; j < m; ++j) { + for (i = ii = 0; i < n; ++i) + if (vmask[i * m + j]) + ++ii; + if (ii > maxCPvis) + maxCPvis = ii; + } + + /* allocate work arrays */ + jac = (double *)emalloc(nvis * Asz * sizeof(double)); + U = (double *)emalloc(m * Usz * sizeof(double)); + e = (double *)emalloc(nobs * sizeof(double)); + ea = (double *)emalloc(nvars * sizeof(double)); + dp = (double *)emalloc(nvars * sizeof(double)); + rcidxs = (int *)emalloc(maxCPvis * sizeof(int)); + rcsubs = (int *)emalloc(maxCPvis * sizeof(int)); +#ifndef SBA_DESTROY_COVS + if (covx != NULL) + wght = (double *)emalloc(nvis * covsz * sizeof(double)); +#else + if (covx != NULL) + wght = covx; +#endif /* SBA_DESTROY_COVS */ + + hx = (double *)emalloc(nobs * sizeof(double)); + diagU = (double *)emalloc(nvars * sizeof(double)); + pdp = (double *)emalloc(nvars * sizeof(double)); + + /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */ + if (!fjac) { + fdj_data.func = func; + fdj_data.cnp = cnp; + fdj_data.pnp = 0; + fdj_data.mnp = mnp; + fdj_data.hx = hx; + fdj_data.hxx = (double *)emalloc(nobs * sizeof(double)); + fdj_data.func_rcidxs = (int *)emalloc(2 * maxCPvis * sizeof(int)); + fdj_data.func_rcsubs = fdj_data.func_rcidxs + maxCPvis; + fdj_data.adata = adata; + + fjac = sba_fdjac_x; + jac_adata = (void *)&fdj_data; + } else { + fdj_data.hxx = NULL; + jac_adata = adata; + } + + if (itmax == 0) { /* verify jacobian */ + sba_mot_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, mcon, cnp, mnp, adata, jac_adata); + retval = 0; + goto freemem_and_return; + } + + /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their + * inverses and using the resulting matrices w_x_ij to weigh A_ij and e_ij as w_x_ij A_ij + * and w_x_ij*e_ij. In this way, auxiliary variables as U_j=\sum_i A_ij^T A_ij + * actually become \sum_i (w_x_ij A_ij)^T w_x_ij A_ij= \sum_i A_ij^T w_x_ij^T w_x_ij A_ij = + * A_ij^T Sigma_x_ij^-1 A_ij + * + * ea_j are weighted in a similar manner + */ + if (covx != NULL) { + for (i = 0; i < n; ++i) { + nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */ + ptr1 = covx + (k = idxij.val[rcidxs[j]] * covsz); + ptr2 = wght + k; + if (!sba_mat_cholinv(ptr1, ptr2, mnp)) { /* compute w_x_ij s.t. 
w_x_ij^T w_x_ij = cov_x_ij^-1 */ + fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_motstr_levmar_x()\n", + i, rcsubs[j]); + retval = SBA_ERROR; + goto freemem_and_return; + } + } + } + sba_mat_cholinv(NULL, NULL, 0); /* cleanup */ + } + + /* compute the error vectors e_ij in hx */ + (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); + nfev = 1; + /* ### compute e=x - f(p) [e=w*(x - f(p)] and its L2 norm */ + if (covx == NULL) + p_eL2 = nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */ + else + p_eL2 = nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */ + if (verbose) + printf("initial mot-SBA error %g [%g]\n", p_eL2, p_eL2 / nvis); + init_p_eL2 = p_eL2; + if (!SBA_FINITE(p_eL2)) + stop = 7; + + for (itno = 0; itno < itmax && !stop; ++itno) { + /* Note that p, e and ||e||_2 have been updated at the previous iteration */ + + /* compute derivative submatrices A_ij */ + (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); + ++njev; + + if (covx != NULL) { + /* compute w_x_ij A_ij + * Since w_x_ij is upper triangular, the products can be safely saved + * directly in A_ij, without the need for intermediate storage + */ + for (i = 0; i < nvis; ++i) { + /* set ptr1, ptr2 to point to w_x_ij, A_ij, resp. */ + ptr1 = wght + i * covsz; + ptr2 = jac + i * Asz; + + /* w_x_ij is mnp x mnp, A_ij is mnp x cnp */ + for (ii = 0; ii < mnp; ++ii) + for (jj = 0; jj < cnp; ++jj) { + for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular + sum += ptr1[ii * mnp + k] * ptr2[k * cnp + jj]; + ptr2[ii * cnp + jj] = sum; + } + } + } + + /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here! + /* U_j is symmetric, therefore its computation can be sped up by + * computing only the upper part and then reusing it for the lower one. + * Recall that A_ij is mnp x cnp + */ + /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here! + /* Recall that e_ij is mnp x 1 + */ + _dblzero(U, m * Usz); /* clear all U_j */ + _dblzero(ea, m * easz); /* clear all ea_j */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + ptr2 = ea + j * easz; // set ptr2 to point to ea_j + + nnz = sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ + for (i = 0; i < nnz; ++i) { + /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */ + ptr3 = jac + idxij.val[rcidxs[i]] * Asz; + + /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */ + for (ii = 0; ii < cnp; ++ii) { + for (jj = ii; jj < cnp; ++jj) { + for (k = 0, sum = 0.0; k < mnp; ++k) + sum += ptr3[k * cnp + ii] * ptr3[k * cnp + jj]; + ptr1[ii * cnp + jj] += sum; + } + + /* copy the LOWER TRIANGULAR PART of U_j from the upper one */ + for (jj = 0; jj < ii; ++jj) + ptr1[ii * cnp + jj] = ptr1[jj * cnp + ii]; + } + + ptr4 = e + idxij.val[rcidxs[i]] * esz; /* set ptr4 to point to e_ij */ + /* compute A_ij^T e_ij and add it to ea_j */ + for (ii = 0; ii < cnp; ++ii) { + for (jj = 0, sum = 0.0; jj < mnp; ++jj) + sum += ptr3[jj * cnp + ii] * ptr4[jj]; + ptr2[ii] += sum; + } + } + } + + /* Compute ||J^T e||_inf and ||p||^2 */ + for (i = 0, p_L2 = ea_inf = 0.0; i < nvars; ++i) { + if (ea_inf < (tmp = FABS(ea[i]))) + ea_inf = tmp; + p_L2 += p[i] * p[i]; + } + // p_L2=sqrt(p_L2); + + /* save diagonal entries so that augmentation can be later canceled. 
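+ * (Each pass of the damping loop below adds mu to these entries before
+ * solving; restoring the saved values undoes the augmentation exactly,
+ * rather than trying to subtract the damping back out and accumulating
+ * roundoff.)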
+ * Diagonal entries are in U_j + */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j + for (i = 0; i < cnp; ++i) + ptr2[i] = ptr1[i * cnp + i]; + } + + /* + if(!(itno%100)){ + printf("Current estimate: "); + for(i=0; i tmp) + tmp = diagU[i]; /* find max diagonal element */ + mu = tau * tmp; + } + + /* determine increment using adaptive damping */ + while (1) { + /* augment U */ + for (j = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + for (i = 0; i < cnp; ++i) + ptr1[i * cnp + i] += mu; + } + + /* solve the linear systems U_j da_j = ea_j to compute the da_j */ + _dblzero(dp, mcon * cnp); /* no change for the first mcon camera params */ + for (j = nsolved = mcon; j < m; ++j) { + ptr1 = U + j * Usz; // set ptr1 to point to U_j + ptr2 = ea + j * easz; // set ptr2 to point to ea_j + ptr3 = dp + j * cnp; // set ptr3 to point to da_j + + // nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_LU; + nsolved += sba_Axb_Chol(ptr1, ptr2, ptr3, cnp, 0); + linsolver = sba_Axb_Chol; + // nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_BK; + // nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_QRnoQ; + // nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_QR; + // nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_SVD; + // nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, cnp, (3*cnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0); + // linsolver=(PLS)sba_Axb_CG; + + ++nlss; + } + + if (nsolved == m) { + + /* parameter vector updates are now in dp */ + + /* compute p's new estimate and ||dp||^2 */ + for (i = 0, dp_L2 = 0.0; i < nvars; ++i) { + pdp[i] = p[i] + (tmp = dp[i]); + dp_L2 += tmp * tmp; + } + // dp_L2=sqrt(dp_L2); + + if (dp_L2 <= eps2_sq * p_L2) { /* relative change in p is small, stop */ + // if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */ + stop = 2; + break; + } + + if (dp_L2 >= (p_L2 + eps2) / SBA_EPSILON_SQ) { /* almost singular */ + // if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */ + fprintf( + stderr, + "SBA: the matrix of the augmented normal equations is almost singular in sba_mot_levmar_x(),\n" + " minimization should be restarted from the current solution with an increased damping " + "term\n"); + retval = SBA_ERROR; + goto freemem_and_return; + } + + (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); + ++nfev; /* evaluate function at p + dp */ + if (verbose > 1) + printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs)); + /* ### compute ||e(pdp)||_2 */ + if (covx == NULL) + pdp_eL2 = nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */ + else + pdp_eL2 = nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */ + if (!SBA_FINITE(pdp_eL2)) { + if (verbose) /* identify the offending point projection */ + sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs); + + stop = 7; + break; + } + + for (i = 0, dL = 0.0; i < nvars; ++i) + dL += dp[i] * (mu * dp[i] + ea[i]); + + dF = p_eL2 - pdp_eL2; + + if (verbose > 1) + printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, + dL != 0.0 ? dF / dL : dF / DBL_EPSILON, p_eL2 / nvis, pdp_eL2 / nvis, p_eL2 / pdp_eL2); + + if (dL > 0.0 && dF > 0.0) { /* reduction in error, increment is accepted */ + tmp = (2.0 * dF / dL - 1.0); + tmp = 1.0 - tmp * tmp * tmp; + mu = mu * ((tmp >= SBA_ONE_THIRD) ? 
+
+ /* the test below is equivalent to the relative reduction of the RMS reprojection error:
+ * sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2)
+ */
+ if (p_eL2 - pdp_eL2 < eps4_sq * p_eL2) /* relative reduction in error is small, stop */
+ stop = 4;
+
+ for (i = 0; i < nvars; ++i) /* update p's estimate */
+ p[i] = pdp[i];
+
+ for (i = 0; i < nobs; ++i) /* update e and ||e||_2 */
+ e[i] = hx[i];
+ p_eL2 = pdp_eL2;
+ break;
+ }
+ } /* nsolved==m */
+
+ /* if this point is reached, either at least one linear system could not be solved or
+ * the error did not reduce; in any case, the increment must be rejected
+ */
+
+ mu *= nu;
+ nu2 = nu << 1; // 2*nu;
+ if (nu2 <= nu) { /* nu has wrapped around (overflowed). Thus, mu is also above the threshold and the
+ * problem cannot be solved */
+ fprintf(stderr,
+ "SBA: too many failed attempts to increase the damping factor in sba_mot_levmar_x()!\n");
+ stop = 6;
+ break;
+ }
+ nu = nu2;
+
+ /* restore U diagonal entries */
+ for (j = mcon; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j
+ for (i = 0; i < cnp; ++i)
+ ptr1[i * cnp + i] = ptr2[i];
+ }
+ } /* inner while loop */
+
+ if (p_eL2 <= eps3_sq)
+ stop = 5; // error is small, force termination of outer loop
+ } /* outer iteration loop */
+
+ if (itno >= itmax)
+ stop = 3;
+
+ /* restore U diagonal entries */
+ for (j = mcon; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j
+ for (i = 0; i < cnp; ++i)
+ ptr1[i * cnp + i] = ptr2[i];
+ }
+
+ if (info) {
+ info[0] = init_p_eL2;
+ info[1] = p_eL2;
+ info[2] = ea_inf;
+ info[3] = dp_L2;
+ for (j = mcon, tmp = DBL_MIN; j < m; ++j) {
+ ptr1 = U + j * Usz; // set ptr1 to point to U_j
+ for (i = 0; i < cnp; ++i)
+ if (tmp < ptr1[i * cnp + i])
+ tmp = ptr1[i * cnp + i];
+ }
+ info[4] = mu / tmp;
+ info[5] = itno;
+ info[6] = stop;
+ info[7] = nfev;
+ info[8] = njev;
+ info[9] = nlss;
+ }
+ // sba_print_sol(n, m, p, cnp, 0, x, mnp, &idxij, rcidxs, rcsubs);
+ retval = (stop != 7) ? itno : SBA_ERROR;
+
+freemem_and_return: /* NOTE: this point is also reached via a goto! */
+
+ /* free whatever was allocated */
+ free(jac);
+ free(U);
+ free(e);
+ free(ea);
+ free(dp);
+ free(rcidxs);
+ free(rcsubs);
+#ifndef SBA_DESTROY_COVS
+ if (wght)
+ free(wght);
+#else
+/* nothing to do */
+#endif /* SBA_DESTROY_COVS */
+
+ free(hx);
+ free(diagU);
+ free(pdp);
+ if (fdj_data.hxx) { // cleanup
+ free(fdj_data.hxx);
+ free(fdj_data.func_rcidxs);
+ }
+
+ sba_crsm_free(&idxij);
+
+ /* free the memory allocated by the linear solver routine */
+ if (linsolver)
+ (*linsolver)(NULL, NULL, NULL, 0, 0);
+
+ return retval;
+}
+
+/* Bundle adjustment on structure parameters only
+ * using the sparse Levenberg-Marquardt as described in HZ p. 568
+ *
+ * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
+ */
+
+int sba_str_levmar_x(
+ const int n, /* number of points */
+ const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified.
+ * All B_ij (see below) with i<ncon are assumed to be zero
+ */
+ const int m, /* number of images */
+ char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
+ double *p, /* initial parameter vector p0: (b1, ..., bn). bi are the i-th point parameters, size n*pnp */
+ const int pnp, /* number of parameters for ONE point; e.g. 3 for Euclidean points */
+ double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
+ * x_ij is the projection of the i-th point on the j-th image.
+ * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
+ * see vmask[i, j], max. size n*m*mnp
+ */
+ double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm),
+ * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL
+ * if no covariance estimates are available (identity matrices are implicitly used in
+ * this case). NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also
+ * missing; see vmask[i, j], max. size n*m*mnp*mnp
+ */
+ const int mnp, /* number of parameters for EACH measurement; usually 2 */
+ void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ /* functional relation describing measurements. Given a parameter vector p,
+ * computes a prediction of the measurements \hat{x} in hx, max. size n*m*mnp
+ */
+ void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
+ /* function to evaluate the sparse jacobian dX/dp. The jacobian is returned in jac as
+ * (B_11, ..., B_1m, ..., B_n1, ..., B_nm), max. size n*m*mnp*pnp.
+ * If NULL, the jacobian is approximated with repeated finite differences
+ */
+ void *adata, /* pointer to possibly additional data, passed uninterpreted to func, fjac */
+
+ const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by exit */
+ const int verbose, /* I: verbosity */
+ const double opts[SBA_OPTSSZ], /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively
+ * the scale factor for the initial \mu and the stopping thresholds for
+ * ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2
+ */
+ double info[SBA_INFOSZ] /* O: information regarding the minimization. Set to NULL if don't care */
+) {
+ register int i, j, ii, jj, k;
+ int nvis, nnz, retval;
+
+ /* The following are work arrays that are dynamically allocated by sba_str_levmar_x() */
+ double *jac; /* work array for storing the jacobian, max. size n*m*mnp*pnp */
+ double *V; /* work array for storing the V_i in the order V_1, ..., V_n, size n*pnp*pnp */
+
+ double *e; /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm,
+ * max. size n*m*mnp */
+ double *eb; /* work array for storing the eb_i in the order eb_1, .., eb_n, size n*pnp */
+ double *dp; /* work array for storing the parameter vector updates db_1, ..., db_n, size n*pnp */
+
+ double *wght = NULL; /* work array for storing the weights computed from the covariance inverses,
+ * max. size n*m*mnp*mnp */
+
+ register double *ptr1, *ptr2, *ptr3, *ptr4, sum;
+ struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x; also the location of
+ * B_ij in jac, e_ij in e, etc. Maps a sparse set of pairs (i, j) to a
+ * continuous index k
+ */
+ int maxCPvis, *rcidxs, *rcsubs;
+
+ /* Of the above arrays, jac, e and wght are set up by the user-supplied functions */
+
+ register int itno; /* iteration counter */
+ int nsolved;
+ /* temporary work arrays that are dynamically allocated */
+ double *hx, /* \hat{x}_i, max. size n*m*mnp */
+ *diagV, /* diagonals of V_i, size n*pnp */
+ *pdp; /* p + dp, size n*pnp */
+
+ register double mu, /* damping constant */
+ tmp; /* mainly used in matrix & vector multiplications */
+ double p_eL2, eb_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */
+ double p_L2, dp_L2 = DBL_MAX, dF, dL;
+ double tau = FABS(opts[0]), eps1 = FABS(opts[1]), eps2 = FABS(opts[2]), eps2_sq = opts[2] * opts[2],
+ eps3_sq = opts[3] * opts[3], eps4_sq = opts[4] * opts[4];
+ double init_p_eL2;
+ int nu = 2, nu2, stop = 0, nfev, njev = 0, nlss = 0;
+ int nobs, nvars;
+ const int Bsz = mnp * pnp, Vsz = pnp * pnp, esz = mnp, ebsz = pnp, covsz = mnp * mnp;
+
+ PLS linsolver = NULL;
+
+ struct fdj_data_x_ fdj_data;
+ void *jac_adata;
+
+ /* Initialization */
+ mu = eb_inf = tmp = 0.0; /* -Wall */
+
+ /* count total number of visible image points */
+ for (i = nvis = 0, jj = n * m; i < jj; ++i)
+ nvis += (vmask[i] != 0);
+
+ nobs = nvis * mnp;
+ nvars = n * pnp;
+ if (nvars >= nobs) {
+ fprintf(stderr,
+ "SBA: sba_str_levmar_x() cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n",
+ nobs, nvars);
+ return SBA_ERROR;
+ }
+
+ /* allocate & fill up the idxij structure */
+ sba_crsm_alloc(&idxij, n, m, nvis);
+ for (i = k = 0; i < n; ++i) {
+ idxij.rowptr[i] = k;
+ ii = i * m;
+ for (j = 0; j < m; ++j)
+ if (vmask[ii + j]) {
+ idxij.val[k] = k;
+ idxij.colidx[k++] = j;
+ }
+ }
+ idxij.rowptr[n] = nvis;
+
+ /* find the maximum number (for all cameras) of visible image projections coming from a single 3D point */
+ for (i = maxCPvis = 0; i < n; ++i)
+ if ((k = idxij.rowptr[i + 1] - idxij.rowptr[i]) > maxCPvis)
+ maxCPvis = k;
+
+ /* points, note that maxCPvis is not reinitialized!
+ */
+ for (j = 0; j < m; ++j) {
+ for (i = ii = 0; i < n; ++i)
+ if (vmask[i * m + j])
+ ++ii;
+ if (ii > maxCPvis)
+ maxCPvis = ii;
+ }
+
+ /* allocate work arrays */
+ jac = (double *)emalloc(nvis * Bsz * sizeof(double));
+ V = (double *)emalloc(n * Vsz * sizeof(double));
+ e = (double *)emalloc(nobs * sizeof(double));
+ eb = (double *)emalloc(nvars * sizeof(double));
+ dp = (double *)emalloc(nvars * sizeof(double));
+ rcidxs = (int *)emalloc(maxCPvis * sizeof(int));
+ rcsubs = (int *)emalloc(maxCPvis * sizeof(int));
+#ifndef SBA_DESTROY_COVS
+ if (covx != NULL)
+ wght = (double *)emalloc(nvis * covsz * sizeof(double));
+#else
+ if (covx != NULL)
+ wght = covx;
+#endif /* SBA_DESTROY_COVS */
+
+ hx = (double *)emalloc(nobs * sizeof(double));
+ diagV = (double *)emalloc(nvars * sizeof(double));
+ pdp = (double *)emalloc(nvars * sizeof(double));
+
+ /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */
+ if (!fjac) {
+ fdj_data.func = func;
+ fdj_data.cnp = 0;
+ fdj_data.pnp = pnp;
+ fdj_data.mnp = mnp;
+ fdj_data.hx = hx;
+ fdj_data.hxx = (double *)emalloc(nobs * sizeof(double));
+ fdj_data.func_rcidxs = (int *)emalloc(2 * maxCPvis * sizeof(int));
+ fdj_data.func_rcsubs = fdj_data.func_rcidxs + maxCPvis;
+ fdj_data.adata = adata;
+
+ fjac = sba_fdjac_x;
+ jac_adata = (void *)&fdj_data;
+ } else {
+ fdj_data.hxx = NULL;
+ jac_adata = adata;
+ }
+
+ if (itmax == 0) { /* verify jacobian */
+ sba_str_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, ncon, pnp, mnp, adata, jac_adata);
+ retval = 0;
+ goto freemem_and_return;
+ }
+
+ /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their
+ * inverses and using the resulting matrices w_x_ij to weigh B_ij and e_ij as
+ * w_x_ij*B_ij and w_x_ij*e_ij. In this way, auxiliary variables such as V_i=\sum_j B_ij^T B_ij
+ * actually become \sum_j (w_x_ij B_ij)^T w_x_ij B_ij = \sum_j B_ij^T w_x_ij^T w_x_ij B_ij =
+ * \sum_j B_ij^T Sigma_x_ij^-1 B_ij
+ *
+ * eb_i are weighted in a similar manner
+ */
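+ /* worked example: for an isotropic covariance Sigma_x_ij = s^2*I_mnp, Sigma_x_ij^-1 = I/s^2,
+ * whose upper triangular Cholesky factor is w_x_ij = I/s; the weighting below then simply
+ * divides each B_ij and e_ij by the measurement noise standard deviation s
+ */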
+ if (covx != NULL) {
+ for (i = 0; i < n; ++i) {
+ nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
+ for (j = 0; j < nnz; ++j) {
+ /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */
+ ptr1 = covx + (k = idxij.val[rcidxs[j]] * covsz);
+ ptr2 = wght + k;
+ if (!sba_mat_cholinv(ptr1, ptr2, mnp)) { /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */
+ fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_str_levmar_x()\n",
+ i, rcsubs[j]);
+ retval = SBA_ERROR;
+ goto freemem_and_return;
+ }
+ }
+ }
+ sba_mat_cholinv(NULL, NULL, 0); /* cleanup */
+ }
+
+ /* compute the error vectors e_ij in hx */
+ (*func)(p, &idxij, rcidxs, rcsubs, hx, adata);
+ nfev = 1;
+ /* ### compute e=x - f(p) [e=w*(x - f(p))] and its L2 norm */
+ if (covx == NULL)
+ p_eL2 = nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */
+ else
+ p_eL2 = nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */
+ if (verbose)
+ printf("initial str-SBA error %g [%g]\n", p_eL2, p_eL2 / nvis);
+ init_p_eL2 = p_eL2;
+ if (!SBA_FINITE(p_eL2))
+ stop = 7;
+
+ for (itno = 0; itno < itmax && !stop; ++itno) {
+ /* Note that p, e and ||e||_2 have been updated at the previous iteration */
+
+ /* compute derivative submatrices B_ij */
+ (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata);
+ ++njev;
+
+ if (covx != NULL) {
+ /* compute w_x_ij B_ij.
+ * Since w_x_ij is upper triangular, the products can be safely saved + * directly in B_ij, without the need for intermediate storage + */ + for (i = 0; i < nvis; ++i) { + /* set ptr1, ptr2 to point to w_x_ij, B_ij, resp. */ + ptr1 = wght + i * covsz; + ptr2 = jac + i * Bsz; + + /* w_x_ij is mnp x mnp, B_ij is mnp x pnp */ + for (ii = 0; ii < mnp; ++ii) + for (jj = 0; jj < pnp; ++jj) { + for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular + sum += ptr1[ii * mnp + k] * ptr2[k * pnp + jj]; + ptr2[ii * pnp + jj] = sum; + } + } + } + + /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here! + /* V_i is symmetric, therefore its computation can be sped up by + * computing only the upper part and then reusing it for the lower one. + * Recall that B_ij is mnp x pnp + */ + /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here! + /* Recall that e_ij is mnp x 1 + */ + _dblzero(V, n * Vsz); /* clear all V_i */ + _dblzero(eb, n * ebsz); /* clear all eb_i */ + for (i = ncon; i < n; ++i) { + ptr1 = V + i * Vsz; // set ptr1 to point to V_i + ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i + + nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ + for (j = 0; j < nnz; ++j) { + /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */ + ptr3 = jac + idxij.val[rcidxs[j]] * Bsz; + + /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */ + for (ii = 0; ii < pnp; ++ii) { + for (jj = ii; jj < pnp; ++jj) { + for (k = 0, sum = 0.0; k < mnp; ++k) + sum += ptr3[k * pnp + ii] * ptr3[k * pnp + jj]; + ptr1[ii * pnp + jj] += sum; + } + + /* copy the LOWER TRIANGULAR PART of V_i from the upper one */ + for (jj = 0; jj < ii; ++jj) + ptr1[ii * pnp + jj] = ptr1[jj * pnp + ii]; + } + + ptr4 = e + idxij.val[rcidxs[j]] * esz; /* set ptr4 to point to e_ij */ + /* compute B_ij^T e_ij and add it to eb_i */ + for (ii = 0; ii < pnp; ++ii) { + for (jj = 0, sum = 0.0; jj < mnp; ++jj) + sum += ptr3[jj * pnp + ii] * ptr4[jj]; + ptr2[ii] += sum; + } + } + } + + /* Compute ||J^T e||_inf and ||p||^2 */ + for (i = 0, p_L2 = eb_inf = 0.0; i < nvars; ++i) { + if (eb_inf < (tmp = FABS(eb[i]))) + eb_inf = tmp; + p_L2 += p[i] * p[i]; + } + // p_L2=sqrt(p_L2); + + /* save diagonal entries so that augmentation can be later canceled. 
+ * Diagonal entries are in V_i
+ */
+ for (i = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i
+ for (j = 0; j < pnp; ++j)
+ ptr2[j] = ptr1[j * pnp + j];
+ }
+
+ /*
+ if(!(itno%100)){
+ printf("Current estimate: ");
+ for(i=0; i<nvars; ++i)
+ printf("%.9g ", p[i]);
+ printf("-- errors %.9g %0.9g\n", eb_inf, p_eL2);
+ }
+ */
+
+ /* check for convergence */
+ if ((eb_inf <= eps1)) {
+ dp_L2 = 0.0; /* no increment required */
+ stop = 1;
+ break;
+ }
+
+ /* compute initial damping factor */
+ if (itno == 0) {
+ for (i = ncon * pnp, tmp = DBL_MIN; i < nvars; ++i)
+ if (diagV[i] > tmp)
+ tmp = diagV[i]; /* find max diagonal element */
+ mu = tau * tmp;
+ }
+
+ /* determine increment using adaptive damping */
+ while (1) {
+ /* augment V */
+ for (i = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ for (j = 0; j < pnp; ++j)
+ ptr1[j * pnp + j] += mu;
+ }
+
+ /* solve the linear systems V*_i db_i = eb_i to compute the db_i */
+ _dblzero(dp, ncon * pnp); /* no change for the first ncon point params */
+ for (i = nsolved = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i
+ ptr3 = dp + i * pnp; // set ptr3 to point to db_i
+
+ // nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_LU;
+ nsolved += sba_Axb_Chol(ptr1, ptr2, ptr3, pnp, 0);
+ linsolver = sba_Axb_Chol;
+ // nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_BK;
+ // nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_QRnoQ;
+ // nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_QR;
+ // nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_SVD;
+ // nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, pnp, (3*pnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0);
+ // linsolver=(PLS)sba_Axb_CG;
+
+ ++nlss;
+ }
+
+ if (nsolved == n) {
+
+ /* parameter vector updates are now in dp */
+
+ /* compute p's new estimate and ||dp||^2 */
+ for (i = 0, dp_L2 = 0.0; i < nvars; ++i) {
+ pdp[i] = p[i] + (tmp = dp[i]);
+ dp_L2 += tmp * tmp;
+ }
+ // dp_L2=sqrt(dp_L2);
+
+ if (dp_L2 <= eps2_sq * p_L2) { /* relative change in p is small, stop */
+ // if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
+ stop = 2;
+ break;
+ }
+
+ if (dp_L2 >= (p_L2 + eps2) / SBA_EPSILON_SQ) { /* almost singular */
+ // if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
+ fprintf(stderr, "SBA: the matrix of the augmented normal equations is almost singular in "
+ "sba_str_levmar_x(),\n"
+ " minimization should be restarted from the current solution with an increased "
+ "damping term\n");
+ retval = SBA_ERROR;
+ goto freemem_and_return;
+ }
+
+ (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata);
+ ++nfev; /* evaluate function at p + dp */
+ if (verbose > 1)
+ printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
+ /* ### compute ||e(pdp)||_2 */
+ if (covx == NULL)
+ pdp_eL2 = nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */
+ else
+ pdp_eL2 = nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */
+ if (!SBA_FINITE(pdp_eL2)) {
+ if (verbose) /* identify the offending point projection */
+ sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs);
+
+ stop = 7;
+ break;
+ }
+
+ for (i = 0, dL = 0.0; i < nvars; ++i)
+ dL += dp[i] * (mu * dp[i] + eb[i]);
+
+ dF = p_eL2 - pdp_eL2;
+
+ if (verbose > 1)
+ printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu,
+ dL != 0.0 ? dF / dL : dF / DBL_EPSILON, p_eL2 / nvis, pdp_eL2 / nvis, p_eL2 / pdp_eL2);
+
+ if (dL > 0.0 && dF > 0.0) { /* reduction in error, increment is accepted */
+ tmp = (2.0 * dF / dL - 1.0);
+ tmp = 1.0 - tmp * tmp * tmp;
+ mu = mu * ((tmp >= SBA_ONE_THIRD) ? tmp : SBA_ONE_THIRD);
+ nu = 2;
+
+ /* the test below is equivalent to the relative reduction of the RMS reprojection error:
+ * sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2)
+ */
+ if (p_eL2 - pdp_eL2 < eps4_sq * p_eL2) /* relative reduction in error is small, stop */
+ stop = 4;
+
+ for (i = 0; i < nvars; ++i) /* update p's estimate */
+ p[i] = pdp[i];
+
+ for (i = 0; i < nobs; ++i) /* update e and ||e||_2 */
+ e[i] = hx[i];
+ p_eL2 = pdp_eL2;
+ break;
+ }
+ } /* nsolved==n */
+
+ /* if this point is reached, either at least one linear system could not be solved or
+ * the error did not reduce; in any case, the increment must be rejected
+ */
+
+ mu *= nu;
+ nu2 = nu << 1; // 2*nu;
+ if (nu2 <= nu) { /* nu has wrapped around (overflowed). Thus, mu is also above the threshold and the
+ * problem cannot be solved */
+ fprintf(stderr,
+ "SBA: too many failed attempts to increase the damping factor in sba_str_levmar_x()!\n");
+ stop = 6;
+ break;
+ }
+ nu = nu2;
+
+ /* restore V diagonal entries */
+ for (i = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i
+ for (j = 0; j < pnp; ++j)
+ ptr1[j * pnp + j] = ptr2[j];
+ }
+ } /* inner while loop */
+
+ if (p_eL2 <= eps3_sq)
+ stop = 5; // error is small, force termination of outer loop
+ } /* outer iteration loop */
+
+ if (itno >= itmax)
+ stop = 3;
+
+ /* restore V diagonal entries */
+ for (i = ncon; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i
+ for (j = 0; j < pnp; ++j)
+ ptr1[j * pnp + j] = ptr2[j];
+ }
+
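+ /* summary of the information returned below (cf. sba's documentation): info[0]=||e||_2^2 at
+ * the initial p; info[1]=||e||_2^2, info[2]=||J^T e||_inf, info[3]=||dp||_2^2 and
+ * info[4]=mu/max(diag(V)) at the final p; info[5]=number of iterations; info[6]=reason for
+ * terminating (1: small ||J^T e||_inf, 2: small dp, 3: itmax reached, 4: small relative
+ * reduction in error, 5: small error, 6: repeated failures to increase the damping factor,
+ * 7: infinite/NaN residuals); info[7]=function evaluations; info[8]=jacobian evaluations;
+ * info[9]=number of linear systems solved
+ */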
+ if (info) {
+ info[0] = init_p_eL2;
+ info[1] = p_eL2;
+ info[2] = eb_inf;
+ info[3] = dp_L2;
+ for (i = ncon, tmp = DBL_MIN; i < n; ++i) {
+ ptr1 = V + i * Vsz; // set ptr1 to point to V_i
+ for (j = 0; j < pnp; ++j)
+ if (tmp < ptr1[j * pnp + j])
+ tmp = ptr1[j * pnp + j];
+ }
+ info[4] = mu / tmp;
+ info[5] = itno;
+ info[6] = stop;
+ info[7] = nfev;
+ info[8] = njev;
+ info[9] = nlss;
+ }
+ // sba_print_sol(n, m, p, 0, pnp, x, mnp, &idxij, rcidxs, rcsubs);
+ retval = (stop != 7) ? itno : SBA_ERROR;
+
+freemem_and_return: /* NOTE: this point is also reached via a goto! */
+
+ /* free whatever was allocated */
+ free(jac);
+ free(V);
+ free(e);
+ free(eb);
+ free(dp);
+ free(rcidxs);
+ free(rcsubs);
+#ifndef SBA_DESTROY_COVS
+ if (wght)
+ free(wght);
+#else
+/* nothing to do */
+#endif /* SBA_DESTROY_COVS */
+
+ free(hx);
+ free(diagV);
+ free(pdp);
+ if (fdj_data.hxx) { // cleanup
+ free(fdj_data.hxx);
+ free(fdj_data.func_rcidxs);
+ }
+
+ sba_crsm_free(&idxij);
+
+ /* free the memory allocated by the linear solver routine */
+ if (linsolver)
+ (*linsolver)(NULL, NULL, NULL, 0, 0);
+
+ return retval;
+}
diff --git a/redist/sba/sba_levmar_wrap.c b/redist/sba/sba_levmar_wrap.c
new file mode 100644
index 0000000..159a040
--- /dev/null
+++ b/redist/sba/sba_levmar_wrap.c
@@ -0,0 +1,917 @@
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// Simple drivers for sparse bundle adjustment based on the
+//// Levenberg - Marquardt minimization algorithm
+//// This file provides simple wrappers to the functions defined in sba_levmar.c
+//// Copyright (C) 2004-2009 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+////
+//// This program is distributed in the hope that it will be useful,
+//// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//// GNU General Public License for more details.
+////
+///////////////////////////////////////////////////////////////////////////////////
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+#include <float.h>
+
+#include "sba.h"
+
+#define FABS(x) (((x) >= 0) ? (x) : -(x))
+
+struct wrap_motstr_data_ {
+ void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata); // Q
+ void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij, void *adata); // dQ/da, dQ/db
+ int cnp, pnp, mnp; /* parameter numbers */
+ void *adata;
+};
+
+struct wrap_mot_data_ {
+ void (*proj)(int j, int i, double *aj, double *xij, void *adata); // Q
+ void (*projac)(int j, int i, double *aj, double *Aij, void *adata); // dQ/da
+ int cnp, mnp; /* parameter numbers */
+ void *adata;
+};
+
+struct wrap_str_data_ {
+ void (*proj)(int j, int i, double *bi, double *xij, void *adata); // Q
+ void (*projac)(int j, int i, double *bi, double *Bij, void *adata); // dQ/db
+ int pnp, mnp; /* parameter numbers */
+ void *adata;
+};
+
+/* Routines to compute the estimated measurement vector (i.e. "func") and
+ * its sparse jacobian (i.e. "fjac") needed by the BA expert drivers. The code below
+ * makes use of user-supplied functions computing "Q", "dQ/da", "dQ/db",
+ * i.e. predicted projection and associated jacobians for a SINGLE image measurement.
+ * Notice also that what follows is three sets of "func" and corresponding "fjac" routines.
+ * The first is to be used in full (i.e. motion + structure) BA, the second in
+ * motion only BA and the third in structure only BA.
+ */
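+
+/* As an illustrative sketch (the names below are hypothetical, not part of the sba API):
+ * for the simple drivers further down, a minimal user-supplied "Q" with cnp=3 (camera
+ * modeled by a pure translation aj), pnp=3 and mnp=2 under a unit-focal pinhole model
+ * could look like
+ *
+ * static void my_proj(int j, int i, double *aj, double *bi, double *xij, void *adata)
+ * {
+ * double X = bi[0] + aj[0], Y = bi[1] + aj[1], Z = bi[2] + aj[2]; // point in camera frame
+ *
+ * xij[0] = X / Z; // perspective division onto the image plane
+ * xij[1] = Y / Z;
+ * }
+ *
+ * Supplying projac=NULL to a simple driver selects the corresponding finite-difference
+ * routine (e.g. sba_motstr_Qs_fdjac() below) instead of an analytic jacobian.
+ */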
+
+/* FULL BUNDLE ADJUSTMENT */
+
+/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in
+ * hx the prediction of the measurements, i.e. the projections of 3D points in the m images. The measurements
+ * are returned in the order (hx_11^T, .. hx_1m^T, ..., hx_n1^T, .. hx_nm^T)^T, where hx_ij is the predicted
+ * projection of the i-th point on the j-th camera.
+ * Caller supplies rcidxs and rcsubs which can be used as working memory.
+ * Notice that depending on idxij, some of the hx_ij might be missing
+ *
+ */
+static void sba_motstr_Qs(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata) {
+ register int i, j;
+ int cnp, pnp, mnp;
+ double *pa, *pb, *paj, *pbi, *pxij;
+ int n, m, nnz;
+ struct wrap_motstr_data_ *wdata;
+ void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *proj_adata);
+ void *proj_adata;
+
+ wdata = (struct wrap_motstr_data_ *)adata;
+ cnp = wdata->cnp;
+ pnp = wdata->pnp;
+ mnp = wdata->mnp;
+ proj = wdata->proj;
+ proj_adata = wdata->adata;
+
+ n = idxij->nr;
+ m = idxij->nc;
+ pa = p;
+ pb = p + m * cnp;
+
+ for (j = 0; j < m; ++j) {
+ /* j-th camera parameters */
+ paj = pa + j * cnp;
+
+ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */
+
+ for (i = 0; i < nnz; ++i) {
+ pbi = pb + rcsubs[i] * pnp;
+ pxij = hx + idxij->val[rcidxs[i]] * mnp; // set pxij to point to hx_ij
+
+ (*proj)(j, rcsubs[i], paj, pbi, pxij, proj_adata); // evaluate Q in pxij
+ }
+ }
+}
+
+/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in
+ * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images.
+ * The jacobian is returned in the order (A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm),
+ * where A_ij=dx_ij/da_j and B_ij=dx_ij/db_i (see HZ).
+ * Caller supplies rcidxs and rcsubs which can be used as working memory.
+ * Notice that depending on idxij, some of the A_ij, B_ij might be missing + * + */ +static void sba_motstr_Qs_jac(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata) { + register int i, j; + int cnp, pnp, mnp; + double *pa, *pb, *paj, *pbi, *pAij, *pBij; + int n, m, nnz, Asz, Bsz, ABsz, idx; + struct wrap_motstr_data_ *wdata; + void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij, void *projac_adata); + void *projac_adata; + + wdata = (struct wrap_motstr_data_ *)adata; + cnp = wdata->cnp; + pnp = wdata->pnp; + mnp = wdata->mnp; + projac = wdata->projac; + projac_adata = wdata->adata; + + n = idxij->nr; + m = idxij->nc; + pa = p; + pb = p + m * cnp; + Asz = mnp * cnp; + Bsz = mnp * pnp; + ABsz = Asz + Bsz; + + for (j = 0; j < m; ++j) { + /* j-th camera parameters */ + paj = pa + j * cnp; + + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */ + + for (i = 0; i < nnz; ++i) { + pbi = pb + rcsubs[i] * pnp; + idx = idxij->val[rcidxs[i]]; + pAij = jac + idx * ABsz; // set pAij to point to A_ij + pBij = pAij + Asz; // set pBij to point to B_ij + + (*projac)(j, rcsubs[i], paj, pbi, pAij, pBij, projac_adata); // evaluate dQ/da, dQ/db in pAij, pBij + } + } +} + +/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in + * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. + * The jacobian is approximated with the aid of finite differences and is returned in the order + * (A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm), + * where A_ij=dx_ij/da_j and B_ij=dx_ij/db_i (see HZ). + * Notice that depending on idxij, some of the A_ij, B_ij might be missing + * + * Problem-specific information is assumed to be stored in a structure pointed to by "dat". 
+ * + * NOTE: This function is provided mainly for illustration purposes; in case that execution time is a concern, + * the jacobian should be computed analytically + */ +static void sba_motstr_Qs_fdjac( + double *p, /* I: current parameter estimate, (m*cnp+n*pnp)x1 */ + struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */ + int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */ + int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */ + double *jac, /* O: array for storing the approximated jacobian */ + void *dat) /* I: points to a "wrap_motstr_data_" structure */ +{ + register int i, j, ii, jj; + double *pa, *pb, *paj, *pbi; + register double *pAB; + int n, m, nnz, Asz, Bsz, ABsz; + + double tmp; + register double d, d1; + + struct wrap_motstr_data_ *fdjd; + void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata); + double *hxij, *hxxij; + int cnp, pnp, mnp; + void *adata; + + /* retrieve problem-specific information passed in *dat */ + fdjd = (struct wrap_motstr_data_ *)dat; + proj = fdjd->proj; + cnp = fdjd->cnp; + pnp = fdjd->pnp; + mnp = fdjd->mnp; + adata = fdjd->adata; + + n = idxij->nr; + m = idxij->nc; + pa = p; + pb = p + m * cnp; + Asz = mnp * cnp; + Bsz = mnp * pnp; + ABsz = Asz + Bsz; + + /* allocate memory for hxij, hxxij */ + if ((hxij = malloc(2 * mnp * sizeof(double))) == NULL) { + fprintf(stderr, "memory allocation request failed in sba_motstr_Qs_fdjac()!\n"); + exit(1); + } + hxxij = hxij + mnp; + + /* compute A_ij */ + for (j = 0; j < m; ++j) { + paj = pa + j * cnp; // j-th camera parameters + + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ + for (jj = 0; jj < cnp; ++jj) { + /* determine d=max(SBA_DELTA_SCALE*|paj[jj]|, SBA_MIN_DELTA), see HZ */ + d = (double)(SBA_DELTA_SCALE)*paj[jj]; // force evaluation + d = FABS(d); + if (d < SBA_MIN_DELTA) + d = SBA_MIN_DELTA; + d1 = 1.0 / d; /* invert so that divisions can be carried out faster as multiplications */ + + for (i = 0; i < nnz; ++i) { + pbi = pb + rcsubs[i] * pnp; // i-th point parameters + (*proj)(j, rcsubs[i], paj, pbi, hxij, adata); // evaluate supplied function on current solution + + tmp = paj[jj]; + paj[jj] += d; + (*proj)(j, rcsubs[i], paj, pbi, hxxij, adata); + paj[jj] = tmp; /* restore */ + + pAB = jac + idxij->val[rcidxs[i]] * ABsz; // set pAB to point to A_ij + for (ii = 0; ii < mnp; ++ii) + pAB[ii * cnp + jj] = (hxxij[ii] - hxij[ii]) * d1; + } + } + } + + /* compute B_ij */ + for (i = 0; i < n; ++i) { + pbi = pb + i * pnp; // i-th point parameters + + nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ + for (jj = 0; jj < pnp; ++jj) { + /* determine d=max(SBA_DELTA_SCALE*|pbi[jj]|, SBA_MIN_DELTA), see HZ */ + d = (double)(SBA_DELTA_SCALE)*pbi[jj]; // force evaluation + d = FABS(d); + if (d < SBA_MIN_DELTA) + d = SBA_MIN_DELTA; + d1 = 1.0 / d; /* invert so that divisions can be carried out faster as multiplications */ + + for (j = 0; j < nnz; ++j) { + paj = pa + rcsubs[j] * cnp; // j-th camera parameters + (*proj)(rcsubs[j], i, paj, pbi, hxij, adata); // evaluate supplied function on current solution + + tmp = pbi[jj]; + pbi[jj] += d; + (*proj)(rcsubs[j], i, paj, pbi, hxxij, adata); + pbi[jj] = tmp; /* restore */ + + pAB = jac + idxij->val[rcidxs[j]] * ABsz + Asz; // set pAB to point to B_ij + for (ii = 0; ii < mnp; ++ii) + pAB[ii * pnp + jj] = (hxxij[ii] - hxij[ii]) * d1; + } + 
}
+ }
+
+ free(hxij);
+}
+
+/* BUNDLE ADJUSTMENT FOR CAMERA PARAMETERS ONLY */
+
+/* Given a parameter vector p made up of the parameters of m cameras, compute in
+ * hx the prediction of the measurements, i.e. the projections of 3D points in the m images.
+ * The measurements are returned in the order (hx_11^T, .. hx_1m^T, ..., hx_n1^T, .. hx_nm^T)^T,
+ * where hx_ij is the predicted projection of the i-th point on the j-th camera.
+ * Caller supplies rcidxs and rcsubs which can be used as working memory.
+ * Notice that depending on idxij, some of the hx_ij might be missing
+ *
+ */
+static void sba_mot_Qs(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata) {
+ register int i, j;
+ int cnp, mnp;
+ double *paj, *pxij;
+ // int n;
+ int m, nnz;
+ struct wrap_mot_data_ *wdata;
+ void (*proj)(int j, int i, double *aj, double *xij, void *proj_adata);
+ void *proj_adata;
+
+ wdata = (struct wrap_mot_data_ *)adata;
+ cnp = wdata->cnp;
+ mnp = wdata->mnp;
+ proj = wdata->proj;
+ proj_adata = wdata->adata;
+
+ // n=idxij->nr;
+ m = idxij->nc;
+
+ for (j = 0; j < m; ++j) {
+ /* j-th camera parameters */
+ paj = p + j * cnp;
+
+ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */
+
+ for (i = 0; i < nnz; ++i) {
+ pxij = hx + idxij->val[rcidxs[i]] * mnp; // set pxij to point to hx_ij
+
+ (*proj)(j, rcsubs[i], paj, pxij, proj_adata); // evaluate Q in pxij
+ }
+ }
+}
+
+/* Given a parameter vector p made up of the parameters of m cameras, compute in jac
+ * the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images.
+ * The jacobian is returned in the order (A_11, ..., A_1m, ..., A_n1, ..., A_nm),
+ * where A_ij=dx_ij/da_j (see HZ).
+ * Caller supplies rcidxs and rcsubs which can be used as working memory.
+ * Notice that depending on idxij, some of the A_ij might be missing
+ *
+ */
+static void sba_mot_Qs_jac(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata) {
+ register int i, j;
+ int cnp, mnp;
+ double *paj, *pAij;
+ // int n;
+ int m, nnz, Asz, idx;
+ struct wrap_mot_data_ *wdata;
+ void (*projac)(int j, int i, double *aj, double *Aij, void *projac_adata);
+ void *projac_adata;
+
+ wdata = (struct wrap_mot_data_ *)adata;
+ cnp = wdata->cnp;
+ mnp = wdata->mnp;
+ projac = wdata->projac;
+ projac_adata = wdata->adata;
+
+ // n=idxij->nr;
+ m = idxij->nc;
+ Asz = mnp * cnp;
+
+ for (j = 0; j < m; ++j) {
+ /* j-th camera parameters */
+ paj = p + j * cnp;
+
+ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */
+
+ for (i = 0; i < nnz; ++i) {
+ idx = idxij->val[rcidxs[i]];
+ pAij = jac + idx * Asz; // set pAij to point to A_ij
+
+ (*projac)(j, rcsubs[i], paj, pAij, projac_adata); // evaluate dQ/da in pAij
+ }
+ }
+}
+
+/* Given a parameter vector p made up of the parameters of m cameras, compute in jac the jacobian
+ * of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images.
+ * The jacobian is approximated with the aid of finite differences and is returned in the order
+ * (A_11, ..., A_1m, ..., A_n1, ..., A_nm), where A_ij=dx_ij/da_j (see HZ).
+ * Notice that depending on idxij, some of the A_ij might be missing
+ *
+ * Problem-specific information is assumed to be stored in a structure pointed to by "dat".
+ * + * NOTE: This function is provided mainly for illustration purposes; in case that execution time is a concern, + * the jacobian should be computed analytically + */ +static void sba_mot_Qs_fdjac( + double *p, /* I: current parameter estimate, (m*cnp)x1 */ + struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */ + int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */ + int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */ + double *jac, /* O: array for storing the approximated jacobian */ + void *dat) /* I: points to a "wrap_mot_data_" structure */ +{ + register int i, j, ii, jj; + double *paj; + register double *pA; + // int n; + int m, nnz, Asz; + + double tmp; + register double d, d1; + + struct wrap_mot_data_ *fdjd; + void (*proj)(int j, int i, double *aj, double *xij, void *adata); + double *hxij, *hxxij; + int cnp, mnp; + void *adata; + + /* retrieve problem-specific information passed in *dat */ + fdjd = (struct wrap_mot_data_ *)dat; + proj = fdjd->proj; + cnp = fdjd->cnp; + mnp = fdjd->mnp; + adata = fdjd->adata; + + // n=idxij->nr; + m = idxij->nc; + Asz = mnp * cnp; + + /* allocate memory for hxij, hxxij */ + if ((hxij = malloc(2 * mnp * sizeof(double))) == NULL) { + fprintf(stderr, "memory allocation request failed in sba_mot_Qs_fdjac()!\n"); + exit(1); + } + hxxij = hxij + mnp; + + /* compute A_ij */ + for (j = 0; j < m; ++j) { + paj = p + j * cnp; // j-th camera parameters + + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ + for (jj = 0; jj < cnp; ++jj) { + /* determine d=max(SBA_DELTA_SCALE*|paj[jj]|, SBA_MIN_DELTA), see HZ */ + d = (double)(SBA_DELTA_SCALE)*paj[jj]; // force evaluation + d = FABS(d); + if (d < SBA_MIN_DELTA) + d = SBA_MIN_DELTA; + d1 = 1.0 / d; /* invert so that divisions can be carried out faster as multiplications */ + + for (i = 0; i < nnz; ++i) { + (*proj)(j, rcsubs[i], paj, hxij, adata); // evaluate supplied function on current solution + + tmp = paj[jj]; + paj[jj] += d; + (*proj)(j, rcsubs[i], paj, hxxij, adata); + paj[jj] = tmp; /* restore */ + + pA = jac + idxij->val[rcidxs[i]] * Asz; // set pA to point to A_ij + for (ii = 0; ii < mnp; ++ii) + pA[ii * cnp + jj] = (hxxij[ii] - hxij[ii]) * d1; + } + } + } + + free(hxij); +} + +/* BUNDLE ADJUSTMENT FOR STRUCTURE PARAMETERS ONLY */ + +/* Given a parameter vector p made up of the 3D coordinates of n points, compute in + * hx the prediction of the measurements, i.e. the projections of 3D points in the m images. The measurements + * are returned in the order (hx_11^T, .. hx_1m^T, ..., hx_n1^T, .. hx_nm^T)^T, where hx_ij is the predicted + * projection of the i-th point on the j-th camera. + * Caller supplies rcidxs and rcsubs which can be used as working memory. 
+ * Notice that depending on idxij, some of the hx_ij might be missing + * + */ +static void sba_str_Qs(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata) { + register int i, j; + int pnp, mnp; + double *pbi, *pxij; + // int n; + int m, nnz; + struct wrap_str_data_ *wdata; + void (*proj)(int j, int i, double *bi, double *xij, void *proj_adata); + void *proj_adata; + + wdata = (struct wrap_str_data_ *)adata; + pnp = wdata->pnp; + mnp = wdata->mnp; + proj = wdata->proj; + proj_adata = wdata->adata; + + // n=idxij->nr; + m = idxij->nc; + + for (j = 0; j < m; ++j) { + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */ + + for (i = 0; i < nnz; ++i) { + pbi = p + rcsubs[i] * pnp; + pxij = hx + idxij->val[rcidxs[i]] * mnp; // set pxij to point to hx_ij + + (*proj)(j, rcsubs[i], pbi, pxij, proj_adata); // evaluate Q in pxij + } + } +} + +/* Given a parameter vector p made up of the 3D coordinates of n points, compute in + * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. + * The jacobian is returned in the order (B_11, ..., B_1m, ..., B_n1, ..., B_nm), where B_ij=dx_ij/db_i (see HZ). + * Caller supplies rcidxs and rcsubs which can be used as working memory. + * Notice that depending on idxij, some of the B_ij might be missing + * + */ +static void sba_str_Qs_jac(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata) { + register int i, j; + int pnp, mnp; + double *pbi, *pBij; + // int n; + int m, nnz, Bsz, idx; + struct wrap_str_data_ *wdata; + void (*projac)(int j, int i, double *bi, double *Bij, void *projac_adata); + void *projac_adata; + + wdata = (struct wrap_str_data_ *)adata; + pnp = wdata->pnp; + mnp = wdata->mnp; + projac = wdata->projac; + projac_adata = wdata->adata; + + // n=idxij->nr; + m = idxij->nc; + Bsz = mnp * pnp; + + for (j = 0; j < m; ++j) { + + nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */ + + for (i = 0; i < nnz; ++i) { + pbi = p + rcsubs[i] * pnp; + idx = idxij->val[rcidxs[i]]; + pBij = jac + idx * Bsz; // set pBij to point to B_ij + + (*projac)(j, rcsubs[i], pbi, pBij, projac_adata); // evaluate dQ/db in pBij + } + } +} + +/* Given a parameter vector p made up of the 3D coordinates of n points, compute in + * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. + * The jacobian is approximated with the aid of finite differences and is returned in the order + * (B_11, ..., B_1m, ..., B_n1, ..., B_nm), where B_ij=dx_ij/db_i (see HZ). + * Notice that depending on idxij, some of the B_ij might be missing + * + * Problem-specific information is assumed to be stored in a structure pointed to by "dat". 
+ *
+ * NOTE: This function is provided mainly for illustration purposes; in case that execution time is a concern,
+ * the jacobian should be computed analytically
+ */
+static void sba_str_Qs_fdjac(
+ double *p, /* I: current parameter estimate, (n*pnp)x1 */
+ struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */
+ int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */
+ int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */
+ double *jac, /* O: array for storing the approximated jacobian */
+ void *dat) /* I: points to a "wrap_str_data_" structure */
+{
+ register int i, j, ii, jj;
+ double *pbi;
+ register double *pB;
+ // int m;
+ int n, nnz, Bsz;
+
+ double tmp;
+ register double d, d1;
+
+ struct wrap_str_data_ *fdjd;
+ void (*proj)(int j, int i, double *bi, double *xij, void *adata);
+ double *hxij, *hxxij;
+ int pnp, mnp;
+ void *adata;
+
+ /* retrieve problem-specific information passed in *dat */
+ fdjd = (struct wrap_str_data_ *)dat;
+ proj = fdjd->proj;
+ pnp = fdjd->pnp;
+ mnp = fdjd->mnp;
+ adata = fdjd->adata;
+
+ n = idxij->nr;
+ // m=idxij->nc;
+ Bsz = mnp * pnp;
+
+ /* allocate memory for hxij, hxxij */
+ if ((hxij = malloc(2 * mnp * sizeof(double))) == NULL) {
+ fprintf(stderr, "memory allocation request failed in sba_str_Qs_fdjac()!\n");
+ exit(1);
+ }
+ hxxij = hxij + mnp;
+
+ /* compute B_ij */
+ for (i = 0; i < n; ++i) {
+ pbi = p + i * pnp; // i-th point parameters
+
+ nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
+ for (jj = 0; jj < pnp; ++jj) {
+ /* determine d=max(SBA_DELTA_SCALE*|pbi[jj]|, SBA_MIN_DELTA), see HZ */
+ d = (double)(SBA_DELTA_SCALE)*pbi[jj]; // force evaluation
+ d = FABS(d);
+ if (d < SBA_MIN_DELTA)
+ d = SBA_MIN_DELTA;
+ d1 = 1.0 / d; /* invert so that divisions can be carried out faster as multiplications */
+
+ for (j = 0; j < nnz; ++j) {
+ (*proj)(rcsubs[j], i, pbi, hxij, adata); // evaluate supplied function on current solution
+
+ tmp = pbi[jj];
+ pbi[jj] += d;
+ (*proj)(rcsubs[j], i, pbi, hxxij, adata);
+ pbi[jj] = tmp; /* restore */
+
+ pB = jac + idxij->val[rcidxs[j]] * Bsz; // set pB to point to B_ij
+ for (ii = 0; ii < mnp; ++ii)
+ pB[ii * pnp + jj] = (hxxij[ii] - hxij[ii]) * d1;
+ }
+ }
+ }
+
+ free(hxij);
+}
+
+/*
+ * Simple driver to sba_motstr_levmar_x for bundle adjustment on camera and structure parameters.
+ *
+ * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
+ */
+
+int sba_motstr_levmar(
+ const int n, /* number of points */
+ const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified.
+ * All B_ij (see below) with i<ncon are assumed to be zero
+ */
+ const int m, /* number of images */
+ const int mcon, /* number of images (starting from the 1st) whose parameters should not be modified.
+ * All A_ij (see below) with j<mcon are assumed to be zero
+ */
+ char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
+ double *p, /* initial parameter vector p0: (a1, ..., am, b1, ..., bn).
+ * aj are the image j parameters, bi are the i-th point parameters,
+ * size m*cnp + n*pnp
+ */
+ const int cnp, /* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
+ const int pnp, /* number of parameters for ONE point; e.g. 3 for Euclidean points */
+ double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
+ * x_ij is the projection of the i-th point on the j-th image.
+ * NOTE: some of the x_ij might be missing; see vmask[i, j], max. size n*m*mnp
+ */
+ double *covx, /* measurements covariance matrices, set to NULL if no covariance estimates
+ * are available. max. size n*m*mnp*mnp
+ */
+ const int mnp, /* number of parameters for EACH measurement; usually 2 */
+ void (*proj)(int j, int i, double *aj, double *bi, double *xij, void *adata),
+ /* functional relation computing the projection of point i on camera j (i.e. Q) */
+ void (*projac)(int j, int i, double *aj, double *bi, double *Aij, double *Bij, void *adata),
+ /* function computing the jacobians dQ/da, dQ/db of Q w.r.t. aj and bi.
+ * If NULL, the jacobian is approximated with finite differences
+ */
+ void *adata, /* pointer to possibly additional data, passed uninterpreted to proj, projac */
+
+ const int itmax, /* I: maximum number of iterations */
+ const int verbose, /* I: verbosity */
+ const double opts[SBA_OPTSSZ], /* I: minim. options, see sba_motstr_levmar_x() */
+ double info[SBA_INFOSZ] /* O: information regarding the minimization, see sba_motstr_levmar_x() */
+) {
+ int retval;
+ struct wrap_motstr_data_ wdata;
+
+ wdata.proj = proj;
+ wdata.projac = projac;
+ wdata.cnp = cnp;
+ wdata.pnp = pnp;
+ wdata.mnp = mnp;
+ wdata.adata = adata;
+
+ retval = sba_motstr_levmar_x(n, ncon, m, mcon, vmask, p, cnp, pnp, x, covx, mnp, sba_motstr_Qs,
+ (projac != NULL) ? sba_motstr_Qs_jac : sba_motstr_Qs_fdjac, (void *)&wdata, itmax,
+ verbose, opts, info);
+
+ return retval;
+}
+
+/*
+ * Simple driver to sba_mot_levmar_x for bundle adjustment on camera parameters only.
+ *
+ * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
+ */
+
+int sba_mot_levmar(
+ const int n, /* number of points */
+ const int m, /* number of images */
+ const int mcon, /* number of images (starting from the 1st) whose parameters should not be modified.
+ * All A_ij (see below) with j<mcon are assumed to be zero
+ */
+ char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
+ double *p, /* initial parameter vector p0: (a1, ..., am). aj are the image j parameters, size m*cnp */
+ const int cnp, /* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
+ double *x, /* measurements vector, as above. max. size n*m*mnp */
+ double *covx, /* measurements covariance matrices, set to NULL if no covariance estimates
+ * are available. max. size n*m*mnp*mnp
+ */
+ const int mnp, /* number of parameters for EACH measurement; usually 2 */
+ void (*proj)(int j, int i, double *aj, double *xij, void *adata),
+ /* functional relation computing the projection of point i on camera j (i.e. Q) */
+ void (*projac)(int j, int i, double *aj, double *Aij, void *adata),
+ /* function computing the jacobian dQ/da of Q w.r.t. aj.
+ * If NULL, the jacobian is approximated with finite differences
+ */
+ void *adata, /* pointer to possibly additional data, passed uninterpreted to proj, projac */
+
+ const int itmax, /* I: maximum number of iterations */
+ const int verbose, /* I: verbosity */
+ const double opts[SBA_OPTSSZ], /* I: minim. options, see sba_mot_levmar_x() */
+ double info[SBA_INFOSZ] /* O: information regarding the minimization, see sba_mot_levmar_x() */
+) {
+ int retval;
+ struct wrap_mot_data_ wdata;
+
+ wdata.proj = proj;
+ wdata.projac = projac;
+ wdata.cnp = cnp;
+ wdata.mnp = mnp;
+ wdata.adata = adata;
+
+ retval = sba_mot_levmar_x(n, m, mcon, vmask, p, cnp, x, covx, mnp, sba_mot_Qs,
+ (projac != NULL) ? sba_mot_Qs_jac : sba_mot_Qs_fdjac, (void *)&wdata, itmax, verbose,
+ opts, info);
+
+ return retval;
+}
+
+/*
+ * Simple driver to sba_str_levmar_x for bundle adjustment on structure parameters only.
+ *
+ * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
+ */
+
+int sba_str_levmar(
+ const int n, /* number of points */
+ const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified.
+ * All B_ij (see below) with i