/////////////////////////////////////////////////////////////////////////////////
////
////  Expert drivers for sparse bundle adjustment based on the
////  Levenberg-Marquardt minimization algorithm
////  Copyright (C) 2004-2009 Manolis Lourakis (lourakis at ics forth gr)
////  Institute of Computer Science, Foundation for Research & Technology - Hellas
////  Heraklion, Crete, Greece.
////
////  This program is free software; you can redistribute it and/or modify
////  it under the terms of the GNU General Public License as published by
////  the Free Software Foundation; either version 2 of the License, or
////  (at your option) any later version.
////
////  This program is distributed in the hope that it will be useful,
////  but WITHOUT ANY WARRANTY; without even the implied warranty of
////  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
////  GNU General Public License for more details.
////
///////////////////////////////////////////////////////////////////////////////////

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>

#include "compiler.h"
#include "sba.h"
#include "sba_chkjac.h"

#define SBA_EPSILON 1E-12
#define SBA_EPSILON_SQ ((SBA_EPSILON) * (SBA_EPSILON))

#define SBA_ONE_THIRD 0.3333333334 /* 1.0/3.0 */

#define emalloc(sz) emalloc_(__FILE__, __LINE__, sz)

#define FABS(x) (((x) >= 0) ? (x) : -(x))

#define ROW_MAJOR 0
#define COLUMN_MAJOR 1
#define MAT_STORAGE COLUMN_MAJOR

/* contains information necessary for computing a finite difference approximation to a jacobian,
 * e.g. function to differentiate, problem dimensions and pointers to working memory buffers */
struct fdj_data_x_ {
    void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx,
                 void *adata);      /* function to differentiate */
    int cnp, pnp, mnp;              /* parameter numbers */
    int *func_rcidxs, *func_rcsubs; /* working memory for func invocations.
                                     * Notice that this has to be different
                                     * than the working memory used for
                                     * evaluating the jacobian!
                                     */
    double *hx, *hxx;               /* memory to save results in */
    void *adata;
};

/* auxiliary memory allocation routine with error checking */
inline static void *emalloc_(char *file, int line, size_t sz)
{
    void *ptr;

    ptr = (void *)malloc(sz);
    if (ptr == NULL) {
        fprintf(stderr, "SBA: memory allocation request for %zu bytes failed in file %s, line %d, exiting", sz,
                file, line);
        exit(1);
    }

    return ptr;
}

/* auxiliary routine for clearing an array of doubles */
inline static void _dblzero(register double *arr, register int count)
{
    while (--count >= 0)
        *arr++ = 0.0;
}

/* auxiliary routine for computing the mean reprojection error; used for debugging */
static double sba_mean_repr_error(int n, int mnp, double *x, double *hx, struct sba_crsm *idxij, int *rcidxs,
                                  int *rcsubs)
{
    register int i, j;
    int nnz, nprojs;
    double *ptr1, *ptr2;
    double err;

    for (i = nprojs = 0, err = 0.0; i < n; ++i) {
        nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
        nprojs += nnz;
        for (j = 0; j < nnz; ++j) { /* point i projecting on camera rcsubs[j] */
            ptr1 = x + idxij->val[rcidxs[j]] * mnp;
            ptr2 = hx + idxij->val[rcidxs[j]] * mnp;

            err += sqrt((ptr1[0] - ptr2[0]) * (ptr1[0] - ptr2[0]) + (ptr1[1] - ptr2[1]) * (ptr1[1] - ptr2[1]));
        }
    }

    return err / ((double)(nprojs));
}

/* Compute e=x-y for two n-vectors x and y and return the squared L2 norm of e.
 * e can coincide with either x or y.
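 * A plain (unoptimized) reference implementation of what follows is simply
 *   for(i=0, sum=0.0; i<n; ++i){ e[i]=x[i]-y[i]; sum+=e[i]*e[i]; }
 * the version below computes exactly the same quantity, restructured for speed.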
* Uses loop unrolling and blocking to reduce bookkeeping overhead & pipeline * stalls and increase instruction-level parallelism; see http://www.abarnett.demon.co.uk/tutorial.html */ static double nrmL2xmy(double *const e, const double *const x, const double *const y, const int n) { const int blocksize = 8, bpwr = 3; /* 8=2^3 */ register int i; int j1, j2, j3, j4, j5, j6, j7; int blockn; register double sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3 = 0.0; /* n may not be divisible by blocksize, * go as near as we can first, then tidy up. */ blockn = (n >> bpwr) << bpwr; /* (n / blocksize) * blocksize; */ /* unroll the loop in blocks of `blocksize'; looping downwards gains some more speed */ for (i = blockn - 1; i > 0; i -= blocksize) { e[i] = x[i] - y[i]; sum0 += e[i] * e[i]; j1 = i - 1; e[j1] = x[j1] - y[j1]; sum1 += e[j1] * e[j1]; j2 = i - 2; e[j2] = x[j2] - y[j2]; sum2 += e[j2] * e[j2]; j3 = i - 3; e[j3] = x[j3] - y[j3]; sum3 += e[j3] * e[j3]; j4 = i - 4; e[j4] = x[j4] - y[j4]; sum0 += e[j4] * e[j4]; j5 = i - 5; e[j5] = x[j5] - y[j5]; sum1 += e[j5] * e[j5]; j6 = i - 6; e[j6] = x[j6] - y[j6]; sum2 += e[j6] * e[j6]; j7 = i - 7; e[j7] = x[j7] - y[j7]; sum3 += e[j7] * e[j7]; } /* * There may be some left to do. * This could be done as a simple for() loop, * but a switch is faster (and more interesting) */ i = blockn; if (i < n) { /* Jump into the case at the place that will allow * us to finish off the appropriate number of items. */ switch (n - i) { case 7: e[i] = x[i] - y[i]; sum0 += e[i] * e[i]; ++i; case 6: e[i] = x[i] - y[i]; sum0 += e[i] * e[i]; ++i; case 5: e[i] = x[i] - y[i]; sum0 += e[i] * e[i]; ++i; case 4: e[i] = x[i] - y[i]; sum0 += e[i] * e[i]; ++i; case 3: e[i] = x[i] - y[i]; sum0 += e[i] * e[i]; ++i; case 2: e[i] = x[i] - y[i]; sum0 += e[i] * e[i]; ++i; case 1: e[i] = x[i] - y[i]; sum0 += e[i] * e[i]; ++i; } } return sum0 + sum1 + sum2 + sum3; } /* Compute e=W*(x-y) for two n-vectors x and y and return the squared L2 norm of e. * This norm equals the squared C norm of x-y with C=W^T*W, W being block diagonal * matrix with nvis mnp*mnp blocks: e^T*e=(x-y)^T*W^T*W*(x-y)=||x-y||_C. * Note that n=nvis*mnp; e can coincide with either x or y. * * Similarly to nrmL2xmy() above, uses loop unrolling and blocking */ static double nrmCxmy(double *const e, const double *const x, const double *const y, const double *const W, const int mnp, const int nvis) { const int n = nvis * mnp; const int blocksize = 8, bpwr = 3; /* 8=2^3 */ register int i, ii, k; int j1, j2, j3, j4, j5, j6, j7; int blockn, mnpsq; register double norm, sum; register const double *Wptr, *auxptr; register double *eptr; /* n may not be divisible by blocksize, * go as near as we can first, then tidy up. */ blockn = (n >> bpwr) << bpwr; /* (n / blocksize) * blocksize; */ /* unroll the loop in blocks of `blocksize'; looping downwards gains some more speed */ for (i = blockn - 1; i > 0; i -= blocksize) { e[i] = x[i] - y[i]; j1 = i - 1; e[j1] = x[j1] - y[j1]; j2 = i - 2; e[j2] = x[j2] - y[j2]; j3 = i - 3; e[j3] = x[j3] - y[j3]; j4 = i - 4; e[j4] = x[j4] - y[j4]; j5 = i - 5; e[j5] = x[j5] - y[j5]; j6 = i - 6; e[j6] = x[j6] - y[j6]; j7 = i - 7; e[j7] = x[j7] - y[j7]; } /* * There may be some left to do. * This could be done as a simple for() loop, * but a switch is faster (and more interesting) */ i = blockn; if (i < n) { /* Jump into the case at the place that will allow * us to finish off the appropriate number of items. 
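 * For example, with n=13 and blocksize=8: blockn=8, the unrolled loop above handles
 * elements 0...7 and the switch below is entered at case 5 (since n-i=5), finishing
 * off the remaining elements 8...12.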
*/ switch (n - i) { case 7: e[i] = x[i] - y[i]; ++i; case 6: e[i] = x[i] - y[i]; ++i; case 5: e[i] = x[i] - y[i]; ++i; case 4: e[i] = x[i] - y[i]; ++i; case 3: e[i] = x[i] - y[i]; ++i; case 2: e[i] = x[i] - y[i]; ++i; case 1: e[i] = x[i] - y[i]; ++i; } } /* compute w_x_ij e_ij in e and its L2 norm. * Since w_x_ij is upper triangular, the products can be safely saved * directly in e_ij, without the need for intermediate storage */ mnpsq = mnp * mnp; /* Wptr, eptr point to w_x_ij, e_ij below */ for (i = 0, Wptr = W, eptr = e, norm = 0.0; i < nvis; ++i, Wptr += mnpsq, eptr += mnp) { for (ii = 0, auxptr = Wptr; ii < mnp; ++ii, auxptr += mnp) { /* auxptr=Wptr+ii*mnp */ for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular sum += auxptr[k] * eptr[k]; // Wptr[ii*mnp+k]*eptr[k]; eptr[ii] = sum; norm += sum * sum; } } return norm; } /* search for & print image projection components that are infinite; useful for identifying errors */ static void sba_print_inf(double *hx, int nimgs, int mnp, struct sba_crsm *idxij, int *rcidxs, int *rcsubs) { register int i, j, k; int nnz; double *phxij; for (j = 0; j < nimgs; ++j) { nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */ for (i = 0; i < nnz; ++i) { phxij = hx + idxij->val[rcidxs[i]] * mnp; for (k = 0; k < mnp; ++k) if (!SBA_FINITE(phxij[k])) printf("SBA: component %d of the estimated projection of point %d on camera %d is invalid!\n", k, rcsubs[i], j); } } } /* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. * The jacobian is approximated with the aid of finite differences and is returned in the order * (A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm), * where A_ij=dx_ij/da_j and B_ij=dx_ij/db_i (see HZ). * Notice that depending on idxij, some of the A_ij, B_ij might be missing * * Problem-specific information is assumed to be stored in a structure pointed to by "dat". * * NOTE: The jacobian (for n=4, m=3) in matrix form has the following structure: * A_11 0 0 B_11 0 0 0 * 0 A_12 0 B_12 0 0 0 * 0 0 A_13 B_13 0 0 0 * A_21 0 0 0 B_21 0 0 * 0 A_22 0 0 B_22 0 0 * 0 0 A_23 0 B_23 0 0 * A_31 0 0 0 0 B_31 0 * 0 A_32 0 0 0 B_32 0 * 0 0 A_33 0 0 B_33 0 * A_41 0 0 0 0 0 B_41 * 0 A_42 0 0 0 0 B_42 * 0 0 A_43 0 0 0 B_43 * To reduce the total number of objective function evaluations, this structure can be * exploited as follows: A certain d is added to the j-th parameters of all cameras and * the objective function is evaluated at the resulting point. This evaluation suffices * to compute the corresponding columns of *all* A_ij through finite differences. A similar * strategy allows the computation of the B_ij. Overall, only cnp+pnp+1 objective function * evaluations are needed to compute the jacobian, much fewer compared to the m*cnp+n*pnp+1 * that would be required by the naive strategy of computing one column of the jacobian * per function evaluation. See Nocedal-Wright, ch. 7, pp. 169. Although this approach is * much faster compared to the naive strategy, it is not preferable to analytic jacobians, * since the latter are considerably faster to compute and result in fewer LM iterations. 
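 * As a concrete count for the n=4, m=3 example above, with cnp=6 camera and pnp=3
 * point parameters: the naive strategy needs m*cnp + n*pnp + 1 = 31 function
 * evaluations, whereas the strategy described here needs only cnp + pnp + 1 = 10.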
*/ static void sba_fdjac_x(double *p, /* I: current parameter estimate, (m*cnp+n*pnp)x1 */ struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */ int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */ int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */ double *jac, /* O: array for storing the approximated jacobian */ void *dat) /* I: points to a "fdj_data_x_" structure */ { register int i, j, ii, jj; double *pa, *pb, *pqr, *ppt; register double *pAB, *phx, *phxx; int n, m, nm, nnz, Asz, Bsz, ABsz, idx; double *tmpd; register double d; struct fdj_data_x_ *fdjd; void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata); double *hx, *hxx; int cnp, pnp, mnp; void *adata; /* retrieve problem-specific information passed in *dat */ fdjd = (struct fdj_data_x_ *)dat; func = fdjd->func; cnp = fdjd->cnp; pnp = fdjd->pnp; mnp = fdjd->mnp; hx = fdjd->hx; hxx = fdjd->hxx; adata = fdjd->adata; n = idxij->nr; m = idxij->nc; pa = p; pb = p + m * cnp; Asz = mnp * cnp; Bsz = mnp * pnp; ABsz = Asz + Bsz; nm = (n >= m) ? n : m; // max(n, m); tmpd = (double *)emalloc(nm * sizeof(double)); (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hx, adata); // evaluate supplied function on current solution if (cnp) { // is motion varying? /* compute A_ij */ for (jj = 0; jj < cnp; ++jj) { for (j = 0; j < m; ++j) { pqr = pa + j * cnp; // j-th camera parameters /* determine d=max(SBA_DELTA_SCALE*|pqr[jj]|, SBA_MIN_DELTA), see HZ */ d = (double)(SBA_DELTA_SCALE)*pqr[jj]; // force evaluation d = FABS(d); if (d < SBA_MIN_DELTA) d = SBA_MIN_DELTA; tmpd[j] = d; pqr[jj] += d; } (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata); for (j = 0; j < m; ++j) { pqr = pa + j * cnp; // j-th camera parameters pqr[jj] -= tmpd[j]; /* restore */ d = 1.0 / tmpd[j]; /* invert so that divisions can be carried out faster as multiplications */ nnz = sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ for (i = 0; i < nnz; ++i) { idx = idxij->val[rcidxs[i]]; phx = hx + idx * mnp; // set phx to point to hx_ij phxx = hxx + idx * mnp; // set phxx to point to hxx_ij pAB = jac + idx * ABsz; // set pAB to point to A_ij for (ii = 0; ii < mnp; ++ii) pAB[ii * cnp + jj] = (phxx[ii] - phx[ii]) * d; } } } } if (pnp) { // is structure varying? 
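        /* For contrast with the grouped perturbation scheme used below (and above for
         * the camera parameters), a naive forward-difference approximation of the B_ij
         * would perturb one point coordinate at a time, costing n*pnp func evaluations
         * instead of pnp. A minimal sketch of that naive scheme, reusing this function's
         * variables, is shown disabled for reference: */
#if 0
        for (i = 0; i < n; ++i)
            for (jj = 0; jj < pnp; ++jj) {
                ppt = pb + i * pnp; // i-th point parameters
                d = FABS((double)(SBA_DELTA_SCALE)*ppt[jj]);
                if (d < SBA_MIN_DELTA) d = SBA_MIN_DELTA;
                ppt[jj] += d;
                (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata); /* one call per coordinate! */
                ppt[jj] -= d;
                /* the differences (hxx_ij - hx_ij)/d would fill the jj-th column of each B_ij */
            }
#endif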
        /* compute B_ij */
        for (jj = 0; jj < pnp; ++jj) {
            for (i = 0; i < n; ++i) {
                ppt = pb + i * pnp; // i-th point parameters
                /* determine d=max(SBA_DELTA_SCALE*|ppt[jj]|, SBA_MIN_DELTA), see HZ */
                d = (double)(SBA_DELTA_SCALE)*ppt[jj]; // force evaluation
                d = FABS(d);
                if (d < SBA_MIN_DELTA) d = SBA_MIN_DELTA;
                tmpd[i] = d;

                ppt[jj] += d;
            }

            (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata);

            for (i = 0; i < n; ++i) {
                ppt = pb + i * pnp; // i-th point parameters
                ppt[jj] -= tmpd[i]; /* restore */
                d = 1.0 / tmpd[i];  /* invert so that divisions can be carried out faster as multiplications */

                nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
                for (j = 0; j < nnz; ++j) {
                    idx = idxij->val[rcidxs[j]];
                    phx = hx + idx * mnp;         // set phx to point to hx_ij
                    phxx = hxx + idx * mnp;       // set phxx to point to hxx_ij
                    pAB = jac + idx * ABsz + Asz; // set pAB to point to B_ij

                    for (ii = 0; ii < mnp; ++ii)
                        pAB[ii * pnp + jj] = (phxx[ii] - phx[ii]) * d;
                }
            }
        }
    }

    free(tmpd);
}

typedef int (*PLS)(double *A, double *B, double *x, int m, int iscolmaj);

/* Bundle adjustment on camera and structure parameters
 * using the sparse Levenberg-Marquardt as described in HZ p. 568
 *
 * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
 */
int sba_motstr_levmar_x(
    const int n,    /* number of points */
    const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified.
                     * All B_ij (see below) with i<ncon are assumed to be zero
                     */
    const int m,    /* number of images */
    const int mcon, /* number of images (starting from the 1st) whose parameters should not be modified.
                     * All A_ij (see below) with j<mcon are assumed to be zero
                     */
    char *vmask,    /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
    double *p,      /* initial parameter vector p0: (a1, ..., am, b1, ..., bn).
                     * aj are the image j parameters, bi are the i-th point parameters,
                     * size m*cnp + n*pnp
                     */
    const int cnp,  /* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
    const int pnp,  /* number of parameters for ONE point; e.g. 3 for Euclidean points */
    double *x,      /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
                     * x_ij is the projection of the i-th point on the j-th image.
                     * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
                     * see vmask[i, j], max. size n*m*mnp
                     */
    double *covx,   /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm),
                     * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no
                     * covariance estimates are available (identity matrices are implicitly used in this case).
                     * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing;
                     * see vmask[i, j], max. size n*m*mnp*mnp
                     */
    const int mnp,  /* number of parameters for EACH measurement; usually 2 */
    void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
                    /* functional relation describing measurements: given a parameter vector p,
                     * computes a prediction of the measurements \hat{x} in hx. p is (m*cnp + n*pnp)x1,
                     * hx is (nvis*mnp)x1, maximum. rcidxs, rcsubs are caller-allocated working memory
                     */
    void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
                    /* function to evaluate the sparse jacobian dX/dp. The jacobian is returned in jac as
                     * (A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm), where A_ij=dx_ij/da_j
                     * and B_ij=dx_ij/db_i (see HZ). Depending on idxij, some of the A_ij, B_ij might be
                     * missing. If NULL, the jacobian is approximated with repetitive func calls and finite
                     * differences, which is computationally expensive and thus NOT recommended
                     */
    void *adata,       /* pointer to possibly additional data, passed uninterpreted to func, fjac */
    const int itmax,   /* I: maximum number of iterations. itmax==0 signals jacobian verification followed
                        * by immediate return */
    const int verbose, /* I: verbosity */
    const double opts[SBA_OPTSSZ],
                       /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the
                        * scale factor for the initial \mu and the stopping thresholds for ||J^T e||_inf,
                        * ||dp||_2, ||e||_2 and (||e||_2 - ||e_new||_2)/||e||_2
                        */
    double info[SBA_INFOSZ]
                       /* O: information regarding the minimization. Set to NULL if don't care.
                        * info[0]=||e||_2 at initial p; info[1-4]=[||e||_2, ||J^T e||_inf, ||dp||_2,
                        * mu/max diagonal] at estimated p; info[5]=# iterations; info[6]=reason for
                        * terminating (cf. the stop variable below); info[7]=# function evaluations;
                        * info[8]=# jacobian evaluations; info[9]=# linear systems solved
                        */
    )
{
    register int i, j, ii, jj, k, l;
    int nvis, nnz, retval;

    /* The following are work arrays that are dynamically allocated by sba_motstr_levmar_x() */
    double *jac;  /* work array for storing the jacobian, max. size n*m*(mnp*cnp + mnp*pnp) */
    double *U;    /* work array for storing the U_j in the order U_1, ..., U_m, size m*cnp*cnp */
    double *V;    /* work array for storing the V_i in the order V_1, ..., V_n, size n*pnp*pnp */
    double *e;    /* work array for storing the e_ij, max. size n*m*mnp */
    double *eab;  /* work array for storing the ea_j & eb_i in the order ea_1, ..., ea_m, eb_1, ..., eb_n,
                     size m*cnp + n*pnp */
    double *E;    /* work array for storing the e_j in the order e_1, ..., e_m, size m*cnp */
    double *W;    /* work array for storing the W_ij, max. size n*m*cnp*pnp */
    double *Yj;   /* work array for storing the Y_ij for a *fixed* j, max. size n*cnp*pnp */
    double *YWt;  /* work array for storing \sum_i Y_ij W_ik^T, size cnp*cnp */
    double *S;    /* work array for storing the block matrix S, size m*m*cnp*cnp */
    double *dp;   /* work array for storing the updates da_1, ..., da_m, db_1, ..., db_n, size m*cnp + n*pnp */
    double *Wtda; /* work array for storing \sum_j W_ij^T da_j, size pnp */
    double *wght = NULL; /* work array for storing the weights computed from the covariance inverses,
                            max. size n*m*mnp*mnp */
    double *hx;     /* work array for storing the predicted measurements, max. size n*m*mnp */
    double *diagUV; /* work array for storing the diagonals of the U_j, V_i, size m*cnp + n*pnp */
    double *pdp;    /* work array for storing p + dp, size m*cnp + n*pnp */

    double *ea, *eb, *dpa, *dpb, *diagU, *diagV; /* auxiliary pointers into eab, dp, diagUV */
    register double *ptr1, *ptr2, *ptr3, *ptr4, sum;
    struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x
                            * (and hence of A_ij, B_ij in jac, W_ij in W, etc.) */
    int maxCvis, maxPvis, maxCPvis, *rcidxs, *rcsubs;
    int Asz, Bsz, ABsz, Usz, Vsz, Wsz, Ysz, YWtsz, esz, easz, ebsz, Sblsz, covsz;
    int Sdim, mmcon;
    register double mu, tmp; /* damping constant & temporary used in computations */
    double p_eL2, eab_inf, pdp_eL2, init_p_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */
    double p_L2, dp_L2 = DBL_MAX, dF, dL;
    double tau = FABS(opts[0]), eps1 = FABS(opts[1]), eps2 = FABS(opts[2]), eps2_sq = opts[2] * opts[2],
           eps3_sq = opts[3] * opts[3], eps4_sq = opts[4] * opts[4];
    int nu = 2, nu2, stop = 0, nfev, njev = 0, nlss = 0, itno, issolved;
    int nobs, nvars;
    PLS linsolver = NULL;
    int (*matinv)(double *A, int m) = NULL;

    struct fdj_data_x_ fdj_data;
    void *jac_adata;

    /* initialization */
    mu = eab_inf = tmp = 0.0; /* -Wall */

    /* block sizes */
    Asz = mnp * cnp;
    Bsz = mnp * pnp;
    ABsz = Asz + Bsz;
    Usz = cnp * cnp;
    Vsz = pnp * pnp;
    Wsz = cnp * pnp;
    Ysz = cnp * pnp;
    esz = mnp;
    easz = cnp;
    ebsz = pnp;
    YWtsz = cnp * cnp;
    Sblsz = cnp * cnp;
    covsz = mnp * mnp;
    mmcon = m - mcon;
    Sdim = mmcon * cnp;

    /* count the total number of visible image points */
    for (i = nvis = 0, jj = n * m; i < jj; ++i)
        nvis += (vmask[i] != 0);

    nobs = nvis * mnp;
    nvars = m * cnp + n * pnp;
    if (nobs < nvars) {
        fprintf(stderr,
                "SBA: sba_motstr_levmar_x() cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n",
                nobs, nvars);
        return SBA_ERROR;
    }

    /* allocate & fill up the idxij structure */
    sba_crsm_alloc(&idxij, n, m, nvis);
    for (i = k = 0; i < n; ++i) {
        idxij.rowptr[i] = k;
        ii = i * m;
        for (j = 0; j < m; ++j)
            if (vmask[ii + j]) {
                idxij.val[k] = k;
                idxij.colidx[k++] = j;
            }
    }
    idxij.rowptr[n] = nvis;

    /* find the maximum number (for all cameras) of visible image projections coming from a single 3D point */
    for (i = maxCvis = 0; i < n; ++i) {
        for (j = k = 0; j < m; ++j)
            if (vmask[i * m + j]) ++k;
        if (k > maxCvis) maxCvis = k;
    }

    /* find the maximum number (for all points) of visible image projections in any single camera */
    for (j = maxPvis = 0; j < m; ++j) {
        for (i = ii = 0; i < n; ++i)
            if (vmask[i * m + j]) ++ii;
        if (ii > maxPvis) maxPvis = ii;
    }
    maxCPvis = (maxCvis >= maxPvis) ? maxCvis : maxPvis;

#if 0
    /* determine the density of blocks in matrix S */
    for (j = mcon, ii = 0; j < m; ++j)
        for (k = mcon; k < m; ++k) {
            if (j == k) { ++ii; continue; } /* block S_jj is always nonzero */
            for (i = 0; i < n; ++i)
                if (vmask[i * m + j] && vmask[i * m + k]) { ++ii; break; } /* nonzero S_jk: some point is
                                                                            * visible in both cameras j and k */
        }
    printf("\nS block density: %.5g\n", ((double)ii) / (mmcon * mmcon)); fflush(stdout);
#endif

    /* allocate work arrays */
    W = (double *)emalloc((nvis * ((Wsz >= ABsz) ? Wsz : ABsz) + Wsz) * sizeof(double));
    U = (double *)emalloc(m * Usz * sizeof(double));
    V = (double *)emalloc(n * Vsz * sizeof(double));
    e = (double *)emalloc(nobs * sizeof(double));
    eab = (double *)emalloc(nvars * sizeof(double));
    E = (double *)emalloc(m * cnp * sizeof(double));
    Yj = (double *)emalloc(maxPvis * Ysz * sizeof(double));
    YWt = (double *)emalloc(YWtsz * sizeof(double));
    S = (double *)emalloc(m * m * Sblsz * sizeof(double));
    dp = (double *)emalloc(nvars * sizeof(double));
    Wtda = (double *)emalloc(pnp * sizeof(double));
    rcidxs = (int *)emalloc(maxCPvis * sizeof(int));
    rcsubs = (int *)emalloc(maxCPvis * sizeof(int));
#ifndef SBA_DESTROY_COVS
    if (covx != NULL)
        wght = (double *)emalloc(nvis * covsz * sizeof(double));
#else
    if (covx != NULL)
        wght = covx;
#endif /* SBA_DESTROY_COVS */

    hx = (double *)emalloc(nobs * sizeof(double));
    diagUV = (double *)emalloc(nvars * sizeof(double));
    pdp = (double *)emalloc(nvars * sizeof(double));

    /* to save resources, W and jac share the same memory: First, the jacobian
     * is computed in some working memory that is then overwritten during the
     * computation of W. To account for the case of W being larger than jac,
     * extra memory is reserved "before" jac.
     * Care must be taken, however, to ensure that storing a certain W_ij
     * does not overwrite the A_ij, B_ij used to compute it. To achieve this,
     * note that if p1 and p2 respectively point to the first elements
     * of a certain W_ij and A_ij, B_ij pair, we should have p2-p1>=Wsz.
     * There are two cases:
     * a) Wsz>=ABsz: Then p1=W+k*Wsz and p2=jac+k*ABsz=W+Wsz+nvis*(Wsz-ABsz)+k*ABsz
     *    for some k (0<=k<nvis), thus p2-p1=Wsz+(nvis-k)*(Wsz-ABsz).
     *    The last expression is clearly >= Wsz for all 0<=k<nvis
     * b) Wsz<ABsz: Then p1=W+k*Wsz and p2=jac+k*ABsz=W+Wsz+k*ABsz, thus
     *    p2-p1=Wsz+k*(ABsz-Wsz), which is again >= Wsz for all 0<=k<nvis
     */
    jac = W + Wsz + ((Wsz > ABsz) ?
nvis * (Wsz - ABsz) : 0); /* set up auxiliary pointers */ // pa = p; // pb = p + m * cnp; ea = eab; eb = eab + m * cnp; dpa = dp; dpb = dp + m * cnp; diagU = diagUV; diagV = diagUV + m * cnp; /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */ if (!fjac) { fdj_data.func = func; fdj_data.cnp = cnp; fdj_data.pnp = pnp; fdj_data.mnp = mnp; fdj_data.hx = hx; fdj_data.hxx = (double *)emalloc(nobs * sizeof(double)); fdj_data.func_rcidxs = (int *)emalloc(2 * maxCPvis * sizeof(int)); fdj_data.func_rcsubs = fdj_data.func_rcidxs + maxCPvis; fdj_data.adata = adata; fjac = sba_fdjac_x; jac_adata = (void *)&fdj_data; } else { fdj_data.hxx = NULL; jac_adata = adata; } if (itmax == 0) { /* verify jacobian */ sba_motstr_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, ncon, mcon, cnp, pnp, mnp, adata, jac_adata); retval = 0; goto freemem_and_return; } /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their * inverses and using the resulting matrices w_x_ij to weigh A_ij, B_ij, and e_ij as w_x_ij A_ij, * w_x_ij*B_ij and w_x_ij*e_ij. In this way, auxiliary variables as U_j=\sum_i A_ij^T A_ij * actually become \sum_i (w_x_ij A_ij)^T w_x_ij A_ij= \sum_i A_ij^T w_x_ij^T w_x_ij A_ij = * A_ij^T Sigma_x_ij^-1 A_ij * * ea_j, V_i, eb_i, W_ij are weighted in a similar manner */ if (covx != NULL) { for (i = 0; i < n; ++i) { nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ for (j = 0; j < nnz; ++j) { /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */ ptr1 = covx + (k = idxij.val[rcidxs[j]] * covsz); ptr2 = wght + k; if (!sba_mat_cholinv(ptr1, ptr2, mnp)) { /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */ fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_motstr_levmar_x()\n", i, rcsubs[j]); retval = SBA_ERROR; goto freemem_and_return; } } } sba_mat_cholinv(NULL, NULL, 0); /* cleanup */ } /* compute the error vectors e_ij in hx */ (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev = 1; /* ### compute e=x - f(p) [e=w*(x - f(p)] and its L2 norm */ if (covx == NULL) p_eL2 = nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */ else p_eL2 = nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */ if (verbose) printf("initial motstr-SBA error %g [%g]\n", p_eL2, p_eL2 / nvis); init_p_eL2 = p_eL2; if (!SBA_FINITE(p_eL2)) stop = 7; for (itno = 0; itno < itmax && !stop; ++itno) { /* Note that p, e and ||e||_2 have been updated at the previous iteration */ /* compute derivative submatrices A_ij, B_ij */ (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev; if (covx != NULL) { /* compute w_x_ij A_ij and w_x_ij B_ij. * Since w_x_ij is upper triangular, the products can be safely saved * directly in A_ij, B_ij, without the need for intermediate storage */ for (i = 0; i < nvis; ++i) { /* set ptr1, ptr2, ptr3 to point to w_x_ij, A_ij, B_ij, resp. */ ptr1 = wght + i * covsz; ptr2 = jac + i * ABsz; ptr3 = ptr2 + Asz; // ptr3=jac + i*ABsz + Asz; /* w_x_ij is mnp x mnp, A_ij is mnp x cnp, B_ij is mnp x pnp * NOTE: Jamming the outter (i.e., ii) loops did not run faster! 
*/ /* A_ij */ for (ii = 0; ii < mnp; ++ii) for (jj = 0; jj < cnp; ++jj) { for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular sum += ptr1[ii * mnp + k] * ptr2[k * cnp + jj]; ptr2[ii * cnp + jj] = sum; } /* B_ij */ for (ii = 0; ii < mnp; ++ii) for (jj = 0; jj < pnp; ++jj) { for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular sum += ptr1[ii * mnp + k] * ptr3[k * pnp + jj]; ptr3[ii * pnp + jj] = sum; } } } /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here! /* U_j is symmetric, therefore its computation can be sped up by * computing only the upper part and then reusing it for the lower one. * Recall that A_ij is mnp x cnp */ /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here! /* Recall that e_ij is mnp x 1 */ _dblzero(U, m * Usz); /* clear all U_j */ _dblzero(ea, m * easz); /* clear all ea_j */ for (j = mcon; j < m; ++j) { ptr1 = U + j * Usz; // set ptr1 to point to U_j ptr2 = ea + j * easz; // set ptr2 to point to ea_j nnz = sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ for (i = 0; i < nnz; ++i) { /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */ ptr3 = jac + idxij.val[rcidxs[i]] * ABsz; /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */ for (ii = 0; ii < cnp; ++ii) { for (jj = ii; jj < cnp; ++jj) { for (k = 0, sum = 0.0; k < mnp; ++k) sum += ptr3[k * cnp + ii] * ptr3[k * cnp + jj]; ptr1[ii * cnp + jj] += sum; } /* copy the LOWER TRIANGULAR PART of U_j from the upper one */ for (jj = 0; jj < ii; ++jj) ptr1[ii * cnp + jj] = ptr1[jj * cnp + ii]; } ptr4 = e + idxij.val[rcidxs[i]] * esz; /* set ptr4 to point to e_ij */ /* compute A_ij^T e_ij and add it to ea_j */ for (ii = 0; ii < cnp; ++ii) { for (jj = 0, sum = 0.0; jj < mnp; ++jj) sum += ptr3[jj * cnp + ii] * ptr4[jj]; ptr2[ii] += sum; } } } /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here! /* V_i is symmetric, therefore its computation can be sped up by * computing only the upper part and then reusing it for the lower one. * Recall that B_ij is mnp x pnp */ /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here! /* Recall that e_ij is mnp x 1 */ _dblzero(V, n * Vsz); /* clear all V_i */ _dblzero(eb, n * ebsz); /* clear all eb_i */ for (i = ncon; i < n; ++i) { ptr1 = V + i * Vsz; // set ptr1 to point to V_i ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ for (j = 0; j < nnz; ++j) { /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */ ptr3 = jac + idxij.val[rcidxs[j]] * ABsz + Asz; /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */ for (ii = 0; ii < pnp; ++ii) { for (jj = ii; jj < pnp; ++jj) { for (k = 0, sum = 0.0; k < mnp; ++k) sum += ptr3[k * pnp + ii] * ptr3[k * pnp + jj]; ptr1[ii * pnp + jj] += sum; } } ptr4 = e + idxij.val[rcidxs[j]] * esz; /* set ptr4 to point to e_ij */ /* compute B_ij^T e_ij and add it to eb_i */ for (ii = 0; ii < pnp; ++ii) { for (jj = 0, sum = 0.0; jj < mnp; ++jj) sum += ptr3[jj * pnp + ii] * ptr4[jj]; ptr2[ii] += sum; } } } /* compute W_ij = A_ij^T B_ij */ // \Sigma here! 
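            /* The W_ij computed next complete the ingredients of the reduced camera
             * system (the Schur complement construction of HZ): with Y_ij = W_ij (V*_i)^-1,
             * the blocks S_jk = delta_jk U*_j - \sum_i Y_ij W_ik^T and right hand sides
             * e_j = ea_j - \sum_i Y_ij eb_i are assembled further below; solving S dpa = E
             * yields the camera updates da_j, and the point updates follow by back
             * substitution as db_i = (V*_i)^-1 (eb_i - \sum_j W_ij^T da_j). */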
        /* Recall that A_ij is mnp x cnp and B_ij is mnp x pnp */
        for (i = ncon; i < n; ++i) {
            nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */
            for (j = 0; j < nnz; ++j) {
                /* set ptr1 to point to W_ij, actual column number in rcsubs[j] */
                ptr1 = W + idxij.val[rcidxs[j]] * Wsz;

                if (rcsubs[j] < mcon) { /* A_ij is zero */
                    //_dblzero(ptr1, Wsz); /* clear W_ij */
                    continue;
                }

                /* set ptr2 & ptr3 to point to A_ij & B_ij resp. */
                ptr2 = jac + idxij.val[rcidxs[j]] * ABsz;
                ptr3 = ptr2 + Asz;
                /* compute A_ij^T B_ij and store it in W_ij
                 * Recall that storage for A_ij, B_ij does not overlap with that for W_ij,
                 * see the comments related to the initialization of jac above
                 */
                /* assert(ptr2-ptr1>=Wsz); */
                for (ii = 0; ii < cnp; ++ii)
                    for (jj = 0; jj < pnp; ++jj) {
                        for (k = 0, sum = 0.0; k < mnp; ++k)
                            sum += ptr2[k * cnp + ii] * ptr3[k * pnp + jj];
                        ptr1[ii * pnp + jj] = sum;
                    }
            }
        }

        /* Compute ||J^T e||_inf and ||p||^2 */
        for (i = 0, p_L2 = eab_inf = 0.0; i < nvars; ++i) {
            if (eab_inf < (tmp = FABS(eab[i])))
                eab_inf = tmp;
            p_L2 += p[i] * p[i];
        }
        // p_L2=sqrt(p_L2);

        /* save diagonal entries so that augmentation can be later canceled.
         * Diagonal entries are in U_j and V_i
         */
        for (j = mcon; j < m; ++j) {
            ptr1 = U + j * Usz;     // set ptr1 to point to U_j
            ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j
            for (i = 0; i < cnp; ++i)
                ptr2[i] = ptr1[i * cnp + i];
        }
        for (i = ncon; i < n; ++i) {
            ptr1 = V + i * Vsz;     // set ptr1 to point to V_i
            ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i
            for (j = 0; j < pnp; ++j)
                ptr2[j] = ptr1[j * pnp + j];
        }

        /*
        if(!(itno%100)){
            printf("Current estimate: ");
            for(i=0; i<nvars; ++i)
                printf("%.9g ", p[i]);
            printf("\n");
        }
        */

        /* check for convergence */
        if ((eab_inf <= eps1)) {
            dp_L2 = 0.0; /* no increment would result in this case and
                          * dp_L2 is used below to fill the info array */
            stop = 1;
            break;
        }

        /* compute initial damping factor */
        if (itno == 0) {
            /* find the max diagonal element of the U_j and V_i */
            for (i = mcon * cnp, tmp = DBL_MIN; i < m * cnp; ++i)
                if (diagUV[i] > tmp) tmp = diagUV[i];
            for (i = m * cnp + ncon * pnp; i < nvars; ++i) /* tmp is not re-initialized! */
                if (diagUV[i] > tmp) tmp = diagUV[i];
            mu = tau * tmp;
        }

        /* determine increment using adaptive damping */
        while (1) {
            /* augment U, V */
            for (j = mcon; j < m; ++j) {
                ptr1 = U + j * Usz; // set ptr1 to point to U_j
                for (i = 0; i < cnp; ++i)
                    ptr1[i * cnp + i] += mu;
            }
            for (i = ncon; i < n; ++i) {
                ptr1 = V + i * Vsz; // set ptr1 to point to V_i
                for (j = 0; j < pnp; ++j)
                    ptr1[j * pnp + j] += mu;

                /* compute V*_i^-1.
                 * Recall that only the upper triangle of the symmetric pnp x pnp matrix V*_i
                 * is stored in ptr1; its (also symmetric) inverse is saved in the lower triangle of ptr1 */
                /* inverting V*_i with LDLT seems to result in faster overall execution compared to when using
                 * LU or Cholesky */
                // j=sba_symat_invert_LU(ptr1, pnp); matinv=sba_symat_invert_LU;
                // j=sba_symat_invert_Chol(ptr1, pnp); matinv=sba_symat_invert_Chol;
                j = sba_symat_invert_BK(ptr1, pnp);
                matinv = sba_symat_invert_BK;
                if (!j) {
                    fprintf(stderr, "SBA: singular matrix V*_i (i=%d) in sba_motstr_levmar_x(), increasing damping\n",
                            i);
                    goto moredamping; // increasing damping will eventually make V*_i diagonally dominant, thus
                                      // nonsingular
                    // retval=SBA_ERROR;
                    // goto freemem_and_return;
                }
            }

            _dblzero(E, m * easz); /* clear all e_j */

            /* compute the mmcon x mmcon block matrix S and e_j */

            /* Note that S is symmetric, therefore its computation can be
             * sped up by computing only the upper part and then reusing
             * it for the lower one.
             */
            for (j = mcon; j < m; ++j) {
                int mmconxUsz = mmcon * Usz;

                nnz = sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero Y_ij, i=0...n-1 */

                /* get rid of all Y_ij with i<ncon, i.e. points whose parameters should
                 * not be modified; only those with i>=ncon are kept */
                if (ncon) {
                    for (i = ii = 0; i < nnz; ++i) {
                        if (rcsubs[i] >= ncon) {
                            rcidxs[ii] = rcidxs[i];
                            rcsubs[ii++] = rcsubs[i];
                        }
                    }
                    nnz = ii;
                }

                /* compute all Y_ij = W_ij (V*_i)^-1 for a *fixed* j.
* To save memory, the block matrix consisting of the Y_ij * is not stored. Instead, only a block column of this matrix * is computed & used at each time: For each j, all nonzero * Y_ij are computed in Yj and then used in the calculations * involving S_jk and e_j. * Recall that W_ij is cnp x pnp and (V*_i) is pnp x pnp */ for (i = 0; i < nnz; ++i) { /* set ptr3 to point to (V*_i)^-1, actual row number in rcsubs[i] */ ptr3 = V + rcsubs[i] * Vsz; /* set ptr1 to point to Y_ij, actual row number in rcsubs[i] */ ptr1 = Yj + i * Ysz; /* set ptr2 to point to W_ij resp. */ ptr2 = W + idxij.val[rcidxs[i]] * Wsz; /* compute W_ij (V*_i)^-1 and store it in Y_ij. * Recall that only the lower triangle of (V*_i)^-1 is stored */ for (ii = 0; ii < cnp; ++ii) { ptr4 = ptr2 + ii * pnp; for (jj = 0; jj < pnp; ++jj) { for (k = 0, sum = 0.0; k <= jj; ++k) sum += ptr4[k] * ptr3[jj * pnp + k]; // ptr2[ii*pnp+k]*ptr3[jj*pnp+k]; for (; k < pnp; ++k) sum += ptr4[k] * ptr3[k * pnp + jj]; // ptr2[ii*pnp+k]*ptr3[k*pnp+jj]; ptr1[ii * pnp + jj] = sum; } } } /* compute the UPPER TRIANGULAR PART of S */ for (k = j; k < m; ++k) { // j>=mcon /* compute \sum_i Y_ij W_ik^T in YWt. Note that for an off-diagonal block defined by j, k * YWt (and thus S_jk) is nonzero only if there exists a point that is visible in both the * j-th and k-th images */ /* Recall that Y_ij is cnp x pnp and W_ik is cnp x pnp */ _dblzero(YWt, YWtsz); /* clear YWt */ for (i = 0; i < nnz; ++i) { register double *pYWt; /* find the min and max column indices of the elements in row i (actually rcsubs[i]) * and make sure that k falls within them. This test handles W_ik's which are * certain to be zero without bothering to call sba_crsm_elmidx() */ ii = idxij.colidx[idxij.rowptr[rcsubs[i]]]; jj = idxij.colidx[idxij.rowptr[rcsubs[i] + 1] - 1]; if (k < ii || k > jj) continue; /* W_ik == 0 */ /* set ptr2 to point to W_ik */ l = sba_crsm_elmidxp(&idxij, rcsubs[i], k, j, rcidxs[i]); // l=sba_crsm_elmidx(&idxij, rcsubs[i], k); if (l == -1) continue; /* W_ik == 0 */ ptr2 = W + idxij.val[l] * Wsz; /* set ptr1 to point to Y_ij, actual row number in rcsubs[i] */ ptr1 = Yj + i * Ysz; for (ii = 0; ii < cnp; ++ii) { ptr3 = ptr1 + ii * pnp; pYWt = YWt + ii * cnp; for (jj = 0; jj < cnp; ++jj) { ptr4 = ptr2 + jj * pnp; for (l = 0, sum = 0.0; l < pnp; ++l) sum += ptr3[l] * ptr4[l]; // ptr1[ii*pnp+l]*ptr2[jj*pnp+l]; pYWt[jj] += sum; // YWt[ii*cnp+jj]+=sum; } } } /* since the linear system involving S is solved with lapack, * it is preferable to store S in column major (i.e. fortran) * order, so as to avoid unecessary transposing/copying. 
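 * (LAPACK inherits Fortran's column-major array layout, which is what the
 * MAT_STORAGE setting above selects.)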
 */
#if MAT_STORAGE == COLUMN_MAJOR
                    ptr2 = S + (k - mcon) * mmconxUsz +
                           (j - mcon) * cnp; // set ptr2 to point to the beginning of block j,k in S
#else
                    ptr2 = S + (j - mcon) * mmconxUsz +
                           (k - mcon) * cnp; // set ptr2 to point to the beginning of block j,k in S
#endif

                    if (j != k) { /* Kronecker */
                        for (ii = 0; ii < cnp; ++ii, ptr2 += Sdim)
                            for (jj = 0; jj < cnp; ++jj)
                                ptr2[jj] =
#if MAT_STORAGE == COLUMN_MAJOR
                                    -YWt[jj * cnp + ii];
#else
                                    -YWt[ii * cnp + jj];
#endif
                    } else {
                        ptr1 = U + j * Usz; // set ptr1 to point to U_j

                        for (ii = 0; ii < cnp; ++ii, ptr2 += Sdim)
                            for (jj = 0; jj < cnp; ++jj)
                                ptr2[jj] =
#if MAT_STORAGE == COLUMN_MAJOR
                                    ptr1[jj * cnp + ii] - YWt[jj * cnp + ii];
#else
                                    ptr1[ii * cnp + jj] - YWt[ii * cnp + jj];
#endif
                    }
                }

                /* copy the LOWER TRIANGULAR PART of S from the upper one */
                for (k = mcon; k < j; ++k) {
#if MAT_STORAGE == COLUMN_MAJOR
                    ptr1 = S + (k - mcon) * mmconxUsz +
                           (j - mcon) * cnp; // set ptr1 to point to the beginning of block j,k in S
                    ptr2 = S + (j - mcon) * mmconxUsz +
                           (k - mcon) * cnp; // set ptr2 to point to the beginning of block k,j in S
#else
                    ptr1 = S + (j - mcon) * mmconxUsz +
                           (k - mcon) * cnp; // set ptr1 to point to the beginning of block j,k in S
                    ptr2 = S + (k - mcon) * mmconxUsz +
                           (j - mcon) * cnp; // set ptr2 to point to the beginning of block k,j in S
#endif
                    for (ii = 0; ii < cnp; ++ii, ptr1 += Sdim)
                        for (jj = 0, ptr3 = ptr2 + ii; jj < cnp; ++jj, ptr3 += Sdim)
                            ptr1[jj] = *ptr3;
                }

                /* compute e_j=ea_j - \sum_i Y_ij eb_i */
                /* Recall that Y_ij is cnp x pnp and eb_i is pnp x 1 */
                ptr1 = E + j * easz; // set ptr1 to point to e_j

                for (i = 0; i < nnz; ++i) {
                    /* set ptr2 to point to Y_ij, actual row number in rcsubs[i] */
                    ptr2 = Yj + i * Ysz;

                    /* set ptr3 to point to eb_i */
                    ptr3 = eb + rcsubs[i] * ebsz;
                    for (ii = 0; ii < cnp; ++ii) {
                        ptr4 = ptr2 + ii * pnp;
                        for (jj = 0, sum = 0.0; jj < pnp; ++jj)
                            sum += ptr4[jj] * ptr3[jj]; // ptr2[ii*pnp+jj]*ptr3[jj];
                        ptr1[ii] += sum;
                    }
                }

                ptr2 = ea + j * easz; // set ptr2 to point to ea_j
                for (i = 0; i < easz; ++i)
                    ptr1[i] = ptr2[i] - ptr1[i];
            }

#if 0
            if (verbose > 1) { /* count the nonzeros in S */
                for (i = ii = 0; i < Sdim * Sdim; ++i)
                    if (S[i] != 0.0) ++ii;
                printf("\nS density: %.5g\n", ((double)ii) / (Sdim * Sdim));
                fflush(stdout);
            }
#endif

            /* solve the linear system S dpa = E to compute the da_j */
            // issolved=sba_Axb_LU(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_LU;
            issolved = sba_Axb_Chol(S, E + mcon * cnp, dpa + mcon * cnp, Sdim, MAT_STORAGE);
            linsolver = sba_Axb_Chol;
            // issolved=sba_Axb_BK(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_BK;
            // issolved=sba_Axb_QRnoQ(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_QRnoQ;
            ++nlss;

            _dblzero(dpa, mcon * cnp); /* no change for the first mcon camera params */

            if (issolved) {
                /* compute the db_i */
                for (i = ncon; i < n; ++i) {
                    ptr1 = dpb + i * pnp; // set ptr1 to point to db_i

                    /* compute \sum_j W_ij^T da_j in Wtda */
                    _dblzero(Wtda, pnp); /* clear Wtda */
                    nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */
                    for (j = 0; j < nnz; ++j) {
                        if (rcsubs[j] < mcon) continue; /* W_ij is zero */

                        /* set ptr2 to point to W_ij, actual column number in rcsubs[j] */
                        ptr2 = W + idxij.val[rcidxs[j]] * Wsz;
                        /* set ptr3 to point to da_j */
                        ptr3 = dpa + rcsubs[j] * cnp;
                        for (ii = 0; ii < pnp; ++ii) {
                            for (jj = 0, sum = 0.0; jj < cnp; ++jj)
                                sum += ptr2[jj * pnp + ii] * ptr3[jj];
                            Wtda[ii] += sum;
                        }
                    }

                    /* compute eb_i - \sum_j W_ij^T da_j in Wtda */
                    ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i
                    for (ii = 0; ii < pnp; ++ii)
                        Wtda[ii] = ptr2[ii] - Wtda[ii];

                    /* compute db_i = (V*_i)^-1 (eb_i - \sum_j W_ij^T da_j).
                     * Recall that only the lower triangle of (V*_i)^-1 is stored */
                    ptr3 = V + i * Vsz; // set ptr3 to point to (V*_i)^-1
                    for (ii = 0; ii < pnp; ++ii) {
                        for (jj = 0, sum = 0.0; jj <= ii; ++jj)
                            sum += ptr3[ii * pnp + jj] * Wtda[jj];
                        for (; jj < pnp; ++jj)
                            sum += ptr3[jj * pnp + ii] * Wtda[jj];
                        ptr1[ii] = sum;
                    }
                }
                _dblzero(dpb, ncon * pnp); /* no change for the first ncon point params */

                /* parameter vector updates are now in dpa, dpb */

                /* compute p's new estimate and ||dp||^2 */
                for (i = 0, dp_L2 = 0.0; i < nvars; ++i) {
                    pdp[i] = p[i] + (tmp = dp[i]);
                    dp_L2 += tmp * tmp;
                }
                // dp_L2=sqrt(dp_L2);

                if (dp_L2 <= eps2_sq * p_L2) { /* relative change in p is small, stop */
                    // if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
                    stop = 2;
                    break;
                }

                if (dp_L2 >= (p_L2 + eps2) / SBA_EPSILON_SQ) { /* almost singular */
                    // if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
                    fprintf(stderr,
                            "SBA: the matrix of the augmented normal equations is almost singular in "
                            "sba_motstr_levmar_x(),\n"
                            "     minimization should be restarted from the current solution with an increased "
                            "damping term\n");
                    retval = SBA_ERROR;
                    goto freemem_and_return;
                }

                (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata);
                ++nfev; /* evaluate function at p + dp */
                if (verbose > 1)
                    printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
                /* ### compute ||e(pdp)||_2 */
                if (covx == NULL)
                    pdp_eL2 = nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */
                else
                    pdp_eL2 = nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */
                if (!SBA_FINITE(pdp_eL2)) {
                    if (verbose) /* identify the offending point projection */
                        sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs);

                    stop = 7;
                    break;
                }

                for (i = 0, dL = 0.0; i < nvars; ++i)
                    dL += dp[i] * (mu * dp[i] + eab[i]);

                dF = p_eL2 - pdp_eL2;

                if (verbose > 1)
                    printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu,
                           dL != 0.0 ? dF / dL : dF / DBL_EPSILON, p_eL2 / nvis, pdp_eL2 / nvis, p_eL2 / pdp_eL2);

                if (dL > 0.0 && dF > 0.0) { /* reduction in error, increment is accepted */
                    tmp = (2.0 * dF / dL - 1.0);
                    tmp = 1.0 - tmp * tmp * tmp;
                    mu = mu * ((tmp >= SBA_ONE_THIRD) ?
tmp : SBA_ONE_THIRD);
                    nu = 2;

                    /* the test below is equivalent to the relative reduction of the RMS reprojection error:
                     * sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2) */
                    if (p_eL2 - pdp_eL2 < eps4_sq * p_eL2) /* relative reduction in error is small */
                        stop = 4;

                    for (i = 0; i < nvars; ++i) /* update p's estimate */
                        p[i] = pdp[i];

                    for (i = 0; i < nobs; ++i) /* update e and ||e||_2 */
                        e[i] = hx[i];
                    p_eL2 = pdp_eL2;
                    break;
                }
            } /* issolved */

        moredamping:
            /* if this point is reached, either the linear system could not be solved or
             * the error did not reduce; in any case, the increment must be rejected
             */

            mu *= nu;
            nu2 = nu << 1; // 2*nu;
            if (nu2 <= nu) { /* nu has wrapped around (overflown) */
                fprintf(stderr, "SBA: too many failed attempts to increase the damping factor in "
                                "sba_motstr_levmar_x()! Singular hessian matrix?\n");
                stop = 5;
                break;
            }
            nu = nu2;

            /* restore U, V diagonal entries */
            for (j = mcon; j < m; ++j) {
                ptr1 = U + j * Usz;     // set ptr1 to point to U_j
                ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j
                for (i = 0; i < cnp; ++i)
                    ptr1[i * cnp + i] = ptr2[i];
            }
            for (i = ncon; i < n; ++i) {
                ptr1 = V + i * Vsz;     // set ptr1 to point to V_i
                ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i
                for (j = 0; j < pnp; ++j)
                    ptr1[j * pnp + j] = ptr2[j];
            }
        } /* inner while loop */

        if (p_eL2 <= eps3_sq)
            stop = 6; /* error is small */
    } /* itno for loop */

    if (itno >= itmax)
        stop = 3;

    /* restore U, V diagonal entries */
    for (j = mcon; j < m; ++j) {
        ptr1 = U + j * Usz;     // set ptr1 to point to U_j
        ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j
        for (i = 0; i < cnp; ++i)
            ptr1[i * cnp + i] = ptr2[i];
    }
    for (i = ncon; i < n; ++i) {
        ptr1 = V + i * Vsz;     // set ptr1 to point to V_i
        ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i
        for (j = 0; j < pnp; ++j)
            ptr1[j * pnp + j] = ptr2[j];
    }

    if (info) {
        info[0] = init_p_eL2;
        info[1] = p_eL2;
        info[2] = eab_inf;
        info[3] = dp_L2;
        for (j = mcon, tmp = DBL_MIN; j < m; ++j) {
            ptr1 = U + j * Usz; // set ptr1 to point to U_j
            for (i = 0; i < cnp; ++i)
                if (tmp < ptr1[i * cnp + i])
                    tmp = ptr1[i * cnp + i];
        }
        for (i = ncon; i < n; ++i) {
            ptr1 = V + i * Vsz; // set ptr1 to point to V_i
            for (j = 0; j < pnp; ++j)
                if (tmp < ptr1[j * pnp + j])
                    tmp = ptr1[j * pnp + j];
        }
        info[4] = mu / tmp;
        info[5] = itno;
        info[6] = stop;
        info[7] = nfev;
        info[8] = njev;
        info[9] = nlss;
    }
    // sba_print_sol(n, m, p, cnp, pnp, x, mnp, &idxij, rcidxs, rcsubs);
    retval = (stop != 7) ? itno : SBA_ERROR;

freemem_and_return: /* NOTE: this point is also reached via a goto! */

    /* free whatever was allocated */
    free(W);
    free(U);
    free(V);
    free(e);
    free(eab);
    free(E);
    free(Yj);
    free(YWt);
    free(S);
    free(dp);
    free(Wtda);
    free(rcidxs);
    free(rcsubs);
#ifndef SBA_DESTROY_COVS
    if (wght)
        free(wght);
#else
/* nothing to do */
#endif /* SBA_DESTROY_COVS */

    free(hx);
    free(diagUV);
    free(pdp);
    if (fdj_data.hxx) { // cleanup
        free(fdj_data.hxx);
        free(fdj_data.func_rcidxs);
    }

    sba_crsm_free(&idxij);

    /* free the memory allocated by the matrix inversion & linear solver routines */
    if (matinv)
        (*matinv)(NULL, 0);
    if (linsolver)
        (*linsolver)(NULL, NULL, NULL, 0, 0);

    return retval;
}

/* Bundle adjustment on camera parameters only
 * using the sparse Levenberg-Marquardt as described in HZ p. 568
 *
 * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
 */
int sba_mot_levmar_x(
    const int n,    /* number of points */
    const int m,    /* number of images */
    const int mcon, /* number of images (starting from the 1st) whose parameters should not be modified.
                     * All A_ij (see below) with j<mcon are assumed to be zero
                     */
    char *vmask,    /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
    double *p,      /* initial parameter vector p0: (a1, ..., am). aj are the image j parameters, size m*cnp */
    const int cnp,  /* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
    double *x,      /* measurements vector, as in sba_motstr_levmar_x(). max. size n*m*mnp */
    double *covx,   /* measurements covariance matrices, as in sba_motstr_levmar_x(). Set to NULL if no
                     * covariance estimates are available. max. size n*m*mnp*mnp */
    const int mnp,  /* number of parameters for EACH measurement; usually 2 */
    void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
                    /* functional relation describing measurements, as in sba_motstr_levmar_x() */
    void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
                    /* function to evaluate the sparse jacobian dX/dp, returned in jac as
                     * (A_11, ..., A_1m, ..., A_n1, ..., A_nm), A_ij=dx_ij/da_j; if NULL, the jacobian is
                     * approximated with finite differences */
    void *adata,       /* pointer to possibly additional data, passed uninterpreted to func, fjac */
    const int itmax,   /* I: maximum number of iterations. itmax==0 signals jacobian verification followed
                        * by immediate return */
    const int verbose, /* I: verbosity */
    const double opts[SBA_OPTSSZ], /* I: minim. options, as in sba_motstr_levmar_x() */
    double info[SBA_INFOSZ])       /* O: information regarding the minimization, as in sba_motstr_levmar_x() */
{
    register int i, j, ii, jj, k;
    int nvis, nnz, retval;

    /* The following are work arrays that are dynamically allocated by sba_mot_levmar_x() */
    double *jac; /* work array for storing the jacobian, max. size n*m*mnp*cnp */
    double *U;   /* work array for storing the U_j in the order U_1, ..., U_m, size m*cnp*cnp */
    double *e;   /* work array for storing the e_ij, max. size n*m*mnp */
    double *ea;  /* work array for storing the ea_j in the order ea_1, ..., ea_m, size m*cnp */
    double *dp;  /* work array for storing the parameter vector updates da_1, ..., da_m, size m*cnp */
    double *wght = NULL; /* work array for storing the weights computed from the covariance inverses,
                            max. size n*m*mnp*mnp */
    double *hx;    /* work array for storing the predicted measurements, max. size n*m*mnp */
    double *diagU; /* work array for storing the diagonals of the U_j, size m*cnp */
    double *pdp;   /* work array for storing p + dp, size m*cnp */

    register double *ptr1, *ptr2, *ptr3, *ptr4, sum;
    struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x */
    int maxCPvis, *rcidxs, *rcsubs;
    int Asz, Usz, esz, easz, covsz;
    register double mu, tmp;
    double p_eL2, ea_inf, pdp_eL2, init_p_eL2;
    double p_L2, dp_L2 = DBL_MAX, dF, dL;
    double tau = FABS(opts[0]), eps1 = FABS(opts[1]), eps2 = FABS(opts[2]), eps2_sq = opts[2] * opts[2],
           eps3_sq = opts[3] * opts[3], eps4_sq = opts[4] * opts[4];
    int nu = 2, nu2, stop = 0, nfev, njev = 0, nlss = 0, itno, nsolved;
    int nobs, nvars;
    PLS linsolver = NULL;

    struct fdj_data_x_ fdj_data;
    void *jac_adata;

    /* initialization */
    mu = ea_inf = tmp = 0.0; /* -Wall */

    /* block sizes */
    Asz = mnp * cnp;
    Usz = cnp * cnp;
    esz = mnp;
    easz = cnp;
    covsz = mnp * mnp;

    /* count the total number of visible image points */
    for (i = nvis = 0, jj = n * m; i < jj; ++i)
        nvis += (vmask[i] != 0);

    nobs = nvis * mnp;
    nvars = m * cnp;
    if (nobs < nvars) {
        fprintf(stderr,
                "SBA: sba_mot_levmar_x() cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n",
                nobs, nvars);
        return SBA_ERROR;
    }

    /* allocate & fill up the idxij structure */
    sba_crsm_alloc(&idxij, n, m, nvis);
    for (i = k = 0; i < n; ++i) {
        idxij.rowptr[i] = k;
        ii = i * m;
        for (j = 0; j < m; ++j)
            if (vmask[ii + j]) {
                idxij.val[k] = k;
                idxij.colidx[k++] = j;
            }
    }
    idxij.rowptr[n] = nvis;

    /* find the maximum number of visible image projections coming from a single 3D point */
    for (i = maxCPvis = 0; i < n; ++i) {
        for (j = k = 0; j < m; ++j)
            if (vmask[i * m + j]) ++k;
        if (k > maxCPvis) maxCPvis = k;
    }
    /* find the maximum number of visible image projections in any single camera, over all
     * points, note that maxCPvis is not reinitialized!
*/ for (j = 0; j < m; ++j) { for (i = ii = 0; i < n; ++i) if (vmask[i * m + j]) ++ii; if (ii > maxCPvis) maxCPvis = ii; } /* allocate work arrays */ jac = (double *)emalloc(nvis * Asz * sizeof(double)); U = (double *)emalloc(m * Usz * sizeof(double)); e = (double *)emalloc(nobs * sizeof(double)); ea = (double *)emalloc(nvars * sizeof(double)); dp = (double *)emalloc(nvars * sizeof(double)); rcidxs = (int *)emalloc(maxCPvis * sizeof(int)); rcsubs = (int *)emalloc(maxCPvis * sizeof(int)); #ifndef SBA_DESTROY_COVS if (covx != NULL) wght = (double *)emalloc(nvis * covsz * sizeof(double)); #else if (covx != NULL) wght = covx; #endif /* SBA_DESTROY_COVS */ hx = (double *)emalloc(nobs * sizeof(double)); diagU = (double *)emalloc(nvars * sizeof(double)); pdp = (double *)emalloc(nvars * sizeof(double)); /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */ if (!fjac) { fdj_data.func = func; fdj_data.cnp = cnp; fdj_data.pnp = 0; fdj_data.mnp = mnp; fdj_data.hx = hx; fdj_data.hxx = (double *)emalloc(nobs * sizeof(double)); fdj_data.func_rcidxs = (int *)emalloc(2 * maxCPvis * sizeof(int)); fdj_data.func_rcsubs = fdj_data.func_rcidxs + maxCPvis; fdj_data.adata = adata; fjac = sba_fdjac_x; jac_adata = (void *)&fdj_data; } else { fdj_data.hxx = NULL; jac_adata = adata; } if (itmax == 0) { /* verify jacobian */ sba_mot_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, mcon, cnp, mnp, adata, jac_adata); retval = 0; goto freemem_and_return; } /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their * inverses and using the resulting matrices w_x_ij to weigh A_ij and e_ij as w_x_ij A_ij * and w_x_ij*e_ij. In this way, auxiliary variables as U_j=\sum_i A_ij^T A_ij * actually become \sum_i (w_x_ij A_ij)^T w_x_ij A_ij= \sum_i A_ij^T w_x_ij^T w_x_ij A_ij = * A_ij^T Sigma_x_ij^-1 A_ij * * ea_j are weighted in a similar manner */ if (covx != NULL) { for (i = 0; i < n; ++i) { nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ for (j = 0; j < nnz; ++j) { /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */ ptr1 = covx + (k = idxij.val[rcidxs[j]] * covsz); ptr2 = wght + k; if (!sba_mat_cholinv(ptr1, ptr2, mnp)) { /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */ fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_motstr_levmar_x()\n", i, rcsubs[j]); retval = SBA_ERROR; goto freemem_and_return; } } } sba_mat_cholinv(NULL, NULL, 0); /* cleanup */ } /* compute the error vectors e_ij in hx */ (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev = 1; /* ### compute e=x - f(p) [e=w*(x - f(p)] and its L2 norm */ if (covx == NULL) p_eL2 = nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */ else p_eL2 = nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */ if (verbose) printf("initial mot-SBA error %g [%g]\n", p_eL2, p_eL2 / nvis); init_p_eL2 = p_eL2; if (!SBA_FINITE(p_eL2)) stop = 7; for (itno = 0; itno < itmax && !stop; ++itno) { /* Note that p, e and ||e||_2 have been updated at the previous iteration */ /* compute derivative submatrices A_ij */ (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev; if (covx != NULL) { /* compute w_x_ij A_ij * Since w_x_ij is upper triangular, the products can be safely saved * directly in A_ij, without the need for intermediate storage */ for (i = 0; i < nvis; ++i) { /* set ptr1, ptr2 to point to w_x_ij, A_ij, resp. 
*/ ptr1 = wght + i * covsz; ptr2 = jac + i * Asz; /* w_x_ij is mnp x mnp, A_ij is mnp x cnp */ for (ii = 0; ii < mnp; ++ii) for (jj = 0; jj < cnp; ++jj) { for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular sum += ptr1[ii * mnp + k] * ptr2[k * cnp + jj]; ptr2[ii * cnp + jj] = sum; } } } /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here! /* U_j is symmetric, therefore its computation can be sped up by * computing only the upper part and then reusing it for the lower one. * Recall that A_ij is mnp x cnp */ /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here! /* Recall that e_ij is mnp x 1 */ _dblzero(U, m * Usz); /* clear all U_j */ _dblzero(ea, m * easz); /* clear all ea_j */ for (j = mcon; j < m; ++j) { ptr1 = U + j * Usz; // set ptr1 to point to U_j ptr2 = ea + j * easz; // set ptr2 to point to ea_j nnz = sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ for (i = 0; i < nnz; ++i) { /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */ ptr3 = jac + idxij.val[rcidxs[i]] * Asz; /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */ for (ii = 0; ii < cnp; ++ii) { for (jj = ii; jj < cnp; ++jj) { for (k = 0, sum = 0.0; k < mnp; ++k) sum += ptr3[k * cnp + ii] * ptr3[k * cnp + jj]; ptr1[ii * cnp + jj] += sum; } /* copy the LOWER TRIANGULAR PART of U_j from the upper one */ for (jj = 0; jj < ii; ++jj) ptr1[ii * cnp + jj] = ptr1[jj * cnp + ii]; } ptr4 = e + idxij.val[rcidxs[i]] * esz; /* set ptr4 to point to e_ij */ /* compute A_ij^T e_ij and add it to ea_j */ for (ii = 0; ii < cnp; ++ii) { for (jj = 0, sum = 0.0; jj < mnp; ++jj) sum += ptr3[jj * cnp + ii] * ptr4[jj]; ptr2[ii] += sum; } } } /* Compute ||J^T e||_inf and ||p||^2 */ for (i = 0, p_L2 = ea_inf = 0.0; i < nvars; ++i) { if (ea_inf < (tmp = FABS(ea[i]))) ea_inf = tmp; p_L2 += p[i] * p[i]; } // p_L2=sqrt(p_L2); /* save diagonal entries so that augmentation can be later canceled. 
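 * ("Augmentation" is the Levenberg-Marquardt step below that adds the damping
 * term mu to each diagonal element of the U_j; keeping the unaugmented diagonals
 * in diagU allows the augmentation to be undone cheaply whenever mu changes.)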
 * Diagonal entries are in U_j
         */
        for (j = mcon; j < m; ++j) {
            ptr1 = U + j * Usz;     // set ptr1 to point to U_j
            ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j
            for (i = 0; i < cnp; ++i)
                ptr2[i] = ptr1[i * cnp + i];
        }

        /*
        if(!(itno%100)){
            printf("Current estimate: ");
            for(i=0; i<nvars; ++i)
                printf("%.9g ", p[i]);
            printf("\n");
        }
        */

        /* check for convergence */
        if ((ea_inf <= eps1)) {
            dp_L2 = 0.0; /* no increment would result in this case and
                          * dp_L2 is used below to fill the info array */
            stop = 1;
            break;
        }

        /* compute initial damping factor */
        if (itno == 0) {
            for (i = mcon * cnp, tmp = DBL_MIN; i < nvars; ++i)
                if (diagU[i] > tmp)
                    tmp = diagU[i]; /* find max diagonal element */
            mu = tau * tmp;
        }

        /* determine increment using adaptive damping */
        while (1) {
            /* augment U */
            for (j = mcon; j < m; ++j) {
                ptr1 = U + j * Usz; // set ptr1 to point to U_j
                for (i = 0; i < cnp; ++i)
                    ptr1[i * cnp + i] += mu;
            }

            /* solve the linear systems U_j da_j = ea_j to compute the da_j */
            _dblzero(dp, mcon * cnp); /* no change for the first mcon camera params */
            for (j = nsolved = mcon; j < m; ++j) {
                ptr1 = U + j * Usz;   // set ptr1 to point to U_j
                ptr2 = ea + j * easz; // set ptr2 to point to ea_j
                ptr3 = dp + j * cnp;  // set ptr3 to point to da_j

                // nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_LU;
                nsolved += sba_Axb_Chol(ptr1, ptr2, ptr3, cnp, 0);
                linsolver = sba_Axb_Chol;
                // nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_BK;
                // nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_QRnoQ;
                // nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_QR;
                // nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_SVD;
                // nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, cnp, (3*cnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0);
                // linsolver=(PLS)sba_Axb_CG;

                ++nlss;
            }

            if (nsolved == m) {
                /* parameter vector updates are now in dp */

                /* compute p's new estimate and ||dp||^2 */
                for (i = 0, dp_L2 = 0.0; i < nvars; ++i) {
                    pdp[i] = p[i] + (tmp = dp[i]);
                    dp_L2 += tmp * tmp;
                }
                // dp_L2=sqrt(dp_L2);

                if (dp_L2 <= eps2_sq * p_L2) { /* relative change in p is small, stop */
                    // if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
                    stop = 2;
                    break;
                }

                if (dp_L2 >= (p_L2 + eps2) / SBA_EPSILON_SQ) { /* almost singular */
                    // if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
                    fprintf(stderr,
                            "SBA: the matrix of the augmented normal equations is almost singular in "
                            "sba_mot_levmar_x(),\n"
                            "     minimization should be restarted from the current solution with an increased "
                            "damping term\n");
                    retval = SBA_ERROR;
                    goto freemem_and_return;
                }

                (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata);
                ++nfev; /* evaluate function at p + dp */
                if (verbose > 1)
                    printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
                /* ### compute ||e(pdp)||_2 */
                if (covx == NULL)
                    pdp_eL2 = nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */
                else
                    pdp_eL2 = nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */
                if (!SBA_FINITE(pdp_eL2)) {
                    if (verbose) /* identify the offending point projection */
                        sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs);

                    stop = 7;
                    break;
                }

                for (i = 0, dL = 0.0; i < nvars; ++i)
                    dL += dp[i] * (mu * dp[i] + ea[i]);

                dF = p_eL2 - pdp_eL2;

                if (verbose > 1)
                    printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu,
                           dL != 0.0 ? dF / dL : dF / DBL_EPSILON, p_eL2 / nvis, pdp_eL2 / nvis, p_eL2 / pdp_eL2);

                if (dL > 0.0 && dF > 0.0) { /* reduction in error, increment is accepted */
                    tmp = (2.0 * dF / dL - 1.0);
                    tmp = 1.0 - tmp * tmp * tmp;
                    mu = mu * ((tmp >= SBA_ONE_THIRD) ?
tmp : SBA_ONE_THIRD);
                    nu = 2;

                    /* the test below is equivalent to the relative reduction of the RMS reprojection error:
                     * sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2) */
                    if (p_eL2 - pdp_eL2 < eps4_sq * p_eL2) /* relative reduction in error is small */
                        stop = 4;

                    for (i = 0; i < nvars; ++i) /* update p's estimate */
                        p[i] = pdp[i];

                    for (i = 0; i < nobs; ++i) /* update e and ||e||_2 */
                        e[i] = hx[i];
                    p_eL2 = pdp_eL2;
                    break;
                }
            } /* nsolved==m */

            /* if this point is reached, either at least one linear system could not be solved or
             * the error did not reduce; in any case, the increment must be rejected
             */

            mu *= nu;
            nu2 = nu << 1; // 2*nu;
            if (nu2 <= nu) { /* nu has wrapped around (overflown) */
                fprintf(stderr, "SBA: too many failed attempts to increase the damping factor in "
                                "sba_mot_levmar_x()! Singular hessian matrix?\n");
                stop = 5;
                break;
            }
            nu = nu2;

            /* restore U diagonal entries */
            for (j = mcon; j < m; ++j) {
                ptr1 = U + j * Usz;     // set ptr1 to point to U_j
                ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j
                for (i = 0; i < cnp; ++i)
                    ptr1[i * cnp + i] = ptr2[i];
            }
        } /* inner while loop */

        if (p_eL2 <= eps3_sq)
            stop = 6; /* error is small */
    } /* itno for loop */

    if (itno >= itmax)
        stop = 3;

    /* restore U diagonal entries */
    for (j = mcon; j < m; ++j) {
        ptr1 = U + j * Usz;     // set ptr1 to point to U_j
        ptr2 = diagU + j * cnp; // set ptr2 to point to diagU_j
        for (i = 0; i < cnp; ++i)
            ptr1[i * cnp + i] = ptr2[i];
    }

    if (info) {
        info[0] = init_p_eL2;
        info[1] = p_eL2;
        info[2] = ea_inf;
        info[3] = dp_L2;
        for (j = mcon, tmp = DBL_MIN; j < m; ++j) {
            ptr1 = U + j * Usz; // set ptr1 to point to U_j
            for (i = 0; i < cnp; ++i)
                if (tmp < ptr1[i * cnp + i])
                    tmp = ptr1[i * cnp + i];
        }
        info[4] = mu / tmp;
        info[5] = itno;
        info[6] = stop;
        info[7] = nfev;
        info[8] = njev;
        info[9] = nlss;
    }
    // sba_print_sol(n, m, p, cnp, 0, x, mnp, &idxij, rcidxs, rcsubs);
    retval = (stop != 7) ? itno : SBA_ERROR;

freemem_and_return: /* NOTE: this point is also reached via a goto! */

    /* free whatever was allocated */
    free(jac);
    free(U);
    free(e);
    free(ea);
    free(dp);
    free(rcidxs);
    free(rcsubs);
#ifndef SBA_DESTROY_COVS
    if (wght)
        free(wght);
#else
/* nothing to do */
#endif /* SBA_DESTROY_COVS */

    free(hx);
    free(diagU);
    free(pdp);
    if (fdj_data.hxx) { // cleanup
        free(fdj_data.hxx);
        free(fdj_data.func_rcidxs);
    }

    sba_crsm_free(&idxij);

    /* free the memory allocated by the linear solver routine */
    if (linsolver)
        (*linsolver)(NULL, NULL, NULL, 0, 0);

    return retval;
}

/* Bundle adjustment on structure parameters only
 * using the sparse Levenberg-Marquardt as described in HZ p. 568
 *
 * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
 */
int sba_str_levmar_x(
    const int n,    /* number of points */
    const int ncon, /* number of points (starting from the 1st) whose parameters should not be modified.
                     * All B_ij (see below) with i<ncon are assumed to be zero
                     */
    const int m,    /* number of images */
    char *vmask,    /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
    double *p,      /* initial parameter vector p0: (b1, ..., bn). bi are the i-th point parameters, size n*pnp */
    const int pnp,  /* number of parameters for ONE point; e.g. 3 for Euclidean points */
    double *x,      /* measurements vector, as in sba_motstr_levmar_x(). max. size n*m*mnp */
    double *covx,   /* measurements covariance matrices, as in sba_motstr_levmar_x(). Set to NULL if no
                     * covariance estimates are available. max. size n*m*mnp*mnp */
    const int mnp,  /* number of parameters for EACH measurement; usually 2 */
    void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
                    /* functional relation describing measurements, as in sba_motstr_levmar_x() */
    void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
                    /* function to evaluate the sparse jacobian dX/dp, returned in jac as
                     * (B_11, ..., B_1m, ..., B_n1, ..., B_nm), B_ij=dx_ij/db_i; if NULL, the jacobian is
                     * approximated with finite differences */
    void *adata,       /* pointer to possibly additional data, passed uninterpreted to func, fjac */
    const int itmax,   /* I: maximum number of iterations. itmax==0 signals jacobian verification followed
                        * by immediate return */
    const int verbose, /* I: verbosity */
    const double opts[SBA_OPTSSZ], /* I: minim. options, as in sba_motstr_levmar_x() */
    double info[SBA_INFOSZ])       /* O: information regarding the minimization, as in sba_motstr_levmar_x() */
{
    register int i, j, ii, jj, k;
    int nvis, nnz, retval;

    /* The following are work arrays that are dynamically allocated by sba_str_levmar_x() */
    double *jac; /* work array for storing the jacobian, max. size n*m*mnp*pnp */
    double *V;   /* work array for storing the V_i in the order V_1, ..., V_n, size n*pnp*pnp */
    double *e;   /* work array for storing the e_ij, max. size n*m*mnp */
    double *eb;  /* work array for storing the eb_i in the order eb_1, ..., eb_n, size n*pnp */
    double *dp;  /* work array for storing the parameter vector updates db_1, ..., db_n, size n*pnp */
    double *wght = NULL; /* work array for storing the weights computed from the covariance inverses,
                            max. size n*m*mnp*mnp */
    double *hx;    /* work array for storing the predicted measurements, max. size n*m*mnp */
    double *diagV; /* work array for storing the diagonals of the V_i, size n*pnp */
    double *pdp;   /* work array for storing p + dp, size n*pnp */

    register double *ptr1, *ptr2, *ptr3, *ptr4, sum;
    struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x */
    int maxCPvis, *rcidxs, *rcsubs;
    int Bsz, Vsz, esz, ebsz, covsz;
    register double mu, tmp;
    double p_eL2, eb_inf, pdp_eL2, init_p_eL2;
    double p_L2, dp_L2 = DBL_MAX, dF, dL;
    double tau = FABS(opts[0]), eps1 = FABS(opts[1]), eps2 = FABS(opts[2]), eps2_sq = opts[2] * opts[2],
           eps3_sq = opts[3] * opts[3], eps4_sq = opts[4] * opts[4];
    int nu = 2, nu2, stop = 0, nfev, njev = 0, nlss = 0, itno, nsolved;
    int nobs, nvars;
    PLS linsolver = NULL;

    struct fdj_data_x_ fdj_data;
    void *jac_adata;

    /* initialization */
    mu = eb_inf = tmp = 0.0; /* -Wall */

    /* block sizes */
    Bsz = mnp * pnp;
    Vsz = pnp * pnp;
    esz = mnp;
    ebsz = pnp;
    covsz = mnp * mnp;

    /* count the total number of visible image points */
    for (i = nvis = 0, jj = n * m; i < jj; ++i)
        nvis += (vmask[i] != 0);

    nobs = nvis * mnp;
    nvars = n * pnp;
    if (nobs < nvars) {
        fprintf(stderr,
                "SBA: sba_str_levmar_x() cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n",
                nobs, nvars);
        return SBA_ERROR;
    }

    /* allocate & fill up the idxij structure */
    sba_crsm_alloc(&idxij, n, m, nvis);
    for (i = k = 0; i < n; ++i) {
        idxij.rowptr[i] = k;
        ii = i * m;
        for (j = 0; j < m; ++j)
            if (vmask[ii + j]) {
                idxij.val[k] = k;
                idxij.colidx[k++] = j;
            }
    }
    idxij.rowptr[n] = nvis;

    /* find the maximum number of visible image projections coming from a single 3D point */
    for (i = maxCPvis = 0; i < n; ++i) {
        for (j = k = 0; j < m; ++j)
            if (vmask[i * m + j]) ++k;
        if (k > maxCPvis) maxCPvis = k;
    }
    /* find the maximum number of visible image projections in any single camera, over all
     * points, note that maxCPvis is not reinitialized!
*/ for (j = 0; j < m; ++j) { for (i = ii = 0; i < n; ++i) if (vmask[i * m + j]) ++ii; if (ii > maxCPvis) maxCPvis = ii; } /* allocate work arrays */ jac = (double *)emalloc(nvis * Bsz * sizeof(double)); V = (double *)emalloc(n * Vsz * sizeof(double)); e = (double *)emalloc(nobs * sizeof(double)); eb = (double *)emalloc(nvars * sizeof(double)); dp = (double *)emalloc(nvars * sizeof(double)); rcidxs = (int *)emalloc(maxCPvis * sizeof(int)); rcsubs = (int *)emalloc(maxCPvis * sizeof(int)); #ifndef SBA_DESTROY_COVS if (covx != NULL) wght = (double *)emalloc(nvis * covsz * sizeof(double)); #else if (covx != NULL) wght = covx; #endif /* SBA_DESTROY_COVS */ hx = (double *)emalloc(nobs * sizeof(double)); diagV = (double *)emalloc(nvars * sizeof(double)); pdp = (double *)emalloc(nvars * sizeof(double)); /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */ if (!fjac) { fdj_data.func = func; fdj_data.cnp = 0; fdj_data.pnp = pnp; fdj_data.mnp = mnp; fdj_data.hx = hx; fdj_data.hxx = (double *)emalloc(nobs * sizeof(double)); fdj_data.func_rcidxs = (int *)emalloc(2 * maxCPvis * sizeof(int)); fdj_data.func_rcsubs = fdj_data.func_rcidxs + maxCPvis; fdj_data.adata = adata; fjac = sba_fdjac_x; jac_adata = (void *)&fdj_data; } else { fdj_data.hxx = NULL; jac_adata = adata; } if (itmax == 0) { /* verify jacobian */ sba_str_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, ncon, pnp, mnp, adata, jac_adata); retval = 0; goto freemem_and_return; } /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their * inverses and using the resulting matrices w_x_ij to weigh B_ij and e_ij as * w_x_ij*B_ij and w_x_ij*e_ij. In this way, auxiliary variables as V_i=\sum_j B_ij^T B_ij * actually become \sum_j (w_x_ij B_ij)^T w_x_ij B_ij= \sum_j B_ij^T w_x_ij^T w_x_ij B_ij = * B_ij^T Sigma_x_ij^-1 B_ij * * eb_i are weighted in a similar manner */ if (covx != NULL) { for (i = 0; i < n; ++i) { nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ for (j = 0; j < nnz; ++j) { /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */ ptr1 = covx + (k = idxij.val[rcidxs[j]] * covsz); ptr2 = wght + k; if (!sba_mat_cholinv(ptr1, ptr2, mnp)) { /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */ fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_motstr_levmar_x()\n", i, rcsubs[j]); retval = SBA_ERROR; goto freemem_and_return; } } } sba_mat_cholinv(NULL, NULL, 0); /* cleanup */ } /* compute the error vectors e_ij in hx */ (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev = 1; /* ### compute e=x - f(p) [e=w*(x - f(p)] and its L2 norm */ if (covx == NULL) p_eL2 = nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */ else p_eL2 = nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */ if (verbose) printf("initial str-SBA error %g [%g]\n", p_eL2, p_eL2 / nvis); init_p_eL2 = p_eL2; if (!SBA_FINITE(p_eL2)) stop = 7; for (itno = 0; itno < itmax && !stop; ++itno) { /* Note that p, e and ||e||_2 have been updated at the previous iteration */ /* compute derivative submatrices B_ij */ (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev; if (covx != NULL) { /* compute w_x_ij B_ij. * Since w_x_ij is upper triangular, the products can be safely saved * directly in B_ij, without the need for intermediate storage */ for (i = 0; i < nvis; ++i) { /* set ptr1, ptr2 to point to w_x_ij, B_ij, resp. 
*/ ptr1 = wght + i * covsz; ptr2 = jac + i * Bsz; /* w_x_ij is mnp x mnp, B_ij is mnp x pnp */ for (ii = 0; ii < mnp; ++ii) for (jj = 0; jj < pnp; ++jj) { for (k = ii, sum = 0.0; k < mnp; ++k) // k>=ii since w_x_ij is upper triangular sum += ptr1[ii * mnp + k] * ptr2[k * pnp + jj]; ptr2[ii * pnp + jj] = sum; } } } /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here! /* V_i is symmetric, therefore its computation can be sped up by * computing only the upper part and then reusing it for the lower one. * Recall that B_ij is mnp x pnp */ /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here! /* Recall that e_ij is mnp x 1 */ _dblzero(V, n * Vsz); /* clear all V_i */ _dblzero(eb, n * ebsz); /* clear all eb_i */ for (i = ncon; i < n; ++i) { ptr1 = V + i * Vsz; // set ptr1 to point to V_i ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i nnz = sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ for (j = 0; j < nnz; ++j) { /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */ ptr3 = jac + idxij.val[rcidxs[j]] * Bsz; /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */ for (ii = 0; ii < pnp; ++ii) { for (jj = ii; jj < pnp; ++jj) { for (k = 0, sum = 0.0; k < mnp; ++k) sum += ptr3[k * pnp + ii] * ptr3[k * pnp + jj]; ptr1[ii * pnp + jj] += sum; } /* copy the LOWER TRIANGULAR PART of V_i from the upper one */ for (jj = 0; jj < ii; ++jj) ptr1[ii * pnp + jj] = ptr1[jj * pnp + ii]; } ptr4 = e + idxij.val[rcidxs[j]] * esz; /* set ptr4 to point to e_ij */ /* compute B_ij^T e_ij and add it to eb_i */ for (ii = 0; ii < pnp; ++ii) { for (jj = 0, sum = 0.0; jj < mnp; ++jj) sum += ptr3[jj * pnp + ii] * ptr4[jj]; ptr2[ii] += sum; } } } /* Compute ||J^T e||_inf and ||p||^2 */ for (i = 0, p_L2 = eb_inf = 0.0; i < nvars; ++i) { if (eb_inf < (tmp = FABS(eb[i]))) eb_inf = tmp; p_L2 += p[i] * p[i]; } // p_L2=sqrt(p_L2); /* save diagonal entries so that augmentation can be later canceled. 
 * Diagonal entries are in V_i
         */
        for (i = ncon; i < n; ++i) {
            ptr1 = V + i * Vsz;     // set ptr1 to point to V_i
            ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i
            for (j = 0; j < pnp; ++j)
                ptr2[j] = ptr1[j * pnp + j];
        }

        /*
        if(!(itno%100)){
            printf("Current estimate: ");
            for(i=0; i<nvars; ++i)
                printf("%.9g ", p[i]);
            printf("\n");
        }
        */

        /* check for convergence */
        if ((eb_inf <= eps1)) {
            dp_L2 = 0.0; /* no increment would result in this case and
                          * dp_L2 is used below to fill the info array */
            stop = 1;
            break;
        }

        /* compute initial damping factor */
        if (itno == 0) {
            for (i = ncon * pnp, tmp = DBL_MIN; i < nvars; ++i)
                if (diagV[i] > tmp)
                    tmp = diagV[i]; /* find max diagonal element */
            mu = tau * tmp;
        }

        /* determine increment using adaptive damping */
        while (1) {
            /* augment V */
            for (i = ncon; i < n; ++i) {
                ptr1 = V + i * Vsz; // set ptr1 to point to V_i
                for (j = 0; j < pnp; ++j)
                    ptr1[j * pnp + j] += mu;
            }

            /* solve the linear systems V*_i db_i = eb_i to compute the db_i */
            _dblzero(dp, ncon * pnp); /* no change for the first ncon point params */
            for (i = nsolved = ncon; i < n; ++i) {
                ptr1 = V + i * Vsz;   // set ptr1 to point to V_i
                ptr2 = eb + i * ebsz; // set ptr2 to point to eb_i
                ptr3 = dp + i * pnp;  // set ptr3 to point to db_i

                // nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_LU;
                nsolved += sba_Axb_Chol(ptr1, ptr2, ptr3, pnp, 0);
                linsolver = sba_Axb_Chol;
                // nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_BK;
                // nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_QRnoQ;
                // nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_QR;
                // nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_SVD;
                // nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, pnp, (3*pnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0);
                // linsolver=(PLS)sba_Axb_CG;

                ++nlss;
            }

            if (nsolved == n) {
                /* parameter vector updates are now in dp */

                /* compute p's new estimate and ||dp||^2 */
                for (i = 0, dp_L2 = 0.0; i < nvars; ++i) {
                    pdp[i] = p[i] + (tmp = dp[i]);
                    dp_L2 += tmp * tmp;
                }
                // dp_L2=sqrt(dp_L2);

                if (dp_L2 <= eps2_sq * p_L2) { /* relative change in p is small, stop */
                    // if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
                    stop = 2;
                    break;
                }

                if (dp_L2 >= (p_L2 + eps2) / SBA_EPSILON_SQ) { /* almost singular */
                    // if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
                    fprintf(stderr,
                            "SBA: the matrix of the augmented normal equations is almost singular in "
                            "sba_str_levmar_x(),\n"
                            "     minimization should be restarted from the current solution with an increased "
                            "damping term\n");
                    retval = SBA_ERROR;
                    goto freemem_and_return;
                }

                (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata);
                ++nfev; /* evaluate function at p + dp */
                if (verbose > 1)
                    printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
                /* ### compute ||e(pdp)||_2 */
                if (covx == NULL)
                    pdp_eL2 = nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */
                else
                    pdp_eL2 = nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */
                if (!SBA_FINITE(pdp_eL2)) {
                    if (verbose) /* identify the offending point projection */
                        sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs);

                    stop = 7;
                    break;
                }

                for (i = 0, dL = 0.0; i < nvars; ++i)
                    dL += dp[i] * (mu * dp[i] + eb[i]);

                dF = p_eL2 - pdp_eL2;

                if (verbose > 1)
                    printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu,
                           dL != 0.0 ? dF / dL : dF / DBL_EPSILON, p_eL2 / nvis, pdp_eL2 / nvis, p_eL2 / pdp_eL2);

                if (dL > 0.0 && dF > 0.0) { /* reduction in error, increment is accepted */
                    tmp = (2.0 * dF / dL - 1.0);
                    tmp = 1.0 - tmp * tmp * tmp;
                    mu = mu * ((tmp >= SBA_ONE_THIRD) ?
tmp : SBA_ONE_THIRD);
                    nu = 2;

                    /* the test below is equivalent to the relative reduction of the RMS reprojection error:
                     * sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2) */
                    if (p_eL2 - pdp_eL2 < eps4_sq * p_eL2) /* relative reduction in error is small */
                        stop = 4;

                    for (i = 0; i < nvars; ++i) /* update p's estimate */
                        p[i] = pdp[i];

                    for (i = 0; i < nobs; ++i) /* update e and ||e||_2 */
                        e[i] = hx[i];
                    p_eL2 = pdp_eL2;
                    break;
                }
            } /* nsolved==n */

            /* if this point is reached, either at least one linear system could not be solved or
             * the error did not reduce; in any case, the increment must be rejected
             */

            mu *= nu;
            nu2 = nu << 1; // 2*nu;
            if (nu2 <= nu) { /* nu has wrapped around (overflown) */
                fprintf(stderr, "SBA: too many failed attempts to increase the damping factor in "
                                "sba_str_levmar_x()! Singular hessian matrix?\n");
                stop = 5;
                break;
            }
            nu = nu2;

            /* restore V diagonal entries */
            for (i = ncon; i < n; ++i) {
                ptr1 = V + i * Vsz;     // set ptr1 to point to V_i
                ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i
                for (j = 0; j < pnp; ++j)
                    ptr1[j * pnp + j] = ptr2[j];
            }
        } /* inner while loop */

        if (p_eL2 <= eps3_sq)
            stop = 6; /* error is small */
    } /* itno for loop */

    if (itno >= itmax)
        stop = 3;

    /* restore V diagonal entries */
    for (i = ncon; i < n; ++i) {
        ptr1 = V + i * Vsz;     // set ptr1 to point to V_i
        ptr2 = diagV + i * pnp; // set ptr2 to point to diagV_i
        for (j = 0; j < pnp; ++j)
            ptr1[j * pnp + j] = ptr2[j];
    }

    if (info) {
        info[0] = init_p_eL2;
        info[1] = p_eL2;
        info[2] = eb_inf;
        info[3] = dp_L2;
        for (i = ncon, tmp = DBL_MIN; i < n; ++i) {
            ptr1 = V + i * Vsz; // set ptr1 to point to V_i
            for (j = 0; j < pnp; ++j)
                if (tmp < ptr1[j * pnp + j])
                    tmp = ptr1[j * pnp + j];
        }
        info[4] = mu / tmp;
        info[5] = itno;
        info[6] = stop;
        info[7] = nfev;
        info[8] = njev;
        info[9] = nlss;
    }
    // sba_print_sol(n, m, p, 0, pnp, x, mnp, &idxij, rcidxs, rcsubs);
    retval = (stop != 7) ? itno : SBA_ERROR;

freemem_and_return: /* NOTE: this point is also reached via a goto! */

    /* free whatever was allocated */
    free(jac);
    free(V);
    free(e);
    free(eb);
    free(dp);
    free(rcidxs);
    free(rcsubs);
#ifndef SBA_DESTROY_COVS
    if (wght)
        free(wght);
#else
/* nothing to do */
#endif /* SBA_DESTROY_COVS */

    free(hx);
    free(diagV);
    free(pdp);
    if (fdj_data.hxx) { // cleanup
        free(fdj_data.hxx);
        free(fdj_data.func_rcidxs);
    }

    sba_crsm_free(&idxij);

    /* free the memory allocated by the linear solver routine */
    if (linsolver)
        (*linsolver)(NULL, NULL, NULL, 0, 0);

    return retval;
}
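/* A minimal, hypothetical usage sketch (not part of the library; kept disabled).
 * It assumes a toy "projection" model x_ij = a_j + b_i with cnp = pnp = mnp = 1 and
 * full visibility, fixes the first camera (mcon=1) to remove the global gauge
 * freedom, and relies only on entry points declared in sba.h; the opts values
 * mirror the defaults suggested there (e.g. SBA_INIT_MU). Passing fjac=NULL makes
 * the driver fall back on the finite difference jacobian (sba_fdjac_x above).
 */
#if 0
static void toyproj(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata)
{
    int i, j, nnz;
    double *a = p, *b = p + idxij->nc; /* camera & point parameters (cnp = pnp = 1) */

    for (i = 0; i < idxij->nr; ++i) {
        nnz = sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* visible x_ij, j=0...m-1 */
        for (j = 0; j < nnz; ++j)
            hx[idxij->val[rcidxs[j]]] = a[rcsubs[j]] + b[i]; /* mnp = 1 */
    }
}

int main(void)
{
    const int n = 4, m = 3; /* 4 points, 3 cameras */
    char vmask[4 * 3];
    double p[3 + 4] = {0.0, 0.1, -0.2, 1.0, 2.0, 3.0, 4.0}; /* initial (a_1..a_3, b_1..b_4) */
    double x[4 * 3];
    double opts[SBA_OPTSSZ] = {SBA_INIT_MU, 1E-12, 1E-12, 1E-12, 0.0};
    double info[SBA_INFOSZ];
    int i, niter;

    for (i = 0; i < n * m; ++i) vmask[i] = 1;                   /* every point visible in every image */
    for (i = 0; i < n * m; ++i) x[i] = (double)(i / m + i % m); /* synthetic data for a_j=j, b_i=i */

    /* BA on motion & structure, ncon=0 and mcon=1; itmax=100, non-verbose */
    niter = sba_motstr_levmar_x(n, 0, m, 1, vmask, p, 1, 1, x, NULL, 1, toyproj, NULL, NULL, 100, 0, opts, info);
    printf("SBA: %d iterations, stop reason %g, final error %g\n", niter, info[6], info[1]);

    return 0;
}
#endif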