/*
 * MINPACK-1 Least Squares Fitting Library
 *
 * Original public domain version by B. Garbow, K. Hillstrom, J. More'
 *   (Argonne National Laboratory, MINPACK project, March 1980)
 * See the file DISCLAIMER for copyright information.
 *
 * Tranlation to C Language by S. Moshier (moshier.net)
 *
 * Enhancements and packaging by C. Markwardt
 *   (comparable to IDL fitting routine MPFIT
 *    see http://cow.physics.wisc.edu/~craigm/idl/idl.html)
 */

/* Main mpfit library routines (double precision)
   $Id: mpfit.c,v 1.24 2013/04/23 18:37:38 craigm Exp $
 */

#include "mpfit.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Forward declarations of functions in this module */
static int mp_fdjac2(mp_func funct, int m, int n, int *ifree, int npar, double *x, double *fvec, double *fjac,
					 int ldfjac, double epsfcn, double *wa, void *priv, int *nfev, double *step, double *dstep,
					 int *dside, int *qulimited, double *ulimit, int *ddebug, double *ddrtol, double *ddatol,
					 double *wa2, double **dvecptr);
static void mp_qrfac(int m, int n, double *a, int lda, int pivot, int *ipvt, int lipvt, double *rdiag, double *acnorm,
					 double *wa);
static void mp_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag, double *qtb, double *x, double *sdiag,
					  double *wa);
static void mp_lmpar(int n, double *r, int ldr, int *ipvt, int *ifree, double *diag, double *qtb, double delta,
					 double *par, double *x, double *sdiag, double *wa1, double *wa2);
static double mp_enorm(int n, double *x);
static double mp_dmax1(double a, double b);
static double mp_dmin1(double a, double b);
static int mp_min0(int a, int b);
static int mp_covar(int n, double *r, int ldr, int *ipvt, double tol, double *wa);

/* Macro to call user function */
#define mp_call(funct, m, n, x, fvec, dvec, priv) (*(funct))(m, n, x, fvec, dvec, priv)

/* Macro to safely allocate memory */
#define mp_malloc(dest, type, size)                                                                                    \
	dest = (type *)malloc(sizeof(type) * size);                                                                        \
	if (dest == 0) {                                                                                                   \
		info = MP_ERR_MEMORY;                                                                                          \
		goto CLEANUP;                                                                                                  \
	} else {                                                                                                           \
		int _k;                                                                                                        \
		for (_k = 0; _k < (size); _k++)                                                                                \
			dest[_k] = 0;                                                                                              \
	}

/*
*     **********
*
*     subroutine mpfit
*
*     the purpose of mpfit is to minimize the sum of the squares of
*     m nonlinear functions in n variables by a modification of
*     the levenberg-marquardt algorithm. the user must provide a
*     subroutine which calculates the functions. the jacobian is
*     then calculated by a finite-difference approximation.
*
*     mp_funct funct - function to be minimized
*     int m          - number of data points
*     int npar       - number of fit parameters
*     double *xall   - array of n initial parameter values
*                      upon return, contains adjusted parameter values
*     mp_par *pars   - array of npar structures specifying constraints;
*                      or 0 (null pointer) for unconstrained fitting
*                      [ see README and mpfit.h for definition & use of mp_par]
*     mp_config *config - pointer to structure which specifies the
*                      configuration of mpfit(); or 0 (null pointer)
*                      if the default configuration is to be used.
*                      See README and mpfit.h for definition and use
*                      of config.
*     void *private  - any private user data which is to be passed directly
*                      to funct without modification by mpfit().
*     mp_result *result - pointer to structure, which upon return, contains
*                      the results of the fit.  The user should zero this
*                      structure.  If any of the array values are to be
*                      returned, the user should allocate storage for them
*                      and assign the corresponding pointer in *result.
*                      Upon return, *result will be updated, and
*                      any of the non-null arrays will be filled.
*
*
* FORTRAN DOCUMENTATION BELOW
*
*
*     the subroutine statement is
*
*	subroutine lmdif(fcn,m,n,x,fvec,ftol,xtol,gtol,maxfev,epsfcn,
*			 diag,mode,factor,nprint,info,nfev,fjac,
*			 ldfjac,ipvt,qtf,wa1,wa2,wa3,wa4)
*
*     where
*
*	fcn is the name of the user-supplied subroutine which
*	  calculates the functions. fcn must be declared
*	  in an external statement in the user calling
*	  program, and should be written as follows.
*
*	  subroutine fcn(m,n,x,fvec,iflag)
*	  integer m,n,iflag
*	  double precision x(n),fvec(m)
*	  ----------
*	  calculate the functions at x and
*	  return this vector in fvec.
*	  ----------
*	  return
*	  end
*
*	  the value of iflag should not be changed by fcn unless
*	  the user wants to terminate execution of lmdif.
*	  in this case set iflag to a negative integer.
*
*	m is a positive integer input variable set to the number
*	  of functions.
*
*	n is a positive integer input variable set to the number
*	  of variables. n must not exceed m.
*
*	x is an array of length n. on input x must contain
*	  an initial estimate of the solution vector. on output x
*	  contains the final estimate of the solution vector.
*
*	fvec is an output array of length m which contains
*	  the functions evaluated at the output x.
*
*	ftol is a nonnegative input variable. termination
*	  occurs when both the actual and predicted relative
*	  reductions in the sum of squares are at most ftol.
*	  therefore, ftol measures the relative error desired
*	  in the sum of squares.
*
*	xtol is a nonnegative input variable. termination
*	  occurs when the relative error between two consecutive
*	  iterates is at most xtol. therefore, xtol measures the
*	  relative error desired in the approximate solution.
*
*	gtol is a nonnegative input variable. termination
*	  occurs when the cosine of the angle between fvec and
*	  any column of the jacobian is at most gtol in absolute
*	  value. therefore, gtol measures the orthogonality
*	  desired between the function vector and the columns
*	  of the jacobian.
*
*	maxfev is a positive integer input variable. termination
*	  occurs when the number of calls to fcn is at least
*	  maxfev by the end of an iteration.
*
*	epsfcn is an input variable used in determining a suitable
*	  step length for the forward-difference approximation. this
*	  approximation assumes that the relative errors in the
*	  functions are of the order of epsfcn. if epsfcn is less
*	  than the machine precision, it is assumed that the relative
*	  errors in the functions are of the order of the machine
*	  precision.
*
*	diag is an array of length n. if mode = 1 (see
*	  below), diag is internally set. if mode = 2, diag
*	  must contain positive entries that serve as
*	  multiplicative scale factors for the variables.
*
*	mode is an integer input variable. if mode = 1, the
*	  variables will be scaled internally. if mode = 2,
*	  the scaling is specified by the input diag. other
*	  values of mode are equivalent to mode = 1.
*
*	factor is a positive input variable used in determining the
*	  initial step bound. this bound is set to the product of
*	  factor and the euclidean norm of diag*x if nonzero, or else
*	  to factor itself. in most cases factor should lie in the
*	  interval (.1,100.). 100. is a generally recommended value.
*
*	nprint is an integer input variable that enables controlled
*	  printing of iterates if it is positive. in this case,
*	  fcn is called with iflag = 0 at the beginning of the first
*	  iteration and every nprint iterations thereafter and
*	  immediately prior to return, with x and fvec available
*	  for printing. if nprint is not positive, no special calls
*	  of fcn with iflag = 0 are made.
*
*	info is an integer output variable. if the user has
*	  terminated execution, info is set to the (negative)
*	  value of iflag. see description of fcn. otherwise,
*	  info is set as follows.
*
*	  info = 0  improper input parameters.
*
*	  info = 1  both actual and predicted relative reductions
*		    in the sum of squares are at most ftol.
*
*	  info = 2  relative error between two consecutive iterates
*		    is at most xtol.
*
*	  info = 3  conditions for info = 1 and info = 2 both hold.
*
*	  info = 4  the cosine of the angle between fvec and any
*		    column of the jacobian is at most gtol in
*		    absolute value.
*
*	  info = 5  number of calls to fcn has reached or
*		    exceeded maxfev.
*
*	  info = 6  ftol is too small. no further reduction in
*		    the sum of squares is possible.
*
*	  info = 7  xtol is too small. no further improvement in
*		    the approximate solution x is possible.
*
*	  info = 8  gtol is too small. fvec is orthogonal to the
*		    columns of the jacobian to machine precision.
*
*	nfev is an integer output variable set to the number of
*	  calls to fcn.
*
*	fjac is an output m by n array. the upper n by n submatrix
*	  of fjac contains an upper triangular matrix r with
*	  diagonal elements of nonincreasing magnitude such that
*
*		 t     t	   t
*		p *(jac *jac)*p = r *r,
*
*	  where p is a permutation matrix and jac is the final
*	  calculated jacobian. column j of p is column ipvt(j)
*	  (see below) of the identity matrix. the lower trapezoidal
*	  part of fjac contains information generated during
*	  the computation of r.
*
*	ldfjac is a positive integer input variable not less than m
*	  which specifies the leading dimension of the array fjac.
*
*	ipvt is an integer output array of length n. ipvt
*	  defines a permutation matrix p such that jac*p = q*r,
*	  where jac is the final calculated jacobian, q is
*	  orthogonal (not stored), and r is upper triangular
*	  with diagonal elements of nonincreasing magnitude.
*	  column j of p is column ipvt(j) of the identity matrix.
*
*	qtf is an output array of length n which contains
*	  the first n elements of the vector (q transpose)*fvec.
*
*	wa1, wa2, and wa3 are work arrays of length n.
*
*	wa4 is a work array of length m.
*
*     subprograms called
*
*	user-supplied ...... fcn
*
*	minpack-supplied ... dpmpar,enorm,fdjac2,lmpar,qrfac
*
*	fortran-supplied ... dabs,dmax1,dmin1,dsqrt,mod
*
*     argonne national laboratory. minpack project. march 1980.
*     burton s. garbow, kenneth e. hillstrom, jorge j. more
*
* ********** */

int mpfit(mp_func funct, int m, int npar, double *xall, mp_par *pars, mp_config *config, void *private_data,
		  mp_result *result) {
	mp_config conf;
	int i, j, info, iflag, nfree, npegged, iter;
	int qanylim = 0;

	int ij, jj, l;
	double actred, delta, dirder, fnorm, fnorm1, gnorm, orignorm;
	double par, pnorm, prered, ratio;
	double sum, temp, temp1, temp2, temp3, xnorm, alpha;
	static double one = 1.0;
	static double p1 = 0.1;
	static double p5 = 0.5;
	static double p25 = 0.25;
	static double p75 = 0.75;
	static double p0001 = 1.0e-4;
	static double zero = 0.0;
	int nfev = 0;

	double *step = 0, *dstep = 0, *llim = 0, *ulim = 0;
	int *pfixed = 0, *mpside = 0, *ifree = 0, *qllim = 0, *qulim = 0;
	int *ddebug = 0;
	double *ddrtol = 0, *ddatol = 0;

	double *fvec = 0, *qtf = 0;
	double *x = 0, *xnew = 0, *fjac = 0, *diag = 0;
	double *wa1 = 0, *wa2 = 0, *wa3 = 0, *wa4 = 0;
	double **dvecptr = 0;
	int *ipvt = 0;

	int ldfjac;

	/* Default configuration */
	conf.ftol = 1e-10;
	conf.xtol = 1e-10;
	conf.gtol = 1e-10;
	conf.stepfactor = 100.0;
	conf.nprint = 1;
	conf.epsfcn = MP_MACHEP0;
	conf.maxiter = 200;
	conf.douserscale = 0;
	conf.maxfev = 0;
	conf.covtol = 1e-14;
	conf.nofinitecheck = 0;

	if (config) {
		/* Transfer any user-specified configurations */
		if (config->ftol > 0)
			conf.ftol = config->ftol;
		if (config->xtol > 0)
			conf.xtol = config->xtol;
		if (config->gtol > 0)
			conf.gtol = config->gtol;
		if (config->stepfactor > 0)
			conf.stepfactor = config->stepfactor;
		if (config->nprint >= 0)
			conf.nprint = config->nprint;
		if (config->epsfcn > 0)
			conf.epsfcn = config->epsfcn;
		if (config->maxiter > 0)
			conf.maxiter = config->maxiter;
		if (config->maxiter == MP_NO_ITER)
			conf.maxiter = 0;
		if (config->douserscale != 0)
			conf.douserscale = config->douserscale;
		if (config->covtol > 0)
			conf.covtol = config->covtol;
		if (config->nofinitecheck > 0)
			conf.nofinitecheck = config->nofinitecheck;
		conf.maxfev = config->maxfev;
	}

	info = MP_ERR_INPUT; /* = 0 */
	iflag = 0;
	nfree = 0;
	npegged = 0;

	/* Basic error checking */
	if (funct == 0) {
		return MP_ERR_FUNC;
	}

	if ((m <= 0) || (xall == 0)) {
		return MP_ERR_NPOINTS;
	}

	if (npar <= 0) {
		return MP_ERR_NFREE;
	}

	fnorm = -1.0;
	fnorm1 = -1.0;
	xnorm = -1.0;
	delta = 0.0;

	/* FIXED parameters? */
	mp_malloc(pfixed, int, npar);
	if (pars)
		for (i = 0; i < npar; i++) {
			pfixed[i] = (pars[i].fixed) ? 1 : 0;
		}

	/* Finite differencing step, absolute and relative, and sidedness of deriv */
	mp_malloc(step, double, npar);
	mp_malloc(dstep, double, npar);
	mp_malloc(mpside, int, npar);
	mp_malloc(ddebug, int, npar);
	mp_malloc(ddrtol, double, npar);
	mp_malloc(ddatol, double, npar);
	if (pars)
		for (i = 0; i < npar; i++) {
			step[i] = pars[i].step;
			dstep[i] = pars[i].relstep;
			mpside[i] = pars[i].side;
			ddebug[i] = pars[i].deriv_debug;
			ddrtol[i] = pars[i].deriv_reltol;
			ddatol[i] = pars[i].deriv_abstol;
		}

	/* Finish up the free parameters */
	nfree = 0;
	mp_malloc(ifree, int, npar);
	for (i = 0, j = 0; i < npar; i++) {
		if (pfixed[i] == 0) {
			nfree++;
			ifree[j++] = i;
		}
	}
	if (nfree == 0) {
		info = MP_ERR_NFREE;
		goto CLEANUP;
	}

	if (pars) {
		for (i = 0; i < npar; i++) {
			if ((pars[i].limited[0] && (xall[i] < pars[i].limits[0])) ||
				(pars[i].limited[1] && (xall[i] > pars[i].limits[1]))) {
				info = MP_ERR_INITBOUNDS;
				goto CLEANUP;
			}
			if ((pars[i].fixed == 0) && pars[i].limited[0] && pars[i].limited[1] &&
				(pars[i].limits[0] >= pars[i].limits[1])) {
				info = MP_ERR_BOUNDS;
				goto CLEANUP;
			}
		}

		mp_malloc(qulim, int, nfree);
		mp_malloc(qllim, int, nfree);
		mp_malloc(ulim, double, nfree);
		mp_malloc(llim, double, nfree);

		for (i = 0; i < nfree; i++) {
			qllim[i] = pars[ifree[i]].limited[0];
			qulim[i] = pars[ifree[i]].limited[1];
			llim[i] = pars[ifree[i]].limits[0];
			ulim[i] = pars[ifree[i]].limits[1];
			if (qllim[i] || qulim[i])
				qanylim = 1;
		}
	}

	/* Sanity checking on input configuration */
	if ((npar <= 0) || (conf.ftol <= 0) || (conf.xtol <= 0) || (conf.gtol <= 0) || (conf.maxiter < 0) ||
		(conf.stepfactor <= 0)) {
		info = MP_ERR_PARAM;
		goto CLEANUP;
	}

	/* Ensure there are some degrees of freedom */
	if (m < nfree) {
		info = MP_ERR_DOF;
		goto CLEANUP;
	}

	/* Allocate temporary storage */
	mp_malloc(fvec, double, m);
	mp_malloc(qtf, double, nfree);
	mp_malloc(x, double, nfree);
	mp_malloc(xnew, double, npar);
	mp_malloc(fjac, double, m *nfree);
	ldfjac = m;
	mp_malloc(diag, double, npar);
	mp_malloc(wa1, double, npar);
	mp_malloc(wa2, double, npar);
	mp_malloc(wa3, double, npar);
	mp_malloc(wa4, double, m);
	mp_malloc(ipvt, int, npar);
	mp_malloc(dvecptr, double *, npar);

	/* Evaluate user function with initial parameter values */
	iflag = mp_call(funct, m, npar, xall, fvec, 0, private_data);
	nfev += 1;
	if (iflag < 0) {
		goto CLEANUP;
	}

	fnorm = mp_enorm(m, fvec);
	orignorm = fnorm * fnorm;

	/* Make a new copy */
	for (i = 0; i < npar; i++) {
		xnew[i] = xall[i];
	}

	/* Transfer free parameters to 'x' */
	for (i = 0; i < nfree; i++) {
		x[i] = xall[ifree[i]];
	}

	/* Initialize Levelberg-Marquardt parameter and iteration counter */

	par = 0.0;
	iter = 1;
	for (i = 0; i < nfree; i++) {
		qtf[i] = 0;
	}

/* Beginning of the outer loop */
OUTER_LOOP:
	for (i = 0; i < nfree; i++) {
		xnew[ifree[i]] = x[i];
	}

	/* XXX call iterproc */

	/* Calculate the jacobian matrix */
	iflag = mp_fdjac2(funct, m, nfree, ifree, npar, xnew, fvec, fjac, ldfjac, conf.epsfcn, wa4, private_data, &nfev,
					  step, dstep, mpside, qulim, ulim, ddebug, ddrtol, ddatol, wa2, dvecptr);
	if (iflag < 0) {
		goto CLEANUP;
	}

	/* Determine if any of the parameters are pegged at the limits */
	if (qanylim) {
		for (j = 0; j < nfree; j++) {
			int lpegged = (qllim[j] && (x[j] == llim[j]));
			int upegged = (qulim[j] && (x[j] == ulim[j]));
			sum = 0;

			/* If the parameter is pegged at a limit, compute the gradient
		   direction */
			if (lpegged || upegged) {
				ij = j * ldfjac;
				for (i = 0; i < m; i++, ij++) {
					sum += fvec[i] * fjac[ij];
				}
			}
			/* If pegged at lower limit and gradient is toward negative then
		   reset gradient to zero */
			if (lpegged && (sum > 0)) {
				ij = j * ldfjac;
				for (i = 0; i < m; i++, ij++)
					fjac[ij] = 0;
			}
			/* If pegged at upper limit and gradient is toward positive then
		   reset gradient to zero */
			if (upegged && (sum < 0)) {
				ij = j * ldfjac;
				for (i = 0; i < m; i++, ij++)
					fjac[ij] = 0;
			}
		}
	}

	/* Compute the QR factorization of the jacobian */
	mp_qrfac(m, nfree, fjac, ldfjac, 1, ipvt, nfree, wa1, wa2, wa3);

	/*
	 *	 on the first iteration and if mode is 1, scale according
	 *	 to the norms of the columns of the initial jacobian.
	 */
	if (iter == 1) {
		if (conf.douserscale == 0) {
			for (j = 0; j < nfree; j++) {
				diag[ifree[j]] = wa2[j];
				if (wa2[j] == zero) {
					diag[ifree[j]] = one;
				}
			}
		}

		/*
		 *	 on the first iteration, calculate the norm of the scaled x
		 *	 and initialize the step bound delta.
		 */
		for (j = 0; j < nfree; j++) {
			wa3[j] = diag[ifree[j]] * x[j];
		}

		xnorm = mp_enorm(nfree, wa3);
		delta = conf.stepfactor * xnorm;
		if (delta == zero)
			delta = conf.stepfactor;
	}

	/*
	 *	 form (q transpose)*fvec and store the first n components in
	 *	 qtf.
	 */
	for (i = 0; i < m; i++) {
		wa4[i] = fvec[i];
	}

	jj = 0;
	for (j = 0; j < nfree; j++) {
		temp3 = fjac[jj];
		if (temp3 != zero) {
			sum = zero;
			ij = jj;
			for (i = j; i < m; i++) {
				sum += fjac[ij] * wa4[i];
				ij += 1; /* fjac[i+m*j] */
			}
			temp = -sum / temp3;
			ij = jj;
			for (i = j; i < m; i++) {
				wa4[i] += fjac[ij] * temp;
				ij += 1; /* fjac[i+m*j] */
			}
		}
		fjac[jj] = wa1[j];
		jj += m + 1; /* fjac[j+m*j] */
		qtf[j] = wa4[j];
	}

	/* ( From this point on, only the square matrix, consisting of the
	   triangle of R, is needed.) */

	if (conf.nofinitecheck) {
		/* Check for overflow.  This should be a cheap test here since FJAC
		   has been reduced to a (small) square matrix, and the test is
		   O(N^2). */
		int off = 0, nonfinite = 0;

		for (j = 0; j < nfree; j++) {
			for (i = 0; i < nfree; i++) {
				if (mpfinite(fjac[off + i]) == 0)
					nonfinite = 1;
			}
			off += ldfjac;
		}

		if (nonfinite) {
			info = MP_ERR_NAN;
			goto CLEANUP;
		}
	}

	/*
	 *	 compute the norm of the scaled gradient.
	 */
	gnorm = zero;
	if (fnorm != zero) {
		jj = 0;
		for (j = 0; j < nfree; j++) {
			l = ipvt[j];
			if (wa2[l] != zero) {
				sum = zero;
				ij = jj;
				for (i = 0; i <= j; i++) {
					sum += fjac[ij] * (qtf[i] / fnorm);
					ij += 1; /* fjac[i+m*j] */
				}
				gnorm = mp_dmax1(gnorm, fabs(sum / wa2[l]));
			}
			jj += m;
		}
	}

	/*
	 *	 test for convergence of the gradient norm.
	 */
	if (gnorm <= conf.gtol)
		info = MP_OK_DIR;
	if (info != 0)
		goto L300;
	if (conf.maxiter == 0) {
		info = MP_MAXITER;
		goto L300;
	}

	/*
	 *	 rescale if necessary.
	 */
	if (conf.douserscale == 0) {
		for (j = 0; j < nfree; j++) {
			diag[ifree[j]] = mp_dmax1(diag[ifree[j]], wa2[j]);
		}
	}

/*
 *	 beginning of the inner loop.
 */
L200:
	/*
	 *	    determine the levenberg-marquardt parameter.
	 */
	mp_lmpar(nfree, fjac, ldfjac, ipvt, ifree, diag, qtf, delta, &par, wa1, wa2, wa3, wa4);
	/*
	 *	    store the direction p and x + p. calculate the norm of p.
	 */
	for (j = 0; j < nfree; j++) {
		wa1[j] = -wa1[j];
	}

	alpha = 1.0;
	if (qanylim == 0) {
		/* No parameter limits, so just move to new position WA2 */
		for (j = 0; j < nfree; j++) {
			wa2[j] = x[j] + wa1[j];
		}

	} else {
		/* Respect the limits.  If a step were to go out of bounds, then
		 * we should take a step in the same direction but shorter distance.
		 * The step should take us right to the limit in that case.
		 */
		for (j = 0; j < nfree; j++) {
			int lpegged = (qllim[j] && (x[j] <= llim[j]));
			int upegged = (qulim[j] && (x[j] >= ulim[j]));
			int dwa1 = fabs(wa1[j]) > MP_MACHEP0;

			if (lpegged && (wa1[j] < 0))
				wa1[j] = 0;
			if (upegged && (wa1[j] > 0))
				wa1[j] = 0;

			if (dwa1 && qllim[j] && ((x[j] + wa1[j]) < llim[j])) {
				alpha = mp_dmin1(alpha, (llim[j] - x[j]) / wa1[j]);
			}
			if (dwa1 && qulim[j] && ((x[j] + wa1[j]) > ulim[j])) {
				alpha = mp_dmin1(alpha, (ulim[j] - x[j]) / wa1[j]);
			}
		}

		/* Scale the resulting vector, advance to the next position */
		for (j = 0; j < nfree; j++) {
			double sgnu, sgnl;
			double ulim1, llim1;

			wa1[j] = wa1[j] * alpha;
			wa2[j] = x[j] + wa1[j];

			/* Adjust the output values.  If the step put us exactly
			 * on a boundary, make sure it is exact.
			 */
			sgnu = (ulim[j] >= 0) ? (+1) : (-1);
			sgnl = (llim[j] >= 0) ? (+1) : (-1);
			ulim1 = ulim[j] * (1 - sgnu * MP_MACHEP0) - ((ulim[j] == 0) ? (MP_MACHEP0) : 0);
			llim1 = llim[j] * (1 + sgnl * MP_MACHEP0) + ((llim[j] == 0) ? (MP_MACHEP0) : 0);

			if (qulim[j] && (wa2[j] >= ulim1)) {
				wa2[j] = ulim[j];
			}
			if (qllim[j] && (wa2[j] <= llim1)) {
				wa2[j] = llim[j];
			}
		}
	}

	for (j = 0; j < nfree; j++) {
		wa3[j] = diag[ifree[j]] * wa1[j];
	}

	pnorm = mp_enorm(nfree, wa3);

	/*
	 *	    on the first iteration, adjust the initial step bound.
	 */
	if (iter == 1) {
		delta = mp_dmin1(delta, pnorm);
	}

	/*
	 *	    evaluate the function at x + p and calculate its norm.
	 */
	for (i = 0; i < nfree; i++) {
		xnew[ifree[i]] = wa2[i];
	}

	iflag = mp_call(funct, m, npar, xnew, wa4, 0, private_data);
	nfev += 1;
	if (iflag < 0)
		goto L300;

	fnorm1 = mp_enorm(m, wa4);

	/*
	 *	    compute the scaled actual reduction.
	 */
	actred = -one;
	if ((p1 * fnorm1) < fnorm) {
		temp = fnorm1 / fnorm;
		actred = one - temp * temp;
	}

	/*
	 *	    compute the scaled predicted reduction and
	 *	    the scaled directional derivative.
	 */
	jj = 0;
	for (j = 0; j < nfree; j++) {
		wa3[j] = zero;
		l = ipvt[j];
		temp = wa1[l];
		ij = jj;
		for (i = 0; i <= j; i++) {
			wa3[i] += fjac[ij] * temp;
			ij += 1; /* fjac[i+m*j] */
		}
		jj += m;
	}

	/* Remember, alpha is the fraction of the full LM step actually
	 * taken
	 */

	temp1 = mp_enorm(nfree, wa3) * alpha / fnorm;
	temp2 = (sqrt(alpha * par) * pnorm) / fnorm;
	prered = temp1 * temp1 + (temp2 * temp2) / p5;
	dirder = -(temp1 * temp1 + temp2 * temp2);

	/*
	 *	    compute the ratio of the actual to the predicted
	 *	    reduction.
	 */
	ratio = zero;
	if (prered != zero) {
		ratio = actred / prered;
	}

	/*
	 *	    update the step bound.
	 */

	if (ratio <= p25) {
		if (actred >= zero) {
			temp = p5;
		} else {
			temp = p5 * dirder / (dirder + p5 * actred);
		}
		if (((p1 * fnorm1) >= fnorm) || (temp < p1)) {
			temp = p1;
		}
		delta = temp * mp_dmin1(delta, pnorm / p1);
		par = par / temp;
	} else {
		if ((par == zero) || (ratio >= p75)) {
			delta = pnorm / p5;
			par = p5 * par;
		}
	}

	/*
	 *	    test for successful iteration.
	 */
	if (ratio >= p0001) {

		/*
		 *	    successful iteration. update x, fvec, and their norms.
		 */
		for (j = 0; j < nfree; j++) {
			x[j] = wa2[j];
			wa2[j] = diag[ifree[j]] * x[j];
		}
		for (i = 0; i < m; i++) {
			fvec[i] = wa4[i];
		}
		xnorm = mp_enorm(nfree, wa2);
		fnorm = fnorm1;
		iter += 1;
	}

	/*
	 *	    tests for convergence.
	 */
	if ((fabs(actred) <= conf.ftol) && (prered <= conf.ftol) && (p5 * ratio <= one)) {
		info = MP_OK_CHI;
	}
	if (delta <= conf.xtol * xnorm) {
		info = MP_OK_PAR;
	}
	if ((fabs(actred) <= conf.ftol) && (prered <= conf.ftol) && (p5 * ratio <= one) && (info == 2)) {
		info = MP_OK_BOTH;
	}
	if (info != 0) {
		goto L300;
	}

	/*
	 *	    tests for termination and stringent tolerances.
	 */
	if ((conf.maxfev > 0) && (nfev >= conf.maxfev)) {
		/* Too many function evaluations */
		info = MP_MAXITER;
	}
	if (iter >= conf.maxiter) {
		/* Too many iterations */
		info = MP_MAXITER;
	}
	if ((fabs(actred) <= MP_MACHEP0) && (prered <= MP_MACHEP0) && (p5 * ratio <= one)) {
		info = MP_FTOL;
	}
	if (delta <= MP_MACHEP0 * xnorm) {
		info = MP_XTOL;
	}
	if (gnorm <= MP_MACHEP0) {
		info = MP_GTOL;
	}
	if (info != 0) {
		goto L300;
	}

	/*
	 *	    end of the inner loop. repeat if iteration unsuccessful.
	 */
	if (ratio < p0001)
		goto L200;
	/*
	 *	 end of the outer loop.
	 */
	goto OUTER_LOOP;

L300:
	/*
	 *     termination, either normal or user imposed.
	 */
	if (iflag < 0) {
		info = iflag;
	}
	iflag = 0;

	for (i = 0; i < nfree; i++) {
		xall[ifree[i]] = x[i];
	}

	if ((conf.nprint > 0) && (info > 0)) {
		iflag = mp_call(funct, m, npar, xall, fvec, 0, private_data);
		nfev += 1;
	}

	/* Compute number of pegged parameters */
	npegged = 0;
	if (pars)
		for (i = 0; i < npar; i++) {
			if ((pars[i].limited[0] && (pars[i].limits[0] == xall[i])) ||
				(pars[i].limited[1] && (pars[i].limits[1] == xall[i]))) {
				npegged++;
			}
		}

	/* Compute and return the covariance matrix and/or parameter errors */
	if (result && (result->covar || result->xerror)) {
		mp_covar(nfree, fjac, ldfjac, ipvt, conf.covtol, wa2);

		if (result->covar) {
			/* Zero the destination covariance array */
			for (j = 0; j < (npar * npar); j++)
				result->covar[j] = 0;

			/* Transfer the covariance array */
			for (j = 0; j < nfree; j++) {
				for (i = 0; i < nfree; i++) {
					result->covar[ifree[j] * npar + ifree[i]] = fjac[j * ldfjac + i];
				}
			}
		}

		if (result->xerror) {
			for (j = 0; j < npar; j++)
				result->xerror[j] = 0;

			for (j = 0; j < nfree; j++) {
				double cc = fjac[j * ldfjac + j];
				if (cc > 0)
					result->xerror[ifree[j]] = sqrt(cc);
			}
		}
	}

	if (result) {
		strcpy(result->version, MPFIT_VERSION);
		result->bestnorm = mp_dmax1(fnorm, fnorm1);
		result->bestnorm *= result->bestnorm;
		result->orignorm = orignorm;
		result->status = info;
		result->niter = iter;
		result->nfev = nfev;
		result->npar = npar;
		result->nfree = nfree;
		result->npegged = npegged;
		result->nfunc = m;

		/* Copy residuals if requested */
		if (result->resid) {
			for (j = 0; j < m; j++)
				result->resid[j] = fvec[j];
		}
	}

CLEANUP:
	if (fvec)
		free(fvec);
	if (qtf)
		free(qtf);
	if (x)
		free(x);
	if (xnew)
		free(xnew);
	if (fjac)
		free(fjac);
	if (diag)
		free(diag);
	if (wa1)
		free(wa1);
	if (wa2)
		free(wa2);
	if (wa3)
		free(wa3);
	if (wa4)
		free(wa4);
	if (ipvt)
		free(ipvt);
	if (pfixed)
		free(pfixed);
	if (step)
		free(step);
	if (dstep)
		free(dstep);
	if (mpside)
		free(mpside);
	if (ddebug)
		free(ddebug);
	if (ddrtol)
		free(ddrtol);
	if (ddatol)
		free(ddatol);
	if (ifree)
		free(ifree);
	if (qllim)
		free(qllim);
	if (qulim)
		free(qulim);
	if (llim)
		free(llim);
	if (ulim)
		free(ulim);
	if (dvecptr)
		free(dvecptr);

	return info;
}

/************************fdjac2.c*************************/

static int mp_fdjac2(mp_func funct, int m, int n, int *ifree, int npar, double *x, double *fvec, double *fjac,
					 int ldfjac, double epsfcn, double *wa, void *priv, int *nfev, double *step, double *dstep,
					 int *dside, int *qulimited, double *ulimit, int *ddebug, double *ddrtol, double *ddatol,
					 double *wa2, double **dvec) {
	/*
	*     **********
	*
	*     subroutine fdjac2
	*
	*     this subroutine computes a forward-difference approximation
	*     to the m by n jacobian matrix associated with a specified
	*     problem of m functions in n variables.
	*
	*     the subroutine statement is
	*
	*	subroutine fdjac2(fcn,m,n,x,fvec,fjac,ldfjac,iflag,epsfcn,wa)
	*
	*     where
	*
	*	fcn is the name of the user-supplied subroutine which
	*	  calculates the functions. fcn must be declared
	*	  in an external statement in the user calling
	*	  program, and should be written as follows.
	*
	*	  subroutine fcn(m,n,x,fvec,iflag)
	*	  integer m,n,iflag
	*	  double precision x(n),fvec(m)
	*	  ----------
	*	  calculate the functions at x and
	*	  return this vector in fvec.
	*	  ----------
	*	  return
	*	  end
	*
	*	  the value of iflag should not be changed by fcn unless
	*	  the user wants to terminate execution of fdjac2.
	*	  in this case set iflag to a negative integer.
	*
	*	m is a positive integer input variable set to the number
	*	  of functions.
	*
	*	n is a positive integer input variable set to the number
	*	  of variables. n must not exceed m.
	*
	*	x is an input array of length n.
	*
	*	fvec is an input array of length m which must contain the
	*	  functions evaluated at x.
	*
	*	fjac is an output m by n array which contains the
	*	  approximation to the jacobian matrix evaluated at x.
	*
	*	ldfjac is a positive integer input variable not less than m
	*	  which specifies the leading dimension of the array fjac.
	*
	*	iflag is an integer variable which can be used to terminate
	*	  the execution of fdjac2. see description of fcn.
	*
	*	epsfcn is an input variable used in determining a suitable
	*	  step length for the forward-difference approximation. this
	*	  approximation assumes that the relative errors in the
	*	  functions are of the order of epsfcn. if epsfcn is less
	*	  than the machine precision, it is assumed that the relative
	*	  errors in the functions are of the order of the machine
	*	  precision.
	*
	*	wa is a work array of length m.
	*
	*     subprograms called
	*
	*	user-supplied ...... fcn
	*
	*	minpack-supplied ... dpmpar
	*
	*	fortran-supplied ... dabs,dmax1,dsqrt
	*
	*     argonne national laboratory. minpack project. march 1980.
	*     burton s. garbow, kenneth e. hillstrom, jorge j. more
	*
		  **********
	*/
	int i, j, ij;
	int iflag = 0;
	double eps, h, temp;
	static double zero = 0.0;
	int has_analytical_deriv = 0, has_numerical_deriv = 0;
	int has_debug_deriv = 0;

	temp = mp_dmax1(epsfcn, MP_MACHEP0);
	eps = sqrt(temp);
	ij = 0;
	ldfjac = 0; /* Prevent compiler warning */
	if (ldfjac) {
	} /* Prevent compiler warning */

	for (j = 0; j < npar; j++)
		dvec[j] = 0;

	/* Initialize the Jacobian derivative matrix */
	for (j = 0; j < (n * m); j++)
		fjac[j] = 0;

	/* Check for which parameters need analytical derivatives and which
	   need numerical ones */
	for (j = 0; j < n; j++) { /* Loop through free parameters only */
		if (dside && dside[ifree[j]] == 3 && ddebug[ifree[j]] == 0) {
			/* Purely analytical derivatives */
			dvec[ifree[j]] = fjac + j * m;
			has_analytical_deriv = 1;
		} else if (dside && ddebug[ifree[j]] == 1) {
			/* Numerical and analytical derivatives as a debug cross-check */
			dvec[ifree[j]] = fjac + j * m;
			has_analytical_deriv = 1;
			has_numerical_deriv = 1;
			has_debug_deriv = 1;
		} else {
			has_numerical_deriv = 1;
		}
	}

	/* If there are any parameters requiring analytical derivatives,
	   then compute them first. */
	if (has_analytical_deriv) {
		iflag = mp_call(funct, m, npar, x, wa, dvec, priv);
		if (nfev)
			*nfev = *nfev + 1;
		if (iflag < 0)
			goto DONE;
	}

	if (has_debug_deriv) {
		printf("FJAC DEBUG BEGIN\n");
		printf("#  %10s %10s %10s %10s %10s %10s\n", "IPNT", "FUNC", "DERIV_U", "DERIV_N", "DIFF_ABS", "DIFF_REL");
	}

	/* Any parameters requiring numerical derivatives */
	if (has_numerical_deriv)
		for (j = 0; j < n; j++) { /* Loop thru free parms */
			int dsidei = (dside) ? (dside[ifree[j]]) : (0);
			int debug = ddebug[ifree[j]];
			double dr = ddrtol[ifree[j]], da = ddatol[ifree[j]];

			/* Check for debugging */
			if (debug) {
				printf("FJAC PARM %d\n", ifree[j]);
			}

			/* Skip parameters already done by user-computed partials */
			if (dside && dsidei == 3)
				continue;

			temp = x[ifree[j]];
			h = eps * fabs(temp);
			if (step && step[ifree[j]] > 0)
				h = step[ifree[j]];
			if (dstep && dstep[ifree[j]] > 0)
				h = fabs(dstep[ifree[j]] * temp);
			if (h == zero)
				h = eps;

			/* If negative step requested, or we are against the upper limit */
			if ((dside && dsidei == -1) ||
				(dside && dsidei == 0 && qulimited && ulimit && qulimited[j] && (temp > (ulimit[j] - h)))) {
				h = -h;
			}

			x[ifree[j]] = temp + h;
			iflag = mp_call(funct, m, npar, x, wa, 0, priv);
			if (nfev)
				*nfev = *nfev + 1;
			if (iflag < 0)
				goto DONE;
			x[ifree[j]] = temp;

			if (dsidei <= 1) {
				/* COMPUTE THE ONE-SIDED DERIVATIVE */
				if (!debug) {
					/* Non-debug path for speed */
					for (i = 0; i < m; i++, ij++) {
						fjac[ij] = (wa[i] - fvec[i]) / h; /* fjac[i+m*j] */
					}
				} else {
					/* Debug path for correctness */
					for (i = 0; i < m; i++, ij++) {
						double fjold = fjac[ij];
						fjac[ij] = (wa[i] - fvec[i]) / h; /* fjac[i+m*j] */
						if ((da == 0 && dr == 0 && (fjold != 0 || fjac[ij] != 0)) ||
							((da != 0 || dr != 0) && (fabs(fjold - fjac[ij]) > da + fabs(fjold) * dr))) {
							printf("   %10d %10.4g %10.4g %10.4g %10.4g %10.4g\n", i, fvec[i], fjold, fjac[ij],
								   fjold - fjac[ij], (fjold == 0) ? (0) : ((fjold - fjac[ij]) / fjold));
						}
					}
				} /* end debugging */

			} else { /* dside > 2 */
				/* COMPUTE THE TWO-SIDED DERIVATIVE */
				for (i = 0; i < m; i++) {
					wa2[i] = wa[i];
				}

				/* Evaluate at x - h */
				x[ifree[j]] = temp - h;
				iflag = mp_call(funct, m, npar, x, wa, 0, priv);
				if (nfev)
					*nfev = *nfev + 1;
				if (iflag < 0)
					goto DONE;
				x[ifree[j]] = temp;

				/* Now compute derivative as (f(x+h) - f(x-h))/(2h) */
				if (!debug) {
					/* Non-debug path for speed */
					for (i = 0; i < m; i++, ij++) {
						fjac[ij] = (fjac[ij] - wa[i]) / (2 * h); /* fjac[i+m*j] */
					}
				} else {
					/* Debug path for correctness */
					for (i = 0; i < m; i++, ij++) {
						double fjold = fjac[ij];
						fjac[ij] = (wa2[i] - wa[i]) / (2 * h); /* fjac[i+m*j] */
						if ((da == 0 && dr == 0 && (fjold != 0 || fjac[ij] != 0)) ||
							((da != 0 || dr != 0) && (fabs(fjold - fjac[ij]) > da + fabs(fjold) * dr))) {
							printf("   %10d %10.4g %10.4g %10.4g %10.4g %10.4g\n", i, fvec[i], fjold, fjac[ij],
								   fjold - fjac[ij], (fjold == 0) ? (0) : ((fjold - fjac[ij]) / fjold));
						}
					}
				} /* end debugging */

			} /* if (dside > 2) */
		}	 /* if (has_numerical_derivative) */

	if (has_debug_deriv) {
		printf("FJAC DEBUG END\n");
	}

DONE:
	if (iflag < 0)
		return iflag;
	return 0;
	/*
	 *     last card of subroutine fdjac2.
	 */
}

/************************qrfac.c*************************/

static void mp_qrfac(int m, int n, double *a, int lda, int pivot, int *ipvt, int lipvt, double *rdiag, double *acnorm,
					 double *wa) {
	/*
	*     **********
	*
	*     subroutine qrfac
	*
	*     this subroutine uses householder transformations with column
	*     pivoting (optional) to compute a qr factorization of the
	*     m by n matrix a. that is, qrfac determines an orthogonal
	*     matrix q, a permutation matrix p, and an upper trapezoidal
	*     matrix r with diagonal elements of nonincreasing magnitude,
	*     such that a*p = q*r. the householder transformation for
	*     column k, k = 1,2,...,min(m,n), is of the form
	*
	*			    t
	*	    i - (1/u(k))*u*u
	*
	*     where u has zeros in the first k-1 positions. the form of
	*     this transformation and the method of pivoting first
	*     appeared in the corresponding linpack subroutine.
	*
	*     the subroutine statement is
	*
	*	subroutine qrfac(m,n,a,lda,pivot,ipvt,lipvt,rdiag,acnorm,wa)
	*
	*     where
	*
	*	m is a positive integer input variable set to the number
	*	  of rows of a.
	*
	*	n is a positive integer input variable set to the number
	*	  of columns of a.
	*
	*	a is an m by n array. on input a contains the matrix for
	*	  which the qr factorization is to be computed. on output
	*	  the strict upper trapezoidal part of a contains the strict
	*	  upper trapezoidal part of r, and the lower trapezoidal
	*	  part of a contains a factored form of q (the non-trivial
	*	  elements of the u vectors described above).
	*
	*	lda is a positive integer input variable not less than m
	*	  which specifies the leading dimension of the array a.
	*
	*	pivot is a logical input variable. if pivot is set true,
	*	  then column pivoting is enforced. if pivot is set false,
	*	  then no column pivoting is done.
	*
	*	ipvt is an integer output array of length lipvt. ipvt
	*	  defines the permutation matrix p such that a*p = q*r.
	*	  column j of p is column ipvt(j) of the identity matrix.
	*	  if pivot is false, ipvt is not referenced.
	*
	*	lipvt is a positive integer input variable. if pivot is false,
	*	  then lipvt may be as small as 1. if pivot is true, then
	*	  lipvt must be at least n.
	*
	*	rdiag is an output array of length n which contains the
	*	  diagonal elements of r.
	*
	*	acnorm is an output array of length n which contains the
	*	  norms of the corresponding columns of the input matrix a.
	*	  if this information is not needed, then acnorm can coincide
	*	  with rdiag.
	*
	*	wa is a work array of length n. if pivot is false, then wa
	*	  can coincide with rdiag.
	*
	*     subprograms called
	*
	*	minpack-supplied ... dpmpar,enorm
	*
	*	fortran-supplied ... dmax1,dsqrt,min0
	*
	*     argonne national laboratory. minpack project. march 1980.
	*     burton s. garbow, kenneth e. hillstrom, jorge j. more
	*
	*     **********
	*/
	int i, ij, jj, j, jp1, k, kmax, minmn;
	double ajnorm, sum, temp;
	static double zero = 0.0;
	static double one = 1.0;
	static double p05 = 0.05;

	lda = 0;   /* Prevent compiler warning */
	lipvt = 0; /* Prevent compiler warning */
	if (lda) {
	} /* Prevent compiler warning */
	if (lipvt) {
	} /* Prevent compiler warning */

	/*
	 *     compute the initial column norms and initialize several arrays.
	 */
	ij = 0;
	for (j = 0; j < n; j++) {
		acnorm[j] = mp_enorm(m, &a[ij]);
		rdiag[j] = acnorm[j];
		wa[j] = rdiag[j];
		if (pivot != 0)
			ipvt[j] = j;
		ij += m; /* m*j */
	}
	/*
	 *     reduce a to r with householder transformations.
	 */
	minmn = mp_min0(m, n);
	for (j = 0; j < minmn; j++) {
		if (pivot == 0)
			goto L40;
		/*
		 *	 bring the column of largest norm into the pivot position.
		 */
		kmax = j;
		for (k = j; k < n; k++) {
			if (rdiag[k] > rdiag[kmax])
				kmax = k;
		}
		if (kmax == j)
			goto L40;

		ij = m * j;
		jj = m * kmax;
		for (i = 0; i < m; i++) {
			temp = a[ij];  /* [i+m*j] */
			a[ij] = a[jj]; /* [i+m*kmax] */
			a[jj] = temp;
			ij += 1;
			jj += 1;
		}
		rdiag[kmax] = rdiag[j];
		wa[kmax] = wa[j];
		k = ipvt[j];
		ipvt[j] = ipvt[kmax];
		ipvt[kmax] = k;

	L40:
		/*
		 *	 compute the householder transformation to reduce the
		 *	 j-th column of a to a multiple of the j-th unit vector.
		 */
		jj = j + m * j;
		ajnorm = mp_enorm(m - j, &a[jj]);
		if (ajnorm == zero)
			goto L100;
		if (a[jj] < zero)
			ajnorm = -ajnorm;
		ij = jj;
		for (i = j; i < m; i++) {
			a[ij] /= ajnorm;
			ij += 1; /* [i+m*j] */
		}
		a[jj] += one;
		/*
		 *	 apply the transformation to the remaining columns
		 *	 and update the norms.
		 */
		jp1 = j + 1;
		if (jp1 < n) {
			for (k = jp1; k < n; k++) {
				sum = zero;
				ij = j + m * k;
				jj = j + m * j;
				for (i = j; i < m; i++) {
					sum += a[jj] * a[ij];
					ij += 1; /* [i+m*k] */
					jj += 1; /* [i+m*j] */
				}
				temp = sum / a[j + m * j];
				ij = j + m * k;
				jj = j + m * j;
				for (i = j; i < m; i++) {
					a[ij] -= temp * a[jj];
					ij += 1; /* [i+m*k] */
					jj += 1; /* [i+m*j] */
				}
				if ((pivot != 0) && (rdiag[k] != zero)) {
					temp = a[j + m * k] / rdiag[k];
					temp = mp_dmax1(zero, one - temp * temp);
					rdiag[k] *= sqrt(temp);
					temp = rdiag[k] / wa[k];
					if ((p05 * temp * temp) <= MP_MACHEP0) {
						rdiag[k] = mp_enorm(m - j - 1, &a[jp1 + m * k]);
						wa[k] = rdiag[k];
					}
				}
			}
		}

	L100:
		rdiag[j] = -ajnorm;
	}
	/*
	 *     last card of subroutine qrfac.
	 */
}

/************************qrsolv.c*************************/

static void mp_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag, double *qtb, double *x, double *sdiag,
					  double *wa) {
	/*
	*     **********
	*
	*     subroutine qrsolv
	*
	*     given an m by n matrix a, an n by n diagonal matrix d,
	*     and an m-vector b, the problem is to determine an x which
	*     solves the system
	*
	*	    a*x = b ,	  d*x = 0 ,
	*
	*     in the least squares sense.
	*
	*     this subroutine completes the solution of the problem
	*     if it is provided with the necessary information from the
	*     qr factorization, with column pivoting, of a. that is, if
	*     a*p = q*r, where p is a permutation matrix, q has orthogonal
	*     columns, and r is an upper triangular matrix with diagonal
	*     elements of nonincreasing magnitude, then qrsolv expects
	*     the full upper triangle of r, the permutation matrix p,
	*     and the first n components of (q transpose)*b. the system
	*     a*x = b, d*x = 0, is then equivalent to
	*
	*		   t	   t
	*	    r*z = q *b ,  p *d*p*z = 0 ,
	*
	*     where x = p*z. if this system does not have full rank,
	*     then a least squares solution is obtained. on output qrsolv
	*     also provides an upper triangular matrix s such that
	*
	*	     t	 t		 t
	*	    p *(a *a + d*d)*p = s *s .
	*
	*     s is computed within qrsolv and may be of separate interest.
	*
	*     the subroutine statement is
	*
	*	subroutine qrsolv(n,r,ldr,ipvt,diag,qtb,x,sdiag,wa)
	*
	*     where
	*
	*	n is a positive integer input variable set to the order of r.
	*
	*	r is an n by n array. on input the full upper triangle
	*	  must contain the full upper triangle of the matrix r.
	*	  on output the full upper triangle is unaltered, and the
	*	  strict lower triangle contains the strict upper triangle
	*	  (transposed) of the upper triangular matrix s.
	*
	*	ldr is a positive integer input variable not less than n
	*	  which specifies the leading dimension of the array r.
	*
	*	ipvt is an integer input array of length n which defines the
	*	  permutation matrix p such that a*p = q*r. column j of p
	*	  is column ipvt(j) of the identity matrix.
	*
	*	diag is an input array of length n which must contain the
	*	  diagonal elements of the matrix d.
	*
	*	qtb is an input array of length n which must contain the first
	*	  n elements of the vector (q transpose)*b.
	*
	*	x is an output array of length n which contains the least
	*	  squares solution of the system a*x = b, d*x = 0.
	*
	*	sdiag is an output array of length n which contains the
	*	  diagonal elements of the upper triangular matrix s.
	*
	*	wa is a work array of length n.
	*
	*     subprograms called
	*
	*	fortran-supplied ... dabs,dsqrt
	*
	*     argonne national laboratory. minpack project. march 1980.
	*     burton s. garbow, kenneth e. hillstrom, jorge j. more
	*
	*     **********
	*/
	int i, ij, ik, kk, j, jp1, k, kp1, l, nsing;
	double cosx, cotan, qtbpj, sinx, sum, tanx, temp;
	static double zero = 0.0;
	static double p25 = 0.25;
	static double p5 = 0.5;

	/*
	 *     copy r and (q transpose)*b to preserve input and initialize s.
	 *     in particular, save the diagonal elements of r in x.
	 */
	kk = 0;
	for (j = 0; j < n; j++) {
		ij = kk;
		ik = kk;
		for (i = j; i < n; i++) {
			r[ij] = r[ik];
			ij += 1;   /* [i+ldr*j] */
			ik += ldr; /* [j+ldr*i] */
		}
		x[j] = r[kk];
		wa[j] = qtb[j];
		kk += ldr + 1; /* j+ldr*j */
	}

	/*
	 *     eliminate the diagonal matrix d using a givens rotation.
	 */
	for (j = 0; j < n; j++) {
		/*
		 *	 prepare the row of d to be eliminated, locating the
		 *	 diagonal element using p from the qr factorization.
		 */
		l = ipvt[j];
		if (diag[l] == zero)
			goto L90;
		for (k = j; k < n; k++)
			sdiag[k] = zero;
		sdiag[j] = diag[l];
		/*
		 *	 the transformations to eliminate the row of d
		 *	 modify only a single element of (q transpose)*b
		 *	 beyond the first n, which is initially zero.
		 */
		qtbpj = zero;
		for (k = j; k < n; k++) {
			/*
			 *	    determine a givens rotation which eliminates the
			 *	    appropriate element in the current row of d.
			 */
			if (sdiag[k] == zero)
				continue;
			kk = k + ldr * k;
			if (fabs(r[kk]) < fabs(sdiag[k])) {
				cotan = r[kk] / sdiag[k];
				sinx = p5 / sqrt(p25 + p25 * cotan * cotan);
				cosx = sinx * cotan;
			} else {
				tanx = sdiag[k] / r[kk];
				cosx = p5 / sqrt(p25 + p25 * tanx * tanx);
				sinx = cosx * tanx;
			}
			/*
			 *	    compute the modified diagonal element of r and
			 *	    the modified element of ((q transpose)*b,0).
			 */
			r[kk] = cosx * r[kk] + sinx * sdiag[k];
			temp = cosx * wa[k] + sinx * qtbpj;
			qtbpj = -sinx * wa[k] + cosx * qtbpj;
			wa[k] = temp;
			/*
			 *	    accumulate the tranformation in the row of s.
			 */
			kp1 = k + 1;
			if (n > kp1) {
				ik = kk + 1;
				for (i = kp1; i < n; i++) {
					temp = cosx * r[ik] + sinx * sdiag[i];
					sdiag[i] = -sinx * r[ik] + cosx * sdiag[i];
					r[ik] = temp;
					ik += 1; /* [i+ldr*k] */
				}
			}
		}
	L90:
		/*
		 *	 store the diagonal element of s and restore
		 *	 the corresponding diagonal element of r.
		 */
		kk = j + ldr * j;
		sdiag[j] = r[kk];
		r[kk] = x[j];
	}
	/*
	 *     solve the triangular system for z. if the system is
	 *     singular, then obtain a least squares solution.
	 */
	nsing = n;
	for (j = 0; j < n; j++) {
		if ((sdiag[j] == zero) && (nsing == n))
			nsing = j;
		if (nsing < n)
			wa[j] = zero;
	}
	if (nsing < 1)
		goto L150;

	for (k = 0; k < nsing; k++) {
		j = nsing - k - 1;
		sum = zero;
		jp1 = j + 1;
		if (nsing > jp1) {
			ij = jp1 + ldr * j;
			for (i = jp1; i < nsing; i++) {
				sum += r[ij] * wa[i];
				ij += 1; /* [i+ldr*j] */
			}
		}
		wa[j] = (wa[j] - sum) / sdiag[j];
	}
L150:
	/*
	 *     permute the components of z back to components of x.
	 */
	for (j = 0; j < n; j++) {
		l = ipvt[j];
		x[l] = wa[j];
	}
	/*
	 *     last card of subroutine qrsolv.
	 */
}

/************************lmpar.c*************************/

static void mp_lmpar(int n, double *r, int ldr, int *ipvt, int *ifree, double *diag, double *qtb, double delta,
					 double *par, double *x, double *sdiag, double *wa1, double *wa2) {
	/*     **********
	 *
	 *     subroutine lmpar
	 *
	 *     given an m by n matrix a, an n by n nonsingular diagonal
	 *     matrix d, an m-vector b, and a positive number delta,
	 *     the problem is to determine a value for the parameter
	 *     par such that if x solves the system
	 *
	 *	    a*x = b ,	  sqrt(par)*d*x = 0 ,
	 *
	 *     in the least squares sense, and dxnorm is the euclidean
	 *     norm of d*x, then either par is zero and
	 *
	 *	    (dxnorm-delta) .le. 0.1*delta ,
	 *
	 *     or par is positive and
	 *
	 *	    abs(dxnorm-delta) .le. 0.1*delta .
	 *
	 *     this subroutine completes the solution of the problem
	 *     if it is provided with the necessary information from the
	 *     qr factorization, with column pivoting, of a. that is, if
	 *     a*p = q*r, where p is a permutation matrix, q has orthogonal
	 *     columns, and r is an upper triangular matrix with diagonal
	 *     elements of nonincreasing magnitude, then lmpar expects
	 *     the full upper triangle of r, the permutation matrix p,
	 *     and the first n components of (q transpose)*b. on output
	 *     lmpar also provides an upper triangular matrix s such that
	 *
	 *	     t	 t		     t
	 *	    p *(a *a + par*d*d)*p = s *s .
	 *
	 *     s is employed within lmpar and may be of separate interest.
	 *
	 *     only a few iterations are generally needed for convergence
	 *     of the algorithm. if, however, the limit of 10 iterations
	 *     is reached, then the output par will contain the best
	 *     value obtained so far.
	 *
	 *     the subroutine statement is
	 *
	 *	subroutine lmpar(n,r,ldr,ipvt,diag,qtb,delta,par,x,sdiag,
	 *			 wa1,wa2)
	 *
	 *     where
	 *
	 *	n is a positive integer input variable set to the order of r.
	 *
	 *	r is an n by n array. on input the full upper triangle
	 *	  must contain the full upper triangle of the matrix r.
	 *	  on output the full upper triangle is unaltered, and the
	 *	  strict lower triangle contains the strict upper triangle
	 *	  (transposed) of the upper triangular matrix s.
	 *
	 *	ldr is a positive integer input variable not less than n
	 *	  which specifies the leading dimension of the array r.
	 *
	 *	ipvt is an integer input array of length n which defines the
	 *	  permutation matrix p such that a*p = q*r. column j of p
	 *	  is column ipvt(j) of the identity matrix.
	 *
	 *	diag is an input array of length n which must contain the
	 *	  diagonal elements of the matrix d.
	 *
	 *	qtb is an input array of length n which must contain the first
	 *	  n elements of the vector (q transpose)*b.
	 *
	 *	delta is a positive input variable which specifies an upper
	 *	  bound on the euclidean norm of d*x.
	 *
	 *	par is a nonnegative variable. on input par contains an
	 *	  initial estimate of the levenberg-marquardt parameter.
	 *	  on output par contains the final estimate.
	 *
	 *	x is an output array of length n which contains the least
	 *	  squares solution of the system a*x = b, sqrt(par)*d*x = 0,
	 *	  for the output par.
	 *
	 *	sdiag is an output array of length n which contains the
	 *	  diagonal elements of the upper triangular matrix s.
	 *
	 *	wa1 and wa2 are work arrays of length n.
	 *
	 *     subprograms called
	 *
	 *	minpack-supplied ... dpmpar,mp_enorm,qrsolv
	 *
	 *	fortran-supplied ... dabs,mp_dmax1,dmin1,dsqrt
	 *
	 *     argonne national laboratory. minpack project. march 1980.
	 *     burton s. garbow, kenneth e. hillstrom, jorge j. more
	 *
	 *     **********
	 */
	int i, iter, ij, jj, j, jm1, jp1, k, l, nsing;
	double dxnorm, fp, gnorm, parc, parl, paru;
	double sum, temp;
	static double zero = 0.0;
	/* static double one = 1.0; */
	static double p1 = 0.1;
	static double p001 = 0.001;

	/*
	 *     compute and store in x the gauss-newton direction. if the
	 *     jacobian is rank-deficient, obtain a least squares solution.
	 */
	nsing = n;
	jj = 0;
	for (j = 0; j < n; j++) {
		wa1[j] = qtb[j];
		if ((r[jj] == zero) && (nsing == n))
			nsing = j;
		if (nsing < n)
			wa1[j] = zero;
		jj += ldr + 1; /* [j+ldr*j] */
	}

	if (nsing >= 1) {
		for (k = 0; k < nsing; k++) {
			j = nsing - k - 1;
			wa1[j] = wa1[j] / r[j + ldr * j];
			temp = wa1[j];
			jm1 = j - 1;
			if (jm1 >= 0) {
				ij = ldr * j;
				for (i = 0; i <= jm1; i++) {
					wa1[i] -= r[ij] * temp;
					ij += 1;
				}
			}
		}
	}

	for (j = 0; j < n; j++) {
		l = ipvt[j];
		x[l] = wa1[j];
	}
	/*
	 *     initialize the iteration counter.
	 *     evaluate the function at the origin, and test
	 *     for acceptance of the gauss-newton direction.
	 */
	iter = 0;
	for (j = 0; j < n; j++)
		wa2[j] = diag[ifree[j]] * x[j];
	dxnorm = mp_enorm(n, wa2);
	fp = dxnorm - delta;
	if (fp <= p1 * delta) {
		goto L220;
	}
	/*
	 *     if the jacobian is not rank deficient, the newton
	 *     step provides a lower bound, parl, for the zero of
	 *     the function. otherwise set this bound to zero.
	 */
	parl = zero;
	if (nsing >= n) {
		for (j = 0; j < n; j++) {
			l = ipvt[j];
			wa1[j] = diag[ifree[l]] * (wa2[l] / dxnorm);
		}
		jj = 0;
		for (j = 0; j < n; j++) {
			sum = zero;
			jm1 = j - 1;
			if (jm1 >= 0) {
				ij = jj;
				for (i = 0; i <= jm1; i++) {
					sum += r[ij] * wa1[i];
					ij += 1;
				}
			}
			wa1[j] = (wa1[j] - sum) / r[j + ldr * j];
			jj += ldr; /* [i+ldr*j] */
		}
		temp = mp_enorm(n, wa1);
		parl = ((fp / delta) / temp) / temp;
	}
	/*
	 *     calculate an upper bound, paru, for the zero of the function.
	 */
	jj = 0;
	for (j = 0; j < n; j++) {
		sum = zero;
		ij = jj;
		for (i = 0; i <= j; i++) {
			sum += r[ij] * qtb[i];
			ij += 1;
		}
		l = ipvt[j];
		wa1[j] = sum / diag[ifree[l]];
		jj += ldr; /* [i+ldr*j] */
	}
	gnorm = mp_enorm(n, wa1);
	paru = gnorm / delta;
	if (paru == zero)
		paru = MP_DWARF / mp_dmin1(delta, p1);
	/*
	 *     if the input par lies outside of the interval (parl,paru),
	 *     set par to the closer endpoint.
	 */
	*par = mp_dmax1(*par, parl);
	*par = mp_dmin1(*par, paru);
	if (*par == zero)
		*par = gnorm / dxnorm;

/*
 *     beginning of an iteration.
 */
L150:
	iter += 1;
	/*
	 *	 evaluate the function at the current value of par.
	 */
	if (*par == zero)
		*par = mp_dmax1(MP_DWARF, p001 * paru);
	temp = sqrt(*par);
	for (j = 0; j < n; j++)
		wa1[j] = temp * diag[ifree[j]];
	mp_qrsolv(n, r, ldr, ipvt, wa1, qtb, x, sdiag, wa2);
	for (j = 0; j < n; j++)
		wa2[j] = diag[ifree[j]] * x[j];
	dxnorm = mp_enorm(n, wa2);
	temp = fp;
	fp = dxnorm - delta;
	/*
	 *	 if the function is small enough, accept the current value
	 *	 of par. also test for the exceptional cases where parl
	 *	 is zero or the number of iterations has reached 10.
	 */
	if ((fabs(fp) <= p1 * delta) || ((parl == zero) && (fp <= temp) && (temp < zero)) || (iter == 10))
		goto L220;
	/*
	 *	 compute the newton correction.
	 */
	for (j = 0; j < n; j++) {
		l = ipvt[j];
		wa1[j] = diag[ifree[l]] * (wa2[l] / dxnorm);
	}
	jj = 0;
	for (j = 0; j < n; j++) {
		wa1[j] = wa1[j] / sdiag[j];
		temp = wa1[j];
		jp1 = j + 1;
		if (jp1 < n) {
			ij = jp1 + jj;
			for (i = jp1; i < n; i++) {
				wa1[i] -= r[ij] * temp;
				ij += 1; /* [i+ldr*j] */
			}
		}
		jj += ldr; /* ldr*j */
	}
	temp = mp_enorm(n, wa1);
	parc = ((fp / delta) / temp) / temp;
	/*
	 *	 depending on the sign of the function, update parl or paru.
	 */
	if (fp > zero)
		parl = mp_dmax1(parl, *par);
	if (fp < zero)
		paru = mp_dmin1(paru, *par);
	/*
	 *	 compute an improved estimate for par.
	 */
	*par = mp_dmax1(parl, *par + parc);
	/*
	 *	 end of an iteration.
	 */
	goto L150;

L220:
	/*
	 *     termination.
	 */
	if (iter == 0)
		*par = zero;
	/*
	 *     last card of subroutine lmpar.
	 */
}

/************************enorm.c*************************/

static double mp_enorm(int n, double *x) {
	/*
	 *     **********
	 *
	 *     function enorm
	 *
	 *     given an n-vector x, this function calculates the
	 *     euclidean norm of x.
	 *
	 *     the euclidean norm is computed by accumulating the sum of
	 *     squares in three different sums. the sums of squares for the
	 *     small and large components are scaled so that no overflows
	 *     occur. non-destructive underflows are permitted. underflows
	 *     and overflows do not occur in the computation of the unscaled
	 *     sum of squares for the intermediate components.
	 *     the definitions of small, intermediate and large components
	 *     depend on two constants, rdwarf and rgiant. the main
	 *     restrictions on these constants are that rdwarf**2 not
	 *     underflow and rgiant**2 not overflow. the constants
	 *     given here are suitable for every known computer.
	 *
	 *     the function statement is
	 *
	 *	double precision function enorm(n,x)
	 *
	 *     where
	 *
	 *	n is a positive integer input variable.
	 *
	 *	x is an input array of length n.
	 *
	 *     subprograms called
	 *
	 *	fortran-supplied ... dabs,dsqrt
	 *
	 *     argonne national laboratory. minpack project. march 1980.
	 *     burton s. garbow, kenneth e. hillstrom, jorge j. more
	 *
	 *     **********
	 */
	int i;
	double agiant, floatn, s1, s2, s3, xabs, x1max, x3max;
	double ans, temp;
	double rdwarf = MP_RDWARF;
	double rgiant = MP_RGIANT;
	static double zero = 0.0;
	static double one = 1.0;

	s1 = zero;
	s2 = zero;
	s3 = zero;
	x1max = zero;
	x3max = zero;
	floatn = n;
	agiant = rgiant / floatn;

	for (i = 0; i < n; i++) {
		xabs = fabs(x[i]);
		if ((xabs > rdwarf) && (xabs < agiant)) {
			/*
			 *	    sum for intermediate components.
			 */
			s2 += xabs * xabs;
			continue;
		}

		if (xabs > rdwarf) {
			/*
			 *	       sum for large components.
			 */
			if (xabs > x1max) {
				temp = x1max / xabs;
				s1 = one + s1 * temp * temp;
				x1max = xabs;
			} else {
				temp = xabs / x1max;
				s1 += temp * temp;
			}
			continue;
		}
		/*
		 *	       sum for small components.
		 */
		if (xabs > x3max) {
			temp = x3max / xabs;
			s3 = one + s3 * temp * temp;
			x3max = xabs;
		} else {
			if (xabs != zero) {
				temp = xabs / x3max;
				s3 += temp * temp;
			}
		}
	}
	/*
	 *     calculation of norm.
	 */
	if (s1 != zero) {
		temp = s1 + (s2 / x1max) / x1max;
		ans = x1max * sqrt(temp);
		return (ans);
	}
	if (s2 != zero) {
		if (s2 >= x3max)
			temp = s2 * (one + (x3max / s2) * (x3max * s3));
		else
			temp = x3max * ((s2 / x3max) + (x3max * s3));
		ans = sqrt(temp);
	} else {
		ans = x3max * sqrt(s3);
	}
	return (ans);
	/*
	 *     last card of function enorm.
	 */
}

/************************lmmisc.c*************************/

static double mp_dmax1(double a, double b) {
	if (a >= b)
		return (a);
	else
		return (b);
}

static double mp_dmin1(double a, double b) {
	if (a <= b)
		return (a);
	else
		return (b);
}

static int mp_min0(int a, int b) {
	if (a <= b)
		return (a);
	else
		return (b);
}

/************************covar.c*************************/
/*
c     **********
c
c     subroutine covar
c
c     given an m by n matrix a, the problem is to determine
c     the covariance matrix corresponding to a, defined as
c
c                    t
c           inverse(a *a) .
c
c     this subroutine completes the solution of the problem
c     if it is provided with the necessary information from the
c     qr factorization, with column pivoting, of a. that is, if
c     a*p = q*r, where p is a permutation matrix, q has orthogonal
c     columns, and r is an upper triangular matrix with diagonal
c     elements of nonincreasing magnitude, then covar expects
c     the full upper triangle of r and the permutation matrix p.
c     the covariance matrix is then computed as
c
c                      t     t
c           p*inverse(r *r)*p  .
c
c     if a is nearly rank deficient, it may be desirable to compute
c     the covariance matrix corresponding to the linearly independent
c     columns of a. to define the numerical rank of a, covar uses
c     the tolerance tol. if l is the largest integer such that
c
c           abs(r(l,l)) .gt. tol*abs(r(1,1)) ,
c
c     then covar computes the covariance matrix corresponding to
c     the first l columns of r. for k greater than l, column
c     and row ipvt(k) of the covariance matrix are set to zero.
c
c     the subroutine statement is
c
c       subroutine covar(n,r,ldr,ipvt,tol,wa)
c
c     where
c
c       n is a positive integer input variable set to the order of r.
c
c       r is an n by n array. on input the full upper triangle must
c         contain the full upper triangle of the matrix r. on output
c         r contains the square symmetric covariance matrix.
c
c       ldr is a positive integer input variable not less than n
c         which specifies the leading dimension of the array r.
c
c       ipvt is an integer input array of length n which defines the
c         permutation matrix p such that a*p = q*r. column j of p
c         is column ipvt(j) of the identity matrix.
c
c       tol is a nonnegative input variable used to define the
c         numerical rank of a in the manner described above.
c
c       wa is a work array of length n.
c
c     subprograms called
c
c       fortran-supplied ... dabs
c
c     argonne national laboratory. minpack project. august 1980.
c     burton s. garbow, kenneth e. hillstrom, jorge j. more
c
c     **********
*/

static int mp_covar(int n, double *r, int ldr, int *ipvt, double tol, double *wa) {
	int i, ii, j, jj, k, l;
	int kk, kj, ji, j0, k0, jj0;
	int sing;
	double one = 1.0, temp, tolr, zero = 0.0;

/*
 * form the inverse of r in the full upper triangle of r.
 */

#if 0
  for (j=0; j<n; j++) {
    for (i=0; i<n; i++) {
      printf("%f ", r[j*ldr+i]);
    }
    printf("\n");
  }
#endif

	tolr = tol * fabs(r[0]);
	l = -1;
	for (k = 0; k < n; k++) {
		kk = k * ldr + k;
		if (fabs(r[kk]) <= tolr)
			break;

		r[kk] = one / r[kk];
		for (j = 0; j < k; j++) {
			kj = k * ldr + j;
			temp = r[kk] * r[kj];
			r[kj] = zero;

			k0 = k * ldr;
			j0 = j * ldr;
			for (i = 0; i <= j; i++) {
				r[k0 + i] += (-temp * r[j0 + i]);
			}
		}
		l = k;
	}

	/*
	 * Form the full upper triangle of the inverse of (r transpose)*r
	 * in the full upper triangle of r
	 */

	if (l >= 0) {
		for (k = 0; k <= l; k++) {
			k0 = k * ldr;

			for (j = 0; j < k; j++) {
				temp = r[k * ldr + j];

				j0 = j * ldr;
				for (i = 0; i <= j; i++) {
					r[j0 + i] += temp * r[k0 + i];
				}
			}

			temp = r[k0 + k];
			for (i = 0; i <= k; i++) {
				r[k0 + i] *= temp;
			}
		}
	}

	/*
	 * For the full lower triangle of the covariance matrix
	 * in the strict lower triangle or and in wa
	 */
	for (j = 0; j < n; j++) {
		jj = ipvt[j];
		sing = (j > l);
		j0 = j * ldr;
		jj0 = jj * ldr;
		for (i = 0; i <= j; i++) {
			ji = j0 + i;

			if (sing)
				r[ji] = zero;
			ii = ipvt[i];
			if (ii > jj)
				r[jj0 + ii] = r[ji];
			if (ii < jj)
				r[ii * ldr + jj] = r[ji];
		}
		wa[jj] = r[j0 + j];
	}

	/*
	 * Symmetrize the covariance matrix in r
	 */
	for (j = 0; j < n; j++) {
		j0 = j * ldr;
		for (i = 0; i < j; i++) {
			r[j0 + i] = r[i * ldr + j];
		}
		r[j0 + j] = wa[j];
	}

#if 0
  for (j=0; j<n; j++) {
    for (i=0; i<n; i++) {
      printf("%f ", r[j*ldr+i]);
    }
    printf("\n");
  }
#endif

	return 0;
}