/* Concatenated dnsq code */

/*
 * This concatenation, translation to C and modifications,
 * Copyright 1998 Graeme Gill
 * All rights reserved.
 *
 * This material is licenced under the GNU AFFERO GENERAL PUBLIC LICENSE Version 3 :-
 * see the License.txt file for licencing details.
 */

#include "numsup.h"
#include "dnsq.h"		/* Public interface definitions */

#undef DEBUG

typedef long int bool;
#ifndef TRUE
# define FALSE (0)
# define TRUE (!FALSE)
#endif

#ifndef min
#define min(a,b) ((a) <= (b) ? (a) : (b))
#endif
#ifndef max
#define max(a,b) ((a) >= (b) ? (a) : (b))
#endif
#ifndef fabs
#define fabs(x) ((x) >= 0.0 ? (x) : -(x))
#endif

/* Forward difference jacobian approximation */
static int dfdjc1(
	void *fdata,
	int (*fcn)(void *fdata, int n, double *x, double *fvec, int iflag),
	int n, double *x, double * fvec, double *fjac,
	int ldfjac, int ml, int mu,
	double epsfcn, double *wa1, double *wa2);

/* QR factorization */
static int dqrfac(int m, int n, double *a, int lda,
	bool pivot, int *ipvt, double *sigma,
	double *acnorm, double *wa);

/* Use QR decomposition to form the orthogonal matrix */
static int dqform(int m, int n, double *q, int ldq, double *wa);

/* QR decomposition update */
static int d1updt(int m, int n, double *s,
	double *u, double *v, double *w);

/* Jacobian rotation, called by QR update */
static int d1mpyq(int m, int n, double *a, int lda,
	double *v, double *w);

/* Compute norm of a vector */
static double denorm(int n, double *x);

/* Line search */
static int ddoglg(int n, double *r, double *diag, double *qtb,
	double delta, double *x, double *wa1, double *wa2);

/***************************************************************/

/*
 * A simplified interface to dnsq().
 */

int dnsqe(
	void *fdata,	/* Opaque pointer to pass to fcn() and jac() */
	int (*fcn)(void *fdata, int n, double *x, double *fvec, int iflag),
					/* Pointer to function we are solving */
	int (*jac)(void *fdata, int n, double *x, double *fvec, double **fjac),
					/* Optional function to compute jacobian, NULL if not used */
	int n,			/* Number of functions and variables */
	double x[],		/* Initial solution estimate, RETURNs final solution */
	double ss,		/* Initial search area */
	double fvec[],	/* Array that will be RETURNed with thefunction values at the solution */
	double dtol,	/* Desired delta tollerance of the solution */
	double atol,	/* Desired absolute tollerance of the solution */
	int maxfev,		/* Maximum number of function calls. set to 0 for automatic */
	int nprint	 	/* Turn on debugging printouts from func() every nprint itterations */
) {
	int info = 0;		/* Return status */

	int nfev, njev;
	int i,j, index, ml, lr, mu;
	double epsfcn = ss * ss;		/* Jacobian estimate step */
	double factor = ss;		/* Initial step size */
	double maxstep = 0.0;	/* Subsequent step size (not working ??) */

	if (maxfev <= 0) {
		maxfev = (n + 1) * 100;
		if (jac == NULL)
			maxfev <<= 1;
	}
	ml = n - 1;		/* number of subdiagonals within the band of the Jacobian matrix. */
	mu = n - 1;		/* number of superdiagonals within the band of the Jacobian matrix. */

	lr = n * (n + 1) / 2;
	index = n * 6 + lr;

	/* Call dnsq. */
	info = dnsq(fdata, fcn, jac, NULL, 0,
			n, &x[0], &fvec[0], dtol, atol,
			maxfev, ml, mu, epsfcn, NULL, factor, maxstep, nprint, 
			&nfev, &njev);

	if (info == 5)
		info = 4;
	if (info == 0)
		warning("dnsqe: invalid input parameter.");
	return info;
} /* dnsqe */


/***************************************************************/
/*
 * Library:  SLATEC 
 * Category: F2A 
 * Type:     Double precision (SNSQE-S, DNSQE-D) 
 * Keywords  easy-to-use, nonlinear square system, 
 *			 powell hybrid method, zeros 
 * Author:   Hiebert, K. L. (SNLA) 
 * Translated to C by f2c and Graeme W. Gill
 *
 * 1. Purpose. 
 *
 * The purpose of DNSQ is to find a zero of a system of N nonlinear 
 * functions in N variables by a modification of the Powell 
 * hybrid method.  The user must provide a subroutine which 
 * calculates the functions.  The user has the option of either to 
 * provide a subroutine which calculates the Jacobian or to let the 
 * code calculate it by a forward-difference approximation. 
 * This code is the combination of the MINPACK codes (Argonne) 
 * HYBRD and HYBRDJ. 
 *
 * 2. Subroutine and Type Statements. 
 *
 *	   SUBROUTINE DNSQ(FCN,JAC,N,X,FVEC,FJAC,LDFJAC,XTOL,MAXFEV, 
 *	  				 ML,MU,EPSFCN,DIAG,MODE,FACTOR,NPRINT,INFO,NFEV, 
 *	  				 NJEV,R,LR,QTF,WA1,WA2,WA3,WA4) 
 *	   INTEGER N,MAXFEV,ML,MU,MODE,NPRINT,INFO,NFEV,LDFJAC,NJEV,LR 
 *	   DOUBLE PRECISION XTOL,EPSFCN,FACTOR 
 *	   DOUBLE PRECISION 
 *	   X(N),FVEC(N),DIAG(N),FJAC(LDFJAC,N),R(LR),QTF(N), 
 *	  	 WA1(N),WA2(N),WA3(N),WA4(N) 
 *	   EXTERNAL FCN,JAC 
 *
 * 3. Parameters. 
 *
 *	   Parameters designated as input parameters must be specified on 
 *	   entry to DNSQ and are not changed on exit, while parameters 
 *	   designated as output parameters need not be specified on entry 
 *	   and are set to appropriate values on exit from DNSQ. 
 *
 *	   fcn() is the name of the user-supplied subroutine which calculates 
 *		 the functions.  fcn() must be declared in an external statement 
 *		 in the user calling program, and should be written as follows. 
 *
 *	int fcn(
 *      void *fdata,
 *		int n,
 *		double *x,
 * 		double *fvec,
 *		int iflag);
 *	{
 *		 calculate the functions at x and 
 *		 return this vector in fvec. 
 *       print out vector if iflag == 0
 *		 return 0 (or < 0 to abort)
 *	}
 *		 The value 0 should be returned fcn() unless the 
 *		 user wants to terminate execution of DNSQ.  In this case
 *		 return a negative integer. 
 *
 *	   jac() is the name of the user-supplied subroutine which calculates 
 *		 the Jacobian.  If jac!=NULL, then jac() must be declared in an 
 *		 external statement in the user calling program, and should be 
 *		 written as follows. 
 *
 *	int jac(
 *      void *fdata,
 *		int n,
 *		double *x,
 *		double *fvec,
 *		double **fjac,
 *	{
 *		 Calculate the Jacobian at x and return this 
 *		 matrix in fjac.  fvec contains the function 
 *		 values at x and should not be altered. 
 *		 return 0 (or < 0 to abort)
 *	}
 *		 The value 0 should be returned jac() unless the 
 *		 user wants to terminate execution of DNSQ.  In this case
 *		 return a negative integer. 
 *
 *		 If jac == NULL, then the code will approximate the
 *       Jacobian by forward-differencing. 
 *
 *	double **sjac;
 *		sjac is an optional n by n matrix that can hold an initial
 *		jacobian matrix that will be used in preference to calling the jac()
 *		function, or to using forward differencing. If the array is provided,
 *		it will also contain the last jacobian matrix used before exiting.
 * 		If this array is not used, it should be set to NULL.
 *
 *	int startsjac;
 *		styatsjac is a flag, that when set to non-zero, will cause the
 *		sjac[] array to be used as the initial jacobian matrix, in preference
 *		to calling the jac() function, or to using forward differencing.
 *
 *	   n is a positive integer input variable set to the number of 
 *		 functions and variables. 
 *
 *	   x is an array of length n.  On input x must contain an initial 
 *		 estimate of the solution vector.  On output x contains the 
 *		 final estimate of the solution vector. 
 *
 *	   fvec is an output array of length n which contains the functions 
 *		 evaluated at the output x. 
 *
 *	   fjac is an output N by N array which contains the orthogonal 
 *		 matrix Q produced by the QR factorization of the final 
 *		 approximate Jacobian. 
 *
 *	   ldfjac is a positive integer input variable not less than n 
 *		 which specifies the leading dimension of the array fjac. 
 *
 *	   dtol is a nonnegative input variable.  Termination occurs when 
 *		 the relative error between two consecutive iterates is at most 
 *		 dtol.  Therefore, dtol measures the relative error desired in 
 *		 the approximate solution.  Section 4 contains more details 
 *		 about dtol. 
 *
 *	   tol is a nonnegative input variable.  Termination occurs when 
 *		 the value of all the function values falls below this threshold.
 *       Termination occurs when either the dtol or tol condition is met.
 *
 *	   maxfev is a positive integer input variable.  Termination occurs 
 *		 when the number of calls to fcn is at least maxfev by the end 
 *		 of an iteration. 
 *
 *	   ml is a nonnegative integer input variable which specifies the 
 *		 number of subdiagonals within the band of the Jacobian matrix. 
 *		 If the Jacobian is not banded or jac!=null, set ml to at 
 *		 least n - 1. 
 *
 *	   mu is a nonnegative integer input variable which specifies the 
 *		 number of superdiagonals within the band of the Jacobian 
 *		 matrix.  If the Jacobian is not banded or jac!=null, set mu to at 
 *		 least n - 1. 
 *
 *	   epsfcn is an input variable used in determining a suitable step 
 *		 for the forward-difference approximation.  This approximation 
 *		 assumes that the relative errors in the functions are of the 
 *		 order of epsfcn.  If epsfcn is less than the machine 
 *		 precision, it is assumed that the relative errors in the 
 *		 functions are of the order of the machine precision.  If 
 *		 jac!=null, then epsfcn can be ignored (treat it as a dummy 
 *		 argument). 
 *
 *	   diag is an array of length n.  If MODE = 1 (see below), diag is 
 *		 internally set.  If mode = 2, diag must contain positive 
 *		 entries that serve as implicit (multiplicative) scale factors 
 *		 for the variables. 
 *
 *	   mode is an integer input variable.  If mode = 1, the variables 
 *		 will be scaled internally.  If mode = 2, the scaling is 
 *		 specified by the input diag.  Other values of mode are 
 *		 equivalent to mode = 1. 
 *
 *	   factor is a positive input variable used in determining the 
 *		 initial step bound.  This bound is set to the product of 
 *		 factor and the Euclidean norm of diag*x if nonzero, or else to 
 *		 factor itself.  In most cases factor should lie in the 
 *		 interval (.1,100.).  100. is a generally recommended value. 
 *
 *	   nprint is an integer input variable that enables controlled 
 *		 printing of iterates if it is positive.  In this case, fcn is 
 *		 called with iflag = 0 at the beginning of the first iteration 
 *		 and every nprint iterations thereafter and immediately prior 
 *		 to return, with x and fvec available for printing. Appropriate 
 *		 print statements must be added to fcn(see example).  If nprint 
 *		 is not positive, no special calls of fcn with iflag = 0 are 
 *		 made. 
 *
 *	   info is an integer output variable.  If the user has terminated 
 *		 execution, info is set to the (negative) value of iflag.  See 
 *		 description of fcn and jac. Otherwise, INFO is set as follows. 
 *
 *		 INFO = 0  Improper input parameters. 
 *
 *		 INFO = 1  Relative error between two consecutive iterates is 
 *				   at most XTOL. Normal sucessful return value. 
 *
 *		 INFO = 2  Number of calls to FCN has reached or exceeded 
 *				   MAXFEV. 
 *
 *		 INFO = 3  XTOL is too small.  No further improvement in the 
 *				   approximate solution X is possible. 
 *
 *		 INFO = 4  Iteration is not making good progress, as measured 
 *				   by the improvement from the last five Jacobian 
 *				   evaluations. 
 *
 *		 INFO = 5  Iteration is not making good progress, as measured 
 *				   by the improvement from the last ten iterations. 
 *                 Return value if no zero can be found from this starting
 *                 point.
 *
 *		 Sections 4 and 5 contain more details about INFO. 
 *
 *	   nfev is an integer output variable set to the number of calls to 
 *		 fcn. 
 *
 *	   njev is an integer output variable set to the number of calls to 
 *		 jac. (If jac==null, then njev is set to zero.) 
 *
 * 4. Successful completion. 
 *
 *	   The accuracy of DNSQ is controlled by the convergence parameter 
 *	   XTOL.  This parameter is used in a test which makes a comparison 
 *	   between the approximation X and a solution XSOL.  DNSQ 
 *	   terminates when the test is satisfied.  If the convergence 
 *	   parameter is less than the machine precision (as defined by the 
 *	   function D1MACH(4)), then DNSQ only attempts to satisfy the test 
 *	   defined by the machine precision.  Further progress is not 
 *	   usually possible. 
 *
 *	   The test assumes that the functions are reasonably well behaved, 
 *	   and, if the Jacobian is supplied by the user, that the functions 
 *	   and the Jacobian are coded consistently.  If these conditions 
 *	   are not satisfied, then DNSQ may incorrectly indicate 
 *	   convergence.  The coding of the Jacobian can be checked by the 
 *	   subroutine DCKDER. If the Jacobian is coded correctly or JAC==NULL, 
 *	   then the validity of the answer can be checked, for example, by 
 *	   rerunning DNSQ with a tighter tolerance. 
 *
 *	   Convergence Test.  If DENORM(Z) denotes the Euclidean norm of a 
 *		 vector Z and D is the diagonal matrix whose entries are 
 *		 defined by the array DIAG, then this test attempts to 
 *		 guarantee that 
 *
 *			   DENORM(D*(X-XSOL)) .LE. XTOL*DENORM(D*XSOL). 
 *
 *		 If this condition is satisfied with XTOL = 10**(-K), then the 
 *		 larger components of D*X have K significant decimal digits and 
 *		 INFO is set to 1.  There is a danger that the smaller 
 *		 components of D*X may have large relative errors, but the fast 
 *		 rate of convergence of DNSQ usually avoids this possibility. 
 *
 *		 Unless high precision solutions are required, the recommended 
 *		 value for XTOL is the square root of the machine precision. 
 *
 *
 * 5. Unsuccessful Completion. 
 *
 *	   Unsuccessful termination of DNSQ can be due to improper input 
 *	   parameters, arithmetic interrupts, an excessive number of 
 *	   function evaluations, or lack of good progress. 
 *
 *	   Improper Input Parameters.  INFO is set to 0 if
 *		 N .LE. 0, or LDFJAC .LT. N, or 
 *		 XTOL .LT. 0.E0, or MAXFEV .LE. 0, or ML .LT. 0, or MU .LT. 0, 
 *		 or FACTOR .LE. 0.E0, or LR .LT. (N*(N+1))/2. 
 *
 *	   Arithmetic Interrupts.  If these interrupts occur in the FCN 
 *		 subroutine during an early stage of the computation, they may 
 *		 be caused by an unacceptable choice of X by DNSQ.  In this 
 *		 case, it may be possible to remedy the situation by rerunning 
 *		 DNSQ with a smaller value of FACTOR. 
 *
 *	   Excessive Number of Function Evaluations.  A reasonable value 
 *		 for MAXFEV is 100*(N+1) for JAC!=NULL and 200*(N+1) for JAC==NULL. 
 *
 *		 If the number of calls to FCN reaches MAXFEV, then this 
 *		 indicates that the routine is converging very slowly as 
 *		 measured by the progress of FVEC, and INFO is set to 2. This 
 *
 *		 situation should be unusual because, as indicated below, lack 
 *		 of good progress is usually diagnosed earlier by DNSQ, 
 *		 causing termination with info = 4 or INFO = 5. 
 *
 *	   Lack of Good Progress.  DNSQ searches for a zero of the system 
 *
 *		 by minimizing the sum of the squares of the functions.  In so 
 *		 doing, it can become trapped in a region where the minimum 
 *		 does not correspond to a zero of the system and, in this 
 *		 situation, the iteration eventually fails to make good 
 *		 progress.  In particular, this will happen if the system does 
 *		 not have a zero.  If the system has a zero, rerunning DNSQ 
 *		 from a different starting point may be helpful. 
 *
 *
 * 6. Characteristics of The Algorithm. 
 *
 *	   DNSQ is a modification of the Powell Hybrid method.  Two of its 
 *	   main characteristics involve the choice of the correction as a 
 *	   convex combination of the Newton and scaled gradient directions, 
 *	   and the updating of the Jacobian by the rank-1 method of 
 *	   Broyden.  The choice of the correction guarantees (under 
 *	   reasonable conditions) global convergence for starting points 
 *	   far from the solution and a fast rate of convergence.  The 
 *	   Jacobian is calculated at the starting point by either the 
 *	   user-supplied subroutine or a forward-difference approximation, 
 *	   but it is not recalculated until the rank-1 method fails to 
 *	   produce satisfactory progress. 
 *
 *	   Timing.  The time required by DNSQ to solve a given problem 
 *		 depends on N, the behavior of the functions, the accuracy 
 *		 requested, and the starting point.  The number of arithmetic 
 *		 operations needed by DNSQ is about 11.5*(N**2) to process 
 *		 each evaluation of the functions (call to FCN) and 1.3*(N**3) 
 *		 to process each evaluation of the Jacobian (call to JAC, 
 *		 if JAC!=NULL).  Unless FCN and JAC can be evaluated quickly, 
 *		 the timing of DNSQ will be strongly influenced by the time 
 *		 spent in FCN and JAC. 
 *
 *	   Storage.  DNSQ requires (3*N**2 + 17*N)/2 single precision 
 *		 storage locations, in addition to the storage required by the 
 *		 program.  There are no internally declared storage arrays. 
 *
 * References:   M. J. D. Powell, A hybrid method for nonlinear equa- 
 *				 tions. In Numerical Methods for Nonlinear Algebraic 
 *				 Equations, P. Rabinowitz, Editor.  Gordon and Breach, 
 *				 1988. 
 *
 * Routines called: D1MPYQ, D1UPDT, DDOGLG, DENORM, DFDJC1, DQFORM, DQRFAC
 *
 * Revision history: (YYMMDD) 
 *   800301  DATE WRITTEN 
 *   890531  Changed all specific intrinsics to generic.  (WRB) 
 *   890831  Modified array declarations.  (WRB) 
 *   890831  REVISION DATE from Version 3.2 
 *   891214  Prologue converted to Version 4.0 format.  (BAB) 
 *   900315  CALLs to XERROR changed to CALLs to XERMSG.  (THJ) 
 *   920501  Reformatted the REFERENCES section.  (WRB) 
 *   960510  Translated to C and features added. (GWG)
 */

/* Returns status. 0 = OK. */
int dnsq(
	void *fdata,	/* Opaque data pointer, passed to called functions */
	int (*fcn)(void *fdata, int n, double *x, double *fvec, int iflag),
					/* Pointer to function we are solving */
	int (*jac)(void *fdata, int n, double *x, double *fvec, double **fjac),
					/* Optional function to compute jacobian, NULL if not used */
	double **sjac,	/* Optional initial jacobian matrix/last jacobian matrix. NULL if not used.*/
	int startsjac,	/* Set to non-zero to use sjac as the initial jacobian */
	int n,			/* Number of functions and variables */
	double x[],		/* Initial solution estimate, RETURNs final solution */
	double fvec[],	/* Array that will be RETURNed with thefunction values at the solution */
	double dtol,	/* Desired delta tollerance of the solution */
	double atol,	/* Desired tollerance of the root (stops on dtol or tol) */
	int maxfev,		/* Set excessive Number of Function Evaluations */
	int ml,			/* number of subdiagonals within the band of the Jacobian matrix. */
	int mu, 		/* number of superdiagonals within the band of the Jacobian matrix. */
	double epsfcn,	/* determines suitable step for forward-difference approximation */
	double diag[],	/* Optional scaling factors, use NULL for internal scaling */
	double factor,	/* Determines the initial step bound */
	double maxstep,	/* Determines the maximum subsequent step bound (0.0 for none) */
					/* maxstep is NOT WORKING !!! ??? */
	int nprint, 	/* Turn on debugging printouts from func() every nprint itterations */
	int *nfev,		/* RETURNs the number of calls to fcn() */ 
	int *njev		/* RETURNs the number of calls to jac() */
) {
	int info = 0;	/* Return status  - invalid argument */
	int smode = 0;	/* Scaling mode, 1 = internal */

	/* Internal working arrays */
	double *fjac = NULL;	/* n by n array which contains the orthogonal matrix Q */
					/* produced by the QR factorization of the final approximate Jacobian. */ 
	int ldfjac = n; /* stride of 2d array */
	double **jjac = NULL;	/* NR style pointers to fjac */

	double *r = NULL;  	/* Array of length (n*(n+1))/2 which contains the upper  */
					/* triangular matrix produced by the QR factorization of the  */
					/* final approximate Jacobian, stored rowwise. */
	double *qtf = NULL;	/* Array of length n which contains the vector (q transpose)*fvec. */

	double *wa1 = NULL; 	/* Four working arrays length n */
	double *wa2 = NULL;
	double *wa3 = NULL;
	double *wa4 = NULL;

	int iwa[1];		/* Pivot swap array (only one element used) */

	/* Local variables */
	bool jeval;
	int iter;
	int i, j, k, l, iflag;
	int qrflag;					/* Set when a valid Q is in fjac[], and R is in r[] */
	int ncsuc;
	int nslow1, nslow2, ncfail;
	double temp;
	double delta = 0.0;
	double ratio, fnorm, pnorm;
	double xnorm = 0.0;
	double fnorm1;
	double actred, prered;
	double sum;

	/* Allocate out working arrays */
	if (diag == NULL) {	/* Internal scaling */
		smode = 1;
		diag = dvector(0,n-1);
	}
	fjac = dvector(0,(n * n)-1);
	jjac = convert_dmatrix(fjac,0,n-1,0,ldfjac-1);
	r    = dvector(0,((n * (n+1))/2)-1);
	qtf  = dvector(0,n-1);
	wa1  = dvector(0,n-1);
	wa2  = dvector(0,n-1);
	wa3  = dvector(0,n-1);
	wa4  = dvector(0,n-1);

	qrflag = 0;
	iflag = 0;
	*nfev = 0;
	*njev = 0;

	/* Check the input parameters for errors. */
	if (n <= 0 || dtol < 0.0 || maxfev <= 0
		|| ml < 0 || mu < 0 || factor <= 0.0 || maxstep < 0.0
	    || (sjac == NULL && startsjac != 0)) {
		goto func_exit;
	}
	if (!smode) {	/* Check the scaling array we were given */
		for (j = 0; j < n; ++j) {
			if (diag[j] <= 0.0) {
				goto func_exit;
			}
		}
	}

	/* Evaluate the function at the starting point */
	/* and calculate its norm. */
	*nfev = 1;
	if ((iflag = (*fcn)(fdata, n, &x[0], &fvec[0], 1)) < 0)
		goto func_exit;
	fnorm = denorm(n, &fvec[0]);

	/* Initialize iteration counter and monitors. */

	iter = 1;
	ncsuc = 0;
	ncfail = 0;
	nslow1 = 0;
	nslow2 = 0;

	/*	Beginning of the outer loop. */
	for (;;) {
		jeval = TRUE;

		/* initialize the jacobian matrix. */
		if (startsjac) {	/* User provided array */
			for (j = 0; j < n; j++) {
				for (i = 0; i < n; i++) 
					fjac[j * ldfjac + i] = sjac[j][i];
			}
		} else if (jac == NULL) { /* Code approximates the jacobian */
			int ti;
			iflag = dfdjc1(fdata, fcn, n, &x[0], &fvec[0], &fjac[0], ldfjac,
				ml, mu, epsfcn, &wa1[0], &wa2[0]);
			ti = ml + mu + 1;
			*nfev += min(ti,n);
		} else { /* User supplies jacobian function */
			iflag = (*jac)(fdata, n, &x[0], &fvec[0], jjac);
			++(*njev);
		}
#ifdef DEBUG
printf("DNSQ: Jacobian initialized\n");
#endif

		if (iflag < 0)
			goto func_exit;

		/* Compute the qr factorization of the jacobian. */
		dqrfac(n, n, &fjac[0], ldfjac, FALSE, iwa, &wa1[0], &wa2[0], &wa3[0]);

		/* On the first iteration and if internal scaling mode set, scale */
		/* according to the norms of the columns of the initial jacobian. */
		/* (wa2[] will contain norms) */
		/* Do this on subsequent itterations too, if a maxstep is set. */
		if (iter == 1 || maxstep > 0.0) {
			if (smode) {
				for (j = 0; j < n; ++j) {
					diag[j] = wa2[j];
					if (wa2[j] == 0.0) {
						diag[j] = 1.0;
					}
				}
			}
	
			/* On the first iteration, calculate the norm of the scaled */
			/* x[], and initialize the step bound delta. */
			for (j = 0; j < n; ++j)
				wa3[j] = diag[j] * x[j];
	
			xnorm = denorm(n, &wa3[0]);
			if (iter == 1) {
				delta = factor * xnorm;
				if (delta == 0.0)
					delta = factor;
#ifdef DEBUG
				printf("Initial Delta = %f\n",delta);
#endif /* DEBUG */
			} else {
				delta = maxstep * xnorm;
				if (delta == 0.0)
					delta = maxstep;
#ifdef DEBUG
				printf("Subsequent Delta = %f\n",delta);
#endif /* DEBUG */
			}
		}

		/* Form (q transpose)*fvec and store in qtf. */
		for (i = 0; i < n; ++i)
			qtf[i] = fvec[i];

		for (j = 0; j < n; ++j) {
			if (fjac[j + j * ldfjac] != 0.0) {
				sum = 0.0;
				for (i = j; i < n; ++i)
					sum += fjac[i + j * ldfjac] * qtf[i];
				temp = -sum / fjac[j + j * ldfjac];
	
				for (i = j; i < n; ++i)
					qtf[i] += fjac[i + j * ldfjac] * temp;
			}
		}

		/* Copy the triangular factor of the qr factorization into r. */
		for (j = 0; j < n; ++j) {
			l = j;
			if (j >= 1) {
				for (i = 0; i < j; ++i) {
					r[l] = fjac[i + j * ldfjac];
					l += (n-1) - i;
				}
			}
			r[l] = wa1[j];
		}

		/* Accumulate the orthogonal factor Q in fjac. */
		dqform(n, n, &fjac[0], ldfjac, &wa1[0]);

		qrflag = 1;

		/* Rescale if necessary. */
		if (smode) {
			for (j = 0; j < n; ++j) {
				diag[j] = max(diag[j],wa2[j]);
			}
		}

		/* Beginning of the inner loop. */
		for (;;) {
			/* If requested, call fcn to enable printing of iterates. */
			if (nprint > 0) {
				if ((iter - 1) % nprint == 0)
					if ((iflag = (*fcn)(fdata, n, &x[0], &fvec[0], 0)) < 0)
						goto func_exit;
			}

#ifdef DEBUG
			/* If the user supplied an array, and there is a valid Q in */
			/* fjac[], and R is in r[], recover the most recent Jacobian */
			/* matrix by multiplying Q by R */
			if (qrflag && sjac) {
				for (i = 0; i < n; ++i)  {
					for (j = 0; j < n; ++j) {
						double temp = 0.0;
						l = j;
						for (k = 0; k <= j; ++k) {
							temp += fjac[k * ldfjac + i] * r[l];
							l += (n-1-k);
						}
						sjac[j][i] = temp;
					}
				}
				printf("Current Jacobian = \n");
				for (j = 0; j < n; ++j) {
					for (i = 0; i < n; ++i)  {
						printf("%f  ",sjac[j][i]);
					}
				printf("\n");
				}
			}
#endif /* DEBUG */

			/* Determine the direction p. */
			ddoglg(n, &r[0], &diag[0], &qtf[0], delta, &wa1[0], &wa2[0], &wa3[0]);

			/* Store the direction p and x + p. calculate the norm of p. */
			/* (wa1[] is X[] output array from ddoglg()) */
			for (j = 0; j < n; ++j) {
				wa1[j] = -wa1[j];
				wa2[j] = x[j] + wa1[j];
				wa3[j] = diag[j] * wa1[j];
			}
			pnorm = denorm(n, &wa3[0]);

			/* On the first iteration, adjust the initial step bound. */
			/* Do this on subsequent itterations too, if maxstep is set. */
			if (iter == 1 || maxstep > 0.0) {
				delta = min(delta,pnorm);
#ifdef DEBUG
				if (iter == 1)
					printf("First itter Delta = %f\n",delta);
				else
					printf("Subsequent itter Delta = %f\n",delta);
#endif /* DEBUG */
			}

			/* Evaluate the function at x + p and calculate its norm. */
			++(*nfev);
			if ((iflag = (*fcn)(fdata, n, &wa2[0], &wa4[0], 1)) < 0)
				goto func_exit;
			fnorm1 = denorm(n, &wa4[0]);

			/* Compute the scaled actual reduction. */
			actred = -1.0;				/* Assume norm is made worse */
			if (fnorm1 < fnorm) {		/* There was a reduction in the norm */
				double td;
				td = fnorm1 / fnorm;
				actred = 1.0 - td * td;
			}

			/* Compute the scaled predicted reduction. */
			l = 0;
			for (i = 0; i < n; ++i) {
				sum = 0.0;
				for (j = i; j < n; ++j) {
					sum += r[l] * wa1[j];
					++l;
				}
				wa3[i] = qtf[i] + sum;
			}

			temp = denorm(n, &wa3[0]);
			prered = 0.0;
			if (temp < fnorm) {
				double td;
				td = temp / fnorm;
				prered = 1.0 - td * td;
			}

			/* Compute the ratio of the actual to the predicted reduction. */
			ratio = 0.0;		/* Assume no improvement */ 
			if (prered > 0.0)
				ratio = actred / prered;
#ifdef DEBUG
printf("DNSQ: actual/predicted ratio = %f\n",ratio);
#endif /* DEBUG */

			/* Update the step bound. */
			if (ratio < 0.1) {
				ncsuc = 0;
				++ncfail;		/* Forces jacobian recalc when ncfail == 2 */
				delta = 0.5 * delta;
#ifdef DEBUG
printf("ratio < 0.1 bad, Delta = %f, ncfail = %d\n",delta,ncfail);
#endif /* DEBUG */
			} else {
				ncfail = 0;		/* Pospone jacobian recalc */
				++ncsuc;
				if (ratio >= 0.5 || ncsuc > 1) {
					delta = max(delta, pnorm / 0.5);
#ifdef DEBUG
printf("ratio > 0.1 good, Delta = %f, ncsuc = %d\n",delta,ncsuc);
#endif /* DEBUG */
				}
				if (fabs(ratio - 1.0) <= 0.1) {
					delta = 2.0 * pnorm;
#ifdef DEBUG
printf("abs(ratio -1.0) <= 0.1 Delta = %f\n",delta);
#endif /* DEBUG */
				}
			}

			/* Test for progressing iteration. */
			if (ratio > 0.0001) {
#ifdef DEBUG
printf("Successful itter\n");
#endif /* DEBUG */
				/* Successful iteration. update x, fvec, and their norms. */
				for (j = 0; j < n; ++j) {
					x[j] = wa2[j];
					wa2[j] = diag[j] * x[j];
					fvec[j] = wa4[j];
				}
				xnorm = denorm(n, &wa2[0]);
				fnorm = fnorm1;
				++iter;
			}

#ifdef DEBUG
printf("DNSQ: actual = %f\n",actred);
#endif /* DEBUG */
			/* Determine the progress of the iteration. */
			++nslow1;
			if (fabs(actred) >= 0.001)
				nslow1 = 0;
			if (jeval)
				++nslow2;
			if (fabs(actred) >= 0.1)
				nslow2 = 0;

			/* Test for convergence. */
			if (delta <= dtol * xnorm || fnorm == 0.0) {
#ifdef DEBUG
printf("DNSQ: delta %f <= dtol * xnorm %f || fnorm == %f\n",delta,dtol * xnorm,fnorm);
#endif /* DEBUG */
				info = 1;
				goto func_exit;
			}
			/* Test for root meeting tolerance (GWG) */
			for (j = 0; j < n; ++j) {
				if (fabs(fvec[j]) > atol)
					break;
			}
			if (j >= n) {		/* All were below tollerance */
#ifdef DEBUG
printf("DNSQ: fvecs are all <= atol %f\n",atol);
#endif /* DEBUG */
				info = 1;
				goto func_exit;
			}

			/* Tests for termination and stringent tolerances. */
			if (*nfev >= maxfev)
				info = 2;
			if (0.1 * max(0.1 * delta, pnorm) <= M_DIVER * xnorm)
				info = 3;
			if (nslow2 == 5)
				info = 4;
			if (nslow1 == 10)
				info = 5;
			if (info != 0)
				goto func_exit;

			/* Criterion for recalculating jacobian */
			if (ncfail == 2) {
				break;	/* Break out of inner loop */
			}

			/* Calculate the rank one modification to the jacobian */
			/* and update qtf if necessary. */
			for (j = 0; j < n; ++j) {
				sum = 0.0;
				for (i = 0; i < n; ++i) {
					sum += fjac[i + j * ldfjac] * wa4[i];
				}
				wa2[j] = (sum - wa3[j]) / pnorm;
				wa1[j] = diag[j] * (diag[j] * wa1[j] / pnorm);
				if (ratio >= 1e-4) {
					qtf[j] = sum;
				}
			}

			/* Compute the qr factorization of the updated jacobian. */
			d1updt(n, n, &r[0], &wa1[0], &wa2[0], &wa3[0]);
			d1mpyq(n, n, &fjac[0], ldfjac, &wa2[0], &wa3[0]);
			d1mpyq(1, n, &qtf[0], 1, &wa2[0], &wa3[0]);

			jeval = FALSE;
		} /* End of the inner loop. */
	} /* End of the outer loop. */


	/* Termination, either normal or user imposed. */
func_exit:

	/* If the user supplied an array, and there is a valid Q in */
	/* fjac[], and R is in r[], recover the most recent Jacobian */
	/* matrix by multiplying Q by R */
	if (qrflag && sjac) {
		for (i = 0; i < n; ++i)  {
			for (j = 0; j < n; ++j) {
				double temp = 0.0;
				l = j;
				for (k = 0; k <= j; ++k) {
					temp += fjac[k * ldfjac + i] * r[l];
					l += (n-1-k);
				}
				sjac[j][i] = temp;
			}
		}
	}

	/* Free our working arrays */
	if (smode)
		free_dvector(diag,0,n-1);
	free_dvector(fjac,0,(n * n)-1);
	free_convert_dmatrix(jjac,0,n-1,0,ldfjac-1);
	free_dvector(r,0,((n * (n+1))/2)-1);
	free_dvector(qtf,0,n-1);
	free_dvector(wa1,0,n-1);
	free_dvector(wa2,0,n-1);
	free_dvector(wa3,0,n-1);
	free_dvector(wa4,0,n-1);


	if (iflag < 0)
		info = iflag;
	if (nprint > 0)
		(*fcn)(fdata, n, &x[0], &fvec[0], 0);
#ifdef DEBUG
	if (info < 0)
		printf("dnsq: Execution terminated because user set iflag negative\n");
	if (info == 0)
		printf("dnsq: Invalid input parameter\n");
	if (info == 2)
		printf("dnsq: Too many function evaluations\n");
	if (info == 3)
		printf("dnsq: dtol too small. no further improvement possible\n");
	if (info > 4)
		printf("dnsq: Iteration not making good progress\n");
#endif /* DEBUG */
	return info;
} /* dnsq */

/***************************************************************/
/***************************************************************/

/*
 *	 Given an M by N matrix A, this subroutine computes A*Q where 
 *	 Q is the product of 2*(N - 1) transformations                
 *                                                                  
 *		   GV(N-1)*...*GV(1)*GW(1)*...*GW(N-1) 
 *                                                                  
 *	 and GV(I), GW(I) are Givens rotations in the (I,N) plane which 
 *	 eliminate elements in the I-th and N-th planes, respectively. 
 *	 Q itself is not given, rather the information to recover the 
 *	 GV, GW rotations is supplied. 
 *                                                                  
 */

static int d1mpyq(
	int m,		/* Number of rows of A */
	int n,		/* Number of columns of A */
	double *a,	/* M by N array */
	int lda,	/* stride for a[][] */
    double *v,	/* Input array */
	double *w)	/* Input array */
{
	/* Local variables */
	int i, j;
	int nm1 = n - 1;
	double temp;
	double cos_ = 0.0, sin_ = 0.0;

	/* Apply the first set of givens rotations to a. */
	if (nm1 >= 1) {
		for (j = n-2; j >= 0; --j) {
			if (fabs(v[j]) > 1.0) 
				cos_ = 1.0 / v[j];
			if (fabs(v[j]) > 1.0) { /* Computing 2nd power */
				sin_ = sqrt(1.0 - cos_ * cos_);
			}
			if (fabs(v[j]) <= 1.0) {
				sin_ = v[j];
			}
			if (fabs(v[j]) <= 1.0) { /* Computing 2nd power */
				cos_ = sqrt(1.0 - sin_ * sin_);
			}
			for (i = 0; i < m; ++i) {
				temp = cos_ * a[i + j * lda] - sin_ * a[i + nm1 * lda];
				a[i + nm1 * lda] = sin_ * a[i + j * lda] + cos_ * a[i + nm1 * lda];
				a[i + j * lda] = temp;
			}
		}

		/* Apply the second set of givens rotations to a. */
		for (j = 0; j < nm1; ++j) {
			if (fabs(w[j]) > 1.0) 
				cos_ = 1.0 / w[j];
			if (fabs(w[j]) > 1.0) { /* Computing 2nd power */
				sin_ = sqrt(1.0 - cos_ * cos_);
			}
			if (fabs(w[j]) <= 1.0)
				sin_ = w[j];
			if (fabs(w[j]) <= 1.0) { /* Computing 2nd power */
				cos_ = sqrt(1.0 - sin_ * sin_);
			}
			for (i = 0; i < m; ++i) {
				temp = cos_ * a[i + j * lda] + sin_ * a[i + nm1 * lda];
				a[i + nm1 * lda] = -sin_ * a[i + j * lda] + cos_ * a[i + nm1 * lda];
				a[i + j * lda] = temp;
			}
		}
	}
	return 0;
}

/***************************************************************/
/***************************************************************/

/*
 *	 Given an M by N lower trapezoidal matrix S, an M-vector U, 
 *	 and an N-vector V, the problem is to determine an 
 *	 orthogonal matrix Q such that 
 *
 *				   t 
 *		   (S + U*V )*Q 
 *
 *	 is again lower trapezoidal. 
 *
 *	 This subroutine determines Q as the product of 2*(N - 1) 
 *	 transformations 
 *
 *		   GV(N-1)*...*GV(1)*GW(1)*...*GW(N-1) 
 *
 *	 where GV(I), GW(I) are Givens rotations in the (I,N) plane 
 *	 which eliminate elements in the I-th and N-th planes, 
 *	 respectively. Q itself is not accumulated, rather the 
 *	 information to recover the GV, GW rotations is returned. 
 *
 */

static int d1updt(
	int m,			/* Number of rows of S */
	int n,			/* Number of columns of S */
	double *s,		/* Array of length (n*(2*m-n+1))/2 containing the lower trapezoid matrix */
	double *u,		/* U vector lenghth m */
	double *v,		/* V vector length n */
	double *w)		/* Output array W length m */
{
	int i, j, l;
	int jj;
	int nm1 = n - 1;
	int nmj;
	double temp;
	double giant, cotan;
	double tan_;
	double cos_, sin_, tau;

	/* Giant is the largest magnitude. */
	giant = M_LARGE;

	/* Initialize the diagonal element pointer. */
	jj = n * ((m << 1) - n + 1) / 2 - (m - n) - 1;

	/* Move the nontrivial part of the last column of s into w. */
	l = jj;
	for (i = nm1; i < m; ++i) {
		w[i] = s[l];
		++l;
	}

	/* Rotate the vector v into a multiple of the n-th unit vector */
	/* in such a way that a spike is introduced into w. */
	if (nm1 >= 1) {
		for (nmj = 1; nmj <= nm1; ++nmj) {
			j = n - nmj - 1;
			jj -= m - j;
			w[j] = 0.0;
			if (v[j] == 0.0)
				continue;

			/* Determine a givens rotation which eliminates the */
			/* j-th element of v. */
			if (fabs(v[nm1]) < fabs(v[j])) {
				cotan = v[nm1] / v[j];
				sin_ = 0.5 / sqrt(0.25 + 0.25 * (cotan * cotan));
				cos_ = sin_ * cotan;
				tau = 1.0;
				if (fabs(cos_) * giant > 1.0) {
					tau = 1.0 / cos_;
				}
			} else {
				tan_ = v[j] / v[nm1];
				cos_ = 0.5 / sqrt(0.25 + 0.25 * (tan_ * tan_));
				sin_ = cos_ * tan_;
				tau = sin_;
			}

			/* Apply the transformation to v and store the information */
			/* necessary to recover the givens rotation. */
			v[nm1] = sin_ * v[j] + cos_ * v[nm1];
			v[j] = tau;

			/* Apply the transformation to s and extend the spike in w. */
			l = jj;
			for (i = j; i < m; ++i) {
				temp = cos_ * s[l] - sin_ * w[i];
				w[i] = sin_ * s[l] + cos_ * w[i];
				s[l] = temp;
				++l;
			}
		}
	}

	/* Add the spike from the rank 1 update to w. */
	for (i = 0; i < m; ++i)
		w[i] += v[nm1] * u[i];

	/* Eliminate the spike. */
	if (nm1 >= 1) {
		for (j = 0; j < nm1; ++j) {
			if (w[j] != 0.0) {
				/* Determine a givens rotation which eliminates the */
				/* j-th element of the spike. */
				if (fabs(s[jj]) < fabs(w[j])) {
					cotan = s[jj] / w[j];
					sin_ = 0.5 / sqrt(0.25 + 0.25 * (cotan * cotan));
					cos_ = sin_ * cotan;
					tau = 1.0;
					if (fabs(cos_) * giant > 1.0) {
						tau = 1.0 / cos_;
					}
				} else {
					tan_ = w[j] / s[jj];
					cos_ = 0.5 / sqrt(0.25 + 0.25 * (tan_ * tan_));
					sin_ = cos_ * tan_;
					tau = sin_;
				}

				/* Apply the transformation to s and reduce the spike in w. */
				l = jj;
				for (i = j; i < m; ++i) {
					temp = cos_ * s[l] + sin_ * w[i];
					w[i] = -sin_ * s[l] + cos_ * w[i];
					s[l] = temp;
					++l;
				}

				/* Store the information necessary to recover the givens rotation. */
				w[j] = tau;
			}
			jj += m - j;
		}
	}

	/* Move w back into the last column of the output s. */
	l = jj;
	for (i = nm1; i < m; ++i) {
		s[l] = w[i];
		++l;
	}
	return 0;
}

/***************************************************************/
/***************************************************************/

/*
 *	 Given an M by N matrix A, an N by N nonsingular diagonal 
 *	 matrix D, an M-vector B, and a positive number DELTA, the 
 *	 problem is to determine the convex combination X of the 
 *	 Gauss-Newton and scaled gradient directions that minimizes 
 *	 (A*X - B) in the least squares sense, subject to the 
 *	 restriction that the Euclidean norm of D*X be at most DELTA. 
 *
 *	 This subroutine completes the solution of the problem 
 *	 if it is provided with the necessary information from the 
 *	 QR factorization of A. That is, if A = Q*R, where Q has 
 *	 orthogonal columns and R is an upper triangular matrix, 
 *	 then DDOGLG expects the full upper triangle of R and 
 *	 the first N components of (Q transpose)*B. 
 *
 */

static int ddoglg(
	int n,			/* Order of r[] */
	double r[],		/* Array of length (n*(n+!))/2 containing upper triangular matrix */
	double diag[],	/* Array of length n containing the diagonal elements of the matrix d[] */
	double qtb[],	/* Array of length n containing first n elements of vector (Q transpose)*B */
	double delta,	/* Upper bound on the Euclidean norm of D*X  */
	double x[],		/* output array of length N containing the desired direction */
	double wa1[],	/* Working arrays length n */
	double wa2[])
{
	int i, j, k, l;
	int jj;
	int jp1;
	int nm1 = n-1;
	double temp;
	double alpha, gnorm, qnorm;
	double epsmch;
	double sgnorm;
	double sum;

	/* Epsmch is the machine precision. */
	epsmch = M_DIVER;

	/* First, calculate the gauss-newton direction. */
	jj = n * (n + 1) / 2;
	for (k = 0; k < n; ++k) {
		j = nm1 - k;
		jp1 = j + 1;
		jj -= (k+1);
		l = jj + 1;
		sum = 0.0;
		if (n >= (jp1+1)) {
			for (i = jp1; i < n; ++i) {
				sum += r[l] * x[i];
				++l;
			}
		}

		temp = r[jj];
		if (temp == 0.0) {
			l = j;
			for (i = 0; i <= j; ++i) { /* Computing MAX */
				double dt;
				dt = fabs(r[l]);
				temp = max(temp,dt);
				l += nm1 - i;
			}
			temp = epsmch * temp;
			if (temp == 0.0) {
				temp = epsmch;
			}
		}
		x[j] = (qtb[j] - sum) / temp;
	}


	/* Test whether the gauss-newton direction is acceptable. */
	for (j = 0; j < n; ++j) {
		wa1[j] = 0.0;
		wa2[j] = diag[j] * x[j];
	}
	qnorm = denorm(n, &wa2[0]);
	if (qnorm <= delta) {
		return 0;		/* Done - use this direction */
	}

	/* The gauss-newton direction is not acceptable. */
	/* Next, calculate the scaled gradient direction. */
	l = 0;
	for (j = 0; j < n; ++j) {
		temp = qtb[j];
		for (i = j; i < n; ++i) {
			wa1[i] += r[l] * temp;
			++l;
		}
		wa1[j] /= diag[j];
	}

	/* Calculate the norm of the scaled gradient and test for */
	/* The special case in which the scaled gradient is zero. */
	gnorm = denorm(n, &wa1[0]);
	sgnorm = 0.0;
	alpha = delta / qnorm;
	if (gnorm != 0.0) {
		/* Calculate the point along the scaled gradient */
		/* at which the quadratic is minimized. */
		for (j = 0; j < n; ++j)
			wa1[j] = wa1[j] / gnorm / diag[j];
		l = 0;
		for (j = 0; j < n; ++j) {
			sum = 0.0;
			for (i = j; i < n; ++i) {
				sum += r[l] * wa1[i];
				++l;
			}
			wa2[j] = sum;
		}
		temp = denorm(n, &wa2[0]);
		sgnorm = gnorm / temp / temp;

		/* Test whether the scaled gradient direction is acceptable. */
		alpha = 0.0;
		if (sgnorm < delta) {
			double d0,d1,d2,d3,d4,
			/* The scaled gradient direction is not acceptable. */
			/* Finally, calculate the point along the dogleg */
			/* at which the quadratic is minimized. */
			bnorm = denorm(n, &qtb[0]);
			d0 = bnorm / gnorm * (bnorm / qnorm) * (sgnorm / delta);
			d1 = sgnorm / delta;
			d2 = d0 - delta / qnorm;
			d3 = delta / qnorm;
			d4 = sgnorm / delta;
			d0 = d0 - delta / qnorm * (d1 * d1)
			   + sqrt(d2 * d2 + (1.0 - d3 * d3) * (1.0 - d4 * d4));
			d1 = sgnorm / delta;
			alpha = delta / qnorm * (1.0 - d1 * d1) / d0;
		}
	}

	/* Form appropriate convex combination of the gauss-newton */
	/* direction and the scaled gradient direction. */
	temp = (1.0 - alpha) * min(sgnorm,delta);
	for (j = 0; j < n; ++j)
		x[j] = temp * wa1[j] + alpha * x[j];

	return 0;
} /* ddoglg */


/***************************************************************/
/***************************************************************/

/*
 *	 Given an N-vector X, this function calculates the 
 *	 Euclidean norm of X. 
 *
 *	 The Euclidean norm is computed by accumulating the sum of 
 *	 squares in three different sums. The sums of squares for the 
 *	 small and large components are scaled so that no overflows 
 *	 occur. Non-destructive underflows are permitted. Underflows 
 *	 and overflows do not occur in the computation of the unscaled 
 *	 sum of squares for the intermediate components. 
 *	 The definitions of small, intermediate and large components 
 *	 depend on two constants, RDWARF and RGIANT. The main 
 *	 restrictions on these constants are that RDWARF**2 not 
 *	 underflow and RGIANT**2 not overflow. The constants 
 *	 given here are suitable for every known computer. 
 *
 */

static double denorm(
	int n,			/* Size of x[] */
	double x[])		/* Input vector */
{
	if (n < 8) {	/* Make it simple and fast */
		double ss = 0.0;
		switch (n) {
			case 8:
				ss += x[7] * x[7];
			case 7:
				ss += x[6] * x[6];
			case 6:
				ss += x[5] * x[5];
			case 5:
				ss += x[4] * x[4];
			case 4:
				ss += x[3] * x[3];
			case 3:
				ss += x[2] * x[2];
			case 2:
				ss += x[1] * x[1];
			case 1:
				ss += x[0] * x[0];
		}
		return sqrt(ss);
	} else {
		/* Initialized data */
		static double rdwarf = 3.834e-20;
		static double rgiant = 1.304e19;
	
		/* Local variables */
		static double xabs, x1max, x3max;
		static int i;
		static double s1, s2, s3, agiant, floatn;
		double ret_val, td;
	
		s1 = 0.0;	/* Large component */
		s2 = 0.0;	/* Intermedate component */
		s3 = 0.0;	/* Small component */
		x1max = 0.0;
		x3max = 0.0;
		floatn = (double) (n + 1);
		agiant = rgiant / floatn;
		for (i = 0; i < n; i++) {
			xabs = (td = x[i], fabs(td));
	
			/* Sum for intermediate components. */
			if (xabs > rdwarf && xabs < agiant) {
				td = xabs;				 	/* Computing 2nd power */
				s2 += td * td;
	
			/* Sum for small components. */
			} else if (xabs <= rdwarf) {
				if (xabs <= x3max) {
					if (xabs != 0.0) {		/* Computing 2nd power */
					td = xabs / x3max;
					s3 += td * td;
					}
				} else { /* Computing 2nd power */
					td = x3max / xabs;
					s3 = 1.0 + s3 * (td * td);
					x3max = xabs;
				}
	
			/* Sum for large components. */
			} else {
				if (xabs <= x1max) {		/* Computing 2nd power */
					td = xabs / x1max;
					s1 += td * td;
				} else {					/* Computing 2nd power */
					td = x1max / xabs;
					s1 = 1.0 + s1 * (td * td);
					x1max = xabs;
				}
			}
		}
	
		/* Calculation of norm. */
		if (s1 != 0.0) {		/* Large is present */
			ret_val = x1max * sqrt(s1 + s2 / x1max / x1max);
		} else {				/* Medium and small are present */
			if (s2 == 0.0) {
				ret_val = x3max * sqrt(s3);		/* Small only */
			} else {
				if (s2 >= x3max) {		/* Medium larger than small */
					ret_val = sqrt(s2 * (1.0 + x3max / s2 * (x3max * s3)));
				} else {				/* Small large than medium */
					ret_val = sqrt(x3max * (s2 / x3max + x3max * s3));
				}
			}
		}
		return ret_val;
	}
}

/***************************************************************/
/***************************************************************/

/*
 *	 This subroutine computes a forward-difference approximation 
 *	 to the N by N Jacobian matrix associated with a specified 
 *	 problem of N functions in N variables. If the Jacobian has 
 *	 a banded form, then function evaluations are saved by only 
 *	 approximating the nonzero terms. 
 *
 */

static int dfdjc1(	/* Return < 0 if fcn() aborts */
	void *fdata,	/* Opaque data pointer to pass to fcn() */
	int (*fcn)(void *fdata, int n, double *x, double *fvec, int iflag),
					/* Pointer to function we are solving */
	int n,			/* Number of functions and variables  */
	double x[],		/* Input array size n */
	double fvec[],	/* array of length n which must contain the functions evaluated at x[]  */
	double fjac[],	/* output n by n array containing approximation to the Jacobian matrix at x[] */
	int ldfjac,		/* stride of fjac[] */
	int ml, 		/* Number of subdiagonals within the band of the Jacobian matrix */
	int mu, 		/* Number of superdiagonals within the band of the Jacobian matrix */
	double epsfcn,	/* Step length for the forward-difference approximation */
	double *wa1,	/* Working arrays of length n */
	double *wa2)
{
	/* Local variables */
	int iflag = 0;
	double temp;
	int msum;
	double h;
	int i, j, k;
	double eps;
	int nm1 = n-1;

	/* M_DIVER is the machine precision. */
	eps = sqrt((max(epsfcn,M_DIVER)));
	msum = ml + mu + 1;
	if (msum >= n) {
		/* Computation of dense approximate jacobian. */
		for (j = 0; j < n; ++j) {
			temp = x[j];
			h = eps * fabs(temp);
			if (h == 0.0)
				h = eps;
			x[j] = temp + h;
			if ((iflag = (*fcn)(fdata,n, &x[0], &wa1[0], 1)) < 0)
				break;
			x[j] = temp;
			for (i = 0; i < n; ++i)
				fjac[i + j * ldfjac] = (wa1[i] - fvec[i]) / h;
		}
	} else {
		/* Computation of banded approximate jacobian. */
		for (k = 0; k < msum; ++k) {
			for (j = k; msum < 0 ? j >= nm1 : j <= nm1; j += msum) {
				wa2[j] = x[j];
				h = eps * fabs(wa2[j]);
				if (h == 0.0)
					h = eps;
				x[j] = wa2[j] + h;
			}
			if ((iflag = (*fcn)(fdata, n, &x[0], &wa1[0], 1)) < 0)
				break;
			for (j = k; msum < 0 ? j >= nm1 : j <= nm1; j += msum) {
				x[j] = wa2[j];
				h = eps * fabs(wa2[j]);
				if (h == 0.0)
					h = eps;
				for (i = 0; i < n; ++i) {
					fjac[i + j * ldfjac] = 0.0;
					if (i >= j - mu && i <= j + ml)
						fjac[i + j * ldfjac] = (wa1[i] - fvec[i]) / h;
				}
			}
		}
	}
	return iflag;
} /* dfdjc1_ */

/***************************************************************/
/***************************************************************/

/*
 *	 This subroutine proceeds from the computed QR factorization of 
 *	 an M by N matrix A to accumulate the M by M orthogonal matrix 
 *	 Q from its factored form. 
 *
 */

static int dqform(
	int m,		/* No of rows of A and the order of Q. */
	int n, 		/* No of columns of A.  */
	double *q,	/* m by m array */
	int ldq,	/* stride of q[][] */
	double *wa)	/* Working aray length m */
{
	int i, j, k, l, minmn;
	double sum;

	/* Zero out upper triangle of q in the first min(m,n) columns. */
	minmn = min(m,n);
	if (minmn >= 2) {
		for (j = 1; j < minmn; ++j) {
			for (i = 0; i < j; ++i)
				q[i + j * ldq] = 0.0;
		}
	}

	/* Initialize remaining columns to those of the identity matrix. */
	if (m > n) {
		for (j = n; j < m; ++j) {
			for (i = 0; i < m; ++i) {
				q[i + j * ldq] = 0.0;
			}
			q[j + j * ldq] = 1.0;
		}
	}

	/* Accumulate q from its factored form. */
	for (l = 0; l < minmn; ++l) {
		k = minmn - l - 1;
		for (i = k; i < m; ++i) {
			wa[i] = q[i + k * ldq];
			q[i + k * ldq] = 0.0;
		}
		q[k + k * ldq] = 1.0;
		if (wa[k] != 0.0) {
			for (j = k; j < m; ++j) {
				double temp;
				sum = 0.0;
				for (i = k; i < m; ++i)
					sum += q[i + j * ldq] * wa[i];
				temp = sum / wa[k];
				for (i = k; i < m; ++i)
					q[i + j * ldq] -= temp * wa[i];
			}
		}
	}
	return 0;
} /* dqform_ */

/***************************************************************/
/***************************************************************/

/*
 *	 This subroutine uses Householder transformations with column 
 *	 pivoting (optional) to compute a QR factorization of the 
 *	 M by N matrix A. That is, DQRFAC determines an orthogonal 
 *	 matrix Q, a permutation matrix P, and an upper trapezoidal 
 *	 matrix R with diagonal elements of nonincreasing magnitude, 
 *	 such that A*P = Q*R. The Householder transformation for 
 *	 column K, K = 1,2,...,MIN(M,N), is of the form 
 *
 *						   T 
 *		   I - (1/U(K))*U*U 
 *
 *	 where U has zeros in the first K-1 positions. The form of 
 *	 this transformation and the method of pivoting first 
 *	 appeared in the corresponding LINPACK subroutine. 
 *
 */

static int dqrfac(
	int m,		/* Number of rows of a[] */
	int n,		/* Number of columns of a[] */
	double *a,	/* m by n array */
	int lda,	/* stride of a[][] */
	bool pivot,	/* TRUE to enforce column pivoting */
	int *ipvt,	/* Pivot output array, size n */
	double *sigma,	/* Output diagonal elements of R, length n */
	double *acnorm,	/* Output norms of A, length n */
	double *wa)		/* Working array size n */
{
	/* Local variables */
	int kmax;
	int i, j, k, minmn;
	double ajnorm;
	int jp1;
	double sum;

	/* Compute the initial column norms and initialize several arrays. */
	for (j = 0; j < n; ++j) {
		acnorm[j] = denorm(m, &a[j * lda]);
		sigma[j] = acnorm[j];
		wa[j] = sigma[j];
		if (pivot)
			ipvt[j] = j;
	}

	/* Reduce a to r with householder transformations. */
	minmn = min(m,n);
	for (j = 0; j < minmn; ++j) {
		if (pivot) {
			/* Bring the column of largest norm into the pivot position. */
			kmax = j;
			for (k = j; k < n; ++k) {
				if (sigma[k] > sigma[kmax]) {
					kmax = k;
				}
			}
			if (kmax != j) {
				for (i = 0; i < m; ++i) {
					double temp;
					temp = a[i + j * lda];
					a[i + j * lda] = a[i + kmax * lda];
					a[i + kmax * lda] = temp;
				}
				sigma[kmax] = sigma[j];
				wa[kmax] = wa[j];
				k = ipvt[j];
				ipvt[j] = ipvt[kmax];
				ipvt[kmax] = k;
			}
		}

		/* Compute the householder transformation to reduce the */
		/* j-th column of a to a multiple of the j-th unit vector. */
		ajnorm = denorm(m - j, &a[j + j * lda]);
		if (ajnorm != 0.0) {
			if (a[j + j * lda] < 0.0)
				ajnorm = -ajnorm;
			for (i = j; i < m; ++i)
				a[i + j * lda] /= ajnorm;
			a[j + j * lda] += 1.0;

			/* Apply the transformation to the remaining columns */
			/* and update the norms. */
			jp1 = j + 1;
			if (n > jp1) {
				for (k = jp1; k < n; ++k) {
					double temp;
					sum = 0.0;
					for (i = j; i < m; ++i)
						sum += a[i + j * lda] * a[i + k * lda];
					temp = sum / a[j + j * lda];
					for (i = j; i < m; ++i)
						a[i + k * lda] -= temp * a[i + j * lda];
					if (pivot && sigma[k] != 0.0) {
						temp = a[j + k * lda] / sigma[k];
						temp = 1.0 - temp * temp;
						sigma[k] *= sqrt((max(0.0,temp)));
						temp = sigma[k] / wa[k];
						if (0.05 * (temp * temp) <= M_DIVER) {
							sigma[k] = denorm(m - jp1, &a[jp1 + k * lda]);
							wa[k] = sigma[k];
						}
					}
				}
			}
		}
		sigma[j] = -ajnorm;
	}
	return 0;
} /* dqrfac_ */

/***************************************************************/
/***************************************************************/