23 files changed, 1862 insertions, 209 deletions
diff --git a/numlib/Jamfile b/numlib/Jamfile
index 2d5edaf..e2a06fd 100755
--- a/numlib/Jamfile
+++ b/numlib/Jamfile
@@ -20,13 +20,13 @@ Headers = numlib.h libui.h ;
 HDRS = ../h ;
 
 # Numeric library
-Library libnum.lib : numsup.c dnsq.c powell.c dhsx.c varmet.c ludecomp.c svd.c zbrent.c rand.c sobol.c aatree.c quadprog.c ;
+Library libnum.lib : numsup.c dnsq.c powell.c dhsx.c varmet.c ludecomp.c svd.c zbrent.c rand.c sobol.c aatree.c quadprog.c gnewt.c roots.c ;
 
 # Link all utilities with libnum
 LINKLIBS = libnum ;
 
 # All test programs are made from a single source file
-MainsFromSources dnsqtest.c tpowell.c tdhsx.c LUtest.c svdtest.c zbrenttest.c soboltest.c qptest.c ;
+MainsFromSources dnsqtest.c tpowell.c tconjgrad.c tdhsx.c LUtest.c svdtest.c zbrenttest.c soboltest.c qptest.c ;
 
 # Compile .c as .m
 if $(OS) = MACOSX {
diff --git a/numlib/afiles b/numlib/afiles
index b2138b2..e427b7d 100755
--- a/numlib/afiles
+++ b/numlib/afiles
@@ -4,6 +4,8 @@ afiles
 Jamfile
 dnsq.c
 dnsq.h
+gnewt.c
+gnewt.h
 dnsqtest.c
 numlib.h
 numsup.c
@@ -11,6 +13,7 @@ numsup.h
 powell.h
 powell.c
 tpowell.c
+tconjgrad.c
 varmet.h
 varmet.c
 dhsx.h
@@ -35,5 +38,7 @@ aatree.c
 quadprog.h
 quadprog.c
 qptest.c
+roots.h
+roots.c
 ui.h
 ui.c
diff --git a/numlib/dhsx.c b/numlib/dhsx.c
index e9da0eb..aa9f462 100755
--- a/numlib/dhsx.c
+++ b/numlib/dhsx.c
@@ -20,7 +20,7 @@
 
 #undef DEBUG
 
-int dhsx_debug = 1;
+int dhsx_debug = 0;
 
 static void simplexinit(int di, double *cp, double **p, double *r, double sv, int ii);
 static double trypoint(int di,double *cp, double **p, double *y, int hix, double hpfac,
@@ -92,9 +92,15 @@ void *fdata				/* Data needed by function */
 			lox = i;
 	}
 	tryy = (*funk)(fdata, cp);	/* Value at initial point */
+#ifdef DEBUG
+	if (dhsx_debug) printf(" initial point %s = %e\n",debPdv(di,cp),tryy);
+#endif /* DEBUG */
 
 	/* If our initial point is better than any of the simplex verticies */
 	if (y[lox] > tryy) {
+#ifdef DEBUG
+		if (dhsx_debug) printf(" initial point is better than surrounding simplex\n");
+#endif /* DEBUG */
 		/* Move all the verticies to match moving lox to cp */
 		for (i = 0; i < nsp; i++) {
 			if (i == lox)
@@ -263,6 +269,9 @@ void *fdata				/* Data needed by function */
 				for (j = 0; j < di; j++)
 					cp[j] = p[lox][j];
 			}
+#ifdef DEBUG
+			else if (dhsx_debug) printf("C point val %f is best\n",tryy);
+#endif
 			free_dvector(y2, 0, nsp-1);
 			free_dmatrix(p2, 0, nsp-1, 0, di-1);
 			free_dvector(y, 0, nsp-1);
diff --git a/numlib/dhsx.h b/numlib/dhsx.h
index 78cdb10..8a4e1ff 100755
--- a/numlib/dhsx.h
+++ b/numlib/dhsx.h
@@ -14,9 +14,9 @@
 /* return 0 on sucess, 1 on failure due to excessive itterations */
 /* Result will be in cp */
 int dhsx(
- double *rv,				/* If not NULL, return the residual error */
- int di,					/* Dimentionality */
- double *cp,				/* Initial starting point, return minimum */
+ double *rv,			/* If not NULL, return the residual error */
+ int di,				/* Dimentionality */
+ double *cp,			/* Initial starting point, return minimum */
  double *s,				/* Size of initial search area */
  double ftol,			/* Finishing tollerance of error change */
  double athr,			/* Absolute return value threshold. (Set high to not use) */
diff --git a/numlib/dnsq.c b/numlib/dnsq.c
index 75f00a8..31bebe4 100755
--- a/numlib/dnsq.c
+++ b/numlib/dnsq.c
@@ -112,6 +112,7 @@ int dnsqe(
 		info = 4;
 	if (info == 0)
 		warning("dnsqe: invalid input parameter.");
+
 	return info;
 } /* dnsqe */
 
diff --git a/numlib/dnsq.h b/numlib/dnsq.h
index e06e562..28dd5de 100755
--- a/numlib/dnsq.h
+++ b/numlib/dnsq.h
@@ -84,7 +84,8 @@ int dnsq_jac(		/* Return < 0 on abort */
 );
 
 #define		M_LARGE 1.79e+308
-#define		M_DIVER 2.22e-15
+#define		M_DIVER 2.22e-15		/* Machine precision (10 x IEEE double) */
+#define		M_SQRT_DIVER 4.71e-8	/* sqrt(M_DIVER) */
 
 /* Simplified dnsq() */
 int dnsqe(
diff --git a/numlib/dnsqtest.c b/numlib/dnsqtest.c
index 3f02819..f05bed4 100755
--- a/numlib/dnsqtest.c
+++ b/numlib/dnsqtest.c
@@ -74,7 +74,7 @@ int main(void)
 	/*	 Unless high precision solutions are required, */
 	/*	 this is the recommended setting. */
 
-	tol = sqrt(M_DIVER);
+	tol = M_SQRT_DIVER;
 
 	info = dnsqe(NULL, fcn, NULL, n, x, ss, fvec, tol, tol, 0, nprint);
 	fnorm = denorm(n, fvec);
diff --git a/numlib/gnewt.c b/numlib/gnewt.c
new file mode 100755
index 0000000..c95cec7
--- /dev/null
+++ b/numlib/gnewt.c
@@ -0,0 +1,413 @@
+
+/*
+ * Copyright 2018 Graeme Gill
+ * All rights reserved.
+ *
+ * This material is licenced under the GNU AFFERO GENERAL PUBLIC LICENSE Version 3 :-
+ * see the License.txt file for licencing details.
+ */
+
+#include "numsup.h"
+#include "ludecomp.h"
+#include "gnewt.h"		/* Public interface definitions */
+
+#undef DEBUG
+
+#ifdef DEBUG
+# define DBG(xx)	printf xx;
+#else
+# define DBG(xx)
+#endif
+
+
+#define TOLX 1.0e-7 	/* Convergence criterion on delx */
+#define STPMX 100.0		/* Maximum step multiplier */
+
+static void apxjac(int n, double *x, double *fvec, double **df, void *fdata,
+	void (*fcn)(void *fdata, int n, double *x, double *fvec));
+
+static int linesearch(int n, double *xold, double fold, double *delf, double *delx,
+	double *x, double *fvec, double *fp, double maxstep, void *fdata,
+	void (*fcn)(void *fdata, int n, double *x, double *f), int *pfit, int maxjac, int it);
+
+#define FMAX(A, B) ((A) > (B) ? (A) : (B))
+
+int gnewt(
+	void *fdata,	/* Opaque pointer to pass to fcn() and jac() */
+	void (*fcn)(void *fdata, int n, double *x, double *fvec),
+					/* Pointer to function we are solving */
+	void (*jac)(void *fdata, int n, double *x, double **fjac),
+					/* Function to compute jacobian */
+	int n,			/* Number of functions and variables */
+	double x[],		/* Initial solution estimate, returns final solution */
+	double rfvec[],	/* Optionaly return soln. function values */
+	double xtol,	/* Desired tollerance of root */
+	double ftol,	/* Desired tollerance of the solution */
+	int maxfcn,		/* Maximum number of function itterations */
+	int maxjac		/* Maximum number of jacobian itterations */
+) {
+	int i, j, it, fit, jit, *pivx, _pivx[10];
+	double f, fold;				/* half magnitide squared of fvec[] */
+	double *delf, _delf[10];	/* del f where f = 0.5 F.F */
+	double *fvec, _fvec[10];	/* F(x) */
+	double **fjac, *_fjac[11], __fjac[10 * 10];
+	double *xold, _xold[10];
+	double bigfx, bigx, maxstep;
+	double *delx, _delx[10];	/* Full step delta x */
+	double sum;
+	int rv = 0;
+	
+#ifdef DEBUG
+	double *fvec_check;
+	double **fjac_check;
+#endif
+
+	DBG(("gnewt:\n"))
+
+	fit = jit = 0;
+
+	/* Do local vector/array allocations */
+	if (n <= 10) {
+		pivx = _pivx;
+		if (rfvec == NULL) {
+			fvec = _fvec;
+		} else
+			fvec = rfvec;
+		_fjac[0] = __fjac;
+		fjac = _fjac+1;		/* dmatrix_reset() will setup fjac */
+		xold = _xold;
+		delf = _delf;
+		delx = _delx;
+	} else {
+		pivx = ivector(0, n-1);		/* LU decomp. pivod record */
+		if (rfvec == NULL) {
+			fvec = dvector(0, n-1);				/* Function value */
+		} else
+			fvec = rfvec;
+		fjac = dmatrix(0, n-1, 0, n-1);		/* Jacobian matrix */
+		xold = dvector(0, n-1);				/* Previous value of x[] */
+		delf = dvector(0, n-1);				/* del f */
+		delx = dvector(0, n-1);				/* Full step delta x */
+	}
+#ifdef DEBUG
+	fvec_check = dvector(0, n-1);
+	fjac_check = dmatrix(0, n-1, 0, n-1);
+#endif
+
+	/* Initial function value */
+	fcn(fdata, n, x, fvec);
+	fit++;
+
+	DBG((" x %s\n",debPdv(n,x)))
+	DBG((" fvec %s\n",debPdv(n,fvec)))
+
+	/* Compute half magnitide squared of function value at x */
+	for (sum = 0.0, i = 0; i < n; i++)
+		sum += fvec[i] * fvec[i];
+	f = 0.5 * sum;
+	DBG((" f %f\n",f))
+
+	/* test for initial value being a root */
+	for (bigfx = 0.0, i = 0; i < n; i++) {
+		double tt = fabs(fvec[i]);
+		if (tt > bigfx)
+			bigfx = tt;
+	}
+	if (bigfx < (0.01 * ftol)) {
+		goto done;
+	}
+
+	/* Compute line search x maximum step size */
+	for (sum = 0.0, i = 0 ; i < n; i++)
+		sum += x[i] * x[i];
+	maxstep = STPMX * FMAX(sqrt(sum), (double)n);
+	DBG((" maxstep %f\n",maxstep))
+
+	/* Until we are done */
+	for (it = 0; fit < maxfcn && jit < maxjac; it++) {
+		double rip;
+
+		DBG((" fit %d jit %d\n",fit,jit))
+
+		/* Compute Jacobian matrix */
+		if (jac != NULL) {
+			/* lu_decomp may have swapped rows - so fix it */
+			dmatrix_reset(fjac, 0, n-1, 0, n-1);
+			jac(fdata, n, x, fjac);				/* User function */
+		} else {
+			apxjac(n, x, fvec, fjac, fdata, fcn);	/* Numerical aproximation */
+		}
+		jit++;
+
+#ifdef DEBUG
+		copy_dmatrix(fjac_check, fjac, 0, n-1, 0, n-1);
+
+		DBG((" fjac = \n"))
+		for (i = 0; i < n; i++)
+			DBG(("  %d: %s\n",i,debPdv(n, fjac[i])))
+		DBG(("\n"))
+#endif
+
+		/* Compute del f for the line search. */
+		for (i = 0; i < n; i++) {
+			for (sum = 0.0, j = 0; j < n; j++)
+				sum += fjac[j][i] * fvec[j];		/* Hmm. df/dx . f */
+			delf[i] = sum;
+		}
+
+		/* Save current values of x and f to be able to monitor progres */
+		for (i = 0; i < n; i++)
+			xold[i] = x[i];
+		fold = f;
+
+		/* Desired delta f to make F(x) == 0 */
+		for (i = 0; i < n; i++)
+			delx[i] = -fvec[i];
+		DBG((" -fvec %s\n",debPdv(n,delx)))
+
+		/* Solve for delta x using Jacobian and desired delta f */
+		if (lu_decomp(fjac, n, pivx, &rip)) {
+			rv = 2;
+			goto done;
+		}
+		lu_backsub(fjac, n, pivx, delx);
+		DBG((" delx %s\n",debPdv(n,delx)))
+
+#ifdef DEBUG
+		matrix_vect_mult(fvec_check, n, fjac_check, n, n, delx, n);
+		DBG((" check -fvec : %s\n\n",debPdv(n,fvec_check)))
+#endif
+
+		if ((rv = linesearch(n, xold, fold, delf, delx, x, fvec, &f, maxstep, fdata, fcn,
+		                                                          &fit, maxfcn, it)) != 0) {
+			if (rv != 1) {	/* Not run out of itterations error */
+			DBG((" linesearch failed with %d\n",rv))
+				goto done;
+			}
+		}
+
+		DBG((" after linesearch:\n"))
+		DBG((" x %s\n",debPdv(n,x)))
+		DBG((" fvec %s\n",debPdv(n,fvec)))
+
+		/* See if f() has converged */
+		for (bigfx = 0.0, i = 0; i < n; i++) {
+			if (fabs(fvec[i]) > bigfx)
+				bigfx = fabs(fvec[i]);
+		}
+		DBG((" bigfx %f ftol %f\n",bigfx,ftol))
+		if (bigfx < ftol) {
+			goto done;
+		}
+
+		/* Could check for zero gradient problem here... */
+
+		/* See if x[] has converged */
+		for (bigx = 0.0, i = 0; i < n; i++) {
+			double tt = (fabs(x[i] - xold[i]))/FMAX(fabs(x[i]), 1.0);
+			if (tt > bigx)
+				bigx = tt;
+		}
+		DBG((" bigx %f xtol %f\n",bigx,xtol))
+		if (bigx < xtol)
+			goto done;
+	}
+
+	rv = 1;
+
+  done:;
+
+	if (n > 10) {
+		if (fvec != rfvec)
+			free_dvector(fvec, 0, n-1);
+		free_dvector(xold, 0, n-1);
+		free_dvector(delx, 0, n-1);
+		free_dvector(delf, 0, n-1);
+		free_dmatrix(fjac, 0, n-1, 0, n-1);
+		free_ivector(pivx, 0, n-1);
+	}
+#ifdef DEBUG
+	free_dvector(fvec_check,0, n-1);
+	free_dmatrix(fjac_check,0, n-1, 0, n-1);
+#endif
+
+	return rv;
+}
+
+/* - - - - - - - - */
+
+#define ALF 1.0e-4	/* Ensures sufficient decrease in function value. */
+
+/* Search for a step size that makes progress */
+/* Return nz on error */
+static int linesearch(
+	int n,
+	double *xold,
+	double fold,
+	double *delf,	/* del f */
+	double *delx,	/* full step delta x[] */
+	double *x,		/* in/out current x[] */
+	double *fvec,	/* return fvec at x[] */
+	double *fp,		/* in/out f value */
+	double maxstep,	/* maximum x step */
+	void *fdata,	/* Context for fcn */
+	void (*fcn)(void *fdata, int n, double *x, double *f),
+	int *pfit,		/* Inc number of itts */
+	int maxfcn,		/* Max function its */
+	int it			/* Caller iteration count */
+) {
+	int i;
+	double f = *fp, f2; 
+	double lmda1, lmda2, min_lmda;
+	double sum, slope, bigx;
+
+	DBG(("linesearch:\n"))
+
+	/* Comute magnitude of step */
+	for (sum = 0.0, i = 0; i < n; i++)
+		sum += delx[i] * delx[i];
+	sum = sqrt(sum);
+
+	/* re-scale if step is too big */
+	if (sum > maxstep) {
+		for (i = 0; i < n; i++)
+			delx[i] *= maxstep/sum;
+	}
+
+	for (slope = 0.0, i = 0; i < n; i++)
+		slope += delf[i] * delx[i];
+	if (slope >= 0.0) {
+		DBG((" slope %f >= 0.0\n",slope))
+		return 3;
+	}
+
+	bigx = 0.0;
+	for (i = 0;i < n; i++) {
+		double tt = fabs(delx[i])/FMAX(fabs(xold[i]), 1.0);
+		if (tt > bigx)
+			bigx = tt;
+	}
+	min_lmda = TOLX/bigx;
+
+	/* Try full Newton step first */
+	lmda1 = 1.0;
+
+	DBG((" lmda1 %f min_lmda %f\n",lmda1, min_lmda))
+
+	/* Top of loop */
+	for (; *pfit < maxfcn; it++) {
+		double tmp_lmda;
+
+		DBG(("  lmda1 %f\n",lmda1))
+
+		/* Take step */
+		for (i = 0;i < n;i++)
+			x[i] = xold[i] + lmda1 * delx[i];
+
+		/* Compute f = 0.5 F.F at x */
+		fcn(fdata, n, x, fvec);
+		(*pfit)++;
+		DBG(("  x %s\n",debPdv(n,x)))
+		DBG(("  fvec %s\n",debPdv(n,fvec)))
+		for (sum = 0.0, i = 0; i < n; i++)
+			sum += fvec[i] * fvec[i];
+		f = 0.5 * sum;
+
+//if (it == 0) printf(" linesearch: At 1st full step f %f -> %f\n", *fp, f);
+
+		/* Convergence on delx. */
+		if (lmda1 < min_lmda) {
+
+			for (i = 0; i < n; i++)
+				x[i] = xold[i];
+			return 0;
+
+		} else if (f <= fold + ALF * lmda1 * slope) {
+			*fp = f;
+			return 0;			/* Sufficient function decrease */
+
+		} else {				/* Backtrack. */
+			if (lmda1 == 1.0)	/* First time */
+				tmp_lmda = -slope/(2.0 * (f - fold-slope));
+
+			else {			/* Subsequent backtracks */
+				double c, d, e;
+				double a, b, rhs1, rhs2;
+
+				rhs1 = f - fold - slope * lmda1;
+				rhs2 = f2 - fold - slope * lmda2;
+				c = rhs1/(lmda1 * lmda1);
+				d = rhs2/(lmda2 * lmda2);
+				e = lmda1 - lmda2;
+				a = (c - d)/e;
+				b = (-lmda2 * c + lmda1 * d)/e;
+				if (a == 0.0)
+					tmp_lmda = -slope/(2.0 * b);
+				else {
+					double disc = b * b - 3.0 * a * slope;
+
+					if (disc < 0.0)
+						tmp_lmda = 0.5 * lmda1;
+					else if (b <= 0.0)
+						tmp_lmda = (-b + sqrt(disc))/(3.0 * a);
+					else
+						tmp_lmda = -slope/(b + sqrt(disc));
+				}
+				if (tmp_lmda > 0.5 * lmda1)
+					tmp_lmda = 0.5 * lmda1;
+			}
+		}
+		lmda2 = lmda1;
+		lmda1 = FMAX(tmp_lmda, lmda1 * 0.1);
+		f2 = f;
+	}
+
+	*fp = f;
+
+	return 1;
+}
+
+/* - - - - - - - - */
+
+/* Compute forward difference as aprox. Jacobian matrix */
+
+#define JEPS 1.0e-8	/* Aprox. sqrt of machine precision */
+
+static void apxjac(
+	int n,					/* Dimensions */
+	double *x,				/* Location x to compute Jacobian */
+	double *fvec,			/* Function value at x */
+	double **df,			/* Return Jacobian */
+	void *fdata,			/* fcn() context */
+	void (*fcn)(void *fdata, int n, double *x, double *fvec)
+) {
+	int i, j;
+	double h, temp, *f, _f[10];
+
+	if (n <= 10)
+		f = _f;
+	else
+		f = dvector(0, n);
+
+	for (j = 0; j < n; j++) {
+		temp = x[j];
+
+		h = JEPS * fabs(temp);
+		if (h == 0.0)
+			h = JEPS;
+		x[j] = temp + h;		/* Add delta */
+		h = x[j] - temp;		/* Actual delta with fp precision limits */
+
+		fcn(fdata, n, x, f);
+
+		x[j] = temp;			/* Restore value */
+
+		for (i = 0; i < n; i++)
+			df[i][j] = (f[i] - fvec[i])/h;
+	}
+
+	if (f != _f)
+		free_dvector(f, 0, n-1);
+}
+
+
diff --git a/numlib/gnewt.h b/numlib/gnewt.h
new file mode 100755
index 0000000..322d511
--- /dev/null
+++ b/numlib/gnewt.h
@@ -0,0 +1,59 @@
+#ifndef GNEWT_H
+#define GNEWT_H
+
+/* Global newton non-linear equation solver */
+
+/*
+ * Copyright 2018 Graeme W. Gill
+ * All rights reserved.
+ *
+ * This material is licenced under the GNU AFFERO GENERAL PUBLIC LICENSE Version 3 :-
+ * see the License.txt file for licencing details.
+ */
+
+#ifdef __cplusplus
+	extern "C" {
+#endif
+
+/*
+
+	return values:
+
+ 	0  Success
+
+	1  Ran out of itterations
+
+	2  Inverting Jacobian failed
+
+ */
+
+int gnewt(
+	void *fdata,	/* Opaque pointer to pass to fcn() and jac() */
+	void (*fcn)(void *fdata, int n, double *x, double *fvec),
+					/* Pointer to function we are solving */
+	void (*jac)(void *fdata, int n, double *x, double **fjac),
+					/* Function to compute jacobian */
+	int n,			/* Number of functions and variables */
+	double x[],		/* Initial solution estimate, returns final solution */
+	double rfvec[],	/* Optionaly return soln. function values */
+	double xtol,	/* Desired tollerance of root */
+	double ftol,	/* Desired tollerance of the solution */
+	int maxfcn,		/* Maximum number of function itterations */
+	int maxjac		/* Maximum number of jacobian itterations */
+);
+
+#ifdef __cplusplus
+	}
+#endif
+
+#endif /* GNEWT_H */
+
+
+
+
+
+
+
+
+
+
diff --git a/numlib/ludecomp.c b/numlib/ludecomp.c
index b756bfe..06fc6a8 100755
--- a/numlib/ludecomp.c
+++ b/numlib/ludecomp.c
@@ -163,6 +163,7 @@ int      n	/* Dimensionality */
 
 /* Decompose the square matrix A[][] into lower and upper triangles */
 /* NOTE that it returns transposed inverse by normal convention. */
+/* NOTE that rows get swaped by swapping matrix pointers! */ 
 /* Use sym_matrix_trans() to fix this. */
 /* Return 1 if the matrix is singular. */
 int
@@ -260,14 +261,14 @@ double  *rip	/* Row interchange parity, +/- 1.0, used for determinant */
 	return 0;
 }
 
-/* Solve a set of simultaneous equations from the */
+/* Solve a set of simultaneous equations A.x = b from the */
 /* LU decomposition, by back substitution. */
 void
 lu_backsub(
 double **a,		/* A[][] LU decomposed matrix */
 int      n,		/* Dimensionality */
 int     *pivx,	/* Pivoting row permutations record */
-double  *b		/* Input B[] vecor, return X[] */
+double  *b		/* Input B[] vector, return X[] */
 ) {
 	int i, j;
 	int nvi;		/* When >= 0, indicates non-vanishing B[] index */
@@ -441,15 +442,13 @@ int      n	/* Dimensionality */
 }
 
 /* Pseudo-Invert matrix A using lu decomposition */
-/* NOTE that it returns transposed inverse by normal convention. */
-/* Use matrix_trans() to fix this, or use matrix_trans_mult(). */
 /* Return 1 if the matrix is singular, 0 if OK */
 int
 lu_psinvert(
 double **out,	/* Output[0..N-1][0..M-1] */
 double **in,	/*  Input[0..M-1][0..N-1] input matrix */
-int      m,		/* Rows */
-int      n		/* Columns */
+int      m,		/* In Rows */
+int      n		/* In Columns */
 ) {
 	int rv = 0;
 	double **tr;		/* Transpose */
@@ -490,13 +489,73 @@ int      n		/* Columns */
 		}
 		free_dmatrix(sq, 0, m-1, 0, m-1);
 	}
-
 	free_dmatrix(tr, 0, n-1, 0, m-1);
 
 	return rv;
 }
 
 
+/* ----------------------------------------------------------------- */
+
+// ~~~ Hmm. Need to verify this code is correct...
+
+/* Use Cholesky decomposition on a symetric positive-definite matrix. */
+/* Only the upper triangle of the matrix A is accessed. */
+/* L returns the decomposition */
+/* Return nz if A is not positive-definite */
+int llt_decomp(double **L, double **A, int n) {
+	int i, j, k;
+	double sum;
+
+	/* Scan though upper triangle */
+	for (i = 0; i < n; i++) {
+
+		for (j = i; j < n; j++) {
+
+			sum = A[i][j];
+			for (k = i-1; k >= 0; k--) {
+				sum -= A[i][k] * A[j][k];
+			}
+
+			if (i != j) {
+				L[j][i] = sum/L[i][i];
+			} else {
+				if (sum <= 0.0)
+					return 1;
+				L[i][i] = sqrt(sum);
+			}
+		}
+	}
+	return 0;
+}
+
+/* Solve a set of simultaneous equations A.x = b from the */
+/* LLt decomposition, by back substitution. */
+void llt_backsub(
+double **L,			/* A[][] LLt decomposition in lower triangle */
+int n,				/* Dimensionality */
+double *b,			/* Input B[] */
+double *x			/* Return X[] (may be same as B[]) */
+) {
+	int i, k;
+	double sum;
+
+	/* Solve L.y = b, storing y in x. */
+	for (i = 0; i < n; i++) {
+		sum = b[i];
+		for (k = i-1; k >= 0; k--)
+			sum -= L[i][k] * x[k];
+		x[i] = sum/L[i][i];
+	}
+
+	/* Solve Lt.x = y */
+	for (i = n; i >= 0; i--) {
+		sum = x[i];
+		for (k = i+1 ; k < n; k++)
+			sum -= L[k][i] * x[k];
+		x[i] = sum/L[i][i];
+	}
+}
 
 
 
diff --git a/numlib/ludecomp.h b/numlib/ludecomp.h
index 1075a5b..3f92ef8 100755
--- a/numlib/ludecomp.h
+++ b/numlib/ludecomp.h
@@ -38,6 +38,7 @@ int      n	/* Dimensionality */
 );
 
 /* Decompose the square matrix A[][] into lower and upper triangles */
+/* NOTE that rows get swaped by swapping matrix pointers! */
 /* Return 1 if the matrix is singular. */
 int
 lu_decomp(
@@ -89,17 +90,34 @@ int      n	/* Dimensionality */
 );
 
 /* Pseudo-Invert matrix A using lu decomposition */
-/* NOTE that it returns transposed inverse by normal convention. */
-/* Use matrix_trans() to fix this, or use matrix_trans_mult(). */ 
 /* Return 1 if the matrix is singular, 0 if OK */
 int
 lu_psinvert(
 double **out,	/* Output[0..N-1][0..M-1] */
 double **in,	/*  Input[0..M-1][0..N-1] input matrix */
-int      m,		/* Rows */
-int      n		/* Columns */
+int      m,		/* In Rows */
+int      n		/* In Columns */
 );
 
+/* - - - - - - - - - - - - - - - - - - - */
+
+/* Use Cholesky decomposition on a symetric positive-definite matrix. */
+/* Only the upper triangle of the matrix A is accessed. */
+/* L returns the decomposition */
+/* Return nz if A is not positive-definite */
+int llt_decomp(double **L, double **A, int n);
+
+
+/* Solve a set of simultaneous equations A.x = b from the */
+/* LLt decomposition, by back substitution. */
+void llt_backsub(
+double **L,			/* A[][] LLt decomposition in lower triangle */
+int n,				/* Dimensionality */
+double *b,			/* Input B[] */
+double *x			/* Return X[] (may be same as B[]) */
+);
+
+
 #ifdef __cplusplus
 	}
 #endif
diff --git a/numlib/numlib.h b/numlib/numlib.h
index 430ce7a..28ec9ab 100755
--- a/numlib/numlib.h
+++ b/numlib/numlib.h
@@ -6,6 +6,7 @@
 
 #include "numsup.h"		/* Support routines, macros */
 #include "dnsq.h"		/* Non-linear equation solver */
+#include "gnewt.h"		/* Global Newton non-linear equation solver */
 #include "powell.h"		/* Powell multi dimentional minimiser */
 #include "dhsx.h"		/* Downhill simplex multi dimentional minimiser */
 #include "varmet.h"		/* Variable Metric multi dimentional minimiser */
@@ -16,5 +17,6 @@
 #include "sobol.h"		/* Sub-random vector generators */
 #include "aatree.h"		/* Anderson balanced binary tree */
 #include "quadprog.h"	/* Quadradic Programming solution */
+#include "roots.h"		/* Quadratic, Cubic and Quartic root solving */
 
 #endif /* NUMLIB_H */
diff --git a/numlib/numsup.c b/numlib/numsup.c
index eab81ed..ce76337 100755
--- a/numlib/numsup.c
+++ b/numlib/numsup.c
@@ -1080,6 +1080,29 @@ int nch
 	free((char *)(m+nrl-1));
 }
 
+/* In case rows have been swapped, reset the pointers */
+void dmatrix_reset(
+double **m,
+int nrl,	/* Row low index */
+int nrh,	/* Row high index */
+int ncl,	/* Col low index */
+int nch		/* Col high index */
+) {
+	int i;
+	int cols;
+
+	if (nrh < nrl)	/* Prevent failure for 0 dimension */
+		nrh = nrl;
+	if (nch < ncl)
+		nch = ncl;
+
+	cols = nch - ncl + 1;
+
+	m[nrl] = m[nrl-1] - ncl;		/* Set first row address, offset to ncl */
+	for(i = nrl+1; i <= nrh; i++)	/* Set subsequent row addresses */
+		m[i] = m[i-1] + cols;
+}
+
 /* --------------------- */
 /* 2D diagonal half matrix vector malloc/free */
 /* A half matrix must have equal rows and columns, */
@@ -1854,19 +1877,61 @@ int matrix_vect_mult(
 	return 0;
 }
 
+/* Multiply a 0 based transposed matrix by a vector */
+/* d may be same as v */
+int matrix_trans_vect_mult(
+	double *d, int nd,
+	double **m, int nr, int nc,
+	double *v, int nv
+) {
+	int i, j;
+	double *_v = v, vv[20];
+
+	if (d == v) {
+		if (nv <= 20) {
+			_v = vv;
+		} else {
+			_v = dvector(0, nv-1);
+		}
+		for (j = 0; j < nv; j++)
+			_v[j]  = v[j];
+	}
+
+	/* Input vector must match matrix columns */
+	if (nv != nr)
+		return 1;
+
+	/* Output vector must match matrix rows */
+	if (nd != nc)
+		return 1;
+
+	for (i = 0; i < nd; i++) {
+		d[i] = 0.0;  
+		for (j = 0; j < nv; j++)
+			d[i] += m[j][i] * _v[j];
+	}
+
+	if (_v != v && _v != vv)
+		free_dvector(_v, 0, nv-1);
+
+	return 0;
+}
+
 
 /* Set zero based dvector */
 void vect_set(double *d, double v, int len) {
-	int i;
-	for (i = 0; i < len; i++)
-		d[i] = v;
+	if (v == 0.0)
+		memset((char *)d, 0, len * sizeof(double));
+	else {
+		int i;
+		for (i = 0; i < len; i++)
+			d[i] = v;
+	}
 }
 
 /* Copy zero based dvector */
 void vect_cpy(double *d, double *s, int len) {
-	int i;
-	for (i = 0; i < len; i++)
-		d[i] = s[i];
+	memmove((char *)d, (char *)s, len * sizeof(double));
 }
 
 
@@ -1900,6 +1965,35 @@ void vect_sub(
 		d[i] -= v[i];
 }
 
+/* Scale a vector, */
+/* d may be same as v */
+void vect_scale(double *d, double *s, double scale, int len) {
+	int i;
+
+	for (i = 0; i < len; i++)
+		d[i] = s[i] * scale;
+}
+
+/* Take dot product of two vectors */
+double vect_dot(double *s1, double *s2, int len) {
+	int i;
+	double rv = 0.0;
+	for (i = 0; i < len; i++)
+		rv += s1[i] * s2[i];
+	return rv;
+}
+
+/* Return the vectors magnitude */
+double vect_mag(double *s, int len) {
+	int i;
+	double rv = 0.0;
+
+	for (i = 0; i < len; i++)
+		rv += s[i] * s[i];
+
+	return sqrt(rv);
+}
+
 
 /* - - - - - - - - - - - - - - - - - - - - - - */
 
@@ -2019,6 +2113,85 @@ void adump_svector(a1log *log, char *id, char *pfx, short *a, int nc) {
 	a1logd(g_log, 0, "\n");
 }
 
+/* ============================================================================ */
+/* C matrix support */
+
+/* Clip a vector to the range 0.0 .. 1.0 */
+/* and return any clipping margine */
+double vect_ClipNmarg(int n, double *out, double *in) {
+	int j;
+	double tt, marg = 0.0;
+	for (j = 0; j < n; j++) {
+		out[j] = in[j];
+		if (out[j] < 0.0) {
+			tt = 0.0 - out[j];
+			out[j] = 0.0;
+			if (tt > marg)
+				marg = tt;
+		} else if (out[j] > 1.0) {
+			tt = out[j] - 1.0;
+			out[j] = 1.0;
+			if (tt > marg)
+				marg = tt;
+		}
+	}
+	return marg;
+}
+
+/* 
+
+  mat     in    out
+
+[     ]   []    []
+[     ]   []    []
+[     ] * [] => []
+[     ]   []    []
+[     ]   []    []
+
+ */
+
+/* Multiply N vector by NxN transform matrix */
+/* Organization is mat[out][in] */
+void vect_MulByNxN(int n, double *out, double *mat, double *in) {
+	int i, j;
+	double _tt[20], *tt = _tt;
+
+	if (n > 20)
+		tt = dvector(0, n-1);
+
+	for (i = 0; i < n; i++) {
+		tt[i] = 0.0;
+		for (j = 0; j < n; j++)
+			tt[i] += mat[i * n + j] * in[j];
+	}
+
+	for (i = 0; i < n; i++)
+		out[i] = tt[i];
+
+	if (n > 20)
+		free_dvector(tt, 0, n-1);
+}
+
+/* Transpose an NxN matrix */
+void matrix_TransposeNxN(int n, double *out, double *in) {
+	int i, j;
+
+	if (in != out) {
+		for (i = 0; i < n; i++)
+			for (j = 0; j < n; j++)
+				out[i * n + j] = in[j * n + i];
+	} else {
+		for (i = 0; i < n; i++) {
+			for (j = i+1; j < n; j++) {
+				double tt;
+				tt = out[i * n + j];
+				out[i * n + j] = out[j * n + i];
+				out[j * n + i] = tt;
+			}
+		}
+	}
+}
+
 /* Print C double matrix to g_log debug */
 /* id identifies matrix */
 /* pfx used at start of each line */
@@ -2694,9 +2867,9 @@ char *debPiv(int di, int *p) {
 	return buf[ix];
 }
 
-/* Print a double color vector to a string. */
+/* Print a double color vector to a string with format. */
 /* Returned static buffer is re-used every 5 calls. */
-char *debPdv(int di, double *p) {
+char *debPdvf(int di, char *fmt, double *p) {
 	static char buf[5][DEB_MAX_CHAN * 100];
 	static int ix = 0;
 	int e;
@@ -2705,6 +2878,9 @@ char *debPdv(int di, double *p) {
 	if (p == NULL)
 		return "(null)";
 
+	if (fmt == NULL)
+		fmt = "%.8f";
+
 	if (++ix >= 5)
 		ix = 0;
 	bp = buf[ix];
@@ -2715,11 +2891,17 @@ char *debPdv(int di, double *p) {
 	for (e = 0; e < di; e++) {
 		if (e > 0)
 			*bp++ = ' ';
-		sprintf(bp, "%.8f", p[e]); bp += strlen(bp);
+		sprintf(bp, fmt, p[e]); bp += strlen(bp);
 	}
 	return buf[ix];
 }
 
+/* Print a double color vector to a string. */
+/* Returned static buffer is re-used every 5 calls. */
+char *debPdv(int di, double *p) {
+	return debPdvf(di, NULL, p);
+}
+
 /* Print a float color vector to a string. */
 /* Returned static buffer is re-used every 5 calls. */
 char *debPfv(int di, float *p) {
diff --git a/numlib/numsup.h b/numlib/numsup.h
index 86ad6a8..972d5b3 100755
--- a/numlib/numsup.h
+++ b/numlib/numsup.h
@@ -411,6 +411,7 @@ void free_dvector(double *v,int nl,int nh);
 double **dmatrix(int nrl, int nrh, int ncl, int nch);
 double **dmatrixz(int nrl, int nrh, int ncl, int nch);
 void free_dmatrix(double **m, int nrl, int nrh, int ncl, int nch);
+void dmatrix_reset(double **m, int nrl, int nrh, int ncl, int nch);
 
 double **dhmatrix(int nrl, int nrh, int ncl, int nch);
 double **dhmatrixz(int nrl, int nrh, int ncl, int nch);
@@ -486,6 +487,14 @@ int matrix_vect_mult(
 	double *v, int nv
 );
 
+/* Multiply a 0 based transposed matrix by a vector */
+/* d may be same as v */
+int matrix_trans_vect_mult(
+	double *d, int nd,
+	double **m, int nr, int nc,
+	double *v, int nv
+);
+
 /* Set zero based dvector */
 void vect_set(double *d, double v, int len);
 
@@ -498,16 +507,21 @@ void vect_neg(double *d, double *s, int len);
 
 /* Add two vectors, d += v */
 /* d may be same as v */
-void vect_add(
-	double *d, double *v, int len
-);
+void vect_add(double *d, double *v, int len);
 
 /* Subtract two vectors, d -= v */
 /* d may be same as v */
-void vect_sub(
-	double *d, double *v, int len
-);
+void vect_sub(double *d, double *v, int len);
+
+/* Scale a vector, */
+/* d may be same as v */
+void vect_scale(double *d, double *s, double scale, int len);
+
+/* Take dot product of two vectors */
+double vect_dot(double *s1, double *s2, int len);
 
+/* Return the vectors magnitude */
+double vect_mag(double *s, int len);
 
 /* Diagnostics */
 /* id identifies matrix/vector */
@@ -524,6 +538,20 @@ void adump_fvector(a1log *log, char *id, char *pfx, float *a, int nc);
 void adump_ivector(a1log *log, char *id, char *pfx, int *a, int nc);
 void adump_svector(a1log *log, char *id, char *pfx, short *a, int nc);
 
+/* ===================================================== */
+/* C matrix support */
+
+/* Clip a vector to the range 0.0 .. 1.0 */
+/* and return any clipping margine */
+double vect_ClipNmarg(int n, double *out, double *in);
+
+/* Multiply N vector by NxN transform matrix */
+/* Organization is mat[out][in] */
+void vect_MulByNxN(int n, double *out, double *mat, double *in);
+
+/* Transpose an NxN matrix */
+void matrix_TransposeNxN(int n, double *out, double *in);
+
 /* Dump C type matrix */
 void adump_C_dmatrix(a1log *log, char *id, char *pfx, double *a, int nr,  int nc);
 
@@ -631,10 +659,15 @@ char *debPiv(int di, int *p);
 /* Returned static buffer is re-used every 5 calls. */
 char *debPdv(int di, double *p);
 
+/* Print a double vector to a string with format. */
+/* Returned static buffer is re-used every 5 calls. */
+char *debPdvf(int di, char *fmt, double *p);
+
 /* Print a float vector to a string. */
 /* Returned static buffer is re-used every 5 calls. */
 char *debPfv(int di, float *p);
 
+
 /*******************************************/
 /* Numerical diagnostics */
 
diff --git a/numlib/powell.c b/numlib/powell.c
index 47acc15..7fb57a7 100755
--- a/numlib/powell.c
+++ b/numlib/powell.c
@@ -40,8 +40,10 @@
 #include "numsup.h"
 #include "powell.h"
 
-#undef SLOPE_SANITY_CHECK		/* exermental */
-#undef ABSTOL					/* Make tollerance absolute */
+#undef SLOPE_SANITY_CHECK		/* [und] expermental */
+#undef ABSTOL					/* [und] Make tollerance absolute */
+#undef USE_LINMIND				/* [und] Use limind for conjgrad (typically slower) */
+
 								/* Some debugging printfs (not comprehensive): */
 #undef PDEBUG					/* Powell debug */
 #undef CDEBUG					/* Conjgrad debug */
@@ -71,6 +73,7 @@
 # define LDBG(xxx) 
 #endif
 
+/* -------------------------------------- */
 /* Standard interface for powell function */
 /* return 0 on sucess, 1 on failure due to excessive itterions */
 /* Result will be in cp */
@@ -91,10 +94,10 @@ void (*prog)(void *pdata, int perc),		/* Optional progress percentage callback *
 void *pdata				/* Opaque data needed by prog() */
 ) {
 	int i;
-	double **dmtx;			/* Direction vector */
-	double *spt;			/* Sarting point before exploring all the directions */
-	double *xpt;			/* Extrapolated point */
-	double *svec;			/* Search vector */
+	double **dmtx, *_dmtx[10], __dmtx[10 * 10] = { 0.0 };		/* Direction vector */
+	double *spt, _spt[10];			/* Sarting point before exploring all the directions */
+	double *xpt, _xpt[10];			/* Extrapolated point */
+	double *svec, _svec[10];		/* Search vector */
 	int    iter;
 	double retv; 			/* Returned function value at p */
 	double stopth;			/* Current stop threshold */
@@ -102,10 +105,20 @@ void *pdata				/* Opaque data needed by prog() */
 	double curdel;			/* Current change in function value */
 	int pc = 0;				/* Percentage complete */
 
-	dmtx = dmatrixz(0, di-1, 0, di-1);	/* Zero filled */
-	spt  = dvector(0,di-1);
-	xpt  = dvector(0,di-1);
-	svec = dvector(0,di-1);
+	if (di <= 10) {
+		int j;
+		for (j = i = 0; i < di; i++, j += di)
+			_dmtx[i] = __dmtx + j;
+		dmtx = _dmtx;
+		spt  = _spt;
+		xpt  = _xpt;
+		svec = _svec;
+	} else {
+		dmtx = dmatrixz(0, di-1, 0, di-1);	/* Zero filled */
+		spt  = dvector(0, di-1);
+		xpt  = dvector(0, di-1);
+		svec = dvector(0, di-1);
+	}
 
 	/* Create initial direction matrix by */
 	/* placing search start on diagonal */
@@ -213,11 +226,14 @@ void *pdata				/* Opaque data needed by prog() */
 	}
 
 //printf("~1 iters = %d\n",iter);
+
 	/* Free up all the temporary vectors and matrix */
-	free_dvector(svec,0,di-1);
-	free_dvector(xpt,0,di-1);
-	free_dvector(spt,0,di-1);
-	free_dmatrix(dmtx, 0, di-1, 0, di-1);
+	if (di > 10) {
+		free_dvector(svec, 0, di-1);
+		free_dvector(xpt, 0, di-1);
+		free_dvector(spt, 0, di-1);
+		free_dmatrix(dmtx, 0, di-1, 0, di-1);
+	}
 
 	if (prog != NULL)		/* Report final progress */
 		prog(pdata, 100);
@@ -232,12 +248,323 @@ void *pdata				/* Opaque data needed by prog() */
 	return 1;		/* Failed due to execessive itterations */
 }
 
+/* - - - - - - - - - - - - - - - - - */
+
+#define POWELL_GOLD 1.618034
+#define POWELL_CGOLD 0.3819660
+#define POWELL_MAXIT 100
+
+/* Line bracketing and minimisation routine. */
+/* Return value at minimum. */
+double linmin(
+double cp[],		/* Start point, and returned value */
+double xi[],		/* Search vector */
+int di,				/* Dimensionality */
+#ifdef ABSTOL
+double ftol,		/* Absolute tolerance to stop on */
+#else
+double ftol,		/* Relative tolerance to stop on */
+#endif
+double (*func)(void *fdata, double tp[]),		/* Error function to evaluate */
+void *fdata)		/* Opaque data for func() */
+{
+	int i;
+	double ax, xx, bx;	/* Search vector multipliers */
+	double af, xf, bf;	/* Function values at those points */
+	double *xt, _xt[10];	/* Trial point */
+
+	if (di <= 10)
+		xt = _xt;
+	else
+		xt = dvector(0, di-1);			/* Vector for trial point */
+
+	/* -------------------------- */
+	/* First bracket the solution */
+
+	LDBG((" linmin: Bracketing solution\n"))
+
+	/* The line is measured as startpoint + offset * search vector. */
+	/* (Search isn't symetric, but it seems to depend on cp being */
+	/* best current solution ?) */
+	ax = 0.0;
+	for (i = 0; i < di; i++)
+		xt[i] = cp[i] + ax * xi[i];
+	af = (*func)(fdata, xt);
+
+	/* xx being vector offset 0.618 */
+	xx =  1.0/POWELL_GOLD;
+	for (i = 0; i < di; i++)
+		xt[i] = cp[i] + xx * xi[i];
+	xf = (*func)(fdata, xt);
+
+	LDBG((" linmin: Initial points a:%f:%f -> b:%f:%f\n",ax,af,xx,xf))
+
+	/* Fix it so that we are decreasing from point a -> x */
+	if (xf > af) {
+		double tt;
+		tt = ax; ax = xx; xx = tt;
+		tt = af; af = xf; xf = tt;
+	}
+	LDBG((" linmin: Ordered Initial points a:%f:%f -> b:%f:%f\n",ax,af,xx,xf))
+
+	bx = xx + POWELL_GOLD * (xx-ax);	/* Guess b beyond a -> x */
+	for (i = 0; i < di; i++)
+		xt[i] = cp[i] + bx * xi[i];
+	bf = (*func)(fdata, xt);
+
+	LDBG((" linmin: Initial bracket a:%f:%f x:%f:%f b:%f:%f\n",ax,af,xx,xf,bx,bf))
+
+#ifdef SLOPE_SANITY_CHECK
+	/* If we're not seeing a slope indicitive of progress */
+	/* of order ftol, give up straight away */
+	if (2000.0 * fabs(xf - bf) <= ftol * (fabs(xf) + fabs(bf))
+	 && 2000.0 * fabs(af - xf) <= ftol * (fabs(af) + fabs(xf))) {
+		LDBG((" linmin: giving up because slope is too shallow\n"))
+		if (xt != _xt)
+			free_dvector(xt,0,di-1);
+
+		if (bf < xf) {
+			xf = bf;
+			xx = bx;
+		}
+		goto done;
+	}
+#endif /* SLOPE_SANITY_CHECK */
+
+	/* While not bracketed */
+	while (xf > bf) {
+		double ulim, ux, uf;
+		double tt, r, q;
+
+		LDBG((" linmin: Not bracketed because xf %f > bf %f\n",xf, bf))
+		LDBG(("        ax = %f, xx = %f, bx = %f\n",ax,xx,bx))
+
+		/* Compute ux by parabolic interpolation from a, x & b */
+		q = (xx - bx) * (xf - af);
+		r = (xx - ax) * (xf - bf);
+		tt = q - r;
+		if (tt >= 0.0 && tt < 1e-20)				/* If +ve too small */
+			tt = 1e-20;
+		else if (tt <= 0.0 && tt > -1e-20)		/* If -ve too small */
+			tt = -1e-20;
+		ux = xx - ((xx - bx) * q - (xx - ax) * r) / (2.0 * tt);
+		ulim = xx + 100.0 * (bx - xx);			/* Extrapolation limit */
+
+//printf("~1 ux = %f, ulim = %f\n",ux,ulim);
+		if ((xx - ux) * (ux - bx) > 0.0) {		/* u is between x and b */
+
+			for (i = 0; i < di; i++)			/* Evaluate u */
+				xt[i] = cp[i] + ux * xi[i];
+			uf = (*func)(fdata, xt);
+
+//printf("~1 u is between x and b, uf = %f\n",uf);
+
+			if (uf < bf) {						/* Minimum is between x and b */
+//printf("~1 min is between x and b\n");
+				ax = xx; af = xf;
+				xx = ux; xf = uf;
+				break;
+			} else if (uf > xf) {				/* Minimum is between a and u */
+//printf("~1 min is between a and u\n");
+				bx = ux; bf = uf;
+				break;
+			}
+
+			/* Parabolic fit didn't work, look further out in direction of b */
+			ux = bx + POWELL_GOLD * (bx-xx);
+//printf("~1 parabolic fit didn't work,look further in direction of b (%f)\n",ux);
+
+		} else if ((bx - ux) * (ux - ulim) > 0.0) {	/* u is between b and limit */
+			for (i = 0; i < di; i++)			/* Evaluate u */
+				xt[i] = cp[i] + ux * xi[i];
+			uf = (*func)(fdata, xt);
+
+//printf("~1 u is between b and limit uf = %f\n",uf);
+			if (uf > bf) {						/* Minimum is between x and u */
+//printf("~1 min is between x and uf\n");
+				ax = xx; af = xf;
+				xx = bx; xf = bf;
+				bx = ux; bf = uf;
+				break;
+			}
+			xx = bx; xf = bf;					/* Continue looking */
+			bx = ux; bf = uf;
+			ux = bx + POWELL_GOLD * (bx - xx);	/* Test beyond b */
+//printf("~1 continue looking beyond b (%f)\n",ux);
+
+		} else if ((ux - ulim) * (ulim - bx) >= 0.0) {	/* u is beyond limit */
+			ux = ulim;
+//printf("~1 use limit\n");
+		} else {							/* u is to left side of x ? */
+			ux = bx + POWELL_GOLD * (bx-xx);
+//printf("~1 look gold beyond b (%f)\n",ux);
+		}
+		/* Evaluate u, and move into place at b */
+		for (i = 0; i < di; i++)
+			xt[i] = cp[i] + ux * xi[i];
+		uf = (*func)(fdata, xt);
+//printf("~1 lookup ux %f value uf = %f\n",ux,uf);
+		ax = xx; af = xf;
+		xx = bx; xf = bf;
+		bx = ux; bf = uf;
+//printf("~1 move along to the right (a<-x, x<-b, b-<u)\n");
+	}
+	LDBG((" linmin: Got bracket a:%f:%f x:%f:%f b:%f:%f\n",ax,af,xx,xf,bx,bf))
+	/* Got bracketed minimum between a -> x -> b */
+//printf("~1 got bracketed minimum at %f (%f), %f (%f), %f (%f)\n",ax,af,xx,xf,bx,bf);
+
+	/* --------------------------------------- */
+	/* Now use brent minimiser bewteen a and b */
+	{
+		/* a and b bracket solution */
+		/* x is best function value so far */
+		/* w is second best function value so far */
+		/* v is previous second best, or third best */
+		/* u is most recently tested point */
+		double wx, vx, ux;			/* Search vector multipliers */
+		double wf, vf = 0.0, uf;	/* Function values at those points */
+		int iter;
+		double de = 0.0;	/* Distance moved on previous step */
+		double e = 0.0;		/* Distance moved on 2nd previous step */
+
+		/* Make sure a and b are in ascending order */
+		if (ax > bx) {
+			double tt;
+			tt = ax; ax = bx; bx = tt;
+			tt = af; af = bf; bf = tt;
+		}
+
+		wx = vx = xx;	/* Initial values of other center points */
+		wf = xf = xf;
+
+		for (iter = 1; iter <= POWELL_MAXIT; iter++) {
+			double mx = 0.5 * (ax + bx);		/* m is center of bracket values */
+#ifdef ABSTOL
+			double tol1 = ftol;			/* Absolute tollerance */
+#else
+			double tol1 = ftol * fabs(xx) + 1e-10;
+#endif
+			double tol2 = 2.0 * tol1;
+
+			LDBG((" linmin it %d: Got bracket a:%f:%f x:%f:%f b:%f:%f\n",iter,ax,af,xx,xf,bx,bf))
+
+			/* See if we're done */
+//printf("~1 linmin check %f <= %f\n",fabs(xx - mx), tol2 - 0.5 * (bx - ax));
+			if (fabs(xx - mx) <= (tol2 - 0.5 * (bx - ax))) {
+				LDBG((" linmin: We're done because %e <= %e\n",fabs(xx - mx), tol2 - 0.5 * (bx - ax)))
+				break;
+			}
+
+			LDBG((" linmin: e %e tol2 %e\n",e,tol1))
+
+			if (fabs(e) > tol1) {			/* Do a trial parabolic fit */
+				double te, p, q, r;
+				r = (xx-wx) * (xf-vf);
+				q = (xx-vx) * (xf-wf);
+				p = (xx-vx) * q - (xx-wx) * r;
+				q = 2.0 * (q - r);
+				if (q > 0.0)
+					p = -p;
+				else
+					q = -q;
+				te = e;				/* Save previous e value */
+				e = de;				/* Previous steps distance moved */
+
+				LDBG((" linmin: Trial parabolic fit\n" ))
+
+				if (fabs(p) >= fabs(0.5 * q * te) || p <= q * (ax-xx) || p >= q * (bx-xx)) {
+					/* Give up on the parabolic fit, and use the golden section search */
+					e = ((xx >= mx) ? ax-xx : bx-xx);	/* Override previous distance moved */
+					de = POWELL_CGOLD * e;
+					LDBG((" linmin: Moving to golden section search\n" ))
+				} else {	/* Use parabolic fit */
+					de = p/q;			/* Change in xb */
+					ux = xx + de;		/* Trial point according to parabolic fit */
+					if ((ux - ax) < tol2 || (bx - ux) < tol2) {
+						if ((mx - xx) > 0.0)	/* Don't use parabolic, use tol1 */
+							de = tol1;			/* tol1 is +ve */
+						else
+							de = -tol1;
+					}
+					LDBG((" linmin: Using parabolic fit\n" ))
+				}
+			} else {	/* Keep using the golden section search */
+				e = ((xx >= mx) ? ax-xx : bx-xx);	/* Override previous distance moved */
+				de = POWELL_CGOLD * e;
+				LDBG((" linmin: Continuing golden section search\n" ))
+			}
+
+			if (fabs(de) >= tol1) {		/* If de moves as much as tol1 would */
+				ux = xx + de;			/* use it */
+				LDBG((" linmin: ux = %f = xx %f + de %f\n",ux,xx,de))
+			} else {					/* else move by tol1 in direction de */
+				if (de > 0.0) {
+					ux = xx + tol1;
+					LDBG((" linmin: ux = %f = xx %f + tol1 %e\n",ux,xx,tol1))
+				} else {
+					ux = xx - tol1;
+					LDBG((" linmin: ux = %f = xx %f - tol1 %f\n",ux,xx,tol1))
+				}
+			}
+
+			/* Evaluate function */
+			for (i = 0; i < di; i++)
+				xt[i] = cp[i] + ux * xi[i];
+			uf = (*func)(fdata, xt);
+
+			if (uf <= xf) {					/* Found new best solution */
+				LDBG((" linmin: found new best solution at %f val %f\n",ux,uf))
+				if (ux >= xx) {	
+					ax = xx; af = xf;		/* New lower bracket */
+				} else {
+					bx = xx; bf = xf;		/* New upper bracket */
+				}
+				vx = wx; vf = wf;			/* New previous 2nd best solution */
+				wx = xx; wf = xf;			/* New 2nd best solution from previous best */
+				xx = ux; xf = uf;			/* New best solution from latest */
+			} else {						/* Found a worse solution */
+				LDBG((" linmin: found new worse solution at %f val %f\n",ux,uf))
+				LDBG((" linmin:             current best at %f val %f\n",xx,xf))
+				if (ux < xx) {
+					ax = ux; af = uf;		/* New lower bracket */
+				} else {
+					bx = ux; bf = uf;		/* New upper bracket */
+				}
+				if (uf <= wf || wx == xx) {	/* New 2nd best solution, or equal best */
+					vx = wx; vf = wf;		/* New previous 2nd best solution */
+					wx = ux; wf = uf;		/* New 2nd best from latest */
+				} else if (uf <= vf || vx == xx || vx == wx) {	/* New 3rd best, or equal 1st & 2nd */
+					vx = ux; vf = uf;		/* New previous 2nd best from latest */
+				}
+			}
+		}
+		/* !!! should do something if iter > POWELL_MAXIT !!!! */
+		/* Solution is at xx, xf */
+	}
+
+  done:;
+
+	/* Compute solution vector at xx */
+	LDBG((" linmin: computing soln from best at %f val %f\n",xx,xf))
+	for (i = 0; i < di; i++) 
+		cp[i] += xx * xi[i];
+
+	if (xt != _xt)
+		free_dvector(xt,0,di-1);
+//printf("~~~ line minimizer returning %e\n",xf);
+	return xf;
+}
+
+#undef POWELL_GOLD
+#undef POWELL_CGOLD
+#undef POWELL_MAXIT
+
 /* -------------------------------------- */
-/* Conjugate Gradient optimiser */
+/* Conjugate Gradient optimiser using partial derivatives. */
 /* return 0 on sucess, 1 on failure due to excessive itterions */
 /* Result will be in cp */
 /* Note that we could use gradient in line minimiser, */
-/* but haven't bothered yet. */
+/* but this seems to be slower, so we don't use it. */
 int conjgrad(
 double *rv,				/* If not NULL, return the residual error */
 int di,					/* Dimentionality */
@@ -250,79 +577,93 @@ double ftol,			/* Relative tollerance of error change to stop on */
 #endif
 int maxit,				/* Maximum iterations allowed */
 double (*func)(void *fdata, double tp[]),		/* Error function to evaluate */
-double (*dfunc)(void *fdata, double dp[], double tp[]),		/* Gradient function to evaluate */
+double (*dfunc)(void *fdata, double dp[], double tp[]),		/* Gradient & function to evaluate */
+						/* dfunc() should return DFUNC_NRV if it doesn't return function value */
 void *fdata,			/* Opaque data needed by function */
 void (*prog)(void *pdata, int perc),		/* Optional progress percentage callback */
 void *pdata				/* Opaque data needed by prog() */
 ) {
 	int i, iter;
-	double *svec;			/* Search vector */
-	double *gvec;			/* G direction vector */
-	double *hvec;			/* H direction vector */
+	double *svec, _svec[10];	/* Search vector */
+	double *ssvec, _ssvec[10];	/* s[] scaled search vector */
+	double *gvec, _gvec[10];	/* G direction vector */
+	double *hvec, _hvec[10];	/* H direction vector */
 	double retv; 			/* Returned function value at p */
 	double stopth;			/* Current stop threshold */
 	double startdel = -1.0;	/* Initial change in function value */
 	double curdel;			/* Current change in function value */
-	double svec_sca;			/* initial svec scale factor */
+	double brat;			/* svec to s[] ratio */
+	double svec_sca;		/* svec scale factor */
 	int pc = 0;				/* Percentage complete */
 
-	svec = dvector(0,di-1);
-	gvec  = dvector(0,di-1);
-	hvec  = dvector(0,di-1);
+	if (di <= 10) {
+		svec = _svec;
+		ssvec = _ssvec;
+		gvec  = _gvec;
+		hvec  = _hvec;
+	} else {
+		svec = dvector(0, di-1);
+		ssvec = dvector(0, di-1);
+		gvec  = dvector(0, di-1);
+		hvec  = dvector(0, di-1);
+	}
+
+	CDBG(("conjgrad with di %d\n", di))
 
 	if (prog != NULL)		/* Report initial progress */
 		prog(pdata, pc);
 
-	/* Initial gradient function evaluation */
+	/* Initial function and gradient evaluation */
 	retv = (*dfunc)(fdata, svec, cp);
-
-	/* svec[] seems to be large after this. */
-	/* Rescale it to conform to maximum of s[] */
-	for (svec_sca = 0.0, i = 0; i < di; i++) {
-		if (fabs(svec[i]) > svec_sca)
-			svec_sca = fabs(svec[i]);
+	if (retv == DFUNC_NRV) 
+		retv = (*func)(fdata, cp);
+
+	/* svec[] seems to be large after this. Compute scaled version that */
+	/* has maximum of s[] so that line search is guided by the search radius. */
+	for (brat = 0.0, i = 0; i < di; i++) {
+		double rat = fabs(svec[i]) / fabs(s[i]);
+		if (rat > brat)
+			brat = rat;
 	}
-	/* set scale so largest <= 1 */
-	if (svec_sca < 1e-12)
-		svec_sca = 1.0;
-	else
-		svec_sca = 1.0/svec_sca;
-
-	CDBG((" initial dir = %s\n", debPdv(di,svec)));
-	CDBG((" initial retv = %f\n",retv));
+	svec_sca = 1.0/brat;
 
 	/* Initial vector setup */
 	for (i = 0; i < di; i++) {
-		gvec[i] = hvec[i] = -svec[i];			/* Inverse gradient */
-		svec[i] = s[i] * -svec[i] * svec_sca;	/* Scale the search vector */
+		svec[i] = gvec[i] = hvec[i] = -svec[i];			/* Inverse gradient */
+		ssvec[i] = svec[i] * svec_sca;	/* Scale the search vector to s[] size */
 	}
-	CDBG(("Initial svec = %s\n", debPdv(di,svec)));
+
+	CDBG((" initial dir = %s\n", debPdv(di, ssvec)));
+	CDBG((" initial retv = %f\n",retv));
 
 	/* Itterate untill we converge on a solution, or give up. */
 	for (iter = 1; iter < maxit; iter++) {
 		double gamden, gamnum, gam;
 		double pretv;			/* Previous function return value */
 
-		CDBG(("conjrad: about to do linmin\n"))
+		CDBG(("conjrad it %d: about to do linmind\n",iter))
 		pretv = retv;
-		retv = linmin(cp, svec, di, ftol, func, fdata);
+#ifdef USE_LINMIND
+		retv = linmind(cp, ssvec, di, ftol, func, dfunc, fdata);
+#else
+		retv = linmin(cp, ssvec, di, ftol, func, fdata);
+#endif
 
 #ifdef ABSTOL
 		stopth = ftol;				/* Absolute tollerance */
 #else
-		stopth = ftol * 0.5 * (fabs(pretv) + fabs(retv) + DBL_EPSILON);		// Old code
+		stopth = ftol * 0.5 * (fabs(pretv) + fabs(retv) + DBL_EPSILON);
 #endif
 		curdel = fabs(pretv - retv);
 		CDBG((" this retv = %f, pretv = %f, curdel = %f\n",retv,pretv,curdel));
 		if (startdel < 0.0) {
 			startdel = curdel;
-		} else {
+		} else if (prog != NULL) {	/* Update percentage */
 			int tt;
 			tt = (int)(100.0 * pow((log(curdel) - log(startdel))/(log(stopth) - log(startdel)), 4.0) + 0.5);
 			if (tt > pc && tt < 100) {
 				pc = tt;
-				if (prog != NULL)		/* Report initial progress */
-					prog(pdata, pc);
+				prog(pdata, pc); /* Report initial progress */
 			}
 		}
 
@@ -336,7 +677,7 @@ void *pdata				/* Opaque data needed by prog() */
 
 		CDBG(("conjrad: recomputing direction\n"))
 		(*dfunc)(fdata, svec, cp);		/* (Don't use retv as it wrecks stop test) */
-		CDBG((" pderiv = %s\n", debPdv(di,svec)));
+		CDBG((" pderiv = %s\n", debPdv(di, svec)));
 
 		/* Compute gamma */
 		for (gamnum = gamden = 0.0, i = 0; i < di; i++) {
@@ -360,25 +701,26 @@ void *pdata				/* Opaque data needed by prog() */
 			svec[i] = hvec[i] = gvec[i] + gam * hvec[i];
 		}
 
-		/* svec[] seems to be large after this. */
-		/* Rescale it to conform to maximum of s[] */
-		for (svec_sca = 0.0, i = 0; i < di; i++) {
-			if (fabs(svec[i]) > svec_sca)
-				svec_sca = fabs(svec[i]);
+		/* svec[] seems to be large after this. Compute scaled version that */
+		/* has maximum of s[] so that line search is guided by the search radius. */
+		for (brat = 0.0, i = 0; i < di; i++) {
+			double rat = fabs(svec[i]) / fabs(s[i]);
+			if (rat > brat)
+				brat = rat;
 		}
-		/* set scale so largest <= 1 */
-		if (svec_sca < 1e-12)
-			svec_sca = 1.0;
-		else
-			svec_sca = 1.0/svec_sca;
+		svec_sca = 1.0/brat;
 		for (i = 0; i < di; i++)
-			svec[i] = svec[i] * s[i] * svec_sca;
-		CDBG((" svec = %s\n", debPdv(di,svec)));
+			ssvec[i] = svec[i] * svec_sca;
+
+		CDBG((" ssvec = %s\n", debPdv(di,ssvec)));
 	}
 	/* Free up all the temporary vectors and matrix */
-	free_dvector(hvec,0,di-1);
-	free_dvector(gvec,0,di-1);
-	free_dvector(svec,0,di-1);
+	if (di > 10) {
+		free_dvector(hvec, 0, di-1);
+		free_dvector(gvec, 0, di-1);
+		free_dvector(ssvec, 0, di-1);
+		free_dvector(svec, 0, di-1);
+	}
 
 	if (prog != NULL)		/* Report final progress */
 		prog(pdata, 100);
@@ -394,14 +736,15 @@ void *pdata				/* Opaque data needed by prog() */
 	return 1;		/* Failed due to execessive itterations */
 }
 
-/*------------------------------*/
 #define POWELL_GOLD 1.618034
-#define POWELL_CGOLD 0.3819660
 #define POWELL_MAXIT 100
 
-/* Line bracketing and minimisation routine. */
+/* Line bracketing and minimisation routine using derivatives */
+/* This is not used, because it typically makes it slower */
+/* - it may take less itterations, but each itteration uses */
+/* a func() and dfunc() call, at least doubling itter overhead. */
 /* Return value at minimum. */
-double linmin(
+double linmind(
 double cp[],		/* Start point, and returned value */
 double xi[],		/* Search vector */
 int di,				/* Dimensionality */
@@ -410,23 +753,29 @@ double ftol,		/* Absolute tolerance to stop on */
 #else
 double ftol,		/* Relative tolerance to stop on */
 #endif
-double (*func)(void *fdata, double tp[]),		/* Error function to evaluate */
+double (*func)(void *fdata, double tp[]),				/* Error function to evaluate */
+double (*dfunc)(void *fdata, double dp[], double tp[]),	/* Gradient function to evaluate */
+					/* dfunc() should return DFUNC_NRV if it doesn't return function value */
 void *fdata)		/* Opaque data for func() */
 {
 	int i;
 	double ax, xx, bx;	/* Search vector multipliers */
 	double af, xf, bf;	/* Function values at those points */
-	double *xt, XT[10];	/* Trial point */
+	double *xt, _xt[10];	/* Trial point */
+	double *df, _df[10];	/* Derivative vector */
 
-	if (di <= 10)
-		xt = XT;
-	else
+	if (di <= 10) {
+		xt = _xt;
+		df = _df;
+	} else {
 		xt = dvector(0, di-1);			/* Vector for trial point */
+		df = dvector(0, di-1);			/* Vector for trial point */
+	}
 
 	/* -------------------------- */
 	/* First bracket the solution */
 
-	LDBG(("linmin: Bracketing solution\n"))
+	LDBG((" linmind: Bracketing solution\n"))
 
 	/* The line is measured as startpoint + offset * search vector. */
 	/* (Search isn't symetric, but it seems to depend on cp being */
@@ -442,7 +791,7 @@ void *fdata)		/* Opaque data for func() */
 		xt[i] = cp[i] + xx * xi[i];
 	xf = (*func)(fdata, xt);
 
-	LDBG(("linmin: Initial points a:%f:%f -> b:%f:%f\n",ax,af,xx,xf))
+	LDBG((" linmind: Initial points a:%f:%f -> b:%f:%f\n",ax,af,xx,xf))
 
 	/* Fix it so that we are decreasing from point a -> x */
 	if (xf > af) {
@@ -450,33 +799,31 @@ void *fdata)		/* Opaque data for func() */
 		tt = ax; ax = xx; xx = tt;
 		tt = af; af = xf; xf = tt;
 	}
-	LDBG(("linmin: Ordered Initial points a:%f:%f -> b:%f:%f\n",ax,af,xx,xf))
+	LDBG((" linmind: Ordered Initial points a:%f:%f -> b:%f:%f\n",ax,af,xx,xf))
 
 	bx = xx + POWELL_GOLD * (xx-ax);	/* Guess b beyond a -> x */
 	for (i = 0; i < di; i++)
 		xt[i] = cp[i] + bx * xi[i];
 	bf = (*func)(fdata, xt);
 
-	LDBG(("linmin: Initial bracket a:%f:%f x:%f:%f b:%f:%f\n",ax,af,xx,xf,bx,bf))
+	LDBG((" linmind: Initial bracket a:%f:%f x:%f:%f b:%f:%f\n",ax,af,xx,xf,bx,bf))
 
 #ifdef SLOPE_SANITY_CHECK
 	/* If we're not seeing a slope indicitive of progress */
 	/* of order ftol, give up straight away */
 	if (2000.0 * fabs(xf - bf) <= ftol * (fabs(xf) + fabs(bf))
 	 && 2000.0 * fabs(af - xf) <= ftol * (fabs(af) + fabs(xf))) {
-		LDBG(("linmin: giving up because slope is too shallow\n"))
-		if (xt != XT)
-			free_dvector(xt,0,di-1);
+		LDBG((" linmind: giving up because slope is too shallow\n"))
+		if (di > 10) {
+			free_dvector(df, 0, di-1);
+			free_dvector(xt, 0, di-1);
+		}
 
 		if (bf < xf) {
 			xf = bf;
 			xx = bx;
 		}
-
-		/* Compute solution vector */
-		for (i = 0; i < di; i++) 
-			cp[i] += xx * xi[i];
-		return xf;
+		goto done;
 	}
 #endif /* SLOPE_SANITY_CHECK */
 
@@ -485,8 +832,7 @@ void *fdata)		/* Opaque data for func() */
 		double ulim, ux, uf;
 		double tt, r, q;
 
-//		LDBG(("linmin: Not bracketed a:%f:%f x:%f%f b:%f:%f\n",ax,af,xx,xf,bx,bf))
-		LDBG(("linmin: Not bracketed because xf %f > bf %f\n",xf, bf))
+		LDBG((" linmind: Not bracketed because xf %f > bf %f\n",xf, bf))
 		LDBG(("        ax = %f, xx = %f, bx = %f\n",ax,xx,bx))
 
 		/* Compute ux by parabolic interpolation from a, x & b */
@@ -559,7 +905,7 @@ void *fdata)		/* Opaque data for func() */
 		bx = ux; bf = uf;
 //printf("~1 move along to the right (a<-x, x<-b, b-<u)\n");
 	}
-	LDBG(("linmin: Got bracket a:%f:%f x:%f:%f b:%f:%f\n",ax,af,xx,xf,bx,bf))
+	LDBG((" linmind: Got bracket a:%f:%f x:%f:%f b:%f:%f\n",ax,af,xx,xf,bx,bf))
 	/* Got bracketed minimum between a -> x -> b */
 //printf("~1 got bracketed minimum at %f (%f), %f (%f), %f (%f)\n",ax,af,xx,xf,bx,bf);
 
@@ -571,8 +917,10 @@ void *fdata)		/* Opaque data for func() */
 		/* w is second best function value so far */
 		/* v is previous second best, or third best */
 		/* u is most recently tested point */
+
 		double wx, vx, ux;			/* Search vector multipliers */
 		double wf, vf = 0.0, uf;	/* Function values at those points */
+		double xd, wd, vd, ud;		/* Derivative values at those points */
 		int iter;
 		double de = 0.0;	/* Distance moved on previous step */
 		double e = 0.0;		/* Distance moved on 2nd previous step */
@@ -587,6 +935,16 @@ void *fdata)		/* Opaque data for func() */
 		wx = vx = xx;	/* Initial values of other center points */
 		wf = xf = xf;
 
+		/* Lookup derivative at x (we already have xf from bracketing) */
+		for (i = 0; i < di; i++)
+			xt[i] = cp[i] + xx * xi[i];
+		(*dfunc)(fdata, df, xt);
+		for (xd = 0.0, i = 0; i < di; i++)
+			xd += xi[i] * df[i];
+		wd = ud = xd;
+
+		LDBG((" linmind: xx %f, deriv. xd %f\n",xx,xd))
+
 		for (iter = 1; iter <= POWELL_MAXIT; iter++) {
 			double mx = 0.5 * (ax + bx);		/* m is center of bracket values */
 #ifdef ABSTOL
@@ -596,113 +954,169 @@ void *fdata)		/* Opaque data for func() */
 #endif
 			double tol2 = 2.0 * tol1;
 
-			LDBG(("linmin: Got bracket a:%f:%f x:%f:%f b:%f:%f\n",ax,af,xx,xf,bx,bf))
+			LDBG((" linmind it %d: Got bracket a:%f:%f x:%f:%f b:%f:%f\n",iter, ax,af,xx,xf,bx,bf))
 
 			/* See if we're done */
-//printf("~1 linmin check %f <= %f\n",fabs(xx - mx), tol2 - 0.5 * (bx - ax));
 			if (fabs(xx - mx) <= (tol2 - 0.5 * (bx - ax))) {
-				LDBG(("linmin: We're done because %f <= %f\n",fabs(xx - mx), tol2 - 0.5 * (bx - ax)))
+				LDBG((" linmind: We're done because %e <= %e\n",fabs(xx - mx), tol2 - 0.5 * (bx - ax)))
 				break;
 			}
 
-			if (fabs(e) > tol1) {			/* Do a trial parabolic fit */
-				double te, p, q, r;
-				r = (xx-wx) * (xf-vf);
-				q = (xx-vx) * (xf-wf);
-				p = (xx-vx) * q - (xx-wx) * r;
-				q = 2.0 * (q - r);
-				if (q > 0.0)
-					p = -p;
-				else
-					q = -q;
+			LDBG((" linmind: e %f tol2 %f\n",e,tol1))
+
+			if (fabs(e) > tol1) {			/* Do a trial secant fit */
+				double te;
+				double dx1, dx2;				/* Secant extrapolation points */
+				double ux1, ux2;
+				int ch1, ch2;
+
+				LDBG((" linmind: Doing trial secant fit\n" ))
+
+				dx2 = dx1 = 2.0 * (bx - ax);	/* Default to values out of the ax..bx bracket */
+				
+				/* Extrapolated points from last two points (secant method) */
+				if (wd != xd)
+					dx1 = (wx - xx) * xd/(xd - wd);
+				if (vd != xd)
+					dx2 = (vx - xx) * xd/(xd - vd);
+
+				ux1 = xx + dx1;
+				ux2 = xx + dx2;
+
+				/* Check which one is reasonable */
+				ch1 = (ax - ux1)  * (ux1 - bx) > 0.0 && xd * dx1 < 0.0;
+				ch2 = (ax - ux2)  * (ux2 - bx) > 0.0 && xd * dx2 < 0.0;
+
+				LDBG((" linmind: Doing dx1 %f dx2 %f ux1 %f ux2 %f ch1 %d ch2 %d\n",dx1,dx2,ux1,ux2,ch1,ch2))
+
 				te = e;				/* Save previous e value */
 				e = de;				/* Previous steps distance moved */
 
-				LDBG(("linmin: Trial parabolic fit\n" ))
+				if (!ch1 && !ch2)
+					goto bisect;
 
-				if (fabs(p) >= fabs(0.5 * q * te) || p <= q * (ax-xx) || p >= q * (bx-xx)) {
-					/* Give up on the parabolic fit, and use the golden section search */
-					e = ((xx >= mx) ? ax-xx : bx-xx);	/* Override previous distance moved */
-					de = POWELL_CGOLD * e;
-					LDBG(("linmin: Moving to golden section search\n" ))
-				} else {	/* Use parabolic fit */
-					de = p/q;			/* Change in xb */
-					ux = xx + de;		/* Trial point according to parabolic fit */
-					if ((ux - ax) < tol2 || (bx - ux) < tol2) {
-						if ((mx - xx) > 0.0)	/* Don't use parabolic, use tol1 */
-							de = tol1;			/* tol1 is +ve */
-						else
-							de = -tol1;
-					}
-					LDBG(("linmin: Using parabolic fit\n" ))
+				/* Use smallest or the one that's valid */
+				if (ch1 && ch2)
+					de = fabs(dx1) < fabs(dx2) ? dx1 : dx2;
+				if (ch1)
+					de = dx1;
+				else if (ch2)
+					de = dx2;
+
+				LDBG((" linmind: set de %f\n",de))
+
+				if (fabs(de) > fabs(0.5 * te)) {
+					LDBG((" linmind: abs(de) %f > abs(te/2 = %f)\n",fabs(de),fabs(0.5 * te)))
+					goto bisect;
 				}
-			} else {	/* Keep using the golden section search */
-				e = ((xx >= mx) ? ax-xx : bx-xx);	/* Override previous distance moved */
-				de = POWELL_CGOLD * e;
-				LDBG(("linmin: Continuing golden section search\n" ))
+
+				ux = xx + de;
+
+				if ((ux - ax) < tol2 || (bx - ux) < tol2) {
+					if ((mx - xx) < 0.0)
+						de = -fabs(tol1);
+					else
+						de = fabs(tol1);
+					LDBG((" linmind: Set de to tol1 %f\n",de))
+				}
+#ifdef LDEBUG
+				  else {
+					LDBG((" linmind: Using secant fit de %f\n",de))
+				}
+#endif
+
+			/* else bisect picking side using sign of derivative */
+			} else {
+		  bisect:
+				e = (xd >= 0.0 ? ax - xx : bx  -xx);
+				de = 0.5 * e;
+				LDBG((" linmind: Continuing bisection search de %f\n",de))
 			}
 
-			if (fabs(de) >= tol1) {		/* If de moves as much as tol1 would */
+			if (fabs(de) >= tol1) {		/* If de moves as much as tol1 or more */
 				ux = xx + de;			/* use it */
-				LDBG(("linmin: ux = %f = xx %f + de %f\n",ux,xx,de))
+
+				/* Evaluate function */
+				for (i = 0; i < di; i++)
+					xt[i] = cp[i] + ux * xi[i];
+				uf = (*func)(fdata, xt);
+
+				LDBG((" linmind: ux = %f = xx %f + de %f, uf %f\n",ux,xx,de,uf))
+
 			} else {					/* else move by tol1 in direction de */
 				if (de > 0.0) {
 					ux = xx + tol1;
-					LDBG(("linmin: ux = %f = xx %f + tol1 %f\n",ux,xx,tol1))
+					LDBG((" linmind: ux = %f = xx %f + tol1 %f\n",ux,xx,tol1))
 				} else {
 					ux = xx - tol1;
-					LDBG(("linmin: ux = %f = xx %f - tol1 %f\n",ux,xx,tol1))
+					LDBG((" linmind: ux = %f = xx %f - tol1 %f\n",ux,xx,tol1))
+				}
+				/* Evaluate function */
+				for (i = 0; i < di; i++)
+					xt[i] = cp[i] + ux * xi[i];
+				uf = (*func)(fdata, xt);
+
+				LDBG((" linmind: uf %f\n",uf))
+
+				if (uf > xf) {		/* If tol1 step downhill takes us uphill, we're done */
+					goto done;
 				}
 			}
 
-			/* Evaluate function */
-			for (i = 0; i < di; i++)
-				xt[i] = cp[i] + ux * xi[i];
-			uf = (*func)(fdata, xt);
+			/* Evaluate derivative at trial point */
+			(*dfunc)(fdata, df, xt);
+			for (ud = 0.0, i = 0; i < di; i++)
+				ud += xi[i] * df[i];
+
+			LDBG((" linmind: ux %f, deriv. ud %f\n",ux,ud))
 
+			/* Houskeeping: */
 			if (uf <= xf) {					/* Found new best solution */
-				LDBG(("linmin: found new best solution at %f val %f\n",ux,uf))
+				LDBG((" linmind: found new best solution at %f val %f dval %f\n",ux,uf,ud))
 				if (ux >= xx) {	
-					ax = xx; af = xf;		/* New lower bracket */
+					ax = xx; af = xf; 		/* New lower bracket */
 				} else {
 					bx = xx; bf = xf;		/* New upper bracket */
 				}
-				vx = wx; vf = wf;			/* New previous 2nd best solution */
-				wx = xx; wf = xf;			/* New 2nd best solution from previous best */
-				xx = ux; xf = uf;			/* New best solution from latest */
-			} else {						/* Found a worse solution */
-				LDBG(("linmin: found new worse solution at %f val %f\n",ux,uf))
-				LDBG(("linmin:             current best at %f val %f\n",xx,xf))
+				vx = wx; vf = wf; vd = wd;		/* New previous 2nd best solution */
+				wx = xx; wf = xf; wd = xd;		/* New 2nd best solution from previous best */
+				xx = ux; xf = uf; xd = ud;		/* New best solution from latest */
+
+			} else {			/* Found a worse solution */
+				LDBG((" linmind: found new worse solution at %f val %f dval %f\n",ux,uf,ud))
+				LDBG((" linmind:             current best at %f val %f dval %f\n",xx,xf,xd))
 				if (ux < xx) {
 					ax = ux; af = uf;		/* New lower bracket */
 				} else {
 					bx = ux; bf = uf;		/* New upper bracket */
 				}
 				if (uf <= wf || wx == xx) {	/* New 2nd best solution, or equal best */
-					vx = wx; vf = wf;		/* New previous 2nd best solution */
-					wx = ux; wf = uf;		/* New 2nd best from latest */
+					vx = wx; vf = wf; vd = wd;	/* New previous 2nd best solution */
+					wx = ux; wf = uf; wd = ud;	/* New 2nd best from latest */
 				} else if (uf <= vf || vx == xx || vx == wx) {	/* New 3rd best, or equal 1st & 2nd */
-					vx = ux; vf = uf;		/* New previous 2nd best from latest */
+					vx = ux; vf = uf; vd = ud;	/* New previous 2nd best from latest */
 				}
 			}
-		}
-		/* !!! should do something if iter > POWELL_MAXIT !!!! */
+		}	/* Next itter */
+
+		/* !!! should do something if iter > POWELL_MAXIT ???  */
 		/* Solution is at xx, xf */
 
+	  done:;
+		if (di > 10) {
+			free_dvector(df, 0, di-1);
+			free_dvector(xt, 0, di-1);
+		}
 		/* Compute solution vector */
-		LDBG(("linmin: computing soln from best at %f val %f\n",xx,xf))
+		LDBG((" linmind: computing soln from best at %f val %f dval %f\n",xx,xf,xd))
 		for (i = 0; i < di; i++) 
 			cp[i] += xx * xi[i];
-	}
 
-	if (xt != XT)
-		free_dvector(xt,0,di-1);
-//printf("~~~ line minimizer returning %e\n",xf);
+	}	/* Minimizer context */
+
 	return xf;
 }
 
 #undef POWELL_GOLD
-#undef POWELL_CGOLD
 #undef POWELL_MAXIT
 
-/**************************************************/
diff --git a/numlib/powell.h b/numlib/powell.h
index 4b86573..642cb65 100755
--- a/numlib/powell.h
+++ b/numlib/powell.h
@@ -15,6 +15,8 @@
 	extern "C" {
 #endif
 
+#define DFUNC_NRV	1e100
+
 /* Standard interface for powell function */
 /* return 0 on sucess, 1 on failure due to excessive itterations */
 /* Result will be in cp */
@@ -32,7 +34,7 @@ void (*prog)(void *pdata, int perc),		/* Optional progress percentage callback *
 void *pdata				/* Opaque data needed by prog() */
 );
 
-/* Conjugate Gradient optimiser */
+/* Conjugate Gradient optimiser using partial derivatives. */
 /* return 0 on sucess, 1 on failure due to excessive itterations */
 /* Result will be in cp */
 int conjgrad(
@@ -43,7 +45,8 @@ double s[],				/* Size of initial search area */
 double ftol,			/* Tollerance of error change to stop on */
 int maxit,				/* Maximum iterations allowed */
 double (*func)(void *fdata, double tp[]),		/* Error function to evaluate */
-double (*dfunc)(void *fdata, double dp[], double tp[]),		/* Gradient function to evaluate */
+double (*dfunc)(void *fdata, double dp[], double tp[]),		/* Gradient & function to evaluate */
+						/* dfunc() should return DFUNC_NRV if it doesn't return function value */
 void *fdata,			/* Opaque data needed by function */
 void (*prog)(void *pdata, int perc),		/* Optional progress percentage callback */
 void *pdata				/* Opaque data needed by prog() */
@@ -69,6 +72,22 @@ double (*func)(void *fdata, double tp[]),		/* Error function to evaluate */
 void *fdata			/* Opaque data for func() */
 );
 
+/* Line bracketing and minimisation routine using derivatives. */
+/* Return value at minimum. */
+double linmind(
+double cp[],		/* Start point, and returned value */
+double xi[],		/* Search vector */
+int di,				/* Dimensionality */
+#ifdef ABSTOL
+double ftol,		/* Absolute tolerance to stop on */
+#else
+double ftol,		/* Relative tolerance to stop on */
+#endif
+double (*func)(void *fdata, double tp[]),		/* Error function to evaluate */
+double (*dfunc)(void *fdata, double dp[], double tp[]),		/* Gradient & function to evaluate */
+void *fdata			/* Opaque data for func() */
+);
+
 #ifdef __cplusplus
 	}
 #endif
diff --git a/numlib/quadprog.c b/numlib/quadprog.c
index 847601f..4e94cec 100755
--- a/numlib/quadprog.c
+++ b/numlib/quadprog.c
@@ -233,7 +233,7 @@ double quadprog(	/* Return solution cost, QP_INFEASIBLE if infeasible/error */
 	/* and compute the current solution value */ 
 	f_value = 0.5 * scalar_product(g0, x, n);
 #ifdef TRACE_SOLVER
-	printf("Unconstrained solution: %f",f_value);
+	printf("Unconstrained solution: f_value %f\n",f_value);
 	print_vector("x", x, n);
 #endif
 	
@@ -314,9 +314,11 @@ l1:	iter++;
 #endif
 	
 	if (fabs(psi) <= m * DBL_EPSILON * c1 * c2* 100.0) {
-		/* numerically there are not infeasibilities anymore */
+		/* numerically there are no infeasibilities anymore */
+#ifdef TRACE_SOLVER
+		printf("numerically there are no infeasibilities anymore\n");
+#endif
 		q = iq;
-		
 		goto done;
 	}
 	
@@ -338,8 +340,10 @@ l2:	/* Step 2: check for feasibility and determine a new S-pair */
 		}
 	}
 	if (ss >= 0.0) {
+#ifdef TRACE_SOLVER
+		printf(" ss %f >= 0.0\n",ss);
+#endif
 		q = iq;
-		
 		goto done;
 	}
 	
@@ -398,7 +402,7 @@ l2a:
 	/* the step is chosen as the minimum of t1 and t2 */
 	t = FMIN(t1, t2);
 #ifdef TRACE_SOLVER
-	printf("Step sizes: %f (t1 = %f, t2 = %f\n",t,t1,t2);
+	printf("Step size: %e (t1 = %e, t2 = %e\n",t,t1,t2);
 #endif
 	
 	/* Step 2c: determine new S-pair and take step: */
@@ -407,6 +411,9 @@ l2a:
 	if (t >= QP_INFEASIBLE) {
 		/* QPP is infeasible */
 		// FIXME: unbounded to raise
+#ifdef TRACE_SOLVER
+		printf(" QPP is infeasible\n");
+#endif
 		q = iq;
 		f_value = QP_INFEASIBLE;
 		goto done;
@@ -449,18 +456,21 @@ l2a:
 	
 	if (fabs(t - t2) < DBL_EPSILON) {
 #ifdef TRACE_SOLVER
-		printf("Full step has taken %f\n",t);
+		printf("Full step has been taken %f\n",t);
 		print_vector("x", x, n);
 #endif
-		/* full step has taken */
-		/* add constraint ip to the active set*/
+		/* full step has been taken */
+		/* Add constraint ip to the active set*/
 		if (!add_constraint(R, J, d, &iq, &R_norm, n)) {
+#ifdef TRACE_SOLVER
+			printf("add_constraint failed - removing constraint ant step\n",t);
+#endif
 			iaexcl[ip] = 0;
 			delete_constraint(R, J, A, u, n, p, &iq, ip);
 #ifdef TRACE_SOLVER
 			print_matrix("R", R, n, n);
 			print_ivector("A", A, iq);
-			print_ivector("iai", iai, iq);		// ?? iq size of m+p ?
+			print_ivector("iai", iai, m+p);
 #endif
 			for (i = 0; i < m; i++)
 				iai[i] = i;
@@ -477,7 +487,7 @@ l2a:
 #ifdef TRACE_SOLVER
 		print_matrix("R", R, n, n);
 		print_ivector("A", A, iq);
-		print_ivector("iai", iai, iq);		// ?? iq size of m+p ?
+		print_ivector("iai", iai, m + p);
 #endif
 		goto l1;
 	}
@@ -532,6 +542,11 @@ l2a:
 		free_svector(iaexcl, 0, m + p-1);
 	}
 
+#ifdef TRACE_SOLVER
+	printf("Returning f_value %e\n",f_value);
+	print_vector("Returning x", x, n);
+#endif
+
 	return f_value;
 }
 
diff --git a/numlib/quadprog.h b/numlib/quadprog.h
index 7d28ec8..d091b75 100755
--- a/numlib/quadprog.h
+++ b/numlib/quadprog.h
@@ -25,20 +25,20 @@ The problem is in the form:
 
 min 0.5 * x G x + g0 x
 s.t.
-    CE^T x + ce0 = 0
-    CI^T x + ci0 >= 0
+    CE^t x + ce0 = 0
+    CI^t x + ci0 >= 0
 	 
  The matrix and vectors dimensions are as follows:
-     G: n * n
-		g0: n
+      G: n * n
+	 g0: n
 				
-		CE: n * p
-	 ce0: p
+	 CE: n * p
+	ce0: p
 				
-	  CI: n * m
-   ci0: m
+	 CI: n * m
+    ci0: m
 
-     x: n
+      x: n
  
  References: D. Goldfarb, A. Idnani. A numerically stable dual method for solving
              strictly convex quadratic programs. Mathematical Programming 27 (1983) pp. 1-33.
diff --git a/numlib/rand.c b/numlib/rand.c
index 9e58de3..e2fdfdf 100755
--- a/numlib/rand.c
+++ b/numlib/rand.c
@@ -39,12 +39,18 @@ double ranno(void) {
 	return rand32_th(NULL, 0) / 4294967295.0;
 }
 
-/* Return a random double in the range min to max */
+/* Return a uniform random double in the range min to max */
 double
 d_rand(double min, double max) {
 	return d_rand_th(NULL, min, max);
 }
 
+/* Return a squared distribution random double in the range min to max */
+double
+d2_rand(double min, double max) {
+	return d2_rand_th(NULL, min, max);
+}
+
 /* Return a random integer in the range min to max inclusive */
 int
 i_rand(int min, int max) {
@@ -81,6 +87,7 @@ unsigned int seed		/* Optional seed. Non-zero re-initialized with that seed */
 		p = &g_rand;
 
 	if (seed != 0) {
+//printf("~1 rand 0x%x seed 0x%x\n",p,seed);
 		p->pvs_inited = 0;
 		p->ran = seed;
 	}
@@ -98,21 +105,29 @@ unsigned int seed		/* Optional seed. Non-zero re-initialized with that seed */
 	p->last = p->pvs[i];					/* Value generated */
   	p->pvs[i] = p->ran = PSRAND32(p->ran);	/* New value */
 
+//printf("~1 rand 0x%x ret 0x%x\n",p,p->last-1);
 	return p->last-1;
 }
 
 /* return a random number between 0.0 and 1.0 */
 /* based on rand32 */
 double ranno_th(rand_state *p) {
-	return rand32_th(NULL, 0) / 4294967295.0;
+	return rand32_th(p, 0) / 4294967295.0;
 }
 
-/* Return a random double in the range min to max */
+/* Return a uniform random double in the range min to max */
 double
 d_rand_th(rand_state *p, double min, double max) {
 	return min + (max - min) * ranno_th(p);
 }
 
+/* Return a squared distribution random double in the range min to max */
+double
+d2_rand_th(rand_state *p, double min, double max) {
+	double val = ranno_th(p);
+	return min + (max - min) * val * val;
+}
+
 /* Return a random integer in the range min to max inclusive */
 int
 i_rand_th(rand_state *p, int min, int max) {
diff --git a/numlib/rand.h b/numlib/rand.h
index e190fcd..0339b24 100755
--- a/numlib/rand.h
+++ b/numlib/rand.h
@@ -24,9 +24,12 @@ unsigned int seed);			/* Optional seed. Non-zero re-initialized with that seed *
 /* Return a random integer in the range min to max inclusive */
 int i_rand(int min, int max);
 
-/* Return a random double in the range min to max inclusive */
+/* Return a uniform random double in the range min to max inclusive */
 double d_rand(double min, double max);
 
+/* Return a squared distribution random double in the range min to max inclusive */
+double d2_rand(double min, double max);
+
 /* Return a random floating point number with a gausian/normal */
 /* distribution, centered about 0.0, with standard deviation 1.0 */
 /* and an average deviation of 0.564 */
@@ -61,9 +64,12 @@ unsigned int seed);			/* Optional seed. Non-zero re-initialized with that seed *
 /* Return a random integer in the range min to max inclusive */
 int i_rand_th(rand_state *p, int min, int max);
 
-/* Return a random double in the range min to max inclusive */
+/* Return a uniform random double in the range min to max inclusive */
 double d_rand_th(rand_state *p, double min, double max);
 
+/* Return a squared distribution random double in the range min to max inclusive */
+double d2_rand_th(rand_state *p, double min, double max);
+
 /* Return a random floating point number with a gausian/normal */
 /* distribution, centered about 0.0, with standard deviation 1.0 */
 /* and an average deviation of 0.564 */
diff --git a/numlib/roots.c b/numlib/roots.c
new file mode 100755
index 0000000..04e1526
--- /dev/null
+++ b/numlib/roots.c
@@ -0,0 +1,217 @@
+
+/*
+ *  Roots3And4.c
+ *
+ *  Utility functions to find cubic and quartic roots.
+ *
+ *  Coefficients are passed like this:
+ *
+ *	  c[0] + c[1]*x + c[2]*x^2 + c[3]*x^3 + c[4]*x^4 = 0
+ *
+ *  The functions return the number of non-complex roots and
+ *  put the values into the s array.
+ *
+ *  Author:		 Jochen Schwarze (schwarze@isa.de)
+ *
+ *  Jan 26, 1990	Version for Graphics Gems
+ *  Oct 11, 1990	Fixed sign problem for negative q's in SolveQuartic
+ *  						(reported by Mark Podlipec),
+ *  						Old-style function definitions,
+ *  						IsZero() as a macro
+ *  Nov 23, 1990	Some systems do not declare acos() and cbrt() in
+ *				  <math.h>, though the functions exist in the library.
+ *				  If large coefficients are used, EQN_EPS should be
+ *				  reduced considerably (e.g. to 1E-30), results will be
+ *				  correct but multiple roots might be reported more
+ *				  than once.
+ *  Apr 18, 2018  Reformat for inclusing in ArgyllCMS - GWG
+ *
+ *  The authors and the publisher hold no copyright restrictions 
+ *  on any of these files; this source code is public domain, and 
+ *  is freely available to the entire computer graphics community 
+ *  for study, use, and modification.  We do request that the 
+ *  comment at the top of each file, identifying the original 
+ *  author and its original publication in the book Graphics 
+ *  Gems, be retained in all programs that use these files.
+ *
+ */
+
+#include	<math.h>
+#ifndef M_PI
+#define M_PI		  3.14159265358979323846
+#endif
+
+/* epsilon surrounding for near zero values */
+#define	EQN_EPS 1e-9
+#define	IsZero(x) ((x) > -EQN_EPS && (x) < EQN_EPS)
+
+#define	CBRT(x) ((x) > 0.0 ? pow((double)(x), 1.0/3.0) : \
+                ((x) < 0.0 ? -pow((double)-(x), 1.0/3.0) : 0.0))
+
+int SolveQuadric(double c[3], double s[2]) {
+	double p, q, D;
+
+	/* normal form: x^2 + px + q = 0 */
+	p = c[1] / (2 * c[2]);
+	q = c[0] / c[2];
+
+	D = p * p - q;
+
+	if (IsZero(D)) {
+		s[ 0 ] = - p;
+		return 1;
+	} else if (D < 0) {
+		return 0;
+	} else /* if (D > 0) */ {
+		double sqrt_D = sqrt(D);
+
+		s[0] =   sqrt_D - p;
+		s[1] = - sqrt_D - p;
+		return 2;
+	}
+}
+
+
+int SolveCubic(double c[4], double s[3]) {
+	int i, num;
+	double sub;
+	double A, B, C;
+	double sq_A, p, q;
+	double cb_p, D;
+
+	/* normal form: x^3 + Ax^2 + Bx + C = 0 */
+	A = c[2] / c[3];
+	B = c[1] / c[3];
+	C = c[0] / c[3];
+
+	/* substitute x = y - A/3 to eliminate quadric term: */
+	/* x^3 +px + q = 0 */
+	sq_A = A * A;
+	p = 1.0/3 * (-1.0/3 * sq_A + B);
+	q = 1.0/2 * (2.0/27 * A * sq_A - 1.0/3 * A * B + C);
+
+	/* use Cardano's formula */
+	cb_p = p * p * p;
+	D = q * q + cb_p;
+
+	if (IsZero(D)) {
+		if (IsZero(q)) { /* one triple solution */
+			s[0] = 0.0;
+			num = 1;
+		} else { /* one single and one double solution */
+			double u = CBRT(-q);
+			s[0] = 2.0 * u;
+			s[1] = - u;
+			num = 2;
+		}
+	} else if (D < 0) { /* Casus irreducibilis: three real solutions */
+		double phi = 1.0/3 * acos(-q / sqrt(-cb_p));
+		double t = 2 * sqrt(-p);
+
+		s[0] =  t * cos(phi);
+		s[1] = -t * cos(phi + M_PI / 3.0);
+		s[2] = -t * cos(phi - M_PI / 3.0);
+		num = 3;
+	} else { /* one real solution */
+		double sqrt_D = sqrt(D);
+		double u = CBRT(sqrt_D - q);
+		double v = -CBRT(sqrt_D + q);
+
+		s[0] = u + v;
+		num = 1;
+	}
+
+	/* resubstitute */
+	sub = 1.0/3.0 * A;
+
+	for (i = 0; i < num; i++)
+		s[i] -= sub;
+
+	return num;
+}
+
+
+int SolveQuartic(double c[5], double s[4]) {
+	double coeffs[4];
+	double z, u, v, sub;
+	double A, B, C, D;
+	double sq_A, p, q, r;
+	int	i, num;
+
+	/* normal form: x^4 + Ax^3 + Bx^2 + Cx + D = 0 */
+	A = c[3] / c[4];
+	B = c[2] / c[4];
+	C = c[1] / c[4];
+	D = c[0] / c[4];
+
+	/*  substitute x = y - A/4 to eliminate cubic term:
+		x^4 + px^2 + qx + r = 0 */
+	sq_A = A * A;
+	p = - 3.0/8 * sq_A + B;
+	q = 1.0/8 * sq_A * A - 1.0/2 * A * B + C;
+	r = - 3.0/256*sq_A*sq_A + 1.0/16*sq_A*B - 1.0/4*A*C + D;
+
+	if (IsZero(r)) {
+		/* no absolute term: y(y^3 + py + q) = 0 */
+
+		coeffs[0] = q;
+		coeffs[1] = p;
+		coeffs[2] = 0;
+		coeffs[3] = 1.0;
+
+		num = SolveCubic(coeffs, s);
+
+		s[num++] = 0;
+	} else {
+		/* solve the resolvent cubic ... */
+		coeffs[0] = 1.0/2.0 * r * p - 1.0/8.0 * q * q;
+		coeffs[1] = - r;
+		coeffs[2] = - 1.0/2.0 * p;
+		coeffs[3] = 1.0;
+
+		(void) SolveCubic(coeffs, s);
+
+		/* ... and take the one real solution ... */
+		z = s[0];
+
+		/* ... to build two quadric equations */
+		u = z * z - r;
+		v = 2 * z - p;
+
+		if (IsZero(u))
+			u = 0;
+		else if (u > 0)
+			u = sqrt(u);
+		else
+			return 0;
+
+		if (IsZero(v))
+			v = 0;
+		else if (v > 0)
+			v = sqrt(v);
+		else
+			return 0;
+
+		coeffs[0] = z - u;
+		coeffs[1] = q < 0 ? -v : v;
+		coeffs[2] = 1.0;
+
+		num = SolveQuadric(coeffs, s);
+
+		coeffs[0]= z + u;
+		coeffs[1] = q < 0 ? v : -v;
+		coeffs[2] = 1.0;
+
+		num += SolveQuadric(coeffs, s + num);
+	}
+
+	/* resubstitute */
+	sub = 1.0/4.0 * A;
+
+	for (i = 0; i < num; i++)
+		s[i] -= sub;
+
+	return num;
+}
+
+
diff --git a/numlib/roots.h b/numlib/roots.h
new file mode 100755
index 0000000..e14b72d
--- /dev/null
+++ b/numlib/roots.h
@@ -0,0 +1,23 @@
+
+/*
+ *  Roots3And4.c
+ *
+ *  Utility functions to find cubic and quartic roots,
+ *  coefficients are passed like this:
+ *
+ *	  c[0] + c[1]*x + c[2]*x^2 + c[3]*x^3 + c[4]*x^4 = 0
+ *
+ *  The functions return the number of non-complex roots and
+ *  put the values into the s array.
+ *
+ *  Author:		 Jochen Schwarze (schwarze@isa.de)
+ *               (From Graphics Gems I)
+ */
+
+int SolveQuadric(double c[3], double s[2]);
+
+int SolveCubic(double c[4], double s[3]);
+
+int SolveQuartic(double c[5], double s[4]);
+
+
diff --git a/numlib/tconjgrad.c b/numlib/tconjgrad.c
new file mode 100755
index 0000000..aa98871
--- /dev/null
+++ b/numlib/tconjgrad.c
@@ -0,0 +1,162 @@
+/* Code to test the conjgrad minimiser */
+/*
+ * Copyright 1999, 2018 Graeme W. Gill
+ * All rights reserved.
+ *
+ * This material is licenced under the GNU AFFERO GENERAL PUBLIC LICENSE Version 3 :-
+ * see the License.txt file for licencing details.
+ */
+
+#include <stdio.h>
+#include "numlib.h"
+
+/*       Final approximate solution:                                       */
+
+double expect[9] = {
+	-0.5706545E+00,
+	-0.6816283E+00,
+	-0.7017325E+00,
+	-0.7042129E+00,
+	-0.7013690E+00,
+	-0.6918656E+00,
+	-0.6657920E+00,
+	-0.5960342E+00,
+	-0.4164121E+00 };
+
+double fcn(		/* Return function value */
+	void *fdata,	/* Opaque data pointer */
+	double tp[]		/* Multivriate input value */
+);
+
+static double dfcn(
+	void *fdata,
+	double dp[],
+	double tp[]
+);
+
+#define N 9
+
+static void progress(void *pdata, int perc) {
+	printf("%c% 3d%%",cr_char,perc); 
+	if (perc == 100)
+		printf("\n");
+	fflush(stdout);
+}
+
+int main(void)
+{
+	double cp[N];		/* Function input values */
+	double s[N];		/* Search area */
+	double err;
+	int j;
+	int nprint = 0;		/* Itteration debugging print = off */
+	int rc;
+
+	error_program = "tpowell";	/* Set global error reporting string */
+	check_if_not_interactive();
+
+	/*	 The following starting values provide a rough solution. */
+	for (j = 0; j < N; j++) {
+		cp[j] = -1.f;
+		s[j] = 0.9;
+	}
+
+	nprint = 0;
+
+	/*	 Set tol to the square root of the machine precision. */
+	/*	 Unless high precision solutions are required, */
+	/*	 this is the recommended setting. */
+
+	rc = conjgrad(
+		&err,
+		N, 				/* Dimentionality */
+		cp,				/* Initial starting point */
+		s,				/* Size of initial search area */
+		0.00000001,		/* Tollerance of error change to stop on */
+		1000,			/* Maximum iterations allowed */
+		fcn, 			/* Error function to evaluate */
+		dfcn,			/* Partial derivative function */
+		NULL,			/* Opaque data needed by function */
+		progress,		/* Progress callback */
+		NULL			/* Context for callback */
+	);
+
+
+	fprintf(stdout,"Status = %d, final approximate solution err = %f:\n",rc,err);
+	for (j = 0; j < N; j++) {
+		fprintf(stdout,"cp[%d] = %e, expect %e\n",j,cp[j],expect[j]);
+	}
+
+	return 0;
+} /* main() */
+
+/* Function being minimized */
+double fcn(		/* Return function value */
+void *fdata,	/* Opaque data pointer */
+double tp[]		/* Multivriate input value */
+) {
+	double err, tt;
+	double temp, temp1, temp2;
+	int k;
+
+	/* Function Body */
+	err = 0.0;
+	for (k = 0; k < N; ++k) {
+		temp = (3.0 - 2.0 * tp[k]) * tp[k];
+		temp1 = 0.0;
+		if (k != 0) {
+			temp1 = tp[k-1];
+		}
+		temp2 = 0.0;
+		if (k != ((N)-1))
+			temp2 = tp[k+1];
+		tt = temp - temp1 - 2.0 * temp2 + 1.0;
+		err += tt * tt;
+	}
+	err = sqrt(err);
+//printf("Returning %16.14f\n",err);
+	return err;
+}
+
+
+/* Compute aprox. forward difference */
+
+#define JEPS 1.0e-8	/* Aprox. sqrt of machine precision */
+
+static double dfcn(
+void *fdata,
+double dp[],
+double tp[]
+) {
+	int i;
+	double d, h, temp;
+
+	d = fcn(fdata, tp);
+
+	for (i = 0; i < N; i++) {
+		temp = tp[i];
+
+		h = JEPS * fabs(temp);
+		if (h == 0.0)
+			h = JEPS;
+		tp[i] = temp + h;		/* Add delta */
+		h = tp[i] - temp;		/* Actual delta with fp precision limits */
+
+		dp[i] = (fcn(fdata, tp) - d)/h;
+
+		tp[i] = temp;			/* Restore value */
+	}
+
+	return DFUNC_NRV;
+}
+
+
+
+
+
+
+
+
+
+
+