2 * Library: lmfit (Levenberg-Marquardt least squares fitting)
6 * Contents: Levenberg-Marquardt minimization.
8 * Copyright: MINPACK authors, The University of Chikago (1980-1999)
9 * Joachim Wuttke, Forschungszentrum Juelich GmbH (2004-2013)
11 * License: see ../COPYING (FreeBSD)
13 * Homepage: apps.jcns.fz-juelich.de/lmfit
22 #define MIN(a, b) (((a) <= (b)) ? (a) : (b))
23 #define MAX(a, b) (((a) >= (b)) ? (a) : (b))
24 #define SQR(x) (x)*(x)
26 /* function declarations (implemented below). */
27 void lm_lmpar( int n, double *r, int ldr, int *ipvt, double *diag,
28 double *qtb, double delta, double *par, double *x,
29 double *sdiag, double *aux, double *xdi );
30 void lm_qrfac( int m, int n, double *a, int *ipvt,
31 double *rdiag, double *acnorm, double *wa );
32 void lm_qrsolv( int n, double *r, int ldr, int *ipvt, double *diag,
33 double *qtb, double *x, double *sdiag, double *wa );
36 /*****************************************************************************/
37 /* Numeric constants */
38 /*****************************************************************************/
40 /* machine-dependent constants from float.h */
41 #define LM_MACHEP DBL_EPSILON /* resolution of arithmetic */
42 #define LM_DWARF DBL_MIN /* smallest nonzero number */
43 #define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */
44 #define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */
45 #define LM_USERTOL 30*LM_MACHEP /* users are recommended to require this */
47 /* If the above values do not work, the following seem good for an x86:
53 The following values should work on any machine:
56 LM_SQRT_DWARF 3.834e-20
57 LM_SQRT_GIANT 1.304e19
61 const lm_control_struct lm_control_double = {
62 LM_USERTOL, LM_USERTOL, LM_USERTOL, LM_USERTOL, 100., 100, 1,
65 const lm_control_struct lm_control_float = {
66 1.e-7, 1.e-7, 1.e-7, 1.e-7, 100., 100, 1,
71 /*****************************************************************************/
72 /* Message texts (indexed by status.info) */
73 /*****************************************************************************/
75 const char *lm_infmsg[] = {
76 "found zero (sum of squares below underflow limit)",
77 "converged (the relative error in the sum of squares is at most tol)",
78 "converged (the relative error of the parameter vector is at most tol)",
79 "converged (both errors are at most tol)",
80 "trapped (by degeneracy; increasing epsilon might help)",
81 "exhausted (number of function calls exceeding preset patience)",
82 "failed (ftol<tol: cannot reduce sum of squares any further)",
83 "failed (xtol<tol: cannot improve approximate solution any further)",
84 "failed (gtol<tol: cannot improve approximate solution any further)",
85 "crashed (not enough memory)",
86 "exploded (fatal coding error: improper input parameters)",
87 "stopped (break requested within function evaluation)"
90 const char *lm_shortmsg[] = {
106 /*****************************************************************************/
107 /* Monitoring auxiliaries. */
108 /*****************************************************************************/
110 void lm_print_pars( int nout, const double *par, double fnorm, FILE* fout )
113 for (i = 0; i < nout; ++i)
115 fprintf( fout, " %16.9g", par[i] );
117 fprintf( fout, " => %18.11g\n", fnorm );
121 /*****************************************************************************/
122 /* lmmin (main minimization routine) */
123 /*****************************************************************************/
125 void lmmin( int n, double *x, int m, const void *data,
126 void (*evaluate) (const double *par, int m_dat, const void *data,
127 double *fvec, int *userbreak),
128 const lm_control_struct *C, lm_status_struct *S )
130 double *fvec, *diag, *fjac, *qtf, *wa1, *wa2, *wa3, *wf;
133 double actred, dirder, fnorm, fnorm1, gnorm, pnorm,
134 prered, ratio, step, sum, temp, temp1, temp2, temp3;
135 static double p0001 = 1.0e-4;
137 int maxfev = C->patience * (n+1);
139 int outer, inner; /* loop counters, for monitoring */
140 int inner_success; /* flag for loop control */
141 double lmpar = 0; /* Levenberg-Marquardt parameter */
144 double eps = sqrt(MAX(C->epsilon, LM_MACHEP)); /* for forward differences */
146 int nout = C->n_maxpri == -1 ? n : MIN( C->n_maxpri, n );
148 /* The workaround msgfile=NULL is needed for default initialization */
149 FILE* msgfile = C->msgfile ? C->msgfile : stdout;
151 /* Default status info; must be set ahead of first return statements */
152 S->outcome = 0; /* status code */
154 S->nfev = 0; /* function evaluation counter */
156 /*** Check input parameters for errors. ***/
160 fprintf( stderr, "lmmin: invalid number of parameters %i\n", n );
161 S->outcome = 10; /* invalid parameter */
166 fprintf( stderr, "lmmin: number of data points (%i) "
167 "smaller than number of parameters (%i)\n", m, n );
171 if (C->ftol < 0. || C->xtol < 0. || C->gtol < 0.)
174 "lmmin: negative tolerance (at least one of %g %g %g)\n",
175 C->ftol, C->xtol, C->gtol );
181 fprintf( stderr, "lmmin: nonpositive function evaluations limit %i\n",
186 if (C->stepbound <= 0.)
188 fprintf( stderr, "lmmin: nonpositive stepbound %g\n", C->stepbound );
192 if (C->scale_diag != 0 && C->scale_diag != 1)
194 fprintf( stderr, "lmmin: logical variable scale_diag=%i, "
195 "should be 0 or 1\n", C->scale_diag );
200 /*** Allocate work space. ***/
201 fvec = diag = fjac = qtf = wa1 = wa2 = wa3 = wf = NULL;
203 if ( (fvec = (double *) malloc(m * sizeof(double))) == NULL ||
204 (diag = (double *) malloc(n * sizeof(double))) == NULL ||
205 (qtf = (double *) malloc(n * sizeof(double))) == NULL ||
206 (fjac = (double *) malloc(n*m*sizeof(double))) == NULL ||
207 (wa1 = (double *) malloc(n * sizeof(double))) == NULL ||
208 (wa2 = (double *) malloc(n * sizeof(double))) == NULL ||
209 (wa3 = (double *) malloc(n * sizeof(double))) == NULL ||
210 (wf = (double *) malloc(m * sizeof(double))) == NULL ||
211 (ipvt = (int *) malloc(n * sizeof(int) )) == NULL)
255 for (j = 0; j < n; j++)
261 /*** Evaluate function at starting point and calculate norm. ***/
263 (*evaluate)( x, m, data, fvec, &(S->userbreak) );
269 fnorm = lm_enorm(m, fvec);
272 fprintf( msgfile, "lmmin start " );
273 lm_print_pars( nout, x, fnorm, msgfile );
275 if (fnorm <= LM_DWARF)
277 S->outcome = 0; /* sum of squares almost zero, nothing to do */
281 /*** The outer loop: compute gradient, then descend. ***/
283 for (outer = 0;; ++outer)
286 /*** [outer] Calculate the Jacobian. ***/
288 for (j = 0; j < n; j++)
291 step = MAX(eps*eps, eps * fabs(temp));
292 x[j] += step; /* replace temporarily */
293 (*evaluate)( x, m, data, wf, &(S->userbreak) );
299 for (i = 0; i < m; i++)
301 fjac[j*m+i] = (wf[i] - fvec[i]) / step;
303 x[j] = temp; /* restore */
305 if (C->verbosity >= 10)
307 /* print the entire matrix */
308 printf("\nlmmin Jacobian\n");
309 for (i = 0; i < m; i++)
312 for (j = 0; j < n; j++)
314 printf("%.5e ", fjac[j*m+i]);
320 /*** [outer] Compute the QR factorization of the Jacobian. ***/
322 /* fjac is an m by n array. The upper n by n submatrix of fjac
323 * is made to contain an upper triangular matrix r with diagonal
324 * elements of nonincreasing magnitude such that
326 * p^T*(jac^T*jac)*p = r^T*r
328 * (NOTE: ^T stands for matrix transposition),
330 * where p is a permutation matrix and jac is the final calculated
331 * Jacobian. Column j of p is column ipvt(j) of the identity matrix.
332 * The lower trapezoidal part of fjac contains information generated
333 * during the computation of r.
335 * ipvt is an integer array of length n. It defines a permutation
336 * matrix p such that jac*p = q*r, where jac is the final calculated
337 * Jacobian, q is orthogonal (not stored), and r is upper triangular
338 * with diagonal elements of nonincreasing magnitude. Column j of p
339 * is column ipvt(j) of the identity matrix.
342 lm_qrfac(m, n, fjac, ipvt, wa1, wa2, wa3);
343 /* return values are ipvt, wa1=rdiag, wa2=acnorm */
345 /*** [outer] Form q^T * fvec and store first n components in qtf. ***/
347 for (i = 0; i < m; i++)
352 for (j = 0; j < n; j++)
358 for (i = j; i < m; i++)
360 sum += fjac[j*m+i] * wf[i];
363 for (i = j; i < m; i++)
365 wf[i] += fjac[j*m+i] * temp;
368 fjac[j*m+j] = wa1[j];
372 /*** [outer] Compute norm of scaled gradient and detect degeneracy. ***/
375 for (j = 0; j < n; j++)
377 if (wa2[ipvt[j]] == 0)
382 for (i = 0; i <= j; i++)
384 sum += fjac[j*m+i] * qtf[i];
386 gnorm = MAX( gnorm, fabs( sum / wa2[ipvt[j]] / fnorm ) );
389 if (gnorm <= C->gtol)
395 /*** [outer] Initialize / update diag and delta. ***/
399 /* first iteration only */
402 /* diag := norms of the columns of the initial Jacobian */
403 for (j = 0; j < n; j++)
405 diag[j] = wa2[j] ? wa2[j] : 1;
407 /* xnorm := || D x || */
408 for (j = 0; j < n; j++)
410 wa3[j] = diag[j] * x[j];
412 xnorm = lm_enorm(n, wa3);
413 if (C->verbosity >= 2)
415 fprintf( msgfile, "lmmin diag " );
416 lm_print_pars( nout, x, xnorm, msgfile );
418 /* only now print the header for the loop table */
419 if (C->verbosity >= 3)
421 fprintf( msgfile, " o i lmpar prered"
422 " ratio dirder delta"
424 for (i = 0; i < nout; ++i)
426 fprintf( msgfile, " p%i", i );
428 fprintf( msgfile, "\n" );
433 xnorm = lm_enorm(n, x);
435 /* initialize the step bound delta. */
438 delta = C->stepbound * xnorm;
442 delta = C->stepbound;
449 for (j = 0; j < n; j++)
451 diag[j] = MAX( diag[j], wa2[j] );
456 /*** The inner loop. ***/
461 /*** [inner] Determine the Levenberg-Marquardt parameter. ***/
463 lm_lmpar( n, fjac, m, ipvt, diag, qtf, delta, &lmpar,
465 /* used return values are fjac (partly), lmpar, wa1=x, wa3=diag*x */
467 /* predict scaled reduction */
468 pnorm = lm_enorm(n, wa3);
469 temp2 = lmpar * SQR( pnorm / fnorm );
470 for (j = 0; j < n; j++)
473 for (i = 0; i <= j; i++)
475 wa3[i] -= fjac[j*m+i] * wa1[ipvt[j]];
478 temp1 = SQR( lm_enorm(n, wa3) / fnorm );
479 prered = temp1 + 2 * temp2;
480 dirder = -temp1 + temp2; /* scaled directional derivative */
482 /* at first call, adjust the initial step bound. */
483 if (!outer && pnorm < delta)
488 /*** [inner] Evaluate the function at x + p. ***/
490 for (j = 0; j < n; j++)
492 wa2[j] = x[j] - wa1[j];
495 (*evaluate)( wa2, m, data, wf, &(S->userbreak) );
501 fnorm1 = lm_enorm(m, wf);
503 /*** [inner] Evaluate the scaled reduction. ***/
505 /* actual scaled reduction */
506 actred = 1 - SQR(fnorm1/fnorm);
508 /* ratio of actual to predicted reduction */
509 ratio = prered ? actred/prered : 0;
511 if (C->verbosity == 2)
513 fprintf( msgfile, "lmmin (%i:%i) ", outer, inner );
514 lm_print_pars( nout, wa2, fnorm1, msgfile );
516 else if (C->verbosity >= 3)
518 printf( "%3i %2i %9.2g %9.2g %14.6g"
519 " %9.2g %10.3e %10.3e %21.15e",
520 outer, inner, lmpar, prered, ratio,
521 dirder, delta, pnorm, fnorm1 );
522 for (i = 0; i < nout; ++i)
524 fprintf( msgfile, " %16.9g", wa2[i] );
526 fprintf( msgfile, "\n" );
529 /* update the step bound */
536 else if (actred > -99) /* -99 = 1-1/0.1^2 */
538 temp = MAX( dirder / (2*dirder + actred), 0.1 );
544 delta = temp * MIN(delta, pnorm / 0.1);
547 else if (ratio >= 0.75)
557 /*** [inner] On success, update solution, and test for convergence. ***/
559 inner_success = ratio >= p0001;
563 /* update x, fvec, and their norms */
566 for (j = 0; j < n; j++)
569 wa2[j] = diag[j] * x[j];
574 for (j = 0; j < n; j++)
579 for (i = 0; i < m; i++)
583 xnorm = lm_enorm(n, wa2);
587 /* convergence tests */
589 if (fnorm <= LM_DWARF)
591 goto terminate; /* success: sum of squares almost zero */
593 /* test two criteria (both may be fulfilled) */
594 if (fabs(actred) <= C->ftol && prered <= C->ftol && ratio <= 2)
596 S->outcome = 1; /* success: x almost stable */
598 if (delta <= C->xtol * xnorm)
600 S->outcome += 2; /* success: sum of squares almost stable */
607 /*** [inner] Tests for termination and stringent tolerances. ***/
609 if (S->nfev >= maxfev)
614 if (fabs(actred) <= LM_MACHEP &&
615 prered <= LM_MACHEP && ratio <= 2)
620 if (delta <= LM_MACHEP*xnorm)
625 if (gnorm <= LM_MACHEP)
631 /*** [inner] End of the loop. Repeat if iteration unsuccessful. ***/
635 while (!inner_success);
637 /*** [outer] End of the loop. ***/
643 S->fnorm = lm_enorm(m, fvec);
644 if (C->verbosity >= 2)
646 printf("lmmin outcome (%i) xnorm %g ftol %g xtol %g\n",
647 S->outcome, xnorm, C->ftol, C->xtol );
649 if (C->verbosity & 1)
651 fprintf( msgfile, "lmmin final " );
652 lm_print_pars( nout, x, S->fnorm, msgfile );
654 if (S->userbreak) /* user-requested break */
659 /*** Deallocate the workspace. ***/
673 /*****************************************************************************/
674 /* lm_lmpar (determine Levenberg-Marquardt parameter) */
675 /*****************************************************************************/
677 void lm_lmpar(int n, double *r, int ldr, int *ipvt, double *diag,
678 double *qtb, double delta, double *par, double *x,
679 double *sdiag, double *aux, double *xdi)
681 /* Given an m by n matrix a, an n by n nonsingular diagonal
682 * matrix d, an m-vector b, and a positive number delta,
683 * the problem is to determine a value for the parameter
684 * par such that if x solves the system
686 * a*x = b and sqrt(par)*d*x = 0
688 * in the least squares sense, and dxnorm is the euclidean
689 * norm of d*x, then either par=0 and (dxnorm-delta) < 0.1*delta,
690 * or par>0 and abs(dxnorm-delta) < 0.1*delta.
692 * Using lm_qrsolv, this subroutine completes the solution of the problem
693 * if it is provided with the necessary information from the
694 * qr factorization, with column pivoting, of a. That is, if
695 * a*p = q*r, where p is a permutation matrix, q has orthogonal
696 * columns, and r is an upper triangular matrix with diagonal
697 * elements of nonincreasing magnitude, then lmpar expects
698 * the full upper triangle of r, the permutation matrix p,
699 * and the first n components of qT*b. On output
700 * lmpar also provides an upper triangular matrix s such that
702 * p^T*(a^T*a + par*d*d)*p = s^T*s.
704 * s is employed within lmpar and may be of separate interest.
706 * Only a few iterations are generally needed for convergence
707 * of the algorithm. If, however, the limit of 10 iterations
708 * is reached, then the output par will contain the best
709 * value obtained so far.
713 * n is a positive integer input variable set to the order of r.
715 * r is an n by n array. on input the full upper triangle
716 * must contain the full upper triangle of the matrix r.
717 * on OUTPUT the full upper triangle is unaltered, and the
718 * strict lower triangle contains the strict upper triangle
719 * (transposed) of the upper triangular matrix s.
721 * ldr is a positive integer input variable not less than n
722 * which specifies the leading dimension of the array r.
724 * ipvt is an integer input array of length n which defines the
725 * permutation matrix p such that a*p = q*r. column j of p
726 * is column ipvt(j) of the identity matrix.
728 * diag is an input array of length n which must contain the
729 * diagonal elements of the matrix d.
731 * qtb is an input array of length n which must contain the first
732 * n elements of the vector (q transpose)*b.
734 * delta is a positive input variable which specifies an upper
735 * bound on the euclidean norm of d*x.
737 * par is a nonnegative variable. on input par contains an
738 * initial estimate of the levenberg-marquardt parameter.
739 * on OUTPUT par contains the final estimate.
741 * x is an OUTPUT array of length n which contains the least
742 * squares solution of the system a*x = b, sqrt(par)*d*x = 0,
743 * for the output par.
745 * sdiag is an array of length n needed as workspace; on OUTPUT
746 * it contains the diagonal elements of the upper triangular matrix s.
748 * aux is a multi-purpose work array of length n.
750 * xdi is a work array of length n. On OUTPUT: diag[j] * x[j].
753 int i, iter, j, nsing;
754 double dxnorm, fp, fp_old, gnorm, parc, parl, paru;
756 static double p1 = 0.1;
758 /*** lmpar: compute and store in x the gauss-newton direction. if the
759 jacobian is rank-deficient, obtain a least squares solution. ***/
762 for (j = 0; j < n; j++)
765 if (r[j * ldr + j] == 0 && nsing == n)
774 for (j = nsing - 1; j >= 0; j--)
776 aux[j] = aux[j] / r[j + ldr * j];
778 for (i = 0; i < j; i++)
780 aux[i] -= r[j * ldr + i] * temp;
784 for (j = 0; j < n; j++)
789 /*** lmpar: initialize the iteration counter, evaluate the function at the
790 origin, and test for acceptance of the gauss-newton direction. ***/
792 for (j = 0; j < n; j++)
794 xdi[j] = diag[j] * x[j];
796 dxnorm = lm_enorm(n, xdi);
798 if (fp <= p1 * delta)
800 #ifdef LMFIT_DEBUG_MESSAGES
801 printf("debug lmpar nsing %d n %d, terminate (fp<p1*delta)\n",
808 /*** lmpar: if the jacobian is not rank deficient, the newton
809 step provides a lower bound, parl, for the 0. of
810 the function. otherwise set this bound to 0.. ***/
815 for (j = 0; j < n; j++)
817 aux[j] = diag[ipvt[j]] * xdi[ipvt[j]] / dxnorm;
820 for (j = 0; j < n; j++)
823 for (i = 0; i < j; i++)
825 sum += r[j * ldr + i] * aux[i];
827 aux[j] = (aux[j] - sum) / r[j + ldr * j];
829 temp = lm_enorm(n, aux);
830 parl = fp / delta / temp / temp;
833 /*** lmpar: calculate an upper bound, paru, for the 0. of the function. ***/
835 for (j = 0; j < n; j++)
838 for (i = 0; i <= j; i++)
840 sum += r[j * ldr + i] * qtb[i];
842 aux[j] = sum / diag[ipvt[j]];
844 gnorm = lm_enorm(n, aux);
845 paru = gnorm / delta;
848 paru = LM_DWARF / MIN(delta, p1);
851 /*** lmpar: if the input par lies outside of the interval (parl,paru),
852 set par to the closer endpoint. ***/
854 *par = MAX(*par, parl);
855 *par = MIN(*par, paru);
858 *par = gnorm / dxnorm;
861 /*** lmpar: iterate. ***/
863 for (iter = 0;; iter++)
866 /** evaluate the function at the current value of par. **/
870 *par = MAX(LM_DWARF, 0.001 * paru);
873 for (j = 0; j < n; j++)
875 aux[j] = temp * diag[j];
878 lm_qrsolv( n, r, ldr, ipvt, aux, qtb, x, sdiag, xdi );
879 /* return values are r, x, sdiag */
881 for (j = 0; j < n; j++)
883 xdi[j] = diag[j] * x[j]; /* used as output */
885 dxnorm = lm_enorm(n, xdi);
889 /** if the function is small enough, accept the current value
890 of par. Also test for the exceptional cases where parl
891 is zero or the number of iterations has reached 10. **/
893 if (fabs(fp) <= p1 * delta
894 || (parl == 0. && fp <= fp_old && fp_old < 0.)
897 #ifdef LMFIT_DEBUG_MESSAGES
898 printf("debug lmpar nsing %d iter %d "
899 "par %.4e [%.4e %.4e] delta %.4e fp %.4e\n",
900 nsing, iter, *par, parl, paru, delta, fp);
902 break; /* the only exit from the iteration. */
905 /** compute the Newton correction. **/
907 for (j = 0; j < n; j++)
909 aux[j] = diag[ipvt[j]] * xdi[ipvt[j]] / dxnorm;
912 for (j = 0; j < n; j++)
914 aux[j] = aux[j] / sdiag[j];
915 for (i = j + 1; i < n; i++)
917 aux[i] -= r[j * ldr + i] * aux[j];
920 temp = lm_enorm(n, aux);
921 parc = fp / delta / temp / temp;
923 /** depending on the sign of the function, update parl or paru. **/
927 parl = MAX(parl, *par);
931 paru = MIN(paru, *par);
933 /* the case fp==0 is precluded by the break condition */
935 /** compute an improved estimate for par. **/
937 *par = MAX(parl, *par + parc);
941 } /*** lm_lmpar. ***/
943 /*****************************************************************************/
944 /* lm_qrfac (QR factorization, from lapack) */
945 /*****************************************************************************/
947 void lm_qrfac(int m, int n, double *a, int *ipvt,
948 double *rdiag, double *acnorm, double *wa)
951 * This subroutine uses Householder transformations with column
952 * pivoting (optional) to compute a qr factorization of the
953 * m by n matrix a. That is, qrfac determines an orthogonal
954 * matrix q, a permutation matrix p, and an upper trapezoidal
955 * matrix r with diagonal elements of nonincreasing magnitude,
956 * such that a*p = q*r. The Householder transformation for
957 * column k, k = 1,2,...,min(m,n), is of the form
961 * where u has zeroes in the first k-1 positions. The form of
962 * this transformation and the method of pivoting first
963 * appeared in the corresponding linpack subroutine.
967 * m is a positive integer input variable set to the number
970 * n is a positive integer input variable set to the number
973 * a is an m by n array. On input a contains the matrix for
974 * which the qr factorization is to be computed. On OUTPUT
975 * the strict upper trapezoidal part of a contains the strict
976 * upper trapezoidal part of r, and the lower trapezoidal
977 * part of a contains a factored form of q (the non-trivial
978 * elements of the u vectors described above).
980 * ipvt is an integer OUTPUT array of length lipvt. This array
981 * defines the permutation matrix p such that a*p = q*r.
982 * Column j of p is column ipvt(j) of the identity matrix.
984 * rdiag is an OUTPUT array of length n which contains the
985 * diagonal elements of r.
987 * acnorm is an OUTPUT array of length n which contains the
988 * norms of the corresponding columns of the input matrix a.
989 * If this information is not needed, then acnorm can coincide
992 * wa is a work array of length n.
995 int i, j, k, kmax, minmn;
996 double ajnorm, sum, temp;
998 /*** qrfac: compute initial column norms and initialize several arrays. ***/
1000 for (j = 0; j < n; j++)
1002 acnorm[j] = lm_enorm(m, &a[j*m]);
1003 rdiag[j] = acnorm[j];
1007 #ifdef LMFIT_DEBUG_MESSAGES
1008 printf("debug qrfac\n");
1011 /*** qrfac: reduce a to r with Householder transformations. ***/
1014 for (j = 0; j < minmn; j++)
1017 /** bring the column of largest norm into the pivot position. **/
1020 for (k = j + 1; k < n; k++)
1022 if (rdiag[k] > rdiag[kmax])
1032 for (i = 0; i < m; i++)
1035 a[j*m+i] = a[kmax*m+i];
1038 rdiag[kmax] = rdiag[j];
1041 ipvt[j] = ipvt[kmax];
1045 /** compute the Householder transformation to reduce the
1046 j-th column of a to a multiple of the j-th unit vector. **/
1048 ajnorm = lm_enorm(m-j, &a[j*m+j]);
1059 for (i = j; i < m; i++)
1065 /** apply the transformation to the remaining columns
1066 and update the norms. **/
1068 for (k = j + 1; k < n; k++)
1072 for (i = j; i < m; i++)
1074 sum += a[j*m+i] * a[k*m+i];
1077 temp = sum / a[j + m * j];
1079 for (i = j; i < m; i++)
1081 a[k*m+i] -= temp * a[j*m+i];
1086 temp = a[m * k + j] / rdiag[k];
1087 temp = MAX(0., 1 - temp * temp);
1088 rdiag[k] *= sqrt(temp);
1089 temp = rdiag[k] / wa[k];
1090 if (0.05 * SQR(temp) <= LM_MACHEP)
1092 rdiag[k] = lm_enorm(m-j-1, &a[m*k+j+1]);
1100 } /*** lm_qrfac. ***/
1103 /*****************************************************************************/
1104 /* lm_qrsolv (linear least-squares) */
1105 /*****************************************************************************/
1107 void lm_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag,
1108 double *qtb, double *x, double *sdiag, double *wa)
1111 * Given an m by n matrix a, an n by n diagonal matrix d,
1112 * and an m-vector b, the problem is to determine an x which
1115 * a*x = b and d*x = 0
1117 * in the least squares sense.
1119 * This subroutine completes the solution of the problem
1120 * if it is provided with the necessary information from the
1121 * qr factorization, with column pivoting, of a. That is, if
1122 * a*p = q*r, where p is a permutation matrix, q has orthogonal
1123 * columns, and r is an upper triangular matrix with diagonal
1124 * elements of nonincreasing magnitude, then qrsolv expects
1125 * the full upper triangle of r, the permutation matrix p,
1126 * and the first n components of (q transpose)*b. The system
1127 * a*x = b, d*x = 0, is then equivalent to
1129 * r*z = q^T*b, p^T*d*p*z = 0,
1131 * where x = p*z. If this system does not have full rank,
1132 * then a least squares solution is obtained. On output qrsolv
1133 * also provides an upper triangular matrix s such that
1135 * p^T *(a^T *a + d*d)*p = s^T *s.
1137 * s is computed within qrsolv and may be of separate interest.
1141 * n is a positive integer input variable set to the order of r.
1143 * r is an n by n array. On input the full upper triangle
1144 * must contain the full upper triangle of the matrix r.
1145 * On OUTPUT the full upper triangle is unaltered, and the
1146 * strict lower triangle contains the strict upper triangle
1147 * (transposed) of the upper triangular matrix s.
1149 * ldr is a positive integer input variable not less than n
1150 * which specifies the leading dimension of the array r.
1152 * ipvt is an integer input array of length n which defines the
1153 * permutation matrix p such that a*p = q*r. Column j of p
1154 * is column ipvt(j) of the identity matrix.
1156 * diag is an input array of length n which must contain the
1157 * diagonal elements of the matrix d.
1159 * qtb is an input array of length n which must contain the first
1160 * n elements of the vector (q transpose)*b.
1162 * x is an OUTPUT array of length n which contains the least
1163 * squares solution of the system a*x = b, d*x = 0.
1165 * sdiag is an OUTPUT array of length n which contains the
1166 * diagonal elements of the upper triangular matrix s.
1168 * wa is a work array of length n.
1171 int i, kk, j, k, nsing;
1172 double qtbpj, sum, temp;
1173 double _sin, _cos, _tan, _cot; /* local variables, not functions */
1175 /*** qrsolv: copy r and q^T*b to preserve input and initialize s.
1176 in particular, save the diagonal elements of r in x. ***/
1178 for (j = 0; j < n; j++)
1180 for (i = j; i < n; i++)
1182 r[j * ldr + i] = r[i * ldr + j];
1184 x[j] = r[j * ldr + j];
1187 /*** qrsolv: eliminate the diagonal matrix d using a Givens rotation. ***/
1189 for (j = 0; j < n; j++)
1192 /*** qrsolv: prepare the row of d to be eliminated, locating the
1193 diagonal element using p from the qr factorization. ***/
1195 if (diag[ipvt[j]] == 0.)
1199 for (k = j; k < n; k++)
1203 sdiag[j] = diag[ipvt[j]];
1205 /*** qrsolv: the transformations to eliminate the row of d modify only
1206 a single element of qT*b beyond the first n, which is initially 0. ***/
1209 for (k = j; k < n; k++)
1212 /** determine a Givens rotation which eliminates the
1213 appropriate element in the current row of d. **/
1220 if (fabs(r[kk]) < fabs(sdiag[k]))
1222 _cot = r[kk] / sdiag[k];
1223 _sin = 1 / sqrt(1 + SQR(_cot));
1228 _tan = sdiag[k] / r[kk];
1229 _cos = 1 / sqrt(1 + SQR(_tan));
1233 /** compute the modified diagonal element of r and
1234 the modified element of ((q^T)*b,0). **/
1236 r[kk] = _cos * r[kk] + _sin * sdiag[k];
1237 temp = _cos * wa[k] + _sin * qtbpj;
1238 qtbpj = -_sin * wa[k] + _cos * qtbpj;
1241 /** accumulate the tranformation in the row of s. **/
1243 for (i = k + 1; i < n; i++)
1245 temp = _cos * r[k * ldr + i] + _sin * sdiag[i];
1246 sdiag[i] = -_sin * r[k * ldr + i] + _cos * sdiag[i];
1247 r[k * ldr + i] = temp;
1252 /** store the diagonal element of s and restore
1253 the corresponding diagonal element of r. **/
1255 sdiag[j] = r[j * ldr + j];
1256 r[j * ldr + j] = x[j];
1259 /*** qrsolv: solve the triangular system for z. if the system is
1260 singular, then obtain a least squares solution. ***/
1263 for (j = 0; j < n; j++)
1265 if (sdiag[j] == 0. && nsing == n)
1275 for (j = nsing - 1; j >= 0; j--)
1278 for (i = j + 1; i < nsing; i++)
1280 sum += r[j * ldr + i] * wa[i];
1282 wa[j] = (wa[j] - sum) / sdiag[j];
1285 /*** qrsolv: permute the components of z back to components of x. ***/
1287 for (j = 0; j < n; j++)
1292 } /*** lm_qrsolv. ***/
1295 /*****************************************************************************/
1296 /* lm_enorm (Euclidean norm) */
1297 /*****************************************************************************/
1299 double lm_enorm(int n, const double *x)
1301 /* Given an n-vector x, this function calculates the
1302 * euclidean norm of x.
1304 * The euclidean norm is computed by accumulating the sum of
1305 * squares in three different sums. The sums of squares for the
1306 * small and large components are scaled so that no overflows
1307 * occur. Non-destructive underflows are permitted. Underflows
1308 * and overflows do not occur in the computation of the unscaled
1309 * sum of squares for the intermediate components.
1310 * The definitions of small, intermediate and large components
1311 * depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main
1312 * restrictions on these constants are that LM_SQRT_DWARF**2 not
1313 * underflow and LM_SQRT_GIANT**2 not overflow.
1317 * n is a positive integer input variable.
1319 * x is an input array of length n.
1322 double agiant, s1, s2, s3, xabs, x1max, x3max, temp;
1329 agiant = LM_SQRT_GIANT / n;
1331 /** sum squares. **/
1333 for (i = 0; i < n; i++)
1336 if (xabs > LM_SQRT_DWARF)
1342 else if (xabs > x1max)
1344 temp = x1max / xabs;
1345 s1 = 1 + s1 * SQR(temp);
1350 temp = xabs / x1max;
1354 else if (xabs > x3max)
1356 temp = x3max / xabs;
1357 s3 = 1 + s3 * SQR(temp);
1360 else if (xabs != 0.)
1362 temp = xabs / x3max;
1367 /** calculation of norm. **/
1371 return x1max * sqrt(s1 + (s2 / x1max) / x1max);
1377 return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3)));
1381 return sqrt(x3max * ((s2 / x3max) + (x3max * s3)));
1386 return x3max * sqrt(s3);
1389 } /*** lm_enorm. ***/