2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
40 * Implements routine for fitting a data set to a curve
42 * \author David van der Spoel <david.vanderspoel@icm.uu.se>
43 * \ingroup module_correlationfunctions
54 #include "external/lmfit/lmcurve.h"
56 #include "gromacs/correlationfunctions/integrate.h"
57 #include "gromacs/fileio/xvgr.h"
58 #include "gromacs/legacyheaders/macros.h"
59 #include "gromacs/math/vec.h"
60 #include "gromacs/utility/fatalerror.h"
61 #include "gromacs/utility/futil.h"
62 #include "gromacs/utility/gmxassert.h"
63 #include "gromacs/utility/real.h"
64 #include "gromacs/utility/smalloc.h"
66 /*! \brief Number of parameters for each fitting function */
67 static const int nfp_ffn[effnNR] = { 0, 1, 2, 3, 5, 7, 9, 2, 4, 3, 6 };
69 /* +2 because parse_common_args wants a leading and a concluding NULL.
70 * We only allow exponential functions as choices on the command line,
71 * hence there are many more NULL fields (which have to be at the end of
74 const char *s_ffn[effnNR+2] = {
75 NULL, "none", "exp", "aexp", "exp_exp",
76 "exp5", "exp7", "exp9",
77 NULL, NULL, NULL, NULL, NULL
80 /*! \brief Long description for each fitting function type */
81 static const char *longs_ffn[effnNR] = {
84 "y = a1 exp(-x/|a0|)",
85 "y = a1 exp(-x/|a0|) + (1-a1) exp(-x/(|a2|)), a2 > a0 > 0",
86 "y = a0 exp(-x/|a1|) + a2 exp(-x/|a3|) + a4, a3 >= a1",
87 "y = a0 exp(-x/|a1|) + a2 exp(-x/|a3|) + a4 exp(-x/|a5|) + a6, a5 >= a3 >= a1",
88 "y = a0 exp(-x/|a1|) + a2 exp(-x/|a3|) + a4 exp(-x/|a5|) + a6 exp(-x/|a7|) + a8, a7 >= a5 >= a3 >= a1",
89 "y = exp(-v) (cosh(wv) + 1/w sinh(wv)), v = x/(2 a0), w = sqrt(1 - a1)",
90 "y = 1/2*(a0+a1) - 1/2*(a0-a1)*erf( (x-a2) /a3^2)",
91 "y = a1 *2*a0*((exp(-x/a0)-1)*(a0/x)+1)+(1-a1)*2*a2*((exp(-x/a2)-1)*(a2/x)+1)",
92 "y = (1-a0)*cos(x*a1)*exp(-(x/a2)^a3) + a0*exp(-(x/a4)^a5)"
95 int effnNparams(int effn)
97 if ((0 <= effn) && (effn < effnNR))
107 const char *effnDescription(int effn)
109 if ((0 <= effn) && (effn < effnNR))
111 return longs_ffn[effn];
119 int sffn2effn(const char **sffn)
124 for (i = 0; i < effnNR; i++)
126 if (sffn[i+1] && strcmp(sffn[0], sffn[i+1]) == 0)
135 /*! \brief Compute exponential function A exp(-x/tau) */
136 static double myexp(double x, double A, double tau)
138 if ((A == 0) || (tau == 0))
142 return A*exp(-x/tau);
145 /*! \brief Compute y=(a0+a1)/2-(a0-a1)/2*erf((x-a2)/a3^2) */
146 static double lmc_erffit (double x, const double *a)
153 erfarg = (x-a[2])/(a[3]*a[3]);
154 myerf = gmx_erfd(erfarg);
158 /* If a[3] == 0, a[3]^2 = 0 and the erfarg becomes +/- infinity */
168 return 0.5*((a[0]+a[1]) - (a[0]-a[1])*myerf);
171 /*! \brief Exponent function that prevents overflow */
172 static double safe_exp(double x)
174 double exp_max = 200;
175 double exp_min = -exp_max;
180 else if (x >= exp_max)
190 /*! \brief Exponent minus 1 function that prevents overflow */
191 static double safe_expm1(double x)
193 double exp_max = 200;
194 double exp_min = -exp_max;
199 else if (x >= exp_max)
209 /*! \brief Compute y = exp(-x/|a0|) */
210 static double lmc_exp_one_parm(double x, const double *a)
212 return safe_exp(-x/fabs(a[0]));
215 /*! \brief Compute y = a1 exp(-x/|a0|) */
216 static double lmc_exp_two_parm(double x, const double *a)
218 return a[1]*safe_exp(-x/fabs(a[0]));
221 /*! \brief Compute y = a1 exp(-x/|a0|) + (1-a1) exp(-x/|a2|) */
222 static double lmc_exp_exp(double x, const double *a)
226 e1 = safe_exp(-x/fabs(a[0]));
227 e2 = safe_exp(-x/(fabs(a[0])+fabs(a[2])));
228 return a[1]*e1 + (1-a[1])*e2;
231 /*! \brief Compute y = a0 exp(-x/|a1|) + a2 exp(-x/(|a1|+|a3|)) + a4 */
232 static double lmc_exp_5_parm(double x, const double *a)
236 e1 = safe_exp(-x/fabs(a[1]));
237 e2 = safe_exp(-x/(fabs(a[1])+fabs(a[3])));
238 return a[0]*e1 + a[2]*e2 + a[4];
241 /*! \brief Compute 7 parameter exponential function value.
243 * Compute y = a0 exp(-x/|a1|) + a2 exp(-x/(|a1|+|a3|)) +
244 * a4 exp(-x/(|a1|+|a3|+|a5|)) + a6
246 static double lmc_exp_7_parm(double x, const double *a)
249 double fa1, fa3, fa5;
252 fa3 = fa1 + fabs(a[3]);
253 fa5 = fa3 + fabs(a[5]);
254 e1 = safe_exp(-x/fa1);
255 e2 = safe_exp(-x/fa3);
256 e3 = safe_exp(-x/fa5);
257 return a[0]*e1 + a[2]*e2 + a[4]*e3 + a[6];
260 /*! \brief Compute 9 parameter exponential function value.
262 * Compute y = a0 exp(-x/|a1|) + a2 exp(-x/(|a1|+|a3|)) +
263 * a4 exp(-x/(|a1|+|a3|+|a5|)) + a6 exp(-x/(|a1|+|a3|+|a5|+|a7|)) + a8
265 static double lmc_exp_9_parm(double x, const double *a)
267 double e1, e2, e3, e4;
268 double fa1, fa3, fa5, fa7;
271 fa3 = fa1 + fabs(a[3]);
272 fa5 = fa3 + fabs(a[5]);
273 fa7 = fa5 + fabs(a[7]);
275 e1 = safe_exp(-x/fa1);
276 e2 = safe_exp(-x/fa3);
277 e3 = safe_exp(-x/fa5);
278 e4 = safe_exp(-x/fa7);
279 return a[0]*e1 + a[2]*e2 + a[4]*e3 + a[6]*e4 + a[8];
282 /*! \brief Compute y = (1-a0)*exp(-(x/|a2|)^|a3|)*cos(x*|a1|) + a0*exp(-(x/|a4|)^|a5|) */
283 static double lmc_pres_6_parm(double x, const double *a)
285 double term1, term2, term3;
289 if ((a[4] != 0) && (a[0] != 0))
291 double power = std::min(fabs(a[5]), pow_max);
292 term3 = a[0] * safe_exp(-pow((x/fabs(a[4])), power));
297 if ((term1 != 0) && (a[2] != 0))
299 double power = std::min(fabs(a[3]), pow_max);
300 term2 = safe_exp(-pow((x/fabs(a[2])), power)) * cos(x*fabs(a[1]));
303 return term1*term2 + term3;
306 /*! \brief Compute vac function */
307 static double lmc_vac_2_parm(double x, const double *a)
311 * y = 1/2 (1 - 1/w) exp(-(1+w)v) + 1/2 (1 + 1/w) exp(-(1-w)v)
313 * = exp(-v) (cosh(wv) + 1/w sinh(wv))
318 * For transverse current autocorrelation functions:
320 * a1 = 4 tau (eta/rho) k^2
324 double y, v, det, omega, wv, em, ec, es;
327 v = x/(2*fabs(a[0]));
332 omega = sqrt(fabs(det));
333 wv = std::min(omega*v, wv_max);
337 ec = em*0.5*(safe_exp(wv)+safe_exp(-wv));
338 es = em*0.5*(safe_exp(wv)-safe_exp(-wv))/omega;
343 es = em*sin(wv)/omega;
354 /*! \brief Compute error estimate */
355 static double lmc_errest_3_parm(double x, const double *a)
357 double e1, e2, v1, v2;
358 double fa0 = fabs(a[0]);
360 double fa2 = fa0+fabs(a[2]);
364 e1 = safe_expm1(-x/fa0);
372 e2 = safe_expm1(-x/fa2);
381 v1 = 2*fa0*(e1*fa0/x + 1);
382 v2 = 2*fa2*(e2*fa2/x + 1);
383 /* We need 0 <= a1 <= 1 */
384 fa1 = std::min(1.0, std::max(0.0, a[1]));
386 return fa1*v1 + (1-fa1)*v2;
394 /*! \brief Function type for passing to the fitting routine
 *
 * A pointer to a model function that takes the evaluation point \p x and
 * the parameter array \p a and returns a double. The entries of the
 * \c lmcurves table have this type, and it is the callback handed to
 * lmcurve().
 */
395 typedef double (*t_lmcurve)(double x, const double *a);
397 /*! \brief array of fitting functions corresponding to the pre-defined types */
398 t_lmcurve lmcurves[effnNR+1] = {
399 lmc_exp_one_parm, lmc_exp_one_parm, lmc_exp_two_parm,
400 lmc_exp_exp, lmc_exp_5_parm, lmc_exp_7_parm,
402 lmc_vac_2_parm, lmc_erffit, lmc_errest_3_parm, lmc_pres_6_parm
405 double fit_function(const int eFitFn, const double parm[], const double x)
407 if ((eFitFn < 0) || (eFitFn >= effnNR))
409 fprintf(stderr, "fitfn = %d, should be in the range 0..%d\n",
413 return lmcurves[eFitFn](x, parm);
416 /*! \brief lmfit_exp supports fitting of different functions
418 * This routine calls the Levenberg-Marquardt non-linear fitting
419 * routine for fitting a data set with errors to a target function.
420 * Fitting routines included in gromacs in src/external/lmfit.
422 static gmx_bool lmfit_exp(int nfit,
431 double chisq, ochisq;
435 lm_control_struct control;
436 lm_status_struct *status;
437 int nparam = effnNparams(eFitFn);
441 if ((eFitFn < 0) || (eFitFn >= effnNR))
443 fprintf(stderr, "fitfn = %d, should be in the range 0..%d\n",
447 /* Using default control structure for double precision fitting that
448 * comes with the lmfit package (i.e. from the include file).
450 control = lm_control_double;
451 control.verbosity = (bVerbose ? 1 : 0);
452 control.n_maxpri = 0;
453 control.m_maxpri = 0;
461 printf("%4s %10s Parameters\n", "Step", "chi^2");
463 /* Check whether we have to skip some params */
468 p2 = 1 << (nparam-1);
469 bSkipLast = ((p2 & nfix) == p2);
476 while ((nparam > 0) && (bSkipLast));
479 printf("Using %d out of %d parameters\n", nparam, effnNparams(eFitFn));
485 lmcurve(nparam, parm, nfit, x, y, dy,
486 lmcurves[eFitFn], &control, status);
487 chisq = sqr(status->fnorm);
490 printf("status: fnorm = %g, nfev = %d, userbreak = %d\noutcome = %s\n",
491 status->fnorm, status->nfev, status->userbreak,
492 lm_infmsg[status->outcome]);
497 printf("%4d %8g", j, chisq);
498 for (mmm = 0; (mmm < effnNparams(eFitFn)); mmm++)
500 printf(" %8g", parm[mmm]);
505 bCont = (fabs(ochisq - chisq) > fabs(control.ftol*chisq));
507 while (bCont && (j < maxiter));
514 /*! \brief Ensure the fitting parameters are well-behaved.
516 * In order to make sure that fitting behaves according to the
517 * constraint that time constants are positive and increasing
518 * we enforce this here by setting all time constants to their
519 * absolute value and by adding e.g. |a_0| to |a_2|. This is
520 * done in conjunction with the fitting functions themselves.
521 * When there are multiple time constants we make sure that
522 * the successive time constants are at least double the
523 * previous ones and during fitting we enforce that they remain larger.
524 * It may very well help the convergence of the fitting routine.
526 static void initiate_fit_params(int eFitFn,
531 nparm = effnNparams(eFitFn);
536 GMX_ASSERT(params[0] >= 0, "parameters should be >= 0");
541 GMX_ASSERT(params[0] >= 0, "parameters should be >= 0");
544 GMX_ASSERT(params[2] >= 0, "parameters should be >= 0");
545 /* In order to maintain params[2] >= params[0] in the final
546 * result, we fit the difference between the two, that is
547 * params[2]-params[0] and in the end add in params[0]
550 params[2] = std::max(fabs(params[2])-params[0], params[0]);
556 GMX_ASSERT(params[1] >= 0, "parameters should be >= 0");
557 params[1] = fabs(params[1]);
560 GMX_ASSERT(params[3] >= 0, "parameters should be >= 0");
561 /* See comment under effnEXPEXP */
562 params[3] = std::max(fabs(params[3])-params[1], params[1]);
565 GMX_ASSERT(params[5] >= 0, "parameters should be >= 0");
566 /* See comment under effnEXPEXP */
567 params[5] = std::max(fabs(params[5])-params[3], params[3]);
570 GMX_ASSERT(params[7] >= 0, "parameters should be >= 0");
571 /* See comment under effnEXPEXP */
572 params[7] = std::max(fabs(params[7])-params[5], params[5]);
578 GMX_ASSERT(params[0] >= 0, "parameters should be >= 0");
579 GMX_ASSERT(params[1] >= 0 && params[1] <= 1, "parameter 1 should in 0 .. 1");
580 GMX_ASSERT(params[2] >= 0, "parameters should be >= 0");
581 /* See comment under effnEXPEXP */
582 params[2] = fabs(params[2])-params[0];
585 for (i = 1; (i < nparm); i++)
587 GMX_ASSERT(params[i] >= 0, "parameters should be >= 0");
595 /*! \brief Process the fitting parameters to get output parameters.
597 * See comment at the previous function.
599 static void extract_fit_params(int eFitFn,
604 nparm = effnNparams(eFitFn);
609 params[0] = fabs(params[0]);
614 params[0] = fabs(params[0]);
617 /* Back conversion of parameters from the fitted difference
618 * to the absolute value.
620 params[2] = fabs(params[2])+params[0];
626 params[1] = fabs(params[1]);
629 /* See comment under effnEXPEXP */
630 params[3] = fabs(params[3])+params[1];
633 /* See comment under effnEXPEXP */
634 params[5] = fabs(params[5])+params[3];
637 /* See comment under effnEXPEXP */
638 params[7] = fabs(params[7])+params[5];
644 params[0] = fabs(params[0]);
649 else if (params[1] > 1)
653 /* See comment under effnEXPEXP */
654 params[2] = params[0]+fabs(params[2]);
657 for (i = 1; (i < nparm); i++)
659 params[i] = fabs(params[i]);
667 /*! \brief Print chi-squared value and the parameters */
668 static void print_chi2_params(FILE *fp,
670 const double fitparms[],
679 for (i = 0; (i < nfitpnts); i++)
681 double yfit = lmcurves[eFitFn](x[i], fitparms);
682 chi2 += sqr(y[i] - yfit);
684 fprintf(fp, "There are %d data points, %d parameters, %s chi2 = %g\nparams:",
685 nfitpnts, effnNparams(eFitFn), label, chi2);
686 for (i = 0; (i < effnNparams(eFitFn)); i++)
688 fprintf(fp, " %10g", fitparms[i]);
693 /*! \brief See description in header file. */
/* NOTE(review): only part of this function body is visible in this listing;
 * the comments added below describe the visible statements only.
 *
 * Visible flow: the ndata input points are scanned and those whose time
 * (x0[i] when x0 is given, otherwise dt*i) lies within
 * [begintimefit, endtimefit] are considered for fitting. The parameters are
 * then passed through initiate_fit_params(), lmfit_exp() and
 * extract_fit_params(). Finally an integral of the fitted function is
 * computed and printed and, when fn_fitted is non-NULL, the data and the
 * fitted curve are written to the file opened with xvgropen().
 */
694 real do_lmfit(int ndata, real c1[], real sig[], real dt, real x0[],
695 real begintimefit, real endtimefit, const output_env_t oenv,
696 gmx_bool bVerbose, int eFitFn, double fitparms[], int fix,
697 const char *fn_fitted)
701 double integral, ttt;
706 fprintf(stderr, "Using fixed parameters in curve fitting is temporarily not working.\n");
710 fprintf(debug, "There are %d points to fit %d vars!\n", ndata, effnNparams(eFitFn));
711 fprintf(debug, "Fit to function %d from %g through %g, dt=%g\n",
712 eFitFn, begintimefit, endtimefit, dt);
719 for (i = 0; i < ndata; i++)
/* Time of point i: taken from x0 when supplied, else i times the spacing dt */
721 ttt = x0 ? x0[i] : dt*i;
/* Only points inside the closed fitting window are used */
722 if (ttt >= begintimefit && ttt <= endtimefit)
728 // No weighting if all values are divided by 1.
/* The uncertainty is floored at 1e-7 (presumably so that it can never be
 * zero when used as a weight - TODO confirm) */
733 dy[j] = std::max(1.0e-7, (double)sig[i]);
737 fprintf(debug, "j= %d, i= %d, x= %g, y= %g, dy=%g, ttt=%g\n",
738 j, i, x[j], y[j], dy[j], ttt);
/* Fewer selected points than model parameters: reported as an error */
745 if (nfitpnts < effnNparams(eFitFn))
747 fprintf(stderr, "Not enough (%d) data points for fitting, dt = %g!\n",
756 print_chi2_params(stdout, eFitFn, fitparms, "initial", nfitpnts, x, y);
/* Convert the user-level parameters to the internal form used while
 * fitting, run the fit, and convert back afterwards */
758 initiate_fit_params(eFitFn, fitparms);
760 bSuccess = lmfit_exp(nfitpnts, x, y, dy, fitparms, bVerbose, eFitFn, fix);
761 extract_fit_params(eFitFn, fitparms);
765 fprintf(stderr, "Fit failed!\n");
771 print_chi2_params(stdout, eFitFn, fitparms, "final", nfitpnts, x, y);
/* Closed-form integrals, each of the form tau*A*exp(-begintimefit/tau):
 * first with unit amplitude, then with amplitude fitparms[1], then the
 * two-term sum with weights fitparms[1] and 1-fitparms[1] */
776 integral = fitparms[0]*myexp(begintimefit, 1, fitparms[0]);
779 integral = fitparms[0]*myexp(begintimefit, fitparms[1], fitparms[0]);
782 integral = (fitparms[0]*myexp(begintimefit, fitparms[1], fitparms[0]) +
783 fitparms[2]*myexp(begintimefit, 1-fitparms[1], fitparms[2]));
/* NOTE(review): with (nparams-1)/2 terms this loop looks like the branch
 * for the odd-sized models (effnEXP5/7/9), whose long descriptions give
 * term i as a[2i]*exp(-x/|a[2i+1]|). The analytic integral of such a term
 * from begintimefit onwards is a[2i]*a[2i+1]*exp(-begintimefit/a[2i+1]),
 * but here the time constant handed to myexp() is fitparms[2*i], i.e. the
 * amplitude. The two expressions agree only for begintimefit == 0.
 * TODO confirm and, if so, pass fitparms[2*i+1] as tau. */
789 for (i = 0; (i < (effnNparams(eFitFn)-1)/2); i++)
791 integral += fitparms[2*i]*myexp(begintimefit, fitparms[2*i+1], fitparms[2*i]);
795 /* Do numerical integration */
/* Trapezoid rule over the fitted curve evaluated at the selected points */
797 for (i = 0; (i < nfitpnts-1); i++)
799 double y0 = lmcurves[eFitFn](x[i], fitparms);
800 double y1 = lmcurves[eFitFn](x[i+1], fitparms);
801 integral += (x[i+1]-x[i])*(y1+y0)*0.5;
808 printf("FIT: Integral of fitted function: %g\n", integral);
809 if ((effnEXP5 == eFitFn) || (effnEXP7 == eFitFn) || (effnEXP9 == eFitFn))
811 printf("FIT: Note that the constant term is not taken into account when computing integral.\n");
814 /* Generate debug output */
815 if (NULL != fn_fitted)
817 fp = xvgropen(fn_fitted, "Data + Fit", "Time (ps)",
/* The fitted parameters are written as '#' comment lines in the output */
819 for (i = 0; (i < effnNparams(eFitFn)); i++)
821 fprintf(fp, "# fitparms[%d] = %g\n", i, fitparms[i]);
823 for (j = 0; (j < nfitpnts); j++)
/* NOTE(review): i is not the index of this loop (it is left over from the
 * parameter loop above), so x0[i] reads the same element for every j.
 * In addition dt*j starts at zero, whereas the points printed in the first
 * column are x[j]. Likely intended: evaluate the fit at the time of fitted
 * point j (e.g. x[j]). TODO confirm. */
825 real ttt = x0 ? x0[i] : dt*j;
826 fprintf(fp, "%10.5e %10.5e %10.5e\n",
827 x[j], y[j], lmcurves[eFitFn](ttt, fitparms));
841 /*! See description in header file. */
842 real fit_acf(int ncorr, int fitfn, const output_env_t oenv, gmx_bool bVerbose,
843 real tbeginfit, real tendfit, real dt, real c1[], real *fit)
846 double tStart, tail_corr, sum, sumtot = 0, c_start, ct_estimate;
848 int i, j, jmax, nf_int;
851 bPrint = bVerbose || bDebugMode();
862 nf_int = std::min(ncorr, (int)(tendfit/dt));
863 sum = print_and_integrate(debug, nf_int, dt, c1, NULL, 1);
865 /* Estimate the correlation time for better fitting */
866 ct_estimate = 0.5*c1[0];
867 for (i = 1; (i < ncorr) && (c1[i] > 0); i++)
869 ct_estimate += c1[i];
871 ct_estimate *= dt/c1[0];
875 printf("COR: Correlation time (plain integral from %6.3f to %6.3f ps) = %8.5f ps\n",
876 0.0, dt*nf_int, sum);
877 printf("COR: Relaxation times are computed as fit to an exponential:\n");
878 printf("COR: %s\n", effnDescription(fitfn));
879 printf("COR: Fit to correlation function from %6.3f ps to %6.3f ps, results in a\n", tbeginfit, std::min(ncorr*dt, tendfit));
885 printf("COR:%11s%11s%11s%11s%11s%11s%11s\n",
886 "Fit from", "Integral", "Tail Value", "Sum (ps)", " a1 (ps)",
887 (effnNparams(fitfn) >= 2) ? " a2 ()" : "",
888 (effnNparams(fitfn) >= 3) ? " a3 (ps)" : "");
901 for (j = 0; ((j < jmax) && (tStart < tendfit) && (tStart < ncorr*dt)); j++)
903 /* Estimate the correlation time for better fitting */
906 for (i = 0; (i < ncorr) && (dt*i < tStart || c1[i] > 0); i++)
913 ct_estimate = 0.5*c1[i];
918 ct_estimate += c1[i];
923 ct_estimate *= dt/c_start;
927 /* The data is strange, so we need to choose something */
928 ct_estimate = tendfit;
932 fprintf(debug, "tStart %g ct_estimate: %g\n", tStart, ct_estimate);
935 if (fitfn == effnEXPEXP)
937 fitparm[0] = 0.002*ncorr*dt;
939 fitparm[2] = 0.2*ncorr*dt;
943 /* Good initial guess, this increases the probability of convergence */
944 fitparm[0] = ct_estimate;
949 /* Generate more or less appropriate sigma's */
950 for (i = 0; i < ncorr; i++)
952 sig[i] = sqrt(ct_estimate+dt*i);
955 nf_int = std::min(ncorr, (int)((tStart+1e-4)/dt));
956 sum = print_and_integrate(debug, nf_int, dt, c1, NULL, 1);
957 tail_corr = do_lmfit(ncorr, c1, sig, dt, NULL, tStart, tendfit, oenv,
958 bDebugMode(), fitfn, fitparm, 0, NULL);
959 sumtot = sum+tail_corr;
960 if (fit && ((jmax == 1) || (j == 1)))
963 for (i = 0; (i < 3); i++)
967 for (i = 0; (i < ncorr); i++)
969 fit[i] = lmcurves[fitfn](i*dt, mfp);
974 printf("COR:%11.4e%11.4e%11.4e%11.4e", tStart, sum, tail_corr, sumtot);
975 for (i = 0; (i < effnNparams(fitfn)); i++)
977 printf(" %11.4e", fitparm[i]);