2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
40 * Implements routine for fitting a data set to a curve
42 * \author David van der Spoel <david.vanderspoel@icm.uu.se>
43 * \ingroup module_correlationfunctions
57 #include "gromacs/correlationfunctions/integrate.h"
58 #include "gromacs/fileio/xvgr.h"
59 #include "gromacs/math/functions.h"
60 #include "gromacs/math/vec.h"
61 #include "gromacs/utility/fatalerror.h"
62 #include "gromacs/utility/futil.h"
63 #include "gromacs/utility/gmxassert.h"
64 #include "gromacs/utility/real.h"
65 #include "gromacs/utility/smalloc.h"
67 #include "gmx_lmcurve.h"
69 /*! \brief Number of parameters for each fitting function */
70 static const int nfp_ffn[effnNR] = { 0, 1, 2, 3, 5, 7, 9, 2, 4, 3, 6 };
72 /* +2 becuase parse_common_args wants leading and concluding NULL.
73 * We only allow exponential functions as choices on the command line,
74 * hence there are many more NULL field (which have to be at the end of
77 const char *s_ffn[effnNR+2] = {
78 nullptr, "none", "exp", "aexp", "exp_exp",
79 "exp5", "exp7", "exp9",
80 nullptr, nullptr, nullptr, nullptr, nullptr
83 /*! \brief Long description for each fitting function type */
84 static const char *longs_ffn[effnNR] = {
87 "y = a1 exp(-x/|a0|)",
88 "y = a1 exp(-x/|a0|) + (1-a1) exp(-x/(|a2|)), a2 > a0 > 0",
89 "y = a0 exp(-x/|a1|) + a2 exp(-x/|a3|) + a4, a3 >= a1",
90 "y = a0 exp(-x/|a1|) + a2 exp(-x/|a3|) + a4 exp(-x/|a5|) + a6, a5 >= a3 >= a1",
91 "y = a0 exp(-x/|a1|) + a2 exp(-x/|a3|) + a4 exp(-x/|a5|) + a6 exp(-x/|a7|) + a8, a7 >= a5 >= a3 >= a1",
92 "y = exp(-v) (cosh(wv) + 1/w sinh(wv)), v = x/(2 a0), w = sqrt(1 - a1)",
93 "y = 1/2*(a0+a1) - 1/2*(a0-a1)*erf( (x-a2) /a3^2)",
94 "y = a1 *2*a0*((exp(-x/a0)-1)*(a0/x)+1)+(1-a1)*2*a2*((exp(-x/a2)-1)*(a2/x)+1)",
95 "y = (1-a0)*cos(x*a1)*exp(-(x/a2)^a3) + a0*exp(-(x/a4)^a5)"
98 int effnNparams(int effn)
100 if ((0 <= effn) && (effn < effnNR))
102 return nfp_ffn[effn];
110 const char *effnDescription(int effn)
112 if ((0 <= effn) && (effn < effnNR))
114 return longs_ffn[effn];
122 int sffn2effn(const char **sffn)
127 for (i = 0; i < effnNR; i++)
129 if (sffn[i+1] && strcmp(sffn[0], sffn[i+1]) == 0)
138 /*! \brief Compute exponential function A exp(-x/tau) */
139 static double myexp(double x, double A, double tau)
141 if ((A == 0) || (tau == 0))
145 return A*exp(-x/tau);
148 /*! \brief Compute y=(a0+a1)/2-(a0-a1)/2*erf((x-a2)/a3^2) */
149 static double lmc_erffit (double x, const double *a)
156 erfarg = (x-a[2])/(a[3]*a[3]);
157 myerf = std::erf(erfarg);
161 /* If a[3] == 0, a[3]^2 = 0 and the erfarg becomes +/- infinity */
171 return 0.5*((a[0]+a[1]) - (a[0]-a[1])*myerf);
174 /*! \brief Exponent function that prevents overflow */
175 static double safe_exp(double x)
177 double exp_max = 200;
178 double exp_min = -exp_max;
183 else if (x >= exp_max)
193 /*! \brief Exponent minus 1 function that prevents overflow */
194 static double safe_expm1(double x)
196 double exp_max = 200;
197 double exp_min = -exp_max;
202 else if (x >= exp_max)
208 return std::expm1(x);
212 /*! \brief Compute y = exp(-x/|a0|) */
213 static double lmc_exp_one_parm(double x, const double *a)
215 return safe_exp(-x/fabs(a[0]));
218 /*! \brief Compute y = a1 exp(-x/|a0|) */
219 static double lmc_exp_two_parm(double x, const double *a)
221 return a[1]*safe_exp(-x/fabs(a[0]));
224 /*! \brief Compute y = a1 exp(-x/|a0|) + (1-a1) exp(-x/|a2|) */
225 static double lmc_exp_exp(double x, const double *a)
229 e1 = safe_exp(-x/fabs(a[0]));
230 e2 = safe_exp(-x/(fabs(a[0])+fabs(a[2])));
231 return a[1]*e1 + (1-a[1])*e2;
234 /*! \brief Compute y = a0 exp(-x/|a1|) + a2 exp(-x/(|a1|+|a3|)) + a4 */
235 static double lmc_exp_5_parm(double x, const double *a)
239 e1 = safe_exp(-x/fabs(a[1]));
240 e2 = safe_exp(-x/(fabs(a[1])+fabs(a[3])));
241 return a[0]*e1 + a[2]*e2 + a[4];
244 /*! \brief Compute 7 parameter exponential function value.
246 * Compute y = a0 exp(-x/|a1|) + a2 exp(-x/(|a1|+|a3|)) +
247 * a4 exp(-x/(|a1|+|a3|+|a5|)) + a6
249 static double lmc_exp_7_parm(double x, const double *a)
252 double fa1, fa3, fa5;
255 fa3 = fa1 + fabs(a[3]);
256 fa5 = fa3 + fabs(a[5]);
257 e1 = safe_exp(-x/fa1);
258 e2 = safe_exp(-x/fa3);
259 e3 = safe_exp(-x/fa5);
260 return a[0]*e1 + a[2]*e2 + a[4]*e3 + a[6];
263 /*! \brief Compute 9 parameter exponential function value.
265 * Compute y = a0 exp(-x/|a1|) + a2 exp(-x/(|a1|+|a3|)) +
266 * a4 exp(-x/(|a1|+|a3|+|a5|)) + a6 exp(-x/(|a1|+|a3|+|a5|+|a7|)) + a8
268 static double lmc_exp_9_parm(double x, const double *a)
270 double e1, e2, e3, e4;
271 double fa1, fa3, fa5, fa7;
274 fa3 = fa1 + fabs(a[3]);
275 fa5 = fa3 + fabs(a[5]);
276 fa7 = fa5 + fabs(a[7]);
278 e1 = safe_exp(-x/fa1);
279 e2 = safe_exp(-x/fa3);
280 e3 = safe_exp(-x/fa5);
281 e4 = safe_exp(-x/fa7);
282 return a[0]*e1 + a[2]*e2 + a[4]*e3 + a[6]*e4 + a[8];
285 /*! \brief Compute y = (1-a0)*exp(-(x/|a2|)^|a3|)*cos(x*|a1|) + a0*exp(-(x/|a4|)^|a5|) */
286 static double lmc_pres_6_parm(double x, const double *a)
288 double term1, term2, term3;
292 if ((a[4] != 0) && (a[0] != 0))
294 double power = std::min(fabs(a[5]), pow_max);
295 term3 = a[0] * safe_exp(-pow((x/fabs(a[4])), power));
300 if ((term1 != 0) && (a[2] != 0))
302 double power = std::min(fabs(a[3]), pow_max);
303 term2 = safe_exp(-pow((x/fabs(a[2])), power)) * cos(x*fabs(a[1]));
306 return term1*term2 + term3;
309 /*! \brief Compute vac function */
310 static double lmc_vac_2_parm(double x, const double *a)
314 * y = 1/2 (1 - 1/w) exp(-(1+w)v) + 1/2 (1 + 1/w) exp(-(1-w)v)
316 * = exp(-v) (cosh(wv) + 1/w sinh(wv))
321 * For tranverse current autocorrelation functions:
323 * a1 = 4 tau (eta/rho) k^2
327 double y, v, det, omega, wv, em, ec, es;
330 v = x/(2*fabs(a[0]));
335 omega = sqrt(fabs(det));
336 wv = std::min(omega*v, wv_max);
340 ec = em*0.5*(safe_exp(wv)+safe_exp(-wv));
341 es = em*0.5*(safe_exp(wv)-safe_exp(-wv))/omega;
346 es = em*sin(wv)/omega;
357 /*! \brief Compute error estimate */
358 static double lmc_errest_3_parm(double x, const double *a)
360 double e1, e2, v1, v2;
361 double fa0 = fabs(a[0]);
363 double fa2 = fa0+fabs(a[2]);
367 e1 = safe_expm1(-x/fa0);
375 e2 = safe_expm1(-x/fa2);
384 v1 = 2*fa0*(e1*fa0/x + 1);
385 v2 = 2*fa2*(e2*fa2/x + 1);
386 /* We need 0 <= a1 <= 1 */
387 fa1 = std::min(1.0, std::max(0.0, a[1]));
389 return fa1*v1 + (1-fa1)*v2;
397 /*! \brief function type for passing to fitting routine */
398 typedef double (*t_lmcurve)(double x, const double *a);
400 /*! \brief array of fitting functions corresponding to the pre-defined types */
401 t_lmcurve lmcurves[effnNR+1] = {
402 lmc_exp_one_parm, lmc_exp_one_parm, lmc_exp_two_parm,
403 lmc_exp_exp, lmc_exp_5_parm, lmc_exp_7_parm,
405 lmc_vac_2_parm, lmc_erffit, lmc_errest_3_parm, lmc_pres_6_parm
408 double fit_function(const int eFitFn, const double parm[], const double x)
410 if ((eFitFn < 0) || (eFitFn >= effnNR))
412 fprintf(stderr, "fitfn = %d, should be in the range 0..%d\n",
416 return lmcurves[eFitFn](x, parm);
419 /*! \brief lmfit_exp supports fitting of different functions
421 * This routine calls the Levenberg-Marquardt non-linear fitting
422 * routine for fitting a data set with errors to a target function.
423 * Fitting routines included in gromacs in src/external/lmfit.
425 static gmx_bool lmfit_exp(int nfit,
434 double chisq, ochisq;
438 lm_control_struct control;
439 lm_status_struct *status;
440 int nparam = effnNparams(eFitFn);
444 if ((eFitFn < 0) || (eFitFn >= effnNR))
446 fprintf(stderr, "fitfn = %d, should be in the range 0..%d\n",
450 /* Using default control structure for double precision fitting that
451 * comes with the lmfit package (i.e. from the include file).
453 control = lm_control_double;
454 control.verbosity = (bVerbose ? 1 : 0);
455 control.n_maxpri = 0;
456 control.m_maxpri = 0;
464 printf("%4s %10s Parameters\n", "Step", "chi^2");
466 /* Check whether we have to skip some params */
471 p2 = 1 << (nparam-1);
472 bSkipLast = ((p2 & nfix) == p2);
479 while ((nparam > 0) && (bSkipLast));
482 printf("Using %d out of %d parameters\n", nparam, effnNparams(eFitFn));
488 gmx_lmcurve(nparam, parm, nfit, x, y, dy,
489 lmcurves[eFitFn], &control, status);
490 chisq = gmx::square(status->fnorm);
493 printf("status: fnorm = %g, nfev = %d, userbreak = %d\noutcome = %s\n",
494 status->fnorm, status->nfev, status->userbreak,
495 lm_infmsg[status->outcome]);
500 printf("%4d %8g", j, chisq);
501 for (mmm = 0; (mmm < effnNparams(eFitFn)); mmm++)
503 printf(" %8g", parm[mmm]);
508 bCont = (fabs(ochisq - chisq) > fabs(control.ftol*chisq));
510 while (bCont && (j < maxiter));
517 /*! \brief Ensure the fitting parameters are well-behaved.
519 * In order to make sure that fitting behaves according to the
520 * constraint that time constants are positive and increasing
521 * we enforce this here by setting all time constants to their
522 * absolute value and by adding e.g. |a_0| to |a_2|. This is
523 * done in conjunction with the fitting functions themselves.
524 * When there are multiple time constants we make sure that
525 * the successive time constants are at least double the
526 * previous ones and during fitting we enforce the they remain larger.
527 * It may very well help the convergence of the fitting routine.
529 static void initiate_fit_params(int eFitFn,
534 nparm = effnNparams(eFitFn);
539 GMX_ASSERT(params[0] >= 0, "parameters should be >= 0");
544 GMX_ASSERT(params[0] >= 0, "parameters should be >= 0");
547 GMX_ASSERT(params[2] >= 0, "parameters should be >= 0");
548 /* In order to maintain params[2] >= params[0] in the final
549 * result, we fit the difference between the two, that is
550 * params[2]-params[0] and in the add add in params[0]
553 params[2] = std::max(fabs(params[2])-params[0], params[0]);
559 GMX_ASSERT(params[1] >= 0, "parameters should be >= 0");
560 params[1] = fabs(params[1]);
563 GMX_ASSERT(params[3] >= 0, "parameters should be >= 0");
564 /* See comment under effnEXPEXP */
565 params[3] = std::max(fabs(params[3])-params[1], params[1]);
568 GMX_ASSERT(params[5] >= 0, "parameters should be >= 0");
569 /* See comment under effnEXPEXP */
570 params[5] = std::max(fabs(params[5])-params[3], params[3]);
573 GMX_ASSERT(params[7] >= 0, "parameters should be >= 0");
574 /* See comment under effnEXPEXP */
575 params[7] = std::max(fabs(params[7])-params[5], params[5]);
581 GMX_ASSERT(params[0] >= 0, "parameters should be >= 0");
582 GMX_ASSERT(params[1] >= 0 && params[1] <= 1, "parameter 1 should in 0 .. 1");
583 GMX_ASSERT(params[2] >= 0, "parameters should be >= 0");
584 /* See comment under effnEXPEXP */
585 params[2] = fabs(params[2])-params[0];
588 for (i = 1; (i < nparm); i++)
590 GMX_ASSERT(params[i] >= 0, "parameters should be >= 0");
598 /*! \brief Process the fitting parameters to get output parameters.
600 * See comment at the previous function.
602 static void extract_fit_params(int eFitFn,
607 nparm = effnNparams(eFitFn);
612 params[0] = fabs(params[0]);
617 params[0] = fabs(params[0]);
620 /* Back conversion of parameters from the fitted difference
621 * to the absolute value.
623 params[2] = fabs(params[2])+params[0];
629 params[1] = fabs(params[1]);
632 /* See comment under effnEXPEXP */
633 params[3] = fabs(params[3])+params[1];
636 /* See comment under effnEXPEXP */
637 params[5] = fabs(params[5])+params[3];
640 /* See comment under effnEXPEXP */
641 params[7] = fabs(params[7])+params[5];
647 params[0] = fabs(params[0]);
652 else if (params[1] > 1)
656 /* See comment under effnEXPEXP */
657 params[2] = params[0]+fabs(params[2]);
660 for (i = 1; (i < nparm); i++)
662 params[i] = fabs(params[i]);
670 /*! \brief Print chi-squared value and the parameters */
671 static void print_chi2_params(FILE *fp,
673 const double fitparms[],
682 for (i = 0; (i < nfitpnts); i++)
684 double yfit = lmcurves[eFitFn](x[i], fitparms);
685 chi2 += gmx::square(y[i] - yfit);
687 fprintf(fp, "There are %d data points, %d parameters, %s chi2 = %g\nparams:",
688 nfitpnts, effnNparams(eFitFn), label, chi2);
689 for (i = 0; (i < effnNparams(eFitFn)); i++)
691 fprintf(fp, " %10g", fitparms[i]);
696 real do_lmfit(int ndata, real c1[], real sig[], real dt, real *x0,
697 real begintimefit, real endtimefit, const gmx_output_env_t *oenv,
698 gmx_bool bVerbose, int eFitFn, double fitparms[], int fix,
699 const char *fn_fitted)
703 double integral, ttt;
708 fprintf(stderr, "Using fixed parameters in curve fitting is temporarily not working.\n");
712 fprintf(debug, "There are %d points to fit %d vars!\n", ndata, effnNparams(eFitFn));
713 fprintf(debug, "Fit to function %d from %g through %g, dt=%g\n",
714 eFitFn, begintimefit, endtimefit, dt);
721 for (i = 0; i < ndata; i++)
723 ttt = x0 ? x0[i] : dt*i;
724 if (ttt >= begintimefit && ttt <= endtimefit)
730 // No weighting if all values are divided by 1.
735 dy[j] = std::max(1.0e-7, (double)sig[i]);
739 fprintf(debug, "j= %d, i= %d, x= %g, y= %g, dy=%g, ttt=%g\n",
740 j, i, x[j], y[j], dy[j], ttt);
747 if (nfitpnts < effnNparams(eFitFn))
749 fprintf(stderr, "Not enough (%d) data points for fitting, dt = %g!\n",
758 print_chi2_params(stdout, eFitFn, fitparms, "initial", nfitpnts, x, y);
760 initiate_fit_params(eFitFn, fitparms);
762 bSuccess = lmfit_exp(nfitpnts, x, y, dy, fitparms, bVerbose, eFitFn, fix);
763 extract_fit_params(eFitFn, fitparms);
767 fprintf(stderr, "Fit failed!\n");
773 print_chi2_params(stdout, eFitFn, fitparms, "final", nfitpnts, x, y);
778 integral = fitparms[0]*myexp(begintimefit, 1, fitparms[0]);
781 integral = fitparms[0]*myexp(begintimefit, fitparms[1], fitparms[0]);
784 integral = (fitparms[0]*myexp(begintimefit, fitparms[1], fitparms[0]) +
785 fitparms[2]*myexp(begintimefit, 1-fitparms[1], fitparms[2]));
791 for (i = 0; (i < (effnNparams(eFitFn)-1)/2); i++)
793 integral += fitparms[2*i]*myexp(begintimefit, fitparms[2*i+1], fitparms[2*i]);
797 /* Do numerical integration */
799 for (i = 0; (i < nfitpnts-1); i++)
801 double y0 = lmcurves[eFitFn](x[i], fitparms);
802 double y1 = lmcurves[eFitFn](x[i+1], fitparms);
803 integral += (x[i+1]-x[i])*(y1+y0)*0.5;
810 printf("FIT: Integral of fitted function: %g\n", integral);
811 if ((effnEXP5 == eFitFn) || (effnEXP7 == eFitFn) || (effnEXP9 == eFitFn))
813 printf("FIT: Note that the constant term is not taken into account when computing integral.\n");
816 /* Generate debug output */
817 if (nullptr != fn_fitted)
819 fp = xvgropen(fn_fitted, "Data + Fit", "Time (ps)",
821 for (i = 0; (i < effnNparams(eFitFn)); i++)
823 fprintf(fp, "# fitparms[%d] = %g\n", i, fitparms[i]);
825 for (j = 0; (j < nfitpnts); j++)
827 real ttt = x0 ? x0[j] : dt*j;
828 fprintf(fp, "%10.5e %10.5e %10.5e\n",
829 x[j], y[j], (lmcurves[eFitFn])(ttt, fitparms));
843 real fit_acf(int ncorr, int fitfn, const gmx_output_env_t *oenv, gmx_bool bVerbose,
844 real tbeginfit, real tendfit, real dt, real c1[], real *fit)
847 double tStart, tail_corr, sum, sumtot = 0, c_start, ct_estimate;
849 int i, j, jmax, nf_int;
852 bPrint = bVerbose || bDebugMode();
863 nf_int = std::min(ncorr, (int)(tendfit/dt));
864 sum = print_and_integrate(debug, nf_int, dt, c1, nullptr, 1);
868 printf("COR: Correlation time (plain integral from %6.3f to %6.3f ps) = %8.5f ps\n",
869 0.0, dt*nf_int, sum);
870 printf("COR: Relaxation times are computed as fit to an exponential:\n");
871 printf("COR: %s\n", effnDescription(fitfn));
872 printf("COR: Fit to correlation function from %6.3f ps to %6.3f ps, results in a\n", tbeginfit, std::min(ncorr*dt, tendfit));
878 printf("COR:%11s%11s%11s%11s%11s%11s%11s\n",
879 "Fit from", "Integral", "Tail Value", "Sum (ps)", " a1 (ps)",
880 (effnNparams(fitfn) >= 2) ? " a2 ()" : "",
881 (effnNparams(fitfn) >= 3) ? " a3 (ps)" : "");
894 for (j = 0; ((j < jmax) && (tStart < tendfit) && (tStart < ncorr*dt)); j++)
896 /* Estimate the correlation time for better fitting */
899 for (i = 0; (i < ncorr) && (dt*i < tStart || c1[i] > 0); i++)
906 ct_estimate = 0.5*c1[i];
911 ct_estimate += c1[i];
916 ct_estimate *= dt/c_start;
920 /* The data is strange, so we need to choose somehting */
921 ct_estimate = tendfit;
925 fprintf(debug, "tStart %g ct_estimate: %g\n", tStart, ct_estimate);
928 if (fitfn == effnEXPEXP)
930 fitparm[0] = 0.002*ncorr*dt;
932 fitparm[2] = 0.2*ncorr*dt;
936 /* Good initial guess, this increases the probability of convergence */
937 fitparm[0] = ct_estimate;
942 /* Generate more or less appropriate sigma's */
943 for (i = 0; i < ncorr; i++)
945 sig[i] = sqrt(ct_estimate+dt*i);
948 nf_int = std::min(ncorr, (int)((tStart+1e-4)/dt));
949 sum = print_and_integrate(debug, nf_int, dt, c1, nullptr, 1);
950 tail_corr = do_lmfit(ncorr, c1, sig, dt, nullptr, tStart, tendfit, oenv,
951 bDebugMode(), fitfn, fitparm, 0, nullptr);
952 sumtot = sum+tail_corr;
953 if (fit && ((jmax == 1) || (j == 1)))
956 for (i = 0; (i < 3); i++)
960 for (i = 0; (i < ncorr); i++)
962 fit[i] = lmcurves[fitfn](i*dt, mfp);
967 printf("COR:%11.4e%11.4e%11.4e%11.4e", tStart, sum, tail_corr, sumtot);
968 for (i = 0; (i < effnNparams(fitfn)); i++)
970 printf(" %11.4e", fitparm[i]);