2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013-2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
40 * Implements routine for fitting a data set to a curve
42 * \author David van der Spoel <david.vanderspoel@icm.uu.se>
43 * \ingroup module_correlationfunctions
53 #include "gromacs/correlationfunctions/integrate.h"
54 #include "gromacs/fileio/xvgr.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/futil.h"
58 #include "gromacs/utility/gmxassert.h"
59 #include "gromacs/utility/real.h"
60 #include "gromacs/utility/smalloc.h"
62 #include "gmx_lmcurve.h"
64 /*! \brief Number of parameters for each fitting function */
65 static const int nfp_ffn[effnNR] = { 0, 1, 2, 3, 5, 7, 9, 2, 4, 3, 6 };
67 /* +2 becuase parse_common_args wants leading and concluding NULL.
68 * We only allow exponential functions as choices on the command line,
69 * hence there are many more NULL field (which have to be at the end of
72 const char* s_ffn[effnNR + 2] = { nullptr, "none", "exp", "aexp", "exp_exp", "exp5", "exp7",
73 "exp9", nullptr, nullptr, nullptr, nullptr, nullptr };
76 // needed because clang-format wants to break the lines below
77 /*! \brief Long description for each fitting function type */
78 static const char* longs_ffn[effnNR]
81 "y = a1 exp(-x/|a0|)",
82 "y = a1 exp(-x/|a0|) + (1-a1) exp(-x/(|a2|)), a2 > a0 > 0",
83 "y = a0 exp(-x/|a1|) + a2 exp(-x/|a3|) + a4, a3 >= a1",
84 "y = a0 exp(-x/|a1|) + a2 exp(-x/|a3|) + a4 exp(-x/|a5|) + a6, a5 >= a3 >= a1",
85 "y = a0 exp(-x/|a1|) + a2 exp(-x/|a3|) + a4 exp(-x/|a5|) + a6 exp(-x/|a7|) + a8, a7 >= a5 >= a3 >= a1",
86 "y = exp(-v) (cosh(wv) + 1/w sinh(wv)), v = x/(2 a0), w = sqrt(1 - a1)",
87 "y = 1/2*(a0+a1) - 1/2*(a0-a1)*erf( (x-a2) /a3^2)",
88 "y = a1 *2*a0*((exp(-x/a0)-1)*(a0/x)+1)+(1-a1)*2*a2*((exp(-x/a2)-1)*(a2/x)+1)",
89 "y = (1-a0)*cos(x*a1)*exp(-(x/a2)^a3) + a0*exp(-(x/a4)^a5)" };
92 int effnNparams(int effn)
94 if ((0 <= effn) && (effn < effnNR))
104 const char* effnDescription(int effn)
106 if ((0 <= effn) && (effn < effnNR))
108 return longs_ffn[effn];
116 int sffn2effn(const char** sffn)
121 for (i = 0; i < effnNR; i++)
123 if (sffn[i + 1] && strcmp(sffn[0], sffn[i + 1]) == 0)
132 /*! \brief Compute exponential function A exp(-x/tau) */
133 static double myexp(double x, double A, double tau)
135 if ((A == 0) || (tau == 0))
139 return A * exp(-x / tau);
142 /*! \brief Compute y=(a0+a1)/2-(a0-a1)/2*erf((x-a2)/a3^2) */
143 static double lmc_erffit(double x, const double* a)
150 erfarg = (x - a[2]) / (a[3] * a[3]);
151 myerf = std::erf(erfarg);
155 /* If a[3] == 0, a[3]^2 = 0 and the erfarg becomes +/- infinity */
165 return 0.5 * ((a[0] + a[1]) - (a[0] - a[1]) * myerf);
168 /*! \brief Exponent function that prevents overflow */
169 static double safe_exp(double x)
171 double exp_max = 200;
172 double exp_min = -exp_max;
177 else if (x >= exp_max)
187 /*! \brief Exponent minus 1 function that prevents overflow */
188 static double safe_expm1(double x)
190 double exp_max = 200;
191 double exp_min = -exp_max;
196 else if (x >= exp_max)
202 return std::expm1(x);
206 /*! \brief Compute y = exp(-x/|a0|) */
207 static double lmc_exp_one_parm(double x, const double* a)
209 return safe_exp(-x / fabs(a[0]));
212 /*! \brief Compute y = a1 exp(-x/|a0|) */
213 static double lmc_exp_two_parm(double x, const double* a)
215 return a[1] * safe_exp(-x / fabs(a[0]));
218 /*! \brief Compute y = a1 exp(-x/|a0|) + (1-a1) exp(-x/|a2|) */
219 static double lmc_exp_exp(double x, const double* a)
223 e1 = safe_exp(-x / fabs(a[0]));
224 e2 = safe_exp(-x / (fabs(a[0]) + fabs(a[2])));
225 return a[1] * e1 + (1 - a[1]) * e2;
228 /*! \brief Compute y = a0 exp(-x/|a1|) + a2 exp(-x/(|a1|+|a3|)) + a4 */
229 static double lmc_exp_5_parm(double x, const double* a)
233 e1 = safe_exp(-x / fabs(a[1]));
234 e2 = safe_exp(-x / (fabs(a[1]) + fabs(a[3])));
235 return a[0] * e1 + a[2] * e2 + a[4];
238 /*! \brief Compute 7 parameter exponential function value.
240 * Compute y = a0 exp(-x/|a1|) + a2 exp(-x/(|a1|+|a3|)) +
241 * a4 exp(-x/(|a1|+|a3|+|a5|)) + a6
243 static double lmc_exp_7_parm(double x, const double* a)
246 double fa1, fa3, fa5;
249 fa3 = fa1 + fabs(a[3]);
250 fa5 = fa3 + fabs(a[5]);
251 e1 = safe_exp(-x / fa1);
252 e2 = safe_exp(-x / fa3);
253 e3 = safe_exp(-x / fa5);
254 return a[0] * e1 + a[2] * e2 + a[4] * e3 + a[6];
257 /*! \brief Compute 9 parameter exponential function value.
259 * Compute y = a0 exp(-x/|a1|) + a2 exp(-x/(|a1|+|a3|)) +
260 * a4 exp(-x/(|a1|+|a3|+|a5|)) + a6 exp(-x/(|a1|+|a3|+|a5|+|a7|)) + a8
262 static double lmc_exp_9_parm(double x, const double* a)
264 double e1, e2, e3, e4;
265 double fa1, fa3, fa5, fa7;
268 fa3 = fa1 + fabs(a[3]);
269 fa5 = fa3 + fabs(a[5]);
270 fa7 = fa5 + fabs(a[7]);
272 e1 = safe_exp(-x / fa1);
273 e2 = safe_exp(-x / fa3);
274 e3 = safe_exp(-x / fa5);
275 e4 = safe_exp(-x / fa7);
276 return a[0] * e1 + a[2] * e2 + a[4] * e3 + a[6] * e4 + a[8];
279 /*! \brief Compute y = (1-a0)*exp(-(x/|a2|)^|a3|)*cos(x*|a1|) + a0*exp(-(x/|a4|)^|a5|) */
280 static double lmc_pres_6_parm(double x, const double* a)
282 double term1, term2, term3;
286 if ((a[4] != 0) && (a[0] != 0))
288 double power = std::min(fabs(a[5]), pow_max);
289 term3 = a[0] * safe_exp(-pow((x / fabs(a[4])), power));
294 if ((term1 != 0) && (a[2] != 0))
296 double power = std::min(fabs(a[3]), pow_max);
297 term2 = safe_exp(-pow((x / fabs(a[2])), power)) * cos(x * fabs(a[1]));
300 return term1 * term2 + term3;
303 /*! \brief Compute vac function */
304 static double lmc_vac_2_parm(double x, const double* a)
308 * y = 1/2 (1 - 1/w) exp(-(1+w)v) + 1/2 (1 + 1/w) exp(-(1-w)v)
310 * = exp(-v) (cosh(wv) + 1/w sinh(wv))
315 * For tranverse current autocorrelation functions:
317 * a1 = 4 tau (eta/rho) k^2
321 double y, v, det, omega, wv, em, ec, es;
324 v = x / (2 * fabs(a[0]));
329 omega = sqrt(fabs(det));
330 wv = std::min(omega * v, wv_max);
334 ec = em * 0.5 * (safe_exp(wv) + safe_exp(-wv));
335 es = em * 0.5 * (safe_exp(wv) - safe_exp(-wv)) / omega;
340 es = em * sin(wv) / omega;
351 /*! \brief Compute error estimate */
352 static double lmc_errest_3_parm(double x, const double* a)
354 double e1, e2, v1, v2;
355 double fa0 = fabs(a[0]);
357 double fa2 = fa0 + fabs(a[2]);
361 e1 = safe_expm1(-x / fa0);
369 e2 = safe_expm1(-x / fa2);
378 v1 = 2 * fa0 * (e1 * fa0 / x + 1);
379 v2 = 2 * fa2 * (e2 * fa2 / x + 1);
380 /* We need 0 <= a1 <= 1 */
381 fa1 = std::min(1.0, std::max(0.0, a[1]));
383 return fa1 * v1 + (1 - fa1) * v2;
391 /*! \brief array of fitting functions corresponding to the pre-defined types */
392 t_lmcurve lmcurves[effnNR + 1] = { lmc_exp_one_parm, lmc_exp_one_parm, lmc_exp_two_parm,
393 lmc_exp_exp, lmc_exp_5_parm, lmc_exp_7_parm,
394 lmc_exp_9_parm, lmc_vac_2_parm, lmc_erffit,
395 lmc_errest_3_parm, lmc_pres_6_parm };
397 double fit_function(const int eFitFn, const double parm[], const double x)
399 if ((eFitFn < 0) || (eFitFn >= effnNR))
401 fprintf(stderr, "fitfn = %d, should be in the range 0..%d\n", eFitFn, effnNR - 1);
404 return lmcurves[eFitFn](x, parm);
407 /*! \brief Ensure the fitting parameters are well-behaved.
409 * In order to make sure that fitting behaves according to the
410 * constraint that time constants are positive and increasing
411 * we enforce this here by setting all time constants to their
412 * absolute value and by adding e.g. |a_0| to |a_2|. This is
413 * done in conjunction with the fitting functions themselves.
414 * When there are multiple time constants we make sure that
415 * the successive time constants are at least double the
416 * previous ones and during fitting we enforce the they remain larger.
417 * It may very well help the convergence of the fitting routine.
419 static void initiate_fit_params(int eFitFn, double params[])
423 nparm = effnNparams(eFitFn);
427 case effnVAC: GMX_ASSERT(params[0] >= 0, "parameters should be >= 0"); break;
431 GMX_ASSERT(params[0] >= 0, "parameters should be >= 0");
434 GMX_ASSERT(params[2] >= 0, "parameters should be >= 0");
435 /* In order to maintain params[2] >= params[0] in the final
436 * result, we fit the difference between the two, that is
437 * params[2]-params[0] and in the add add in params[0]
440 params[2] = std::max(fabs(params[2]) - params[0], params[0]);
446 GMX_ASSERT(params[1] >= 0, "parameters should be >= 0");
447 params[1] = fabs(params[1]);
450 GMX_ASSERT(params[3] >= 0, "parameters should be >= 0");
451 /* See comment under effnEXPEXP */
452 params[3] = std::max(fabs(params[3]) - params[1], params[1]);
455 GMX_ASSERT(params[5] >= 0, "parameters should be >= 0");
456 /* See comment under effnEXPEXP */
457 params[5] = std::max(fabs(params[5]) - params[3], params[3]);
460 GMX_ASSERT(params[7] >= 0, "parameters should be >= 0");
461 /* See comment under effnEXPEXP */
462 params[7] = std::max(fabs(params[7]) - params[5], params[5]);
468 GMX_ASSERT(params[0] >= 0, "parameters should be >= 0");
469 GMX_ASSERT(params[1] >= 0 && params[1] <= 1, "parameter 1 should in 0 .. 1");
470 GMX_ASSERT(params[2] >= 0, "parameters should be >= 0");
471 /* See comment under effnEXPEXP */
472 params[2] = fabs(params[2]) - params[0];
475 for (i = 1; (i < nparm); i++)
477 GMX_ASSERT(params[i] >= 0, "parameters should be >= 0");
484 /*! \brief Process the fitting parameters to get output parameters.
486 * See comment at the previous function.
488 static void extract_fit_params(int eFitFn, double params[])
492 nparm = effnNparams(eFitFn);
496 case effnVAC: params[0] = fabs(params[0]); break;
500 params[0] = fabs(params[0]);
503 /* Back conversion of parameters from the fitted difference
504 * to the absolute value.
506 params[2] = fabs(params[2]) + params[0];
512 params[1] = fabs(params[1]);
515 /* See comment under effnEXPEXP */
516 params[3] = fabs(params[3]) + params[1];
519 /* See comment under effnEXPEXP */
520 params[5] = fabs(params[5]) + params[3];
523 /* See comment under effnEXPEXP */
524 params[7] = fabs(params[7]) + params[5];
530 params[0] = fabs(params[0]);
535 else if (params[1] > 1)
539 /* See comment under effnEXPEXP */
540 params[2] = params[0] + fabs(params[2]);
543 for (i = 1; (i < nparm); i++)
545 params[i] = fabs(params[i]);
552 /*! \brief Print chi-squared value and the parameters */
553 static void print_chi2_params(FILE* fp,
555 const double fitparms[],
564 for (i = 0; (i < nfitpnts); i++)
566 double yfit = lmcurves[eFitFn](x[i], fitparms);
567 chi2 += gmx::square(y[i] - yfit);
570 "There are %d data points, %d parameters, %s chi2 = %g\nparams:",
575 for (i = 0; (i < effnNparams(eFitFn)); i++)
577 fprintf(fp, " %10g", fitparms[i]);
582 real do_lmfit(int ndata,
589 const gmx_output_env_t* oenv,
594 const char* fn_fitted)
598 double integral, ttt;
603 fprintf(stderr, "Using fixed parameters in curve fitting is temporarily not working.\n");
607 fprintf(debug, "There are %d points to fit %d vars!\n", ndata, effnNparams(eFitFn));
608 fprintf(debug, "Fit to function %d from %g through %g, dt=%g\n", eFitFn, begintimefit, endtimefit, dt);
615 for (i = 0; i < ndata; i++)
617 ttt = x0 ? x0[i] : dt * i;
618 if (ttt >= begintimefit && ttt <= endtimefit)
624 // No weighting if all values are divided by 1.
629 dy[j] = std::max(1.0e-7, static_cast<double>(sig[i]));
633 fprintf(debug, "j= %d, i= %d, x= %g, y= %g, dy=%g, ttt=%g\n", j, i, x[j], y[j], dy[j], ttt);
640 if (nfitpnts < effnNparams(eFitFn))
642 fprintf(stderr, "Not enough (%d) data points for fitting, dt = %g!\n", nfitpnts, dt);
650 print_chi2_params(stdout, eFitFn, fitparms, "initial", nfitpnts, x, y);
652 initiate_fit_params(eFitFn, fitparms);
654 bSuccess = lmfit_exp(nfitpnts, x, y, dy, fitparms, bVerbose, eFitFn, fix);
655 extract_fit_params(eFitFn, fitparms);
659 fprintf(stderr, "Fit failed!\n");
665 print_chi2_params(stdout, eFitFn, fitparms, "final", nfitpnts, x, y);
669 case effnEXP1: integral = fitparms[0] * myexp(begintimefit, 1, fitparms[0]); break;
671 integral = fitparms[0] * myexp(begintimefit, fitparms[1], fitparms[0]);
674 integral = (fitparms[0] * myexp(begintimefit, fitparms[1], fitparms[0])
675 + fitparms[2] * myexp(begintimefit, 1 - fitparms[1], fitparms[2]));
681 for (i = 0; (i < (effnNparams(eFitFn) - 1) / 2); i++)
683 integral += fitparms[2 * i]
684 * myexp(begintimefit, fitparms[2 * i + 1], fitparms[2 * i]);
688 /* Do numerical integration */
690 for (i = 0; (i < nfitpnts - 1); i++)
692 double y0 = lmcurves[eFitFn](x[i], fitparms);
693 double y1 = lmcurves[eFitFn](x[i + 1], fitparms);
694 integral += (x[i + 1] - x[i]) * (y1 + y0) * 0.5;
701 printf("FIT: Integral of fitted function: %g\n", integral);
702 if ((effnEXP5 == eFitFn) || (effnEXP7 == eFitFn) || (effnEXP9 == eFitFn))
704 printf("FIT: Note that the constant term is not taken into account when "
705 "computing integral.\n");
708 /* Generate debug output */
709 if (nullptr != fn_fitted)
711 fp = xvgropen(fn_fitted, "Data + Fit", "Time (ps)", "Data (t)", oenv);
712 for (i = 0; (i < effnNparams(eFitFn)); i++)
714 fprintf(fp, "# fitparms[%d] = %g\n", i, fitparms[i]);
716 for (j = 0; (j < nfitpnts); j++)
718 real ttt = x0 ? x0[j] : dt * j;
719 fprintf(fp, "%10.5e %10.5e %10.5e\n", x[j], y[j], (lmcurves[eFitFn])(ttt, fitparms));
733 real fit_acf(int ncorr,
735 const gmx_output_env_t* oenv,
744 double tStart, tail_corr, sum, sumtot = 0, c_start, ct_estimate;
746 int i, j, jmax, nf_int;
749 GMX_ASSERT(effnNparams(fitfn) < static_cast<int>(sizeof(fitparm) / sizeof(double)),
750 "Fitting function with more than 3 parameters not supported!");
751 bPrint = bVerbose || bDebugMode();
760 tendfit = ncorr * dt;
762 nf_int = std::min(ncorr, static_cast<int>(tendfit / dt));
763 sum = print_and_integrate(debug, nf_int, dt, c1, nullptr, 1);
767 printf("COR: Correlation time (plain integral from %6.3f to %6.3f ps) = %8.5f ps\n", 0.0, dt * nf_int, sum);
768 printf("COR: Relaxation times are computed as fit to an exponential:\n");
769 printf("COR: %s\n", effnDescription(fitfn));
770 printf("COR: Fit to correlation function from %6.3f ps to %6.3f ps, results in a\n",
772 std::min(ncorr * dt, tendfit));
778 printf("COR:%11s%11s%11s%11s%11s%11s%11s\n",
784 (effnNparams(fitfn) >= 2) ? " a2 ()" : "",
785 (effnNparams(fitfn) >= 3) ? " a3 (ps)" : "");
798 for (j = 0; ((j < jmax) && (tStart < tendfit) && (tStart < ncorr * dt)); j++)
800 /* Estimate the correlation time for better fitting */
803 for (i = 0; (i < ncorr) && (dt * i < tStart || c1[i] > 0); i++)
807 if (dt * i >= tStart)
810 ct_estimate = 0.5 * c1[i];
815 ct_estimate += c1[i];
820 ct_estimate *= dt / c_start;
824 /* The data is strange, so we need to choose somehting */
825 ct_estimate = tendfit;
829 fprintf(debug, "tStart %g ct_estimate: %g\n", tStart, ct_estimate);
832 if (fitfn == effnEXPEXP)
834 fitparm[0] = 0.002 * ncorr * dt;
836 fitparm[2] = 0.2 * ncorr * dt;
840 /* Good initial guess, this increases the probability of convergence */
841 fitparm[0] = ct_estimate;
846 /* Generate more or less appropriate sigma's */
847 for (i = 0; i < ncorr; i++)
849 sig[i] = sqrt(ct_estimate + dt * i);
852 nf_int = std::min(ncorr, static_cast<int>((tStart + 1e-4) / dt));
853 sum = print_and_integrate(debug, nf_int, dt, c1, nullptr, 1);
854 tail_corr = do_lmfit(
855 ncorr, c1, sig, dt, nullptr, tStart, tendfit, oenv, bDebugMode(), fitfn, fitparm, 0, nullptr);
856 sumtot = sum + tail_corr;
857 if (fit && ((jmax == 1) || (j == 1)))
860 for (i = 0; (i < 3); i++)
864 for (i = 0; (i < ncorr); i++)
866 fit[i] = lmcurves[fitfn](i * dt, mfp);
871 printf("COR:%11.4e%11.4e%11.4e%11.4e", tStart, sum, tail_corr, sumtot);
872 for (i = 0; (i < effnNparams(fitfn)); i++)
874 printf(" %11.4e", fitparm[i]);