2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2014,2015,2017,2018 by the GROMACS development team.
7 * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 #include "statistics.h"
44 #include "gromacs/math/functions.h"
45 #include "gromacs/math/vec.h"
46 #include "gromacs/utility/exceptions.h"
47 #include "gromacs/utility/gmxassert.h"
48 #include "gromacs/utility/real.h"
49 #include "gromacs/utility/smalloc.h"
// Storage for a set of (x, y) data points with per-point uncertainties,
// together with the quantities gmx_stats_compute() derives from them.
52 typedef struct gmx_stats
// aa: slope of the line fitted with zero intercept; a, b: slope and intercept
// of the general line fit; sigma_a, sigma_b: uncertainties assigned to a and b;
// aver, sigma_aver, error: average of y, its standard deviation and
// sigma_aver / sqrt(N).
// NOTE(review): sigma_aa is not written by any code visible here - confirm use.
54 double aa, a, b, sigma_aa, sigma_a, sigma_b, aver, sigma_aver, error;
// rmsd: root-mean-square difference between x and y; Rdata, Rfit, Rfitaa:
// correlation-type coefficients; chi2, chi2aa: deviation of the data from the
// fitted lines with and without intercept.
55 double rmsd, Rdata, Rfit, Rfitaa, chi2, chi2aa;
// Parallel arrays, resized together in gmx_stats_add_point(): coordinates
// (x, y) and their uncertainties (dx, dy), one entry per stored point.
56 double *x, *y, *dx, *dy;
// Hands out a statistics object as a gmx_stats_t handle.
// NOTE(review): the allocation of `stats` is not visible here; presumably it
// is zero-initialised, since gmx_stats_add_point() tests nalloc == 0 before the
// first growth - confirm.
61 gmx_stats_t gmx_stats_init()
67 return static_cast<gmx_stats_t>(stats);
// Takes a handle obtained from gmx_stats_init().
// NOTE(review): presumably releases the four data arrays and the struct
// itself; the freeing statements are not visible here - confirm.
70 void gmx_stats_free(gmx_stats_t gstats)
// Recover the concrete struct pointer from the handle.
72 gmx_stats* stats = static_cast<gmx_stats*>(gstats);
// Stores one data point in the statistics object.
//
// gstats  handle of the statistics object to extend
// x, y    coordinates of the new point
// dx, dy  uncertainties of x and y for this point
81 void gmx_stats_add_point(gmx_stats_t gstats, double x, double y, double dx, double dy)
83 gmx_stats* stats = gstats;
// Grow the arrays as soon as the slot after the next point would reach the
// current capacity.
85 if (stats->np + 1 >= stats->nalloc)
// An empty object (no capacity yet) is treated separately from later growth.
// NOTE(review): the new capacity values are not visible here.
87 if (stats->nalloc == 0)
// All four arrays are always resized to the same capacity.
95 srenew(stats->x, stats->nalloc);
96 srenew(stats->y, stats->nalloc);
97 srenew(stats->dx, stats->nalloc);
98 srenew(stats->dy, stats->nalloc);
// Walk over the slots from the current point count up to the new capacity.
// NOTE(review): presumably clears them; the loop body is not visible here.
99 for (int i = stats->np; (i < stats->nalloc); i++)
// Write the new point into the slot at index np.
// NOTE(review): the increment of np and any reset of the `computed` flag are
// not visible here - confirm.
107 stats->x[stats->np] = x;
108 stats->y[stats->np] = y;
109 stats->dx[stats->np] = dx;
110 stats->dy[stats->np] = dy;
// Derives all statistics from the stored points and writes them into *stats:
// average, standard deviation and error of y, RMSD between x and y,
// correlation coefficients, straight-line fit parameters with and without
// intercept, and the chi2 values of both fits.
//
// stats   object holding the points; receives the results
// weight  when equal to elsqWEIGHT_Y, points with non-zero dy are weighted
//         by 1 / dy^2 in the weighted sums
//
// NOTE(review): several statements of this function (initialisation of the
// sums, the definition of N, normalisation steps and guards) are not visible
// here; the notes below are limited to the visible statements.
115 static void gmx_stats_compute(gmx_stats* stats, int weight)
// Weighted sums (yy, yx, xx, sx, sy) and working variables.
117 double yy, yx, xx, sx, sy, dy, chi2, chi2aa, d2;
118 double ssxx, ssyy, ssxy;
// Names ending in _nw are the corresponding unweighted sums.
119 double w, wtot, yx_nw, sy_nw, sx_nw, yy_nw, xx_nw, dx2, dy2;
// Work is only done while the results are flagged as not yet computed.
123 if (stats->computed == 0)
// N is presumably the number of stored points (stats->np) - TODO confirm.
125 GMX_RELEASE_ASSERT(N >= 1, "Must have points to work on");
// Single pass over the points accumulating all sums.
134 for (int i = 0; (i < N); i++)
// Squared difference between x and y, used for the RMSD below.
136 d2 += gmx::square(stats->x[i] - stats->y[i]);
// Inverse-variance weight, only when y-weighting is requested and dy is
// non-zero (avoids dividing by zero).
137 if (((stats->dy[i]) != 0.0) && (weight == elsqWEIGHT_Y))
139 w = 1 / gmx::square(stats->dy[i]);
// Each quantity is accumulated twice: weighted by w and unweighted.
148 xx += w * gmx::square(stats->x[i]);
149 xx_nw += gmx::square(stats->x[i]);
151 yy += w * gmx::square(stats->y[i]);
152 yy_nw += gmx::square(stats->y[i]);
154 yx += w * stats->y[i] * stats->x[i];
155 yx_nw += stats->y[i] * stats->x[i];
157 sx += w * stats->x[i];
158 sx_nw += stats->x[i];
160 sy += w * stats->y[i];
161 sy_nw += stats->y[i];
164 /* Compute average, sigma and error */
// aver is the unweighted mean of y; sigma_aver is sqrt(mean(y^2) - mean(y)^2);
// error is sigma_aver divided by sqrt(N).
165 stats->aver = sy_nw / N;
166 stats->sigma_aver = std::sqrt(yy_nw / N - gmx::square(sy_nw / N));
167 stats->error = stats->sigma_aver / std::sqrt(static_cast<double>(N));
169 /* Compute RMSD between x and y */
170 stats->rmsd = std::sqrt(d2 / N);
172 /* Correlation coefficient for data */
// NOTE(review): these expressions are N times the (co)variance only if the
// _nw sums have been divided by N beforehand; that step is not visible here -
// confirm.
178 ssxx = N * (xx_nw - gmx::square(sx_nw));
179 ssyy = N * (yy_nw - gmx::square(sy_nw));
180 ssxy = N * (yx_nw - (sx_nw * sy_nw));
// Stored as a non-negative value: square root of ssxy^2 / (ssxx * ssyy).
181 stats->Rdata = std::sqrt(gmx::square(ssxy) / (ssxx * ssyy));
// The fit below uses the weighted sums.
// NOTE(review): the formulas for a and b assume those sums were normalised by
// the total weight (wtot); that step is not visible here - confirm.
183 /* Compute straight line through datapoints, either with intercept
184 zero (result in aa) or with intercept variable (results in a
191 stats->aa = (yx / xx);
192 stats->a = (yx - sx * sy) / (xx - sx * sx);
193 stats->b = (sy) - (stats->a) * (sx);
195 /* Compute chi2, deviation from a line y = ax+b. Also compute
196 chi2aa which returns the deviation from a line y = ax. */
199 for (int i = 0; (i < N); i++)
// Selects the divisor dy for this point depending on whether a positive
// uncertainty was stored.
// NOTE(review): the assignments to dy are not visible here.
201 if (stats->dy[i] > 0)
// Accumulate squared residuals of both fits, each scaled by dy.
209 chi2aa += gmx::square((stats->y[i] - (stats->aa * stats->x[i])) / dy);
210 chi2 += gmx::square((stats->y[i] - (stats->a * stats->x[i] + stats->b)) / dy);
// Stored values are square roots of the sums divided by N - 2.
// NOTE(review): N - 2 is zero or negative for N <= 2; presumably guarded by a
// condition that is not visible here - confirm.
214 stats->chi2 = std::sqrt(chi2 / (N - 2));
215 stats->chi2aa = std::sqrt(chi2aa / (N - 2));
217 /* Look up equations! */
// dx2 and dy2 are computed from the weighted sums of x and y.
218 dx2 = (xx - sx * sx);
219 dy2 = (yy - sy * sy);
// Uncertainties of slope and intercept derived from chi2 and the spread of x.
220 stats->sigma_a = std::sqrt(stats->chi2 / ((N - 2) * dx2));
221 stats->sigma_b = stats->sigma_a * std::sqrt(xx);
// Rfit uses the same unweighted quantities as Rdata; Rfitaa is the
// zero-intercept slope scaled by sqrt(dx2 / dy2).
222 stats->Rfit = std::abs(ssxy) / std::sqrt(ssxx * ssyy);
223 stats->Rfitaa = stats->aa * std::sqrt(dx2 / dy2);
// Runs the statistics computation with the requested weighting and reports
// the straight-line fit through the output pointers.
//
// gstats   handle of the statistics object
// weight   weighting mode handed on to gmx_stats_compute()
// a, b     outputs for the fit parameters
// da, db   outputs that receive sigma_a and sigma_b
// chi2     output for the chi2 of the fit
// Rfit     output for the correlation coefficient of the fit
//
// NOTE(review): low_lsq_y_ax_b() in this file passes nullptr for da and db, so
// the writes are presumably guarded by null checks; the guards and the
// assignments to a, b, chi2 and Rfit are not visible here - confirm.
239 void gmx_stats_get_ab(gmx_stats_t gstats, int weight, real* a, real* b, real* da, real* db, real* chi2, real* Rfit)
241 gmx_stats* stats = gstats;
// Make sure the derived quantities are available before reading them.
243 gmx_stats_compute(stats, weight);
254 *da = stats->sigma_a;
258 *db = stats->sigma_b;
// Computes the statistics without weighting so that the average is available
// to the caller.
// Throws gmx::InconsistentInputError with the message "No points to average".
// NOTE(review): the condition guarding the throw (presumably an empty data
// set) and the return statement are not visible here - confirm.
270 real gmx_stats_get_average(gmx_stats_t gstats)
272 gmx_stats* stats = gstats;
276 GMX_THROW(gmx::InconsistentInputError("No points to average"));
278 gmx_stats_compute(stats, elsqWEIGHT_NONE);
// Returns the tuple (average, standard deviation, error) of the stored y
// values, computed without weighting.
// Throws gmx::InconsistentInputError with the message "No points to average".
// NOTE(review): the condition guarding the throw (presumably an empty data
// set) is not visible here - confirm.
283 std::tuple<real, real, real> gmx_stats_get_ase(gmx_stats_t gstats)
285 gmx_stats* stats = gstats;
289 GMX_THROW(gmx::InconsistentInputError("No points to average"));
291 gmx_stats_compute(stats, elsqWEIGHT_NONE);
// The struct fields are double; they are converted to real in the tuple.
293 return std::make_tuple(stats->aver, stats->sigma_aver, stats->error);
296 // When using GMX_DOUBLE=OFF, some callers want to analyse x values
297 // that are already computed in double precision. So we need to
298 // compile two versions, so that the promotion to double is used when
// Unweighted straight-line fit y = a*x + b through n points.
//
// RealT  element type of xr; the two wrappers below pass real and double
// n      number of points read from xr and yr
// xr,yr  input coordinates; each x value is converted to double when stored
// a, b   outputs for slope and intercept
// r      output for the correlation coefficient of the fit
// chi2   output for the chi2 of the fit
300 template<typename RealT>
301 void low_lsq_y_ax_b(int n, const RealT* xr, real yr[], real* a, real* b, real* r, real* chi2)
// Temporary statistics object used only for this fit.
// NOTE(review): its release is not visible here - confirm it is freed.
303 gmx_stats_t lsq = gmx_stats_init();
304 for (int i = 0; (i < n); i++)
// No uncertainties are supplied: dx and dy are stored as zero.
306 gmx_stats_add_point(lsq, double(xr[i]), yr[i], 0, 0);
// The uncertainties of a and b are not requested (nullptr for da and db).
308 gmx_stats_get_ab(lsq, elsqWEIGHT_NONE, a, b, nullptr, nullptr, chi2, r);
// Unweighted straight-line fit for x values given in real precision; forwards
// all arguments unchanged to low_lsq_y_ax_b().
312 void lsq_y_ax_b(int n, real x[], real y[], real* a, real* b, real* r, real* chi2)
314 low_lsq_y_ax_b(n, x, y, a, b, r, chi2);
// Same fit as lsq_y_ax_b(), but for x values supplied as double; forwards all
// arguments unchanged to low_lsq_y_ax_b().
317 void lsq_y_ax_b_xdouble(int n, double x[], real y[], real* a, real* b, real* r, real* chi2)
319 low_lsq_y_ax_b(n, x, y, a, b, r, chi2);
// Straight-line fit y = a*x + b that weights each point by its y uncertainty.
//
// n       number of points read from x, y and dy
// x, y    input coordinates
// dy      uncertainty of each y value
// a, b    outputs for slope and intercept
// da, db  outputs for the uncertainties of a and b
// r       output for the correlation coefficient of the fit
// chi2    output for the chi2 of the fit
//
// Throws gmx::InconsistentInputError with the message "No points to fit".
// NOTE(review): the condition guarding the throw (presumably on n) and the
// release of the temporary statistics object are not visible here - confirm.
322 void lsq_y_ax_b_error(int n, real x[], real y[], real dy[], real* a, real* b, real* da, real* db, real* r, real* chi2)
326 GMX_THROW(gmx::InconsistentInputError("No points to fit"));
329 gmx_stats_t lsq = gmx_stats_init();
330 for (int i = 0; (i < n); i++)
// Only y carries an uncertainty; dx is stored as zero.
332 gmx_stats_add_point(lsq, x[i], y[i], 0, dy[i]);
// elsqWEIGHT_Y selects the 1 / dy^2 weighting in gmx_stats_compute().
334 gmx_stats_get_ab(lsq, elsqWEIGHT_Y, a, b, da, db, chi2, r);