2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2014,2015,2017,2018 by the GROMACS development team.
7 * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 #include "gromacs/utility/enumerationhelpers.h"
41 #include "statistics.h"
45 #include "gromacs/math/functions.h"
46 #include "gromacs/math/vec.h"
47 #include "gromacs/utility/enumerationhelpers.h"
48 #include "gromacs/utility/fatalerror.h"
49 #include "gromacs/utility/real.h"
50 #include "gromacs/utility/smalloc.h"
53 typedef struct gmx_stats
55 double aa, a, b, sigma_aa, sigma_a, sigma_b, aver, sigma_aver, error;
56 double rmsd, Rdata, Rfit, Rfitaa, chi2, chi2aa;
57 double *x, *y, *dx, *dy;
62 gmx_stats_t gmx_stats_init()
68 return static_cast<gmx_stats_t>(stats);
71 void gmx_stats_free(gmx_stats_t gstats)
73 gmx_stats* stats = static_cast<gmx_stats*>(gstats);
82 StatisticsStatus gmx_stats_add_point(gmx_stats_t gstats, double x, double y, double dx, double dy)
84 gmx_stats* stats = gstats;
86 if (stats->np + 1 >= stats->nalloc)
88 if (stats->nalloc == 0)
96 srenew(stats->x, stats->nalloc);
97 srenew(stats->y, stats->nalloc);
98 srenew(stats->dx, stats->nalloc);
99 srenew(stats->dy, stats->nalloc);
100 for (int i = stats->np; (i < stats->nalloc); i++)
108 stats->x[stats->np] = x;
109 stats->y[stats->np] = y;
110 stats->dx[stats->np] = dx;
111 stats->dy[stats->np] = dy;
115 return StatisticsStatus::Ok;
118 static StatisticsStatus gmx_stats_compute(gmx_stats* stats, int weight)
120 double yy, yx, xx, sx, sy, dy, chi2, chi2aa, d2;
121 double ssxx, ssyy, ssxy;
122 double w, wtot, yx_nw, sy_nw, sx_nw, yy_nw, xx_nw, dx2, dy2;
126 if (stats->computed == 0)
130 return StatisticsStatus::NoPoints;
140 for (int i = 0; (i < N); i++)
142 d2 += gmx::square(stats->x[i] - stats->y[i]);
143 if (((stats->dy[i]) != 0.0) && (weight == elsqWEIGHT_Y))
145 w = 1 / gmx::square(stats->dy[i]);
154 xx += w * gmx::square(stats->x[i]);
155 xx_nw += gmx::square(stats->x[i]);
157 yy += w * gmx::square(stats->y[i]);
158 yy_nw += gmx::square(stats->y[i]);
160 yx += w * stats->y[i] * stats->x[i];
161 yx_nw += stats->y[i] * stats->x[i];
163 sx += w * stats->x[i];
164 sx_nw += stats->x[i];
166 sy += w * stats->y[i];
167 sy_nw += stats->y[i];
170 /* Compute average, sigma and error */
171 stats->aver = sy_nw / N;
172 stats->sigma_aver = std::sqrt(yy_nw / N - gmx::square(sy_nw / N));
173 stats->error = stats->sigma_aver / std::sqrt(static_cast<double>(N));
175 /* Compute RMSD between x and y */
176 stats->rmsd = std::sqrt(d2 / N);
178 /* Correlation coefficient for data */
184 ssxx = N * (xx_nw - gmx::square(sx_nw));
185 ssyy = N * (yy_nw - gmx::square(sy_nw));
186 ssxy = N * (yx_nw - (sx_nw * sy_nw));
187 stats->Rdata = std::sqrt(gmx::square(ssxy) / (ssxx * ssyy));
189 /* Compute straight line through datapoints, either with intercept
190 zero (result in aa) or with intercept variable (results in a
197 stats->aa = (yx / xx);
198 stats->a = (yx - sx * sy) / (xx - sx * sx);
199 stats->b = (sy) - (stats->a) * (sx);
201 /* Compute chi2, deviation from a line y = ax+b. Also compute
202 chi2aa which returns the deviation from a line y = ax. */
205 for (int i = 0; (i < N); i++)
207 if (stats->dy[i] > 0)
215 chi2aa += gmx::square((stats->y[i] - (stats->aa * stats->x[i])) / dy);
216 chi2 += gmx::square((stats->y[i] - (stats->a * stats->x[i] + stats->b)) / dy);
220 stats->chi2 = std::sqrt(chi2 / (N - 2));
221 stats->chi2aa = std::sqrt(chi2aa / (N - 2));
223 /* Look up equations! */
224 dx2 = (xx - sx * sx);
225 dy2 = (yy - sy * sy);
226 stats->sigma_a = std::sqrt(stats->chi2 / ((N - 2) * dx2));
227 stats->sigma_b = stats->sigma_a * std::sqrt(xx);
228 stats->Rfit = std::abs(ssxy) / std::sqrt(ssxx * ssyy);
229 stats->Rfitaa = stats->aa * std::sqrt(dx2 / dy2);
244 return StatisticsStatus::Ok;
248 gmx_stats_get_ab(gmx_stats_t gstats, int weight, real* a, real* b, real* da, real* db, real* chi2, real* Rfit)
250 gmx_stats* stats = gstats;
253 if ((ok = gmx_stats_compute(stats, weight)) != StatisticsStatus::Ok)
267 *da = stats->sigma_a;
271 *db = stats->sigma_b;
282 return StatisticsStatus::Ok;
285 StatisticsStatus gmx_stats_get_average(gmx_stats_t gstats, real* aver)
287 gmx_stats* stats = gstats;
290 if ((ok = gmx_stats_compute(stats, elsqWEIGHT_NONE)) != StatisticsStatus::Ok)
297 return StatisticsStatus::Ok;
300 StatisticsStatus gmx_stats_get_ase(gmx_stats_t gstats, real* aver, real* sigma, real* error)
302 gmx_stats* stats = gstats;
305 if ((ok = gmx_stats_compute(stats, elsqWEIGHT_NONE)) != StatisticsStatus::Ok)
314 if (nullptr != sigma)
316 *sigma = stats->sigma_aver;
318 if (nullptr != error)
320 *error = stats->error;
323 return StatisticsStatus::Ok;
326 static const char* enumValueToString(StatisticsStatus enumValue)
328 constexpr gmx::EnumerationArray<StatisticsStatus, const char*> statisticsStatusNames = {
329 "All well in STATS land", "No points", "Not enough memory",
330 "Invalid histogram input", "Unknown error", "Not implemented yet"
332 return statisticsStatusNames[enumValue];
335 void gmx_stats_message([[maybe_unused]] StatisticsStatus estats)
337 GMX_ASSERT(estats == StatisticsStatus::Ok, enumValueToString(estats));
340 static StatisticsStatus
341 low_lsq_y_ax_b(int n, const real* xr, const double* xd, real yr[], real* a, real* b, real* r, real* chi2)
343 gmx_stats_t lsq = gmx_stats_init();
346 for (int i = 0; (i < n); i++)
354 else if (xr != nullptr)
360 gmx_incons("Either xd or xr has to be non-NULL in low_lsq_y_ax_b()");
363 if ((ok = gmx_stats_add_point(lsq, pt, yr[i], 0, 0)) != StatisticsStatus::Ok)
369 ok = gmx_stats_get_ab(lsq, elsqWEIGHT_NONE, a, b, nullptr, nullptr, chi2, r);
375 StatisticsStatus lsq_y_ax_b(int n, real x[], real y[], real* a, real* b, real* r, real* chi2)
377 return low_lsq_y_ax_b(n, x, nullptr, y, a, b, r, chi2);
380 StatisticsStatus lsq_y_ax_b_xdouble(int n, double x[], real y[], real* a, real* b, real* r, real* chi2)
382 return low_lsq_y_ax_b(n, nullptr, x, y, a, b, r, chi2);
386 lsq_y_ax_b_error(int n, real x[], real y[], real dy[], real* a, real* b, real* da, real* db, real* r, real* chi2)
388 gmx_stats_t lsq = gmx_stats_init();
391 for (int i = 0; (i < n); i++)
393 ok = gmx_stats_add_point(lsq, x[i], y[i], 0, dy[i]);
394 if (ok != StatisticsStatus::Ok)
400 ok = gmx_stats_get_ab(lsq, elsqWEIGHT_Y, a, b, da, db, chi2, r);