src/gromacs/statistics/statistics.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2012,2014,2015,2017,2018 by the GROMACS development team.
   7  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "statistics.h"
  41
  42 #include <cmath>
  43
  44 #include "gromacs/math/functions.h"
  45 #include "gromacs/math/vec.h"
  46 #include "gromacs/utility/exceptions.h"
  47 #include "gromacs/utility/gmxassert.h"
  48 #include "gromacs/utility/real.h"
  49 #include "gromacs/utility/smalloc.h"
  50
  51
  52 typedef struct gmx_stats
  53 {
  54     double  aa, a, b, sigma_aa, sigma_a, sigma_b, aver, sigma_aver, error;
  55     double  rmsd, Rdata, Rfit, Rfitaa, chi2, chi2aa;
  56     double *x, *y, *dx, *dy;
  57     int     computed;
  58     int     np, np_c, nalloc;
  59 } gmx_stats;
  60
  61 gmx_stats_t gmx_stats_init()
  62 {
  63     gmx_stats* stats;
  64
  65     snew(stats, 1);
  66
  67     return static_cast<gmx_stats_t>(stats);
  68 }
  69
  70 void gmx_stats_free(gmx_stats_t gstats)
  71 {
  72     gmx_stats* stats = static_cast<gmx_stats*>(gstats);
  73
  74     sfree(stats->x);
  75     sfree(stats->y);
  76     sfree(stats->dx);
  77     sfree(stats->dy);
  78     sfree(stats);
  79 }
  80
  81 void gmx_stats_add_point(gmx_stats_t gstats, double x, double y, double dx, double dy)
  82 {
  83     gmx_stats* stats = gstats;
  84
  85     if (stats->np + 1 >= stats->nalloc)
  86     {
  87         if (stats->nalloc == 0)
  88         {
  89             stats->nalloc = 1024;
  90         }
  91         else
  92         {
  93             stats->nalloc *= 2;
  94         }
  95         srenew(stats->x, stats->nalloc);
  96         srenew(stats->y, stats->nalloc);
  97         srenew(stats->dx, stats->nalloc);
  98         srenew(stats->dy, stats->nalloc);
  99         for (int i = stats->np; (i < stats->nalloc); i++)
 100         {
 101             stats->x[i]  = 0;
 102             stats->y[i]  = 0;
 103             stats->dx[i] = 0;
 104             stats->dy[i] = 0;
 105         }
 106     }
 107     stats->x[stats->np]  = x;
 108     stats->y[stats->np]  = y;
 109     stats->dx[stats->np] = dx;
 110     stats->dy[stats->np] = dy;
 111     stats->np++;
 112     stats->computed = 0;
 113 }
 114
 115 static void gmx_stats_compute(gmx_stats* stats, int weight)
 116 {
 117     double yy, yx, xx, sx, sy, dy, chi2, chi2aa, d2;
 118     double ssxx, ssyy, ssxy;
 119     double w, wtot, yx_nw, sy_nw, sx_nw, yy_nw, xx_nw, dx2, dy2;
 120
 121     int N = stats->np;
 122
 123     if (stats->computed == 0)
 124     {
 125         GMX_RELEASE_ASSERT(N >= 1, "Must have points to work on");
 126
 127         xx = xx_nw = 0;
 128         yy = yy_nw = 0;
 129         yx = yx_nw = 0;
 130         sx = sx_nw = 0;
 131         sy = sy_nw = 0;
 132         wtot       = 0;
 133         d2         = 0;
 134         for (int i = 0; (i < N); i++)
 135         {
 136             d2 += gmx::square(stats->x[i] - stats->y[i]);
 137             if (((stats->dy[i]) != 0.0) && (weight == elsqWEIGHT_Y))
 138             {
 139                 w = 1 / gmx::square(stats->dy[i]);
 140             }
 141             else
 142             {
 143                 w = 1;
 144             }
 145
 146             wtot += w;
 147
 148             xx += w * gmx::square(stats->x[i]);
 149             xx_nw += gmx::square(stats->x[i]);
 150
 151             yy += w * gmx::square(stats->y[i]);
 152             yy_nw += gmx::square(stats->y[i]);
 153
 154             yx += w * stats->y[i] * stats->x[i];
 155             yx_nw += stats->y[i] * stats->x[i];
 156
 157             sx += w * stats->x[i];
 158             sx_nw += stats->x[i];
 159
 160             sy += w * stats->y[i];
 161             sy_nw += stats->y[i];
 162         }
 163
 164         /* Compute average, sigma and error */
 165         stats->aver       = sy_nw / N;
 166         stats->sigma_aver = std::sqrt(yy_nw / N - gmx::square(sy_nw / N));
 167         stats->error      = stats->sigma_aver / std::sqrt(static_cast<double>(N));
 168
 169         /* Compute RMSD between x and y */
 170         stats->rmsd = std::sqrt(d2 / N);
 171
 172         /* Correlation coefficient for data */
 173         yx_nw /= N;
 174         xx_nw /= N;
 175         yy_nw /= N;
 176         sx_nw /= N;
 177         sy_nw /= N;
 178         ssxx         = N * (xx_nw - gmx::square(sx_nw));
 179         ssyy         = N * (yy_nw - gmx::square(sy_nw));
 180         ssxy         = N * (yx_nw - (sx_nw * sy_nw));
 181         stats->Rdata = std::sqrt(gmx::square(ssxy) / (ssxx * ssyy));
 182
 183         /* Compute straight line through datapoints, either with intercept
 184            zero (result in aa) or with intercept variable (results in a
 185            and b) */
 186         yx = yx / wtot;
 187         xx = xx / wtot;
 188         sx = sx / wtot;
 189         sy = sy / wtot;
 190
 191         stats->aa = (yx / xx);
 192         stats->a  = (yx - sx * sy) / (xx - sx * sx);
 193         stats->b  = (sy) - (stats->a) * (sx);
 194
 195         /* Compute chi2, deviation from a line y = ax+b. Also compute
 196            chi2aa which returns the deviation from a line y = ax. */
 197         chi2   = 0;
 198         chi2aa = 0;
 199         for (int i = 0; (i < N); i++)
 200         {
 201             if (stats->dy[i] > 0)
 202             {
 203                 dy = stats->dy[i];
 204             }
 205             else
 206             {
 207                 dy = 1;
 208             }
 209             chi2aa += gmx::square((stats->y[i] - (stats->aa * stats->x[i])) / dy);
 210             chi2 += gmx::square((stats->y[i] - (stats->a * stats->x[i] + stats->b)) / dy);
 211         }
 212         if (N > 2)
 213         {
 214             stats->chi2   = std::sqrt(chi2 / (N - 2));
 215             stats->chi2aa = std::sqrt(chi2aa / (N - 2));
 216
 217             /* Look up equations! */
 218             dx2            = (xx - sx * sx);
 219             dy2            = (yy - sy * sy);
 220             stats->sigma_a = std::sqrt(stats->chi2 / ((N - 2) * dx2));
 221             stats->sigma_b = stats->sigma_a * std::sqrt(xx);
 222             stats->Rfit    = std::abs(ssxy) / std::sqrt(ssxx * ssyy);
 223             stats->Rfitaa  = stats->aa * std::sqrt(dx2 / dy2);
 224         }
 225         else
 226         {
 227             stats->chi2    = 0;
 228             stats->chi2aa  = 0;
 229             stats->sigma_a = 0;
 230             stats->sigma_b = 0;
 231             stats->Rfit    = 0;
 232             stats->Rfitaa  = 0;
 233         }
 234
 235         stats->computed = 1;
 236     }
 237 }
 238
 239 void gmx_stats_get_ab(gmx_stats_t gstats, int weight, real* a, real* b, real* da, real* db, real* chi2, real* Rfit)
 240 {
 241     gmx_stats* stats = gstats;
 242
 243     gmx_stats_compute(stats, weight);
 244     if (nullptr != a)
 245     {
 246         *a = stats->a;
 247     }
 248     if (nullptr != b)
 249     {
 250         *b = stats->b;
 251     }
 252     if (nullptr != da)
 253     {
 254         *da = stats->sigma_a;
 255     }
 256     if (nullptr != db)
 257     {
 258         *db = stats->sigma_b;
 259     }
 260     if (nullptr != chi2)
 261     {
 262         *chi2 = stats->chi2;
 263     }
 264     if (nullptr != Rfit)
 265     {
 266         *Rfit = stats->Rfit;
 267     }
 268 }
 269
 270 real gmx_stats_get_average(gmx_stats_t gstats)
 271 {
 272     gmx_stats* stats = gstats;
 273
 274     if (gstats->np < 1)
 275     {
 276         GMX_THROW(gmx::InconsistentInputError("No points to average"));
 277     }
 278     gmx_stats_compute(stats, elsqWEIGHT_NONE);
 279
 280     return stats->aver;
 281 }
 282
 283 std::tuple<real, real, real> gmx_stats_get_ase(gmx_stats_t gstats)
 284 {
 285     gmx_stats* stats = gstats;
 286
 287     if (gstats->np < 1)
 288     {
 289         GMX_THROW(gmx::InconsistentInputError("No points to average"));
 290     }
 291     gmx_stats_compute(stats, elsqWEIGHT_NONE);
 292
 293     return std::make_tuple(stats->aver, stats->sigma_aver, stats->error);
 294 }
 295
 296 // When using GMX_DOUBLE=OFF, some callers want to analyse x values
 297 // that are already computed in double precision. So we need to
 298 // compile two versions, so that the promotion to double is used when
 299 // needed.
 300 template<typename RealT>
 301 void low_lsq_y_ax_b(int n, const RealT* xr, real yr[], real* a, real* b, real* r, real* chi2)
 302 {
 303     gmx_stats_t lsq = gmx_stats_init();
 304     for (int i = 0; (i < n); i++)
 305     {
 306         gmx_stats_add_point(lsq, double(xr[i]), yr[i], 0, 0);
 307     }
 308     gmx_stats_get_ab(lsq, elsqWEIGHT_NONE, a, b, nullptr, nullptr, chi2, r);
 309     gmx_stats_free(lsq);
 310 }
 311
 312 void lsq_y_ax_b(int n, real x[], real y[], real* a, real* b, real* r, real* chi2)
 313 {
 314     low_lsq_y_ax_b(n, x, y, a, b, r, chi2);
 315 }
 316
 317 void lsq_y_ax_b_xdouble(int n, double x[], real y[], real* a, real* b, real* r, real* chi2)
 318 {
 319     low_lsq_y_ax_b(n, x, y, a, b, r, chi2);
 320 }
 321
 322 void lsq_y_ax_b_error(int n, real x[], real y[], real dy[], real* a, real* b, real* da, real* db, real* r, real* chi2)
 323 {
 324     if (n < 1)
 325     {
 326         GMX_THROW(gmx::InconsistentInputError("No points to fit"));
 327     }
 328
 329     gmx_stats_t lsq = gmx_stats_init();
 330     for (int i = 0; (i < n); i++)
 331     {
 332         gmx_stats_add_point(lsq, x[i], y[i], 0, dy[i]);
 333     }
 334     gmx_stats_get_ab(lsq, elsqWEIGHT_Y, a, b, da, db, chi2, r);
 335     gmx_stats_free(lsq);
 336 }