src/gromacs/tools/pme_error.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include "pme_error.h"
  38
  39 #include "config.h"
  40
  41 #include <cmath>
  42
  43 #include <algorithm>
  44
  45 #include "gromacs/commandline/pargs.h"
  46 #include "gromacs/ewald/ewald_utils.h"
  47 #include "gromacs/ewald/pme.h"
  48 #include "gromacs/fft/calcgrid.h"
  49 #include "gromacs/fileio/checkpoint.h"
  50 #include "gromacs/fileio/tpxio.h"
  51 #include "gromacs/gmxlib/network.h"
  52 #include "gromacs/math/functions.h"
  53 #include "gromacs/math/units.h"
  54 #include "gromacs/math/vec.h"
  55 #include "gromacs/mdlib/broadcaststructs.h"
  56 #include "gromacs/mdtypes/commrec.h"
  57 #include "gromacs/mdtypes/inputrec.h"
  58 #include "gromacs/mdtypes/md_enums.h"
  59 #include "gromacs/mdtypes/state.h"
  60 #include "gromacs/random/threefry.h"
  61 #include "gromacs/random/uniformintdistribution.h"
  62 #include "gromacs/topology/mtop_util.h"
  63 #include "gromacs/topology/topology.h"
  64 #include "gromacs/utility/arraysize.h"
  65 #include "gromacs/utility/fatalerror.h"
  66 #include "gromacs/utility/pleasecite.h"
  67 #include "gromacs/utility/smalloc.h"
  68
  69 /* #define TAKETIME */
  70 /* #define DEBUG  */
  71
  72 /* Enum for situations that can occur during log file parsing */
  73 enum
  74 {
  75     eParselogOK,
  76     eParselogNotFound,
  77     eParselogNoPerfData,
  78     eParselogTerm,
  79     eParselogResetProblem,
  80     eParselogNr
  81 };
  82
  83
  84 typedef struct
  85 {
  86     int64_t orig_sim_steps;  /* Number of steps to be done in the real simulation  */
  87     int     n_entries;       /* Number of entries in arrays                        */
  88     real    volume;          /* The volume of the box                              */
  89     matrix  recipbox;        /* The reciprocal box                                 */
  90     int     natoms;          /* The number of atoms in the MD system               */
  91     real*   fac;             /* The scaling factor                                 */
  92     real*   rcoulomb;        /* The coulomb radii [0...nr_inputfiles]              */
  93     real*   rvdw;            /* The vdW radii                                      */
  94     int *   nkx, *nky, *nkz; /* Number of k vectors in each spatial dimension      */
  95     real*   fourier_sp;      /* Fourierspacing                                     */
  96     real*   ewald_rtol;      /* Real space tolerance for Ewald, determines         */
  97                              /* the real/reciprocal space relative weight          */
  98     real*    ewald_beta;     /* Splitting parameter [1/nm]                         */
  99     real     fracself;       /* fraction of particles for SI error                 */
 100     real     q2all;          /* sum ( q ^2 )                                       */
 101     real     q2allnr;        /* nr of charges                                      */
 102     int*     pme_order;      /* Interpolation order for PME (bsplines)             */
 103     char**   fn_out;         /* Name of the output tpr file                        */
 104     real*    e_dir;          /* Direct space part of PME error with these settings */
 105     real*    e_rec;          /* Reciprocal space part of PME error                 */
 106     gmx_bool bTUNE;          /* flag for tuning */
 107 } t_inputinfo;
 108
 109
 110 /* Returns TRUE when atom is charged */
 111 static gmx_bool is_charge(real charge)
 112 {
 113     return charge * charge > GMX_REAL_EPS;
 114 }
 115
 116
 117 /* calculate charge density */
 118 static void calc_q2all(const gmx_mtop_t* mtop, /* molecular topology */
 119                        real*             q2all,
 120                        real*             q2allnr)
 121 {
 122     real q2_all = 0; /* Sum of squared charges */
 123     int  nrq_mol;    /* Number of charges in a single molecule */
 124     int  nrq_all;    /* Total number of charges in the MD system */
 125     real qi, q2_mol;
 126
 127 #ifdef DEBUG
 128     fprintf(stderr, "\nCharge density:\n");
 129 #endif
 130     q2_all  = 0.0; /* total q squared */
 131     nrq_all = 0;   /* total number of charges in the system */
 132     for (const gmx_molblock_t& molblock : mtop->molblock) /* Loop over molecule types */
 133     {
 134         q2_mol                        = 0.0; /* q squared value of this molecule */
 135         nrq_mol                       = 0;   /* number of charges this molecule carries */
 136         const gmx_moltype_t& molecule = mtop->moltype[molblock.type];
 137         for (int i = 0; i < molecule.atoms.nr; i++)
 138         {
 139             qi = molecule.atoms.atom[i].q;
 140             /* Is this charge worth to be considered? */
 141             if (is_charge(qi))
 142             {
 143                 q2_mol += qi * qi;
 144                 nrq_mol++;
 145             }
 146         }
 147         /* Multiply with the number of molecules present of this type and add */
 148         q2_all += q2_mol * molblock.nmol;
 149         nrq_all += nrq_mol * molblock.nmol;
 150 #ifdef DEBUG
 151         fprintf(stderr,
 152                 "Molecule %2d (%5d atoms) q2_mol=%10.3e nr.mol.charges=%5d (%6dx)  q2_all=%10.3e  "
 153                 "tot.charges=%d\n",
 154                 imol, molecule.atoms.nr, q2_mol, nrq_mol, molblock.nmol, q2_all, nrq_all);
 155 #endif
 156     }
 157
 158     *q2all   = q2_all;
 159     *q2allnr = nrq_all;
 160 }
 161
 162
 163 /* Estimate the direct space part error of the SPME Ewald sum */
 164 static real estimate_direct(t_inputinfo* info)
 165 {
 166     real e_dir     = 0; /* Error estimate */
 167     real beta      = 0; /* Splitting parameter (1/nm) */
 168     real r_coulomb = 0; /* Cut-off in direct space */
 169
 170
 171     beta      = info->ewald_beta[0];
 172     r_coulomb = info->rcoulomb[0];
 173
 174     e_dir = 2.0 * info->q2all * gmx::invsqrt(info->q2allnr * r_coulomb * info->volume);
 175     e_dir *= std::exp(-beta * beta * r_coulomb * r_coulomb);
 176
 177     return ONE_4PI_EPS0 * e_dir;
 178 }
 179
 180 #define SUMORDER 6
 181
 182 /* the following 4 functions determine polynomials required for the reciprocal error estimate */
 183
 184 static inline real eps_poly1(real m, /* grid coordinate in certain direction */
 185                              real K, /* grid size in corresponding direction */
 186                              real n) /* spline interpolation order of the SPME */
 187 {
 188     int  i;
 189     real nom   = 0; /* nominator */
 190     real denom = 0; /* denominator */
 191     real tmp   = 0;
 192
 193     if (m == 0.0)
 194     {
 195         return 0.0;
 196     }
 197
 198     for (i = -SUMORDER; i < 0; i++)
 199     {
 200         tmp = m / K + i;
 201         tmp *= 2.0 * M_PI;
 202         nom += std::pow(tmp, -n);
 203     }
 204
 205     for (i = SUMORDER; i > 0; i--)
 206     {
 207         tmp = m / K + i;
 208         tmp *= 2.0 * M_PI;
 209         nom += std::pow(tmp, -n);
 210     }
 211
 212     tmp = m / K;
 213     tmp *= 2.0 * M_PI;
 214     denom = std::pow(tmp, -n) + nom;
 215
 216     return -nom / denom;
 217 }
 218
 219 static inline real eps_poly2(real m, /* grid coordinate in certain direction */
 220                              real K, /* grid size in corresponding direction */
 221                              real n) /* spline interpolation order of the SPME */
 222 {
 223     int  i;
 224     real nom   = 0; /* nominator */
 225     real denom = 0; /* denominator */
 226     real tmp   = 0;
 227
 228     if (m == 0.0)
 229     {
 230         return 0.0;
 231     }
 232
 233     for (i = -SUMORDER; i < 0; i++)
 234     {
 235         tmp = m / K + i;
 236         tmp *= 2.0 * M_PI;
 237         nom += std::pow(tmp, -2 * n);
 238     }
 239
 240     for (i = SUMORDER; i > 0; i--)
 241     {
 242         tmp = m / K + i;
 243         tmp *= 2.0 * M_PI;
 244         nom += std::pow(tmp, -2 * n);
 245     }
 246
 247     for (i = -SUMORDER; i < SUMORDER + 1; i++)
 248     {
 249         tmp = m / K + i;
 250         tmp *= 2.0 * M_PI;
 251         denom += std::pow(tmp, -n);
 252     }
 253     tmp = eps_poly1(m, K, n);
 254     return nom / denom / denom + tmp * tmp;
 255 }
 256
 257 static inline real eps_poly3(real m, /* grid coordinate in certain direction */
 258                              real K, /* grid size in corresponding direction */
 259                              real n) /* spline interpolation order of the SPME */
 260 {
 261     int  i;
 262     real nom   = 0; /* nominator */
 263     real denom = 0; /* denominator */
 264     real tmp   = 0;
 265
 266     if (m == 0.0)
 267     {
 268         return 0.0;
 269     }
 270
 271     for (i = -SUMORDER; i < 0; i++)
 272     {
 273         tmp = m / K + i;
 274         tmp *= 2.0 * M_PI;
 275         nom += i * std::pow(tmp, -2 * n);
 276     }
 277
 278     for (i = SUMORDER; i > 0; i--)
 279     {
 280         tmp = m / K + i;
 281         tmp *= 2.0 * M_PI;
 282         nom += i * std::pow(tmp, -2 * n);
 283     }
 284
 285     for (i = -SUMORDER; i < SUMORDER + 1; i++)
 286     {
 287         tmp = m / K + i;
 288         tmp *= 2.0 * M_PI;
 289         denom += std::pow(tmp, -n);
 290     }
 291
 292     return 2.0 * M_PI * nom / denom / denom;
 293 }
 294
 295 static inline real eps_poly4(real m, /* grid coordinate in certain direction */
 296                              real K, /* grid size in corresponding direction */
 297                              real n) /* spline interpolation order of the SPME */
 298 {
 299     int  i;
 300     real nom   = 0; /* nominator */
 301     real denom = 0; /* denominator */
 302     real tmp   = 0;
 303
 304     if (m == 0.0)
 305     {
 306         return 0.0;
 307     }
 308
 309     for (i = -SUMORDER; i < 0; i++)
 310     {
 311         tmp = m / K + i;
 312         tmp *= 2.0 * M_PI;
 313         nom += i * i * std::pow(tmp, -2 * n);
 314     }
 315
 316     for (i = SUMORDER; i > 0; i--)
 317     {
 318         tmp = m / K + i;
 319         tmp *= 2.0 * M_PI;
 320         nom += i * i * std::pow(tmp, -2 * n);
 321     }
 322
 323     for (i = -SUMORDER; i < SUMORDER + 1; i++)
 324     {
 325         tmp = m / K + i;
 326         tmp *= 2.0 * M_PI;
 327         denom += std::pow(tmp, -n);
 328     }
 329
 330     return 4.0 * M_PI * M_PI * nom / denom / denom;
 331 }
 332
 333 static inline real eps_self(real m,     /* grid coordinate in certain direction */
 334                             real K,     /* grid size in corresponding direction */
 335                             rvec rboxv, /* reciprocal box vector */
 336                             real n,     /* spline interpolation order of the SPME */
 337                             rvec x)     /* coordinate of charge */
 338 {
 339     int  i;
 340     real tmp    = 0; /* temporary variables for computations */
 341     real tmp1   = 0; /* temporary variables for computations */
 342     real tmp2   = 0; /* temporary variables for computations */
 343     real rcoord = 0; /* coordinate in certain reciprocal space direction */
 344     real nom    = 0; /* nominator */
 345     real denom  = 0; /* denominator */
 346
 347
 348     if (m == 0.0)
 349     {
 350         return 0.0;
 351     }
 352
 353     rcoord = iprod(rboxv, x);
 354
 355
 356     for (i = -SUMORDER; i < 0; i++)
 357     {
 358         tmp  = -std::sin(2.0 * M_PI * i * K * rcoord);
 359         tmp1 = 2.0 * M_PI * m / K + 2.0 * M_PI * i;
 360         tmp2 = std::pow(tmp1, -n);
 361         nom += tmp * tmp2 * i;
 362         denom += tmp2;
 363     }
 364
 365     for (i = SUMORDER; i > 0; i--)
 366     {
 367         tmp  = -std::sin(2.0 * M_PI * i * K * rcoord);
 368         tmp1 = 2.0 * M_PI * m / K + 2.0 * M_PI * i;
 369         tmp2 = std::pow(tmp1, -n);
 370         nom += tmp * tmp2 * i;
 371         denom += tmp2;
 372     }
 373
 374
 375     tmp  = 2.0 * M_PI * m / K;
 376     tmp1 = pow(tmp, -n);
 377     denom += tmp1;
 378
 379     return 2.0 * M_PI * nom / denom * K;
 380 }
 381
 382 #undef SUMORDER
 383
 384 /* The following routine is just a copy from pme.c */
 385
 386 static void calc_recipbox(matrix box, matrix recipbox)
 387 {
 388     /* Save some time by assuming upper right part is zero */
 389
 390     real tmp = 1.0 / (box[XX][XX] * box[YY][YY] * box[ZZ][ZZ]);
 391
 392     recipbox[XX][XX] = box[YY][YY] * box[ZZ][ZZ] * tmp;
 393     recipbox[XX][YY] = 0;
 394     recipbox[XX][ZZ] = 0;
 395     recipbox[YY][XX] = -box[YY][XX] * box[ZZ][ZZ] * tmp;
 396     recipbox[YY][YY] = box[XX][XX] * box[ZZ][ZZ] * tmp;
 397     recipbox[YY][ZZ] = 0;
 398     recipbox[ZZ][XX] = (box[YY][XX] * box[ZZ][YY] - box[YY][YY] * box[ZZ][XX]) * tmp;
 399     recipbox[ZZ][YY] = -box[ZZ][YY] * box[XX][XX] * tmp;
 400     recipbox[ZZ][ZZ] = box[XX][XX] * box[YY][YY] * tmp;
 401 }
 402
 403
 404 /* Estimate the reciprocal space part error of the SPME Ewald sum. */
 405 static real estimate_reciprocal(t_inputinfo* info,
 406                                 rvec         x[], /* array of particles */
 407                                 const real   q[], /* array of charges */
 408                                 int          nr,  /* number of charges = size of the charge array */
 409                                 FILE gmx_unused* fp_out,
 410                                 gmx_bool         bVerbose,
 411                                 int  seed,     /* The seed for the random number generator */
 412                                 int* nsamples, /* Return the number of samples used if Monte Carlo
 413                                                 * algorithm is used for self energy error estimate */
 414                                 t_commrec* cr)
 415 {
 416     real     e_rec   = 0; /* reciprocal error estimate */
 417     real     e_rec1  = 0; /* Error estimate term 1*/
 418     real     e_rec2  = 0; /* Error estimate term 2*/
 419     real     e_rec3  = 0; /* Error estimate term 3 */
 420     real     e_rec3x = 0; /* part of Error estimate term 3 in x */
 421     real     e_rec3y = 0; /* part of Error estimate term 3 in y */
 422     real     e_rec3z = 0; /* part of Error estimate term 3 in z */
 423     int      i, ci;
 424     int      nx, ny, nz; /* grid coordinates */
 425     real     q2_all = 0; /* sum of squared charges */
 426     rvec     gridpx;     /* reciprocal grid point in x direction*/
 427     rvec     gridpxy;    /* reciprocal grid point in x and y direction*/
 428     rvec     gridp;      /* complete reciprocal grid point in 3 directions*/
 429     rvec     tmpvec;     /* template to create points from basis vectors */
 430     rvec     tmpvec2;    /* template to create points from basis vectors */
 431     real     coeff  = 0; /* variable to compute coefficients of the error estimate */
 432     real     coeff2 = 0; /* variable to compute coefficients of the error estimate */
 433     real     tmp    = 0; /* variables to compute different factors from vectors */
 434     real     tmp1   = 0;
 435     real     tmp2   = 0;
 436     gmx_bool bFraction;
 437
 438     int* numbers = nullptr;
 439
 440     /* Index variables for parallel work distribution */
 441     int startglobal, stopglobal;
 442     int startlocal, stoplocal;
 443     int x_per_core;
 444     int xtot;
 445
 446 #ifdef TAKETIME
 447     double t0 = 0.0;
 448     double t1 = 0.0;
 449 #endif
 450
 451     GMX_RELEASE_ASSERT(q != nullptr, "Must have charges");
 452
 453     if (seed == 0)
 454     {
 455         seed = static_cast<int>(gmx::makeRandomSeed());
 456     }
 457     fprintf(stderr, "Using random seed %d.\n", seed);
 458
 459     gmx::DefaultRandomEngine         rng(seed);
 460     gmx::UniformIntDistribution<int> dist(0, nr - 1);
 461
 462     clear_rvec(gridpx);
 463     clear_rvec(gridpxy);
 464     clear_rvec(gridp);
 465     clear_rvec(tmpvec);
 466     clear_rvec(tmpvec2);
 467
 468     for (i = 0; i < nr; i++)
 469     {
 470         q2_all += q[i] * q[i];
 471     }
 472
 473     /* Calculate indices for work distribution */
 474     startglobal = -info->nkx[0] / 2;
 475     stopglobal  = info->nkx[0] / 2;
 476     xtot        = stopglobal * 2 + 1;
 477     if (PAR(cr))
 478     {
 479         x_per_core = static_cast<int>(std::ceil(static_cast<real>(xtot) / cr->nnodes));
 480         startlocal = startglobal + x_per_core * cr->nodeid;
 481         stoplocal  = startlocal + x_per_core - 1;
 482         if (stoplocal > stopglobal)
 483         {
 484             stoplocal = stopglobal;
 485         }
 486     }
 487     else
 488     {
 489         startlocal = startglobal;
 490         stoplocal  = stopglobal;
 491         x_per_core = xtot;
 492     }
 493
 494 #if GMX_LIB_MPI
 495 #    ifdef TAKETIME
 496     if (MASTER(cr))
 497     {
 498         t0 = MPI_Wtime();
 499     }
 500 #    endif
 501 #endif
 502
 503     if (MASTER(cr))
 504     {
 505
 506         fprintf(stderr, "Calculating reciprocal error part 1 ...");
 507     }
 508
 509     for (nx = startlocal; nx <= stoplocal; nx++)
 510     {
 511         svmul(nx, info->recipbox[XX], gridpx);
 512         for (ny = -info->nky[0] / 2; ny < info->nky[0] / 2 + 1; ny++)
 513         {
 514             svmul(ny, info->recipbox[YY], tmpvec);
 515             rvec_add(gridpx, tmpvec, gridpxy);
 516             for (nz = -info->nkz[0] / 2; nz < info->nkz[0] / 2 + 1; nz++)
 517             {
 518                 if (0 == nx && 0 == ny && 0 == nz)
 519                 {
 520                     continue;
 521                 }
 522                 svmul(nz, info->recipbox[ZZ], tmpvec);
 523                 rvec_add(gridpxy, tmpvec, gridp);
 524                 tmp = norm2(gridp);
 525                 coeff = std::exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0]);
 526                 coeff /= 2.0 * M_PI * info->volume * tmp;
 527                 coeff2 = tmp;
 528
 529
 530                 tmp = eps_poly2(nx, info->nkx[0], info->pme_order[0]);
 531                 tmp += eps_poly2(ny, info->nkx[0], info->pme_order[0]);
 532                 tmp += eps_poly2(nz, info->nkx[0], info->pme_order[0]);
 533
 534                 tmp1 = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
 535                 tmp2 = eps_poly1(ny, info->nky[0], info->pme_order[0]);
 536
 537                 tmp += 2.0 * tmp1 * tmp2;
 538
 539                 tmp1 = eps_poly1(nz, info->nkz[0], info->pme_order[0]);
 540                 tmp2 = eps_poly1(ny, info->nky[0], info->pme_order[0]);
 541
 542                 tmp += 2.0 * tmp1 * tmp2;
 543
 544                 tmp1 = eps_poly1(nz, info->nkz[0], info->pme_order[0]);
 545                 tmp2 = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
 546
 547                 tmp += 2.0 * tmp1 * tmp2;
 548
 549                 tmp1 = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
 550                 tmp1 += eps_poly1(ny, info->nky[0], info->pme_order[0]);
 551                 tmp1 += eps_poly1(nz, info->nkz[0], info->pme_order[0]);
 552
 553                 tmp += tmp1 * tmp1;
 554
 555                 e_rec1 += 32.0 * M_PI * M_PI * coeff * coeff * coeff2 * tmp * q2_all * q2_all / nr;
 556
 557                 tmp1 = eps_poly3(nx, info->nkx[0], info->pme_order[0]);
 558                 tmp1 *= info->nkx[0];
 559                 tmp2 = iprod(gridp, info->recipbox[XX]);
 560
 561                 tmp = tmp1 * tmp2;
 562
 563                 tmp1 = eps_poly3(ny, info->nky[0], info->pme_order[0]);
 564                 tmp1 *= info->nky[0];
 565                 tmp2 = iprod(gridp, info->recipbox[YY]);
 566
 567                 tmp += tmp1 * tmp2;
 568
 569                 tmp1 = eps_poly3(nz, info->nkz[0], info->pme_order[0]);
 570                 tmp1 *= info->nkz[0];
 571                 tmp2 = iprod(gridp, info->recipbox[ZZ]);
 572
 573                 tmp += tmp1 * tmp2;
 574
 575                 tmp *= 4.0 * M_PI;
 576
 577                 tmp1 = eps_poly4(nx, info->nkx[0], info->pme_order[0]);
 578                 tmp1 *= norm2(info->recipbox[XX]);
 579                 tmp1 *= info->nkx[0] * info->nkx[0];
 580
 581                 tmp += tmp1;
 582
 583                 tmp1 = eps_poly4(ny, info->nky[0], info->pme_order[0]);
 584                 tmp1 *= norm2(info->recipbox[YY]);
 585                 tmp1 *= info->nky[0] * info->nky[0];
 586
 587                 tmp += tmp1;
 588
 589                 tmp1 = eps_poly4(nz, info->nkz[0], info->pme_order[0]);
 590                 tmp1 *= norm2(info->recipbox[ZZ]);
 591                 tmp1 *= info->nkz[0] * info->nkz[0];
 592
 593                 tmp += tmp1;
 594
 595                 e_rec2 += 4.0 * coeff * coeff * tmp * q2_all * q2_all / nr;
 596             }
 597         }
 598         if (MASTER(cr))
 599         {
 600             fprintf(stderr, "\rCalculating reciprocal error part 1 ... %3.0f%%",
 601                     100.0 * (nx - startlocal + 1) / (x_per_core));
 602             fflush(stderr);
 603         }
 604     }
 605
 606     if (MASTER(cr))
 607     {
 608         fprintf(stderr, "\n");
 609     }
 610
 611     /* Use just a fraction of all charges to estimate the self energy error term? */
 612     bFraction = (info->fracself > 0.0) && (info->fracself < 1.0);
 613
 614     if (bFraction)
 615     {
 616         /* Here xtot is the number of samples taken for the Monte Carlo calculation
 617          * of the average of term IV of equation 35 in Wang2010. Round up to a
 618          * number of samples that is divisible by the number of nodes */
 619         x_per_core = static_cast<int>(std::ceil(info->fracself * nr / cr->nnodes));
 620         xtot       = x_per_core * cr->nnodes;
 621     }
 622     else
 623     {
 624         /* In this case we use all nr particle positions */
 625         xtot       = nr;
 626         x_per_core = static_cast<int>(std::ceil(static_cast<real>(xtot) / cr->nnodes));
 627     }
 628
 629     startlocal = x_per_core * cr->nodeid;
 630     stoplocal  = std::min(startlocal + x_per_core, xtot); /* min needed if xtot == nr */
 631
 632     if (bFraction)
 633     {
 634         /* Make shure we get identical results in serial and parallel. Therefore,
 635          * take the sample indices from a single, global random number array that
 636          * is constructed on the master node and that only depends on the seed */
 637         snew(numbers, xtot);
 638         if (MASTER(cr))
 639         {
 640             for (i = 0; i < xtot; i++)
 641             {
 642                 numbers[i] = dist(rng); // [0,nr-1]
 643             }
 644         }
 645         /* Broadcast the random number array to the other nodes */
 646         if (PAR(cr))
 647         {
 648             nblock_bc(cr, xtot, numbers);
 649         }
 650
 651         if (bVerbose && MASTER(cr))
 652         {
 653             fprintf(stdout, "Using %d sample%s to approximate the self interaction error term",
 654                     xtot, xtot == 1 ? "" : "s");
 655             if (PAR(cr))
 656             {
 657                 fprintf(stdout, " (%d sample%s per rank)", x_per_core, x_per_core == 1 ? "" : "s");
 658             }
 659             fprintf(stdout, ".\n");
 660         }
 661     }
 662
 663     /* Return the number of positions used for the Monte Carlo algorithm */
 664     *nsamples = xtot;
 665
 666     for (i = startlocal; i < stoplocal; i++)
 667     {
 668         e_rec3x = 0;
 669         e_rec3y = 0;
 670         e_rec3z = 0;
 671
 672         if (bFraction)
 673         {
 674             /* Randomly pick a charge */
 675             ci = numbers[i];
 676         }
 677         else
 678         {
 679             /* Use all charges */
 680             ci = i;
 681         }
 682
 683         /* for(nx=startlocal; nx<=stoplocal; nx++)*/
 684         for (nx = -info->nkx[0] / 2; nx < info->nkx[0] / 2 + 1; nx++)
 685         {
 686             svmul(nx, info->recipbox[XX], gridpx);
 687             for (ny = -info->nky[0] / 2; ny < info->nky[0] / 2 + 1; ny++)
 688             {
 689                 svmul(ny, info->recipbox[YY], tmpvec);
 690                 rvec_add(gridpx, tmpvec, gridpxy);
 691                 for (nz = -info->nkz[0] / 2; nz < info->nkz[0] / 2 + 1; nz++)
 692                 {
 693
 694                     if (0 == nx && 0 == ny && 0 == nz)
 695                     {
 696                         continue;
 697                     }
 698
 699                     svmul(nz, info->recipbox[ZZ], tmpvec);
 700                     rvec_add(gridpxy, tmpvec, gridp);
 701                     tmp = norm2(gridp);
 702                     coeff = std::exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0]);
 703                     coeff /= tmp;
 704                     e_rec3x += coeff
 705                                * eps_self(nx, info->nkx[0], info->recipbox[XX], info->pme_order[0], x[ci]);
 706                     e_rec3y += coeff
 707                                * eps_self(ny, info->nky[0], info->recipbox[YY], info->pme_order[0], x[ci]);
 708                     e_rec3z += coeff
 709                                * eps_self(nz, info->nkz[0], info->recipbox[ZZ], info->pme_order[0], x[ci]);
 710                 }
 711             }
 712         }
 713
 714         clear_rvec(tmpvec2);
 715
 716         svmul(e_rec3x, info->recipbox[XX], tmpvec);
 717         rvec_inc(tmpvec2, tmpvec);
 718         svmul(e_rec3y, info->recipbox[YY], tmpvec);
 719         rvec_inc(tmpvec2, tmpvec);
 720         svmul(e_rec3z, info->recipbox[ZZ], tmpvec);
 721         rvec_inc(tmpvec2, tmpvec);
 722
 723         e_rec3 += q[ci] * q[ci] * q[ci] * q[ci] * norm2(tmpvec2)
 724                   / (xtot * M_PI * info->volume * M_PI * info->volume);
 725         if (MASTER(cr))
 726         {
 727             fprintf(stderr, "\rCalculating reciprocal error part 2 ... %3.0f%%", 100.0 * (i + 1) / stoplocal);
 728             fflush(stderr);
 729         }
 730     }
 731
 732     if (MASTER(cr))
 733     {
 734         fprintf(stderr, "\n");
 735     }
 736
 737 #if GMX_LIB_MPI
 738 #    ifdef TAKETIME
 739     if (MASTER(cr))
 740     {
 741         t1 = MPI_Wtime() - t0;
 742         fprintf(fp_out, "Recip. err. est. took   : %lf s\n", t1);
 743     }
 744 #    endif
 745 #endif
 746
 747 #ifdef DEBUG
 748     if (PAR(cr))
 749     {
 750         fprintf(stderr, "Rank %3d: nx=[%3d...%3d]  e_rec3=%e\n", cr->nodeid, startlocal, stoplocal, e_rec3);
 751     }
 752 #endif
 753
 754     if (PAR(cr))
 755     {
 756         gmx_sum(1, &e_rec1, cr);
 757         gmx_sum(1, &e_rec2, cr);
 758         gmx_sum(1, &e_rec3, cr);
 759     }
 760
 761     /* e_rec1*=8.0 * q2_all / info->volume / info->volume / nr ;
 762        e_rec2*=  q2_all / M_PI / M_PI / info->volume / info->volume / nr ;
 763        e_rec3/= M_PI * M_PI * info->volume * info->volume * nr ;
 764      */
 765     e_rec = std::sqrt(e_rec1 + e_rec2 + e_rec3);
 766
 767
 768     return ONE_4PI_EPS0 * e_rec;
 769 }
 770
 771
 772 /* Allocate memory for the inputinfo struct: */
 773 static void create_info(t_inputinfo* info)
 774 {
 775     snew(info->fac, info->n_entries);
 776     snew(info->rcoulomb, info->n_entries);
 777     snew(info->rvdw, info->n_entries);
 778     snew(info->nkx, info->n_entries);
 779     snew(info->nky, info->n_entries);
 780     snew(info->nkz, info->n_entries);
 781     snew(info->fourier_sp, info->n_entries);
 782     snew(info->ewald_rtol, info->n_entries);
 783     snew(info->ewald_beta, info->n_entries);
 784     snew(info->pme_order, info->n_entries);
 785     snew(info->fn_out, info->n_entries);
 786     snew(info->e_dir, info->n_entries);
 787     snew(info->e_rec, info->n_entries);
 788 }
 789
 790
 791 /* Allocate and fill an array with coordinates and charges,
 792  * returns the number of charges found
 793  */
 794 static int prepare_x_q(real* q[], rvec* x[], const gmx_mtop_t* mtop, const rvec x_orig[], t_commrec* cr)
 795 {
 796     int nq; /* number of charged particles */
 797
 798
 799     if (MASTER(cr))
 800     {
 801         snew(*q, mtop->natoms);
 802         snew(*x, mtop->natoms);
 803         nq = 0;
 804
 805         for (const AtomProxy atomP : AtomRange(*mtop))
 806         {
 807             const t_atom& local = atomP.atom();
 808             int           i     = atomP.globalAtomNumber();
 809             if (is_charge(local.q))
 810             {
 811                 (*q)[nq]     = local.q;
 812                 (*x)[nq][XX] = x_orig[i][XX];
 813                 (*x)[nq][YY] = x_orig[i][YY];
 814                 (*x)[nq][ZZ] = x_orig[i][ZZ];
 815                 nq++;
 816             }
 817         }
 818         /* Give back some unneeded memory */
 819         srenew(*q, nq);
 820         srenew(*x, nq);
 821     }
 822     /* Broadcast x and q in the parallel case */
 823     if (PAR(cr))
 824     {
 825         /* Transfer the number of charges */
 826         block_bc(cr, nq);
 827         snew_bc(cr, *x, nq);
 828         snew_bc(cr, *q, nq);
 829         nblock_bc(cr, nq, *x);
 830         nblock_bc(cr, nq, *q);
 831     }
 832
 833     return nq;
 834 }
 835
 836
 837 /* Read in the tpr file and save information we need later in info */
 838 static void read_tpr_file(const char*  fn_sim_tpr,
 839                           t_inputinfo* info,
 840                           t_state*     state,
 841                           gmx_mtop_t*  mtop,
 842                           t_inputrec*  ir,
 843                           real         user_beta,
 844                           real         fracself)
 845 {
 846     read_tpx_state(fn_sim_tpr, ir, state, mtop);
 847
 848     /* The values of the original tpr input file are save in the first
 849      * place [0] of the arrays */
 850     info->orig_sim_steps = ir->nsteps;
 851     info->pme_order[0]   = ir->pme_order;
 852     info->rcoulomb[0]    = ir->rcoulomb;
 853     info->rvdw[0]        = ir->rvdw;
 854     info->nkx[0]         = ir->nkx;
 855     info->nky[0]         = ir->nky;
 856     info->nkz[0]         = ir->nkz;
 857     info->ewald_rtol[0]  = ir->ewald_rtol;
 858     info->fracself       = fracself;
 859     if (user_beta > 0)
 860     {
 861         info->ewald_beta[0] = user_beta;
 862     }
 863     else
 864     {
 865         info->ewald_beta[0] = calc_ewaldcoeff_q(info->rcoulomb[0], info->ewald_rtol[0]);
 866     }
 867
 868     /* Check if PME was chosen */
 869     if (EEL_PME(ir->coulombtype) == FALSE)
 870     {
 871         gmx_fatal(FARGS, "Can only do optimizations for simulations with PME");
 872     }
 873
 874     /* Check if rcoulomb == rlist, which is necessary for PME */
 875     if (!(ir->rcoulomb == ir->rlist))
 876     {
 877         gmx_fatal(FARGS, "PME requires rcoulomb (%f) to be equal to rlist (%f).", ir->rcoulomb, ir->rlist);
 878     }
 879 }
 880
 881
 882 /* Transfer what we need for parallelizing the reciprocal error estimate */
 883 static void bcast_info(t_inputinfo* info, const t_commrec* cr)
 884 {
 885     nblock_bc(cr, info->n_entries, info->nkx);
 886     nblock_bc(cr, info->n_entries, info->nky);
 887     nblock_bc(cr, info->n_entries, info->nkz);
 888     nblock_bc(cr, info->n_entries, info->ewald_beta);
 889     nblock_bc(cr, info->n_entries, info->pme_order);
 890     nblock_bc(cr, info->n_entries, info->e_dir);
 891     nblock_bc(cr, info->n_entries, info->e_rec);
 892     block_bc(cr, info->volume);
 893     block_bc(cr, info->recipbox);
 894     block_bc(cr, info->natoms);
 895     block_bc(cr, info->fracself);
 896     block_bc(cr, info->bTUNE);
 897     block_bc(cr, info->q2all);
 898     block_bc(cr, info->q2allnr);
 899 }
 900
 901
 902 /* Estimate the error of the SPME Ewald sum. This estimate is based upon
 903  * a) a homogeneous distribution of the charges
 904  * b) a total charge of zero.
 905  */
 906 static void estimate_PME_error(t_inputinfo*      info,
 907                                const t_state*    state,
 908                                const gmx_mtop_t* mtop,
 909                                FILE*             fp_out,
 910                                gmx_bool          bVerbose,
 911                                unsigned int      seed,
 912                                t_commrec*        cr)
 913 {
 914     rvec* x     = nullptr; /* The coordinates */
 915     real* q     = nullptr; /* The charges     */
 916     real  edir  = 0.0;     /* real space error */
 917     real  erec  = 0.0;     /* reciprocal space error */
 918     real  derr  = 0.0;     /* difference of real and reciprocal space error */
 919     real  derr0 = 0.0;     /* difference of real and reciprocal space error */
 920     real  beta  = 0.0;     /* splitting parameter beta */
 921     real  beta0 = 0.0;     /* splitting parameter beta */
 922     int   ncharges;        /* The number of atoms with charges */
 923     int   nsamples;        /* The number of samples used for the calculation of the
 924                             * self-energy error term */
 925     int i = 0;
 926
 927     if (MASTER(cr))
 928     {
 929         fprintf(fp_out, "\n--- PME ERROR ESTIMATE ---\n");
 930     }
 931
 932     /* Prepare an x and q array with only the charged atoms */
 933     ncharges = prepare_x_q(&q, &x, mtop, state->x.rvec_array(), cr);
 934     if (MASTER(cr))
 935     {
 936         calc_q2all(mtop, &(info->q2all), &(info->q2allnr));
 937         info->ewald_rtol[0] = std::erfc(info->rcoulomb[0] * info->ewald_beta[0]);
 938         /* Write some info to log file */
 939         fprintf(fp_out, "Box volume              : %g nm^3\n", info->volume);
 940         fprintf(fp_out, "Number of charged atoms : %d (total atoms %d)\n", ncharges, info->natoms);
 941         fprintf(fp_out, "Coulomb radius          : %g nm\n", info->rcoulomb[0]);
 942         fprintf(fp_out, "Ewald_rtol              : %g\n", info->ewald_rtol[0]);
 943         fprintf(fp_out, "Ewald parameter beta    : %g\n", info->ewald_beta[0]);
 944         fprintf(fp_out, "Interpolation order     : %d\n", info->pme_order[0]);
 945         fprintf(fp_out, "Fourier grid (nx,ny,nz) : %d x %d x %d\n", info->nkx[0], info->nky[0],
 946                 info->nkz[0]);
 947         fflush(fp_out);
 948     }
 949
 950     if (PAR(cr))
 951     {
 952         bcast_info(info, cr);
 953     }
 954
 955
 956     /* Calculate direct space error */
 957     info->e_dir[0] = estimate_direct(info);
 958
 959     /* Calculate reciprocal space error */
 960     info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose, seed, &nsamples, cr);
 961
 962     if (PAR(cr))
 963     {
 964         bcast_info(info, cr);
 965     }
 966
 967     if (MASTER(cr))
 968     {
 969         fprintf(fp_out, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
 970         fprintf(fp_out, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
 971         fprintf(fp_out, "Self-energy error term was estimated using %d samples\n", nsamples);
 972         fflush(fp_out);
 973         fprintf(stderr, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
 974         fprintf(stderr, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
 975     }
 976
 977     i = 0;
 978
 979     if (info->bTUNE)
 980     {
 981         if (MASTER(cr))
 982         {
 983             fprintf(stderr, "Starting tuning ...\n");
 984         }
 985         edir  = info->e_dir[0];
 986         erec  = info->e_rec[0];
 987         derr0 = edir - erec;
 988         beta0 = info->ewald_beta[0];
 989         if (derr > 0.0)
 990         {
 991             info->ewald_beta[0] += 0.1;
 992         }
 993         else
 994         {
 995             info->ewald_beta[0] -= 0.1;
 996         }
 997         info->e_dir[0] = estimate_direct(info);
 998         info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose, seed, &nsamples, cr);
 999
1000         if (PAR(cr))
1001         {
1002             bcast_info(info, cr);
1003         }
1004
1005
1006         edir = info->e_dir[0];
1007         erec = info->e_rec[0];
1008         derr = edir - erec;
1009         while (std::abs(derr / std::min(erec, edir)) > 1e-4)
1010         {
1011
1012             beta = info->ewald_beta[0];
1013             beta -= derr * (info->ewald_beta[0] - beta0) / (derr - derr0);
1014             beta0               = info->ewald_beta[0];
1015             info->ewald_beta[0] = beta;
1016             derr0               = derr;
1017
1018             info->e_dir[0] = estimate_direct(info);
1019             info->e_rec[0] =
1020                     estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose, seed, &nsamples, cr);
1021
1022             if (PAR(cr))
1023             {
1024                 bcast_info(info, cr);
1025             }
1026
1027             edir = info->e_dir[0];
1028             erec = info->e_rec[0];
1029             derr = edir - erec;
1030
1031             if (MASTER(cr))
1032             {
1033                 i++;
1034                 fprintf(stderr, "difference between real and rec. space error (step %d): %g\n", i,
1035                         std::abs(derr));
1036                 fprintf(stderr, "old beta: %f\n", beta0);
1037                 fprintf(stderr, "new beta: %f\n", beta);
1038             }
1039         }
1040
1041         info->ewald_rtol[0] = std::erfc(info->rcoulomb[0] * info->ewald_beta[0]);
1042
1043         if (MASTER(cr))
1044         {
1045             /* Write some info to log file */
1046             fflush(fp_out);
1047             fprintf(fp_out, "=========  After tuning ========\n");
1048             fprintf(fp_out, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
1049             fprintf(fp_out, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
1050             fprintf(stderr, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
1051             fprintf(stderr, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
1052             fprintf(fp_out, "Ewald_rtol              : %g\n", info->ewald_rtol[0]);
1053             fprintf(fp_out, "Ewald parameter beta    : %g\n", info->ewald_beta[0]);
1054             fflush(fp_out);
1055         }
1056     }
1057 }
1058
1059
1060 int gmx_pme_error(int argc, char* argv[])
1061 {
1062     const char* desc[] = {
1063         "[THISMODULE] estimates the error of the electrostatic forces",
1064         "if using the sPME algorithm. The flag [TT]-tune[tt] will determine",
1065         "the splitting parameter such that the error is equally",
1066         "distributed over the real and reciprocal space part.",
1067         "The part of the error that stems from self interaction of the particles ",
1068         "is computationally demanding. However, a good a approximation is to",
1069         "just use a fraction of the particles for this term which can be",
1070         "indicated by the flag [TT]-self[tt].[PAR]",
1071     };
1072
1073     real          fs        = 0.0; /* 0 indicates: not set by the user */
1074     real          user_beta = -1.0;
1075     real          fracself  = 1.0;
1076     t_inputinfo   info;
1077     t_state       state; /* The state from the tpr input file */
1078     gmx_mtop_t    mtop;  /* The topology from the tpr input file */
1079     FILE*         fp = nullptr;
1080     unsigned long PCA_Flags;
1081     gmx_bool      bTUNE    = FALSE;
1082     gmx_bool      bVerbose = FALSE;
1083     int           seed     = 0;
1084
1085
1086     static t_filenm fnm[] = { { efTPR, "-s", nullptr, ffREAD },
1087                               { efOUT, "-o", "error", ffWRITE },
1088                               { efTPR, "-so", "tuned", ffOPTWR } };
1089
1090     gmx_output_env_t* oenv = nullptr;
1091
1092     t_pargs pa[] = {
1093         { "-beta",
1094           FALSE,
1095           etREAL,
1096           { &user_beta },
1097           "If positive, overwrite ewald_beta from [REF].tpr[ref] file with this value" },
1098         { "-tune",
1099           FALSE,
1100           etBOOL,
1101           { &bTUNE },
1102           "Tune the splitting parameter such that the error is equally distributed between "
1103           "real and reciprocal space" },
1104         { "-self",
1105           FALSE,
1106           etREAL,
1107           { &fracself },
1108           "If between 0.0 and 1.0, determine self interaction error from just this "
1109           "fraction of the charged particles" },
1110         { "-seed",
1111           FALSE,
1112           etINT,
1113           { &seed },
1114           "Random number seed used for Monte Carlo algorithm when [TT]-self[tt] is set to "
1115           "a value between 0.0 and 1.0" },
1116         { "-v", FALSE, etBOOL, { &bVerbose }, "Be loud and noisy" }
1117     };
1118
1119
1120 #define NFILE asize(fnm)
1121
1122     CommrecHandle commrecHandle = init_commrec(MPI_COMM_WORLD, nullptr);
1123     t_commrec*    cr            = commrecHandle.get();
1124     PCA_Flags                   = PCA_NOEXIT_ON_ARGS;
1125
1126     if (!parse_common_args(&argc, argv, PCA_Flags, NFILE, fnm, asize(pa), pa, asize(desc), desc, 0,
1127                            nullptr, &oenv))
1128     {
1129         return 0;
1130     }
1131
1132     if (!bTUNE)
1133     {
1134         bTUNE = opt2bSet("-so", NFILE, fnm);
1135     }
1136
1137     info.n_entries = 1;
1138
1139     /* Allocate memory for the inputinfo struct: */
1140     create_info(&info);
1141     info.fourier_sp[0] = fs;
1142
1143     t_inputrec ir;
1144     if (MASTER(cr))
1145     {
1146         read_tpr_file(opt2fn("-s", NFILE, fnm), &info, &state, &mtop, &ir, user_beta, fracself);
1147         /* Open logfile for reading */
1148         fp = fopen(opt2fn("-o", NFILE, fnm), "w");
1149
1150         /* Determine the volume of the simulation box */
1151         info.volume = det(state.box);
1152         calc_recipbox(state.box, info.recipbox);
1153         info.natoms = mtop.natoms;
1154         info.bTUNE  = bTUNE;
1155     }
1156
1157     /* Check consistency if the user provided fourierspacing */
1158     if (fs > 0 && MASTER(cr))
1159     {
1160         /* Recalculate the grid dimensions using fourierspacing from user input */
1161         info.nkx[0] = 0;
1162         info.nky[0] = 0;
1163         info.nkz[0] = 0;
1164         calcFftGrid(stdout, state.box, info.fourier_sp[0], minimalPmeGridSize(info.pme_order[0]),
1165                     &(info.nkx[0]), &(info.nky[0]), &(info.nkz[0]));
1166         if ((ir.nkx != info.nkx[0]) || (ir.nky != info.nky[0]) || (ir.nkz != info.nkz[0]))
1167         {
1168             gmx_fatal(FARGS,
1169                       "Wrong fourierspacing %f nm, input file grid = %d x %d x %d, computed grid = "
1170                       "%d x %d x %d",
1171                       fs, ir.nkx, ir.nky, ir.nkz, info.nkx[0], info.nky[0], info.nkz[0]);
1172         }
1173     }
1174
1175     /* Estimate (S)PME force error */
1176
1177     if (PAR(cr))
1178     {
1179         bcast_info(&info, cr);
1180     }
1181
1182     /* Get an error estimate of the input tpr file and do some tuning if requested */
1183     estimate_PME_error(&info, &state, &mtop, fp, bVerbose, seed, cr);
1184
1185     if (MASTER(cr))
1186     {
1187         /* Write out optimized tpr file if requested */
1188         if (opt2bSet("-so", NFILE, fnm) || bTUNE)
1189         {
1190             ir.ewald_rtol = info.ewald_rtol[0];
1191             write_tpx_state(opt2fn("-so", NFILE, fnm), &ir, &state, &mtop);
1192         }
1193         please_cite(fp, "Wang2010");
1194         fclose(fp);
1195     }
1196
1197     return 0;
1198 }