src/gromacs/tools/pme_error.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include "pme_error.h"
  38
  39 #include "config.h"
  40
  41 #include <cmath>
  42
  43 #include <algorithm>
  44
  45 #include "gromacs/commandline/pargs.h"
  46 #include "gromacs/ewald/ewald_utils.h"
  47 #include "gromacs/ewald/pme.h"
  48 #include "gromacs/fft/calcgrid.h"
  49 #include "gromacs/fileio/checkpoint.h"
  50 #include "gromacs/fileio/tpxio.h"
  51 #include "gromacs/gmxlib/network.h"
  52 #include "gromacs/math/functions.h"
  53 #include "gromacs/math/units.h"
  54 #include "gromacs/math/vec.h"
  55 #include "gromacs/mdlib/broadcaststructs.h"
  56 #include "gromacs/mdtypes/commrec.h"
  57 #include "gromacs/mdtypes/inputrec.h"
  58 #include "gromacs/mdtypes/md_enums.h"
  59 #include "gromacs/mdtypes/state.h"
  60 #include "gromacs/random/threefry.h"
  61 #include "gromacs/random/uniformintdistribution.h"
  62 #include "gromacs/topology/mtop_util.h"
  63 #include "gromacs/topology/topology.h"
  64 #include "gromacs/utility/arraysize.h"
  65 #include "gromacs/utility/fatalerror.h"
  66 #include "gromacs/utility/pleasecite.h"
  67 #include "gromacs/utility/smalloc.h"
  68
  69 /* #define TAKETIME */
  70 /* #define DEBUG  */
  71
  72 /* Enum for situations that can occur during log file parsing */
  73 enum {
  74     eParselogOK,
  75     eParselogNotFound,
  76     eParselogNoPerfData,
  77     eParselogTerm,
  78     eParselogResetProblem,
  79     eParselogNr
  80 };
  81
  82
  83 typedef struct
  84 {
  85     int64_t         orig_sim_steps;  /* Number of steps to be done in the real simulation  */
  86     int             n_entries;       /* Number of entries in arrays                        */
  87     real            volume;          /* The volume of the box                              */
  88     matrix          recipbox;        /* The reciprocal box                                 */
  89     int             natoms;          /* The number of atoms in the MD system               */
  90     real           *fac;             /* The scaling factor                                 */
  91     real           *rcoulomb;        /* The coulomb radii [0...nr_inputfiles]              */
  92     real           *rvdw;            /* The vdW radii                                      */
  93     int            *nkx, *nky, *nkz; /* Number of k vectors in each spatial dimension      */
  94     real           *fourier_sp;      /* Fourierspacing                                     */
  95     real           *ewald_rtol;      /* Real space tolerance for Ewald, determines         */
  96                                      /* the real/reciprocal space relative weight          */
  97     real           *ewald_beta;      /* Splitting parameter [1/nm]                         */
  98     real            fracself;        /* fraction of particles for SI error                 */
  99     real            q2all;           /* sum ( q ^2 )                                       */
 100     real            q2allnr;         /* nr of charges                                      */
 101     int            *pme_order;       /* Interpolation order for PME (bsplines)             */
 102     char          **fn_out;          /* Name of the output tpr file                        */
 103     real           *e_dir;           /* Direct space part of PME error with these settings */
 104     real           *e_rec;           /* Reciprocal space part of PME error                 */
 105     gmx_bool        bTUNE;           /* flag for tuning */
 106 } t_inputinfo;
 107
 108
 109 /* Returns TRUE when atom is charged */
 110 static gmx_bool is_charge(real charge)
 111 {
 112     return charge*charge > GMX_REAL_EPS;
 113 }
 114
 115
 116 /* calculate charge density */
 117 static void calc_q2all(const gmx_mtop_t *mtop,   /* molecular topology */
 118                        real *q2all, real *q2allnr)
 119 {
 120     real            q2_all = 0;  /* Sum of squared charges */
 121     int             nrq_mol;     /* Number of charges in a single molecule */
 122     int             nrq_all;     /* Total number of charges in the MD system */
 123     real            qi, q2_mol;
 124
 125 #ifdef DEBUG
 126     fprintf(stderr, "\nCharge density:\n");
 127 #endif
 128     q2_all  = 0.0;                                        /* total q squared */
 129     nrq_all = 0;                                          /* total number of charges in the system */
 130     for (const gmx_molblock_t &molblock : mtop->molblock) /* Loop over molecule types */
 131     {
 132         q2_mol   = 0.0;                                   /* q squared value of this molecule */
 133         nrq_mol  = 0;                                     /* number of charges this molecule carries */
 134         const gmx_moltype_t &molecule = mtop->moltype[molblock.type];
 135         for (int i = 0; i < molecule.atoms.nr; i++)
 136         {
 137             qi = molecule.atoms.atom[i].q;
 138             /* Is this charge worth to be considered? */
 139             if (is_charge(qi))
 140             {
 141                 q2_mol += qi*qi;
 142                 nrq_mol++;
 143             }
 144         }
 145         /* Multiply with the number of molecules present of this type and add */
 146         q2_all  += q2_mol*molblock.nmol;
 147         nrq_all += nrq_mol*molblock.nmol;
 148 #ifdef DEBUG
 149         fprintf(stderr, "Molecule %2d (%5d atoms) q2_mol=%10.3e nr.mol.charges=%5d (%6dx)  q2_all=%10.3e  tot.charges=%d\n",
 150                 imol, molecule.atoms.nr, q2_mol, nrq_mol, molblock.nmol, q2_all, nrq_all);
 151 #endif
 152     }
 153
 154     *q2all   = q2_all;
 155     *q2allnr = nrq_all;
 156
 157 }
 158
 159
 160 /* Estimate the direct space part error of the SPME Ewald sum */
 161 static real estimate_direct(
 162         t_inputinfo *info
 163         )
 164 {
 165     real e_dir     = 0; /* Error estimate */
 166     real beta      = 0; /* Splitting parameter (1/nm) */
 167     real r_coulomb = 0; /* Cut-off in direct space */
 168
 169
 170     beta      = info->ewald_beta[0];
 171     r_coulomb = info->rcoulomb[0];
 172
 173     e_dir  = 2.0 * info->q2all * gmx::invsqrt( info->q2allnr  *  r_coulomb * info->volume );
 174     e_dir *= std::exp (-beta*beta*r_coulomb*r_coulomb);
 175
 176     return ONE_4PI_EPS0*e_dir;
 177 }
 178
 179 #define SUMORDER 6
 180
 181 /* the following 4 functions determine polynomials required for the reciprocal error estimate */
 182
 183 static inline real eps_poly1(
 184         real m,          /* grid coordinate in certain direction */
 185         real K,          /* grid size in corresponding direction */
 186         real n)          /* spline interpolation order of the SPME */
 187 {
 188     int  i;
 189     real nom   = 0; /* nominator */
 190     real denom = 0; /* denominator */
 191     real tmp   = 0;
 192
 193     if (m == 0.0)
 194     {
 195         return 0.0;
 196     }
 197
 198     for (i = -SUMORDER; i < 0; i++)
 199     {
 200         tmp  = m / K + i;
 201         tmp *= 2.0*M_PI;
 202         nom += std::pow( tmp, -n );
 203     }
 204
 205     for (i = SUMORDER; i > 0; i--)
 206     {
 207         tmp  = m / K + i;
 208         tmp *= 2.0*M_PI;
 209         nom += std::pow( tmp, -n );
 210     }
 211
 212     tmp   = m / K;
 213     tmp  *= 2.0*M_PI;
 214     denom = std::pow( tmp, -n )+nom;
 215
 216     return -nom/denom;
 217
 218 }
 219
 220 static inline real eps_poly2(
 221         real m,          /* grid coordinate in certain direction */
 222         real K,          /* grid size in corresponding direction */
 223         real n)          /* spline interpolation order of the SPME */
 224 {
 225     int  i;
 226     real nom   = 0; /* nominator */
 227     real denom = 0; /* denominator */
 228     real tmp   = 0;
 229
 230     if (m == 0.0)
 231     {
 232         return 0.0;
 233     }
 234
 235     for (i = -SUMORDER; i < 0; i++)
 236     {
 237         tmp  = m / K + i;
 238         tmp *= 2.0*M_PI;
 239         nom += std::pow( tmp, -2*n );
 240     }
 241
 242     for (i = SUMORDER; i > 0; i--)
 243     {
 244         tmp  = m / K + i;
 245         tmp *= 2.0*M_PI;
 246         nom += std::pow( tmp, -2*n );
 247     }
 248
 249     for (i = -SUMORDER; i < SUMORDER+1; i++)
 250     {
 251         tmp    = m / K + i;
 252         tmp   *= 2.0*M_PI;
 253         denom += std::pow( tmp, -n );
 254     }
 255     tmp = eps_poly1(m, K, n);
 256     return nom / denom / denom + tmp*tmp;
 257
 258 }
 259
 260 static inline real eps_poly3(
 261         real m,          /* grid coordinate in certain direction */
 262         real K,          /* grid size in corresponding direction */
 263         real n)          /* spline interpolation order of the SPME */
 264 {
 265     int  i;
 266     real nom   = 0; /* nominator */
 267     real denom = 0; /* denominator */
 268     real tmp   = 0;
 269
 270     if (m == 0.0)
 271     {
 272         return 0.0;
 273     }
 274
 275     for (i = -SUMORDER; i < 0; i++)
 276     {
 277         tmp  = m / K + i;
 278         tmp *= 2.0*M_PI;
 279         nom += i * std::pow( tmp, -2*n );
 280     }
 281
 282     for (i = SUMORDER; i > 0; i--)
 283     {
 284         tmp  = m / K + i;
 285         tmp *= 2.0*M_PI;
 286         nom += i * std::pow( tmp, -2*n );
 287     }
 288
 289     for (i = -SUMORDER; i < SUMORDER+1; i++)
 290     {
 291         tmp    = m / K + i;
 292         tmp   *= 2.0*M_PI;
 293         denom += std::pow( tmp, -n );
 294     }
 295
 296     return 2.0 * M_PI * nom / denom / denom;
 297
 298 }
 299
 300 static inline real eps_poly4(
 301         real m,          /* grid coordinate in certain direction */
 302         real K,          /* grid size in corresponding direction */
 303         real n)          /* spline interpolation order of the SPME */
 304 {
 305     int  i;
 306     real nom   = 0; /* nominator */
 307     real denom = 0; /* denominator */
 308     real tmp   = 0;
 309
 310     if (m == 0.0)
 311     {
 312         return 0.0;
 313     }
 314
 315     for (i = -SUMORDER; i < 0; i++)
 316     {
 317         tmp  = m / K + i;
 318         tmp *= 2.0*M_PI;
 319         nom += i * i * std::pow( tmp, -2*n );
 320     }
 321
 322     for (i = SUMORDER; i > 0; i--)
 323     {
 324         tmp  = m / K + i;
 325         tmp *= 2.0*M_PI;
 326         nom += i * i * std::pow( tmp, -2*n );
 327     }
 328
 329     for (i = -SUMORDER; i < SUMORDER+1; i++)
 330     {
 331         tmp    = m / K + i;
 332         tmp   *= 2.0*M_PI;
 333         denom += std::pow( tmp, -n );
 334     }
 335
 336     return 4.0 * M_PI * M_PI * nom / denom / denom;
 337
 338 }
 339
 340 static inline real eps_self(
 341         real m,       /* grid coordinate in certain direction */
 342         real K,       /* grid size in corresponding direction */
 343         rvec rboxv,   /* reciprocal box vector */
 344         real n,       /* spline interpolation order of the SPME */
 345         rvec x)       /* coordinate of charge */
 346 {
 347     int  i;
 348     real tmp    = 0; /* temporary variables for computations */
 349     real tmp1   = 0; /* temporary variables for computations */
 350     real tmp2   = 0; /* temporary variables for computations */
 351     real rcoord = 0; /* coordinate in certain reciprocal space direction */
 352     real nom    = 0; /* nominator */
 353     real denom  = 0; /* denominator */
 354
 355
 356     if (m == 0.0)
 357     {
 358         return 0.0;
 359     }
 360
 361     rcoord = iprod(rboxv, x);
 362
 363
 364     for (i = -SUMORDER; i < 0; i++)
 365     {
 366         tmp    = -std::sin(2.0 * M_PI * i * K * rcoord);
 367         tmp1   = 2.0 * M_PI * m / K + 2.0 * M_PI * i;
 368         tmp2   = std::pow(tmp1, -n);
 369         nom   += tmp * tmp2 * i;
 370         denom += tmp2;
 371     }
 372
 373     for (i = SUMORDER; i > 0; i--)
 374     {
 375         tmp    = -std::sin(2.0 * M_PI * i * K * rcoord);
 376         tmp1   = 2.0 * M_PI * m / K + 2.0 * M_PI * i;
 377         tmp2   = std::pow(tmp1, -n);
 378         nom   += tmp * tmp2 * i;
 379         denom += tmp2;
 380     }
 381
 382
 383     tmp    = 2.0 * M_PI * m / K;
 384     tmp1   = pow(tmp, -n);
 385     denom += tmp1;
 386
 387     return 2.0 * M_PI * nom / denom * K;
 388
 389 }
 390
 391 #undef SUMORDER
 392
 393 /* The following routine is just a copy from pme.c */
 394
 395 static void calc_recipbox(matrix box, matrix recipbox)
 396 {
 397     /* Save some time by assuming upper right part is zero */
 398
 399     real tmp = 1.0/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
 400
 401     recipbox[XX][XX] = box[YY][YY]*box[ZZ][ZZ]*tmp;
 402     recipbox[XX][YY] = 0;
 403     recipbox[XX][ZZ] = 0;
 404     recipbox[YY][XX] = -box[YY][XX]*box[ZZ][ZZ]*tmp;
 405     recipbox[YY][YY] = box[XX][XX]*box[ZZ][ZZ]*tmp;
 406     recipbox[YY][ZZ] = 0;
 407     recipbox[ZZ][XX] = (box[YY][XX]*box[ZZ][YY]-box[YY][YY]*box[ZZ][XX])*tmp;
 408     recipbox[ZZ][YY] = -box[ZZ][YY]*box[XX][XX]*tmp;
 409     recipbox[ZZ][ZZ] = box[XX][XX]*box[YY][YY]*tmp;
 410 }
 411
 412
 413 /* Estimate the reciprocal space part error of the SPME Ewald sum. */
 414 static real estimate_reciprocal(
 415         t_inputinfo             *info,
 416         rvec                     x[], /* array of particles */
 417         const real               q[], /* array of charges */
 418         int                      nr,  /* number of charges = size of the charge array */
 419         FILE  gmx_unused        *fp_out,
 420         gmx_bool                 bVerbose,
 421         int                      seed,     /* The seed for the random number generator */
 422         int                     *nsamples, /* Return the number of samples used if Monte Carlo
 423                                             * algorithm is used for self energy error estimate */
 424         t_commrec               *cr)
 425 {
 426     real      e_rec   = 0; /* reciprocal error estimate */
 427     real      e_rec1  = 0; /* Error estimate term 1*/
 428     real      e_rec2  = 0; /* Error estimate term 2*/
 429     real      e_rec3  = 0; /* Error estimate term 3 */
 430     real      e_rec3x = 0; /* part of Error estimate term 3 in x */
 431     real      e_rec3y = 0; /* part of Error estimate term 3 in y */
 432     real      e_rec3z = 0; /* part of Error estimate term 3 in z */
 433     int       i, ci;
 434     int       nx, ny, nz;  /* grid coordinates */
 435     real      q2_all = 0;  /* sum of squared charges */
 436     rvec      gridpx;      /* reciprocal grid point in x direction*/
 437     rvec      gridpxy;     /* reciprocal grid point in x and y direction*/
 438     rvec      gridp;       /* complete reciprocal grid point in 3 directions*/
 439     rvec      tmpvec;      /* template to create points from basis vectors */
 440     rvec      tmpvec2;     /* template to create points from basis vectors */
 441     real      coeff  = 0;  /* variable to compute coefficients of the error estimate */
 442     real      coeff2 = 0;  /* variable to compute coefficients of the error estimate */
 443     real      tmp    = 0;  /* variables to compute different factors from vectors */
 444     real      tmp1   = 0;
 445     real      tmp2   = 0;
 446     gmx_bool  bFraction;
 447
 448     int      *numbers = nullptr;
 449
 450     /* Index variables for parallel work distribution */
 451     int startglobal, stopglobal;
 452     int startlocal, stoplocal;
 453     int x_per_core;
 454     int xtot;
 455
 456 #ifdef TAKETIME
 457     double t0 = 0.0;
 458     double t1 = 0.0;
 459 #endif
 460
 461     GMX_RELEASE_ASSERT(q != nullptr, "Must have charges");
 462
 463     if (seed == 0)
 464     {
 465         seed = static_cast<int>(gmx::makeRandomSeed());
 466     }
 467     fprintf(stderr, "Using random seed %d.\n", seed);
 468
 469     gmx::DefaultRandomEngine           rng(seed);
 470     gmx::UniformIntDistribution<int>   dist(0, nr-1);
 471
 472     clear_rvec(gridpx);
 473     clear_rvec(gridpxy);
 474     clear_rvec(gridp);
 475     clear_rvec(tmpvec);
 476     clear_rvec(tmpvec2);
 477
 478     for (i = 0; i < nr; i++)
 479     {
 480         q2_all += q[i]*q[i];
 481     }
 482
 483     /* Calculate indices for work distribution */
 484     startglobal = -info->nkx[0]/2;
 485     stopglobal  = info->nkx[0]/2;
 486     xtot        = stopglobal*2+1;
 487     if (PAR(cr))
 488     {
 489         x_per_core = static_cast<int>(std::ceil(static_cast<real>(xtot) / cr->nnodes));
 490         startlocal = startglobal + x_per_core*cr->nodeid;
 491         stoplocal  = startlocal + x_per_core -1;
 492         if (stoplocal > stopglobal)
 493         {
 494             stoplocal = stopglobal;
 495         }
 496     }
 497     else
 498     {
 499         startlocal = startglobal;
 500         stoplocal  = stopglobal;
 501         x_per_core = xtot;
 502     }
 503
 504 #if GMX_LIB_MPI
 505 #ifdef TAKETIME
 506     if (MASTER(cr))
 507     {
 508         t0 = MPI_Wtime();
 509     }
 510 #endif
 511 #endif
 512
 513     if (MASTER(cr))
 514     {
 515
 516         fprintf(stderr, "Calculating reciprocal error part 1 ...");
 517
 518     }
 519
 520     for (nx = startlocal; nx <= stoplocal; nx++)
 521     {
 522         svmul(nx, info->recipbox[XX], gridpx);
 523         for (ny = -info->nky[0]/2; ny < info->nky[0]/2+1; ny++)
 524         {
 525             svmul(ny, info->recipbox[YY], tmpvec);
 526             rvec_add(gridpx, tmpvec, gridpxy);
 527             for (nz = -info->nkz[0]/2; nz < info->nkz[0]/2+1; nz++)
 528             {
 529                 if (0 == nx &&  0 == ny &&  0 == nz)
 530                 {
 531                     continue;
 532                 }
 533                 svmul(nz, info->recipbox[ZZ], tmpvec);
 534                 rvec_add(gridpxy, tmpvec, gridp);
 535                 tmp    = norm2(gridp);
 536                 coeff  = std::exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0] );
 537                 coeff /= 2.0 * M_PI * info->volume * tmp;
 538                 coeff2 = tmp;
 539
 540
 541                 tmp  = eps_poly2(nx, info->nkx[0], info->pme_order[0]);
 542                 tmp += eps_poly2(ny, info->nkx[0], info->pme_order[0]);
 543                 tmp += eps_poly2(nz, info->nkx[0], info->pme_order[0]);
 544
 545                 tmp1 = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
 546                 tmp2 = eps_poly1(ny, info->nky[0], info->pme_order[0]);
 547
 548                 tmp += 2.0 * tmp1 * tmp2;
 549
 550                 tmp1 = eps_poly1(nz, info->nkz[0], info->pme_order[0]);
 551                 tmp2 = eps_poly1(ny, info->nky[0], info->pme_order[0]);
 552
 553                 tmp += 2.0 * tmp1 * tmp2;
 554
 555                 tmp1 = eps_poly1(nz, info->nkz[0], info->pme_order[0]);
 556                 tmp2 = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
 557
 558                 tmp += 2.0 * tmp1 * tmp2;
 559
 560                 tmp1  = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
 561                 tmp1 += eps_poly1(ny, info->nky[0], info->pme_order[0]);
 562                 tmp1 += eps_poly1(nz, info->nkz[0], info->pme_order[0]);
 563
 564                 tmp += tmp1 * tmp1;
 565
 566                 e_rec1 += 32.0 * M_PI * M_PI * coeff * coeff * coeff2 * tmp  * q2_all * q2_all / nr;
 567
 568                 tmp1  = eps_poly3(nx, info->nkx[0], info->pme_order[0]);
 569                 tmp1 *= info->nkx[0];
 570                 tmp2  = iprod(gridp, info->recipbox[XX]);
 571
 572                 tmp = tmp1*tmp2;
 573
 574                 tmp1  = eps_poly3(ny, info->nky[0], info->pme_order[0]);
 575                 tmp1 *= info->nky[0];
 576                 tmp2  = iprod(gridp, info->recipbox[YY]);
 577
 578                 tmp += tmp1*tmp2;
 579
 580                 tmp1  = eps_poly3(nz, info->nkz[0], info->pme_order[0]);
 581                 tmp1 *= info->nkz[0];
 582                 tmp2  = iprod(gridp, info->recipbox[ZZ]);
 583
 584                 tmp += tmp1*tmp2;
 585
 586                 tmp *= 4.0 * M_PI;
 587
 588                 tmp1  = eps_poly4(nx, info->nkx[0], info->pme_order[0]);
 589                 tmp1 *= norm2(info->recipbox[XX]);
 590                 tmp1 *= info->nkx[0] * info->nkx[0];
 591
 592                 tmp += tmp1;
 593
 594                 tmp1  = eps_poly4(ny, info->nky[0], info->pme_order[0]);
 595                 tmp1 *= norm2(info->recipbox[YY]);
 596                 tmp1 *= info->nky[0] * info->nky[0];
 597
 598                 tmp += tmp1;
 599
 600                 tmp1  = eps_poly4(nz, info->nkz[0], info->pme_order[0]);
 601                 tmp1 *= norm2(info->recipbox[ZZ]);
 602                 tmp1 *= info->nkz[0] * info->nkz[0];
 603
 604                 tmp += tmp1;
 605
 606                 e_rec2 += 4.0 * coeff * coeff * tmp * q2_all * q2_all / nr;
 607
 608             }
 609         }
 610         if (MASTER(cr))
 611         {
 612             fprintf(stderr, "\rCalculating reciprocal error part 1 ... %3.0f%%", 100.0*(nx-startlocal+1)/(x_per_core));
 613             fflush(stderr);
 614         }
 615
 616     }
 617
 618     if (MASTER(cr))
 619     {
 620         fprintf(stderr, "\n");
 621     }
 622
 623     /* Use just a fraction of all charges to estimate the self energy error term? */
 624     bFraction =  (info->fracself > 0.0) && (info->fracself < 1.0);
 625
 626     if (bFraction)
 627     {
 628         /* Here xtot is the number of samples taken for the Monte Carlo calculation
 629          * of the average of term IV of equation 35 in Wang2010. Round up to a
 630          * number of samples that is divisible by the number of nodes */
 631         x_per_core  = static_cast<int>(std::ceil(info->fracself * nr / cr->nnodes));
 632         xtot        = x_per_core * cr->nnodes;
 633     }
 634     else
 635     {
 636         /* In this case we use all nr particle positions */
 637         xtot       = nr;
 638         x_per_core = static_cast<int>(std::ceil(static_cast<real>(xtot) / cr->nnodes));
 639     }
 640
 641     startlocal = x_per_core *  cr->nodeid;
 642     stoplocal  = std::min(startlocal + x_per_core, xtot);  /* min needed if xtot == nr */
 643
 644     if (bFraction)
 645     {
 646         /* Make shure we get identical results in serial and parallel. Therefore,
 647          * take the sample indices from a single, global random number array that
 648          * is constructed on the master node and that only depends on the seed */
 649         snew(numbers, xtot);
 650         if (MASTER(cr))
 651         {
 652             for (i = 0; i < xtot; i++)
 653             {
 654                 numbers[i] = dist(rng); // [0,nr-1]
 655             }
 656         }
 657         /* Broadcast the random number array to the other nodes */
 658         if (PAR(cr))
 659         {
 660             nblock_bc(cr, xtot, numbers);
 661         }
 662
 663         if (bVerbose && MASTER(cr))
 664         {
 665             fprintf(stdout, "Using %d sample%s to approximate the self interaction error term",
 666                     xtot, xtot == 1 ? "" : "s");
 667             if (PAR(cr))
 668             {
 669                 fprintf(stdout, " (%d sample%s per rank)", x_per_core, x_per_core == 1 ? "" : "s");
 670             }
 671             fprintf(stdout, ".\n");
 672         }
 673     }
 674
 675     /* Return the number of positions used for the Monte Carlo algorithm */
 676     *nsamples = xtot;
 677
 678     for (i = startlocal; i < stoplocal; i++)
 679     {
 680         e_rec3x = 0;
 681         e_rec3y = 0;
 682         e_rec3z = 0;
 683
 684         if (bFraction)
 685         {
 686             /* Randomly pick a charge */
 687             ci = numbers[i];
 688         }
 689         else
 690         {
 691             /* Use all charges */
 692             ci = i;
 693         }
 694
 695         /* for(nx=startlocal; nx<=stoplocal; nx++)*/
 696         for (nx = -info->nkx[0]/2; nx < info->nkx[0]/2+1; nx++)
 697         {
 698             svmul(nx, info->recipbox[XX], gridpx);
 699             for (ny = -info->nky[0]/2; ny < info->nky[0]/2+1; ny++)
 700             {
 701                 svmul(ny, info->recipbox[YY], tmpvec);
 702                 rvec_add(gridpx, tmpvec, gridpxy);
 703                 for (nz = -info->nkz[0]/2; nz < info->nkz[0]/2+1; nz++)
 704                 {
 705
 706                     if (0 == nx && 0 == ny && 0 == nz)
 707                     {
 708                         continue;
 709                     }
 710
 711                     svmul(nz, info->recipbox[ZZ], tmpvec);
 712                     rvec_add(gridpxy, tmpvec, gridp);
 713                     tmp      = norm2(gridp);
 714                     coeff    = std::exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0] );
 715                     coeff   /= tmp;
 716                     e_rec3x += coeff*eps_self(nx, info->nkx[0], info->recipbox[XX], info->pme_order[0], x[ci]);
 717                     e_rec3y += coeff*eps_self(ny, info->nky[0], info->recipbox[YY], info->pme_order[0], x[ci]);
 718                     e_rec3z += coeff*eps_self(nz, info->nkz[0], info->recipbox[ZZ], info->pme_order[0], x[ci]);
 719
 720                 }
 721             }
 722         }
 723
 724         clear_rvec(tmpvec2);
 725
 726         svmul(e_rec3x, info->recipbox[XX], tmpvec);
 727         rvec_inc(tmpvec2, tmpvec);
 728         svmul(e_rec3y, info->recipbox[YY], tmpvec);
 729         rvec_inc(tmpvec2, tmpvec);
 730         svmul(e_rec3z, info->recipbox[ZZ], tmpvec);
 731         rvec_inc(tmpvec2, tmpvec);
 732
 733         e_rec3 += q[ci]*q[ci]*q[ci]*q[ci]*norm2(tmpvec2) / ( xtot * M_PI * info->volume * M_PI * info->volume);
 734         if (MASTER(cr))
 735         {
 736             fprintf(stderr, "\rCalculating reciprocal error part 2 ... %3.0f%%",
 737                     100.0*(i+1)/stoplocal);
 738             fflush(stderr);
 739         }
 740     }
 741
 742     if (MASTER(cr))
 743     {
 744         fprintf(stderr, "\n");
 745     }
 746
 747 #if GMX_LIB_MPI
 748 #ifdef TAKETIME
 749     if (MASTER(cr))
 750     {
 751         t1 = MPI_Wtime() - t0;
 752         fprintf(fp_out, "Recip. err. est. took   : %lf s\n", t1);
 753     }
 754 #endif
 755 #endif
 756
 757 #ifdef DEBUG
 758     if (PAR(cr))
 759     {
 760         fprintf(stderr, "Rank %3d: nx=[%3d...%3d]  e_rec3=%e\n",
 761                 cr->nodeid, startlocal, stoplocal, e_rec3);
 762     }
 763 #endif
 764
 765     if (PAR(cr))
 766     {
 767         gmx_sum(1, &e_rec1, cr);
 768         gmx_sum(1, &e_rec2, cr);
 769         gmx_sum(1, &e_rec3, cr);
 770     }
 771
 772     /* e_rec1*=8.0 * q2_all / info->volume / info->volume / nr ;
 773        e_rec2*=  q2_all / M_PI / M_PI / info->volume / info->volume / nr ;
 774        e_rec3/= M_PI * M_PI * info->volume * info->volume * nr ;
 775      */
 776     e_rec = std::sqrt(e_rec1+e_rec2+e_rec3);
 777
 778
 779     return ONE_4PI_EPS0 * e_rec;
 780 }
 781
 782
 783 /* Allocate memory for the inputinfo struct: */
 784 static void create_info(t_inputinfo *info)
 785 {
 786     snew(info->fac, info->n_entries);
 787     snew(info->rcoulomb, info->n_entries);
 788     snew(info->rvdw, info->n_entries);
 789     snew(info->nkx, info->n_entries);
 790     snew(info->nky, info->n_entries);
 791     snew(info->nkz, info->n_entries);
 792     snew(info->fourier_sp, info->n_entries);
 793     snew(info->ewald_rtol, info->n_entries);
 794     snew(info->ewald_beta, info->n_entries);
 795     snew(info->pme_order, info->n_entries);
 796     snew(info->fn_out, info->n_entries);
 797     snew(info->e_dir, info->n_entries);
 798     snew(info->e_rec, info->n_entries);
 799 }
 800
 801
 802 /* Allocate and fill an array with coordinates and charges,
 803  * returns the number of charges found
 804  */
 805 static int prepare_x_q(real *q[], rvec *x[], const gmx_mtop_t *mtop, const rvec x_orig[], t_commrec *cr)
 806 {
 807     int                     nq; /* number of charged particles */
 808
 809
 810     if (MASTER(cr))
 811     {
 812         snew(*q, mtop->natoms);
 813         snew(*x, mtop->natoms);
 814         nq = 0;
 815
 816         for (const AtomProxy atomP : AtomRange(*mtop))
 817         {
 818             const t_atom &local = atomP.atom();
 819             int           i     = atomP.globalAtomNumber();
 820             if (is_charge(local.q))
 821             {
 822                 (*q)[nq]     = local.q;
 823                 (*x)[nq][XX] = x_orig[i][XX];
 824                 (*x)[nq][YY] = x_orig[i][YY];
 825                 (*x)[nq][ZZ] = x_orig[i][ZZ];
 826                 nq++;
 827             }
 828         }
 829         /* Give back some unneeded memory */
 830         srenew(*q, nq);
 831         srenew(*x, nq);
 832     }
 833     /* Broadcast x and q in the parallel case */
 834     if (PAR(cr))
 835     {
 836         /* Transfer the number of charges */
 837         block_bc(cr, nq);
 838         snew_bc(cr, *x, nq);
 839         snew_bc(cr, *q, nq);
 840         nblock_bc(cr, nq, *x);
 841         nblock_bc(cr, nq, *q);
 842     }
 843
 844     return nq;
 845 }
 846
 847
 848
 849 /* Read in the tpr file and save information we need later in info */
 850 static void read_tpr_file(const char *fn_sim_tpr, t_inputinfo *info, t_state *state, gmx_mtop_t *mtop, t_inputrec *ir, real user_beta, real fracself)
 851 {
 852     read_tpx_state(fn_sim_tpr, ir, state, mtop);
 853
 854     /* The values of the original tpr input file are save in the first
 855      * place [0] of the arrays */
 856     info->orig_sim_steps = ir->nsteps;
 857     info->pme_order[0]   = ir->pme_order;
 858     info->rcoulomb[0]    = ir->rcoulomb;
 859     info->rvdw[0]        = ir->rvdw;
 860     info->nkx[0]         = ir->nkx;
 861     info->nky[0]         = ir->nky;
 862     info->nkz[0]         = ir->nkz;
 863     info->ewald_rtol[0]  = ir->ewald_rtol;
 864     info->fracself       = fracself;
 865     if (user_beta > 0)
 866     {
 867         info->ewald_beta[0] = user_beta;
 868     }
 869     else
 870     {
 871         info->ewald_beta[0]  = calc_ewaldcoeff_q(info->rcoulomb[0], info->ewald_rtol[0]);
 872     }
 873
 874     /* Check if PME was chosen */
 875     if (EEL_PME(ir->coulombtype) == FALSE)
 876     {
 877         gmx_fatal(FARGS, "Can only do optimizations for simulations with PME");
 878     }
 879
 880     /* Check if rcoulomb == rlist, which is necessary for PME */
 881     if (!(ir->rcoulomb == ir->rlist))
 882     {
 883         gmx_fatal(FARGS, "PME requires rcoulomb (%f) to be equal to rlist (%f).", ir->rcoulomb, ir->rlist);
 884     }
 885 }
 886
 887
 888 /* Transfer what we need for parallelizing the reciprocal error estimate */
 889 static void bcast_info(t_inputinfo *info, const t_commrec *cr)
 890 {
 891     nblock_bc(cr, info->n_entries, info->nkx);
 892     nblock_bc(cr, info->n_entries, info->nky);
 893     nblock_bc(cr, info->n_entries, info->nkz);
 894     nblock_bc(cr, info->n_entries, info->ewald_beta);
 895     nblock_bc(cr, info->n_entries, info->pme_order);
 896     nblock_bc(cr, info->n_entries, info->e_dir);
 897     nblock_bc(cr, info->n_entries, info->e_rec);
 898     block_bc(cr, info->volume);
 899     block_bc(cr, info->recipbox);
 900     block_bc(cr, info->natoms);
 901     block_bc(cr, info->fracself);
 902     block_bc(cr, info->bTUNE);
 903     block_bc(cr, info->q2all);
 904     block_bc(cr, info->q2allnr);
 905 }
 906
 907
 908 /* Estimate the error of the SPME Ewald sum. This estimate is based upon
 909  * a) a homogeneous distribution of the charges
 910  * b) a total charge of zero.
 911  */
 912 static void estimate_PME_error(t_inputinfo *info, const t_state *state,
 913                                const gmx_mtop_t *mtop, FILE *fp_out, gmx_bool bVerbose, unsigned int seed,
 914                                t_commrec *cr)
 915 {
 916     rvec *x     = nullptr; /* The coordinates */
 917     real *q     = nullptr; /* The charges     */
 918     real  edir  = 0.0;     /* real space error */
 919     real  erec  = 0.0;     /* reciprocal space error */
 920     real  derr  = 0.0;     /* difference of real and reciprocal space error */
 921     real  derr0 = 0.0;     /* difference of real and reciprocal space error */
 922     real  beta  = 0.0;     /* splitting parameter beta */
 923     real  beta0 = 0.0;     /* splitting parameter beta */
 924     int   ncharges;        /* The number of atoms with charges */
 925     int   nsamples;        /* The number of samples used for the calculation of the
 926                             * self-energy error term */
 927     int   i = 0;
 928
 929     if (MASTER(cr))
 930     {
 931         fprintf(fp_out, "\n--- PME ERROR ESTIMATE ---\n");
 932     }
 933
 934     /* Prepare an x and q array with only the charged atoms */
 935     ncharges = prepare_x_q(&q, &x, mtop, state->x.rvec_array(), cr);
 936     if (MASTER(cr))
 937     {
 938         calc_q2all(mtop, &(info->q2all), &(info->q2allnr));
 939         info->ewald_rtol[0] = std::erfc(info->rcoulomb[0]*info->ewald_beta[0]);
 940         /* Write some info to log file */
 941         fprintf(fp_out, "Box volume              : %g nm^3\n", info->volume);
 942         fprintf(fp_out, "Number of charged atoms : %d (total atoms %d)\n", ncharges, info->natoms);
 943         fprintf(fp_out, "Coulomb radius          : %g nm\n", info->rcoulomb[0]);
 944         fprintf(fp_out, "Ewald_rtol              : %g\n", info->ewald_rtol[0]);
 945         fprintf(fp_out, "Ewald parameter beta    : %g\n", info->ewald_beta[0]);
 946         fprintf(fp_out, "Interpolation order     : %d\n", info->pme_order[0]);
 947         fprintf(fp_out, "Fourier grid (nx,ny,nz) : %d x %d x %d\n",
 948                 info->nkx[0], info->nky[0], info->nkz[0]);
 949         fflush(fp_out);
 950
 951     }
 952
 953     if (PAR(cr))
 954     {
 955         bcast_info(info, cr);
 956     }
 957
 958
 959     /* Calculate direct space error */
 960     info->e_dir[0] = estimate_direct(info);
 961
 962     /* Calculate reciprocal space error */
 963     info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
 964                                          seed, &nsamples, cr);
 965
 966     if (PAR(cr))
 967     {
 968         bcast_info(info, cr);
 969     }
 970
 971     if (MASTER(cr))
 972     {
 973         fprintf(fp_out, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
 974         fprintf(fp_out, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
 975         fprintf(fp_out, "Self-energy error term was estimated using %d samples\n", nsamples);
 976         fflush(fp_out);
 977         fprintf(stderr, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
 978         fprintf(stderr, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
 979     }
 980
 981     i = 0;
 982
 983     if (info->bTUNE)
 984     {
 985         if (MASTER(cr))
 986         {
 987             fprintf(stderr, "Starting tuning ...\n");
 988         }
 989         edir  = info->e_dir[0];
 990         erec  = info->e_rec[0];
 991         derr0 = edir-erec;
 992         beta0 = info->ewald_beta[0];
 993         if (derr > 0.0)
 994         {
 995             info->ewald_beta[0] += 0.1;
 996         }
 997         else
 998         {
 999             info->ewald_beta[0] -= 0.1;
1000         }
1001         info->e_dir[0] = estimate_direct(info);
1002         info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
1003                                              seed, &nsamples, cr);
1004
1005         if (PAR(cr))
1006         {
1007             bcast_info(info, cr);
1008         }
1009
1010
1011         edir = info->e_dir[0];
1012         erec = info->e_rec[0];
1013         derr = edir-erec;
1014         while (std::abs(derr/std::min(erec, edir)) > 1e-4)
1015         {
1016
1017             beta                = info->ewald_beta[0];
1018             beta               -= derr*(info->ewald_beta[0]-beta0)/(derr-derr0);
1019             beta0               = info->ewald_beta[0];
1020             info->ewald_beta[0] = beta;
1021             derr0               = derr;
1022
1023             info->e_dir[0] = estimate_direct(info);
1024             info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
1025                                                  seed, &nsamples, cr);
1026
1027             if (PAR(cr))
1028             {
1029                 bcast_info(info, cr);
1030             }
1031
1032             edir = info->e_dir[0];
1033             erec = info->e_rec[0];
1034             derr = edir-erec;
1035
1036             if (MASTER(cr))
1037             {
1038                 i++;
1039                 fprintf(stderr, "difference between real and rec. space error (step %d): %g\n", i, std::abs(derr));
1040                 fprintf(stderr, "old beta: %f\n", beta0);
1041                 fprintf(stderr, "new beta: %f\n", beta);
1042             }
1043         }
1044
1045         info->ewald_rtol[0] = std::erfc(info->rcoulomb[0]*info->ewald_beta[0]);
1046
1047         if (MASTER(cr))
1048         {
1049             /* Write some info to log file */
1050             fflush(fp_out);
1051             fprintf(fp_out, "=========  After tuning ========\n");
1052             fprintf(fp_out, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
1053             fprintf(fp_out, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
1054             fprintf(stderr, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
1055             fprintf(stderr, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
1056             fprintf(fp_out, "Ewald_rtol              : %g\n", info->ewald_rtol[0]);
1057             fprintf(fp_out, "Ewald parameter beta    : %g\n", info->ewald_beta[0]);
1058             fflush(fp_out);
1059
1060         }
1061
1062     }
1063
1064 }
1065
1066
1067 int gmx_pme_error(int argc, char *argv[])
1068 {
1069     const char     *desc[] = {
1070         "[THISMODULE] estimates the error of the electrostatic forces",
1071         "if using the sPME algorithm. The flag [TT]-tune[tt] will determine",
1072         "the splitting parameter such that the error is equally",
1073         "distributed over the real and reciprocal space part.",
1074         "The part of the error that stems from self interaction of the particles ",
1075         "is computationally demanding. However, a good a approximation is to",
1076         "just use a fraction of the particles for this term which can be",
1077         "indicated by the flag [TT]-self[tt].[PAR]",
1078     };
1079
1080     real            fs        = 0.0; /* 0 indicates: not set by the user */
1081     real            user_beta = -1.0;
1082     real            fracself  = 1.0;
1083     t_inputinfo     info;
1084     t_state         state;        /* The state from the tpr input file */
1085     gmx_mtop_t      mtop;         /* The topology from the tpr input file */
1086     FILE           *fp = nullptr;
1087     t_commrec      *cr;
1088     unsigned long   PCA_Flags;
1089     gmx_bool        bTUNE    = FALSE;
1090     gmx_bool        bVerbose = FALSE;
1091     int             seed     = 0;
1092
1093
1094     static t_filenm   fnm[] = {
1095         { efTPR, "-s",     nullptr,    ffREAD },
1096         { efOUT, "-o",    "error",  ffWRITE },
1097         { efTPR, "-so",   "tuned",  ffOPTWR }
1098     };
1099
1100     gmx_output_env_t *oenv = nullptr;
1101
1102     t_pargs           pa[] = {
1103         { "-beta",     FALSE, etREAL, {&user_beta},
1104           "If positive, overwrite ewald_beta from [REF].tpr[ref] file with this value" },
1105         { "-tune",     FALSE, etBOOL, {&bTUNE},
1106           "Tune the splitting parameter such that the error is equally distributed between real and reciprocal space" },
1107         { "-self",     FALSE, etREAL, {&fracself},
1108           "If between 0.0 and 1.0, determine self interaction error from just this fraction of the charged particles" },
1109         { "-seed",     FALSE, etINT,  {&seed},
1110           "Random number seed used for Monte Carlo algorithm when [TT]-self[tt] is set to a value between 0.0 and 1.0" },
1111         { "-v",        FALSE, etBOOL, {&bVerbose},
1112           "Be loud and noisy" }
1113     };
1114
1115
1116 #define NFILE asize(fnm)
1117
1118     cr         = init_commrec();
1119     PCA_Flags  = PCA_NOEXIT_ON_ARGS;
1120
1121     if (!parse_common_args(&argc, argv, PCA_Flags,
1122                            NFILE, fnm, asize(pa), pa, asize(desc), desc,
1123                            0, nullptr, &oenv))
1124     {
1125         sfree(cr);
1126         return 0;
1127     }
1128
1129     if (!bTUNE)
1130     {
1131         bTUNE = opt2bSet("-so", NFILE, fnm);
1132     }
1133
1134     info.n_entries = 1;
1135
1136     /* Allocate memory for the inputinfo struct: */
1137     create_info(&info);
1138     info.fourier_sp[0] = fs;
1139
1140     t_inputrec ir;
1141     if (MASTER(cr))
1142     {
1143         read_tpr_file(opt2fn("-s", NFILE, fnm), &info, &state, &mtop, &ir, user_beta, fracself);
1144         /* Open logfile for reading */
1145         fp = fopen(opt2fn("-o", NFILE, fnm), "w");
1146
1147         /* Determine the volume of the simulation box */
1148         info.volume = det(state.box);
1149         calc_recipbox(state.box, info.recipbox);
1150         info.natoms = mtop.natoms;
1151         info.bTUNE  = bTUNE;
1152     }
1153
1154     /* Check consistency if the user provided fourierspacing */
1155     if (fs > 0 && MASTER(cr))
1156     {
1157         /* Recalculate the grid dimensions using fourierspacing from user input */
1158         info.nkx[0] = 0;
1159         info.nky[0] = 0;
1160         info.nkz[0] = 0;
1161         calcFftGrid(stdout, state.box, info.fourier_sp[0], minimalPmeGridSize(info.pme_order[0]),
1162                     &(info.nkx[0]), &(info.nky[0]), &(info.nkz[0]));
1163         if ( (ir.nkx != info.nkx[0]) || (ir.nky != info.nky[0]) || (ir.nkz != info.nkz[0]) )
1164         {
1165             gmx_fatal(FARGS, "Wrong fourierspacing %f nm, input file grid = %d x %d x %d, computed grid = %d x %d x %d",
1166                       fs, ir.nkx, ir.nky, ir.nkz, info.nkx[0], info.nky[0], info.nkz[0]);
1167         }
1168     }
1169
1170     /* Estimate (S)PME force error */
1171
1172     if (PAR(cr))
1173     {
1174         bcast_info(&info, cr);
1175     }
1176
1177     /* Get an error estimate of the input tpr file and do some tuning if requested */
1178     estimate_PME_error(&info, &state, &mtop, fp, bVerbose, seed, cr);
1179
1180     if (MASTER(cr))
1181     {
1182         /* Write out optimized tpr file if requested */
1183         if (opt2bSet("-so", NFILE, fnm) || bTUNE)
1184         {
1185             ir.ewald_rtol = info.ewald_rtol[0];
1186             write_tpx_state(opt2fn("-so", NFILE, fnm), &ir, &state, &mtop);
1187         }
1188         please_cite(fp, "Wang2010");
1189         fclose(fp);
1190     }
1191
1192     return 0;
1193 }