src/gromacs/gmxana/gmx_pme_error.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include "config.h"
  38
  39 #include <cmath>
  40
  41 #include <algorithm>
  42
  43 #include "gromacs/commandline/pargs.h"
  44 #include "gromacs/ewald/ewald-utils.h"
  45 #include "gromacs/ewald/pme.h"
  46 #include "gromacs/fft/calcgrid.h"
  47 #include "gromacs/fileio/checkpoint.h"
  48 #include "gromacs/fileio/tpxio.h"
  49 #include "gromacs/gmxana/gmx_ana.h"
  50 #include "gromacs/gmxlib/network.h"
  51 #include "gromacs/math/functions.h"
  52 #include "gromacs/math/units.h"
  53 #include "gromacs/math/vec.h"
  54 #include "gromacs/mdlib/broadcaststructs.h"
  55 #include "gromacs/mdtypes/commrec.h"
  56 #include "gromacs/mdtypes/inputrec.h"
  57 #include "gromacs/mdtypes/md_enums.h"
  58 #include "gromacs/mdtypes/state.h"
  59 #include "gromacs/random/threefry.h"
  60 #include "gromacs/random/uniformintdistribution.h"
  61 #include "gromacs/topology/mtop_util.h"
  62 #include "gromacs/topology/topology.h"
  63 #include "gromacs/utility/arraysize.h"
  64 #include "gromacs/utility/fatalerror.h"
  65 #include "gromacs/utility/pleasecite.h"
  66 #include "gromacs/utility/smalloc.h"
  67
  68 /* #define TAKETIME */
  69 /* #define DEBUG  */
  70
  71 /* Enum for situations that can occur during log file parsing */
  72 enum {
  73     eParselogOK,
  74     eParselogNotFound,
  75     eParselogNoPerfData,
  76     eParselogTerm,
  77     eParselogResetProblem,
  78     eParselogNr
  79 };
  80
  81
  82 typedef struct
  83 {
  84     int64_t         orig_sim_steps;  /* Number of steps to be done in the real simulation  */
  85     int             n_entries;       /* Number of entries in arrays                        */
  86     real            volume;          /* The volume of the box                              */
  87     matrix          recipbox;        /* The reciprocal box                                 */
  88     int             natoms;          /* The number of atoms in the MD system               */
  89     real           *fac;             /* The scaling factor                                 */
  90     real           *rcoulomb;        /* The coulomb radii [0...nr_inputfiles]              */
  91     real           *rvdw;            /* The vdW radii                                      */
  92     int            *nkx, *nky, *nkz; /* Number of k vectors in each spatial dimension      */
  93     real           *fourier_sp;      /* Fourierspacing                                     */
  94     real           *ewald_rtol;      /* Real space tolerance for Ewald, determines         */
  95                                      /* the real/reciprocal space relative weight          */
  96     real           *ewald_beta;      /* Splitting parameter [1/nm]                         */
  97     real            fracself;        /* fraction of particles for SI error                 */
  98     real            q2all;           /* sum ( q ^2 )                                       */
  99     real            q2allnr;         /* nr of charges                                      */
 100     int            *pme_order;       /* Interpolation order for PME (bsplines)             */
 101     char          **fn_out;          /* Name of the output tpr file                        */
 102     real           *e_dir;           /* Direct space part of PME error with these settings */
 103     real           *e_rec;           /* Reciprocal space part of PME error                 */
 104     gmx_bool        bTUNE;           /* flag for tuning */
 105 } t_inputinfo;
 106
 107
 108 /* Returns TRUE when atom is charged */
 109 static gmx_bool is_charge(real charge)
 110 {
 111     return charge*charge > GMX_REAL_EPS;
 112 }
 113
 114
 115 /* calculate charge density */
 116 static void calc_q2all(const gmx_mtop_t *mtop,   /* molecular topology */
 117                        real *q2all, real *q2allnr)
 118 {
 119     real            q2_all = 0;  /* Sum of squared charges */
 120     int             nrq_mol;     /* Number of charges in a single molecule */
 121     int             nrq_all;     /* Total number of charges in the MD system */
 122     real            qi, q2_mol;
 123
 124 #ifdef DEBUG
 125     fprintf(stderr, "\nCharge density:\n");
 126 #endif
 127     q2_all  = 0.0;                                        /* total q squared */
 128     nrq_all = 0;                                          /* total number of charges in the system */
 129     for (const gmx_molblock_t &molblock : mtop->molblock) /* Loop over molecule types */
 130     {
 131         q2_mol   = 0.0;                                   /* q squared value of this molecule */
 132         nrq_mol  = 0;                                     /* number of charges this molecule carries */
 133         const gmx_moltype_t &molecule = mtop->moltype[molblock.type];
 134         for (int i = 0; i < molecule.atoms.nr; i++)
 135         {
 136             qi = molecule.atoms.atom[i].q;
 137             /* Is this charge worth to be considered? */
 138             if (is_charge(qi))
 139             {
 140                 q2_mol += qi*qi;
 141                 nrq_mol++;
 142             }
 143         }
 144         /* Multiply with the number of molecules present of this type and add */
 145         q2_all  += q2_mol*molblock.nmol;
 146         nrq_all += nrq_mol*molblock.nmol;
 147 #ifdef DEBUG
 148         fprintf(stderr, "Molecule %2d (%5d atoms) q2_mol=%10.3e nr.mol.charges=%5d (%6dx)  q2_all=%10.3e  tot.charges=%d\n",
 149                 imol, molecule.atoms.nr, q2_mol, nrq_mol, molblock.nmol, q2_all, nrq_all);
 150 #endif
 151     }
 152
 153     *q2all   = q2_all;
 154     *q2allnr = nrq_all;
 155
 156 }
 157
 158
 159 /* Estimate the direct space part error of the SPME Ewald sum */
 160 static real estimate_direct(
 161         t_inputinfo *info
 162         )
 163 {
 164     real e_dir     = 0; /* Error estimate */
 165     real beta      = 0; /* Splitting parameter (1/nm) */
 166     real r_coulomb = 0; /* Cut-off in direct space */
 167
 168
 169     beta      = info->ewald_beta[0];
 170     r_coulomb = info->rcoulomb[0];
 171
 172     e_dir  = 2.0 * info->q2all * gmx::invsqrt( info->q2allnr  *  r_coulomb * info->volume );
 173     e_dir *= std::exp (-beta*beta*r_coulomb*r_coulomb);
 174
 175     return ONE_4PI_EPS0*e_dir;
 176 }
 177
 178 #define SUMORDER 6
 179
 180 /* the following 4 functions determine polynomials required for the reciprocal error estimate */
 181
 182 static inline real eps_poly1(
 183         real m,          /* grid coordinate in certain direction */
 184         real K,          /* grid size in corresponding direction */
 185         real n)          /* spline interpolation order of the SPME */
 186 {
 187     int  i;
 188     real nom   = 0; /* nominator */
 189     real denom = 0; /* denominator */
 190     real tmp   = 0;
 191
 192     if (m == 0.0)
 193     {
 194         return 0.0;
 195     }
 196
 197     for (i = -SUMORDER; i < 0; i++)
 198     {
 199         tmp  = m / K + i;
 200         tmp *= 2.0*M_PI;
 201         nom += std::pow( tmp, -n );
 202     }
 203
 204     for (i = SUMORDER; i > 0; i--)
 205     {
 206         tmp  = m / K + i;
 207         tmp *= 2.0*M_PI;
 208         nom += std::pow( tmp, -n );
 209     }
 210
 211     tmp   = m / K;
 212     tmp  *= 2.0*M_PI;
 213     denom = std::pow( tmp, -n )+nom;
 214
 215     return -nom/denom;
 216
 217 }
 218
 219 static inline real eps_poly2(
 220         real m,          /* grid coordinate in certain direction */
 221         real K,          /* grid size in corresponding direction */
 222         real n)          /* spline interpolation order of the SPME */
 223 {
 224     int  i;
 225     real nom   = 0; /* nominator */
 226     real denom = 0; /* denominator */
 227     real tmp   = 0;
 228
 229     if (m == 0.0)
 230     {
 231         return 0.0;
 232     }
 233
 234     for (i = -SUMORDER; i < 0; i++)
 235     {
 236         tmp  = m / K + i;
 237         tmp *= 2.0*M_PI;
 238         nom += std::pow( tmp, -2*n );
 239     }
 240
 241     for (i = SUMORDER; i > 0; i--)
 242     {
 243         tmp  = m / K + i;
 244         tmp *= 2.0*M_PI;
 245         nom += std::pow( tmp, -2*n );
 246     }
 247
 248     for (i = -SUMORDER; i < SUMORDER+1; i++)
 249     {
 250         tmp    = m / K + i;
 251         tmp   *= 2.0*M_PI;
 252         denom += std::pow( tmp, -n );
 253     }
 254     tmp = eps_poly1(m, K, n);
 255     return nom / denom / denom + tmp*tmp;
 256
 257 }
 258
 259 static inline real eps_poly3(
 260         real m,          /* grid coordinate in certain direction */
 261         real K,          /* grid size in corresponding direction */
 262         real n)          /* spline interpolation order of the SPME */
 263 {
 264     int  i;
 265     real nom   = 0; /* nominator */
 266     real denom = 0; /* denominator */
 267     real tmp   = 0;
 268
 269     if (m == 0.0)
 270     {
 271         return 0.0;
 272     }
 273
 274     for (i = -SUMORDER; i < 0; i++)
 275     {
 276         tmp  = m / K + i;
 277         tmp *= 2.0*M_PI;
 278         nom += i * std::pow( tmp, -2*n );
 279     }
 280
 281     for (i = SUMORDER; i > 0; i--)
 282     {
 283         tmp  = m / K + i;
 284         tmp *= 2.0*M_PI;
 285         nom += i * std::pow( tmp, -2*n );
 286     }
 287
 288     for (i = -SUMORDER; i < SUMORDER+1; i++)
 289     {
 290         tmp    = m / K + i;
 291         tmp   *= 2.0*M_PI;
 292         denom += std::pow( tmp, -n );
 293     }
 294
 295     return 2.0 * M_PI * nom / denom / denom;
 296
 297 }
 298
 299 static inline real eps_poly4(
 300         real m,          /* grid coordinate in certain direction */
 301         real K,          /* grid size in corresponding direction */
 302         real n)          /* spline interpolation order of the SPME */
 303 {
 304     int  i;
 305     real nom   = 0; /* nominator */
 306     real denom = 0; /* denominator */
 307     real tmp   = 0;
 308
 309     if (m == 0.0)
 310     {
 311         return 0.0;
 312     }
 313
 314     for (i = -SUMORDER; i < 0; i++)
 315     {
 316         tmp  = m / K + i;
 317         tmp *= 2.0*M_PI;
 318         nom += i * i * std::pow( tmp, -2*n );
 319     }
 320
 321     for (i = SUMORDER; i > 0; i--)
 322     {
 323         tmp  = m / K + i;
 324         tmp *= 2.0*M_PI;
 325         nom += i * i * std::pow( tmp, -2*n );
 326     }
 327
 328     for (i = -SUMORDER; i < SUMORDER+1; i++)
 329     {
 330         tmp    = m / K + i;
 331         tmp   *= 2.0*M_PI;
 332         denom += std::pow( tmp, -n );
 333     }
 334
 335     return 4.0 * M_PI * M_PI * nom / denom / denom;
 336
 337 }
 338
 339 static inline real eps_self(
 340         real m,       /* grid coordinate in certain direction */
 341         real K,       /* grid size in corresponding direction */
 342         rvec rboxv,   /* reciprocal box vector */
 343         real n,       /* spline interpolation order of the SPME */
 344         rvec x)       /* coordinate of charge */
 345 {
 346     int  i;
 347     real tmp    = 0; /* temporary variables for computations */
 348     real tmp1   = 0; /* temporary variables for computations */
 349     real tmp2   = 0; /* temporary variables for computations */
 350     real rcoord = 0; /* coordinate in certain reciprocal space direction */
 351     real nom    = 0; /* nominator */
 352     real denom  = 0; /* denominator */
 353
 354
 355     if (m == 0.0)
 356     {
 357         return 0.0;
 358     }
 359
 360     rcoord = iprod(rboxv, x);
 361
 362
 363     for (i = -SUMORDER; i < 0; i++)
 364     {
 365         tmp    = -std::sin(2.0 * M_PI * i * K * rcoord);
 366         tmp1   = 2.0 * M_PI * m / K + 2.0 * M_PI * i;
 367         tmp2   = std::pow(tmp1, -n);
 368         nom   += tmp * tmp2 * i;
 369         denom += tmp2;
 370     }
 371
 372     for (i = SUMORDER; i > 0; i--)
 373     {
 374         tmp    = -std::sin(2.0 * M_PI * i * K * rcoord);
 375         tmp1   = 2.0 * M_PI * m / K + 2.0 * M_PI * i;
 376         tmp2   = std::pow(tmp1, -n);
 377         nom   += tmp * tmp2 * i;
 378         denom += tmp2;
 379     }
 380
 381
 382     tmp    = 2.0 * M_PI * m / K;
 383     tmp1   = pow(tmp, -n);
 384     denom += tmp1;
 385
 386     return 2.0 * M_PI * nom / denom * K;
 387
 388 }
 389
 390 #undef SUMORDER
 391
 392 /* The following routine is just a copy from pme.c */
 393
 394 static void calc_recipbox(matrix box, matrix recipbox)
 395 {
 396     /* Save some time by assuming upper right part is zero */
 397
 398     real tmp = 1.0/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
 399
 400     recipbox[XX][XX] = box[YY][YY]*box[ZZ][ZZ]*tmp;
 401     recipbox[XX][YY] = 0;
 402     recipbox[XX][ZZ] = 0;
 403     recipbox[YY][XX] = -box[YY][XX]*box[ZZ][ZZ]*tmp;
 404     recipbox[YY][YY] = box[XX][XX]*box[ZZ][ZZ]*tmp;
 405     recipbox[YY][ZZ] = 0;
 406     recipbox[ZZ][XX] = (box[YY][XX]*box[ZZ][YY]-box[YY][YY]*box[ZZ][XX])*tmp;
 407     recipbox[ZZ][YY] = -box[ZZ][YY]*box[XX][XX]*tmp;
 408     recipbox[ZZ][ZZ] = box[XX][XX]*box[YY][YY]*tmp;
 409 }
 410
 411
 412 /* Estimate the reciprocal space part error of the SPME Ewald sum. */
 413 static real estimate_reciprocal(
 414         t_inputinfo             *info,
 415         rvec                     x[], /* array of particles */
 416         const real               q[], /* array of charges */
 417         int                      nr,  /* number of charges = size of the charge array */
 418         FILE  gmx_unused        *fp_out,
 419         gmx_bool                 bVerbose,
 420         int                      seed,     /* The seed for the random number generator */
 421         int                     *nsamples, /* Return the number of samples used if Monte Carlo
 422                                             * algorithm is used for self energy error estimate */
 423         t_commrec               *cr)
 424 {
 425     real      e_rec   = 0; /* reciprocal error estimate */
 426     real      e_rec1  = 0; /* Error estimate term 1*/
 427     real      e_rec2  = 0; /* Error estimate term 2*/
 428     real      e_rec3  = 0; /* Error estimate term 3 */
 429     real      e_rec3x = 0; /* part of Error estimate term 3 in x */
 430     real      e_rec3y = 0; /* part of Error estimate term 3 in y */
 431     real      e_rec3z = 0; /* part of Error estimate term 3 in z */
 432     int       i, ci;
 433     int       nx, ny, nz;  /* grid coordinates */
 434     real      q2_all = 0;  /* sum of squared charges */
 435     rvec      gridpx;      /* reciprocal grid point in x direction*/
 436     rvec      gridpxy;     /* reciprocal grid point in x and y direction*/
 437     rvec      gridp;       /* complete reciprocal grid point in 3 directions*/
 438     rvec      tmpvec;      /* template to create points from basis vectors */
 439     rvec      tmpvec2;     /* template to create points from basis vectors */
 440     real      coeff  = 0;  /* variable to compute coefficients of the error estimate */
 441     real      coeff2 = 0;  /* variable to compute coefficients of the error estimate */
 442     real      tmp    = 0;  /* variables to compute different factors from vectors */
 443     real      tmp1   = 0;
 444     real      tmp2   = 0;
 445     gmx_bool  bFraction;
 446
 447     int      *numbers = nullptr;
 448
 449     /* Index variables for parallel work distribution */
 450     int startglobal, stopglobal;
 451     int startlocal, stoplocal;
 452     int x_per_core;
 453     int xtot;
 454
 455 #ifdef TAKETIME
 456     double t0 = 0.0;
 457     double t1 = 0.0;
 458 #endif
 459
 460     if (seed == 0)
 461     {
 462         seed = static_cast<int>(gmx::makeRandomSeed());
 463     }
 464     fprintf(stderr, "Using random seed %d.\n", seed);
 465
 466     gmx::DefaultRandomEngine           rng(seed);
 467     gmx::UniformIntDistribution<int>   dist(0, nr-1);
 468
 469     clear_rvec(gridpx);
 470     clear_rvec(gridpxy);
 471     clear_rvec(gridp);
 472     clear_rvec(tmpvec);
 473     clear_rvec(tmpvec2);
 474
 475     for (i = 0; i < nr; i++)
 476     {
 477         q2_all += q[i]*q[i];
 478     }
 479
 480     /* Calculate indices for work distribution */
 481     startglobal = -info->nkx[0]/2;
 482     stopglobal  = info->nkx[0]/2;
 483     xtot        = stopglobal*2+1;
 484     if (PAR(cr))
 485     {
 486         x_per_core = static_cast<int>(std::ceil(static_cast<real>(xtot) / cr->nnodes));
 487         startlocal = startglobal + x_per_core*cr->nodeid;
 488         stoplocal  = startlocal + x_per_core -1;
 489         if (stoplocal > stopglobal)
 490         {
 491             stoplocal = stopglobal;
 492         }
 493     }
 494     else
 495     {
 496         startlocal = startglobal;
 497         stoplocal  = stopglobal;
 498         x_per_core = xtot;
 499     }
 500
 501 #if GMX_LIB_MPI
 502 #ifdef TAKETIME
 503     if (MASTER(cr))
 504     {
 505         t0 = MPI_Wtime();
 506     }
 507 #endif
 508 #endif
 509
 510     if (MASTER(cr))
 511     {
 512
 513         fprintf(stderr, "Calculating reciprocal error part 1 ...");
 514
 515     }
 516
 517     for (nx = startlocal; nx <= stoplocal; nx++)
 518     {
 519         svmul(nx, info->recipbox[XX], gridpx);
 520         for (ny = -info->nky[0]/2; ny < info->nky[0]/2+1; ny++)
 521         {
 522             svmul(ny, info->recipbox[YY], tmpvec);
 523             rvec_add(gridpx, tmpvec, gridpxy);
 524             for (nz = -info->nkz[0]/2; nz < info->nkz[0]/2+1; nz++)
 525             {
 526                 if (0 == nx &&  0 == ny &&  0 == nz)
 527                 {
 528                     continue;
 529                 }
 530                 svmul(nz, info->recipbox[ZZ], tmpvec);
 531                 rvec_add(gridpxy, tmpvec, gridp);
 532                 tmp    = norm2(gridp);
 533                 coeff  = std::exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0] );
 534                 coeff /= 2.0 * M_PI * info->volume * tmp;
 535                 coeff2 = tmp;
 536
 537
 538                 tmp  = eps_poly2(nx, info->nkx[0], info->pme_order[0]);
 539                 tmp += eps_poly2(ny, info->nkx[0], info->pme_order[0]);
 540                 tmp += eps_poly2(nz, info->nkx[0], info->pme_order[0]);
 541
 542                 tmp1 = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
 543                 tmp2 = eps_poly1(ny, info->nky[0], info->pme_order[0]);
 544
 545                 tmp += 2.0 * tmp1 * tmp2;
 546
 547                 tmp1 = eps_poly1(nz, info->nkz[0], info->pme_order[0]);
 548                 tmp2 = eps_poly1(ny, info->nky[0], info->pme_order[0]);
 549
 550                 tmp += 2.0 * tmp1 * tmp2;
 551
 552                 tmp1 = eps_poly1(nz, info->nkz[0], info->pme_order[0]);
 553                 tmp2 = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
 554
 555                 tmp += 2.0 * tmp1 * tmp2;
 556
 557                 tmp1  = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
 558                 tmp1 += eps_poly1(ny, info->nky[0], info->pme_order[0]);
 559                 tmp1 += eps_poly1(nz, info->nkz[0], info->pme_order[0]);
 560
 561                 tmp += tmp1 * tmp1;
 562
 563                 e_rec1 += 32.0 * M_PI * M_PI * coeff * coeff * coeff2 * tmp  * q2_all * q2_all / nr;
 564
 565                 tmp1  = eps_poly3(nx, info->nkx[0], info->pme_order[0]);
 566                 tmp1 *= info->nkx[0];
 567                 tmp2  = iprod(gridp, info->recipbox[XX]);
 568
 569                 tmp = tmp1*tmp2;
 570
 571                 tmp1  = eps_poly3(ny, info->nky[0], info->pme_order[0]);
 572                 tmp1 *= info->nky[0];
 573                 tmp2  = iprod(gridp, info->recipbox[YY]);
 574
 575                 tmp += tmp1*tmp2;
 576
 577                 tmp1  = eps_poly3(nz, info->nkz[0], info->pme_order[0]);
 578                 tmp1 *= info->nkz[0];
 579                 tmp2  = iprod(gridp, info->recipbox[ZZ]);
 580
 581                 tmp += tmp1*tmp2;
 582
 583                 tmp *= 4.0 * M_PI;
 584
 585                 tmp1  = eps_poly4(nx, info->nkx[0], info->pme_order[0]);
 586                 tmp1 *= norm2(info->recipbox[XX]);
 587                 tmp1 *= info->nkx[0] * info->nkx[0];
 588
 589                 tmp += tmp1;
 590
 591                 tmp1  = eps_poly4(ny, info->nky[0], info->pme_order[0]);
 592                 tmp1 *= norm2(info->recipbox[YY]);
 593                 tmp1 *= info->nky[0] * info->nky[0];
 594
 595                 tmp += tmp1;
 596
 597                 tmp1  = eps_poly4(nz, info->nkz[0], info->pme_order[0]);
 598                 tmp1 *= norm2(info->recipbox[ZZ]);
 599                 tmp1 *= info->nkz[0] * info->nkz[0];
 600
 601                 tmp += tmp1;
 602
 603                 e_rec2 += 4.0 * coeff * coeff * tmp * q2_all * q2_all / nr;
 604
 605             }
 606         }
 607         if (MASTER(cr))
 608         {
 609             fprintf(stderr, "\rCalculating reciprocal error part 1 ... %3.0f%%", 100.0*(nx-startlocal+1)/(x_per_core));
 610             fflush(stderr);
 611         }
 612
 613     }
 614
 615     if (MASTER(cr))
 616     {
 617         fprintf(stderr, "\n");
 618     }
 619
 620     /* Use just a fraction of all charges to estimate the self energy error term? */
 621     bFraction =  (info->fracself > 0.0) && (info->fracself < 1.0);
 622
 623     if (bFraction)
 624     {
 625         /* Here xtot is the number of samples taken for the Monte Carlo calculation
 626          * of the average of term IV of equation 35 in Wang2010. Round up to a
 627          * number of samples that is divisible by the number of nodes */
 628         x_per_core  = static_cast<int>(std::ceil(info->fracself * nr / cr->nnodes));
 629         xtot        = x_per_core * cr->nnodes;
 630     }
 631     else
 632     {
 633         /* In this case we use all nr particle positions */
 634         xtot       = nr;
 635         x_per_core = static_cast<int>(std::ceil(static_cast<real>(xtot) / cr->nnodes));
 636     }
 637
 638     startlocal = x_per_core *  cr->nodeid;
 639     stoplocal  = std::min(startlocal + x_per_core, xtot);  /* min needed if xtot == nr */
 640
 641     if (bFraction)
 642     {
 643         /* Make shure we get identical results in serial and parallel. Therefore,
 644          * take the sample indices from a single, global random number array that
 645          * is constructed on the master node and that only depends on the seed */
 646         snew(numbers, xtot);
 647         if (MASTER(cr))
 648         {
 649             for (i = 0; i < xtot; i++)
 650             {
 651                 numbers[i] = dist(rng); // [0,nr-1]
 652             }
 653         }
 654         /* Broadcast the random number array to the other nodes */
 655         if (PAR(cr))
 656         {
 657             nblock_bc(cr, xtot, numbers);
 658         }
 659
 660         if (bVerbose && MASTER(cr))
 661         {
 662             fprintf(stdout, "Using %d sample%s to approximate the self interaction error term",
 663                     xtot, xtot == 1 ? "" : "s");
 664             if (PAR(cr))
 665             {
 666                 fprintf(stdout, " (%d sample%s per rank)", x_per_core, x_per_core == 1 ? "" : "s");
 667             }
 668             fprintf(stdout, ".\n");
 669         }
 670     }
 671
 672     /* Return the number of positions used for the Monte Carlo algorithm */
 673     *nsamples = xtot;
 674
 675     for (i = startlocal; i < stoplocal; i++)
 676     {
 677         e_rec3x = 0;
 678         e_rec3y = 0;
 679         e_rec3z = 0;
 680
 681         if (bFraction)
 682         {
 683             /* Randomly pick a charge */
 684             ci = numbers[i];
 685         }
 686         else
 687         {
 688             /* Use all charges */
 689             ci = i;
 690         }
 691
 692         /* for(nx=startlocal; nx<=stoplocal; nx++)*/
 693         for (nx = -info->nkx[0]/2; nx < info->nkx[0]/2+1; nx++)
 694         {
 695             svmul(nx, info->recipbox[XX], gridpx);
 696             for (ny = -info->nky[0]/2; ny < info->nky[0]/2+1; ny++)
 697             {
 698                 svmul(ny, info->recipbox[YY], tmpvec);
 699                 rvec_add(gridpx, tmpvec, gridpxy);
 700                 for (nz = -info->nkz[0]/2; nz < info->nkz[0]/2+1; nz++)
 701                 {
 702
 703                     if (0 == nx && 0 == ny && 0 == nz)
 704                     {
 705                         continue;
 706                     }
 707
 708                     svmul(nz, info->recipbox[ZZ], tmpvec);
 709                     rvec_add(gridpxy, tmpvec, gridp);
 710                     tmp      = norm2(gridp);
 711                     coeff    = std::exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0] );
 712                     coeff   /= tmp;
 713                     e_rec3x += coeff*eps_self(nx, info->nkx[0], info->recipbox[XX], info->pme_order[0], x[ci]);
 714                     e_rec3y += coeff*eps_self(ny, info->nky[0], info->recipbox[YY], info->pme_order[0], x[ci]);
 715                     e_rec3z += coeff*eps_self(nz, info->nkz[0], info->recipbox[ZZ], info->pme_order[0], x[ci]);
 716
 717                 }
 718             }
 719         }
 720
 721         clear_rvec(tmpvec2);
 722
 723         svmul(e_rec3x, info->recipbox[XX], tmpvec);
 724         rvec_inc(tmpvec2, tmpvec);
 725         svmul(e_rec3y, info->recipbox[YY], tmpvec);
 726         rvec_inc(tmpvec2, tmpvec);
 727         svmul(e_rec3z, info->recipbox[ZZ], tmpvec);
 728         rvec_inc(tmpvec2, tmpvec);
 729
 730         e_rec3 += q[ci]*q[ci]*q[ci]*q[ci]*norm2(tmpvec2) / ( xtot * M_PI * info->volume * M_PI * info->volume);
 731         if (MASTER(cr))
 732         {
 733             fprintf(stderr, "\rCalculating reciprocal error part 2 ... %3.0f%%",
 734                     100.0*(i+1)/stoplocal);
 735             fflush(stderr);
 736         }
 737     }
 738
 739     if (MASTER(cr))
 740     {
 741         fprintf(stderr, "\n");
 742     }
 743
 744 #if GMX_LIB_MPI
 745 #ifdef TAKETIME
 746     if (MASTER(cr))
 747     {
 748         t1 = MPI_Wtime() - t0;
 749         fprintf(fp_out, "Recip. err. est. took   : %lf s\n", t1);
 750     }
 751 #endif
 752 #endif
 753
 754 #ifdef DEBUG
 755     if (PAR(cr))
 756     {
 757         fprintf(stderr, "Rank %3d: nx=[%3d...%3d]  e_rec3=%e\n",
 758                 cr->nodeid, startlocal, stoplocal, e_rec3);
 759     }
 760 #endif
 761
 762     if (PAR(cr))
 763     {
 764         gmx_sum(1, &e_rec1, cr);
 765         gmx_sum(1, &e_rec2, cr);
 766         gmx_sum(1, &e_rec3, cr);
 767     }
 768
 769     /* e_rec1*=8.0 * q2_all / info->volume / info->volume / nr ;
 770        e_rec2*=  q2_all / M_PI / M_PI / info->volume / info->volume / nr ;
 771        e_rec3/= M_PI * M_PI * info->volume * info->volume * nr ;
 772      */
 773     e_rec = std::sqrt(e_rec1+e_rec2+e_rec3);
 774
 775
 776     return ONE_4PI_EPS0 * e_rec;
 777 }
 778
 779
 780 /* Allocate memory for the inputinfo struct: */
 781 static void create_info(t_inputinfo *info)
 782 {
 783     snew(info->fac, info->n_entries);
 784     snew(info->rcoulomb, info->n_entries);
 785     snew(info->rvdw, info->n_entries);
 786     snew(info->nkx, info->n_entries);
 787     snew(info->nky, info->n_entries);
 788     snew(info->nkz, info->n_entries);
 789     snew(info->fourier_sp, info->n_entries);
 790     snew(info->ewald_rtol, info->n_entries);
 791     snew(info->ewald_beta, info->n_entries);
 792     snew(info->pme_order, info->n_entries);
 793     snew(info->fn_out, info->n_entries);
 794     snew(info->e_dir, info->n_entries);
 795     snew(info->e_rec, info->n_entries);
 796 }
 797
 798
 799 /* Allocate and fill an array with coordinates and charges,
 800  * returns the number of charges found
 801  */
 802 static int prepare_x_q(real *q[], rvec *x[], const gmx_mtop_t *mtop, const rvec x_orig[], t_commrec *cr)
 803 {
 804     int                     nq; /* number of charged particles */
 805
 806
 807     if (MASTER(cr))
 808     {
 809         snew(*q, mtop->natoms);
 810         snew(*x, mtop->natoms);
 811         nq = 0;
 812
 813         for (const AtomProxy atomP : AtomRange(*mtop))
 814         {
 815             const t_atom &local = atomP.atom();
 816             int           i     = atomP.globalAtomNumber();
 817             if (is_charge(local.q))
 818             {
 819                 (*q)[nq]     = local.q;
 820                 (*x)[nq][XX] = x_orig[i][XX];
 821                 (*x)[nq][YY] = x_orig[i][YY];
 822                 (*x)[nq][ZZ] = x_orig[i][ZZ];
 823                 nq++;
 824             }
 825         }
 826         /* Give back some unneeded memory */
 827         srenew(*q, nq);
 828         srenew(*x, nq);
 829     }
 830     /* Broadcast x and q in the parallel case */
 831     if (PAR(cr))
 832     {
 833         /* Transfer the number of charges */
 834         block_bc(cr, nq);
 835         snew_bc(cr, *x, nq);
 836         snew_bc(cr, *q, nq);
 837         nblock_bc(cr, nq, *x);
 838         nblock_bc(cr, nq, *q);
 839     }
 840
 841     return nq;
 842 }
 843
 844
 845
 846 /* Read in the tpr file and save information we need later in info */
 847 static void read_tpr_file(const char *fn_sim_tpr, t_inputinfo *info, t_state *state, gmx_mtop_t *mtop, t_inputrec *ir, real user_beta, real fracself)
 848 {
 849     read_tpx_state(fn_sim_tpr, ir, state, mtop);
 850
 851     /* The values of the original tpr input file are save in the first
 852      * place [0] of the arrays */
 853     info->orig_sim_steps = ir->nsteps;
 854     info->pme_order[0]   = ir->pme_order;
 855     info->rcoulomb[0]    = ir->rcoulomb;
 856     info->rvdw[0]        = ir->rvdw;
 857     info->nkx[0]         = ir->nkx;
 858     info->nky[0]         = ir->nky;
 859     info->nkz[0]         = ir->nkz;
 860     info->ewald_rtol[0]  = ir->ewald_rtol;
 861     info->fracself       = fracself;
 862     if (user_beta > 0)
 863     {
 864         info->ewald_beta[0] = user_beta;
 865     }
 866     else
 867     {
 868         info->ewald_beta[0]  = calc_ewaldcoeff_q(info->rcoulomb[0], info->ewald_rtol[0]);
 869     }
 870
 871     /* Check if PME was chosen */
 872     if (EEL_PME(ir->coulombtype) == FALSE)
 873     {
 874         gmx_fatal(FARGS, "Can only do optimizations for simulations with PME");
 875     }
 876
 877     /* Check if rcoulomb == rlist, which is necessary for PME */
 878     if (!(ir->rcoulomb == ir->rlist))
 879     {
 880         gmx_fatal(FARGS, "PME requires rcoulomb (%f) to be equal to rlist (%f).", ir->rcoulomb, ir->rlist);
 881     }
 882 }
 883
 884
 885 /* Transfer what we need for parallelizing the reciprocal error estimate */
 886 static void bcast_info(t_inputinfo *info, const t_commrec *cr)
 887 {
 888     nblock_bc(cr, info->n_entries, info->nkx);
 889     nblock_bc(cr, info->n_entries, info->nky);
 890     nblock_bc(cr, info->n_entries, info->nkz);
 891     nblock_bc(cr, info->n_entries, info->ewald_beta);
 892     nblock_bc(cr, info->n_entries, info->pme_order);
 893     nblock_bc(cr, info->n_entries, info->e_dir);
 894     nblock_bc(cr, info->n_entries, info->e_rec);
 895     block_bc(cr, info->volume);
 896     block_bc(cr, info->recipbox);
 897     block_bc(cr, info->natoms);
 898     block_bc(cr, info->fracself);
 899     block_bc(cr, info->bTUNE);
 900     block_bc(cr, info->q2all);
 901     block_bc(cr, info->q2allnr);
 902 }
 903
 904
 905 /* Estimate the error of the SPME Ewald sum. This estimate is based upon
 906  * a) a homogeneous distribution of the charges
 907  * b) a total charge of zero.
 908  */
 909 static void estimate_PME_error(t_inputinfo *info, const t_state *state,
 910                                const gmx_mtop_t *mtop, FILE *fp_out, gmx_bool bVerbose, unsigned int seed,
 911                                t_commrec *cr)
 912 {
 913     rvec *x     = nullptr; /* The coordinates */
 914     real *q     = nullptr; /* The charges     */
 915     real  edir  = 0.0;     /* real space error */
 916     real  erec  = 0.0;     /* reciprocal space error */
 917     real  derr  = 0.0;     /* difference of real and reciprocal space error */
 918     real  derr0 = 0.0;     /* difference of real and reciprocal space error */
 919     real  beta  = 0.0;     /* splitting parameter beta */
 920     real  beta0 = 0.0;     /* splitting parameter beta */
 921     int   ncharges;        /* The number of atoms with charges */
 922     int   nsamples;        /* The number of samples used for the calculation of the
 923                             * self-energy error term */
 924     int   i = 0;
 925
 926     if (MASTER(cr))
 927     {
 928         fprintf(fp_out, "\n--- PME ERROR ESTIMATE ---\n");
 929     }
 930
 931     /* Prepare an x and q array with only the charged atoms */
 932     ncharges = prepare_x_q(&q, &x, mtop, state->x.rvec_array(), cr);
 933     if (MASTER(cr))
 934     {
 935         calc_q2all(mtop, &(info->q2all), &(info->q2allnr));
 936         info->ewald_rtol[0] = std::erfc(info->rcoulomb[0]*info->ewald_beta[0]);
 937         /* Write some info to log file */
 938         fprintf(fp_out, "Box volume              : %g nm^3\n", info->volume);
 939         fprintf(fp_out, "Number of charged atoms : %d (total atoms %d)\n", ncharges, info->natoms);
 940         fprintf(fp_out, "Coulomb radius          : %g nm\n", info->rcoulomb[0]);
 941         fprintf(fp_out, "Ewald_rtol              : %g\n", info->ewald_rtol[0]);
 942         fprintf(fp_out, "Ewald parameter beta    : %g\n", info->ewald_beta[0]);
 943         fprintf(fp_out, "Interpolation order     : %d\n", info->pme_order[0]);
 944         fprintf(fp_out, "Fourier grid (nx,ny,nz) : %d x %d x %d\n",
 945                 info->nkx[0], info->nky[0], info->nkz[0]);
 946         fflush(fp_out);
 947
 948     }
 949
 950     if (PAR(cr))
 951     {
 952         bcast_info(info, cr);
 953     }
 954
 955
 956     /* Calculate direct space error */
 957     info->e_dir[0] = estimate_direct(info);
 958
 959     /* Calculate reciprocal space error */
 960     info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
 961                                          seed, &nsamples, cr);
 962
 963     if (PAR(cr))
 964     {
 965         bcast_info(info, cr);
 966     }
 967
 968     if (MASTER(cr))
 969     {
 970         fprintf(fp_out, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
 971         fprintf(fp_out, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
 972         fprintf(fp_out, "Self-energy error term was estimated using %d samples\n", nsamples);
 973         fflush(fp_out);
 974         fprintf(stderr, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
 975         fprintf(stderr, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
 976     }
 977
 978     i = 0;
 979
 980     if (info->bTUNE)
 981     {
 982         if (MASTER(cr))
 983         {
 984             fprintf(stderr, "Starting tuning ...\n");
 985         }
 986         edir  = info->e_dir[0];
 987         erec  = info->e_rec[0];
 988         derr0 = edir-erec;
 989         beta0 = info->ewald_beta[0];
 990         if (derr > 0.0)
 991         {
 992             info->ewald_beta[0] += 0.1;
 993         }
 994         else
 995         {
 996             info->ewald_beta[0] -= 0.1;
 997         }
 998         info->e_dir[0] = estimate_direct(info);
 999         info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
1000                                              seed, &nsamples, cr);
1001
1002         if (PAR(cr))
1003         {
1004             bcast_info(info, cr);
1005         }
1006
1007
1008         edir = info->e_dir[0];
1009         erec = info->e_rec[0];
1010         derr = edir-erec;
1011         while (std::abs(derr/std::min(erec, edir)) > 1e-4)
1012         {
1013
1014             beta                = info->ewald_beta[0];
1015             beta               -= derr*(info->ewald_beta[0]-beta0)/(derr-derr0);
1016             beta0               = info->ewald_beta[0];
1017             info->ewald_beta[0] = beta;
1018             derr0               = derr;
1019
1020             info->e_dir[0] = estimate_direct(info);
1021             info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
1022                                                  seed, &nsamples, cr);
1023
1024             if (PAR(cr))
1025             {
1026                 bcast_info(info, cr);
1027             }
1028
1029             edir = info->e_dir[0];
1030             erec = info->e_rec[0];
1031             derr = edir-erec;
1032
1033             if (MASTER(cr))
1034             {
1035                 i++;
1036                 fprintf(stderr, "difference between real and rec. space error (step %d): %g\n", i, std::abs(derr));
1037                 fprintf(stderr, "old beta: %f\n", beta0);
1038                 fprintf(stderr, "new beta: %f\n", beta);
1039             }
1040         }
1041
1042         info->ewald_rtol[0] = std::erfc(info->rcoulomb[0]*info->ewald_beta[0]);
1043
1044         if (MASTER(cr))
1045         {
1046             /* Write some info to log file */
1047             fflush(fp_out);
1048             fprintf(fp_out, "=========  After tuning ========\n");
1049             fprintf(fp_out, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
1050             fprintf(fp_out, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
1051             fprintf(stderr, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
1052             fprintf(stderr, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
1053             fprintf(fp_out, "Ewald_rtol              : %g\n", info->ewald_rtol[0]);
1054             fprintf(fp_out, "Ewald parameter beta    : %g\n", info->ewald_beta[0]);
1055             fflush(fp_out);
1056
1057         }
1058
1059     }
1060
1061 }
1062
1063
1064 int gmx_pme_error(int argc, char *argv[])
1065 {
1066     const char     *desc[] = {
1067         "[THISMODULE] estimates the error of the electrostatic forces",
1068         "if using the sPME algorithm. The flag [TT]-tune[tt] will determine",
1069         "the splitting parameter such that the error is equally",
1070         "distributed over the real and reciprocal space part.",
1071         "The part of the error that stems from self interaction of the particles ",
1072         "is computationally demanding. However, a good a approximation is to",
1073         "just use a fraction of the particles for this term which can be",
1074         "indicated by the flag [TT]-self[tt].[PAR]",
1075     };
1076
1077     real            fs        = 0.0; /* 0 indicates: not set by the user */
1078     real            user_beta = -1.0;
1079     real            fracself  = 1.0;
1080     t_inputinfo     info;
1081     t_state         state;        /* The state from the tpr input file */
1082     gmx_mtop_t      mtop;         /* The topology from the tpr input file */
1083     FILE           *fp = nullptr;
1084     t_commrec      *cr;
1085     unsigned long   PCA_Flags;
1086     gmx_bool        bTUNE    = FALSE;
1087     gmx_bool        bVerbose = FALSE;
1088     int             seed     = 0;
1089
1090
1091     static t_filenm   fnm[] = {
1092         { efTPR, "-s",     nullptr,    ffREAD },
1093         { efOUT, "-o",    "error",  ffWRITE },
1094         { efTPR, "-so",   "tuned",  ffOPTWR }
1095     };
1096
1097     gmx_output_env_t *oenv = nullptr;
1098
1099     t_pargs           pa[] = {
1100         { "-beta",     FALSE, etREAL, {&user_beta},
1101           "If positive, overwrite ewald_beta from [REF].tpr[ref] file with this value" },
1102         { "-tune",     FALSE, etBOOL, {&bTUNE},
1103           "Tune the splitting parameter such that the error is equally distributed between real and reciprocal space" },
1104         { "-self",     FALSE, etREAL, {&fracself},
1105           "If between 0.0 and 1.0, determine self interaction error from just this fraction of the charged particles" },
1106         { "-seed",     FALSE, etINT,  {&seed},
1107           "Random number seed used for Monte Carlo algorithm when [TT]-self[tt] is set to a value between 0.0 and 1.0" },
1108         { "-v",        FALSE, etBOOL, {&bVerbose},
1109           "Be loud and noisy" }
1110     };
1111
1112
1113 #define NFILE asize(fnm)
1114
1115     cr         = init_commrec();
1116     PCA_Flags  = PCA_NOEXIT_ON_ARGS;
1117
1118     if (!parse_common_args(&argc, argv, PCA_Flags,
1119                            NFILE, fnm, asize(pa), pa, asize(desc), desc,
1120                            0, nullptr, &oenv))
1121     {
1122         sfree(cr);
1123         return 0;
1124     }
1125
1126     if (!bTUNE)
1127     {
1128         bTUNE = opt2bSet("-so", NFILE, fnm);
1129     }
1130
1131     info.n_entries = 1;
1132
1133     /* Allocate memory for the inputinfo struct: */
1134     create_info(&info);
1135     info.fourier_sp[0] = fs;
1136
1137     t_inputrec ir;
1138     if (MASTER(cr))
1139     {
1140         read_tpr_file(opt2fn("-s", NFILE, fnm), &info, &state, &mtop, &ir, user_beta, fracself);
1141         /* Open logfile for reading */
1142         fp = fopen(opt2fn("-o", NFILE, fnm), "w");
1143
1144         /* Determine the volume of the simulation box */
1145         info.volume = det(state.box);
1146         calc_recipbox(state.box, info.recipbox);
1147         info.natoms = mtop.natoms;
1148         info.bTUNE  = bTUNE;
1149     }
1150
1151     /* Check consistency if the user provided fourierspacing */
1152     if (fs > 0 && MASTER(cr))
1153     {
1154         /* Recalculate the grid dimensions using fourierspacing from user input */
1155         info.nkx[0] = 0;
1156         info.nky[0] = 0;
1157         info.nkz[0] = 0;
1158         calcFftGrid(stdout, state.box, info.fourier_sp[0], minimalPmeGridSize(info.pme_order[0]),
1159                     &(info.nkx[0]), &(info.nky[0]), &(info.nkz[0]));
1160         if ( (ir.nkx != info.nkx[0]) || (ir.nky != info.nky[0]) || (ir.nkz != info.nkz[0]) )
1161         {
1162             gmx_fatal(FARGS, "Wrong fourierspacing %f nm, input file grid = %d x %d x %d, computed grid = %d x %d x %d",
1163                       fs, ir.nkx, ir.nky, ir.nkz, info.nkx[0], info.nky[0], info.nkz[0]);
1164         }
1165     }
1166
1167     /* Estimate (S)PME force error */
1168
1169     if (PAR(cr))
1170     {
1171         bcast_info(&info, cr);
1172     }
1173
1174     /* Get an error estimate of the input tpr file and do some tuning if requested */
1175     estimate_PME_error(&info, &state, &mtop, fp, bVerbose, seed, cr);
1176
1177     if (MASTER(cr))
1178     {
1179         /* Write out optimized tpr file if requested */
1180         if (opt2bSet("-so", NFILE, fnm) || bTUNE)
1181         {
1182             ir.ewald_rtol = info.ewald_rtol[0];
1183             write_tpx_state(opt2fn("-so", NFILE, fnm), &ir, &state, &mtop);
1184         }
1185         please_cite(fp, "Wang2010");
1186         fclose(fp);
1187     }
1188
1189     return 0;
1190 }