src/gromacs/gmxana/gmx_pme_error.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include <algorithm>
  36
  37 #include "gromacs/commandline/pargs.h"
  38 #include "typedefs.h"
  39 #include "types/commrec.h"
  40 #include "gromacs/utility/smalloc.h"
  41 #include "vec.h"
  42 #include "copyrite.h"
  43 #include "gromacs/fileio/tpxio.h"
  44 #include "readinp.h"
  45 #include "calcgrid.h"
  46 #include "checkpoint.h"
  47 #include "gmx_ana.h"
  48 #include "gromacs/random/random.h"
  49 #include "physics.h"
  50 #include "mdatoms.h"
  51 #include "coulomb.h"
  52 #include "mtop_util.h"
  53 #include "network.h"
  54 #include "main.h"
  55 #include "macros.h"
  56
  57 /* We use the same defines as in mvdata.c here */
  58 #define  block_bc(cr,   d) gmx_bcast(     sizeof(d),     &(d), (cr))
  59 #define nblock_bc(cr, nr, d) gmx_bcast((nr)*sizeof((d)[0]), (d), (cr))
  60 #define   snew_bc(cr, d, nr) { if (!MASTER(cr)) {snew((d), (nr)); }}
  61 /* #define TAKETIME */
  62 /* #define DEBUG  */
  63 enum {
  64     ddnoSEL, ddnoINTERLEAVE, ddnoPP_PME, ddnoCARTESIAN, ddnoNR
  65 };
  66
  67 /* Enum for situations that can occur during log file parsing */
  68 enum {
  69     eParselogOK,
  70     eParselogNotFound,
  71     eParselogNoPerfData,
  72     eParselogTerm,
  73     eParselogResetProblem,
  74     eParselogNr
  75 };
  76
  77
  78 typedef struct
  79 {
  80     gmx_int64_t     orig_sim_steps;  /* Number of steps to be done in the real simulation  */
  81     int             n_entries;       /* Number of entries in arrays                        */
  82     real            volume;          /* The volume of the box                              */
  83     matrix          recipbox;        /* The reciprocal box                                 */
  84     int             natoms;          /* The number of atoms in the MD system               */
  85     real           *fac;             /* The scaling factor                                 */
  86     real           *rcoulomb;        /* The coulomb radii [0...nr_inputfiles]              */
  87     real           *rvdw;            /* The vdW radii                                      */
  88     int            *nkx, *nky, *nkz; /* Number of k vectors in each spatial dimension      */
  89     real           *fourier_sp;      /* Fourierspacing                                     */
  90     real           *ewald_rtol;      /* Real space tolerance for Ewald, determines         */
  91                                      /* the real/reciprocal space relative weight          */
  92     real           *ewald_beta;      /* Splitting parameter [1/nm]                         */
  93     real            fracself;        /* fraction of particles for SI error                 */
  94     real            q2all;           /* sum ( q ^2 )                                       */
  95     real            q2allnr;         /* nr of charges                                      */
  96     int            *pme_order;       /* Interpolation order for PME (bsplines)             */
  97     char          **fn_out;          /* Name of the output tpr file                        */
  98     real           *e_dir;           /* Direct space part of PME error with these settings */
  99     real           *e_rec;           /* Reciprocal space part of PME error                 */
 100     gmx_bool        bTUNE;           /* flag for tuning */
 101 } t_inputinfo;
 102
 103
 104 /* Returns TRUE when atom is charged */
 105 static gmx_bool is_charge(real charge)
 106 {
 107     if (charge*charge > GMX_REAL_EPS)
 108     {
 109         return TRUE;
 110     }
 111     else
 112     {
 113         return FALSE;
 114     }
 115 }
 116
 117
 118 /* calculate charge density */
 119 static void calc_q2all(
 120         gmx_mtop_t *mtop,   /* molecular topology */
 121         real *q2all, real *q2allnr)
 122 {
 123     int             imol, iatom; /* indices for loops */
 124     real            q2_all = 0;  /* Sum of squared charges */
 125     int             nrq_mol;     /* Number of charges in a single molecule */
 126     int             nrq_all;     /* Total number of charges in the MD system */
 127     real            qi, q2_mol;
 128     gmx_moltype_t  *molecule;
 129     gmx_molblock_t *molblock;
 130
 131 #ifdef DEBUG
 132     fprintf(stderr, "\nCharge density:\n");
 133 #endif
 134     q2_all  = 0.0;                                 /* total q squared */
 135     nrq_all = 0;                                   /* total number of charges in the system */
 136     for (imol = 0; imol < mtop->nmolblock; imol++) /* Loop over molecule types */
 137     {
 138         q2_mol   = 0.0;                            /* q squared value of this molecule */
 139         nrq_mol  = 0;                              /* number of charges this molecule carries */
 140         molecule = &(mtop->moltype[imol]);
 141         molblock = &(mtop->molblock[imol]);
 142         for (iatom = 0; iatom < molblock->natoms_mol; iatom++) /* Loop over atoms in this molecule */
 143         {
 144             qi = molecule->atoms.atom[iatom].q;                /* Charge of this atom */
 145             /* Is this charge worth to be considered? */
 146             if (is_charge(qi))
 147             {
 148                 q2_mol += qi*qi;
 149                 nrq_mol++;
 150             }
 151         }
 152         /* Multiply with the number of molecules present of this type and add */
 153         q2_all  += q2_mol*molblock->nmol;
 154         nrq_all += nrq_mol*molblock->nmol;
 155 #ifdef DEBUG
 156         fprintf(stderr, "Molecule %2d (%5d atoms) q2_mol=%10.3e nr.mol.charges=%5d (%6dx)  q2_all=%10.3e  tot.charges=%d\n",
 157                 imol, molblock->natoms_mol, q2_mol, nrq_mol, molblock->nmol, q2_all, nrq_all);
 158 #endif
 159     }
 160
 161     *q2all   = q2_all;
 162     *q2allnr = nrq_all;
 163
 164 }
 165
 166
 167 /* Estimate the direct space part error of the SPME Ewald sum */
 168 static real estimate_direct(
 169         t_inputinfo *info
 170         )
 171 {
 172     real e_dir     = 0; /* Error estimate */
 173     real beta      = 0; /* Splitting parameter (1/nm) */
 174     real r_coulomb = 0; /* Cut-off in direct space */
 175
 176
 177     beta      = info->ewald_beta[0];
 178     r_coulomb = info->rcoulomb[0];
 179
 180     e_dir  = 2.0 * info->q2all * gmx_invsqrt( info->q2allnr  *  r_coulomb * info->volume );
 181     e_dir *= exp (-beta*beta*r_coulomb*r_coulomb);
 182
 183     return ONE_4PI_EPS0*e_dir;
 184 }
 185
 186 #define SUMORDER 6
 187
 188 /* the following 4 functions determine polynomials required for the reciprocal error estimate */
 189
 190 static inline real eps_poly1(
 191         real m,          /* grid coordinate in certain direction */
 192         real K,          /* grid size in corresponding direction */
 193         real n)          /* spline interpolation order of the SPME */
 194 {
 195     int  i;
 196     real nom   = 0; /* nominator */
 197     real denom = 0; /* denominator */
 198     real tmp   = 0;
 199
 200     if (m == 0.0)
 201     {
 202         return 0.0;
 203     }
 204
 205     for (i = -SUMORDER; i < 0; i++)
 206     {
 207         tmp  = m / K + i;
 208         tmp *= 2.0*M_PI;
 209         nom += pow( tmp, -n );
 210     }
 211
 212     for (i = SUMORDER; i > 0; i--)
 213     {
 214         tmp  = m / K + i;
 215         tmp *= 2.0*M_PI;
 216         nom += pow( tmp, -n );
 217     }
 218
 219     tmp   = m / K;
 220     tmp  *= 2.0*M_PI;
 221     denom = pow( tmp, -n )+nom;
 222
 223     return -nom/denom;
 224
 225 }
 226
 227 static inline real eps_poly2(
 228         real m,          /* grid coordinate in certain direction */
 229         real K,          /* grid size in corresponding direction */
 230         real n)          /* spline interpolation order of the SPME */
 231 {
 232     int  i;
 233     real nom   = 0; /* nominator */
 234     real denom = 0; /* denominator */
 235     real tmp   = 0;
 236
 237     if (m == 0.0)
 238     {
 239         return 0.0;
 240     }
 241
 242     for (i = -SUMORDER; i < 0; i++)
 243     {
 244         tmp  = m / K + i;
 245         tmp *= 2.0*M_PI;
 246         nom += pow( tmp, -2*n );
 247     }
 248
 249     for (i = SUMORDER; i > 0; i--)
 250     {
 251         tmp  = m / K + i;
 252         tmp *= 2.0*M_PI;
 253         nom += pow( tmp, -2*n );
 254     }
 255
 256     for (i = -SUMORDER; i < SUMORDER+1; i++)
 257     {
 258         tmp    = m / K + i;
 259         tmp   *= 2.0*M_PI;
 260         denom += pow( tmp, -n );
 261     }
 262     tmp = eps_poly1(m, K, n);
 263     return nom / denom / denom + tmp*tmp;
 264
 265 }
 266
 267 static inline real eps_poly3(
 268         real m,          /* grid coordinate in certain direction */
 269         real K,          /* grid size in corresponding direction */
 270         real n)          /* spline interpolation order of the SPME */
 271 {
 272     int  i;
 273     real nom   = 0; /* nominator */
 274     real denom = 0; /* denominator */
 275     real tmp   = 0;
 276
 277     if (m == 0.0)
 278     {
 279         return 0.0;
 280     }
 281
 282     for (i = -SUMORDER; i < 0; i++)
 283     {
 284         tmp  = m / K + i;
 285         tmp *= 2.0*M_PI;
 286         nom += i * pow( tmp, -2*n );
 287     }
 288
 289     for (i = SUMORDER; i > 0; i--)
 290     {
 291         tmp  = m / K + i;
 292         tmp *= 2.0*M_PI;
 293         nom += i * pow( tmp, -2*n );
 294     }
 295
 296     for (i = -SUMORDER; i < SUMORDER+1; i++)
 297     {
 298         tmp    = m / K + i;
 299         tmp   *= 2.0*M_PI;
 300         denom += pow( tmp, -n );
 301     }
 302
 303     return 2.0 * M_PI * nom / denom / denom;
 304
 305 }
 306
 307 static inline real eps_poly4(
 308         real m,          /* grid coordinate in certain direction */
 309         real K,          /* grid size in corresponding direction */
 310         real n)          /* spline interpolation order of the SPME */
 311 {
 312     int  i;
 313     real nom   = 0; /* nominator */
 314     real denom = 0; /* denominator */
 315     real tmp   = 0;
 316
 317     if (m == 0.0)
 318     {
 319         return 0.0;
 320     }
 321
 322     for (i = -SUMORDER; i < 0; i++)
 323     {
 324         tmp  = m / K + i;
 325         tmp *= 2.0*M_PI;
 326         nom += i * i * pow( tmp, -2*n );
 327     }
 328
 329     for (i = SUMORDER; i > 0; i--)
 330     {
 331         tmp  = m / K + i;
 332         tmp *= 2.0*M_PI;
 333         nom += i * i * pow( tmp, -2*n );
 334     }
 335
 336     for (i = -SUMORDER; i < SUMORDER+1; i++)
 337     {
 338         tmp    = m / K + i;
 339         tmp   *= 2.0*M_PI;
 340         denom += pow( tmp, -n );
 341     }
 342
 343     return 4.0 * M_PI * M_PI * nom / denom / denom;
 344
 345 }
 346
 347 static inline real eps_self(
 348         real m,       /* grid coordinate in certain direction */
 349         real K,       /* grid size in corresponding direction */
 350         rvec rboxv,   /* reciprocal box vector */
 351         real n,       /* spline interpolation order of the SPME */
 352         rvec x)       /* coordinate of charge */
 353 {
 354     int  i;
 355     real tmp    = 0; /* temporary variables for computations */
 356     real tmp1   = 0; /* temporary variables for computations */
 357     real tmp2   = 0; /* temporary variables for computations */
 358     real rcoord = 0; /* coordinate in certain reciprocal space direction */
 359     real nom    = 0; /* nominator */
 360     real denom  = 0; /* denominator */
 361
 362
 363     if (m == 0.0)
 364     {
 365         return 0.0;
 366     }
 367
 368     rcoord = iprod(rboxv, x);
 369
 370
 371     for (i = -SUMORDER; i < 0; i++)
 372     {
 373         tmp    = -sin(2.0 * M_PI * i * K * rcoord);
 374         tmp1   = 2.0 * M_PI * m / K + 2.0 * M_PI * i;
 375         tmp2   = pow(tmp1, -n);
 376         nom   += tmp * tmp2 * i;
 377         denom += tmp2;
 378     }
 379
 380     for (i = SUMORDER; i > 0; i--)
 381     {
 382         tmp    = -sin(2.0 * M_PI * i * K * rcoord);
 383         tmp1   = 2.0 * M_PI * m / K + 2.0 * M_PI * i;
 384         tmp2   = pow(tmp1, -n);
 385         nom   += tmp * tmp2 * i;
 386         denom += tmp2;
 387     }
 388
 389
 390     tmp    = 2.0 * M_PI * m / K;
 391     tmp1   = pow(tmp, -n);
 392     denom += tmp1;
 393
 394     return 2.0 * M_PI * nom / denom * K;
 395
 396 }
 397
 398 #undef SUMORDER
 399
 400 /* The following routine is just a copy from pme.c */
 401
 402 static void calc_recipbox(matrix box, matrix recipbox)
 403 {
 404     /* Save some time by assuming upper right part is zero */
 405
 406     real tmp = 1.0/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
 407
 408     recipbox[XX][XX] = box[YY][YY]*box[ZZ][ZZ]*tmp;
 409     recipbox[XX][YY] = 0;
 410     recipbox[XX][ZZ] = 0;
 411     recipbox[YY][XX] = -box[YY][XX]*box[ZZ][ZZ]*tmp;
 412     recipbox[YY][YY] = box[XX][XX]*box[ZZ][ZZ]*tmp;
 413     recipbox[YY][ZZ] = 0;
 414     recipbox[ZZ][XX] = (box[YY][XX]*box[ZZ][YY]-box[YY][YY]*box[ZZ][XX])*tmp;
 415     recipbox[ZZ][YY] = -box[ZZ][YY]*box[XX][XX]*tmp;
 416     recipbox[ZZ][ZZ] = box[XX][XX]*box[YY][YY]*tmp;
 417 }
 418
 419
 420 /* Estimate the reciprocal space part error of the SPME Ewald sum. */
 421 static real estimate_reciprocal(
 422         t_inputinfo       *info,
 423         rvec               x[], /* array of particles */
 424         real               q[], /* array of charges */
 425         int                nr,  /* number of charges = size of the charge array */
 426         FILE  gmx_unused  *fp_out,
 427         gmx_bool           bVerbose,
 428         unsigned int       seed,     /* The seed for the random number generator */
 429         int               *nsamples, /* Return the number of samples used if Monte Carlo
 430                                       * algorithm is used for self energy error estimate */
 431         t_commrec         *cr)
 432 {
 433     real     e_rec   = 0; /* reciprocal error estimate */
 434     real     e_rec1  = 0; /* Error estimate term 1*/
 435     real     e_rec2  = 0; /* Error estimate term 2*/
 436     real     e_rec3  = 0; /* Error estimate term 3 */
 437     real     e_rec3x = 0; /* part of Error estimate term 3 in x */
 438     real     e_rec3y = 0; /* part of Error estimate term 3 in y */
 439     real     e_rec3z = 0; /* part of Error estimate term 3 in z */
 440     int      i, ci;
 441     int      nx, ny, nz;  /* grid coordinates */
 442     real     q2_all = 0;  /* sum of squared charges */
 443     rvec     gridpx;      /* reciprocal grid point in x direction*/
 444     rvec     gridpxy;     /* reciprocal grid point in x and y direction*/
 445     rvec     gridp;       /* complete reciprocal grid point in 3 directions*/
 446     rvec     tmpvec;      /* template to create points from basis vectors */
 447     rvec     tmpvec2;     /* template to create points from basis vectors */
 448     real     coeff  = 0;  /* variable to compute coefficients of the error estimate */
 449     real     coeff2 = 0;  /* variable to compute coefficients of the error estimate */
 450     real     tmp    = 0;  /* variables to compute different factors from vectors */
 451     real     tmp1   = 0;
 452     real     tmp2   = 0;
 453     gmx_bool bFraction;
 454
 455     /* Random number generator */
 456     gmx_rng_t rng     = NULL;
 457     int      *numbers = NULL;
 458
 459     /* Index variables for parallel work distribution */
 460     int startglobal, stopglobal;
 461     int startlocal, stoplocal;
 462     int x_per_core;
 463     int xtot;
 464
 465 #ifdef TAKETIME
 466     double t0 = 0.0;
 467     double t1 = 0.0;
 468 #endif
 469
 470     rng = gmx_rng_init(seed);
 471
 472     clear_rvec(gridpx);
 473     clear_rvec(gridpxy);
 474     clear_rvec(gridp);
 475     clear_rvec(tmpvec);
 476     clear_rvec(tmpvec2);
 477
 478     for (i = 0; i < nr; i++)
 479     {
 480         q2_all += q[i]*q[i];
 481     }
 482
 483     /* Calculate indices for work distribution */
 484     startglobal = -info->nkx[0]/2;
 485     stopglobal  = info->nkx[0]/2;
 486     xtot        = stopglobal*2+1;
 487     if (PAR(cr))
 488     {
 489         x_per_core = static_cast<int>(ceil(static_cast<real>(xtot) / cr->nnodes));
 490         startlocal = startglobal + x_per_core*cr->nodeid;
 491         stoplocal  = startlocal + x_per_core -1;
 492         if (stoplocal > stopglobal)
 493         {
 494             stoplocal = stopglobal;
 495         }
 496     }
 497     else
 498     {
 499         startlocal = startglobal;
 500         stoplocal  = stopglobal;
 501         x_per_core = xtot;
 502     }
 503 /*
 504    #ifdef GMX_LIB_MPI
 505     MPI_Barrier(MPI_COMM_WORLD);
 506    #endif
 507  */
 508
 509 #ifdef GMX_LIB_MPI
 510 #ifdef TAKETIME
 511     if (MASTER(cr))
 512     {
 513         t0 = MPI_Wtime();
 514     }
 515 #endif
 516 #endif
 517
 518     if (MASTER(cr))
 519     {
 520
 521         fprintf(stderr, "Calculating reciprocal error part 1 ...");
 522
 523     }
 524
 525     for (nx = startlocal; nx <= stoplocal; nx++)
 526     {
 527         svmul(nx, info->recipbox[XX], gridpx);
 528         for (ny = -info->nky[0]/2; ny < info->nky[0]/2+1; ny++)
 529         {
 530             svmul(ny, info->recipbox[YY], tmpvec);
 531             rvec_add(gridpx, tmpvec, gridpxy);
 532             for (nz = -info->nkz[0]/2; nz < info->nkz[0]/2+1; nz++)
 533             {
 534                 if (0 == nx &&  0 == ny &&  0 == nz)
 535                 {
 536                     continue;
 537                 }
 538                 svmul(nz, info->recipbox[ZZ], tmpvec);
 539                 rvec_add(gridpxy, tmpvec, gridp);
 540                 tmp    = norm2(gridp);
 541                 coeff  = exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0] );
 542                 coeff /= 2.0 * M_PI * info->volume * tmp;
 543                 coeff2 = tmp;
 544
 545
 546                 tmp  = eps_poly2(nx, info->nkx[0], info->pme_order[0]);
 547                 tmp += eps_poly2(ny, info->nkx[0], info->pme_order[0]);
 548                 tmp += eps_poly2(nz, info->nkx[0], info->pme_order[0]);
 549
 550                 tmp1 = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
 551                 tmp2 = eps_poly1(ny, info->nky[0], info->pme_order[0]);
 552
 553                 tmp += 2.0 * tmp1 * tmp2;
 554
 555                 tmp1 = eps_poly1(nz, info->nkz[0], info->pme_order[0]);
 556                 tmp2 = eps_poly1(ny, info->nky[0], info->pme_order[0]);
 557
 558                 tmp += 2.0 * tmp1 * tmp2;
 559
 560                 tmp1 = eps_poly1(nz, info->nkz[0], info->pme_order[0]);
 561                 tmp2 = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
 562
 563                 tmp += 2.0 * tmp1 * tmp2;
 564
 565                 tmp1  = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
 566                 tmp1 += eps_poly1(ny, info->nky[0], info->pme_order[0]);
 567                 tmp1 += eps_poly1(nz, info->nkz[0], info->pme_order[0]);
 568
 569                 tmp += tmp1 * tmp1;
 570
 571                 e_rec1 += 32.0 * M_PI * M_PI * coeff * coeff * coeff2 * tmp  * q2_all * q2_all / nr;
 572
 573                 tmp1  = eps_poly3(nx, info->nkx[0], info->pme_order[0]);
 574                 tmp1 *= info->nkx[0];
 575                 tmp2  = iprod(gridp, info->recipbox[XX]);
 576
 577                 tmp = tmp1*tmp2;
 578
 579                 tmp1  = eps_poly3(ny, info->nky[0], info->pme_order[0]);
 580                 tmp1 *= info->nky[0];
 581                 tmp2  = iprod(gridp, info->recipbox[YY]);
 582
 583                 tmp += tmp1*tmp2;
 584
 585                 tmp1  = eps_poly3(nz, info->nkz[0], info->pme_order[0]);
 586                 tmp1 *= info->nkz[0];
 587                 tmp2  = iprod(gridp, info->recipbox[ZZ]);
 588
 589                 tmp += tmp1*tmp2;
 590
 591                 tmp *= 4.0 * M_PI;
 592
 593                 tmp1  = eps_poly4(nx, info->nkx[0], info->pme_order[0]);
 594                 tmp1 *= norm2(info->recipbox[XX]);
 595                 tmp1 *= info->nkx[0] * info->nkx[0];
 596
 597                 tmp += tmp1;
 598
 599                 tmp1  = eps_poly4(ny, info->nky[0], info->pme_order[0]);
 600                 tmp1 *= norm2(info->recipbox[YY]);
 601                 tmp1 *= info->nky[0] * info->nky[0];
 602
 603                 tmp += tmp1;
 604
 605                 tmp1  = eps_poly4(nz, info->nkz[0], info->pme_order[0]);
 606                 tmp1 *= norm2(info->recipbox[ZZ]);
 607                 tmp1 *= info->nkz[0] * info->nkz[0];
 608
 609                 tmp += tmp1;
 610
 611                 e_rec2 += 4.0 * coeff * coeff * tmp * q2_all * q2_all / nr;
 612
 613             }
 614         }
 615         if (MASTER(cr))
 616         {
 617             fprintf(stderr, "\rCalculating reciprocal error part 1 ... %3.0f%%", 100.0*(nx-startlocal+1)/(x_per_core));
 618         }
 619
 620     }
 621
 622     if (MASTER(cr))
 623     {
 624         fprintf(stderr, "\n");
 625     }
 626
 627     /* Use just a fraction of all charges to estimate the self energy error term? */
 628     bFraction =  (info->fracself > 0.0) && (info->fracself < 1.0);
 629
 630     if (bFraction)
 631     {
 632         /* Here xtot is the number of samples taken for the Monte Carlo calculation
 633          * of the average of term IV of equation 35 in Wang2010. Round up to a
 634          * number of samples that is divisible by the number of nodes */
 635         x_per_core  = static_cast<int>(ceil(info->fracself * nr / cr->nnodes));
 636         xtot        = x_per_core * cr->nnodes;
 637     }
 638     else
 639     {
 640         /* In this case we use all nr particle positions */
 641         xtot       = nr;
 642         x_per_core = static_cast<int>(ceil(static_cast<real>(xtot) / cr->nnodes));
 643     }
 644
 645     startlocal = x_per_core *  cr->nodeid;
 646     stoplocal  = std::min(startlocal + x_per_core, xtot);  /* min needed if xtot == nr */
 647
 648     if (bFraction)
 649     {
 650         /* Make shure we get identical results in serial and parallel. Therefore,
 651          * take the sample indices from a single, global random number array that
 652          * is constructed on the master node and that only depends on the seed */
 653         snew(numbers, xtot);
 654         if (MASTER(cr))
 655         {
 656             for (i = 0; i < xtot; i++)
 657             {
 658                 numbers[i] = static_cast<int>(floor(gmx_rng_uniform_real(rng) * nr));
 659             }
 660         }
 661         /* Broadcast the random number array to the other nodes */
 662         if (PAR(cr))
 663         {
 664             nblock_bc(cr, xtot, numbers);
 665         }
 666
 667         if (bVerbose && MASTER(cr))
 668         {
 669             fprintf(stdout, "Using %d sample%s to approximate the self interaction error term",
 670                     xtot, xtot == 1 ? "" : "s");
 671             if (PAR(cr))
 672             {
 673                 fprintf(stdout, " (%d sample%s per node)", x_per_core, x_per_core == 1 ? "" : "s");
 674             }
 675             fprintf(stdout, ".\n");
 676         }
 677     }
 678
 679     /* Return the number of positions used for the Monte Carlo algorithm */
 680     *nsamples = xtot;
 681
 682     for (i = startlocal; i < stoplocal; i++)
 683     {
 684         e_rec3x = 0;
 685         e_rec3y = 0;
 686         e_rec3z = 0;
 687
 688         if (bFraction)
 689         {
 690             /* Randomly pick a charge */
 691             ci = numbers[i];
 692         }
 693         else
 694         {
 695             /* Use all charges */
 696             ci = i;
 697         }
 698
 699         /* for(nx=startlocal; nx<=stoplocal; nx++)*/
 700         for (nx = -info->nkx[0]/2; nx < info->nkx[0]/2+1; nx++)
 701         {
 702             svmul(nx, info->recipbox[XX], gridpx);
 703             for (ny = -info->nky[0]/2; ny < info->nky[0]/2+1; ny++)
 704             {
 705                 svmul(ny, info->recipbox[YY], tmpvec);
 706                 rvec_add(gridpx, tmpvec, gridpxy);
 707                 for (nz = -info->nkz[0]/2; nz < info->nkz[0]/2+1; nz++)
 708                 {
 709
 710                     if (0 == nx && 0 == ny && 0 == nz)
 711                     {
 712                         continue;
 713                     }
 714
 715                     svmul(nz, info->recipbox[ZZ], tmpvec);
 716                     rvec_add(gridpxy, tmpvec, gridp);
 717                     tmp      = norm2(gridp);
 718                     coeff    = exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0] );
 719                     coeff   /= tmp;
 720                     e_rec3x += coeff*eps_self(nx, info->nkx[0], info->recipbox[XX], info->pme_order[0], x[ci]);
 721                     e_rec3y += coeff*eps_self(ny, info->nky[0], info->recipbox[YY], info->pme_order[0], x[ci]);
 722                     e_rec3z += coeff*eps_self(nz, info->nkz[0], info->recipbox[ZZ], info->pme_order[0], x[ci]);
 723
 724                 }
 725             }
 726         }
 727
 728         clear_rvec(tmpvec2);
 729
 730         svmul(e_rec3x, info->recipbox[XX], tmpvec);
 731         rvec_inc(tmpvec2, tmpvec);
 732         svmul(e_rec3y, info->recipbox[YY], tmpvec);
 733         rvec_inc(tmpvec2, tmpvec);
 734         svmul(e_rec3z, info->recipbox[ZZ], tmpvec);
 735         rvec_inc(tmpvec2, tmpvec);
 736
 737         e_rec3 += q[ci]*q[ci]*q[ci]*q[ci]*norm2(tmpvec2) / ( xtot * M_PI * info->volume * M_PI * info->volume);
 738         if (MASTER(cr))
 739         {
 740             fprintf(stderr, "\rCalculating reciprocal error part 2 ... %3.0f%%",
 741                     100.0*(i+1)/stoplocal);
 742
 743         }
 744     }
 745
 746     if (MASTER(cr))
 747     {
 748         fprintf(stderr, "\n");
 749     }
 750
 751 #ifdef GMX_LIB_MPI
 752 #ifdef TAKETIME
 753     if (MASTER(cr))
 754     {
 755         t1 = MPI_Wtime() - t0;
 756         fprintf(fp_out, "Recip. err. est. took   : %lf s\n", t1);
 757     }
 758 #endif
 759 #endif
 760
 761 #ifdef DEBUG
 762     if (PAR(cr))
 763     {
 764         fprintf(stderr, "Node %3d: nx=[%3d...%3d]  e_rec3=%e\n",
 765                 cr->nodeid, startlocal, stoplocal, e_rec3);
 766     }
 767 #endif
 768
 769     if (PAR(cr))
 770     {
 771         gmx_sum(1, &e_rec1, cr);
 772         gmx_sum(1, &e_rec2, cr);
 773         gmx_sum(1, &e_rec3, cr);
 774     }
 775
 776     /* e_rec1*=8.0 * q2_all / info->volume / info->volume / nr ;
 777        e_rec2*=  q2_all / M_PI / M_PI / info->volume / info->volume / nr ;
 778        e_rec3/= M_PI * M_PI * info->volume * info->volume * nr ;
 779      */
 780     e_rec = sqrt(e_rec1+e_rec2+e_rec3);
 781
 782
 783     return ONE_4PI_EPS0 * e_rec;
 784 }
 785
 786
 787 /* Allocate memory for the inputinfo struct: */
 788 static void create_info(t_inputinfo *info)
 789 {
 790     snew(info->fac, info->n_entries);
 791     snew(info->rcoulomb, info->n_entries);
 792     snew(info->rvdw, info->n_entries);
 793     snew(info->nkx, info->n_entries);
 794     snew(info->nky, info->n_entries);
 795     snew(info->nkz, info->n_entries);
 796     snew(info->fourier_sp, info->n_entries);
 797     snew(info->ewald_rtol, info->n_entries);
 798     snew(info->ewald_beta, info->n_entries);
 799     snew(info->pme_order, info->n_entries);
 800     snew(info->fn_out, info->n_entries);
 801     snew(info->e_dir, info->n_entries);
 802     snew(info->e_rec, info->n_entries);
 803 }
 804
 805
 806 /* Allocate and fill an array with coordinates and charges,
 807  * returns the number of charges found
 808  */
 809 static int prepare_x_q(real *q[], rvec *x[], gmx_mtop_t *mtop, rvec x_orig[], t_commrec *cr)
 810 {
 811     int                     i;
 812     int                     nq; /* number of charged particles */
 813     gmx_mtop_atomloop_all_t aloop;
 814     t_atom                 *atom;
 815
 816
 817     if (MASTER(cr))
 818     {
 819         snew(*q, mtop->natoms);
 820         snew(*x, mtop->natoms);
 821         nq = 0;
 822
 823         aloop = gmx_mtop_atomloop_all_init(mtop);
 824
 825         while (gmx_mtop_atomloop_all_next(aloop, &i, &atom))
 826         {
 827             if (is_charge(atom->q))
 828             {
 829                 (*q)[nq]     = atom->q;
 830                 (*x)[nq][XX] = x_orig[i][XX];
 831                 (*x)[nq][YY] = x_orig[i][YY];
 832                 (*x)[nq][ZZ] = x_orig[i][ZZ];
 833                 nq++;
 834             }
 835         }
 836         /* Give back some unneeded memory */
 837         srenew(*q, nq);
 838         srenew(*x, nq);
 839     }
 840     /* Broadcast x and q in the parallel case */
 841     if (PAR(cr))
 842     {
 843         /* Transfer the number of charges */
 844         block_bc(cr, nq);
 845         snew_bc(cr, *x, nq);
 846         snew_bc(cr, *q, nq);
 847         nblock_bc(cr, nq, *x);
 848         nblock_bc(cr, nq, *q);
 849     }
 850
 851     return nq;
 852 }
 853
 854
 855
 856 /* Read in the tpr file and save information we need later in info */
 857 static void read_tpr_file(const char *fn_sim_tpr, t_inputinfo *info, t_state *state, gmx_mtop_t *mtop, t_inputrec *ir, real user_beta, real fracself)
 858 {
 859     read_tpx_state(fn_sim_tpr, ir, state, NULL, mtop);
 860
 861     /* The values of the original tpr input file are save in the first
 862      * place [0] of the arrays */
 863     info->orig_sim_steps = ir->nsteps;
 864     info->pme_order[0]   = ir->pme_order;
 865     info->rcoulomb[0]    = ir->rcoulomb;
 866     info->rvdw[0]        = ir->rvdw;
 867     info->nkx[0]         = ir->nkx;
 868     info->nky[0]         = ir->nky;
 869     info->nkz[0]         = ir->nkz;
 870     info->ewald_rtol[0]  = ir->ewald_rtol;
 871     info->fracself       = fracself;
 872     if (user_beta > 0)
 873     {
 874         info->ewald_beta[0] = user_beta;
 875     }
 876     else
 877     {
 878         info->ewald_beta[0]  = calc_ewaldcoeff_q(info->rcoulomb[0], info->ewald_rtol[0]);
 879     }
 880
 881     /* Check if PME was chosen */
 882     if (EEL_PME(ir->coulombtype) == FALSE)
 883     {
 884         gmx_fatal(FARGS, "Can only do optimizations for simulations with PME");
 885     }
 886
 887     /* Check if rcoulomb == rlist, which is necessary for PME */
 888     if (!(ir->rcoulomb == ir->rlist))
 889     {
 890         gmx_fatal(FARGS, "PME requires rcoulomb (%f) to be equal to rlist (%f).", ir->rcoulomb, ir->rlist);
 891     }
 892 }
 893
 894
 895 /* Transfer what we need for parallelizing the reciprocal error estimate */
 896 static void bcast_info(t_inputinfo *info, t_commrec *cr)
 897 {
 898     nblock_bc(cr, info->n_entries, info->nkx);
 899     nblock_bc(cr, info->n_entries, info->nky);
 900     nblock_bc(cr, info->n_entries, info->nkz);
 901     nblock_bc(cr, info->n_entries, info->ewald_beta);
 902     nblock_bc(cr, info->n_entries, info->pme_order);
 903     nblock_bc(cr, info->n_entries, info->e_dir);
 904     nblock_bc(cr, info->n_entries, info->e_rec);
 905     block_bc(cr, info->volume);
 906     block_bc(cr, info->recipbox);
 907     block_bc(cr, info->natoms);
 908     block_bc(cr, info->fracself);
 909     block_bc(cr, info->bTUNE);
 910     block_bc(cr, info->q2all);
 911     block_bc(cr, info->q2allnr);
 912 }
 913
 914
 915 /* Estimate the error of the SPME Ewald sum. This estimate is based upon
 916  * a) a homogeneous distribution of the charges
 917  * b) a total charge of zero.
 918  */
 919 static void estimate_PME_error(t_inputinfo *info, t_state *state,
 920                                gmx_mtop_t *mtop, FILE *fp_out, gmx_bool bVerbose, unsigned int seed,
 921                                t_commrec *cr)
 922 {
 923     rvec *x     = NULL; /* The coordinates */
 924     real *q     = NULL; /* The charges     */
 925     real  edir  = 0.0;  /* real space error */
 926     real  erec  = 0.0;  /* reciprocal space error */
 927     real  derr  = 0.0;  /* difference of real and reciprocal space error */
 928     real  derr0 = 0.0;  /* difference of real and reciprocal space error */
 929     real  beta  = 0.0;  /* splitting parameter beta */
 930     real  beta0 = 0.0;  /* splitting parameter beta */
 931     int   ncharges;     /* The number of atoms with charges */
 932     int   nsamples;     /* The number of samples used for the calculation of the
 933                          * self-energy error term */
 934     int   i = 0;
 935
 936     if (MASTER(cr))
 937     {
 938         fprintf(fp_out, "\n--- PME ERROR ESTIMATE ---\n");
 939     }
 940
 941     /* Prepare an x and q array with only the charged atoms */
 942     ncharges = prepare_x_q(&q, &x, mtop, state->x, cr);
 943     if (MASTER(cr))
 944     {
 945         calc_q2all(mtop, &(info->q2all), &(info->q2allnr));
 946         info->ewald_rtol[0] = gmx_erfc(info->rcoulomb[0]*info->ewald_beta[0]);
 947         /* Write some info to log file */
 948         fprintf(fp_out, "Box volume              : %g nm^3\n", info->volume);
 949         fprintf(fp_out, "Number of charged atoms : %d (total atoms %d)\n", ncharges, info->natoms);
 950         fprintf(fp_out, "Coulomb radius          : %g nm\n", info->rcoulomb[0]);
 951         fprintf(fp_out, "Ewald_rtol              : %g\n", info->ewald_rtol[0]);
 952         fprintf(fp_out, "Ewald parameter beta    : %g\n", info->ewald_beta[0]);
 953         fprintf(fp_out, "Interpolation order     : %d\n", info->pme_order[0]);
 954         fprintf(fp_out, "Fourier grid (nx,ny,nz) : %d x %d x %d\n",
 955                 info->nkx[0], info->nky[0], info->nkz[0]);
 956         fflush(fp_out);
 957
 958     }
 959
 960     if (PAR(cr))
 961     {
 962         bcast_info(info, cr);
 963     }
 964
 965
 966     /* Calculate direct space error */
 967     info->e_dir[0] = estimate_direct(info);
 968
 969     /* Calculate reciprocal space error */
 970     info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
 971                                          seed, &nsamples, cr);
 972
 973     if (PAR(cr))
 974     {
 975         bcast_info(info, cr);
 976     }
 977
 978     if (MASTER(cr))
 979     {
 980         fprintf(fp_out, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
 981         fprintf(fp_out, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
 982         fprintf(fp_out, "Self-energy error term was estimated using %d samples\n", nsamples);
 983         fflush(fp_out);
 984         fprintf(stderr, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
 985         fprintf(stderr, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
 986     }
 987
 988     i = 0;
 989
 990     if (info->bTUNE)
 991     {
 992         if (MASTER(cr))
 993         {
 994             fprintf(stderr, "Starting tuning ...\n");
 995         }
 996         edir  = info->e_dir[0];
 997         erec  = info->e_rec[0];
 998         derr0 = edir-erec;
 999         beta0 = info->ewald_beta[0];
1000         if (derr > 0.0)
1001         {
1002             info->ewald_beta[0] += 0.1;
1003         }
1004         else
1005         {
1006             info->ewald_beta[0] -= 0.1;
1007         }
1008         info->e_dir[0] = estimate_direct(info);
1009         info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
1010                                              seed, &nsamples, cr);
1011
1012         if (PAR(cr))
1013         {
1014             bcast_info(info, cr);
1015         }
1016
1017
1018         edir = info->e_dir[0];
1019         erec = info->e_rec[0];
1020         derr = edir-erec;
1021         while (fabs(derr/std::min(erec, edir)) > 1e-4)
1022         {
1023
1024             beta                = info->ewald_beta[0];
1025             beta               -= derr*(info->ewald_beta[0]-beta0)/(derr-derr0);
1026             beta0               = info->ewald_beta[0];
1027             info->ewald_beta[0] = beta;
1028             derr0               = derr;
1029
1030             info->e_dir[0] = estimate_direct(info);
1031             info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
1032                                                  seed, &nsamples, cr);
1033
1034             if (PAR(cr))
1035             {
1036                 bcast_info(info, cr);
1037             }
1038
1039             edir = info->e_dir[0];
1040             erec = info->e_rec[0];
1041             derr = edir-erec;
1042
1043             if (MASTER(cr))
1044             {
1045                 i++;
1046                 fprintf(stderr, "difference between real and rec. space error (step %d): %g\n", i, fabs(derr));
1047                 fprintf(stderr, "old beta: %f\n", beta0);
1048                 fprintf(stderr, "new beta: %f\n", beta);
1049             }
1050         }
1051
1052         info->ewald_rtol[0] = gmx_erfc(info->rcoulomb[0]*info->ewald_beta[0]);
1053
1054         if (MASTER(cr))
1055         {
1056             /* Write some info to log file */
1057             fflush(fp_out);
1058             fprintf(fp_out, "=========  After tuning ========\n");
1059             fprintf(fp_out, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
1060             fprintf(fp_out, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
1061             fprintf(stderr, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
1062             fprintf(stderr, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
1063             fprintf(fp_out, "Ewald_rtol              : %g\n", info->ewald_rtol[0]);
1064             fprintf(fp_out, "Ewald parameter beta    : %g\n", info->ewald_beta[0]);
1065             fflush(fp_out);
1066
1067         }
1068
1069     }
1070
1071 }
1072
1073
1074 int gmx_pme_error(int argc, char *argv[])
1075 {
1076     const char     *desc[] = {
1077         "[THISMODULE] estimates the error of the electrostatic forces",
1078         "if using the sPME algorithm. The flag [TT]-tune[tt] will determine",
1079         "the splitting parameter such that the error is equally",
1080         "distributed over the real and reciprocal space part.",
1081         "The part of the error that stems from self interaction of the particles "
1082         "is computationally demanding. However, a good a approximation is to",
1083         "just use a fraction of the particles for this term which can be",
1084         "indicated by the flag [TT]-self[tt].[PAR]",
1085     };
1086
1087     real            fs        = 0.0; /* 0 indicates: not set by the user */
1088     real            user_beta = -1.0;
1089     real            fracself  = 1.0;
1090     t_inputinfo     info;
1091     t_state         state;     /* The state from the tpr input file */
1092     gmx_mtop_t      mtop;      /* The topology from the tpr input file */
1093     t_inputrec     *ir = NULL; /* The inputrec from the tpr file */
1094     FILE           *fp = NULL;
1095     t_commrec      *cr;
1096     unsigned long   PCA_Flags;
1097     gmx_bool        bTUNE    = FALSE;
1098     gmx_bool        bVerbose = FALSE;
1099     int             seed     = 0;
1100
1101
1102     static t_filenm fnm[] = {
1103         { efTPX, "-s",     NULL,    ffREAD },
1104         { efOUT, "-o",    "error",  ffWRITE },
1105         { efTPX, "-so",   "tuned",  ffOPTWR }
1106     };
1107
1108     output_env_t    oenv = NULL;
1109
1110     t_pargs         pa[] = {
1111         { "-beta",     FALSE, etREAL, {&user_beta},
1112           "If positive, overwrite ewald_beta from [TT].tpr[tt] file with this value" },
1113         { "-tune",     FALSE, etBOOL, {&bTUNE},
1114           "Tune the splitting parameter such that the error is equally distributed between real and reciprocal space" },
1115         { "-self",     FALSE, etREAL, {&fracself},
1116           "If between 0.0 and 1.0, determine self interaction error from just this fraction of the charged particles" },
1117         { "-seed",     FALSE, etINT,  {&seed},
1118           "Random number seed used for Monte Carlo algorithm when [TT]-self[tt] is set to a value between 0.0 and 1.0" },
1119         { "-v",        FALSE, etBOOL, {&bVerbose},
1120           "Be loud and noisy" }
1121     };
1122
1123
1124 #define NFILE asize(fnm)
1125
1126     cr = init_commrec();
1127 #ifdef GMX_LIB_MPI
1128     MPI_Barrier(MPI_COMM_WORLD);
1129 #endif
1130
1131     PCA_Flags  = PCA_NOEXIT_ON_ARGS;
1132     PCA_Flags |= (MASTER(cr) ? 0 : PCA_QUIET);
1133
1134     if (!parse_common_args(&argc, argv, PCA_Flags,
1135                            NFILE, fnm, asize(pa), pa, asize(desc), desc,
1136                            0, NULL, &oenv))
1137     {
1138         return 0;
1139     }
1140
1141     if (!bTUNE)
1142     {
1143         bTUNE = opt2bSet("-so", NFILE, fnm);
1144     }
1145
1146     info.n_entries = 1;
1147
1148     /* Allocate memory for the inputinfo struct: */
1149     create_info(&info);
1150     info.fourier_sp[0] = fs;
1151
1152     /* Read in the tpr file and open logfile for reading */
1153     if (MASTER(cr))
1154     {
1155         snew(ir, 1);
1156         read_tpr_file(opt2fn("-s", NFILE, fnm), &info, &state, &mtop, ir, user_beta, fracself);
1157
1158         fp = fopen(opt2fn("-o", NFILE, fnm), "w");
1159     }
1160
1161     /* Check consistency if the user provided fourierspacing */
1162     if (fs > 0 && MASTER(cr))
1163     {
1164         /* Recalculate the grid dimensions using fourierspacing from user input */
1165         info.nkx[0] = 0;
1166         info.nky[0] = 0;
1167         info.nkz[0] = 0;
1168         calc_grid(stdout, state.box, info.fourier_sp[0], &(info.nkx[0]), &(info.nky[0]), &(info.nkz[0]));
1169         if ( (ir->nkx != info.nkx[0]) || (ir->nky != info.nky[0]) || (ir->nkz != info.nkz[0]) )
1170         {
1171             gmx_fatal(FARGS, "Wrong fourierspacing %f nm, input file grid = %d x %d x %d, computed grid = %d x %d x %d",
1172                       fs, ir->nkx, ir->nky, ir->nkz, info.nkx[0], info.nky[0], info.nkz[0]);
1173         }
1174     }
1175
1176     /* Estimate (S)PME force error */
1177
1178     /* Determine the volume of the simulation box */
1179     if (MASTER(cr))
1180     {
1181         info.volume = det(state.box);
1182         calc_recipbox(state.box, info.recipbox);
1183         info.natoms = mtop.natoms;
1184         info.bTUNE  = bTUNE;
1185     }
1186
1187     if (PAR(cr))
1188     {
1189         bcast_info(&info, cr);
1190     }
1191
1192     /* Get an error estimate of the input tpr file and do some tuning if requested */
1193     estimate_PME_error(&info, &state, &mtop, fp, bVerbose, seed, cr);
1194
1195     if (MASTER(cr))
1196     {
1197         /* Write out optimized tpr file if requested */
1198         if (opt2bSet("-so", NFILE, fnm) || bTUNE)
1199         {
1200             ir->ewald_rtol = info.ewald_rtol[0];
1201             write_tpx_state(opt2fn("-so", NFILE, fnm), ir, &state, &mtop);
1202         }
1203         please_cite(fp, "Wang2010");
1204         fclose(fp);
1205     }
1206
1207     return 0;
1208 }