src/gromacs/mdlib/calc_verletbuf.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012-2018, The GROMACS development team.
   5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 #include "gmxpre.h"
  37
  38 #include "calc_verletbuf.h"
  39
  40 #include <cmath>
  41 #include <cstdlib>
  42
  43 #include <algorithm>
  44
  45 #include "gromacs/ewald/ewald_utils.h"
  46 #include "gromacs/math/functions.h"
  47 #include "gromacs/math/units.h"
  48 #include "gromacs/math/vec.h"
  49 #include "gromacs/mdtypes/inputrec.h"
  50 #include "gromacs/mdtypes/md_enums.h"
  51 #include "gromacs/nbnxm/nbnxm.h"
  52 #include "gromacs/nbnxm/nbnxm_geometry.h"
  53 #include "gromacs/nbnxm/nbnxm_simd.h"
  54 #include "gromacs/topology/block.h"
  55 #include "gromacs/topology/ifunc.h"
  56 #include "gromacs/topology/topology.h"
  57 #include "gromacs/utility/fatalerror.h"
  58 #include "gromacs/utility/real.h"
  59 #include "gromacs/utility/strconvert.h"
  60
  61 /* The code in this file estimates a pairlist buffer length
  62  * given a target energy drift per atom per picosecond.
  63  * This is done by estimating the drift given a buffer length.
  64  * Ideally we would like to have a tight overestimate of the drift,
  65  * but that can be difficult to achieve.
  66  *
  67  * Significant approximations used:
  68  *
  69  * Uniform particle density. UNDERESTIMATES the drift by rho_global/rho_local.
  70  *
  71  * Interactions don't affect particle motion. OVERESTIMATES the drift on longer
  72  * time scales. This approximation probably introduces the largest errors.
  73  *
  74  * Only take one constraint per particle into account: OVERESTIMATES the drift.
  75  *
  76  * For rotating constraints assume the same functional shape for time scales
  77  * where the constraints rotate significantly as the exact expression for
  78  * short time scales. OVERESTIMATES the drift on long time scales.
  79  *
  80  * For non-linear virtual sites use the mass of the lightest constructing atom
  81  * to determine the displacement. OVER/UNDERESTIMATES the drift, depending on
  82  * the geometry and masses of constructing atoms.
  83  *
  84  * Note that the formulas for normal atoms and linear virtual sites are exact,
  85  * apart from the first two approximations.
  86  *
  87  * Note that apart from the effect of the above approximations, the actual
  88  * drift of the total energy of a system can be orders of magnitude smaller
  89  * due to cancellation of positive and negative drift for different pairs.
  90  */
  91
  92
  93 /* Struct for unique atom type for calculating the energy drift.
  94  * The atom displacement depends on mass and constraints.
  95  * The energy jump for given distance depend on LJ type and q.
  96  */
  97 struct VerletbufAtomtype
  98 {
  99     atom_nonbonded_kinetic_prop_t prop; /* non-bonded and kinetic atom prop. */
 100     int                           n;    /* #atoms of this type in the system */
 101 };
 102
 103 // Struct for derivatives of a non-bonded interaction potential
 104 struct pot_derivatives_t
 105 {
 106     real md1; // -V' at the cutoff
 107     real d2;  //  V'' at the cutoff
 108     real md3; // -V''' at the cutoff
 109 };
 110
 111 VerletbufListSetup verletbufGetListSetup(Nbnxm::KernelType nbnxnKernelType)
 112 {
 113     /* Note that the current buffer estimation code only handles clusters
 114      * of size 1, 2 or 4, so for 4x8 or 8x8 we use the estimate for 4x4.
 115      */
 116     VerletbufListSetup listSetup;
 117
 118     listSetup.cluster_size_i = Nbnxm::IClusterSizePerKernelType[nbnxnKernelType];
 119     listSetup.cluster_size_j = Nbnxm::JClusterSizePerKernelType[nbnxnKernelType];
 120
 121     if (!Nbnxm::kernelTypeUsesSimplePairlist(nbnxnKernelType))
 122     {
 123         /* The GPU kernels (except for OpenCL) split the j-clusters in two halves */
 124         listSetup.cluster_size_j /= 2;
 125     }
 126
 127     return listSetup;
 128 }
 129
 130 VerletbufListSetup verletbufGetSafeListSetup(ListSetupType listType)
 131 {
 132     /* When calling this function we often don't know which kernel type we
 133      * are going to use. We choose the kernel type with the smallest possible
 134      * i- and j-cluster sizes, so we potentially overestimate, but never
 135      * underestimate, the buffer drift.
 136      */
 137     Nbnxm::KernelType nbnxnKernelType;
 138
 139     if (listType == ListSetupType::Gpu)
 140     {
 141         nbnxnKernelType = Nbnxm::KernelType::Gpu8x8x8;
 142     }
 143     else if (GMX_SIMD && listType == ListSetupType::CpuSimdWhenSupported)
 144     {
 145 #ifdef GMX_NBNXN_SIMD_2XNN
 146         /* We use the smallest cluster size to be on the safe side */
 147         nbnxnKernelType = Nbnxm::KernelType::Cpu4xN_Simd_2xNN;
 148 #else
 149         nbnxnKernelType = Nbnxm::KernelType::Cpu4xN_Simd_4xN;
 150 #endif
 151     }
 152     else
 153     {
 154         nbnxnKernelType = Nbnxm::KernelType::Cpu4x4_PlainC;
 155     }
 156
 157     return verletbufGetListSetup(nbnxnKernelType);
 158 }
 159
 160 // Returns whether prop1 and prop2 are identical
 161 static bool atom_nonbonded_kinetic_prop_equal(const atom_nonbonded_kinetic_prop_t& prop1,
 162                                               const atom_nonbonded_kinetic_prop_t& prop2)
 163 {
 164     return (prop1.mass == prop2.mass && prop1.type == prop2.type && prop1.q == prop2.q
 165             && prop1.bConstr == prop2.bConstr && prop1.con_mass == prop2.con_mass
 166             && prop1.con_len == prop2.con_len);
 167 }
 168
 169 static void addAtomtype(std::vector<VerletbufAtomtype>* att, const atom_nonbonded_kinetic_prop_t& prop, int nmol)
 170 {
 171     if (prop.mass == 0)
 172     {
 173         /* Ignore massless particles */
 174         return;
 175     }
 176
 177     size_t i = 0;
 178     while (i < att->size() && !atom_nonbonded_kinetic_prop_equal(prop, (*att)[i].prop))
 179     {
 180         i++;
 181     }
 182
 183     if (i < att->size())
 184     {
 185         (*att)[i].n += nmol;
 186     }
 187     else
 188     {
 189         att->push_back({ prop, nmol });
 190     }
 191 }
 192
 193 /* Returns the mass of atom atomIndex or 1 when setMassesToOne=true */
 194 static real getMass(const t_atoms& atoms, int atomIndex, bool setMassesToOne)
 195 {
 196     if (!setMassesToOne)
 197     {
 198         return atoms.atom[atomIndex].m;
 199     }
 200     else
 201     {
 202         return 1;
 203     }
 204 }
 205
 206 // Set the masses of a vsites in vsite_m and the non-linear vsite count in n_nonlin_vsite
 207 static void get_vsite_masses(const gmx_moltype_t&  moltype,
 208                              const gmx_ffparams_t& ffparams,
 209                              bool                  setMassesToOne,
 210                              gmx::ArrayRef<real>   vsite_m)
 211 {
 212     int numNonlinearVsites = 0;
 213
 214     /* Check for virtual sites, determine mass from constructing atoms */
 215     for (const auto& ilist : extractILists(moltype.ilist, IF_VSITE))
 216     {
 217         for (size_t i = 0; i < ilist.iatoms.size(); i += ilistStride(ilist))
 218         {
 219             const t_iparams& ip = ffparams.iparams[ilist.iatoms[i]];
 220             const int        a1 = ilist.iatoms[i + 1];
 221
 222             if (ilist.functionType != F_VSITEN)
 223             {
 224                 /* Only vsiten can have more than four
 225                    constructing atoms, so NRAL(ft) <= 5 */
 226                 const int         maxj = NRAL(ilist.functionType);
 227                 std::vector<real> cam(maxj, 0);
 228                 GMX_ASSERT(maxj <= 5, "This code expect at most 5 atoms in a vsite");
 229                 for (int j = 1; j < maxj; j++)
 230                 {
 231                     const int aj = ilist.iatoms[i + 1 + j];
 232                     cam[j]       = getMass(moltype.atoms, aj, setMassesToOne);
 233                     if (cam[j] == 0)
 234                     {
 235                         cam[j] = vsite_m[aj];
 236                     }
 237                     /* A vsite should be constructed from normal atoms or
 238                      * vsites of lower complexity, which we have processed
 239                      * in a previous iteration.
 240                      */
 241                     GMX_ASSERT(cam[j] != 0, "We should have a non-zero mass");
 242                 }
 243
 244                 switch (ilist.functionType)
 245                 {
 246                     case F_VSITE2:
 247                         /* Exact */
 248                         vsite_m[a1] = (cam[1] * cam[2])
 249                                       / (cam[2] * gmx::square(1 - ip.vsite.a)
 250                                          + cam[1] * gmx::square(ip.vsite.a));
 251                         break;
 252                     case F_VSITE3:
 253                         /* Exact */
 254                         vsite_m[a1] = (cam[1] * cam[2] * cam[3])
 255                                       / (cam[2] * cam[3] * gmx::square(1 - ip.vsite.a - ip.vsite.b)
 256                                          + cam[1] * cam[3] * gmx::square(ip.vsite.a)
 257                                          + cam[1] * cam[2] * gmx::square(ip.vsite.b));
 258                         break;
 259                     case F_VSITEN:
 260                         GMX_RELEASE_ASSERT(false, "VsiteN should not end up in this code path");
 261                         break;
 262                     default:
 263                         /* Use the mass of the lightest constructing atom.
 264                          * This is an approximation.
 265                          * If the distance of the virtual site to the
 266                          * constructing atom is less than all distances
 267                          * between constructing atoms, this is a safe
 268                          * over-estimate of the displacement of the vsite.
 269                          * This condition holds for all H mass replacement
 270                          * vsite constructions, except for SP2/3 groups.
 271                          * In SP3 groups one H will have a F_VSITE3
 272                          * construction, so even there the total drift
 273                          * estimate shouldn't be far off.
 274                          */
 275                         vsite_m[a1] = cam[1];
 276                         for (int j = 2; j < maxj; j++)
 277                         {
 278                             vsite_m[a1] = std::min(vsite_m[a1], cam[j]);
 279                         }
 280                         numNonlinearVsites++;
 281                         break;
 282                 }
 283             }
 284             else
 285             {
 286                 /* Exact */
 287                 real inv_mass             = 0;
 288                 int  numConstructingAtoms = ffparams.iparams[ilist.iatoms[i]].vsiten.n;
 289                 for (int j = 0; j < 3 * numConstructingAtoms; j += 3)
 290                 {
 291                     int  aj    = ilist.iatoms[i + j + 2];
 292                     real coeff = ffparams.iparams[ilist.iatoms[i + j]].vsiten.a;
 293                     real m_aj;
 294                     if (moltype.atoms.atom[aj].ptype == ParticleType::VSite)
 295                     {
 296                         m_aj = vsite_m[aj];
 297                     }
 298                     else
 299                     {
 300                         m_aj = moltype.atoms.atom[aj].m;
 301                     }
 302                     if (m_aj <= 0)
 303                     {
 304                         gmx_incons("The mass of a vsiten constructing atom is <= 0");
 305                     }
 306                     inv_mass += coeff * coeff / m_aj;
 307                 }
 308                 vsite_m[a1] = 1 / inv_mass;
 309                 /* Correct the loop increment of i for processes more than 1 entry */
 310                 i += (numConstructingAtoms - 1) * ilistStride(ilist);
 311             }
 312             if (gmx_debug_at)
 313             {
 314                 fprintf(debug,
 315                         "atom %4d %-20s mass %6.3f\n",
 316                         a1,
 317                         interaction_function[ilist.functionType].longname,
 318                         vsite_m[a1]);
 319             }
 320         }
 321     }
 322
 323     if (debug && numNonlinearVsites > 0)
 324     {
 325         fprintf(debug, "The molecule type has %d non-linear virtual constructions\n", numNonlinearVsites);
 326     }
 327 }
 328
 329 static std::vector<VerletbufAtomtype> getVerletBufferAtomtypes(const gmx_mtop_t& mtop, const bool setMassesToOne)
 330 {
 331     std::vector<VerletbufAtomtype> att;
 332     int                            ft, i, a1, a2, a3, a;
 333     const t_iparams*               ip;
 334
 335     for (const gmx_molblock_t& molblock : mtop.molblock)
 336     {
 337         int                  nmol    = molblock.nmol;
 338         const gmx_moltype_t& moltype = mtop.moltype[molblock.type];
 339         const t_atoms*       atoms   = &moltype.atoms;
 340
 341         /* Check for constraints, as they affect the kinetic energy.
 342          * For virtual sites we need the masses and geometry of
 343          * the constructing atoms to determine their velocity distribution.
 344          * Thus we need a list of properties for all atoms which
 345          * we partially fill when looping over constraints.
 346          */
 347         std::vector<atom_nonbonded_kinetic_prop_t> prop(atoms->nr);
 348
 349         for (ft = F_CONSTR; ft <= F_CONSTRNC; ft++)
 350         {
 351             const InteractionList& il = moltype.ilist[ft];
 352
 353             for (i = 0; i < il.size(); i += 1 + NRAL(ft))
 354             {
 355                 ip         = &mtop.ffparams.iparams[il.iatoms[i]];
 356                 a1         = il.iatoms[i + 1];
 357                 a2         = il.iatoms[i + 2];
 358                 real mass1 = getMass(*atoms, a1, setMassesToOne);
 359                 real mass2 = getMass(*atoms, a2, setMassesToOne);
 360                 if (mass2 > prop[a1].con_mass)
 361                 {
 362                     prop[a1].con_mass = mass2;
 363                     prop[a1].con_len  = ip->constr.dA;
 364                 }
 365                 if (mass1 > prop[a2].con_mass)
 366                 {
 367                     prop[a2].con_mass = mass1;
 368                     prop[a2].con_len  = ip->constr.dA;
 369                 }
 370             }
 371         }
 372
 373         const InteractionList& il = moltype.ilist[F_SETTLE];
 374
 375         for (i = 0; i < il.size(); i += 1 + NRAL(F_SETTLE))
 376         {
 377             ip = &mtop.ffparams.iparams[il.iatoms[i]];
 378             a1 = il.iatoms[i + 1];
 379             a2 = il.iatoms[i + 2];
 380             a3 = il.iatoms[i + 3];
 381             /* Usually the mass of a1 (usually oxygen) is larger than a2/a3.
 382              * If this is not the case, we overestimate the displacement,
 383              * which leads to a larger buffer (ok since this is an exotic case).
 384              */
 385             prop[a1].con_mass = getMass(*atoms, a2, setMassesToOne);
 386             prop[a1].con_len  = ip->settle.doh;
 387
 388             prop[a2].con_mass = getMass(*atoms, a1, setMassesToOne);
 389             prop[a2].con_len  = ip->settle.doh;
 390
 391             prop[a3].con_mass = getMass(*atoms, a1, setMassesToOne);
 392             prop[a3].con_len  = ip->settle.doh;
 393         }
 394
 395         std::vector<real> vsite_m(atoms->nr);
 396         get_vsite_masses(moltype, mtop.ffparams, setMassesToOne, vsite_m);
 397
 398         for (a = 0; a < atoms->nr; a++)
 399         {
 400             if (atoms->atom[a].ptype == ParticleType::VSite)
 401             {
 402                 prop[a].mass = vsite_m[a];
 403             }
 404             else
 405             {
 406                 prop[a].mass = getMass(*atoms, a, setMassesToOne);
 407             }
 408             prop[a].type = atoms->atom[a].type;
 409             prop[a].q    = atoms->atom[a].q;
 410             /* We consider an atom constrained, #DOF=2, when it is
 411              * connected with constraints to (at least one) atom with
 412              * a mass of more than 0.4x its own mass. This is not a critical
 413              * parameter, since with roughly equal masses the unconstrained
 414              * and constrained displacement will not differ much (and both
 415              * overestimate the displacement).
 416              */
 417             prop[a].bConstr = (prop[a].con_mass > 0.4 * prop[a].mass);
 418
 419             addAtomtype(&att, prop[a], nmol);
 420         }
 421     }
 422
 423     if (gmx_debug_at)
 424     {
 425         for (size_t a = 0; a < att.size(); a++)
 426         {
 427             fprintf(debug,
 428                     "type %zu: m %5.2f t %d q %6.3f con %s con_m %5.3f con_l %5.3f n %d\n",
 429                     a,
 430                     att[a].prop.mass,
 431                     att[a].prop.type,
 432                     att[a].prop.q,
 433                     gmx::boolToString(att[a].prop.bConstr),
 434                     att[a].prop.con_mass,
 435                     att[a].prop.con_len,
 436                     att[a].n);
 437         }
 438     }
 439
 440     return att;
 441 }
 442
 443 /* This function computes two components of the estimate of the variance
 444  * in the displacement of one atom in a system of two constrained atoms.
 445  * Returns in sigma2_2d the variance due to rotation of the constrained
 446  * atom around the atom to which it constrained.
 447  * Returns in sigma2_3d the variance due to displacement of the COM
 448  * of the whole system of the two constrained atoms.
 449  *
 450  * Note that we only take a single constraint (the one to the heaviest atom)
 451  * into account. If an atom has multiple constraints, this will result in
 452  * an overestimate of the displacement, which gives a larger drift and buffer.
 453  */
 454 void constrained_atom_sigma2(real kT_fac, const atom_nonbonded_kinetic_prop_t* prop, real* sigma2_2d, real* sigma2_3d)
 455 {
 456     /* Here we decompose the motion of a constrained atom into two
 457      * components: rotation around the COM and translation of the COM.
 458      */
 459
 460     /* Determine the variance of the arc length for the two rotational DOFs */
 461     real massFraction = prop->con_mass / (prop->mass + prop->con_mass);
 462     real sigma2_rot   = kT_fac * massFraction / prop->mass;
 463
 464     /* The distance from the atom to the COM, i.e. the rotational arm */
 465     real comDistance = prop->con_len * massFraction;
 466
 467     /* The variance relative to the arm */
 468     real sigma2_rel = sigma2_rot / gmx::square(comDistance);
 469
 470     /* For sigma2_rel << 1 we don't notice the rotational effect and
 471      * we have a normal, Gaussian displacement distribution.
 472      * For larger sigma2_rel the displacement is much less, in fact it can
 473      * not exceed 2*comDistance. We can calculate MSD/arm^2 as:
 474      *   integral_x=0-inf distance2(x) x/sigma2_rel exp(-x^2/(2 sigma2_rel)) dx
 475      * where x is angular displacement and distance2(x) is the distance^2
 476      * between points at angle 0 and x:
 477      *   distance2(x) = (sin(x) - sin(0))^2 + (cos(x) - cos(0))^2
 478      * The limiting value of this MSD is 2, which is also the value for
 479      * a uniform rotation distribution that would be reached at long time.
 480      * The maximum is 2.5695 at sigma2_rel = 4.5119.
 481      * We approximate this integral with a rational polynomial with
 482      * coefficients from a Taylor expansion. This approximation is an
 483      * overestimate for all values of sigma2_rel. Its maximum value
 484      * of 2.6491 is reached at sigma2_rel = sqrt(45/2) = 4.7434.
 485      * We keep the approximation constant after that.
 486      * We use this approximate MSD as the variance for a Gaussian distribution.
 487      *
 488      * NOTE: For any sensible buffer tolerance this will result in a (large)
 489      * overestimate of the buffer size, since the Gaussian has a long tail,
 490      * whereas the actual distribution can not reach values larger than 2.
 491      */
 492     /* Coeffients obtained from a Taylor expansion */
 493     const real a = 1.0 / 3.0;
 494     const real b = 2.0 / 45.0;
 495
 496     /* Our approximation is constant after sigma2_rel = 1/sqrt(b) */
 497     sigma2_rel = std::min(sigma2_rel, 1 / std::sqrt(b));
 498
 499     /* Compute the approximate sigma^2 for 2D motion due to the rotation */
 500     *sigma2_2d =
 501             gmx::square(comDistance) * sigma2_rel / (1 + a * sigma2_rel + b * gmx::square(sigma2_rel));
 502
 503     /* The constrained atom also moves (in 3D) with the COM of both atoms */
 504     *sigma2_3d = kT_fac / (prop->mass + prop->con_mass);
 505 }
 506
 507 static void get_atom_sigma2(real kT_fac, const atom_nonbonded_kinetic_prop_t* prop, real* sigma2_2d, real* sigma2_3d)
 508 {
 509     if (prop->bConstr)
 510     {
 511         /* Complicated constraint calculation in a separate function */
 512         constrained_atom_sigma2(kT_fac, prop, sigma2_2d, sigma2_3d);
 513     }
 514     else
 515     {
 516         /* Unconstrained atom: trivial */
 517         *sigma2_2d = 0;
 518         *sigma2_3d = kT_fac / prop->mass;
 519     }
 520 }
 521
 522 static void approx_2dof(real s2, real x, real* shift, real* scale)
 523 {
 524     /* A particle with 1 DOF constrained has 2 DOFs instead of 3.
 525      * This code is also used for particles with multiple constraints,
 526      * in which case we overestimate the displacement.
 527      * The 2DOF distribution is sqrt(pi/2)*erfc(r/(sqrt(2)*s))/(2*s).
 528      * We approximate this with scale*Gaussian(s,r+shift),
 529      * by matching the distribution value and derivative at x.
 530      * This is a tight overestimate for all r>=0 at any s and x.
 531      */
 532     real ex, er;
 533
 534     ex = std::exp(-x * x / (2 * s2));
 535     er = std::erfc(x / std::sqrt(2 * s2));
 536
 537     *shift = -x + std::sqrt(2 * s2 / M_PI) * ex / er;
 538     *scale = 0.5 * M_PI * std::exp(ex * ex / (M_PI * er * er)) * er;
 539 }
 540
 541 // Returns an (over)estimate of the energy drift for a single atom pair,
 542 // given the kinetic properties, displacement variances and list buffer.
 543 static real energyDriftAtomPair(bool                     isConstrained_i,
 544                                 bool                     isConstrained_j,
 545                                 real                     s2,
 546                                 real                     s2i_2d,
 547                                 real                     s2j_2d,
 548                                 real                     r_buffer,
 549                                 const pot_derivatives_t* der)
 550 {
 551     // For relatively small arguments erfc() is so small that if will be 0.0
 552     // when stored in a float. We set an argument limit of 8 (Erfc(8)=1e-29),
 553     // such that we can divide by erfc and have some space left for arithmetic.
 554     const real erfc_arg_max = 8.0;
 555
 556     real rsh    = r_buffer;
 557     real sc_fac = 1.0;
 558
 559     real c_exp, c_erfc;
 560
 561     if (rsh * rsh > 2 * s2 * erfc_arg_max * erfc_arg_max)
 562     {
 563         // Below we calculate c_erfc = 0.5*erfc(rsh/sqrt(2*s2))
 564         // When rsh/sqrt(2*s2) increases, this erfc will be the first
 565         // result that underflows and becomes 0.0. To avoid this,
 566         // we set c_exp=0 and c_erfc=0 for large arguments.
 567         // This also avoids NaN in approx_2dof().
 568         // In any relevant case this has no effect on the results,
 569         // since c_exp < 6e-29, so the displacement is completely
 570         // negligible for such atom pairs (and an overestimate).
 571         // In nearly all use cases, there will be other atom pairs
 572         // that contribute much more to the total, so zeroing
 573         // this particular contribution has no effect at all.
 574         c_exp  = 0;
 575         c_erfc = 0;
 576     }
 577     else
 578     {
 579         /* For constraints: adapt r and scaling for the Gaussian */
 580         if (isConstrained_i)
 581         {
 582             real sh, sc;
 583
 584             approx_2dof(s2i_2d, r_buffer * s2i_2d / s2, &sh, &sc);
 585             rsh += sh;
 586             sc_fac *= sc;
 587         }
 588         if (isConstrained_j)
 589         {
 590             real sh, sc;
 591
 592             approx_2dof(s2j_2d, r_buffer * s2j_2d / s2, &sh, &sc);
 593             rsh += sh;
 594             sc_fac *= sc;
 595         }
 596
 597         /* Exact contribution of an atom pair with Gaussian displacement
 598          * with sigma s to the energy drift for a potential with
 599          * derivative -md and second derivative dd at the cut-off.
 600          * The only catch is that for potentials that change sign
 601          * near the cut-off there could be an unlucky compensation
 602          * of positive and negative energy drift.
 603          * Such potentials are extremely rare though.
 604          *
 605          * Note that pot has unit energy*length, as the linear
 606          * atom density still needs to be put in.
 607          */
 608         c_exp  = std::exp(-rsh * rsh / (2 * s2)) / std::sqrt(2 * M_PI);
 609         c_erfc = 0.5 * std::erfc(rsh / (std::sqrt(2 * s2)));
 610     }
 611     real s    = std::sqrt(s2);
 612     real rsh2 = rsh * rsh;
 613
 614     real pot1 = sc_fac * der->md1 / 2 * ((rsh2 + s2) * c_erfc - rsh * s * c_exp);
 615     real pot2 = sc_fac * der->d2 / 6 * (s * (rsh2 + 2 * s2) * c_exp - rsh * (rsh2 + 3 * s2) * c_erfc);
 616     real pot3 = sc_fac * der->md3 / 24
 617                 * ((rsh2 * rsh2 + 6 * rsh2 * s2 + 3 * s2 * s2) * c_erfc - rsh * s * (rsh2 + 5 * s2) * c_exp);
 618
 619     return pot1 + pot2 + pot3;
 620 }
 621
 622 // Computes and returns an estimate of the energy drift for the whole system
 623 static real energyDrift(gmx::ArrayRef<const VerletbufAtomtype> att,
 624                         const gmx_ffparams_t*                  ffp,
 625                         real                                   kT_fac,
 626                         const pot_derivatives_t*               ljDisp,
 627                         const pot_derivatives_t*               ljRep,
 628                         const pot_derivatives_t*               elec,
 629                         real                                   rlj,
 630                         real                                   rcoulomb,
 631                         real                                   rlist,
 632                         real                                   boxvol)
 633 {
 634     double drift_tot = 0;
 635
 636     if (kT_fac == 0)
 637     {
 638         /* No atom displacements: no drift, avoid division by 0 */
 639         return drift_tot;
 640     }
 641
 642     // Here add up the contribution of all atom pairs in the system to
 643     // (estimated) energy drift by looping over all atom type pairs.
 644     for (gmx::index i = 0; i < att.ssize(); i++)
 645     {
 646         // Get the thermal displacement variance for the i-atom type
 647         const atom_nonbonded_kinetic_prop_t* prop_i = &att[i].prop;
 648         real                                 s2i_2d, s2i_3d;
 649         get_atom_sigma2(kT_fac, prop_i, &s2i_2d, &s2i_3d);
 650
 651         for (gmx::index j = i; j < att.ssize(); j++)
 652         {
 653             // Get the thermal displacement variance for the j-atom type
 654             const atom_nonbonded_kinetic_prop_t* prop_j = &att[j].prop;
 655             real                                 s2j_2d, s2j_3d;
 656             get_atom_sigma2(kT_fac, prop_j, &s2j_2d, &s2j_3d);
 657
 658             /* Add up the up to four independent variances */
 659             real s2 = s2i_2d + s2i_3d + s2j_2d + s2j_3d;
 660
 661             // Set -V', V'' and -V''' at the cut-off for LJ */
 662             real              c6  = ffp->iparams[prop_i->type * ffp->atnr + prop_j->type].lj.c6;
 663             real              c12 = ffp->iparams[prop_i->type * ffp->atnr + prop_j->type].lj.c12;
 664             pot_derivatives_t lj;
 665             lj.md1 = c6 * ljDisp->md1 + c12 * ljRep->md1;
 666             lj.d2  = c6 * ljDisp->d2 + c12 * ljRep->d2;
 667             lj.md3 = c6 * ljDisp->md3 + c12 * ljRep->md3;
 668
 669             real pot_lj = energyDriftAtomPair(
 670                     prop_i->bConstr, prop_j->bConstr, s2, s2i_2d, s2j_2d, rlist - rlj, &lj);
 671
 672             // Set -V' and V'' at the cut-off for Coulomb
 673             pot_derivatives_t elec_qq;
 674             elec_qq.md1 = elec->md1 * prop_i->q * prop_j->q;
 675             elec_qq.d2  = elec->d2 * prop_i->q * prop_j->q;
 676             elec_qq.md3 = 0;
 677
 678             real pot_q = energyDriftAtomPair(
 679                     prop_i->bConstr, prop_j->bConstr, s2, s2i_2d, s2j_2d, rlist - rcoulomb, &elec_qq);
 680
 681             // Note that attractive and repulsive potentials for individual
 682             // pairs can partially cancel.
 683             real pot = pot_lj + pot_q;
 684
 685             /* Multiply by the number of atom pairs */
 686             if (j == i)
 687             {
 688                 pot *= static_cast<double>(att[i].n) * (att[i].n - 1) / 2;
 689             }
 690             else
 691             {
 692                 pot *= static_cast<double>(att[i].n) * att[j].n;
 693             }
 694             /* We need the line density to get the energy drift of the system.
 695              * The effective average r^2 is close to (rlist+sigma)^2.
 696              */
 697             pot *= 4 * M_PI * gmx::square(rlist + std::sqrt(s2)) / boxvol;
 698
 699             /* Add the unsigned drift to avoid cancellation of errors */
 700             drift_tot += std::abs(pot);
 701         }
 702     }
 703
 704     return drift_tot;
 705 }
 706
 707 // Returns the chance that a particle in a cluster is at distance rlist
 708 // when the cluster is at distance rlist
 709 static real surface_frac(int cluster_size, real particle_distance, real rlist)
 710 {
 711     real d, area_rel;
 712
 713     if (rlist < 0.5 * particle_distance)
 714     {
 715         /* We have non overlapping spheres */
 716         return 1.0;
 717     }
 718
 719     /* Half the inter-particle distance relative to rlist */
 720     d = 0.5 * particle_distance / rlist;
 721
 722     /* Determine the area of the surface at distance rlist to the closest
 723      * particle, relative to surface of a sphere of radius rlist.
 724      * The formulas below assume close to cubic cells for the pair search grid,
 725      * which the pair search code tries to achieve.
 726      * Note that in practice particle distances will not be delta distributed,
 727      * but have some spread, often involving shorter distances,
 728      * as e.g. O-H bonds in a water molecule. Thus the estimates below will
 729      * usually be slightly too high and thus conservative.
 730      */
 731     switch (cluster_size)
 732     {
 733         case 1:
 734             /* One particle: trivial */
 735             area_rel = 1.0;
 736             break;
 737         case 2:
 738             /* Two particles: two spheres at fractional distance 2*a */
 739             area_rel = 1.0 + d;
 740             break;
 741         case 4:
 742             /* We assume a perfect, symmetric tetrahedron geometry.
 743              * The surface around a tetrahedron is too complex for a full
 744              * analytical solution, so we use a Taylor expansion.
 745              */
 746             area_rel = (1.0
 747                         + 1 / M_PI
 748                                   * (6 * std::acos(1 / std::sqrt(3)) * d
 749                                      + std::sqrt(3) * d * d
 750                                                * (1.0 + 5.0 / 18.0 * d * d + 7.0 / 45.0 * d * d * d * d
 751                                                   + 83.0 / 756.0 * d * d * d * d * d * d)));
 752             break;
 753         default: gmx_incons("surface_frac called with unsupported cluster_size");
 754     }
 755
 756     return area_rel / cluster_size;
 757 }
 758
 759 /* Returns the negative of the third derivative of a potential r^-p
 760  * with a force-switch function, evaluated at the cut-off rc.
 761  */
 762 static real md3_force_switch(real p, real rswitch, real rc)
 763 {
 764     /* The switched force function is:
 765      * p*r^-(p+1) + a*(r - rswitch)^2 + b*(r - rswitch)^3
 766      */
 767     real a, b;
 768     real md3_pot, md3_sw;
 769
 770     a = -((p + 4) * rc - (p + 1) * rswitch) / (pow(rc, p + 2) * gmx::square(rc - rswitch));
 771     b = ((p + 3) * rc - (p + 1) * rswitch) / (pow(rc, p + 2) * gmx::power3(rc - rswitch));
 772
 773     md3_pot = (p + 2) * (p + 1) * p * pow(rc, p + 3);
 774     md3_sw  = 2 * a + 6 * b * (rc - rswitch);
 775
 776     return md3_pot + md3_sw;
 777 }
 778
 779 /* Returns the variance of the atomic displacement over timePeriod.
 780  *
 781  * Note: When not using BD with a non-mass dependendent friction coefficient,
 782  *       the return value still needs to be divided by the particle mass.
 783  */
 784 static real displacementVariance(const t_inputrec& ir, real temperature, real timePeriod)
 785 {
 786     real kT_fac;
 787
 788     if (ir.eI == IntegrationAlgorithm::BD)
 789     {
 790         /* Get the displacement distribution from the random component only.
 791          * With accurate integration the systematic (force) displacement
 792          * should be negligible (unless nstlist is extremely large, which
 793          * you wouldn't do anyhow).
 794          */
 795         kT_fac = 2 * gmx::c_boltz * temperature * timePeriod;
 796         if (ir.bd_fric > 0)
 797         {
 798             /* This is directly sigma^2 of the displacement */
 799             kT_fac /= ir.bd_fric;
 800         }
 801         else
 802         {
 803             /* Per group tau_t is not implemented yet, use the maximum */
 804             real tau_t = ir.opts.tau_t[0];
 805             for (int i = 1; i < ir.opts.ngtc; i++)
 806             {
 807                 tau_t = std::max(tau_t, ir.opts.tau_t[i]);
 808             }
 809
 810             kT_fac *= tau_t;
 811             /* This kT_fac needs to be divided by the mass to get sigma^2 */
 812         }
 813     }
 814     else
 815     {
 816         kT_fac = gmx::c_boltz * temperature * gmx::square(timePeriod);
 817     }
 818
 819     return kT_fac;
 820 }
 821
 822 /* Returns the largest sigma of the Gaussian displacement over all particle
 823  * types. This ignores constraints, so is an overestimate.
 824  */
 825 static real maxSigma(real kT_fac, gmx::ArrayRef<const VerletbufAtomtype> att)
 826 {
 827     GMX_ASSERT(!att.empty(), "We should have at least one type");
 828     real smallestMass = att[0].prop.mass;
 829     for (const auto& atomType : att)
 830     {
 831         smallestMass = std::min(smallestMass, atomType.prop.mass);
 832     }
 833
 834     return 2 * std::sqrt(kT_fac / smallestMass);
 835 }
 836
 837 real calcVerletBufferSize(const gmx_mtop_t&         mtop,
 838                           const real                boxVolume,
 839                           const t_inputrec&         ir,
 840                           const int                 nstlist,
 841                           const int                 listLifetime,
 842                           real                      referenceTemperature,
 843                           const VerletbufListSetup& listSetup)
 844 {
 845     double resolution;
 846     char*  env;
 847
 848     real particle_distance;
 849     real nb_clust_frac_pairs_not_in_list_at_cutoff;
 850
 851     real elfac;
 852     int  ib0, ib1, ib;
 853     real rb, rl;
 854     real drift;
 855
 856     if (!EI_DYNAMICS(ir.eI))
 857     {
 858         gmx_incons(
 859                 "Can only determine the Verlet buffer size for integrators that perform dynamics");
 860     }
 861     if (ir.verletbuf_tol <= 0)
 862     {
 863         gmx_incons("The Verlet buffer tolerance needs to be larger than zero");
 864     }
 865
 866     if (referenceTemperature < 0)
 867     {
 868         /* We use the maximum temperature with multiple T-coupl groups.
 869          * We could use a per particle temperature, but since particles
 870          * interact, this might underestimate the buffer size.
 871          */
 872         referenceTemperature = maxReferenceTemperature(ir);
 873
 874         GMX_RELEASE_ASSERT(referenceTemperature >= 0,
 875                            "Without T-coupling we should not end up here");
 876     }
 877
 878     /* Resolution of the buffer size */
 879     resolution = 0.001;
 880
 881     env = getenv("GMX_VERLET_BUFFER_RES");
 882     if (env != nullptr)
 883     {
 884         sscanf(env, "%lf", &resolution);
 885     }
 886
 887     /* In an atom wise pair-list there would be no pairs in the list
 888      * beyond the pair-list cut-off.
 889      * However, we use a pair-list of groups vs groups of atoms.
 890      * For groups of 4 atoms, the parallelism of SSE instructions, only
 891      * 10% of the atoms pairs are not in the list just beyond the cut-off.
 892      * As this percentage increases slowly compared to the decrease of the
 893      * Gaussian displacement distribution over this range, we can simply
 894      * reduce the drift by this fraction.
 895      * For larger groups, e.g. of 8 atoms, this fraction will be lower,
 896      * so then buffer size will be on the conservative (large) side.
 897      *
 898      * Note that the formulas used here do not take into account
 899      * cancellation of errors which could occur by missing both
 900      * attractive and repulsive interactions.
 901      *
 902      * The only major assumption is homogeneous particle distribution.
 903      * For an inhomogeneous system, such as a liquid-vapor system,
 904      * the buffer will be underestimated. The actual energy drift
 905      * will be higher by the factor: local/homogeneous particle density.
 906      *
 907      * The results of this estimate have been checked againt simulations.
 908      * In most cases the real drift differs by less than a factor 2.
 909      */
 910
 911     /* Worst case assumption: HCP packing of particles gives largest distance */
 912     particle_distance = std::cbrt(boxVolume * std::sqrt(2) / mtop.natoms);
 913
 914     /* TODO: Obtain masses through (future) integrator functionality
 915      *       to avoid scattering the code with (or forgetting) checks.
 916      */
 917     const bool setMassesToOne = (ir.eI == IntegrationAlgorithm::BD && ir.bd_fric > 0);
 918     const auto att            = getVerletBufferAtomtypes(mtop, setMassesToOne);
 919     GMX_ASSERT(!att.empty(), "We expect at least one type");
 920
 921     if (debug)
 922     {
 923         fprintf(debug, "particle distance assuming HCP packing: %f nm\n", particle_distance);
 924         fprintf(debug, "energy drift atom types: %zu\n", att.size());
 925     }
 926
 927     pot_derivatives_t ljDisp = { 0, 0, 0 };
 928     pot_derivatives_t ljRep  = { 0, 0, 0 };
 929     real              repPow = mtop.ffparams.reppow;
 930
 931     if (ir.vdwtype == VanDerWaalsType::Cut)
 932     {
 933         real sw_range, md3_pswf;
 934
 935         switch (ir.vdw_modifier)
 936         {
 937             case InteractionModifiers::None:
 938             case InteractionModifiers::PotShift:
 939                 /* -dV/dr of -r^-6 and r^-reppow */
 940                 ljDisp.md1 = -6 * std::pow(ir.rvdw, -7.0);
 941                 ljRep.md1  = repPow * std::pow(ir.rvdw, -(repPow + 1));
 942                 /* The contribution of the higher derivatives is negligible */
 943                 break;
 944             case InteractionModifiers::ForceSwitch:
 945                 /* At the cut-off: V=V'=V''=0, so we use only V''' */
 946                 ljDisp.md3 = -md3_force_switch(6.0, ir.rvdw_switch, ir.rvdw);
 947                 ljRep.md3  = md3_force_switch(repPow, ir.rvdw_switch, ir.rvdw);
 948                 break;
 949             case InteractionModifiers::PotSwitch:
 950                 /* At the cut-off: V=V'=V''=0.
 951                  * V''' is given by the original potential times
 952                  * the third derivative of the switch function.
 953                  */
 954                 sw_range = ir.rvdw - ir.rvdw_switch;
 955                 md3_pswf = 60.0 / gmx::power3(sw_range);
 956
 957                 ljDisp.md3 = -std::pow(ir.rvdw, -6.0) * md3_pswf;
 958                 ljRep.md3  = std::pow(ir.rvdw, -repPow) * md3_pswf;
 959                 break;
 960             default: gmx_incons("Unimplemented VdW modifier");
 961         }
 962     }
 963     else if (EVDW_PME(ir.vdwtype))
 964     {
 965         real b   = calc_ewaldcoeff_lj(ir.rvdw, ir.ewald_rtol_lj);
 966         real r   = ir.rvdw;
 967         real br  = b * r;
 968         real br2 = br * br;
 969         real br4 = br2 * br2;
 970         real br6 = br4 * br2;
 971         // -dV/dr of g(br)*r^-6 [where g(x) = exp(-x^2)(1+x^2+x^4/2),
 972         // see LJ-PME equations in manual] and r^-reppow
 973         ljDisp.md1 = -std::exp(-br2) * (br6 + 3.0 * br4 + 6.0 * br2 + 6.0) * std::pow(r, -7.0);
 974         ljRep.md1  = repPow * pow(r, -(repPow + 1));
 975         // The contribution of the higher derivatives is negligible
 976     }
 977     else
 978     {
 979         gmx_fatal(FARGS,
 980                   "Energy drift calculation is only implemented for plain cut-off Lennard-Jones "
 981                   "interactions");
 982     }
 983
 984     elfac = gmx::c_one4PiEps0 / ir.epsilon_r;
 985
 986     // Determine the 1st and 2nd derivative for the electostatics
 987     pot_derivatives_t elec = { 0, 0, 0 };
 988
 989     if (ir.coulombtype == CoulombInteractionType::Cut || EEL_RF(ir.coulombtype))
 990     {
 991         real eps_rf, k_rf;
 992
 993         if (ir.coulombtype == CoulombInteractionType::Cut)
 994         {
 995             eps_rf = 1;
 996             k_rf   = 0;
 997         }
 998         else
 999         {
1000             eps_rf = ir.epsilon_rf / ir.epsilon_r;
1001             if (eps_rf != 0)
1002             {
1003                 k_rf = (eps_rf - ir.epsilon_r) / (gmx::power3(ir.rcoulomb) * (2 * eps_rf + ir.epsilon_r));
1004             }
1005             else
1006             {
1007                 /* reactionFieldPermitivity = infinity */
1008                 k_rf = 0.5 / gmx::power3(ir.rcoulomb);
1009             }
1010         }
1011
1012         if (eps_rf > 0)
1013         {
1014             elec.md1 = elfac * (1.0 / gmx::square(ir.rcoulomb) - 2 * k_rf * ir.rcoulomb);
1015         }
1016         elec.d2 = elfac * (2.0 / gmx::power3(ir.rcoulomb) + 2 * k_rf);
1017     }
1018     else if (EEL_PME(ir.coulombtype) || ir.coulombtype == CoulombInteractionType::Ewald)
1019     {
1020         real b, rc, br;
1021
1022         b        = calc_ewaldcoeff_q(ir.rcoulomb, ir.ewald_rtol);
1023         rc       = ir.rcoulomb;
1024         br       = b * rc;
1025         elec.md1 = elfac * (b * std::exp(-br * br) * M_2_SQRTPI / rc + std::erfc(br) / (rc * rc));
1026         elec.d2  = elfac / (rc * rc)
1027                   * (2 * b * (1 + br * br) * std::exp(-br * br) * M_2_SQRTPI + 2 * std::erfc(br) / rc);
1028     }
1029     else
1030     {
1031         gmx_fatal(FARGS,
1032                   "Energy drift calculation is only implemented for Reaction-Field and Ewald "
1033                   "electrostatics");
1034     }
1035
1036     /* Determine the variance of the atomic displacement
1037      * over list_lifetime steps: kT_fac
1038      * For inertial dynamics (not Brownian dynamics) the mass factor
1039      * is not included in kT_fac, it is added later.
1040      */
1041     const real kT_fac = displacementVariance(ir, referenceTemperature, listLifetime * ir.delta_t);
1042
1043     if (debug)
1044     {
1045         fprintf(debug, "Derivatives of non-bonded potentials at the cut-off:\n");
1046         fprintf(debug, "LJ disp. -V' %9.2e V'' %9.2e -V''' %9.2e\n", ljDisp.md1, ljDisp.d2, ljDisp.md3);
1047         fprintf(debug, "LJ rep.  -V' %9.2e V'' %9.2e -V''' %9.2e\n", ljRep.md1, ljRep.d2, ljRep.md3);
1048         fprintf(debug, "Electro. -V' %9.2e V'' %9.2e\n", elec.md1, elec.d2);
1049         fprintf(debug, "sqrt(kT_fac) %f\n", std::sqrt(kT_fac));
1050     }
1051
1052     /* Search using bisection */
1053     ib0 = -1;
1054     /* The drift will be neglible at 5 times the max sigma */
1055     ib1 = static_cast<int>(5 * maxSigma(kT_fac, att) / resolution) + 1;
1056     while (ib1 - ib0 > 1)
1057     {
1058         ib = (ib0 + ib1) / 2;
1059         rb = ib * resolution;
1060         rl = std::max(ir.rvdw, ir.rcoulomb) + rb;
1061
1062         /* Calculate the average energy drift at the last step
1063          * of the nstlist steps at which the pair-list is used.
1064          */
1065         drift = energyDrift(
1066                 att, &mtop.ffparams, kT_fac, &ljDisp, &ljRep, &elec, ir.rvdw, ir.rcoulomb, rl, boxVolume);
1067
1068         /* Correct for the fact that we are using a Ni x Nj particle pair list
1069          * and not a 1 x 1 particle pair list. This reduces the drift.
1070          */
1071         /* We don't have a formula for 8 (yet), use 4 which is conservative */
1072         nb_clust_frac_pairs_not_in_list_at_cutoff =
1073                 surface_frac(std::min(listSetup.cluster_size_i, 4), particle_distance, rl)
1074                 * surface_frac(std::min(listSetup.cluster_size_j, 4), particle_distance, rl);
1075         drift *= nb_clust_frac_pairs_not_in_list_at_cutoff;
1076
1077         /* Convert the drift to drift per unit time per atom */
1078         drift /= nstlist * ir.delta_t * mtop.natoms;
1079
1080         if (debug)
1081         {
1082             fprintf(debug,
1083                     "ib %3d %3d %3d rb %.3f %dx%d fac %.3f drift %.1e\n",
1084                     ib0,
1085                     ib,
1086                     ib1,
1087                     rb,
1088                     listSetup.cluster_size_i,
1089                     listSetup.cluster_size_j,
1090                     nb_clust_frac_pairs_not_in_list_at_cutoff,
1091                     drift);
1092         }
1093
1094         if (std::abs(drift) > ir.verletbuf_tol)
1095         {
1096             ib0 = ib;
1097         }
1098         else
1099         {
1100             ib1 = ib;
1101         }
1102     }
1103
1104     return std::max(ir.rvdw, ir.rcoulomb) + ib1 * resolution;
1105 }
1106
1107 /* Returns the pairlist buffer size for use as a minimum buffer size
1108  *
1109  * Note that this is a rather crude estimate. It is ok for a buffer
1110  * set for good energy conservation or RF electrostatics. But it is
1111  * too small with PME and the buffer set with the default tolerance.
1112  */
1113 static real minCellSizeFromPairlistBuffer(const t_inputrec& ir)
1114 {
1115     return ir.rlist - std::max(ir.rvdw, ir.rcoulomb);
1116 }
1117
1118 static real chanceOfAtomCrossingCell(gmx::ArrayRef<const VerletbufAtomtype> atomtypes, real kT_fac, real cellSize)
1119 {
1120     /* We assume atoms are distributed uniformly over the cell width.
1121      * Once an atom has moved by more than the cellSize (as passed
1122      * as the buffer argument to energyDriftAtomPair() below),
1123      * the chance of crossing the boundary of the neighbor cell
1124      * thus increases as 1/cellSize with the additional displacement
1125      * on top of cellSize. We thus create a linear interaction with
1126      * derivative = -1/cellSize. Using this in the energyDriftAtomPair
1127      * function will return the chance of crossing the next boundary.
1128      */
1129     const pot_derivatives_t boundaryInteraction = { 1 / cellSize, 0, 0 };
1130
1131     real chance = 0;
1132     for (const VerletbufAtomtype& att : atomtypes)
1133     {
1134         const atom_nonbonded_kinetic_prop_t& propAtom = att.prop;
1135         real                                 s2_2d;
1136         real                                 s2_3d;
1137         get_atom_sigma2(kT_fac, &propAtom, &s2_2d, &s2_3d);
1138
1139         real chancePerAtom = energyDriftAtomPair(
1140                 propAtom.bConstr, false, s2_2d + s2_3d, s2_2d, 0, cellSize, &boundaryInteraction);
1141
1142         if (propAtom.bConstr)
1143         {
1144             /* energyDriftAtomPair() uses an unlimited Gaussian displacement
1145              * distribution for constrained atoms, whereas they can
1146              * actually not move more than the COM of the two constrained
1147              * atoms plus twice the distance from the COM.
1148              * Use this maximum, limited displacement when this results in
1149              * a smaller chance (note that this is still an overestimate).
1150              */
1151             real massFraction = propAtom.con_mass / (propAtom.mass + propAtom.con_mass);
1152             real comDistance  = propAtom.con_len * massFraction;
1153
1154             real chanceWithMaxDistance = energyDriftAtomPair(
1155                     false, false, s2_3d, 0, 0, cellSize - 2 * comDistance, &boundaryInteraction);
1156             chancePerAtom = std::min(chancePerAtom, chanceWithMaxDistance);
1157         }
1158
1159         /* Take into account the line density of the boundary */
1160         chancePerAtom /= cellSize;
1161
1162         chance += att.n * chancePerAtom;
1163     }
1164
1165     return chance;
1166 }
1167
1168 /* Struct for storing constraint properties of atoms */
1169 struct AtomConstraintProps
1170 {
1171     void addConstraint(real length)
1172     {
1173         numConstraints += 1;
1174         sumLengths += length;
1175     }
1176
1177     int  numConstraints = 0; /* The number of constraints of an atom */
1178     real sumLengths     = 0; /* The sum of constraint length over the constraints */
1179 };
1180
1181 /* Constructs and returns a list of constraint properties per atom */
1182 static std::vector<AtomConstraintProps> getAtomConstraintProps(const gmx_moltype_t&  moltype,
1183                                                                const gmx_ffparams_t& ffparams)
1184 {
1185     const t_atoms&                   atoms = moltype.atoms;
1186     std::vector<AtomConstraintProps> props(atoms.nr);
1187
1188     for (const auto& ilist : extractILists(moltype.ilist, IF_CONSTRAINT))
1189     {
1190         // Settles are handled separately
1191         if (ilist.functionType == F_SETTLE)
1192         {
1193             continue;
1194         }
1195
1196         for (size_t i = 0; i < ilist.iatoms.size(); i += ilistStride(ilist))
1197         {
1198             int  type   = ilist.iatoms[i];
1199             int  a1     = ilist.iatoms[i + 1];
1200             int  a2     = ilist.iatoms[i + 2];
1201             real length = ffparams.iparams[type].constr.dA;
1202             props[a1].addConstraint(length);
1203             props[a2].addConstraint(length);
1204         }
1205     }
1206
1207     return props;
1208 }
1209
1210 /* Return the chance of at least one update group in a molecule crossing a cell of size cellSize */
1211 static real chanceOfUpdateGroupCrossingCell(const gmx_moltype_t&          moltype,
1212                                             const gmx_ffparams_t&         ffparams,
1213                                             const gmx::RangePartitioning& updateGrouping,
1214                                             real                          kT_fac,
1215                                             real                          cellSize)
1216 {
1217     const t_atoms& atoms = moltype.atoms;
1218     GMX_ASSERT(updateGrouping.fullRange().end() == atoms.nr,
1219                "The update groups should match the molecule type");
1220
1221     const pot_derivatives_t boundaryInteraction = { 1 / cellSize, 0, 0 };
1222
1223     const auto atomConstraintProps = getAtomConstraintProps(moltype, ffparams);
1224
1225     real chance = 0;
1226     for (int group = 0; group < updateGrouping.numBlocks(); group++)
1227     {
1228         const auto& block = updateGrouping.block(group);
1229         /* Determine the number of atoms with constraints and the mass of the COG */
1230         int  numAtomsWithConstraints = 0;
1231         real massSum                 = 0;
1232         for (const int atom : block)
1233         {
1234             if (atomConstraintProps[atom].numConstraints > 0)
1235             {
1236                 numAtomsWithConstraints++;
1237             }
1238             massSum += moltype.atoms.atom[atom].m;
1239         }
1240         /* Determine the maximum possible distance between the center of mass
1241          * and the center of geometry of the update group
1242          */
1243         real maxComCogDistance = 0;
1244         if (numAtomsWithConstraints == 2)
1245         {
1246             for (const int atom : block)
1247             {
1248                 if (atomConstraintProps[atom].numConstraints > 0)
1249                 {
1250                     GMX_ASSERT(atomConstraintProps[atom].numConstraints == 1,
1251                                "Two atoms should be connected by one constraint");
1252                     maxComCogDistance = std::abs(atoms.atom[atom].m / massSum - 0.5)
1253                                         * atomConstraintProps[atom].sumLengths;
1254                     break;
1255                 }
1256             }
1257         }
1258         else if (numAtomsWithConstraints > 2)
1259         {
1260             for (const int atom : block)
1261             {
1262                 if (atomConstraintProps[atom].numConstraints == numAtomsWithConstraints - 1)
1263                 {
1264                     real comCogDistance = atomConstraintProps[atom].sumLengths / numAtomsWithConstraints;
1265                     maxComCogDistance = std::max(maxComCogDistance, comCogDistance);
1266                 }
1267             }
1268         }
1269         else if (block.size() > 1)
1270         {
1271             // All normal atoms must be connected by SETTLE
1272             for (const int atom : block)
1273             {
1274                 const auto& ilist = moltype.ilist[F_SETTLE];
1275                 GMX_RELEASE_ASSERT(!ilist.empty(),
1276                                    "There should be at least one settle in this moltype");
1277                 for (int i = 0; i < ilist.size(); i += 1 + NRAL(F_SETTLE))
1278                 {
1279                     if (atom == ilist.iatoms[i + 1])
1280                     {
1281                         const t_iparams& iparams = ffparams.iparams[ilist.iatoms[i]];
1282                         real             dOH     = iparams.settle.doh;
1283                         real             dHH     = iparams.settle.dhh;
1284                         real             dOMidH  = std::sqrt(dOH * dOH - 0.25_real * dHH * dHH);
1285                         maxComCogDistance =
1286                                 std::abs(atoms.atom[atom].m / massSum - 1.0_real / 3.0_real) * dOMidH;
1287                     }
1288                 }
1289             }
1290         }
1291         real s2_3d = kT_fac / massSum;
1292         chance += energyDriftAtomPair(
1293                 false, false, s2_3d, 0, 0, cellSize - 2 * maxComCogDistance, &boundaryInteraction);
1294     }
1295
1296     return chance;
1297 }
1298
1299 /* Return the chance of at least one update group in the system crossing a cell of size cellSize */
1300 static real chanceOfUpdateGroupCrossingCell(const gmx_mtop_t&      mtop,
1301                                             PartitioningPerMoltype updateGrouping,
1302                                             real                   kT_fac,
1303                                             real                   cellSize)
1304 {
1305     GMX_RELEASE_ASSERT(updateGrouping.size() == mtop.moltype.size(),
1306                        "The update groups should match the topology");
1307
1308     real chance = 0;
1309     for (const gmx_molblock_t& molblock : mtop.molblock)
1310     {
1311         const gmx_moltype_t& moltype = mtop.moltype[molblock.type];
1312         chance += molblock.nmol
1313                   * chanceOfUpdateGroupCrossingCell(
1314                           moltype, mtop.ffparams, updateGrouping[molblock.type], kT_fac, cellSize);
1315     }
1316
1317     return chance;
1318 }
1319
1320 real minCellSizeForAtomDisplacement(const gmx_mtop_t&      mtop,
1321                                     const t_inputrec&      ir,
1322                                     PartitioningPerMoltype updateGrouping,
1323                                     real                   chanceRequested)
1324 {
1325     if (!EI_DYNAMICS(ir.eI) || (EI_MD(ir.eI) && ir.etc == TemperatureCoupling::No))
1326     {
1327         return minCellSizeFromPairlistBuffer(ir);
1328     }
1329
1330     /* We use the maximum temperature with multiple T-coupl groups.
1331      * We could use a per particle temperature, but since particles
1332      * interact, this might underestimate the displacements.
1333      */
1334     const real temperature = maxReferenceTemperature(ir);
1335
1336     const bool setMassesToOne = (ir.eI == IntegrationAlgorithm::BD && ir.bd_fric > 0);
1337
1338     const auto atomtypes = getVerletBufferAtomtypes(mtop, setMassesToOne);
1339
1340     const real kT_fac = displacementVariance(ir, temperature, ir.nstlist * ir.delta_t);
1341
1342     /* Resolution of the cell size */
1343     real resolution = 0.001;
1344
1345     /* Search using bisection, avoid 0 and start at 1 */
1346     int ib0 = 0;
1347     /* The chance will be neglible at 10 times the max sigma */
1348     int  ib1      = int(10 * maxSigma(kT_fac, atomtypes) / resolution) + 1;
1349     real cellSize = 0;
1350     while (ib1 - ib0 > 1)
1351     {
1352         int ib   = (ib0 + ib1) / 2;
1353         cellSize = ib * resolution;
1354
1355         real chance;
1356         if (updateGrouping.empty())
1357         {
1358             chance = chanceOfAtomCrossingCell(atomtypes, kT_fac, cellSize);
1359         }
1360         else
1361         {
1362             chance = chanceOfUpdateGroupCrossingCell(mtop, updateGrouping, kT_fac, cellSize);
1363         }
1364
1365         /* Note: chance is for every nstlist steps */
1366         if (chance > chanceRequested * ir.nstlist)
1367         {
1368             ib0 = ib;
1369         }
1370         else
1371         {
1372             ib1 = ib;
1373         }
1374     }
1375
1376     return cellSize;
1377 }