src/gromacs/listed_forces/pairs.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
   5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 /*! \internal \file
  37  *
  38  * \brief This file defines functions for "pair" interactions
  39  * (i.e. listed non-bonded interactions, e.g. 1-4 interactions)
  40  *
  41  * \author Mark Abraham <mark.j.abraham@gmail.com>
  42  *
  43  * \ingroup module_listed_forces
  44  */
  45 #include "gmxpre.h"
  46
  47 #include "pairs.h"
  48
  49 #include <cmath>
  50
  51 #include "gromacs/listed_forces/bonded.h"
  52 #include "gromacs/math/functions.h"
  53 #include "gromacs/math/vec.h"
  54 #include "gromacs/mdtypes/forcerec.h"
  55 #include "gromacs/mdtypes/group.h"
  56 #include "gromacs/mdtypes/interaction_const.h"
  57 #include "gromacs/mdtypes/md_enums.h"
  58 #include "gromacs/mdtypes/nblist.h"
  59 #include "gromacs/mdtypes/simulation_workload.h"
  60 #include "gromacs/pbcutil/ishift.h"
  61 #include "gromacs/pbcutil/pbc.h"
  62 #include "gromacs/pbcutil/pbc_simd.h"
  63 #include "gromacs/simd/simd.h"
  64 #include "gromacs/simd/simd_math.h"
  65 #include "gromacs/simd/vector_operations.h"
  66 #include "gromacs/tables/forcetable.h"
  67 #include "gromacs/utility/basedefinitions.h"
  68 #include "gromacs/utility/fatalerror.h"
  69 #include "gromacs/utility/gmxassert.h"
  70
  71 #include "listed_internal.h"
  72
  73 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  74
  75 /*! \brief Issue a warning if a listed interaction is beyond a table limit */
  76 static void warning_rlimit(const rvec* x, int ai, int aj, int* global_atom_index, real r, real rlimit)
  77 {
  78     gmx_warning(
  79             "Listed nonbonded interaction between particles %d and %d\n"
  80             "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
  81             "This is likely either a 1,4 interaction, or a listed interaction inside\n"
  82             "a smaller molecule you are decoupling during a free energy calculation.\n"
  83             "Since interactions at distances beyond the table cannot be computed,\n"
  84             "they are skipped until they are inside the table limit again. You will\n"
  85             "only see this message once, even if it occurs for several interactions.\n\n"
  86             "IMPORTANT: This should not happen in a stable simulation, so there is\n"
  87             "probably something wrong with your system. Only change the table-extension\n"
  88             "distance in the mdp file if you are really sure that is the reason.\n",
  89             glatnr(global_atom_index, ai),
  90             glatnr(global_atom_index, aj),
  91             r,
  92             rlimit);
  93
  94     if (debug)
  95     {
  96         fprintf(debug,
  97                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. "
  98                 "Ignored\n",
  99                 x[ai][XX],
 100                 x[ai][YY],
 101                 x[ai][ZZ],
 102                 x[aj][XX],
 103                 x[aj][YY],
 104                 x[aj][ZZ],
 105                 glatnr(global_atom_index, ai),
 106                 glatnr(global_atom_index, aj),
 107                 r);
 108     }
 109 }
 110
 111 /*! \brief Compute the energy and force for a single pair interaction */
 112 static real evaluate_single(real        r2,
 113                             real        tabscale,
 114                             const real* vftab,
 115                             real        tableStride,
 116                             real        qq,
 117                             real        c6,
 118                             real        c12,
 119                             real*       velec,
 120                             real*       vvdw)
 121 {
 122     real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
 123     int  ntab;
 124
 125     /* Do the tabulated interactions - first table lookup */
 126     rinv = gmx::invsqrt(r2);
 127     r    = r2 * rinv;
 128     rtab = r * tabscale;
 129     ntab = static_cast<int>(rtab);
 130     eps  = rtab - ntab;
 131     eps2 = eps * eps;
 132     ntab = static_cast<int>(tableStride * ntab);
 133     /* Electrostatics */
 134     Y     = vftab[ntab];
 135     F     = vftab[ntab + 1];
 136     Geps  = eps * vftab[ntab + 2];
 137     Heps2 = eps2 * vftab[ntab + 3];
 138     Fp    = F + Geps + Heps2;
 139     VVe   = Y + eps * Fp;
 140     FFe   = Fp + Geps + 2.0 * Heps2;
 141     /* Dispersion */
 142     Y     = vftab[ntab + 4];
 143     F     = vftab[ntab + 5];
 144     Geps  = eps * vftab[ntab + 6];
 145     Heps2 = eps2 * vftab[ntab + 7];
 146     Fp    = F + Geps + Heps2;
 147     VVd   = Y + eps * Fp;
 148     FFd   = Fp + Geps + 2.0 * Heps2;
 149     /* Repulsion */
 150     Y     = vftab[ntab + 8];
 151     F     = vftab[ntab + 9];
 152     Geps  = eps * vftab[ntab + 10];
 153     Heps2 = eps2 * vftab[ntab + 11];
 154     Fp    = F + Geps + Heps2;
 155     VVr   = Y + eps * Fp;
 156     FFr   = Fp + Geps + 2.0 * Heps2;
 157
 158     *velec = qq * VVe;
 159     *vvdw  = c6 * VVd + c12 * VVr;
 160
 161     fscal = -(qq * FFe + c6 * FFd + c12 * FFr) * tabscale * rinv;
 162
 163     return fscal;
 164 }
 165
 166 static inline real sixthRoot(const real r)
 167 {
 168     return gmx::invsqrt(std::cbrt(r));
 169 }
 170
 171 /*! \brief Compute the energy and force for a single pair interaction under FEP */
 172 template<SoftcoreType softcoreType>
 173 static real free_energy_evaluate_single(real                                           r2,
 174                                         real                                           rCoulCutoff,
 175                                         const interaction_const_t::SoftCoreParameters& scParams,
 176                                         real                                           tabscale,
 177                                         const real*                                    vftab,
 178                                         real                                           tableStride,
 179                                         real                                           qqA,
 180                                         real                                           c6A,
 181                                         real                                           c12A,
 182                                         real                                           qqB,
 183                                         real                                           c6B,
 184                                         real                                           c12B,
 185                                         real                                           facel,
 186                                         const real                                     LFC[2],
 187                                         const real                                     LFV[2],
 188                                         const real                                     DLF[2],
 189                                         const real                                     lfac_coul[2],
 190                                         const real                                     lfac_vdw[2],
 191                                         const real dlfac_coul[2],
 192                                         const real dlfac_vdw[2],
 193                                         real*      velectot,
 194                                         real*      vvdwtot,
 195                                         real*      dvdl)
 196 {
 197     real       rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
 198     real       qq[2], c6[2], c12[2], sigma6[2], sigma_pow[2];
 199     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul_sum, dvdl_vdw_sum;
 200     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
 201     real       fscal_vdw[2], fscal_elec[2];
 202     real       velec[2], vvdw[2];
 203     real       dvdl_elec[2], dvdl_vdw[2];
 204     real       rQ, rLJ;
 205     real       scaleDvdlRCoul;
 206     int        i, ntab;
 207     const real half = 0.5_real;
 208     const real one  = 1.0_real;
 209     const real two  = 2.0_real;
 210
 211     qq[0]  = qqA;
 212     qq[1]  = qqB;
 213     c6[0]  = c6A;
 214     c6[1]  = c6B;
 215     c12[0] = c12A;
 216     c12[1] = c12B;
 217
 218     const real rpm2 = r2 * r2;   /* r4 */
 219     const real rp   = rpm2 * r2; /* r6 */
 220     const real r    = sqrt(r2);  /* r1 */
 221
 222     /* Loop over state A(0) and B(1) */
 223     for (i = 0; i < 2; i++)
 224     {
 225         if (softcoreType == SoftcoreType::Beutler || softcoreType == SoftcoreType::Gapsys)
 226         {
 227             if ((c6[i] > 0) && (c12[i] > 0))
 228             {
 229                 /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
 230                  * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
 231                  */
 232                 sigma6[i] = half * c12[i] / c6[i];
 233                 if ((sigma6[i] < scParams.sigma6Minimum) && softcoreType == SoftcoreType::Beutler) /* for disappearing coul and vdw with soft core at the same time */
 234                 {
 235                     sigma6[i] = scParams.sigma6Minimum;
 236                 }
 237             }
 238             else
 239             {
 240                 sigma6[i] = scParams.sigma6WithInvalidSigma;
 241             }
 242             sigma_pow[i] = sigma6[i];
 243         }
 244     }
 245
 246     if (softcoreType == SoftcoreType::Beutler || softcoreType == SoftcoreType::Gapsys)
 247     {
 248         /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 249         if ((c12[0] > 0) && (c12[1] > 0))
 250         {
 251             alpha_vdw_eff  = 0;
 252             alpha_coul_eff = 0;
 253         }
 254         else
 255         {
 256             if (softcoreType == SoftcoreType::Beutler)
 257             {
 258                 alpha_vdw_eff  = scParams.alphaVdw;
 259                 alpha_coul_eff = scParams.alphaCoulomb;
 260             }
 261             else if (softcoreType == SoftcoreType::Gapsys)
 262             {
 263                 alpha_vdw_eff  = scParams.alphaVdw;
 264                 alpha_coul_eff = gmx::sixthroot(scParams.sigma6WithInvalidSigma);
 265             }
 266         }
 267     }
 268
 269     /* Loop over A and B states again */
 270     for (i = 0; i < 2; i++)
 271     {
 272         fscal_elec[i] = 0;
 273         fscal_vdw[i]  = 0;
 274         velec[i]      = 0;
 275         vvdw[i]       = 0;
 276         dvdl_elec[i]  = 0;
 277         dvdl_vdw[i]   = 0;
 278         rQ            = 0;
 279         rLJ           = 0;
 280
 281         /* Only spend time on A or B state if it is non-zero */
 282         if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 283         {
 284             /* Coulomb */
 285             if (softcoreType == SoftcoreType::Beutler)
 286             {
 287                 rpinv  = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
 288                 r_coul = sixthRoot(rpinv);
 289             }
 290             else
 291             {
 292                 rpinv  = one / rp;
 293                 r_coul = r;
 294             }
 295
 296             if (softcoreType == SoftcoreType::Gapsys)
 297             {
 298                 rQ = gmx::sixthroot(1. - LFC[i]) * (1. + std::fabs(qq[i] / facel));
 299                 rQ *= alpha_coul_eff;
 300                 scaleDvdlRCoul = 1.0;
 301                 if (rQ > rCoulCutoff)
 302                 {
 303                     rQ             = rCoulCutoff;
 304                     scaleDvdlRCoul = 0.0;
 305                 }
 306             }
 307
 308             if ((softcoreType == SoftcoreType::Gapsys) && (r < rQ))
 309             {
 310                 real rInvQ    = 1.0 / rQ;
 311                 real constFac = qq[i] * rInvQ;
 312                 real linFac   = constFac * r * rInvQ;
 313                 real quadrFac = linFac * r * rInvQ;
 314
 315                 /* Computing Coulomb force and potential energy */
 316                 fscal_elec[i] = 2. * quadrFac - 3. * linFac;
 317                 fscal_elec[i] *= rpinv;
 318
 319                 velec[i] = quadrFac - 3. * (linFac - constFac);
 320
 321                 dvdl_elec[i] += scaleDvdlRCoul * DLF[i] * 0.5 * (LFC[i] / (1. - LFC[i]))
 322                                 * (quadrFac - 2. * linFac + constFac);
 323             }
 324             else // Beutler, resp. hardcore
 325             {
 326                 /* Electrostatics table lookup data */
 327                 rtab = r_coul * tabscale;
 328                 ntab = static_cast<int>(rtab);
 329                 eps  = rtab - ntab;
 330                 eps2 = eps * eps;
 331                 ntab = static_cast<int>(tableStride * ntab);
 332                 /* Electrostatics */
 333                 Y             = vftab[ntab];
 334                 F             = vftab[ntab + 1];
 335                 Geps          = eps * vftab[ntab + 2];
 336                 Heps2         = eps2 * vftab[ntab + 3];
 337                 Fp            = F + Geps + Heps2;
 338                 VV            = Y + eps * Fp;
 339                 FF            = Fp + Geps + two * Heps2;
 340                 velec[i]      = qq[i] * VV;
 341                 fscal_elec[i] = -qq[i] * FF * r_coul * rpinv * tabscale;
 342             }
 343
 344             /* Vdw */
 345             if (softcoreType == SoftcoreType::Beutler)
 346             {
 347                 rpinv = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
 348                 r_vdw = sixthRoot(rpinv);
 349             }
 350             else
 351             {
 352                 rpinv = one / rp;
 353                 r_vdw = r;
 354             }
 355
 356             if (softcoreType == SoftcoreType::Gapsys)
 357             {
 358                 constexpr real c_twentySixSeventh = 26.0 / 7.0;
 359
 360                 rLJ = gmx::sixthroot(c_twentySixSeventh * sigma6[i] * (1. - LFV[i]));
 361                 rLJ *= alpha_vdw_eff;
 362             }
 363
 364             if ((softcoreType == SoftcoreType::Gapsys) && (r < rLJ))
 365             {
 366                 // scaled values for c6 and c12
 367                 real c6s, c12s;
 368                 c6s  = c6[i] / 6.0;
 369                 c12s = c12[i] / 12.0;
 370
 371                 /* Temporary variables for inverted values */
 372                 real rInvLJ = 1.0 / rLJ;
 373                 real rInv14, rInv13, rInv12;
 374                 real rInv8, rInv7, rInv6;
 375                 rInv6 = rInvLJ * rInvLJ * rInvLJ;
 376                 rInv6 *= rInv6;
 377                 rInv7  = rInv6 * rInvLJ;
 378                 rInv8  = rInv7 * rInvLJ;
 379                 rInv14 = c12s * rInv7 * rInv7 * r2;
 380                 rInv13 = c12s * rInv7 * rInv6 * r;
 381                 rInv12 = c12s * rInv6 * rInv6;
 382                 rInv8 *= c6s * r2;
 383                 rInv7 *= c6s * r;
 384                 rInv6 *= c6s;
 385
 386                 /* Temporary variables for A and B */
 387                 real quadrFac, linearFac, constFac;
 388                 quadrFac  = 156. * rInv14 - 42. * rInv8;
 389                 linearFac = 168. * rInv13 - 48. * rInv7;
 390                 constFac  = 91. * rInv12 - 28. * rInv6;
 391
 392                 /* Computing LJ force and potential energy*/
 393                 fscal_vdw[i] = quadrFac - linearFac;
 394                 fscal_vdw[i] *= rpinv;
 395
 396                 vvdw[i] = 0.5 * quadrFac - linearFac + constFac;
 397
 398                 dvdl_vdw[i] += DLF[i] * 28. * (LFV[i] / (1. - LFV[i]))
 399                                * ((6.5 * rInv14 - rInv8) - (13. * rInv13 - 2. * rInv7)
 400                                   + (6.5 * rInv12 - rInv6));
 401             }
 402             else // Beutler, resp. hardcore
 403             {
 404                 /* Vdw table lookup data */
 405                 rtab = r_vdw * tabscale;
 406                 ntab = static_cast<int>(rtab);
 407                 eps  = rtab - ntab;
 408                 eps2 = eps * eps;
 409                 ntab = 12 * ntab;
 410                 /* Dispersion */
 411                 Y            = vftab[ntab + 4];
 412                 F            = vftab[ntab + 5];
 413                 Geps         = eps * vftab[ntab + 6];
 414                 Heps2        = eps2 * vftab[ntab + 7];
 415                 Fp           = F + Geps + Heps2;
 416                 VV           = Y + eps * Fp;
 417                 FF           = Fp + Geps + two * Heps2;
 418                 vvdw[i]      = c6[i] * VV;
 419                 fscal_vdw[i] = -c6[i] * FF;
 420
 421                 /* Repulsion */
 422                 Y     = vftab[ntab + 8];
 423                 F     = vftab[ntab + 9];
 424                 Geps  = eps * vftab[ntab + 10];
 425                 Heps2 = eps2 * vftab[ntab + 11];
 426                 Fp    = F + Geps + Heps2;
 427                 VV    = Y + eps * Fp;
 428                 FF    = Fp + Geps + two * Heps2;
 429                 vvdw[i] += c12[i] * VV;
 430                 fscal_vdw[i] -= c12[i] * FF;
 431                 fscal_vdw[i] *= r_vdw * rpinv * tabscale;
 432             }
 433         }
 434     }
 435     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
 436     /* Assemble A and B states */
 437     velecsum      = 0;
 438     vvdwsum       = 0;
 439     dvdl_coul_sum = 0;
 440     dvdl_vdw_sum  = 0;
 441     fscal         = 0;
 442     for (i = 0; i < 2; i++)
 443     {
 444         velecsum += LFC[i] * velec[i];
 445         vvdwsum += LFV[i] * vvdw[i];
 446
 447         fscal += (LFC[i] * fscal_elec[i] + LFV[i] * fscal_vdw[i]) * rpm2;
 448
 449         if (softcoreType == SoftcoreType::Gapsys)
 450         {
 451             dvdl_coul_sum += dvdl_elec[i];
 452             dvdl_vdw_sum += dvdl_vdw[i];
 453         }
 454         dvdl_coul_sum += velec[i] * DLF[i];
 455         dvdl_vdw_sum += vvdw[i] * DLF[i];
 456         if (softcoreType == SoftcoreType::Beutler)
 457         {
 458             dvdl_coul_sum += LFC[i] * alpha_coul_eff * dlfac_coul[i] * fscal_elec[i] * sigma_pow[i];
 459             dvdl_vdw_sum += LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * fscal_vdw[i] * sigma_pow[i];
 460         }
 461     }
 462
 463     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdl_coul_sum;
 464     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdl_vdw_sum;
 465
 466     *velectot = velecsum;
 467     *vvdwtot  = vvdwsum;
 468
 469     return fscal;
 470 }
 471
 472 /*! \brief Calculate pair interactions, supports all types and conditions. */
 473 template<BondedKernelFlavor flavor>
 474 static real do_pairs_general(int                           ftype,
 475                              int                           nbonds,
 476                              const t_iatom                 iatoms[],
 477                              const t_iparams               iparams[],
 478                              const rvec                    x[],
 479                              rvec4                         f[],
 480                              rvec                          fshift[],
 481                              const struct t_pbc*           pbc,
 482                              const real*                   lambda,
 483                              real*                         dvdl,
 484                              gmx::ArrayRef<real>           chargeA,
 485                              gmx::ArrayRef<real>           chargeB,
 486                              gmx::ArrayRef<bool>           atomIsPerturbed,
 487                              gmx::ArrayRef<unsigned short> cENER,
 488                              int                           numEnergyGroups,
 489                              const t_forcerec*             fr,
 490                              gmx_grppairener_t*            grppener,
 491                              int*                          global_atom_index)
 492 {
 493     real            qq, c6, c12;
 494     rvec            dx;
 495     int             i, itype, ai, aj, gid;
 496     int             fshift_index;
 497     real            r2;
 498     real            fscal, velec, vvdw;
 499     real*           energygrp_elec;
 500     real*           energygrp_vdw;
 501     static gmx_bool warned_rlimit = FALSE;
 502     /* Free energy stuff */
 503     gmx_bool   bFreeEnergy;
 504     real       LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
 505     real       qqB, c6B, c12B;
 506     const real oneSixth = 1.0_real / 6.0_real;
 507
 508     switch (ftype)
 509     {
 510         case F_LJ14:
 511         case F_LJC14_Q:
 512             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::Coulomb14].data();
 513             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJ14].data();
 514             break;
 515         case F_LJC_PAIRS_NB:
 516             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::CoulombSR].data();
 517             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJSR].data();
 518             break;
 519         default:
 520             energygrp_elec = nullptr; /* Keep compiler happy */
 521             energygrp_vdw  = nullptr; /* Keep compiler happy */
 522             gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
 523     }
 524
 525     if (fr->efep != FreeEnergyPerturbationType::No)
 526     {
 527         /* Lambda factor for state A=1-lambda and B=lambda */
 528         LFC[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
 529         LFV[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
 530         LFC[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
 531         LFV[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
 532
 533         /*derivative of the lambda factor for state A and B */
 534         DLF[0] = -1;
 535         DLF[1] = 1;
 536
 537         GMX_ASSERT(fr->ic->softCoreParameters, "We need soft-core parameters");
 538         const auto& scParams = *fr->ic->softCoreParameters;
 539
 540         for (i = 0; i < 2; i++)
 541         {
 542             lfac_coul[i] = (scParams.lambdaPower == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 543             dlfac_coul[i] = DLF[i] * scParams.lambdaPower * oneSixth
 544                             * (scParams.lambdaPower == 2 ? (1 - LFC[i]) : 1);
 545             lfac_vdw[i]  = (scParams.lambdaPower == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 546             dlfac_vdw[i] = DLF[i] * scParams.lambdaPower * oneSixth
 547                            * (scParams.lambdaPower == 2 ? (1 - LFV[i]) : 1);
 548         }
 549     }
 550
 551     /* TODO This code depends on the logic in tables.c that constructs
 552        the table layout, which should be made explicit in future
 553        cleanup. */
 554     GMX_ASSERT(
 555             etiNR == 3,
 556             "Pair-interaction code that uses GROMACS interaction tables supports exactly 3 tables");
 557     GMX_ASSERT(
 558             fr->pairsTable->interaction == TableInteraction::ElectrostaticVdwRepulsionVdwDispersion,
 559             "Pair interaction kernels need a table with Coulomb, repulsion and dispersion entries");
 560
 561     const real epsfac = fr->ic->epsfac;
 562
 563     bFreeEnergy = FALSE;
 564     for (i = 0; (i < nbonds);)
 565     {
 566         itype = iatoms[i++];
 567         ai    = iatoms[i++];
 568         aj    = iatoms[i++];
 569         gid   = GID(cENER[ai], cENER[aj], numEnergyGroups);
 570
 571         /* Get parameters */
 572         switch (ftype)
 573         {
 574             case F_LJ14:
 575                 bFreeEnergy =
 576                         (fr->efep != FreeEnergyPerturbationType::No
 577                          && ((!atomIsPerturbed.empty() && (atomIsPerturbed[ai] || atomIsPerturbed[aj]))
 578                              || iparams[itype].lj14.c6A != iparams[itype].lj14.c6B
 579                              || iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
 580                 qq  = chargeA[ai] * chargeA[aj] * epsfac * fr->fudgeQQ;
 581                 c6  = iparams[itype].lj14.c6A;
 582                 c12 = iparams[itype].lj14.c12A;
 583                 break;
 584             case F_LJC14_Q:
 585                 qq = iparams[itype].ljc14.qi * iparams[itype].ljc14.qj * epsfac
 586                      * iparams[itype].ljc14.fqq;
 587                 c6  = iparams[itype].ljc14.c6;
 588                 c12 = iparams[itype].ljc14.c12;
 589                 break;
 590             case F_LJC_PAIRS_NB:
 591                 qq  = iparams[itype].ljcnb.qi * iparams[itype].ljcnb.qj * epsfac;
 592                 c6  = iparams[itype].ljcnb.c6;
 593                 c12 = iparams[itype].ljcnb.c12;
 594                 break;
 595             default:
 596                 /* Cannot happen since we called gmx_fatal() above in this case */
 597                 qq = c6 = c12 = 0; /* Keep compiler happy */
 598                 break;
 599         }
 600
 601         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
 602          * included in the general nfbp array now. This means the tables are scaled down by the
 603          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
 604          * be scaled up.
 605          */
 606         c6 *= 6.0;
 607         c12 *= 12.0;
 608
 609         /* Do we need to apply full periodic boundary conditions? */
 610         if (fr->bMolPBC)
 611         {
 612             fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
 613         }
 614         else
 615         {
 616             fshift_index = c_centralShiftIndex;
 617             rvec_sub(x[ai], x[aj], dx);
 618         }
 619         r2 = norm2(dx);
 620
 621         if (r2 >= fr->pairsTable->interactionRange * fr->pairsTable->interactionRange)
 622         {
 623             /* This check isn't race free. But it doesn't matter because if a race occurs the only
 624              * disadvantage is that the warning is printed twice */
 625             if (!warned_rlimit)
 626             {
 627                 warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->pairsTable->interactionRange);
 628                 warned_rlimit = TRUE;
 629             }
 630             continue;
 631         }
 632
 633         if (bFreeEnergy)
 634         {
 635             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
 636             qqB  = chargeB[ai] * chargeB[aj] * epsfac * fr->fudgeQQ;
 637             c6B  = iparams[itype].lj14.c6B * 6.0;
 638             c12B = iparams[itype].lj14.c12B * 12.0;
 639
 640             if (fr->ic->softCoreParameters->softcoreType == SoftcoreType::Gapsys)
 641             {
 642                 fscal = free_energy_evaluate_single<SoftcoreType::Gapsys>(r2,
 643                                                                           fr->ic->rcoulomb,
 644                                                                           *fr->ic->softCoreParameters,
 645                                                                           fr->pairsTable->scale,
 646                                                                           fr->pairsTable->data.data(),
 647                                                                           fr->pairsTable->stride,
 648                                                                           qq,
 649                                                                           c6,
 650                                                                           c12,
 651                                                                           qqB,
 652                                                                           c6B,
 653                                                                           c12B,
 654                                                                           epsfac,
 655                                                                           LFC,
 656                                                                           LFV,
 657                                                                           DLF,
 658                                                                           lfac_coul,
 659                                                                           lfac_vdw,
 660                                                                           dlfac_coul,
 661                                                                           dlfac_vdw,
 662                                                                           &velec,
 663                                                                           &vvdw,
 664                                                                           dvdl);
 665             }
 666             else if (fr->ic->softCoreParameters->softcoreType == SoftcoreType::Beutler)
 667             {
 668                 fscal = free_energy_evaluate_single<SoftcoreType::Beutler>(r2,
 669                                                                            fr->ic->rcoulomb,
 670                                                                            *fr->ic->softCoreParameters,
 671                                                                            fr->pairsTable->scale,
 672                                                                            fr->pairsTable->data.data(),
 673                                                                            fr->pairsTable->stride,
 674                                                                            qq,
 675                                                                            c6,
 676                                                                            c12,
 677                                                                            qqB,
 678                                                                            c6B,
 679                                                                            c12B,
 680                                                                            epsfac,
 681                                                                            LFC,
 682                                                                            LFV,
 683                                                                            DLF,
 684                                                                            lfac_coul,
 685                                                                            lfac_vdw,
 686                                                                            dlfac_coul,
 687                                                                            dlfac_vdw,
 688                                                                            &velec,
 689                                                                            &vvdw,
 690                                                                            dvdl);
 691             }
 692             else
 693             {
 694                 fscal = free_energy_evaluate_single<SoftcoreType::None>(r2,
 695                                                                         fr->ic->rcoulomb,
 696                                                                         *fr->ic->softCoreParameters,
 697                                                                         fr->pairsTable->scale,
 698                                                                         fr->pairsTable->data.data(),
 699                                                                         fr->pairsTable->stride,
 700                                                                         qq,
 701                                                                         c6,
 702                                                                         c12,
 703                                                                         qqB,
 704                                                                         c6B,
 705                                                                         c12B,
 706                                                                         epsfac,
 707                                                                         LFC,
 708                                                                         LFV,
 709                                                                         DLF,
 710                                                                         lfac_coul,
 711                                                                         lfac_vdw,
 712                                                                         dlfac_coul,
 713                                                                         dlfac_vdw,
 714                                                                         &velec,
 715                                                                         &vvdw,
 716                                                                         dvdl);
 717             }
 718         }
 719         else
 720         {
 721             /* Evaluate tabulated interaction without free energy */
 722             fscal = evaluate_single(r2,
 723                                     fr->pairsTable->scale,
 724                                     fr->pairsTable->data.data(),
 725                                     fr->pairsTable->stride,
 726                                     qq,
 727                                     c6,
 728                                     c12,
 729                                     &velec,
 730                                     &vvdw);
 731         }
 732
 733         energygrp_elec[gid] += velec;
 734         energygrp_vdw[gid] += vvdw;
 735         svmul(fscal, dx, dx);
 736
 737         /* Add the forces */
 738         rvec_inc(f[ai], dx);
 739         rvec_dec(f[aj], dx);
 740
 741         if (computeVirial(flavor))
 742         {
 743             if (fshift_index != c_centralShiftIndex)
 744             {
 745                 rvec_inc(fshift[fshift_index], dx);
 746                 rvec_dec(fshift[c_centralShiftIndex], dx);
 747             }
 748         }
 749     }
 750     return 0.0;
 751 }
 752
 753 /*! \brief Calculate pairs, only for plain-LJ + plain Coulomb normal type.
 754  *
 755  * This function is templated for real/SimdReal and for optimization.
 756  */
 757 template<typename T, int pack_size, typename pbc_type>
 758 static void do_pairs_simple(int                 nbonds,
 759                             const t_iatom       iatoms[],
 760                             const t_iparams     iparams[],
 761                             const rvec          x[],
 762                             rvec4               f[],
 763                             const pbc_type      pbc,
 764                             gmx::ArrayRef<real> charge,
 765                             const real          scale_factor)
 766 {
 767     const int nfa1 = 1 + 2;
 768
 769     T six(6);
 770     T twelve(12);
 771     T ef(scale_factor);
 772
 773 #if GMX_SIMD_HAVE_REAL
 774     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[pack_size];
 775     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[pack_size];
 776     alignas(GMX_SIMD_ALIGNMENT) real         coeff[3 * pack_size];
 777 #else
 778     std::int32_t ai[pack_size];
 779     std::int32_t aj[pack_size];
 780     real         coeff[3 * pack_size];
 781 #endif
 782
 783     /* nbonds is #pairs*nfa1, here we step pack_size pairs */
 784     for (int i = 0; i < nbonds; i += pack_size * nfa1)
 785     {
 786         /* Collect atoms for pack_size pairs.
 787          * iu indexes into iatoms, we should not let iu go beyond nbonds.
 788          */
 789         int iu = i;
 790         for (int s = 0; s < pack_size; s++)
 791         {
 792             int itype = iatoms[iu];
 793             ai[s]     = iatoms[iu + 1];
 794             aj[s]     = iatoms[iu + 2];
 795
 796             if (i + s * nfa1 < nbonds)
 797             {
 798                 coeff[0 * pack_size + s] = iparams[itype].lj14.c6A;
 799                 coeff[1 * pack_size + s] = iparams[itype].lj14.c12A;
 800                 coeff[2 * pack_size + s] = charge[ai[s]] * charge[aj[s]];
 801
 802                 /* Avoid indexing the iatoms array out of bounds.
 803                  * We pad the coordinate indices with the last atom pair.
 804                  */
 805                 if (iu + nfa1 < nbonds)
 806                 {
 807                     iu += nfa1;
 808                 }
 809             }
 810             else
 811             {
 812                 /* Pad the coefficient arrays with zeros to get zero forces */
 813                 coeff[0 * pack_size + s] = 0;
 814                 coeff[1 * pack_size + s] = 0;
 815                 coeff[2 * pack_size + s] = 0;
 816             }
 817         }
 818
 819         /* Load the coordinates */
 820         T xi[DIM], xj[DIM];
 821         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]);
 822         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]);
 823
 824         T c6  = load<T>(coeff + 0 * pack_size);
 825         T c12 = load<T>(coeff + 1 * pack_size);
 826         T qq  = load<T>(coeff + 2 * pack_size);
 827
 828         /* We could save these operations by storing 6*C6,12*C12 */
 829         c6  = six * c6;
 830         c12 = twelve * c12;
 831
 832         T dr[DIM];
 833         pbc_dx_aiuc(pbc, xi, xj, dr);
 834
 835         T rsq   = dr[XX] * dr[XX] + dr[YY] * dr[YY] + dr[ZZ] * dr[ZZ];
 836         T rinv  = gmx::invsqrt(rsq);
 837         T rinv2 = rinv * rinv;
 838         T rinv6 = rinv2 * rinv2 * rinv2;
 839
 840         /* Calculate the Coulomb force * r */
 841         T cfr = ef * qq * rinv;
 842
 843         /* Calculate the LJ force * r and add it to the Coulomb part */
 844         T fr = gmx::fma(fms(c12, rinv6, c6), rinv6, cfr);
 845
 846         T finvr = fr * rinv2;
 847         T fx    = finvr * dr[XX];
 848         T fy    = finvr * dr[YY];
 849         T fz    = finvr * dr[ZZ];
 850
 851         /* Add the pair forces to the force array.
 852          * Note that here we might add multiple force components for some atoms
 853          * due to the SIMD padding. But the extra force components are zero.
 854          */
 855         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, fx, fy, fz);
 856         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, fx, fy, fz);
 857     }
 858 }
 859
 860 /*! \brief Calculate all listed pair interactions */
 861 void do_pairs(int                           ftype,
 862               int                           nbonds,
 863               const t_iatom                 iatoms[],
 864               const t_iparams               iparams[],
 865               const rvec                    x[],
 866               rvec4                         f[],
 867               rvec                          fshift[],
 868               const struct t_pbc*           pbc,
 869               const real*                   lambda,
 870               real*                         dvdl,
 871               gmx::ArrayRef<real>           chargeA,
 872               gmx::ArrayRef<real>           chargeB,
 873               gmx::ArrayRef<bool>           atomIsPerturbed,
 874               gmx::ArrayRef<unsigned short> cENER,
 875               const int                     numEnergyGroups,
 876               const t_forcerec*             fr,
 877               const bool                    havePerturbedInteractions,
 878               const gmx::StepWorkload&      stepWork,
 879               gmx_grppairener_t*            grppener,
 880               int*                          global_atom_index)
 881 {
 882     if (ftype == F_LJ14 && fr->ic->vdwtype != VanDerWaalsType::User && !EEL_USER(fr->ic->eeltype)
 883         && !havePerturbedInteractions && (!stepWork.computeVirial && !stepWork.computeEnergy))
 884     {
 885         /* We use a fast code-path for plain LJ 1-4 without FEP.
 886          *
 887          * TODO: Add support for energies (straightforward) and virial
 888          * in the SIMD template. For the virial it's inconvenient to store
 889          * the force sums for the shifts and we should directly calculate
 890          * and sum the virial for the shifts. But we should do this
 891          * at once for the angles and dihedrals as well.
 892          */
 893 #if GMX_SIMD_HAVE_REAL
 894         if (fr->use_simd_kernels)
 895         {
 896             alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
 897             set_pbc_simd(pbc, pbc_simd);
 898
 899             do_pairs_simple<SimdReal, GMX_SIMD_REAL_WIDTH, const real*>(
 900                     nbonds, iatoms, iparams, x, f, pbc_simd, chargeA, fr->ic->epsfac * fr->fudgeQQ);
 901         }
 902         else
 903 #endif
 904         {
 905             /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
 906             t_pbc        pbc_no;
 907             const t_pbc* pbc_nonnull;
 908
 909             if (pbc != nullptr)
 910             {
 911                 pbc_nonnull = pbc;
 912             }
 913             else
 914             {
 915                 set_pbc(&pbc_no, PbcType::No, nullptr);
 916                 pbc_nonnull = &pbc_no;
 917             }
 918
 919             do_pairs_simple<real, 1, const t_pbc*>(
 920                     nbonds, iatoms, iparams, x, f, pbc_nonnull, chargeA, fr->ic->epsfac * fr->fudgeQQ);
 921         }
 922     }
 923     else if (stepWork.computeVirial)
 924     {
 925         do_pairs_general<BondedKernelFlavor::ForcesAndVirialAndEnergy>(ftype,
 926                                                                        nbonds,
 927                                                                        iatoms,
 928                                                                        iparams,
 929                                                                        x,
 930                                                                        f,
 931                                                                        fshift,
 932                                                                        pbc,
 933                                                                        lambda,
 934                                                                        dvdl,
 935                                                                        chargeA,
 936                                                                        chargeB,
 937                                                                        atomIsPerturbed,
 938                                                                        cENER,
 939                                                                        numEnergyGroups,
 940                                                                        fr,
 941                                                                        grppener,
 942                                                                        global_atom_index);
 943     }
 944     else
 945     {
 946         do_pairs_general<BondedKernelFlavor::ForcesAndEnergy>(ftype,
 947                                                               nbonds,
 948                                                               iatoms,
 949                                                               iparams,
 950                                                               x,
 951                                                               f,
 952                                                               fshift,
 953                                                               pbc,
 954                                                               lambda,
 955                                                               dvdl,
 956                                                               chargeA,
 957                                                               chargeB,
 958                                                               atomIsPerturbed,
 959                                                               cENER,
 960                                                               numEnergyGroups,
 961                                                               fr,
 962                                                               grppener,
 963                                                               global_atom_index);
 964     }
 965 }