src/gromacs/listed_forces/pairs.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
   5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 /*! \internal \file
  37  *
  38  * \brief This file defines functions for "pair" interactions
  39  * (i.e. listed non-bonded interactions, e.g. 1-4 interactions)
  40  *
  41  * \author Mark Abraham <mark.j.abraham@gmail.com>
  42  *
  43  * \ingroup module_listed_forces
  44  */
  45 #include "gmxpre.h"
  46
  47 #include "pairs.h"
  48
  49 #include <cmath>
  50
  51 #include "gromacs/listed_forces/bonded.h"
  52 #include "gromacs/math/functions.h"
  53 #include "gromacs/math/vec.h"
  54 #include "gromacs/mdtypes/forcerec.h"
  55 #include "gromacs/mdtypes/group.h"
  56 #include "gromacs/mdtypes/interaction_const.h"
  57 #include "gromacs/mdtypes/md_enums.h"
  58 #include "gromacs/mdtypes/nblist.h"
  59 #include "gromacs/mdtypes/simulation_workload.h"
  60 #include "gromacs/pbcutil/ishift.h"
  61 #include "gromacs/pbcutil/pbc.h"
  62 #include "gromacs/pbcutil/pbc_simd.h"
  63 #include "gromacs/simd/simd.h"
  64 #include "gromacs/simd/simd_math.h"
  65 #include "gromacs/simd/vector_operations.h"
  66 #include "gromacs/tables/forcetable.h"
  67 #include "gromacs/utility/basedefinitions.h"
  68 #include "gromacs/utility/fatalerror.h"
  69 #include "gromacs/utility/gmxassert.h"
  70
  71 #include "listed_internal.h"
  72
  73 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  74
  75 /*! \brief Issue a warning if a listed interaction is beyond a table limit */
  76 static void warning_rlimit(const rvec* x, int ai, int aj, int* global_atom_index, real r, real rlimit)
  77 {
  78     gmx_warning(
  79             "Listed nonbonded interaction between particles %d and %d\n"
  80             "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
  81             "This is likely either a 1,4 interaction, or a listed interaction inside\n"
  82             "a smaller molecule you are decoupling during a free energy calculation.\n"
  83             "Since interactions at distances beyond the table cannot be computed,\n"
  84             "they are skipped until they are inside the table limit again. You will\n"
  85             "only see this message once, even if it occurs for several interactions.\n\n"
  86             "IMPORTANT: This should not happen in a stable simulation, so there is\n"
  87             "probably something wrong with your system. Only change the table-extension\n"
  88             "distance in the mdp file if you are really sure that is the reason.\n",
  89             glatnr(global_atom_index, ai),
  90             glatnr(global_atom_index, aj),
  91             r,
  92             rlimit);
  93
  94     if (debug)
  95     {
  96         fprintf(debug,
  97                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. "
  98                 "Ignored\n",
  99                 x[ai][XX],
 100                 x[ai][YY],
 101                 x[ai][ZZ],
 102                 x[aj][XX],
 103                 x[aj][YY],
 104                 x[aj][ZZ],
 105                 glatnr(global_atom_index, ai),
 106                 glatnr(global_atom_index, aj),
 107                 r);
 108     }
 109 }
 110
 111 /*! \brief Compute the energy and force for a single pair interaction */
 112 static real evaluate_single(real        r2,
 113                             real        tabscale,
 114                             const real* vftab,
 115                             real        tableStride,
 116                             real        qq,
 117                             real        c6,
 118                             real        c12,
 119                             real*       velec,
 120                             real*       vvdw)
 121 {
 122     real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
 123     int  ntab;
 124
 125     /* Do the tabulated interactions - first table lookup */
 126     rinv = gmx::invsqrt(r2);
 127     r    = r2 * rinv;
 128     rtab = r * tabscale;
 129     ntab = static_cast<int>(rtab);
 130     eps  = rtab - ntab;
 131     eps2 = eps * eps;
 132     ntab = static_cast<int>(tableStride * ntab);
 133     /* Electrostatics */
 134     Y     = vftab[ntab];
 135     F     = vftab[ntab + 1];
 136     Geps  = eps * vftab[ntab + 2];
 137     Heps2 = eps2 * vftab[ntab + 3];
 138     Fp    = F + Geps + Heps2;
 139     VVe   = Y + eps * Fp;
 140     FFe   = Fp + Geps + 2.0 * Heps2;
 141     /* Dispersion */
 142     Y     = vftab[ntab + 4];
 143     F     = vftab[ntab + 5];
 144     Geps  = eps * vftab[ntab + 6];
 145     Heps2 = eps2 * vftab[ntab + 7];
 146     Fp    = F + Geps + Heps2;
 147     VVd   = Y + eps * Fp;
 148     FFd   = Fp + Geps + 2.0 * Heps2;
 149     /* Repulsion */
 150     Y     = vftab[ntab + 8];
 151     F     = vftab[ntab + 9];
 152     Geps  = eps * vftab[ntab + 10];
 153     Heps2 = eps2 * vftab[ntab + 11];
 154     Fp    = F + Geps + Heps2;
 155     VVr   = Y + eps * Fp;
 156     FFr   = Fp + Geps + 2.0 * Heps2;
 157
 158     *velec = qq * VVe;
 159     *vvdw  = c6 * VVd + c12 * VVr;
 160
 161     fscal = -(qq * FFe + c6 * FFd + c12 * FFr) * tabscale * rinv;
 162
 163     return fscal;
 164 }
 165
 166 static inline real sixthRoot(const real r)
 167 {
 168     return gmx::invsqrt(std::cbrt(r));
 169 }
 170
 171 /*! \brief Compute the energy and force for a single pair interaction under FEP */
 172 template<KernelSoftcoreType softcoreType>
 173 static real free_energy_evaluate_single(real                                           r2,
 174                                         real                                           rCoulCutoff,
 175                                         const interaction_const_t::SoftCoreParameters& scParams,
 176                                         real                                           tabscale,
 177                                         const real*                                    vftab,
 178                                         real                                           tableStride,
 179                                         real                                           qqA,
 180                                         real                                           c6A,
 181                                         real                                           c12A,
 182                                         real                                           qqB,
 183                                         real                                           c6B,
 184                                         real                                           c12B,
 185                                         real                                           facel,
 186                                         const real                                     LFC[2],
 187                                         const real                                     LFV[2],
 188                                         const real                                     DLF[2],
 189                                         const real                                     lfac_coul[2],
 190                                         const real                                     lfac_vdw[2],
 191                                         const real dlfac_coul[2],
 192                                         const real dlfac_vdw[2],
 193                                         real*      velectot,
 194                                         real*      vvdwtot,
 195                                         real*      dvdl)
 196 {
 197     real       rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
 198     real       qq[2], c6[2], c12[2], sigma6[2], sigma_pow[2];
 199     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul_sum, dvdl_vdw_sum;
 200     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
 201     real       fscal_vdw[2], fscal_elec[2];
 202     real       velec[2], vvdw[2];
 203     real       dvdl_elec[2], dvdl_vdw[2];
 204     real       gapsysScaleLinpointCoul, gapsysScaleLinpointVdW, gapsysSigma6VdW[2];
 205     real       rQ, rLJ;
 206     real       scaleDvdlRCoul;
 207     int        i, ntab;
 208     const real half = 0.5_real;
 209     const real one  = 1.0_real;
 210     const real two  = 2.0_real;
 211
 212     qq[0]  = qqA;
 213     qq[1]  = qqB;
 214     c6[0]  = c6A;
 215     c6[1]  = c6B;
 216     c12[0] = c12A;
 217     c12[1] = c12B;
 218
 219     const real rpm2 = r2 * r2;   /* r4 */
 220     const real rp   = rpm2 * r2; /* r6 */
 221     const real r    = sqrt(r2);  /* r1 */
 222
 223     /* Loop over state A(0) and B(1) */
 224     for (i = 0; i < 2; i++)
 225     {
 226         if constexpr (softcoreType == KernelSoftcoreType::Beutler)
 227         {
 228             if ((c6[i] > 0) && (c12[i] > 0))
 229             {
 230                 /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
 231                  * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
 232                  */
 233                 sigma6[i] = half * c12[i] / c6[i];
 234                 if (sigma6[i] < scParams.sigma6Minimum) /* for disappearing coul and vdw with soft core at the same time */
 235                 {
 236                     sigma6[i] = scParams.sigma6Minimum;
 237                 }
 238             }
 239             else
 240             {
 241                 sigma6[i] = scParams.sigma6WithInvalidSigma;
 242             }
 243             sigma_pow[i] = sigma6[i];
 244         }
 245         // NOLINTNEXTLINE(readability-misleading-indentation) remove when clang-tidy-13 is required
 246         if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
 247         {
 248             if ((c6[i] > 0) && (c12[i] > 0))
 249             {
 250                 /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
 251                  * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
 252                  */
 253                 gapsysSigma6VdW[i] = half * c12[i] / c6[i];
 254             }
 255             else
 256             {
 257                 gapsysSigma6VdW[i] = scParams.gapsysSigma6VdW;
 258             }
 259         }
 260     }
 261
 262     // NOLINTNEXTLINE(readability-misleading-indentation) remove when clang-tidy-13 is required
 263     if constexpr (softcoreType == KernelSoftcoreType::Beutler)
 264     {
 265         /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 266         if ((c12[0] > 0) && (c12[1] > 0))
 267         {
 268             alpha_vdw_eff  = 0;
 269             alpha_coul_eff = 0;
 270         }
 271         else
 272         {
 273             alpha_vdw_eff  = scParams.alphaVdw;
 274             alpha_coul_eff = scParams.alphaCoulomb;
 275         }
 276     }
 277     // NOLINTNEXTLINE(readability-misleading-indentation) remove when clang-tidy-13 is required
 278     if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
 279     {
 280         /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 281         if ((c12[0] > 0) && (c12[1] > 0))
 282         {
 283             gapsysScaleLinpointVdW  = 0;
 284             gapsysScaleLinpointCoul = 0;
 285         }
 286         else
 287         {
 288             gapsysScaleLinpointVdW  = scParams.gapsysScaleLinpointVdW;
 289             gapsysScaleLinpointCoul = scParams.gapsysScaleLinpointCoul;
 290         }
 291     }
 292
 293     /* Loop over A and B states again */
 294     // NOLINTNEXTLINE(readability-misleading-indentation) remove when clang-tidy-13 is required
 295     for (i = 0; i < 2; i++)
 296     {
 297         fscal_elec[i] = 0;
 298         fscal_vdw[i]  = 0;
 299         velec[i]      = 0;
 300         vvdw[i]       = 0;
 301         dvdl_elec[i]  = 0;
 302         dvdl_vdw[i]   = 0;
 303         rQ            = 0;
 304         rLJ           = 0;
 305
 306         /* Only spend time on A or B state if it is non-zero */
 307         if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 308         {
 309             /* Coulomb */
 310             if constexpr (softcoreType == KernelSoftcoreType::Beutler)
 311             {
 312                 rpinv  = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
 313                 r_coul = sixthRoot(rpinv);
 314             }
 315             else
 316             {
 317                 rpinv  = one / rp;
 318                 r_coul = r;
 319             }
 320
 321             // NOLINTNEXTLINE(readability-misleading-indentation) remove when clang-tidy-13 is required
 322             if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
 323             {
 324                 if ((facel != 0) && (LFC[i] < 1))
 325                 {
 326                     rQ = gmx::sixthroot(one - LFC[i]) * (one + std::fabs(qq[i] / facel));
 327                     rQ *= gapsysScaleLinpointCoul;
 328                 }
 329                 else
 330                 {
 331                     rQ = 0;
 332                 }
 333                 scaleDvdlRCoul = 1;
 334                 if (rQ > rCoulCutoff)
 335                 {
 336                     rQ             = rCoulCutoff;
 337                     scaleDvdlRCoul = 0;
 338                 }
 339             }
 340
 341             // NOLINTNEXTLINE(readability-misleading-indentation) remove when clang-tidy-13 is required
 342             if ((softcoreType == KernelSoftcoreType::Gapsys) && (r < rQ))
 343             {
 344                 real rInvQ    = one / rQ;
 345                 real constFac = qq[i] * rInvQ;
 346                 real linFac   = constFac * r * rInvQ;
 347                 real quadrFac = linFac * r * rInvQ;
 348
 349                 /* Computing Coulomb force and potential energy */
 350                 fscal_elec[i] = -2 * quadrFac + 3 * linFac;
 351                 fscal_elec[i] *= rpinv;
 352
 353                 velec[i] = quadrFac - 3 * (linFac - constFac);
 354
 355                 dvdl_elec[i] += scaleDvdlRCoul * DLF[i] * half * (LFC[i] / (1 - LFC[i]))
 356                                 * (quadrFac - 2 * linFac + constFac);
 357             }
 358             else // Beutler, resp. hardcore
 359             {
 360                 /* Electrostatics table lookup data */
 361                 rtab = r_coul * tabscale;
 362                 ntab = static_cast<int>(rtab);
 363                 eps  = rtab - ntab;
 364                 eps2 = eps * eps;
 365                 ntab = static_cast<int>(tableStride * ntab);
 366                 /* Electrostatics */
 367                 Y             = vftab[ntab];
 368                 F             = vftab[ntab + 1];
 369                 Geps          = eps * vftab[ntab + 2];
 370                 Heps2         = eps2 * vftab[ntab + 3];
 371                 Fp            = F + Geps + Heps2;
 372                 VV            = Y + eps * Fp;
 373                 FF            = Fp + Geps + two * Heps2;
 374                 velec[i]      = qq[i] * VV;
 375                 fscal_elec[i] = -qq[i] * FF * r_coul * rpinv * tabscale;
 376             }
 377
 378             /* Vdw */
 379             if constexpr (softcoreType == KernelSoftcoreType::Beutler)
 380             {
 381                 rpinv = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
 382                 r_vdw = sixthRoot(rpinv);
 383             }
 384             else
 385             {
 386                 rpinv = one / rp;
 387                 r_vdw = r;
 388             }
 389
 390             if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
 391             {
 392                 constexpr real c_twentySixSeventh = 26.0_real / 7.0_real;
 393
 394                 if (LFV[i] < 1)
 395                 {
 396
 397                     rLJ = gmx::sixthroot(c_twentySixSeventh * gapsysSigma6VdW[i] * (one - LFV[i]));
 398                     rLJ *= gapsysScaleLinpointVdW;
 399                 }
 400                 else
 401                 {
 402                     rLJ = 0;
 403                 }
 404             }
 405
 406             // NOLINTNEXTLINE(readability-misleading-indentation) remove when clang-tidy-13 is required
 407             if ((softcoreType == KernelSoftcoreType::Gapsys) && (r < rLJ))
 408             {
 409                 // scaled values for c6 and c12
 410                 real c6s, c12s;
 411                 c6s  = c6[i] / 6.0_real;
 412                 c12s = c12[i] / 12.0_real;
 413
 414                 /* Temporary variables for inverted values */
 415                 real rInvLJ = one / rLJ;
 416                 real rInv14, rInv13, rInv12;
 417                 real rInv8, rInv7, rInv6;
 418                 rInv6 = rInvLJ * rInvLJ * rInvLJ;
 419                 rInv6 *= rInv6;
 420                 rInv7  = rInv6 * rInvLJ;
 421                 rInv8  = rInv7 * rInvLJ;
 422                 rInv14 = c12s * rInv7 * rInv7 * r2;
 423                 rInv13 = c12s * rInv7 * rInv6 * r;
 424                 rInv12 = c12s * rInv6 * rInv6;
 425                 rInv8 *= c6s * r2;
 426                 rInv7 *= c6s * r;
 427                 rInv6 *= c6s;
 428
 429                 /* Temporary variables for A and B */
 430                 real quadrFac, linearFac, constFac;
 431                 quadrFac  = 156 * rInv14 - 42 * rInv8;
 432                 linearFac = 168 * rInv13 - 48 * rInv7;
 433                 constFac  = 91 * rInv12 - 28 * rInv6;
 434
 435                 /* Computing LJ force and potential energy*/
 436                 fscal_vdw[i] = -quadrFac + linearFac;
 437                 fscal_vdw[i] *= rpinv;
 438
 439                 vvdw[i] = 0.5_real * quadrFac - linearFac + constFac;
 440
 441                 dvdl_vdw[i] += DLF[i] * 28 * (LFV[i] / (one - LFV[i]))
 442                                * ((6.5_real * rInv14 - rInv8) - (13 * rInv13 - 2 * rInv7)
 443                                   + (6.5_real * rInv12 - rInv6));
 444             }
 445             else // Beutler, resp. hardcore
 446             {
 447                 /* Vdw table lookup data */
 448                 rtab = r_vdw * tabscale;
 449                 ntab = static_cast<int>(rtab);
 450                 eps  = rtab - ntab;
 451                 eps2 = eps * eps;
 452                 ntab = 12 * ntab;
 453                 /* Dispersion */
 454                 Y            = vftab[ntab + 4];
 455                 F            = vftab[ntab + 5];
 456                 Geps         = eps * vftab[ntab + 6];
 457                 Heps2        = eps2 * vftab[ntab + 7];
 458                 Fp           = F + Geps + Heps2;
 459                 VV           = Y + eps * Fp;
 460                 FF           = Fp + Geps + two * Heps2;
 461                 vvdw[i]      = c6[i] * VV;
 462                 fscal_vdw[i] = -c6[i] * FF;
 463
 464                 /* Repulsion */
 465                 Y     = vftab[ntab + 8];
 466                 F     = vftab[ntab + 9];
 467                 Geps  = eps * vftab[ntab + 10];
 468                 Heps2 = eps2 * vftab[ntab + 11];
 469                 Fp    = F + Geps + Heps2;
 470                 VV    = Y + eps * Fp;
 471                 FF    = Fp + Geps + two * Heps2;
 472                 vvdw[i] += c12[i] * VV;
 473                 fscal_vdw[i] -= c12[i] * FF;
 474                 fscal_vdw[i] *= r_vdw * rpinv * tabscale;
 475             }
 476         }
 477     }
 478     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
 479     /* Assemble A and B states */
 480     velecsum      = 0;
 481     vvdwsum       = 0;
 482     dvdl_coul_sum = 0;
 483     dvdl_vdw_sum  = 0;
 484     fscal         = 0;
 485     for (i = 0; i < 2; i++)
 486     {
 487         velecsum += LFC[i] * velec[i];
 488         vvdwsum += LFV[i] * vvdw[i];
 489
 490         fscal += (LFC[i] * fscal_elec[i] + LFV[i] * fscal_vdw[i]) * rpm2;
 491
 492         if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
 493         {
 494             dvdl_coul_sum += dvdl_elec[i];
 495             dvdl_vdw_sum += dvdl_vdw[i];
 496         }
 497         // NOLINTNEXTLINE(readability-misleading-indentation) remove when clang-tidy-13 is required
 498         dvdl_coul_sum += velec[i] * DLF[i];
 499         dvdl_vdw_sum += vvdw[i] * DLF[i];
 500         if constexpr (softcoreType == KernelSoftcoreType::Beutler)
 501         {
 502             dvdl_coul_sum += LFC[i] * alpha_coul_eff * dlfac_coul[i] * fscal_elec[i] * sigma_pow[i];
 503             dvdl_vdw_sum += LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * fscal_vdw[i] * sigma_pow[i];
 504         }
 505     }
 506
 507     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdl_coul_sum;
 508     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdl_vdw_sum;
 509
 510     *velectot = velecsum;
 511     *vvdwtot  = vvdwsum;
 512
 513     return fscal;
 514 }
 515
 516 /*! \brief Calculate pair interactions, supports all types and conditions. */
 517 template<BondedKernelFlavor flavor>
 518 static real do_pairs_general(int                           ftype,
 519                              int                           nbonds,
 520                              const t_iatom                 iatoms[],
 521                              const t_iparams               iparams[],
 522                              const rvec                    x[],
 523                              rvec4                         f[],
 524                              rvec                          fshift[],
 525                              const struct t_pbc*           pbc,
 526                              const real*                   lambda,
 527                              real*                         dvdl,
 528                              gmx::ArrayRef<real>           chargeA,
 529                              gmx::ArrayRef<real>           chargeB,
 530                              gmx::ArrayRef<bool>           atomIsPerturbed,
 531                              gmx::ArrayRef<unsigned short> cENER,
 532                              int                           numEnergyGroups,
 533                              const t_forcerec*             fr,
 534                              gmx_grppairener_t*            grppener,
 535                              int*                          global_atom_index)
 536 {
 537     real            qq, c6, c12;
 538     rvec            dx;
 539     int             i, itype, ai, aj, gid;
 540     int             fshift_index;
 541     real            r2;
 542     real            fscal, velec, vvdw;
 543     real*           energygrp_elec;
 544     real*           energygrp_vdw;
 545     static gmx_bool warned_rlimit = FALSE;
 546     /* Free energy stuff */
 547     gmx_bool   bFreeEnergy;
 548     real       LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
 549     real       qqB, c6B, c12B;
 550     const real oneSixth = 1.0_real / 6.0_real;
 551
 552     switch (ftype)
 553     {
 554         case F_LJ14:
 555         case F_LJC14_Q:
 556             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::Coulomb14].data();
 557             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJ14].data();
 558             break;
 559         case F_LJC_PAIRS_NB:
 560             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::CoulombSR].data();
 561             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJSR].data();
 562             break;
 563         default:
 564             energygrp_elec = nullptr; /* Keep compiler happy */
 565             energygrp_vdw  = nullptr; /* Keep compiler happy */
 566             gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
 567     }
 568
 569     if (fr->efep != FreeEnergyPerturbationType::No)
 570     {
 571         /* Lambda factor for state A=1-lambda and B=lambda */
 572         LFC[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
 573         LFV[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
 574         LFC[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
 575         LFV[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
 576
 577         /*derivative of the lambda factor for state A and B */
 578         DLF[0] = -1;
 579         DLF[1] = 1;
 580
 581         GMX_ASSERT(fr->ic->softCoreParameters, "We need soft-core parameters");
 582         const auto& scParams = *fr->ic->softCoreParameters;
 583
 584         for (i = 0; i < 2; i++)
 585         {
 586             lfac_coul[i] = (scParams.lambdaPower == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 587             dlfac_coul[i] = DLF[i] * scParams.lambdaPower * oneSixth
 588                             * (scParams.lambdaPower == 2 ? (1 - LFC[i]) : 1);
 589             lfac_vdw[i]  = (scParams.lambdaPower == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 590             dlfac_vdw[i] = DLF[i] * scParams.lambdaPower * oneSixth
 591                            * (scParams.lambdaPower == 2 ? (1 - LFV[i]) : 1);
 592         }
 593     }
 594
 595     /* TODO This code depends on the logic in tables.c that constructs
 596        the table layout, which should be made explicit in future
 597        cleanup. */
 598     GMX_ASSERT(
 599             etiNR == 3,
 600             "Pair-interaction code that uses GROMACS interaction tables supports exactly 3 tables");
 601     GMX_ASSERT(
 602             fr->pairsTable->interaction == TableInteraction::ElectrostaticVdwRepulsionVdwDispersion,
 603             "Pair interaction kernels need a table with Coulomb, repulsion and dispersion entries");
 604
 605     const real epsfac = fr->ic->epsfac;
 606
 607     bFreeEnergy = FALSE;
 608     for (i = 0; (i < nbonds);)
 609     {
 610         itype = iatoms[i++];
 611         ai    = iatoms[i++];
 612         aj    = iatoms[i++];
 613         gid   = GID(cENER[ai], cENER[aj], numEnergyGroups);
 614
 615         /* Get parameters */
 616         switch (ftype)
 617         {
 618             case F_LJ14:
 619                 bFreeEnergy =
 620                         (fr->efep != FreeEnergyPerturbationType::No
 621                          && ((!atomIsPerturbed.empty() && (atomIsPerturbed[ai] || atomIsPerturbed[aj]))
 622                              || iparams[itype].lj14.c6A != iparams[itype].lj14.c6B
 623                              || iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
 624                 qq  = chargeA[ai] * chargeA[aj] * epsfac * fr->fudgeQQ;
 625                 c6  = iparams[itype].lj14.c6A;
 626                 c12 = iparams[itype].lj14.c12A;
 627                 break;
 628             case F_LJC14_Q:
 629                 qq = iparams[itype].ljc14.qi * iparams[itype].ljc14.qj * epsfac
 630                      * iparams[itype].ljc14.fqq;
 631                 c6  = iparams[itype].ljc14.c6;
 632                 c12 = iparams[itype].ljc14.c12;
 633                 break;
 634             case F_LJC_PAIRS_NB:
 635                 qq  = iparams[itype].ljcnb.qi * iparams[itype].ljcnb.qj * epsfac;
 636                 c6  = iparams[itype].ljcnb.c6;
 637                 c12 = iparams[itype].ljcnb.c12;
 638                 break;
 639             default:
 640                 /* Cannot happen since we called gmx_fatal() above in this case */
 641                 qq = c6 = c12 = 0; /* Keep compiler happy */
 642                 break;
 643         }
 644
 645         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
 646          * included in the general nfbp array now. This means the tables are scaled down by the
 647          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
 648          * be scaled up.
 649          */
 650         c6 *= 6.0;
 651         c12 *= 12.0;
 652
 653         /* Do we need to apply full periodic boundary conditions? */
 654         if (fr->bMolPBC)
 655         {
 656             fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
 657         }
 658         else
 659         {
 660             fshift_index = c_centralShiftIndex;
 661             rvec_sub(x[ai], x[aj], dx);
 662         }
 663         r2 = norm2(dx);
 664
 665         if (r2 >= fr->pairsTable->interactionRange * fr->pairsTable->interactionRange)
 666         {
 667             /* This check isn't race free. But it doesn't matter because if a race occurs the only
 668              * disadvantage is that the warning is printed twice */
 669             if (!warned_rlimit)
 670             {
 671                 warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->pairsTable->interactionRange);
 672                 warned_rlimit = TRUE;
 673             }
 674             continue;
 675         }
 676
 677         if (bFreeEnergy)
 678         {
 679             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
 680             qqB  = chargeB[ai] * chargeB[aj] * epsfac * fr->fudgeQQ;
 681             c6B  = iparams[itype].lj14.c6B * 6.0;
 682             c12B = iparams[itype].lj14.c12B * 12.0;
 683
 684             const auto& scParams = *fr->ic->softCoreParameters;
 685             if (scParams.softcoreType == SoftcoreType::Beutler)
 686             {
 687                 if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
 688                 {
 689                     fscal = free_energy_evaluate_single<KernelSoftcoreType::None>(
 690                             r2,
 691                             fr->ic->rcoulomb,
 692                             *fr->ic->softCoreParameters,
 693                             fr->pairsTable->scale,
 694                             fr->pairsTable->data.data(),
 695                             fr->pairsTable->stride,
 696                             qq,
 697                             c6,
 698                             c12,
 699                             qqB,
 700                             c6B,
 701                             c12B,
 702                             epsfac,
 703                             LFC,
 704                             LFV,
 705                             DLF,
 706                             lfac_coul,
 707                             lfac_vdw,
 708                             dlfac_coul,
 709                             dlfac_vdw,
 710                             &velec,
 711                             &vvdw,
 712                             dvdl);
 713                 }
 714                 else
 715                 {
 716                     fscal = free_energy_evaluate_single<KernelSoftcoreType::Beutler>(
 717                             r2,
 718                             fr->ic->rcoulomb,
 719                             *fr->ic->softCoreParameters,
 720                             fr->pairsTable->scale,
 721                             fr->pairsTable->data.data(),
 722                             fr->pairsTable->stride,
 723                             qq,
 724                             c6,
 725                             c12,
 726                             qqB,
 727                             c6B,
 728                             c12B,
 729                             epsfac,
 730                             LFC,
 731                             LFV,
 732                             DLF,
 733                             lfac_coul,
 734                             lfac_vdw,
 735                             dlfac_coul,
 736                             dlfac_vdw,
 737                             &velec,
 738                             &vvdw,
 739                             dvdl);
 740                 }
 741             }
 742             else // Gapsys
 743             {
 744                 if (scParams.gapsysScaleLinpointCoul == 0 && scParams.gapsysScaleLinpointVdW == 0)
 745                 {
 746                     fscal = free_energy_evaluate_single<KernelSoftcoreType::None>(
 747                             r2,
 748                             fr->ic->rcoulomb,
 749                             *fr->ic->softCoreParameters,
 750                             fr->pairsTable->scale,
 751                             fr->pairsTable->data.data(),
 752                             fr->pairsTable->stride,
 753                             qq,
 754                             c6,
 755                             c12,
 756                             qqB,
 757                             c6B,
 758                             c12B,
 759                             epsfac,
 760                             LFC,
 761                             LFV,
 762                             DLF,
 763                             lfac_coul,
 764                             lfac_vdw,
 765                             dlfac_coul,
 766                             dlfac_vdw,
 767                             &velec,
 768                             &vvdw,
 769                             dvdl);
 770                 }
 771                 else
 772                 {
 773                     fscal = free_energy_evaluate_single<KernelSoftcoreType::Gapsys>(
 774                             r2,
 775                             fr->ic->rcoulomb,
 776                             *fr->ic->softCoreParameters,
 777                             fr->pairsTable->scale,
 778                             fr->pairsTable->data.data(),
 779                             fr->pairsTable->stride,
 780                             qq,
 781                             c6,
 782                             c12,
 783                             qqB,
 784                             c6B,
 785                             c12B,
 786                             epsfac,
 787                             LFC,
 788                             LFV,
 789                             DLF,
 790                             lfac_coul,
 791                             lfac_vdw,
 792                             dlfac_coul,
 793                             dlfac_vdw,
 794                             &velec,
 795                             &vvdw,
 796                             dvdl);
 797                 }
 798             }
 799         }
 800         else
 801         {
 802             /* Evaluate tabulated interaction without free energy */
 803             fscal = evaluate_single(r2,
 804                                     fr->pairsTable->scale,
 805                                     fr->pairsTable->data.data(),
 806                                     fr->pairsTable->stride,
 807                                     qq,
 808                                     c6,
 809                                     c12,
 810                                     &velec,
 811                                     &vvdw);
 812         }
 813
 814         energygrp_elec[gid] += velec;
 815         energygrp_vdw[gid] += vvdw;
 816         svmul(fscal, dx, dx);
 817
 818         /* Add the forces */
 819         rvec_inc(f[ai], dx);
 820         rvec_dec(f[aj], dx);
 821
 822         if (computeVirial(flavor))
 823         {
 824             if (fshift_index != c_centralShiftIndex)
 825             {
 826                 rvec_inc(fshift[fshift_index], dx);
 827                 rvec_dec(fshift[c_centralShiftIndex], dx);
 828             }
 829         }
 830     }
 831     return 0.0;
 832 }
 833
 834 /*! \brief Calculate pairs, only for plain-LJ + plain Coulomb normal type.
 835  *
 836  * This function is templated for real/SimdReal and for optimization.
 837  */
 838 template<typename T, int pack_size, typename pbc_type>
 839 static void do_pairs_simple(int                 nbonds,
 840                             const t_iatom       iatoms[],
 841                             const t_iparams     iparams[],
 842                             const rvec          x[],
 843                             rvec4               f[],
 844                             const pbc_type      pbc,
 845                             gmx::ArrayRef<real> charge,
 846                             const real          scale_factor)
 847 {
 848     const int nfa1 = 1 + 2;
 849
 850     T six(6);
 851     T twelve(12);
 852     T ef(scale_factor);
 853
 854 #if GMX_SIMD_HAVE_REAL
 855     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[pack_size];
 856     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[pack_size];
 857     alignas(GMX_SIMD_ALIGNMENT) real         coeff[3 * pack_size];
 858 #else
 859     std::int32_t ai[pack_size];
 860     std::int32_t aj[pack_size];
 861     real         coeff[3 * pack_size];
 862 #endif
 863
 864     /* nbonds is #pairs*nfa1, here we step pack_size pairs */
 865     for (int i = 0; i < nbonds; i += pack_size * nfa1)
 866     {
 867         /* Collect atoms for pack_size pairs.
 868          * iu indexes into iatoms, we should not let iu go beyond nbonds.
 869          */
 870         int iu = i;
 871         for (int s = 0; s < pack_size; s++)
 872         {
 873             int itype = iatoms[iu];
 874             ai[s]     = iatoms[iu + 1];
 875             aj[s]     = iatoms[iu + 2];
 876
 877             if (i + s * nfa1 < nbonds)
 878             {
 879                 coeff[0 * pack_size + s] = iparams[itype].lj14.c6A;
 880                 coeff[1 * pack_size + s] = iparams[itype].lj14.c12A;
 881                 coeff[2 * pack_size + s] = charge[ai[s]] * charge[aj[s]];
 882
 883                 /* Avoid indexing the iatoms array out of bounds.
 884                  * We pad the coordinate indices with the last atom pair.
 885                  */
 886                 if (iu + nfa1 < nbonds)
 887                 {
 888                     iu += nfa1;
 889                 }
 890             }
 891             else
 892             {
 893                 /* Pad the coefficient arrays with zeros to get zero forces */
 894                 coeff[0 * pack_size + s] = 0;
 895                 coeff[1 * pack_size + s] = 0;
 896                 coeff[2 * pack_size + s] = 0;
 897             }
 898         }
 899
 900         /* Load the coordinates */
 901         T xi[DIM], xj[DIM];
 902         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]);
 903         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]);
 904
 905         T c6  = load<T>(coeff + 0 * pack_size);
 906         T c12 = load<T>(coeff + 1 * pack_size);
 907         T qq  = load<T>(coeff + 2 * pack_size);
 908
 909         /* We could save these operations by storing 6*C6,12*C12 */
 910         c6  = six * c6;
 911         c12 = twelve * c12;
 912
 913         T dr[DIM];
 914         pbc_dx_aiuc(pbc, xi, xj, dr);
 915
 916         T rsq   = dr[XX] * dr[XX] + dr[YY] * dr[YY] + dr[ZZ] * dr[ZZ];
 917         T rinv  = gmx::invsqrt(rsq);
 918         T rinv2 = rinv * rinv;
 919         T rinv6 = rinv2 * rinv2 * rinv2;
 920
 921         /* Calculate the Coulomb force * r */
 922         T cfr = ef * qq * rinv;
 923
 924         /* Calculate the LJ force * r and add it to the Coulomb part */
 925         T fr = gmx::fma(fms(c12, rinv6, c6), rinv6, cfr);
 926
 927         T finvr = fr * rinv2;
 928         T fx    = finvr * dr[XX];
 929         T fy    = finvr * dr[YY];
 930         T fz    = finvr * dr[ZZ];
 931
 932         /* Add the pair forces to the force array.
 933          * Note that here we might add multiple force components for some atoms
 934          * due to the SIMD padding. But the extra force components are zero.
 935          */
 936         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, fx, fy, fz);
 937         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, fx, fy, fz);
 938     }
 939 }
 940
 941 /*! \brief Calculate all listed pair interactions */
 942 void do_pairs(int                           ftype,
 943               int                           nbonds,
 944               const t_iatom                 iatoms[],
 945               const t_iparams               iparams[],
 946               const rvec                    x[],
 947               rvec4                         f[],
 948               rvec                          fshift[],
 949               const struct t_pbc*           pbc,
 950               const real*                   lambda,
 951               real*                         dvdl,
 952               gmx::ArrayRef<real>           chargeA,
 953               gmx::ArrayRef<real>           chargeB,
 954               gmx::ArrayRef<bool>           atomIsPerturbed,
 955               gmx::ArrayRef<unsigned short> cENER,
 956               const int                     numEnergyGroups,
 957               const t_forcerec*             fr,
 958               const bool                    havePerturbedInteractions,
 959               const gmx::StepWorkload&      stepWork,
 960               gmx_grppairener_t*            grppener,
 961               int*                          global_atom_index)
 962 {
 963     if (ftype == F_LJ14 && fr->ic->vdwtype != VanDerWaalsType::User && !EEL_USER(fr->ic->eeltype)
 964         && !havePerturbedInteractions && (!stepWork.computeVirial && !stepWork.computeEnergy))
 965     {
 966         /* We use a fast code-path for plain LJ 1-4 without FEP.
 967          *
 968          * TODO: Add support for energies (straightforward) and virial
 969          * in the SIMD template. For the virial it's inconvenient to store
 970          * the force sums for the shifts and we should directly calculate
 971          * and sum the virial for the shifts. But we should do this
 972          * at once for the angles and dihedrals as well.
 973          */
 974 #if GMX_SIMD_HAVE_REAL
 975         if (fr->use_simd_kernels)
 976         {
 977             alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
 978             set_pbc_simd(pbc, pbc_simd);
 979
 980             do_pairs_simple<SimdReal, GMX_SIMD_REAL_WIDTH, const real*>(
 981                     nbonds, iatoms, iparams, x, f, pbc_simd, chargeA, fr->ic->epsfac * fr->fudgeQQ);
 982         }
 983         else
 984 #endif
 985         {
 986             /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
 987             t_pbc        pbc_no;
 988             const t_pbc* pbc_nonnull;
 989
 990             if (pbc != nullptr)
 991             {
 992                 pbc_nonnull = pbc;
 993             }
 994             else
 995             {
 996                 set_pbc(&pbc_no, PbcType::No, nullptr);
 997                 pbc_nonnull = &pbc_no;
 998             }
 999
1000             do_pairs_simple<real, 1, const t_pbc*>(
1001                     nbonds, iatoms, iparams, x, f, pbc_nonnull, chargeA, fr->ic->epsfac * fr->fudgeQQ);
1002         }
1003     }
1004     else if (stepWork.computeVirial)
1005     {
1006         do_pairs_general<BondedKernelFlavor::ForcesAndVirialAndEnergy>(ftype,
1007                                                                        nbonds,
1008                                                                        iatoms,
1009                                                                        iparams,
1010                                                                        x,
1011                                                                        f,
1012                                                                        fshift,
1013                                                                        pbc,
1014                                                                        lambda,
1015                                                                        dvdl,
1016                                                                        chargeA,
1017                                                                        chargeB,
1018                                                                        atomIsPerturbed,
1019                                                                        cENER,
1020                                                                        numEnergyGroups,
1021                                                                        fr,
1022                                                                        grppener,
1023                                                                        global_atom_index);
1024     }
1025     else
1026     {
1027         do_pairs_general<BondedKernelFlavor::ForcesAndEnergy>(ftype,
1028                                                               nbonds,
1029                                                               iatoms,
1030                                                               iparams,
1031                                                               x,
1032                                                               f,
1033                                                               fshift,
1034                                                               pbc,
1035                                                               lambda,
1036                                                               dvdl,
1037                                                               chargeA,
1038                                                               chargeB,
1039                                                               atomIsPerturbed,
1040                                                               cENER,
1041                                                               numEnergyGroups,
1042                                                               fr,
1043                                                               grppener,
1044                                                               global_atom_index);
1045     }
1046 }