src/gromacs/listed_forces/pairs.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
   5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 /*! \internal \file
  37  *
  38  * \brief This file defines functions for "pair" interactions
  39  * (i.e. listed non-bonded interactions, e.g. 1-4 interactions)
  40  *
  41  * \author Mark Abraham <mark.j.abraham@gmail.com>
  42  *
  43  * \ingroup module_listed_forces
  44  */
  45 #include "gmxpre.h"
  46
  47 #include "pairs.h"
  48
  49 #include <cmath>
  50
  51 #include "gromacs/listed_forces/bonded.h"
  52 #include "gromacs/math/functions.h"
  53 #include "gromacs/math/vec.h"
  54 #include "gromacs/mdtypes/forcerec.h"
  55 #include "gromacs/mdtypes/group.h"
  56 #include "gromacs/mdtypes/interaction_const.h"
  57 #include "gromacs/mdtypes/md_enums.h"
  58 #include "gromacs/mdtypes/nblist.h"
  59 #include "gromacs/mdtypes/simulation_workload.h"
  60 #include "gromacs/pbcutil/ishift.h"
  61 #include "gromacs/pbcutil/pbc.h"
  62 #include "gromacs/pbcutil/pbc_simd.h"
  63 #include "gromacs/simd/simd.h"
  64 #include "gromacs/simd/simd_math.h"
  65 #include "gromacs/simd/vector_operations.h"
  66 #include "gromacs/tables/forcetable.h"
  67 #include "gromacs/utility/basedefinitions.h"
  68 #include "gromacs/utility/fatalerror.h"
  69 #include "gromacs/utility/gmxassert.h"
  70
  71 #include "listed_internal.h"
  72
  73 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  74
  75 /*! \brief Issue a warning if a listed interaction is beyond a table limit */
  76 static void warning_rlimit(const rvec* x, int ai, int aj, int* global_atom_index, real r, real rlimit)
  77 {
  78     gmx_warning(
  79             "Listed nonbonded interaction between particles %d and %d\n"
  80             "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
  81             "This is likely either a 1,4 interaction, or a listed interaction inside\n"
  82             "a smaller molecule you are decoupling during a free energy calculation.\n"
  83             "Since interactions at distances beyond the table cannot be computed,\n"
  84             "they are skipped until they are inside the table limit again. You will\n"
  85             "only see this message once, even if it occurs for several interactions.\n\n"
  86             "IMPORTANT: This should not happen in a stable simulation, so there is\n"
  87             "probably something wrong with your system. Only change the table-extension\n"
  88             "distance in the mdp file if you are really sure that is the reason.\n",
  89             glatnr(global_atom_index, ai),
  90             glatnr(global_atom_index, aj),
  91             r,
  92             rlimit);
  93
  94     if (debug)
  95     {
  96         fprintf(debug,
  97                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. "
  98                 "Ignored\n",
  99                 x[ai][XX],
 100                 x[ai][YY],
 101                 x[ai][ZZ],
 102                 x[aj][XX],
 103                 x[aj][YY],
 104                 x[aj][ZZ],
 105                 glatnr(global_atom_index, ai),
 106                 glatnr(global_atom_index, aj),
 107                 r);
 108     }
 109 }
 110
 111 /*! \brief Compute the energy and force for a single pair interaction */
 112 static real evaluate_single(real        r2,
 113                             real        tabscale,
 114                             const real* vftab,
 115                             real        tableStride,
 116                             real        qq,
 117                             real        c6,
 118                             real        c12,
 119                             real*       velec,
 120                             real*       vvdw)
 121 {
 122     real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
 123     int  ntab;
 124
 125     /* Do the tabulated interactions - first table lookup */
 126     rinv = gmx::invsqrt(r2);
 127     r    = r2 * rinv;
 128     rtab = r * tabscale;
 129     ntab = static_cast<int>(rtab);
 130     eps  = rtab - ntab;
 131     eps2 = eps * eps;
 132     ntab = static_cast<int>(tableStride * ntab);
 133     /* Electrostatics */
 134     Y     = vftab[ntab];
 135     F     = vftab[ntab + 1];
 136     Geps  = eps * vftab[ntab + 2];
 137     Heps2 = eps2 * vftab[ntab + 3];
 138     Fp    = F + Geps + Heps2;
 139     VVe   = Y + eps * Fp;
 140     FFe   = Fp + Geps + 2.0 * Heps2;
 141     /* Dispersion */
 142     Y     = vftab[ntab + 4];
 143     F     = vftab[ntab + 5];
 144     Geps  = eps * vftab[ntab + 6];
 145     Heps2 = eps2 * vftab[ntab + 7];
 146     Fp    = F + Geps + Heps2;
 147     VVd   = Y + eps * Fp;
 148     FFd   = Fp + Geps + 2.0 * Heps2;
 149     /* Repulsion */
 150     Y     = vftab[ntab + 8];
 151     F     = vftab[ntab + 9];
 152     Geps  = eps * vftab[ntab + 10];
 153     Heps2 = eps2 * vftab[ntab + 11];
 154     Fp    = F + Geps + Heps2;
 155     VVr   = Y + eps * Fp;
 156     FFr   = Fp + Geps + 2.0 * Heps2;
 157
 158     *velec = qq * VVe;
 159     *vvdw  = c6 * VVd + c12 * VVr;
 160
 161     fscal = -(qq * FFe + c6 * FFd + c12 * FFr) * tabscale * rinv;
 162
 163     return fscal;
 164 }
 165
 166 static inline real sixthRoot(const real r)
 167 {
 168     return gmx::invsqrt(std::cbrt(r));
 169 }
 170
 171 /*! \brief Compute the energy and force for a single pair interaction under FEP */
 172 template<KernelSoftcoreType softcoreType>
 173 static real free_energy_evaluate_single(real                                           r2,
 174                                         real                                           rCoulCutoff,
 175                                         const interaction_const_t::SoftCoreParameters& scParams,
 176                                         real                                           tabscale,
 177                                         const real*                                    vftab,
 178                                         real                                           tableStride,
 179                                         real                                           qqA,
 180                                         real                                           c6A,
 181                                         real                                           c12A,
 182                                         real                                           qqB,
 183                                         real                                           c6B,
 184                                         real                                           c12B,
 185                                         real                                           facel,
 186                                         const real                                     LFC[2],
 187                                         const real                                     LFV[2],
 188                                         const real                                     DLF[2],
 189                                         const real                                     lfac_coul[2],
 190                                         const real                                     lfac_vdw[2],
 191                                         const real dlfac_coul[2],
 192                                         const real dlfac_vdw[2],
 193                                         real*      velectot,
 194                                         real*      vvdwtot,
 195                                         real*      dvdl)
 196 {
 197     real       rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
 198     real       qq[2], c6[2], c12[2], sigma6[2], sigma_pow[2];
 199     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul_sum, dvdl_vdw_sum;
 200     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
 201     real       fscal_vdw[2], fscal_elec[2];
 202     real       velec[2], vvdw[2];
 203     real       dvdl_elec[2], dvdl_vdw[2];
 204     real       gapsysScaleLinpointCoul, gapsysScaleLinpointVdW, gapsysSigma6VdW[2];
 205     real       rQ, rLJ;
 206     real       scaleDvdlRCoul;
 207     int        i, ntab;
 208     const real half = 0.5_real;
 209     const real one  = 1.0_real;
 210     const real two  = 2.0_real;
 211
 212     qq[0]  = qqA;
 213     qq[1]  = qqB;
 214     c6[0]  = c6A;
 215     c6[1]  = c6B;
 216     c12[0] = c12A;
 217     c12[1] = c12B;
 218
 219     const real rpm2 = r2 * r2;   /* r4 */
 220     const real rp   = rpm2 * r2; /* r6 */
 221     const real r    = sqrt(r2);  /* r1 */
 222
 223     /* Loop over state A(0) and B(1) */
 224     for (i = 0; i < 2; i++)
 225     {
 226         if constexpr (softcoreType == KernelSoftcoreType::Beutler)
 227         {
 228             if ((c6[i] > 0) && (c12[i] > 0))
 229             {
 230                 /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
 231                  * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
 232                  */
 233                 sigma6[i] = half * c12[i] / c6[i];
 234                 if (sigma6[i] < scParams.sigma6Minimum) /* for disappearing coul and vdw with soft core at the same time */
 235                 {
 236                     sigma6[i] = scParams.sigma6Minimum;
 237                 }
 238             }
 239             else
 240             {
 241                 sigma6[i] = scParams.sigma6WithInvalidSigma;
 242             }
 243             sigma_pow[i] = sigma6[i];
 244         }
 245         if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
 246         {
 247             if ((c6[i] > 0) && (c12[i] > 0))
 248             {
 249                 /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
 250                  * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
 251                  */
 252                 gapsysSigma6VdW[i] = half * c12[i] / c6[i];
 253             }
 254             else
 255             {
 256                 gapsysSigma6VdW[i] = scParams.gapsysSigma6VdW;
 257             }
 258         }
 259     }
 260
 261     if constexpr (softcoreType == KernelSoftcoreType::Beutler)
 262     {
 263         /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 264         if ((c12[0] > 0) && (c12[1] > 0))
 265         {
 266             alpha_vdw_eff  = 0;
 267             alpha_coul_eff = 0;
 268         }
 269         else
 270         {
 271             alpha_vdw_eff  = scParams.alphaVdw;
 272             alpha_coul_eff = scParams.alphaCoulomb;
 273         }
 274     }
 275     if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
 276     {
 277         /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 278         if ((c12[0] > 0) && (c12[1] > 0))
 279         {
 280             gapsysScaleLinpointVdW  = 0;
 281             gapsysScaleLinpointCoul = 0;
 282         }
 283         else
 284         {
 285             gapsysScaleLinpointVdW  = scParams.gapsysScaleLinpointVdW;
 286             gapsysScaleLinpointCoul = scParams.gapsysScaleLinpointCoul;
 287         }
 288     }
 289
 290     /* Loop over A and B states again */
 291     for (i = 0; i < 2; i++)
 292     {
 293         fscal_elec[i] = 0;
 294         fscal_vdw[i]  = 0;
 295         velec[i]      = 0;
 296         vvdw[i]       = 0;
 297         dvdl_elec[i]  = 0;
 298         dvdl_vdw[i]   = 0;
 299         rQ            = 0;
 300         rLJ           = 0;
 301
 302         /* Only spend time on A or B state if it is non-zero */
 303         if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 304         {
 305             /* Coulomb */
 306             if constexpr (softcoreType == KernelSoftcoreType::Beutler)
 307             {
 308                 rpinv  = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
 309                 r_coul = sixthRoot(rpinv);
 310             }
 311             else
 312             {
 313                 rpinv  = one / rp;
 314                 r_coul = r;
 315             }
 316
 317             if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
 318             {
 319                 if ((facel != 0) && (LFC[i] < 1))
 320                 {
 321                     rQ = gmx::sixthroot(one - LFC[i]) * (one + std::fabs(qq[i] / facel));
 322                     rQ *= gapsysScaleLinpointCoul;
 323                 }
 324                 else
 325                 {
 326                     rQ = 0;
 327                 }
 328                 scaleDvdlRCoul = 1;
 329                 if (rQ > rCoulCutoff)
 330                 {
 331                     rQ             = rCoulCutoff;
 332                     scaleDvdlRCoul = 0;
 333                 }
 334             }
 335
 336             if ((softcoreType == KernelSoftcoreType::Gapsys) && (r < rQ))
 337             {
 338                 real rInvQ    = one / rQ;
 339                 real constFac = qq[i] * rInvQ;
 340                 real linFac   = constFac * r * rInvQ;
 341                 real quadrFac = linFac * r * rInvQ;
 342
 343                 /* Computing Coulomb force and potential energy */
 344                 fscal_elec[i] = -2 * quadrFac + 3 * linFac;
 345                 fscal_elec[i] *= rpinv;
 346
 347                 velec[i] = quadrFac - 3 * (linFac - constFac);
 348
 349                 dvdl_elec[i] += scaleDvdlRCoul * DLF[i] * half * (LFC[i] / (1 - LFC[i]))
 350                                 * (quadrFac - 2 * linFac + constFac);
 351             }
 352             else // Beutler, resp. hardcore
 353             {
 354                 /* Electrostatics table lookup data */
 355                 rtab = r_coul * tabscale;
 356                 ntab = static_cast<int>(rtab);
 357                 eps  = rtab - ntab;
 358                 eps2 = eps * eps;
 359                 ntab = static_cast<int>(tableStride * ntab);
 360                 /* Electrostatics */
 361                 Y             = vftab[ntab];
 362                 F             = vftab[ntab + 1];
 363                 Geps          = eps * vftab[ntab + 2];
 364                 Heps2         = eps2 * vftab[ntab + 3];
 365                 Fp            = F + Geps + Heps2;
 366                 VV            = Y + eps * Fp;
 367                 FF            = Fp + Geps + two * Heps2;
 368                 velec[i]      = qq[i] * VV;
 369                 fscal_elec[i] = -qq[i] * FF * r_coul * rpinv * tabscale;
 370             }
 371
 372             /* Vdw */
 373             if constexpr (softcoreType == KernelSoftcoreType::Beutler)
 374             {
 375                 rpinv = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
 376                 r_vdw = sixthRoot(rpinv);
 377             }
 378             else
 379             {
 380                 rpinv = one / rp;
 381                 r_vdw = r;
 382             }
 383
 384             if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
 385             {
 386                 constexpr real c_twentySixSeventh = 26.0_real / 7.0_real;
 387
 388                 if (LFV[i] < 1)
 389                 {
 390
 391                     rLJ = gmx::sixthroot(c_twentySixSeventh * gapsysSigma6VdW[i] * (one - LFV[i]));
 392                     rLJ *= gapsysScaleLinpointVdW;
 393                 }
 394                 else
 395                 {
 396                     rLJ = 0;
 397                 }
 398             }
 399
 400             if ((softcoreType == KernelSoftcoreType::Gapsys) && (r < rLJ))
 401             {
 402                 // scaled values for c6 and c12
 403                 real c6s, c12s;
 404                 c6s  = c6[i] / 6.0_real;
 405                 c12s = c12[i] / 12.0_real;
 406
 407                 /* Temporary variables for inverted values */
 408                 real rInvLJ = one / rLJ;
 409                 real rInv14, rInv13, rInv12;
 410                 real rInv8, rInv7, rInv6;
 411                 rInv6 = rInvLJ * rInvLJ * rInvLJ;
 412                 rInv6 *= rInv6;
 413                 rInv7  = rInv6 * rInvLJ;
 414                 rInv8  = rInv7 * rInvLJ;
 415                 rInv14 = c12s * rInv7 * rInv7 * r2;
 416                 rInv13 = c12s * rInv7 * rInv6 * r;
 417                 rInv12 = c12s * rInv6 * rInv6;
 418                 rInv8 *= c6s * r2;
 419                 rInv7 *= c6s * r;
 420                 rInv6 *= c6s;
 421
 422                 /* Temporary variables for A and B */
 423                 real quadrFac, linearFac, constFac;
 424                 quadrFac  = 156 * rInv14 - 42 * rInv8;
 425                 linearFac = 168 * rInv13 - 48 * rInv7;
 426                 constFac  = 91 * rInv12 - 28 * rInv6;
 427
 428                 /* Computing LJ force and potential energy*/
 429                 fscal_vdw[i] = -quadrFac + linearFac;
 430                 fscal_vdw[i] *= rpinv;
 431
 432                 vvdw[i] = 0.5_real * quadrFac - linearFac + constFac;
 433
 434                 dvdl_vdw[i] += DLF[i] * 28 * (LFV[i] / (one - LFV[i]))
 435                                * ((6.5_real * rInv14 - rInv8) - (13 * rInv13 - 2 * rInv7)
 436                                   + (6.5_real * rInv12 - rInv6));
 437             }
 438             else // Beutler, resp. hardcore
 439             {
 440                 /* Vdw table lookup data */
 441                 rtab = r_vdw * tabscale;
 442                 ntab = static_cast<int>(rtab);
 443                 eps  = rtab - ntab;
 444                 eps2 = eps * eps;
 445                 ntab = 12 * ntab;
 446                 /* Dispersion */
 447                 Y            = vftab[ntab + 4];
 448                 F            = vftab[ntab + 5];
 449                 Geps         = eps * vftab[ntab + 6];
 450                 Heps2        = eps2 * vftab[ntab + 7];
 451                 Fp           = F + Geps + Heps2;
 452                 VV           = Y + eps * Fp;
 453                 FF           = Fp + Geps + two * Heps2;
 454                 vvdw[i]      = c6[i] * VV;
 455                 fscal_vdw[i] = -c6[i] * FF;
 456
 457                 /* Repulsion */
 458                 Y     = vftab[ntab + 8];
 459                 F     = vftab[ntab + 9];
 460                 Geps  = eps * vftab[ntab + 10];
 461                 Heps2 = eps2 * vftab[ntab + 11];
 462                 Fp    = F + Geps + Heps2;
 463                 VV    = Y + eps * Fp;
 464                 FF    = Fp + Geps + two * Heps2;
 465                 vvdw[i] += c12[i] * VV;
 466                 fscal_vdw[i] -= c12[i] * FF;
 467                 fscal_vdw[i] *= r_vdw * rpinv * tabscale;
 468             }
 469         }
 470     }
 471     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
 472     /* Assemble A and B states */
 473     velecsum      = 0;
 474     vvdwsum       = 0;
 475     dvdl_coul_sum = 0;
 476     dvdl_vdw_sum  = 0;
 477     fscal         = 0;
 478     for (i = 0; i < 2; i++)
 479     {
 480         velecsum += LFC[i] * velec[i];
 481         vvdwsum += LFV[i] * vvdw[i];
 482
 483         fscal += (LFC[i] * fscal_elec[i] + LFV[i] * fscal_vdw[i]) * rpm2;
 484
 485         if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
 486         {
 487             dvdl_coul_sum += dvdl_elec[i];
 488             dvdl_vdw_sum += dvdl_vdw[i];
 489         }
 490         dvdl_coul_sum += velec[i] * DLF[i];
 491         dvdl_vdw_sum += vvdw[i] * DLF[i];
 492         if constexpr (softcoreType == KernelSoftcoreType::Beutler)
 493         {
 494             dvdl_coul_sum += LFC[i] * alpha_coul_eff * dlfac_coul[i] * fscal_elec[i] * sigma_pow[i];
 495             dvdl_vdw_sum += LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * fscal_vdw[i] * sigma_pow[i];
 496         }
 497     }
 498
 499     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdl_coul_sum;
 500     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdl_vdw_sum;
 501
 502     *velectot = velecsum;
 503     *vvdwtot  = vvdwsum;
 504
 505     return fscal;
 506 }
 507
 508 /*! \brief Calculate pair interactions, supports all types and conditions. */
 509 template<BondedKernelFlavor flavor>
 510 static real do_pairs_general(int                           ftype,
 511                              int                           nbonds,
 512                              const t_iatom                 iatoms[],
 513                              const t_iparams               iparams[],
 514                              const rvec                    x[],
 515                              rvec4                         f[],
 516                              rvec                          fshift[],
 517                              const struct t_pbc*           pbc,
 518                              const real*                   lambda,
 519                              real*                         dvdl,
 520                              gmx::ArrayRef<real>           chargeA,
 521                              gmx::ArrayRef<real>           chargeB,
 522                              gmx::ArrayRef<bool>           atomIsPerturbed,
 523                              gmx::ArrayRef<unsigned short> cENER,
 524                              int                           numEnergyGroups,
 525                              const t_forcerec*             fr,
 526                              gmx_grppairener_t*            grppener,
 527                              int*                          global_atom_index)
 528 {
 529     real            qq, c6, c12;
 530     rvec            dx;
 531     int             i, itype, ai, aj, gid;
 532     int             fshift_index;
 533     real            r2;
 534     real            fscal, velec, vvdw;
 535     real*           energygrp_elec;
 536     real*           energygrp_vdw;
 537     static gmx_bool warned_rlimit = FALSE;
 538     /* Free energy stuff */
 539     gmx_bool   bFreeEnergy;
 540     real       LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
 541     real       qqB, c6B, c12B;
 542     const real oneSixth = 1.0_real / 6.0_real;
 543
 544     switch (ftype)
 545     {
 546         case F_LJ14:
 547         case F_LJC14_Q:
 548             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::Coulomb14].data();
 549             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJ14].data();
 550             break;
 551         case F_LJC_PAIRS_NB:
 552             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::CoulombSR].data();
 553             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJSR].data();
 554             break;
 555         default:
 556             energygrp_elec = nullptr; /* Keep compiler happy */
 557             energygrp_vdw  = nullptr; /* Keep compiler happy */
 558             gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
 559     }
 560
 561     if (fr->efep != FreeEnergyPerturbationType::No)
 562     {
 563         /* Lambda factor for state A=1-lambda and B=lambda */
 564         LFC[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
 565         LFV[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
 566         LFC[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
 567         LFV[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
 568
 569         /*derivative of the lambda factor for state A and B */
 570         DLF[0] = -1;
 571         DLF[1] = 1;
 572
 573         GMX_ASSERT(fr->ic->softCoreParameters, "We need soft-core parameters");
 574         const auto& scParams = *fr->ic->softCoreParameters;
 575
 576         for (i = 0; i < 2; i++)
 577         {
 578             lfac_coul[i] = (scParams.lambdaPower == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 579             dlfac_coul[i] = DLF[i] * scParams.lambdaPower * oneSixth
 580                             * (scParams.lambdaPower == 2 ? (1 - LFC[i]) : 1);
 581             lfac_vdw[i]  = (scParams.lambdaPower == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 582             dlfac_vdw[i] = DLF[i] * scParams.lambdaPower * oneSixth
 583                            * (scParams.lambdaPower == 2 ? (1 - LFV[i]) : 1);
 584         }
 585     }
 586
 587     /* TODO This code depends on the logic in tables.c that constructs
 588        the table layout, which should be made explicit in future
 589        cleanup. */
 590     GMX_ASSERT(
 591             etiNR == 3,
 592             "Pair-interaction code that uses GROMACS interaction tables supports exactly 3 tables");
 593     GMX_ASSERT(
 594             fr->pairsTable->interaction == TableInteraction::ElectrostaticVdwRepulsionVdwDispersion,
 595             "Pair interaction kernels need a table with Coulomb, repulsion and dispersion entries");
 596
 597     const real epsfac = fr->ic->epsfac;
 598
 599     bFreeEnergy = FALSE;
 600     for (i = 0; (i < nbonds);)
 601     {
 602         itype = iatoms[i++];
 603         ai    = iatoms[i++];
 604         aj    = iatoms[i++];
 605         gid   = GID(cENER[ai], cENER[aj], numEnergyGroups);
 606
 607         /* Get parameters */
 608         switch (ftype)
 609         {
 610             case F_LJ14:
 611                 bFreeEnergy =
 612                         (fr->efep != FreeEnergyPerturbationType::No
 613                          && ((!atomIsPerturbed.empty() && (atomIsPerturbed[ai] || atomIsPerturbed[aj]))
 614                              || iparams[itype].lj14.c6A != iparams[itype].lj14.c6B
 615                              || iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
 616                 qq  = chargeA[ai] * chargeA[aj] * epsfac * fr->fudgeQQ;
 617                 c6  = iparams[itype].lj14.c6A;
 618                 c12 = iparams[itype].lj14.c12A;
 619                 break;
 620             case F_LJC14_Q:
 621                 qq = iparams[itype].ljc14.qi * iparams[itype].ljc14.qj * epsfac
 622                      * iparams[itype].ljc14.fqq;
 623                 c6  = iparams[itype].ljc14.c6;
 624                 c12 = iparams[itype].ljc14.c12;
 625                 break;
 626             case F_LJC_PAIRS_NB:
 627                 qq  = iparams[itype].ljcnb.qi * iparams[itype].ljcnb.qj * epsfac;
 628                 c6  = iparams[itype].ljcnb.c6;
 629                 c12 = iparams[itype].ljcnb.c12;
 630                 break;
 631             default:
 632                 /* Cannot happen since we called gmx_fatal() above in this case */
 633                 qq = c6 = c12 = 0; /* Keep compiler happy */
 634                 break;
 635         }
 636
 637         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
 638          * included in the general nfbp array now. This means the tables are scaled down by the
 639          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
 640          * be scaled up.
 641          */
 642         c6 *= 6.0;
 643         c12 *= 12.0;
 644
 645         /* Do we need to apply full periodic boundary conditions? */
 646         if (fr->bMolPBC)
 647         {
 648             fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
 649         }
 650         else
 651         {
 652             fshift_index = c_centralShiftIndex;
 653             rvec_sub(x[ai], x[aj], dx);
 654         }
 655         r2 = norm2(dx);
 656
 657         if (r2 >= fr->pairsTable->interactionRange * fr->pairsTable->interactionRange)
 658         {
 659             /* This check isn't race free. But it doesn't matter because if a race occurs the only
 660              * disadvantage is that the warning is printed twice */
 661             if (!warned_rlimit)
 662             {
 663                 warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->pairsTable->interactionRange);
 664                 warned_rlimit = TRUE;
 665             }
 666             continue;
 667         }
 668
 669         if (bFreeEnergy)
 670         {
 671             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
 672             qqB  = chargeB[ai] * chargeB[aj] * epsfac * fr->fudgeQQ;
 673             c6B  = iparams[itype].lj14.c6B * 6.0;
 674             c12B = iparams[itype].lj14.c12B * 12.0;
 675
 676             const auto& scParams = *fr->ic->softCoreParameters;
 677             if (scParams.softcoreType == SoftcoreType::Beutler)
 678             {
 679                 if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
 680                 {
 681                     fscal = free_energy_evaluate_single<KernelSoftcoreType::None>(
 682                             r2,
 683                             fr->ic->rcoulomb,
 684                             *fr->ic->softCoreParameters,
 685                             fr->pairsTable->scale,
 686                             fr->pairsTable->data.data(),
 687                             fr->pairsTable->stride,
 688                             qq,
 689                             c6,
 690                             c12,
 691                             qqB,
 692                             c6B,
 693                             c12B,
 694                             epsfac,
 695                             LFC,
 696                             LFV,
 697                             DLF,
 698                             lfac_coul,
 699                             lfac_vdw,
 700                             dlfac_coul,
 701                             dlfac_vdw,
 702                             &velec,
 703                             &vvdw,
 704                             dvdl);
 705                 }
 706                 else
 707                 {
 708                     fscal = free_energy_evaluate_single<KernelSoftcoreType::Beutler>(
 709                             r2,
 710                             fr->ic->rcoulomb,
 711                             *fr->ic->softCoreParameters,
 712                             fr->pairsTable->scale,
 713                             fr->pairsTable->data.data(),
 714                             fr->pairsTable->stride,
 715                             qq,
 716                             c6,
 717                             c12,
 718                             qqB,
 719                             c6B,
 720                             c12B,
 721                             epsfac,
 722                             LFC,
 723                             LFV,
 724                             DLF,
 725                             lfac_coul,
 726                             lfac_vdw,
 727                             dlfac_coul,
 728                             dlfac_vdw,
 729                             &velec,
 730                             &vvdw,
 731                             dvdl);
 732                 }
 733             }
 734             else // Gapsys
 735             {
 736                 if (scParams.gapsysScaleLinpointCoul == 0 && scParams.gapsysScaleLinpointVdW == 0)
 737                 {
 738                     fscal = free_energy_evaluate_single<KernelSoftcoreType::None>(
 739                             r2,
 740                             fr->ic->rcoulomb,
 741                             *fr->ic->softCoreParameters,
 742                             fr->pairsTable->scale,
 743                             fr->pairsTable->data.data(),
 744                             fr->pairsTable->stride,
 745                             qq,
 746                             c6,
 747                             c12,
 748                             qqB,
 749                             c6B,
 750                             c12B,
 751                             epsfac,
 752                             LFC,
 753                             LFV,
 754                             DLF,
 755                             lfac_coul,
 756                             lfac_vdw,
 757                             dlfac_coul,
 758                             dlfac_vdw,
 759                             &velec,
 760                             &vvdw,
 761                             dvdl);
 762                 }
 763                 else
 764                 {
 765                     fscal = free_energy_evaluate_single<KernelSoftcoreType::Gapsys>(
 766                             r2,
 767                             fr->ic->rcoulomb,
 768                             *fr->ic->softCoreParameters,
 769                             fr->pairsTable->scale,
 770                             fr->pairsTable->data.data(),
 771                             fr->pairsTable->stride,
 772                             qq,
 773                             c6,
 774                             c12,
 775                             qqB,
 776                             c6B,
 777                             c12B,
 778                             epsfac,
 779                             LFC,
 780                             LFV,
 781                             DLF,
 782                             lfac_coul,
 783                             lfac_vdw,
 784                             dlfac_coul,
 785                             dlfac_vdw,
 786                             &velec,
 787                             &vvdw,
 788                             dvdl);
 789                 }
 790             }
 791         }
 792         else
 793         {
 794             /* Evaluate tabulated interaction without free energy */
 795             fscal = evaluate_single(r2,
 796                                     fr->pairsTable->scale,
 797                                     fr->pairsTable->data.data(),
 798                                     fr->pairsTable->stride,
 799                                     qq,
 800                                     c6,
 801                                     c12,
 802                                     &velec,
 803                                     &vvdw);
 804         }
 805
 806         energygrp_elec[gid] += velec;
 807         energygrp_vdw[gid] += vvdw;
 808         svmul(fscal, dx, dx);
 809
 810         /* Add the forces */
 811         rvec_inc(f[ai], dx);
 812         rvec_dec(f[aj], dx);
 813
 814         if (computeVirial(flavor))
 815         {
 816             if (fshift_index != c_centralShiftIndex)
 817             {
 818                 rvec_inc(fshift[fshift_index], dx);
 819                 rvec_dec(fshift[c_centralShiftIndex], dx);
 820             }
 821         }
 822     }
 823     return 0.0;
 824 }
 825
 826 /*! \brief Calculate pairs, only for plain-LJ + plain Coulomb normal type.
 827  *
 828  * This function is templated for real/SimdReal and for optimization.
 829  */
 830 template<typename T, int pack_size, typename pbc_type>
 831 static void do_pairs_simple(int                 nbonds,
 832                             const t_iatom       iatoms[],
 833                             const t_iparams     iparams[],
 834                             const rvec          x[],
 835                             rvec4               f[],
 836                             const pbc_type      pbc,
 837                             gmx::ArrayRef<real> charge,
 838                             const real          scale_factor)
 839 {
 840     const int nfa1 = 1 + 2;
 841
 842     T six(6);
 843     T twelve(12);
 844     T ef(scale_factor);
 845
 846 #if GMX_SIMD_HAVE_REAL
 847     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[pack_size];
 848     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[pack_size];
 849     alignas(GMX_SIMD_ALIGNMENT) real         coeff[3 * pack_size];
 850 #else
 851     std::int32_t ai[pack_size];
 852     std::int32_t aj[pack_size];
 853     real         coeff[3 * pack_size];
 854 #endif
 855
 856     /* nbonds is #pairs*nfa1, here we step pack_size pairs */
 857     for (int i = 0; i < nbonds; i += pack_size * nfa1)
 858     {
 859         /* Collect atoms for pack_size pairs.
 860          * iu indexes into iatoms, we should not let iu go beyond nbonds.
 861          */
 862         int iu = i;
 863         for (int s = 0; s < pack_size; s++)
 864         {
 865             int itype = iatoms[iu];
 866             ai[s]     = iatoms[iu + 1];
 867             aj[s]     = iatoms[iu + 2];
 868
 869             if (i + s * nfa1 < nbonds)
 870             {
 871                 coeff[0 * pack_size + s] = iparams[itype].lj14.c6A;
 872                 coeff[1 * pack_size + s] = iparams[itype].lj14.c12A;
 873                 coeff[2 * pack_size + s] = charge[ai[s]] * charge[aj[s]];
 874
 875                 /* Avoid indexing the iatoms array out of bounds.
 876                  * We pad the coordinate indices with the last atom pair.
 877                  */
 878                 if (iu + nfa1 < nbonds)
 879                 {
 880                     iu += nfa1;
 881                 }
 882             }
 883             else
 884             {
 885                 /* Pad the coefficient arrays with zeros to get zero forces */
 886                 coeff[0 * pack_size + s] = 0;
 887                 coeff[1 * pack_size + s] = 0;
 888                 coeff[2 * pack_size + s] = 0;
 889             }
 890         }
 891
 892         /* Load the coordinates */
 893         T xi[DIM], xj[DIM];
 894         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]);
 895         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]);
 896
 897         T c6  = load<T>(coeff + 0 * pack_size);
 898         T c12 = load<T>(coeff + 1 * pack_size);
 899         T qq  = load<T>(coeff + 2 * pack_size);
 900
 901         /* We could save these operations by storing 6*C6,12*C12 */
 902         c6  = six * c6;
 903         c12 = twelve * c12;
 904
 905         T dr[DIM];
 906         pbc_dx_aiuc(pbc, xi, xj, dr);
 907
 908         T rsq   = dr[XX] * dr[XX] + dr[YY] * dr[YY] + dr[ZZ] * dr[ZZ];
 909         T rinv  = gmx::invsqrt(rsq);
 910         T rinv2 = rinv * rinv;
 911         T rinv6 = rinv2 * rinv2 * rinv2;
 912
 913         /* Calculate the Coulomb force * r */
 914         T cfr = ef * qq * rinv;
 915
 916         /* Calculate the LJ force * r and add it to the Coulomb part */
 917         T fr = gmx::fma(fms(c12, rinv6, c6), rinv6, cfr);
 918
 919         T finvr = fr * rinv2;
 920         T fx    = finvr * dr[XX];
 921         T fy    = finvr * dr[YY];
 922         T fz    = finvr * dr[ZZ];
 923
 924         /* Add the pair forces to the force array.
 925          * Note that here we might add multiple force components for some atoms
 926          * due to the SIMD padding. But the extra force components are zero.
 927          */
 928         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, fx, fy, fz);
 929         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, fx, fy, fz);
 930     }
 931 }
 932
 933 /*! \brief Calculate all listed pair interactions */
 934 void do_pairs(int                           ftype,
 935               int                           nbonds,
 936               const t_iatom                 iatoms[],
 937               const t_iparams               iparams[],
 938               const rvec                    x[],
 939               rvec4                         f[],
 940               rvec                          fshift[],
 941               const struct t_pbc*           pbc,
 942               const real*                   lambda,
 943               real*                         dvdl,
 944               gmx::ArrayRef<real>           chargeA,
 945               gmx::ArrayRef<real>           chargeB,
 946               gmx::ArrayRef<bool>           atomIsPerturbed,
 947               gmx::ArrayRef<unsigned short> cENER,
 948               const int                     numEnergyGroups,
 949               const t_forcerec*             fr,
 950               const bool                    havePerturbedInteractions,
 951               const gmx::StepWorkload&      stepWork,
 952               gmx_grppairener_t*            grppener,
 953               int*                          global_atom_index)
 954 {
 955     if (ftype == F_LJ14 && fr->ic->vdwtype != VanDerWaalsType::User && !EEL_USER(fr->ic->eeltype)
 956         && !havePerturbedInteractions && (!stepWork.computeVirial && !stepWork.computeEnergy))
 957     {
 958         /* We use a fast code-path for plain LJ 1-4 without FEP.
 959          *
 960          * TODO: Add support for energies (straightforward) and virial
 961          * in the SIMD template. For the virial it's inconvenient to store
 962          * the force sums for the shifts and we should directly calculate
 963          * and sum the virial for the shifts. But we should do this
 964          * at once for the angles and dihedrals as well.
 965          */
 966 #if GMX_SIMD_HAVE_REAL
 967         if (fr->use_simd_kernels)
 968         {
 969             alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
 970             set_pbc_simd(pbc, pbc_simd);
 971
 972             do_pairs_simple<SimdReal, GMX_SIMD_REAL_WIDTH, const real*>(
 973                     nbonds, iatoms, iparams, x, f, pbc_simd, chargeA, fr->ic->epsfac * fr->fudgeQQ);
 974         }
 975         else
 976 #endif
 977         {
 978             /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
 979             t_pbc        pbc_no;
 980             const t_pbc* pbc_nonnull;
 981
 982             if (pbc != nullptr)
 983             {
 984                 pbc_nonnull = pbc;
 985             }
 986             else
 987             {
 988                 set_pbc(&pbc_no, PbcType::No, nullptr);
 989                 pbc_nonnull = &pbc_no;
 990             }
 991
 992             do_pairs_simple<real, 1, const t_pbc*>(
 993                     nbonds, iatoms, iparams, x, f, pbc_nonnull, chargeA, fr->ic->epsfac * fr->fudgeQQ);
 994         }
 995     }
 996     else if (stepWork.computeVirial)
 997     {
 998         do_pairs_general<BondedKernelFlavor::ForcesAndVirialAndEnergy>(ftype,
 999                                                                        nbonds,
1000                                                                        iatoms,
1001                                                                        iparams,
1002                                                                        x,
1003                                                                        f,
1004                                                                        fshift,
1005                                                                        pbc,
1006                                                                        lambda,
1007                                                                        dvdl,
1008                                                                        chargeA,
1009                                                                        chargeB,
1010                                                                        atomIsPerturbed,
1011                                                                        cENER,
1012                                                                        numEnergyGroups,
1013                                                                        fr,
1014                                                                        grppener,
1015                                                                        global_atom_index);
1016     }
1017     else
1018     {
1019         do_pairs_general<BondedKernelFlavor::ForcesAndEnergy>(ftype,
1020                                                               nbonds,
1021                                                               iatoms,
1022                                                               iparams,
1023                                                               x,
1024                                                               f,
1025                                                               fshift,
1026                                                               pbc,
1027                                                               lambda,
1028                                                               dvdl,
1029                                                               chargeA,
1030                                                               chargeB,
1031                                                               atomIsPerturbed,
1032                                                               cENER,
1033                                                               numEnergyGroups,
1034                                                               fr,
1035                                                               grppener,
1036                                                               global_atom_index);
1037     }
1038 }