src/gromacs/listed_forces/pairs.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
   5  * Copyright (c) 2019,2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 /*! \internal \file
  37  *
  38  * \brief This file defines functions for "pair" interactions
  39  * (i.e. listed non-bonded interactions, e.g. 1-4 interactions)
  40  *
  41  * \author Mark Abraham <mark.j.abraham@gmail.com>
  42  *
  43  * \ingroup module_listed_forces
  44  */
  45 #include "gmxpre.h"
  46
  47 #include "pairs.h"
  48
  49 #include <cmath>
  50
  51 #include "gromacs/listed_forces/bonded.h"
  52 #include "gromacs/math/functions.h"
  53 #include "gromacs/math/vec.h"
  54 #include "gromacs/mdtypes/group.h"
  55 #include "gromacs/mdtypes/md_enums.h"
  56 #include "gromacs/mdtypes/nblist.h"
  57 #include "gromacs/mdtypes/simulation_workload.h"
  58 #include "gromacs/pbcutil/ishift.h"
  59 #include "gromacs/pbcutil/mshift.h"
  60 #include "gromacs/pbcutil/pbc.h"
  61 #include "gromacs/pbcutil/pbc_simd.h"
  62 #include "gromacs/simd/simd.h"
  63 #include "gromacs/simd/simd_math.h"
  64 #include "gromacs/simd/vector_operations.h"
  65 #include "gromacs/tables/forcetable.h"
  66 #include "gromacs/utility/basedefinitions.h"
  67 #include "gromacs/utility/fatalerror.h"
  68 #include "gromacs/utility/gmxassert.h"
  69
  70 #include "listed_internal.h"
  71
  72 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  73
  74 /*! \brief Issue a warning if a listed interaction is beyond a table limit */
  75 static void warning_rlimit(const rvec* x, int ai, int aj, int* global_atom_index, real r, real rlimit)
  76 {
  77     gmx_warning(
  78             "Listed nonbonded interaction between particles %d and %d\n"
  79             "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
  80             "This is likely either a 1,4 interaction, or a listed interaction inside\n"
  81             "a smaller molecule you are decoupling during a free energy calculation.\n"
  82             "Since interactions at distances beyond the table cannot be computed,\n"
  83             "they are skipped until they are inside the table limit again. You will\n"
  84             "only see this message once, even if it occurs for several interactions.\n\n"
  85             "IMPORTANT: This should not happen in a stable simulation, so there is\n"
  86             "probably something wrong with your system. Only change the table-extension\n"
  87             "distance in the mdp file if you are really sure that is the reason.\n",
  88             glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
  89
  90     if (debug)
  91     {
  92         fprintf(debug,
  93                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. "
  94                 "Ignored\n",
  95                 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
  96                 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
  97     }
  98 }
  99
 100 /*! \brief Compute the energy and force for a single pair interaction */
 101 static real evaluate_single(real        r2,
 102                             real        tabscale,
 103                             const real* vftab,
 104                             real        tableStride,
 105                             real        qq,
 106                             real        c6,
 107                             real        c12,
 108                             real*       velec,
 109                             real*       vvdw)
 110 {
 111     real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
 112     int  ntab;
 113
 114     /* Do the tabulated interactions - first table lookup */
 115     rinv = gmx::invsqrt(r2);
 116     r    = r2 * rinv;
 117     rtab = r * tabscale;
 118     ntab = static_cast<int>(rtab);
 119     eps  = rtab - ntab;
 120     eps2 = eps * eps;
 121     ntab = static_cast<int>(tableStride * ntab);
 122     /* Electrostatics */
 123     Y     = vftab[ntab];
 124     F     = vftab[ntab + 1];
 125     Geps  = eps * vftab[ntab + 2];
 126     Heps2 = eps2 * vftab[ntab + 3];
 127     Fp    = F + Geps + Heps2;
 128     VVe   = Y + eps * Fp;
 129     FFe   = Fp + Geps + 2.0 * Heps2;
 130     /* Dispersion */
 131     Y     = vftab[ntab + 4];
 132     F     = vftab[ntab + 5];
 133     Geps  = eps * vftab[ntab + 6];
 134     Heps2 = eps2 * vftab[ntab + 7];
 135     Fp    = F + Geps + Heps2;
 136     VVd   = Y + eps * Fp;
 137     FFd   = Fp + Geps + 2.0 * Heps2;
 138     /* Repulsion */
 139     Y     = vftab[ntab + 8];
 140     F     = vftab[ntab + 9];
 141     Geps  = eps * vftab[ntab + 10];
 142     Heps2 = eps2 * vftab[ntab + 11];
 143     Fp    = F + Geps + Heps2;
 144     VVr   = Y + eps * Fp;
 145     FFr   = Fp + Geps + 2.0 * Heps2;
 146
 147     *velec = qq * VVe;
 148     *vvdw  = c6 * VVd + c12 * VVr;
 149
 150     fscal = -(qq * FFe + c6 * FFd + c12 * FFr) * tabscale * rinv;
 151
 152     return fscal;
 153 }
 154
 155 /*! \brief Compute the energy and force for a single pair interaction under FEP */
 156 static real free_energy_evaluate_single(real        r2,
 157                                         real        sc_r_power,
 158                                         real        alpha_coul,
 159                                         real        alpha_vdw,
 160                                         real        tabscale,
 161                                         const real* vftab,
 162                                         real        tableStride,
 163                                         real        qqA,
 164                                         real        c6A,
 165                                         real        c12A,
 166                                         real        qqB,
 167                                         real        c6B,
 168                                         real        c12B,
 169                                         const real  LFC[2],
 170                                         const real  LFV[2],
 171                                         const real  DLF[2],
 172                                         const real  lfac_coul[2],
 173                                         const real  lfac_vdw[2],
 174                                         const real  dlfac_coul[2],
 175                                         const real  dlfac_vdw[2],
 176                                         real        sigma6_def,
 177                                         real        sigma6_min,
 178                                         real        sigma2_def,
 179                                         real        sigma2_min,
 180                                         real*       velectot,
 181                                         real*       vvdwtot,
 182                                         real*       dvdl)
 183 {
 184     real       rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
 185     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2];
 186     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
 187     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
 188     real       fscal_vdw[2], fscal_elec[2];
 189     real       velec[2], vvdw[2];
 190     int        i, ntab;
 191     const real half        = 0.5;
 192     const real minusOne    = -1.0;
 193     const real one         = 1.0;
 194     const real two         = 2.0;
 195     const real six         = 6.0;
 196     const real fourtyeight = 48.0;
 197
 198     qq[0]  = qqA;
 199     qq[1]  = qqB;
 200     c6[0]  = c6A;
 201     c6[1]  = c6B;
 202     c12[0] = c12A;
 203     c12[1] = c12B;
 204
 205     if (sc_r_power == six)
 206     {
 207         rpm2 = r2 * r2;   /* r4 */
 208         rp   = rpm2 * r2; /* r6 */
 209     }
 210     else if (sc_r_power == fourtyeight)
 211     {
 212         rp   = r2 * r2 * r2; /* r6 */
 213         rp   = rp * rp;      /* r12 */
 214         rp   = rp * rp;      /* r24 */
 215         rp   = rp * rp;      /* r48 */
 216         rpm2 = rp / r2;      /* r46 */
 217     }
 218     else
 219     {
 220         rp = std::pow(r2, half * sc_r_power); /* not currently supported as input, but can handle it */
 221         rpm2 = rp / r2;
 222     }
 223
 224     /* Loop over state A(0) and B(1) */
 225     for (i = 0; i < 2; i++)
 226     {
 227         if ((c6[i] > 0) && (c12[i] > 0))
 228         {
 229             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
 230              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
 231              */
 232             sigma6[i] = half * c12[i] / c6[i];
 233             sigma2[i] = std::cbrt(half * c12[i] / c6[i]);
 234             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
 235                what data to store externally.  Can't be fixed without larger scale changes, so not 5.0 */
 236             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
 237             {
 238                 sigma6[i] = sigma6_min;
 239                 sigma2[i] = sigma2_min;
 240             }
 241         }
 242         else
 243         {
 244             sigma6[i] = sigma6_def;
 245             sigma2[i] = sigma2_def;
 246         }
 247         if (sc_r_power == six)
 248         {
 249             sigma_pow[i] = sigma6[i];
 250         }
 251         else if (sc_r_power == fourtyeight)
 252         {
 253             sigma_pow[i] = sigma6[i] * sigma6[i];       /* sigma^12 */
 254             sigma_pow[i] = sigma_pow[i] * sigma_pow[i]; /* sigma^24 */
 255             sigma_pow[i] = sigma_pow[i] * sigma_pow[i]; /* sigma^48 */
 256         }
 257         else
 258         { /* not really supported as input, but in here for testing the general case*/
 259             sigma_pow[i] = std::pow(sigma2[i], sc_r_power / 2);
 260         }
 261     }
 262
 263     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 264     if ((c12[0] > 0) && (c12[1] > 0))
 265     {
 266         alpha_vdw_eff  = 0;
 267         alpha_coul_eff = 0;
 268     }
 269     else
 270     {
 271         alpha_vdw_eff  = alpha_vdw;
 272         alpha_coul_eff = alpha_coul;
 273     }
 274
 275     /* Loop over A and B states again */
 276     for (i = 0; i < 2; i++)
 277     {
 278         fscal_elec[i] = 0;
 279         fscal_vdw[i]  = 0;
 280         velec[i]      = 0;
 281         vvdw[i]       = 0;
 282
 283         /* Only spend time on A or B state if it is non-zero */
 284         if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 285         {
 286             /* Coulomb */
 287             rpinv  = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
 288             r_coul = std::pow(rpinv, minusOne / sc_r_power);
 289
 290             /* Electrostatics table lookup data */
 291             rtab = r_coul * tabscale;
 292             ntab = static_cast<int>(rtab);
 293             eps  = rtab - ntab;
 294             eps2 = eps * eps;
 295             ntab = static_cast<int>(tableStride * ntab);
 296             /* Electrostatics */
 297             Y             = vftab[ntab];
 298             F             = vftab[ntab + 1];
 299             Geps          = eps * vftab[ntab + 2];
 300             Heps2         = eps2 * vftab[ntab + 3];
 301             Fp            = F + Geps + Heps2;
 302             VV            = Y + eps * Fp;
 303             FF            = Fp + Geps + two * Heps2;
 304             velec[i]      = qq[i] * VV;
 305             fscal_elec[i] = -qq[i] * FF * r_coul * rpinv * tabscale;
 306
 307             /* Vdw */
 308             rpinv = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
 309             r_vdw = std::pow(rpinv, minusOne / sc_r_power);
 310             /* Vdw table lookup data */
 311             rtab = r_vdw * tabscale;
 312             ntab = static_cast<int>(rtab);
 313             eps  = rtab - ntab;
 314             eps2 = eps * eps;
 315             ntab = 12 * ntab;
 316             /* Dispersion */
 317             Y            = vftab[ntab + 4];
 318             F            = vftab[ntab + 5];
 319             Geps         = eps * vftab[ntab + 6];
 320             Heps2        = eps2 * vftab[ntab + 7];
 321             Fp           = F + Geps + Heps2;
 322             VV           = Y + eps * Fp;
 323             FF           = Fp + Geps + two * Heps2;
 324             vvdw[i]      = c6[i] * VV;
 325             fscal_vdw[i] = -c6[i] * FF;
 326
 327             /* Repulsion */
 328             Y     = vftab[ntab + 8];
 329             F     = vftab[ntab + 9];
 330             Geps  = eps * vftab[ntab + 10];
 331             Heps2 = eps2 * vftab[ntab + 11];
 332             Fp    = F + Geps + Heps2;
 333             VV    = Y + eps * Fp;
 334             FF    = Fp + Geps + two * Heps2;
 335             vvdw[i] += c12[i] * VV;
 336             fscal_vdw[i] -= c12[i] * FF;
 337             fscal_vdw[i] *= r_vdw * rpinv * tabscale;
 338         }
 339     }
 340     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
 341     /* Assemble A and B states */
 342     velecsum  = 0;
 343     vvdwsum   = 0;
 344     dvdl_coul = 0;
 345     dvdl_vdw  = 0;
 346     fscal     = 0;
 347     for (i = 0; i < 2; i++)
 348     {
 349         velecsum += LFC[i] * velec[i];
 350         vvdwsum += LFV[i] * vvdw[i];
 351
 352         fscal += (LFC[i] * fscal_elec[i] + LFV[i] * fscal_vdw[i]) * rpm2;
 353
 354         dvdl_coul += velec[i] * DLF[i]
 355                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * fscal_elec[i] * sigma_pow[i];
 356         dvdl_vdw += vvdw[i] * DLF[i]
 357                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * fscal_vdw[i] * sigma_pow[i];
 358     }
 359
 360     dvdl[efptCOUL] += dvdl_coul;
 361     dvdl[efptVDW] += dvdl_vdw;
 362
 363     *velectot = velecsum;
 364     *vvdwtot  = vvdwsum;
 365
 366     return fscal;
 367 }
 368
 369 /*! \brief Calculate pair interactions, supports all types and conditions. */
 370 template<BondedKernelFlavor flavor>
 371 static real do_pairs_general(int                   ftype,
 372                              int                   nbonds,
 373                              const t_iatom         iatoms[],
 374                              const t_iparams       iparams[],
 375                              const rvec            x[],
 376                              rvec4                 f[],
 377                              rvec                  fshift[],
 378                              const struct t_pbc*   pbc,
 379                              const struct t_graph* g,
 380                              const real*           lambda,
 381                              real*                 dvdl,
 382                              const t_mdatoms*      md,
 383                              const t_forcerec*     fr,
 384                              gmx_grppairener_t*    grppener,
 385                              int*                  global_atom_index)
 386 {
 387     real            qq, c6, c12;
 388     rvec            dx;
 389     ivec            dt;
 390     int             i, itype, ai, aj, gid;
 391     int             fshift_index;
 392     real            r2;
 393     real            fscal, velec, vvdw;
 394     real*           energygrp_elec;
 395     real*           energygrp_vdw;
 396     static gmx_bool warned_rlimit = FALSE;
 397     /* Free energy stuff */
 398     gmx_bool bFreeEnergy;
 399     real     LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
 400     real     qqB, c6B, c12B, sigma2_def, sigma2_min;
 401
 402     switch (ftype)
 403     {
 404         case F_LJ14:
 405         case F_LJC14_Q:
 406             energygrp_elec = grppener->ener[egCOUL14].data();
 407             energygrp_vdw  = grppener->ener[egLJ14].data();
 408             break;
 409         case F_LJC_PAIRS_NB:
 410             energygrp_elec = grppener->ener[egCOULSR].data();
 411             energygrp_vdw  = grppener->ener[egLJSR].data();
 412             break;
 413         default:
 414             energygrp_elec = nullptr; /* Keep compiler happy */
 415             energygrp_vdw  = nullptr; /* Keep compiler happy */
 416             gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
 417     }
 418
 419     if (fr->efep != efepNO)
 420     {
 421         /* Lambda factor for state A=1-lambda and B=lambda */
 422         LFC[0] = 1.0 - lambda[efptCOUL];
 423         LFV[0] = 1.0 - lambda[efptVDW];
 424         LFC[1] = lambda[efptCOUL];
 425         LFV[1] = lambda[efptVDW];
 426
 427         /*derivative of the lambda factor for state A and B */
 428         DLF[0] = -1;
 429         DLF[1] = 1;
 430
 431         /* precalculate */
 432         sigma2_def = std::cbrt(fr->sc_sigma6_def);
 433         sigma2_min = std::cbrt(fr->sc_sigma6_min);
 434
 435         for (i = 0; i < 2; i++)
 436         {
 437             lfac_coul[i] = (fr->sc_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 438             dlfac_coul[i] =
 439                     DLF[i] * fr->sc_power / fr->sc_r_power * (fr->sc_power == 2 ? (1 - LFC[i]) : 1);
 440             lfac_vdw[i] = (fr->sc_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 441             dlfac_vdw[i] =
 442                     DLF[i] * fr->sc_power / fr->sc_r_power * (fr->sc_power == 2 ? (1 - LFV[i]) : 1);
 443         }
 444     }
 445     else
 446     {
 447         sigma2_min = sigma2_def = 0;
 448     }
 449
 450     /* TODO This code depends on the logic in tables.c that constructs
 451        the table layout, which should be made explicit in future
 452        cleanup. */
 453     GMX_ASSERT(
 454             etiNR == 3,
 455             "Pair-interaction code that uses GROMACS interaction tables supports exactly 3 tables");
 456     GMX_ASSERT(
 457             fr->pairsTable->interaction == GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP,
 458             "Pair interaction kernels need a table with Coulomb, repulsion and dispersion entries");
 459
 460     const real epsfac = fr->ic->epsfac;
 461
 462     bFreeEnergy = FALSE;
 463     for (i = 0; (i < nbonds);)
 464     {
 465         itype = iatoms[i++];
 466         ai    = iatoms[i++];
 467         aj    = iatoms[i++];
 468         gid   = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
 469
 470         /* Get parameters */
 471         switch (ftype)
 472         {
 473             case F_LJ14:
 474                 bFreeEnergy = (fr->efep != efepNO
 475                                && (((md->nPerturbed != 0) && (md->bPerturbed[ai] || md->bPerturbed[aj]))
 476                                    || iparams[itype].lj14.c6A != iparams[itype].lj14.c6B
 477                                    || iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
 478                 qq          = md->chargeA[ai] * md->chargeA[aj] * epsfac * fr->fudgeQQ;
 479                 c6          = iparams[itype].lj14.c6A;
 480                 c12         = iparams[itype].lj14.c12A;
 481                 break;
 482             case F_LJC14_Q:
 483                 qq = iparams[itype].ljc14.qi * iparams[itype].ljc14.qj * epsfac
 484                      * iparams[itype].ljc14.fqq;
 485                 c6  = iparams[itype].ljc14.c6;
 486                 c12 = iparams[itype].ljc14.c12;
 487                 break;
 488             case F_LJC_PAIRS_NB:
 489                 qq  = iparams[itype].ljcnb.qi * iparams[itype].ljcnb.qj * epsfac;
 490                 c6  = iparams[itype].ljcnb.c6;
 491                 c12 = iparams[itype].ljcnb.c12;
 492                 break;
 493             default:
 494                 /* Cannot happen since we called gmx_fatal() above in this case */
 495                 qq = c6 = c12 = 0; /* Keep compiler happy */
 496                 break;
 497         }
 498
 499         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
 500          * included in the general nfbp array now. This means the tables are scaled down by the
 501          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
 502          * be scaled up.
 503          */
 504         c6 *= 6.0;
 505         c12 *= 12.0;
 506
 507         /* Do we need to apply full periodic boundary conditions? */
 508         if (fr->bMolPBC)
 509         {
 510             fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
 511         }
 512         else
 513         {
 514             fshift_index = CENTRAL;
 515             rvec_sub(x[ai], x[aj], dx);
 516         }
 517         r2 = norm2(dx);
 518
 519         if (r2 >= fr->pairsTable->r * fr->pairsTable->r)
 520         {
 521             /* This check isn't race free. But it doesn't matter because if a race occurs the only
 522              * disadvantage is that the warning is printed twice */
 523             if (!warned_rlimit)
 524             {
 525                 warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->pairsTable->r);
 526                 warned_rlimit = TRUE;
 527             }
 528             continue;
 529         }
 530
 531         if (bFreeEnergy)
 532         {
 533             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
 534             qqB  = md->chargeB[ai] * md->chargeB[aj] * epsfac * fr->fudgeQQ;
 535             c6B  = iparams[itype].lj14.c6B * 6.0;
 536             c12B = iparams[itype].lj14.c12B * 12.0;
 537
 538             fscal = free_energy_evaluate_single(
 539                     r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw, fr->pairsTable->scale,
 540                     fr->pairsTable->data, fr->pairsTable->stride, qq, c6, c12, qqB, c6B, c12B, LFC,
 541                     LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw, fr->sc_sigma6_def,
 542                     fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
 543         }
 544         else
 545         {
 546             /* Evaluate tabulated interaction without free energy */
 547             fscal = evaluate_single(r2, fr->pairsTable->scale, fr->pairsTable->data,
 548                                     fr->pairsTable->stride, qq, c6, c12, &velec, &vvdw);
 549         }
 550
 551         energygrp_elec[gid] += velec;
 552         energygrp_vdw[gid] += vvdw;
 553         svmul(fscal, dx, dx);
 554
 555         /* Add the forces */
 556         rvec_inc(f[ai], dx);
 557         rvec_dec(f[aj], dx);
 558
 559         if (computeVirial(flavor))
 560         {
 561             if (g)
 562             {
 563                 /* Correct the shift forces using the graph */
 564                 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 565                 fshift_index = IVEC2IS(dt);
 566             }
 567             if (fshift_index != CENTRAL)
 568             {
 569                 rvec_inc(fshift[fshift_index], dx);
 570                 rvec_dec(fshift[CENTRAL], dx);
 571             }
 572         }
 573     }
 574     return 0.0;
 575 }
 576
 577 /*! \brief Calculate pairs, only for plain-LJ + plain Coulomb normal type.
 578  *
 579  * This function is templated for real/SimdReal and for optimization.
 580  */
 581 template<typename T, int pack_size, typename pbc_type>
 582 static void do_pairs_simple(int              nbonds,
 583                             const t_iatom    iatoms[],
 584                             const t_iparams  iparams[],
 585                             const rvec       x[],
 586                             rvec4            f[],
 587                             const pbc_type   pbc,
 588                             const t_mdatoms* md,
 589                             const real       scale_factor)
 590 {
 591     const int nfa1 = 1 + 2;
 592
 593     T six(6);
 594     T twelve(12);
 595     T ef(scale_factor);
 596
 597 #if GMX_SIMD_HAVE_REAL
 598     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[pack_size];
 599     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[pack_size];
 600     alignas(GMX_SIMD_ALIGNMENT) real         coeff[3 * pack_size];
 601 #else
 602     std::int32_t ai[pack_size];
 603     std::int32_t aj[pack_size];
 604     real         coeff[3 * pack_size];
 605 #endif
 606
 607     /* nbonds is #pairs*nfa1, here we step pack_size pairs */
 608     for (int i = 0; i < nbonds; i += pack_size * nfa1)
 609     {
 610         /* Collect atoms for pack_size pairs.
 611          * iu indexes into iatoms, we should not let iu go beyond nbonds.
 612          */
 613         int iu = i;
 614         for (int s = 0; s < pack_size; s++)
 615         {
 616             int itype = iatoms[iu];
 617             ai[s]     = iatoms[iu + 1];
 618             aj[s]     = iatoms[iu + 2];
 619
 620             if (i + s * nfa1 < nbonds)
 621             {
 622                 coeff[0 * pack_size + s] = iparams[itype].lj14.c6A;
 623                 coeff[1 * pack_size + s] = iparams[itype].lj14.c12A;
 624                 coeff[2 * pack_size + s] = md->chargeA[ai[s]] * md->chargeA[aj[s]];
 625
 626                 /* Avoid indexing the iatoms array out of bounds.
 627                  * We pad the coordinate indices with the last atom pair.
 628                  */
 629                 if (iu + nfa1 < nbonds)
 630                 {
 631                     iu += nfa1;
 632                 }
 633             }
 634             else
 635             {
 636                 /* Pad the coefficient arrays with zeros to get zero forces */
 637                 coeff[0 * pack_size + s] = 0;
 638                 coeff[1 * pack_size + s] = 0;
 639                 coeff[2 * pack_size + s] = 0;
 640             }
 641         }
 642
 643         /* Load the coordinates */
 644         T xi[DIM], xj[DIM];
 645         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]);
 646         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]);
 647
 648         T c6  = load<T>(coeff + 0 * pack_size);
 649         T c12 = load<T>(coeff + 1 * pack_size);
 650         T qq  = load<T>(coeff + 2 * pack_size);
 651
 652         /* We could save these operations by storing 6*C6,12*C12 */
 653         c6  = six * c6;
 654         c12 = twelve * c12;
 655
 656         T dr[DIM];
 657         pbc_dx_aiuc(pbc, xi, xj, dr);
 658
 659         T rsq   = dr[XX] * dr[XX] + dr[YY] * dr[YY] + dr[ZZ] * dr[ZZ];
 660         T rinv  = gmx::invsqrt(rsq);
 661         T rinv2 = rinv * rinv;
 662         T rinv6 = rinv2 * rinv2 * rinv2;
 663
 664         /* Calculate the Coulomb force * r */
 665         T cfr = ef * qq * rinv;
 666
 667         /* Calculate the LJ force * r and add it to the Coulomb part */
 668         T fr = gmx::fma(fms(c12, rinv6, c6), rinv6, cfr);
 669
 670         T finvr = fr * rinv2;
 671         T fx    = finvr * dr[XX];
 672         T fy    = finvr * dr[YY];
 673         T fz    = finvr * dr[ZZ];
 674
 675         /* Add the pair forces to the force array.
 676          * Note that here we might add multiple force components for some atoms
 677          * due to the SIMD padding. But the extra force components are zero.
 678          */
 679         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, fx, fy, fz);
 680         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, fx, fy, fz);
 681     }
 682 }
 683
 684 /*! \brief Calculate all listed pair interactions */
 685 void do_pairs(int                      ftype,
 686               int                      nbonds,
 687               const t_iatom            iatoms[],
 688               const t_iparams          iparams[],
 689               const rvec               x[],
 690               rvec4                    f[],
 691               rvec                     fshift[],
 692               const struct t_pbc*      pbc,
 693               const struct t_graph*    g,
 694               const real*              lambda,
 695               real*                    dvdl,
 696               const t_mdatoms*         md,
 697               const t_forcerec*        fr,
 698               const bool               havePerturbedInteractions,
 699               const gmx::StepWorkload& stepWork,
 700               gmx_grppairener_t*       grppener,
 701               int*                     global_atom_index)
 702 {
 703     if (ftype == F_LJ14 && fr->ic->vdwtype != evdwUSER && !EEL_USER(fr->ic->eeltype)
 704         && !havePerturbedInteractions && (!stepWork.computeVirial && !stepWork.computeEnergy))
 705     {
 706         /* We use a fast code-path for plain LJ 1-4 without FEP.
 707          *
 708          * TODO: Add support for energies (straightforward) and virial
 709          * in the SIMD template. For the virial it's inconvenient to store
 710          * the force sums for the shifts and we should directly calculate
 711          * and sum the virial for the shifts. But we should do this
 712          * at once for the angles and dihedrals as well.
 713          */
 714 #if GMX_SIMD_HAVE_REAL
 715         if (fr->use_simd_kernels)
 716         {
 717             alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
 718             set_pbc_simd(pbc, pbc_simd);
 719
 720             do_pairs_simple<SimdReal, GMX_SIMD_REAL_WIDTH, const real*>(
 721                     nbonds, iatoms, iparams, x, f, pbc_simd, md, fr->ic->epsfac * fr->fudgeQQ);
 722         }
 723         else
 724 #endif
 725         {
 726             /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
 727             t_pbc        pbc_no;
 728             const t_pbc* pbc_nonnull;
 729
 730             if (pbc != nullptr)
 731             {
 732                 pbc_nonnull = pbc;
 733             }
 734             else
 735             {
 736                 set_pbc(&pbc_no, PbcType::No, nullptr);
 737                 pbc_nonnull = &pbc_no;
 738             }
 739
 740             do_pairs_simple<real, 1, const t_pbc*>(nbonds, iatoms, iparams, x, f, pbc_nonnull, md,
 741                                                    fr->ic->epsfac * fr->fudgeQQ);
 742         }
 743     }
 744     else if (stepWork.computeVirial)
 745     {
 746         do_pairs_general<BondedKernelFlavor::ForcesAndVirialAndEnergy>(
 747                 ftype, nbonds, iatoms, iparams, x, f, fshift, pbc, g, lambda, dvdl, md, fr,
 748                 grppener, global_atom_index);
 749     }
 750     else
 751     {
 752         do_pairs_general<BondedKernelFlavor::ForcesAndEnergy>(ftype, nbonds, iatoms, iparams, x, f,
 753                                                               fshift, pbc, g, lambda, dvdl, md, fr,
 754                                                               grppener, global_atom_index);
 755     }
 756 }