Merge branch 'release-2020' into master
[alexxy/gromacs.git] / src / gromacs / listed_forces / pairs.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5  * Copyright (c) 2019,2020, by the GROMACS development team, led by
6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7  * and including many others, as listed in the AUTHORS file in the
8  * top-level source directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36 /*! \internal \file
37  *
38  * \brief This file defines functions for "pair" interactions
39  * (i.e. listed non-bonded interactions, e.g. 1-4 interactions)
40  *
41  * \author Mark Abraham <mark.j.abraham@gmail.com>
42  *
43  * \ingroup module_listed_forces
44  */
45 #include "gmxpre.h"
46
47 #include "pairs.h"
48
49 #include <cmath>
50
51 #include "gromacs/listed_forces/bonded.h"
52 #include "gromacs/math/functions.h"
53 #include "gromacs/math/vec.h"
54 #include "gromacs/mdtypes/forcerec.h"
55 #include "gromacs/mdtypes/group.h"
56 #include "gromacs/mdtypes/interaction_const.h"
57 #include "gromacs/mdtypes/md_enums.h"
58 #include "gromacs/mdtypes/mdatom.h"
59 #include "gromacs/mdtypes/nblist.h"
60 #include "gromacs/mdtypes/simulation_workload.h"
61 #include "gromacs/pbcutil/ishift.h"
62 #include "gromacs/pbcutil/pbc.h"
63 #include "gromacs/pbcutil/pbc_simd.h"
64 #include "gromacs/simd/simd.h"
65 #include "gromacs/simd/simd_math.h"
66 #include "gromacs/simd/vector_operations.h"
67 #include "gromacs/tables/forcetable.h"
68 #include "gromacs/utility/basedefinitions.h"
69 #include "gromacs/utility/fatalerror.h"
70 #include "gromacs/utility/gmxassert.h"
71
72 #include "listed_internal.h"
73
74 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
75
76 /*! \brief Issue a warning if a listed interaction is beyond a table limit */
77 static void warning_rlimit(const rvec* x, int ai, int aj, int* global_atom_index, real r, real rlimit)
78 {
79     gmx_warning(
80             "Listed nonbonded interaction between particles %d and %d\n"
81             "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
82             "This is likely either a 1,4 interaction, or a listed interaction inside\n"
83             "a smaller molecule you are decoupling during a free energy calculation.\n"
84             "Since interactions at distances beyond the table cannot be computed,\n"
85             "they are skipped until they are inside the table limit again. You will\n"
86             "only see this message once, even if it occurs for several interactions.\n\n"
87             "IMPORTANT: This should not happen in a stable simulation, so there is\n"
88             "probably something wrong with your system. Only change the table-extension\n"
89             "distance in the mdp file if you are really sure that is the reason.\n",
90             glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
91
92     if (debug)
93     {
94         fprintf(debug,
95                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. "
96                 "Ignored\n",
97                 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
98                 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
99     }
100 }
101
102 /*! \brief Compute the energy and force for a single pair interaction */
103 static real evaluate_single(real        r2,
104                             real        tabscale,
105                             const real* vftab,
106                             real        tableStride,
107                             real        qq,
108                             real        c6,
109                             real        c12,
110                             real*       velec,
111                             real*       vvdw)
112 {
113     real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
114     int  ntab;
115
116     /* Do the tabulated interactions - first table lookup */
117     rinv = gmx::invsqrt(r2);
118     r    = r2 * rinv;
119     rtab = r * tabscale;
120     ntab = static_cast<int>(rtab);
121     eps  = rtab - ntab;
122     eps2 = eps * eps;
123     ntab = static_cast<int>(tableStride * ntab);
124     /* Electrostatics */
125     Y     = vftab[ntab];
126     F     = vftab[ntab + 1];
127     Geps  = eps * vftab[ntab + 2];
128     Heps2 = eps2 * vftab[ntab + 3];
129     Fp    = F + Geps + Heps2;
130     VVe   = Y + eps * Fp;
131     FFe   = Fp + Geps + 2.0 * Heps2;
132     /* Dispersion */
133     Y     = vftab[ntab + 4];
134     F     = vftab[ntab + 5];
135     Geps  = eps * vftab[ntab + 6];
136     Heps2 = eps2 * vftab[ntab + 7];
137     Fp    = F + Geps + Heps2;
138     VVd   = Y + eps * Fp;
139     FFd   = Fp + Geps + 2.0 * Heps2;
140     /* Repulsion */
141     Y     = vftab[ntab + 8];
142     F     = vftab[ntab + 9];
143     Geps  = eps * vftab[ntab + 10];
144     Heps2 = eps2 * vftab[ntab + 11];
145     Fp    = F + Geps + Heps2;
146     VVr   = Y + eps * Fp;
147     FFr   = Fp + Geps + 2.0 * Heps2;
148
149     *velec = qq * VVe;
150     *vvdw  = c6 * VVd + c12 * VVr;
151
152     fscal = -(qq * FFe + c6 * FFd + c12 * FFr) * tabscale * rinv;
153
154     return fscal;
155 }
156
157 static inline real sixthRoot(const real r)
158 {
159     return gmx::invsqrt(std::cbrt(r));
160 }
161
162 /*! \brief Compute the energy and force for a single pair interaction under FEP */
163 static real free_energy_evaluate_single(real                                           r2,
164                                         const interaction_const_t::SoftCoreParameters& scParams,
165                                         real                                           tabscale,
166                                         const real*                                    vftab,
167                                         real                                           tableStride,
168                                         real                                           qqA,
169                                         real                                           c6A,
170                                         real                                           c12A,
171                                         real                                           qqB,
172                                         real                                           c6B,
173                                         real                                           c12B,
174                                         const real                                     LFC[2],
175                                         const real                                     LFV[2],
176                                         const real                                     DLF[2],
177                                         const real                                     lfac_coul[2],
178                                         const real                                     lfac_vdw[2],
179                                         const real dlfac_coul[2],
180                                         const real dlfac_vdw[2],
181                                         real*      velectot,
182                                         real*      vvdwtot,
183                                         real*      dvdl)
184 {
185     real       rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
186     real       qq[2], c6[2], c12[2], sigma6[2], sigma_pow[2];
187     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
188     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
189     real       fscal_vdw[2], fscal_elec[2];
190     real       velec[2], vvdw[2];
191     int        i, ntab;
192     const real half = 0.5_real;
193     const real one  = 1.0_real;
194     const real two  = 2.0_real;
195
196     qq[0]  = qqA;
197     qq[1]  = qqB;
198     c6[0]  = c6A;
199     c6[1]  = c6B;
200     c12[0] = c12A;
201     c12[1] = c12B;
202
203     const real rpm2 = r2 * r2;   /* r4 */
204     const real rp   = rpm2 * r2; /* r6 */
205
206     /* Loop over state A(0) and B(1) */
207     for (i = 0; i < 2; i++)
208     {
209         if ((c6[i] > 0) && (c12[i] > 0))
210         {
211             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
212              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
213              */
214             sigma6[i] = half * c12[i] / c6[i];
215             if (sigma6[i] < scParams.sigma6Minimum) /* for disappearing coul and vdw with soft core at the same time */
216             {
217                 sigma6[i] = scParams.sigma6Minimum;
218             }
219         }
220         else
221         {
222             sigma6[i] = scParams.sigma6WithInvalidSigma;
223         }
224         sigma_pow[i] = sigma6[i];
225     }
226
227     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
228     if ((c12[0] > 0) && (c12[1] > 0))
229     {
230         alpha_vdw_eff  = 0;
231         alpha_coul_eff = 0;
232     }
233     else
234     {
235         alpha_vdw_eff  = scParams.alphaVdw;
236         alpha_coul_eff = scParams.alphaCoulomb;
237     }
238
239     /* Loop over A and B states again */
240     for (i = 0; i < 2; i++)
241     {
242         fscal_elec[i] = 0;
243         fscal_vdw[i]  = 0;
244         velec[i]      = 0;
245         vvdw[i]       = 0;
246
247         /* Only spend time on A or B state if it is non-zero */
248         if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
249         {
250             /* Coulomb */
251             rpinv  = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
252             r_coul = sixthRoot(rpinv);
253
254             /* Electrostatics table lookup data */
255             rtab = r_coul * tabscale;
256             ntab = static_cast<int>(rtab);
257             eps  = rtab - ntab;
258             eps2 = eps * eps;
259             ntab = static_cast<int>(tableStride * ntab);
260             /* Electrostatics */
261             Y             = vftab[ntab];
262             F             = vftab[ntab + 1];
263             Geps          = eps * vftab[ntab + 2];
264             Heps2         = eps2 * vftab[ntab + 3];
265             Fp            = F + Geps + Heps2;
266             VV            = Y + eps * Fp;
267             FF            = Fp + Geps + two * Heps2;
268             velec[i]      = qq[i] * VV;
269             fscal_elec[i] = -qq[i] * FF * r_coul * rpinv * tabscale;
270
271             /* Vdw */
272             rpinv = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
273             r_vdw = sixthRoot(rpinv);
274             /* Vdw table lookup data */
275             rtab = r_vdw * tabscale;
276             ntab = static_cast<int>(rtab);
277             eps  = rtab - ntab;
278             eps2 = eps * eps;
279             ntab = 12 * ntab;
280             /* Dispersion */
281             Y            = vftab[ntab + 4];
282             F            = vftab[ntab + 5];
283             Geps         = eps * vftab[ntab + 6];
284             Heps2        = eps2 * vftab[ntab + 7];
285             Fp           = F + Geps + Heps2;
286             VV           = Y + eps * Fp;
287             FF           = Fp + Geps + two * Heps2;
288             vvdw[i]      = c6[i] * VV;
289             fscal_vdw[i] = -c6[i] * FF;
290
291             /* Repulsion */
292             Y     = vftab[ntab + 8];
293             F     = vftab[ntab + 9];
294             Geps  = eps * vftab[ntab + 10];
295             Heps2 = eps2 * vftab[ntab + 11];
296             Fp    = F + Geps + Heps2;
297             VV    = Y + eps * Fp;
298             FF    = Fp + Geps + two * Heps2;
299             vvdw[i] += c12[i] * VV;
300             fscal_vdw[i] -= c12[i] * FF;
301             fscal_vdw[i] *= r_vdw * rpinv * tabscale;
302         }
303     }
304     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
305     /* Assemble A and B states */
306     velecsum  = 0;
307     vvdwsum   = 0;
308     dvdl_coul = 0;
309     dvdl_vdw  = 0;
310     fscal     = 0;
311     for (i = 0; i < 2; i++)
312     {
313         velecsum += LFC[i] * velec[i];
314         vvdwsum += LFV[i] * vvdw[i];
315
316         fscal += (LFC[i] * fscal_elec[i] + LFV[i] * fscal_vdw[i]) * rpm2;
317
318         dvdl_coul += velec[i] * DLF[i]
319                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * fscal_elec[i] * sigma_pow[i];
320         dvdl_vdw += vvdw[i] * DLF[i]
321                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * fscal_vdw[i] * sigma_pow[i];
322     }
323
324     dvdl[efptCOUL] += dvdl_coul;
325     dvdl[efptVDW] += dvdl_vdw;
326
327     *velectot = velecsum;
328     *vvdwtot  = vvdwsum;
329
330     return fscal;
331 }
332
333 /*! \brief Calculate pair interactions, supports all types and conditions. */
334 template<BondedKernelFlavor flavor>
335 static real do_pairs_general(int                 ftype,
336                              int                 nbonds,
337                              const t_iatom       iatoms[],
338                              const t_iparams     iparams[],
339                              const rvec          x[],
340                              rvec4               f[],
341                              rvec                fshift[],
342                              const struct t_pbc* pbc,
343                              const real*         lambda,
344                              real*               dvdl,
345                              const t_mdatoms*    md,
346                              const t_forcerec*   fr,
347                              gmx_grppairener_t*  grppener,
348                              int*                global_atom_index)
349 {
350     real            qq, c6, c12;
351     rvec            dx;
352     int             i, itype, ai, aj, gid;
353     int             fshift_index;
354     real            r2;
355     real            fscal, velec, vvdw;
356     real*           energygrp_elec;
357     real*           energygrp_vdw;
358     static gmx_bool warned_rlimit = FALSE;
359     /* Free energy stuff */
360     gmx_bool   bFreeEnergy;
361     real       LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
362     real       qqB, c6B, c12B;
363     const real oneSixth = 1.0_real / 6.0_real;
364
365     switch (ftype)
366     {
367         case F_LJ14:
368         case F_LJC14_Q:
369             energygrp_elec = grppener->ener[egCOUL14].data();
370             energygrp_vdw  = grppener->ener[egLJ14].data();
371             break;
372         case F_LJC_PAIRS_NB:
373             energygrp_elec = grppener->ener[egCOULSR].data();
374             energygrp_vdw  = grppener->ener[egLJSR].data();
375             break;
376         default:
377             energygrp_elec = nullptr; /* Keep compiler happy */
378             energygrp_vdw  = nullptr; /* Keep compiler happy */
379             gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
380     }
381
382     if (fr->efep != efepNO)
383     {
384         /* Lambda factor for state A=1-lambda and B=lambda */
385         LFC[0] = 1.0 - lambda[efptCOUL];
386         LFV[0] = 1.0 - lambda[efptVDW];
387         LFC[1] = lambda[efptCOUL];
388         LFV[1] = lambda[efptVDW];
389
390         /*derivative of the lambda factor for state A and B */
391         DLF[0] = -1;
392         DLF[1] = 1;
393
394         GMX_ASSERT(fr->ic->softCoreParameters, "We need soft-core parameters");
395         const auto& scParams = *fr->ic->softCoreParameters;
396
397         for (i = 0; i < 2; i++)
398         {
399             lfac_coul[i] = (scParams.lambdaPower == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
400             dlfac_coul[i] = DLF[i] * scParams.lambdaPower * oneSixth
401                             * (scParams.lambdaPower == 2 ? (1 - LFC[i]) : 1);
402             lfac_vdw[i]  = (scParams.lambdaPower == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
403             dlfac_vdw[i] = DLF[i] * scParams.lambdaPower * oneSixth
404                            * (scParams.lambdaPower == 2 ? (1 - LFV[i]) : 1);
405         }
406     }
407
408     /* TODO This code depends on the logic in tables.c that constructs
409        the table layout, which should be made explicit in future
410        cleanup. */
411     GMX_ASSERT(
412             etiNR == 3,
413             "Pair-interaction code that uses GROMACS interaction tables supports exactly 3 tables");
414     GMX_ASSERT(
415             fr->pairsTable->interaction == GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP,
416             "Pair interaction kernels need a table with Coulomb, repulsion and dispersion entries");
417
418     const real epsfac = fr->ic->epsfac;
419
420     bFreeEnergy = FALSE;
421     for (i = 0; (i < nbonds);)
422     {
423         itype = iatoms[i++];
424         ai    = iatoms[i++];
425         aj    = iatoms[i++];
426         gid   = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
427
428         /* Get parameters */
429         switch (ftype)
430         {
431             case F_LJ14:
432                 bFreeEnergy = (fr->efep != efepNO
433                                && (((md->nPerturbed != 0) && (md->bPerturbed[ai] || md->bPerturbed[aj]))
434                                    || iparams[itype].lj14.c6A != iparams[itype].lj14.c6B
435                                    || iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
436                 qq          = md->chargeA[ai] * md->chargeA[aj] * epsfac * fr->fudgeQQ;
437                 c6          = iparams[itype].lj14.c6A;
438                 c12         = iparams[itype].lj14.c12A;
439                 break;
440             case F_LJC14_Q:
441                 qq = iparams[itype].ljc14.qi * iparams[itype].ljc14.qj * epsfac
442                      * iparams[itype].ljc14.fqq;
443                 c6  = iparams[itype].ljc14.c6;
444                 c12 = iparams[itype].ljc14.c12;
445                 break;
446             case F_LJC_PAIRS_NB:
447                 qq  = iparams[itype].ljcnb.qi * iparams[itype].ljcnb.qj * epsfac;
448                 c6  = iparams[itype].ljcnb.c6;
449                 c12 = iparams[itype].ljcnb.c12;
450                 break;
451             default:
452                 /* Cannot happen since we called gmx_fatal() above in this case */
453                 qq = c6 = c12 = 0; /* Keep compiler happy */
454                 break;
455         }
456
457         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
458          * included in the general nfbp array now. This means the tables are scaled down by the
459          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
460          * be scaled up.
461          */
462         c6 *= 6.0;
463         c12 *= 12.0;
464
465         /* Do we need to apply full periodic boundary conditions? */
466         if (fr->bMolPBC)
467         {
468             fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
469         }
470         else
471         {
472             fshift_index = CENTRAL;
473             rvec_sub(x[ai], x[aj], dx);
474         }
475         r2 = norm2(dx);
476
477         if (r2 >= fr->pairsTable->r * fr->pairsTable->r)
478         {
479             /* This check isn't race free. But it doesn't matter because if a race occurs the only
480              * disadvantage is that the warning is printed twice */
481             if (!warned_rlimit)
482             {
483                 warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->pairsTable->r);
484                 warned_rlimit = TRUE;
485             }
486             continue;
487         }
488
489         if (bFreeEnergy)
490         {
491             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
492             qqB  = md->chargeB[ai] * md->chargeB[aj] * epsfac * fr->fudgeQQ;
493             c6B  = iparams[itype].lj14.c6B * 6.0;
494             c12B = iparams[itype].lj14.c12B * 12.0;
495
496             fscal = free_energy_evaluate_single(
497                     r2, *fr->ic->softCoreParameters, fr->pairsTable->scale,
498                     fr->pairsTable->data.data(), fr->pairsTable->stride, qq, c6, c12, qqB, c6B, c12B,
499                     LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw, &velec, &vvdw, dvdl);
500         }
501         else
502         {
503             /* Evaluate tabulated interaction without free energy */
504             fscal = evaluate_single(r2, fr->pairsTable->scale, fr->pairsTable->data.data(),
505                                     fr->pairsTable->stride, qq, c6, c12, &velec, &vvdw);
506         }
507
508         energygrp_elec[gid] += velec;
509         energygrp_vdw[gid] += vvdw;
510         svmul(fscal, dx, dx);
511
512         /* Add the forces */
513         rvec_inc(f[ai], dx);
514         rvec_dec(f[aj], dx);
515
516         if (computeVirial(flavor))
517         {
518             if (fshift_index != CENTRAL)
519             {
520                 rvec_inc(fshift[fshift_index], dx);
521                 rvec_dec(fshift[CENTRAL], dx);
522             }
523         }
524     }
525     return 0.0;
526 }
527
528 /*! \brief Calculate pairs, only for plain-LJ + plain Coulomb normal type.
529  *
530  * This function is templated for real/SimdReal and for optimization.
531  */
532 template<typename T, int pack_size, typename pbc_type>
533 static void do_pairs_simple(int              nbonds,
534                             const t_iatom    iatoms[],
535                             const t_iparams  iparams[],
536                             const rvec       x[],
537                             rvec4            f[],
538                             const pbc_type   pbc,
539                             const t_mdatoms* md,
540                             const real       scale_factor)
541 {
542     const int nfa1 = 1 + 2;
543
544     T six(6);
545     T twelve(12);
546     T ef(scale_factor);
547
548 #if GMX_SIMD_HAVE_REAL
549     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[pack_size];
550     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[pack_size];
551     alignas(GMX_SIMD_ALIGNMENT) real         coeff[3 * pack_size];
552 #else
553     std::int32_t ai[pack_size];
554     std::int32_t aj[pack_size];
555     real         coeff[3 * pack_size];
556 #endif
557
558     /* nbonds is #pairs*nfa1, here we step pack_size pairs */
559     for (int i = 0; i < nbonds; i += pack_size * nfa1)
560     {
561         /* Collect atoms for pack_size pairs.
562          * iu indexes into iatoms, we should not let iu go beyond nbonds.
563          */
564         int iu = i;
565         for (int s = 0; s < pack_size; s++)
566         {
567             int itype = iatoms[iu];
568             ai[s]     = iatoms[iu + 1];
569             aj[s]     = iatoms[iu + 2];
570
571             if (i + s * nfa1 < nbonds)
572             {
573                 coeff[0 * pack_size + s] = iparams[itype].lj14.c6A;
574                 coeff[1 * pack_size + s] = iparams[itype].lj14.c12A;
575                 coeff[2 * pack_size + s] = md->chargeA[ai[s]] * md->chargeA[aj[s]];
576
577                 /* Avoid indexing the iatoms array out of bounds.
578                  * We pad the coordinate indices with the last atom pair.
579                  */
580                 if (iu + nfa1 < nbonds)
581                 {
582                     iu += nfa1;
583                 }
584             }
585             else
586             {
587                 /* Pad the coefficient arrays with zeros to get zero forces */
588                 coeff[0 * pack_size + s] = 0;
589                 coeff[1 * pack_size + s] = 0;
590                 coeff[2 * pack_size + s] = 0;
591             }
592         }
593
594         /* Load the coordinates */
595         T xi[DIM], xj[DIM];
596         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]);
597         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]);
598
599         T c6  = load<T>(coeff + 0 * pack_size);
600         T c12 = load<T>(coeff + 1 * pack_size);
601         T qq  = load<T>(coeff + 2 * pack_size);
602
603         /* We could save these operations by storing 6*C6,12*C12 */
604         c6  = six * c6;
605         c12 = twelve * c12;
606
607         T dr[DIM];
608         pbc_dx_aiuc(pbc, xi, xj, dr);
609
610         T rsq   = dr[XX] * dr[XX] + dr[YY] * dr[YY] + dr[ZZ] * dr[ZZ];
611         T rinv  = gmx::invsqrt(rsq);
612         T rinv2 = rinv * rinv;
613         T rinv6 = rinv2 * rinv2 * rinv2;
614
615         /* Calculate the Coulomb force * r */
616         T cfr = ef * qq * rinv;
617
618         /* Calculate the LJ force * r and add it to the Coulomb part */
619         T fr = gmx::fma(fms(c12, rinv6, c6), rinv6, cfr);
620
621         T finvr = fr * rinv2;
622         T fx    = finvr * dr[XX];
623         T fy    = finvr * dr[YY];
624         T fz    = finvr * dr[ZZ];
625
626         /* Add the pair forces to the force array.
627          * Note that here we might add multiple force components for some atoms
628          * due to the SIMD padding. But the extra force components are zero.
629          */
630         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, fx, fy, fz);
631         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, fx, fy, fz);
632     }
633 }
634
635 /*! \brief Calculate all listed pair interactions */
636 void do_pairs(int                      ftype,
637               int                      nbonds,
638               const t_iatom            iatoms[],
639               const t_iparams          iparams[],
640               const rvec               x[],
641               rvec4                    f[],
642               rvec                     fshift[],
643               const struct t_pbc*      pbc,
644               const real*              lambda,
645               real*                    dvdl,
646               const t_mdatoms*         md,
647               const t_forcerec*        fr,
648               const bool               havePerturbedInteractions,
649               const gmx::StepWorkload& stepWork,
650               gmx_grppairener_t*       grppener,
651               int*                     global_atom_index)
652 {
653     if (ftype == F_LJ14 && fr->ic->vdwtype != evdwUSER && !EEL_USER(fr->ic->eeltype)
654         && !havePerturbedInteractions && (!stepWork.computeVirial && !stepWork.computeEnergy))
655     {
656         /* We use a fast code-path for plain LJ 1-4 without FEP.
657          *
658          * TODO: Add support for energies (straightforward) and virial
659          * in the SIMD template. For the virial it's inconvenient to store
660          * the force sums for the shifts and we should directly calculate
661          * and sum the virial for the shifts. But we should do this
662          * at once for the angles and dihedrals as well.
663          */
664 #if GMX_SIMD_HAVE_REAL
665         if (fr->use_simd_kernels)
666         {
667             alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
668             set_pbc_simd(pbc, pbc_simd);
669
670             do_pairs_simple<SimdReal, GMX_SIMD_REAL_WIDTH, const real*>(
671                     nbonds, iatoms, iparams, x, f, pbc_simd, md, fr->ic->epsfac * fr->fudgeQQ);
672         }
673         else
674 #endif
675         {
676             /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
677             t_pbc        pbc_no;
678             const t_pbc* pbc_nonnull;
679
680             if (pbc != nullptr)
681             {
682                 pbc_nonnull = pbc;
683             }
684             else
685             {
686                 set_pbc(&pbc_no, PbcType::No, nullptr);
687                 pbc_nonnull = &pbc_no;
688             }
689
690             do_pairs_simple<real, 1, const t_pbc*>(nbonds, iatoms, iparams, x, f, pbc_nonnull, md,
691                                                    fr->ic->epsfac * fr->fudgeQQ);
692         }
693     }
694     else if (stepWork.computeVirial)
695     {
696         do_pairs_general<BondedKernelFlavor::ForcesAndVirialAndEnergy>(
697                 ftype, nbonds, iatoms, iparams, x, f, fshift, pbc, lambda, dvdl, md, fr, grppener,
698                 global_atom_index);
699     }
700     else
701     {
702         do_pairs_general<BondedKernelFlavor::ForcesAndEnergy>(ftype, nbonds, iatoms, iparams, x, f,
703                                                               fshift, pbc, lambda, dvdl, md, fr,
704                                                               grppener, global_atom_index);
705     }
706 }