Move t_forcetable and table enums to forcetable header
[alexxy/gromacs.git] / src / gromacs / listed_forces / pairs.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7  * and including many others, as listed in the AUTHORS file in the
8  * top-level source directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36 /*! \internal \file
37  *
38  * \brief This file defines functions for "pair" interactions
39  * (i.e. listed non-bonded interactions, e.g. 1-4 interactions)
40  *
41  * \author Mark Abraham <mark.j.abraham@gmail.com>
42  *
43  * \ingroup module_listed_forces
44  */
45 #include "gmxpre.h"
46
47 #include "pairs.h"
48
49 #include <cmath>
50
51 #include "gromacs/listed_forces/bonded.h"
52 #include "gromacs/math/functions.h"
53 #include "gromacs/math/vec.h"
54 #include "gromacs/mdtypes/forcerec.h"
55 #include "gromacs/mdtypes/group.h"
56 #include "gromacs/mdtypes/interaction_const.h"
57 #include "gromacs/mdtypes/md_enums.h"
58 #include "gromacs/mdtypes/nblist.h"
59 #include "gromacs/mdtypes/simulation_workload.h"
60 #include "gromacs/pbcutil/ishift.h"
61 #include "gromacs/pbcutil/pbc.h"
62 #include "gromacs/pbcutil/pbc_simd.h"
63 #include "gromacs/simd/simd.h"
64 #include "gromacs/simd/simd_math.h"
65 #include "gromacs/simd/vector_operations.h"
66 #include "gromacs/tables/forcetable.h"
67 #include "gromacs/utility/basedefinitions.h"
68 #include "gromacs/utility/fatalerror.h"
69 #include "gromacs/utility/gmxassert.h"
70
71 #include "listed_internal.h"
72
73 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
74
75 /*! \brief Issue a warning if a listed interaction is beyond a table limit */
76 static void warning_rlimit(const rvec* x, int ai, int aj, int* global_atom_index, real r, real rlimit)
77 {
78     gmx_warning(
79             "Listed nonbonded interaction between particles %d and %d\n"
80             "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
81             "This is likely either a 1,4 interaction, or a listed interaction inside\n"
82             "a smaller molecule you are decoupling during a free energy calculation.\n"
83             "Since interactions at distances beyond the table cannot be computed,\n"
84             "they are skipped until they are inside the table limit again. You will\n"
85             "only see this message once, even if it occurs for several interactions.\n\n"
86             "IMPORTANT: This should not happen in a stable simulation, so there is\n"
87             "probably something wrong with your system. Only change the table-extension\n"
88             "distance in the mdp file if you are really sure that is the reason.\n",
89             glatnr(global_atom_index, ai),
90             glatnr(global_atom_index, aj),
91             r,
92             rlimit);
93
94     if (debug)
95     {
96         fprintf(debug,
97                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. "
98                 "Ignored\n",
99                 x[ai][XX],
100                 x[ai][YY],
101                 x[ai][ZZ],
102                 x[aj][XX],
103                 x[aj][YY],
104                 x[aj][ZZ],
105                 glatnr(global_atom_index, ai),
106                 glatnr(global_atom_index, aj),
107                 r);
108     }
109 }
110
111 /*! \brief Compute the energy and force for a single pair interaction */
112 static real evaluate_single(real        r2,
113                             real        tabscale,
114                             const real* vftab,
115                             real        tableStride,
116                             real        qq,
117                             real        c6,
118                             real        c12,
119                             real*       velec,
120                             real*       vvdw)
121 {
122     real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
123     int  ntab;
124
125     /* Do the tabulated interactions - first table lookup */
126     rinv = gmx::invsqrt(r2);
127     r    = r2 * rinv;
128     rtab = r * tabscale;
129     ntab = static_cast<int>(rtab);
130     eps  = rtab - ntab;
131     eps2 = eps * eps;
132     ntab = static_cast<int>(tableStride * ntab);
133     /* Electrostatics */
134     Y     = vftab[ntab];
135     F     = vftab[ntab + 1];
136     Geps  = eps * vftab[ntab + 2];
137     Heps2 = eps2 * vftab[ntab + 3];
138     Fp    = F + Geps + Heps2;
139     VVe   = Y + eps * Fp;
140     FFe   = Fp + Geps + 2.0 * Heps2;
141     /* Dispersion */
142     Y     = vftab[ntab + 4];
143     F     = vftab[ntab + 5];
144     Geps  = eps * vftab[ntab + 6];
145     Heps2 = eps2 * vftab[ntab + 7];
146     Fp    = F + Geps + Heps2;
147     VVd   = Y + eps * Fp;
148     FFd   = Fp + Geps + 2.0 * Heps2;
149     /* Repulsion */
150     Y     = vftab[ntab + 8];
151     F     = vftab[ntab + 9];
152     Geps  = eps * vftab[ntab + 10];
153     Heps2 = eps2 * vftab[ntab + 11];
154     Fp    = F + Geps + Heps2;
155     VVr   = Y + eps * Fp;
156     FFr   = Fp + Geps + 2.0 * Heps2;
157
158     *velec = qq * VVe;
159     *vvdw  = c6 * VVd + c12 * VVr;
160
161     fscal = -(qq * FFe + c6 * FFd + c12 * FFr) * tabscale * rinv;
162
163     return fscal;
164 }
165
166 static inline real sixthRoot(const real r)
167 {
168     return gmx::invsqrt(std::cbrt(r));
169 }
170
171 /*! \brief Compute the energy and force for a single pair interaction under FEP */
172 static real free_energy_evaluate_single(real                                           r2,
173                                         const interaction_const_t::SoftCoreParameters& scParams,
174                                         real                                           tabscale,
175                                         const real*                                    vftab,
176                                         real                                           tableStride,
177                                         real                                           qqA,
178                                         real                                           c6A,
179                                         real                                           c12A,
180                                         real                                           qqB,
181                                         real                                           c6B,
182                                         real                                           c12B,
183                                         const real                                     LFC[2],
184                                         const real                                     LFV[2],
185                                         const real                                     DLF[2],
186                                         const real                                     lfac_coul[2],
187                                         const real                                     lfac_vdw[2],
188                                         const real dlfac_coul[2],
189                                         const real dlfac_vdw[2],
190                                         real*      velectot,
191                                         real*      vvdwtot,
192                                         real*      dvdl)
193 {
194     real       rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
195     real       qq[2], c6[2], c12[2], sigma6[2], sigma_pow[2];
196     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
197     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
198     real       fscal_vdw[2], fscal_elec[2];
199     real       velec[2], vvdw[2];
200     int        i, ntab;
201     const real half = 0.5_real;
202     const real one  = 1.0_real;
203     const real two  = 2.0_real;
204
205     qq[0]  = qqA;
206     qq[1]  = qqB;
207     c6[0]  = c6A;
208     c6[1]  = c6B;
209     c12[0] = c12A;
210     c12[1] = c12B;
211
212     const real rpm2 = r2 * r2;   /* r4 */
213     const real rp   = rpm2 * r2; /* r6 */
214
215     /* Loop over state A(0) and B(1) */
216     for (i = 0; i < 2; i++)
217     {
218         if ((c6[i] > 0) && (c12[i] > 0))
219         {
220             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
221              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
222              */
223             sigma6[i] = half * c12[i] / c6[i];
224             if (sigma6[i] < scParams.sigma6Minimum) /* for disappearing coul and vdw with soft core at the same time */
225             {
226                 sigma6[i] = scParams.sigma6Minimum;
227             }
228         }
229         else
230         {
231             sigma6[i] = scParams.sigma6WithInvalidSigma;
232         }
233         sigma_pow[i] = sigma6[i];
234     }
235
236     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
237     if ((c12[0] > 0) && (c12[1] > 0))
238     {
239         alpha_vdw_eff  = 0;
240         alpha_coul_eff = 0;
241     }
242     else
243     {
244         alpha_vdw_eff  = scParams.alphaVdw;
245         alpha_coul_eff = scParams.alphaCoulomb;
246     }
247
248     /* Loop over A and B states again */
249     for (i = 0; i < 2; i++)
250     {
251         fscal_elec[i] = 0;
252         fscal_vdw[i]  = 0;
253         velec[i]      = 0;
254         vvdw[i]       = 0;
255
256         /* Only spend time on A or B state if it is non-zero */
257         if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
258         {
259             /* Coulomb */
260             rpinv  = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
261             r_coul = sixthRoot(rpinv);
262
263             /* Electrostatics table lookup data */
264             rtab = r_coul * tabscale;
265             ntab = static_cast<int>(rtab);
266             eps  = rtab - ntab;
267             eps2 = eps * eps;
268             ntab = static_cast<int>(tableStride * ntab);
269             /* Electrostatics */
270             Y             = vftab[ntab];
271             F             = vftab[ntab + 1];
272             Geps          = eps * vftab[ntab + 2];
273             Heps2         = eps2 * vftab[ntab + 3];
274             Fp            = F + Geps + Heps2;
275             VV            = Y + eps * Fp;
276             FF            = Fp + Geps + two * Heps2;
277             velec[i]      = qq[i] * VV;
278             fscal_elec[i] = -qq[i] * FF * r_coul * rpinv * tabscale;
279
280             /* Vdw */
281             rpinv = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
282             r_vdw = sixthRoot(rpinv);
283             /* Vdw table lookup data */
284             rtab = r_vdw * tabscale;
285             ntab = static_cast<int>(rtab);
286             eps  = rtab - ntab;
287             eps2 = eps * eps;
288             ntab = 12 * ntab;
289             /* Dispersion */
290             Y            = vftab[ntab + 4];
291             F            = vftab[ntab + 5];
292             Geps         = eps * vftab[ntab + 6];
293             Heps2        = eps2 * vftab[ntab + 7];
294             Fp           = F + Geps + Heps2;
295             VV           = Y + eps * Fp;
296             FF           = Fp + Geps + two * Heps2;
297             vvdw[i]      = c6[i] * VV;
298             fscal_vdw[i] = -c6[i] * FF;
299
300             /* Repulsion */
301             Y     = vftab[ntab + 8];
302             F     = vftab[ntab + 9];
303             Geps  = eps * vftab[ntab + 10];
304             Heps2 = eps2 * vftab[ntab + 11];
305             Fp    = F + Geps + Heps2;
306             VV    = Y + eps * Fp;
307             FF    = Fp + Geps + two * Heps2;
308             vvdw[i] += c12[i] * VV;
309             fscal_vdw[i] -= c12[i] * FF;
310             fscal_vdw[i] *= r_vdw * rpinv * tabscale;
311         }
312     }
313     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
314     /* Assemble A and B states */
315     velecsum  = 0;
316     vvdwsum   = 0;
317     dvdl_coul = 0;
318     dvdl_vdw  = 0;
319     fscal     = 0;
320     for (i = 0; i < 2; i++)
321     {
322         velecsum += LFC[i] * velec[i];
323         vvdwsum += LFV[i] * vvdw[i];
324
325         fscal += (LFC[i] * fscal_elec[i] + LFV[i] * fscal_vdw[i]) * rpm2;
326
327         dvdl_coul += velec[i] * DLF[i]
328                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * fscal_elec[i] * sigma_pow[i];
329         dvdl_vdw += vvdw[i] * DLF[i]
330                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * fscal_vdw[i] * sigma_pow[i];
331     }
332
333     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdl_coul;
334     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdl_vdw;
335
336     *velectot = velecsum;
337     *vvdwtot  = vvdwsum;
338
339     return fscal;
340 }
341
342 /*! \brief Calculate pair interactions, supports all types and conditions. */
343 template<BondedKernelFlavor flavor>
344 static real do_pairs_general(int                           ftype,
345                              int                           nbonds,
346                              const t_iatom                 iatoms[],
347                              const t_iparams               iparams[],
348                              const rvec                    x[],
349                              rvec4                         f[],
350                              rvec                          fshift[],
351                              const struct t_pbc*           pbc,
352                              const real*                   lambda,
353                              real*                         dvdl,
354                              gmx::ArrayRef<real>           chargeA,
355                              gmx::ArrayRef<real>           chargeB,
356                              gmx::ArrayRef<bool>           atomIsPerturbed,
357                              gmx::ArrayRef<unsigned short> cENER,
358                              int                           numEnergyGroups,
359                              const t_forcerec*             fr,
360                              gmx_grppairener_t*            grppener,
361                              int*                          global_atom_index)
362 {
363     real            qq, c6, c12;
364     rvec            dx;
365     int             i, itype, ai, aj, gid;
366     int             fshift_index;
367     real            r2;
368     real            fscal, velec, vvdw;
369     real*           energygrp_elec;
370     real*           energygrp_vdw;
371     static gmx_bool warned_rlimit = FALSE;
372     /* Free energy stuff */
373     gmx_bool   bFreeEnergy;
374     real       LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
375     real       qqB, c6B, c12B;
376     const real oneSixth = 1.0_real / 6.0_real;
377
378     switch (ftype)
379     {
380         case F_LJ14:
381         case F_LJC14_Q:
382             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::Coulomb14].data();
383             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJ14].data();
384             break;
385         case F_LJC_PAIRS_NB:
386             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::CoulombSR].data();
387             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJSR].data();
388             break;
389         default:
390             energygrp_elec = nullptr; /* Keep compiler happy */
391             energygrp_vdw  = nullptr; /* Keep compiler happy */
392             gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
393     }
394
395     if (fr->efep != FreeEnergyPerturbationType::No)
396     {
397         /* Lambda factor for state A=1-lambda and B=lambda */
398         LFC[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
399         LFV[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
400         LFC[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
401         LFV[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
402
403         /*derivative of the lambda factor for state A and B */
404         DLF[0] = -1;
405         DLF[1] = 1;
406
407         GMX_ASSERT(fr->ic->softCoreParameters, "We need soft-core parameters");
408         const auto& scParams = *fr->ic->softCoreParameters;
409
410         for (i = 0; i < 2; i++)
411         {
412             lfac_coul[i] = (scParams.lambdaPower == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
413             dlfac_coul[i] = DLF[i] * scParams.lambdaPower * oneSixth
414                             * (scParams.lambdaPower == 2 ? (1 - LFC[i]) : 1);
415             lfac_vdw[i]  = (scParams.lambdaPower == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
416             dlfac_vdw[i] = DLF[i] * scParams.lambdaPower * oneSixth
417                            * (scParams.lambdaPower == 2 ? (1 - LFV[i]) : 1);
418         }
419     }
420
421     /* TODO This code depends on the logic in tables.c that constructs
422        the table layout, which should be made explicit in future
423        cleanup. */
424     GMX_ASSERT(
425             etiNR == 3,
426             "Pair-interaction code that uses GROMACS interaction tables supports exactly 3 tables");
427     GMX_ASSERT(
428             fr->pairsTable->interaction == TableInteraction::ElectrostaticVdwRepulsionVdwDispersion,
429             "Pair interaction kernels need a table with Coulomb, repulsion and dispersion entries");
430
431     const real epsfac = fr->ic->epsfac;
432
433     bFreeEnergy = FALSE;
434     for (i = 0; (i < nbonds);)
435     {
436         itype = iatoms[i++];
437         ai    = iatoms[i++];
438         aj    = iatoms[i++];
439         gid   = GID(cENER[ai], cENER[aj], numEnergyGroups);
440
441         /* Get parameters */
442         switch (ftype)
443         {
444             case F_LJ14:
445                 bFreeEnergy =
446                         (fr->efep != FreeEnergyPerturbationType::No
447                          && ((!atomIsPerturbed.empty() && (atomIsPerturbed[ai] || atomIsPerturbed[aj]))
448                              || iparams[itype].lj14.c6A != iparams[itype].lj14.c6B
449                              || iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
450                 qq  = chargeA[ai] * chargeA[aj] * epsfac * fr->fudgeQQ;
451                 c6  = iparams[itype].lj14.c6A;
452                 c12 = iparams[itype].lj14.c12A;
453                 break;
454             case F_LJC14_Q:
455                 qq = iparams[itype].ljc14.qi * iparams[itype].ljc14.qj * epsfac
456                      * iparams[itype].ljc14.fqq;
457                 c6  = iparams[itype].ljc14.c6;
458                 c12 = iparams[itype].ljc14.c12;
459                 break;
460             case F_LJC_PAIRS_NB:
461                 qq  = iparams[itype].ljcnb.qi * iparams[itype].ljcnb.qj * epsfac;
462                 c6  = iparams[itype].ljcnb.c6;
463                 c12 = iparams[itype].ljcnb.c12;
464                 break;
465             default:
466                 /* Cannot happen since we called gmx_fatal() above in this case */
467                 qq = c6 = c12 = 0; /* Keep compiler happy */
468                 break;
469         }
470
471         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
472          * included in the general nfbp array now. This means the tables are scaled down by the
473          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
474          * be scaled up.
475          */
476         c6 *= 6.0;
477         c12 *= 12.0;
478
479         /* Do we need to apply full periodic boundary conditions? */
480         if (fr->bMolPBC)
481         {
482             fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
483         }
484         else
485         {
486             fshift_index = c_centralShiftIndex;
487             rvec_sub(x[ai], x[aj], dx);
488         }
489         r2 = norm2(dx);
490
491         if (r2 >= fr->pairsTable->interactionRange * fr->pairsTable->interactionRange)
492         {
493             /* This check isn't race free. But it doesn't matter because if a race occurs the only
494              * disadvantage is that the warning is printed twice */
495             if (!warned_rlimit)
496             {
497                 warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->pairsTable->interactionRange);
498                 warned_rlimit = TRUE;
499             }
500             continue;
501         }
502
503         if (bFreeEnergy)
504         {
505             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
506             qqB  = chargeB[ai] * chargeB[aj] * epsfac * fr->fudgeQQ;
507             c6B  = iparams[itype].lj14.c6B * 6.0;
508             c12B = iparams[itype].lj14.c12B * 12.0;
509
510             fscal = free_energy_evaluate_single(r2,
511                                                 *fr->ic->softCoreParameters,
512                                                 fr->pairsTable->scale,
513                                                 fr->pairsTable->data.data(),
514                                                 fr->pairsTable->stride,
515                                                 qq,
516                                                 c6,
517                                                 c12,
518                                                 qqB,
519                                                 c6B,
520                                                 c12B,
521                                                 LFC,
522                                                 LFV,
523                                                 DLF,
524                                                 lfac_coul,
525                                                 lfac_vdw,
526                                                 dlfac_coul,
527                                                 dlfac_vdw,
528                                                 &velec,
529                                                 &vvdw,
530                                                 dvdl);
531         }
532         else
533         {
534             /* Evaluate tabulated interaction without free energy */
535             fscal = evaluate_single(r2,
536                                     fr->pairsTable->scale,
537                                     fr->pairsTable->data.data(),
538                                     fr->pairsTable->stride,
539                                     qq,
540                                     c6,
541                                     c12,
542                                     &velec,
543                                     &vvdw);
544         }
545
546         energygrp_elec[gid] += velec;
547         energygrp_vdw[gid] += vvdw;
548         svmul(fscal, dx, dx);
549
550         /* Add the forces */
551         rvec_inc(f[ai], dx);
552         rvec_dec(f[aj], dx);
553
554         if (computeVirial(flavor))
555         {
556             if (fshift_index != c_centralShiftIndex)
557             {
558                 rvec_inc(fshift[fshift_index], dx);
559                 rvec_dec(fshift[c_centralShiftIndex], dx);
560             }
561         }
562     }
563     return 0.0;
564 }
565
566 /*! \brief Calculate pairs, only for plain-LJ + plain Coulomb normal type.
567  *
568  * This function is templated for real/SimdReal and for optimization.
569  */
570 template<typename T, int pack_size, typename pbc_type>
571 static void do_pairs_simple(int                 nbonds,
572                             const t_iatom       iatoms[],
573                             const t_iparams     iparams[],
574                             const rvec          x[],
575                             rvec4               f[],
576                             const pbc_type      pbc,
577                             gmx::ArrayRef<real> charge,
578                             const real          scale_factor)
579 {
580     const int nfa1 = 1 + 2;
581
582     T six(6);
583     T twelve(12);
584     T ef(scale_factor);
585
586 #if GMX_SIMD_HAVE_REAL
587     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[pack_size];
588     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[pack_size];
589     alignas(GMX_SIMD_ALIGNMENT) real         coeff[3 * pack_size];
590 #else
591     std::int32_t ai[pack_size];
592     std::int32_t aj[pack_size];
593     real         coeff[3 * pack_size];
594 #endif
595
596     /* nbonds is #pairs*nfa1, here we step pack_size pairs */
597     for (int i = 0; i < nbonds; i += pack_size * nfa1)
598     {
599         /* Collect atoms for pack_size pairs.
600          * iu indexes into iatoms, we should not let iu go beyond nbonds.
601          */
602         int iu = i;
603         for (int s = 0; s < pack_size; s++)
604         {
605             int itype = iatoms[iu];
606             ai[s]     = iatoms[iu + 1];
607             aj[s]     = iatoms[iu + 2];
608
609             if (i + s * nfa1 < nbonds)
610             {
611                 coeff[0 * pack_size + s] = iparams[itype].lj14.c6A;
612                 coeff[1 * pack_size + s] = iparams[itype].lj14.c12A;
613                 coeff[2 * pack_size + s] = charge[ai[s]] * charge[aj[s]];
614
615                 /* Avoid indexing the iatoms array out of bounds.
616                  * We pad the coordinate indices with the last atom pair.
617                  */
618                 if (iu + nfa1 < nbonds)
619                 {
620                     iu += nfa1;
621                 }
622             }
623             else
624             {
625                 /* Pad the coefficient arrays with zeros to get zero forces */
626                 coeff[0 * pack_size + s] = 0;
627                 coeff[1 * pack_size + s] = 0;
628                 coeff[2 * pack_size + s] = 0;
629             }
630         }
631
632         /* Load the coordinates */
633         T xi[DIM], xj[DIM];
634         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]);
635         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]);
636
637         T c6  = load<T>(coeff + 0 * pack_size);
638         T c12 = load<T>(coeff + 1 * pack_size);
639         T qq  = load<T>(coeff + 2 * pack_size);
640
641         /* We could save these operations by storing 6*C6,12*C12 */
642         c6  = six * c6;
643         c12 = twelve * c12;
644
645         T dr[DIM];
646         pbc_dx_aiuc(pbc, xi, xj, dr);
647
648         T rsq   = dr[XX] * dr[XX] + dr[YY] * dr[YY] + dr[ZZ] * dr[ZZ];
649         T rinv  = gmx::invsqrt(rsq);
650         T rinv2 = rinv * rinv;
651         T rinv6 = rinv2 * rinv2 * rinv2;
652
653         /* Calculate the Coulomb force * r */
654         T cfr = ef * qq * rinv;
655
656         /* Calculate the LJ force * r and add it to the Coulomb part */
657         T fr = gmx::fma(fms(c12, rinv6, c6), rinv6, cfr);
658
659         T finvr = fr * rinv2;
660         T fx    = finvr * dr[XX];
661         T fy    = finvr * dr[YY];
662         T fz    = finvr * dr[ZZ];
663
664         /* Add the pair forces to the force array.
665          * Note that here we might add multiple force components for some atoms
666          * due to the SIMD padding. But the extra force components are zero.
667          */
668         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, fx, fy, fz);
669         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, fx, fy, fz);
670     }
671 }
672
673 /*! \brief Calculate all listed pair interactions */
674 void do_pairs(int                           ftype,
675               int                           nbonds,
676               const t_iatom                 iatoms[],
677               const t_iparams               iparams[],
678               const rvec                    x[],
679               rvec4                         f[],
680               rvec                          fshift[],
681               const struct t_pbc*           pbc,
682               const real*                   lambda,
683               real*                         dvdl,
684               gmx::ArrayRef<real>           chargeA,
685               gmx::ArrayRef<real>           chargeB,
686               gmx::ArrayRef<bool>           atomIsPerturbed,
687               gmx::ArrayRef<unsigned short> cENER,
688               const int                     numEnergyGroups,
689               const t_forcerec*             fr,
690               const bool                    havePerturbedInteractions,
691               const gmx::StepWorkload&      stepWork,
692               gmx_grppairener_t*            grppener,
693               int*                          global_atom_index)
694 {
695     if (ftype == F_LJ14 && fr->ic->vdwtype != VanDerWaalsType::User && !EEL_USER(fr->ic->eeltype)
696         && !havePerturbedInteractions && (!stepWork.computeVirial && !stepWork.computeEnergy))
697     {
698         /* We use a fast code-path for plain LJ 1-4 without FEP.
699          *
700          * TODO: Add support for energies (straightforward) and virial
701          * in the SIMD template. For the virial it's inconvenient to store
702          * the force sums for the shifts and we should directly calculate
703          * and sum the virial for the shifts. But we should do this
704          * at once for the angles and dihedrals as well.
705          */
706 #if GMX_SIMD_HAVE_REAL
707         if (fr->use_simd_kernels)
708         {
709             alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
710             set_pbc_simd(pbc, pbc_simd);
711
712             do_pairs_simple<SimdReal, GMX_SIMD_REAL_WIDTH, const real*>(
713                     nbonds, iatoms, iparams, x, f, pbc_simd, chargeA, fr->ic->epsfac * fr->fudgeQQ);
714         }
715         else
716 #endif
717         {
718             /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
719             t_pbc        pbc_no;
720             const t_pbc* pbc_nonnull;
721
722             if (pbc != nullptr)
723             {
724                 pbc_nonnull = pbc;
725             }
726             else
727             {
728                 set_pbc(&pbc_no, PbcType::No, nullptr);
729                 pbc_nonnull = &pbc_no;
730             }
731
732             do_pairs_simple<real, 1, const t_pbc*>(
733                     nbonds, iatoms, iparams, x, f, pbc_nonnull, chargeA, fr->ic->epsfac * fr->fudgeQQ);
734         }
735     }
736     else if (stepWork.computeVirial)
737     {
738         do_pairs_general<BondedKernelFlavor::ForcesAndVirialAndEnergy>(ftype,
739                                                                        nbonds,
740                                                                        iatoms,
741                                                                        iparams,
742                                                                        x,
743                                                                        f,
744                                                                        fshift,
745                                                                        pbc,
746                                                                        lambda,
747                                                                        dvdl,
748                                                                        chargeA,
749                                                                        chargeB,
750                                                                        atomIsPerturbed,
751                                                                        cENER,
752                                                                        numEnergyGroups,
753                                                                        fr,
754                                                                        grppener,
755                                                                        global_atom_index);
756     }
757     else
758     {
759         do_pairs_general<BondedKernelFlavor::ForcesAndEnergy>(ftype,
760                                                               nbonds,
761                                                               iatoms,
762                                                               iparams,
763                                                               x,
764                                                               f,
765                                                               fshift,
766                                                               pbc,
767                                                               lambda,
768                                                               dvdl,
769                                                               chargeA,
770                                                               chargeB,
771                                                               atomIsPerturbed,
772                                                               cENER,
773                                                               numEnergyGroups,
774                                                               fr,
775                                                               grppener,
776                                                               global_atom_index);
777     }
778 }