Replace defines with constexpr in ishift
[alexxy/gromacs.git] / src / gromacs / listed_forces / pairs.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7  * and including many others, as listed in the AUTHORS file in the
8  * top-level source directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36 /*! \internal \file
37  *
38  * \brief This file defines functions for "pair" interactions
39  * (i.e. listed non-bonded interactions, e.g. 1-4 interactions)
40  *
41  * \author Mark Abraham <mark.j.abraham@gmail.com>
42  *
43  * \ingroup module_listed_forces
44  */
45 #include "gmxpre.h"
46
47 #include "pairs.h"
48
49 #include <cmath>
50
51 #include "gromacs/listed_forces/bonded.h"
52 #include "gromacs/math/functions.h"
53 #include "gromacs/math/vec.h"
54 #include "gromacs/mdtypes/forcerec.h"
55 #include "gromacs/mdtypes/group.h"
56 #include "gromacs/mdtypes/interaction_const.h"
57 #include "gromacs/mdtypes/mdatom.h"
58 #include "gromacs/mdtypes/md_enums.h"
59 #include "gromacs/mdtypes/nblist.h"
60 #include "gromacs/mdtypes/simulation_workload.h"
61 #include "gromacs/pbcutil/ishift.h"
62 #include "gromacs/pbcutil/pbc.h"
63 #include "gromacs/pbcutil/pbc_simd.h"
64 #include "gromacs/simd/simd.h"
65 #include "gromacs/simd/simd_math.h"
66 #include "gromacs/simd/vector_operations.h"
67 #include "gromacs/tables/forcetable.h"
68 #include "gromacs/utility/basedefinitions.h"
69 #include "gromacs/utility/fatalerror.h"
70 #include "gromacs/utility/gmxassert.h"
71
72 #include "listed_internal.h"
73
74 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
75
76 /*! \brief Issue a warning if a listed interaction is beyond a table limit */
77 static void warning_rlimit(const rvec* x, int ai, int aj, int* global_atom_index, real r, real rlimit)
78 {
79     gmx_warning(
80             "Listed nonbonded interaction between particles %d and %d\n"
81             "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
82             "This is likely either a 1,4 interaction, or a listed interaction inside\n"
83             "a smaller molecule you are decoupling during a free energy calculation.\n"
84             "Since interactions at distances beyond the table cannot be computed,\n"
85             "they are skipped until they are inside the table limit again. You will\n"
86             "only see this message once, even if it occurs for several interactions.\n\n"
87             "IMPORTANT: This should not happen in a stable simulation, so there is\n"
88             "probably something wrong with your system. Only change the table-extension\n"
89             "distance in the mdp file if you are really sure that is the reason.\n",
90             glatnr(global_atom_index, ai),
91             glatnr(global_atom_index, aj),
92             r,
93             rlimit);
94
95     if (debug)
96     {
97         fprintf(debug,
98                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. "
99                 "Ignored\n",
100                 x[ai][XX],
101                 x[ai][YY],
102                 x[ai][ZZ],
103                 x[aj][XX],
104                 x[aj][YY],
105                 x[aj][ZZ],
106                 glatnr(global_atom_index, ai),
107                 glatnr(global_atom_index, aj),
108                 r);
109     }
110 }
111
112 /*! \brief Compute the energy and force for a single pair interaction */
113 static real evaluate_single(real        r2,
114                             real        tabscale,
115                             const real* vftab,
116                             real        tableStride,
117                             real        qq,
118                             real        c6,
119                             real        c12,
120                             real*       velec,
121                             real*       vvdw)
122 {
123     real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
124     int  ntab;
125
126     /* Do the tabulated interactions - first table lookup */
127     rinv = gmx::invsqrt(r2);
128     r    = r2 * rinv;
129     rtab = r * tabscale;
130     ntab = static_cast<int>(rtab);
131     eps  = rtab - ntab;
132     eps2 = eps * eps;
133     ntab = static_cast<int>(tableStride * ntab);
134     /* Electrostatics */
135     Y     = vftab[ntab];
136     F     = vftab[ntab + 1];
137     Geps  = eps * vftab[ntab + 2];
138     Heps2 = eps2 * vftab[ntab + 3];
139     Fp    = F + Geps + Heps2;
140     VVe   = Y + eps * Fp;
141     FFe   = Fp + Geps + 2.0 * Heps2;
142     /* Dispersion */
143     Y     = vftab[ntab + 4];
144     F     = vftab[ntab + 5];
145     Geps  = eps * vftab[ntab + 6];
146     Heps2 = eps2 * vftab[ntab + 7];
147     Fp    = F + Geps + Heps2;
148     VVd   = Y + eps * Fp;
149     FFd   = Fp + Geps + 2.0 * Heps2;
150     /* Repulsion */
151     Y     = vftab[ntab + 8];
152     F     = vftab[ntab + 9];
153     Geps  = eps * vftab[ntab + 10];
154     Heps2 = eps2 * vftab[ntab + 11];
155     Fp    = F + Geps + Heps2;
156     VVr   = Y + eps * Fp;
157     FFr   = Fp + Geps + 2.0 * Heps2;
158
159     *velec = qq * VVe;
160     *vvdw  = c6 * VVd + c12 * VVr;
161
162     fscal = -(qq * FFe + c6 * FFd + c12 * FFr) * tabscale * rinv;
163
164     return fscal;
165 }
166
167 static inline real sixthRoot(const real r)
168 {
169     return gmx::invsqrt(std::cbrt(r));
170 }
171
172 /*! \brief Compute the energy and force for a single pair interaction under FEP */
173 static real free_energy_evaluate_single(real                                           r2,
174                                         const interaction_const_t::SoftCoreParameters& scParams,
175                                         real                                           tabscale,
176                                         const real*                                    vftab,
177                                         real                                           tableStride,
178                                         real                                           qqA,
179                                         real                                           c6A,
180                                         real                                           c12A,
181                                         real                                           qqB,
182                                         real                                           c6B,
183                                         real                                           c12B,
184                                         const real                                     LFC[2],
185                                         const real                                     LFV[2],
186                                         const real                                     DLF[2],
187                                         const real                                     lfac_coul[2],
188                                         const real                                     lfac_vdw[2],
189                                         const real dlfac_coul[2],
190                                         const real dlfac_vdw[2],
191                                         real*      velectot,
192                                         real*      vvdwtot,
193                                         real*      dvdl)
194 {
195     real       rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
196     real       qq[2], c6[2], c12[2], sigma6[2], sigma_pow[2];
197     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
198     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
199     real       fscal_vdw[2], fscal_elec[2];
200     real       velec[2], vvdw[2];
201     int        i, ntab;
202     const real half = 0.5_real;
203     const real one  = 1.0_real;
204     const real two  = 2.0_real;
205
206     qq[0]  = qqA;
207     qq[1]  = qqB;
208     c6[0]  = c6A;
209     c6[1]  = c6B;
210     c12[0] = c12A;
211     c12[1] = c12B;
212
213     const real rpm2 = r2 * r2;   /* r4 */
214     const real rp   = rpm2 * r2; /* r6 */
215
216     /* Loop over state A(0) and B(1) */
217     for (i = 0; i < 2; i++)
218     {
219         if ((c6[i] > 0) && (c12[i] > 0))
220         {
221             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
222              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
223              */
224             sigma6[i] = half * c12[i] / c6[i];
225             if (sigma6[i] < scParams.sigma6Minimum) /* for disappearing coul and vdw with soft core at the same time */
226             {
227                 sigma6[i] = scParams.sigma6Minimum;
228             }
229         }
230         else
231         {
232             sigma6[i] = scParams.sigma6WithInvalidSigma;
233         }
234         sigma_pow[i] = sigma6[i];
235     }
236
237     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
238     if ((c12[0] > 0) && (c12[1] > 0))
239     {
240         alpha_vdw_eff  = 0;
241         alpha_coul_eff = 0;
242     }
243     else
244     {
245         alpha_vdw_eff  = scParams.alphaVdw;
246         alpha_coul_eff = scParams.alphaCoulomb;
247     }
248
249     /* Loop over A and B states again */
250     for (i = 0; i < 2; i++)
251     {
252         fscal_elec[i] = 0;
253         fscal_vdw[i]  = 0;
254         velec[i]      = 0;
255         vvdw[i]       = 0;
256
257         /* Only spend time on A or B state if it is non-zero */
258         if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
259         {
260             /* Coulomb */
261             rpinv  = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
262             r_coul = sixthRoot(rpinv);
263
264             /* Electrostatics table lookup data */
265             rtab = r_coul * tabscale;
266             ntab = static_cast<int>(rtab);
267             eps  = rtab - ntab;
268             eps2 = eps * eps;
269             ntab = static_cast<int>(tableStride * ntab);
270             /* Electrostatics */
271             Y             = vftab[ntab];
272             F             = vftab[ntab + 1];
273             Geps          = eps * vftab[ntab + 2];
274             Heps2         = eps2 * vftab[ntab + 3];
275             Fp            = F + Geps + Heps2;
276             VV            = Y + eps * Fp;
277             FF            = Fp + Geps + two * Heps2;
278             velec[i]      = qq[i] * VV;
279             fscal_elec[i] = -qq[i] * FF * r_coul * rpinv * tabscale;
280
281             /* Vdw */
282             rpinv = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
283             r_vdw = sixthRoot(rpinv);
284             /* Vdw table lookup data */
285             rtab = r_vdw * tabscale;
286             ntab = static_cast<int>(rtab);
287             eps  = rtab - ntab;
288             eps2 = eps * eps;
289             ntab = 12 * ntab;
290             /* Dispersion */
291             Y            = vftab[ntab + 4];
292             F            = vftab[ntab + 5];
293             Geps         = eps * vftab[ntab + 6];
294             Heps2        = eps2 * vftab[ntab + 7];
295             Fp           = F + Geps + Heps2;
296             VV           = Y + eps * Fp;
297             FF           = Fp + Geps + two * Heps2;
298             vvdw[i]      = c6[i] * VV;
299             fscal_vdw[i] = -c6[i] * FF;
300
301             /* Repulsion */
302             Y     = vftab[ntab + 8];
303             F     = vftab[ntab + 9];
304             Geps  = eps * vftab[ntab + 10];
305             Heps2 = eps2 * vftab[ntab + 11];
306             Fp    = F + Geps + Heps2;
307             VV    = Y + eps * Fp;
308             FF    = Fp + Geps + two * Heps2;
309             vvdw[i] += c12[i] * VV;
310             fscal_vdw[i] -= c12[i] * FF;
311             fscal_vdw[i] *= r_vdw * rpinv * tabscale;
312         }
313     }
314     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
315     /* Assemble A and B states */
316     velecsum  = 0;
317     vvdwsum   = 0;
318     dvdl_coul = 0;
319     dvdl_vdw  = 0;
320     fscal     = 0;
321     for (i = 0; i < 2; i++)
322     {
323         velecsum += LFC[i] * velec[i];
324         vvdwsum += LFV[i] * vvdw[i];
325
326         fscal += (LFC[i] * fscal_elec[i] + LFV[i] * fscal_vdw[i]) * rpm2;
327
328         dvdl_coul += velec[i] * DLF[i]
329                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * fscal_elec[i] * sigma_pow[i];
330         dvdl_vdw += vvdw[i] * DLF[i]
331                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * fscal_vdw[i] * sigma_pow[i];
332     }
333
334     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdl_coul;
335     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdl_vdw;
336
337     *velectot = velecsum;
338     *vvdwtot  = vvdwsum;
339
340     return fscal;
341 }
342
343 /*! \brief Calculate pair interactions, supports all types and conditions. */
344 template<BondedKernelFlavor flavor>
345 static real do_pairs_general(int                 ftype,
346                              int                 nbonds,
347                              const t_iatom       iatoms[],
348                              const t_iparams     iparams[],
349                              const rvec          x[],
350                              rvec4               f[],
351                              rvec                fshift[],
352                              const struct t_pbc* pbc,
353                              const real*         lambda,
354                              real*               dvdl,
355                              const t_mdatoms*    md,
356                              const t_forcerec*   fr,
357                              gmx_grppairener_t*  grppener,
358                              int*                global_atom_index)
359 {
360     real            qq, c6, c12;
361     rvec            dx;
362     int             i, itype, ai, aj, gid;
363     int             fshift_index;
364     real            r2;
365     real            fscal, velec, vvdw;
366     real*           energygrp_elec;
367     real*           energygrp_vdw;
368     static gmx_bool warned_rlimit = FALSE;
369     /* Free energy stuff */
370     gmx_bool   bFreeEnergy;
371     real       LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
372     real       qqB, c6B, c12B;
373     const real oneSixth = 1.0_real / 6.0_real;
374
375     switch (ftype)
376     {
377         case F_LJ14:
378         case F_LJC14_Q:
379             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::Coulomb14].data();
380             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJ14].data();
381             break;
382         case F_LJC_PAIRS_NB:
383             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::CoulombSR].data();
384             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJSR].data();
385             break;
386         default:
387             energygrp_elec = nullptr; /* Keep compiler happy */
388             energygrp_vdw  = nullptr; /* Keep compiler happy */
389             gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
390     }
391
392     if (fr->efep != FreeEnergyPerturbationType::No)
393     {
394         /* Lambda factor for state A=1-lambda and B=lambda */
395         LFC[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
396         LFV[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
397         LFC[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
398         LFV[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
399
400         /*derivative of the lambda factor for state A and B */
401         DLF[0] = -1;
402         DLF[1] = 1;
403
404         GMX_ASSERT(fr->ic->softCoreParameters, "We need soft-core parameters");
405         const auto& scParams = *fr->ic->softCoreParameters;
406
407         for (i = 0; i < 2; i++)
408         {
409             lfac_coul[i] = (scParams.lambdaPower == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
410             dlfac_coul[i] = DLF[i] * scParams.lambdaPower * oneSixth
411                             * (scParams.lambdaPower == 2 ? (1 - LFC[i]) : 1);
412             lfac_vdw[i]  = (scParams.lambdaPower == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
413             dlfac_vdw[i] = DLF[i] * scParams.lambdaPower * oneSixth
414                            * (scParams.lambdaPower == 2 ? (1 - LFV[i]) : 1);
415         }
416     }
417
418     /* TODO This code depends on the logic in tables.c that constructs
419        the table layout, which should be made explicit in future
420        cleanup. */
421     GMX_ASSERT(
422             etiNR == 3,
423             "Pair-interaction code that uses GROMACS interaction tables supports exactly 3 tables");
424     GMX_ASSERT(
425             fr->pairsTable->interaction == GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP,
426             "Pair interaction kernels need a table with Coulomb, repulsion and dispersion entries");
427
428     const real epsfac = fr->ic->epsfac;
429
430     bFreeEnergy     = FALSE;
431     auto cENER      = md->cENER;
432     auto bPerturbed = md->bPerturbed;
433     auto chargeA    = md->chargeA;
434     auto chargeB    = md->chargeB;
435     for (i = 0; (i < nbonds);)
436     {
437         itype = iatoms[i++];
438         ai    = iatoms[i++];
439         aj    = iatoms[i++];
440         gid   = GID(cENER[ai], cENER[aj], md->nenergrp);
441
442         /* Get parameters */
443         switch (ftype)
444         {
445             case F_LJ14:
446                 bFreeEnergy = (fr->efep != FreeEnergyPerturbationType::No
447                                && ((md->bPerturbed && (bPerturbed[ai] || bPerturbed[aj]))
448                                    || iparams[itype].lj14.c6A != iparams[itype].lj14.c6B
449                                    || iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
450                 qq          = chargeA[ai] * chargeA[aj] * epsfac * fr->fudgeQQ;
451                 c6          = iparams[itype].lj14.c6A;
452                 c12         = iparams[itype].lj14.c12A;
453                 break;
454             case F_LJC14_Q:
455                 qq = iparams[itype].ljc14.qi * iparams[itype].ljc14.qj * epsfac
456                      * iparams[itype].ljc14.fqq;
457                 c6  = iparams[itype].ljc14.c6;
458                 c12 = iparams[itype].ljc14.c12;
459                 break;
460             case F_LJC_PAIRS_NB:
461                 qq  = iparams[itype].ljcnb.qi * iparams[itype].ljcnb.qj * epsfac;
462                 c6  = iparams[itype].ljcnb.c6;
463                 c12 = iparams[itype].ljcnb.c12;
464                 break;
465             default:
466                 /* Cannot happen since we called gmx_fatal() above in this case */
467                 qq = c6 = c12 = 0; /* Keep compiler happy */
468                 break;
469         }
470
471         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
472          * included in the general nfbp array now. This means the tables are scaled down by the
473          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
474          * be scaled up.
475          */
476         c6 *= 6.0;
477         c12 *= 12.0;
478
479         /* Do we need to apply full periodic boundary conditions? */
480         if (fr->bMolPBC)
481         {
482             fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
483         }
484         else
485         {
486             fshift_index = c_centralShiftIndex;
487             rvec_sub(x[ai], x[aj], dx);
488         }
489         r2 = norm2(dx);
490
491         if (r2 >= fr->pairsTable->r * fr->pairsTable->r)
492         {
493             /* This check isn't race free. But it doesn't matter because if a race occurs the only
494              * disadvantage is that the warning is printed twice */
495             if (!warned_rlimit)
496             {
497                 warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->pairsTable->r);
498                 warned_rlimit = TRUE;
499             }
500             continue;
501         }
502
503         if (bFreeEnergy)
504         {
505             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
506             qqB  = chargeB[ai] * chargeB[aj] * epsfac * fr->fudgeQQ;
507             c6B  = iparams[itype].lj14.c6B * 6.0;
508             c12B = iparams[itype].lj14.c12B * 12.0;
509
510             fscal = free_energy_evaluate_single(r2,
511                                                 *fr->ic->softCoreParameters,
512                                                 fr->pairsTable->scale,
513                                                 fr->pairsTable->data.data(),
514                                                 fr->pairsTable->stride,
515                                                 qq,
516                                                 c6,
517                                                 c12,
518                                                 qqB,
519                                                 c6B,
520                                                 c12B,
521                                                 LFC,
522                                                 LFV,
523                                                 DLF,
524                                                 lfac_coul,
525                                                 lfac_vdw,
526                                                 dlfac_coul,
527                                                 dlfac_vdw,
528                                                 &velec,
529                                                 &vvdw,
530                                                 dvdl);
531         }
532         else
533         {
534             /* Evaluate tabulated interaction without free energy */
535             fscal = evaluate_single(r2,
536                                     fr->pairsTable->scale,
537                                     fr->pairsTable->data.data(),
538                                     fr->pairsTable->stride,
539                                     qq,
540                                     c6,
541                                     c12,
542                                     &velec,
543                                     &vvdw);
544         }
545
546         energygrp_elec[gid] += velec;
547         energygrp_vdw[gid] += vvdw;
548         svmul(fscal, dx, dx);
549
550         /* Add the forces */
551         rvec_inc(f[ai], dx);
552         rvec_dec(f[aj], dx);
553
554         if (computeVirial(flavor))
555         {
556             if (fshift_index != c_centralShiftIndex)
557             {
558                 rvec_inc(fshift[fshift_index], dx);
559                 rvec_dec(fshift[c_centralShiftIndex], dx);
560             }
561         }
562     }
563     return 0.0;
564 }
565
566 /*! \brief Calculate pairs, only for plain-LJ + plain Coulomb normal type.
567  *
568  * This function is templated for real/SimdReal and for optimization.
569  */
570 template<typename T, int pack_size, typename pbc_type>
571 static void do_pairs_simple(int              nbonds,
572                             const t_iatom    iatoms[],
573                             const t_iparams  iparams[],
574                             const rvec       x[],
575                             rvec4            f[],
576                             const pbc_type   pbc,
577                             const t_mdatoms* md,
578                             const real       scale_factor)
579 {
580     const int nfa1 = 1 + 2;
581
582     T six(6);
583     T twelve(12);
584     T ef(scale_factor);
585
586 #if GMX_SIMD_HAVE_REAL
587     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[pack_size];
588     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[pack_size];
589     alignas(GMX_SIMD_ALIGNMENT) real         coeff[3 * pack_size];
590 #else
591     std::int32_t ai[pack_size];
592     std::int32_t aj[pack_size];
593     real         coeff[3 * pack_size];
594 #endif
595     auto chargeA = md->chargeA;
596
597     /* nbonds is #pairs*nfa1, here we step pack_size pairs */
598     for (int i = 0; i < nbonds; i += pack_size * nfa1)
599     {
600         /* Collect atoms for pack_size pairs.
601          * iu indexes into iatoms, we should not let iu go beyond nbonds.
602          */
603         int iu = i;
604         for (int s = 0; s < pack_size; s++)
605         {
606             int itype = iatoms[iu];
607             ai[s]     = iatoms[iu + 1];
608             aj[s]     = iatoms[iu + 2];
609
610             if (i + s * nfa1 < nbonds)
611             {
612                 coeff[0 * pack_size + s] = iparams[itype].lj14.c6A;
613                 coeff[1 * pack_size + s] = iparams[itype].lj14.c12A;
614                 coeff[2 * pack_size + s] = chargeA[ai[s]] * chargeA[aj[s]];
615
616                 /* Avoid indexing the iatoms array out of bounds.
617                  * We pad the coordinate indices with the last atom pair.
618                  */
619                 if (iu + nfa1 < nbonds)
620                 {
621                     iu += nfa1;
622                 }
623             }
624             else
625             {
626                 /* Pad the coefficient arrays with zeros to get zero forces */
627                 coeff[0 * pack_size + s] = 0;
628                 coeff[1 * pack_size + s] = 0;
629                 coeff[2 * pack_size + s] = 0;
630             }
631         }
632
633         /* Load the coordinates */
634         T xi[DIM], xj[DIM];
635         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]);
636         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]);
637
638         T c6  = load<T>(coeff + 0 * pack_size);
639         T c12 = load<T>(coeff + 1 * pack_size);
640         T qq  = load<T>(coeff + 2 * pack_size);
641
642         /* We could save these operations by storing 6*C6,12*C12 */
643         c6  = six * c6;
644         c12 = twelve * c12;
645
646         T dr[DIM];
647         pbc_dx_aiuc(pbc, xi, xj, dr);
648
649         T rsq   = dr[XX] * dr[XX] + dr[YY] * dr[YY] + dr[ZZ] * dr[ZZ];
650         T rinv  = gmx::invsqrt(rsq);
651         T rinv2 = rinv * rinv;
652         T rinv6 = rinv2 * rinv2 * rinv2;
653
654         /* Calculate the Coulomb force * r */
655         T cfr = ef * qq * rinv;
656
657         /* Calculate the LJ force * r and add it to the Coulomb part */
658         T fr = gmx::fma(fms(c12, rinv6, c6), rinv6, cfr);
659
660         T finvr = fr * rinv2;
661         T fx    = finvr * dr[XX];
662         T fy    = finvr * dr[YY];
663         T fz    = finvr * dr[ZZ];
664
665         /* Add the pair forces to the force array.
666          * Note that here we might add multiple force components for some atoms
667          * due to the SIMD padding. But the extra force components are zero.
668          */
669         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, fx, fy, fz);
670         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, fx, fy, fz);
671     }
672 }
673
674 /*! \brief Calculate all listed pair interactions */
675 void do_pairs(int                      ftype,
676               int                      nbonds,
677               const t_iatom            iatoms[],
678               const t_iparams          iparams[],
679               const rvec               x[],
680               rvec4                    f[],
681               rvec                     fshift[],
682               const struct t_pbc*      pbc,
683               const real*              lambda,
684               real*                    dvdl,
685               const t_mdatoms*         md,
686               const t_forcerec*        fr,
687               const bool               havePerturbedInteractions,
688               const gmx::StepWorkload& stepWork,
689               gmx_grppairener_t*       grppener,
690               int*                     global_atom_index)
691 {
692     if (ftype == F_LJ14 && fr->ic->vdwtype != VanDerWaalsType::User && !EEL_USER(fr->ic->eeltype)
693         && !havePerturbedInteractions && (!stepWork.computeVirial && !stepWork.computeEnergy))
694     {
695         /* We use a fast code-path for plain LJ 1-4 without FEP.
696          *
697          * TODO: Add support for energies (straightforward) and virial
698          * in the SIMD template. For the virial it's inconvenient to store
699          * the force sums for the shifts and we should directly calculate
700          * and sum the virial for the shifts. But we should do this
701          * at once for the angles and dihedrals as well.
702          */
703 #if GMX_SIMD_HAVE_REAL
704         if (fr->use_simd_kernels)
705         {
706             alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
707             set_pbc_simd(pbc, pbc_simd);
708
709             do_pairs_simple<SimdReal, GMX_SIMD_REAL_WIDTH, const real*>(
710                     nbonds, iatoms, iparams, x, f, pbc_simd, md, fr->ic->epsfac * fr->fudgeQQ);
711         }
712         else
713 #endif
714         {
715             /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
716             t_pbc        pbc_no;
717             const t_pbc* pbc_nonnull;
718
719             if (pbc != nullptr)
720             {
721                 pbc_nonnull = pbc;
722             }
723             else
724             {
725                 set_pbc(&pbc_no, PbcType::No, nullptr);
726                 pbc_nonnull = &pbc_no;
727             }
728
729             do_pairs_simple<real, 1, const t_pbc*>(
730                     nbonds, iatoms, iparams, x, f, pbc_nonnull, md, fr->ic->epsfac * fr->fudgeQQ);
731         }
732     }
733     else if (stepWork.computeVirial)
734     {
735         do_pairs_general<BondedKernelFlavor::ForcesAndVirialAndEnergy>(
736                 ftype, nbonds, iatoms, iparams, x, f, fshift, pbc, lambda, dvdl, md, fr, grppener, global_atom_index);
737     }
738     else
739     {
740         do_pairs_general<BondedKernelFlavor::ForcesAndEnergy>(
741                 ftype, nbonds, iatoms, iparams, x, f, fshift, pbc, lambda, dvdl, md, fr, grppener, global_atom_index);
742     }
743 }