Fix some more double literals.
[alexxy/gromacs.git] / src / gromacs / listed_forces / pairs.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7  * and including many others, as listed in the AUTHORS file in the
8  * top-level source directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36 /*! \internal \file
37  *
38  * \brief This file defines functions for "pair" interactions
39  * (i.e. listed non-bonded interactions, e.g. 1-4 interactions)
40  *
41  * \author Mark Abraham <mark.j.abraham@gmail.com>
42  *
43  * \ingroup module_listed_forces
44  */
45 #include "gmxpre.h"
46
47 #include "pairs.h"
48
49 #include <cmath>
50
51 #include "gromacs/listed_forces/bonded.h"
52 #include "gromacs/math/functions.h"
53 #include "gromacs/math/vec.h"
54 #include "gromacs/mdtypes/forcerec.h"
55 #include "gromacs/mdtypes/group.h"
56 #include "gromacs/mdtypes/interaction_const.h"
57 #include "gromacs/mdtypes/md_enums.h"
58 #include "gromacs/mdtypes/nblist.h"
59 #include "gromacs/mdtypes/simulation_workload.h"
60 #include "gromacs/pbcutil/ishift.h"
61 #include "gromacs/pbcutil/pbc.h"
62 #include "gromacs/pbcutil/pbc_simd.h"
63 #include "gromacs/simd/simd.h"
64 #include "gromacs/simd/simd_math.h"
65 #include "gromacs/simd/vector_operations.h"
66 #include "gromacs/tables/forcetable.h"
67 #include "gromacs/utility/basedefinitions.h"
68 #include "gromacs/utility/fatalerror.h"
69 #include "gromacs/utility/gmxassert.h"
70
71 #include "listed_internal.h"
72
73 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
74
75 /*! \brief Issue a warning if a listed interaction is beyond a table limit */
76 static void warning_rlimit(const rvec* x, int ai, int aj, int* global_atom_index, real r, real rlimit)
77 {
78     gmx_warning(
79             "Listed nonbonded interaction between particles %d and %d\n"
80             "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
81             "This is likely either a 1,4 interaction, or a listed interaction inside\n"
82             "a smaller molecule you are decoupling during a free energy calculation.\n"
83             "Since interactions at distances beyond the table cannot be computed,\n"
84             "they are skipped until they are inside the table limit again. You will\n"
85             "only see this message once, even if it occurs for several interactions.\n\n"
86             "IMPORTANT: This should not happen in a stable simulation, so there is\n"
87             "probably something wrong with your system. Only change the table-extension\n"
88             "distance in the mdp file if you are really sure that is the reason.\n",
89             glatnr(global_atom_index, ai),
90             glatnr(global_atom_index, aj),
91             r,
92             rlimit);
93
94     if (debug)
95     {
96         fprintf(debug,
97                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. "
98                 "Ignored\n",
99                 x[ai][XX],
100                 x[ai][YY],
101                 x[ai][ZZ],
102                 x[aj][XX],
103                 x[aj][YY],
104                 x[aj][ZZ],
105                 glatnr(global_atom_index, ai),
106                 glatnr(global_atom_index, aj),
107                 r);
108     }
109 }
110
111 /*! \brief Compute the energy and force for a single pair interaction */
112 static real evaluate_single(real        r2,
113                             real        tabscale,
114                             const real* vftab,
115                             real        tableStride,
116                             real        qq,
117                             real        c6,
118                             real        c12,
119                             real*       velec,
120                             real*       vvdw)
121 {
122     real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
123     int  ntab;
124
125     /* Do the tabulated interactions - first table lookup */
126     rinv = gmx::invsqrt(r2);
127     r    = r2 * rinv;
128     rtab = r * tabscale;
129     ntab = static_cast<int>(rtab);
130     eps  = rtab - ntab;
131     eps2 = eps * eps;
132     ntab = static_cast<int>(tableStride * ntab);
133     /* Electrostatics */
134     Y     = vftab[ntab];
135     F     = vftab[ntab + 1];
136     Geps  = eps * vftab[ntab + 2];
137     Heps2 = eps2 * vftab[ntab + 3];
138     Fp    = F + Geps + Heps2;
139     VVe   = Y + eps * Fp;
140     FFe   = Fp + Geps + 2.0 * Heps2;
141     /* Dispersion */
142     Y     = vftab[ntab + 4];
143     F     = vftab[ntab + 5];
144     Geps  = eps * vftab[ntab + 6];
145     Heps2 = eps2 * vftab[ntab + 7];
146     Fp    = F + Geps + Heps2;
147     VVd   = Y + eps * Fp;
148     FFd   = Fp + Geps + 2.0 * Heps2;
149     /* Repulsion */
150     Y     = vftab[ntab + 8];
151     F     = vftab[ntab + 9];
152     Geps  = eps * vftab[ntab + 10];
153     Heps2 = eps2 * vftab[ntab + 11];
154     Fp    = F + Geps + Heps2;
155     VVr   = Y + eps * Fp;
156     FFr   = Fp + Geps + 2.0 * Heps2;
157
158     *velec = qq * VVe;
159     *vvdw  = c6 * VVd + c12 * VVr;
160
161     fscal = -(qq * FFe + c6 * FFd + c12 * FFr) * tabscale * rinv;
162
163     return fscal;
164 }
165
166 static inline real sixthRoot(const real r)
167 {
168     return gmx::invsqrt(std::cbrt(r));
169 }
170
171 /*! \brief Compute the energy and force for a single pair interaction under FEP */
172 template<KernelSoftcoreType softcoreType>
173 static real free_energy_evaluate_single(real                                           r2,
174                                         real                                           rCoulCutoff,
175                                         const interaction_const_t::SoftCoreParameters& scParams,
176                                         real                                           tabscale,
177                                         const real*                                    vftab,
178                                         real                                           tableStride,
179                                         real                                           qqA,
180                                         real                                           c6A,
181                                         real                                           c12A,
182                                         real                                           qqB,
183                                         real                                           c6B,
184                                         real                                           c12B,
185                                         real                                           facel,
186                                         const real                                     LFC[2],
187                                         const real                                     LFV[2],
188                                         const real                                     DLF[2],
189                                         const real                                     lfac_coul[2],
190                                         const real                                     lfac_vdw[2],
191                                         const real dlfac_coul[2],
192                                         const real dlfac_vdw[2],
193                                         real*      velectot,
194                                         real*      vvdwtot,
195                                         real*      dvdl)
196 {
197     real       rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
198     real       qq[2], c6[2], c12[2], sigma6[2], sigma_pow[2];
199     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul_sum, dvdl_vdw_sum;
200     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
201     real       fscal_vdw[2], fscal_elec[2];
202     real       velec[2], vvdw[2];
203     real       dvdl_elec[2], dvdl_vdw[2];
204     real       scaleLinpointCoulGapsys, scaleLinpointVdWGapsys, sigma6VdWGapsys[2];
205     real       rQ, rLJ;
206     real       scaleDvdlRCoul;
207     int        i, ntab;
208     const real half = 0.5_real;
209     const real one  = 1.0_real;
210     const real two  = 2.0_real;
211
212     qq[0]  = qqA;
213     qq[1]  = qqB;
214     c6[0]  = c6A;
215     c6[1]  = c6B;
216     c12[0] = c12A;
217     c12[1] = c12B;
218
219     const real rpm2 = r2 * r2;   /* r4 */
220     const real rp   = rpm2 * r2; /* r6 */
221     const real r    = sqrt(r2);  /* r1 */
222
223     /* Loop over state A(0) and B(1) */
224     for (i = 0; i < 2; i++)
225     {
226         if constexpr (softcoreType == KernelSoftcoreType::Beutler)
227         {
228             if ((c6[i] > 0) && (c12[i] > 0))
229             {
230                 /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
231                  * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
232                  */
233                 sigma6[i] = half * c12[i] / c6[i];
234                 if (sigma6[i] < scParams.sigma6Minimum) /* for disappearing coul and vdw with soft core at the same time */
235                 {
236                     sigma6[i] = scParams.sigma6Minimum;
237                 }
238             }
239             else
240             {
241                 sigma6[i] = scParams.sigma6WithInvalidSigma;
242             }
243             sigma_pow[i] = sigma6[i];
244         }
245         if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
246         {
247             if ((c6[i] > 0) && (c12[i] > 0))
248             {
249                 /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
250                  * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
251                  */
252                 sigma6VdWGapsys[i] = half * c12[i] / c6[i];
253             }
254             else
255             {
256                 sigma6VdWGapsys[i] = scParams.sigma6VdWGapsys;
257             }
258         }
259     }
260
261     if constexpr (softcoreType == KernelSoftcoreType::Beutler)
262     {
263         /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
264         if ((c12[0] > 0) && (c12[1] > 0))
265         {
266             alpha_vdw_eff  = 0;
267             alpha_coul_eff = 0;
268         }
269         else
270         {
271             alpha_vdw_eff  = scParams.alphaVdw;
272             alpha_coul_eff = scParams.alphaCoulomb;
273         }
274     }
275     if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
276     {
277         /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
278         if ((c12[0] > 0) && (c12[1] > 0))
279         {
280             scaleLinpointVdWGapsys  = 0;
281             scaleLinpointCoulGapsys = 0;
282         }
283         else
284         {
285             scaleLinpointVdWGapsys  = scParams.scaleLinpointVdWGapsys;
286             scaleLinpointCoulGapsys = scParams.scaleLinpointCoulGapsys;
287         }
288     }
289
290     /* Loop over A and B states again */
291     for (i = 0; i < 2; i++)
292     {
293         fscal_elec[i] = 0;
294         fscal_vdw[i]  = 0;
295         velec[i]      = 0;
296         vvdw[i]       = 0;
297         dvdl_elec[i]  = 0;
298         dvdl_vdw[i]   = 0;
299         rQ            = 0;
300         rLJ           = 0;
301
302         /* Only spend time on A or B state if it is non-zero */
303         if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
304         {
305             /* Coulomb */
306             if constexpr (softcoreType == KernelSoftcoreType::Beutler)
307             {
308                 rpinv  = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
309                 r_coul = sixthRoot(rpinv);
310             }
311             else
312             {
313                 rpinv  = one / rp;
314                 r_coul = r;
315             }
316
317             if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
318             {
319                 rQ = gmx::sixthroot(one - LFC[i]) * (one + std::fabs(qq[i] / facel));
320                 rQ *= scaleLinpointCoulGapsys;
321                 scaleDvdlRCoul = 1;
322                 if (rQ > rCoulCutoff)
323                 {
324                     rQ             = rCoulCutoff;
325                     scaleDvdlRCoul = 0;
326                 }
327             }
328
329             if ((softcoreType == KernelSoftcoreType::Gapsys) && (r < rQ))
330             {
331                 real rInvQ    = one / rQ;
332                 real constFac = qq[i] * rInvQ;
333                 real linFac   = constFac * r * rInvQ;
334                 real quadrFac = linFac * r * rInvQ;
335
336                 /* Computing Coulomb force and potential energy */
337                 fscal_elec[i] = -2 * quadrFac + 3 * linFac;
338                 fscal_elec[i] *= rpinv;
339
340                 velec[i] = quadrFac - 3 * (linFac - constFac);
341
342                 dvdl_elec[i] += scaleDvdlRCoul * DLF[i] * half * (LFC[i] / (1 - LFC[i]))
343                                 * (quadrFac - 2 * linFac + constFac);
344             }
345             else // Beutler, resp. hardcore
346             {
347                 /* Electrostatics table lookup data */
348                 rtab = r_coul * tabscale;
349                 ntab = static_cast<int>(rtab);
350                 eps  = rtab - ntab;
351                 eps2 = eps * eps;
352                 ntab = static_cast<int>(tableStride * ntab);
353                 /* Electrostatics */
354                 Y             = vftab[ntab];
355                 F             = vftab[ntab + 1];
356                 Geps          = eps * vftab[ntab + 2];
357                 Heps2         = eps2 * vftab[ntab + 3];
358                 Fp            = F + Geps + Heps2;
359                 VV            = Y + eps * Fp;
360                 FF            = Fp + Geps + two * Heps2;
361                 velec[i]      = qq[i] * VV;
362                 fscal_elec[i] = -qq[i] * FF * r_coul * rpinv * tabscale;
363             }
364
365             /* Vdw */
366             if constexpr (softcoreType == KernelSoftcoreType::Beutler)
367             {
368                 rpinv = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
369                 r_vdw = sixthRoot(rpinv);
370             }
371             else
372             {
373                 rpinv = one / rp;
374                 r_vdw = r;
375             }
376
377             if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
378             {
379                 constexpr real c_twentySixSeventh = 26.0_real / 7.0_real;
380
381                 rLJ = gmx::sixthroot(c_twentySixSeventh * sigma6VdWGapsys[i] * (one - LFV[i]));
382                 rLJ *= scaleLinpointVdWGapsys;
383             }
384
385             if ((softcoreType == KernelSoftcoreType::Gapsys) && (r < rLJ))
386             {
387                 // scaled values for c6 and c12
388                 real c6s, c12s;
389                 c6s  = c6[i] / 6.0_real;
390                 c12s = c12[i] / 12.0_real;
391
392                 /* Temporary variables for inverted values */
393                 real rInvLJ = one / rLJ;
394                 real rInv14, rInv13, rInv12;
395                 real rInv8, rInv7, rInv6;
396                 rInv6 = rInvLJ * rInvLJ * rInvLJ;
397                 rInv6 *= rInv6;
398                 rInv7  = rInv6 * rInvLJ;
399                 rInv8  = rInv7 * rInvLJ;
400                 rInv14 = c12s * rInv7 * rInv7 * r2;
401                 rInv13 = c12s * rInv7 * rInv6 * r;
402                 rInv12 = c12s * rInv6 * rInv6;
403                 rInv8 *= c6s * r2;
404                 rInv7 *= c6s * r;
405                 rInv6 *= c6s;
406
407                 /* Temporary variables for A and B */
408                 real quadrFac, linearFac, constFac;
409                 quadrFac  = 156 * rInv14 - 42 * rInv8;
410                 linearFac = 168 * rInv13 - 48 * rInv7;
411                 constFac  = 91 * rInv12 - 28 * rInv6;
412
413                 /* Computing LJ force and potential energy*/
414                 fscal_vdw[i] = -quadrFac + linearFac;
415                 fscal_vdw[i] *= rpinv;
416
417                 vvdw[i] = 0.5_real * quadrFac - linearFac + constFac;
418
419                 dvdl_vdw[i] += DLF[i] * 28 * (LFV[i] / (one - LFV[i]))
420                                * ((6.5_real * rInv14 - rInv8) - (13 * rInv13 - 2 * rInv7)
421                                   + (6.5_real * rInv12 - rInv6));
422             }
423             else // Beutler, resp. hardcore
424             {
425                 /* Vdw table lookup data */
426                 rtab = r_vdw * tabscale;
427                 ntab = static_cast<int>(rtab);
428                 eps  = rtab - ntab;
429                 eps2 = eps * eps;
430                 ntab = 12 * ntab;
431                 /* Dispersion */
432                 Y            = vftab[ntab + 4];
433                 F            = vftab[ntab + 5];
434                 Geps         = eps * vftab[ntab + 6];
435                 Heps2        = eps2 * vftab[ntab + 7];
436                 Fp           = F + Geps + Heps2;
437                 VV           = Y + eps * Fp;
438                 FF           = Fp + Geps + two * Heps2;
439                 vvdw[i]      = c6[i] * VV;
440                 fscal_vdw[i] = -c6[i] * FF;
441
442                 /* Repulsion */
443                 Y     = vftab[ntab + 8];
444                 F     = vftab[ntab + 9];
445                 Geps  = eps * vftab[ntab + 10];
446                 Heps2 = eps2 * vftab[ntab + 11];
447                 Fp    = F + Geps + Heps2;
448                 VV    = Y + eps * Fp;
449                 FF    = Fp + Geps + two * Heps2;
450                 vvdw[i] += c12[i] * VV;
451                 fscal_vdw[i] -= c12[i] * FF;
452                 fscal_vdw[i] *= r_vdw * rpinv * tabscale;
453             }
454         }
455     }
456     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
457     /* Assemble A and B states */
458     velecsum      = 0;
459     vvdwsum       = 0;
460     dvdl_coul_sum = 0;
461     dvdl_vdw_sum  = 0;
462     fscal         = 0;
463     for (i = 0; i < 2; i++)
464     {
465         velecsum += LFC[i] * velec[i];
466         vvdwsum += LFV[i] * vvdw[i];
467
468         fscal += (LFC[i] * fscal_elec[i] + LFV[i] * fscal_vdw[i]) * rpm2;
469
470         if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
471         {
472             dvdl_coul_sum += dvdl_elec[i];
473             dvdl_vdw_sum += dvdl_vdw[i];
474         }
475         dvdl_coul_sum += velec[i] * DLF[i];
476         dvdl_vdw_sum += vvdw[i] * DLF[i];
477         if constexpr (softcoreType == KernelSoftcoreType::Beutler)
478         {
479             dvdl_coul_sum += LFC[i] * alpha_coul_eff * dlfac_coul[i] * fscal_elec[i] * sigma_pow[i];
480             dvdl_vdw_sum += LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * fscal_vdw[i] * sigma_pow[i];
481         }
482     }
483
484     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdl_coul_sum;
485     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdl_vdw_sum;
486
487     *velectot = velecsum;
488     *vvdwtot  = vvdwsum;
489
490     return fscal;
491 }
492
493 /*! \brief Calculate pair interactions, supports all types and conditions. */
494 template<BondedKernelFlavor flavor>
495 static real do_pairs_general(int                           ftype,
496                              int                           nbonds,
497                              const t_iatom                 iatoms[],
498                              const t_iparams               iparams[],
499                              const rvec                    x[],
500                              rvec4                         f[],
501                              rvec                          fshift[],
502                              const struct t_pbc*           pbc,
503                              const real*                   lambda,
504                              real*                         dvdl,
505                              gmx::ArrayRef<real>           chargeA,
506                              gmx::ArrayRef<real>           chargeB,
507                              gmx::ArrayRef<bool>           atomIsPerturbed,
508                              gmx::ArrayRef<unsigned short> cENER,
509                              int                           numEnergyGroups,
510                              const t_forcerec*             fr,
511                              gmx_grppairener_t*            grppener,
512                              int*                          global_atom_index)
513 {
514     real            qq, c6, c12;
515     rvec            dx;
516     int             i, itype, ai, aj, gid;
517     int             fshift_index;
518     real            r2;
519     real            fscal, velec, vvdw;
520     real*           energygrp_elec;
521     real*           energygrp_vdw;
522     static gmx_bool warned_rlimit = FALSE;
523     /* Free energy stuff */
524     gmx_bool   bFreeEnergy;
525     real       LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
526     real       qqB, c6B, c12B;
527     const real oneSixth = 1.0_real / 6.0_real;
528
529     switch (ftype)
530     {
531         case F_LJ14:
532         case F_LJC14_Q:
533             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::Coulomb14].data();
534             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJ14].data();
535             break;
536         case F_LJC_PAIRS_NB:
537             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::CoulombSR].data();
538             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJSR].data();
539             break;
540         default:
541             energygrp_elec = nullptr; /* Keep compiler happy */
542             energygrp_vdw  = nullptr; /* Keep compiler happy */
543             gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
544     }
545
546     if (fr->efep != FreeEnergyPerturbationType::No)
547     {
548         /* Lambda factor for state A=1-lambda and B=lambda */
549         LFC[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
550         LFV[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
551         LFC[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
552         LFV[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
553
554         /*derivative of the lambda factor for state A and B */
555         DLF[0] = -1;
556         DLF[1] = 1;
557
558         GMX_ASSERT(fr->ic->softCoreParameters, "We need soft-core parameters");
559         const auto& scParams = *fr->ic->softCoreParameters;
560
561         for (i = 0; i < 2; i++)
562         {
563             lfac_coul[i] = (scParams.lambdaPower == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
564             dlfac_coul[i] = DLF[i] * scParams.lambdaPower * oneSixth
565                             * (scParams.lambdaPower == 2 ? (1 - LFC[i]) : 1);
566             lfac_vdw[i]  = (scParams.lambdaPower == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
567             dlfac_vdw[i] = DLF[i] * scParams.lambdaPower * oneSixth
568                            * (scParams.lambdaPower == 2 ? (1 - LFV[i]) : 1);
569         }
570     }
571
572     /* TODO This code depends on the logic in tables.c that constructs
573        the table layout, which should be made explicit in future
574        cleanup. */
575     GMX_ASSERT(
576             etiNR == 3,
577             "Pair-interaction code that uses GROMACS interaction tables supports exactly 3 tables");
578     GMX_ASSERT(
579             fr->pairsTable->interaction == TableInteraction::ElectrostaticVdwRepulsionVdwDispersion,
580             "Pair interaction kernels need a table with Coulomb, repulsion and dispersion entries");
581
582     const real epsfac = fr->ic->epsfac;
583
584     bFreeEnergy = FALSE;
585     for (i = 0; (i < nbonds);)
586     {
587         itype = iatoms[i++];
588         ai    = iatoms[i++];
589         aj    = iatoms[i++];
590         gid   = GID(cENER[ai], cENER[aj], numEnergyGroups);
591
592         /* Get parameters */
593         switch (ftype)
594         {
595             case F_LJ14:
596                 bFreeEnergy =
597                         (fr->efep != FreeEnergyPerturbationType::No
598                          && ((!atomIsPerturbed.empty() && (atomIsPerturbed[ai] || atomIsPerturbed[aj]))
599                              || iparams[itype].lj14.c6A != iparams[itype].lj14.c6B
600                              || iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
601                 qq  = chargeA[ai] * chargeA[aj] * epsfac * fr->fudgeQQ;
602                 c6  = iparams[itype].lj14.c6A;
603                 c12 = iparams[itype].lj14.c12A;
604                 break;
605             case F_LJC14_Q:
606                 qq = iparams[itype].ljc14.qi * iparams[itype].ljc14.qj * epsfac
607                      * iparams[itype].ljc14.fqq;
608                 c6  = iparams[itype].ljc14.c6;
609                 c12 = iparams[itype].ljc14.c12;
610                 break;
611             case F_LJC_PAIRS_NB:
612                 qq  = iparams[itype].ljcnb.qi * iparams[itype].ljcnb.qj * epsfac;
613                 c6  = iparams[itype].ljcnb.c6;
614                 c12 = iparams[itype].ljcnb.c12;
615                 break;
616             default:
617                 /* Cannot happen since we called gmx_fatal() above in this case */
618                 qq = c6 = c12 = 0; /* Keep compiler happy */
619                 break;
620         }
621
622         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
623          * included in the general nfbp array now. This means the tables are scaled down by the
624          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
625          * be scaled up.
626          */
627         c6 *= 6.0;
628         c12 *= 12.0;
629
630         /* Do we need to apply full periodic boundary conditions? */
631         if (fr->bMolPBC)
632         {
633             fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
634         }
635         else
636         {
637             fshift_index = c_centralShiftIndex;
638             rvec_sub(x[ai], x[aj], dx);
639         }
640         r2 = norm2(dx);
641
642         if (r2 >= fr->pairsTable->interactionRange * fr->pairsTable->interactionRange)
643         {
644             /* This check isn't race free. But it doesn't matter because if a race occurs the only
645              * disadvantage is that the warning is printed twice */
646             if (!warned_rlimit)
647             {
648                 warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->pairsTable->interactionRange);
649                 warned_rlimit = TRUE;
650             }
651             continue;
652         }
653
654         if (bFreeEnergy)
655         {
656             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
657             qqB  = chargeB[ai] * chargeB[aj] * epsfac * fr->fudgeQQ;
658             c6B  = iparams[itype].lj14.c6B * 6.0;
659             c12B = iparams[itype].lj14.c12B * 12.0;
660
661             const auto& scParams = *fr->ic->softCoreParameters;
662             if (scParams.softcoreType == SoftcoreType::Beutler)
663             {
664                 if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
665                 {
666                     fscal = free_energy_evaluate_single<KernelSoftcoreType::None>(
667                             r2,
668                             fr->ic->rcoulomb,
669                             *fr->ic->softCoreParameters,
670                             fr->pairsTable->scale,
671                             fr->pairsTable->data.data(),
672                             fr->pairsTable->stride,
673                             qq,
674                             c6,
675                             c12,
676                             qqB,
677                             c6B,
678                             c12B,
679                             epsfac,
680                             LFC,
681                             LFV,
682                             DLF,
683                             lfac_coul,
684                             lfac_vdw,
685                             dlfac_coul,
686                             dlfac_vdw,
687                             &velec,
688                             &vvdw,
689                             dvdl);
690                 }
691                 else
692                 {
693                     fscal = free_energy_evaluate_single<KernelSoftcoreType::Beutler>(
694                             r2,
695                             fr->ic->rcoulomb,
696                             *fr->ic->softCoreParameters,
697                             fr->pairsTable->scale,
698                             fr->pairsTable->data.data(),
699                             fr->pairsTable->stride,
700                             qq,
701                             c6,
702                             c12,
703                             qqB,
704                             c6B,
705                             c12B,
706                             epsfac,
707                             LFC,
708                             LFV,
709                             DLF,
710                             lfac_coul,
711                             lfac_vdw,
712                             dlfac_coul,
713                             dlfac_vdw,
714                             &velec,
715                             &vvdw,
716                             dvdl);
717                 }
718             }
719             else // Gapsys
720             {
721                 if (scParams.scaleLinpointCoulGapsys == 0 && scParams.scaleLinpointVdWGapsys == 0)
722                 {
723                     fscal = free_energy_evaluate_single<KernelSoftcoreType::None>(
724                             r2,
725                             fr->ic->rcoulomb,
726                             *fr->ic->softCoreParameters,
727                             fr->pairsTable->scale,
728                             fr->pairsTable->data.data(),
729                             fr->pairsTable->stride,
730                             qq,
731                             c6,
732                             c12,
733                             qqB,
734                             c6B,
735                             c12B,
736                             epsfac,
737                             LFC,
738                             LFV,
739                             DLF,
740                             lfac_coul,
741                             lfac_vdw,
742                             dlfac_coul,
743                             dlfac_vdw,
744                             &velec,
745                             &vvdw,
746                             dvdl);
747                 }
748                 else
749                 {
750                     fscal = free_energy_evaluate_single<KernelSoftcoreType::Gapsys>(
751                             r2,
752                             fr->ic->rcoulomb,
753                             *fr->ic->softCoreParameters,
754                             fr->pairsTable->scale,
755                             fr->pairsTable->data.data(),
756                             fr->pairsTable->stride,
757                             qq,
758                             c6,
759                             c12,
760                             qqB,
761                             c6B,
762                             c12B,
763                             epsfac,
764                             LFC,
765                             LFV,
766                             DLF,
767                             lfac_coul,
768                             lfac_vdw,
769                             dlfac_coul,
770                             dlfac_vdw,
771                             &velec,
772                             &vvdw,
773                             dvdl);
774                 }
775             }
776         }
777         else
778         {
779             /* Evaluate tabulated interaction without free energy */
780             fscal = evaluate_single(r2,
781                                     fr->pairsTable->scale,
782                                     fr->pairsTable->data.data(),
783                                     fr->pairsTable->stride,
784                                     qq,
785                                     c6,
786                                     c12,
787                                     &velec,
788                                     &vvdw);
789         }
790
791         energygrp_elec[gid] += velec;
792         energygrp_vdw[gid] += vvdw;
793         svmul(fscal, dx, dx);
794
795         /* Add the forces */
796         rvec_inc(f[ai], dx);
797         rvec_dec(f[aj], dx);
798
799         if (computeVirial(flavor))
800         {
801             if (fshift_index != c_centralShiftIndex)
802             {
803                 rvec_inc(fshift[fshift_index], dx);
804                 rvec_dec(fshift[c_centralShiftIndex], dx);
805             }
806         }
807     }
808     return 0.0;
809 }
810
811 /*! \brief Calculate pairs, only for plain-LJ + plain Coulomb normal type.
812  *
813  * This function is templated for real/SimdReal and for optimization.
814  */
815 template<typename T, int pack_size, typename pbc_type>
816 static void do_pairs_simple(int                 nbonds,
817                             const t_iatom       iatoms[],
818                             const t_iparams     iparams[],
819                             const rvec          x[],
820                             rvec4               f[],
821                             const pbc_type      pbc,
822                             gmx::ArrayRef<real> charge,
823                             const real          scale_factor)
824 {
825     const int nfa1 = 1 + 2;
826
827     T six(6);
828     T twelve(12);
829     T ef(scale_factor);
830
831 #if GMX_SIMD_HAVE_REAL
832     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[pack_size];
833     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[pack_size];
834     alignas(GMX_SIMD_ALIGNMENT) real         coeff[3 * pack_size];
835 #else
836     std::int32_t ai[pack_size];
837     std::int32_t aj[pack_size];
838     real         coeff[3 * pack_size];
839 #endif
840
841     /* nbonds is #pairs*nfa1, here we step pack_size pairs */
842     for (int i = 0; i < nbonds; i += pack_size * nfa1)
843     {
844         /* Collect atoms for pack_size pairs.
845          * iu indexes into iatoms, we should not let iu go beyond nbonds.
846          */
847         int iu = i;
848         for (int s = 0; s < pack_size; s++)
849         {
850             int itype = iatoms[iu];
851             ai[s]     = iatoms[iu + 1];
852             aj[s]     = iatoms[iu + 2];
853
854             if (i + s * nfa1 < nbonds)
855             {
856                 coeff[0 * pack_size + s] = iparams[itype].lj14.c6A;
857                 coeff[1 * pack_size + s] = iparams[itype].lj14.c12A;
858                 coeff[2 * pack_size + s] = charge[ai[s]] * charge[aj[s]];
859
860                 /* Avoid indexing the iatoms array out of bounds.
861                  * We pad the coordinate indices with the last atom pair.
862                  */
863                 if (iu + nfa1 < nbonds)
864                 {
865                     iu += nfa1;
866                 }
867             }
868             else
869             {
870                 /* Pad the coefficient arrays with zeros to get zero forces */
871                 coeff[0 * pack_size + s] = 0;
872                 coeff[1 * pack_size + s] = 0;
873                 coeff[2 * pack_size + s] = 0;
874             }
875         }
876
877         /* Load the coordinates */
878         T xi[DIM], xj[DIM];
879         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]);
880         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]);
881
882         T c6  = load<T>(coeff + 0 * pack_size);
883         T c12 = load<T>(coeff + 1 * pack_size);
884         T qq  = load<T>(coeff + 2 * pack_size);
885
886         /* We could save these operations by storing 6*C6,12*C12 */
887         c6  = six * c6;
888         c12 = twelve * c12;
889
890         T dr[DIM];
891         pbc_dx_aiuc(pbc, xi, xj, dr);
892
893         T rsq   = dr[XX] * dr[XX] + dr[YY] * dr[YY] + dr[ZZ] * dr[ZZ];
894         T rinv  = gmx::invsqrt(rsq);
895         T rinv2 = rinv * rinv;
896         T rinv6 = rinv2 * rinv2 * rinv2;
897
898         /* Calculate the Coulomb force * r */
899         T cfr = ef * qq * rinv;
900
901         /* Calculate the LJ force * r and add it to the Coulomb part */
902         T fr = gmx::fma(fms(c12, rinv6, c6), rinv6, cfr);
903
904         T finvr = fr * rinv2;
905         T fx    = finvr * dr[XX];
906         T fy    = finvr * dr[YY];
907         T fz    = finvr * dr[ZZ];
908
909         /* Add the pair forces to the force array.
910          * Note that here we might add multiple force components for some atoms
911          * due to the SIMD padding. But the extra force components are zero.
912          */
913         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, fx, fy, fz);
914         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, fx, fy, fz);
915     }
916 }
917
918 /*! \brief Calculate all listed pair interactions */
919 void do_pairs(int                           ftype,
920               int                           nbonds,
921               const t_iatom                 iatoms[],
922               const t_iparams               iparams[],
923               const rvec                    x[],
924               rvec4                         f[],
925               rvec                          fshift[],
926               const struct t_pbc*           pbc,
927               const real*                   lambda,
928               real*                         dvdl,
929               gmx::ArrayRef<real>           chargeA,
930               gmx::ArrayRef<real>           chargeB,
931               gmx::ArrayRef<bool>           atomIsPerturbed,
932               gmx::ArrayRef<unsigned short> cENER,
933               const int                     numEnergyGroups,
934               const t_forcerec*             fr,
935               const bool                    havePerturbedInteractions,
936               const gmx::StepWorkload&      stepWork,
937               gmx_grppairener_t*            grppener,
938               int*                          global_atom_index)
939 {
940     if (ftype == F_LJ14 && fr->ic->vdwtype != VanDerWaalsType::User && !EEL_USER(fr->ic->eeltype)
941         && !havePerturbedInteractions && (!stepWork.computeVirial && !stepWork.computeEnergy))
942     {
943         /* We use a fast code-path for plain LJ 1-4 without FEP.
944          *
945          * TODO: Add support for energies (straightforward) and virial
946          * in the SIMD template. For the virial it's inconvenient to store
947          * the force sums for the shifts and we should directly calculate
948          * and sum the virial for the shifts. But we should do this
949          * at once for the angles and dihedrals as well.
950          */
951 #if GMX_SIMD_HAVE_REAL
952         if (fr->use_simd_kernels)
953         {
954             alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
955             set_pbc_simd(pbc, pbc_simd);
956
957             do_pairs_simple<SimdReal, GMX_SIMD_REAL_WIDTH, const real*>(
958                     nbonds, iatoms, iparams, x, f, pbc_simd, chargeA, fr->ic->epsfac * fr->fudgeQQ);
959         }
960         else
961 #endif
962         {
963             /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
964             t_pbc        pbc_no;
965             const t_pbc* pbc_nonnull;
966
967             if (pbc != nullptr)
968             {
969                 pbc_nonnull = pbc;
970             }
971             else
972             {
973                 set_pbc(&pbc_no, PbcType::No, nullptr);
974                 pbc_nonnull = &pbc_no;
975             }
976
977             do_pairs_simple<real, 1, const t_pbc*>(
978                     nbonds, iatoms, iparams, x, f, pbc_nonnull, chargeA, fr->ic->epsfac * fr->fudgeQQ);
979         }
980     }
981     else if (stepWork.computeVirial)
982     {
983         do_pairs_general<BondedKernelFlavor::ForcesAndVirialAndEnergy>(ftype,
984                                                                        nbonds,
985                                                                        iatoms,
986                                                                        iparams,
987                                                                        x,
988                                                                        f,
989                                                                        fshift,
990                                                                        pbc,
991                                                                        lambda,
992                                                                        dvdl,
993                                                                        chargeA,
994                                                                        chargeB,
995                                                                        atomIsPerturbed,
996                                                                        cENER,
997                                                                        numEnergyGroups,
998                                                                        fr,
999                                                                        grppener,
1000                                                                        global_atom_index);
1001     }
1002     else
1003     {
1004         do_pairs_general<BondedKernelFlavor::ForcesAndEnergy>(ftype,
1005                                                               nbonds,
1006                                                               iatoms,
1007                                                               iparams,
1008                                                               x,
1009                                                               f,
1010                                                               fshift,
1011                                                               pbc,
1012                                                               lambda,
1013                                                               dvdl,
1014                                                               chargeA,
1015                                                               chargeB,
1016                                                               atomIsPerturbed,
1017                                                               cENER,
1018                                                               numEnergyGroups,
1019                                                               fr,
1020                                                               grppener,
1021                                                               global_atom_index);
1022     }
1023 }