c97718459e7bab513f94d8dcb7811e1437c5d406
[alexxy/gromacs.git] / src / gromacs / listed_forces / pairs.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7  * and including many others, as listed in the AUTHORS file in the
8  * top-level source directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36 /*! \internal \file
37  *
38  * \brief This file defines functions for "pair" interactions
39  * (i.e. listed non-bonded interactions, e.g. 1-4 interactions)
40  *
41  * \author Mark Abraham <mark.j.abraham@gmail.com>
42  *
43  * \ingroup module_listed_forces
44  */
45 #include "gmxpre.h"
46
47 #include "pairs.h"
48
49 #include <cmath>
50
51 #include "gromacs/listed_forces/bonded.h"
52 #include "gromacs/math/functions.h"
53 #include "gromacs/math/vec.h"
54 #include "gromacs/mdtypes/forcerec.h"
55 #include "gromacs/mdtypes/group.h"
56 #include "gromacs/mdtypes/interaction_const.h"
57 #include "gromacs/mdtypes/md_enums.h"
58 #include "gromacs/mdtypes/nblist.h"
59 #include "gromacs/mdtypes/simulation_workload.h"
60 #include "gromacs/pbcutil/ishift.h"
61 #include "gromacs/pbcutil/pbc.h"
62 #include "gromacs/pbcutil/pbc_simd.h"
63 #include "gromacs/simd/simd.h"
64 #include "gromacs/simd/simd_math.h"
65 #include "gromacs/simd/vector_operations.h"
66 #include "gromacs/tables/forcetable.h"
67 #include "gromacs/utility/basedefinitions.h"
68 #include "gromacs/utility/fatalerror.h"
69 #include "gromacs/utility/gmxassert.h"
70
71 #include "listed_internal.h"
72
73 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
74
75 /*! \brief Issue a warning if a listed interaction is beyond a table limit */
76 static void warning_rlimit(const rvec* x, int ai, int aj, int* global_atom_index, real r, real rlimit)
77 {
78     gmx_warning(
79             "Listed nonbonded interaction between particles %d and %d\n"
80             "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
81             "This is likely either a 1,4 interaction, or a listed interaction inside\n"
82             "a smaller molecule you are decoupling during a free energy calculation.\n"
83             "Since interactions at distances beyond the table cannot be computed,\n"
84             "they are skipped until they are inside the table limit again. You will\n"
85             "only see this message once, even if it occurs for several interactions.\n\n"
86             "IMPORTANT: This should not happen in a stable simulation, so there is\n"
87             "probably something wrong with your system. Only change the table-extension\n"
88             "distance in the mdp file if you are really sure that is the reason.\n",
89             glatnr(global_atom_index, ai),
90             glatnr(global_atom_index, aj),
91             r,
92             rlimit);
93
94     if (debug)
95     {
96         fprintf(debug,
97                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. "
98                 "Ignored\n",
99                 x[ai][XX],
100                 x[ai][YY],
101                 x[ai][ZZ],
102                 x[aj][XX],
103                 x[aj][YY],
104                 x[aj][ZZ],
105                 glatnr(global_atom_index, ai),
106                 glatnr(global_atom_index, aj),
107                 r);
108     }
109 }
110
111 /*! \brief Compute the energy and force for a single pair interaction */
112 static real evaluate_single(real        r2,
113                             real        tabscale,
114                             const real* vftab,
115                             real        tableStride,
116                             real        qq,
117                             real        c6,
118                             real        c12,
119                             real*       velec,
120                             real*       vvdw)
121 {
122     real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
123     int  ntab;
124
125     /* Do the tabulated interactions - first table lookup */
126     rinv = gmx::invsqrt(r2);
127     r    = r2 * rinv;
128     rtab = r * tabscale;
129     ntab = static_cast<int>(rtab);
130     eps  = rtab - ntab;
131     eps2 = eps * eps;
132     ntab = static_cast<int>(tableStride * ntab);
133     /* Electrostatics */
134     Y     = vftab[ntab];
135     F     = vftab[ntab + 1];
136     Geps  = eps * vftab[ntab + 2];
137     Heps2 = eps2 * vftab[ntab + 3];
138     Fp    = F + Geps + Heps2;
139     VVe   = Y + eps * Fp;
140     FFe   = Fp + Geps + 2.0 * Heps2;
141     /* Dispersion */
142     Y     = vftab[ntab + 4];
143     F     = vftab[ntab + 5];
144     Geps  = eps * vftab[ntab + 6];
145     Heps2 = eps2 * vftab[ntab + 7];
146     Fp    = F + Geps + Heps2;
147     VVd   = Y + eps * Fp;
148     FFd   = Fp + Geps + 2.0 * Heps2;
149     /* Repulsion */
150     Y     = vftab[ntab + 8];
151     F     = vftab[ntab + 9];
152     Geps  = eps * vftab[ntab + 10];
153     Heps2 = eps2 * vftab[ntab + 11];
154     Fp    = F + Geps + Heps2;
155     VVr   = Y + eps * Fp;
156     FFr   = Fp + Geps + 2.0 * Heps2;
157
158     *velec = qq * VVe;
159     *vvdw  = c6 * VVd + c12 * VVr;
160
161     fscal = -(qq * FFe + c6 * FFd + c12 * FFr) * tabscale * rinv;
162
163     return fscal;
164 }
165
166 static inline real sixthRoot(const real r)
167 {
168     return gmx::invsqrt(std::cbrt(r));
169 }
170
171 /*! \brief Compute the energy and force for a single pair interaction under FEP */
172 template<KernelSoftcoreType softcoreType>
173 static real free_energy_evaluate_single(real                                           r2,
174                                         real                                           rCoulCutoff,
175                                         const interaction_const_t::SoftCoreParameters& scParams,
176                                         real                                           tabscale,
177                                         const real*                                    vftab,
178                                         real                                           tableStride,
179                                         real                                           qqA,
180                                         real                                           c6A,
181                                         real                                           c12A,
182                                         real                                           qqB,
183                                         real                                           c6B,
184                                         real                                           c12B,
185                                         real                                           facel,
186                                         const real                                     LFC[2],
187                                         const real                                     LFV[2],
188                                         const real                                     DLF[2],
189                                         const real                                     lfac_coul[2],
190                                         const real                                     lfac_vdw[2],
191                                         const real dlfac_coul[2],
192                                         const real dlfac_vdw[2],
193                                         real*      velectot,
194                                         real*      vvdwtot,
195                                         real*      dvdl)
196 {
197     real       rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
198     real       qq[2], c6[2], c12[2], sigma6[2], sigma_pow[2];
199     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul_sum, dvdl_vdw_sum;
200     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
201     real       fscal_vdw[2], fscal_elec[2];
202     real       velec[2], vvdw[2];
203     real       dvdl_elec[2], dvdl_vdw[2];
204     real       scaleLinpointCoulGapsys, scaleLinpointVdWGapsys, sigma6VdWGapsys[2];
205     real       rQ, rLJ;
206     real       scaleDvdlRCoul;
207     int        i, ntab;
208     const real half = 0.5_real;
209     const real one  = 1.0_real;
210     const real two  = 2.0_real;
211
212     qq[0]  = qqA;
213     qq[1]  = qqB;
214     c6[0]  = c6A;
215     c6[1]  = c6B;
216     c12[0] = c12A;
217     c12[1] = c12B;
218
219     const real rpm2 = r2 * r2;   /* r4 */
220     const real rp   = rpm2 * r2; /* r6 */
221     const real r    = sqrt(r2);  /* r1 */
222
223     /* Loop over state A(0) and B(1) */
224     for (i = 0; i < 2; i++)
225     {
226         if constexpr (softcoreType == KernelSoftcoreType::Beutler)
227         {
228             if ((c6[i] > 0) && (c12[i] > 0))
229             {
230                 /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
231                  * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
232                  */
233                 sigma6[i] = half * c12[i] / c6[i];
234                 if (sigma6[i] < scParams.sigma6Minimum) /* for disappearing coul and vdw with soft core at the same time */
235                 {
236                     sigma6[i] = scParams.sigma6Minimum;
237                 }
238             }
239             else
240             {
241                 sigma6[i] = scParams.sigma6WithInvalidSigma;
242             }
243             sigma_pow[i] = sigma6[i];
244         }
245         if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
246         {
247             if ((c6[i] > 0) && (c12[i] > 0))
248             {
249                 /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
250                  * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
251                  */
252                 sigma6VdWGapsys[i] = half * c12[i] / c6[i];
253                 if (sigma6VdWGapsys[i]
254                     < scParams.sigma6VdWGapsys) /* for disappearing coul and vdw with soft core at the same time */
255                 {
256                     sigma6VdWGapsys[i] = scParams.sigma6VdWGapsys;
257                 }
258             }
259             else
260             {
261                 sigma6VdWGapsys[i] = scParams.sigma6VdWGapsys;
262             }
263         }
264     }
265
266     if constexpr (softcoreType == KernelSoftcoreType::Beutler)
267     {
268         /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
269         if ((c12[0] > 0) && (c12[1] > 0))
270         {
271             alpha_vdw_eff  = 0;
272             alpha_coul_eff = 0;
273         }
274         else
275         {
276             alpha_vdw_eff  = scParams.alphaVdw;
277             alpha_coul_eff = scParams.alphaCoulomb;
278         }
279     }
280     if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
281     {
282         /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
283         if ((c12[0] > 0) && (c12[1] > 0))
284         {
285             scaleLinpointVdWGapsys  = 0;
286             scaleLinpointCoulGapsys = 0;
287         }
288         else
289         {
290             scaleLinpointVdWGapsys  = scParams.scaleLinpointVdWGapsys;
291             scaleLinpointCoulGapsys = scParams.scaleLinpointCoulGapsys;
292         }
293     }
294
295     /* Loop over A and B states again */
296     for (i = 0; i < 2; i++)
297     {
298         fscal_elec[i] = 0;
299         fscal_vdw[i]  = 0;
300         velec[i]      = 0;
301         vvdw[i]       = 0;
302         dvdl_elec[i]  = 0;
303         dvdl_vdw[i]   = 0;
304         rQ            = 0;
305         rLJ           = 0;
306
307         /* Only spend time on A or B state if it is non-zero */
308         if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
309         {
310             /* Coulomb */
311             if constexpr (softcoreType == KernelSoftcoreType::Beutler)
312             {
313                 rpinv  = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
314                 r_coul = sixthRoot(rpinv);
315             }
316             else
317             {
318                 rpinv  = one / rp;
319                 r_coul = r;
320             }
321
322             if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
323             {
324                 rQ = gmx::sixthroot(one - LFC[i]) * (one + std::fabs(qq[i] / facel));
325                 rQ *= scaleLinpointCoulGapsys;
326                 scaleDvdlRCoul = 1;
327                 if (rQ > rCoulCutoff)
328                 {
329                     rQ             = rCoulCutoff;
330                     scaleDvdlRCoul = 0;
331                 }
332             }
333
334             if ((softcoreType == KernelSoftcoreType::Gapsys) && (r < rQ))
335             {
336                 real rInvQ    = one / rQ;
337                 real constFac = qq[i] * rInvQ;
338                 real linFac   = constFac * r * rInvQ;
339                 real quadrFac = linFac * r * rInvQ;
340
341                 /* Computing Coulomb force and potential energy */
342                 fscal_elec[i] = 2 * quadrFac - 3 * linFac;
343                 fscal_elec[i] *= rpinv;
344
345                 velec[i] = quadrFac - 3 * (linFac - constFac);
346
347                 dvdl_elec[i] += scaleDvdlRCoul * DLF[i] * half * (LFC[i] / (1 - LFC[i]))
348                                 * (quadrFac - 2 * linFac + constFac);
349             }
350             else // Beutler, resp. hardcore
351             {
352                 /* Electrostatics table lookup data */
353                 rtab = r_coul * tabscale;
354                 ntab = static_cast<int>(rtab);
355                 eps  = rtab - ntab;
356                 eps2 = eps * eps;
357                 ntab = static_cast<int>(tableStride * ntab);
358                 /* Electrostatics */
359                 Y             = vftab[ntab];
360                 F             = vftab[ntab + 1];
361                 Geps          = eps * vftab[ntab + 2];
362                 Heps2         = eps2 * vftab[ntab + 3];
363                 Fp            = F + Geps + Heps2;
364                 VV            = Y + eps * Fp;
365                 FF            = Fp + Geps + two * Heps2;
366                 velec[i]      = qq[i] * VV;
367                 fscal_elec[i] = -qq[i] * FF * r_coul * rpinv * tabscale;
368             }
369
370             /* Vdw */
371             if constexpr (softcoreType == KernelSoftcoreType::Beutler)
372             {
373                 rpinv = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
374                 r_vdw = sixthRoot(rpinv);
375             }
376             else
377             {
378                 rpinv = one / rp;
379                 r_vdw = r;
380             }
381
382             if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
383             {
384                 constexpr real c_twentySixSeventh = 26.0_real / 7.0_real;
385
386                 rLJ = gmx::sixthroot(c_twentySixSeventh * sigma6VdWGapsys[i] * (one - LFV[i]));
387                 rLJ *= scaleLinpointVdWGapsys;
388             }
389
390             if ((softcoreType == KernelSoftcoreType::Gapsys) && (r < rLJ))
391             {
392                 // scaled values for c6 and c12
393                 real c6s, c12s;
394                 c6s  = c6[i] / 6.0_real;
395                 c12s = c12[i] / 12.0_real;
396
397                 /* Temporary variables for inverted values */
398                 real rInvLJ = one / rLJ;
399                 real rInv14, rInv13, rInv12;
400                 real rInv8, rInv7, rInv6;
401                 rInv6 = rInvLJ * rInvLJ * rInvLJ;
402                 rInv6 *= rInv6;
403                 rInv7  = rInv6 * rInvLJ;
404                 rInv8  = rInv7 * rInvLJ;
405                 rInv14 = c12s * rInv7 * rInv7 * r2;
406                 rInv13 = c12s * rInv7 * rInv6 * r;
407                 rInv12 = c12s * rInv6 * rInv6;
408                 rInv8 *= c6s * r2;
409                 rInv7 *= c6s * r;
410                 rInv6 *= c6s;
411
412                 /* Temporary variables for A and B */
413                 real quadrFac, linearFac, constFac;
414                 quadrFac  = 156 * rInv14 - 42 * rInv8;
415                 linearFac = 168 * rInv13 - 48 * rInv7;
416                 constFac  = 91 * rInv12 - 28 * rInv6;
417
418                 /* Computing LJ force and potential energy*/
419                 fscal_vdw[i] = quadrFac - linearFac;
420                 fscal_vdw[i] *= rpinv;
421
422                 vvdw[i] = 0.5 * quadrFac - linearFac + constFac;
423
424                 dvdl_vdw[i] += DLF[i] * 28 * (LFV[i] / (one - LFV[i]))
425                                * ((6.5_real * rInv14 - rInv8) - (13 * rInv13 - 2 * rInv7)
426                                   + (6.5_real * rInv12 - rInv6));
427             }
428             else // Beutler, resp. hardcore
429             {
430                 /* Vdw table lookup data */
431                 rtab = r_vdw * tabscale;
432                 ntab = static_cast<int>(rtab);
433                 eps  = rtab - ntab;
434                 eps2 = eps * eps;
435                 ntab = 12 * ntab;
436                 /* Dispersion */
437                 Y            = vftab[ntab + 4];
438                 F            = vftab[ntab + 5];
439                 Geps         = eps * vftab[ntab + 6];
440                 Heps2        = eps2 * vftab[ntab + 7];
441                 Fp           = F + Geps + Heps2;
442                 VV           = Y + eps * Fp;
443                 FF           = Fp + Geps + two * Heps2;
444                 vvdw[i]      = c6[i] * VV;
445                 fscal_vdw[i] = -c6[i] * FF;
446
447                 /* Repulsion */
448                 Y     = vftab[ntab + 8];
449                 F     = vftab[ntab + 9];
450                 Geps  = eps * vftab[ntab + 10];
451                 Heps2 = eps2 * vftab[ntab + 11];
452                 Fp    = F + Geps + Heps2;
453                 VV    = Y + eps * Fp;
454                 FF    = Fp + Geps + two * Heps2;
455                 vvdw[i] += c12[i] * VV;
456                 fscal_vdw[i] -= c12[i] * FF;
457                 fscal_vdw[i] *= r_vdw * rpinv * tabscale;
458             }
459         }
460     }
461     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
462     /* Assemble A and B states */
463     velecsum      = 0;
464     vvdwsum       = 0;
465     dvdl_coul_sum = 0;
466     dvdl_vdw_sum  = 0;
467     fscal         = 0;
468     for (i = 0; i < 2; i++)
469     {
470         velecsum += LFC[i] * velec[i];
471         vvdwsum += LFV[i] * vvdw[i];
472
473         fscal += (LFC[i] * fscal_elec[i] + LFV[i] * fscal_vdw[i]) * rpm2;
474
475         if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
476         {
477             dvdl_coul_sum += dvdl_elec[i];
478             dvdl_vdw_sum += dvdl_vdw[i];
479         }
480         dvdl_coul_sum += velec[i] * DLF[i];
481         dvdl_vdw_sum += vvdw[i] * DLF[i];
482         if constexpr (softcoreType == KernelSoftcoreType::Beutler)
483         {
484             dvdl_coul_sum += LFC[i] * alpha_coul_eff * dlfac_coul[i] * fscal_elec[i] * sigma_pow[i];
485             dvdl_vdw_sum += LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * fscal_vdw[i] * sigma_pow[i];
486         }
487     }
488
489     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdl_coul_sum;
490     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdl_vdw_sum;
491
492     *velectot = velecsum;
493     *vvdwtot  = vvdwsum;
494
495     return fscal;
496 }
497
498 /*! \brief Calculate pair interactions, supports all types and conditions. */
499 template<BondedKernelFlavor flavor>
500 static real do_pairs_general(int                           ftype,
501                              int                           nbonds,
502                              const t_iatom                 iatoms[],
503                              const t_iparams               iparams[],
504                              const rvec                    x[],
505                              rvec4                         f[],
506                              rvec                          fshift[],
507                              const struct t_pbc*           pbc,
508                              const real*                   lambda,
509                              real*                         dvdl,
510                              gmx::ArrayRef<real>           chargeA,
511                              gmx::ArrayRef<real>           chargeB,
512                              gmx::ArrayRef<bool>           atomIsPerturbed,
513                              gmx::ArrayRef<unsigned short> cENER,
514                              int                           numEnergyGroups,
515                              const t_forcerec*             fr,
516                              gmx_grppairener_t*            grppener,
517                              int*                          global_atom_index)
518 {
519     real            qq, c6, c12;
520     rvec            dx;
521     int             i, itype, ai, aj, gid;
522     int             fshift_index;
523     real            r2;
524     real            fscal, velec, vvdw;
525     real*           energygrp_elec;
526     real*           energygrp_vdw;
527     static gmx_bool warned_rlimit = FALSE;
528     /* Free energy stuff */
529     gmx_bool   bFreeEnergy;
530     real       LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
531     real       qqB, c6B, c12B;
532     const real oneSixth = 1.0_real / 6.0_real;
533
534     switch (ftype)
535     {
536         case F_LJ14:
537         case F_LJC14_Q:
538             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::Coulomb14].data();
539             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJ14].data();
540             break;
541         case F_LJC_PAIRS_NB:
542             energygrp_elec = grppener->energyGroupPairTerms[NonBondedEnergyTerms::CoulombSR].data();
543             energygrp_vdw  = grppener->energyGroupPairTerms[NonBondedEnergyTerms::LJSR].data();
544             break;
545         default:
546             energygrp_elec = nullptr; /* Keep compiler happy */
547             energygrp_vdw  = nullptr; /* Keep compiler happy */
548             gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
549     }
550
551     if (fr->efep != FreeEnergyPerturbationType::No)
552     {
553         /* Lambda factor for state A=1-lambda and B=lambda */
554         LFC[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
555         LFV[0] = 1.0 - lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
556         LFC[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
557         LFV[1] = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
558
559         /*derivative of the lambda factor for state A and B */
560         DLF[0] = -1;
561         DLF[1] = 1;
562
563         GMX_ASSERT(fr->ic->softCoreParameters, "We need soft-core parameters");
564         const auto& scParams = *fr->ic->softCoreParameters;
565
566         for (i = 0; i < 2; i++)
567         {
568             lfac_coul[i] = (scParams.lambdaPower == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
569             dlfac_coul[i] = DLF[i] * scParams.lambdaPower * oneSixth
570                             * (scParams.lambdaPower == 2 ? (1 - LFC[i]) : 1);
571             lfac_vdw[i]  = (scParams.lambdaPower == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
572             dlfac_vdw[i] = DLF[i] * scParams.lambdaPower * oneSixth
573                            * (scParams.lambdaPower == 2 ? (1 - LFV[i]) : 1);
574         }
575     }
576
577     /* TODO This code depends on the logic in tables.c that constructs
578        the table layout, which should be made explicit in future
579        cleanup. */
580     GMX_ASSERT(
581             etiNR == 3,
582             "Pair-interaction code that uses GROMACS interaction tables supports exactly 3 tables");
583     GMX_ASSERT(
584             fr->pairsTable->interaction == TableInteraction::ElectrostaticVdwRepulsionVdwDispersion,
585             "Pair interaction kernels need a table with Coulomb, repulsion and dispersion entries");
586
587     const real epsfac = fr->ic->epsfac;
588
589     bFreeEnergy = FALSE;
590     for (i = 0; (i < nbonds);)
591     {
592         itype = iatoms[i++];
593         ai    = iatoms[i++];
594         aj    = iatoms[i++];
595         gid   = GID(cENER[ai], cENER[aj], numEnergyGroups);
596
597         /* Get parameters */
598         switch (ftype)
599         {
600             case F_LJ14:
601                 bFreeEnergy =
602                         (fr->efep != FreeEnergyPerturbationType::No
603                          && ((!atomIsPerturbed.empty() && (atomIsPerturbed[ai] || atomIsPerturbed[aj]))
604                              || iparams[itype].lj14.c6A != iparams[itype].lj14.c6B
605                              || iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
606                 qq  = chargeA[ai] * chargeA[aj] * epsfac * fr->fudgeQQ;
607                 c6  = iparams[itype].lj14.c6A;
608                 c12 = iparams[itype].lj14.c12A;
609                 break;
610             case F_LJC14_Q:
611                 qq = iparams[itype].ljc14.qi * iparams[itype].ljc14.qj * epsfac
612                      * iparams[itype].ljc14.fqq;
613                 c6  = iparams[itype].ljc14.c6;
614                 c12 = iparams[itype].ljc14.c12;
615                 break;
616             case F_LJC_PAIRS_NB:
617                 qq  = iparams[itype].ljcnb.qi * iparams[itype].ljcnb.qj * epsfac;
618                 c6  = iparams[itype].ljcnb.c6;
619                 c12 = iparams[itype].ljcnb.c12;
620                 break;
621             default:
622                 /* Cannot happen since we called gmx_fatal() above in this case */
623                 qq = c6 = c12 = 0; /* Keep compiler happy */
624                 break;
625         }
626
627         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
628          * included in the general nfbp array now. This means the tables are scaled down by the
629          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
630          * be scaled up.
631          */
632         c6 *= 6.0;
633         c12 *= 12.0;
634
635         /* Do we need to apply full periodic boundary conditions? */
636         if (fr->bMolPBC)
637         {
638             fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
639         }
640         else
641         {
642             fshift_index = c_centralShiftIndex;
643             rvec_sub(x[ai], x[aj], dx);
644         }
645         r2 = norm2(dx);
646
647         if (r2 >= fr->pairsTable->interactionRange * fr->pairsTable->interactionRange)
648         {
649             /* This check isn't race free. But it doesn't matter because if a race occurs the only
650              * disadvantage is that the warning is printed twice */
651             if (!warned_rlimit)
652             {
653                 warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->pairsTable->interactionRange);
654                 warned_rlimit = TRUE;
655             }
656             continue;
657         }
658
659         if (bFreeEnergy)
660         {
661             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
662             qqB  = chargeB[ai] * chargeB[aj] * epsfac * fr->fudgeQQ;
663             c6B  = iparams[itype].lj14.c6B * 6.0;
664             c12B = iparams[itype].lj14.c12B * 12.0;
665
666             const auto& scParams = *fr->ic->softCoreParameters;
667             if (scParams.softcoreType == SoftcoreType::Beutler)
668             {
669                 if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
670                 {
671                     fscal = free_energy_evaluate_single<KernelSoftcoreType::None>(
672                             r2,
673                             fr->ic->rcoulomb,
674                             *fr->ic->softCoreParameters,
675                             fr->pairsTable->scale,
676                             fr->pairsTable->data.data(),
677                             fr->pairsTable->stride,
678                             qq,
679                             c6,
680                             c12,
681                             qqB,
682                             c6B,
683                             c12B,
684                             epsfac,
685                             LFC,
686                             LFV,
687                             DLF,
688                             lfac_coul,
689                             lfac_vdw,
690                             dlfac_coul,
691                             dlfac_vdw,
692                             &velec,
693                             &vvdw,
694                             dvdl);
695                 }
696                 else
697                 {
698                     fscal = free_energy_evaluate_single<KernelSoftcoreType::Beutler>(
699                             r2,
700                             fr->ic->rcoulomb,
701                             *fr->ic->softCoreParameters,
702                             fr->pairsTable->scale,
703                             fr->pairsTable->data.data(),
704                             fr->pairsTable->stride,
705                             qq,
706                             c6,
707                             c12,
708                             qqB,
709                             c6B,
710                             c12B,
711                             epsfac,
712                             LFC,
713                             LFV,
714                             DLF,
715                             lfac_coul,
716                             lfac_vdw,
717                             dlfac_coul,
718                             dlfac_vdw,
719                             &velec,
720                             &vvdw,
721                             dvdl);
722                 }
723             }
724             else // Gapsys
725             {
726                 if (scParams.scaleLinpointCoulGapsys == 0 && scParams.scaleLinpointVdWGapsys == 0)
727                 {
728                     fscal = free_energy_evaluate_single<KernelSoftcoreType::None>(
729                             r2,
730                             fr->ic->rcoulomb,
731                             *fr->ic->softCoreParameters,
732                             fr->pairsTable->scale,
733                             fr->pairsTable->data.data(),
734                             fr->pairsTable->stride,
735                             qq,
736                             c6,
737                             c12,
738                             qqB,
739                             c6B,
740                             c12B,
741                             epsfac,
742                             LFC,
743                             LFV,
744                             DLF,
745                             lfac_coul,
746                             lfac_vdw,
747                             dlfac_coul,
748                             dlfac_vdw,
749                             &velec,
750                             &vvdw,
751                             dvdl);
752                 }
753                 else
754                 {
755                     fscal = free_energy_evaluate_single<KernelSoftcoreType::Gapsys>(
756                             r2,
757                             fr->ic->rcoulomb,
758                             *fr->ic->softCoreParameters,
759                             fr->pairsTable->scale,
760                             fr->pairsTable->data.data(),
761                             fr->pairsTable->stride,
762                             qq,
763                             c6,
764                             c12,
765                             qqB,
766                             c6B,
767                             c12B,
768                             epsfac,
769                             LFC,
770                             LFV,
771                             DLF,
772                             lfac_coul,
773                             lfac_vdw,
774                             dlfac_coul,
775                             dlfac_vdw,
776                             &velec,
777                             &vvdw,
778                             dvdl);
779                 }
780             }
781         }
782         else
783         {
784             /* Evaluate tabulated interaction without free energy */
785             fscal = evaluate_single(r2,
786                                     fr->pairsTable->scale,
787                                     fr->pairsTable->data.data(),
788                                     fr->pairsTable->stride,
789                                     qq,
790                                     c6,
791                                     c12,
792                                     &velec,
793                                     &vvdw);
794         }
795
796         energygrp_elec[gid] += velec;
797         energygrp_vdw[gid] += vvdw;
798         svmul(fscal, dx, dx);
799
800         /* Add the forces */
801         rvec_inc(f[ai], dx);
802         rvec_dec(f[aj], dx);
803
804         if (computeVirial(flavor))
805         {
806             if (fshift_index != c_centralShiftIndex)
807             {
808                 rvec_inc(fshift[fshift_index], dx);
809                 rvec_dec(fshift[c_centralShiftIndex], dx);
810             }
811         }
812     }
813     return 0.0;
814 }
815
816 /*! \brief Calculate pairs, only for plain-LJ + plain Coulomb normal type.
817  *
818  * This function is templated for real/SimdReal and for optimization.
819  */
820 template<typename T, int pack_size, typename pbc_type>
821 static void do_pairs_simple(int                 nbonds,
822                             const t_iatom       iatoms[],
823                             const t_iparams     iparams[],
824                             const rvec          x[],
825                             rvec4               f[],
826                             const pbc_type      pbc,
827                             gmx::ArrayRef<real> charge,
828                             const real          scale_factor)
829 {
830     const int nfa1 = 1 + 2;
831
832     T six(6);
833     T twelve(12);
834     T ef(scale_factor);
835
836 #if GMX_SIMD_HAVE_REAL
837     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[pack_size];
838     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[pack_size];
839     alignas(GMX_SIMD_ALIGNMENT) real         coeff[3 * pack_size];
840 #else
841     std::int32_t ai[pack_size];
842     std::int32_t aj[pack_size];
843     real         coeff[3 * pack_size];
844 #endif
845
846     /* nbonds is #pairs*nfa1, here we step pack_size pairs */
847     for (int i = 0; i < nbonds; i += pack_size * nfa1)
848     {
849         /* Collect atoms for pack_size pairs.
850          * iu indexes into iatoms, we should not let iu go beyond nbonds.
851          */
852         int iu = i;
853         for (int s = 0; s < pack_size; s++)
854         {
855             int itype = iatoms[iu];
856             ai[s]     = iatoms[iu + 1];
857             aj[s]     = iatoms[iu + 2];
858
859             if (i + s * nfa1 < nbonds)
860             {
861                 coeff[0 * pack_size + s] = iparams[itype].lj14.c6A;
862                 coeff[1 * pack_size + s] = iparams[itype].lj14.c12A;
863                 coeff[2 * pack_size + s] = charge[ai[s]] * charge[aj[s]];
864
865                 /* Avoid indexing the iatoms array out of bounds.
866                  * We pad the coordinate indices with the last atom pair.
867                  */
868                 if (iu + nfa1 < nbonds)
869                 {
870                     iu += nfa1;
871                 }
872             }
873             else
874             {
875                 /* Pad the coefficient arrays with zeros to get zero forces */
876                 coeff[0 * pack_size + s] = 0;
877                 coeff[1 * pack_size + s] = 0;
878                 coeff[2 * pack_size + s] = 0;
879             }
880         }
881
882         /* Load the coordinates */
883         T xi[DIM], xj[DIM];
884         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]);
885         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]);
886
887         T c6  = load<T>(coeff + 0 * pack_size);
888         T c12 = load<T>(coeff + 1 * pack_size);
889         T qq  = load<T>(coeff + 2 * pack_size);
890
891         /* We could save these operations by storing 6*C6,12*C12 */
892         c6  = six * c6;
893         c12 = twelve * c12;
894
895         T dr[DIM];
896         pbc_dx_aiuc(pbc, xi, xj, dr);
897
898         T rsq   = dr[XX] * dr[XX] + dr[YY] * dr[YY] + dr[ZZ] * dr[ZZ];
899         T rinv  = gmx::invsqrt(rsq);
900         T rinv2 = rinv * rinv;
901         T rinv6 = rinv2 * rinv2 * rinv2;
902
903         /* Calculate the Coulomb force * r */
904         T cfr = ef * qq * rinv;
905
906         /* Calculate the LJ force * r and add it to the Coulomb part */
907         T fr = gmx::fma(fms(c12, rinv6, c6), rinv6, cfr);
908
909         T finvr = fr * rinv2;
910         T fx    = finvr * dr[XX];
911         T fy    = finvr * dr[YY];
912         T fz    = finvr * dr[ZZ];
913
914         /* Add the pair forces to the force array.
915          * Note that here we might add multiple force components for some atoms
916          * due to the SIMD padding. But the extra force components are zero.
917          */
918         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, fx, fy, fz);
919         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, fx, fy, fz);
920     }
921 }
922
923 /*! \brief Calculate all listed pair interactions */
924 void do_pairs(int                           ftype,
925               int                           nbonds,
926               const t_iatom                 iatoms[],
927               const t_iparams               iparams[],
928               const rvec                    x[],
929               rvec4                         f[],
930               rvec                          fshift[],
931               const struct t_pbc*           pbc,
932               const real*                   lambda,
933               real*                         dvdl,
934               gmx::ArrayRef<real>           chargeA,
935               gmx::ArrayRef<real>           chargeB,
936               gmx::ArrayRef<bool>           atomIsPerturbed,
937               gmx::ArrayRef<unsigned short> cENER,
938               const int                     numEnergyGroups,
939               const t_forcerec*             fr,
940               const bool                    havePerturbedInteractions,
941               const gmx::StepWorkload&      stepWork,
942               gmx_grppairener_t*            grppener,
943               int*                          global_atom_index)
944 {
945     if (ftype == F_LJ14 && fr->ic->vdwtype != VanDerWaalsType::User && !EEL_USER(fr->ic->eeltype)
946         && !havePerturbedInteractions && (!stepWork.computeVirial && !stepWork.computeEnergy))
947     {
948         /* We use a fast code-path for plain LJ 1-4 without FEP.
949          *
950          * TODO: Add support for energies (straightforward) and virial
951          * in the SIMD template. For the virial it's inconvenient to store
952          * the force sums for the shifts and we should directly calculate
953          * and sum the virial for the shifts. But we should do this
954          * at once for the angles and dihedrals as well.
955          */
956 #if GMX_SIMD_HAVE_REAL
957         if (fr->use_simd_kernels)
958         {
959             alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
960             set_pbc_simd(pbc, pbc_simd);
961
962             do_pairs_simple<SimdReal, GMX_SIMD_REAL_WIDTH, const real*>(
963                     nbonds, iatoms, iparams, x, f, pbc_simd, chargeA, fr->ic->epsfac * fr->fudgeQQ);
964         }
965         else
966 #endif
967         {
968             /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
969             t_pbc        pbc_no;
970             const t_pbc* pbc_nonnull;
971
972             if (pbc != nullptr)
973             {
974                 pbc_nonnull = pbc;
975             }
976             else
977             {
978                 set_pbc(&pbc_no, PbcType::No, nullptr);
979                 pbc_nonnull = &pbc_no;
980             }
981
982             do_pairs_simple<real, 1, const t_pbc*>(
983                     nbonds, iatoms, iparams, x, f, pbc_nonnull, chargeA, fr->ic->epsfac * fr->fudgeQQ);
984         }
985     }
986     else if (stepWork.computeVirial)
987     {
988         do_pairs_general<BondedKernelFlavor::ForcesAndVirialAndEnergy>(ftype,
989                                                                        nbonds,
990                                                                        iatoms,
991                                                                        iparams,
992                                                                        x,
993                                                                        f,
994                                                                        fshift,
995                                                                        pbc,
996                                                                        lambda,
997                                                                        dvdl,
998                                                                        chargeA,
999                                                                        chargeB,
1000                                                                        atomIsPerturbed,
1001                                                                        cENER,
1002                                                                        numEnergyGroups,
1003                                                                        fr,
1004                                                                        grppener,
1005                                                                        global_atom_index);
1006     }
1007     else
1008     {
1009         do_pairs_general<BondedKernelFlavor::ForcesAndEnergy>(ftype,
1010                                                               nbonds,
1011                                                               iatoms,
1012                                                               iparams,
1013                                                               x,
1014                                                               f,
1015                                                               fshift,
1016                                                               pbc,
1017                                                               lambda,
1018                                                               dvdl,
1019                                                               chargeA,
1020                                                               chargeB,
1021                                                               atomIsPerturbed,
1022                                                               cENER,
1023                                                               numEnergyGroups,
1024                                                               fr,
1025                                                               grppener,
1026                                                               global_atom_index);
1027     }
1028 }