2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
5 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 /* When calculating RF or Ewald interactions we calculate the electrostatic
38 * forces and energies on excluded atom pairs here in the non-bonded loops.
40 #if defined CHECK_EXCLS && (defined CALC_COULOMB || defined LJ_EWALD)
45 const int cj = l_cj[cjind].cj;
48 const int egp_cj = nbatParams.energrp[cj];
50 for (int i = 0; i < UNROLLI; i++)
52 const int ai = ci * UNROLLI + i;
54 const int type_i_off = type[ai] * ntype2;
56 for (int j = 0; j < UNROLLJ; j++)
58 real FrLJ6 = 0, FrLJ12 = 0, frLJ = 0;
60 /* A multiply mask used to zero an interaction
61 * when either the distance cutoff is exceeded, or
62 * (if appropriate) the i and j indices are
63 * unsuitable for this kind of inner loop. */
65 /* A multiply mask used to zero an interaction
66 * when that interaction should be excluded
67 * (e.g. because of bonding). */
68 const real interact = static_cast<real>((l_cj[cjind].excl >> (i * UNROLLI + j)) & 1);
70 real skipmask = interact;
72 real skipmask = (cj == ci_sh && j <= i) ? 0.0 : 1.0;
75 constexpr real interact = 1.0;
76 real skipmask = interact;
79 real gmx_unused VLJ = 0;
81 const int aj = cj * UNROLLJ + j;
83 const real dx = xi[i * XI_STRIDE + XX] - x[aj * X_STRIDE + XX];
84 const real dy = xi[i * XI_STRIDE + YY] - x[aj * X_STRIDE + YY];
85 const real dz = xi[i * XI_STRIDE + ZZ] - x[aj * X_STRIDE + ZZ];
87 real rsq = dx * dx + dy * dy + dz * dz;
89 /* Prepare to enforce the cut-off. */
90 skipmask = (rsq >= rcut2) ? 0 : skipmask;
91 /* 9 flops for r^2 + cut-off check */
93 // Ensure the distances do not fall below the limit where r^-12 overflows.
94 // This should never happen for normal interactions.
95 rsq = std::max(rsq, c_nbnxnMinDistanceSquared);
101 real rinv = gmx::invsqrt(rsq);
102 /* 5 flops for invsqrt */
104 /* Partially enforce the cut-off (and perhaps
105 * exclusions) to avoid possible overflow of
106 * rinvsix when computing LJ, and/or overflowing
107 * the Coulomb table during lookup. */
108 rinv = rinv * skipmask;
110 const real rinvsq = rinv * rinv;
116 const real c6 = nbfp[type_i_off + type[aj] * 2];
117 const real c12 = nbfp[type_i_off + type[aj] * 2 + 1];
119 #if defined LJ_CUT || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
120 real rinvsix = interact * rinvsq * rinvsq * rinvsq;
121 FrLJ6 = c6 * rinvsix;
122 FrLJ12 = c12 * rinvsix * rinvsix;
123 frLJ = FrLJ12 - FrLJ6;
124 /* 7 flops for r^-2 + LJ force */
125 # if defined CALC_ENERGIES || defined LJ_POT_SWITCH
126 VLJ = (FrLJ12 + c12 * ic->repulsion_shift.cpot) / 12
127 - (FrLJ6 + c6 * ic->dispersion_shift.cpot) / 6;
128 /* 7 flops for LJ energy */
132 #if defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
133 /* Force or potential switching from ic->rvdw_switch */
135 real rsw = r - ic->rvdw_switch;
136 rsw = (rsw >= 0.0 ? rsw : 0.0);
138 #ifdef LJ_FORCE_SWITCH
139 frLJ += -c6 * (ic->dispersion_shift.c2 + ic->dispersion_shift.c3 * rsw) * rsw * rsw * r
140 + c12 * (ic->repulsion_shift.c2 + ic->repulsion_shift.c3 * rsw) * rsw * rsw * r;
141 # if defined CALC_ENERGIES
142 VLJ += -c6 * (-ic->dispersion_shift.c2 / 3 - ic->dispersion_shift.c3 / 4 * rsw)
144 + c12 * (-ic->repulsion_shift.c2 / 3 - ic->repulsion_shift.c3 / 4 * rsw)
149 #if defined CALC_ENERGIES || defined LJ_POT_SWITCH
150 /* Masking should be done after force switching,
151 * but before potential switching.
153 /* Need to zero the interaction if there should be exclusion. */
154 VLJ = VLJ * interact;
159 const real sw = 1.0 + (swV3 + (swV4 + swV5 * rsw) * rsw) * rsw * rsw * rsw;
160 const real dsw = (swF2 + (swF3 + swF4 * rsw) * rsw) * rsw * rsw;
162 frLJ = frLJ * sw - r * VLJ * dsw;
169 # ifdef LJ_EWALD_COMB_GEOM
170 const real c6grid = ljc[type[ai] * 2] * ljc[type[aj] * 2];
171 # elif defined LJ_EWALD_COMB_LB
174 /* These sigma and epsilon are scaled to give 6*C6 */
175 const real sigma = ljc[type[ai] * 2] + ljc[type[aj] * 2];
176 const real epsilon = ljc[type[ai] * 2 + 1] * ljc[type[aj] * 2 + 1];
178 const real sigma2 = sigma * sigma;
179 c6grid = epsilon * sigma2 * sigma2 * sigma2;
182 # error "No LJ Ewald combination rule defined"
186 /* Recalculate rinvsix without exclusion mask */
187 const real rinvsix_nm = rinvsq * rinvsq * rinvsq;
189 const real rinvsix_nm = rinvsix;
191 const real cr2 = lje_coeff2 * rsq;
193 const real expmcr2 = exp(-cr2);
195 const real expmcr2 = expf(-cr2);
197 const real poly = 1 + cr2 + 0.5 * cr2 * cr2;
199 /* Subtract the grid force from the total LJ force */
200 frLJ += c6grid * (rinvsix_nm - expmcr2 * (rinvsix_nm * poly + lje_coeff6_6));
201 # ifdef CALC_ENERGIES
202 /* Shift should only be applied to real LJ pairs */
203 const real sh_mask = lje_vc * interact;
205 VLJ += c6grid / 6 * (rinvsix_nm * (1 - expmcr2 * poly) + sh_mask);
208 #endif /* LJ_EWALD */
210 #ifdef VDW_CUTOFF_CHECK
211 /* Mask for VdW cut-off shorter than Coulomb cut-off */
213 real skipmask_rvdw = (rsq < rvdw2) ? 1.0 : 0.0;
214 frLJ *= skipmask_rvdw;
215 # ifdef CALC_ENERGIES
216 VLJ *= skipmask_rvdw;
220 # if defined CALC_ENERGIES
221 /* Need to zero the interaction if r >= rcut */
222 VLJ = VLJ * skipmask;
223 /* 1 more flop for LJ energy */
225 #endif /* VDW_CUTOFF_CHECK */
229 # ifdef ENERGY_GROUPS
230 Vvdw[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log * j)) & egp_mask)] += VLJ;
233 /* 1 flop for LJ energy addition */
239 /* Enforce the cut-off and perhaps exclusions. In
240 * those cases, rinv is zero because of skipmask,
241 * but fcoul and vcoul will later be non-zero (in
242 * both RF and table cases) because of the
243 * contributions that do not depend on rinv. These
244 * contributions cannot be allowed to accumulate
245 * to the force and potential, and the easiest way
246 * to do this is to zero the charges in
248 const real qq = skipmask * qi[i] * q[aj];
251 real fcoul = qq * (interact * rinv * rinvsq - k_rf2);
252 /* 4 flops for RF force */
253 # ifdef CALC_ENERGIES
254 real vcoul = qq * (interact * rinv + reactionFieldCoefficient * rsq - reactionFieldShift);
255 /* 4 flops for RF energy */
259 # ifdef CALC_COUL_TAB
260 const real rs = rsq * rinv * tab_coul_scale;
261 const int ri = int(rs);
262 const real frac = rs - static_cast<real>(ri);
264 /* fexcl = F_i + frac * (F_(i+1)-F_i) */
265 const real fexcl = tab_coul_FDV0[ri * 4] + frac * tab_coul_FDV0[ri * 4 + 1];
267 /* fexcl = (1-frac) * F_i + frac * F_(i+1) */
268 const real fexcl = (1 - frac) * tab_coul_F[ri] + frac * tab_coul_F[ri + 1];
270 real fcoul = interact * rinvsq - fexcl;
271 /* 7 flops for float 1/r-table force */
272 # ifdef CALC_ENERGIES
276 * (interact * (rinv - ic->sh_ewald)
277 - (tab_coul_FDV0[ri * 4 + 2] - halfsp * frac * (tab_coul_FDV0[ri * 4] + fexcl)));
278 /* 7 flops for float 1/r-table energy (8 with excls) */
281 * (interact * (rinv - ic->sh_ewald)
282 - (tab_coul_V[ri] - halfsp * frac * (tab_coul_F[ri] + fexcl)));
288 # ifdef CALC_ENERGIES
289 # ifdef ENERGY_GROUPS
290 Vc[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log * j)) & egp_mask)] += vcoul;
293 /* 1 flop for Coulomb energy addition */
299 static constexpr bool sc_halfLJ =
305 /* 2 flops for scalar LJ+Coulomb force if !HALF_LJ || (i < UNROLLI / 2) */
306 const real fscal = (!sc_halfLJ || (i < UNROLLI / 2)) ? frLJ * rinvsq + fcoul : fcoul;
308 const real fscal = frLJ * rinvsq;
310 const real fx = fscal * dx;
311 const real fy = fscal * dy;
312 const real fz = fscal * dz;
314 /* Increment i-atom force */
315 fi[i * FI_STRIDE + XX] += fx;
316 fi[i * FI_STRIDE + YY] += fy;
317 fi[i * FI_STRIDE + ZZ] += fz;
318 /* Decrement j-atom force */
319 f[aj * F_STRIDE + XX] -= fx;
320 f[aj * F_STRIDE + YY] -= fy;
321 f[aj * F_STRIDE + ZZ] -= fz;
322 /* 9 flops for force addition */