/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
39 static_assert(UNROLLI == c_nbnxnCpuIClusterSize, "UNROLLI should match the i-cluster size");
41 /* We could use nbat->xstride and nbat->fstride, but macros might be faster */
44 /* Local i-atom buffer strides */
49 /* All functionality defines are set here, except for:
50 * CALC_ENERGIES, ENERGY_GROUPS which are defined before.
51 * CHECK_EXCLS, which is set just before including the inner loop contents.
54 /* We always calculate shift forces, because it's cheap anyhow */
55 #define CALC_SHIFTFORCES
58 #define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel ## _ElecRF ## ljt ## feg ## _ref
61 #ifndef VDW_CUTOFF_CHECK
62 #define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel ## _ElecQSTab ## ljt ## feg ## _ref
64 #define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel ## _ElecQSTabTwinCut ## ljt ## feg ## _ref
68 #if defined LJ_CUT && !defined LJ_EWALD
69 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJ, feg)
70 #elif defined LJ_FORCE_SWITCH
71 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJFsw, feg)
72 #elif defined LJ_POT_SWITCH
73 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJPsw, feg)
74 #elif defined LJ_EWALD
75 #ifdef LJ_EWALD_COMB_GEOM
76 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJEwCombGeom, feg)
78 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJEwCombLB, feg)
81 #error "No VdW type defined"
86 NBK_FUNC_NAME(_F) // NOLINT(misc-definitions-in-headers)
89 NBK_FUNC_NAME(_VF) // NOLINT(misc-definitions-in-headers)
91 NBK_FUNC_NAME(_VgrpF) // NOLINT(misc-definitions-in-headers)
96 (const NbnxnPairlistCpu *nbl,
97 const nbnxn_atomdata_t *nbat,
98 const interaction_const_t *ic,
99 const rvec *shift_vec,
100 nbnxn_atomdata_output_t *out)
102 /* Unpack pointers for output */
103 real *f = out->f.data();
104 #ifdef CALC_SHIFTFORCES
105 real *fshift = out->fshift.data();
108 real *Vvdw = out->Vvdw.data();
109 real *Vc = out->Vc.data();
112 const nbnxn_cj_t *l_cj;
114 #ifdef VDW_CUTOFF_CHECK
121 gmx_bool do_LJ, half_LJ, do_coul;
122 int cjind0, cjind1, cjind;
124 real xi[UNROLLI*XI_STRIDE];
125 real fi[UNROLLI*FI_STRIDE];
129 #ifndef ENERGY_GROUPS
134 int egp_sh_i[UNROLLI];
138 real swV3, swV4, swV5;
139 real swF2, swF3, swF4;
142 real lje_coeff2, lje_coeff6_6;
159 const real *tab_coul_FDV0;
161 const real *tab_coul_F;
162 const real gmx_unused *tab_coul_V;
171 swV3 = ic->vdw_switch.c3;
172 swV4 = ic->vdw_switch.c4;
173 swV5 = ic->vdw_switch.c5;
174 swF2 = 3*ic->vdw_switch.c3;
175 swF3 = 4*ic->vdw_switch.c4;
176 swF4 = 5*ic->vdw_switch.c5;
179 const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
182 lje_coeff2 = ic->ewaldcoeff_lj*ic->ewaldcoeff_lj;
183 lje_coeff6_6 = lje_coeff2*lje_coeff2*lje_coeff2/6.0;
185 lje_vc = ic->sh_lj_ewald;
188 const real *ljc = nbatParams.nbfp_comb.data();
199 const real tab_coul_scale = ic->coulombEwaldTables->scale;
201 halfsp = 0.5/tab_coul_scale;
205 tab_coul_FDV0 = ic->coulombEwaldTables->tableFDV0.data();
207 tab_coul_F = ic->coulombEwaldTables->tableF.data();
208 tab_coul_V = ic->coulombEwaldTables->tableV.data();
213 egp_mask = (1 << nbatParams.neg_2log) - 1;
217 rcut2 = ic->rcoulomb*ic->rcoulomb;
218 #ifdef VDW_CUTOFF_CHECK
219 rvdw2 = ic->rvdw*ic->rvdw;
222 ntype2 = nbatParams.numTypes*2;
223 const real *nbfp = nbatParams.nbfp.data();
224 const real *q = nbatParams.q.data();
225 const int *type = nbatParams.type.data();
227 const real *shiftvec = shift_vec[0];
228 const real *x = nbat->x().data();
230 l_cj = nbl->cj.data();
232 for (const nbnxn_ci_t &ciEntry : nbl->ci)
236 ish = (ciEntry.shift & NBNXN_CI_SHIFT);
237 /* x, f and fshift are assumed to be stored with stride 3 */
239 cjind0 = ciEntry.cj_ind_start;
240 cjind1 = ciEntry.cj_ind_end;
241 /* Currently only works super-cells equal to sub-cells */
243 ci_sh = (ish == CENTRAL ? ci : -1);
245 /* We have 5 LJ/C combinations, but use only three inner loops,
246 * as the other combinations are unlikely and/or not much faster:
247 * inner half-LJ + C for half-LJ + C / no-LJ + C
248 * inner LJ + C for full-LJ + C
249 * inner LJ for full-LJ + no-C / half-LJ + no-C
251 do_LJ = ((ciEntry.shift & NBNXN_CI_DO_LJ(0)) != 0);
252 do_coul = ((ciEntry.shift & NBNXN_CI_DO_COUL(0)) != 0);
253 half_LJ = (((ciEntry.shift & NBNXN_CI_HALF_LJ(0)) != 0) || !do_LJ) && do_coul;
257 gmx_bool do_self = TRUE;
259 gmx_bool do_self = do_coul;
262 #ifndef ENERGY_GROUPS
266 for (i = 0; i < UNROLLI; i++)
268 egp_sh_i[i] = ((nbatParams.energrp[ci] >> (i*nbatParams.neg_2log)) & egp_mask)*nbatParams.nenergrp;
273 for (i = 0; i < UNROLLI; i++)
275 for (d = 0; d < DIM; d++)
277 xi[i*XI_STRIDE+d] = x[(ci*UNROLLI+i)*X_STRIDE+d] + shiftvec[ishf+d];
278 fi[i*FI_STRIDE+d] = 0;
281 qi[i] = facel*q[ci*UNROLLI+i];
290 Vc_sub_self = 0.5*c_rf;
294 Vc_sub_self = 0.5*tab_coul_V[0];
296 Vc_sub_self = 0.5*tab_coul_FDV0[2];
300 if (l_cj[ciEntry.cj_ind_start].cj == ci_sh)
302 for (i = 0; i < UNROLLI; i++)
306 egp_ind = egp_sh_i[i] + ((nbatParams.energrp[ci] >> (i*nbatParams.neg_2log)) & egp_mask);
310 /* Coulomb self interaction */
311 Vc[egp_ind] -= qi[i]*q[ci*UNROLLI+i]*Vc_sub_self;
314 /* LJ Ewald self interaction */
315 Vvdw[egp_ind] += 0.5*nbatParams.nbfp[nbatParams.type[ci*UNROLLI+i]*(nbatParams.numTypes + 1)*2]/6*lje_coeff6_6;
320 #endif /* CALC_ENERGIES */
323 while (cjind < cjind1 && nbl->cj[cjind].excl != 0xffff)
330 #include "gromacs/nbnxm/kernels_reference/kernel_ref_inner.h"
337 #include "gromacs/nbnxm/kernels_reference/kernel_ref_inner.h"
342 #include "gromacs/nbnxm/kernels_reference/kernel_ref_inner.h"
348 for (; (cjind < cjind1); cjind++)
354 #include "gromacs/nbnxm/kernels_reference/kernel_ref_inner.h"
361 #include "gromacs/nbnxm/kernels_reference/kernel_ref_inner.h"
366 #include "gromacs/nbnxm/kernels_reference/kernel_ref_inner.h"
370 /* Add accumulated i-forces to the force array */
371 for (i = 0; i < UNROLLI; i++)
373 for (d = 0; d < DIM; d++)
375 f[(ci*UNROLLI+i)*F_STRIDE+d] += fi[i*FI_STRIDE+d];
378 #ifdef CALC_SHIFTFORCES
379 if (fshift != nullptr)
381 /* Add i forces to shifted force list */
382 for (i = 0; i < UNROLLI; i++)
384 for (d = 0; d < DIM; d++)
386 fshift[ishf+d] += fi[i*FI_STRIDE+d];
393 #ifndef ENERGY_GROUPS
401 printf("atom pairs %d\n", npair);
405 #undef CALC_SHIFTFORCES