2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team.
5 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
40 static_assert(UNROLLI == c_nbnxnCpuIClusterSize, "UNROLLI should match the i-cluster size");
42 /* We could use nbat->xstride and nbat->fstride, but macros might be faster */
45 /* Local i-atom buffer strides */
50 /* All functionality defines are set here, except for:
51 * CALC_ENERGIES, ENERGY_GROUPS which are defined before.
52 * CHECK_EXCLS, which is set just before including the inner loop contents.
55 /* We always calculate shift forces, because it's cheap anyhow */
56 #define CALC_SHIFTFORCES
59 # define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel##_ElecRF##ljt##feg##_ref
62 # ifndef VDW_CUTOFF_CHECK
63 # define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel##_ElecQSTab##ljt##feg##_ref
65 # define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel##_ElecQSTabTwinCut##ljt##feg##_ref
69 #if defined LJ_CUT && !defined LJ_EWALD
70 # define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJ, feg)
71 #elif defined LJ_FORCE_SWITCH
72 # define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJFsw, feg)
73 #elif defined LJ_POT_SWITCH
74 # define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJPsw, feg)
75 #elif defined LJ_EWALD
76 # ifdef LJ_EWALD_COMB_GEOM
77 # define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJEwCombGeom, feg)
79 # define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJEwCombLB, feg)
82 # error "No VdW type defined"
87 NBK_FUNC_NAME(_F) // NOLINT(misc-definitions-in-headers)
89 # ifndef ENERGY_GROUPS
90 NBK_FUNC_NAME(_VF) // NOLINT(misc-definitions-in-headers)
92 NBK_FUNC_NAME(_VgrpF) // NOLINT(misc-definitions-in-headers)
97 (const NbnxnPairlistCpu* nbl,
98 const nbnxn_atomdata_t* nbat,
99 const interaction_const_t* ic,
100 const rvec* shift_vec,
101 nbnxn_atomdata_output_t* out)
103 /* Unpack pointers for output */
104 real* f = out->f.data();
105 #ifdef CALC_SHIFTFORCES
106 real* fshift = out->fshift.data();
109 real* Vvdw = out->Vvdw.data();
110 real* Vc = out->Vc.data();
113 real xi[UNROLLI * XI_STRIDE];
114 real fi[UNROLLI * FI_STRIDE];
122 const real swV3 = ic->vdw_switch.c3;
123 const real swV4 = ic->vdw_switch.c4;
124 const real swV5 = ic->vdw_switch.c5;
125 const real swF2 = 3 * ic->vdw_switch.c3;
126 const real swF3 = 4 * ic->vdw_switch.c4;
127 const real swF4 = 5 * ic->vdw_switch.c5;
130 const nbnxn_atomdata_t::Params& nbatParams = nbat->params();
133 const real lje_coeff2 = ic->ewaldcoeff_lj * ic->ewaldcoeff_lj;
134 const real lje_coeff6_6 = lje_coeff2 * lje_coeff2 * lje_coeff2 / 6.0;
135 # ifdef CALC_ENERGIES
136 const real lje_vc = ic->sh_lj_ewald;
139 const real* ljc = nbatParams.nbfp_comb.data();
143 const real k_rf2 = 2 * ic->k_rf;
144 # ifdef CALC_ENERGIES
145 const real k_rf = ic->k_rf;
146 const real c_rf = ic->c_rf;
150 const real tab_coul_scale = ic->coulombEwaldTables->scale;
151 # ifdef CALC_ENERGIES
152 const real halfsp = 0.5 / tab_coul_scale;
156 const real* tab_coul_FDV0 = ic->coulombEwaldTables->tableFDV0.data();
158 const real* tab_coul_F = ic->coulombEwaldTables->tableF.data();
159 # ifdef CALC_ENERGIES
160 const real* tab_coul_V = ic->coulombEwaldTables->tableV.data();
166 const int egp_mask = (1 << nbatParams.neg_2log) - 1;
170 const real rcut2 = ic->rcoulomb * ic->rcoulomb;
171 #ifdef VDW_CUTOFF_CHECK
172 const real rvdw2 = ic->rvdw * ic->rvdw;
175 const int ntype2 = nbatParams.numTypes * 2;
176 const real* nbfp = nbatParams.nbfp.data();
177 const real* q = nbatParams.q.data();
178 const int* type = nbatParams.type.data();
179 const real facel = ic->epsfac;
180 const real* shiftvec = shift_vec[0];
181 const real* x = nbat->x().data();
183 const nbnxn_cj_t* l_cj = nbl->cj.data();
185 for (const nbnxn_ci_t& ciEntry : nbl->ci)
187 const int ish = (ciEntry.shift & NBNXN_CI_SHIFT);
188 /* x, f and fshift are assumed to be stored with stride 3 */
189 const int ishf = ish * DIM;
190 const int cjind0 = ciEntry.cj_ind_start;
191 const int cjind1 = ciEntry.cj_ind_end;
192 /* Currently only works super-cells equal to sub-cells */
193 const int ci = ciEntry.ci;
194 const int ci_sh = (ish == CENTRAL ? ci : -1);
196 /* We have 5 LJ/C combinations, but use only three inner loops,
197 * as the other combinations are unlikely and/or not much faster:
198 * inner half-LJ + C for half-LJ + C / no-LJ + C
199 * inner LJ + C for full-LJ + C
200 * inner LJ for full-LJ + no-C / half-LJ + no-C
202 const bool do_LJ = ((ciEntry.shift & NBNXN_CI_DO_LJ(0)) != 0);
203 const bool do_coul = ((ciEntry.shift & NBNXN_CI_DO_COUL(0)) != 0);
204 const bool half_LJ = (((ciEntry.shift & NBNXN_CI_HALF_LJ(0)) != 0) || !do_LJ) && do_coul;
208 const bool do_self = true;
210 const bool do_self = do_coul;
213 # ifndef ENERGY_GROUPS
217 int egp_sh_i[UNROLLI];
218 for (int i = 0; i < UNROLLI; i++)
220 egp_sh_i[i] = ((nbatParams.energrp[ci] >> (i * nbatParams.neg_2log)) & egp_mask)
221 * nbatParams.nenergrp;
226 for (int i = 0; i < UNROLLI; i++)
228 for (int d = 0; d < DIM; d++)
230 xi[i * XI_STRIDE + d] = x[(ci * UNROLLI + i) * X_STRIDE + d] + shiftvec[ishf + d];
231 fi[i * FI_STRIDE + d] = 0;
234 qi[i] = facel * q[ci * UNROLLI + i];
241 const real Vc_sub_self = 0.5 * c_rf;
243 # ifdef CALC_COUL_TAB
245 const real Vc_sub_self = 0.5 * tab_coul_V[0];
247 const real Vc_sub_self = 0.5 * tab_coul_FDV0[2];
251 if (l_cj[ciEntry.cj_ind_start].cj == ci_sh)
253 for (int i = 0; i < UNROLLI; i++)
255 # ifdef ENERGY_GROUPS
257 egp_sh_i[i] + ((nbatParams.energrp[ci] >> (i * nbatParams.neg_2log)) & egp_mask);
259 const int egp_ind = 0;
261 /* Coulomb self interaction */
262 Vc[egp_ind] -= qi[i] * q[ci * UNROLLI + i] * Vc_sub_self;
265 /* LJ Ewald self interaction */
268 * nbatParams.nbfp[nbatParams.type[ci * UNROLLI + i] * (nbatParams.numTypes + 1) * 2]
274 #endif /* CALC_ENERGIES */
277 while (cjind < cjind1 && nbl->cj[cjind].excl != 0xffff)
284 #include "kernel_ref_inner.h"
291 #include "kernel_ref_inner.h"
296 #include "kernel_ref_inner.h"
302 for (; (cjind < cjind1); cjind++)
308 #include "kernel_ref_inner.h"
315 #include "kernel_ref_inner.h"
320 #include "kernel_ref_inner.h"
324 /* Add accumulated i-forces to the force array */
325 for (int i = 0; i < UNROLLI; i++)
327 for (int d = 0; d < DIM; d++)
329 f[(ci * UNROLLI + i) * F_STRIDE + d] += fi[i * FI_STRIDE + d];
332 #ifdef CALC_SHIFTFORCES
333 if (fshift != nullptr)
335 /* Add i forces to shifted force list */
336 for (int i = 0; i < UNROLLI; i++)
338 for (int d = 0; d < DIM; d++)
340 fshift[ishf + d] += fi[i * FI_STRIDE + d];
347 # ifndef ENERGY_GROUPS
355 printf("atom pairs %d\n", npair);
359 #undef CALC_SHIFTFORCES