2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
/* i- and j-cluster unroll factors; both equal the CPU i-cluster size,
 * so each list entry processes an UNROLLI x UNROLLJ tile of atom pairs.
 */
36 #define UNROLLI NBNXN_CPU_CLUSTER_I_SIZE
37 #define UNROLLJ NBNXN_CPU_CLUSTER_I_SIZE
39 /* We could use nbat->xstride and nbat->fstride, but macros might be faster */
42 /* Local i-atom buffer strides */
47 /* All functionality defines are set here, except for:
48 * CALC_ENERGIES, ENERGY_GROUPS which are defined before.
49 * CHECK_EXCLS, which is set just before including the inner loop contents.
52 /* We always calculate shift forces, because it's cheap anyhow */
53 #define CALC_SHIFTFORCES
/* Kernel-name mangling: the suffix encodes the electrostatics flavor.
 * _rf_       = reaction-field Coulomb
 * _tab_      = tabulated Coulomb (single cut-off)
 * _tab_twin_ = tabulated Coulomb with a separate (twin-range) VdW cut-off
 *              check (VDW_CUTOFF_CHECK defined).
 * NOTE(review): the surrounding #ifdef/#else/#endif lines selecting between
 * these three definitions are elided in this excerpt — only one of the
 * #define branches is active in any given compilation.
 */
56 #define NBK_FUNC_NAME(base, ene) base ## _rf_ ## ene
59 #ifndef VDW_CUTOFF_CHECK
60 #define NBK_FUNC_NAME(base, ene) base ## _tab_ ## ene
62 #define NBK_FUNC_NAME(base, ene) base ## _tab_twin_ ## ene
/* Reference (plain-C) nbnxn non-bonded kernel over a cluster pair list.
 * The same body is compiled three times via NBK_FUNC_NAME, selected by the
 * energy macros defined before inclusion:
 *   noener  - forces only
 *   ener    - forces + total energies
 *   energrp - forces + energies per energy group pair
 * NOTE(review): this excerpt is non-contiguous; braces, #else/#endif arms
 * and several declarations fall in the elided lines.
 */
68 NBK_FUNC_NAME(nbnxn_kernel_ref, noener)
71 NBK_FUNC_NAME(nbnxn_kernel_ref, ener)
73 NBK_FUNC_NAME(nbnxn_kernel_ref, energrp)
77 (const nbnxn_pairlist_t *nbl,
78 const nbnxn_atomdata_t *nbat,
79 const interaction_const_t *ic,
82 #ifdef CALC_SHIFTFORCES
/* Current i-cluster list entry and the j-cluster array it indexes into */
93 const nbnxn_ci_t *nbln;
94 const nbnxn_cj_t *l_cj;
101 #ifdef VDW_CUTOFF_CHECK
/* Per-entry interaction flags decoded from nbln->shift (see below) */
109 gmx_bool do_LJ, half_LJ, do_coul;
110 int cjind0, cjind1, cjind;
/* Local buffers for the UNROLLI i-atom coordinates and force accumulators */
113 real xi[UNROLLI*XI_STRIDE];
114 real fi[UNROLLI*FI_STRIDE];
118 #ifndef ENERGY_GROUPS
/* Energy-group row offsets for the i-atoms (ENERGY_GROUPS only) */
123 int egp_sh_i[UNROLLI];
/* Coulomb tables: either interleaved F,D,V,0 quadruplets or separate F/V */
140 const real *tab_coul_FDV0;
142 const real *tab_coul_F;
143 const real *tab_coul_V;
/* Copy interaction constants into locals before the cluster loop */
154 sh_invrc6 = ic->sh_invrc6;
165 tabscale = ic->tabq_scale;
/* Half the table spacing, used below for table-based potentials */
167 halfsp = 0.5/ic->tabq_scale;
171 tab_coul_FDV0 = ic->tabq_coul_FDV0;
173 tab_coul_F = ic->tabq_coul_F;
174 tab_coul_V = ic->tabq_coul_V;
/* Mask extracting one energy-group index from the packed energrp word */
179 egp_mask = (1<<nbat->neg_2log) - 1;
/* Squared cut-offs; rvdw2 only needed with a separate VdW cut-off check */
183 rcut2 = ic->rcoulomb*ic->rcoulomb;
184 #ifdef VDW_CUTOFF_CHECK
185 rvdw2 = ic->rvdw*ic->rvdw;
188 ntype2 = nbat->ntype*2;
193 shiftvec = shift_vec[0];
/* Outer loop: one iteration per i-cluster entry in the pair list */
199 for (n = 0; n < nbl->nci; n++)
/* Periodic shift index packed into the low bits of nbln->shift */
205 ish = (nbln->shift & NBNXN_CI_SHIFT);
206 /* x, f and fshift are assumed to be stored with stride 3 */
208 cjind0 = nbln->cj_ind_start;
209 cjind1 = nbln->cj_ind_end;
210 /* Currently only works super-cells equal to sub-cells */
/* ci_sh marks the self-cluster only for the central (zero) shift,
 * so the self-interaction correction is applied exactly once.
 */
212 ci_sh = (ish == CENTRAL ? ci : -1);
214 /* We have 5 LJ/C combinations, but use only three inner loops,
215 * as the other combinations are unlikely and/or not much faster:
216 * inner half-LJ + C for half-LJ + C / no-LJ + C
217 * inner LJ + C for full-LJ + C
218 * inner LJ for full-LJ + no-C / half-LJ + no-C
/* Decode the per-entry LJ/Coulomb flags from the shift word */
220 do_LJ = (nbln->shift & NBNXN_CI_DO_LJ(0));
221 do_coul = (nbln->shift & NBNXN_CI_DO_COUL(0));
222 half_LJ = ((nbln->shift & NBNXN_CI_HALF_LJ(0)) || !do_LJ) && do_coul;
225 #ifndef ENERGY_GROUPS
/* ENERGY_GROUPS: precompute the energy-matrix row offset per i-atom */
229 for (i = 0; i < UNROLLI; i++)
231 egp_sh_i[i] = ((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask)*nbat->nenergrp;
/* Load shifted i-atom coordinates and clear the i-force accumulators */
236 for (i = 0; i < UNROLLI; i++)
238 for (d = 0; d < DIM; d++)
240 xi[i*XI_STRIDE+d] = x[(ci*UNROLLI+i)*X_STRIDE+d] + shiftvec[ishf+d];
241 fi[i*FI_STRIDE+d] = 0;
/* Coulomb self-energy term, per electrostatics flavor:
 * reaction-field uses 0.5*c_rf; tabulated uses half the potential at r=0
 * (tab_coul_V[0], or element 2 of the first FDV0 quadruplet).
 */
251 Vc_sub_self = 0.5*c_rf;
255 Vc_sub_self = 0.5*tab_coul_V[0];
257 Vc_sub_self = 0.5*tab_coul_FDV0[2];
/* Premultiply i-atom charges by the electrostatics prefactor */
262 for (i = 0; i < UNROLLI; i++)
264 qi[i] = facel*q[ci*UNROLLI+i];
/* Self-cluster entry: subtract the Coulomb self interaction qi*qi*Vc_sub_self
 * (binned per energy-group pair when ENERGY_GROUPS is defined).
 */
267 if (l_cj[nbln->cj_ind_start].cj == ci_sh)
270 Vc[egp_sh_i[i]+((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask)]
274 -= qi[i]*q[ci*UNROLLI+i]*Vc_sub_self;
/* First inner loop: leading j-clusters that carry exclusion masks
 * (excl != 0xffff), compiled with CHECK_EXCLS.
 */
281 while (cjind < cjind1 && nbl->cj[cjind].excl != 0xffff)
288 #include "nbnxn_kernel_ref_inner.h"
292 /* cppcheck-suppress duplicateBranch */
296 #include "nbnxn_kernel_ref_inner.h"
301 #include "nbnxn_kernel_ref_inner.h"
/* Second inner loop: remaining j-clusters with no exclusions, so the
 * exclusion-check code is compiled out. The three #include sites per loop
 * correspond to the three LJ/Coulomb inner-loop variants listed above.
 */
307 for (; (cjind < cjind1); cjind++)
313 #include "nbnxn_kernel_ref_inner.h"
317 /* cppcheck-suppress duplicateBranch */
321 #include "nbnxn_kernel_ref_inner.h"
326 #include "nbnxn_kernel_ref_inner.h"
329 ninner += cjind1 - cjind0;
331 /* Add accumulated i-forces to the force array */
332 for (i = 0; i < UNROLLI; i++)
334 for (d = 0; d < DIM; d++)
336 f[(ci*UNROLLI+i)*F_STRIDE+d] += fi[i*FI_STRIDE+d];
339 #ifdef CALC_SHIFTFORCES
342 /* Add i forces to shifted force list */
343 for (i = 0; i < UNROLLI; i++)
345 for (d = 0; d < DIM; d++)
347 fshift[ishf+d] += fi[i*FI_STRIDE+d];
354 #ifndef ENERGY_GROUPS
/* NOTE(review): debug pair-count print; presumably guarded by a debug
 * conditional in the elided lines — confirm before removing.
 */
362 printf("atom pairs %d\n", npair);
/* Undo the define from the top of this file so repeated inclusion with
 * different flavor macros starts clean.
 */
366 #undef CALC_SHIFTFORCES