2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013, by the GROMACS development team, led by
5 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
6 * others, as listed in the AUTHORS file in the top-level source
7 * directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the Verlet kernel generator for
 * kernel type 2xnn.
46 #ifdef GMX_NBNXN_SIMD_2XNN
48 /* Include the full-width SIMD macros */
50 #include "gmx_simd_macros.h"
51 #include "gmx_simd_vec.h"
52 #if !(GMX_SIMD_WIDTH_HERE == 8 || GMX_SIMD_WIDTH_HERE == 16)
53 #error "unsupported SIMD width"
56 #define GMX_SIMD_J_UNROLL_SIZE 2
57 #include "nbnxn_kernel_simd_2xnn.h"
58 #include "../nbnxn_kernel_common.h"
59 #include "gmx_omp_nthreads.h"
60 #include "types/force_flags.h"
62 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
65 coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR
68 /* Declare and define the kernel function pointer lookup tables. */
/* Lookup table of the kernels that also compute energies.  It is used by
 * nbnxn_kernel_simd_2xnn when GMX_FORCE_ENERGY is set in force_flags and
 * out->nV == 1, and is indexed as [coult][nbat->comb_rule].
 * The rows are listed in the order rf, tab, tab_twin, ewald, ewald_twin,
 * which matches the order of the coult enumerators; within each row the
 * kernels are listed as comb_geom, comb_lb, comb_none.
 * NOTE(review): the braces of this initializer and the definition of the
 * ljcr enum are not visible in this excerpt -- confirm the column order
 * against that enum.
 */
69 static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
/* Row for coultRF */
72 nbnxn_kernel_simd_2xnn_rf_comb_geom_ener,
73 nbnxn_kernel_simd_2xnn_rf_comb_lb_ener,
74 nbnxn_kernel_simd_2xnn_rf_comb_none_ener,
/* Row for coultTAB */
77 nbnxn_kernel_simd_2xnn_tab_comb_geom_ener,
78 nbnxn_kernel_simd_2xnn_tab_comb_lb_ener,
79 nbnxn_kernel_simd_2xnn_tab_comb_none_ener,
/* Row for coultTAB_TWIN */
82 nbnxn_kernel_simd_2xnn_tab_twin_comb_geom_ener,
83 nbnxn_kernel_simd_2xnn_tab_twin_comb_lb_ener,
84 nbnxn_kernel_simd_2xnn_tab_twin_comb_none_ener,
/* Row for coultEWALD */
87 nbnxn_kernel_simd_2xnn_ewald_comb_geom_ener,
88 nbnxn_kernel_simd_2xnn_ewald_comb_lb_ener,
89 nbnxn_kernel_simd_2xnn_ewald_comb_none_ener,
/* Row for coultEWALD_TWIN */
92 nbnxn_kernel_simd_2xnn_ewald_twin_comb_geom_ener,
93 nbnxn_kernel_simd_2xnn_ewald_twin_comb_lb_ener,
94 nbnxn_kernel_simd_2xnn_ewald_twin_comb_none_ener,
/* Lookup table of the kernels that compute energy-group contributions.  It
 * is used by nbnxn_kernel_simd_2xnn in the remaining branch, i.e. when
 * GMX_FORCE_ENERGY is set in force_flags and out->nV is not 1, and is
 * indexed as [coult][nbat->comb_rule].  The element type is the same
 * p_nbk_func_ener pointer type as used for p_nbk_ener.
 * The rows are listed in the order rf, tab, tab_twin, ewald, ewald_twin,
 * which matches the order of the coult enumerators; within each row the
 * kernels are listed as comb_geom, comb_lb, comb_none.
 * NOTE(review): the braces of this initializer and the definition of the
 * ljcr enum are not visible in this excerpt -- confirm the column order
 * against that enum.
 */
98 static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
/* Row for coultRF */
101 nbnxn_kernel_simd_2xnn_rf_comb_geom_energrp,
102 nbnxn_kernel_simd_2xnn_rf_comb_lb_energrp,
103 nbnxn_kernel_simd_2xnn_rf_comb_none_energrp,
/* Row for coultTAB */
106 nbnxn_kernel_simd_2xnn_tab_comb_geom_energrp,
107 nbnxn_kernel_simd_2xnn_tab_comb_lb_energrp,
108 nbnxn_kernel_simd_2xnn_tab_comb_none_energrp,
/* Row for coultTAB_TWIN */
111 nbnxn_kernel_simd_2xnn_tab_twin_comb_geom_energrp,
112 nbnxn_kernel_simd_2xnn_tab_twin_comb_lb_energrp,
113 nbnxn_kernel_simd_2xnn_tab_twin_comb_none_energrp,
/* Row for coultEWALD */
116 nbnxn_kernel_simd_2xnn_ewald_comb_geom_energrp,
117 nbnxn_kernel_simd_2xnn_ewald_comb_lb_energrp,
118 nbnxn_kernel_simd_2xnn_ewald_comb_none_energrp,
/* Row for coultEWALD_TWIN */
121 nbnxn_kernel_simd_2xnn_ewald_twin_comb_geom_energrp,
122 nbnxn_kernel_simd_2xnn_ewald_twin_comb_lb_energrp,
123 nbnxn_kernel_simd_2xnn_ewald_twin_comb_none_energrp,
/* Lookup table of the kernels that do not compute energies.  It is used by
 * nbnxn_kernel_simd_2xnn when GMX_FORCE_ENERGY is not set in force_flags,
 * and is indexed as [coult][nbat->comb_rule].
 * The rows are listed in the order rf, tab, tab_twin, ewald, ewald_twin,
 * which matches the order of the coult enumerators; within each row the
 * kernels are listed as comb_geom, comb_lb, comb_none.
 * NOTE(review): the braces of this initializer and the definition of the
 * ljcr enum are not visible in this excerpt -- confirm the column order
 * against that enum.
 */
127 static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
/* Row for coultRF */
130 nbnxn_kernel_simd_2xnn_rf_comb_geom_noener,
131 nbnxn_kernel_simd_2xnn_rf_comb_lb_noener,
132 nbnxn_kernel_simd_2xnn_rf_comb_none_noener,
/* Row for coultTAB */
135 nbnxn_kernel_simd_2xnn_tab_comb_geom_noener,
136 nbnxn_kernel_simd_2xnn_tab_comb_lb_noener,
137 nbnxn_kernel_simd_2xnn_tab_comb_none_noener,
/* Row for coultTAB_TWIN */
140 nbnxn_kernel_simd_2xnn_tab_twin_comb_geom_noener,
141 nbnxn_kernel_simd_2xnn_tab_twin_comb_lb_noener,
142 nbnxn_kernel_simd_2xnn_tab_twin_comb_none_noener,
/* Row for coultEWALD */
145 nbnxn_kernel_simd_2xnn_ewald_comb_geom_noener,
146 nbnxn_kernel_simd_2xnn_ewald_comb_lb_noener,
147 nbnxn_kernel_simd_2xnn_ewald_comb_none_noener,
/* Row for coultEWALD_TWIN */
150 nbnxn_kernel_simd_2xnn_ewald_twin_comb_geom_noener,
151 nbnxn_kernel_simd_2xnn_ewald_twin_comb_lb_noener,
152 nbnxn_kernel_simd_2xnn_ewald_twin_comb_none_noener,
/* Adds the contents of the SIMD energy-group buffers VSvdw and VSc into the
 * output arrays Vvdw and Vc.
 *
 * ng       loop bound for the group indices i, j, j0 and j1; the outputs
 *          are addressed as [i*ng + j0] and [i*ng + j1]
 * ng_2log  shift count used to form ng_p2 = 1<<ng_2log, which is the stride
 *          of the j1 index inside the SIMD buffers
 * VSvdw    input SIMD buffer accumulated into Vvdw (read only)
 * VSc      input SIMD buffer accumulated into Vc (read only)
 * Vvdw     output array, accumulated into with +=
 * Vc       output array, accumulated into with +=
 *
 * For each (i, j1, j0) the buffer offset is
 *   c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj
 * and, for every s below unrollj_half, entry c+0 is added to the j0 output
 * and entry c+1 to the j1 output.
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (return type, braces, the body of the j loop and any update of c
 * inside the s loop); the description covers only the visible statements.
 */
158 reduce_group_energies(int ng, int ng_2log,
159 const real *VSvdw, const real *VSc,
160 real *Vvdw, real *Vc)
/* unrollj is the SIMD width divided by GMX_SIMD_J_UNROLL_SIZE, which this
 * file defines as 2; unrollj_half is half of that.
 */
162 const int unrollj = GMX_SIMD_WIDTH_HERE/GMX_SIMD_J_UNROLL_SIZE;
163 const int unrollj_half = unrollj/2;
164 int ng_p2, i, j, j0, j1, c, s;
/* ng_p2 is 2 raised to the power ng_2log */
166 ng_p2 = (1<<ng_2log);
168 /* The size of the x86 SIMD energy group buffer array is:
169 * ng*ng*ng_p2*unrollj_half*simd_width
171 for (i = 0; i < ng; i++)
173 for (j = 0; j < ng; j++)
179 for (j1 = 0; j1 < ng; j1++)
181 for (j0 = 0; j0 < ng; j0++)
183 c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
184 for (s = 0; s < unrollj_half; s++)
186 Vvdw[i*ng+j0] += VSvdw[c+0];
187 Vvdw[i*ng+j1] += VSvdw[c+1];
188 Vc [i*ng+j0] += VSc [c+0];
189 Vc [i*ng+j1] += VSc [c+1];
197 #else /* GMX_NBNXN_SIMD_2XNN */
199 #include "gmx_fatal.h"
201 #endif /* GMX_NBNXN_SIMD_2XNN */
/* Driver that runs the 2xnn SIMD non-bonded kernels over a set of pair lists.
 *
 * When GMX_NBNXN_SIMD_2XNN is defined, the visible code:
 *  - reads the number of pair lists from nbl_list->nnbl;
 *  - selects the electrostatics kernel kind (coult) from ic->eeltype,
 *    ewald_excl and the comparison ic->rcoulomb == ic->rvdw;
 *  - loops over the pair lists in an OpenMP parallel for, handling list
 *    nbl[nb] with output buffer nbat->out[nb] in iteration nb;
 *  - dispatches to p_nbk_noener, p_nbk_ener or p_nbk_energrp, depending on
 *    the GMX_FORCE_ENERGY bit of force_flags and on out->nV;
 *  - after the loop, calls reduce_energies_over_lists() when
 *    GMX_FORCE_ENERGY is set.
 * The gmx_incons() call at the end reports that the function was called
 * although these kernels are not enabled (presumably the branch compiled
 * when GMX_NBNXN_SIMD_2XNN is not defined; the preprocessor lines that
 * separate the two branches are not visible in this excerpt).
 *
 * Parameters, as used in the visible lines:
 *  nbl_list     pair-list set; its nnbl member gives the loop count
 *  nbat         atom data; out[], comb_rule, nenergrp and neg_2log are read
 *  ic           interaction constants; eeltype, rcoulomb and rvdw are read
 *  ewald_excl   compared against ewaldexclTable
 *  shift_vec    not referenced in the visible lines
 *  force_flags  bit mask tested for GMX_FORCE_VIRIAL and GMX_FORCE_ENERGY
 *  clearF       compared against enbvClearFYes to decide on clearing buffers
 *  fshift       not referenced in the visible lines
 *  Vvdw         passed, together with Vc, to reduce_energies_over_lists()
 * All parameters are tagged gmx_unused, presumably because none of them is
 * used when the SIMD kernels are compiled out.
 *
 * NOTE(review): many lines of this function are missing from this excerpt
 * (return type, one parameter line, braces, local declarations, several
 * branch bodies and the kernel argument lists).
 */
204 nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t gmx_unused *nbl_list,
205 const nbnxn_atomdata_t gmx_unused *nbat,
206 const interaction_const_t gmx_unused *ic,
207 int gmx_unused ewald_excl,
208 rvec gmx_unused *shift_vec,
209 int gmx_unused force_flags,
210 int gmx_unused clearF,
211 real gmx_unused *fshift,
213 real gmx_unused *Vvdw)
214 #ifdef GMX_NBNXN_SIMD_2XNN
217 nbnxn_pairlist_t **nbl;
221 nnbl = nbl_list->nnbl;
/* Select the electrostatics kernel kind; the assignments for the
 * reaction-field/cut-off case and for equal cut-offs are not visible here.
 */
224 if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
230 if (ewald_excl == ewaldexclTable)
/* Equal Coulomb and VdW cut-offs versus the twin variant (tabulated) */
232 if (ic->rcoulomb == ic->rvdw)
238 coult = coultTAB_TWIN;
/* Equal Coulomb and VdW cut-offs versus the twin variant (Ewald) */
243 if (ic->rcoulomb == ic->rvdw)
249 coult = coultEWALD_TWIN;
/* One pair list per iteration, statically scheduled; the thread count is
 * taken from gmx_omp_nthreads_get(emntNonbonded).
 */
254 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
255 for (nb = 0; nb < nnbl; nb++)
257 nbnxn_atomdata_output_t *out;
/* Output buffer that belongs to list nb */
260 out = &nbat->out[nb];
/* Clear the force output of this list when the caller asked for it */
262 if (clearF == enbvClearFYes)
264 clear_f(nbat, nb, out->f);
/* Virial requested and only a single list: the body of this branch is not
 * visible here.
 */
267 if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
/* Presumably the else branch: use the shift-force buffer of this output,
 * clearing it when requested.
 */
273 fshift_p = out->fshift;
275 if (clearF == enbvClearFYes)
277 clear_fshift(fshift_p);
281 if (!(force_flags & GMX_FORCE_ENERGY))
283 /* Don't calculate energies */
284 p_nbk_noener[coult][nbat->comb_rule](nbl[nb], nbat,
290 else if (out->nV == 1)
292 /* No energy groups */
296 p_nbk_ener[coult][nbat->comb_rule](nbl[nb], nbat,
306 /* Calculate energy group contributions */
/* Two loops over the out->nVS buffer entries; their bodies are not visible
 * (presumably they reset the SIMD energy-group buffers).
 */
309 for (i = 0; i < out->nVS; i++)
313 for (i = 0; i < out->nVS; i++)
318 p_nbk_energrp[coult][nbat->comb_rule](nbl[nb], nbat,
/* Add the SIMD energy-group buffers into the per-group output */
326 reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
327 out->VSvdw, out->VSc,
/* After the loop: combine the energies of all lists into Vvdw and Vc */
332 if (force_flags & GMX_FORCE_ENERGY)
334 reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
/* NOTE(review): the two adjacent string literals below concatenate with a
 * double space between the words kernels and are.
 */
339 gmx_incons("nbnxn_kernel_simd_2xnn called when such kernels "
340 " are not enabled.");
343 #undef GMX_SIMD_J_UNROLL_SIZE