1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2012, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
45 #include "gmx_omp_nthreads.h"
46 #include "../nbnxn_consts.h"
47 #include "nbnxn_kernel_common.h"
51 #include "nbnxn_kernel_x86_simd128.h"
53 /* Include all flavors of the 128-bit SSE or AVX kernel loops */
55 #define GMX_MM128_HERE
57 /* Analytical reaction-field kernels */
60 #include "nbnxn_kernel_x86_simd_includes.h"
64 /* Tabulated exclusion interaction electrostatics kernels */
67 /* Single cut-off: rcoulomb = rvdw */
68 #include "nbnxn_kernel_x86_simd_includes.h"
70 /* Twin cut-off: rcoulomb >= rvdw */
71 #define VDW_CUTOFF_CHECK
72 #include "nbnxn_kernel_x86_simd_includes.h"
73 #undef VDW_CUTOFF_CHECK
77 /* Analytical Ewald exclusion interaction electrostatics kernels */
78 #define CALC_COUL_EWALD
80 /* Single cut-off: rcoulomb = rvdw */
81 #include "nbnxn_kernel_x86_simd_includes.h"
83 /* Twin cut-off: rcoulomb >= rvdw */
84 #define VDW_CUTOFF_CHECK
85 #include "nbnxn_kernel_x86_simd_includes.h"
86 #undef VDW_CUTOFF_CHECK
88 #undef CALC_COUL_EWALD
91 typedef void (*p_nbk_func_ener)(const nbnxn_pairlist_t *nbl,
92 const nbnxn_atomdata_t *nbat,
93 const interaction_const_t *ic,
100 typedef void (*p_nbk_func_noener)(const nbnxn_pairlist_t *nbl,
101 const nbnxn_atomdata_t *nbat,
102 const interaction_const_t *ic,
107 enum { coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR };
109 #define NBK_FN(elec,ljcomb) nbnxn_kernel_x86_simd128_##elec##_comb_##ljcomb##_ener
110 static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
111 { { NBK_FN(rf ,geom), NBK_FN(rf ,lb), NBK_FN(rf ,none) },
112 { NBK_FN(tab ,geom), NBK_FN(tab ,lb), NBK_FN(tab ,none) },
113 { NBK_FN(tab_twin ,geom), NBK_FN(tab_twin ,lb), NBK_FN(tab_twin ,none) },
114 { NBK_FN(ewald ,geom), NBK_FN(ewald ,lb), NBK_FN(ewald ,none) },
115 { NBK_FN(ewald_twin,geom), NBK_FN(ewald_twin,lb), NBK_FN(ewald_twin,none) } };
118 #define NBK_FN(elec,ljcomb) nbnxn_kernel_x86_simd128_##elec##_comb_##ljcomb##_energrp
119 static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
120 { { NBK_FN(rf ,geom), NBK_FN(rf ,lb), NBK_FN(rf ,none) },
121 { NBK_FN(tab ,geom), NBK_FN(tab ,lb), NBK_FN(tab ,none) },
122 { NBK_FN(tab_twin ,geom), NBK_FN(tab_twin ,lb), NBK_FN(tab_twin ,none) },
123 { NBK_FN(ewald ,geom), NBK_FN(ewald ,lb), NBK_FN(ewald ,none) },
124 { NBK_FN(ewald_twin,geom), NBK_FN(ewald_twin,lb), NBK_FN(ewald_twin,none) } };
127 #define NBK_FN(elec,ljcomb) nbnxn_kernel_x86_simd128_##elec##_comb_##ljcomb##_noener
128 static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
129 { { NBK_FN(rf ,geom), NBK_FN(rf ,lb), NBK_FN(rf ,none) },
130 { NBK_FN(tab ,geom), NBK_FN(tab ,lb), NBK_FN(tab ,none) },
131 { NBK_FN(tab_twin ,geom), NBK_FN(tab_twin ,lb), NBK_FN(tab_twin ,none) },
132 { NBK_FN(ewald ,geom), NBK_FN(ewald ,lb), NBK_FN(ewald ,none) },
133 { NBK_FN(ewald_twin,geom), NBK_FN(ewald_twin,lb), NBK_FN(ewald_twin,none) } };
137 static void reduce_group_energies(int ng,int ng_2log,
138 const real *VSvdw,const real *VSc,
141 int ng_p2,i,j,j0,j1,c,s;
143 #define SIMD_WIDTH (GMX_X86_SIMD_WIDTH_HERE)
144 #define SIMD_WIDTH_HALF (GMX_X86_SIMD_WIDTH_HERE/2)
146 ng_p2 = (1<<ng_2log);
148 /* The size of the x86 SIMD energy group buffer array is:
149 * ng*ng*ng_p2*SIMD_WIDTH_HALF*SIMD_WIDTH
159 for(j1=0; j1<ng; j1++)
161 for(j0=0; j0<ng; j0++)
163 c = ((i*ng + j1)*ng_p2 + j0)*SIMD_WIDTH_HALF*SIMD_WIDTH;
164 for(s=0; s<SIMD_WIDTH_HALF; s++)
166 Vvdw[i*ng+j0] += VSvdw[c+0];
167 Vvdw[i*ng+j1] += VSvdw[c+1];
168 Vc [i*ng+j0] += VSc [c+0];
169 Vc [i*ng+j1] += VSc [c+1];
177 #endif /* GMX_X86_SSE2 */
180 nbnxn_kernel_x86_simd128(nbnxn_pairlist_set_t *nbl_list,
181 const nbnxn_atomdata_t *nbat,
182 const interaction_const_t *ic,
193 nbnxn_pairlist_t **nbl;
197 nnbl = nbl_list->nnbl;
200 if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
206 if (ewald_excl == ewaldexclTable)
208 if (ic->rcoulomb == ic->rvdw)
214 coult = coultTAB_TWIN;
219 if (ic->rcoulomb == ic->rvdw)
225 coult = coultEWALD_TWIN;
230 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
231 for(nb=0; nb<nnbl; nb++)
233 nbnxn_atomdata_output_t *out;
236 out = &nbat->out[nb];
238 if (clearF == enbvClearFYes)
240 clear_f(nbat,out->f);
243 if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
249 fshift_p = out->fshift;
251 if (clearF == enbvClearFYes)
253 clear_fshift(fshift_p);
257 /* With Ewald type electrostatics we the forces for excluded atom pairs
258 * should not contribute to the virial sum. The exclusion forces
259 * are not calculate in the energy kernels, but are in _noener.
261 if (!((force_flags & GMX_FORCE_ENERGY) ||
262 (EEL_FULL(ic->eeltype) && (force_flags & GMX_FORCE_VIRIAL))))
264 /* Don't calculate energies */
265 p_nbk_noener[coult][nbat->comb_rule](nbl[nb],nbat,
271 else if (out->nV == 1 || !(force_flags & GMX_FORCE_ENERGY))
273 /* No energy groups */
277 p_nbk_ener[coult][nbat->comb_rule](nbl[nb],nbat,
287 /* Calculate energy group contributions */
290 for(i=0; i<out->nVS; i++)
294 for(i=0; i<out->nVS; i++)
299 p_nbk_energrp[coult][nbat->comb_rule](nbl[nb],nbat,
307 reduce_group_energies(nbat->nenergrp,nbat->neg_2log,
313 if (force_flags & GMX_FORCE_ENERGY)
315 reduce_energies_over_lists(nbat,nnbl,Vvdw,Vc);
320 gmx_incons("nbnxn_kernel_x86_simd128 called while GROMACS was configured without SSE enabled");