1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2012, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
45 #include "gmx_omp_nthreads.h"
46 #include "../nbnxn_consts.h"
47 #include "nbnxn_kernel_common.h"
51 #include "nbnxn_kernel_x86_simd128.h"
53 /* Include all flavors of the 128-bit SSE or AVX kernel loops */
55 #define GMX_MM128_HERE
57 /* Analytical reaction-field kernels */
60 #include "nbnxn_kernel_x86_simd_includes.h"
64 /* Tabulated exclusion interaction electrostatics kernels */
67 /* Single cut-off: rcoulomb = rvdw */
68 #include "nbnxn_kernel_x86_simd_includes.h"
70 /* Twin cut-off: rcoulomb >= rvdw */
71 #define VDW_CUTOFF_CHECK
72 #include "nbnxn_kernel_x86_simd_includes.h"
73 #undef VDW_CUTOFF_CHECK
78 typedef void (*p_nbk_func_ener)(const nbnxn_pairlist_t *nbl,
79 const nbnxn_atomdata_t *nbat,
80 const interaction_const_t *ic,
87 typedef void (*p_nbk_func_noener)(const nbnxn_pairlist_t *nbl,
88 const nbnxn_atomdata_t *nbat,
89 const interaction_const_t *ic,
94 enum { coultRF, coultTAB, coultTAB_TWIN, coultNR };
97 static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
98 { { nbnxn_kernel_x86_simd128_rf_comb_geom_ener,
99 nbnxn_kernel_x86_simd128_rf_comb_lb_ener,
100 nbnxn_kernel_x86_simd128_rf_comb_none_ener },
101 { nbnxn_kernel_x86_simd128_tab_comb_geom_ener,
102 nbnxn_kernel_x86_simd128_tab_comb_lb_ener,
103 nbnxn_kernel_x86_simd128_tab_twin_comb_none_ener },
104 { nbnxn_kernel_x86_simd128_tab_twin_comb_geom_ener,
105 nbnxn_kernel_x86_simd128_tab_twin_comb_lb_ener,
106 nbnxn_kernel_x86_simd128_tab_twin_comb_none_ener } };
108 static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
109 { { nbnxn_kernel_x86_simd128_rf_comb_geom_energrp,
110 nbnxn_kernel_x86_simd128_rf_comb_lb_energrp,
111 nbnxn_kernel_x86_simd128_rf_comb_none_energrp },
112 { nbnxn_kernel_x86_simd128_tab_comb_geom_energrp,
113 nbnxn_kernel_x86_simd128_tab_comb_lb_energrp,
114 nbnxn_kernel_x86_simd128_tab_comb_none_energrp },
115 { nbnxn_kernel_x86_simd128_tab_twin_comb_geom_energrp,
116 nbnxn_kernel_x86_simd128_tab_twin_comb_lb_energrp,
117 nbnxn_kernel_x86_simd128_tab_twin_comb_none_energrp } };
119 static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
120 { { nbnxn_kernel_x86_simd128_rf_comb_geom_noener,
121 nbnxn_kernel_x86_simd128_rf_comb_lb_noener,
122 nbnxn_kernel_x86_simd128_rf_comb_none_noener },
123 { nbnxn_kernel_x86_simd128_tab_comb_geom_noener,
124 nbnxn_kernel_x86_simd128_tab_comb_lb_noener,
125 nbnxn_kernel_x86_simd128_tab_comb_none_noener },
126 { nbnxn_kernel_x86_simd128_tab_twin_comb_geom_noener,
127 nbnxn_kernel_x86_simd128_tab_twin_comb_lb_noener,
128 nbnxn_kernel_x86_simd128_tab_twin_comb_none_noener } };
133 static void reduce_group_energies(int ng,int ng_2log,
134 const real *VSvdw,const real *VSc,
137 int ng_p2,i,j,j0,j1,c,s;
141 #define SIMD_WIDTH_2 2
144 #define SIMD_WIDTH_2 1
147 ng_p2 = (1<<ng_2log);
149 /* The size of the SSE energy group buffer array is:
150 * stride^3*SIMD_WIDTH_2*SIMD_WIDTH
160 for(j1=0; j1<ng; j1++)
162 for(j0=0; j0<ng; j0++)
164 c = ((i*ng + j1)*ng_p2 + j0)*SIMD_WIDTH_2*SIMD_WIDTH;
165 for(s=0; s<SIMD_WIDTH_2; s++)
167 Vvdw[i*ng+j0] += VSvdw[c+0];
168 Vvdw[i*ng+j1] += VSvdw[c+1];
169 Vc [i*ng+j0] += VSc [c+0];
170 Vc [i*ng+j1] += VSc [c+1];
179 nbnxn_kernel_x86_simd128(nbnxn_pairlist_set_t *nbl_list,
180 const nbnxn_atomdata_t *nbat,
181 const interaction_const_t *ic,
191 nbnxn_pairlist_t **nbl;
195 nnbl = nbl_list->nnbl;
198 if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
204 if (ic->rcoulomb == ic->rvdw)
210 coult = coultTAB_TWIN;
214 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
215 for(nb=0; nb<nnbl; nb++)
217 nbnxn_atomdata_output_t *out;
220 out = &nbat->out[nb];
222 if (clearF == enbvClearFYes)
224 clear_f(nbat,out->f);
227 if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
233 fshift_p = out->fshift;
235 if (clearF == enbvClearFYes)
237 clear_fshift(fshift_p);
241 /* With Ewald type electrostatics we the forces for excluded atom pairs
242 * should not contribute to the virial sum. The exclusion forces
243 * are not calculate in the energy kernels, but are in _noener.
245 if (!((force_flags & GMX_FORCE_ENERGY) ||
246 (EEL_FULL(ic->eeltype) && (force_flags & GMX_FORCE_VIRIAL))))
248 /* Don't calculate energies */
249 p_nbk_noener[coult][nbat->comb_rule](nbl[nb],nbat,
255 else if (out->nV == 1 || !(force_flags & GMX_FORCE_ENERGY))
257 /* No energy groups */
261 p_nbk_ener[coult][nbat->comb_rule](nbl[nb],nbat,
271 /* Calculate energy group contributions */
274 for(i=0; i<out->nVS; i++)
278 for(i=0; i<out->nVS; i++)
283 p_nbk_energrp[coult][nbat->comb_rule](nbl[nb],nbat,
291 reduce_group_energies(nbat->nenergrp,nbat->neg_2log,
297 if (force_flags & GMX_FORCE_ENERGY)
299 /* Reduce the energies */
300 for(nb=0; nb<nnbl; nb++)
304 for(i=0; i<nbat->out[nb].nV; i++)
306 Vvdw[i] += nbat->out[nb].Vvdw[i];
307 Vc[i] += nbat->out[nb].Vc[i];
314 gmx_incons("nbnxn_kernel_x86_simd128 called while GROMACS was configured without SSE enabled");