c226413ae47e9c6eb19c46e3ecd15eccb38e1dde
[alexxy/gromacs.git] / src / mdlib / nbnxn_kernels / nbnxn_kernel_simd_4xn.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2012, The GROMACS development team,
6  * check out http://www.gromacs.org for more information.
7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9  * others, as listed in the AUTHORS file in the top-level source
10  * directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "typedefs.h"
45 #include "vec.h"
46 #include "smalloc.h"
47 #include "force.h"
48 #include "gmx_omp_nthreads.h"
49 #include "../nbnxn_consts.h"
50 #include "nbnxn_kernel_common.h"
51
52 #ifdef GMX_NBNXN_SIMD_4XN
53
54 #include "nbnxn_kernel_simd_4xn.h"
55
56 /* Include all flavors of the SSE or AVX 4xN kernel loops */
57
58 #if !(GMX_NBNXN_SIMD_BITWIDTH == 128 || GMX_NBNXN_SIMD_BITWIDTH == 256)
59 #error "unsupported GMX_NBNXN_SIMD_BITWIDTH"
60 #endif
61
62 /* Analytical reaction-field kernels */
63 #define CALC_COUL_RF
64
65 #include "nbnxn_kernel_simd_4xn_includes.h"
66
67 #undef CALC_COUL_RF
68
69 /* Tabulated exclusion interaction electrostatics kernels */
70 #define CALC_COUL_TAB
71
72 /* Single cut-off: rcoulomb = rvdw */
73 #include "nbnxn_kernel_simd_4xn_includes.h"
74
75 /* Twin cut-off: rcoulomb >= rvdw */
76 #define VDW_CUTOFF_CHECK
77 #include "nbnxn_kernel_simd_4xn_includes.h"
78 #undef VDW_CUTOFF_CHECK
79
80 #undef CALC_COUL_TAB
81
82 /* Analytical Ewald exclusion interaction electrostatics kernels */
83 #define CALC_COUL_EWALD
84
85 /* Single cut-off: rcoulomb = rvdw */
86 #include "nbnxn_kernel_simd_4xn_includes.h"
87
88 /* Twin cut-off: rcoulomb >= rvdw */
89 #define VDW_CUTOFF_CHECK
90 #include "nbnxn_kernel_simd_4xn_includes.h"
91 #undef VDW_CUTOFF_CHECK
92
93 #undef CALC_COUL_EWALD
94
95
96 typedef void (*p_nbk_func_ener)(const nbnxn_pairlist_t     *nbl,
97                                 const nbnxn_atomdata_t     *nbat,
98                                 const interaction_const_t  *ic,
99                                 rvec                       *shift_vec,
100                                 real                       *f,
101                                 real                       *fshift,
102                                 real                       *Vvdw,
103                                 real                       *Vc);
104
105 typedef void (*p_nbk_func_noener)(const nbnxn_pairlist_t     *nbl,
106                                   const nbnxn_atomdata_t     *nbat,
107                                   const interaction_const_t  *ic,
108                                   rvec                       *shift_vec,
109                                   real                       *f,
110                                   real                       *fshift);
111
112 enum {
113     coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR
114 };
115
116 #define NBK_FN(elec, ljcomb) nbnxn_kernel_simd_4xn_ ## elec ## _comb_ ## ljcomb ## _ener
117 static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
118 { { NBK_FN(rf, geom), NBK_FN(rf, lb), NBK_FN(rf, none) },
119   { NBK_FN(tab, geom), NBK_FN(tab, lb), NBK_FN(tab, none) },
120   { NBK_FN(tab_twin, geom), NBK_FN(tab_twin, lb), NBK_FN(tab_twin, none) },
121   { NBK_FN(ewald, geom), NBK_FN(ewald, lb), NBK_FN(ewald, none) },
122   { NBK_FN(ewald_twin, geom), NBK_FN(ewald_twin, lb), NBK_FN(ewald_twin, none) } };
123 #undef NBK_FN
124
125 #define NBK_FN(elec, ljcomb) nbnxn_kernel_simd_4xn_ ## elec ## _comb_ ## ljcomb ## _energrp
126 static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
127 { { NBK_FN(rf, geom), NBK_FN(rf, lb), NBK_FN(rf, none) },
128   { NBK_FN(tab, geom), NBK_FN(tab, lb), NBK_FN(tab, none) },
129   { NBK_FN(tab_twin, geom), NBK_FN(tab_twin, lb), NBK_FN(tab_twin, none) },
130   { NBK_FN(ewald, geom), NBK_FN(ewald, lb), NBK_FN(ewald, none) },
131   { NBK_FN(ewald_twin, geom), NBK_FN(ewald_twin, lb), NBK_FN(ewald_twin, none) } };
132 #undef NBK_FN
133
134 #define NBK_FN(elec, ljcomb) nbnxn_kernel_simd_4xn_ ## elec ## _comb_ ## ljcomb ## _noener
135 static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
136 { { NBK_FN(rf, geom), NBK_FN(rf, lb), NBK_FN(rf, none) },
137   { NBK_FN(tab, geom), NBK_FN(tab, lb), NBK_FN(tab, none) },
138   { NBK_FN(tab_twin, geom), NBK_FN(tab_twin, lb), NBK_FN(tab_twin, none) },
139   { NBK_FN(ewald, geom), NBK_FN(ewald, lb), NBK_FN(ewald, none) },
140   { NBK_FN(ewald_twin, geom), NBK_FN(ewald_twin, lb), NBK_FN(ewald_twin, none) } };
141 #undef NBK_FN
142
143
144 static void reduce_group_energies(int ng, int ng_2log,
145                                   const real *VSvdw, const real *VSc,
146                                   real *Vvdw, real *Vc)
147 {
148     const int simd_width   = GMX_SIMD_WIDTH_HERE;
149     const int unrollj_half = GMX_SIMD_WIDTH_HERE/2;
150     int       ng_p2, i, j, j0, j1, c, s;
151
152     ng_p2 = (1<<ng_2log);
153
154     /* The size of the x86 SIMD energy group buffer array is:
155      * ng*ng*ng_p2*unrollj_half*simd_width
156      */
157     for (i = 0; i < ng; i++)
158     {
159         for (j = 0; j < ng; j++)
160         {
161             Vvdw[i*ng+j] = 0;
162             Vc[i*ng+j]   = 0;
163         }
164
165         for (j1 = 0; j1 < ng; j1++)
166         {
167             for (j0 = 0; j0 < ng; j0++)
168             {
169                 c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*simd_width;
170                 for (s = 0; s < unrollj_half; s++)
171                 {
172                     Vvdw[i*ng+j0] += VSvdw[c+0];
173                     Vvdw[i*ng+j1] += VSvdw[c+1];
174                     Vc  [i*ng+j0] += VSc  [c+0];
175                     Vc  [i*ng+j1] += VSc  [c+1];
176                     c             += simd_width + 2;
177                 }
178             }
179         }
180     }
181 }
182
183 #endif /* GMX_NBNXN_SIMD_4XN */
184
185 void
186 nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t       *nbl_list,
187                       const nbnxn_atomdata_t     *nbat,
188                       const interaction_const_t  *ic,
189                       int                         ewald_excl,
190                       rvec                       *shift_vec,
191                       int                         force_flags,
192                       int                         clearF,
193                       real                       *fshift,
194                       real                       *Vc,
195                       real                       *Vvdw)
196 #ifdef GMX_NBNXN_SIMD_4XN
197 {
198     int                nnbl;
199     nbnxn_pairlist_t **nbl;
200     int                coult;
201     int                nb;
202
203     nnbl = nbl_list->nnbl;
204     nbl  = nbl_list->nbl;
205
206     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
207     {
208         coult = coultRF;
209     }
210     else
211     {
212         if (ewald_excl == ewaldexclTable)
213         {
214             if (ic->rcoulomb == ic->rvdw)
215             {
216                 coult = coultTAB;
217             }
218             else
219             {
220                 coult = coultTAB_TWIN;
221             }
222         }
223         else
224         {
225             if (ic->rcoulomb == ic->rvdw)
226             {
227                 coult = coultEWALD;
228             }
229             else
230             {
231                 coult = coultEWALD_TWIN;
232             }
233         }
234     }
235
236 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
237     for (nb = 0; nb < nnbl; nb++)
238     {
239         nbnxn_atomdata_output_t *out;
240         real                    *fshift_p;
241
242         out = &nbat->out[nb];
243
244         if (clearF == enbvClearFYes)
245         {
246             clear_f(nbat, nb, out->f);
247         }
248
249         if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
250         {
251             fshift_p = fshift;
252         }
253         else
254         {
255             fshift_p = out->fshift;
256
257             if (clearF == enbvClearFYes)
258             {
259                 clear_fshift(fshift_p);
260             }
261         }
262
263         /* With Ewald type electrostatics we the forces for excluded atom pairs
264          * should not contribute to the virial sum. The exclusion forces
265          * are not calculate in the energy kernels, but are in _noener.
266          */
267         if (!((force_flags & GMX_FORCE_ENERGY) ||
268               (EEL_FULL(ic->eeltype) && (force_flags & GMX_FORCE_VIRIAL))))
269         {
270             /* Don't calculate energies */
271             p_nbk_noener[coult][nbat->comb_rule](nbl[nb], nbat,
272                                                  ic,
273                                                  shift_vec,
274                                                  out->f,
275                                                  fshift_p);
276         }
277         else if (out->nV == 1 || !(force_flags & GMX_FORCE_ENERGY))
278         {
279             /* No energy groups */
280             out->Vvdw[0] = 0;
281             out->Vc[0]   = 0;
282
283             p_nbk_ener[coult][nbat->comb_rule](nbl[nb], nbat,
284                                                ic,
285                                                shift_vec,
286                                                out->f,
287                                                fshift_p,
288                                                out->Vvdw,
289                                                out->Vc);
290         }
291         else
292         {
293             /* Calculate energy group contributions */
294             int i;
295
296             for (i = 0; i < out->nVS; i++)
297             {
298                 out->VSvdw[i] = 0;
299             }
300             for (i = 0; i < out->nVS; i++)
301             {
302                 out->VSc[i] = 0;
303             }
304
305             p_nbk_energrp[coult][nbat->comb_rule](nbl[nb], nbat,
306                                                   ic,
307                                                   shift_vec,
308                                                   out->f,
309                                                   fshift_p,
310                                                   out->VSvdw,
311                                                   out->VSc);
312
313             reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
314                                   out->VSvdw, out->VSc,
315                                   out->Vvdw, out->Vc);
316         }
317     }
318
319     if (force_flags & GMX_FORCE_ENERGY)
320     {
321         reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
322     }
323 }
324 #else
325 {
326     gmx_incons("nbnxn_kernel_simd_4xn called while GROMACS was configured without 4xN SIMD kernels enabled");
327 }
328 #endif