Merge branch 'release-4-6'
[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / simd_4xn / nbnxn_kernel_simd_4xn.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013, by the GROMACS development team, led by
5  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
6  * others, as listed in the AUTHORS file in the top-level source
7  * directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the Verlet kernel generator for
37  * kernel type 4xn.
38  */
39
40 #ifdef HAVE_CONFIG_H
41 #include <config.h>
42 #endif
43
44 #include "typedefs.h"
45
46 #ifdef GMX_NBNXN_SIMD_4XN
47
48 #ifdef GMX_NBNXN_HALF_WIDTH_SIMD
49 #define GMX_USE_HALF_WIDTH_SIMD_HERE
50 #endif
51
52 #include "gmx_simd_macros.h"
53 #include "gmx_simd_vec.h"
54 #if !(GMX_SIMD_WIDTH_HERE == 2 || GMX_SIMD_WIDTH_HERE == 4 || GMX_SIMD_WIDTH_HERE == 8)
55 #error "unsupported SIMD width"
56 #endif
57
58 #define GMX_SIMD_J_UNROLL_SIZE 1
59 #include "nbnxn_kernel_simd_4xn.h"
60 #include "../nbnxn_kernel_common.h"
61 #include "gmx_omp_nthreads.h"
62 #include "types/force_flags.h"
63
64 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
65  */
66 enum {
67     coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR
68 };
69
70 /* Declare and define the kernel function pointer lookup tables. */
71 static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
72 {
73     {
74         nbnxn_kernel_simd_4xn_rf_comb_geom_ener,
75         nbnxn_kernel_simd_4xn_rf_comb_lb_ener,
76         nbnxn_kernel_simd_4xn_rf_comb_none_ener,
77     },
78     {
79         nbnxn_kernel_simd_4xn_tab_comb_geom_ener,
80         nbnxn_kernel_simd_4xn_tab_comb_lb_ener,
81         nbnxn_kernel_simd_4xn_tab_comb_none_ener,
82     },
83     {
84         nbnxn_kernel_simd_4xn_tab_twin_comb_geom_ener,
85         nbnxn_kernel_simd_4xn_tab_twin_comb_lb_ener,
86         nbnxn_kernel_simd_4xn_tab_twin_comb_none_ener,
87     },
88     {
89         nbnxn_kernel_simd_4xn_ewald_comb_geom_ener,
90         nbnxn_kernel_simd_4xn_ewald_comb_lb_ener,
91         nbnxn_kernel_simd_4xn_ewald_comb_none_ener,
92     },
93     {
94         nbnxn_kernel_simd_4xn_ewald_twin_comb_geom_ener,
95         nbnxn_kernel_simd_4xn_ewald_twin_comb_lb_ener,
96         nbnxn_kernel_simd_4xn_ewald_twin_comb_none_ener,
97     },
98 };
99
100 static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
101 {
102     {
103         nbnxn_kernel_simd_4xn_rf_comb_geom_energrp,
104         nbnxn_kernel_simd_4xn_rf_comb_lb_energrp,
105         nbnxn_kernel_simd_4xn_rf_comb_none_energrp,
106     },
107     {
108         nbnxn_kernel_simd_4xn_tab_comb_geom_energrp,
109         nbnxn_kernel_simd_4xn_tab_comb_lb_energrp,
110         nbnxn_kernel_simd_4xn_tab_comb_none_energrp,
111     },
112     {
113         nbnxn_kernel_simd_4xn_tab_twin_comb_geom_energrp,
114         nbnxn_kernel_simd_4xn_tab_twin_comb_lb_energrp,
115         nbnxn_kernel_simd_4xn_tab_twin_comb_none_energrp,
116     },
117     {
118         nbnxn_kernel_simd_4xn_ewald_comb_geom_energrp,
119         nbnxn_kernel_simd_4xn_ewald_comb_lb_energrp,
120         nbnxn_kernel_simd_4xn_ewald_comb_none_energrp,
121     },
122     {
123         nbnxn_kernel_simd_4xn_ewald_twin_comb_geom_energrp,
124         nbnxn_kernel_simd_4xn_ewald_twin_comb_lb_energrp,
125         nbnxn_kernel_simd_4xn_ewald_twin_comb_none_energrp,
126     },
127 };
128
129 static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
130 {
131     {
132         nbnxn_kernel_simd_4xn_rf_comb_geom_noener,
133         nbnxn_kernel_simd_4xn_rf_comb_lb_noener,
134         nbnxn_kernel_simd_4xn_rf_comb_none_noener,
135     },
136     {
137         nbnxn_kernel_simd_4xn_tab_comb_geom_noener,
138         nbnxn_kernel_simd_4xn_tab_comb_lb_noener,
139         nbnxn_kernel_simd_4xn_tab_comb_none_noener,
140     },
141     {
142         nbnxn_kernel_simd_4xn_tab_twin_comb_geom_noener,
143         nbnxn_kernel_simd_4xn_tab_twin_comb_lb_noener,
144         nbnxn_kernel_simd_4xn_tab_twin_comb_none_noener,
145     },
146     {
147         nbnxn_kernel_simd_4xn_ewald_comb_geom_noener,
148         nbnxn_kernel_simd_4xn_ewald_comb_lb_noener,
149         nbnxn_kernel_simd_4xn_ewald_comb_none_noener,
150     },
151     {
152         nbnxn_kernel_simd_4xn_ewald_twin_comb_geom_noener,
153         nbnxn_kernel_simd_4xn_ewald_twin_comb_lb_noener,
154         nbnxn_kernel_simd_4xn_ewald_twin_comb_none_noener,
155     },
156 };
157
158
159 static void
160 reduce_group_energies(int ng, int ng_2log,
161                       const real *VSvdw, const real *VSc,
162                       real *Vvdw, real *Vc)
163 {
164     const int unrollj      = GMX_SIMD_WIDTH_HERE/GMX_SIMD_J_UNROLL_SIZE;
165     const int unrollj_half = unrollj/2;
166     int       ng_p2, i, j, j0, j1, c, s;
167
168     ng_p2 = (1<<ng_2log);
169
170     /* The size of the x86 SIMD energy group buffer array is:
171      * ng*ng*ng_p2*unrollj_half*simd_width
172      */
173     for (i = 0; i < ng; i++)
174     {
175         for (j = 0; j < ng; j++)
176         {
177             Vvdw[i*ng+j] = 0;
178             Vc[i*ng+j]   = 0;
179         }
180
181         for (j1 = 0; j1 < ng; j1++)
182         {
183             for (j0 = 0; j0 < ng; j0++)
184             {
185                 c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
186                 for (s = 0; s < unrollj_half; s++)
187                 {
188                     Vvdw[i*ng+j0] += VSvdw[c+0];
189                     Vvdw[i*ng+j1] += VSvdw[c+1];
190                     Vc  [i*ng+j0] += VSc  [c+0];
191                     Vc  [i*ng+j1] += VSc  [c+1];
192                     c             += unrollj + 2;
193                 }
194             }
195         }
196     }
197 }
198
199 #else /* GMX_NBNXN_SIMD_4XN */
200
201 #include "gmx_fatal.h"
202
203 #endif /* GMX_NBNXN_SIMD_4XN */
204
205 void
206 nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t       *nbl_list,
207                       const nbnxn_atomdata_t     *nbat,
208                       const interaction_const_t  *ic,
209                       int                         ewald_excl,
210                       rvec                       *shift_vec,
211                       int                         force_flags,
212                       int                         clearF,
213                       real                       *fshift,
214                       real                       *Vc,
215                       real                       *Vvdw)
216 #ifdef GMX_NBNXN_SIMD_4XN
217 {
218     int                nnbl;
219     nbnxn_pairlist_t **nbl;
220     int                coult;
221     int                nb;
222
223     nnbl = nbl_list->nnbl;
224     nbl  = nbl_list->nbl;
225
226     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
227     {
228         coult = coultRF;
229     }
230     else
231     {
232         if (ewald_excl == ewaldexclTable)
233         {
234             if (ic->rcoulomb == ic->rvdw)
235             {
236                 coult = coultTAB;
237             }
238             else
239             {
240                 coult = coultTAB_TWIN;
241             }
242         }
243         else
244         {
245             if (ic->rcoulomb == ic->rvdw)
246             {
247                 coult = coultEWALD;
248             }
249             else
250             {
251                 coult = coultEWALD_TWIN;
252             }
253         }
254     }
255
256 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
257     for (nb = 0; nb < nnbl; nb++)
258     {
259         nbnxn_atomdata_output_t *out;
260         real                    *fshift_p;
261
262         out = &nbat->out[nb];
263
264         if (clearF == enbvClearFYes)
265         {
266             clear_f(nbat, nb, out->f);
267         }
268
269         if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
270         {
271             fshift_p = fshift;
272         }
273         else
274         {
275             fshift_p = out->fshift;
276
277             if (clearF == enbvClearFYes)
278             {
279                 clear_fshift(fshift_p);
280             }
281         }
282
283         /* With Ewald type electrostatics we the forces for excluded atom pairs
284          * should not contribute to the virial sum. The exclusion forces
285          * are not calculate in the energy kernels, but are in _noener.
286          */
287         if (!((force_flags & GMX_FORCE_ENERGY) ||
288               (EEL_FULL(ic->eeltype) && (force_flags & GMX_FORCE_VIRIAL))))
289         {
290             /* Don't calculate energies */
291             p_nbk_noener[coult][nbat->comb_rule](nbl[nb], nbat,
292                                                  ic,
293                                                  shift_vec,
294                                                  out->f,
295                                                  fshift_p);
296         }
297         else if (out->nV == 1 || !(force_flags & GMX_FORCE_ENERGY))
298         {
299             /* No energy groups */
300             out->Vvdw[0] = 0;
301             out->Vc[0]   = 0;
302
303             p_nbk_ener[coult][nbat->comb_rule](nbl[nb], nbat,
304                                                ic,
305                                                shift_vec,
306                                                out->f,
307                                                fshift_p,
308                                                out->Vvdw,
309                                                out->Vc);
310         }
311         else
312         {
313             /* Calculate energy group contributions */
314             int i;
315
316             for (i = 0; i < out->nVS; i++)
317             {
318                 out->VSvdw[i] = 0;
319             }
320             for (i = 0; i < out->nVS; i++)
321             {
322                 out->VSc[i] = 0;
323             }
324
325             p_nbk_energrp[coult][nbat->comb_rule](nbl[nb], nbat,
326                                                   ic,
327                                                   shift_vec,
328                                                   out->f,
329                                                   fshift_p,
330                                                   out->VSvdw,
331                                                   out->VSc);
332
333             reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
334                                   out->VSvdw, out->VSc,
335                                   out->Vvdw, out->Vc);
336         }
337     }
338
339     if (force_flags & GMX_FORCE_ENERGY)
340     {
341         reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
342     }
343 }
344 #else
345 {
346     gmx_incons("nbnxn_kernel_simd_4xn called when such kernels "
347                " are not enabled.");
348 }
349 #endif
350 #undef GMX_SIMD_J_UNROLL_SIZE