795963c2a4d3f0bc064e4202b357972ad6fa2af5
[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / simd_2xnn / nbnxn_kernel_simd_2xnn.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the Verlet kernel generator for
37  * kernel type 2xnn.
38  */
39
40 #ifdef HAVE_CONFIG_H
41 #include <config.h>
42 #endif
43
44 #include "typedefs.h"
45
46 #ifdef GMX_NBNXN_SIMD_2XNN
47
48 /* Include the full-width SIMD macros */
49
50 #include "gmx_simd_macros.h"
51 #include "gmx_simd_vec.h"
52 #if !(GMX_SIMD_WIDTH_HERE == 8 || GMX_SIMD_WIDTH_HERE == 16)
53 #error "unsupported SIMD width"
54 #endif
55
56 #define GMX_SIMD_J_UNROLL_SIZE 2
57 #include "nbnxn_kernel_simd_2xnn.h"
58 #include "../nbnxn_kernel_common.h"
59 #include "gmx_omp_nthreads.h"
60 #include "types/force_flags.h"
61
62 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
63  */
64 enum {
65     coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR
66 };
67
68 /* Declare and define the kernel function pointer lookup tables. */
69 static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
70 {
71     {
72         nbnxn_kernel_simd_2xnn_rf_comb_geom_ener,
73         nbnxn_kernel_simd_2xnn_rf_comb_lb_ener,
74         nbnxn_kernel_simd_2xnn_rf_comb_none_ener,
75     },
76     {
77         nbnxn_kernel_simd_2xnn_tab_comb_geom_ener,
78         nbnxn_kernel_simd_2xnn_tab_comb_lb_ener,
79         nbnxn_kernel_simd_2xnn_tab_comb_none_ener,
80     },
81     {
82         nbnxn_kernel_simd_2xnn_tab_twin_comb_geom_ener,
83         nbnxn_kernel_simd_2xnn_tab_twin_comb_lb_ener,
84         nbnxn_kernel_simd_2xnn_tab_twin_comb_none_ener,
85     },
86     {
87         nbnxn_kernel_simd_2xnn_ewald_comb_geom_ener,
88         nbnxn_kernel_simd_2xnn_ewald_comb_lb_ener,
89         nbnxn_kernel_simd_2xnn_ewald_comb_none_ener,
90     },
91     {
92         nbnxn_kernel_simd_2xnn_ewald_twin_comb_geom_ener,
93         nbnxn_kernel_simd_2xnn_ewald_twin_comb_lb_ener,
94         nbnxn_kernel_simd_2xnn_ewald_twin_comb_none_ener,
95     },
96 };
97
98 static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
99 {
100     {
101         nbnxn_kernel_simd_2xnn_rf_comb_geom_energrp,
102         nbnxn_kernel_simd_2xnn_rf_comb_lb_energrp,
103         nbnxn_kernel_simd_2xnn_rf_comb_none_energrp,
104     },
105     {
106         nbnxn_kernel_simd_2xnn_tab_comb_geom_energrp,
107         nbnxn_kernel_simd_2xnn_tab_comb_lb_energrp,
108         nbnxn_kernel_simd_2xnn_tab_comb_none_energrp,
109     },
110     {
111         nbnxn_kernel_simd_2xnn_tab_twin_comb_geom_energrp,
112         nbnxn_kernel_simd_2xnn_tab_twin_comb_lb_energrp,
113         nbnxn_kernel_simd_2xnn_tab_twin_comb_none_energrp,
114     },
115     {
116         nbnxn_kernel_simd_2xnn_ewald_comb_geom_energrp,
117         nbnxn_kernel_simd_2xnn_ewald_comb_lb_energrp,
118         nbnxn_kernel_simd_2xnn_ewald_comb_none_energrp,
119     },
120     {
121         nbnxn_kernel_simd_2xnn_ewald_twin_comb_geom_energrp,
122         nbnxn_kernel_simd_2xnn_ewald_twin_comb_lb_energrp,
123         nbnxn_kernel_simd_2xnn_ewald_twin_comb_none_energrp,
124     },
125 };
126
127 static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
128 {
129     {
130         nbnxn_kernel_simd_2xnn_rf_comb_geom_noener,
131         nbnxn_kernel_simd_2xnn_rf_comb_lb_noener,
132         nbnxn_kernel_simd_2xnn_rf_comb_none_noener,
133     },
134     {
135         nbnxn_kernel_simd_2xnn_tab_comb_geom_noener,
136         nbnxn_kernel_simd_2xnn_tab_comb_lb_noener,
137         nbnxn_kernel_simd_2xnn_tab_comb_none_noener,
138     },
139     {
140         nbnxn_kernel_simd_2xnn_tab_twin_comb_geom_noener,
141         nbnxn_kernel_simd_2xnn_tab_twin_comb_lb_noener,
142         nbnxn_kernel_simd_2xnn_tab_twin_comb_none_noener,
143     },
144     {
145         nbnxn_kernel_simd_2xnn_ewald_comb_geom_noener,
146         nbnxn_kernel_simd_2xnn_ewald_comb_lb_noener,
147         nbnxn_kernel_simd_2xnn_ewald_comb_none_noener,
148     },
149     {
150         nbnxn_kernel_simd_2xnn_ewald_twin_comb_geom_noener,
151         nbnxn_kernel_simd_2xnn_ewald_twin_comb_lb_noener,
152         nbnxn_kernel_simd_2xnn_ewald_twin_comb_none_noener,
153     },
154 };
155
156
157 static void
158 reduce_group_energies(int ng, int ng_2log,
159                       const real *VSvdw, const real *VSc,
160                       real *Vvdw, real *Vc)
161 {
162     const int unrollj      = GMX_SIMD_WIDTH_HERE/GMX_SIMD_J_UNROLL_SIZE;
163     const int unrollj_half = unrollj/2;
164     int       ng_p2, i, j, j0, j1, c, s;
165
166     ng_p2 = (1<<ng_2log);
167
168     /* The size of the x86 SIMD energy group buffer array is:
169      * ng*ng*ng_p2*unrollj_half*simd_width
170      */
171     for (i = 0; i < ng; i++)
172     {
173         for (j = 0; j < ng; j++)
174         {
175             Vvdw[i*ng+j] = 0;
176             Vc[i*ng+j]   = 0;
177         }
178
179         for (j1 = 0; j1 < ng; j1++)
180         {
181             for (j0 = 0; j0 < ng; j0++)
182             {
183                 c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
184                 for (s = 0; s < unrollj_half; s++)
185                 {
186                     Vvdw[i*ng+j0] += VSvdw[c+0];
187                     Vvdw[i*ng+j1] += VSvdw[c+1];
188                     Vc  [i*ng+j0] += VSc  [c+0];
189                     Vc  [i*ng+j1] += VSc  [c+1];
190                     c             += unrollj + 2;
191                 }
192             }
193         }
194     }
195 }
196
197 #else /* GMX_NBNXN_SIMD_2XNN */
198
199 #include "gmx_fatal.h"
200
201 #endif /* GMX_NBNXN_SIMD_2XNN */
202
203 void
204 nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
205                        const nbnxn_atomdata_t    gmx_unused *nbat,
206                        const interaction_const_t gmx_unused *ic,
207                        int                       gmx_unused  ewald_excl,
208                        rvec                      gmx_unused *shift_vec,
209                        int                       gmx_unused  force_flags,
210                        int                       gmx_unused  clearF,
211                        real                      gmx_unused *fshift,
212                        real                      gmx_unused *Vc,
213                        real                      gmx_unused *Vvdw)
214 #ifdef GMX_NBNXN_SIMD_2XNN
215 {
216     int                nnbl;
217     nbnxn_pairlist_t **nbl;
218     int                coult;
219     int                nb;
220
221     nnbl = nbl_list->nnbl;
222     nbl  = nbl_list->nbl;
223
224     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
225     {
226         coult = coultRF;
227     }
228     else
229     {
230         if (ewald_excl == ewaldexclTable)
231         {
232             if (ic->rcoulomb == ic->rvdw)
233             {
234                 coult = coultTAB;
235             }
236             else
237             {
238                 coult = coultTAB_TWIN;
239             }
240         }
241         else
242         {
243             if (ic->rcoulomb == ic->rvdw)
244             {
245                 coult = coultEWALD;
246             }
247             else
248             {
249                 coult = coultEWALD_TWIN;
250             }
251         }
252     }
253
254 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
255     for (nb = 0; nb < nnbl; nb++)
256     {
257         nbnxn_atomdata_output_t *out;
258         real                    *fshift_p;
259
260         out = &nbat->out[nb];
261
262         if (clearF == enbvClearFYes)
263         {
264             clear_f(nbat, nb, out->f);
265         }
266
267         if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
268         {
269             fshift_p = fshift;
270         }
271         else
272         {
273             fshift_p = out->fshift;
274
275             if (clearF == enbvClearFYes)
276             {
277                 clear_fshift(fshift_p);
278             }
279         }
280
281         if (!(force_flags & GMX_FORCE_ENERGY))
282         {
283             /* Don't calculate energies */
284             p_nbk_noener[coult][nbat->comb_rule](nbl[nb], nbat,
285                                                  ic,
286                                                  shift_vec,
287                                                  out->f,
288                                                  fshift_p);
289         }
290         else if (out->nV == 1)
291         {
292             /* No energy groups */
293             out->Vvdw[0] = 0;
294             out->Vc[0]   = 0;
295
296             p_nbk_ener[coult][nbat->comb_rule](nbl[nb], nbat,
297                                                ic,
298                                                shift_vec,
299                                                out->f,
300                                                fshift_p,
301                                                out->Vvdw,
302                                                out->Vc);
303         }
304         else
305         {
306             /* Calculate energy group contributions */
307             int i;
308
309             for (i = 0; i < out->nVS; i++)
310             {
311                 out->VSvdw[i] = 0;
312             }
313             for (i = 0; i < out->nVS; i++)
314             {
315                 out->VSc[i] = 0;
316             }
317
318             p_nbk_energrp[coult][nbat->comb_rule](nbl[nb], nbat,
319                                                   ic,
320                                                   shift_vec,
321                                                   out->f,
322                                                   fshift_p,
323                                                   out->VSvdw,
324                                                   out->VSc);
325
326             reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
327                                   out->VSvdw, out->VSc,
328                                   out->Vvdw, out->Vc);
329         }
330     }
331
332     if (force_flags & GMX_FORCE_ENERGY)
333     {
334         reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
335     }
336 }
337 #else
338 {
339     gmx_incons("nbnxn_kernel_simd_2xnn called when such kernels "
340                " are not enabled.");
341 }
342 #endif
343 #undef GMX_SIMD_J_UNROLL_SIZE