Sort all includes in src/gromacs
[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / simd_4xn / nbnxn_kernel_simd_4xn.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the Verlet kernel generator for
37  * kernel type 4xn.
38  */
39
40 #include "gmxpre.h"
41
42 #include "config.h"
43
44 #include "gromacs/legacyheaders/typedefs.h"
45 #include "gromacs/mdlib/nb_verlet.h"
46 #include "gromacs/mdlib/nbnxn_simd.h"
47
48 #ifdef GMX_NBNXN_SIMD_4XN
49
50 #include "gromacs/simd/vector_operations.h"
51
52 #if !(GMX_SIMD_REAL_WIDTH == 2 || GMX_SIMD_REAL_WIDTH == 4 || GMX_SIMD_REAL_WIDTH == 8)
53 #error "unsupported SIMD width"
54 #endif
55
56 #define GMX_SIMD_J_UNROLL_SIZE 1
57 #include "nbnxn_kernel_simd_4xn.h"
58
59 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
60 #include "gromacs/legacyheaders/types/force_flags.h"
61 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
62 #include "gromacs/utility/fatalerror.h"
63
64 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
65  */
66 enum {
67     coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
68 };
69
70 /*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
71  */
72 enum {
73     vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
74 };
75
76 /* Declare and define the kernel function pointer lookup tables.
77  * The minor index of the array goes over both the LJ combination rules,
78  * which is only supported by plain cut-off, and the LJ switch/PME functions.
79  */
80 static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
81 {
82     {
83         nbnxn_kernel_ElecRF_VdwLJCombGeom_F_4xn,
84         nbnxn_kernel_ElecRF_VdwLJCombLB_F_4xn,
85         nbnxn_kernel_ElecRF_VdwLJ_F_4xn,
86         nbnxn_kernel_ElecRF_VdwLJFSw_F_4xn,
87         nbnxn_kernel_ElecRF_VdwLJPSw_F_4xn,
88         nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_4xn,
89     },
90     {
91         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_4xn,
92         nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_4xn,
93         nbnxn_kernel_ElecQSTab_VdwLJ_F_4xn,
94         nbnxn_kernel_ElecQSTab_VdwLJFSw_F_4xn,
95         nbnxn_kernel_ElecQSTab_VdwLJPSw_F_4xn,
96         nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_4xn,
97     },
98     {
99         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_4xn,
100         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_4xn,
101         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_4xn,
102         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_4xn,
103         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_4xn,
104         nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_4xn,
105     },
106     {
107         nbnxn_kernel_ElecEw_VdwLJCombGeom_F_4xn,
108         nbnxn_kernel_ElecEw_VdwLJCombLB_F_4xn,
109         nbnxn_kernel_ElecEw_VdwLJ_F_4xn,
110         nbnxn_kernel_ElecEw_VdwLJFSw_F_4xn,
111         nbnxn_kernel_ElecEw_VdwLJPSw_F_4xn,
112         nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_4xn,
113     },
114     {
115         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_4xn,
116         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_4xn,
117         nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_4xn,
118         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_4xn,
119         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_4xn,
120         nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_4xn,
121     },
122 };
123
124 static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
125 {
126     {
127         nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_4xn,
128         nbnxn_kernel_ElecRF_VdwLJCombLB_VF_4xn,
129         nbnxn_kernel_ElecRF_VdwLJ_VF_4xn,
130         nbnxn_kernel_ElecRF_VdwLJFSw_VF_4xn,
131         nbnxn_kernel_ElecRF_VdwLJPSw_VF_4xn,
132         nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_4xn,
133     },
134     {
135         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_4xn,
136         nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_4xn,
137         nbnxn_kernel_ElecQSTab_VdwLJ_VF_4xn,
138         nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_4xn,
139         nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_4xn,
140         nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_4xn,
141     },
142     {
143         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_4xn,
144         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_4xn,
145         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_4xn,
146         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_4xn,
147         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_4xn,
148         nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_4xn,
149     },
150     {
151         nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_4xn,
152         nbnxn_kernel_ElecEw_VdwLJCombLB_VF_4xn,
153         nbnxn_kernel_ElecEw_VdwLJ_VF_4xn,
154         nbnxn_kernel_ElecEw_VdwLJFSw_VF_4xn,
155         nbnxn_kernel_ElecEw_VdwLJPSw_VF_4xn,
156         nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_4xn,
157     },
158     {
159         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_4xn,
160         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_4xn,
161         nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_4xn,
162         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_4xn,
163         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_4xn,
164         nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_4xn,
165     },
166 };
167
168 static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
169 {
170     {
171         nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_4xn,
172         nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_4xn,
173         nbnxn_kernel_ElecRF_VdwLJ_VgrpF_4xn,
174         nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_4xn,
175         nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_4xn,
176         nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_4xn,
177     },
178     {
179         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_4xn,
180         nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_4xn,
181         nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_4xn,
182         nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_4xn,
183         nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_4xn,
184         nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_4xn,
185     },
186     {
187         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_4xn,
188         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_4xn,
189         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_4xn,
190         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_4xn,
191         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_4xn,
192         nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
193     },
194     {
195         nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_4xn,
196         nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_4xn,
197         nbnxn_kernel_ElecEw_VdwLJ_VgrpF_4xn,
198         nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_4xn,
199         nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_4xn,
200         nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_4xn,
201     },
202     {
203         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_4xn,
204         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_4xn,
205         nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_4xn,
206         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_4xn,
207         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_4xn,
208         nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
209     },
210 };
211
212
213 static void
214 reduce_group_energies(int ng, int ng_2log,
215                       const real *VSvdw, const real *VSc,
216                       real *Vvdw, real *Vc)
217 {
218     const int unrollj      = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
219     const int unrollj_half = unrollj/2;
220     int       ng_p2, i, j, j0, j1, c, s;
221
222     ng_p2 = (1<<ng_2log);
223
224     /* The size of the x86 SIMD energy group buffer array is:
225      * ng*ng*ng_p2*unrollj_half*simd_width
226      */
227     for (i = 0; i < ng; i++)
228     {
229         for (j = 0; j < ng; j++)
230         {
231             Vvdw[i*ng+j] = 0;
232             Vc[i*ng+j]   = 0;
233         }
234
235         for (j1 = 0; j1 < ng; j1++)
236         {
237             for (j0 = 0; j0 < ng; j0++)
238             {
239                 c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
240                 for (s = 0; s < unrollj_half; s++)
241                 {
242                     Vvdw[i*ng+j0] += VSvdw[c+0];
243                     Vvdw[i*ng+j1] += VSvdw[c+1];
244                     Vc  [i*ng+j0] += VSc  [c+0];
245                     Vc  [i*ng+j1] += VSc  [c+1];
246                     c             += unrollj + 2;
247                 }
248             }
249         }
250     }
251 }
252
253 #else /* GMX_NBNXN_SIMD_4XN */
254
255 #include "gromacs/utility/fatalerror.h"
256
257 #endif /* GMX_NBNXN_SIMD_4XN */
258
259 void
260 nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
261                       const nbnxn_atomdata_t    gmx_unused *nbat,
262                       const interaction_const_t gmx_unused *ic,
263                       int                       gmx_unused  ewald_excl,
264                       rvec                      gmx_unused *shift_vec,
265                       int                       gmx_unused  force_flags,
266                       int                       gmx_unused  clearF,
267                       real                      gmx_unused *fshift,
268                       real                      gmx_unused *Vc,
269                       real                      gmx_unused *Vvdw)
270 #ifdef GMX_NBNXN_SIMD_4XN
271 {
272     int                nnbl;
273     nbnxn_pairlist_t **nbl;
274     int                coulkt, vdwkt = 0;
275     int                nb;
276
277     nnbl = nbl_list->nnbl;
278     nbl  = nbl_list->nbl;
279
280     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
281     {
282         coulkt = coulktRF;
283     }
284     else
285     {
286         if (ewald_excl == ewaldexclTable)
287         {
288             if (ic->rcoulomb == ic->rvdw)
289             {
290                 coulkt = coulktTAB;
291             }
292             else
293             {
294                 coulkt = coulktTAB_TWIN;
295             }
296         }
297         else
298         {
299             if (ic->rcoulomb == ic->rvdw)
300             {
301                 coulkt = coulktEWALD;
302             }
303             else
304             {
305                 coulkt = coulktEWALD_TWIN;
306             }
307         }
308     }
309
310     if (ic->vdwtype == evdwCUT)
311     {
312         switch (ic->vdw_modifier)
313         {
314             case eintmodNONE:
315             case eintmodPOTSHIFT:
316                 switch (nbat->comb_rule)
317                 {
318                     case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
319                     case ljcrLB:   vdwkt = vdwktLJCUT_COMBLB;   break;
320                     case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
321                     default:       gmx_incons("Unknown combination rule");
322                 }
323                 break;
324             case eintmodFORCESWITCH:
325                 vdwkt = vdwktLJFORCESWITCH;
326                 break;
327             case eintmodPOTSWITCH:
328                 vdwkt = vdwktLJPOTSWITCH;
329                 break;
330             default:
331                 gmx_incons("Unsupported VdW interaction modifier");
332         }
333     }
334     else if (ic->vdwtype == evdwPME)
335     {
336         if (ic->ljpme_comb_rule == eljpmeLB)
337         {
338             gmx_incons("The nbnxn SIMD kernels don't suport LJ-PME with LB");
339         }
340         vdwkt = vdwktLJEWALDCOMBGEOM;
341     }
342     else
343     {
344         gmx_incons("Unsupported VdW interaction type");
345     }
346
347 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
348     for (nb = 0; nb < nnbl; nb++)
349     {
350         nbnxn_atomdata_output_t *out;
351         real                    *fshift_p;
352
353         out = &nbat->out[nb];
354
355         if (clearF == enbvClearFYes)
356         {
357             clear_f(nbat, nb, out->f);
358         }
359
360         if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
361         {
362             fshift_p = fshift;
363         }
364         else
365         {
366             fshift_p = out->fshift;
367
368             if (clearF == enbvClearFYes)
369             {
370                 clear_fshift(fshift_p);
371             }
372         }
373
374         if (!(force_flags & GMX_FORCE_ENERGY))
375         {
376             /* Don't calculate energies */
377             p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
378                                         ic,
379                                         shift_vec,
380                                         out->f,
381                                         fshift_p);
382         }
383         else if (out->nV == 1)
384         {
385             /* No energy groups */
386             out->Vvdw[0] = 0;
387             out->Vc[0]   = 0;
388
389             p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
390                                       ic,
391                                       shift_vec,
392                                       out->f,
393                                       fshift_p,
394                                       out->Vvdw,
395                                       out->Vc);
396         }
397         else
398         {
399             /* Calculate energy group contributions */
400             int i;
401
402             for (i = 0; i < out->nVS; i++)
403             {
404                 out->VSvdw[i] = 0;
405             }
406             for (i = 0; i < out->nVS; i++)
407             {
408                 out->VSc[i] = 0;
409             }
410
411             p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
412                                          ic,
413                                          shift_vec,
414                                          out->f,
415                                          fshift_p,
416                                          out->VSvdw,
417                                          out->VSc);
418
419             reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
420                                   out->VSvdw, out->VSc,
421                                   out->Vvdw, out->Vc);
422         }
423     }
424
425     if (force_flags & GMX_FORCE_ENERGY)
426     {
427         reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
428     }
429 }
430 #else
431 {
432     gmx_incons("nbnxn_kernel_simd_4xn called when such kernels "
433                " are not enabled.");
434 }
435 #endif
436 #undef GMX_SIMD_J_UNROLL_SIZE