Implemented nbnxn LJ switch functions
[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / simd_2xnn / nbnxn_kernel_simd_2xnn.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the Verlet kernel generator for
37  * kernel type 2xnn.
38  */
39
40 #ifdef HAVE_CONFIG_H
41 #include <config.h>
42 #endif
43
44 #include "typedefs.h"
45
46 #ifdef GMX_NBNXN_SIMD_2XNN
47
48 /* Include the full-width SIMD macros */
49
50 #include "gromacs/simd/macros.h"
51 #include "gromacs/simd/vector_operations.h"
52
53 #if !(GMX_SIMD_REAL_WIDTH == 8 || GMX_SIMD_REAL_WIDTH == 16)
54 #error "unsupported SIMD width"
55 #endif
56
57 #define GMX_SIMD_J_UNROLL_SIZE 2
58 #include "nbnxn_kernel_simd_2xnn.h"
59 #include "../nbnxn_kernel_common.h"
60 #include "gmx_omp_nthreads.h"
61 #include "types/force_flags.h"
62 #include "gmx_fatal.h"
63
64 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
65  */
66 enum {
67     coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR
68 };
69
70 /* Declare and define the kernel function pointer lookup tables.
71  * The minor index of the array goes over both the LJ combination rules,
72  * which is only supported by plain cut-off, and the LJ switch functions.
73  */
74 static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR+2] =
75 {
76     {
77         nbnxn_kernel_ElecRF_VdwLJCombGeom_F_2xnn,
78         nbnxn_kernel_ElecRF_VdwLJCombLB_F_2xnn,
79         nbnxn_kernel_ElecRF_VdwLJ_F_2xnn,
80         nbnxn_kernel_ElecRF_VdwLJFSw_F_2xnn,
81         nbnxn_kernel_ElecRF_VdwLJPSw_F_2xnn,
82     },
83     {
84         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_2xnn,
85         nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_2xnn,
86         nbnxn_kernel_ElecQSTab_VdwLJ_F_2xnn,
87         nbnxn_kernel_ElecQSTab_VdwLJFSw_F_2xnn,
88         nbnxn_kernel_ElecQSTab_VdwLJPSw_F_2xnn,
89     },
90     {
91         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_2xnn,
92         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_2xnn,
93         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_2xnn,
94         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_2xnn,
95         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_2xnn,
96     },
97     {
98         nbnxn_kernel_ElecEw_VdwLJCombGeom_F_2xnn,
99         nbnxn_kernel_ElecEw_VdwLJCombLB_F_2xnn,
100         nbnxn_kernel_ElecEw_VdwLJ_F_2xnn,
101         nbnxn_kernel_ElecEw_VdwLJFSw_F_2xnn,
102         nbnxn_kernel_ElecEw_VdwLJPSw_F_2xnn,
103     },
104     {
105         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_2xnn,
106         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_2xnn,
107         nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_2xnn,
108         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_2xnn,
109         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_2xnn,
110     },
111 };
112
113 static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR+2] =
114 {
115     {
116         nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_2xnn,
117         nbnxn_kernel_ElecRF_VdwLJCombLB_VF_2xnn,
118         nbnxn_kernel_ElecRF_VdwLJ_VF_2xnn,
119         nbnxn_kernel_ElecRF_VdwLJFSw_VF_2xnn,
120         nbnxn_kernel_ElecRF_VdwLJPSw_VF_2xnn,
121     },
122     {
123         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_2xnn,
124         nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_2xnn,
125         nbnxn_kernel_ElecQSTab_VdwLJ_VF_2xnn,
126         nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_2xnn,
127         nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_2xnn,
128     },
129     {
130         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_2xnn,
131         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_2xnn,
132         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_2xnn,
133         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_2xnn,
134         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_2xnn,
135     },
136     {
137         nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_2xnn,
138         nbnxn_kernel_ElecEw_VdwLJCombLB_VF_2xnn,
139         nbnxn_kernel_ElecEw_VdwLJ_VF_2xnn,
140         nbnxn_kernel_ElecEw_VdwLJFSw_VF_2xnn,
141         nbnxn_kernel_ElecEw_VdwLJPSw_VF_2xnn,
142     },
143     {
144         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_2xnn,
145         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_2xnn,
146         nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_2xnn,
147         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_2xnn,
148         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_2xnn,
149     },
150 };
151
152 static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR+2] =
153 {
154     {
155         nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_2xnn,
156         nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_2xnn,
157         nbnxn_kernel_ElecRF_VdwLJ_VgrpF_2xnn,
158         nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_2xnn,
159         nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_2xnn,
160     },
161     {
162         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_2xnn,
163         nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_2xnn,
164         nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_2xnn,
165         nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_2xnn,
166         nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_2xnn,
167     },
168     {
169         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_2xnn,
170         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_2xnn,
171         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_2xnn,
172         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_2xnn,
173         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_2xnn,
174     },
175     {
176         nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_2xnn,
177         nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_2xnn,
178         nbnxn_kernel_ElecEw_VdwLJ_VgrpF_2xnn,
179         nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_2xnn,
180         nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_2xnn,
181     },
182     {
183         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_2xnn,
184         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_2xnn,
185         nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_2xnn,
186         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_2xnn,
187         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_2xnn,
188     },
189 };
190
191
192 static void
193 reduce_group_energies(int ng, int ng_2log,
194                       const real *VSvdw, const real *VSc,
195                       real *Vvdw, real *Vc)
196 {
197     const int unrollj      = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
198     const int unrollj_half = unrollj/2;
199     int       ng_p2, i, j, j0, j1, c, s;
200
201     ng_p2 = (1<<ng_2log);
202
203     /* The size of the x86 SIMD energy group buffer array is:
204      * ng*ng*ng_p2*unrollj_half*simd_width
205      */
206     for (i = 0; i < ng; i++)
207     {
208         for (j = 0; j < ng; j++)
209         {
210             Vvdw[i*ng+j] = 0;
211             Vc[i*ng+j]   = 0;
212         }
213
214         for (j1 = 0; j1 < ng; j1++)
215         {
216             for (j0 = 0; j0 < ng; j0++)
217             {
218                 c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
219                 for (s = 0; s < unrollj_half; s++)
220                 {
221                     Vvdw[i*ng+j0] += VSvdw[c+0];
222                     Vvdw[i*ng+j1] += VSvdw[c+1];
223                     Vc  [i*ng+j0] += VSc  [c+0];
224                     Vc  [i*ng+j1] += VSc  [c+1];
225                     c             += unrollj + 2;
226                 }
227             }
228         }
229     }
230 }
231
232 #else /* GMX_NBNXN_SIMD_2XNN */
233
234 #include "gmx_fatal.h"
235
236 #endif /* GMX_NBNXN_SIMD_2XNN */
237
238 void
239 nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
240                        const nbnxn_atomdata_t    gmx_unused *nbat,
241                        const interaction_const_t gmx_unused *ic,
242                        int                       gmx_unused  ewald_excl,
243                        rvec                      gmx_unused *shift_vec,
244                        int                       gmx_unused  force_flags,
245                        int                       gmx_unused  clearF,
246                        real                      gmx_unused *fshift,
247                        real                      gmx_unused *Vc,
248                        real                      gmx_unused *Vvdw)
249 #ifdef GMX_NBNXN_SIMD_2XNN
250 {
251     int                nnbl;
252     nbnxn_pairlist_t **nbl;
253     int                coult, ljtreatment = 0;
254     int                nb;
255
256     nnbl = nbl_list->nnbl;
257     nbl  = nbl_list->nbl;
258
259     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
260     {
261         coult = coultRF;
262     }
263     else
264     {
265         if (ewald_excl == ewaldexclTable)
266         {
267             if (ic->rcoulomb == ic->rvdw)
268             {
269                 coult = coultTAB;
270             }
271             else
272             {
273                 coult = coultTAB_TWIN;
274             }
275         }
276         else
277         {
278             if (ic->rcoulomb == ic->rvdw)
279             {
280                 coult = coultEWALD;
281             }
282             else
283             {
284                 coult = coultEWALD_TWIN;
285             }
286         }
287     }
288
289     switch (ic->vdw_modifier)
290     {
291         case eintmodNONE:
292         case eintmodPOTSHIFT:
293             ljtreatment = nbat->comb_rule;
294             break;
295         /* Switch functions follow after cut-off combination rule kernels */
296         case eintmodFORCESWITCH:
297             ljtreatment = ljcrNR;
298             break;
299         case eintmodPOTSWITCH:
300             ljtreatment = ljcrNR + 1;
301             break;
302         default:
303             gmx_incons("Unsupported VdW interaction modifier");
304     }
305
306 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
307     for (nb = 0; nb < nnbl; nb++)
308     {
309         nbnxn_atomdata_output_t *out;
310         real                    *fshift_p;
311
312         out = &nbat->out[nb];
313
314         if (clearF == enbvClearFYes)
315         {
316             clear_f(nbat, nb, out->f);
317         }
318
319         if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
320         {
321             fshift_p = fshift;
322         }
323         else
324         {
325             fshift_p = out->fshift;
326
327             if (clearF == enbvClearFYes)
328             {
329                 clear_fshift(fshift_p);
330             }
331         }
332
333         if (!(force_flags & GMX_FORCE_ENERGY))
334         {
335             /* Don't calculate energies */
336             p_nbk_noener[coult][ljtreatment](nbl[nb], nbat,
337                                              ic,
338                                              shift_vec,
339                                              out->f,
340                                              fshift_p);
341         }
342         else if (out->nV == 1)
343         {
344             /* No energy groups */
345             out->Vvdw[0] = 0;
346             out->Vc[0]   = 0;
347
348             p_nbk_ener[coult][ljtreatment](nbl[nb], nbat,
349                                            ic,
350                                            shift_vec,
351                                            out->f,
352                                            fshift_p,
353                                            out->Vvdw,
354                                            out->Vc);
355         }
356         else
357         {
358             /* Calculate energy group contributions */
359             int i;
360
361             for (i = 0; i < out->nVS; i++)
362             {
363                 out->VSvdw[i] = 0;
364             }
365             for (i = 0; i < out->nVS; i++)
366             {
367                 out->VSc[i] = 0;
368             }
369
370             p_nbk_energrp[coult][ljtreatment](nbl[nb], nbat,
371                                               ic,
372                                               shift_vec,
373                                               out->f,
374                                               fshift_p,
375                                               out->VSvdw,
376                                               out->VSc);
377
378             reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
379                                   out->VSvdw, out->VSc,
380                                   out->Vvdw, out->Vc);
381         }
382     }
383
384     if (force_flags & GMX_FORCE_ENERGY)
385     {
386         reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
387     }
388 }
389 #else
390 {
391     gmx_incons("nbnxn_kernel_simd_2xnn called when such kernels "
392                " are not enabled.");
393 }
394 #endif
395 #undef GMX_SIMD_J_UNROLL_SIZE