Implemented nbnxn LJ switch functions
[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / simd_4xn / nbnxn_kernel_simd_4xn.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the Verlet kernel generator for
37  * kernel type 4xn.
38  */
39
40 #ifdef HAVE_CONFIG_H
41 #include <config.h>
42 #endif
43
44 #include "typedefs.h"
45
46 #ifdef GMX_NBNXN_SIMD_4XN
47
48 #ifdef GMX_NBNXN_HALF_WIDTH_SIMD
49 #define GMX_USE_HALF_WIDTH_SIMD_HERE
50 #endif
51
52 #include "gromacs/simd/macros.h"
53 #include "gromacs/simd/vector_operations.h"
54 #if !(GMX_SIMD_REAL_WIDTH == 2 || GMX_SIMD_REAL_WIDTH == 4 || GMX_SIMD_REAL_WIDTH == 8)
55 #error "unsupported SIMD width"
56 #endif
57
58 #define GMX_SIMD_J_UNROLL_SIZE 1
59 #include "nbnxn_kernel_simd_4xn.h"
60 #include "../nbnxn_kernel_common.h"
61 #include "gmx_omp_nthreads.h"
62 #include "types/force_flags.h"
63 #include "gmx_fatal.h"
64
65 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
66  */
67 enum {
68     coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR
69 };
70
71 /* Declare and define the kernel function pointer lookup tables.
72  * The minor index of the array goes over both the LJ combination rules,
73  * which is only supported by plain cut-off, and the LJ switch functions.
74  */
75 static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR+2] =
76 {
77     {
78         nbnxn_kernel_ElecRF_VdwLJCombGeom_F_4xn,
79         nbnxn_kernel_ElecRF_VdwLJCombLB_F_4xn,
80         nbnxn_kernel_ElecRF_VdwLJ_F_4xn,
81         nbnxn_kernel_ElecRF_VdwLJFSw_F_4xn,
82         nbnxn_kernel_ElecRF_VdwLJPSw_F_4xn,
83     },
84     {
85         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_4xn,
86         nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_4xn,
87         nbnxn_kernel_ElecQSTab_VdwLJ_F_4xn,
88         nbnxn_kernel_ElecQSTab_VdwLJFSw_F_4xn,
89         nbnxn_kernel_ElecQSTab_VdwLJPSw_F_4xn,
90     },
91     {
92         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_4xn,
93         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_4xn,
94         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_4xn,
95         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_4xn,
96         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_4xn,
97     },
98     {
99         nbnxn_kernel_ElecEw_VdwLJCombGeom_F_4xn,
100         nbnxn_kernel_ElecEw_VdwLJCombLB_F_4xn,
101         nbnxn_kernel_ElecEw_VdwLJ_F_4xn,
102         nbnxn_kernel_ElecEw_VdwLJFSw_F_4xn,
103         nbnxn_kernel_ElecEw_VdwLJPSw_F_4xn,
104     },
105     {
106         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_4xn,
107         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_4xn,
108         nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_4xn,
109         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_4xn,
110         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_4xn,
111     },
112 };
113
114 static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR+2] =
115 {
116     {
117         nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_4xn,
118         nbnxn_kernel_ElecRF_VdwLJCombLB_VF_4xn,
119         nbnxn_kernel_ElecRF_VdwLJ_VF_4xn,
120         nbnxn_kernel_ElecRF_VdwLJFSw_VF_4xn,
121         nbnxn_kernel_ElecRF_VdwLJPSw_VF_4xn,
122     },
123     {
124         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_4xn,
125         nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_4xn,
126         nbnxn_kernel_ElecQSTab_VdwLJ_VF_4xn,
127         nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_4xn,
128         nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_4xn,
129     },
130     {
131         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_4xn,
132         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_4xn,
133         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_4xn,
134         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_4xn,
135         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_4xn,
136     },
137     {
138         nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_4xn,
139         nbnxn_kernel_ElecEw_VdwLJCombLB_VF_4xn,
140         nbnxn_kernel_ElecEw_VdwLJ_VF_4xn,
141         nbnxn_kernel_ElecEw_VdwLJFSw_VF_4xn,
142         nbnxn_kernel_ElecEw_VdwLJPSw_VF_4xn,
143     },
144     {
145         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_4xn,
146         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_4xn,
147         nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_4xn,
148         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_4xn,
149         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_4xn,
150     },
151 };
152
153 static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR+2] =
154 {
155     {
156         nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_4xn,
157         nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_4xn,
158         nbnxn_kernel_ElecRF_VdwLJ_VgrpF_4xn,
159         nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_4xn,
160         nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_4xn,
161     },
162     {
163         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_4xn,
164         nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_4xn,
165         nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_4xn,
166         nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_4xn,
167         nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_4xn,
168     },
169     {
170         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_4xn,
171         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_4xn,
172         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_4xn,
173         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_4xn,
174         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_4xn,
175     },
176     {
177         nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_4xn,
178         nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_4xn,
179         nbnxn_kernel_ElecEw_VdwLJ_VgrpF_4xn,
180         nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_4xn,
181         nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_4xn,
182     },
183     {
184         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_4xn,
185         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_4xn,
186         nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_4xn,
187         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_4xn,
188         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_4xn,
189     },
190 };
191
192
193 static void
194 reduce_group_energies(int ng, int ng_2log,
195                       const real *VSvdw, const real *VSc,
196                       real *Vvdw, real *Vc)
197 {
198     const int unrollj      = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
199     const int unrollj_half = unrollj/2;
200     int       ng_p2, i, j, j0, j1, c, s;
201
202     ng_p2 = (1<<ng_2log);
203
204     /* The size of the x86 SIMD energy group buffer array is:
205      * ng*ng*ng_p2*unrollj_half*simd_width
206      */
207     for (i = 0; i < ng; i++)
208     {
209         for (j = 0; j < ng; j++)
210         {
211             Vvdw[i*ng+j] = 0;
212             Vc[i*ng+j]   = 0;
213         }
214
215         for (j1 = 0; j1 < ng; j1++)
216         {
217             for (j0 = 0; j0 < ng; j0++)
218             {
219                 c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
220                 for (s = 0; s < unrollj_half; s++)
221                 {
222                     Vvdw[i*ng+j0] += VSvdw[c+0];
223                     Vvdw[i*ng+j1] += VSvdw[c+1];
224                     Vc  [i*ng+j0] += VSc  [c+0];
225                     Vc  [i*ng+j1] += VSc  [c+1];
226                     c             += unrollj + 2;
227                 }
228             }
229         }
230     }
231 }
232
233 #else /* GMX_NBNXN_SIMD_4XN */
234
235 #include "gmx_fatal.h"
236
237 #endif /* GMX_NBNXN_SIMD_4XN */
238
239 void
240 nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
241                       const nbnxn_atomdata_t    gmx_unused *nbat,
242                       const interaction_const_t gmx_unused *ic,
243                       int                       gmx_unused  ewald_excl,
244                       rvec                      gmx_unused *shift_vec,
245                       int                       gmx_unused  force_flags,
246                       int                       gmx_unused  clearF,
247                       real                      gmx_unused *fshift,
248                       real                      gmx_unused *Vc,
249                       real                      gmx_unused *Vvdw)
250 #ifdef GMX_NBNXN_SIMD_4XN
251 {
252     int                nnbl;
253     nbnxn_pairlist_t **nbl;
254     int                coult, ljtreatment = 0;
255     int                nb;
256
257     nnbl = nbl_list->nnbl;
258     nbl  = nbl_list->nbl;
259
260     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
261     {
262         coult = coultRF;
263     }
264     else
265     {
266         if (ewald_excl == ewaldexclTable)
267         {
268             if (ic->rcoulomb == ic->rvdw)
269             {
270                 coult = coultTAB;
271             }
272             else
273             {
274                 coult = coultTAB_TWIN;
275             }
276         }
277         else
278         {
279             if (ic->rcoulomb == ic->rvdw)
280             {
281                 coult = coultEWALD;
282             }
283             else
284             {
285                 coult = coultEWALD_TWIN;
286             }
287         }
288     }
289
290     switch (ic->vdw_modifier)
291     {
292         case eintmodNONE:
293         case eintmodPOTSHIFT:
294             ljtreatment = nbat->comb_rule;
295             break;
296         /* Switch functions follow after cut-off combination rule kernels */
297         case eintmodFORCESWITCH:
298             ljtreatment = ljcrNR;
299             break;
300         case eintmodPOTSWITCH:
301             ljtreatment = ljcrNR + 1;
302             break;
303         default:
304             gmx_incons("Unsupported VdW interaction modifier");
305     }
306
307 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
308     for (nb = 0; nb < nnbl; nb++)
309     {
310         nbnxn_atomdata_output_t *out;
311         real                    *fshift_p;
312
313         out = &nbat->out[nb];
314
315         if (clearF == enbvClearFYes)
316         {
317             clear_f(nbat, nb, out->f);
318         }
319
320         if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
321         {
322             fshift_p = fshift;
323         }
324         else
325         {
326             fshift_p = out->fshift;
327
328             if (clearF == enbvClearFYes)
329             {
330                 clear_fshift(fshift_p);
331             }
332         }
333
334         if (!(force_flags & GMX_FORCE_ENERGY))
335         {
336             /* Don't calculate energies */
337             p_nbk_noener[coult][ljtreatment](nbl[nb], nbat,
338                                              ic,
339                                              shift_vec,
340                                              out->f,
341                                              fshift_p);
342         }
343         else if (out->nV == 1)
344         {
345             /* No energy groups */
346             out->Vvdw[0] = 0;
347             out->Vc[0]   = 0;
348
349             p_nbk_ener[coult][ljtreatment](nbl[nb], nbat,
350                                            ic,
351                                            shift_vec,
352                                            out->f,
353                                            fshift_p,
354                                            out->Vvdw,
355                                            out->Vc);
356         }
357         else
358         {
359             /* Calculate energy group contributions */
360             int i;
361
362             for (i = 0; i < out->nVS; i++)
363             {
364                 out->VSvdw[i] = 0;
365             }
366             for (i = 0; i < out->nVS; i++)
367             {
368                 out->VSc[i] = 0;
369             }
370
371             p_nbk_energrp[coult][ljtreatment](nbl[nb], nbat,
372                                               ic,
373                                               shift_vec,
374                                               out->f,
375                                               fshift_p,
376                                               out->VSvdw,
377                                               out->VSc);
378
379             reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
380                                   out->VSvdw, out->VSc,
381                                   out->Vvdw, out->Vc);
382         }
383     }
384
385     if (force_flags & GMX_FORCE_ENERGY)
386     {
387         reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
388     }
389 }
390 #else
391 {
392     gmx_incons("nbnxn_kernel_simd_4xn called when such kernels "
393                " are not enabled.");
394 }
395 #endif
396 #undef GMX_SIMD_J_UNROLL_SIZE