Update kernel templates to match kernels
[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / nbnxn_kernel_file_generator / nbnxn_kernel_simd_template.c.pre
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 #include "gmxpre.h"
36
37 #include "config.h"
38
39 #include "gromacs/legacyheaders/typedefs.h"
40 #include "gromacs/mdlib/nb_verlet.h"
41 #include "gromacs/mdlib/nbnxn_simd.h"
42
43 #ifdef {0}
44
45 {1}#include "gromacs/simd/vector_operations.h"
46
47 {2}
48 #define GMX_SIMD_J_UNROLL_SIZE {3}
49 #include "{4}"
50
51 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
52 #include "gromacs/legacyheaders/types/force_flags.h"
53 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
54 #include "gromacs/simd/simd.h"
55 #include "gromacs/utility/fatalerror.h"
56
57 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
58  */
59 enum {{
60     coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
61 }};
62
63 /*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
64  */
65 enum {{
66     vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
67 }};
68
69 /* Declare and define the kernel function pointer lookup tables.
70  * The minor index of the array goes over both the LJ combination rules,
71  * which is only supported by plain cut-off, and the LJ switch/PME functions.
72  */
73 static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
74 {7}
75 static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
76 {8}
77 static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
78 {9}
79
80 static void
81 reduce_group_energies(int ng, int ng_2log,
82                       const real *VSvdw, const real *VSc,
83                       real *Vvdw, real *Vc)
84 {{
85     const int unrollj      = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
86     const int unrollj_half = unrollj/2;
87     int       ng_p2, i, j, j0, j1, c, s;
88
89     ng_p2 = (1<<ng_2log);
90
91     /* The size of the x86 SIMD energy group buffer array is:
92      * ng*ng*ng_p2*unrollj_half*simd_width
93      */
94     for (i = 0; i < ng; i++)
95     {{
96         for (j = 0; j < ng; j++)
97         {{
98             Vvdw[i*ng+j] = 0;
99             Vc[i*ng+j]   = 0;
100         }}
101
102         for (j1 = 0; j1 < ng; j1++)
103         {{
104             for (j0 = 0; j0 < ng; j0++)
105             {{
106                 c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
107                 for (s = 0; s < unrollj_half; s++)
108                 {{
109                     Vvdw[i*ng+j0] += VSvdw[c+0];
110                     Vvdw[i*ng+j1] += VSvdw[c+1];
111                     Vc  [i*ng+j0] += VSc  [c+0];
112                     Vc  [i*ng+j1] += VSc  [c+1];
113                     c             += unrollj + 2;
114                 }}
115             }}
116         }}
117     }}
118 }}
119
120 #else /* {0} */
121
122 #include "gromacs/utility/fatalerror.h"
123
124 #endif /* {0} */
125
126 void
127 {5}(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
128 {6}const nbnxn_atomdata_t    gmx_unused *nbat,
129 {6}const interaction_const_t gmx_unused *ic,
130 {6}int                       gmx_unused  ewald_excl,
131 {6}rvec                      gmx_unused *shift_vec,
132 {6}int                       gmx_unused  force_flags,
133 {6}int                       gmx_unused  clearF,
134 {6}real                      gmx_unused *fshift,
135 {6}real                      gmx_unused *Vc,
136 {6}real                      gmx_unused *Vvdw)
137 #ifdef {0}
138 {{
139     int                nnbl;
140     nbnxn_pairlist_t **nbl;
141     int                coulkt, vdwkt = 0;
142     int                nb;
143     int                nthreads gmx_unused;
144
145     nnbl = nbl_list->nnbl;
146     nbl  = nbl_list->nbl;
147
148     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
149     {{
150         coulkt = coulktRF;
151     }}
152     else
153     {{
154         if (ewald_excl == ewaldexclTable)
155         {{
156             if (ic->rcoulomb == ic->rvdw)
157             {{
158                 coulkt = coulktTAB;
159             }}
160             else
161             {{
162                 coulkt = coulktTAB_TWIN;
163             }}
164         }}
165         else
166         {{
167             if (ic->rcoulomb == ic->rvdw)
168             {{
169                 coulkt = coulktEWALD;
170             }}
171             else
172             {{
173                 coulkt = coulktEWALD_TWIN;
174             }}
175         }}
176     }}
177
178     if (ic->vdwtype == evdwCUT)
179     {{
180         switch (ic->vdw_modifier)
181         {{
182             case eintmodNONE:
183             case eintmodPOTSHIFT:
184                 switch (nbat->comb_rule)
185                 {{
186                     case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
187                     case ljcrLB:   vdwkt = vdwktLJCUT_COMBLB;   break;
188                     case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
189                     default:       gmx_incons("Unknown combination rule");
190                 }}
191                 break;
192             case eintmodFORCESWITCH:
193                 vdwkt = vdwktLJFORCESWITCH;
194                 break;
195             case eintmodPOTSWITCH:
196                 vdwkt = vdwktLJPOTSWITCH;
197                 break;
198             default:
199                 gmx_incons("Unsupported VdW interaction modifier");
200         }}
201     }}
202     else if (ic->vdwtype == evdwPME)
203     {{
204         if (ic->ljpme_comb_rule == eljpmeLB)
205         {{
206             gmx_incons("The nbnxn SIMD kernels don't suport LJ-PME with LB");
207         }}
208         vdwkt = vdwktLJEWALDCOMBGEOM;
209     }}
210     else
211     {{
212         gmx_incons("Unsupported VdW interaction type");
213     }}
214
215     nthreads = gmx_omp_nthreads_get(emntNonbonded);
216 #pragma omp parallel for schedule(static) num_threads(nthreads)
217     for (nb = 0; nb < nnbl; nb++)
218     {{
219         nbnxn_atomdata_output_t *out;
220         real                    *fshift_p;
221
222         out = &nbat->out[nb];
223
224         if (clearF == enbvClearFYes)
225         {{
226             clear_f(nbat, nb, out->f);
227         }}
228
229         if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
230         {{
231             fshift_p = fshift;
232         }}
233         else
234         {{
235             fshift_p = out->fshift;
236
237             if (clearF == enbvClearFYes)
238             {{
239                 clear_fshift(fshift_p);
240             }}
241         }}
242
243         if (!(force_flags & GMX_FORCE_ENERGY))
244         {{
245             /* Don't calculate energies */
246             p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
247                                         ic,
248                                         shift_vec,
249                                         out->f,
250                                         fshift_p);
251         }}
252         else if (out->nV == 1)
253         {{
254             /* No energy groups */
255             out->Vvdw[0] = 0;
256             out->Vc[0]   = 0;
257
258             p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
259                                       ic,
260                                       shift_vec,
261                                       out->f,
262                                       fshift_p,
263                                       out->Vvdw,
264                                       out->Vc);
265         }}
266         else
267         {{
268             /* Calculate energy group contributions */
269             int i;
270
271             for (i = 0; i < out->nVS; i++)
272             {{
273                 out->VSvdw[i] = 0;
274             }}
275             for (i = 0; i < out->nVS; i++)
276             {{
277                 out->VSc[i] = 0;
278             }}
279
280             p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
281                                          ic,
282                                          shift_vec,
283                                          out->f,
284                                          fshift_p,
285                                          out->VSvdw,
286                                          out->VSc);
287
288             reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
289                                   out->VSvdw, out->VSc,
290                                   out->Vvdw, out->Vc);
291         }}
292     }}
293
294     if (force_flags & GMX_FORCE_ENERGY)
295     {{
296         reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
297     }}
298 }}
299 #else
300 {{
301     gmx_incons("{5} called when such kernels "
302                " are not enabled.");
303 }}
304 #endif
305 #undef GMX_SIMD_J_UNROLL_SIZE