Merge release-4-6 into master
[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / nbnxn_kernel_file_generator / nbnxn_kernel_simd_template.c.pre
1 #ifdef HAVE_CONFIG_H
2 #include <config.h>
3 #endif
4
5 #include "typedefs.h"
6
7 #ifdef {0}
8
9 {1}
10 #include "gmx_simd_macros.h"
11 #include "gmx_simd_vec.h"
12 {2}
13 #define GMX_SIMD_J_UNROLL_SIZE {3}
14 #include "{4}"
15 #include "../nbnxn_kernel_common.h"
16 #include "gmx_omp_nthreads.h"
17 #include "types/force_flags.h"
18
19 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
 *
 * The dispatcher in this file selects one value per call:
 *  - coultRF when EEL_RF(ic->eeltype) holds or ic->eeltype is eelCUT;
 *  - otherwise coultTAB or coultTAB_TWIN when ewald_excl equals
 *    ewaldexclTable, and coultEWALD or coultEWALD_TWIN when it does not;
 *  - the _TWIN variants are chosen when ic->rcoulomb differs from ic->rvdw.
 *
 * coultNR is the last enumerator and is used as the first dimension of
 * the kernel function pointer lookup tables.
20  */
21 enum {{
22     coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR
23 }};
24
25 /* Declare and define the kernel function pointer lookup tables. */
/* Each table is indexed as [electrostatics kind][nbat->comb_rule] by the
 * dispatcher in this file:
 *  - p_nbk_ener    is called on the path commented "No energy groups";
 *  - p_nbk_energrp is called on the path that accumulates per-energy-group
 *                  buffers (VSvdw, VSc);
 *  - p_nbk_noener  is called on the path commented "Don't calculate energies".
 * NOTE(review): the line after each declaration is a template placeholder;
 * presumably the kernel file generator substitutes the full initializer,
 * including its terminating semicolon - confirm against the generator script.
 */
26 static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
27 {7}
28 static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
29 {8}
30 static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
31 {9}
32
/* Reduce the SIMD energy-group buffers into plain ng x ng energy matrices.
 *
 * Both output matrices are first set to zero, row by row, and then filled
 * by summing selected entries of the SIMD buffers.
 *
 * ng       number of energy groups; the dispatcher passes nbat->nenergrp
 * ng_2log  exponent such that ng_p2 = 1<<ng_2log is the j-group stride in
 *          the SIMD buffers; the dispatcher passes nbat->neg_2log
 * VSvdw    SIMD energy-group buffer for Van der Waals energies (read only)
 * VSc      SIMD energy-group buffer for Coulomb energies (read only)
 * Vvdw     output, ng*ng entries, overwritten
 * Vc       output, ng*ng entries, overwritten
 *
 * For every (i, j1, j0) triple the block of unrollj_half*unrollj entries
 * starting at ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj is visited with
 * a stride of unrollj + 2, so step s reads elements 2*s and 2*s+1 of row s
 * when the block is viewed as unrollj_half rows of unrollj entries.
 * The first element of each pair is added to entry (i, j0) and the second
 * to entry (i, j1) of the outputs.
 * NOTE(review): this presumably mirrors how the energy-group kernels store
 * pairs of j-group energies - confirm against the kernel store code.
 */
33 static void
34 reduce_group_energies(int ng, int ng_2log,
35                       const real *VSvdw, const real *VSc,
36                       real *Vvdw, real *Vc)
37 {{
38     const int unrollj      = GMX_SIMD_WIDTH_HERE/GMX_SIMD_J_UNROLL_SIZE;
39     const int unrollj_half = unrollj/2;
40     int       ng_p2, i, j, j0, j1, c, s;
41
42     ng_p2 = (1<<ng_2log);
43
44     /* The size of the x86 SIMD energy group buffer array is:
45      * ng*ng*ng_p2*unrollj_half*simd_width
46      */
47     for (i = 0; i < ng; i++)
48     {{
           /* Clear row i of both outputs before accumulating into it. */
49         for (j = 0; j < ng; j++)
50         {{
51             Vvdw[i*ng+j] = 0;
52             Vc[i*ng+j]   = 0;
53         }}
54
55         for (j1 = 0; j1 < ng; j1++)
56         {{
57             for (j0 = 0; j0 < ng; j0++)
58             {{
                   /* Start of the buffer block for the (i, j1, j0) triple. */
59                 c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
60                 for (s = 0; s < unrollj_half; s++)
61                 {{
62                     Vvdw[i*ng+j0] += VSvdw[c+0];
63                     Vvdw[i*ng+j1] += VSvdw[c+1];
64                     Vc  [i*ng+j0] += VSc  [c+0];
65                     Vc  [i*ng+j1] += VSc  [c+1];
                       /* Advance one row plus two columns within the block. */
66                     c             += unrollj + 2;
67                 }}
68             }}
69         }}
70     }}
71 }}
72
73 #else /* {0} */
74
75 #include "gmx_fatal.h"
76
77 #endif /* {0} */
78
/* Dispatcher for the generated SIMD nbnxn kernels of one kernel flavour.
 *
 * When the guarding preprocessor symbol is defined, the function selects the
 * electrostatics kind from ic and ewald_excl, then loops over all pair lists
 * in nbl_list inside an OpenMP parallel for and calls, per list, one kernel
 * from the lookup tables indexed by [electrostatics kind][nbat->comb_rule].
 * When the symbol is not defined, the function only calls gmx_incons.
 *
 * nbl_list     pair list set; nnbl and nbl are read
 * nbat         atom data; out[nb], comb_rule, nenergrp and neg_2log are read,
 *              and the buffers reachable through out[nb] are written
 * ic           interaction constants; eeltype, rcoulomb and rvdw are read
 * ewald_excl   compared against ewaldexclTable to choose table or non-table
 *              Ewald kernels
 * shift_vec    passed unchanged to the kernels
 * force_flags  bit mask tested against GMX_FORCE_VIRIAL and GMX_FORCE_ENERGY
 * clearF       when equal to enbvClearFYes, the per-list force output, and
 *              the per-list shift force buffer when that one is used, are
 *              cleared before the kernel call
 * fshift       used directly as shift force output only when the virial is
 *              requested and there is exactly one pair list; it is not
 *              cleared here
 * Vc, Vvdw     passed to reduce_energies_over_lists when GMX_FORCE_ENERGY
 *              is set
 */
79 void
80 {5}(nbnxn_pairlist_set_t       *nbl_list,
81 {6}const nbnxn_atomdata_t     *nbat,
82 {6}const interaction_const_t  *ic,
83 {6}int                         ewald_excl,
84 {6}rvec                       *shift_vec,
85 {6}int                         force_flags,
86 {6}int                         clearF,
87 {6}real                       *fshift,
88 {6}real                       *Vc,
89 {6}real                       *Vvdw)
90 #ifdef {0}
91 {{
92     int                nnbl;
93     nbnxn_pairlist_t **nbl;
94     int                coult;
95     int                nb;
96
97     nnbl = nbl_list->nnbl;
98     nbl  = nbl_list->nbl;
99
        /* Select the electrostatics kind; the twin variants are used when
         * the Coulomb and Van der Waals cut-offs differ.
         */
100     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
101     {{
102         coult = coultRF;
103     }}
104     else
105     {{
106         if (ewald_excl == ewaldexclTable)
107         {{
108             if (ic->rcoulomb == ic->rvdw)
109             {{
110                 coult = coultTAB;
111             }}
112             else
113             {{
114                 coult = coultTAB_TWIN;
115             }}
116         }}
117         else
118         {{
119             if (ic->rcoulomb == ic->rvdw)
120             {{
121                 coult = coultEWALD;
122             }}
123             else
124             {{
125                 coult = coultEWALD_TWIN;
126             }}
127         }}
128     }}
129
        /* One loop iteration per pair list; iteration nb only uses the
         * output buffers of nbat->out[nb], except for fshift in the
         * single-list virial case below.
         */
130 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
131     for (nb = 0; nb < nnbl; nb++)
132     {{
133         nbnxn_atomdata_output_t *out;
134         real                    *fshift_p;
135
136         out = &nbat->out[nb];
137
138         if (clearF == enbvClearFYes)
139         {{
140             clear_f(nbat, nb, out->f);
141         }}
142
            /* With a single list and the virial requested, shift forces go
             * straight into the caller's fshift; otherwise they go into the
             * per-list buffer.
             */
143         if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
144         {{
145             fshift_p = fshift;
146         }}
147         else
148         {{
149             fshift_p = out->fshift;
150
151             if (clearF == enbvClearFYes)
152             {{
153                 clear_fshift(fshift_p);
154             }}
155         }}
156
157         /* With Ewald type electrostatics the forces for excluded atom pairs
158          * should not contribute to the virial sum. The exclusion forces
159          * are not calculated in the energy kernels, but are in _noener.
             * Therefore an energy kernel is also used when the virial is
             * requested with EEL_FULL electrostatics, even when energies
             * themselves are not requested.
160          */
161         if (!((force_flags & GMX_FORCE_ENERGY) ||
162               (EEL_FULL(ic->eeltype) && (force_flags & GMX_FORCE_VIRIAL))))
163         {{
164             /* Don't calculate energies */
165             p_nbk_noener[coult][nbat->comb_rule](nbl[nb], nbat,
166                                                  ic,
167                                                  shift_vec,
168                                                  out->f,
169                                                  fshift_p);
170         }}
171         else if (out->nV == 1 || !(force_flags & GMX_FORCE_ENERGY))
172         {{
173             /* No energy groups */
                /* This path is also taken when energies are not requested
                 * and the energy kernel is used only because of the virial.
                 */
174             out->Vvdw[0] = 0;
175             out->Vc[0]   = 0;
176
177             p_nbk_ener[coult][nbat->comb_rule](nbl[nb], nbat,
178                                                ic,
179                                                shift_vec,
180                                                out->f,
181                                                fshift_p,
182                                                out->Vvdw,
183                                                out->Vc);
184         }}
185         else
186         {{
187             /* Calculate energy group contributions */
188             int i;
189
                /* Zero the SIMD energy-group buffers before the kernel
                 * accumulates into them.
                 */
190             for (i = 0; i < out->nVS; i++)
191             {{
192                 out->VSvdw[i] = 0;
193             }}
194             for (i = 0; i < out->nVS; i++)
195             {{
196                 out->VSc[i] = 0;
197             }}
198
199             p_nbk_energrp[coult][nbat->comb_rule](nbl[nb], nbat,
200                                                   ic,
201                                                   shift_vec,
202                                                   out->f,
203                                                   fshift_p,
204                                                   out->VSvdw,
205                                                   out->VSc);
206
                /* Fold the SIMD buffers into the per-list energy matrices. */
207             reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
208                                   out->VSvdw, out->VSc,
209                                   out->Vvdw, out->Vc);
210         }}
211     }}
212
        /* After the parallel loop, combine the per-list energies into the
         * caller's Vvdw and Vc, only when energies were requested.
         */
213     if (force_flags & GMX_FORCE_ENERGY)
214     {{
215         reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
216     }}
217 }}
218 #else
219 {{
        /* This kernel flavour was not compiled in; report the inconsistency. */
220     gmx_incons("{5} called when such kernels "
221                " are not enabled.");
222 }}
223 #endif
224 #undef GMX_SIMD_J_UNROLL_SIZE