fe3aa1787e96fbd0e73095d4948016af1a49b60a
[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / simd_2xnn / nbnxn_kernel_simd_2xnn.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the Verlet kernel generator for
37  * kernel type 2xnn.
38  */
39
40 #include "gmxpre.h"
41
42 #include "config.h"
43
44 #include "gromacs/legacyheaders/typedefs.h"
45 #include "gromacs/mdlib/nb_verlet.h"
46 #include "gromacs/mdlib/nbnxn_simd.h"
47
48 #ifdef GMX_NBNXN_SIMD_2XNN
49
50 /* Include the full-width SIMD macros */
51 #include "gromacs/simd/vector_operations.h"
52
53 #if !(GMX_SIMD_REAL_WIDTH == 8 || GMX_SIMD_REAL_WIDTH == 16)
54 #error "unsupported SIMD width"
55 #endif
56
57 #define GMX_SIMD_J_UNROLL_SIZE 2
58 #include "nbnxn_kernel_simd_2xnn.h"
59
60 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
61 #include "gromacs/legacyheaders/types/force_flags.h"
62 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
63 #include "gromacs/simd/simd.h"
64 #include "gromacs/utility/fatalerror.h"
65
66 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
67  */
68 enum {
69     coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
70 };
71
72 /*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
73  */
74 enum {
75     vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
76 };
77
78 /* Declare and define the kernel function pointer lookup tables.
79  * The minor index of the array goes over both the LJ combination rules,
80  * which is only supported by plain cut-off, and the LJ switch/PME functions.
81  */
82 static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
83 {
84     {
85         nbnxn_kernel_ElecRF_VdwLJCombGeom_F_2xnn,
86         nbnxn_kernel_ElecRF_VdwLJCombLB_F_2xnn,
87         nbnxn_kernel_ElecRF_VdwLJ_F_2xnn,
88         nbnxn_kernel_ElecRF_VdwLJFSw_F_2xnn,
89         nbnxn_kernel_ElecRF_VdwLJPSw_F_2xnn,
90         nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_2xnn,
91     },
92     {
93         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_2xnn,
94         nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_2xnn,
95         nbnxn_kernel_ElecQSTab_VdwLJ_F_2xnn,
96         nbnxn_kernel_ElecQSTab_VdwLJFSw_F_2xnn,
97         nbnxn_kernel_ElecQSTab_VdwLJPSw_F_2xnn,
98         nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_2xnn,
99     },
100     {
101         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_2xnn,
102         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_2xnn,
103         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_2xnn,
104         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_2xnn,
105         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_2xnn,
106         nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_2xnn,
107     },
108     {
109         nbnxn_kernel_ElecEw_VdwLJCombGeom_F_2xnn,
110         nbnxn_kernel_ElecEw_VdwLJCombLB_F_2xnn,
111         nbnxn_kernel_ElecEw_VdwLJ_F_2xnn,
112         nbnxn_kernel_ElecEw_VdwLJFSw_F_2xnn,
113         nbnxn_kernel_ElecEw_VdwLJPSw_F_2xnn,
114         nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_2xnn,
115     },
116     {
117         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_2xnn,
118         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_2xnn,
119         nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_2xnn,
120         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_2xnn,
121         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_2xnn,
122         nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_2xnn,
123     },
124 };
125
126 static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
127 {
128     {
129         nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_2xnn,
130         nbnxn_kernel_ElecRF_VdwLJCombLB_VF_2xnn,
131         nbnxn_kernel_ElecRF_VdwLJ_VF_2xnn,
132         nbnxn_kernel_ElecRF_VdwLJFSw_VF_2xnn,
133         nbnxn_kernel_ElecRF_VdwLJPSw_VF_2xnn,
134         nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_2xnn,
135     },
136     {
137         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_2xnn,
138         nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_2xnn,
139         nbnxn_kernel_ElecQSTab_VdwLJ_VF_2xnn,
140         nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_2xnn,
141         nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_2xnn,
142         nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_2xnn,
143     },
144     {
145         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_2xnn,
146         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_2xnn,
147         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_2xnn,
148         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_2xnn,
149         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_2xnn,
150         nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_2xnn,
151     },
152     {
153         nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_2xnn,
154         nbnxn_kernel_ElecEw_VdwLJCombLB_VF_2xnn,
155         nbnxn_kernel_ElecEw_VdwLJ_VF_2xnn,
156         nbnxn_kernel_ElecEw_VdwLJFSw_VF_2xnn,
157         nbnxn_kernel_ElecEw_VdwLJPSw_VF_2xnn,
158         nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_2xnn,
159     },
160     {
161         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_2xnn,
162         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_2xnn,
163         nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_2xnn,
164         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_2xnn,
165         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_2xnn,
166         nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_2xnn,
167     },
168 };
169
170 static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
171 {
172     {
173         nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_2xnn,
174         nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_2xnn,
175         nbnxn_kernel_ElecRF_VdwLJ_VgrpF_2xnn,
176         nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_2xnn,
177         nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_2xnn,
178         nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_2xnn,
179     },
180     {
181         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_2xnn,
182         nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_2xnn,
183         nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_2xnn,
184         nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_2xnn,
185         nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_2xnn,
186         nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_2xnn,
187     },
188     {
189         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_2xnn,
190         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_2xnn,
191         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_2xnn,
192         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_2xnn,
193         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_2xnn,
194         nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_2xnn,
195     },
196     {
197         nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_2xnn,
198         nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_2xnn,
199         nbnxn_kernel_ElecEw_VdwLJ_VgrpF_2xnn,
200         nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_2xnn,
201         nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_2xnn,
202         nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_2xnn,
203     },
204     {
205         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_2xnn,
206         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_2xnn,
207         nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_2xnn,
208         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_2xnn,
209         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_2xnn,
210         nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_2xnn,
211     },
212 };
213
214
215 static void
216 reduce_group_energies(int ng, int ng_2log,
217                       const real *VSvdw, const real *VSc,
218                       real *Vvdw, real *Vc)
219 {
220     const int unrollj      = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
221     const int unrollj_half = unrollj/2;
222     int       ng_p2, i, j, j0, j1, c, s;
223
224     ng_p2 = (1<<ng_2log);
225
226     /* The size of the x86 SIMD energy group buffer array is:
227      * ng*ng*ng_p2*unrollj_half*simd_width
228      */
229     for (i = 0; i < ng; i++)
230     {
231         for (j = 0; j < ng; j++)
232         {
233             Vvdw[i*ng+j] = 0;
234             Vc[i*ng+j]   = 0;
235         }
236
237         for (j1 = 0; j1 < ng; j1++)
238         {
239             for (j0 = 0; j0 < ng; j0++)
240             {
241                 c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
242                 for (s = 0; s < unrollj_half; s++)
243                 {
244                     Vvdw[i*ng+j0] += VSvdw[c+0];
245                     Vvdw[i*ng+j1] += VSvdw[c+1];
246                     Vc  [i*ng+j0] += VSc  [c+0];
247                     Vc  [i*ng+j1] += VSc  [c+1];
248                     c             += unrollj + 2;
249                 }
250             }
251         }
252     }
253 }
254
255 #else /* GMX_NBNXN_SIMD_2XNN */
256
257 #include "gromacs/utility/fatalerror.h"
258
259 #endif /* GMX_NBNXN_SIMD_2XNN */
260
261 void
262 nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
263                        const nbnxn_atomdata_t    gmx_unused *nbat,
264                        const interaction_const_t gmx_unused *ic,
265                        int                       gmx_unused  ewald_excl,
266                        rvec                      gmx_unused *shift_vec,
267                        int                       gmx_unused  force_flags,
268                        int                       gmx_unused  clearF,
269                        real                      gmx_unused *fshift,
270                        real                      gmx_unused *Vc,
271                        real                      gmx_unused *Vvdw)
272 #ifdef GMX_NBNXN_SIMD_2XNN
273 {
274     int                nnbl;
275     nbnxn_pairlist_t **nbl;
276     int                coulkt, vdwkt = 0;
277     int                nb;
278     int                nthreads gmx_unused;
279
280     nnbl = nbl_list->nnbl;
281     nbl  = nbl_list->nbl;
282
283     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
284     {
285         coulkt = coulktRF;
286     }
287     else
288     {
289         if (ewald_excl == ewaldexclTable)
290         {
291             if (ic->rcoulomb == ic->rvdw)
292             {
293                 coulkt = coulktTAB;
294             }
295             else
296             {
297                 coulkt = coulktTAB_TWIN;
298             }
299         }
300         else
301         {
302             if (ic->rcoulomb == ic->rvdw)
303             {
304                 coulkt = coulktEWALD;
305             }
306             else
307             {
308                 coulkt = coulktEWALD_TWIN;
309             }
310         }
311     }
312
313     if (ic->vdwtype == evdwCUT)
314     {
315         switch (ic->vdw_modifier)
316         {
317             case eintmodNONE:
318             case eintmodPOTSHIFT:
319                 switch (nbat->comb_rule)
320                 {
321                     case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
322                     case ljcrLB:   vdwkt = vdwktLJCUT_COMBLB;   break;
323                     case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
324                     default:       gmx_incons("Unknown combination rule");
325                 }
326                 break;
327             case eintmodFORCESWITCH:
328                 vdwkt = vdwktLJFORCESWITCH;
329                 break;
330             case eintmodPOTSWITCH:
331                 vdwkt = vdwktLJPOTSWITCH;
332                 break;
333             default:
334                 gmx_incons("Unsupported VdW interaction modifier");
335         }
336     }
337     else if (ic->vdwtype == evdwPME)
338     {
339         if (ic->ljpme_comb_rule == eljpmeLB)
340         {
341             gmx_incons("The nbnxn SIMD kernels don't suport LJ-PME with LB");
342         }
343         vdwkt = vdwktLJEWALDCOMBGEOM;
344     }
345     else
346     {
347         gmx_incons("Unsupported VdW interaction type");
348     }
349
350     nthreads = gmx_omp_nthreads_get(emntNonbonded);
351 #pragma omp parallel for schedule(static) num_threads(nthreads)
352     for (nb = 0; nb < nnbl; nb++)
353     {
354         nbnxn_atomdata_output_t *out;
355         real                    *fshift_p;
356
357         out = &nbat->out[nb];
358
359         if (clearF == enbvClearFYes)
360         {
361             clear_f(nbat, nb, out->f);
362         }
363
364         if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
365         {
366             fshift_p = fshift;
367         }
368         else
369         {
370             fshift_p = out->fshift;
371
372             if (clearF == enbvClearFYes)
373             {
374                 clear_fshift(fshift_p);
375             }
376         }
377
378         if (!(force_flags & GMX_FORCE_ENERGY))
379         {
380             /* Don't calculate energies */
381             p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
382                                         ic,
383                                         shift_vec,
384                                         out->f,
385                                         fshift_p);
386         }
387         else if (out->nV == 1)
388         {
389             /* No energy groups */
390             out->Vvdw[0] = 0;
391             out->Vc[0]   = 0;
392
393             p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
394                                       ic,
395                                       shift_vec,
396                                       out->f,
397                                       fshift_p,
398                                       out->Vvdw,
399                                       out->Vc);
400         }
401         else
402         {
403             /* Calculate energy group contributions */
404             int i;
405
406             for (i = 0; i < out->nVS; i++)
407             {
408                 out->VSvdw[i] = 0;
409             }
410             for (i = 0; i < out->nVS; i++)
411             {
412                 out->VSc[i] = 0;
413             }
414
415             p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
416                                          ic,
417                                          shift_vec,
418                                          out->f,
419                                          fshift_p,
420                                          out->VSvdw,
421                                          out->VSc);
422
423             reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
424                                   out->VSvdw, out->VSc,
425                                   out->Vvdw, out->Vc);
426         }
427     }
428
429     if (force_flags & GMX_FORCE_ENERGY)
430     {
431         reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
432     }
433 }
434 #else
435 {
436     gmx_incons("nbnxn_kernel_simd_2xnn called when such kernels "
437                " are not enabled.");
438 }
439 #endif
440 #undef GMX_SIMD_J_UNROLL_SIZE