Update copyright statements and change license to LGPL
[alexxy/gromacs.git] / src / mdlib / nbnxn_kernels / nbnxn_kernel_x86_simd128.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2012, The GROMACS development team,
6  * check out http://www.gromacs.org for more information.
7  * Copyright (c) 2012, by the GROMACS development team, led by
8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9  * others, as listed in the AUTHORS file in the top-level source
10  * directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "typedefs.h"
45 #include "vec.h"
46 #include "smalloc.h"
47 #include "force.h"
48 #include "gmx_omp_nthreads.h"
49 #include "../nbnxn_consts.h"
50 #include "nbnxn_kernel_common.h"
51
52 #ifdef GMX_X86_SSE2
53
54 #include "nbnxn_kernel_x86_simd128.h"
55
56 /* Include all flavors of the 128-bit SSE or AVX kernel loops */
57
58 #define GMX_MM128_HERE
59
60 /* Analytical reaction-field kernels */
61 #define CALC_COUL_RF
62
63 #include "nbnxn_kernel_x86_simd_includes.h"
64
65 #undef CALC_COUL_RF
66
67 /* Tabulated exclusion interaction electrostatics kernels */
68 #define CALC_COUL_TAB
69
70 /* Single cut-off: rcoulomb = rvdw */
71 #include "nbnxn_kernel_x86_simd_includes.h"
72
73 /* Twin cut-off: rcoulomb >= rvdw */
74 #define VDW_CUTOFF_CHECK
75 #include "nbnxn_kernel_x86_simd_includes.h"
76 #undef VDW_CUTOFF_CHECK
77
78 #undef CALC_COUL_TAB
79
80 /* Analytical Ewald exclusion interaction electrostatics kernels */
81 #define CALC_COUL_EWALD
82
83 /* Single cut-off: rcoulomb = rvdw */
84 #include "nbnxn_kernel_x86_simd_includes.h"
85
86 /* Twin cut-off: rcoulomb >= rvdw */
87 #define VDW_CUTOFF_CHECK
88 #include "nbnxn_kernel_x86_simd_includes.h"
89 #undef VDW_CUTOFF_CHECK
90
91 #undef CALC_COUL_EWALD
92
93
94 typedef void (*p_nbk_func_ener)(const nbnxn_pairlist_t     *nbl,
95                                 const nbnxn_atomdata_t     *nbat,
96                                 const interaction_const_t  *ic,
97                                 rvec                       *shift_vec,
98                                 real                       *f,
99                                 real                       *fshift,
100                                 real                       *Vvdw,
101                                 real                       *Vc);
102
103 typedef void (*p_nbk_func_noener)(const nbnxn_pairlist_t     *nbl,
104                                   const nbnxn_atomdata_t     *nbat,
105                                   const interaction_const_t  *ic,
106                                   rvec                       *shift_vec,
107                                   real                       *f,
108                                   real                       *fshift);
109
110 enum { coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR };
111
112 #define NBK_FN(elec,ljcomb) nbnxn_kernel_x86_simd128_##elec##_comb_##ljcomb##_ener
113 static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
114 { { NBK_FN(rf        ,geom), NBK_FN(rf        ,lb), NBK_FN(rf        ,none) },
115   { NBK_FN(tab       ,geom), NBK_FN(tab       ,lb), NBK_FN(tab       ,none) },
116   { NBK_FN(tab_twin  ,geom), NBK_FN(tab_twin  ,lb), NBK_FN(tab_twin  ,none) },
117   { NBK_FN(ewald     ,geom), NBK_FN(ewald     ,lb), NBK_FN(ewald     ,none) },
118   { NBK_FN(ewald_twin,geom), NBK_FN(ewald_twin,lb), NBK_FN(ewald_twin,none) } };
119 #undef NBK_FN
120
121 #define NBK_FN(elec,ljcomb) nbnxn_kernel_x86_simd128_##elec##_comb_##ljcomb##_energrp
122 static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
123 { { NBK_FN(rf        ,geom), NBK_FN(rf        ,lb), NBK_FN(rf        ,none) },
124   { NBK_FN(tab       ,geom), NBK_FN(tab       ,lb), NBK_FN(tab       ,none) },
125   { NBK_FN(tab_twin  ,geom), NBK_FN(tab_twin  ,lb), NBK_FN(tab_twin  ,none) },
126   { NBK_FN(ewald     ,geom), NBK_FN(ewald     ,lb), NBK_FN(ewald     ,none) },
127   { NBK_FN(ewald_twin,geom), NBK_FN(ewald_twin,lb), NBK_FN(ewald_twin,none) } };
128 #undef NBK_FN
129
130 #define NBK_FN(elec,ljcomb) nbnxn_kernel_x86_simd128_##elec##_comb_##ljcomb##_noener
131 static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
132 { { NBK_FN(rf        ,geom), NBK_FN(rf        ,lb), NBK_FN(rf        ,none) },
133   { NBK_FN(tab       ,geom), NBK_FN(tab       ,lb), NBK_FN(tab       ,none) },
134   { NBK_FN(tab_twin  ,geom), NBK_FN(tab_twin  ,lb), NBK_FN(tab_twin  ,none) },
135   { NBK_FN(ewald     ,geom), NBK_FN(ewald     ,lb), NBK_FN(ewald     ,none) },
136   { NBK_FN(ewald_twin,geom), NBK_FN(ewald_twin,lb), NBK_FN(ewald_twin,none) } };
137 #undef NBK_FN
138
139
140 static void reduce_group_energies(int ng,int ng_2log,
141                                   const real *VSvdw,const real *VSc,
142                                   real *Vvdw,real *Vc)
143 {
144     int ng_p2,i,j,j0,j1,c,s;
145
146 #define SIMD_WIDTH       (GMX_X86_SIMD_WIDTH_HERE)
147 #define SIMD_WIDTH_HALF  (GMX_X86_SIMD_WIDTH_HERE/2)
148
149     ng_p2 = (1<<ng_2log);
150
151     /* The size of the x86 SIMD energy group buffer array is:
152      * ng*ng*ng_p2*SIMD_WIDTH_HALF*SIMD_WIDTH
153      */
154     for(i=0; i<ng; i++)
155     {
156         for(j=0; j<ng; j++)
157         {
158             Vvdw[i*ng+j] = 0;
159             Vc[i*ng+j]   = 0;
160         }
161
162         for(j1=0; j1<ng; j1++)
163         {
164             for(j0=0; j0<ng; j0++)
165             {
166                 c = ((i*ng + j1)*ng_p2 + j0)*SIMD_WIDTH_HALF*SIMD_WIDTH;
167                 for(s=0; s<SIMD_WIDTH_HALF; s++)
168                 {
169                     Vvdw[i*ng+j0] += VSvdw[c+0];
170                     Vvdw[i*ng+j1] += VSvdw[c+1];
171                     Vc  [i*ng+j0] += VSc  [c+0];
172                     Vc  [i*ng+j1] += VSc  [c+1];
173                     c += SIMD_WIDTH + 2;
174                 }
175             }
176         }
177     }
178 }
179
180 #endif /* GMX_X86_SSE2 */
181
182 void
183 nbnxn_kernel_x86_simd128(nbnxn_pairlist_set_t       *nbl_list,
184                          const nbnxn_atomdata_t     *nbat,
185                          const interaction_const_t  *ic,
186                          int                        ewald_excl,
187                          rvec                       *shift_vec, 
188                          int                        force_flags,
189                          int                        clearF,
190                          real                       *fshift,
191                          real                       *Vc,
192                          real                       *Vvdw)
193 #ifdef GMX_X86_SSE2
194 {
195     int              nnbl;
196     nbnxn_pairlist_t **nbl;
197     int coult;
198     int nb;
199
200     nnbl = nbl_list->nnbl;
201     nbl  = nbl_list->nbl;
202
203     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
204     {
205         coult = coultRF;
206     }
207     else
208     {
209         if (ewald_excl == ewaldexclTable)
210         {
211             if (ic->rcoulomb == ic->rvdw)
212             {
213                 coult = coultTAB;
214             }
215             else
216             {
217                 coult = coultTAB_TWIN;
218             }
219         }
220         else
221         {
222             if (ic->rcoulomb == ic->rvdw)
223             {
224                 coult = coultEWALD;
225             }
226             else
227             {
228                 coult = coultEWALD_TWIN;
229             }
230         }
231     }
232
233 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
234     for(nb=0; nb<nnbl; nb++)
235     {
236         nbnxn_atomdata_output_t *out;
237         real *fshift_p;
238
239         out = &nbat->out[nb];
240
241         if (clearF == enbvClearFYes)
242         {
243             clear_f(nbat,out->f);
244         }
245
246         if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
247         {
248             fshift_p = fshift;
249         }
250         else
251         {
252             fshift_p = out->fshift;
253
254             if (clearF == enbvClearFYes)
255             {
256                 clear_fshift(fshift_p);
257             }
258         }
259
260         /* With Ewald type electrostatics we the forces for excluded atom pairs
261          * should not contribute to the virial sum. The exclusion forces
262          * are not calculate in the energy kernels, but are in _noener.
263          */
264         if (!((force_flags & GMX_FORCE_ENERGY) ||
265               (EEL_FULL(ic->eeltype) && (force_flags & GMX_FORCE_VIRIAL))))
266         {
267             /* Don't calculate energies */
268             p_nbk_noener[coult][nbat->comb_rule](nbl[nb],nbat,
269                                                  ic,
270                                                  shift_vec,
271                                                  out->f,
272                                                  fshift_p);
273         }
274         else if (out->nV == 1 || !(force_flags & GMX_FORCE_ENERGY))
275         {
276             /* No energy groups */
277             out->Vvdw[0] = 0;
278             out->Vc[0]   = 0;
279
280             p_nbk_ener[coult][nbat->comb_rule](nbl[nb],nbat,
281                                                ic,
282                                                shift_vec,
283                                                out->f,
284                                                fshift_p,
285                                                out->Vvdw,
286                                                out->Vc);
287         }
288         else
289         {
290             /* Calculate energy group contributions */
291             int i;
292
293             for(i=0; i<out->nVS; i++)
294             {
295                 out->VSvdw[i] = 0;
296             }
297             for(i=0; i<out->nVS; i++)
298             {
299                 out->VSc[i] = 0;
300             }
301
302             p_nbk_energrp[coult][nbat->comb_rule](nbl[nb],nbat,
303                                                   ic,
304                                                   shift_vec,
305                                                   out->f,
306                                                   fshift_p,
307                                                   out->VSvdw,
308                                                   out->VSc);
309
310             reduce_group_energies(nbat->nenergrp,nbat->neg_2log,
311                                   out->VSvdw,out->VSc,
312                                   out->Vvdw,out->Vc);
313         }
314     }
315
316     if (force_flags & GMX_FORCE_ENERGY)
317     {
318         reduce_energies_over_lists(nbat,nnbl,Vvdw,Vc);
319     }
320 }
321 #else
322 {
323     gmx_incons("nbnxn_kernel_x86_simd128 called while GROMACS was configured without SSE enabled");
324 }
325 #endif