Move nbnxn files to nbnxm directory
[alexxy/gromacs.git] / src / gromacs / nbnxm / kerneldispatch.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 #include "gmxpre.h"
37
38 #include "kerneldispatch.h"
39
40 #include "gromacs/math/vectypes.h"
41 #include "gromacs/mdlib/force_flags.h"
42 #include "gromacs/mdlib/gmx_omp_nthreads.h"
43 #include "gromacs/mdtypes/interaction_const.h"
44 #include "gromacs/mdtypes/md_enums.h"
45 #include "gromacs/nbnxm/nbnxm.h"
46 #include "gromacs/nbnxm/nbnxm_simd.h"
47 #include "gromacs/simd/simd.h"
48 #include "gromacs/utility/gmxassert.h"
49 #include "gromacs/utility/real.h"
50
51 #include "kernel_common.h"
52 #define INCLUDE_KERNELFUNCTION_TABLES
53 #include "gromacs/nbnxm/kernels_reference/kernel_ref.h"
54 #ifdef GMX_NBNXN_SIMD_2XNN
55 #include "gromacs/nbnxm/kernels_simd_2xmm/kernels.h"
56 #endif
57 #ifdef GMX_NBNXN_SIMD_4XN
58 #include "gromacs/nbnxm/kernels_simd_4xm/kernels.h"
59 #endif
60 #undef INCLUDE_FUNCTION_TABLES
61
62 /*! \brief Clears the energy group output buffers
63  *
64  * \param[in,out] out  nbnxn kernel output struct
65  */
66 static void clearGroupEnergies(nbnxn_atomdata_output_t *out)
67 {
68     std::fill(out->Vvdw.begin(), out->Vvdw.end(), 0.0_real);
69     std::fill(out->Vc.begin(), out->Vc.end(), 0.0_real);
70     std::fill(out->VSvdw.begin(), out->VSvdw.end(), 0.0_real);
71     std::fill(out->VSc.begin(), out->VSc.end(), 0.0_real);
72 }
73
74 /*! \brief Reduce the group-pair energy buffers produced by a SIMD kernel
75  * to single terms in the output buffers.
76  *
77  * The SIMD kernels produce a large number of energy buffer in SIMD registers
78  * to avoid scattered reads and writes.
79  *
80  * \tparam        unrollj         The unroll size for j-particles in the SIMD kernel
81  * \param[in]     numGroups       The number of energy groups
82  * \param[in]     numGroups_2log  Log2 of numGroups, rounded up
83  * \param[in,out] out             Struct with energy buffers
84  */
85 template <int unrollj> static void
86 reduceGroupEnergySimdBuffers(int                       numGroups,
87                              int                       numGroups_2log,
88                              nbnxn_atomdata_output_t  *out)
89 {
90     const int                 unrollj_half     = unrollj/2;
91     /* Energies are stored in SIMD registers with size 2^numGroups_2log */
92     const int                 numGroupsStorage = (1 << numGroups_2log);
93
94     const real * gmx_restrict vVdwSimd     = out->VSvdw.data();
95     const real * gmx_restrict vCoulombSimd = out->VSc.data();
96     real * gmx_restrict       vVdw         = out->Vvdw.data();
97     real * gmx_restrict       vCoulomb     = out->Vc.data();
98
99     /* The size of the SIMD energy group buffer array is:
100      * numGroups*numGroups*numGroupsStorage*unrollj_half*simd_width
101      */
102     for (int i = 0; i < numGroups; i++)
103     {
104         for (int j1 = 0; j1 < numGroups; j1++)
105         {
106             for (int j0 = 0; j0 < numGroups; j0++)
107             {
108                 int c = ((i*numGroups + j1)*numGroupsStorage + j0)*unrollj_half*unrollj;
109                 for (int s = 0; s < unrollj_half; s++)
110                 {
111                     vVdw    [i*numGroups + j0] += vVdwSimd    [c + 0];
112                     vVdw    [i*numGroups + j1] += vVdwSimd    [c + 1];
113                     vCoulomb[i*numGroups + j0] += vCoulombSimd[c + 0];
114                     vCoulomb[i*numGroups + j1] += vCoulombSimd[c + 1];
115                     c                          += unrollj + 2;
116                 }
117             }
118         }
119     }
120 }
121
122 void
123 nbnxn_kernel_cpu(nonbonded_verlet_group_t  *nbvg,
124                  nbnxn_atomdata_t          *nbat,
125                  const interaction_const_t *ic,
126                  rvec                      *shiftVectors,
127                  int                        forceFlags,
128                  int                        clearF,
129                  real                      *fshift,
130                  real                      *vCoulomb,
131                  real                      *vVdw)
132 {
133
134     int                      coulkt;
135     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
136     {
137         coulkt = coulktRF;
138     }
139     else
140     {
141         if (nbvg->ewald_excl == ewaldexclTable)
142         {
143             if (ic->rcoulomb == ic->rvdw)
144             {
145                 coulkt = coulktTAB;
146             }
147             else
148             {
149                 coulkt = coulktTAB_TWIN;
150             }
151         }
152         else
153         {
154             if (ic->rcoulomb == ic->rvdw)
155             {
156                 coulkt = coulktEWALD;
157             }
158             else
159             {
160                 coulkt = coulktEWALD_TWIN;
161             }
162         }
163     }
164
165     const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
166
167     int vdwkt = 0;
168     if (ic->vdwtype == evdwCUT)
169     {
170         switch (ic->vdw_modifier)
171         {
172             case eintmodNONE:
173             case eintmodPOTSHIFT:
174                 switch (nbatParams.comb_rule)
175                 {
176                     case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
177                     case ljcrLB:   vdwkt = vdwktLJCUT_COMBLB;   break;
178                     case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
179                     default:
180                         GMX_RELEASE_ASSERT(false, "Unknown combination rule");
181                 }
182                 break;
183             case eintmodFORCESWITCH:
184                 vdwkt = vdwktLJFORCESWITCH;
185                 break;
186             case eintmodPOTSWITCH:
187                 vdwkt = vdwktLJPOTSWITCH;
188                 break;
189             default:
190                 GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction modifier");
191         }
192     }
193     else if (ic->vdwtype == evdwPME)
194     {
195         if (ic->ljpme_comb_rule == eljpmeGEOM)
196         {
197             vdwkt = vdwktLJEWALDCOMBGEOM;
198         }
199         else
200         {
201             vdwkt = vdwktLJEWALDCOMBLB;
202             /* At setup we (should have) selected the C reference kernel */
203             GMX_RELEASE_ASSERT(nbvg->kernel_type == nbnxnk4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
204         }
205     }
206     else
207     {
208         GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction type");
209     }
210
211     int                nnbl = nbvg->nbl_lists.nnbl;
212     NbnxnPairlistCpu **nbl  = nbvg->nbl_lists.nbl;
213
214     int gmx_unused     nthreads = gmx_omp_nthreads_get(emntNonbonded);
215 #pragma omp parallel for schedule(static) num_threads(nthreads)
216     for (int nb = 0; nb < nnbl; nb++)
217     {
218         // Presently, the kernels do not call C++ code that can throw,
219         // so no need for a try/catch pair in this OpenMP region.
220         nbnxn_atomdata_output_t *out = &nbat->out[nb];
221
222         if (clearF == enbvClearFYes)
223         {
224             clear_f(nbat, nb, out->f.data());
225         }
226
227         real *fshift_p;
228         if ((forceFlags & GMX_FORCE_VIRIAL) && nnbl == 1)
229         {
230             fshift_p = fshift;
231         }
232         else
233         {
234             fshift_p = out->fshift.data();
235
236             if (clearF == enbvClearFYes)
237             {
238                 clear_fshift(fshift_p);
239             }
240         }
241
242         if (!(forceFlags & GMX_FORCE_ENERGY))
243         {
244             /* Don't calculate energies */
245             switch (nbvg->kernel_type)
246             {
247                 case nbnxnk4x4_PlainC:
248                     nbnxn_kernel_noener_ref[coulkt][vdwkt](nbl[nb], nbat,
249                                                            ic,
250                                                            shiftVectors,
251                                                            out->f.data(),
252                                                            fshift_p);
253                     break;
254 #ifdef GMX_NBNXN_SIMD_2XNN
255                 case nbnxnk4xN_SIMD_2xNN:
256                     nbnxm_kernel_noener_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
257                                                                  ic,
258                                                                  shiftVectors,
259                                                                  out->f.data(),
260                                                                  fshift_p);
261                     break;
262 #endif
263 #ifdef GMX_NBNXN_SIMD_4XN
264                 case nbnxnk4xN_SIMD_4xN:
265                     nbnxm_kernel_noener_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
266                                                                 ic,
267                                                                 shiftVectors,
268                                                                 out->f.data(),
269                                                                 fshift_p);
270                     break;
271 #endif
272                 default:
273                     GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
274             }
275         }
276         else if (out->Vvdw.size() == 1)
277         {
278             /* A single energy group (pair) */
279             out->Vvdw[0] = 0;
280             out->Vc[0]   = 0;
281
282             switch (nbvg->kernel_type)
283             {
284                 case nbnxnk4x4_PlainC:
285                     nbnxn_kernel_ener_ref[coulkt][vdwkt](nbl[nb], nbat,
286                                                          ic,
287                                                          shiftVectors,
288                                                          out->f.data(),
289                                                          fshift_p,
290                                                          out->Vvdw.data(),
291                                                          out->Vc.data());
292                     break;
293 #ifdef GMX_NBNXN_SIMD_2XNN
294                 case nbnxnk4xN_SIMD_2xNN:
295                     nbnxm_kernel_ener_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
296                                                                ic,
297                                                                shiftVectors,
298                                                                out->f.data(),
299                                                                fshift_p,
300                                                                out->Vvdw.data(),
301                                                                out->Vc.data());
302                     break;
303 #endif
304 #ifdef GMX_NBNXN_SIMD_4XN
305                 case nbnxnk4xN_SIMD_4xN:
306                     nbnxm_kernel_ener_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
307                                                               ic,
308                                                               shiftVectors,
309                                                               out->f.data(),
310                                                               fshift_p,
311                                                               out->Vvdw.data(),
312                                                               out->Vc.data());
313                     break;
314 #endif
315                 default:
316                     GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
317             }
318         }
319         else
320         {
321             /* Calculate energy group contributions */
322             clearGroupEnergies(out);
323
324             int unrollj = 0;
325
326             switch (nbvg->kernel_type)
327             {
328                 case nbnxnk4x4_PlainC:
329                     unrollj = c_nbnxnCpuIClusterSize;
330                     nbnxn_kernel_energrp_ref[coulkt][vdwkt](nbl[nb], nbat,
331                                                             ic,
332                                                             shiftVectors,
333                                                             out->f.data(),
334                                                             fshift_p,
335                                                             out->Vvdw.data(),
336                                                             out->Vc.data());
337                     break;
338 #ifdef GMX_NBNXN_SIMD_2XNN
339                 case nbnxnk4xN_SIMD_2xNN:
340                     unrollj = GMX_SIMD_REAL_WIDTH/2;
341                     nbnxm_kernel_energrp_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
342                                                                   ic,
343                                                                   shiftVectors,
344                                                                   out->f.data(),
345                                                                   fshift_p,
346                                                                   out->VSvdw.data(),
347                                                                   out->VSc.data());
348                     break;
349 #endif
350 #ifdef GMX_NBNXN_SIMD_4XN
351                 case nbnxnk4xN_SIMD_4xN:
352                     unrollj = GMX_SIMD_REAL_WIDTH;
353                     nbnxm_kernel_energrp_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
354                                                                  ic,
355                                                                  shiftVectors,
356                                                                  out->f.data(),
357                                                                  fshift_p,
358                                                                  out->VSvdw.data(),
359                                                                  out->VSc.data());
360                     break;
361 #endif
362                 default:
363                     GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
364             }
365
366             if (nbvg->kernel_type != nbnxnk4x4_PlainC)
367             {
368                 switch (unrollj)
369                 {
370                     case 2:
371                         reduceGroupEnergySimdBuffers<2>(nbatParams.nenergrp,
372                                                         nbatParams.neg_2log,
373                                                         out);
374                         break;
375                     case 4:
376                         reduceGroupEnergySimdBuffers<4>(nbatParams.nenergrp,
377                                                         nbatParams.neg_2log,
378                                                         out);
379                         break;
380                     case 8:
381                         reduceGroupEnergySimdBuffers<8>(nbatParams.nenergrp,
382                                                         nbatParams.neg_2log,
383                                                         out);
384                         break;
385                     default:
386                         GMX_RELEASE_ASSERT(false, "Unsupported j-unroll size");
387                 }
388             }
389         }
390     }
391
392     if (forceFlags & GMX_FORCE_ENERGY)
393     {
394         reduce_energies_over_lists(nbat, nnbl, vVdw, vCoulomb);
395     }
396 }