src/gromacs/nbnxm/kerneldispatch.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #include "gmxpre.h"
  37
  38 #include "kerneldispatch.h"
  39
  40 #include "gromacs/math/vectypes.h"
  41 #include "gromacs/mdlib/force_flags.h"
  42 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  43 #include "gromacs/mdtypes/interaction_const.h"
  44 #include "gromacs/mdtypes/md_enums.h"
  45 #include "gromacs/nbnxm/nbnxm.h"
  46 #include "gromacs/nbnxm/nbnxm_simd.h"
  47 #include "gromacs/simd/simd.h"
  48 #include "gromacs/utility/gmxassert.h"
  49 #include "gromacs/utility/real.h"
  50
  51 #include "kernel_common.h"
  52 #define INCLUDE_KERNELFUNCTION_TABLES
  53 #include "gromacs/nbnxm/kernels_reference/kernel_ref.h"
  54 #ifdef GMX_NBNXN_SIMD_2XNN
  55 #include "gromacs/nbnxm/kernels_simd_2xmm/kernels.h"
  56 #endif
  57 #ifdef GMX_NBNXN_SIMD_4XN
  58 #include "gromacs/nbnxm/kernels_simd_4xm/kernels.h"
  59 #endif
  60 #undef INCLUDE_FUNCTION_TABLES
  61
  62 /*! \brief Clears the energy group output buffers
  63  *
  64  * \param[in,out] out  nbnxn kernel output struct
  65  */
  66 static void clearGroupEnergies(nbnxn_atomdata_output_t *out)
  67 {
  68     std::fill(out->Vvdw.begin(), out->Vvdw.end(), 0.0_real);
  69     std::fill(out->Vc.begin(), out->Vc.end(), 0.0_real);
  70     std::fill(out->VSvdw.begin(), out->VSvdw.end(), 0.0_real);
  71     std::fill(out->VSc.begin(), out->VSc.end(), 0.0_real);
  72 }
  73
  74 /*! \brief Reduce the group-pair energy buffers produced by a SIMD kernel
  75  * to single terms in the output buffers.
  76  *
  77  * The SIMD kernels produce a large number of energy buffer in SIMD registers
  78  * to avoid scattered reads and writes.
  79  *
  80  * \tparam        unrollj         The unroll size for j-particles in the SIMD kernel
  81  * \param[in]     numGroups       The number of energy groups
  82  * \param[in]     numGroups_2log  Log2 of numGroups, rounded up
  83  * \param[in,out] out             Struct with energy buffers
  84  */
  85 template <int unrollj> static void
  86 reduceGroupEnergySimdBuffers(int                       numGroups,
  87                              int                       numGroups_2log,
  88                              nbnxn_atomdata_output_t  *out)
  89 {
  90     const int                 unrollj_half     = unrollj/2;
  91     /* Energies are stored in SIMD registers with size 2^numGroups_2log */
  92     const int                 numGroupsStorage = (1 << numGroups_2log);
  93
  94     const real * gmx_restrict vVdwSimd     = out->VSvdw.data();
  95     const real * gmx_restrict vCoulombSimd = out->VSc.data();
  96     real * gmx_restrict       vVdw         = out->Vvdw.data();
  97     real * gmx_restrict       vCoulomb     = out->Vc.data();
  98
  99     /* The size of the SIMD energy group buffer array is:
 100      * numGroups*numGroups*numGroupsStorage*unrollj_half*simd_width
 101      */
 102     for (int i = 0; i < numGroups; i++)
 103     {
 104         for (int j1 = 0; j1 < numGroups; j1++)
 105         {
 106             for (int j0 = 0; j0 < numGroups; j0++)
 107             {
 108                 int c = ((i*numGroups + j1)*numGroupsStorage + j0)*unrollj_half*unrollj;
 109                 for (int s = 0; s < unrollj_half; s++)
 110                 {
 111                     vVdw    [i*numGroups + j0] += vVdwSimd    [c + 0];
 112                     vVdw    [i*numGroups + j1] += vVdwSimd    [c + 1];
 113                     vCoulomb[i*numGroups + j0] += vCoulombSimd[c + 0];
 114                     vCoulomb[i*numGroups + j1] += vCoulombSimd[c + 1];
 115                     c                          += unrollj + 2;
 116                 }
 117             }
 118         }
 119     }
 120 }
 121
 122 void
 123 nbnxn_kernel_cpu(nonbonded_verlet_group_t  *nbvg,
 124                  nbnxn_atomdata_t          *nbat,
 125                  const interaction_const_t *ic,
 126                  rvec                      *shiftVectors,
 127                  int                        forceFlags,
 128                  int                        clearF,
 129                  real                      *fshift,
 130                  real                      *vCoulomb,
 131                  real                      *vVdw)
 132 {
 133
 134     int                      coulkt;
 135     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
 136     {
 137         coulkt = coulktRF;
 138     }
 139     else
 140     {
 141         if (nbvg->ewald_excl == ewaldexclTable)
 142         {
 143             if (ic->rcoulomb == ic->rvdw)
 144             {
 145                 coulkt = coulktTAB;
 146             }
 147             else
 148             {
 149                 coulkt = coulktTAB_TWIN;
 150             }
 151         }
 152         else
 153         {
 154             if (ic->rcoulomb == ic->rvdw)
 155             {
 156                 coulkt = coulktEWALD;
 157             }
 158             else
 159             {
 160                 coulkt = coulktEWALD_TWIN;
 161             }
 162         }
 163     }
 164
 165     const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
 166
 167     int vdwkt = 0;
 168     if (ic->vdwtype == evdwCUT)
 169     {
 170         switch (ic->vdw_modifier)
 171         {
 172             case eintmodNONE:
 173             case eintmodPOTSHIFT:
 174                 switch (nbatParams.comb_rule)
 175                 {
 176                     case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
 177                     case ljcrLB:   vdwkt = vdwktLJCUT_COMBLB;   break;
 178                     case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
 179                     default:
 180                         GMX_RELEASE_ASSERT(false, "Unknown combination rule");
 181                 }
 182                 break;
 183             case eintmodFORCESWITCH:
 184                 vdwkt = vdwktLJFORCESWITCH;
 185                 break;
 186             case eintmodPOTSWITCH:
 187                 vdwkt = vdwktLJPOTSWITCH;
 188                 break;
 189             default:
 190                 GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction modifier");
 191         }
 192     }
 193     else if (ic->vdwtype == evdwPME)
 194     {
 195         if (ic->ljpme_comb_rule == eljpmeGEOM)
 196         {
 197             vdwkt = vdwktLJEWALDCOMBGEOM;
 198         }
 199         else
 200         {
 201             vdwkt = vdwktLJEWALDCOMBLB;
 202             /* At setup we (should have) selected the C reference kernel */
 203             GMX_RELEASE_ASSERT(nbvg->kernel_type == nbnxnk4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
 204         }
 205     }
 206     else
 207     {
 208         GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction type");
 209     }
 210
 211     int                nnbl = nbvg->nbl_lists.nnbl;
 212     NbnxnPairlistCpu **nbl  = nbvg->nbl_lists.nbl;
 213
 214     int gmx_unused     nthreads = gmx_omp_nthreads_get(emntNonbonded);
 215 #pragma omp parallel for schedule(static) num_threads(nthreads)
 216     for (int nb = 0; nb < nnbl; nb++)
 217     {
 218         // Presently, the kernels do not call C++ code that can throw,
 219         // so no need for a try/catch pair in this OpenMP region.
 220         nbnxn_atomdata_output_t *out = &nbat->out[nb];
 221
 222         if (clearF == enbvClearFYes)
 223         {
 224             clear_f(nbat, nb, out->f.data());
 225         }
 226
 227         real *fshift_p;
 228         if ((forceFlags & GMX_FORCE_VIRIAL) && nnbl == 1)
 229         {
 230             fshift_p = fshift;
 231         }
 232         else
 233         {
 234             fshift_p = out->fshift.data();
 235
 236             if (clearF == enbvClearFYes)
 237             {
 238                 clear_fshift(fshift_p);
 239             }
 240         }
 241
 242         if (!(forceFlags & GMX_FORCE_ENERGY))
 243         {
 244             /* Don't calculate energies */
 245             switch (nbvg->kernel_type)
 246             {
 247                 case nbnxnk4x4_PlainC:
 248                     nbnxn_kernel_noener_ref[coulkt][vdwkt](nbl[nb], nbat,
 249                                                            ic,
 250                                                            shiftVectors,
 251                                                            out->f.data(),
 252                                                            fshift_p);
 253                     break;
 254 #ifdef GMX_NBNXN_SIMD_2XNN
 255                 case nbnxnk4xN_SIMD_2xNN:
 256                     nbnxm_kernel_noener_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
 257                                                                  ic,
 258                                                                  shiftVectors,
 259                                                                  out->f.data(),
 260                                                                  fshift_p);
 261                     break;
 262 #endif
 263 #ifdef GMX_NBNXN_SIMD_4XN
 264                 case nbnxnk4xN_SIMD_4xN:
 265                     nbnxm_kernel_noener_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
 266                                                                 ic,
 267                                                                 shiftVectors,
 268                                                                 out->f.data(),
 269                                                                 fshift_p);
 270                     break;
 271 #endif
 272                 default:
 273                     GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
 274             }
 275         }
 276         else if (out->Vvdw.size() == 1)
 277         {
 278             /* A single energy group (pair) */
 279             out->Vvdw[0] = 0;
 280             out->Vc[0]   = 0;
 281
 282             switch (nbvg->kernel_type)
 283             {
 284                 case nbnxnk4x4_PlainC:
 285                     nbnxn_kernel_ener_ref[coulkt][vdwkt](nbl[nb], nbat,
 286                                                          ic,
 287                                                          shiftVectors,
 288                                                          out->f.data(),
 289                                                          fshift_p,
 290                                                          out->Vvdw.data(),
 291                                                          out->Vc.data());
 292                     break;
 293 #ifdef GMX_NBNXN_SIMD_2XNN
 294                 case nbnxnk4xN_SIMD_2xNN:
 295                     nbnxm_kernel_ener_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
 296                                                                ic,
 297                                                                shiftVectors,
 298                                                                out->f.data(),
 299                                                                fshift_p,
 300                                                                out->Vvdw.data(),
 301                                                                out->Vc.data());
 302                     break;
 303 #endif
 304 #ifdef GMX_NBNXN_SIMD_4XN
 305                 case nbnxnk4xN_SIMD_4xN:
 306                     nbnxm_kernel_ener_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
 307                                                               ic,
 308                                                               shiftVectors,
 309                                                               out->f.data(),
 310                                                               fshift_p,
 311                                                               out->Vvdw.data(),
 312                                                               out->Vc.data());
 313                     break;
 314 #endif
 315                 default:
 316                     GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
 317             }
 318         }
 319         else
 320         {
 321             /* Calculate energy group contributions */
 322             clearGroupEnergies(out);
 323
 324             int unrollj = 0;
 325
 326             switch (nbvg->kernel_type)
 327             {
 328                 case nbnxnk4x4_PlainC:
 329                     unrollj = c_nbnxnCpuIClusterSize;
 330                     nbnxn_kernel_energrp_ref[coulkt][vdwkt](nbl[nb], nbat,
 331                                                             ic,
 332                                                             shiftVectors,
 333                                                             out->f.data(),
 334                                                             fshift_p,
 335                                                             out->Vvdw.data(),
 336                                                             out->Vc.data());
 337                     break;
 338 #ifdef GMX_NBNXN_SIMD_2XNN
 339                 case nbnxnk4xN_SIMD_2xNN:
 340                     unrollj = GMX_SIMD_REAL_WIDTH/2;
 341                     nbnxm_kernel_energrp_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
 342                                                                   ic,
 343                                                                   shiftVectors,
 344                                                                   out->f.data(),
 345                                                                   fshift_p,
 346                                                                   out->VSvdw.data(),
 347                                                                   out->VSc.data());
 348                     break;
 349 #endif
 350 #ifdef GMX_NBNXN_SIMD_4XN
 351                 case nbnxnk4xN_SIMD_4xN:
 352                     unrollj = GMX_SIMD_REAL_WIDTH;
 353                     nbnxm_kernel_energrp_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
 354                                                                  ic,
 355                                                                  shiftVectors,
 356                                                                  out->f.data(),
 357                                                                  fshift_p,
 358                                                                  out->VSvdw.data(),
 359                                                                  out->VSc.data());
 360                     break;
 361 #endif
 362                 default:
 363                     GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
 364             }
 365
 366             if (nbvg->kernel_type != nbnxnk4x4_PlainC)
 367             {
 368                 switch (unrollj)
 369                 {
 370                     case 2:
 371                         reduceGroupEnergySimdBuffers<2>(nbatParams.nenergrp,
 372                                                         nbatParams.neg_2log,
 373                                                         out);
 374                         break;
 375                     case 4:
 376                         reduceGroupEnergySimdBuffers<4>(nbatParams.nenergrp,
 377                                                         nbatParams.neg_2log,
 378                                                         out);
 379                         break;
 380                     case 8:
 381                         reduceGroupEnergySimdBuffers<8>(nbatParams.nenergrp,
 382                                                         nbatParams.neg_2log,
 383                                                         out);
 384                         break;
 385                     default:
 386                         GMX_RELEASE_ASSERT(false, "Unsupported j-unroll size");
 387                 }
 388             }
 389         }
 390     }
 391
 392     if (forceFlags & GMX_FORCE_ENERGY)
 393     {
 394         reduce_energies_over_lists(nbat, nnbl, vVdw, vCoulomb);
 395     }
 396 }