src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
   5  * Copyright (c) 2019,2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 /*! \internal \file
  37  *  \brief Defines functions that support JIT compilation (e.g. for OpenCL)
  38  *
  39  *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
  40  *  \author Mark Abraham <mark.j.abraham@gmail.com>
  41  *  \ingroup module_nbnxm
  42  */
  43 #include "gmxpre.h"
  44
  45 #include <stdlib.h>
  46
  47 #include <cassert>
  48
  49 #include <string>
  50
  51 #include "gromacs/gpu_utils/gpu_utils.h"
  52 #include "gromacs/gpu_utils/ocl_compiler.h"
  53 #include "gromacs/mdtypes/interaction_const.h"
  54 #include "gromacs/mdtypes/md_enums.h"
  55 #include "gromacs/nbnxm/gpu_jit_support.h"
  56 #include "gromacs/nbnxm/nbnxm_gpu.h"
  57 #include "gromacs/pbcutil/ishift.h"
  58 #include "gromacs/utility/cstringutil.h"
  59 #include "gromacs/utility/exceptions.h"
  60 #include "gromacs/utility/fatalerror.h"
  61 #include "gromacs/utility/stringutil.h"
  62
  63 #include "nbnxm_ocl_types.h"
  64
  65 /*! \brief Stringifies the input argument
  66  */
  67 #define STRINGIFY_PARAM(c) #c
  68
  69 /*! \brief Stringifies the result of expansion of a macro argument
  70  */
  71 #define STRINGIFY_MACRO(c) STRINGIFY_PARAM(c)
  72
  73 /*! \brief Array of the defines needed to generate a specific eel flavour
  74  *
  75  * The twin-cutoff entries are not normally used, because those setups are
  76  * not available to the user. FastGen takes care of generating both
  77  * single- and twin-cutoff versions because PME tuning might need both.
  78  */
  79 static const char* kernel_electrostatic_family_definitions[] = {
  80     " -DEL_CUTOFF -DEELNAME=_ElecCut",
  81     " -DEL_RF -DEELNAME=_ElecRF",
  82     " -DEL_EWALD_TAB -DEELNAME=_ElecEwQSTab",
  83     " -DEL_EWALD_TAB -DVDW_CUTOFF_CHECK -DEELNAME=_ElecEwQSTabTwinCut",
  84     " -DEL_EWALD_ANA -DEELNAME=_ElecEw",
  85     " -DEL_EWALD_ANA -DVDW_CUTOFF_CHECK -DEELNAME=_ElecEwTwinCut"
  86 };
  87
  88 /*! \brief Array of the defines needed to generate a specific vdw flavour
  89  */
  90 static const char* kernel_VdW_family_definitions[] = {
  91     " -DVDWNAME=_VdwLJ",
  92     " -DLJ_COMB_GEOM -DVDWNAME=_VdwLJCombGeom",
  93     " -DLJ_COMB_LB -DVDWNAME=_VdwLJCombLB",
  94     " -DLJ_FORCE_SWITCH -DVDWNAME=_VdwLJFsw",
  95     " -DLJ_POT_SWITCH -DVDWNAME=_VdwLJPsw",
  96     " -DLJ_EWALD_COMB_GEOM -DVDWNAME=_VdwLJEwCombGeom",
  97     " -DLJ_EWALD_COMB_LB -DVDWNAME=_VdwLJEwCombLB"
  98 };
  99
 100 /*! \brief Returns a string with the compiler defines required to avoid all flavour generation
 101  *
 102  * For example if flavour eelTypeRF with evdwTypeFSWITCH, the output will be such that the corresponding
 103  * kernel flavour is generated:
 104  * -DGMX_OCL_FASTGEN          (will replace flavour generator nbnxn_ocl_kernels.clh with nbnxn_ocl_kernels_fastgen.clh)
 105  * -DEL_RF                    (The eelTypeRF flavour)
 106  * -DEELNAME=_ElecRF          (The first part of the generated kernel name )
 107  * -DLJ_EWALD_COMB_GEOM       (The evdwTypeFSWITCH flavour)
 108  * -DVDWNAME=_VdwLJEwCombGeom (The second part of the generated kernel name )
 109  *
 110  * prune/energy are still generated as originally. It is only the flavour-level that has changed, so that
 111  * only the required flavour for the simulation is compiled.
 112  *
 113  * If eeltype is single-range Ewald, then we need to add the
 114  * twin-cutoff flavour kernels to the JIT, because PME tuning might
 115  * need it. This path sets -DGMX_OCL_FASTGEN_ADD_TWINCUT, which
 116  * triggers the use of nbnxn_ocl_kernels_fastgen_add_twincut.clh. This
 117  * hard-codes the generation of extra kernels that have the same base
 118  * flavour, and add the required -DVDW_CUTOFF_CHECK and "TwinCut" to
 119  * the kernel name.
 120  *
 121  * If FastGen is not active, then nothing needs to be returned. The
 122  * JIT defaults to compiling all kernel flavours.
 123  *
 124  * \param[in]  bFastGen    Whether FastGen should be used
 125  * \param[in]  eeltype     Electrostatics kernel flavour for FastGen
 126  * \param[in]  vdwtype     VDW kernel flavour for FastGen
 127  * \return                 String with the defines if FastGen is active
 128  *
 129  * \throws std::bad_alloc if out of memory
 130  */
 131 static std::string makeDefinesForKernelTypes(bool bFastGen, int eeltype, int vdwtype)
 132 {
 133     std::string defines_for_kernel_types;
 134
 135     if (bFastGen)
 136     {
 137         bool bIsEwaldSingleCutoff = (eeltype == eelTypeEWALD_TAB || eeltype == eelTypeEWALD_ANA);
 138
 139         if (bIsEwaldSingleCutoff)
 140         {
 141             defines_for_kernel_types += "-DGMX_OCL_FASTGEN_ADD_TWINCUT";
 142         }
 143         else
 144         {
 145             /* This triggers the use of
 146                nbnxn_ocl_kernels_fastgen.clh. */
 147             defines_for_kernel_types += "-DGMX_OCL_FASTGEN";
 148         }
 149         defines_for_kernel_types += kernel_electrostatic_family_definitions[eeltype];
 150         defines_for_kernel_types += kernel_VdW_family_definitions[vdwtype];
 151     }
 152
 153     return defines_for_kernel_types;
 154 }
 155
 156 /*! \brief Compiles nbnxn kernels for OpenCL GPU given by \p mygpu
 157  *
 158  * With OpenCL, a call to this function must not precede nbnxn_gpu_init() (which also calls it).
 159  *
 160  * Doing bFastGen means only the requested kernels are compiled,
 161  * significantly reducing the total compilation time. If false, all
 162  * OpenCL kernels are compiled.
 163  *
 164  * A fatal error results if compilation fails.
 165  *
 166  * \param[inout] nb  Manages OpenCL non-bonded calculations; compiled kernels returned in deviceInfo members
 167  *
 168  * Does not throw
 169  */
 170 void nbnxn_gpu_compile_kernels(NbnxmGpu* nb)
 171 {
 172     gmx_bool   bFastGen = TRUE;
 173     cl_program program  = nullptr;
 174
 175     if (getenv("GMX_OCL_NOFASTGEN") != nullptr)
 176     {
 177         bFastGen = FALSE;
 178     }
 179
 180     /* Need to catch std::bad_alloc here and during compilation string
 181        handling. */
 182     try
 183     {
 184         std::string extraDefines =
 185                 makeDefinesForKernelTypes(bFastGen, nb->nbparam->eeltype, nb->nbparam->vdwtype);
 186
 187         /* Here we pass macros and static const/constexpr int variables defined
 188          * in include files outside the opencl as macros, to avoid
 189          * including those files in the plain-C JIT compilation that happens
 190          * at runtime. */
 191         extraDefines += gmx::formatString(
 192                 " -Dc_nbnxnGpuClusterSize=%d"
 193                 " -Dc_nbnxnMinDistanceSquared=%g"
 194                 " -Dc_nbnxnGpuNumClusterPerSupercluster=%d"
 195                 " -Dc_nbnxnGpuJgroupSize=%d"
 196                 "%s",
 197                 c_nbnxnGpuClusterSize, c_nbnxnMinDistanceSquared, c_nbnxnGpuNumClusterPerSupercluster,
 198                 c_nbnxnGpuJgroupSize, (nb->bPrefetchLjParam) ? " -DIATYPE_SHMEM" : "");
 199         try
 200         {
 201             /* TODO when we have a proper MPI-aware logging module,
 202                the log output here should be written there */
 203             program = gmx::ocl::compileProgram(
 204                     stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines,
 205                     nb->deviceContext_->context(), nb->deviceContext_->deviceInfo().oclDeviceId,
 206                     nb->deviceContext_->deviceInfo().deviceVendor);
 207         }
 208         catch (gmx::GromacsException& e)
 209         {
 210             e.prependContext(gmx::formatString("Failed to compile NBNXN kernels for GPU #%s\n",
 211                                                nb->deviceContext_->deviceInfo().device_name));
 212             throw;
 213         }
 214     }
 215     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 216
 217     nb->dev_rundata->program = program;
 218 }