2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5 * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Defines functions that support JIT compilation (e.g. for OpenCL)
39 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
40 * \author Mark Abraham <mark.j.abraham@gmail.com>
41 * \ingroup module_nbnxm
51 #include "gromacs/gpu_utils/gpu_utils.h"
52 #include "gromacs/gpu_utils/ocl_compiler.h"
53 #include "gromacs/mdtypes/interaction_const.h"
54 #include "gromacs/mdtypes/md_enums.h"
55 #include "gromacs/nbnxm/gpu_jit_support.h"
56 #include "gromacs/nbnxm/nbnxm_gpu.h"
57 #include "gromacs/pbcutil/ishift.h"
58 #include "gromacs/utility/cstringutil.h"
59 #include "gromacs/utility/exceptions.h"
60 #include "gromacs/utility/fatalerror.h"
61 #include "gromacs/utility/stringutil.h"
63 #include "nbnxm_ocl_types.h"
65 /*! \brief Array of the defines needed to generate a specific eel flavour
67 * The twin-cutoff entries are not normally used, because those setups are
68 * not available to the user. FastGen takes care of generating both
69 * single- and twin-cutoff versions because PME tuning might need both.
71 static const char* kernel_electrostatic_family_definitions[] = {
// Lookup table of OpenCL build options, one string per electrostatics flavour.
// makeDefinesForKernelTypes() reads it with static_cast<int>(elecType) as the
// index, so the entry order presumably has to follow the Nbnxm::ElecType
// enumerators (TODO confirm against the enum definition).
// Every entry starts with a space so that it can be appended directly after the
// preceding define without an extra separator.
// Each entry pairs a flavour-selecting define with the -DEELNAME kernel-name
// part; the "TwinCut" entries additionally set -DVDW_CUTOFF_CHECK.
72 " -DEL_CUTOFF -DEELNAME=_ElecCut",
73 " -DEL_RF -DEELNAME=_ElecRF",
74 " -DEL_EWALD_TAB -DEELNAME=_ElecEwQSTab",
75 " -DEL_EWALD_TAB -DVDW_CUTOFF_CHECK -DEELNAME=_ElecEwQSTabTwinCut",
76 " -DEL_EWALD_ANA -DEELNAME=_ElecEw",
77 " -DEL_EWALD_ANA -DVDW_CUTOFF_CHECK -DEELNAME=_ElecEwTwinCut"
80 /*! \brief Array of the defines needed to generate a specific vdw flavour
82 static const char* kernel_VdW_family_definitions[] = {
// Lookup table of OpenCL build options, one string per VdW flavour.
// makeDefinesForKernelTypes() reads it with static_cast<int>(vdwType) as the
// index and performs no bounds check.
// NOTE(review): confirm that the number and order of entries match the
// Nbnxm::VdwType enumerators; a mismatch would select the wrong defines or
// read past the end of the array.
// Every entry starts with a space so that it can be appended directly after the
// electrostatics defines; each one sets the -DVDWNAME kernel-name part.
84 " -DLJ_COMB_GEOM -DVDWNAME=_VdwLJCombGeom",
85 " -DLJ_COMB_LB -DVDWNAME=_VdwLJCombLB",
86 " -DLJ_FORCE_SWITCH -DVDWNAME=_VdwLJFsw",
87 " -DLJ_POT_SWITCH -DVDWNAME=_VdwLJPsw",
88 " -DLJ_EWALD_COMB_GEOM -DVDWNAME=_VdwLJEwCombGeom",
89 " -DLJ_EWALD_COMB_LB -DVDWNAME=_VdwLJEwCombLB"
92 /*! \brief Returns a string with the compiler defines required to avoid all flavour generation
94 * For example if flavour ElecType::RF with VdwType::FSwitch, the output will be such that the corresponding
95 * kernel flavour is generated:
96 * -DGMX_OCL_FASTGEN (will replace flavour generator nbnxn_ocl_kernels.clh with nbnxn_ocl_kernels_fastgen.clh)
97 * -DEL_RF (The ElecType::RF flavour)
98 * -DEELNAME=_ElecRF (The first part of the generated kernel name )
99 * -DLJ_FORCE_SWITCH (The VdwType::FSwitch flavour)
100 * -DVDWNAME=_VdwLJFsw (The second part of the generated kernel name )
102 * prune/energy are still generated as originally. It is only the flavour-level that has changed, so that
103 * only the required flavour for the simulation is compiled.
105 * If elecType is single-range Ewald, then we need to add the
106 * twin-cutoff flavour kernels to the JIT, because PME tuning might
107 * need it. This path sets -DGMX_OCL_FASTGEN_ADD_TWINCUT, which
108 * triggers the use of nbnxn_ocl_kernels_fastgen_add_twincut.clh. This
109 * hard-codes the generation of extra kernels that have the same base
110 * flavour, and add the required -DVDW_CUTOFF_CHECK and "TwinCut" to
113 * If FastGen is not active, then nothing needs to be returned. The
114 * JIT defaults to compiling all kernel flavours.
116 * \param[in] bFastGen Whether FastGen should be used
117 * \param[in] elecType Electrostatics kernel flavour for FastGen
118 * \param[in] vdwType VDW kernel flavour for FastGen
119 * \return String with the defines if FastGen is active
121 * \throws std::bad_alloc if out of memory
123 static std::string makeDefinesForKernelTypes(bool bFastGen,
124 enum Nbnxm::ElecType elecType,
125 enum Nbnxm::VdwType vdwType)
127 using Nbnxm::ElecType;
// Accumulates the build options that are returned; default-constructed, i.e.
// empty until something is appended below.
128 std::string defines_for_kernel_types;
// True for the tabulated and the analytical Ewald flavour. Per the function
// documentation these are the single-cutoff variants for which the twin-cutoff
// kernels have to be generated as well, because PME tuning might need them.
132 bool bIsEwaldSingleCutoff = (elecType == ElecType::EwaldTab || elecType == ElecType::EwaldAna);
// The FASTGEN define written next has no leading space because it is the first
// token in the string; the table entries appended later bring their own.
134 if (bIsEwaldSingleCutoff)
136 defines_for_kernel_types += "-DGMX_OCL_FASTGEN_ADD_TWINCUT";
140 /* This triggers the use of
141 nbnxn_ocl_kernels_fastgen.clh. */
142 defines_for_kernel_types += "-DGMX_OCL_FASTGEN";
// Append the flavour-specific defines. Both tables are indexed with the integer
// value of the enum and nothing is range-checked here.
// NOTE(review): assumes every enum value is a valid index into its table.
144 defines_for_kernel_types += kernel_electrostatic_family_definitions[static_cast<int>(elecType)];
145 defines_for_kernel_types += kernel_VdW_family_definitions[static_cast<int>(vdwType)];
148 return defines_for_kernel_types;
151 /*! \brief Compiles nbnxn kernels for OpenCL GPU given by \p mygpu
153 * With OpenCL, a call to this function must not precede nbnxn_gpu_init() (which also calls it).
155 * Doing bFastGen means only the requested kernels are compiled,
156 * significantly reducing the total compilation time. If false, all
157 * OpenCL kernels are compiled.
159 * A fatal error results if compilation fails.
161 * \param[inout] nb Manages OpenCL non-bonded calculations; the compiled program is stored in nb->dev_rundata->program
165 void nbnxn_gpu_compile_kernels(NbnxmGpu* nb)
// FastGen (compile only the requested kernel flavour) is the default.
167 gmx_bool bFastGen = TRUE;
// Remains nullptr until compileProgram() hands back a program.
168 cl_program program = nullptr;
// Only the presence of GMX_OCL_NOFASTGEN in the environment is tested; its
// value is not inspected.
170 if (getenv("GMX_OCL_NOFASTGEN") != nullptr)
175 /* Need to catch std::bad_alloc here and during compilation string
179 std::string extraDefines =
180 makeDefinesForKernelTypes(bFastGen, nb->nbparam->elecType, nb->nbparam->vdwType);
182 /* Here we pass macros and static const/constexpr int variables defined
183 * in include files outside the opencl as macros, to avoid
184 * including those files in the plain-C JIT compilation that happens
186 * Note that we need to re-add the suffix to the floating point literals
187 * passed to the kernel to avoid type ambiguity.
189 extraDefines += gmx::formatString(
190 " -Dc_nbnxnGpuClusterSize=%d"
191 " -DNBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT=%g"
192 " -Dc_nbnxnGpuNumClusterPerSupercluster=%d"
193 " -Dc_nbnxnGpuJgroupSize=%d"
194 " -Dc_centralShiftIndex=%d"
196 c_nbnxnGpuClusterSize,
197 c_nbnxnMinDistanceSquared,
198 c_nbnxnGpuNumClusterPerSupercluster,
199 c_nbnxnGpuJgroupSize,
200 gmx::c_centralShiftIndex,
// -DIATYPE_SHMEM is added only when nb->bPrefetchLjParam is set; otherwise an
// empty string is passed.
201 (nb->bPrefetchLjParam) ? " -DIATYPE_SHMEM" : "");
204 /* TODO when we have a proper MPI-aware logging module,
205 the log output here should be written there */
// Build "nbnxm_ocl_kernels.cl" from "gromacs/nbnxm/opencl" for the context and
// device held by nb->deviceContext_. stderr is passed as the first argument,
// presumably as the destination for the log output mentioned in the TODO.
206 program = gmx::ocl::compileProgram(stderr,
207 "gromacs/nbnxm/opencl",
208 "nbnxm_ocl_kernels.cl",
210 nb->deviceContext_->context(),
211 nb->deviceContext_->deviceInfo().oclDeviceId,
212 nb->deviceContext_->deviceInfo().deviceVendor);
// A GromacsException raised by the compilation is handled here; the message
// built below identifies the affected GPU by its id and device name.
214 catch (gmx::GromacsException& e)
217 gmx::formatString("Failed to compile/load nbnxm kernels for GPU #%d %s\n",
218 nb->deviceContext_->deviceInfo().id,
219 nb->deviceContext_->deviceInfo().device_name));
// Going by the macro name and the function documentation, any exception that
// reaches this point ends the run with a fatal error.
223 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
// Store the compiled program on the run data owned by nb.
225 nb->dev_rundata->program = program;