2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Defines functions that support JIT compilation (e.g. for OpenCL)
38 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
39 * \author Mark Abraham <mark.j.abraham@gmail.com>
40 * \ingroup module_mdlib
50 #include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
51 #include "gromacs/gmxlib/gpu_utils/ocl_compiler.h"
52 #include "gromacs/legacyheaders/types/enums.h"
53 #include "gromacs/legacyheaders/types/interaction_const.h"
54 #include "gromacs/mdlib/nbnxn_consts.h"
55 #include "gromacs/mdlib/nbnxn_gpu.h"
56 #include "gromacs/mdlib/nbnxn_gpu_jit_support.h"
57 #include "gromacs/pbcutil/ishift.h"
58 #include "gromacs/utility/cstringutil.h"
59 #include "gromacs/utility/exceptions.h"
60 #include "gromacs/utility/fatalerror.h"
62 #include "nbnxn_ocl_types.h"
64 /*! \brief Stringifies the input argument
/* '#c' turns the spelling of the argument into a string literal. A macro
   passed as the argument is NOT expanded first; use STRINGIFY_MACRO when
   the value of a macro is wanted rather than its name. */
66 #define STRINGIFY_PARAM(c) #c
68 /*! \brief Stringifies the result of expansion of a macro argument
/* Extra level of indirection: the argument is macro-expanded before
   STRINGIFY_PARAM applies '#', so the macro's replacement text is what gets
   stringified. Used in nbnxn_gpu_compile_kernels() to pass
   NBNXN_AVOID_SING_R2_INC to sprintf as text. */
70 #define STRINGIFY_MACRO(c) STRINGIFY_PARAM(c)
72 /*! \brief Array of the defines needed to generate a specific eel flavour
74 * The twin-cutoff entries are not normally used, because those setups are
75 * not available to the user. FastGen takes care of generating both
76 * single- and twin-cutoff versions because PME tuning might need both.
/* Indexed directly by eeltype in make_defines_for_kernel_types(), so the
   order of the entries must match the electrostatics enum declared elsewhere
   (presumably the eelOcl* values in nbnxn_ocl_types.h -- TODO confirm).
   Each entry starts with a space so that it can be appended straight after a
   preceding define without an explicit separator. */
78 static const char * kernel_electrostatic_family_definitions[] =
80 " -DEL_CUTOFF -DEELNAME=_ElecCut",
81 " -DEL_RF -DEELNAME=_ElecRF",
82 " -DEL_EWALD_TAB -DEELNAME=_ElecEwQSTab",
/* Twin-cutoff variant: same define as the entry above plus -DVDW_CUTOFF_CHECK. */
83 " -DEL_EWALD_TAB -DVDW_CUTOFF_CHECK -DEELNAME=_ElecEwQSTabTwinCut",
84 " -DEL_EWALD_ANA -DEELNAME=_ElecEw",
/* Twin-cutoff variant: same define as the entry above plus -DVDW_CUTOFF_CHECK. */
85 " -DEL_EWALD_ANA -DVDW_CUTOFF_CHECK -DEELNAME=_ElecEwTwinCut"
88 /*! \brief Array of the defines needed to generate a specific vdw flavour
/* Indexed directly by vdwtype in make_defines_for_kernel_types(), so the
   order of the entries must match the VdW enum declared elsewhere
   (presumably the evdwOcl* values -- TODO confirm).
   NOTE(review): the embedded line numbers jump from 90 to 93 here, so the
   opening brace and possibly a leading entry are not visible in this view;
   check the complete file before relying on index positions. */
90 static const char * kernel_VdW_family_definitions[] =
93 " -DLJ_FORCE_SWITCH -DVDWNAME=_VdwLJFsw",
94 " -DLJ_POT_SWITCH -DVDWNAME=_VdwLJPsw",
95 " -DLJ_EWALD_COMB_GEOM -DVDWNAME=_VdwLJEwCombGeom",
96 " -DLJ_EWALD_COMB_LB -DVDWNAME=_VdwLJEwCombLB"
99 /*! \brief Returns a string with the compiler defines required to avoid generating all kernel flavours
101 * For example, for flavour eelOclRF combined with evdwOclFSWITCH, the output will be such that the corresponding
102 * kernel flavour is generated:
103 * -DGMX_OCL_FASTGEN (will replace flavour generator nbnxn_ocl_kernels.clh with nbnxn_ocl_kernels_fastgen.clh)
104 * -DEL_RF (The eelOclRF flavour)
105 * -DEELNAME=_ElecRF (The first part of the generated kernel name )
106 * -DLJ_FORCE_SWITCH (The evdwOclFSWITCH flavour)
107 * -DVDWNAME=_VdwLJFsw (The second part of the generated kernel name )
109 * prune/energy are still generated as originally. It is only the flavour-level that has changed, so that
110 * only the required flavour for the simulation is compiled.
112 * If eeltype is single-range Ewald, then we need to add the
113 * twin-cutoff flavour kernels to the JIT, because PME tuning might
114 * need it. This path sets -DGMX_OCL_FASTGEN_ADD_TWINCUT, which
115 * triggers the use of nbnxn_ocl_kernels_fastgen_add_twincut.clh. This
116 * hard-codes the generation of extra kernels that have the same base
117 * flavour, and add the required -DVDW_CUTOFF_CHECK and "TwinCut" to
120 * If FastGen is not active, then nothing needs to be returned. The
121 * JIT defaults to compiling all kernel flavours.
123 * \param[in] bFastGen Whether FastGen should be used
124 * \param[in] eeltype Electrostatics kernel flavour for FastGen
125 * \param[in] vdwtype VDW kernel flavour for FastGen
126 * \return String with the defines if FastGen is active
128 * \throws std::bad_alloc if out of memory
/* Builds the FastGen define string: a generator-selection define followed by
   the electrostatics and VdW family defines looked up by index.
   NOTE(review): parts of this definition (return type, the eeltype and
   vdwtype parameters, braces and some control-flow lines) are not visible in
   this view; the comments below describe only the visible statements. */
131 make_defines_for_kernel_types(bool bFastGen,
/* Accumulates the defines; this is the value returned at the end. */
135 std::string defines_for_kernel_types;
/* True for either Ewald flavour (tabulated or analytical) that is not
   already a twin-cutoff flavour. */
139 bool bIsEwaldSingleCutoff = (eeltype == eelOclEWALD_TAB ||
140 eeltype == eelOclEWALD_ANA);
142 if (bIsEwaldSingleCutoff)
/* Selects the generator variant that adds the twin-cutoff kernels. */
144 defines_for_kernel_types += "-DGMX_OCL_FASTGEN_ADD_TWINCUT";
/* NOTE(review): as shown, the next append would also run after the one
   above; the embedded numbering (144 -> 148) suggests an else branch is
   missing from this view -- confirm against the complete file. */
148 /* This triggers the use of
149 nbnxn_ocl_kernels_fastgen.clh. */
150 defines_for_kernel_types += "-DGMX_OCL_FASTGEN";
/* Table entries begin with a space, so no separator is added here.
   No range check on eeltype or vdwtype is visible before indexing --
   presumably callers pass valid enum values; verify. */
152 defines_for_kernel_types += kernel_electrostatic_family_definitions[eeltype];
153 defines_for_kernel_types += kernel_VdW_family_definitions[vdwtype];
/* Echoes the assembled defines to stdout. */
156 printf("Setting up defines for kernel types for FastGen %s \n", defines_for_kernel_types.c_str());
160 return defines_for_kernel_types;
163 /*! \brief Compiles nbnxn kernels for OpenCL GPU given by \p mygpu
165 * With OpenCL, a call to this function must precede nbnxn_gpu_init().
167 * Doing bFastGen means only the requested kernels are compiled,
168 * significantly reducing the total compilation time. If false, all
169 * OpenCL kernels are compiled.
171 * A fatal error results if compilation fails.
173 * \param[inout] nb Manages OpenCL non-bonded calculations; compiled kernels returned in dev_info members
/* Formats the compile-time constants, obtains the kernel-type defines from
   make_defines_for_kernel_types(), calls ocl_compile_program() and stores the
   resulting program in nb->dev_info->program. A compile error code other than
   CL_SUCCESS is reported through gmx_fatal().
   NOTE(review): several lines of this definition (return type, braces, the
   declarations of context and program, the try block and most arguments of
   ocl_compile_program) are not visible in this view; the comments below
   describe only the visible statements. */
178 nbnxn_gpu_compile_kernels(gmx_nbnxn_ocl_t *nb)
180 char gpu_err_str[STRLEN];
/* FastGen is enabled by default. */
181 gmx_bool bFastGen = TRUE;
182 cl_device_id device_id;
/* Receives the -D constants formatted below.
   NOTE(review): the buffer is fixed at 256 bytes and is filled with sprintf,
   which enforces no bound; snprintf with sizeof(runtime_consts) would guard
   against overflow if the format string ever grows. */
185 char runtime_consts[256];
/* Environment override: presumably switches FastGen off when the variable is
   set (the assignment itself is not visible in this view -- confirm). */
187 if (getenv("GMX_OCL_NOFASTGEN") != NULL)
192 device_id = nb->dev_info->ocl_gpu_id.ocl_device_id;
193 context = nb->dev_info->context;
/* Host-side constants are forwarded to the kernel compiler as -D defines. */
195 sprintf(runtime_consts,
196 "-DCENTRAL=%d -DNBNXN_GPU_NCLUSTER_PER_SUPERCLUSTER=%d -DNBNXN_GPU_CLUSTER_SIZE=%d -DNBNXN_GPU_JGROUP_SIZE=%d -DNBNXN_AVOID_SING_R2_INC=%s",
197 CENTRAL, /* Defined in ishift.h */
198 NBNXN_GPU_NCLUSTER_PER_SUPERCLUSTER, /* Defined in nbnxn_consts.h */
199 NBNXN_GPU_CLUSTER_SIZE, /* Defined in nbnxn_consts.h */
200 NBNXN_GPU_JGROUP_SIZE, /* Defined in nbnxn_consts.h */
201 STRINGIFY_MACRO(NBNXN_AVOID_SING_R2_INC) /* Defined in nbnxn_consts.h */
202 /* NBNXN_AVOID_SING_R2_INC passed as string to avoid
203 floating point representation problems with sprintf */
206 /* Need to catch std::bad_alloc here and during compilation string
210 std::string defines_for_kernel_types =
211 make_defines_for_kernel_types(bFastGen,
212 nb->nbparam->eeltype,
213 nb->nbparam->vdwtype);
215 cl_int cl_error = ocl_compile_program(default_source,
217 defines_for_kernel_types.c_str(),
221 nb->dev_info->vendor_e,
224 if (cl_error != CL_SUCCESS)
226 gmx_fatal(FARGS, "Failed to compile NBNXN kernels for GPU #%s: %s",
227 nb->dev_info->device_name,
231 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
233 nb->dev_info->program = program;