2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Implements CUDA bonded functionality
39 * \author Jon Vincent <jvincent@nvidia.com>
40 * \author Magnus Lundborg <lundborg.magnus@gmail.com>
41 * \author Berk Hess <hess@kth.se>
42 * \author Szilárd Páll <pall.szilard@gmail.com>
43 * \author Alan Gray <alang@nvidia.com>
44 * \author Mark Abraham <mark.j.abraham@gmail.com>
46 * \ingroup module_listed_forces
53 #include <math_constants.h>
55 #include "gromacs/gpu_utils/cudautils.cuh"
56 #include "gromacs/gpu_utils/vectype_ops.cuh"
57 #include "gromacs/listed_forces/gpubonded.h"
58 #include "gromacs/math/units.h"
59 #include "gromacs/mdlib/force_flags.h"
60 #include "gromacs/mdtypes/forcerec.h"
61 #include "gromacs/mdtypes/simulation_workload.h"
62 #include "gromacs/pbcutil/pbc.h"
63 #include "gromacs/pbcutil/pbc_aiuc_cuda.cuh"
64 #include "gromacs/utility/gmxassert.h"
66 #include "gpubonded_impl.h"
72 // CUDA threads per block
73 #define TPB_BONDED 256
75 /*-------------------------------- CUDA kernels-------------------------------- */
76 /*------------------------------------------------------------------------------*/
78 #define CUDA_DEG2RAD_F (CUDART_PI_F / 180.0f)
80 /*---------------- BONDED CUDA kernels--------------*/
83 __device__ __forceinline__ static void
84 harmonic_gpu(const float kA, const float xA, const float x, float* V, float* F)
86 constexpr float half = 0.5f;
96 template<bool calcVir, bool calcEner>
97 __device__ void bonds_gpu(const int i,
100 const t_iatom d_forceatoms[],
101 const t_iparams d_forceparams[],
102 const float4 gm_xq[],
104 float3 sm_fShiftLoc[],
105 const PbcAiuc pbcAiuc)
109 int3 bondData = *(int3*)(d_forceatoms + 3 * i);
110 int type = bondData.x;
114 /* dx = xi - xj, corrected for periodic boundary conditions. */
116 int ki = pbcDxAiuc<calcVir>(pbcAiuc, gm_xq[ai], gm_xq[aj], dx);
118 float dr2 = norm2(dx);
119 float dr = sqrt(dr2);
123 harmonic_gpu(d_forceparams[type].harmonic.krA, d_forceparams[type].harmonic.rA, dr, &vbond, &fbond);
132 fbond *= rsqrtf(dr2);
134 float3 fij = fbond * dx;
135 atomicAdd(&gm_f[ai], fij);
136 atomicAdd(&gm_f[aj], -fij);
137 if (calcVir && ki != CENTRAL)
139 atomicAdd(&sm_fShiftLoc[ki], fij);
140 atomicAdd(&sm_fShiftLoc[CENTRAL], -fij);
146 template<bool returnShift>
147 __device__ __forceinline__ static float bond_angle_gpu(const float4 xi,
150 const PbcAiuc& pbcAiuc,
156 /* Return value is the angle between the bonds i-j and j-k */
158 *t1 = pbcDxAiuc<returnShift>(pbcAiuc, xi, xj, *r_ij);
159 *t2 = pbcDxAiuc<returnShift>(pbcAiuc, xk, xj, *r_kj);
161 *costh = cos_angle(*r_ij, *r_kj);
162 float th = acosf(*costh);
167 template<bool calcVir, bool calcEner>
168 __device__ void angles_gpu(const int i,
171 const t_iatom d_forceatoms[],
172 const t_iparams d_forceparams[],
173 const float4 gm_xq[],
175 float3 sm_fShiftLoc[],
176 const PbcAiuc pbcAiuc)
180 int4 angleData = *(int4*)(d_forceatoms + 4 * i);
181 int type = angleData.x;
182 int ai = angleData.y;
183 int aj = angleData.z;
184 int ak = angleData.w;
191 float theta = bond_angle_gpu<calcVir>(gm_xq[ai], gm_xq[aj], gm_xq[ak], pbcAiuc, &r_ij,
192 &r_kj, &cos_theta, &t1, &t2);
196 harmonic_gpu(d_forceparams[type].harmonic.krA,
197 d_forceparams[type].harmonic.rA * CUDA_DEG2RAD_F, theta, &va, &dVdt);
204 float cos_theta2 = cos_theta * cos_theta;
205 if (cos_theta2 < 1.0f)
207 float st = dVdt * rsqrtf(1.0f - cos_theta2);
208 float sth = st * cos_theta;
209 float nrij2 = norm2(r_ij);
210 float nrkj2 = norm2(r_kj);
212 float nrij_1 = rsqrtf(nrij2);
213 float nrkj_1 = rsqrtf(nrkj2);
215 float cik = st * nrij_1 * nrkj_1;
216 float cii = sth * nrij_1 * nrij_1;
217 float ckk = sth * nrkj_1 * nrkj_1;
219 float3 f_i = cii * r_ij - cik * r_kj;
220 float3 f_k = ckk * r_kj - cik * r_ij;
221 float3 f_j = -f_i - f_k;
223 atomicAdd(&gm_f[ai], f_i);
224 atomicAdd(&gm_f[aj], f_j);
225 atomicAdd(&gm_f[ak], f_k);
229 atomicAdd(&sm_fShiftLoc[t1], f_i);
230 atomicAdd(&sm_fShiftLoc[CENTRAL], f_j);
231 atomicAdd(&sm_fShiftLoc[t2], f_k);
237 template<bool calcVir, bool calcEner>
238 __device__ void urey_bradley_gpu(const int i,
241 const t_iatom d_forceatoms[],
242 const t_iparams d_forceparams[],
243 const float4 gm_xq[],
245 float3 sm_fShiftLoc[],
246 const PbcAiuc pbcAiuc)
250 int4 ubData = *(int4*)(d_forceatoms + 4 * i);
256 float th0A = d_forceparams[type].u_b.thetaA * CUDA_DEG2RAD_F;
257 float kthA = d_forceparams[type].u_b.kthetaA;
258 float r13A = d_forceparams[type].u_b.r13A;
259 float kUBA = d_forceparams[type].u_b.kUBA;
266 float theta = bond_angle_gpu<calcVir>(gm_xq[ai], gm_xq[aj], gm_xq[ak], pbcAiuc, &r_ij,
267 &r_kj, &cos_theta, &t1, &t2);
271 harmonic_gpu(kthA, th0A, theta, &va, &dVdt);
279 int ki = pbcDxAiuc<calcVir>(pbcAiuc, gm_xq[ai], gm_xq[ak], r_ik);
281 float dr2 = norm2(r_ik);
282 float dr = dr2 * rsqrtf(dr2);
286 harmonic_gpu(kUBA, r13A, dr, &vbond, &fbond);
288 float cos_theta2 = cos_theta * cos_theta;
289 if (cos_theta2 < 1.0f)
291 float st = dVdt * rsqrtf(1.0f - cos_theta2);
292 float sth = st * cos_theta;
294 float nrkj2 = norm2(r_kj);
295 float nrij2 = norm2(r_ij);
297 float cik = st * rsqrtf(nrkj2 * nrij2);
298 float cii = sth / nrij2;
299 float ckk = sth / nrkj2;
301 float3 f_i = cii * r_ij - cik * r_kj;
302 float3 f_k = ckk * r_kj - cik * r_ij;
303 float3 f_j = -f_i - f_k;
305 atomicAdd(&gm_f[ai], f_i);
306 atomicAdd(&gm_f[aj], f_j);
307 atomicAdd(&gm_f[ak], f_k);
311 atomicAdd(&sm_fShiftLoc[t1], f_i);
312 atomicAdd(&sm_fShiftLoc[CENTRAL], f_j);
313 atomicAdd(&sm_fShiftLoc[t2], f_k);
317 /* Time for the bond calculations */
325 fbond *= rsqrtf(dr2);
327 float3 fik = fbond * r_ik;
328 atomicAdd(&gm_f[ai], fik);
329 atomicAdd(&gm_f[ak], -fik);
331 if (calcVir && ki != CENTRAL)
333 atomicAdd(&sm_fShiftLoc[ki], fik);
334 atomicAdd(&sm_fShiftLoc[CENTRAL], -fik);
340 template<bool returnShift, typename T>
341 __device__ __forceinline__ static float dih_angle_gpu(const T xi,
345 const PbcAiuc& pbcAiuc,
355 *t1 = pbcDxAiuc<returnShift>(pbcAiuc, xi, xj, *r_ij);
356 *t2 = pbcDxAiuc<returnShift>(pbcAiuc, xk, xj, *r_kj);
357 *t3 = pbcDxAiuc<returnShift>(pbcAiuc, xk, xl, *r_kl);
359 *m = cprod(*r_ij, *r_kj);
360 *n = cprod(*r_kj, *r_kl);
361 float phi = gmx_angle(*m, *n);
362 float ipr = iprod(*r_ij, *n);
363 float sign = (ipr < 0.0f) ? -1.0f : 1.0f;
370 __device__ __forceinline__ static void
371 dopdihs_gpu(const float cpA, const float phiA, const int mult, const float phi, float* v, float* f)
375 mdphi = mult * phi - phiA * CUDA_DEG2RAD_F;
377 *v = cpA * (1.0f + cosf(mdphi));
378 *f = -cpA * mult * sdphi;
381 template<bool calcVir>
382 __device__ static void do_dih_fup_gpu(const int i,
393 float3 sm_fShiftLoc[],
394 const PbcAiuc& pbcAiuc,
395 const float4 gm_xq[],
398 const int gmx_unused t3)
400 float iprm = norm2(m);
401 float iprn = norm2(n);
402 float nrkj2 = norm2(r_kj);
403 float toler = nrkj2 * GMX_REAL_EPS;
404 if ((iprm > toler) && (iprn > toler))
406 float nrkj_1 = rsqrtf(nrkj2); // replacing std::invsqrt call
407 float nrkj_2 = nrkj_1 * nrkj_1;
408 float nrkj = nrkj2 * nrkj_1;
409 float a = -ddphi * nrkj / iprm;
411 float b = ddphi * nrkj / iprn;
413 float p = iprod(r_ij, r_kj);
415 float q = iprod(r_kl, r_kj);
417 float3 uvec = p * f_i;
418 float3 vvec = q * f_l;
419 float3 svec = uvec - vvec;
420 float3 f_j = f_i - svec;
421 float3 f_k = f_l + svec;
423 atomicAdd(&gm_f[i], f_i);
424 atomicAdd(&gm_f[j], -f_j);
425 atomicAdd(&gm_f[k], -f_k);
426 atomicAdd(&gm_f[l], f_l);
431 int t3 = pbcDxAiuc<calcVir>(pbcAiuc, gm_xq[l], gm_xq[j], dx_jl);
433 atomicAdd(&sm_fShiftLoc[t1], f_i);
434 atomicAdd(&sm_fShiftLoc[CENTRAL], -f_j);
435 atomicAdd(&sm_fShiftLoc[t2], -f_k);
436 atomicAdd(&sm_fShiftLoc[t3], f_l);
441 template<bool calcVir, bool calcEner>
442 __device__ void pdihs_gpu(const int i,
445 const t_iatom d_forceatoms[],
446 const t_iparams d_forceparams[],
447 const float4 gm_xq[],
449 float3 sm_fShiftLoc[],
450 const PbcAiuc pbcAiuc)
454 int type = d_forceatoms[5 * i];
455 int ai = d_forceatoms[5 * i + 1];
456 int aj = d_forceatoms[5 * i + 2];
457 int ak = d_forceatoms[5 * i + 3];
458 int al = d_forceatoms[5 * i + 4];
468 float phi = dih_angle_gpu<calcVir>(gm_xq[ai], gm_xq[aj], gm_xq[ak], gm_xq[al], pbcAiuc,
469 &r_ij, &r_kj, &r_kl, &m, &n, &t1, &t2, &t3);
473 dopdihs_gpu(d_forceparams[type].pdihs.cpA, d_forceparams[type].pdihs.phiA,
474 d_forceparams[type].pdihs.mult, phi, &vpd, &ddphi);
481 do_dih_fup_gpu<calcVir>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, gm_f, sm_fShiftLoc,
482 pbcAiuc, gm_xq, t1, t2, t3);
486 template<bool calcVir, bool calcEner>
487 __device__ void rbdihs_gpu(const int i,
490 const t_iatom d_forceatoms[],
491 const t_iparams d_forceparams[],
492 const float4 gm_xq[],
494 float3 sm_fShiftLoc[],
495 const PbcAiuc pbcAiuc)
497 constexpr float c0 = 0.0f, c1 = 1.0f, c2 = 2.0f, c3 = 3.0f, c4 = 4.0f, c5 = 5.0f;
501 int type = d_forceatoms[5 * i];
502 int ai = d_forceatoms[5 * i + 1];
503 int aj = d_forceatoms[5 * i + 2];
504 int ak = d_forceatoms[5 * i + 3];
505 int al = d_forceatoms[5 * i + 4];
515 float phi = dih_angle_gpu<calcVir>(gm_xq[ai], gm_xq[aj], gm_xq[ak], gm_xq[al], pbcAiuc,
516 &r_ij, &r_kj, &r_kl, &m, &n, &t1, &t2, &t3);
518 /* Change to polymer convention */
527 float cos_phi = cosf(phi);
528 /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
529 float sin_phi = sinf(phi);
531 float parm[NR_RBDIHS];
532 for (int j = 0; j < NR_RBDIHS; j++)
534 parm[j] = d_forceparams[type].rbdihs.rbcA[j];
536 /* Calculate cosine powers */
537 /* Calculate the energy */
538 /* Calculate the derivative */
544 ddphi += rbp * cosfac;
551 ddphi += c2 * rbp * cosfac;
558 ddphi += c3 * rbp * cosfac;
565 ddphi += c4 * rbp * cosfac;
572 ddphi += c5 * rbp * cosfac;
579 ddphi = -ddphi * sin_phi;
581 do_dih_fup_gpu<calcVir>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, gm_f, sm_fShiftLoc,
582 pbcAiuc, gm_xq, t1, t2, t3);
590 __device__ __forceinline__ static void make_dp_periodic_gpu(float* dp)
592 /* dp cannot be outside (-pi,pi) */
593 if (*dp >= CUDART_PI_F)
595 *dp -= 2.0f * CUDART_PI_F;
597 else if (*dp < -CUDART_PI_F)
599 *dp += 2.0f * CUDART_PI_F;
603 template<bool calcVir, bool calcEner>
604 __device__ void idihs_gpu(const int i,
607 const t_iatom d_forceatoms[],
608 const t_iparams d_forceparams[],
609 const float4 gm_xq[],
611 float3 sm_fShiftLoc[],
612 const PbcAiuc pbcAiuc)
616 int type = d_forceatoms[5 * i];
617 int ai = d_forceatoms[5 * i + 1];
618 int aj = d_forceatoms[5 * i + 2];
619 int ak = d_forceatoms[5 * i + 3];
620 int al = d_forceatoms[5 * i + 4];
630 float phi = dih_angle_gpu<calcVir>(gm_xq[ai], gm_xq[aj], gm_xq[ak], gm_xq[al], pbcAiuc,
631 &r_ij, &r_kj, &r_kl, &m, &n, &t1, &t2, &t3);
633 /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
634 * force changes if we just apply a normal harmonic.
635 * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
636 * This means we will never have the periodicity problem, unless
637 * the dihedral is Pi away from phiO, which is very unlikely due to
640 float kA = d_forceparams[type].harmonic.krA;
641 float pA = d_forceparams[type].harmonic.rA;
643 float phi0 = pA * CUDA_DEG2RAD_F;
645 float dp = phi - phi0;
647 make_dp_periodic_gpu(&dp);
649 float ddphi = -kA * dp;
651 do_dih_fup_gpu<calcVir>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, gm_f, sm_fShiftLoc,
652 pbcAiuc, gm_xq, t1, t2, t3);
656 *vtot_loc += -0.5f * ddphi * dp;
661 template<bool calcVir, bool calcEner>
662 __device__ void pairs_gpu(const int i,
664 const t_iatom d_forceatoms[],
665 const t_iparams iparams[],
666 const float4 gm_xq[],
668 float3 sm_fShiftLoc[],
669 const PbcAiuc pbcAiuc,
670 const float scale_factor,
676 // TODO this should be made into a separate type, the GPU and CPU sizes should be compared
677 int3 pairData = *(int3*)(d_forceatoms + 3 * i);
678 int type = pairData.x;
682 float qq = gm_xq[ai].w * gm_xq[aj].w;
683 float c6 = iparams[type].lj14.c6A;
684 float c12 = iparams[type].lj14.c12A;
686 /* Do we need to apply full periodic boundary conditions? */
688 int fshift_index = pbcDxAiuc<calcVir>(pbcAiuc, gm_xq[ai], gm_xq[aj], dr);
690 float r2 = norm2(dr);
691 float rinv = rsqrtf(r2);
692 float rinv2 = rinv * rinv;
693 float rinv6 = rinv2 * rinv2 * rinv2;
695 /* Calculate the Coulomb force * r */
696 float velec = scale_factor * qq * rinv;
698 /* Calculate the LJ force * r and add it to the Coulomb part */
699 float fr = (12.0f * c12 * rinv6 - 6.0f * c6) * rinv6 + velec;
701 float finvr = fr * rinv2;
702 float3 f = finvr * dr;
705 atomicAdd(&gm_f[ai], f);
706 atomicAdd(&gm_f[aj], -f);
707 if (calcVir && fshift_index != CENTRAL)
709 atomicAdd(&sm_fShiftLoc[fshift_index], f);
710 atomicAdd(&sm_fShiftLoc[CENTRAL], -f);
715 *vtotVdw_loc += (c12 * rinv6 - c6) * rinv6;
716 *vtotElec_loc += velec;
724 template<bool calcVir, bool calcEner>
725 __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams)
727 assert(blockDim.y == 1 && blockDim.z == 1);
728 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
730 float vtotVdw_loc = 0;
731 float vtotElec_loc = 0;
732 __shared__ float3 sm_fShiftLoc[SHIFTS];
736 if (threadIdx.x < SHIFTS)
738 sm_fShiftLoc[threadIdx.x] = make_float3(0.0f, 0.0f, 0.0f);
744 bool threadComputedPotential = false;
746 for (int j = 0; j < numFTypesOnGpu; j++)
748 if (tid >= kernelParams.fTypeRangeStart[j] && tid <= kernelParams.fTypeRangeEnd[j])
750 const int numBonds = kernelParams.numFTypeBonds[j];
751 int fTypeTid = tid - kernelParams.fTypeRangeStart[j];
752 const t_iatom* iatoms = kernelParams.d_iatoms[j];
753 fType = kernelParams.fTypesOnGpu[j];
756 threadComputedPotential = true;
762 bonds_gpu<calcVir, calcEner>(fTypeTid, &vtot_loc, numBonds, iatoms,
763 kernelParams.d_forceParams, kernelParams.d_xq,
764 kernelParams.d_f, sm_fShiftLoc, kernelParams.pbcAiuc);
767 angles_gpu<calcVir, calcEner>(
768 fTypeTid, &vtot_loc, numBonds, iatoms, kernelParams.d_forceParams,
769 kernelParams.d_xq, kernelParams.d_f, sm_fShiftLoc, kernelParams.pbcAiuc);
772 urey_bradley_gpu<calcVir, calcEner>(
773 fTypeTid, &vtot_loc, numBonds, iatoms, kernelParams.d_forceParams,
774 kernelParams.d_xq, kernelParams.d_f, sm_fShiftLoc, kernelParams.pbcAiuc);
778 pdihs_gpu<calcVir, calcEner>(fTypeTid, &vtot_loc, numBonds, iatoms,
779 kernelParams.d_forceParams, kernelParams.d_xq,
780 kernelParams.d_f, sm_fShiftLoc, kernelParams.pbcAiuc);
783 rbdihs_gpu<calcVir, calcEner>(
784 fTypeTid, &vtot_loc, numBonds, iatoms, kernelParams.d_forceParams,
785 kernelParams.d_xq, kernelParams.d_f, sm_fShiftLoc, kernelParams.pbcAiuc);
788 idihs_gpu<calcVir, calcEner>(fTypeTid, &vtot_loc, numBonds, iatoms,
789 kernelParams.d_forceParams, kernelParams.d_xq,
790 kernelParams.d_f, sm_fShiftLoc, kernelParams.pbcAiuc);
793 pairs_gpu<calcVir, calcEner>(fTypeTid, numBonds, iatoms, kernelParams.d_forceParams,
794 kernelParams.d_xq, kernelParams.d_f, sm_fShiftLoc,
795 kernelParams.pbcAiuc, kernelParams.scaleFactor,
796 &vtotVdw_loc, &vtotElec_loc);
803 if (threadComputedPotential)
805 float* vtotVdw = kernelParams.d_vTot + F_LJ14;
806 float* vtotElec = kernelParams.d_vTot + F_COUL14;
807 atomicAdd(kernelParams.d_vTot + fType, vtot_loc);
808 atomicAdd(vtotVdw, vtotVdw_loc);
809 atomicAdd(vtotElec, vtotElec_loc);
811 /* Accumulate shift vectors from shared memory to global memory on the first SHIFTS threads of the block. */
815 if (threadIdx.x < SHIFTS)
817 atomicAdd(kernelParams.d_fShift[threadIdx.x], sm_fShiftLoc[threadIdx.x]);
823 /*-------------------------------- End CUDA kernels-----------------------------*/
826 template<bool calcVir, bool calcEner>
827 void GpuBonded::Impl::launchKernel(const t_forcerec* fr, const matrix box)
829 GMX_ASSERT(haveInteractions_,
830 "Cannot launch bonded GPU kernels unless bonded GPU work was scheduled");
831 static_assert(TPB_BONDED >= SHIFTS,
832 "TPB_BONDED must be >= SHIFTS for the virial kernel (calcVir=true)");
835 setPbcAiuc(fr->bMolPBC ? numPbcDimensions(fr->pbcType) : 0, box, &pbcAiuc);
837 int fTypeRangeEnd = kernelParams_.fTypeRangeEnd[numFTypesOnGpu - 1];
839 if (fTypeRangeEnd < 0)
844 KernelLaunchConfig config;
845 config.blockSize[0] = TPB_BONDED;
846 config.blockSize[1] = 1;
847 config.blockSize[2] = 1;
848 config.gridSize[0] = (fTypeRangeEnd + TPB_BONDED) / TPB_BONDED;
849 config.gridSize[1] = 1;
850 config.gridSize[2] = 1;
851 config.stream = stream_;
853 auto kernelPtr = exec_kernel_gpu<calcVir, calcEner>;
854 kernelParams_.scaleFactor = fr->ic->epsfac * fr->fudgeQQ;
855 kernelParams_.pbcAiuc = pbcAiuc;
857 const auto kernelArgs = prepareGpuKernelArguments(kernelPtr, config, &kernelParams_);
859 launchGpuKernel(kernelPtr, config, nullptr, "exec_kernel_gpu<calcVir, calcEner>", kernelArgs);
862 void GpuBonded::launchKernel(const t_forcerec* fr, const gmx::StepWorkload& stepWork, const matrix box)
864 if (stepWork.computeEnergy)
866 // When we need the energy, we also need the virial
867 impl_->launchKernel<true, true>(fr, box);
869 else if (stepWork.computeVirial)
871 impl_->launchKernel<true, false>(fr, box);
875 impl_->launchKernel<false, false>(fr, box);