// we can only launch the kernel after non-local coordinates have been received.
if (ppForceWorkload->haveGpuBondedWork && !DOMAINDECOMP(cr))
{
+ wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_BONDED);
fr->gpuBonded->launchKernels(fr, flags, box);
+ wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
}
/* launch local nonbonded work on GPU */
if (ppForceWorkload->haveGpuBondedWork)
{
+ wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_BONDED);
fr->gpuBonded->launchKernels(fr, flags, box);
+ wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
}
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
if (ppForceWorkload->haveGpuBondedWork && (flags & GMX_FORCE_ENERGY))
{
- // TODO The launch call could come earlier in the
- // force-calculation sequence.
+ wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
+ wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_BONDED);
fr->gpuBonded->launchEnergyTransfer();
fr->gpuBonded->accumulateEnergyTerms(enerd);
// TODO The clearing call could come later in the
// force-calculation sequence.
fr->gpuBonded->clearEnergies();
+ wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
+ wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
if (DOMAINDECOMP(cr))