* \param[in,out] enerd Energy data structure results are reduced into
* \param[in] flags Force flags
* \param[in] pmeFlags PME flags
- * \param[in] haveOtherWork Tells whether there is other work than non-bonded in the stream(s)
* \param[in] wcycle The wallcycle structure
*/
static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t *nbv,
gmx_enerdata_t *enerd,
int flags,
int pmeFlags,
- bool haveOtherWork,
gmx_wallcycle_t wcycle)
{
bool isPmeGpuDone = false;
isNbGpuDone = Nbnxm::gpu_try_finish_task(nbv->gpu_nbv,
flags,
Nbnxm::AtomLocality::Local,
- haveOtherWork,
enerd->grpp.ener[egLJSR].data(),
enerd->grpp.ener[egCOULSR].data(),
fshift, completionType);
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
nbv->constructPairlist(Nbnxm::InteractionLocality::Local,
&top->excls, step, nrnb);
+
+ nbv->setupGpuShortRangeWork(fr->gpuBonded, Nbnxm::InteractionLocality::Local);
+
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
wallcycle_stop(wcycle, ewcNS);
if (bNS || !useGpuXBufOps)
{
Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(),
- Nbnxm::AtomLocality::Local,
- ppForceWorkload->haveGpuBondedWork);
+ Nbnxm::AtomLocality::Local);
}
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
// with X buffer ops offloaded to the GPU on all but the search steps
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
nbv->constructPairlist(Nbnxm::InteractionLocality::NonLocal,
&top->excls, step, nrnb);
+
+ nbv->setupGpuShortRangeWork(fr->gpuBonded, Nbnxm::InteractionLocality::NonLocal);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
wallcycle_stop(wcycle, ewcNS);
}
{
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(),
- Nbnxm::AtomLocality::NonLocal,
- ppForceWorkload->haveGpuBondedWork);
+ Nbnxm::AtomLocality::NonLocal);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
}
if (havePPDomainDecomposition(cr))
{
Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(),
- flags, Nbnxm::AtomLocality::NonLocal, ppForceWorkload->haveGpuBondedWork);
+ flags, Nbnxm::AtomLocality::NonLocal);
}
Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(),
- flags, Nbnxm::AtomLocality::Local, ppForceWorkload->haveGpuBondedWork);
+ flags, Nbnxm::AtomLocality::Local);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
if (ppForceWorkload->haveGpuBondedWork && (flags & GMX_FORCE_ENERGY))
wallcycle_start(wcycle, ewcWAIT_GPU_NB_NL);
Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv,
flags, Nbnxm::AtomLocality::NonLocal,
- ppForceWorkload->haveGpuBondedWork,
enerd->grpp.ener[egLJSR].data(),
enerd->grpp.ener[egCOULSR].data(),
fr->fshift);
if (alternateGpuWait)
{
alternatePmeNbGpuWaitReduce(fr->nbv.get(), fr->pmedata, &force, &forceOut.forceWithVirial, fr->fshift, enerd,
- flags, pmeFlags, ppForceWorkload->haveGpuBondedWork, wcycle);
+ flags, pmeFlags, wcycle);
}
if (!alternateGpuWait && useGpuPme)
wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv,
- flags, Nbnxm::AtomLocality::Local, ppForceWorkload->haveGpuBondedWork,
+ flags, Nbnxm::AtomLocality::Local,
enerd->grpp.ener[egLJSR].data(),
enerd->grpp.ener[egCOULSR].data(),
fr->fshift);