nbv->dispatchPruneKernelCpu(ilocality, fr->shift_vec);
wallcycle_sub_stop(wcycle, ewcsNONBONDED_PRUNING);
}
-
- wallcycle_sub_start(wcycle, ewcsNONBONDED);
}
nbv->dispatchNonbondedKernel(ilocality, *ic, flags, clearF, *fr, enerd, nrnb, wcycle);
-
- if (!nbv->useGpu())
- {
- wallcycle_sub_stop(wcycle, ewcsNONBONDED);
- }
}
static inline void clear_rvecs_omp(int n, rvec v[])
/* Calculate the local and non-local free-energy interactions.
* This is done on the CPU both with and without a GPU.
*/
- wallcycle_sub_start(wcycle, ewcsNONBONDED);
nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::Local,
fr, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f, *mdatoms,
inputrec->fepvals, lambda.data(),
- enerd, flags, nrnb);
+ enerd, flags, nrnb, wcycle);
if (havePPDomainDecomposition(cr))
{
nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::NonLocal,
fr, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f, *mdatoms,
inputrec->fepvals, lambda.data(),
- enerd, flags, nrnb);
+ enerd, flags, nrnb, wcycle);
}
- wallcycle_sub_stop(wcycle, ewcsNONBONDED);
}
if (!bUseOrEmulGPU)
gmx::ArrayRef<const NbnxnPairlistCpu> pairlists = pairlistSet.cpuLists();
int gmx_unused nthreads = gmx_omp_nthreads_get(emntNonbonded);
- wallcycle_sub_start(wcycle, ewcsNBFCLEARBUF);
+ wallcycle_sub_start(wcycle, ewcsNONBONDED_CLEAR);
#pragma omp parallel for schedule(static) num_threads(nthreads)
for (int nb = 0; nb < pairlists.ssize(); nb++)
{
if (nb == 0)
{
- wallcycle_sub_stop(wcycle, ewcsNBFCLEARBUF);
- wallcycle_sub_start(wcycle, ewcsNBFKERNEL);
+ wallcycle_sub_stop(wcycle, ewcsNONBONDED_CLEAR);
+ wallcycle_sub_start(wcycle, ewcsNONBONDED_KERNEL);
}
// TODO: Change to reference
}
}
}
- wallcycle_sub_stop(wcycle, ewcsNBFKERNEL);
+ wallcycle_sub_stop(wcycle, ewcsNONBONDED_KERNEL);
if (forceFlags & GMX_FORCE_ENERGY)
{
real *lambda,
gmx_enerdata_t *enerd,
const int forceFlags,
- t_nrnb *nrnb)
+ t_nrnb *nrnb,
+ gmx_wallcycle *wcycle)
{
const auto nbl_fep = pairlistSets().pairlistSet(iLocality).fepLists();
GMX_ASSERT(gmx_omp_nthreads_get(emntNonbonded) == nbl_fep.ssize(), "Number of lists should be same as number of NB threads");
+ wallcycle_sub_start(wcycle, ewcsNONBONDED_FEP);
#pragma omp parallel for schedule(static) num_threads(nbl_fep.ssize())
for (int th = 0; th < nbl_fep.ssize(); th++)
{
enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
}
}
+ wallcycle_sub_stop(wcycle, ewcsNONBONDED_FEP);
}
real *lambda,
gmx_enerdata_t *enerd,
int forceFlags,
- t_nrnb *nrnb);
+ t_nrnb *nrnb,
+ gmx_wallcycle *wcycle);
//! Add the forces stored in nbat to f and zero the forces in nbat
void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality locality,
"Restraints F",
"Listed buffer ops.",
"Nonbonded pruning",
- "Nonbonded F",
- "NB F kernel", "NB F clear buf",
+ "Nonbonded F kernel",
+ "Nonbonded F clear",
+ "Nonbonded FEP",
"Launch NB GPU tasks",
"Launch Bonded GPU tasks",
"Launch PME GPU tasks",
ewcsRESTRAINTS,
ewcsLISTED_BUF_OPS,
ewcsNONBONDED_PRUNING,
- ewcsNONBONDED,
- ewcsNBFKERNEL, ewcsNBFCLEARBUF,
+ ewcsNONBONDED_KERNEL,
+ ewcsNONBONDED_CLEAR,
+ ewcsNONBONDED_FEP,
ewcsLAUNCH_GPU_NONBONDED,
ewcsLAUNCH_GPU_BONDED,
ewcsLAUNCH_GPU_PME,