pull_t* pull_work,
const real* lambda,
double t,
- gmx_wallcycle_t wcycle)
+ gmx_wallcycle* wcycle)
{
t_pbc pbc;
real dvdl;
/* Calculate the center-of-mass forces; this requires communication,
 * which is why pull_potential is called close to other communication.
 */
- wallcycle_start(wcycle, ewcPULLPOT);
+ wallcycle_start(wcycle, WallCycleCounter::PullPot);
set_pbc(&pbc, ir.pbcType, box);
dvdl = 0;
enerd->term[F_COM_PULL] +=
force,
&dvdl);
enerd->dvdl_lin[FreeEnergyPerturbationCouplingType::Restraint] += dvdl;
- wallcycle_stop(wcycle, ewcPULLPOT);
+ wallcycle_stop(wcycle, WallCycleCounter::PullPot);
}
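/* Illustrative sketch (an assumption, not part of this change): the
 * edits in this diff follow two mechanical patterns. The pointer-hiding
 * typedef gmx_wallcycle_t is spelled out as an explicit gmx_wallcycle*,
 * and C-style enum constants become scoped enumerators. A minimal model
 * of the enum side, limited to enumerators this diff actually shows
 * (the real WallCycleCounter has many more):
 */
enum class WallCycleCounterSketch : int
{
    PullPot,     // was ewcPULLPOT
    PpDuringPme, // was ewcPPDURINGPME
    LaunchGpu,   // was ewcLAUNCH_GPU
    Count        // number of counters, handy for array sizing
};
/* Unlike the old plain enums, scoped enumerators do not implicitly
 * convert to int, so accidentally passing a main counter where a
 * sub-counter (WallCycleSubCounter) is expected now fails to compile.
 */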
static void pme_receive_force_ener(t_forcerec* fr,
gmx_enerdata_t* enerd,
bool useGpuPmePpComms,
bool receivePmeForceToGpu,
- gmx_wallcycle_t wcycle)
+ gmx_wallcycle* wcycle)
{
real e_q, e_lj, dvdl_q, dvdl_lj;
float cycles_ppdpme, cycles_seppme;
- cycles_ppdpme = wallcycle_stop(wcycle, ewcPPDURINGPME);
+ cycles_ppdpme = wallcycle_stop(wcycle, WallCycleCounter::PpDuringPme);
dd_cycles_add(cr->dd, cycles_ppdpme, ddCyclPPduringPME);
/* In case of node-splitting, the PP nodes receive the long-range
* forces, virial and energy from the PME nodes here.
*/
- wallcycle_start(wcycle, ewcPP_PMEWAITRECVF);
+ wallcycle_start(wcycle, WallCycleCounter::PpPmeWaitRecvF);
dvdl_q = 0;
dvdl_lj = 0;
gmx_pme_receive_f(fr->pmePpCommGpu.get(),
{
dd_cycles_add(cr->dd, cycles_seppme, ddCyclPME);
}
- wallcycle_stop(wcycle, ewcPP_PMEWAITRECVF);
+ wallcycle_stop(wcycle, WallCycleCounter::PpPmeWaitRecvF);
}
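/* The start/stop calls above must pair exactly on every path. An
 * equivalent RAII guard (a sketch, not GROMACS API) shows the pairing
 * discipline these functions follow:
 */
class WallCycleGuardSketch
{
public:
    WallCycleGuardSketch(gmx_wallcycle* wcycle, WallCycleCounter counter) :
        wcycle_(wcycle), counter_(counter)
    {
        wallcycle_start(wcycle_, counter_); // begin timing on construction
    }
    ~WallCycleGuardSketch() { wallcycle_stop(wcycle_, counter_); } // stop on scope exit
    WallCycleGuardSketch(const WallCycleGuardSketch&) = delete;
    WallCycleGuardSketch& operator=(const WallCycleGuardSketch&) = delete;

private:
    gmx_wallcycle*   wcycle_;
    WallCycleCounter counter_;
};
// Usage: { WallCycleGuardSketch timer(wcycle, WallCycleCounter::PpPmeWaitRecvF); ... }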
static void print_large_forces(FILE* fp,
//! When necessary, spreads forces on vsites and computes the virial for \p forceOutputs->forceWithShiftForces()
static void postProcessForceWithShiftForces(t_nrnb* nrnb,
- gmx_wallcycle_t wcycle,
+ gmx_wallcycle* wcycle,
const matrix box,
ArrayRef<const RVec> x,
ForceOutputs* forceOutputs,
static void postProcessForces(const t_commrec* cr,
int64_t step,
t_nrnb* nrnb,
- gmx_wallcycle_t wcycle,
+ gmx_wallcycle* wcycle,
const matrix box,
ArrayRef<const RVec> x,
ForceOutputs* forceOutputs,
const int clearF,
const int64_t step,
t_nrnb* nrnb,
- gmx_wallcycle_t wcycle)
+ gmx_wallcycle* wcycle)
{
if (!stepWork.computeNonbondedForces)
{
/* Prune the pair-list beyond fr->ic->rlistPrune using
* the current coordinates of the atoms.
*/
- wallcycle_sub_start(wcycle, ewcsNONBONDED_PRUNING);
+ wallcycle_sub_start(wcycle, WallCycleSubCounter::NonbondedPruning);
nbv->dispatchPruneKernelCpu(ilocality, fr->shift_vec);
- wallcycle_sub_stop(wcycle, ewcsNONBONDED_PRUNING);
+ wallcycle_sub_stop(wcycle, WallCycleSubCounter::NonbondedPruning);
}
}
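/* Conceptual sketch of what the prune kernel above does, in scalar form
 * with made-up names (PairSketch, prunePairsSketch); the real kernels
 * are SIMD- and GPU-specialized and handle periodic boundary conditions,
 * which this sketch ignores:
 */
#include <array>
#include <vector>

struct PairSketch
{
    int  i, j;           // atom indices of the pair
    bool pruned = false; // set when the pair is outside the prune radius
};

void prunePairsSketch(std::vector<PairSketch>&                  pairs,
                      const std::vector<std::array<double, 3>>& x,
                      double                                    rlistPrune)
{
    const double cutoffSq = rlistPrune * rlistPrune;
    for (PairSketch& p : pairs)
    {
        double d2 = 0;
        for (int d = 0; d < 3; d++)
        {
            const double dx = x[p.i][d] - x[p.j][d];
            d2 += dx * dx;
        }
        if (d2 > cutoffSq)
        {
            p.pruned = true; // force kernels skip it until the next pair search
        }
    }
}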
pull_t* pull_work,
int64_t step,
double t,
- gmx_wallcycle_t wcycle,
+ gmx_wallcycle* wcycle,
gmx::ForceProviders* forceProviders,
const matrix box,
gmx::ArrayRef<const gmx::RVec> x,
/* Add the forces from enforced rotation potentials (if any) */
if (inputrec.bRot)
{
- wallcycle_start(wcycle, ewcROTadd);
+ wallcycle_start(wcycle, WallCycleCounter::RotAdd);
enerd->term[F_COM_PULL] +=
add_rot_forces(enforcedRotation, forceWithVirialMtsLevel0->force_, cr, step, t);
- wallcycle_stop(wcycle, ewcROTadd);
+ wallcycle_stop(wcycle, WallCycleCounter::RotAdd);
}
if (ed)
const StepWorkload& stepWork,
GpuEventSynchronizer* xReadyOnDevice,
const real lambdaQ,
- gmx_wallcycle_t wcycle)
+ gmx_wallcycle* wcycle)
{
pme_gpu_prepare_computation(pmedata, box, wcycle, stepWork);
pme_gpu_launch_spread(pmedata, xReadyOnDevice, wcycle, lambdaQ);
*/
static void launchPmeGpuFftAndGather(gmx_pme_t* pmedata,
const real lambdaQ,
- gmx_wallcycle_t wcycle,
+ gmx_wallcycle* wcycle,
const gmx::StepWorkload& stepWork)
{
pme_gpu_launch_complex_transforms(pmedata, wcycle, stepWork);
gmx_enerdata_t* enerd,
const real lambdaQ,
const StepWorkload& stepWork,
- gmx_wallcycle_t wcycle)
+ gmx_wallcycle* wcycle)
{
bool isPmeGpuDone = false;
bool isNbGpuDone = false;
const DomainLifetimeWorkload& domainWork,
const StepWorkload& stepWork,
const bool havePpDomainDecomposition,
- gmx_wallcycle_t wcycle)
+ gmx_wallcycle* wcycle)
{
- wallcycle_sub_start(wcycle, ewcsCLEAR_FORCE_BUFFER);
+ wallcycle_sub_start(wcycle, WallCycleSubCounter::ClearForceBuffer);
/* NOTE: We assume fr->shiftForces is all zeros here */
gmx::ForceWithShiftForces forceWithShiftForces(
clearRVecs(forceWithVirial.force_, true);
}
- wallcycle_sub_stop(wcycle, ewcsCLEAR_FORCE_BUFFER);
+ wallcycle_sub_stop(wcycle, WallCycleSubCounter::ClearForceBuffer);
return ForceOutputs(
forceWithShiftForces, forceHelperBuffers->haveDirectVirialContributions(), forceWithVirial);
const gmx::MdrunScheduleWorkload& runScheduleWork,
bool useGpuPmeOnThisRank,
int64_t step,
- gmx_wallcycle_t wcycle)
+ gmx_wallcycle* wcycle)
{
if (runScheduleWork.simulationWork.useGpuNonbonded && runScheduleWork.stepWork.computeNonbondedForces)
{
}
/* now clear the GPU outputs while we finish the step on the CPU */
- wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
- wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+ wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+ wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
Nbnxm::gpu_clear_outputs(nbv->gpu_nbv, runScheduleWork.stepWork.computeVirial);
- wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+ wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+ wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
}
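/* The _nocount start variants used above resume timing on a counter
 * without bumping its call tally, so a region entered several times in
 * one MD step is still reported as a single call. A conceptual model of
 * the per-counter bookkeeping (an assumption, not the actual GROMACS
 * implementation):
 */
struct CycleSlotSketch
{
    int64_t cycles = 0; // accumulated cycles over all start/stop pairs
    int     calls  = 0; // bumped by wallcycle_start(), untouched by the _nocount variant
};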
if (useGpuPmeOnThisRank)
pull_t* pull_work,
int64_t step,
t_nrnb* nrnb,
- gmx_wallcycle_t wcycle,
+ gmx_wallcycle* wcycle,
const gmx_localtop_t* top,
const matrix box,
gmx::ArrayRefWithPadding<gmx::RVec> x,
fr->wholeMoleculeTransform->updateForAtomPbcJumps(x.unpaddedArrayRef(), box);
}
- wallcycle_start(wcycle, ewcNS);
+ wallcycle_start(wcycle, WallCycleCounter::NS);
if (!DOMAINDECOMP(cr))
{
const rvec vzero = { 0.0_real, 0.0_real, 0.0_real };
const rvec boxDiagonal = { box[XX][XX], box[YY][YY], box[ZZ][ZZ] };
- wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL);
+ wallcycle_sub_start(wcycle, WallCycleSubCounter::NBSGridLocal);
nbnxn_put_on_grid(nbv,
box,
0,
x.unpaddedArrayRef(),
0,
nullptr);
- wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
+ wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSGridLocal);
}
else
{
- wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
+ wallcycle_sub_start(wcycle, WallCycleSubCounter::NBSGridNonLocal);
nbnxn_put_on_grid_nonlocal(nbv, domdec_zones(cr->dd), fr->cginfo, x.unpaddedArrayRef());
- wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
+ wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSGridNonLocal);
}
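/* Conceptual sketch of the spatial binning behind nbnxn_put_on_grid, as
 * a local lambda with made-up names (cellIndexSketch). It assumes
 * coordinates already wrapped into the box; the real code builds
 * hierarchical cluster grids per domain-decomposition zone rather than
 * a flat uniform grid:
 */
auto cellIndexSketch = [](const double xAtom[3], const double boxDiag[3], const int nCells[3]) {
    int idx[3];
    for (int d = 0; d < 3; d++)
    {
        const int raw = static_cast<int>(xAtom[d] / boxDiag[d] * nCells[d]);
        idx[d] = (raw < nCells[d]) ? raw : nCells[d] - 1; // clamp atoms on the upper box edge
    }
    // linearize (x fastest) so cells adjacent in x are adjacent in memory
    return (idx[ZZ] * nCells[YY] + idx[YY]) * nCells[XX] + idx[XX];
};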
nbv->setAtomProperties(gmx::constArrayRefFromArray(mdatoms->typeA, mdatoms->nr),
gmx::constArrayRefFromArray(mdatoms->chargeA, mdatoms->nr),
fr->cginfo);
- wallcycle_stop(wcycle, ewcNS);
+ wallcycle_stop(wcycle, WallCycleCounter::NS);
/* initialize the GPU nbnxm atom data and bonded data structures */
if (simulationWork.useGpuNonbonded)
{
// Note: cycle counting covers only the nonbondeds; gpuBonded counts internally
- wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
- wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+ wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+ wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
Nbnxm::gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat.get());
- wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+ wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+ wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
if (fr->gpuBonded)
{
runScheduleWork->domainWork = setupDomainLifetimeWorkload(
inputrec, *fr, pull_work, ed, *mdatoms, simulationWork, stepWork);
- wallcycle_start_nocount(wcycle, ewcNS);
- wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
+ wallcycle_start_nocount(wcycle, WallCycleCounter::NS);
+ wallcycle_sub_start(wcycle, WallCycleSubCounter::NBSSearchLocal);
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
nbv->constructPairlist(InteractionLocality::Local, top->excls, step, nrnb);
nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::Local);
- wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
- wallcycle_stop(wcycle, ewcNS);
+ wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSSearchLocal);
+ wallcycle_stop(wcycle, WallCycleCounter::NS);
if (stepWork.useGpuXBufferOps)
{
{
ddBalanceRegionHandler.openBeforeForceComputationGpu();
- wallcycle_start(wcycle, ewcLAUNCH_GPU);
- wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+ wallcycle_start(wcycle, WallCycleCounter::LaunchGpu);
+ wallcycle_sub_start(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get());
if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps)
{
Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::Local);
}
- wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+ wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+ wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
// with X buffer ops offloaded to the GPU on all but the search steps
// bonded work not split into separate local and non-local, so with DD
}
/* launch local nonbonded work on GPU */
- wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
- wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+ wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+ wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, enbvClearFNo, step, nrnb, wcycle);
- wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+ wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+ wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
}
if (useGpuPmeOnThisRank)
if (stepWork.doNeighborSearch)
{
// TODO: fuse this branch with the above large stepWork.doNeighborSearch block
- wallcycle_start_nocount(wcycle, ewcNS);
- wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
+ wallcycle_start_nocount(wcycle, WallCycleCounter::NS);
+ wallcycle_sub_start(wcycle, WallCycleSubCounter::NBSSearchNonLocal);
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
nbv->constructPairlist(InteractionLocality::NonLocal, top->excls, step, nrnb);
nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::NonLocal);
- wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
- wallcycle_stop(wcycle, ewcNS);
+ wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSSearchNonLocal);
+ wallcycle_stop(wcycle, WallCycleCounter::NS);
// TODO refactor this GPU halo exchange re-initialisation
// to the location in do_md where the GPU halo exchange is
// constructed at partitioning, after above stateGpu
if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps)
{
- wallcycle_start(wcycle, ewcLAUNCH_GPU);
- wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+ wallcycle_start(wcycle, WallCycleCounter::LaunchGpu);
+ wallcycle_sub_start(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::NonLocal);
- wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+ wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+ wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
}
if (domainWork.haveGpuBondedWork)
}
/* launch non-local nonbonded tasks on GPU */
- wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
- wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+ wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+ wallcycle_sub_start(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFNo, step, nrnb, wcycle);
- wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+ wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+ wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
}
}
if (simulationWork.useGpuNonbonded && stepWork.computeNonbondedForces)
{
/* launch D2H copy-back F */
- wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
- wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+ wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+ wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
if (havePPDomainDecomposition(cr))
{
Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), stepWork, AtomLocality::NonLocal);
}
Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), stepWork, AtomLocality::Local);
- wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+ wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
if (domainWork.haveGpuBondedWork && stepWork.computeEnergy)
{
fr->gpuBonded->launchEnergyTransfer();
}
- wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+ wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
}
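/* The copy-back launched above is asynchronous: the transfer is enqueued
 * on a GPU stream and CPU force work proceeds, synchronizing only where
 * the GPU forces are reduced. A minimal host-side analogy with
 * std::async (illustrative only, needs <future>; the real code uses
 * device streams and events):
 */
auto overlapSketch = [] {
    std::future<void> copyDone = std::async(std::launch::async, [] { /* D2H force copy */ });
    // ... CPU bonded / free-energy force work overlaps the transfer here ...
    copyDone.wait(); // block only at the force-reduction point
};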
gmx::ArrayRef<const gmx::RVec> xWholeMolecules;
if (DOMAINDECOMP(cr) && !thisRankHasDuty(cr, DUTY_PME))
{
- wallcycle_start(wcycle, ewcPPDURINGPME);
+ wallcycle_start(wcycle, WallCycleCounter::PpDuringPme);
dd_force_flop_start(cr->dd, nrnb);
}
if (inputrec.bRot)
{
- wallcycle_start(wcycle, ewcROT);
+ wallcycle_start(wcycle, WallCycleCounter::Rot);
do_rotation(cr, enforcedRotation, box, x.unpaddedConstArrayRef(), t, step, stepWork.doNeighborSearch);
- wallcycle_stop(wcycle, ewcROT);
+ wallcycle_stop(wcycle, WallCycleCounter::Rot);
}
/* Start the force cycle counter.
* Note that a different counter is used for dynamic load balancing.
*/
- wallcycle_start(wcycle, ewcFORCE);
+ wallcycle_start(wcycle, WallCycleCounter::Force);
/* Set up and clear force outputs:
* forceOutMtsLevel0: everything except what is in the other two outputs
* This can be split into a local and a non-local part when overlapping
 * communication with computation under domain decomposition.
*/
- wallcycle_stop(wcycle, ewcFORCE);
+ wallcycle_stop(wcycle, WallCycleCounter::Force);
nbv->atomdata_add_nbat_f_to_f(AtomLocality::All,
forceOutNonbonded->forceWithShiftForces().force());
- wallcycle_start_nocount(wcycle, ewcFORCE);
+ wallcycle_start_nocount(wcycle, WallCycleCounter::Force);
}
/* If there are multiple fshift output buffers, we need to reduce them */
// TODO Force flags should include haveFreeEnergyWork for this domain
if (stepWork.useGpuXHalo && (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork))
{
- wallcycle_stop(wcycle, ewcFORCE);
+ wallcycle_stop(wcycle, WallCycleCounter::Force);
/* Wait for non-local coordinate data to be copied from device */
stateGpu->waitCoordinatesReadyOnHost(AtomLocality::NonLocal);
- wallcycle_start_nocount(wcycle, ewcFORCE);
+ wallcycle_start_nocount(wcycle, WallCycleCounter::Force);
}
// Compute wall interactions, when present.
ddBalanceRegionHandler);
}
- wallcycle_stop(wcycle, ewcFORCE);
+ wallcycle_stop(wcycle, WallCycleCounter::Force);
// VdW dispersion correction, only computed on master rank to avoid double counting
if ((stepWork.computeEnergy || stepWork.computeVirial) && fr->dispersionCorrection && MASTER(cr))
}
else
{
- wallcycle_start_nocount(wcycle, ewcFORCE);
+ wallcycle_start_nocount(wcycle, WallCycleCounter::Force);
do_nb_verlet(
fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFYes, step, nrnb, wcycle);
- wallcycle_stop(wcycle, ewcFORCE);
+ wallcycle_stop(wcycle, WallCycleCounter::Force);
}
if (stepWork.useGpuFBufferOps)
{
// NOTE: emulation kernel is not included in the balancing region,
// but emulation mode does not target performance anyway
- wallcycle_start_nocount(wcycle, ewcFORCE);
+ wallcycle_start_nocount(wcycle, WallCycleCounter::Force);
do_nb_verlet(fr,
ic,
enerd,
step,
nrnb,
wcycle);
- wallcycle_stop(wcycle, ewcFORCE);
+ wallcycle_stop(wcycle, WallCycleCounter::Force);
}
// If on GPU PME-PP comms path, receive forces from PME before GPU buffer ops