From d94ba4ab0e0ee6bda56f91542d078b70231a44ca Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Wed, 9 Oct 2019 13:57:07 +0200 Subject: [PATCH] Move locality.h from nbnxm to mdtypes Removed duplicate definition of AtomLocality from StatePropagatorDataGpu. Change-Id: I79aa415dd6fc91791d0cc54dc07d7c56e9b7c874 --- src/gromacs/ewald/pme_only.cpp | 2 +- src/gromacs/ewald/tests/pmetestcommon.cpp | 2 +- src/gromacs/mdlib/forcerec.cpp | 4 +- src/gromacs/mdlib/sim_util.cpp | 90 ++++++++++--------- src/gromacs/mdrun/md.cpp | 33 ++++--- src/gromacs/mdrun/runner.cpp | 8 +- src/gromacs/mdrun/tpi.cpp | 4 +- src/gromacs/{nbnxm => mdtypes}/locality.h | 22 +++-- .../mdtypes/state_propagator_data_gpu.h | 15 +--- src/gromacs/nbnxm/atomdata.cpp | 30 +++---- src/gromacs/nbnxm/atomdata.h | 12 +-- src/gromacs/nbnxm/benchmark/bench_setup.cpp | 8 +- src/gromacs/nbnxm/gpu_common.h | 13 ++- src/gromacs/nbnxm/gpu_data_mgmt.h | 8 +- src/gromacs/nbnxm/gpu_types_common.h | 5 +- src/gromacs/nbnxm/kerneldispatch.cpp | 4 +- src/gromacs/nbnxm/nbnxm.cpp | 12 +-- src/gromacs/nbnxm/nbnxm.h | 29 +++--- src/gromacs/nbnxm/nbnxm_gpu.h | 30 +++---- src/gromacs/nbnxm/nbnxm_setup.cpp | 4 +- src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp | 4 + src/gromacs/nbnxm/pairlist.cpp | 24 ++--- src/gromacs/nbnxm/pairlist.h | 2 +- src/gromacs/nbnxm/pairlistparams.h | 3 +- src/gromacs/nbnxm/pairlistset.h | 8 +- src/gromacs/nbnxm/pairlistsets.h | 15 ++-- src/gromacs/nbnxm/prunekerneldispatch.cpp | 12 +-- 27 files changed, 206 insertions(+), 197 deletions(-) rename src/gromacs/{nbnxm => mdtypes}/locality.h (80%) diff --git a/src/gromacs/ewald/pme_only.cpp b/src/gromacs/ewald/pme_only.cpp index 149c8d74ea..d6a14be72d 100644 --- a/src/gromacs/ewald/pme_only.cpp +++ b/src/gromacs/ewald/pme_only.cpp @@ -722,7 +722,7 @@ int gmx_pmeonly(struct gmx_pme_t *pme, pme_gpu_prepare_computation(pme, boxChanged, box, wcycle, pmeFlags, useGpuPmeForceReduction); if (!pme_pp->useGpuDirectComm) { - stateGpu->copyCoordinatesToGpu(gmx::ArrayRef(pme_pp->x), gmx::StatePropagatorDataGpu::AtomLocality::All); + stateGpu->copyCoordinatesToGpu(gmx::ArrayRef(pme_pp->x), gmx::AtomLocality::All); } // On the separate PME rank we do not need a synchronizer as we schedule everything in a single stream // TODO: with pme on GPU the receive should make a list of synchronizers and pass it here #3157 diff --git a/src/gromacs/ewald/tests/pmetestcommon.cpp b/src/gromacs/ewald/tests/pmetestcommon.cpp index 1311ab30c4..30c0c042a7 100644 --- a/src/gromacs/ewald/tests/pmetestcommon.cpp +++ b/src/gromacs/ewald/tests/pmetestcommon.cpp @@ -209,7 +209,7 @@ void pmeInitAtoms(gmx_pme_t *pme, gmx_pme_reinit_atoms(pme, atomCount, charges.data()); stateGpu->reinit(atomCount, atomCount); - stateGpu->copyCoordinatesToGpu(arrayRefFromArray(coordinates.data(), coordinates.size()), gmx::StatePropagatorDataGpu::AtomLocality::All); + stateGpu->copyCoordinatesToGpu(arrayRefFromArray(coordinates.data(), coordinates.size()), gmx::AtomLocality::All); pme_gpu_set_kernelparam_coordinates(pme->gpu, stateGpu->getCoordinates()); break; diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp index 3c6e8603f9..25f0ad8aec 100644 --- a/src/gromacs/mdlib/forcerec.cpp +++ b/src/gromacs/mdlib/forcerec.cpp @@ -1463,8 +1463,8 @@ void init_forcerec(FILE *fp, if (useGpuForBonded) { auto stream = DOMAINDECOMP(cr) ? - Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal) : - Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::Local); + Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, gmx::InteractionLocality::NonLocal) : + Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, gmx::InteractionLocality::Local); // TODO the heap allocation is only needed while // t_forcerec lacks a constructor. fr->gpuBonded = new gmx::GpuBonded(mtop->ffparams, diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index e50cc4f919..f9fd797164 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -118,6 +118,8 @@ using gmx::ForceOutputs; using gmx::StepWorkload; using gmx::DomainLifetimeWorkload; using gmx::SimulationWorkload; +using gmx::AtomLocality; +using gmx::InteractionLocality; // TODO: this environment variable allows us to verify before release // that on less common architectures the total cost of polling is not larger than @@ -319,7 +321,7 @@ static void do_nb_verlet(t_forcerec *fr, const interaction_const_t *ic, gmx_enerdata_t *enerd, const StepWorkload &stepWork, - const Nbnxm::InteractionLocality ilocality, + const InteractionLocality ilocality, const int clearF, const int64_t step, t_nrnb *nrnb, @@ -694,14 +696,14 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t *nbv, GpuTaskCompletion completionType = (isPmeGpuDone) ? GpuTaskCompletion::Wait : GpuTaskCompletion::Check; isNbGpuDone = Nbnxm::gpu_try_finish_task(nbv->gpu_nbv, stepWork, - Nbnxm::AtomLocality::Local, + AtomLocality::Local, enerd->grpp.ener[egLJSR].data(), enerd->grpp.ener[egCOULSR].data(), forceWithShiftForces.shiftForces(), completionType, wcycle); if (isNbGpuDone) { - nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local, + nbv->atomdata_add_nbat_f_to_f(AtomLocality::Local, forceWithShiftForces.force()); } } @@ -1023,7 +1025,7 @@ void do_force(FILE *fplog, // 2. The buffers were reinitialized on search step if (!simulationWork.useGpuUpdate || stepWork.doNeighborSearch) { - stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), gmx::StatePropagatorDataGpu::AtomLocality::Local); + stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::Local); } } @@ -1045,7 +1047,7 @@ void do_force(FILE *fplog, } #endif /* GMX_MPI */ - const auto localXReadyOnDevice = (stateGpu != nullptr) ? stateGpu->getCoordinatesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::Local, + const auto localXReadyOnDevice = (stateGpu != nullptr) ? stateGpu->getCoordinatesReadyOnDeviceEvent(AtomLocality::Local, simulationWork, stepWork) : nullptr; if (useGpuPmeOnThisRank) { @@ -1143,10 +1145,10 @@ void do_force(FILE *fplog, wallcycle_start_nocount(wcycle, ewcNS); wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL); /* Note that with a GPU the launch overhead of the list transfer is not timed separately */ - nbv->constructPairlist(Nbnxm::InteractionLocality::Local, + nbv->constructPairlist(InteractionLocality::Local, &top->excls, step, nrnb); - nbv->setupGpuShortRangeWork(fr->gpuBonded, Nbnxm::InteractionLocality::Local); + nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::Local); wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL); wallcycle_stop(wcycle, ewcNS); @@ -1169,13 +1171,13 @@ void do_force(FILE *fplog, if (useGpuXBufOps == BufferOpsUseGpu::True) { GMX_ASSERT(stateGpu, "stateGpu should be valid when buffer ops are offloaded"); - nbv->convertCoordinatesGpu(Nbnxm::AtomLocality::Local, false, + nbv->convertCoordinatesGpu(AtomLocality::Local, false, stateGpu->getCoordinates(), localXReadyOnDevice); } else { - nbv->convertCoordinates(Nbnxm::AtomLocality::Local, false, + nbv->convertCoordinates(AtomLocality::Local, false, x.unpaddedArrayRef()); } } @@ -1193,7 +1195,7 @@ void do_force(FILE *fplog, if (stepWork.doNeighborSearch || (useGpuXBufOps == BufferOpsUseGpu::False)) { Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), - Nbnxm::AtomLocality::Local); + AtomLocality::Local); } wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); // with X buffer ops offloaded to the GPU on all but the search steps @@ -1209,7 +1211,7 @@ void do_force(FILE *fplog, /* launch local nonbonded work on GPU */ wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); - do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local, enbvClearFNo, + do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, enbvClearFNo, step, nrnb, wcycle); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); wallcycle_stop(wcycle, ewcLAUNCH_GPU); @@ -1245,10 +1247,10 @@ void do_force(FILE *fplog, wallcycle_start_nocount(wcycle, ewcNS); wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL); /* Note that with a GPU the launch overhead of the list transfer is not timed separately */ - nbv->constructPairlist(Nbnxm::InteractionLocality::NonLocal, + nbv->constructPairlist(InteractionLocality::NonLocal, &top->excls, step, nrnb); - nbv->setupGpuShortRangeWork(fr->gpuBonded, Nbnxm::InteractionLocality::NonLocal); + nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::NonLocal); wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL); wallcycle_stop(wcycle, ewcNS); if (ddUsesGpuDirectCommunication) @@ -1267,7 +1269,7 @@ void do_force(FILE *fplog, if (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork) { //non-local part of coordinate buffer must be copied back to host for CPU work - stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal); + stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal); } } else @@ -1280,16 +1282,16 @@ void do_force(FILE *fplog, // The condition here was (pme != nullptr && pme_gpu_get_device_x(fr->pmedata) != nullptr) if (!useGpuPmeOnThisRank && !ddUsesGpuDirectCommunication) { - stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal); + stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal); } - nbv->convertCoordinatesGpu(Nbnxm::AtomLocality::NonLocal, false, + nbv->convertCoordinatesGpu(AtomLocality::NonLocal, false, stateGpu->getCoordinates(), - stateGpu->getCoordinatesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal, + stateGpu->getCoordinatesReadyOnDeviceEvent(AtomLocality::NonLocal, simulationWork, stepWork)); } else { - nbv->convertCoordinates(Nbnxm::AtomLocality::NonLocal, false, + nbv->convertCoordinates(AtomLocality::NonLocal, false, x.unpaddedArrayRef()); } @@ -1303,7 +1305,7 @@ void do_force(FILE *fplog, { wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED); Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), - Nbnxm::AtomLocality::NonLocal); + AtomLocality::NonLocal); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); } @@ -1316,7 +1318,7 @@ void do_force(FILE *fplog, /* launch non-local nonbonded tasks on GPU */ wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED); - do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo, + do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFNo, step, nrnb, wcycle); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); @@ -1333,10 +1335,10 @@ void do_force(FILE *fplog, if (havePPDomainDecomposition(cr)) { Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), - stepWork, Nbnxm::AtomLocality::NonLocal); + stepWork, AtomLocality::NonLocal); } Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), - stepWork, Nbnxm::AtomLocality::Local); + stepWork, AtomLocality::Local); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); if (domainWork.haveGpuBondedWork && stepWork.computeEnergy) @@ -1420,7 +1422,7 @@ void do_force(FILE *fplog, if (!useOrEmulateGpuNb) { - do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local, enbvClearFYes, + do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, enbvClearFYes, step, nrnb, wcycle); } @@ -1429,14 +1431,14 @@ void do_force(FILE *fplog, /* Calculate the local and non-local free energy interactions here. * Happens here on the CPU both with and without GPU. */ - nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::Local, + nbv->dispatchFreeEnergyKernel(InteractionLocality::Local, fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms, inputrec->fepvals, lambda.data(), enerd, stepWork, nrnb); if (havePPDomainDecomposition(cr)) { - nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::NonLocal, + nbv->dispatchFreeEnergyKernel(InteractionLocality::NonLocal, fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms, inputrec->fepvals, lambda.data(), enerd, stepWork, nrnb); @@ -1447,7 +1449,7 @@ void do_force(FILE *fplog, { if (havePPDomainDecomposition(cr)) { - do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo, + do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFNo, step, nrnb, wcycle); } @@ -1458,7 +1460,7 @@ void do_force(FILE *fplog, * communication with calculation with domain decomposition. */ wallcycle_stop(wcycle, ewcFORCE); - nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.forceWithShiftForces().force()); + nbv->atomdata_add_nbat_f_to_f(AtomLocality::All, forceOut.forceWithShiftForces().force()); wallcycle_start_nocount(wcycle, ewcFORCE); } @@ -1515,7 +1517,7 @@ void do_force(FILE *fplog, if (simulationWork.useGpuNonbonded) { cycles_wait_gpu += Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv, - stepWork, Nbnxm::AtomLocality::NonLocal, + stepWork, AtomLocality::NonLocal, enerd->grpp.ener[egLJSR].data(), enerd->grpp.ener[egCOULSR].data(), forceWithShiftForces.shiftForces(), @@ -1524,7 +1526,7 @@ void do_force(FILE *fplog, else { wallcycle_start_nocount(wcycle, ewcFORCE); - do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFYes, + do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFYes, step, nrnb, wcycle); wallcycle_stop(wcycle, ewcFORCE); } @@ -1540,12 +1542,12 @@ void do_force(FILE *fplog, if (haveNonLocalForceContribInCpuBuffer) { - stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal); - dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal, + stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), AtomLocality::NonLocal); + dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::NonLocal, useGpuFBufOps == BufferOpsUseGpu::True)); } - nbv->atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality::NonLocal, + nbv->atomdata_add_nbat_f_to_f_gpu(AtomLocality::NonLocal, stateGpu->getForces(), pme_gpu_get_device_f(fr->pmedata), dependencyList, @@ -1553,12 +1555,12 @@ void do_force(FILE *fplog, if (!useGpuForcesHaloExchange) { // copy from GPU input for dd_move_f() - stateGpu->copyForcesFromGpu(forceOut.forceWithShiftForces().force(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal); + stateGpu->copyForcesFromGpu(forceOut.forceWithShiftForces().force(), AtomLocality::NonLocal); } } else { - nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::NonLocal, + nbv->atomdata_add_nbat_f_to_f(AtomLocality::NonLocal, forceWithShiftForces.force()); } @@ -1593,7 +1595,7 @@ void do_force(FILE *fplog, { if (haveCpuLocalForces) { - stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), gmx::StatePropagatorDataGpu::AtomLocality::Local); + stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), AtomLocality::Local); } gpuHaloExchange->communicateHaloForces(haveCpuLocalForces); } @@ -1601,7 +1603,7 @@ void do_force(FILE *fplog, { if (useGpuFBufOps == BufferOpsUseGpu::True) { - stateGpu->waitForcesReadyOnHost(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal); + stateGpu->waitForcesReadyOnHost(AtomLocality::NonLocal); } dd_move_f(cr->dd, &forceOut.forceWithShiftForces(), wcycle); } @@ -1635,7 +1637,7 @@ void do_force(FILE *fplog, const float gpuWaitApiOverheadMargin = 2e6F; /* cycles */ const float waitCycles = Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv, - stepWork, Nbnxm::AtomLocality::Local, + stepWork, AtomLocality::Local, enerd->grpp.ener[egLJSR].data(), enerd->grpp.ener[egCOULSR].data(), forceOut.forceWithShiftForces().shiftForces(), @@ -1663,7 +1665,7 @@ void do_force(FILE *fplog, // NOTE: emulation kernel is not included in the balancing region, // but emulation mode does not target performance anyway wallcycle_start_nocount(wcycle, ewcFORCE); - do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local, + do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes, step, nrnb, wcycle); wallcycle_stop(wcycle, ewcFORCE); @@ -1724,8 +1726,8 @@ void do_force(FILE *fplog, // These should be unified. if (haveLocalForceContribInCpuBuffer && !useGpuForcesHaloExchange) { - stateGpu->copyForcesToGpu(forceWithShift, gmx::StatePropagatorDataGpu::AtomLocality::Local); - dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::Local, + stateGpu->copyForcesToGpu(forceWithShift, AtomLocality::Local); + dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::Local, useGpuFBufOps == BufferOpsUseGpu::True)); } if (useGpuForcesHaloExchange) @@ -1739,17 +1741,17 @@ void do_force(FILE *fplog, // push the event into the dependencyList nbv->stream_local_wait_for_nonlocal(); } - nbv->atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality::Local, + nbv->atomdata_add_nbat_f_to_f_gpu(AtomLocality::Local, stateGpu->getForces(), pmeForcePtr, dependencyList, stepWork.useGpuPmeFReduction, haveLocalForceContribInCpuBuffer); - stateGpu->copyForcesFromGpu(forceWithShift, gmx::StatePropagatorDataGpu::AtomLocality::Local); - stateGpu->waitForcesReadyOnHost(gmx::StatePropagatorDataGpu::AtomLocality::Local); + stateGpu->copyForcesFromGpu(forceWithShift, AtomLocality::Local); + stateGpu->waitForcesReadyOnHost(AtomLocality::Local); } else { - nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local, forceWithShift); + nbv->atomdata_add_nbat_f_to_f(AtomLocality::Local, forceWithShift); } } diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp index a01c2decad..803dfb1103 100644 --- a/src/gromacs/mdrun/md.cpp +++ b/src/gromacs/mdrun/md.cpp @@ -749,8 +749,8 @@ void gmx::LegacySimulator::do_md() // TODO: Move to after all booleans are defined. if (useGpuForUpdate && !bFirstStep) { - stateGpu->copyCoordinatesFromGpu(ArrayRef(state->x), StatePropagatorDataGpu::AtomLocality::Local); - stateGpu->waitCoordinatesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local); + stateGpu->copyCoordinatesFromGpu(ArrayRef(state->x), AtomLocality::Local); + stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); } /* PME grid + cut-off optimization with GPUs or PME nodes */ pme_loadbal_do(pme_loadbal, cr, @@ -816,8 +816,8 @@ void gmx::LegacySimulator::do_md() // - When needed for the output. if (bNS || do_per_step(step, ir->nstvout)) { - stateGpu->copyVelocitiesFromGpu(state->v, StatePropagatorDataGpu::AtomLocality::Local); - stateGpu->waitVelocitiesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local); + stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local); + stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local); } // Copy coordinate from the GPU when needed: @@ -828,8 +828,8 @@ void gmx::LegacySimulator::do_md() (runScheduleWork->domainWork.haveCpuBondedWork || runScheduleWork->domainWork.haveFreeEnergyWork) || do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)) { - stateGpu->copyCoordinatesFromGpu(ArrayRef(state->x), StatePropagatorDataGpu::AtomLocality::Local); - stateGpu->waitCoordinatesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local); + stateGpu->copyCoordinatesFromGpu(ArrayRef(state->x), AtomLocality::Local); + stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); } } @@ -1278,11 +1278,11 @@ void gmx::LegacySimulator::do_md() integrator->setPbc(&pbc); // Copy data to the GPU after buffers might have being reinitialized - stateGpu->copyVelocitiesToGpu(state->v, StatePropagatorDataGpu::AtomLocality::Local); - stateGpu->copyCoordinatesToGpu(ArrayRef(state->x), StatePropagatorDataGpu::AtomLocality::Local); + stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local); + stateGpu->copyCoordinatesToGpu(ArrayRef(state->x), AtomLocality::Local); } - stateGpu->copyForcesToGpu(ArrayRef(f), StatePropagatorDataGpu::AtomLocality::All); + stateGpu->copyForcesToGpu(ArrayRef(f), AtomLocality::All); // TODO: Use StepWorkload fields. bool useGpuFBufferOps = simulationWork.useGpuBufferOps && !(bCalcVir || bCalcEner); @@ -1291,7 +1291,7 @@ void gmx::LegacySimulator::do_md() bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple)); // This applies Leap-Frog, LINCS and SETTLE in succession - integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent(StatePropagatorDataGpu::AtomLocality::Local, useGpuFBufferOps), + integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::Local, useGpuFBufferOps), ir->delta_t, true, bCalcVir, shake_vir, doTempCouple, ekind->tcstat, doParrinelloRahman, ir->nstpcouple*ir->delta_t, M); @@ -1301,12 +1301,11 @@ void gmx::LegacySimulator::do_md() // - Temperature is needed for the next step. if (bGStat || needHalfStepKineticEnergy) { - stateGpu->copyVelocitiesFromGpu(state->v, StatePropagatorDataGpu::AtomLocality::Local); - stateGpu->waitVelocitiesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local); - stateGpu->copyCoordinatesFromGpu(ArrayRef(state->x), StatePropagatorDataGpu::AtomLocality::Local); - stateGpu->waitCoordinatesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local); + stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local); + stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local); + stateGpu->copyCoordinatesFromGpu(ArrayRef(state->x), AtomLocality::Local); + stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); } - } else { @@ -1451,8 +1450,8 @@ void gmx::LegacySimulator::do_md() // TODO: The special case of removing CM motion should be dealt more gracefully if (useGpuForUpdate) { - stateGpu->copyCoordinatesToGpu(ArrayRef(state->x), StatePropagatorDataGpu::AtomLocality::Local); - stateGpu->waitCoordinatesCopiedToDevice(StatePropagatorDataGpu::AtomLocality::Local); + stateGpu->copyCoordinatesToGpu(ArrayRef(state->x), AtomLocality::Local); + stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local); } } } diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 0a5dca3b8b..238c5567b5 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -1333,8 +1333,8 @@ int Mdrunner::mdrunner() if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd)) { GMX_RELEASE_ASSERT(devFlags.enableGpuBufferOps, "Must use GMX_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1"); - void *streamLocal = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::Local); - void *streamNonLocal = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal); + void *streamLocal = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local); + void *streamNonLocal = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal); void *coordinatesOnDeviceEvent = fr->nbv->get_x_on_device_event(); GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted( "NOTE: This run uses the 'GPU halo exchange' feature, enabled by the GMX_GPU_DD_COMMS environment variable."); @@ -1580,8 +1580,8 @@ int Mdrunner::mdrunner() if (gpusWereDetected && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME)) || devFlags.enableGpuBufferOps)) { const void *pmeStream = pme_gpu_get_device_stream(fr->pmedata); - const void *localStream = fr->nbv->gpu_nbv != nullptr ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::Local) : nullptr; - const void *nonLocalStream = fr->nbv->gpu_nbv != nullptr ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal) : nullptr; + const void *localStream = fr->nbv->gpu_nbv != nullptr ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local) : nullptr; + const void *nonLocalStream = fr->nbv->gpu_nbv != nullptr ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal) : nullptr; const void *deviceContext = pme_gpu_get_device_context(fr->pmedata); const int paddingSize = pme_gpu_get_padding_size(fr->pmedata); GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator) ? GpuApiCallBehavior::Async : GpuApiCallBehavior::Sync; diff --git a/src/gromacs/mdrun/tpi.cpp b/src/gromacs/mdrun/tpi.cpp index c0db5dd992..b58165c61a 100644 --- a/src/gromacs/mdrun/tpi.cpp +++ b/src/gromacs/mdrun/tpi.cpp @@ -669,7 +669,7 @@ LegacySimulator::do_tpi() /* TODO: Avoid updating all atoms at every bNS step */ fr->nbv->setAtomProperties(*mdatoms, fr->cginfo); - fr->nbv->constructPairlist(Nbnxm::InteractionLocality::Local, + fr->nbv->constructPairlist(InteractionLocality::Local, &top.excls, step, nrnb); bNS = FALSE; @@ -724,7 +724,7 @@ LegacySimulator::do_tpi() } /* Note: NonLocal refers to the inserted molecule */ - fr->nbv->convertCoordinates(Nbnxm::AtomLocality::NonLocal, false, x); + fr->nbv->convertCoordinates(AtomLocality::NonLocal, false, x); /* Clear some matrix variables */ clear_mat(force_vir); diff --git a/src/gromacs/nbnxm/locality.h b/src/gromacs/mdtypes/locality.h similarity index 80% rename from src/gromacs/nbnxm/locality.h rename to src/gromacs/mdtypes/locality.h index 6ebd4fb409..d8cbce4627 100644 --- a/src/gromacs/nbnxm/locality.h +++ b/src/gromacs/mdtypes/locality.h @@ -34,16 +34,18 @@ */ /*! \libinternal \file - * \brief Defines nbnxn locality enums + * \brief Defines atom and atom interaction locality enums * * \author Berk Hess - * \ingroup module_nbnxm + * \ingroup module_mdtypes */ -#ifndef GMX_NBNXM_LOCALITY_H -#define GMX_NBNXM_LOCALITY_H +#ifndef GMX_MDTYPES_LOCALITY_H +#define GMX_MDTYPES_LOCALITY_H -namespace Nbnxm +#include "gromacs/utility/enumerationhelpers.h" + +namespace gmx { /*! \brief Atom locality indicator: local, non-local, all. @@ -59,6 +61,9 @@ enum class AtomLocality : int Count = 3 //!< The number of atom locality types }; +/*! \brief Descriptive strings for atom localities */ +static const EnumerationArray c_atomLocalityNames = { "local", "non-local", "all" }; + /*! \brief Interaction locality indicator: local, non-local, all. * * Used for calls to: @@ -71,6 +76,9 @@ enum class InteractionLocality : int Count = 2 //!< The number of interaction locality types }; -} // namespace Nbnxm +/*! \brief Descriptive strings for interaction localities */ +static const EnumerationArray c_interactionLocalityNames = { "local", "non-local" }; + +} // namespace gmx -#endif // GMX_NBNXM_LOCALITY_H +#endif // GMX_MDTYPES_LOCALITY_H diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu.h b/src/gromacs/mdtypes/state_propagator_data_gpu.h index b5bc2699c0..5617395073 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu.h +++ b/src/gromacs/mdtypes/state_propagator_data_gpu.h @@ -57,6 +57,8 @@ #include "gromacs/utility/arrayref.h" #include "gromacs/utility/classhelpers.h" +#include "locality.h" + class GpuEventSynchronizer; namespace gmx @@ -66,19 +68,6 @@ class StatePropagatorDataGpu { public: - /*! \brief Atom locality indicator: local, non-local, all. - * - * \todo This should be managed by a separate object, since the localities - * are used here and in buffer ops. - */ - enum class AtomLocality : int - { - Local = 0, //!< Local atoms - NonLocal = 1, //!< Non-local atoms - All = 2, //!< Both local and non-local atoms - Count = 3 //!< The number of atom locality types - }; - /*! \brief Constructor * * The buffers are reallocated only at the reinit call, the padding is diff --git a/src/gromacs/nbnxm/atomdata.cpp b/src/gromacs/nbnxm/atomdata.cpp index 99b69b0f9a..bfd01537c5 100644 --- a/src/gromacs/nbnxm/atomdata.cpp +++ b/src/gromacs/nbnxm/atomdata.cpp @@ -1001,25 +1001,25 @@ void nbnxn_atomdata_copy_shiftvec(gmx_bool bDynamicBox, // This is slightly different from nbnxn_get_atom_range(...) at the end of the file // TODO: Combine if possible static void getAtomRanges(const Nbnxm::GridSet &gridSet, - const Nbnxm::AtomLocality locality, + const gmx::AtomLocality locality, int *gridBegin, int *gridEnd) { switch (locality) { - case Nbnxm::AtomLocality::All: + case gmx::AtomLocality::All: *gridBegin = 0; *gridEnd = gridSet.grids().size(); break; - case Nbnxm::AtomLocality::Local: + case gmx::AtomLocality::Local: *gridBegin = 0; *gridEnd = 1; break; - case Nbnxm::AtomLocality::NonLocal: + case gmx::AtomLocality::NonLocal: *gridBegin = 1; *gridEnd = gridSet.grids().size(); break; - case Nbnxm::AtomLocality::Count: + case gmx::AtomLocality::Count: GMX_ASSERT(false, "Count is invalid locality specifier"); break; } @@ -1027,7 +1027,7 @@ static void getAtomRanges(const Nbnxm::GridSet &gridSet, /* Copies (and reorders) the coordinates to nbnxn_atomdata_t */ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet &gridSet, - const Nbnxm::AtomLocality locality, + const gmx::AtomLocality locality, bool fillLocal, const rvec *coordinates, nbnxn_atomdata_t *nbat) @@ -1086,7 +1086,7 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet &gridSet, /* Copies (and reorders) the coordinates to nbnxn_atomdata_t on the GPU*/ void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet &gridSet, - const Nbnxm::AtomLocality locality, + const gmx::AtomLocality locality, bool fillLocal, gmx_nbnxn_gpu_t *gpu_nbv, DeviceBuffer d_x, @@ -1442,7 +1442,7 @@ static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbnxn_atomdata_t *nbat, /* Add the force array(s) from nbnxn_atomdata_t to f */ void reduceForces(nbnxn_atomdata_t *nbat, - const Nbnxm::AtomLocality locality, + const gmx::AtomLocality locality, const Nbnxm::GridSet &gridSet, rvec *f) { @@ -1461,7 +1461,7 @@ void reduceForces(nbnxn_atomdata_t *nbat, if (nbat->out.size() > 1) { - if (locality != Nbnxm::AtomLocality::All) + if (locality != gmx::AtomLocality::All) { gmx_incons("add_f_to_f called with nout>1 and locality!=eatAll"); } @@ -1494,7 +1494,7 @@ void reduceForces(nbnxn_atomdata_t *nbat, } /* Add the force array(s) from nbnxn_atomdata_t to f */ -void reduceForcesGpu(const Nbnxm::AtomLocality locality, +void reduceForcesGpu(const gmx::AtomLocality locality, DeviceBuffer totalForcesDevice, const Nbnxm::GridSet &gridSet, void *pmeForcesDevice, @@ -1543,7 +1543,7 @@ void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t &nbat, } } -void nbnxn_get_atom_range(const Nbnxm::AtomLocality atomLocality, +void nbnxn_get_atom_range(const gmx::AtomLocality atomLocality, const Nbnxm::GridSet &gridSet, int *atomStart, int *nAtoms) @@ -1551,19 +1551,19 @@ void nbnxn_get_atom_range(const Nbnxm::AtomLocality atomLocality, switch (atomLocality) { - case Nbnxm::AtomLocality::All: + case gmx::AtomLocality::All: *atomStart = 0; *nAtoms = gridSet.numRealAtomsTotal(); break; - case Nbnxm::AtomLocality::Local: + case gmx::AtomLocality::Local: *atomStart = 0; *nAtoms = gridSet.numRealAtomsLocal(); break; - case Nbnxm::AtomLocality::NonLocal: + case gmx::AtomLocality::NonLocal: *atomStart = gridSet.numRealAtomsLocal(); *nAtoms = gridSet.numRealAtomsTotal() - gridSet.numRealAtomsLocal(); break; - case Nbnxm::AtomLocality::Count: + case gmx::AtomLocality::Count: GMX_ASSERT(false, "Count is invalid locality specifier"); break; } diff --git a/src/gromacs/nbnxm/atomdata.h b/src/gromacs/nbnxm/atomdata.h index bb3984d4d3..d44d8ed9ef 100644 --- a/src/gromacs/nbnxm/atomdata.h +++ b/src/gromacs/nbnxm/atomdata.h @@ -41,12 +41,12 @@ #include "gromacs/gpu_utils/devicebuffer_datatype.h" #include "gromacs/gpu_utils/hostallocator.h" #include "gromacs/math/vectypes.h" +#include "gromacs/mdtypes/locality.h" #include "gromacs/utility/basedefinitions.h" #include "gromacs/utility/bitmask.h" #include "gromacs/utility/real.h" #include "gpu_types.h" -#include "locality.h" namespace gmx { @@ -319,7 +319,7 @@ void nbnxn_atomdata_copy_shiftvec(gmx_bool dynamic_box, * \param[in,out] nbat Data in NBNXM format, used for mapping formats and to locate the output buffer. */ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet &gridSet, - Nbnxm::AtomLocality locality, + gmx::AtomLocality locality, bool fillLocal, const rvec *coordinates, nbnxn_atomdata_t *nbat); @@ -337,7 +337,7 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet &gridSet, * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory. */ void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet &gridSet, - Nbnxm::AtomLocality locality, + gmx::AtomLocality locality, bool fillLocal, gmx_nbnxn_gpu_t *gpu_nbv, DeviceBuffer d_x, @@ -351,7 +351,7 @@ void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet &gridSet, * \param[out] totalForce Buffer to accumulate resulting force */ void reduceForces(nbnxn_atomdata_t *nbat, - Nbnxm::AtomLocality locality, + gmx::AtomLocality locality, const Nbnxm::GridSet &gridSet, rvec *totalForce); @@ -366,7 +366,7 @@ void reduceForces(nbnxn_atomdata_t *nbat, * \param[in] useGpuFPmeReduction Whether PME forces should be added. * \param[in] accumulateForce Whether there are usefull data already in the total force buffer. */ -void reduceForcesGpu(Nbnxm::AtomLocality locality, +void reduceForcesGpu(gmx::AtomLocality locality, DeviceBuffer totalForcesDevice, const Nbnxm::GridSet &gridSet, void *pmeForcesDevice, @@ -380,7 +380,7 @@ void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t &nbat, gmx::ArrayRef fshift); /* Get the atom start index and number of atoms for a given locality */ -void nbnxn_get_atom_range(Nbnxm::AtomLocality atomLocality, +void nbnxn_get_atom_range(gmx::AtomLocality atomLocality, const Nbnxm::GridSet &gridSet, int *atomStart, int *nAtoms); diff --git a/src/gromacs/nbnxm/benchmark/bench_setup.cpp b/src/gromacs/nbnxm/benchmark/bench_setup.cpp index bae840cb14..448321201f 100644 --- a/src/gromacs/nbnxm/benchmark/bench_setup.cpp +++ b/src/gromacs/nbnxm/benchmark/bench_setup.cpp @@ -234,7 +234,7 @@ setupNbnxmForBenchInstance(const KernelBenchOptions &options, atomInfo, system.coordinates, 0, nullptr); - nbv->constructPairlist(Nbnxm::InteractionLocality::Local, + nbv->constructPairlist(gmx::InteractionLocality::Local, &system.excls, 0, &nrnb); t_mdatoms mdatoms; @@ -322,20 +322,20 @@ static void setupAndRunInstance(const gmx::BenchmarkSystem &system, // Run pre-iteration to avoid cache misses for (int iter = 0; iter < options.numPreIterations; iter++) { - nbv->dispatchNonbondedKernel(InteractionLocality::Local, + nbv->dispatchNonbondedKernel(gmx::InteractionLocality::Local, ic, stepWork, enbvClearFYes, system.forceRec, &enerd, &nrnb); } const int numIterations = (doWarmup ? options.numWarmupIterations : options.numIterations); - const PairlistSet &pairlistSet = nbv->pairlistSets().pairlistSet(InteractionLocality::Local); + const PairlistSet &pairlistSet = nbv->pairlistSets().pairlistSet(gmx::InteractionLocality::Local); const gmx::index numPairs = pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_ + pairlistSet.natpair_q_; gmx_cycles_t cycles = gmx_cycles_read(); for (int iter = 0; iter < numIterations; iter++) { // Run the kernel without force clearing - nbv->dispatchNonbondedKernel(InteractionLocality::Local, + nbv->dispatchNonbondedKernel(gmx::InteractionLocality::Local, ic, stepWork, enbvClearFNo, system.forceRec, &enerd, &nrnb); diff --git a/src/gromacs/nbnxm/gpu_common.h b/src/gromacs/nbnxm/gpu_common.h index 5127db3c57..7dc72ce5e8 100644 --- a/src/gromacs/nbnxm/gpu_common.h +++ b/src/gromacs/nbnxm/gpu_common.h @@ -128,7 +128,7 @@ gpuAtomToInteractionLocality(const AtomLocality atomLocality) //NOLINTNEXTLINE(misc-definitions-in-headers) void setupGpuShortRangeWork(gmx_nbnxn_gpu_t *nb, const gmx::GpuBonded *gpuBonded, - const Nbnxm::InteractionLocality iLocality) + const gmx::InteractionLocality iLocality) { GMX_ASSERT(nb, "Need a valid nbnxn_gpu object"); @@ -151,14 +151,14 @@ void setupGpuShortRangeWork(gmx_nbnxn_gpu_t *nb, */ static bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t &nb, - const Nbnxm::InteractionLocality iLocality) + const gmx::InteractionLocality iLocality) { return nb.haveWork[iLocality]; } //NOLINTNEXTLINE(misc-definitions-in-headers) bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t *nb, - const Nbnxm::AtomLocality aLocality) + const gmx::AtomLocality aLocality) { GMX_ASSERT(nb, "Need a valid nbnxn_gpu object"); @@ -366,7 +366,12 @@ gpu_accumulate_timings(gmx_wallclock_gpu_nbnxn_t *timings, } } -//TODO: move into shared source file with gmx_compile_cpp_as_cuda +/*! \brief Attempts to complete nonbonded GPU task. + * + * See documentation in nbnxm_gpu.h for details. + * + * \todo Move into shared source file with gmx_compile_cpp_as_cuda + */ //NOLINTNEXTLINE(misc-definitions-in-headers) bool gpu_try_finish_task(gmx_nbnxn_gpu_t *nb, const gmx::StepWorkload &stepWork, diff --git a/src/gromacs/nbnxm/gpu_data_mgmt.h b/src/gromacs/nbnxm/gpu_data_mgmt.h index 7196ebe452..2237c3f0f6 100644 --- a/src/gromacs/nbnxm/gpu_data_mgmt.h +++ b/src/gromacs/nbnxm/gpu_data_mgmt.h @@ -48,9 +48,9 @@ #include "gromacs/gpu_utils/gpu_macros.h" #include "gromacs/mdtypes/interaction_const.h" +#include "gromacs/mdtypes/locality.h" #include "gpu_types.h" -#include "locality.h" struct NbnxnPairlistGpu; struct nbnxn_atomdata_t; @@ -77,7 +77,7 @@ gpu_init(const gmx_device_info_t gmx_unused *deviceInfo, GPU_FUNC_QUALIFIER void gpu_init_pairlist(gmx_nbnxn_gpu_t gmx_unused *nb, const struct NbnxnPairlistGpu gmx_unused *h_nblist, - InteractionLocality gmx_unused iloc) GPU_FUNC_TERM; + gmx::InteractionLocality gmx_unused iloc) GPU_FUNC_TERM; /** Initializes atom-data on the GPU, called at every pair search step. */ GPU_FUNC_QUALIFIER @@ -126,8 +126,8 @@ gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxn_gpu_t gmx_unused *nb) GP * Note: CUDA only. */ CUDA_FUNC_QUALIFIER -void *gpu_get_command_stream(gmx_nbnxn_gpu_t gmx_unused *nb, - InteractionLocality gmx_unused iloc) CUDA_FUNC_TERM_WITH_RETURN(nullptr); +void *gpu_get_command_stream(gmx_nbnxn_gpu_t gmx_unused *nb, + gmx::InteractionLocality gmx_unused iloc) CUDA_FUNC_TERM_WITH_RETURN(nullptr); /** Returns an opaque pointer to the GPU coordinate+charge array * Note: CUDA only. diff --git a/src/gromacs/nbnxm/gpu_types_common.h b/src/gromacs/nbnxm/gpu_types_common.h index 35270a8345..c818d534e8 100644 --- a/src/gromacs/nbnxm/gpu_types_common.h +++ b/src/gromacs/nbnxm/gpu_types_common.h @@ -44,9 +44,9 @@ #include "config.h" +#include "gromacs/mdtypes/locality.h" #include "gromacs/utility/enumerationhelpers.h" -#include "locality.h" #include "pairlist.h" #if GMX_GPU == GMX_GPU_OPENCL @@ -60,6 +60,9 @@ namespace Nbnxm { +using gmx::AtomLocality; +using gmx::InteractionLocality; + /*! \internal * \brief GPU region timers used for timing GPU kernels and H2D/D2H transfers. * diff --git a/src/gromacs/nbnxm/kerneldispatch.cpp b/src/gromacs/nbnxm/kerneldispatch.cpp index d7c68273e0..d4bf56f906 100644 --- a/src/gromacs/nbnxm/kerneldispatch.cpp +++ b/src/gromacs/nbnxm/kerneldispatch.cpp @@ -461,7 +461,7 @@ static void accountFlops(t_nrnb *nrnb, } void -nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality, +nonbonded_verlet_t::dispatchNonbondedKernel(gmx::InteractionLocality iLocality, const interaction_const_t &ic, const gmx::StepWorkload &stepWork, int clearF, @@ -517,7 +517,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality } void -nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocality, +nonbonded_verlet_t::dispatchFreeEnergyKernel(gmx::InteractionLocality iLocality, const t_forcerec *fr, rvec x[], gmx::ForceWithShiftForces *forceWithShiftForces, diff --git a/src/gromacs/nbnxm/nbnxm.cpp b/src/gromacs/nbnxm/nbnxm.cpp index 2f9fe49ed1..eae838fa15 100644 --- a/src/gromacs/nbnxm/nbnxm.cpp +++ b/src/gromacs/nbnxm/nbnxm.cpp @@ -130,7 +130,7 @@ void nonbonded_verlet_t::setAtomProperties(const t_mdatoms &mdatoms, nbnxn_atomdata_set(nbat.get(), pairSearch_->gridSet(), &mdatoms, atomInfo.data()); } -void nonbonded_verlet_t::convertCoordinates(const Nbnxm::AtomLocality locality, +void nonbonded_verlet_t::convertCoordinates(const gmx::AtomLocality locality, const bool fillLocal, gmx::ArrayRef coordinates) { @@ -145,7 +145,7 @@ void nonbonded_verlet_t::convertCoordinates(const Nbnxm::AtomLocality loca wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS); } -void nonbonded_verlet_t::convertCoordinatesGpu(const Nbnxm::AtomLocality locality, +void nonbonded_verlet_t::convertCoordinatesGpu(const gmx::AtomLocality locality, const bool fillLocal, DeviceBuffer d_x, GpuEventSynchronizer *xReadyOnDevice) @@ -168,8 +168,8 @@ gmx::ArrayRef nonbonded_verlet_t::getGridIndices() const } void -nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality locality, - gmx::ArrayRef force) +nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const gmx::AtomLocality locality, + gmx::ArrayRef force) { /* Skip the reduction if there was no short-range GPU work to do @@ -189,7 +189,7 @@ nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality } void -nonbonded_verlet_t::atomdata_add_nbat_f_to_f_gpu(const Nbnxm::AtomLocality locality, +nonbonded_verlet_t::atomdata_add_nbat_f_to_f_gpu(const gmx::AtomLocality locality, DeviceBuffer totalForcesDevice, void *forcesPmeDevice, gmx::ArrayRef dependencyList, @@ -256,7 +256,7 @@ nonbonded_verlet_t::atomdata_init_copy_x_to_nbat_x_gpu() Nbnxm::nbnxn_gpu_init_x_to_nbat_x(pairSearch_->gridSet(), gpu_nbv); } -void nonbonded_verlet_t::insertNonlocalGpuDependency(const Nbnxm::InteractionLocality interactionLocality) +void nonbonded_verlet_t::insertNonlocalGpuDependency(const gmx::InteractionLocality interactionLocality) { Nbnxm::nbnxnInsertNonlocalGpuDependency(gpu_nbv, interactionLocality); } diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h index b799738f75..2125e62f10 100644 --- a/src/gromacs/nbnxm/nbnxm.h +++ b/src/gromacs/nbnxm/nbnxm.h @@ -114,14 +114,13 @@ #include "gromacs/gpu_utils/devicebuffer_datatype.h" #include "gromacs/math/vectypes.h" +#include "gromacs/mdtypes/locality.h" #include "gromacs/utility/arrayref.h" #include "gromacs/utility/enumerationhelpers.h" #include "gromacs/utility/range.h" #include "gromacs/utility/real.h" -#include "locality.h" - -// TODO: Remove this include and the two nbnxm includes above +// TODO: Remove this include #include "nbnxm_gpu.h" struct gmx_device_info_t; @@ -256,7 +255,7 @@ struct nonbonded_verlet_t gmx::ArrayRef getGridIndices() const; //! Constructs the pairlist for the given locality - void constructPairlist(Nbnxm::InteractionLocality iLocality, + void constructPairlist(gmx::InteractionLocality iLocality, const t_blocka *excl, int64_t step, t_nrnb *nrnb); @@ -273,7 +272,7 @@ struct nonbonded_verlet_t * \param[in] fillLocal If the coordinates for filler particles should be zeroed. * \param[in] coordinates Coordinates in plain rvec format to be transformed. */ - void convertCoordinates(Nbnxm::AtomLocality locality, + void convertCoordinates(gmx::AtomLocality locality, bool fillLocal, gmx::ArrayRef coordinates); @@ -286,7 +285,7 @@ struct nonbonded_verlet_t * \param[in] d_x GPU coordinates buffer in plain rvec format to be transformed. * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory. */ - void convertCoordinatesGpu(Nbnxm::AtomLocality locality, + void convertCoordinatesGpu(gmx::AtomLocality locality, bool fillLocal, DeviceBuffer d_x, GpuEventSynchronizer *xReadyOnDevice); @@ -295,7 +294,7 @@ struct nonbonded_verlet_t void atomdata_init_copy_x_to_nbat_x_gpu(); //! Sync the nonlocal GPU stream with dependent tasks in the local queue. - void insertNonlocalGpuDependency(Nbnxm::InteractionLocality interactionLocality); + void insertNonlocalGpuDependency(gmx::InteractionLocality interactionLocality); //! Returns a reference to the pairlist sets const PairlistSets &pairlistSets() const @@ -310,14 +309,14 @@ struct nonbonded_verlet_t bool isDynamicPruningStepGpu(int64_t step) const; //! Dispatches the dynamic pruning kernel for the given locality, for CPU lists - void dispatchPruneKernelCpu(Nbnxm::InteractionLocality iLocality, + void dispatchPruneKernelCpu(gmx::InteractionLocality iLocality, const rvec *shift_vec); //! Dispatches the dynamic pruning kernel for GPU lists void dispatchPruneKernelGpu(int64_t step); //! \brief Executes the non-bonded kernel of the GPU or launches it on the GPU - void dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality, + void dispatchNonbondedKernel(gmx::InteractionLocality iLocality, const interaction_const_t &ic, const gmx::StepWorkload &stepWork, int clearF, @@ -326,7 +325,7 @@ struct nonbonded_verlet_t t_nrnb *nrnb); //! Executes the non-bonded free-energy kernel, always runs on the CPU - void dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocality, + void dispatchFreeEnergyKernel(gmx::InteractionLocality iLocality, const t_forcerec *fr, rvec x[], gmx::ForceWithShiftForces *forceWithShiftForces, @@ -341,7 +340,7 @@ struct nonbonded_verlet_t * \param [in] locality Local or non-local * \param [inout] force Force to be added to */ - void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality locality, + void atomdata_add_nbat_f_to_f(gmx::AtomLocality locality, gmx::ArrayRef force); /*! \brief Add the forces stored in nbat to total force using GPU buffer opse @@ -353,7 +352,7 @@ struct nonbonded_verlet_t * \param [in] useGpuFPmeReduction Whether PME forces should be added * \param [in] accumulateForce If the total force buffer already contains data */ - void atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality locality, + void atomdata_add_nbat_f_to_f_gpu(gmx::AtomLocality locality, DeviceBuffer totalForcesDevice, void *forcesPmeDevice, gmx::ArrayRef dependencyList, @@ -395,8 +394,8 @@ struct nonbonded_verlet_t real rlistInner); //! Set up internal flags that indicate what type of short-range work there is. - void setupGpuShortRangeWork(const gmx::GpuBonded *gpuBonded, - const Nbnxm::InteractionLocality iLocality) + void setupGpuShortRangeWork(const gmx::GpuBonded *gpuBonded, + const gmx::InteractionLocality iLocality) { if (useGpu() && !emulateGpu()) { @@ -405,7 +404,7 @@ struct nonbonded_verlet_t } //! Returns true if there is GPU short-range work for the given atom locality. - bool haveGpuShortRangeWork(const Nbnxm::AtomLocality aLocality) + bool haveGpuShortRangeWork(const gmx::AtomLocality aLocality) { return ((useGpu() && !emulateGpu()) && Nbnxm::haveGpuShortRangeWork(gpu_nbv, aLocality)); diff --git a/src/gromacs/nbnxm/nbnxm_gpu.h b/src/gromacs/nbnxm/nbnxm_gpu.h index c6eef3fa80..748484518f 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu.h +++ b/src/gromacs/nbnxm/nbnxm_gpu.h @@ -45,12 +45,12 @@ #include "gromacs/gpu_utils/gpu_macros.h" #include "gromacs/math/vectypes.h" +#include "gromacs/mdtypes/locality.h" #include "gromacs/utility/basedefinitions.h" #include "gromacs/utility/real.h" #include "atomdata.h" #include "gpu_types.h" -#include "locality.h" struct interaction_const_t; struct nbnxn_atomdata_t; @@ -81,7 +81,7 @@ class Grid; GPU_FUNC_QUALIFIER void gpu_copy_xq_to_gpu(gmx_nbnxn_gpu_t gmx_unused *nb, const struct nbnxn_atomdata_t gmx_unused *nbdata, - AtomLocality gmx_unused aloc) GPU_FUNC_TERM; + gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM; /*! \brief * Launch asynchronously the nonbonded force calculations. @@ -96,7 +96,7 @@ void gpu_copy_xq_to_gpu(gmx_nbnxn_gpu_t gmx_unused *nb, GPU_FUNC_QUALIFIER void gpu_launch_kernel(gmx_nbnxn_gpu_t gmx_unused *nb, const gmx::StepWorkload gmx_unused &stepWork, - InteractionLocality gmx_unused iloc) GPU_FUNC_TERM; + gmx::InteractionLocality gmx_unused iloc) GPU_FUNC_TERM; /*! \brief * Launch asynchronously the nonbonded prune-only kernel. @@ -134,9 +134,9 @@ void gpu_launch_kernel(gmx_nbnxn_gpu_t gmx_unused *nb, * \param [in] numParts Number of parts the pair list is split into in the rolling kernel. */ GPU_FUNC_QUALIFIER -void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t gmx_unused *nb, - InteractionLocality gmx_unused iloc, - int gmx_unused numParts) GPU_FUNC_TERM; +void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t gmx_unused *nb, + gmx::InteractionLocality gmx_unused iloc, + int gmx_unused numParts) GPU_FUNC_TERM; /*! \brief * Launch asynchronously the download of short-range forces from the GPU @@ -146,7 +146,7 @@ GPU_FUNC_QUALIFIER void gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused *nb, nbnxn_atomdata_t gmx_unused *nbatom, const gmx::StepWorkload gmx_unused &stepWork, - AtomLocality gmx_unused aloc) GPU_FUNC_TERM; + gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM; /*! \brief Attempts to complete nonbonded GPU task. * @@ -188,7 +188,7 @@ void gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused *nb, GPU_FUNC_QUALIFIER bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb, const gmx::StepWorkload gmx_unused &stepWork, - AtomLocality gmx_unused aloc, + gmx::AtomLocality gmx_unused aloc, real gmx_unused *e_lj, real gmx_unused *e_el, gmx::ArrayRef gmx_unused shiftForces, @@ -208,11 +208,11 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb, * \param[out] e_lj Pointer to the LJ energy output to accumulate into * \param[out] e_el Pointer to the electrostatics energy output to accumulate into * \param[out] shiftForces Shift forces buffer to accumulate into - */ + * \param[out] wcycle Pointer to wallcycle data structure */ GPU_FUNC_QUALIFIER float gpu_wait_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb, const gmx::StepWorkload gmx_unused &stepWork, - AtomLocality gmx_unused aloc, + gmx::AtomLocality gmx_unused aloc, real gmx_unused *e_lj, real gmx_unused *e_el, gmx::ArrayRef gmx_unused shiftForces, @@ -245,7 +245,7 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused &grid, gmx_nbnxn_gpu_t gmx_unused *gpu_nbv, DeviceBuffer gmx_unused d_x, GpuEventSynchronizer gmx_unused *xReadyOnDevice, - Nbnxm::AtomLocality gmx_unused locality, + gmx::AtomLocality gmx_unused locality, int gmx_unused gridId, int gmx_unused numColumnsMax) CUDA_FUNC_TERM; @@ -255,7 +255,7 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused &grid, */ CUDA_FUNC_QUALIFIER void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxn_gpu_t gmx_unused *nb, - InteractionLocality gmx_unused interactionLocality) CUDA_FUNC_TERM; + gmx::InteractionLocality gmx_unused interactionLocality) CUDA_FUNC_TERM; /*! \brief Set up internal flags that indicate what type of short-range work there is. * @@ -272,7 +272,7 @@ void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxn_gpu_t gmx_unused *nb, GPU_FUNC_QUALIFIER void setupGpuShortRangeWork(gmx_nbnxn_gpu_t gmx_unused *nb, const gmx::GpuBonded gmx_unused *gpuBonded, - Nbnxm::InteractionLocality gmx_unused iLocality) GPU_FUNC_TERM; + gmx::InteractionLocality gmx_unused iLocality) GPU_FUNC_TERM; /*! \brief Returns true if there is GPU short-range work for the given atom locality. * @@ -285,7 +285,7 @@ void setupGpuShortRangeWork(gmx_nbnxn_gpu_t gmx_unused *nb, */ GPU_FUNC_QUALIFIER bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t gmx_unused *nb, - Nbnxm::AtomLocality gmx_unused aLocality) GPU_FUNC_TERM_WITH_RETURN(false); + gmx::AtomLocality gmx_unused aLocality) GPU_FUNC_TERM_WITH_RETURN(false); /*! \brief Initialization for F buffer operations on GPU */ CUDA_FUNC_QUALIFIER @@ -311,7 +311,7 @@ void nbnxn_gpu_init_add_nbat_f_to_f(const int gmx_unused *cell * */ CUDA_FUNC_QUALIFIER -void nbnxn_gpu_add_nbat_f_to_f(AtomLocality gmx_unused atomLocality, +void nbnxn_gpu_add_nbat_f_to_f(gmx::AtomLocality gmx_unused atomLocality, DeviceBuffer gmx_unused totalForcesDevice, gmx_nbnxn_gpu_t gmx_unused *gpu_nbv, void gmx_unused *pmeForcesDevice, diff --git a/src/gromacs/nbnxm/nbnxm_setup.cpp b/src/gromacs/nbnxm/nbnxm_setup.cpp index cd13b9cc44..05a976dbfc 100644 --- a/src/gromacs/nbnxm/nbnxm_setup.cpp +++ b/src/gromacs/nbnxm/nbnxm_setup.cpp @@ -308,12 +308,12 @@ PairlistSets::PairlistSets(const PairlistParams &pairlistParams, params_(pairlistParams), minimumIlistCountForGpuBalancing_(minimumIlistCountForGpuBalancing) { - localSet_ = std::make_unique(Nbnxm::InteractionLocality::Local, + localSet_ = std::make_unique(gmx::InteractionLocality::Local, params_); if (haveMultipleDomains) { - nonlocalSet_ = std::make_unique(Nbnxm::InteractionLocality::NonLocal, + nonlocalSet_ = std::make_unique(gmx::InteractionLocality::NonLocal, params_); } } diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp index 867f289da5..a182226e3c 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp @@ -606,6 +606,10 @@ static inline int calc_shmem_required_prune(const int num_threads_z) return shmem; } +/*! \brief + * Launch the pairlist prune only kernel for the given locality. + * \p numParts tells in how many parts, i.e. calls the list will be pruned. + */ void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t *nb, const InteractionLocality iloc, const int numParts) diff --git a/src/gromacs/nbnxm/pairlist.cpp b/src/gromacs/nbnxm/pairlist.cpp index 4ab89bc072..2971d14cfa 100644 --- a/src/gromacs/nbnxm/pairlist.cpp +++ b/src/gromacs/nbnxm/pairlist.cpp @@ -83,7 +83,7 @@ using BoundingBox1D = Nbnxm::BoundingBox1D; // TODO: Remove when refactoring thi using Grid = Nbnxm::Grid; // TODO: Remove when refactoring this file // Convience alias for partial Nbnxn namespace usage -using InteractionLocality = Nbnxm::InteractionLocality; +using InteractionLocality = gmx::InteractionLocality; /* We shift the i-particles backward for PBC. * This leads to more conditionals than shifting forward. @@ -683,8 +683,8 @@ NbnxnPairlistGpu::NbnxnPairlistGpu(gmx::PinningPolicy pinningPolicy) : } // TODO: Move to pairlistset.cpp -PairlistSet::PairlistSet(const Nbnxm::InteractionLocality locality, - const PairlistParams &pairlistParams) : +PairlistSet::PairlistSet(const InteractionLocality locality, + const PairlistParams &pairlistParams) : locality_(locality), params_(pairlistParams) { @@ -3966,7 +3966,7 @@ static void sort_sci(NbnxnPairlistGpu *nbl) /* Returns the i-zone range for pairlist construction for the give locality */ static Range getIZoneRange(const Nbnxm::GridSet::DomainSetup &domainSetup, - const Nbnxm::InteractionLocality locality) + const InteractionLocality locality) { if (domainSetup.doTestParticleInsertion) { @@ -3993,9 +3993,9 @@ getIZoneRange(const Nbnxm::GridSet::DomainSetup &domainSetup, /* Returns the j-zone range for pairlist construction for the give locality and i-zone */ static Range -getJZoneRange(const gmx_domdec_zones_t &ddZones, - const Nbnxm::InteractionLocality locality, - const int iZone) +getJZoneRange(const gmx_domdec_zones_t &ddZones, + const InteractionLocality locality, + const int iZone) { if (locality == InteractionLocality::Local) { @@ -4327,7 +4327,7 @@ PairlistSets::construct(const InteractionLocality iLocality, nbat, excl, minimumIlistCountForGpuBalancing_, nrnb, &pairSearch->cycleCounting_); - if (iLocality == Nbnxm::InteractionLocality::Local) + if (iLocality == InteractionLocality::Local) { outerListCreationStep_ = step; } @@ -4351,10 +4351,10 @@ PairlistSets::construct(const InteractionLocality iLocality, } void -nonbonded_verlet_t::constructPairlist(const Nbnxm::InteractionLocality iLocality, - const t_blocka *excl, - int64_t step, - t_nrnb *nrnb) +nonbonded_verlet_t::constructPairlist(const InteractionLocality iLocality, + const t_blocka *excl, + int64_t step, + t_nrnb *nrnb) { pairlistSets_->construct(iLocality, pairSearch_.get(), nbat.get(), excl, step, nrnb); diff --git a/src/gromacs/nbnxm/pairlist.h b/src/gromacs/nbnxm/pairlist.h index 6e2cf8bf55..136a2f6c29 100644 --- a/src/gromacs/nbnxm/pairlist.h +++ b/src/gromacs/nbnxm/pairlist.h @@ -40,6 +40,7 @@ #include "gromacs/gpu_utils/hostallocator.h" #include "gromacs/math/vectypes.h" +#include "gromacs/mdtypes/locality.h" #include "gromacs/mdtypes/nblist.h" #include "gromacs/utility/basedefinitions.h" #include "gromacs/utility/defaultinitializationallocator.h" @@ -49,7 +50,6 @@ // This file with constants is separate from this file to be able // to include it during OpenCL jitting without including config.h #include "constants.h" -#include "locality.h" #include "pairlistparams.h" struct NbnxnPairlistCpuWork; diff --git a/src/gromacs/nbnxm/pairlistparams.h b/src/gromacs/nbnxm/pairlistparams.h index 78caf48e3c..9858360168 100644 --- a/src/gromacs/nbnxm/pairlistparams.h +++ b/src/gromacs/nbnxm/pairlistparams.h @@ -47,11 +47,10 @@ #include "config.h" +#include "gromacs/mdtypes/locality.h" #include "gromacs/utility/enumerationhelpers.h" #include "gromacs/utility/real.h" -#include "locality.h" - namespace Nbnxm { enum class KernelType; diff --git a/src/gromacs/nbnxm/pairlistset.h b/src/gromacs/nbnxm/pairlistset.h index 2fad5d6f17..8197f131d5 100644 --- a/src/gromacs/nbnxm/pairlistset.h +++ b/src/gromacs/nbnxm/pairlistset.h @@ -52,10 +52,10 @@ #include #include "gromacs/math/vectypes.h" +#include "gromacs/mdtypes/locality.h" #include "gromacs/utility/basedefinitions.h" #include "gromacs/utility/real.h" -#include "locality.h" #include "pairlist.h" struct nbnxn_atomdata_t; @@ -77,7 +77,7 @@ class PairlistSet { public: //! Constructor: initializes the pairlist set as empty - PairlistSet(Nbnxm::InteractionLocality locality, + PairlistSet(gmx::InteractionLocality locality, const PairlistParams &listParams); ~PairlistSet(); @@ -96,7 +96,7 @@ class PairlistSet const rvec *shift_vec); //! Returns the locality - Nbnxm::InteractionLocality locality() const + gmx::InteractionLocality locality() const { return locality_; } @@ -128,7 +128,7 @@ class PairlistSet private: //! The locality of the pairlist set - Nbnxm::InteractionLocality locality_; + gmx::InteractionLocality locality_; //! List of pairlists in CPU layout std::vector cpuLists_; //! List of working list for rebalancing CPU lists diff --git a/src/gromacs/nbnxm/pairlistsets.h b/src/gromacs/nbnxm/pairlistsets.h index 2cf5c41cf9..404c6d16b8 100644 --- a/src/gromacs/nbnxm/pairlistsets.h +++ b/src/gromacs/nbnxm/pairlistsets.h @@ -49,7 +49,8 @@ #include -#include "locality.h" +#include "gromacs/mdtypes/locality.h" + #include "pairlistparams.h" struct nbnxn_atomdata_t; @@ -68,7 +69,7 @@ class PairlistSets int minimumIlistCountForGpuBalancing); //! Construct the pairlist set for the given locality - void construct(Nbnxm::InteractionLocality iLocality, + void construct(gmx::InteractionLocality iLocality, PairSearch *pairSearch, nbnxn_atomdata_t *nbat, const t_blocka *excl, @@ -76,7 +77,7 @@ class PairlistSets t_nrnb *nrnb); //! Dispatches the dynamic pruning kernel for the given locality - void dispatchPruneKernel(Nbnxm::InteractionLocality iLocality, + void dispatchPruneKernel(gmx::InteractionLocality iLocality, const nbnxn_atomdata_t *nbat, const rvec *shift_vec); @@ -119,9 +120,9 @@ class PairlistSets } //! Returns the pair-list set for the given locality - const PairlistSet &pairlistSet(Nbnxm::InteractionLocality iLocality) const + const PairlistSet &pairlistSet(gmx::InteractionLocality iLocality) const { - if (iLocality == Nbnxm::InteractionLocality::Local) + if (iLocality == gmx::InteractionLocality::Local) { return *localSet_; } @@ -134,9 +135,9 @@ class PairlistSets private: //! Returns the pair-list set for the given locality - PairlistSet &pairlistSet(Nbnxm::InteractionLocality iLocality) + PairlistSet &pairlistSet(gmx::InteractionLocality iLocality) { - if (iLocality == Nbnxm::InteractionLocality::Local) + if (iLocality == gmx::InteractionLocality::Local) { return *localSet_; } diff --git a/src/gromacs/nbnxm/prunekerneldispatch.cpp b/src/gromacs/nbnxm/prunekerneldispatch.cpp index cc5e3d08d9..b371054068 100644 --- a/src/gromacs/nbnxm/prunekerneldispatch.cpp +++ b/src/gromacs/nbnxm/prunekerneldispatch.cpp @@ -48,9 +48,9 @@ #include "kernels_simd_4xm/kernel_prune.h" void -PairlistSets::dispatchPruneKernel(const Nbnxm::InteractionLocality iLocality, - const nbnxn_atomdata_t *nbat, - const rvec *shift_vec) +PairlistSets::dispatchPruneKernel(const gmx::InteractionLocality iLocality, + const nbnxn_atomdata_t *nbat, + const rvec *shift_vec) { pairlistSet(iLocality).dispatchPruneKernel(nbat, shift_vec); } @@ -90,8 +90,8 @@ PairlistSet::dispatchPruneKernel(const nbnxn_atomdata_t *nbat, } void -nonbonded_verlet_t::dispatchPruneKernelCpu(const Nbnxm::InteractionLocality iLocality, - const rvec *shift_vec) +nonbonded_verlet_t::dispatchPruneKernelCpu(const gmx::InteractionLocality iLocality, + const rvec *shift_vec) { pairlistSets_->dispatchPruneKernel(iLocality, nbat.get(), shift_vec); } @@ -104,7 +104,7 @@ void nonbonded_verlet_t::dispatchPruneKernelGpu(int64_t step) const bool stepIsEven = (pairlistSets().numStepsWithPairlist(step) % 2 == 0); Nbnxm::gpu_launch_kernel_pruneonly(gpu_nbv, - stepIsEven ? Nbnxm::InteractionLocality::Local : Nbnxm::InteractionLocality::NonLocal, + stepIsEven ? gmx::InteractionLocality::Local : gmx::InteractionLocality::NonLocal, pairlistSets().params().numRollingPruningParts); wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_NONBONDED); -- 2.22.0