# GPU update flag enables GPU update+constraints as well as buffer ops (dependency)
if context.opts.gpuupdate:
- context.env.set_env_var('GMX_USE_GPU_BUFFER_OPS', "1")
context.env.set_env_var('GMX_FORCE_UPDATE_DEFAULT_GPU', "1")
regressiontests_path = context.workspace.get_project_dir(Project.REGRESSIONTESTS)
/*! \brief Set up flags that have the lifetime of the domain indicating what type of work is there to compute.
 *
 * \param[in] inputrec       The user input record.
 * \param[in] fr             The force record (used to detect CPU listed-force and FEP work).
 * \param[in] pull_work      Pull-code work struct (may be nullptr).
 * \param[in] ed             Essential-dynamics sampling data (may be nullptr).
 * \param[in] idef           Interaction definitions of the current domain.
 * \param[in] fcd            Force-constant data for listed interactions.
 * \param[in] mdatoms        Atom data (used to detect perturbed atoms).
 * \param[in] simulationWork Simulation-lifetime workload flags (CPU PME/nonbonded tasks).
 * \param[in] stepWork       Step workload flags.
 *
 * \returns The domain-lifetime workload description.
 */
static DomainLifetimeWorkload
setupDomainLifetimeWorkload(const t_inputrec         &inputrec,
                            const t_forcerec         &fr,
                            const pull_t             *pull_work,
                            const gmx_edsam          *ed,
                            const t_idef             &idef,
                            const t_fcdata           &fcd,
                            const t_mdatoms          &mdatoms,
                            const SimulationWorkload &simulationWork,
                            const StepWorkload       &stepWork)
{
    DomainLifetimeWorkload domainWork;
    // Note that haveSpecialForces is constant over the whole run
    domainWork.haveCpuListedForceWork = haveCpuListedForces(fr, idef, fcd);
    // Note that haveFreeEnergyWork is constant over the whole run
    domainWork.haveFreeEnergyWork = (fr.efep != efepNO && mdatoms.nPerturbed != 0);
    // We assume we have local force work if there are CPU
    // force tasks including PME or nonbondeds.
    domainWork.haveCpuLocalForceWork = domainWork.haveSpecialForces || domainWork.haveCpuListedForceWork || domainWork.haveFreeEnergyWork ||
        simulationWork.useCpuNonbonded || simulationWork.useCpuPme;
    return domainWork;
}
// on virial steps the CPU reduction path is taken
// TODO: remove flags.computeEnergy, ref #3128
flags.useGpuFBufferOps = simulationWork.useGpuBufferOps && !(flags.computeVirial || flags.computeEnergy);
- flags.useGpuPmeFReduction = flags.useGpuFBufferOps && (simulationWork.usePmeGpu &&
- (rankHasPmeDuty || simulationWork.useGpuPmePPCommunication));
+ flags.useGpuPmeFReduction = flags.useGpuFBufferOps && (simulationWork.useGpuPme &&
+ (rankHasPmeDuty || simulationWork.useGpuPmePpCommunication));
return flags;
}
const StepWorkload &stepWork = runScheduleWork->stepWork;
- const bool useGpuPmeOnThisRank = simulationWork.usePmeGpu && thisRankHasDuty(cr, DUTY_PME);
+ const bool useGpuPmeOnThisRank = simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME);
const int pmeFlags = makePmeFlags(stepWork);
// Switches on whether to use GPU for position and force buffer operations
}
}
+ // Copy coordinate from the GPU if update is on the GPU and there are forces to be computed on the CPU. At search steps the
+ // current coordinates are already on the host, hence copy is not needed.
+ if (simulationWork.useGpuUpdate && !stepWork.doNeighborSearch &&
+ runScheduleWork->domainWork.haveCpuLocalForceWork)
+ {
+ stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), AtomLocality::Local);
+ stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
+ }
+
#if GMX_MPI
if (!thisRankHasDuty(cr, DUTY_PME))
{
* and domain decomposition does not use the graph,
* we do not need to worry about shifting.
*/
- bool reinitGpuPmePpComms = simulationWork.useGpuPmePPCommunication && (stepWork.doNeighborSearch);
- bool sendCoordinatesFromGpu = simulationWork.useGpuPmePPCommunication && !(stepWork.doNeighborSearch);
+ bool reinitGpuPmePpComms = simulationWork.useGpuPmePpCommunication && (stepWork.doNeighborSearch);
+ bool sendCoordinatesFromGpu = simulationWork.useGpuPmePpCommunication && !(stepWork.doNeighborSearch);
gmx_pme_send_coordinates(fr, cr, box, as_rvec_array(x.unpaddedArrayRef().data()),
lambda[efptCOUL], lambda[efptVDW],
(stepWork.computeVirial || stepWork.computeEnergy),
- step, simulationWork.useGpuPmePPCommunication, reinitGpuPmePpComms,
+ step, simulationWork.useGpuPmePpCommunication, reinitGpuPmePpComms,
sendCoordinatesFromGpu, wcycle);
}
#endif /* GMX_MPI */
top->idef,
*fcd,
*mdatoms,
+ simulationWork,
stepWork);
wallcycle_start_nocount(wcycle, ewcNS);
}
}
- // TODO move this into StepWorkload
- const bool useCpuPmeFReduction = thisRankHasDuty(cr, DUTY_PME) && !stepWork.useGpuPmeFReduction;
- // TODO: move this into DomainLifetimeWorkload, including the second part of the condition
- const bool haveCpuLocalForces = (domainWork.haveSpecialForces || domainWork.haveCpuListedForceWork || useCpuPmeFReduction ||
- (fr->efep != efepNO));
-
if (havePPDomainDecomposition(cr))
{
/* We are done with the CPU compute.
if (useGpuForcesHaloExchange)
{
- if (haveCpuLocalForces)
+ if (domainWork.haveCpuLocalForceWork)
{
stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), AtomLocality::Local);
}
- gpuHaloExchange->communicateHaloForces(haveCpuLocalForces);
+ gpuHaloExchange->communicateHaloForces(domainWork.haveCpuLocalForceWork);
}
else
{
// If on GPU PME-PP comms path, receive forces from PME before GPU buffer ops
        // TODO refactor this and unify with below default-path call to the same function
- if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && simulationWork.useGpuPmePPCommunication)
+ if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && simulationWork.useGpuPmePpCommunication)
{
/* In case of node-splitting, the PP nodes receive the long-range
* forces, virial and energy from the PME nodes here.
*/
- pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd, simulationWork.useGpuPmePPCommunication, stepWork.useGpuPmeFReduction, wcycle);
+ pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd, simulationWork.useGpuPmePpCommunication, stepWork.useGpuPmeFReduction, wcycle);
}
// local atoms. This depends on whether there are CPU-based force tasks
// or when DD is active the halo exchange has resulted in contributions
// from the non-local part.
- const bool haveLocalForceContribInCpuBuffer = (haveCpuLocalForces || havePPDomainDecomposition(cr));
+ const bool haveLocalForceContribInCpuBuffer = (domainWork.haveCpuLocalForceWork || havePPDomainDecomposition(cr));
// TODO: move these steps as early as possible:
// - CPU f H2D should be as soon as all CPU-side forces are done
}
    // TODO refactor this and unify with above PME-PP GPU communication path call to the same function
- if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePPCommunication)
+ if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication)
{
/* In case of node-splitting, the PP nodes receive the long-range
* forces, virial and energy from the PME nodes here.
*/
pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd,
- simulationWork.useGpuPmePPCommunication, false, wcycle);
+ simulationWork.useGpuPmePpCommunication, false, wcycle);
}
if (stepWork.computeForces)
#include "gromacs/essentialdynamics/edsam.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/ewald/pme_load_balancing.h"
+#include "gromacs/ewald/pme_pp_comm_gpu.h"
#include "gromacs/fileio/trxio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
// 2. The proper GPU syncronization is introduced, so that the H2D and D2H data copies can be performed in the separate
// stream owned by the StatePropagatorDataGpu
const auto &simulationWork = runScheduleWork->simulationWork;
- const bool useGpuForPme = simulationWork.usePmeGpu;
+ const bool useGpuForPme = simulationWork.useGpuPme;
const bool useGpuForNonbonded = simulationWork.useGpuNonbonded;
// Temporary solution to make sure that the buffer ops are offloaded when update is offloaded
const bool useGpuForBufferOps = simulationWork.useGpuBufferOps;
// TODO: Move to after all booleans are defined.
if (useGpuForUpdate && !bFirstStep)
{
- stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
+ stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
/* PME grid + cut-off optimization with GPUs or PME nodes */
stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
}
- // Copy coordinate from the GPU when needed:
- // - On search steps to keep copy on host (device buffers are reinitialized).
- // - There are CPU bonded forces that need current coordinates
- // - When needed for the output.
- if (bNS ||
- (runScheduleWork->domainWork.haveCpuBondedWork || runScheduleWork->domainWork.haveFreeEnergyWork) ||
- do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed))
+ // Copy coordinate from the GPU when needed at the search step.
+ // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
+ // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
+ if (bNS)
{
- stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
+ stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
}
}
}
+ // Copy coordinate from the GPU for the output if the update is offloaded and
+ // coordinates have not already been copied for i) search or ii) CPU force tasks.
+ if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork &&
+ (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)))
+ {
+ stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
+ stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
+ }
/* Now we have the energies and forces corresponding to the
* coordinates at time t. We must output all of this before
* the update.
// Copy data to the GPU after buffers might have being reinitialized
stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
- stateGpu->copyCoordinatesToGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
+ stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
}
stateGpu->copyForcesToGpu(ArrayRef<RVec>(f), AtomLocality::All);
{
stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
- stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
- stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
}
else
if (bGStat || needEkinAtNextStep || doInterSimSignal)
{
+ // Copy coordinates when needed to stop the CM motion.
+ if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
+ {
+ stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
+ stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
+ }
// Since we're already communicating at this step, we
// can propagate intra-simulation signals. Note that
// check_nstglobalcomm has the responsibility for
// TODO: The special case of removing CM motion should be dealt more gracefully
if (useGpuForUpdate)
{
- stateGpu->copyCoordinatesToGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
+ stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
}
}
walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
+ if (fr->pmePpCommGpu)
+ {
+ // destroy object since it is no longer required. (This needs to be done while the GPU context still exists.)
+ fr->pmePpCommGpu.reset();
+ }
+
global_stat_destroy(gstat);
}
// and report those features that are enabled.
const DevelopmentFeatureFlags devFlags = manageDevelopmentFeatures(mdlog, useGpuForNonbonded, useGpuForPme);
+ // NOTE: The devFlags need decideWhetherToUseGpusForNonbonded(...) and decideWhetherToUseGpusForPme(...) for overrides,
+ // decideWhetherToUseGpuForUpdate() needs devFlags for the '-update auto' override, hence the interleaving.
+ // NOTE: When the simulationWork is constructed, the useGpuForUpdate overrides the devFlags.enableGpuBufferOps.
+ try
+ {
+ useGpuForUpdate = decideWhetherToUseGpuForUpdate(devFlags.forceGpuUpdateDefaultOn,
+ useDomainDecomposition,
+ useGpuForPme,
+ useGpuForNonbonded,
+ updateTarget,
+ gpusWereDetected,
+ *inputrec,
+ gmx_mtop_interaction_count(mtop, IF_VSITE) > 0,
+ doEssentialDynamics,
+ gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
+ gmx_mtop_ftype_count(mtop, F_DISRES) > 0,
+ replExParams.exchangeInterval > 0);
+ }
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+
+
// Build restraints.
// TODO: hide restraint implementation details from Mdrunner.
// There is nothing unique about restraints at this point as far as the
// TODO remove need to pass local stream into GPU halo exchange - Redmine #3093
if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd))
{
- GMX_RELEASE_ASSERT(devFlags.enableGpuBufferOps, "Must use GMX_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
+ GMX_RELEASE_ASSERT(devFlags.enableGpuBufferOps, "Must use GMX_USE_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
void *streamLocal = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local);
void *streamNonLocal = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal);
void *coordinatesOnDeviceEvent = fr->nbv->get_x_on_device_event();
fr->cginfo_mb);
}
- if (updateTarget == TaskTarget::Gpu)
- {
- if (SIMMASTER(cr))
- {
- gmx_fatal(FARGS, "It is currently not possible to redirect the calculation "
- "of update and constraints to the GPU!");
- }
- }
-
- // Before we start the actual simulator, try if we can run the update task on the GPU.
- useGpuForUpdate = decideWhetherToUseGpuForUpdate(devFlags.forceGpuUpdateDefaultOn,
- DOMAINDECOMP(cr),
- useGpuForPme,
- useGpuForNonbonded,
- devFlags.enableGpuBufferOps,
- updateTarget,
- gpusWereDetected,
- *inputrec,
- mdAtoms->mdatoms()->haveVsites,
- doEssentialDynamics,
- gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
- gmx_mtop_ftype_count(mtop, F_DISRES) > 0,
- replExParams.exchangeInterval > 0);
-
const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
false,
inputrec, doRerun, vsite.get(), ms, replExParams,
const bool useModularSimulator = inputIsCompatibleWithModularSimulator && !(getenv("GMX_DISABLE_MODULAR_SIMULATOR") != nullptr);
+ // TODO This is not the right place to manage the lifetime of
+ // this data structure, but currently it's the easiest way to
+ // make it work.
+ MdrunScheduleWorkload runScheduleWork;
+ // Also populates the simulation constant workload description.
+ runScheduleWork.simulationWork = createSimulationWorkload(useGpuForNonbonded,
+ pmeRunMode,
+ useGpuForBonded,
+ useGpuForUpdate,
+ devFlags.enableGpuBufferOps,
+ devFlags.enableGpuHaloExchange,
+ devFlags.enableGpuPmePPComm);
+
std::unique_ptr<gmx::StatePropagatorDataGpu> stateGpu;
- if (gpusWereDetected && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME)) || devFlags.enableGpuBufferOps))
+ if (gpusWereDetected && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME)) || runScheduleWork.simulationWork.useGpuBufferOps))
{
const void *pmeStream = pme_gpu_get_device_stream(fr->pmedata);
const void *localStream = fr->nbv->gpu_nbv != nullptr ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local) : nullptr;
fr->stateGpu = stateGpu.get();
}
- // TODO This is not the right place to manage the lifetime of
- // this data structure, but currently it's the easiest way to
- // make it work.
- MdrunScheduleWorkload runScheduleWork;
- // Also populates the simulation constant workload description.
- runScheduleWork.simulationWork = createSimulationWorkload(useGpuForNonbonded,
- useGpuForPme,
- (pmeRunMode == PmeRunMode::GPU),
- useGpuForBonded,
- useGpuForUpdate,
- devFlags.enableGpuBufferOps,
- devFlags.enableGpuHaloExchange,
- devFlags.enableGpuPmePPComm);
-
-
GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
SimulatorBuilder simulatorBuilder;
bool haveCpuListedForceWork = false;
//! Whether the current nstlist step-range has special forces on the CPU.
bool haveSpecialForces = false;
+ //! Whether there are currently any local forces to be computed on the CPU
+ bool haveCpuLocalForceWork = false;
// TODO
//! Whether the current nstlist step-range Free energy work on the CPU.
class SimulationWorkload
{
public:
+ //! If we have calculation of short range nonbondeds on CPU
+ bool useCpuNonbonded = false;
//! If we have calculation of short range nonbondeds on GPU
bool useGpuNonbonded = false;
//! If we have calculation of long range PME in GPU
- bool usePmeGpu = false;
+ bool useCpuPme = false;
+ //! If we have calculation of long range PME in GPU
+ bool useGpuPme = false;
//! If PME FFT solving is done on GPU.
- bool usePmeFftGpu = false;
+ bool useGpuPmeFft = false;
//! If bonded interactions are calculated on GPU.
bool useGpuBonded = false;
//! If update and constraint solving is performed on GPU.
//! If domain decomposition halo exchange is performed on GPU.
bool useGpuHaloExchange = false;
//! If direct PP-PME communication between GPU is used.
- bool useGpuPmePPCommunication = false;
+ bool useGpuPmePpCommunication = false;
//! If direct GPU-GPU communication is enabled.
bool useGpuDirectCommunication = false;
};
const bool isDomainDecomposition,
const bool useGpuForPme,
const bool useGpuForNonbonded,
- const bool useGpuForBufferOps,
const TaskTarget updateTarget,
const bool gpusWereDetected,
const t_inputrec &inputrec,
{
errorMessage += "Domain decomposition is not supported.\n";
}
- // Using the GPU-version of update makes sense if forces are already on the GPU, i.e. if at least:
- // 1. PME is on the GPU (there should be a copy of coordinates on a GPU in rvec format for PME spread).
- // 2. Non-bonded interactions and buffer ops are on the GPU.
- if (!(useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps)))
+ // Using the GPU-version of update if:
+ // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or
+ // 2. Non-bonded interactions are on the GPU.
+ if (!(useGpuForPme || useGpuForNonbonded))
{
errorMessage += "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
}
* \param[in] isDomainDecomposition Whether there more than one domain.
* \param[in] useGpuForPme Whether GPUs will be used for PME interactions.
* \param[in] useGpuForNonbonded Whether GPUs will be used for nonbonded interactions.
- * \param[in] useGpuForBufferOps Whether GPUs will be used for buffer operations.
* \param[in] updateTarget User choice for running simulation on GPU.
* \param[in] gpusWereDetected Whether compatible GPUs were detected on any node.
* \param[in] inputrec The user input.
bool isDomainDecomposition,
bool useGpuForPme,
bool useGpuForNonbonded,
- bool useGpuForBufferOps,
TaskTarget updateTarget,
bool gpusWereDetected,
const t_inputrec &inputrec,
#include "decidesimulationworkload.h"
+#include "gromacs/ewald/pme.h"
#include "gromacs/taskassignment/taskassignment.h"
#include "gromacs/utility/arrayref.h"
namespace gmx
{
-SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
- bool useGpuForPme,
- bool useGpuForPmeFft,
- bool useGpuForBonded,
- bool useGpuForUpdateConstraints,
- bool useGpuForBufferOps,
- bool useGpuHaloExchange,
- bool useGpuPmePpComm)
+SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
+ PmeRunMode pmeRunMode,
+ bool useGpuForBonded,
+ bool useGpuForUpdate,
+ bool useGpuForBufferOps,
+ bool useGpuHaloExchange,
+ bool useGpuPmePpComm)
{
- SimulationWorkload simulationWorkload {
- useGpuForNonbonded,
- useGpuForPme,
- useGpuForPmeFft,
- useGpuForBonded,
- useGpuForUpdateConstraints,
- useGpuForBufferOps,
- useGpuHaloExchange,
- useGpuPmePpComm,
- useGpuHaloExchange || useGpuPmePpComm
- };
+ SimulationWorkload simulationWorkload;
+ simulationWorkload.useCpuNonbonded = !useGpuForNonbonded;
+ simulationWorkload.useGpuNonbonded = useGpuForNonbonded;
+ simulationWorkload.useCpuPme = (pmeRunMode == PmeRunMode::CPU);
+ simulationWorkload.useGpuPme = (pmeRunMode == PmeRunMode::GPU || pmeRunMode == PmeRunMode::Mixed);
+ simulationWorkload.useGpuPmeFft = (pmeRunMode == PmeRunMode::Mixed);
+ simulationWorkload.useGpuBonded = useGpuForBonded;
+ simulationWorkload.useGpuUpdate = useGpuForUpdate;
+ simulationWorkload.useGpuBufferOps = useGpuForBufferOps || useGpuForUpdate;
+ simulationWorkload.useGpuHaloExchange = useGpuHaloExchange;
+ simulationWorkload.useGpuPmePpCommunication = useGpuPmePpComm;
+ simulationWorkload.useGpuDirectCommunication = useGpuHaloExchange || useGpuPmePpComm;
return simulationWorkload;
}
#include "gromacs/mdtypes/simulation_workload.h"
+enum class PmeRunMode;
+
namespace gmx
{
*
* \param[in] useGpuForNonbonded If we have short-range nonbonded interactions
* calculations on GPU(s).
- * \param[in] useGpuForPme If long range PME interactions are calculated on GPU(s).
- * \param[in] useGpuForPmeFft If FFT solving for PME is done on the GPU.
+ * \param[in]  pmeRunMode             Run mode indicating what resource PME is executed on.
* \param[in] useGpuForBonded If bonded interactions are calculated on GPU(s).
- * \param[in] useGpuForUpdateConstraints If coordinate update and constraint solving is performed on
- * GPU(s).
+ * \param[in] useGpuForUpdate If coordinate update and constraint solving is performed on
+ * GPU(s).
* \param[in] useGpuForBufferOps If buffer ops / reduction are calculated on GPU(s).
* \param[in] useGpuHaloExchange If GPU direct communication is used in halo exchange.
 * \param[in]  useGpuPmePpComm        If GPU direct communication is used in PME-PP communication.
* \returns Simulation lifetime constant workload description.
*/
-SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
- bool useGpuForPme,
- bool useGpuForPmeFft,
- bool useGpuForBonded,
- bool useGpuForUpdateConstraints,
- bool useGpuForBufferOps,
- bool useGpuHaloExchange,
- bool useGpuPmePpComm);
+SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
+ PmeRunMode pmeRunMode,
+ bool useGpuForBonded,
+ bool useGpuForUpdate,
+ bool useGpuForBufferOps,
+ bool useGpuHaloExchange,
+ bool useGpuPmePpComm);
} // namespace gmx